1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-10-24 19:28:36 +00:00
yt-dlp/yt_dlp/extractor/medialaan.py
pukkandan bfd973ece3 [extractors] Use new framework for existing embeds (#4307)
`Brightcove` is difficult to migrate because its subclasses may depend
on the signature of the current functions. So it is left as-is for now

Note: Tests have not been migrated
2022-08-02 01:08:16 +05:30

113 lines
4.1 KiB
Python

import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
mimetype2ext,
parse_iso8601,
)
class MedialaanIE(InfoExtractor):
    """Extractor for mychannels.video productions.

    Handles direct mychannels.video embed/SDK/script URLs as well as the
    ``/video/...~p<id>`` pages of the Belgian and Dutch news sites listed in
    ``_VALID_URL``. The numeric production id is captured as the ``id`` group.
    """

    # Verbose-mode pattern: whitespace inside the literal is ignored.
    # Every literal dot in a host name is escaped so that it matches only
    # a real '.', not an arbitrary character.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:embed\.)?mychannels\.video/embed/|
                            embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
                            (?:www\.)?(?:
                                (?:
                                    7sur7|
                                    demorgen|
                                    hln|
                                    joe|
                                    qmusic
                                )\.be|
                                (?:
                                    [abe]d|
                                    bndestem|
                                    destentor|
                                    gelderlander|
                                    pzc|
                                    tubantia|
                                    volkskrant
                                )\.nl
                            )/video/(?:[^/]+/)*[^/?&#]+~p
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
        'info_dict': {
            'id': '193993',
            'ext': 'mp4',
            'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
            'timestamp': 1611663540,
            'upload_date': '20210126',
            'duration': 238,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/script/production/193993',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/production/193993',
        'only_matching': True,
    }, {
        'url': 'https://mychannels.video/embed/193993',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/embed/193993',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return embed URLs for every mychannels video ``<div>`` in *webpage*.

        Scans for ``<div>`` opening tags carrying
        ``data-mychannels-type="video"`` and builds one
        ``https://mychannels.video/embed/<id>`` URL per tag that has a
        non-empty ``data-mychannels-id`` attribute. *url* is not used here.
        """
        div_tags = re.findall(
            r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage)
        mychannels_ids = (
            extract_attributes(tag).get('data-mychannels-id')
            for tag in div_tags)
        # Tags without a (non-empty) id attribute are skipped.
        return [
            'https://mychannels.video/embed/' + mychannels_id
            for mychannels_id in mychannels_ids if mychannels_id]

    def _real_extract(self, url):
        """Build the info dict for the production referenced by *url*.

        Fetches JSON from the ``embed.mychannels.video/sdk/production/<id>``
        endpoint and uses the first entry of its ``productions`` list.
        Indexing raises ``KeyError``/``IndexError`` if that list is missing
        or empty, and ``KeyError`` if the production has no ``title``.
        """
        production_id = self._match_id(url)
        # NOTE(review): the meaning of the 'UUUU_default' options value is
        # not visible in this file; it is sent unchanged.
        production = self._download_json(
            'https://embed.mychannels.video/sdk/production/' + production_id,
            production_id, query={'options': 'UUUU_default'})['productions'][0]
        title = production['title']

        formats = []
        for source in (production.get('sources') or []):
            src = source.get('src')
            if not src:
                # A source without a URL cannot be turned into a format.
                continue
            ext = mimetype2ext(source.get('type'))
            if ext == 'm3u8':
                # HLS manifests are expanded into their variant formats;
                # fatal=False is passed so a failure here is presumably
                # non-fatal for the other sources.
                formats.extend(self._extract_m3u8_formats(
                    src, production_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'ext': ext,
                    'url': src,
                })
        self._sort_formats(formats)

        return {
            'id': production_id,
            'title': title,
            'formats': formats,
            'thumbnail': production.get('posterUrl'),
            # ' ' is passed as the second argument, presumably the
            # delimiter between the date and time parts.
            'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
            # Any falsy result (e.g. 0) is reported as None.
            'duration': int_or_none(production.get('duration')) or None,
        }