1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-17 18:08:30 +00:00
This commit is contained in:
doe1080 2025-08-14 09:17:50 +08:00 committed by GitHub
commit f148cd4919
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 183 additions and 101 deletions

View File

@ -1,15 +1,73 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
determine_ext,
extract_attributes, extract_attributes,
int_or_none, int_or_none,
mimetype2ext, parse_resolution,
parse_iso8601, str_or_none,
url_or_none,
) )
from ..utils.traversal import find_elements, traverse_obj
class MedialaanIE(InfoExtractor): class MedialaanBaseIE(InfoExtractor):
def _extract_from_mychannels_api(self, mychannels_id):
    """Return the info dict for a MyChannels video.

    The embed page is fetched first because it carries the
    ``window.mychannels.brand_config`` JSON; its ``brand`` value is sent
    as the ``X-Mychannels-Brand`` header when querying the embed API.

    :param mychannels_id: MyChannels video ID; also used as the video ID
        for every download and as the ``id`` of the result.
    :return: dict with ``id``, ``formats`` and the metadata fields
        traversed out of the API response (title, description, duration,
        channel, uploader, series, ...).
    """
    embed_page = self._download_webpage(
        f'https://mychannels.video/embed/{mychannels_id}', mychannels_id)
    brand_config = self._search_json(
        r'window\.mychannels\.brand_config\s*=', embed_page, 'brand config', mychannels_id)
    api_headers = {'X-Mychannels-Brand': brand_config['brand']}
    response = self._download_json(
        f'https://api.mychannels.world/v1/embed/video/{mychannels_id}',
        mychannels_id, headers=api_headers)

    # Only streams whose 'url' passes url_or_none are considered
    usable_streams = traverse_obj(response, (
        'streams', lambda _, v: url_or_none(v['url']),
    ))

    formats = []
    for stream in usable_streams:
        source_url = stream['url']
        ext = determine_ext(source_url)
        if ext != 'm3u8':
            # Direct file: the 'quality' string serves as format_id and is
            # also handed to parse_resolution for the resolution fields
            format_id = traverse_obj(stream, ('quality', {str}))
            formats.append({
                'ext': ext,
                'format_id': format_id,
                'url': source_url,
                **parse_resolution(format_id),
            })
            continue
        # HLS manifest: expanded into its variants, non-fatal on failure
        formats.extend(self._extract_m3u8_formats(
            source_url, mychannels_id, 'mp4', m3u8_id='hls', fatal=False))

    video_meta = traverse_obj(response, {
        'title': ('title', {clean_html}),
        'description': ('description', {clean_html}, filter),
        # Millisecond value scaled to seconds; negative results are dropped
        'duration': ('durationMs', {int_or_none(scale=1000)}, {lambda secs: secs if secs >= 0 else None}),
        'genres': ('genre', 'title', {str}, filter, all, filter),
        'is_live': ('live', {bool}),
        'release_timestamp': ('publicationTimestampMs', {int_or_none(scale=1000)}),
        'tags': ('tags', ..., 'title', {str}, filter, all, filter),
        'thumbnail': ('image', 'baseUrl', {url_or_none}),
    })
    channel_meta = traverse_obj(response, ('channel', {
        'channel': ('title', {clean_html}),
        'channel_id': ('id', {str_or_none}),
    }))
    uploader_meta = traverse_obj(response, ('organisation', {
        'uploader': ('title', {clean_html}),
        'uploader_id': ('id', {str_or_none}),
    }))
    series_meta = traverse_obj(response, ('show', {
        'series': ('title', {clean_html}),
        'series_id': ('id', {str_or_none}),
    }))

    return {
        'id': mychannels_id,
        'formats': formats,
        **video_meta,
        **channel_meta,
        **uploader_meta,
        **series_meta,
    }
class MedialaanIE(MedialaanBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
@ -32,7 +90,7 @@ class MedialaanIE(InfoExtractor):
tubantia| tubantia|
volkskrant volkskrant
)\.nl )\.nl
)/video/(?:[^/]+/)*[^/?&#]+~p )/videos?/(?:[^/]+/)*[^/?&#]+(?:-|~p)
) )
(?P<id>\d+) (?P<id>\d+)
''' '''
@ -42,19 +100,83 @@ class MedialaanIE(InfoExtractor):
'id': '193993', 'id': '193993',
'ext': 'mp4', 'ext': 'mp4',
'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?', 'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+', 'description': 'In een nieuwe Gegenpressing video bespreken Yadran Blanco en Dennis Kas het nieuws omrent NAC.',
'timestamp': 1611663540,
'upload_date': '20210126',
'duration': 238, 'duration': 238,
}, 'channel': 'BN DeStem',
'params': { 'channel_id': '418',
'skip_download': True, 'genres': ['Sports'],
'release_date': '20210126',
'release_timestamp': 1611663540,
'series': 'Korte Reportage',
'series_id': '972',
'tags': 'count:2',
'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
'uploader': 'BN De Stem',
'uploader_id': '26',
}, },
}, { }, {
'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093', 'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
'only_matching': True, 'info_dict': {
'id': '194093',
'ext': 'mp4',
'title': 'Noodbevel in Doetinchem: politie stuurt mensen centrum uit',
'description': 'md5:77e85b2cb26cfff9dc1fe2b1db524001',
'duration': 44,
'channel': 'De Gelderlander',
'channel_id': '320',
'genres': ['News'],
'release_date': '20210126',
'release_timestamp': 1611690600,
'series': 'Snel Nieuws',
'series_id': '984',
'tags': 'count:1',
'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
'uploader': 'De Gelderlander',
'uploader_id': '25',
},
}, { }, {
'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default', 'url': 'https://www.7sur7.be/videos/production/lla-tendance-tiktok-qui-enflamme-lespagne-707650',
'info_dict': {
'id': '707650',
'ext': 'mp4',
'title': 'La tendance TikTok qui enflamme lEspagne',
'description': 'md5:c7ec4cb733190f227fc8935899f533b5',
'duration': 70,
'channel': 'Lifestyle',
'channel_id': '770',
'genres': ['Beauty & Lifestyle'],
'release_date': '20240906',
'release_timestamp': 1725617330,
'series': 'Lifestyle',
'series_id': '1848',
'tags': 'count:1',
'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
'uploader': '7sur7',
'uploader_id': '67',
},
}, {
'url': 'https://mychannels.video/embed/313117',
'info_dict': {
'id': '313117',
'ext': 'mp4',
'title': str,
'description': 'md5:255e2e52f6fe8a57103d06def438f016',
'channel': 'AD',
'channel_id': '238',
'genres': ['News'],
'live_status': 'is_live',
'release_date': '20241225',
'release_timestamp': 1735169425,
'series': 'Nieuws Update',
'series_id': '3337',
'tags': 'count:1',
'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
'uploader': 'AD',
'uploader_id': '1',
},
'params': {'skip_download': 'Livestream'},
}, {
'url': 'https://embed.mychannels.video/sdk/production/193993',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://embed.mychannels.video/script/production/193993', 'url': 'https://embed.mychannels.video/script/production/193993',
@ -62,9 +184,6 @@ class MedialaanIE(InfoExtractor):
}, { }, {
'url': 'https://embed.mychannels.video/production/193993', 'url': 'https://embed.mychannels.video/production/193993',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://mychannels.video/embed/193993',
'only_matching': True,
}, { }, {
'url': 'https://embed.mychannels.video/embed/193993', 'url': 'https://embed.mychannels.video/embed/193993',
'only_matching': True, 'only_matching': True,
@ -75,51 +194,32 @@ class MedialaanIE(InfoExtractor):
'id': '1576607', 'id': '1576607',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tom Waes blaastest', 'title': 'Tom Waes blaastest',
'channel': 'De Morgen',
'channel_id': '352',
'description': 'Tom Waes werkt mee aan een alcoholcampagne op Werchter',
'duration': 62, 'duration': 62,
'genres': ['News'],
'release_date': '20250705',
'release_timestamp': 1751730795,
'series': 'Nieuwsvideo\'s',
'series_id': '1683',
'tags': 'count:1',
'thumbnail': r're:https?://video-images\.persgroep\.be/aws_generated.+\.jpg', 'thumbnail': r're:https?://video-images\.persgroep\.be/aws_generated.+\.jpg',
'timestamp': 1751730795, 'uploader': 'De Morgen',
'upload_date': '20250705', 'uploader_id': '17',
}, },
'params': {'extractor_args': {'generic': {'impersonate': ['chrome']}}}, 'params': {'extractor_args': {'generic': {'impersonate': ['chrome']}}},
}] }]
@classmethod @classmethod
def _extract_embed_urls(cls, url, webpage): def _extract_embed_urls(cls, url, webpage):
entries = [] return traverse_obj(webpage, (
for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage): {find_elements(tag='div', attr='data-mychannels-type', value='video', html=True)},
mychannels_id = extract_attributes(element).get('data-mychannels-id') ..., {extract_attributes}, 'data-mychannels-id', {str},
if mychannels_id: {lambda x: f'https://mychannels.video/embed/{x}'}, {url_or_none}, filter, all, filter,
entries.append('https://mychannels.video/embed/' + mychannels_id) ))
return entries
def _real_extract(self, url): def _real_extract(self, url):
production_id = self._match_id(url) mychannels_id = self._match_id(url)
production = self._download_json(
'https://embed.mychannels.video/sdk/production/' + production_id,
production_id, query={'options': 'UUUU_default'})['productions'][0]
title = production['title']
formats = [] return self._extract_from_mychannels_api(mychannels_id)
for source in (production.get('sources') or []):
src = source.get('src')
if not src:
continue
ext = mimetype2ext(source.get('type'))
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, production_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'ext': ext,
'url': src,
})
return {
'id': production_id,
'title': title,
'formats': formats,
'thumbnail': production.get('posterUrl'),
'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
'duration': int_or_none(production.get('duration')) or None,
}

View File

@ -1,60 +1,42 @@
from .common import InfoExtractor from .medialaan import MedialaanBaseIE
from ..utils import ( from ..utils import str_or_none
int_or_none, from ..utils.traversal import require, traverse_obj
parse_iso8601,
try_get,
)
class VTMIE(InfoExtractor): class VTMIE(MedialaanBaseIE):
_WORKING = False _VALID_URL = r'https?://(?:www\.)?vtm\.be/[^/?#]+~v(?P<id>[\da-f-]+)'
_VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})' _TESTS = [{
_TEST = {
'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1', 'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
'md5': '37dca85fbc3a33f2de28ceb834b071f8',
'info_dict': { 'info_dict': {
'id': '192445', 'id': '192445',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Gast vernielt Genkse hotelkamer', 'title': 'Gast vernielt Genkse hotelkamer',
'timestamp': 1611060180, 'channel': 'VTM',
'upload_date': '20210119', 'channel_id': '867',
'description': 'md5:75fce957d219646ff1b65ba449ab97b5',
'duration': 74, 'duration': 74,
# TODO: fix url _type result processing 'genres': ['Documentaries'],
# 'series': 'Op Interventie', 'release_date': '20210119',
'release_timestamp': 1611060180,
'series': 'Op Interventie',
'series_id': '2658',
'tags': 'count:2',
'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
'uploader': 'VTM',
'uploader_id': '74',
}, },
} }]
def _real_initialize(self):
if not self._get_cookies('https://vtm.be/').get('authId'):
self.raise_login_required()
def _real_extract(self, url): def _real_extract(self, url):
uuid = self._match_id(url) uuid = self._match_id(url)
video = self._download_json( webpage = self._download_webpage(url, uuid)
'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql', apollo_state = self._search_json(
uuid, query={ r'window\.__APOLLO_STATE__\s*=', webpage, 'apollo state', uuid)
'query': '''{ mychannels_id = traverse_obj(apollo_state, (
getComponent(type: Video, uuid: "%s") { f'Video:{{"uuid":"{uuid}"}}', 'myChannelsVideo', {str_or_none}, {require('mychannels ID')}))
... on Video {
description
duration
myChannelsVideo
program {
title
}
publishedAt
title
}
}
}''' % uuid, # noqa: UP031
}, headers={
'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
})['data']['getComponent']
return { return self._extract_from_mychannels_api(mychannels_id)
'_type': 'url',
'id': uuid,
'title': video.get('title'),
'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
'description': video.get('description'),
'timestamp': parse_iso8601(video.get('publishedAt')),
'duration': int_or_none(video.get('duration')),
'series': try_get(video, lambda x: x['program']['title']),
'ie_key': 'Medialaan',
}