Merge 4c0a55ac8e into aea85d525e

2025-08-17 18:08:30 +00:00 · 2025-08-14 09:17:50 +08:00 · 2025-08-14 09:17:50 +08:00 · f148cd4919
commit f148cd4919
parent aea85d525e 4c0a55ac8e
2 changed files with 183 additions and 101 deletions
--- a/yt_dlp/extractor/medialaan.py
+++ b/yt_dlp/extractor/medialaan.py
@ -1,15 +1,73 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    determine_ext,
    extract_attributes,
    int_or_none,
-    mimetype2ext,
+    parse_resolution,
-    parse_iso8601,
+    str_or_none,
    url_or_none,
 )
 from ..utils.traversal import find_elements, traverse_obj
-class MedialaanIE(InfoExtractor):
+class MedialaanBaseIE(InfoExtractor):
    def _extract_from_mychannels_api(self, mychannels_id):
        """Build an info dict for a MyChannels video from its embed page and API.

        Fetches the embed page for ``mychannels_id``, reads the brand
        configuration embedded in it, then queries the MyChannels embed API
        with that brand sent as the ``X-Mychannels-Brand`` header. Formats are
        collected from the response's ``streams`` list and metadata from the
        remaining response fields.

        :param mychannels_id: MyChannels video ID; used both in the request
            URLs and as the ``id`` of the returned info dict.
        :return: dict with ``id``, ``formats`` and whichever metadata fields
            could be read from the API response.
        :raises KeyError: if the brand config has no ``brand`` key.
        """
        webpage = self._download_webpage(
            f'https://mychannels.video/embed/{mychannels_id}', mychannels_id)
        # The embed page assigns a JSON object to window.mychannels.brand_config;
        # its 'brand' value is sent as a request header on the API call below.
        brand_config = self._search_json(
            r'window\.mychannels\.brand_config\s*=', webpage, 'brand config', mychannels_id)
        response = self._download_json(
            f'https://api.mychannels.world/v1/embed/video/{mychannels_id}',
            mychannels_id, headers={'X-Mychannels-Brand': brand_config['brand']})

        formats = []
        # Only visit stream entries whose 'url' passes url_or_none.
        for stream in traverse_obj(response, (
            'streams', lambda _, v: url_or_none(v['url']),
        )):
            source_url = stream['url']
            ext = determine_ext(source_url)
            if ext == 'm3u8':
                # fatal=False: a manifest that fails to load does not abort extraction.
                formats.extend(self._extract_m3u8_formats(
                    source_url, mychannels_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                # Direct (non-HLS) stream: the 'quality' string doubles as the format ID.
                format_id = traverse_obj(stream, ('quality', {str}))
                formats.append({
                    'ext': ext,
                    'format_id': format_id,
                    'url': source_url,
                    # NOTE(review): presumably adds width/height when the quality
                    # label looks like a resolution — confirm against parse_resolution.
                    **parse_resolution(format_id),
                })

        return {
            'id': mychannels_id,
            'formats': formats,
            # Top-level metadata fields of the API response.
            **traverse_obj(response, {
                'title': ('title', {clean_html}),
                'description': ('description', {clean_html}, filter),
                # Scaled by 1000 (the key name suggests milliseconds -> seconds);
                # a negative result is replaced with None.
                'duration': ('durationMs', {int_or_none(scale=1000)}, {lambda x: x if x >= 0 else None}),
                'genres': ('genre', 'title', {str}, filter, all, filter),
                'is_live': ('live', {bool}),
                'release_timestamp': ('publicationTimestampMs', {int_or_none(scale=1000)}),
                'tags': ('tags', ..., 'title', {str}, filter, all, filter),
                'thumbnail': ('image', 'baseUrl', {url_or_none}),
            }),
            # The nested 'channel', 'organisation' and 'show' objects each
            # contribute a title/id pair under different info-dict keys.
            **traverse_obj(response, ('channel', {
                'channel': ('title', {clean_html}),
                'channel_id': ('id', {str_or_none}),
            })),
            **traverse_obj(response, ('organisation', {
                'uploader': ('title', {clean_html}),
                'uploader_id': ('id', {str_or_none}),
            })),
            **traverse_obj(response, ('show', {
                'series': ('title', {clean_html}),
                'series_id': ('id', {str_or_none}),
            })),
        }
 class MedialaanIE(MedialaanBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
@ -32,7 +90,7 @@ class MedialaanIE(InfoExtractor):
                                    tubantia|
                                    volkskrant
                                )\.nl
-                            )/video/(?:[^/]+/)*[^/?&#]+~p
+                            )/videos?/(?:[^/]+/)*[^/?&#]+(?:-|~p)
                        )
                        (?P<id>\d+)
                    '''
@ -42,19 +100,83 @@ class MedialaanIE(InfoExtractor):
            'id': '193993',
            'ext': 'mp4',
            'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
-            'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+',
+            'description': 'In een nieuwe Gegenpressing video bespreken Yadran Blanco en Dennis Kas het nieuws omrent NAC.',
            'timestamp': 1611663540,
            'upload_date': '20210126',
            'duration': 238,
-        },
+            'channel': 'BN DeStem',
-        'params': {
+            'channel_id': '418',
-            'skip_download': True,
+            'genres': ['Sports'],
            'release_date': '20210126',
            'release_timestamp': 1611663540,
            'series': 'Korte Reportage',
            'series_id': '972',
            'tags': 'count:2',
            'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
            'uploader': 'BN De Stem',
            'uploader_id': '26',
        },
    }, {
        'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
-        'only_matching': True,
+        'info_dict': {
            'id': '194093',
            'ext': 'mp4',
            'title': 'Noodbevel in Doetinchem: politie stuurt mensen centrum uit',
            'description': 'md5:77e85b2cb26cfff9dc1fe2b1db524001',
            'duration': 44,
            'channel': 'De Gelderlander',
            'channel_id': '320',
            'genres': ['News'],
            'release_date': '20210126',
            'release_timestamp': 1611690600,
            'series': 'Snel Nieuws',
            'series_id': '984',
            'tags': 'count:1',
            'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
            'uploader': 'De Gelderlander',
            'uploader_id': '25',
        },
    }, {
-        'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
+        'url': 'https://www.7sur7.be/videos/production/lla-tendance-tiktok-qui-enflamme-lespagne-707650',
        'info_dict': {
            'id': '707650',
            'ext': 'mp4',
            'title': 'La tendance TikTok qui enflamme l’Espagne',
            'description': 'md5:c7ec4cb733190f227fc8935899f533b5',
            'duration': 70,
            'channel': 'Lifestyle',
            'channel_id': '770',
            'genres': ['Beauty & Lifestyle'],
            'release_date': '20240906',
            'release_timestamp': 1725617330,
            'series': 'Lifestyle',
            'series_id': '1848',
            'tags': 'count:1',
            'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
            'uploader': '7sur7',
            'uploader_id': '67',
        },
    }, {
        'url': 'https://mychannels.video/embed/313117',
        'info_dict': {
            'id': '313117',
            'ext': 'mp4',
            'title': str,
            'description': 'md5:255e2e52f6fe8a57103d06def438f016',
            'channel': 'AD',
            'channel_id': '238',
            'genres': ['News'],
            'live_status': 'is_live',
            'release_date': '20241225',
            'release_timestamp': 1735169425,
            'series': 'Nieuws Update',
            'series_id': '3337',
            'tags': 'count:1',
            'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
            'uploader': 'AD',
            'uploader_id': '1',
        },
        'params': {'skip_download': 'Livestream'},
    }, {
        'url': 'https://embed.mychannels.video/sdk/production/193993',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/script/production/193993',
@ -62,9 +184,6 @@ class MedialaanIE(InfoExtractor):
    }, {
        'url': 'https://embed.mychannels.video/production/193993',
        'only_matching': True,
    }, {
        'url': 'https://mychannels.video/embed/193993',
        'only_matching': True,
    }, {
        'url': 'https://embed.mychannels.video/embed/193993',
        'only_matching': True,
@ -75,51 +194,32 @@ class MedialaanIE(InfoExtractor):
            'id': '1576607',
            'ext': 'mp4',
            'title': 'Tom Waes blaastest',
            'channel': 'De Morgen',
            'channel_id': '352',
            'description': 'Tom Waes werkt mee aan een alcoholcampagne op Werchter',
            'duration': 62,
            'genres': ['News'],
            'release_date': '20250705',
            'release_timestamp': 1751730795,
            'series': 'Nieuwsvideo\'s',
            'series_id': '1683',
            'tags': 'count:1',
            'thumbnail': r're:https?://video-images\.persgroep\.be/aws_generated.+\.jpg',
-            'timestamp': 1751730795,
+            'uploader': 'De Morgen',
-            'upload_date': '20250705',
+            'uploader_id': '17',
        },
        'params': {'extractor_args': {'generic': {'impersonate': ['chrome']}}},
    }]
    @classmethod
    def _extract_embed_urls(cls, url, webpage):
-        entries = []
+        return traverse_obj(webpage, (
-        for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
+            {find_elements(tag='div', attr='data-mychannels-type', value='video', html=True)},
-            mychannels_id = extract_attributes(element).get('data-mychannels-id')
+            ..., {extract_attributes}, 'data-mychannels-id', {str},
-            if mychannels_id:
+            {lambda x: f'https://mychannels.video/embed/{x}'}, {url_or_none}, filter, all, filter,
-                entries.append('https://mychannels.video/embed/' + mychannels_id)
+        ))
        return entries
    def _real_extract(self, url):
-        production_id = self._match_id(url)
+        mychannels_id = self._match_id(url)
        production = self._download_json(
            'https://embed.mychannels.video/sdk/production/' + production_id,
            production_id, query={'options': 'UUUU_default'})['productions'][0]
        title = production['title']
-        formats = []
+        return self._extract_from_mychannels_api(mychannels_id)
        for source in (production.get('sources') or []):
            src = source.get('src')
            if not src:
                continue
            ext = mimetype2ext(source.get('type'))
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, production_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'ext': ext,
                    'url': src,
                })
        return {
            'id': production_id,
            'title': title,
            'formats': formats,
            'thumbnail': production.get('posterUrl'),
            'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
            'duration': int_or_none(production.get('duration')) or None,
        }
--- a/yt_dlp/extractor/vtm.py
+++ b/yt_dlp/extractor/vtm.py
@ -1,60 +1,42 @@
-from .common import InfoExtractor
+from .medialaan import MedialaanBaseIE
-from ..utils import (
+from ..utils import str_or_none
-    int_or_none,
+from ..utils.traversal import require, traverse_obj
    parse_iso8601,
    try_get,
 )
-class VTMIE(InfoExtractor):
+class VTMIE(MedialaanBaseIE):
-    _WORKING = False
+    _VALID_URL = r'https?://(?:www\.)?vtm\.be/[^/?#]+~v(?P<id>[\da-f-]+)'
-    _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
+    _TESTS = [{
    _TEST = {
        'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
        'md5': '37dca85fbc3a33f2de28ceb834b071f8',
        'info_dict': {
            'id': '192445',
            'ext': 'mp4',
            'title': 'Gast vernielt Genkse hotelkamer',
-            'timestamp': 1611060180,
+            'channel': 'VTM',
-            'upload_date': '20210119',
+            'channel_id': '867',
            'description': 'md5:75fce957d219646ff1b65ba449ab97b5',
            'duration': 74,
-            # TODO: fix url _type result processing
+            'genres': ['Documentaries'],
-            # 'series': 'Op Interventie',
+            'release_date': '20210119',
            'release_timestamp': 1611060180,
            'series': 'Op Interventie',
            'series_id': '2658',
            'tags': 'count:2',
            'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+\.(?:jpe?g|png)',
            'uploader': 'VTM',
            'uploader_id': '74',
        },
-    }
+    }]
    def _real_initialize(self):
        if not self._get_cookies('https://vtm.be/').get('authId'):
            self.raise_login_required()
    def _real_extract(self, url):
        uuid = self._match_id(url)
-        video = self._download_json(
+        webpage = self._download_webpage(url, uuid)
-            'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
+        apollo_state = self._search_json(
-            uuid, query={
+            r'window\.__APOLLO_STATE__\s*=', webpage, 'apollo state', uuid)
-                'query': '''{
+        mychannels_id = traverse_obj(apollo_state, (
-  getComponent(type: Video, uuid: "%s") {
+            f'Video:{{"uuid":"{uuid}"}}', 'myChannelsVideo', {str_or_none}, {require('mychannels ID')}))
    ... on Video {
      description
      duration
      myChannelsVideo
      program {
        title
      }
      publishedAt
      title
    }
  }
 }''' % uuid,  # noqa: UP031
            }, headers={
                'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
            })['data']['getComponent']
-        return {
+        return self._extract_from_mychannels_api(mychannels_id)
            '_type': 'url',
            'id': uuid,
            'title': video.get('title'),
            'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
            'description': video.get('description'),
            'timestamp': parse_iso8601(video.get('publishedAt')),
            'duration': int_or_none(video.get('duration')),
            'series': try_get(video, lambda x: x['program']['title']),
            'ie_key': 'Medialaan',
        }