Update to ytdl-2021.01.03

2026-02-05 05:26:55 +00:00 · 2021-01-01 17:56:37 +05:30
parent c2b5f3114f
commit 29f7c58aaf
96 changed files with 5757 additions and 3771 deletions
--- a/youtube_dlc/extractor/wdr.py
+++ b/youtube_dlc/extractor/wdr.py
@@ -17,6 +17,7 @@ from ..utils import (
    unified_strdate,
    update_url_query,
    urlhandle_detect_ext,
+    url_or_none,
 )


@@ -42,15 +43,15 @@ class WDRIE(InfoExtractor):
        is_live = metadata.get('mediaType') == 'live'

        tracker_data = metadata['trackerData']
+        title = tracker_data['trackerClipTitle']
        media_resource = metadata['mediaResource']

        formats = []
-        subtitles = {}

        # check if the metadata contains a direct URL to a file
-        for kind, media_resource in media_resource.items():
+        for kind, media in media_resource.items():
            if kind == 'captionsHash':
-                for ext, url in media_resource.items():
+                for ext, url in media.items():
                    subtitles.setdefault('de', []).append({
                        'url': url,
                        'ext': ext,
@@ -59,8 +60,10 @@ class WDRIE(InfoExtractor):

            if kind not in ('dflt', 'alt'):
                continue
+            if not isinstance(media, dict):
+                continue

-            for tag_name, medium_url in media_resource.items():
+            for tag_name, medium_url in media.items():
                if tag_name not in ('videoURL', 'audioURL'):
                    continue

@@ -90,7 +93,23 @@ class WDRIE(InfoExtractor):

        self._sort_formats(formats)

-        title = tracker_data['trackerClipTitle']
+        subtitles = {}
+        caption_url = media_resource.get('captionURL')
+        if caption_url:
+            subtitles['de'] = [{
+                'url': caption_url,
+                'ext': 'ttml',
+            }]
+        captions_hash = media_resource.get('captionsHash')
+        if isinstance(captions_hash, dict):
+            for ext, format_url in captions_hash.items():
+                format_url = url_or_none(format_url)
+                if not format_url:
+                    continue
+                subtitles.setdefault('de', []).append({
+                    'url': format_url,
+                    'ext': determine_ext(format_url, None) or ext,
+                })

        return {
            'id': tracker_data.get('trackerClipId', video_id),
@@ -106,7 +125,7 @@ class WDRIE(InfoExtractor):
 class WDRPageIE(InfoExtractor):
    _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
    _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
-    _VALID_URL = r'https?://(?:www\d?\.)?(?:wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
+    _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL

    _TESTS = [
        {
@@ -213,7 +232,11 @@ class WDRPageIE(InfoExtractor):
        {
            'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
            'only_matching': True,
-        }
+        },
+        {
+            'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
+            'only_matching': True,
+        },
    ]

    def _real_extract(self, url):