Merge branch 'yt-dlp:master' into generic_tests

2025-08-16 01:18:29 +00:00 · 2025-07-19 14:37:15 +09:00 · 2025-07-19 14:37:15 +09:00 · bb6b063f45
commit bb6b063f45
parent 95e516b8b4 1a8474c3ca
6 changed files with 182 additions and 27 deletions
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -2195,7 +2195,7 @@ def _filter(f):
            return op(actual_value, comparison_value)
        return _filter

-    def _check_formats(self, formats):
+    def _check_formats(self, formats, warning=True):
        for f in formats:
            working = f.get('__working')
            if working is not None:
@ -2208,6 +2208,9 @@ def _check_formats(self, formats):
                continue
            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
            temp_file.close()
+            # If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code.
+            # Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750
+            original_retcode = self._download_retcode
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, OSError, ValueError, *network_exceptions):
@ -2218,12 +2221,18 @@ def _check_formats(self, formats):
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
+            # Restore the actual return code
+            self._download_retcode = original_retcode
            f['__working'] = success
            if success:
                f.pop('__needs_testing', None)
                yield f
            else:
-                self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
+                msg = f'Unable to download format {f["format_id"]}. Skipping...'
+                if warning:
+                    self.report_warning(msg)
+                else:
+                    self.to_screen(f'[info] {msg}')

    def _select_formats(self, formats, selector):
        return list(selector({
@ -2949,7 +2958,7 @@ def is_wellformed(f):
                    )

        if self.params.get('check_formats') is True:
-            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
+            formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -1557,6 +1557,7 @@
    PlatziCourseIE,
    PlatziIE,
 )
+from .playerfm import PlayerFmIE
 from .playplustv import PlayPlusTVIE
 from .playsuisse import PlaySuisseIE
 from .playtvak import PlaytvakIE
--- a/yt_dlp/extractor/aenetworks.py
+++ b/yt_dlp/extractor/aenetworks.py
@ -111,11 +111,9 @@ class AENetworksIE(AENetworksBaseIE):
    IE_NAME = 'aenetworks'
    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
-        shows/[^/]+/season-\d+/episode-\d+|
-        (?:
-            (?:movie|special)s/[^/]+|
-            (?:shows/[^/]+/)?videos
-        )/[^/?#&]+
+        shows/[^/?#]+/season-\d+/episode-\d+|
+        (?P<type>movie|special)s/[^/?#]+(?P<extra>/[^/?#]+)?|
+        (?:shows/[^/?#]+/)?videos/[^/?#]+
    )'''
    _TESTS = [{
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
@ -128,7 +126,7 @@ class AENetworksIE(AENetworksBaseIE):
            'upload_date': '20120529',
            'uploader': 'AENE-NEW',
            'duration': 2592.0,
-            'thumbnail': r're:^https?://.*\.jpe?g$',
+            'thumbnail': r're:https?://.+/.+\.jpg',
            'chapters': 'count:5',
            'tags': 'count:14',
            'categories': ['Mountain Men'],
@ -139,10 +137,7 @@ class AENetworksIE(AENetworksBaseIE):
            'series': 'Mountain Men',
            'age_limit': 0,
        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
+        'params': {'skip_download': 'm3u8'},
        'add_ie': ['ThePlatform'],
        'skip': 'Geo-restricted - This content is not available in your location.',
    }, {
@ -156,7 +151,7 @@ class AENetworksIE(AENetworksBaseIE):
            'upload_date': '20160112',
            'uploader': 'AENE-NEW',
            'duration': 1277.695,
-            'thumbnail': r're:^https?://.*\.jpe?g$',
+            'thumbnail': r're:https?://.+/.+\.jpg',
            'chapters': 'count:4',
            'tags': 'count:23',
            'episode': 'Inlawful Entry',
@ -166,10 +161,53 @@ class AENetworksIE(AENetworksBaseIE):
            'series': 'Duck Dynasty',
            'age_limit': 0,
        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
+        'params': {'skip_download': 'm3u8'},
+        'add_ie': ['ThePlatform'],
+    }, {
+        'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams',
+        'info_dict': {
+            'id': '1590627395981',
+            'ext': 'mp4',
+            'title': 'VC Andrews\' Web of Dreams',
+            'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce',
+            'uploader': 'AENE-NEW',
+            'age_limit': 14,
+            'duration': 5253.665,
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'chapters': 'count:8',
+            'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"],
+            'series': '',
+            'season': 'Season 0',
+            'season_number': 0,
+            'episode': 'VC Andrews\' Web of Dreams',
+            'episode_number': 0,
+            'timestamp': 1566489703.0,
+            'upload_date': '20190822',
        },
+        'params': {'skip_download': 'm3u8'},
+        'add_ie': ['ThePlatform'],
+    }, {
+        'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story',
+        'info_dict': {
+            'id': '1488235587551',
+            'ext': 'mp4',
+            'title': 'Hunting JonBenet\'s Killer: The Untold Story',
+            'description': 'md5:209869425ee392d74fe29201821e48b4',
+            'uploader': 'AENE-NEW',
+            'age_limit': 14,
+            'duration': 5003.903,
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'chapters': 'count:10',
+            'tags': 'count:11',
+            'series': '',
+            'season': 'Season 0',
+            'season_number': 0,
+            'episode': 'Hunting JonBenet\'s Killer: The Untold Story',
+            'episode_number': 0,
+            'timestamp': 1554987697.0,
+            'upload_date': '20190411',
+        },
+        'params': {'skip_download': 'm3u8'},
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
@ -198,7 +236,9 @@ class AENetworksIE(AENetworksBaseIE):
    }]

    def _real_extract(self, url):
-        domain, canonical = self._match_valid_url(url).groups()
+        domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra')
+        if url_type in ('movie', 'special') and not extra:
+            canonical += f'/full-{url_type}'
        return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)


--- a/yt_dlp/extractor/dangalplay.py
+++ b/yt_dlp/extractor/dangalplay.py
@ -11,8 +11,14 @@

 class DangalPlayBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'dangalplay'
+    _REGION = 'IN'
    _OTV_USER_ID = None
-    _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage'
+    _LOGIN_HINT = (
+        'Pass credentials as -u "token" -p "USER_ID" '
+        '(where USER_ID is the value of "otv_user_id" in your browser local storage). '
+        'Your login region can be optionally suffixed to the username as @REGION '
+        '(where REGION is the two-letter "region" code found in your browser local storage), '
+        'e.g.: -u "token@IN" -p "USER_ID"')
    _API_BASE = 'https://ottapi.dangalplay.com'
    _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM'  # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
    _SECRET_KEY = 'f53d31a4377e4ef31fa0'  # same as above
@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor):
    def _perform_login(self, username, password):
        if self._OTV_USER_ID:
            return
-        if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password):
+        mobj = re.fullmatch(r'token(?:@(?P<region>[A-Z]{2}))?', username)
+        if not mobj or not re.fullmatch(r'[\da-f]{32}', password):
            raise ExtractorError(self._LOGIN_HINT, expected=True)
+        if region := mobj.group('region'):
+            self._REGION = region
+        self.write_debug(f'Setting login region to "{self._REGION}"')
        self._OTV_USER_ID = password

    def _real_initialize(self):
@ -52,7 +62,7 @@ def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=Tr
            f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
            headers={'Accept': 'application/json'}, query={
                'auth_token': self._AUTH_TOKEN,
-                'region': 'IN',
+                'region': self._REGION,
                **query,
            })

@ -106,7 +116,7 @@ def _generate_api_data(self, data):
            'catalog_id': catalog_id,
            'content_id': content_id,
            'category': '',
-            'region': 'IN',
+            'region': self._REGION,
            'auth_token': self._AUTH_TOKEN,
            'id': self._OTV_USER_ID,
            'md5': hashlib.md5(unhashed.encode()).hexdigest(),
@ -129,11 +139,14 @@ def _real_extract(self, url):
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 422:
                error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
-                if error_info.get('code') == '1016':
+                error_code = error_info.get('code')
+                if error_code == '1016':
                    self.raise_login_required(
                        f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
-                elif msg := error_info.get('message'):
-                    raise ExtractorError(msg)
+                elif error_code == '4028':
+                    self.raise_login_required(
+                        f'Your login region is unspecified or incorrect. {self._LOGIN_HINT}', method=None)
+                raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': '))
            raise

        m3u8_url = traverse_obj(details, (
--- a/yt_dlp/extractor/playerfm.py
+++ b/yt_dlp/extractor/playerfm.py
@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..utils import clean_html, clean_podcast_url, int_or_none, str_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class PlayerFmIE(InfoExtractor):  # extractor for player.fm episode URLs (see _TESTS for the supported shapes)
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P<id>[\w-]+))'  # 'url' wraps the entire pattern; 'id' is the final path segment
+    _TESTS = [{
+        'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '478606546',
+            'display_id': 'movie-mindset-33-casino-feat-felix',
+            'thumbnail': r're:^https://.*\.(jpg|png)',
+            'title': 'Movie Mindset 33 - Casino feat. Felix',
+            'creators': ['Chapo Trap House'],
+            'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.',
+            'duration': 6830,
+            'timestamp': 1745406000,
+            'upload_date': '20250423',
+        },
+    }, {
+        'url': 'https://player.fm/series/nbc-nightly-news-with-tom-llamas/thursday-april-17-2025',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '477635490',
+            'display_id': 'thursday-april-17-2025',
+            'title': 'Thursday, April 17, 2025',
+            'thumbnail': r're:^https://.*\.(jpg|png)',
+            'duration': 1143,
+            'description': 'md5:4890b8cf9a55a787561cd5d59dfcda82',
+            'creators': ['NBC News'],
+            'timestamp': 1744941374,
+            'upload_date': '20250418',
+        },
+    }, {
+        'url': 'https://player.fm/series/soccer-101/ep-109-its-kicking-off-how-have-the-rules-for-kickoff-changed-what-are-the-best-approaches-to-getting-the-game-underway-and-how-could-we-improve-on-the-present-system-ack3NzL3yibvs4pf',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '481418710',
+            'thumbnail': r're:^https://.*\.(jpg|png)',
+            'title': r're:#109 It\'s kicking off! How have the rules for kickoff changed, .+ the present system\?',
+            'creators': ['TSS'],
+            'duration': 1510,
+            'display_id': 'md5:b52ecacaefab891b59db69721bfd9b13',
+            'description': 'md5:52a39e36d08d8919527454f152ad3c25',
+            'timestamp': 1659102055,
+            'upload_date': '20220729',
+        },
+    }]
+
+    def _real_extract(self, url):  # returns the info dict for one episode, built from its JSON metadata
+        display_id, url = self._match_valid_url(url).group('id', 'url')  # rebinds url to the matched portion only (nothing after the slug)
+        data = self._download_json(f'{url}.json', display_id)  # metadata is requested from the same URL with ".json" appended
+
+        return {
+            'display_id': display_id,
+            'vcodec': 'none',  # no video codec is reported; the tests above expect 'ext': 'mp3'
+            **traverse_obj(data, {
+                'id': ('id', {int}, {str_or_none}),  # accept only an integer id, then stringify it (tests expect a str)
+                'url': ('url', {clean_podcast_url}),
+                'title': ('title', {str}),
+                'description': ('description', {clean_html}),
+                'duration': ('duration', {int_or_none}),
+                'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any),  # first valid URL: episode image, else series image
+                'filesize': ('size', {int_or_none}),
+                'timestamp': ('publishedAt', {int_or_none}),  # tests show epoch seconds (e.g. 1745406000 -> '20250423')
+                'creators': ('series', 'author', {str}, filter, all, filter),  # one-element list; key is omitted when the author is missing or empty
+            }),
+        }
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@ -81,7 +81,7 @@ def fix_cdata(s):
        # geo flag is a bit unreliable and not properly set all the time
        geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y'

-        ext = determine_ext(media_url)
+        ext = determine_ext(media_url).lower()
        formats = []

        if ext == 'mp3':
@ -108,7 +108,7 @@ def fix_cdata(s):
                'format_id': join_nonempty('https', bitrate, delim='-'),
            })
        else:
-            raise ExtractorError('Unrecognized media file found')
+            raise ExtractorError(f'Unrecognized media extension "{ext}"')

        if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url:
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
@ -503,6 +503,28 @@ class RaiPlaySoundIE(RaiBaseIE):
            'upload_date': '20211201',
        },
        'params': {'skip_download': True},
+    }, {
+        # case-sensitivity test for uppercase extension
+        'url': 'https://www.raiplaysound.it/audio/2020/05/Storia--Lunita-dItalia-e-lunificazione-della-Germania-b4c16390-7f3f-4282-b353-d94897dacb7c.html',
+        'md5': 'c69ebd69282f0effd7ef67b7e2f6c7d8',
+        'info_dict': {
+            'id': 'b4c16390-7f3f-4282-b353-d94897dacb7c',
+            'ext': 'mp3',
+            'title': "Storia | 01 L'unità d'Italia e l'unificazione della Germania",
+            'alt_title': 'md5:ed4ed82585c52057b71b43994a59b705',
+            'description': 'md5:92818b6f31b2c150567d56b75db2ea7f',
+            'uploader': 'rai radio 3',
+            'duration': 2439.0,
+            'thumbnail': 'https://www.raiplaysound.it/dl/img/2023/09/07/1694084898279_Maturadio-LOGO-2048x1152.jpg',
+            'creators': ['rai radio 3'],
+            'series': 'Maturadio',
+            'season': 'Season 9',
+            'season_number': 9,
+            'episode': "01. L'unità d'Italia e l'unificazione della Germania",
+            'episode_number': 1,
+            'timestamp': 1590400740,
+            'upload_date': '20200525',
+        },
    }]

    def _real_extract(self, url):