diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 44a6696c0..9c9ee64a8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2195,7 +2195,7 @@ def _filter(f): return op(actual_value, comparison_value) return _filter - def _check_formats(self, formats): + def _check_formats(self, formats, warning=True): for f in formats: working = f.get('__working') if working is not None: @@ -2208,6 +2208,9 @@ def _check_formats(self, formats): continue temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None) temp_file.close() + # If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code. + # Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750 + original_retcode = self._download_retcode try: success, _ = self.dl(temp_file.name, f, test=True) except (DownloadError, OSError, ValueError, *network_exceptions): @@ -2218,12 +2221,18 @@ def _check_formats(self, formats): os.remove(temp_file.name) except OSError: self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') + # Restore the actual return code + self._download_retcode = original_retcode f['__working'] = success if success: f.pop('__needs_testing', None) yield f else: - self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) + msg = f'Unable to download format {f["format_id"]}. Skipping...' 
+ if warning: + self.report_warning(msg) + else: + self.to_screen(f'[info] {msg}') def _select_formats(self, formats, selector): return list(selector({ @@ -2949,7 +2958,7 @@ def is_wellformed(f): ) if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1]), reverse=True) + formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4d67e1caa..59a61e060 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1557,6 +1557,7 @@ PlatziCourseIE, PlatziIE, ) +from .playerfm import PlayerFmIE from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index e5c922b41..a4a5f409e 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -111,11 +111,9 @@ class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id> - shows/[^/]+/season-\d+/episode-\d+| - (?: - (?:movie|special)s/[^/]+| - (?:shows/[^/]+/)?videos - )/[^/?#&]+ + shows/[^/?#]+/season-\d+/episode-\d+| + (?P<type>movie|special)s/[^/?#]+(?P<extra>/[^/?#]+)?| + (?:shows/[^/?#]+/)?videos/[^/?#]+ )''' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', @@ -128,7 +126,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20120529', 'uploader': 'AENE-NEW', 'duration': 2592.0, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:5', 'tags': 'count:14', 'categories': ['Mountain Men'], @@ -139,10 +137,7 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Mountain Men', 
'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], 'skip': 'Geo-restricted - This content is not available in your location.', }, { @@ -156,7 +151,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20160112', 'uploader': 'AENE-NEW', 'duration': 1277.695, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:4', 'tags': 'count:23', 'episode': 'Inlawful Entry', @@ -166,10 +161,53 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Duck Dynasty', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams', + 'info_dict': { + 'id': '1590627395981', + 'ext': 'mp4', + 'title': 'VC Andrews\' Web of Dreams', + 'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5253.665, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:8', + 'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"], + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'VC Andrews\' Web of Dreams', + 'episode_number': 0, + 'timestamp': 1566489703.0, + 'upload_date': '20190822', }, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story', + 'info_dict': { + 'id': '1488235587551', + 'ext': 'mp4', + 'title': 'Hunting JonBenet\'s Killer: The Untold Story', + 'description': 'md5:209869425ee392d74fe29201821e48b4', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5003.903, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:10', + 'tags': 'count:11', + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'Hunting JonBenet\'s Killer: The 
Untold Story', + 'episode_number': 0, + 'timestamp': 1554987697.0, + 'upload_date': '20190411', + }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], }, { 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', @@ -198,7 +236,9 @@ class AENetworksIE(AENetworksBaseIE): }] def _real_extract(self, url): - domain, canonical = self._match_valid_url(url).groups() + domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra') + if url_type in ('movie', 'special') and not extra: + canonical += f'/full-{url_type}' return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py index f7b243234..3b0dc1f60 100644 --- a/yt_dlp/extractor/dangalplay.py +++ b/yt_dlp/extractor/dangalplay.py @@ -11,8 +11,14 @@ class DangalPlayBaseIE(InfoExtractor): _NETRC_MACHINE = 'dangalplay' + _REGION = 'IN' _OTV_USER_ID = None - _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _LOGIN_HINT = ( + 'Pass credentials as -u "token" -p "USER_ID" ' + '(where USER_ID is the value of "otv_user_id" in your browser local storage). 
' + 'Your login region can be optionally suffixed to the username as @REGION ' + '(where REGION is the two-letter "region" code found in your browser local storage), ' + 'e.g.: -u "token@IN" -p "USER_ID"') _API_BASE = 'https://ottapi.dangalplay.com' _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above @@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor): def _perform_login(self, username, password): if self._OTV_USER_ID: return - if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + mobj = re.fullmatch(r'token(?:@(?P<region>[A-Z]{2}))?', username) + if not mobj or not re.fullmatch(r'[\da-f]{32}', password): raise ExtractorError(self._LOGIN_HINT, expected=True) + if region := mobj.group('region'): + self._REGION = region + self.write_debug(f'Setting login region to "{self._REGION}"') self._OTV_USER_ID = password def _real_initialize(self): @@ -52,7 +62,7 @@ def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=Tr f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, headers={'Accept': 'application/json'}, query={ 'auth_token': self._AUTH_TOKEN, - 'region': 'IN', + 'region': self._REGION, **query, }) @@ -106,7 +116,7 @@ def _generate_api_data(self, data): 'catalog_id': catalog_id, 'content_id': content_id, 'category': '', - 'region': 'IN', + 'region': self._REGION, 'auth_token': self._AUTH_TOKEN, 'id': self._OTV_USER_ID, 'md5': hashlib.md5(unhashed.encode()).hexdigest(), @@ -129,11 +139,14 @@ def _real_extract(self, url): except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 422: error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} - if error_info.get('code') == '1016': + error_code = error_info.get('code') + if error_code == '1016': self.raise_login_required( f'Your token has expired or is invalid. 
{self._LOGIN_HINT}', method=None) - elif msg := error_info.get('message'): - raise ExtractorError(msg) + elif error_code == '4028': + self.raise_login_required( + f'Your login region is unspecified or incorrect. {self._LOGIN_HINT}', method=None) + raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': ')) raise m3u8_url = traverse_obj(details, ( diff --git a/yt_dlp/extractor/playerfm.py b/yt_dlp/extractor/playerfm.py new file mode 100644 index 000000000..d59d651a3 --- /dev/null +++ b/yt_dlp/extractor/playerfm.py @@ -0,0 +1,70 @@ +from .common import InfoExtractor +from ..utils import clean_html, clean_podcast_url, int_or_none, str_or_none, url_or_none +from ..utils.traversal import traverse_obj + + +class PlayerFmIE(InfoExtractor): + _VALID_URL = r'(?P<url>https?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P<id>[\w-]+))' + _TESTS = [{ + 'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix', + 'info_dict': { + 'ext': 'mp3', + 'id': '478606546', + 'display_id': 'movie-mindset-33-casino-feat-felix', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'title': 'Movie Mindset 33 - Casino feat. 
Felix', + 'creators': ['Chapo Trap House'], + 'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.', + 'duration': 6830, + 'timestamp': 1745406000, + 'upload_date': '20250423', + }, + }, { + 'url': 'https://player.fm/series/nbc-nightly-news-with-tom-llamas/thursday-april-17-2025', + 'info_dict': { + 'ext': 'mp3', + 'id': '477635490', + 'display_id': 'thursday-april-17-2025', + 'title': 'Thursday, April 17, 2025', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'duration': 1143, + 'description': 'md5:4890b8cf9a55a787561cd5d59dfcda82', + 'creators': ['NBC News'], + 'timestamp': 1744941374, + 'upload_date': '20250418', + }, + }, { + 'url': 'https://player.fm/series/soccer-101/ep-109-its-kicking-off-how-have-the-rules-for-kickoff-changed-what-are-the-best-approaches-to-getting-the-game-underway-and-how-could-we-improve-on-the-present-system-ack3NzL3yibvs4pf', + 'info_dict': { + 'ext': 'mp3', + 'id': '481418710', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'title': r're:#109 It\'s kicking off! 
How have the rules for kickoff changed, .+ the present system\?', + 'creators': ['TSS'], + 'duration': 1510, + 'display_id': 'md5:b52ecacaefab891b59db69721bfd9b13', + 'description': 'md5:52a39e36d08d8919527454f152ad3c25', + 'timestamp': 1659102055, + 'upload_date': '20220729', + }, + }] + + def _real_extract(self, url): + display_id, url = self._match_valid_url(url).group('id', 'url') + data = self._download_json(f'{url}.json', display_id) + + return { + 'display_id': display_id, + 'vcodec': 'none', + **traverse_obj(data, { + 'id': ('id', {int}, {str_or_none}), + 'url': ('url', {clean_podcast_url}), + 'title': ('title', {str}), + 'description': ('description', {clean_html}), + 'duration': ('duration', {int_or_none}), + 'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any), + 'filesize': ('size', {int_or_none}), + 'timestamp': ('publishedAt', {int_or_none}), + 'creators': ('series', 'author', {str}, filter, all, filter), + }), + } diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 027f7a7b6..d1a4d4c37 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -81,7 +81,7 @@ def fix_cdata(s): # geo flag is a bit unreliable and not properly set all the time geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y' - ext = determine_ext(media_url) + ext = determine_ext(media_url).lower() formats = [] if ext == 'mp3': @@ -108,7 +108,7 @@ def fix_cdata(s): 'format_id': join_nonempty('https', bitrate, delim='-'), }) else: - raise ExtractorError('Unrecognized media file found') + raise ExtractorError(f'Unrecognized media extension "{ext}"') if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url: self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) @@ -503,6 +503,28 @@ class RaiPlaySoundIE(RaiBaseIE): 'upload_date': '20211201', }, 'params': {'skip_download': True}, + }, { + # case-sensitivity test for uppercase extension + 'url': 
'https://www.raiplaysound.it/audio/2020/05/Storia--Lunita-dItalia-e-lunificazione-della-Germania-b4c16390-7f3f-4282-b353-d94897dacb7c.html', + 'md5': 'c69ebd69282f0effd7ef67b7e2f6c7d8', + 'info_dict': { + 'id': 'b4c16390-7f3f-4282-b353-d94897dacb7c', + 'ext': 'mp3', + 'title': "Storia | 01 L'unità d'Italia e l'unificazione della Germania", + 'alt_title': 'md5:ed4ed82585c52057b71b43994a59b705', + 'description': 'md5:92818b6f31b2c150567d56b75db2ea7f', + 'uploader': 'rai radio 3', + 'duration': 2439.0, + 'thumbnail': 'https://www.raiplaysound.it/dl/img/2023/09/07/1694084898279_Maturadio-LOGO-2048x1152.jpg', + 'creators': ['rai radio 3'], + 'series': 'Maturadio', + 'season': 'Season 9', + 'season_number': 9, + 'episode': "01. L'unità d'Italia e l'unificazione della Germania", + 'episode_number': 1, + 'timestamp': 1590400740, + 'upload_date': '20200525', + }, }] def _real_extract(self, url):