From 6e5bee418bc108565108153fd745c8e7a59f16dd Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 12 Jul 2025 13:44:27 +1200 Subject: [PATCH 01/81] [ie/youtube] Ensure context params are consistent for web clients (#13701) Authored by: coletdjnz --- yt_dlp/extractor/youtube/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 7d9cbf8ee4..0a9b510c7d 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -105,6 +105,7 @@ class SubsPoTokenPolicy(BasePoTokenPolicy): 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'SUPPORTS_COOKIES': True, **WEB_PO_TOKEN_POLICIES, + 'PLAYER_PARAMS': '8AEB', }, # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats 'web_safari': { From a5d697f62d8be78ffd472acb2f52c8bc32833003 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 12 Jul 2025 14:23:22 -0500 Subject: [PATCH 02/81] [ie/vimeo] Fix extractor (#13692) Closes #13180, Closes #13689 Authored by: bashonly --- README.md | 4 + yt_dlp/extractor/vimeo.py | 277 ++++++++++++++++++++++++++++++++------ 2 files changed, 237 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index c1a9356923..925ebd8c5b 100644 --- a/README.md +++ b/README.md @@ -1901,6 +1901,10 @@ #### sonylivseries #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) +#### vimeo +* `client`: Client to extract video data from. One of `android` (default), `ios` or `web`. The `ios` client only works with previously cached OAuth tokens. The `web` client only works when authenticated with credentials or account cookies +* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability + **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index b268fad56d..fdd42ec94f 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -21,6 +21,7 @@ js_to_json, jwt_decode_hs256, merge_dicts, + mimetype2ext, parse_filesize, parse_iso8601, parse_qs, @@ -28,9 +29,11 @@ smuggle_url, str_or_none, traverse_obj, + try_call, try_get, unified_timestamp, unsmuggle_url, + url_basename, url_or_none, urlencode_postdata, urlhandle_detect_ext, @@ -45,14 +48,56 @@ class VimeoBaseInfoExtractor(InfoExtractor): _REFERER_HINT = ( 'Cannot download embed-only video without embedding URL. 
Please call yt-dlp ' 'with the URL of the page that embeds this video.') - _IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw==' - _IOS_CLIENT_HEADERS = { + + _DEFAULT_CLIENT = 'android' + _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', 'Accept-Language': 'en', - 'User-Agent': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0', } - _IOS_OAUTH_CACHE_KEY = 'oauth-token-ios' - _ios_oauth_token = None + _CLIENT_CONFIGS = { + 'android': { + 'CACHE_KEY': 'oauth-token-android', + 'CACHE_ONLY': False, + 'VIEWER_JWT': False, + 'REQUIRES_AUTH': False, + 'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==', + 'USER_AGENT': 'com.vimeo.android.videoapp (OnePlus, ONEPLUS A6003, OnePlus, Android 14/34 Version 11.8.1) Kotlin VimeoNetworking/3.12.0', + 'VIDEOS_FIELDS': ( + 'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration', 'width', + 'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time', 'content_rating', + 'content_rating_class', 'rating_mod_locked', 'license', 'privacy', 'pictures', 'tags', 'stats', + 'categories', 'uploader', 'metadata', 'user', 'files', 'download', 'app', 'play', 'status', + 'resource_key', 'badge', 'upload', 'transcode', 'is_playable', 'has_audio', + ), + }, + 'ios': { + 'CACHE_KEY': 'oauth-token-ios', + 'CACHE_ONLY': True, + 'VIEWER_JWT': False, + 'REQUIRES_AUTH': False, + 'AUTH': 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw==', + 'USER_AGENT': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0', + 'VIDEOS_FIELDS': ( + 'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration', + 'width', 'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time', + 'content_rating', 'content_rating_class', 'rating_mod_locked', 'license', 'config_url', + 'embed_player_config_url', 'privacy', 'pictures', 'tags', 'stats', 'categories', 'uploader', + 'metadata', 'user', 'files', 'download', 'app', 'play', 'status', 'resource_key', 'badge', + 'upload', 'transcode', 'is_playable', 'has_audio', + ), + }, + 'web': { + 'VIEWER_JWT': True, + 'REQUIRES_AUTH': True, + 'USER_AGENT': None, + 'VIDEOS_FIELDS': ( + 'config_url', 'created_time', 'description', 'license', + 'metadata.connections.comments.total', 'metadata.connections.likes.total', + 'release_time', 'stats.plays', + ), + }, + } + _oauth_tokens = {} _viewer_info = None @staticmethod @@ -105,8 +150,8 @@ def _perform_login(self, username, password): raise ExtractorError('Unable to log in') def _real_initialize(self): - if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vuid'): - self._raise_login_required() + if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vimeo'): + self.raise_login_required() def _get_video_password(self): password = self.get_param('videopassword') @@ -277,52 +322,88 @@ def _parse_config(self, config, video_id): 
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'), } - def _fetch_oauth_token(self): - if not self._ios_oauth_token: - self._ios_oauth_token = self.cache.load(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY) + def _fetch_oauth_token(self, client): + client_config = self._CLIENT_CONFIGS[client] - if not self._ios_oauth_token: - self._ios_oauth_token = self._download_json( + if client_config['VIEWER_JWT']: + return f'jwt {self._fetch_viewer_info()["jwt"]}' + + cache_key = client_config['CACHE_KEY'] + + if not self._oauth_tokens.get(cache_key): + self._oauth_tokens[cache_key] = self.cache.load(self._NETRC_MACHINE, cache_key) + + if not self._oauth_tokens.get(cache_key): + if client_config['CACHE_ONLY']: + raise ExtractorError( + f'The {client} client is unable to fetch new OAuth tokens ' + f'and is only intended for use with previously cached tokens', expected=True) + + self._oauth_tokens[cache_key] = self._download_json( 'https://api.vimeo.com/oauth/authorize/client', None, - 'Fetching OAuth token', 'Failed to fetch OAuth token', + f'Fetching {client} OAuth token', f'Failed to fetch {client} OAuth token', headers={ - 'Authorization': f'Basic {self._IOS_CLIENT_AUTH}', - **self._IOS_CLIENT_HEADERS, + 'Authorization': f'Basic {client_config["AUTH"]}', + 'User-Agent': client_config['USER_AGENT'], + **self._CLIENT_HEADERS, }, data=urlencode_postdata({ 'grant_type': 'client_credentials', - 'scope': 'private public create edit delete interact upload purchased stats', + 'scope': 'private public create edit delete interact upload purchased stats video_files', }, quote_via=urllib.parse.quote))['access_token'] - self.cache.store(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY, self._ios_oauth_token) + self.cache.store(self._NETRC_MACHINE, cache_key, self._oauth_tokens[cache_key]) - return self._ios_oauth_token + return f'Bearer {self._oauth_tokens[cache_key]}' + + def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): + client = force_client or self._configuration_arg('client', [self._DEFAULT_CLIENT], ie_key=VimeoIE)[0] + if client not in self._CLIENT_CONFIGS: + raise ExtractorError( + f'Unsupported API client "{client}" requested. 
' + f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + + client_config = self._CLIENT_CONFIGS[client] + if client_config['REQUIRES_AUTH'] and not self._get_cookies('https://vimeo.com').get('vimeo'): + self.raise_login_required(f'The {client} client requires authentication') - def _call_videos_api(self, video_id, unlisted_hash=None, **kwargs): return self._download_json( - join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'), - video_id, 'Downloading API JSON', headers={ - 'Authorization': f'Bearer {self._fetch_oauth_token()}', - **self._IOS_CLIENT_HEADERS, - }, query={ - 'fields': ','.join(( - 'config_url', 'embed_player_config_url', 'player_embed_url', 'download', 'play', - 'files', 'description', 'license', 'release_time', 'created_time', 'stats.plays', - 'metadata.connections.comments.total', 'metadata.connections.likes.total')), + join_nonempty( + 'https://api.vimeo.com/videos', + join_nonempty(video_id, unlisted_hash, delim=':'), + path, delim='/'), + video_id, f'Downloading {client} API JSON', f'Unable to download {client} API JSON', + headers=filter_dict({ + 'Authorization': self._fetch_oauth_token(client), + 'User-Agent': client_config['USER_AGENT'], + **self._CLIENT_HEADERS, + }), query={ + 'fields': ','.join(client_config['VIDEOS_FIELDS']), + **(query or {}), }, **kwargs) - def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=None): + def _extract_original_format(self, url, video_id, unlisted_hash=None): # Original/source formats are only available when logged in if not self._get_cookies('https://vimeo.com/').get('vimeo'): - return + return None - query = {'action': 'load_download_config'} - if unlisted_hash: - query['unlisted_hash'] = unlisted_hash - download_data = self._download_json( - url, video_id, 'Loading download config JSON', fatal=False, - query=query, headers={'X-Requested-With': 'XMLHttpRequest'}, - expected_status=(403, 404)) or {} - source_file = download_data.get('source_file') - download_url = try_get(source_file, lambda x: x['download_url']) + policy = self._configuration_arg('original_format_policy', ['auto'], ie_key=VimeoIE)[0] + if policy == 'never': + return None + + try: + download_data = self._download_json( + url, video_id, 'Loading download config JSON', query=filter_dict({ + 'action': 'load_download_config', + 'unlisted_hash': unlisted_hash, + }), headers={ + 'Accept': 'application/json', + 'X-Requested-With': 'XMLHttpRequest', + }) + except ExtractorError as error: + self.write_debug(f'Unable to load download config JSON: {error.cause}') + download_data = None + + source_file = traverse_obj(download_data, ('source_file', {dict})) or {} + download_url = traverse_obj(source_file, ('download_url', {url_or_none})) if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): source_name = source_file.get('public_name', 'Original') if self._is_valid_url(download_url, video_id, f'{source_name} video'): @@ -340,8 +421,27 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=N 'quality': 1, } - original_response = api_data or self._call_videos_api( - video_id, unlisted_hash, fatal=False, expected_status=(403, 404)) + # Most web client API requests are subject to rate-limiting (429) when logged-in. 
+ # Requesting only the 'privacy' field is NOT rate-limited, + # so first we should check if video even has 'download' formats available + try: + privacy_info = self._call_videos_api( + video_id, unlisted_hash, force_client='web', query={'fields': 'privacy'}) + except ExtractorError as error: + self.write_debug(f'Unable to download privacy info: {error.cause}') + return None + + if not traverse_obj(privacy_info, ('privacy', 'download', {bool})): + msg = f'{video_id}: Vimeo says this video is not downloadable' + if policy != 'always': + self.write_debug( + f'{msg}, so yt-dlp is not attempting to extract the original/source format. ' + f'To try anyways, use --extractor-args "vimeo:original_format_policy=always"') + return None + self.write_debug(f'{msg}; attempting to extract original/source format anyways') + + original_response = self._call_videos_api( + video_id, unlisted_hash, force_client='web', query={'fields': 'download'}, fatal=False) for download_data in traverse_obj(original_response, ('download', ..., {dict})): download_url = download_data.get('link') if not download_url or download_data.get('quality') != 'source': @@ -919,6 +1019,92 @@ def _verify_player_video_password(self, url, video_id, headers): raise ExtractorError('Wrong video password', expected=True) return checked + def _get_subtitles(self, video_id, unlisted_hash): + subs = {} + text_tracks = self._call_videos_api( + video_id, unlisted_hash, path='texttracks', query={ + 'include_transcript': 'true', + 'fields': ','.join(( + 'active', 'display_language', 'id', 'language', 'link', 'name', 'type', 'uri', + )), + }, fatal=False) + for tt in traverse_obj(text_tracks, ('data', lambda _, v: url_or_none(v['link']))): + subs.setdefault(tt.get('language'), []).append({ + 'url': tt['link'], + 'ext': 'vtt', + 'name': tt.get('display_language'), + }) + return subs + + def _parse_api_response(self, video, video_id, unlisted_hash=None): + formats, subtitles = [], {} + seen_urls = set() + duration = traverse_obj(video, ('duration', {int_or_none})) + + for file in traverse_obj(video, ( + (('play', (None, 'progressive')), 'files', 'download'), lambda _, v: url_or_none(v['link']), + )): + format_url = file['link'] + if format_url in seen_urls: + continue + seen_urls.add(format_url) + quality = file.get('quality') + ext = determine_ext(format_url) + if quality == 'hls' or ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + format_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + elif quality == 'dash' or ext == 'mpd': + fmts, subs = self._extract_mpd_formats_and_subtitles( + format_url, video_id, mpd_id='dash', fatal=False) + for fmt in fmts: + fmt['format_id'] = join_nonempty( + *fmt['format_id'].split('-', 2)[:2], int_or_none(fmt.get('tbr'))) + else: + fmt = traverse_obj(file, { + 'ext': ('type', {mimetype2ext(default='mp4')}), + 'vcodec': ('codec', {str.lower}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'filesize': ('size', {int_or_none}), + 'fps': ('fps', {int_or_none}), + }) + fmt.update({ + 'url': format_url, + 'format_id': join_nonempty( + 'http', traverse_obj(file, 'public_name', 'rendition'), quality), + 'tbr': try_call(lambda: fmt['filesize'] * 8 / duration / 1024), + }) + formats.append(fmt) + continue + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + if traverse_obj(video, ('metadata', 'connections', 'texttracks', 'total', {int})): + self._merge_subtitles(self.extract_subtitles(video_id, unlisted_hash), target=subtitles) + + return { + 
**traverse_obj(video, { + 'title': ('name', {str}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': ('user', 'link', {url_basename}), + 'uploader_url': ('user', 'link', {url_or_none}), + 'release_timestamp': ('live', 'scheduled_start_time', {int_or_none}), + 'thumbnails': ('pictures', 'sizes', lambda _, v: url_or_none(v['link']), { + 'url': 'link', + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + }), + 'id': video_id, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + 'live_status': { + 'streaming': 'is_live', + 'done': 'was_live', + }.get(traverse_obj(video, ('live', 'status', {str}))), + } + def _extract_from_api(self, video_id, unlisted_hash=None): for retry in (False, True): try: @@ -934,10 +1120,13 @@ def _extract_from_api(self, video_id, unlisted_hash=None): continue raise - info = self._parse_config(self._download_json( - video['config_url'], video_id), video_id) + if config_url := traverse_obj(video, ('config_url', {url_or_none})): + info = self._parse_config(self._download_json(config_url, video_id), video_id) + else: + info = self._parse_api_response(video, video_id, unlisted_hash) + source_format = self._extract_original_format( - f'https://vimeo.com/{video_id}', video_id, unlisted_hash, api_data=video) + f'https://vimeo.com/{video_id}', video_id, unlisted_hash) if source_format: info['formats'].append(source_format) From 3ae61e0f313dd03a09060abc7a212775c3717818 Mon Sep 17 00:00:00 2001 From: Lyuben Ivanov Date: Sat, 12 Jul 2025 22:56:11 +0300 Subject: [PATCH 03/81] [ie/BTVPlus] Add extractor (#13541) Authored by: bubo --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/btvplus.py | 73 +++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 yt_dlp/extractor/btvplus.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 84da570b0a..804536cce7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -309,6 +309,7 @@ BrilliantpalaClassesIE, BrilliantpalaElearnIE, ) +from .btvplus import BTVPlusIE from .bundesliga import BundesligaIE from .bundestag import BundestagIE from .bunnycdn import BunnyCdnIE diff --git a/yt_dlp/extractor/btvplus.py b/yt_dlp/extractor/btvplus.py new file mode 100644 index 0000000000..531ace1471 --- /dev/null +++ b/yt_dlp/extractor/btvplus.py @@ -0,0 +1,73 @@ +from .common import InfoExtractor +from ..utils import ( + bug_reports_message, + clean_html, + get_element_by_class, + js_to_json, + mimetype2ext, + strip_or_none, + url_or_none, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class BTVPlusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?btvplus\.bg/produkt/(?:predavaniya|seriali|novini)/(?P\d+)' + _TESTS = [{ + 'url': 'https://btvplus.bg/produkt/predavaniya/67271/btv-reporterite/btv-reporterite-12-07-2025-g', + 'info_dict': { + 'ext': 'mp4', + 'id': '67271', + 'title': 'bTV Репортерите - 12.07.2025 г.', + 'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jul2025/2113606319.jpg', + }, + }, { + 'url': 'https://btvplus.bg/produkt/seriali/66942/sezon-2/plen-sezon-2-epizod-55', + 'info_dict': { + 'ext': 'mp4', + 'id': '66942', + 'title': 'Плен - сезон 2, епизод 55', + 'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jun2025/2113595104.jpg', + }, + }, { + 'url': 'https://btvplus.bg/produkt/novini/67270/btv-novinite-centralna-emisija-12-07-2025', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = 
self._download_webpage(url, video_id) + + player_url = self._search_regex( + r'var\s+videoUrl\s*=\s*[\'"]([^\'"]+)[\'"]', + webpage, 'player URL') + + player_config = self._download_json( + urljoin('https://btvplus.bg', player_url), video_id)['config'] + + videojs_data = self._search_json( + r'videojs\(["\'][^"\']+["\'],', player_config, 'videojs data', + video_id, transform_source=js_to_json) + formats = [] + subtitles = {} + for src in traverse_obj(videojs_data, ('sources', lambda _, v: url_or_none(v['src']))): + ext = mimetype2ext(src.get('type')) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + self.report_warning(f'Unknown format type {ext}{bug_reports_message()}') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': ( + strip_or_none(self._og_search_title(webpage, default=None)) + or clean_html(get_element_by_class('product-title', webpage))), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + } From 5245231e4a39ecd5595d4337d46d85e150e2430a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 12 Jul 2025 17:12:46 -0500 Subject: [PATCH 04/81] [ie] Add `_search_nextjs_v13_data` helper (#13398) * Fixes FranceTVSiteIE livestream extraction * Fixes GoPlayIE metadata extraction Authored by: bashonly --- test/test_InfoExtractor.py | 26 +++++++++++++++++++ yt_dlp/extractor/common.py | 47 +++++++++++++++++++++++++++++++++++ yt_dlp/extractor/francetv.py | 48 +++++++++++++++++++----------------- yt_dlp/extractor/goplay.py | 44 +++++++++++---------------------- yt_dlp/extractor/ninenow.py | 21 ++++++---------- 5 files changed, 120 insertions(+), 66 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c9f70431f7..7c3825f779 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1959,6 +1959,32 @@ def test_search_nextjs_data(self): with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) + def test_search_nextjs_v13_data(self): + HTML = R''' + + + + + + + + ''' + EXPECTED = [{ + 'foo': 'bar', + }, { + 'meta': { + 'dateCreated': 1730489700, + 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + }, + }, { + 'duplicated_field_name': {'x': 1}, + }, { + 'duplicated_field_name': {'y': 2}, + }] + self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED) + self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), []) + self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), []) + def test_search_nuxt_json(self): HTML_TMPL = '' VALID_DATA = ''' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b75e806233..a3ff5a1c0b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1783,6 +1783,53 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU r']+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data', video_id, end_pattern='', fatal=fatal, default=default, **kw) + def _search_nextjs_v13_data(self, webpage, video_id, fatal=True): + """Parses Next.js app router flight data that was introduced in Next.js v13""" + nextjs_data = [] + if not fatal and not isinstance(webpage, str): + return nextjs_data + + def flatten(flight_data): + if not 
isinstance(flight_data, list): + return + if len(flight_data) == 4 and flight_data[0] == '$': + _, name, _, data = flight_data + if not isinstance(data, dict): + return + children = data.pop('children', None) + if data and isinstance(name, str) and name.startswith('$'): + # It is useful hydration JSON data + nextjs_data.append(data) + flatten(children) + return + for f in flight_data: + flatten(f) + + flight_text = '' + # The pattern for the surrounding JS/tag should be strict as it's a hardcoded string in the next.js source + # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L189 + for flight_segment in re.findall(r']*>self\.__next_f\.push\((\[.+?\])\)', webpage): + segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False) + # Some earlier versions of next.js "optimized" away this array structure; this is unsupported + # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761 + if not isinstance(segment, list) or len(segment) != 2: + self.write_debug( + f'{video_id}: Unsupported next.js flight data structure detected', only_once=True) + continue + # Only use the relevant payload type (1 == data) + # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L11-L14 + payload_type, chunk = segment + if payload_type == 1: + flight_text += chunk + + for f in flight_text.splitlines(): + prefix, _, body = f.partition(':') + if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix.lstrip()): + # The body isn't necessarily valid JSON, so this should always be non-fatal + flatten(self._parse_json(body, video_id, fatal=False, errnote=False)) + + return nextjs_data + def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 5c9f8e36dd..edf6708a03 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -1,4 +1,3 @@ -import json import re import urllib.parse @@ -19,7 +18,11 @@ unsmuggle_url, url_or_none, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import ( + find_element, + get_first, + traverse_obj, +) class FranceTVBaseInfoExtractor(InfoExtractor): @@ -258,7 +261,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba' + 'id': 'b2cf9fd8-e971-4757-8651-848f2772df61', # old: ec217ecc-0733-48cf-ac06-af1347b849d1 'ext': 'mp4', 'title': '13h15, le dimanche... 
- Les mystères de Jésus', 'timestamp': 1502623500, @@ -269,7 +272,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'params': { 'skip_download': True, }, - 'add_ie': [FranceTVIE.ie_key()], + 'skip': 'Unfortunately, this video is no longer available', }, { # geo-restricted 'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html', @@ -287,7 +290,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1441, }, - 'skip': 'No longer available', + 'skip': 'Unfortunately, this video is no longer available', }, { # geo-restricted livestream (workflow == 'token-akamai') 'url': 'https://www.france.tv/france-4/direct.html', @@ -308,6 +311,19 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'live_status': 'is_live', }, 'params': {'skip_download': 'livestream'}, + }, { + # Not geo-restricted + 'url': 'https://www.france.tv/france-2/la-maison-des-maternelles/5574051-nous-sommes-amis-et-nous-avons-fait-un-enfant-ensemble.html', + 'info_dict': { + 'id': 'b448bfe4-9fe7-11ee-97d8-2ba3426fa3df', + 'ext': 'mp4', + 'title': 'Nous sommes amis et nous avons fait un enfant ensemble - Émission du jeudi 21 décembre 2023', + 'duration': 1065, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'timestamp': 1703147921, + 'upload_date': '20231221', + }, + 'params': {'skip_download': 'm3u8'}, }, { # france3 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', @@ -342,30 +358,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'only_matching': True, }] - # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay - def _find_json(self, s): - return self._search_json( - r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None) - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + nextjs_data = self._search_nextjs_v13_data(webpage, display_id) - nextjs_data = traverse_obj( - re.findall(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), - (..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children')) - - if traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)): + if get_first(nextjs_data, ('isLive', {bool})): # For livestreams we need the id of the stream instead of the currently airing episode id - video_id = traverse_obj(nextjs_data, ( - ..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ..., - 'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)), - 'options', 'id', {str}, any)) + video_id = get_first(nextjs_data, ('options', 'id', {str})) else: - video_id = traverse_obj(nextjs_data, ( - ..., ..., ..., 'children', - lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path, - 'video', ('playerReplayId', 'siId'), {str}, any)) + video_id = get_first(nextjs_data, ('video', ('playerReplayId', 'siId'), {str})) if not video_id: raise ExtractorError('Unable to extract video ID') diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py index c654c757c6..2e959cead2 100644 --- a/yt_dlp/extractor/goplay.py +++ b/yt_dlp/extractor/goplay.py @@ -5,16 +5,11 @@ import hmac import json import os -import re import urllib.parse from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - remove_end, - traverse_obj, -) +from ..utils import ExtractorError, int_or_none +from ..utils.traversal import get_first, 
traverse_obj class GoPlayIE(InfoExtractor): @@ -27,10 +22,10 @@ class GoPlayIE(InfoExtractor): 'info_dict': { 'id': '2baa4560-87a0-421b-bffc-359914e3c387', 'ext': 'mp4', - 'title': 'S22 - Aflevering 1', + 'title': 'De Slimste Mens ter Wereld - S22 - Aflevering 1', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}', 'series': 'De Slimste Mens ter Wereld', - 'episode': 'Episode 1', + 'episode': 'Wordt aangekondigd', 'season_number': 22, 'episode_number': 1, 'season': 'Season 22', @@ -52,7 +47,7 @@ class GoPlayIE(InfoExtractor): 'info_dict': { 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee', 'ext': 'mp4', - 'title': 'S11 - Aflevering 1', + 'title': 'De Mol - S11 - Aflevering 1', 'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}', 'episode': 'Episode 1', 'series': 'De Mol', @@ -75,21 +70,13 @@ def _real_initialize(self): if not self._id_token: raise self.raise_login_required(method='password') - # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv - def _find_json(self, s): - return self._search_json( - r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None) - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - nextjs_data = traverse_obj( - re.findall(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), - (..., {json.loads}, ..., {self._find_json}, ...)) - meta = traverse_obj(nextjs_data, ( - ..., ..., 'children', ..., ..., 'children', - lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any)) + nextjs_data = self._search_nextjs_v13_data(webpage, display_id) + meta = get_first(nextjs_data, ( + lambda k, v: k in ('video', 'meta') and v['path'] == urllib.parse.urlparse(url).path)) video_id = meta['uuid'] info_dict = traverse_obj(meta, { @@ -98,19 +85,18 @@ def _real_extract(self, url): }) if traverse_obj(meta, ('program', 'subtype')) != 'movie': - for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)): - episode_data = traverse_obj( - season_data, ('videos', lambda _, v: v['videoId'] == video_id, any)) + for season_data in traverse_obj(nextjs_data, (..., 'playlists', ..., {dict})): + episode_data = traverse_obj(season_data, ('videos', lambda _, v: v['videoId'] == video_id, any)) if not episode_data: continue - episode_title = traverse_obj( - episode_data, 'contextualTitle', 'episodeTitle', expected_type=str) + season_number = traverse_obj(season_data, ('season', {int_or_none})) info_dict.update({ - 'title': episode_title or info_dict.get('title'), - 'series': remove_end(info_dict.get('title'), f' - {episode_title}'), - 'season_number': traverse_obj(season_data, ('season', {int_or_none})), + 'episode': traverse_obj(episode_data, ('episodeTitle', {str})), 'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})), + 'season_number': season_number, + 'series': self._search_regex( + fr'^(.+)? 
- S{season_number} - ', info_dict.get('title'), 'series', default=None), }) break diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index 7b0cb77a74..2f3a4ed284 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -1,6 +1,3 @@ -import json -import re - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..utils import ( @@ -11,7 +8,12 @@ str_or_none, url_or_none, ) -from ..utils.traversal import require, traverse_obj, value +from ..utils.traversal import ( + get_first, + require, + traverse_obj, + value, +) class NineNowIE(InfoExtractor): @@ -101,20 +103,11 @@ class NineNowIE(InfoExtractor): }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}' - # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv and yt_dlp.extractor.goplay - def _find_json(self, s): - return self._search_json( - r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None) - def _real_extract(self, url): display_id, video_type = self._match_valid_url(url).group('id', 'type') webpage = self._download_webpage(url, display_id) - common_data = traverse_obj( - re.findall(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), - (..., {json.loads}, ..., {self._find_json}, - lambda _, v: v['payload'][video_type]['slug'] == display_id, - 'payload', any, {require('video data')})) + common_data = get_first(self._search_nextjs_v13_data(webpage, display_id), ('payload', {dict})) if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})): self.report_drm(display_id) From b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 12 Jul 2025 18:12:05 -0500 Subject: [PATCH 05/81] [ie] Rework `_search_nextjs_v13_data` helper (#13711) Fix 5245231e4a39ecd5595d4337d46d85e150e2430a Authored by: bashonly --- test/test_InfoExtractor.py | 31 ++++++++++++++++++------------- yt_dlp/extractor/common.py | 18 ++++++++++++------ 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 7c3825f779..40dd05e136 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1969,21 +1969,26 @@ def test_search_nextjs_v13_data(self): ''' - EXPECTED = [{ - 'foo': 'bar', - }, { - 'meta': { - 'dateCreated': 1730489700, - 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + EXPECTED = { + '18': { + 'foo': 'bar', }, - }, { - 'duplicated_field_name': {'x': 1}, - }, { - 'duplicated_field_name': {'y': 2}, - }] + '16': { + 'meta': { + 'dateCreated': 1730489700, + 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + }, + }, + '19': { + 'duplicated_field_name': {'x': 1}, + }, + '20': { + 'duplicated_field_name': {'y': 2}, + }, + } self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED) - self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), []) - self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), []) + self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {}) + self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {}) def test_search_nuxt_json(self): HTML_TMPL = '' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a3ff5a1c0b..d601e17514 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1785,7 +1785,7 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, 
default=NO_DEFAU def _search_nextjs_v13_data(self, webpage, video_id, fatal=True): """Parses Next.js app router flight data that was introduced in Next.js v13""" - nextjs_data = [] + nextjs_data = {} if not fatal and not isinstance(webpage, str): return nextjs_data @@ -1797,9 +1797,9 @@ def flatten(flight_data): if not isinstance(data, dict): return children = data.pop('children', None) - if data and isinstance(name, str) and name.startswith('$'): + if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name): # It is useful hydration JSON data - nextjs_data.append(data) + nextjs_data[name[2:]] = data flatten(children) return for f in flight_data: @@ -1823,10 +1823,16 @@ def flatten(flight_data): flight_text += chunk for f in flight_text.splitlines(): - prefix, _, body = f.partition(':') - if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix.lstrip()): - # The body isn't necessarily valid JSON, so this should always be non-fatal + prefix, _, body = f.lstrip().partition(':') + if not re.fullmatch(r'[0-9a-f]+', prefix): + continue + # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal + if body.startswith('[') and body.endswith(']'): flatten(self._parse_json(body, video_id, fatal=False, errnote=False)) + elif body.startswith('{') and body.endswith('}'): + data = self._parse_json(body, video_id, fatal=False, errnote=False) + if data is not None: + nextjs_data[prefix] = data return nextjs_data From 0f33950c778331bf4803c76e8b0ba1862df93431 Mon Sep 17 00:00:00 2001 From: ShockedPlot7560 Date: Sun, 13 Jul 2025 01:35:51 +0200 Subject: [PATCH 06/81] [ie/mixlr] Add extractors (#13561) Authored by: ShockedPlot7560, seproDev Co-authored-by: sepro --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/mixlr.py | 134 ++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 yt_dlp/extractor/mixlr.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 804536cce7..18a3cac54b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1169,6 +1169,10 @@ MixcloudPlaylistIE, MixcloudUserIE, ) +from .mixlr import ( + MixlrIE, + MixlrRecoringIE, +) from .mlb import ( MLBIE, MLBTVIE, diff --git a/yt_dlp/extractor/mixlr.py b/yt_dlp/extractor/mixlr.py new file mode 100644 index 0000000000..53f3ffe6f8 --- /dev/null +++ b/yt_dlp/extractor/mixlr.py @@ -0,0 +1,134 @@ +from .common import InfoExtractor +from ..networking import HEADRequest +from ..utils import int_or_none, parse_iso8601, url_or_none, urlhandle_detect_ext +from ..utils.traversal import traverse_obj + + +class MixlrIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?P[\w-]+)\.mixlr\.com/events/(?P\d+)' + _TESTS = [{ + 'url': 'https://suncity-104-9fm.mixlr.com/events/4387115', + 'info_dict': { + 'id': '4387115', + 'ext': 'mp3', + 'title': r're:SUNCITY 104.9FM\'s live audio \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'uploader': 'suncity-104-9fm', + 'like_count': int, + 'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/cd5b34d05fa2cee72d80477724a2f02e.png', + 'timestamp': 1751943773, + 'upload_date': '20250708', + 'release_timestamp': 1751943764, + 'release_date': '20250708', + 'live_status': 'is_live', + }, + }, { + 'url': 'https://brcountdown.mixlr.com/events/4395480', + 'info_dict': { + 'id': '4395480', + 'ext': 'aac', + 'title': r're:Beats Revolution Countdown Episodio 461 \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'description': 'md5:5cacd089723f7add3f266bd588315bb3', + 
'uploader': 'brcountdown', + 'like_count': int, + 'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/c48727a59f690b87a55d47d123ba0d6d.jpg', + 'timestamp': 1752354007, + 'upload_date': '20250712', + 'release_timestamp': 1752354000, + 'release_date': '20250712', + 'live_status': 'is_live', + }, + }, { + 'url': 'https://www.brcountdown.mixlr.com/events/4395480', + 'only_matching': True, + }] + + def _real_extract(self, url): + username, event_id = self._match_valid_url(url).group('username', 'id') + + broadcast_info = self._download_json( + f'https://api.mixlr.com/v3/channels/{username}/events/{event_id}', event_id) + + formats = [] + format_url = traverse_obj( + broadcast_info, ('included', 0, 'attributes', 'progressive_stream_url', {url_or_none})) + if format_url: + urlh = self._request_webpage( + HEADRequest(format_url), event_id, fatal=False, note='Checking stream') + if urlh and urlh.status == 200: + ext = urlhandle_detect_ext(urlh) + if ext == 'octet-stream': + self.report_warning( + 'The server did not return a valid file extension for the stream URL. ' + 'Assuming an mp3 stream; postprocessing may fail if this is incorrect') + ext = 'mp3' + formats.append({ + 'url': format_url, + 'ext': ext, + 'vcodec': 'none', + }) + + release_timestamp = traverse_obj( + broadcast_info, ('data', 'attributes', 'starts_at', {str})) + if not formats and release_timestamp: + self.raise_no_formats(f'This event will start at {release_timestamp}', expected=True) + + return { + 'id': event_id, + 'uploader': username, + 'formats': formats, + 'release_timestamp': parse_iso8601(release_timestamp), + **traverse_obj(broadcast_info, ('included', 0, 'attributes', { + 'title': ('title', {str}), + 'timestamp': ('started_at', {parse_iso8601}), + 'concurrent_view_count': ('concurrent_view_count', {int_or_none}), + 'like_count': ('heart_count', {int_or_none}), + 'is_live': ('live', {bool}), + })), + **traverse_obj(broadcast_info, ('data', 'attributes', { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'timestamp': ('started_at', {parse_iso8601}), + 'concurrent_view_count': ('concurrent_view_count', {int_or_none}), + 'like_count': ('heart_count', {int_or_none}), + 'thumbnail': ('artwork_url', {url_or_none}), + 'uploader_id': ('broadcaster_id', {str}), + })), + } + + +class MixlrRecoringIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?P[\w-]+)\.mixlr\.com/recordings/(?P\d+)' + _TESTS = [{ + 'url': 'https://biblewayng.mixlr.com/recordings/2375193', + 'info_dict': { + 'id': '2375193', + 'ext': 'mp3', + 'title': "God's Jewels and Their Resting Place Bro. 
Adeniji", + 'description': 'Preached February 21, 2024 in the evening', + 'uploader_id': '8659190', + 'duration': 10968, + 'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/ceca120ef707f642abeea6e29cd74238.jpg', + 'timestamp': 1708544542, + 'upload_date': '20240221', + }, + }] + + def _real_extract(self, url): + username, recording_id = self._match_valid_url(url).group('username', 'id') + + recording_info = self._download_json( + f'https://api.mixlr.com/v3/channels/{username}/recordings/{recording_id}', recording_id) + + return { + 'id': recording_id, + **traverse_obj(recording_info, ('data', 'attributes', { + 'ext': ('file_format', {str}), + 'url': ('url', {url_or_none}), + 'title': ('title', {str}), + 'description': ('description', {str}), + 'timestamp': ('created_at', {parse_iso8601}), + 'duration': ('duration', {int_or_none}), + 'thumbnail': ('artwork_url', {url_or_none}), + 'uploader_id': ('user_id', {str}), + })), + } From a6db1d297ab40cc346de24aacbeab93112b2f4e1 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 13 Jul 2025 16:09:39 -0500 Subject: [PATCH 07/81] [ie/vimeo] Handle age-restricted videos (#13719) Closes #13716 Authored by: bashonly --- yt_dlp/extractor/vimeo.py | 50 +++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index fdd42ec94f..7ffe89f227 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -50,6 +50,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'with the URL of the page that embeds this video.') _DEFAULT_CLIENT = 'android' + _DEFAULT_AUTHED_CLIENT = 'web' _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', 'Accept-Language': 'en', @@ -125,7 +126,14 @@ def _fetch_viewer_info(self, display_id=None, fatal=True): return self._viewer_info + @property + def _is_logged_in(self): + return 'vimeo' in self._get_cookies('https://vimeo.com') + def _perform_login(self, username, password): + if self._is_logged_in: + return + viewer = self._fetch_viewer_info() data = { 'action': 'login', @@ -150,7 +158,7 @@ def _perform_login(self, username, password): raise ExtractorError('Unable to log in') def _real_initialize(self): - if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vimeo'): + if self._LOGIN_REQUIRED and not self._is_logged_in: self.raise_login_required() def _get_video_password(self): @@ -354,15 +362,22 @@ def _fetch_oauth_token(self, client): return f'Bearer {self._oauth_tokens[cache_key]}' - def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): - client = force_client or self._configuration_arg('client', [self._DEFAULT_CLIENT], ie_key=VimeoIE)[0] + def _get_requested_client(self): + default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT + + client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0] if client not in self._CLIENT_CONFIGS: raise ExtractorError( f'Unsupported API client "{client}" requested. 
' f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + return client + + def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): + client = force_client or self._get_requested_client() + client_config = self._CLIENT_CONFIGS[client] - if client_config['REQUIRES_AUTH'] and not self._get_cookies('https://vimeo.com').get('vimeo'): + if client_config['REQUIRES_AUTH'] and not self._is_logged_in: self.raise_login_required(f'The {client} client requires authentication') return self._download_json( @@ -382,7 +397,7 @@ def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_cli def _extract_original_format(self, url, video_id, unlisted_hash=None): # Original/source formats are only available when logged in - if not self._get_cookies('https://vimeo.com/').get('vimeo'): + if not self._is_logged_in: return None policy = self._configuration_arg('original_format_policy', ['auto'], ie_key=VimeoIE)[0] @@ -1111,14 +1126,25 @@ def _extract_from_api(self, video_id, unlisted_hash=None): video = self._call_videos_api(video_id, unlisted_hash) break except ExtractorError as e: - if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400 - and 'password' in traverse_obj( - self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False), - ({json.loads}, 'invalid_parameters', ..., 'field'), - )): + if not isinstance(e.cause, HTTPError): + raise + response = traverse_obj( + self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False), + ({json.loads}, {dict})) or {} + if ( + not retry and e.cause.status == 400 + and 'password' in traverse_obj(response, ('invalid_parameters', ..., 'field')) + ): self._verify_video_password(video_id) - continue - raise + elif e.cause.status == 404 and response.get('error_code') == 5460: + self.raise_login_required(join_nonempty( + traverse_obj(response, ('error', {str.strip})), + 'Authentication may be needed due to your location.', + 'If your IP address is located in Europe you could try using a VPN/proxy,', + f'or else u{self._login_hint()[1:]}', + delim=' '), method=None) + else: + raise if config_url := traverse_obj(video, ('config_url', {url_or_none})): info = self._parse_config(self._download_json(config_url, video_id), video_id) From 630f3389c33f0f7f6ec97e8917d20aeb4e4078da Mon Sep 17 00:00:00 2001 From: Frank Cai <70647872+averageFOSSenjoyer@users.noreply.github.com> Date: Sun, 13 Jul 2025 16:16:01 -0500 Subject: [PATCH 08/81] [ie/UnitedNationsWebTv] Add extractor (#13538) Closes #2675 Authored by: averageFOSSenjoyer --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/unitednations.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 yt_dlp/extractor/unitednations.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 18a3cac54b..e99edfd40b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2290,6 +2290,7 @@ ) from .umg import UMGDeIE from .unistra import UnistraIE +from .unitednations import UnitedNationsWebTvIE from .unity import UnityIE from .unsupported import ( KnownDRMIE, diff --git a/yt_dlp/extractor/unitednations.py b/yt_dlp/extractor/unitednations.py new file mode 100644 index 0000000000..f9283fd6c1 --- /dev/null +++ b/yt_dlp/extractor/unitednations.py @@ -0,0 +1,32 @@ +from .common import InfoExtractor +from .kaltura import KalturaIE + + +class UnitedNationsWebTvIE(InfoExtractor): + 
_VALID_URL = r'https?://webtv\.un\.org/(?:ar|zh|en|fr|ru|es)/asset/\w+/(?P\w+)' + _TESTS = [{ + 'url': 'https://webtv.un.org/en/asset/k1o/k1o7stmi6p', + 'md5': 'b2f8b3030063298ae841b4b7ddc01477', + 'info_dict': { + 'id': '1_o7stmi6p', + 'ext': 'mp4', + 'title': 'António Guterres (Secretary-General) on Israel and Iran - Security Council, 9939th meeting', + 'thumbnail': 'http://cfvod.kaltura.com/p/2503451/sp/250345100/thumbnail/entry_id/1_o7stmi6p/version/100021', + 'uploader_id': 'evgeniia.alisova@un.org', + 'upload_date': '20250620', + 'timestamp': 1750430976, + 'duration': 234, + 'view_count': int, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + partner_id = self._html_search_regex( + r'partnerId:\s*(\d+)', webpage, 'partner_id') + entry_id = self._html_search_regex( + r'const\s+kentryID\s*=\s*["\'](\w+)["\']', webpage, 'kentry_id') + + return self.url_result(f'kaltura:{partner_id}:{entry_id}', KalturaIE) From b4b4486effdcb96bb6b8148171a49ff579b69a4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Povilas=20Balzaravi=C4=8Dius?= Date: Mon, 14 Jul 2025 00:24:37 +0300 Subject: [PATCH 09/81] [ie/LRTRadio] Fix extractor (#13717) Authored by: Pawka --- yt_dlp/extractor/lrt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py index caff9125e0..34c9ece2d1 100644 --- a/yt_dlp/extractor/lrt.py +++ b/yt_dlp/extractor/lrt.py @@ -134,7 +134,7 @@ class LRTRadioIE(LRTBaseIE): def _real_extract(self, url): video_id, path = self._match_valid_url(url).group('id', 'path') media = self._download_json( - 'https://www.lrt.lt/radioteka/api/media', video_id, + 'https://www.lrt.lt/rest-api/media', video_id, query={'url': f'/mediateka/irasas/{video_id}/{path}'}) return { From 85c3fa1925a9057ef4ae8af682686d5b3eb8e568 Mon Sep 17 00:00:00 2001 From: barsnick Date: Sun, 13 Jul 2025 23:35:10 +0200 Subject: [PATCH 10/81] [ie/RaiSudtirol] Support alternative domain (#13718) Authored by: barsnick --- yt_dlp/extractor/rai.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index c489dc7312..027f7a7b6f 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -765,7 +765,7 @@ class RaiCulturaIE(RaiNewsIE): # XXX: Do not subclass from concrete IE class RaiSudtirolIE(RaiBaseIE): - _VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P\w+)' + _VALID_URL = r'https?://rai(?:bz|sudtirol)\.rai\.it/.+media=(?P\w+)' _TESTS = [{ # mp4 file 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460', @@ -791,6 +791,9 @@ class RaiSudtirolIE(RaiBaseIE): 'formats': 'count:6', }, 'params': {'skip_download': True}, + }, { + 'url': 'https://raibz.rai.it/de/index.php?media=Ptv1751660400', + 'only_matching': True, }] def _real_extract(self, url): From 6d39c420f7774562a106d90253e2ed5b75036321 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sun, 13 Jul 2025 23:42:45 +0200 Subject: [PATCH 11/81] [ie/JoqrAg] Remove extractor (#13152) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/joqrag.py | 112 -------------------------------- 2 files changed, 113 deletions(-) delete mode 100644 yt_dlp/extractor/joqrag.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e99edfd40b..e173f86883 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -929,7 +929,6 @@ JioSaavnSongIE, ) from .joj 
import JojIE -from .joqrag import JoqrAgIE from .jove import JoveIE from .jstream import JStreamIE from .jtbc import ( diff --git a/yt_dlp/extractor/joqrag.py b/yt_dlp/extractor/joqrag.py deleted file mode 100644 index 7a91d4a235..0000000000 --- a/yt_dlp/extractor/joqrag.py +++ /dev/null @@ -1,112 +0,0 @@ -import datetime as dt -import urllib.parse - -from .common import InfoExtractor -from ..utils import ( - clean_html, - datetime_from_str, - unified_timestamp, - urljoin, -) - - -class JoqrAgIE(InfoExtractor): - IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)' - _VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php', - r'https?://(?:www\.)?joqr\.co\.jp/ag/', - r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])'] - _TESTS = [{ - 'url': 'https://www.uniqueradio.jp/agplayer5/player.php', - 'info_dict': { - 'id': 'live', - 'title': str, - 'channel': '超!A&G+', - 'description': str, - 'live_status': 'is_live', - 'release_timestamp': int, - }, - 'params': { - 'skip_download': True, - 'ignore_no_formats_error': True, - }, - }, { - 'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', - 'only_matching': True, - }, { - 'url': 'https://www.joqr.co.jp/ag/article/103760/', - 'only_matching': True, - }, { - 'url': 'http://www.joqr.co.jp/qr/agdailyprogram/', - 'only_matching': True, - }, { - 'url': 'http://www.joqr.co.jp/qr/agregularprogram/', - 'only_matching': True, - }] - - def _extract_metadata(self, variable, html): - return clean_html(urllib.parse.unquote_plus(self._search_regex( - rf'var\s+{variable}\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - html, 'metadata', group='value', default=''))) or None - - def _extract_start_timestamp(self, video_id, is_live): - def extract_start_time_from(date_str): - dt_ = datetime_from_str(date_str) + dt.timedelta(hours=9) - date = dt_.strftime('%Y%m%d') - start_time = self._search_regex( - r']+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})', - self._download_webpage( - f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id, - note=f'Downloading program list of {date}', fatal=False, - errnote=f'Failed to download program list of {date}') or '', - 'start time', default=None) - if start_time: - return unified_timestamp(f'{dt_.strftime("%Y/%m/%d")} {start_time} +09:00') - return None - - start_timestamp = extract_start_time_from('today') - if not start_timestamp: - return None - - if not is_live or start_timestamp < datetime_from_str('now').timestamp(): - return start_timestamp - else: - return extract_start_time_from('yesterday') - - def _real_extract(self, url): - video_id = 'live' - - metadata = self._download_webpage( - 'https://www.uniqueradio.jp/aandg', video_id, - note='Downloading metadata', errnote='Failed to download metadata') - title = self._extract_metadata('Program_name', metadata) - - if not title or title == '放送休止': - formats = [] - live_status = 'is_upcoming' - release_timestamp = self._extract_start_timestamp(video_id, False) - msg = 'This stream is not currently live' - if release_timestamp: - msg += (' and will start at ' - + dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S')) - self.raise_no_formats(msg, expected=True) - else: - m3u8_path = self._search_regex( - r']*\bsrc="([^"]+)"', - self._download_webpage( - 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id, - note='Downloading player data', errnote='Failed to download player data'), - 'm3u8 url') - formats = 
self._extract_m3u8_formats( - urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id) - live_status = 'is_live' - release_timestamp = self._extract_start_timestamp(video_id, True) - - return { - 'id': video_id, - 'title': title, - 'channel': '超!A&G+', - 'description': self._extract_metadata('Program_text', metadata), - 'formats': formats, - 'live_status': live_status, - 'release_timestamp': release_timestamp, - } From 23e9389f936ec5236a87815b8576e5ce567b2f77 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sun, 13 Jul 2025 23:43:14 +0200 Subject: [PATCH 12/81] [ie/bandaichannel] Remove extractor (#13152) Closes #8829 Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/bandaichannel.py | 33 ------------------------------- yt_dlp/extractor/unsupported.py | 4 ++++ 3 files changed, 4 insertions(+), 34 deletions(-) delete mode 100644 yt_dlp/extractor/bandaichannel.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e173f86883..c3073ff47a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -201,7 +201,6 @@ BanByeChannelIE, BanByeIE, ) -from .bandaichannel import BandaiChannelIE from .bandcamp import ( BandcampAlbumIE, BandcampIE, diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py deleted file mode 100644 index d7fcf44bd9..0000000000 --- a/yt_dlp/extractor/bandaichannel.py +++ /dev/null @@ -1,33 +0,0 @@ -from .brightcove import BrightcoveNewBaseIE -from ..utils import extract_attributes - - -class BandaiChannelIE(BrightcoveNewBaseIE): - IE_NAME = 'bandaichannel' - _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P\d+/\d+)' - _TESTS = [{ - 'url': 'https://www.b-ch.com/titles/514/001', - 'md5': 'a0f2d787baa5729bed71108257f613a4', - 'info_dict': { - 'id': '6128044564001', - 'ext': 'mp4', - 'title': 'メタルファイターMIKU 第1話', - 'timestamp': 1580354056, - 'uploader_id': '5797077852001', - 'upload_date': '20200130', - 'duration': 1387.733, - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - attrs = extract_attributes(self._search_regex( - r'(]+\bid="bcplayer"[^>]*>)', webpage, 'player')) - bc = self._download_json( - 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'], - video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc'] - return self._parse_brightcove_metadata(bc, bc['id']) diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 31393b02a4..1b77198100 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -53,6 +53,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'(?:beta\.)?crunchyroll\.com', r'viki\.com', r'deezer\.com', + r'b-ch\.com', ) _TESTS = [{ @@ -168,6 +169,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'http://www.deezer.com/playlist/176747451', 'only_matching': True, + }, { + 'url': 'https://www.b-ch.com/titles/8203/001', + 'only_matching': True, }] def _real_extract(self, url): From 5d693446e882931618c40c99bb593f0b87b30eb9 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Mon, 14 Jul 2025 07:10:59 +0900 Subject: [PATCH 13/81] [ie/limelight] Remove extractors (#13267) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 5 - yt_dlp/extractor/generic.py | 24 --- yt_dlp/extractor/limelight.py | 358 -------------------------------- yt_dlp/extractor/tfo.py 
| 1 + yt_dlp/extractor/tv5unis.py | 2 + 5 files changed, 3 insertions(+), 387 deletions(-) delete mode 100644 yt_dlp/extractor/limelight.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c3073ff47a..1efc313be2 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1030,11 +1030,6 @@ LikeeIE, LikeeUserIE, ) -from .limelight import ( - LimelightChannelIE, - LimelightChannelListIE, - LimelightMediaIE, -) from .linkedin import ( LinkedInEventsIE, LinkedInIE, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 721d04e317..d9a666f991 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1481,30 +1481,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['SenateISVP'], }, - { - # Limelight embeds (1 channel embed + 4 media embeds) - 'url': 'http://www.sedona.com/FacilitatorTraining2017', - 'info_dict': { - 'id': 'FacilitatorTraining2017', - 'title': 'Facilitator Training 2017', - }, - 'playlist_mincount': 5, - }, - { - # Limelight embed (LimelightPlayerUtil.embed) - 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri', - 'info_dict': { - 'id': '95d035dc5c8a401588e9c0e6bd1e9c92', - 'ext': 'mp4', - 'title': '07448641', - 'timestamp': 1499890639, - 'upload_date': '20170712', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['LimelightMedia'], - }, { 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/', 'info_dict': { diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py deleted file mode 100644 index 763a01448c..0000000000 --- a/yt_dlp/extractor/limelight.py +++ /dev/null @@ -1,358 +0,0 @@ -import re - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - determine_ext, - float_or_none, - int_or_none, - smuggle_url, - try_get, - unsmuggle_url, -) - - -class LimelightBaseIE(InfoExtractor): - _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' - - @classmethod - def _extract_embed_urls(cls, url, webpage): - lm = { - 'Media': 'media', - 'Channel': 'channel', - 'ChannelList': 'channel_list', - } - - def smuggle(url): - return smuggle_url(url, {'source_url': url}) - - entries = [] - for kind, video_id in re.findall( - r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P[a-z0-9]{32})', - webpage): - entries.append(cls.url_result( - smuggle(f'limelight:{lm[kind]}:{video_id}'), - f'Limelight{kind}', video_id)) - for mobj in re.finditer( - # As per [1] class attribute should be exactly equal to - # LimelightEmbeddedPlayerFlash but numerous examples seen - # that don't exactly match it (e.g. [2]). - # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage - # 2. http://www.sedona.com/FacilitatorTraining2017 - r'''(?sx) - ]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*? 
- ]+ - name=(["\'])flashVars\2[^>]+ - value=(["\'])(?:(?!\3).)*(?Pmedia|channel(?:List)?)Id=(?P[a-z0-9]{32}) - ''', webpage): - kind, video_id = mobj.group('kind'), mobj.group('id') - entries.append(cls.url_result( - smuggle(f'limelight:{kind}:{video_id}'), - f'Limelight{kind.capitalize()}', video_id)) - # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page) - for video_id in re.findall( - r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P[a-z0-9]{32})', - webpage): - entries.append(cls.url_result( - smuggle(f'limelight:media:{video_id}'), - LimelightMediaIE.ie_key(), video_id)) - return entries - - def _call_playlist_service(self, item_id, method, fatal=True, referer=None): - headers = {} - if referer: - headers['Referer'] = referer - try: - return self._download_json( - self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, f'Downloading PlaylistService {method} JSON', - fatal=fatal, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 403: - error = self._parse_json(e.cause.response.read().decode(), item_id)['detail']['contentAccessPermission'] - if error == 'CountryDisabled': - self.raise_geo_restricted() - raise ExtractorError(error, expected=True) - raise - - def _extract(self, item_id, pc_method, mobile_method, referer=None): - pc = self._call_playlist_service(item_id, pc_method, referer=referer) - mobile = self._call_playlist_service( - item_id, mobile_method, fatal=False, referer=referer) - return pc, mobile - - def _extract_info(self, pc, mobile, i, referer): - get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {} - pc_item = get_item(pc, 'playlistItems') - mobile_item = get_item(mobile, 'mediaList') - video_id = pc_item.get('mediaId') or mobile_item['mediaId'] - title = pc_item.get('title') or mobile_item['title'] - - formats = [] - urls = [] - for stream in pc_item.get('streams', []): - stream_url = stream.get('url') - if not stream_url or stream_url in urls: - continue - if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'): - continue - urls.append(stream_url) - ext = determine_ext(stream_url) - if ext == 'f4m': - formats.extend(self._extract_f4m_formats( - stream_url, video_id, f4m_id='hds', fatal=False)) - else: - fmt = { - 'url': stream_url, - 'abr': float_or_none(stream.get('audioBitRate')), - 'fps': float_or_none(stream.get('videoFrameRate')), - 'ext': ext, - } - width = int_or_none(stream.get('videoWidthInPixels')) - height = int_or_none(stream.get('videoHeightInPixels')) - vbr = float_or_none(stream.get('videoBitRate')) - if width or height or vbr: - fmt.update({ - 'width': width, - 'height': height, - 'vbr': vbr, - }) - else: - fmt['vcodec'] = 'none' - rtmp = re.search(r'^(?Prtmpe?://(?P[^/]+)/(?P.+))/(?Pmp[34]:.+)$', stream_url) - if rtmp: - format_id = 'rtmp' - if stream.get('videoBitRate'): - format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_format_id = format_id.replace('rtmp', 'http') - - CDN_HOSTS = ( - ('delvenetworks.com', 'cpl.delvenetworks.com'), - ('video.llnw.net', 's2.content.video.llnw.net'), - ) - for cdn_host, http_host in CDN_HOSTS: - if cdn_host not in rtmp.group('host').lower(): - continue - http_url = 'http://{}/{}'.format(http_host, rtmp.group('playpath')[4:]) - urls.append(http_url) - if self._is_valid_url(http_url, video_id, http_format_id): - http_fmt = fmt.copy() - http_fmt.update({ - 
'url': http_url, - 'format_id': http_format_id, - }) - formats.append(http_fmt) - break - - fmt.update({ - 'url': rtmp.group('url'), - 'play_path': rtmp.group('playpath'), - 'app': rtmp.group('app'), - 'ext': 'flv', - 'format_id': format_id, - }) - formats.append(fmt) - - for mobile_url in mobile_item.get('mobileUrls', []): - media_url = mobile_url.get('mobileUrl') - format_id = mobile_url.get('targetMediaPlatform') - if not media_url or media_url in urls: - continue - if (format_id in ('Widevine', 'SmoothStreaming') - and not self.get_param('allow_unplayable_formats', False)): - continue - urls.append(media_url) - ext = determine_ext(media_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'mp4', 'm3u8_native', - m3u8_id=format_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - stream_url, video_id, f4m_id=format_id, fatal=False)) - else: - formats.append({ - 'url': media_url, - 'format_id': format_id, - 'quality': -10, - 'ext': ext, - }) - - subtitles = {} - for flag in mobile_item.get('flags'): - if flag == 'ClosedCaptions': - closed_captions = self._call_playlist_service( - video_id, 'getClosedCaptionsDetailsByMediaId', - False, referer) or [] - for cc in closed_captions: - cc_url = cc.get('webvttFileUrl') - if not cc_url: - continue - lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en') - subtitles.setdefault(lang, []).append({ - 'url': cc_url, - }) - break - - get_meta = lambda x: pc_item.get(x) or mobile_item.get(x) - - return { - 'id': video_id, - 'title': title, - 'description': get_meta('description'), - 'formats': formats, - 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000), - 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'), - 'subtitles': subtitles, - } - - -class LimelightMediaIE(LimelightBaseIE): - IE_NAME = 'limelight' - _VALID_URL = r'''(?x) - (?: - limelight:media:| - https?:// - (?: - link\.videoplatform\.limelight\.com/media/| - assets\.delvenetworks\.com/player/loader\.swf - ) - \?.*?\bmediaId= - ) - (?P[a-z0-9]{32}) - ''' - _TESTS = [{ - 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', - 'info_dict': { - 'id': '3ffd040b522b4485b6d84effc750cd86', - 'ext': 'mp4', - 'title': 'HaP and the HB Prince Trailer', - 'description': 'md5:8005b944181778e313d95c1237ddb640', - 'thumbnail': r're:^https?://.*\.jpeg$', - 'duration': 144.23, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # video with subtitles - 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335', - 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d', - 'info_dict': { - 'id': 'a3e00274d4564ec4a9b29b9466432335', - 'ext': 'mp4', - 'title': '3Play Media Overview Video', - 'thumbnail': r're:^https?://.*\.jpeg$', - 'duration': 78.101, - # TODO: extract all languages that were accessible via API - # 'subtitles': 'mincount:9', - 'subtitles': 'mincount:1', - }, - }, { - 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452', - 'only_matching': True, - }] - _PLAYLIST_SERVICE_PATH = 'media' - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - video_id = self._match_id(url) - source_url = smuggled_data.get('source_url') - self._initialize_geo_bypass({ - 'countries': smuggled_data.get('geo_countries'), - }) - - pc, mobile = self._extract( - video_id, 'getPlaylistByMediaId', - 'getMobilePlaylistByMediaId', source_url) - 
- return self._extract_info(pc, mobile, 0, source_url) - - -class LimelightChannelIE(LimelightBaseIE): - IE_NAME = 'limelight:channel' - _VALID_URL = r'''(?x) - (?: - limelight:channel:| - https?:// - (?: - link\.videoplatform\.limelight\.com/media/| - assets\.delvenetworks\.com/player/loader\.swf - ) - \?.*?\bchannelId= - ) - (?P[a-z0-9]{32}) - ''' - _TESTS = [{ - 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', - 'info_dict': { - 'id': 'ab6a524c379342f9b23642917020c082', - 'title': 'Javascript Sample Code', - 'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html', - }, - 'playlist_mincount': 3, - }, { - 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082', - 'only_matching': True, - }] - _PLAYLIST_SERVICE_PATH = 'channel' - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - channel_id = self._match_id(url) - source_url = smuggled_data.get('source_url') - - pc, mobile = self._extract( - channel_id, 'getPlaylistByChannelId', - 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', - source_url) - - entries = [ - self._extract_info(pc, mobile, i, source_url) - for i in range(len(pc['playlistItems']))] - - return self.playlist_result( - entries, channel_id, pc.get('title'), mobile.get('description')) - - -class LimelightChannelListIE(LimelightBaseIE): - IE_NAME = 'limelight:channel_list' - _VALID_URL = r'''(?x) - (?: - limelight:channel_list:| - https?:// - (?: - link\.videoplatform\.limelight\.com/media/| - assets\.delvenetworks\.com/player/loader\.swf - ) - \?.*?\bchannelListId= - ) - (?P[a-z0-9]{32}) - ''' - _TESTS = [{ - 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', - 'info_dict': { - 'id': '301b117890c4465c8179ede21fd92e2b', - 'title': 'Website - Hero Player', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b', - 'only_matching': True, - }] - _PLAYLIST_SERVICE_PATH = 'channel_list' - - def _real_extract(self, url): - channel_list_id = self._match_id(url) - - channel_list = self._call_playlist_service( - channel_list_id, 'getMobileChannelListById') - - entries = [ - self.url_result('limelight:channel:{}'.format(channel['id']), 'LimelightChannel') - for channel in channel_list['channelList']] - - return self.playlist_result( - entries, channel_list_id, channel_list['title']) diff --git a/yt_dlp/extractor/tfo.py b/yt_dlp/extractor/tfo.py index 0d1b252175..1884ab2e8e 100644 --- a/yt_dlp/extractor/tfo.py +++ b/yt_dlp/extractor/tfo.py @@ -6,6 +6,7 @@ class TFOIE(InfoExtractor): + _WORKING = False _GEO_COUNTRIES = ['CA'] _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P\d+)' _TEST = { diff --git a/yt_dlp/extractor/tv5unis.py b/yt_dlp/extractor/tv5unis.py index 88fd334822..fe7fd0325b 100644 --- a/yt_dlp/extractor/tv5unis.py +++ b/yt_dlp/extractor/tv5unis.py @@ -51,6 +51,7 @@ def _real_extract(self, url): class TV5UnisVideoIE(TV5UnisBaseIE): + _WORKING = False IE_NAME = 'tv5unis:video' _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/]+/(?P\d+)' _TEST = { @@ -71,6 +72,7 @@ def _gql_args(groups): class TV5UnisIE(TV5UnisBaseIE): + _WORKING = False IE_NAME = 'tv5unis' _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P[^/]+)(?:/saisons/(?P\d+)/episodes/(?P\d+))?/?(?:[?#&]|$)' _TESTS = [{ From 07d1d85f6387e4bdb107096f0131c7054f078bb9 Mon 
Sep 17 00:00:00 2001 From: chauhantirth <92777505+chauhantirth@users.noreply.github.com> Date: Mon, 14 Jul 2025 04:05:26 +0530 Subject: [PATCH 14/81] [ie/hotstar] Fix support for free accounts (#13700) Fixes b5bd057fe86550f3aa67f2fc8790d1c6a251c57b Closes #13600 Authored by: chauhantirth --- yt_dlp/extractor/hotstar.py | 57 ++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 891bcc8731..f10aab27a3 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -12,8 +12,11 @@ ExtractorError, OnDemandPagedList, determine_ext, + filter_dict, int_or_none, join_nonempty, + jwt_decode_hs256, + parse_iso8601, str_or_none, url_or_none, ) @@ -21,35 +24,48 @@ class HotStarBaseIE(InfoExtractor): + _TOKEN_NAME = 'userUP' _BASE_URL = 'https://www.hotstar.com' _API_URL = 'https://api.hotstar.com' _API_URL_V2 = 'https://apix.hotstar.com/v2' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' + _FREE_HEADERS = { + 'user-agent': 'Hotstar;in.startv.hotstar/25.06.30.0.11580 (Android/12)', + 'x-hs-client': 'platform:android;app_id:in.startv.hotstar;app_version:25.06.30.0;os:Android;os_version:12;schema_version:0.0.1523', + 'x-hs-platform': 'android', + } + _SUB_HEADERS = { + 'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)', + 'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970', + 'x-hs-platform': 'androidtv', + } + + def _has_active_subscription(self, cookies, server_time): + expiry = traverse_obj(cookies, ( + self._TOKEN_NAME, 'value', {jwt_decode_hs256}, 'sub', {json.loads}, + 'subscriptions', 'in', ..., 'expiry', {parse_iso8601}, all, {max})) or 0 + return expiry > server_time + def _call_api_v1(self, path, *args, **kwargs): return self._download_json( f'{self._API_URL}/o/v1/{path}', *args, **kwargs, headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'}) - def _call_api_impl(self, path, video_id, query, st=None, cookies=None): - if not cookies or not cookies.get('userUP'): - self.raise_login_required() - + def _call_api_impl(self, path, video_id, query, cookies=None, st=None): st = int_or_none(st) or int(time.time()) exp = st + 6000 auth = f'st={st}~exp={exp}~acl=/*' auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() response = self._download_json( f'{self._API_URL_V2}/{path}', video_id, query=query, - headers={ - 'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)', + headers=filter_dict({ + **(self._SUB_HEADERS if self._has_active_subscription(cookies, st) else self._FREE_HEADERS), 'hotstarauth': auth, - 'x-hs-usertoken': cookies['userUP'].value, + 'x-hs-usertoken': traverse_obj(cookies, (self._TOKEN_NAME, 'value')), 'x-hs-device-id': traverse_obj(cookies, ('deviceId', 'value')) or str(uuid.uuid4()), - 'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970', - 'x-hs-platform': 'androidtv', 'content-type': 'application/json', - }) + })) if not traverse_obj(response, ('success', {dict})): raise ExtractorError('API call was unsuccessful') @@ -61,21 +77,22 @@ def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None): 'filters': f'content_type={content_type}', 'client_capabilities': json.dumps({ 'package': ['dash', 'hls'], - 'container': 
['fmp4br', 'fmp4'], + 'container': ['fmp4', 'fmp4br', 'ts'], 'ads': ['non_ssai', 'ssai'], - 'audio_channel': ['atmos', 'dolby51', 'stereo'], + 'audio_channel': ['stereo', 'dolby51', 'atmos'], 'encryption': ['plain', 'widevine'], # wv only so we can raise appropriate error - 'video_codec': ['h265', 'h264'], - 'ladder': ['tv', 'full'], - 'resolution': ['4k', 'hd'], - 'true_resolution': ['4k', 'hd'], - 'dynamic_range': ['hdr', 'sdr'], + 'video_codec': ['h264', 'h265'], + 'video_codec_non_secure': ['h264', 'h265', 'vp9'], + 'ladder': ['phone', 'tv', 'full'], + 'resolution': ['hd', '4k'], + 'true_resolution': ['hd', '4k'], + 'dynamic_range': ['sdr', 'hdr'], }, separators=(',', ':')), 'drm_parameters': json.dumps({ 'widevine_security_level': ['SW_SECURE_DECODE', 'SW_SECURE_CRYPTO'], 'hdcp_version': ['HDCP_V2_2', 'HDCP_V2_1', 'HDCP_V2', 'HDCP_V1'], }, separators=(',', ':')), - }, st=st, cookies=cookies) + }, cookies=cookies, st=st) @staticmethod def _parse_metadata_v1(video_data): @@ -274,6 +291,8 @@ def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') video_type = self._TYPE[video_type] cookies = self._get_cookies(url) # Cookies before any request + if not cookies or not cookies.get(self._TOKEN_NAME): + self.raise_login_required() video_data = traverse_obj( self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query={ @@ -292,7 +311,7 @@ def _real_extract(self, url): # See https://github.com/yt-dlp/yt-dlp/issues/396 st = self._request_webpage( f'{self._BASE_URL}/in', video_id, 'Fetching server time').get_header('x-origin-date') - watch = self._call_api_v2('pages/watch', video_id, content_type, cookies=cookies, st=st) + watch = self._call_api_v2('pages/watch', video_id, content_type, cookies, st) player_config = traverse_obj(watch, ( 'page', 'spaces', 'player', 'widget_wrappers', lambda _, v: v['template'] == 'PlayerWidget', 'widget', 'data', 'player_config', {dict}, any, {require('player config')})) From 9f54ea38984788811773ca2ceaca73864acf0e8a Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:14:19 +0900 Subject: [PATCH 15/81] [ie/ctv] Remove extractor (#13429) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/ctv.py | 49 --------------------------------- yt_dlp/extractor/unsupported.py | 4 +++ 3 files changed, 4 insertions(+), 50 deletions(-) delete mode 100644 yt_dlp/extractor/ctv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1efc313be2..0d1dc2b045 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -446,7 +446,6 @@ CSpanIE, ) from .ctsnews import CtsNewsIE -from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py deleted file mode 100644 index a41dab11b1..0000000000 --- a/yt_dlp/extractor/ctv.py +++ /dev/null @@ -1,49 +0,0 @@ -from .common import InfoExtractor - - -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P(?:show|movie)s/[^/]+/[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88', - 'info_dict': { - 'id': '2102249', - 'ext': 'flv', - 'title': 'Wednesday, December 23, 2020', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.', - 'timestamp': 
1608732000, - 'upload_date': '20201223', - 'series': 'Your Morning', - 'season': '2020-2021', - 'season_number': 5, - 'episode_number': 88, - 'tags': ['Your Morning'], - 'categories': ['Talk Show'], - 'duration': 7467.126, - }, - }, { - 'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - content = self._download_json( - 'https://www.ctv.ca/space-graphql/graphql', display_id, query={ - 'query': '''{ - resolvedPath(path: "/%s") { - lastSegment { - content { - ... on AxisContent { - axisId - videoPlayerDestCode - } - } - } - } -}''' % display_id, # noqa: UP031 - })['data']['resolvedPath']['lastSegment']['content'] - video_id = content['axisId'] - return self.url_result( - '9c9media:{}:{}'.format(content['videoPlayerDestCode'], video_id), - 'NineCNineMedia', video_id) diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 1b77198100..bd90bc533a 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -54,6 +54,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'viki\.com', r'deezer\.com', r'b-ch\.com', + r'ctv\.ca', ) _TESTS = [{ @@ -172,6 +173,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://www.b-ch.com/titles/8203/001', 'only_matching': True, + }, { + 'url': 'https://www.ctv.ca/shows/masterchef-53506/the-audition-battles-s15e1', + 'only_matching': True, }] def _real_extract(self, url): From 6fb3947c0dc6d0e3eab5077c5bada8402f47a277 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Mon, 9 Jun 2025 20:41:14 +0900 Subject: [PATCH 16/81] [ie/bellmedia] Remove extractor (#13429) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/bellmedia.py | 91 --------------------------------- yt_dlp/extractor/unsupported.py | 4 ++ 3 files changed, 4 insertions(+), 92 deletions(-) delete mode 100644 yt_dlp/extractor/bellmedia.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0d1dc2b045..b4ca2175cf 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -228,7 +228,6 @@ from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE -from .bellmedia import BellMediaIE from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py deleted file mode 100644 index ac45dd4779..0000000000 --- a/yt_dlp/extractor/bellmedia.py +++ /dev/null @@ -1,91 +0,0 @@ -from .common import InfoExtractor - - -class BellMediaIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? 
- (?P - (?: - ctv| - tsn| - bnn(?:bloomberg)?| - thecomedynetwork| - discovery| - discoveryvelocity| - sciencechannel| - investigationdiscovery| - animalplanet| - bravo| - mtv| - space| - etalk| - marilyn - )\.ca| - (?:much|cp24)\.com - )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' - _TESTS = [{ - 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070', - 'md5': '3e5b8e38370741d5089da79161646635', - 'info_dict': { - 'id': '1403070', - 'ext': 'flv', - 'title': 'David Cockfield\'s Top Picks', - 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3', - 'upload_date': '20180525', - 'timestamp': 1527288600, - 'season_id': '73997', - 'season': '2018', - 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg', - 'tags': [], - 'categories': ['ETFs'], - 'season_number': 8, - 'duration': 272.038, - 'series': 'Market Call Tonight', - }, - }, { - 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', - 'only_matching': True, - }, { - 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', - 'only_matching': True, - }, { - 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', - 'only_matching': True, - }, { - 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', - 'only_matching': True, - }, { - 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', - 'only_matching': True, - }, { - 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', - 'only_matching': True, - }, { - 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', - 'only_matching': True, - }, { - 'url': 'http://www.etalk.ca/video?videoid=663455', - 'only_matching': True, - }, { - 'url': 'https://www.cp24.com/video?clipId=1982548', - 'only_matching': True, - }] - _DOMAINS = { - 'thecomedynetwork': 'comedy', - 'discoveryvelocity': 'discvel', - 'sciencechannel': 'discsci', - 'investigationdiscovery': 'invdisc', - 'animalplanet': 'aniplan', - 'etalk': 'ctv', - 'bnnbloomberg': 'bnn', - 'marilyn': 'ctv_marilyn', - } - - def _real_extract(self, url): - domain, video_id = self._match_valid_url(url).groups() - domain = domain.split('.')[0] - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', - 'ie_key': 'NineCNineMedia', - } diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index bd90bc533a..628e406191 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -55,6 +55,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'deezer\.com', r'b-ch\.com', r'ctv\.ca', + r'tsn\.ca', ) _TESTS = [{ @@ -176,6 +177,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://www.ctv.ca/shows/masterchef-53506/the-audition-battles-s15e1', 'only_matching': True, + }, { + 'url': 'https://www.tsn.ca/video/relaxed-oilers-look-to-put-emotional-game-2-loss-in-the-rearview%7E3148747', + 'only_matching': True, }] def _real_extract(self, url): From d57a0b5aa78d59324b037d37492fe86aa4fbf58a Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Wed, 11 Jun 2025 05:16:17 +0900 Subject: [PATCH 17/81] [ie/noovo] Remove extractor (#13429) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/noovo.py | 100 -------------------------------- yt_dlp/extractor/unsupported.py | 4 ++ 3 files 
changed, 4 insertions(+), 101 deletions(-) delete mode 100644 yt_dlp/extractor/noovo.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b4ca2175cf..0a00db437e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1374,7 +1374,6 @@ from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE -from .noovo import NoovoIE from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, diff --git a/yt_dlp/extractor/noovo.py b/yt_dlp/extractor/noovo.py deleted file mode 100644 index 772d4ed9e0..0000000000 --- a/yt_dlp/extractor/noovo.py +++ /dev/null @@ -1,100 +0,0 @@ -from .brightcove import BrightcoveNewIE -from .common import InfoExtractor -from ..utils import ( - int_or_none, - js_to_json, - smuggle_url, - try_get, -) - - -class NoovoIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P[^/]+/[^/?#&]+)' - _TESTS = [{ - # clip - 'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial', - 'info_dict': { - 'id': '5386045029001', - 'ext': 'mp4', - 'title': 'Chrysler Imperial', - 'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056', - 'timestamp': 1491399228, - 'upload_date': '20170405', - 'uploader_id': '618566855001', - 'series': 'RPM+', - }, - 'params': { - 'skip_download': True, - }, - }, { - # episode - 'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8', - 'info_dict': { - 'id': '5395865725001', - 'title': 'Épisode 13 : Les retrouvailles', - 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473', - 'ext': 'mp4', - 'timestamp': 1492019320, - 'upload_date': '20170412', - 'uploader_id': '618566855001', - 'series': "L'amour est dans le pré", - 'season_number': 5, - 'episode': 'Épisode 13', - 'episode_number': 13, - }, - 'params': { - 'skip_download': True, - }, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - brightcove_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'brightcove id') - - data = self._parse_json( - self._search_regex( - r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data', - default='{}'), - video_id, transform_source=js_to_json, fatal=False) - - title = try_get( - data, lambda x: x['video']['nom'], - str) or self._html_search_meta( - 'dcterms.Title', webpage, 'title', fatal=True) - - description = self._html_search_meta( - ('dcterms.Description', 'description'), webpage, 'description') - - series = try_get( - data, lambda x: x['emission']['nom']) or self._search_regex( - r']+class="banner-card__subtitle h4"[^>]*>([^<]+)', - webpage, 'series', default=None) - - season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {} - season = try_get(season_el, lambda x: x['nom'], str) - season_number = int_or_none(try_get(season_el, lambda x: x['numero'])) - - episode_el = try_get(season_el, lambda x: x['episode'], dict) or {} - episode = try_get(episode_el, lambda x: x['nom'], str) - episode_number = int_or_none(try_get(episode_el, lambda x: x['numero'])) - - return { - '_type': 'url_transparent', - 'ie_key': BrightcoveNewIE.ie_key(), - 'url': smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'geo_countries': ['CA']}), - 'id': brightcove_id, - 'title': title, - 'description': description, - 'series': series, - 'season': season, - 'season_number': season_number, - 'episode': episode, - 
'episode_number': episode_number, - } diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 628e406191..05ae4dd18a 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -55,6 +55,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'deezer\.com', r'b-ch\.com', r'ctv\.ca', + r'noovo\.ca', r'tsn\.ca', ) @@ -177,6 +178,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://www.ctv.ca/shows/masterchef-53506/the-audition-battles-s15e1', 'only_matching': True, + }, { + 'url': 'https://www.noovo.ca/emissions/lamour-est-dans-le-pre/prets-pour-lamour-s10e1', + 'only_matching': True, }, { 'url': 'https://www.tsn.ca/video/relaxed-oilers-look-to-put-emotional-game-2-loss-in-the-rearview%7E3148747', 'only_matching': True, From 7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:24:52 -0500 Subject: [PATCH 18/81] [ie/hotstar] Improve error handling (#13727) Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index f10aab27a3..b280fb53ab 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -383,10 +383,13 @@ def _real_extract(self, url): formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) - if not formats and geo_restricted: - self.raise_geo_restricted(countries=['IN'], metadata_available=True) - elif not formats and has_drm: - self.report_drm(video_id) + if not formats: + if geo_restricted: + self.raise_geo_restricted(countries=['IN'], metadata_available=True) + elif has_drm: + self.report_drm(video_id) + elif not self._has_active_subscription(cookies, st): + self.raise_no_formats('Your account does not have access to this content', expected=True) self._remove_duplicate_formats(formats) for f in formats: f.setdefault('http_headers', {}).update(headers) From ade876efb31d55d3394185ffc56942fdc8d325cc Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:25:45 -0500 Subject: [PATCH 19/81] [ie/francetv] Improve error handling (#13726) Closes #13324 Authored by: bashonly --- yt_dlp/extractor/francetv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index edf6708a03..54c2c53aca 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -124,9 +124,10 @@ def _extract_video(self, video_id, hostname=None): elif code := traverse_obj(dinfo, ('code', {int})): if code == 2009: self.raise_geo_restricted(countries=self._GEO_COUNTRIES) - elif code in (2015, 2017): + elif code in (2015, 2017, 2019): # 2015: L'accès à cette vidéo est impossible. (DRM-only) # 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM) + # 2019: L'accès à cette vidéo est incompatible avec votre configuration. 
(DRM-only) drm_formats = True continue self.report_warning( From d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf Mon Sep 17 00:00:00 2001 From: rdamas Date: Mon, 14 Jul 2025 20:55:52 +0200 Subject: [PATCH 20/81] [ie/archive.org] Fix extractor (#13706) Closes #13704 Authored by: rdamas --- yt_dlp/extractor/archiveorg.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 2849d9fd5b..572bd6bfe2 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -16,6 +16,7 @@ dict_get, extract_attributes, get_element_by_id, + get_element_text_and_html_by_tag, int_or_none, join_nonempty, js_to_json, @@ -72,6 +73,7 @@ class ArchiveOrgIE(InfoExtractor): 'display_id': 'Cops-v2.mp4', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'duration': 1091.96, + 'track': 'Cops-v2', }, }, { 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', @@ -86,6 +88,7 @@ class ArchiveOrgIE(InfoExtractor): 'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'duration': 59.77, 'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg', + 'track': 'Commercial-JFK1960ElectionAdCampaignJingle', }, }, { 'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg', @@ -102,6 +105,7 @@ class ArchiveOrgIE(InfoExtractor): 'duration': 59.51, 'license': 'http://creativecommons.org/licenses/publicdomain/', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', + 'track': 'Commercial-Nixon1960ElectionAdToughonDefense', }, }, { 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16', @@ -182,6 +186,7 @@ class ArchiveOrgIE(InfoExtractor): 'duration': 130.46, 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg', 'display_id': 'irelandthemakingofarepublicreel1_01.mov', + 'track': 'irelandthemakingofarepublicreel1 01', }, }, { 'md5': '67335ee3b23a0da930841981c1e79b02', @@ -192,6 +197,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'irelandthemakingofarepublicreel1_02.mov', 'display_id': 'irelandthemakingofarepublicreel1_02.mov', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg', + 'track': 'irelandthemakingofarepublicreel1 02', }, }, { 'md5': 'e470e86787893603f4a341a16c281eb5', @@ -202,6 +208,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'irelandthemakingofarepublicreel2.mov', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'display_id': 'irelandthemakingofarepublicreel2.mov', + 'track': 'irelandthemakingofarepublicreel2', }, }, ], @@ -229,15 +236,8 @@ class ArchiveOrgIE(InfoExtractor): @staticmethod def _playlist_data(webpage): - element = re.findall(r'''(?xs) - - ''', webpage)[0] - - return json.loads(extract_attributes(element)['value']) + element = get_element_text_and_html_by_tag('play-av', webpage)[1] + return json.loads(extract_attributes(element)['playlist']) def _real_extract(self, url): video_id = urllib.parse.unquote_plus(self._match_id(url)) From 3a84be9d1660ef798ea28f929a20391bef6afda4 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Mon, 14 Jul 2025 22:01:53 +0300 Subject: [PATCH 21/81] [ie/TheHighWire] Add extractor (#13505) Closes #13364 
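A quick way to smoke-test the new extractor once this patch is applied is via the Python API; this is only an illustrative sketch (the URL is taken from the test case added below, and the printed fields are the standard info-dict keys):

    import yt_dlp

    with yt_dlp.YoutubeDL() as ydl:
        # Resolves the watch page -> embed page -> HLS formats chain added below
        info = ydl.extract_info(
            'https://thehighwire.com/ark-videos/the-deposition-of-stanley-plotkin/',
            download=False)
        print(info['id'], info['title'])
        print([f.get('format_id') for f in info['formats']])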
Authored by: swayll --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/thehighwire.py | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 yt_dlp/extractor/thehighwire.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0a00db437e..c9172fef78 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2092,6 +2092,7 @@ TheGuardianPodcastIE, TheGuardianPodcastPlaylistIE, ) +from .thehighwire import TheHighWireIE from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( diff --git a/yt_dlp/extractor/thehighwire.py b/yt_dlp/extractor/thehighwire.py new file mode 100644 index 0000000000..8b596143f7 --- /dev/null +++ b/yt_dlp/extractor/thehighwire.py @@ -0,0 +1,43 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + url_or_none, +) +from ..utils.traversal import ( + find_element, + require, + traverse_obj, +) + + +class TheHighWireIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thehighwire\.com/ark-videos/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://thehighwire.com/ark-videos/the-deposition-of-stanley-plotkin/', + 'info_dict': { + 'id': 'the-deposition-of-stanley-plotkin', + 'ext': 'mp4', + 'title': 'THE DEPOSITION OF STANLEY PLOTKIN', + 'description': 'md5:6d0be4f1181daaa10430fd8b945a5e54', + 'thumbnail': r're:https?://static\.arkengine\.com/video/.+\.jpg', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + embed_url = traverse_obj(webpage, ( + {find_element(cls='ark-video-embed', html=True)}, + {extract_attributes}, 'src', {url_or_none}, {require('embed URL')})) + embed_page = self._download_webpage(embed_url, display_id) + + return { + 'id': display_id, + **traverse_obj(webpage, { + 'title': ({find_element(cls='section-header')}, {clean_html}), + 'description': ({find_element(cls='episode-description__copy')}, {clean_html}), + }), + **self._parse_html5_media_entries(embed_url, embed_page, display_id, m3u8_id='hls')[0], + } From dcc4cba39e2a79d3efce16afa28dbe245468489f Mon Sep 17 00:00:00 2001 From: flanter21 <139064898+flanter21@users.noreply.github.com> Date: Thu, 17 Jul 2025 02:17:48 +0300 Subject: [PATCH 22/81] [ie/blackboardcollaborate] Support subtitles and authwalled videos (#12473) Authored by: flanter21 --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/blackboardcollaborate.py | 146 +++++++++++++++++++--- 2 files changed, 135 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c9172fef78..4d67e1caa3 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -273,7 +273,10 @@ BitChuteChannelIE, BitChuteIE, ) -from .blackboardcollaborate import BlackboardCollaborateIE +from .blackboardcollaborate import ( + BlackboardCollaborateIE, + BlackboardCollaborateLaunchIE, +) from .bleacherreport import ( BleacherReportCMSIE, BleacherReportIE, diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 535890979b..c14ff1f142 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -1,16 +1,27 @@ from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + UnsupportedError, + float_or_none, + int_or_none, + join_nonempty, + jwt_decode_hs256, + mimetype2ext, + parse_iso8601, + parse_qs, 
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj
 
 
 class BlackboardCollaborateIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
-                    (?P<region>[a-z-]+)\.bbcollab\.com/
+                    (?P<region>[a-z]+)(?:-lti)?\.bbcollab\.com/
                     (?:
                         collab/ui/session/playback/load|
                         recording
                     )/
-                    (?P<id>[^/]+)'''
+                    (?P<id>[^/?#]+)'''
     _TESTS = [
         {
             'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256',
@@ -19,9 +30,55 @@ class BlackboardCollaborateIE(InfoExtractor):
                 'id': '0a633b6a88824deb8c918f470b22b256',
                 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1',
                 'ext': 'mp4',
-                'duration': 1896000,
-                'timestamp': 1620331399,
+                'duration': 1896,
+                'timestamp': 1620333295,
                 'upload_date': '20210506',
+                'subtitles': {
+                    'live_chat': 'mincount:1',
+                },
+            },
+        },
+        {
+            'url': 'https://eu.bbcollab.com/collab/ui/session/playback/load/4bde2dee104f40289a10f8e554270600',
+            'md5': '108db6a8f83dcb0c2a07793649581865',
+            'info_dict': {
+                'id': '4bde2dee104f40289a10f8e554270600',
+                'title': 'Meeting - Azerbaycanca erize formasi',
+                'ext': 'mp4',
+                'duration': 880,
+                'timestamp': 1671176868,
+                'upload_date': '20221216',
+            },
+        },
+        {
+            'url': 'https://eu.bbcollab.com/recording/f83be390ecff46c0bf7dccb9dddcf5f6',
+            'md5': 'e3b0b88ddf7847eae4b4c0e2d40b83a5',
+            'info_dict': {
+                'id': 'f83be390ecff46c0bf7dccb9dddcf5f6',
+                'title': 'Keynote lecture by Laura Carvalho - recording_1',
+                'ext': 'mp4',
+                'duration': 5506,
+                'timestamp': 1662721705,
+                'upload_date': '20220909',
+                'subtitles': {
+                    'live_chat': 'mincount:1',
+                },
+            },
+        },
+        {
+            'url': 'https://eu.bbcollab.com/recording/c3e1e7c9e83d4cd9981c93c74888d496',
+            'md5': 'fdb2d8c43d66fbc0b0b74ef5e604eb1f',
+            'info_dict': {
+                'id': 'c3e1e7c9e83d4cd9981c93c74888d496',
+                'title': 'International Ally User Group - recording_18',
+                'ext': 'mp4',
+                'duration': 3479,
+                'timestamp': 1721919621,
+                'upload_date': '20240725',
+                'subtitles': {
+                    'en': 'mincount:1',
+                    'live_chat': 'mincount:1',
+                },
             },
         },
         {
@@ -42,22 +99,81 @@ class BlackboardCollaborateIE(InfoExtractor):
             },
         },
     ]
 
+    def _call_api(self, region, video_id, path=None, token=None, note=None, fatal=False):
+        # Ref: https://github.com/blackboard/BBDN-Collab-Postman-REST
+        return self._download_json(
+            join_nonempty(f'https://{region}.bbcollab.com/collab/api/csa/recordings', video_id, path, delim='/'),
+            video_id, note or 'Downloading JSON metadata', fatal=fatal,
+            headers={'Authorization': f'Bearer {token}'} if token else None)
+
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         region = mobj.group('region')
         video_id = mobj.group('id')
-        info = self._download_json(
-            f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
-        duration = info.get('duration')
-        title = info['name']
-        upload_date = info.get('created')
-        streams = info['streams']
-        formats = [{'format_id': k, 'url': url} for k, url in streams.items()]
+        token = parse_qs(url).get('authToken', [None])[-1]
+
+        video_info = self._call_api(region, video_id, path='data/secure', token=token, note='Trying auth token')
+        if video_info:
+            video_extra = self._call_api(region, video_id, token=token, note='Retrieving extra attributes')
+        else:
+            video_info = self._call_api(region, video_id, path='data', note='Trying fallback', fatal=True)
+            video_extra = {}
+
+        formats = traverse_obj(video_info, ('extStreams', lambda _, v: url_or_none(v['streamUrl']), {
+            'url': 'streamUrl',
+            'ext': ('contentType', {mimetype2ext}),
+            'aspect_ratio': ('aspectRatio', {float_or_none}),
+        }))
+
+        if filesize :=
traverse_obj(video_extra, ('storageSize', {int_or_none})): + for fmt in formats: + fmt['filesize'] = filesize + + subtitles = {} + for subs in traverse_obj(video_info, ('subtitles', lambda _, v: url_or_none(v['url']))): + subtitles.setdefault(subs.get('lang') or 'und', []).append({ + 'name': traverse_obj(subs, ('label', {str})), + 'url': subs['url'], + }) + + for live_chat_url in traverse_obj(video_info, ('chats', ..., 'url', {url_or_none})): + subtitles.setdefault('live_chat', []).append({'url': live_chat_url}) return { - 'duration': duration, + **traverse_obj(video_info, { + 'title': ('name', {str}), + 'timestamp': ('created', {parse_iso8601}), + 'duration': ('duration', {int_or_none(scale=1000)}), + }), 'formats': formats, 'id': video_id, - 'timestamp': parse_iso8601(upload_date), - 'title': title, + 'subtitles': subtitles, } + + +class BlackboardCollaborateLaunchIE(InfoExtractor): + _VALID_URL = r'https?://[a-z]+\.bbcollab\.com/launch/(?P[^/?#]+)' + + _TESTS = [ + { + 'url': 'https://au.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzQwNDE2NDgzLCJpYXQiOjE3NDA0MTYxODMsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3MzI4YzRjZTNmM2U0ZTcwYmY3MTY3N2RkZTgzMzk2NSIsImNvbnN1bWVySWQiOiJhM2Q3NGM0Y2QyZGU0MGJmODFkMjFlODNlMmEzNzM5MCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.xuELw4EafEwUMoYcCHidGn4Tw9O1QCbYHzYGJUl0kKk', + 'only_matching': True, + }, + { + 'url': 'https://us.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNjk0NDgxOTc3LCJpYXQiOjE2OTQ0ODE2NzcsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3YWU0MTFhNTU3NjU0OWFiOTZlYjVmMTM1YmY3MWU5MCIsImNvbnN1bWVySWQiOiJBRUU2MEI4MDI2QzM3ODU2RjMwMzNEN0ZEOTQzMTFFNSIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.yOhRZNaIjXYoMYMpcTzgjZJCnIFaYf2cAzbco8OAxlY', + 'only_matching': True, + }, + { + 'url': 'https://eu.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzUyNjgyODYwLCJpYXQiOjE3NTI2ODI1NjAsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI4MjQzYjFiODg2Nzk0NTZkYjkwN2NmNDZmZmE1MmFhZiIsImNvbnN1bWVySWQiOiI5ZTY4NzYwZWJiNzM0MzRiYWY3NTQyZjA1YmJkOTMzMCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.Xj4ymojYLwZ1vKPKZ-KxjpqQvFXoJekjRaG0npngwWs', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + token = self._match_id(url) + video_id = jwt_decode_hs256(token)['resourceAccessTicket']['resourceId'] + + redirect_url = self._request_webpage(url, video_id).url + if self.suitable(redirect_url): + raise UnsupportedError(redirect_url) + return self.url_result(redirect_url, BlackboardCollaborateIE, video_id) From c1ac543c8166ff031d62e340b3244ca8556e3fb9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 16 Jul 2025 18:19:58 -0500 Subject: [PATCH 23/81] [ie/soundcloud] Always extract original 
format extension (#13746)

Closes #13743

Authored by: bashonly
---
 yt_dlp/extractor/soundcloud.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 3496a08ef6..404e298978 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -242,7 +242,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f
             format_urls.add(format_url)
             formats.append({
                 'format_id': 'download',
-                'ext': urlhandle_detect_ext(urlh, default='mp3'),
+                'ext': urlhandle_detect_ext(urlh),
                 'filesize': int_or_none(urlh.headers.get('Content-Length')),
                 'url': format_url,
                 'quality': 10,

From b8abd255e454acbe0023cdb946f9eb461ced7eeb Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 18 Jul 2025 14:43:40 -0500
Subject: [PATCH 24/81] [utils] `mimetype2ext`: Always parse `flac` from `audio/flac` (#13748)

Authored by: bashonly
---
 yt_dlp/utils/_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 20aa341ca3..c930830d99 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -2961,6 +2961,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
         'audio/x-matroska': 'mka',
         'audio/x-mpegurl': 'm3u',
         'aacp': 'aac',
+        'flac': 'flac',
         'midi': 'mid',
         'ogg': 'ogg',
         'wav': 'wav',

From 28bf46b7dafe2e241137763bf570a2f91ba8a53a Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 18 Jul 2025 14:46:06 -0500
Subject: [PATCH 25/81] [utils] `urlhandle_detect_ext`: Use `x-amz-meta-file-type` headers (#13749)

Authored by: bashonly
---
 yt_dlp/utils/_utils.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index c930830d99..c91a06e9a6 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -3106,21 +3106,15 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
 def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
     getheader = url_handle.headers.get
 
-    cd = getheader('Content-Disposition')
-    if cd:
-        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
-        if m:
-            e = determine_ext(m.group('filename'), default_ext=None)
-            if e:
-                return e
+    if cd := getheader('Content-Disposition'):
+        if m := re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd):
+            if ext := determine_ext(m.group('filename'), default_ext=None):
+                return ext
 
-    meta_ext = getheader('x-amz-meta-name')
-    if meta_ext:
-        e = meta_ext.rpartition('.')[2]
-        if e:
-            return e
-
-    return mimetype2ext(getheader('Content-Type'), default=default)
+    return (
+        determine_ext(getheader('x-amz-meta-name'), default_ext=None)
+        or getheader('x-amz-meta-file-type')
+        or mimetype2ext(getheader('Content-Type'), default=default))
 
 
 def encode_data_uri(data, mime_type):

From 5f951ce929b56a822514f1a02cc06af030855ec7 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 18 Jul 2025 15:06:02 -0500
Subject: [PATCH 26/81] [ie/aenetworks] Support new URL formats (#13747)

Closes #13745

Authored by: bashonly
---
 yt_dlp/extractor/aenetworks.py | 70 ++++++++++++++++++++++++++--------
 1 file changed, 55 insertions(+), 15 deletions(-)

diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py
index e5c922b41f..a4a5f409ec 100644
--- a/yt_dlp/extractor/aenetworks.py
+++ b/yt_dlp/extractor/aenetworks.py
@@ -111,11 +111,9 @@ class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P - shows/[^/]+/season-\d+/episode-\d+| - (?: - (?:movie|special)s/[^/]+| - (?:shows/[^/]+/)?videos - )/[^/?#&]+ + shows/[^/?#]+/season-\d+/episode-\d+| + (?Pmovie|special)s/[^/?#]+(?P/[^/?#]+)?| + (?:shows/[^/?#]+/)?videos/[^/?#]+ )''' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', @@ -128,7 +126,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20120529', 'uploader': 'AENE-NEW', 'duration': 2592.0, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:5', 'tags': 'count:14', 'categories': ['Mountain Men'], @@ -139,10 +137,7 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Mountain Men', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], 'skip': 'Geo-restricted - This content is not available in your location.', }, { @@ -156,7 +151,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20160112', 'uploader': 'AENE-NEW', 'duration': 1277.695, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:4', 'tags': 'count:23', 'episode': 'Inlawful Entry', @@ -166,10 +161,53 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Duck Dynasty', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams', + 'info_dict': { + 'id': '1590627395981', + 'ext': 'mp4', + 'title': 'VC Andrews\' Web of Dreams', + 'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5253.665, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:8', + 'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"], + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'VC Andrews\' Web of Dreams', + 'episode_number': 0, + 'timestamp': 1566489703.0, + 'upload_date': '20190822', }, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story', + 'info_dict': { + 'id': '1488235587551', + 'ext': 'mp4', + 'title': 'Hunting JonBenet\'s Killer: The Untold Story', + 'description': 'md5:209869425ee392d74fe29201821e48b4', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5003.903, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:10', + 'tags': 'count:11', + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'Hunting JonBenet\'s Killer: The Untold Story', + 'episode_number': 0, + 'timestamp': 1554987697.0, + 'upload_date': '20190411', + }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], }, { 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', @@ -198,7 +236,9 @@ class AENetworksIE(AENetworksBaseIE): }] def _real_extract(self, url): - domain, canonical = self._match_valid_url(url).groups() + domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra') + if url_type in ('movie', 'special') and not extra: + canonical += f'/full-{url_type}' return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) From 4919051e447c7f8ae9df8ba5c4208b6b5c04915a 
Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 16:55:02 -0500 Subject: [PATCH 27/81] [core] Don't let format testing alter the return code (#13767) Closes #13750 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 44a6696c02..3cfcb8ef0f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2208,6 +2208,9 @@ def _check_formats(self, formats): continue temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None) temp_file.close() + # If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code. + # Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750 + original_retcode = self._download_retcode try: success, _ = self.dl(temp_file.name, f, test=True) except (DownloadError, OSError, ValueError, *network_exceptions): @@ -2218,6 +2221,8 @@ def _check_formats(self, formats): os.remove(temp_file.name) except OSError: self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') + # Restore the actual return code + self._download_retcode = original_retcode f['__working'] = success if success: f.pop('__needs_testing', None) From 1f27a9f8baccb9105f2476154557540efe09a937 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 16:59:50 -0500 Subject: [PATCH 28/81] [core] Warn when skipping formats (#13090) Authored by: bashonly --- yt_dlp/YoutubeDL.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3cfcb8ef0f..9c9ee64a8c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2195,7 +2195,7 @@ def _filter(f): return op(actual_value, comparison_value) return _filter - def _check_formats(self, formats): + def _check_formats(self, formats, warning=True): for f in formats: working = f.get('__working') if working is not None: @@ -2228,7 +2228,11 @@ def _check_formats(self, formats): f.pop('__needs_testing', None) yield f else: - self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) + msg = f'Unable to download format {f["format_id"]}. Skipping...' 
+ if warning: + self.report_warning(msg) + else: + self.to_screen(f'[info] {msg}') def _select_formats(self, formats, selector): return list(selector({ @@ -2954,7 +2958,7 @@ def is_wellformed(f): ) if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1]), reverse=True) + formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them From c8329fc572903eeed7edad1642773b2268b71a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Schmidt?= <121871105+moonshinerd@users.noreply.github.com> Date: Fri, 18 Jul 2025 19:43:04 -0300 Subject: [PATCH 29/81] [ie/rai] Fix formats extraction (#13572) Closes #13548 Authored by: moonshinerd, seproDev Co-authored-by: sepro --- yt_dlp/extractor/rai.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 027f7a7b6f..d1a4d4c37f 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -81,7 +81,7 @@ def fix_cdata(s): # geo flag is a bit unreliable and not properly set all the time geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y' - ext = determine_ext(media_url) + ext = determine_ext(media_url).lower() formats = [] if ext == 'mp3': @@ -108,7 +108,7 @@ def fix_cdata(s): 'format_id': join_nonempty('https', bitrate, delim='-'), }) else: - raise ExtractorError('Unrecognized media file found') + raise ExtractorError(f'Unrecognized media extension "{ext}"') if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url: self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) @@ -503,6 +503,28 @@ class RaiPlaySoundIE(RaiBaseIE): 'upload_date': '20211201', }, 'params': {'skip_download': True}, + }, { + # case-sensitivity test for uppercase extension + 'url': 'https://www.raiplaysound.it/audio/2020/05/Storia--Lunita-dItalia-e-lunificazione-della-Germania-b4c16390-7f3f-4282-b353-d94897dacb7c.html', + 'md5': 'c69ebd69282f0effd7ef67b7e2f6c7d8', + 'info_dict': { + 'id': 'b4c16390-7f3f-4282-b353-d94897dacb7c', + 'ext': 'mp3', + 'title': "Storia | 01 L'unità d'Italia e l'unificazione della Germania", + 'alt_title': 'md5:ed4ed82585c52057b71b43994a59b705', + 'description': 'md5:92818b6f31b2c150567d56b75db2ea7f', + 'uploader': 'rai radio 3', + 'duration': 2439.0, + 'thumbnail': 'https://www.raiplaysound.it/dl/img/2023/09/07/1694084898279_Maturadio-LOGO-2048x1152.jpg', + 'creators': ['rai radio 3'], + 'series': 'Maturadio', + 'season': 'Season 9', + 'season_number': 9, + 'episode': "01. 
L'unità d'Italia e l'unificazione della Germania", + 'episode_number': 1, + 'timestamp': 1590400740, + 'upload_date': '20200525', + }, }] def _real_extract(self, url): From 09982bc33e2f1f9a1ff66e6738df44f15b36f6a6 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Jul 2025 18:24:52 -0500 Subject: [PATCH 30/81] [ie/dangalplay] Support other login regions (#13768) Authored by: bashonly --- yt_dlp/extractor/dangalplay.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py index f7b243234a..3b0dc1f607 100644 --- a/yt_dlp/extractor/dangalplay.py +++ b/yt_dlp/extractor/dangalplay.py @@ -11,8 +11,14 @@ class DangalPlayBaseIE(InfoExtractor): _NETRC_MACHINE = 'dangalplay' + _REGION = 'IN' _OTV_USER_ID = None - _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _LOGIN_HINT = ( + 'Pass credentials as -u "token" -p "USER_ID" ' + '(where USER_ID is the value of "otv_user_id" in your browser local storage). ' + 'Your login region can be optionally suffixed to the username as @REGION ' + '(where REGION is the two-letter "region" code found in your browser local storage), ' + 'e.g.: -u "token@IN" -p "USER_ID"') _API_BASE = 'https://ottapi.dangalplay.com' _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above @@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor): def _perform_login(self, username, password): if self._OTV_USER_ID: return - if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + mobj = re.fullmatch(r'token(?:@(?P[A-Z]{2}))?', username) + if not mobj or not re.fullmatch(r'[\da-f]{32}', password): raise ExtractorError(self._LOGIN_HINT, expected=True) + if region := mobj.group('region'): + self._REGION = region + self.write_debug(f'Setting login region to "{self._REGION}"') self._OTV_USER_ID = password def _real_initialize(self): @@ -52,7 +62,7 @@ def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=Tr f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, headers={'Accept': 'application/json'}, query={ 'auth_token': self._AUTH_TOKEN, - 'region': 'IN', + 'region': self._REGION, **query, }) @@ -106,7 +116,7 @@ def _generate_api_data(self, data): 'catalog_id': catalog_id, 'content_id': content_id, 'category': '', - 'region': 'IN', + 'region': self._REGION, 'auth_token': self._AUTH_TOKEN, 'id': self._OTV_USER_ID, 'md5': hashlib.md5(unhashed.encode()).hexdigest(), @@ -129,11 +139,14 @@ def _real_extract(self, url): except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 422: error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} - if error_info.get('code') == '1016': + error_code = error_info.get('code') + if error_code == '1016': self.raise_login_required( f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) - elif msg := error_info.get('message'): - raise ExtractorError(msg) + elif error_code == '4028': + self.raise_login_required( + f'Your login region is unspecified or incorrect. 
{self._LOGIN_HINT}', method=None) + raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': ')) raise m3u8_url = traverse_obj(details, ( From 1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3 Mon Sep 17 00:00:00 2001 From: R0hanW <30849420+R0hanW@users.noreply.github.com> Date: Fri, 18 Jul 2025 19:38:52 -0400 Subject: [PATCH 31/81] [ie/PlayerFm] Add extractor (#13016) Closes #4518 Authored by: R0hanW --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/playerfm.py | 70 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 yt_dlp/extractor/playerfm.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4d67e1caa3..59a61e0604 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1557,6 +1557,7 @@ PlatziCourseIE, PlatziIE, ) +from .playerfm import PlayerFmIE from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE diff --git a/yt_dlp/extractor/playerfm.py b/yt_dlp/extractor/playerfm.py new file mode 100644 index 0000000000..d59d651a32 --- /dev/null +++ b/yt_dlp/extractor/playerfm.py @@ -0,0 +1,70 @@ +from .common import InfoExtractor +from ..utils import clean_html, clean_podcast_url, int_or_none, str_or_none, url_or_none +from ..utils.traversal import traverse_obj + + +class PlayerFmIE(InfoExtractor): + _VALID_URL = r'(?Phttps?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P[\w-]+))' + _TESTS = [{ + 'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix', + 'info_dict': { + 'ext': 'mp3', + 'id': '478606546', + 'display_id': 'movie-mindset-33-casino-feat-felix', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'title': 'Movie Mindset 33 - Casino feat. Felix', + 'creators': ['Chapo Trap House'], + 'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.', + 'duration': 6830, + 'timestamp': 1745406000, + 'upload_date': '20250423', + }, + }, { + 'url': 'https://player.fm/series/nbc-nightly-news-with-tom-llamas/thursday-april-17-2025', + 'info_dict': { + 'ext': 'mp3', + 'id': '477635490', + 'display_id': 'thursday-april-17-2025', + 'title': 'Thursday, April 17, 2025', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'duration': 1143, + 'description': 'md5:4890b8cf9a55a787561cd5d59dfcda82', + 'creators': ['NBC News'], + 'timestamp': 1744941374, + 'upload_date': '20250418', + }, + }, { + 'url': 'https://player.fm/series/soccer-101/ep-109-its-kicking-off-how-have-the-rules-for-kickoff-changed-what-are-the-best-approaches-to-getting-the-game-underway-and-how-could-we-improve-on-the-present-system-ack3NzL3yibvs4pf', + 'info_dict': { + 'ext': 'mp3', + 'id': '481418710', + 'thumbnail': r're:^https://.*\.(jpg|png)', + 'title': r're:#109 It\'s kicking off! 
How have the rules for kickoff changed, .+ the present system\?', + 'creators': ['TSS'], + 'duration': 1510, + 'display_id': 'md5:b52ecacaefab891b59db69721bfd9b13', + 'description': 'md5:52a39e36d08d8919527454f152ad3c25', + 'timestamp': 1659102055, + 'upload_date': '20220729', + }, + }] + + def _real_extract(self, url): + display_id, url = self._match_valid_url(url).group('id', 'url') + data = self._download_json(f'{url}.json', display_id) + + return { + 'display_id': display_id, + 'vcodec': 'none', + **traverse_obj(data, { + 'id': ('id', {int}, {str_or_none}), + 'url': ('url', {clean_podcast_url}), + 'title': ('title', {str}), + 'description': ('description', {clean_html}), + 'duration': ('duration', {int_or_none}), + 'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any), + 'filesize': ('size', {int_or_none}), + 'timestamp': ('publishedAt', {int_or_none}), + 'creators': ('series', 'author', {str}, filter, all, filter), + }), + } From 87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 14:57:20 -0500 Subject: [PATCH 32/81] [ie/mlbtv] Make formats downloadable with ffmpeg (#13761) Authored by: bashonly --- yt_dlp/extractor/mlb.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index 562b93fc78..b2b35a7121 100644 --- a/yt_dlp/extractor/mlb.py +++ b/yt_dlp/extractor/mlb.py @@ -457,12 +457,9 @@ def _extract_formats_and_subtitles(self, broadcast, video_id): self.report_warning(f'No formats available for {format_id} broadcast; skipping') return [], {} - cdn_headers = {'x-cdn-token': token} fmts, subs = self._extract_m3u8_formats_and_subtitles( - m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4', - m3u8_id=format_id, fatal=False, headers=cdn_headers) + m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) for fmt in fmts: - fmt['http_headers'] = cdn_headers fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' ')) fmt.setdefault('language', language) if fmt.get('vcodec') == 'none' and fmt['language'] == 'en': From 790c286ce3e0b534ca2d8f6648ced220d888f139 Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 21 Jul 2025 04:00:44 +0800 Subject: [PATCH 33/81] [ie/10play] Support new site domain (#13611) Closes #13577 Authored by: Georift --- yt_dlp/extractor/tenplay.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index 825da6516b..dd4ea56580 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -7,11 +7,11 @@ class TenPlayIE(InfoExtractor): IE_NAME = '10play' - _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/?#]+/)+(?Ptpv\d{6}[a-z]{5})' + _VALID_URL = r'https?://(?:www\.)?10(?:play)?\.com\.au/(?:[^/?#]+/)+(?Ptpv\d{6}[a-z]{5})' _NETRC_MACHINE = '10play' _TESTS = [{ # Geo-restricted to Australia - 'url': 'https://10play.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf', + 'url': 'https://10.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf', 'info_dict': { 'id': '7440980000013868', 'ext': 'mp4', @@ -32,7 +32,7 @@ class TenPlayIE(InfoExtractor): 'params': {'skip_download': 'm3u8'}, }, { # Geo-restricted to Australia - 'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp', + 'url': 
'https://10.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp', 'info_dict': { 'id': '9000000000091177', 'ext': 'mp4', @@ -55,7 +55,7 @@ class TenPlayIE(InfoExtractor): 'params': {'skip_download': 'm3u8'}, }, { # Geo-restricted to Australia; upgrading the m3u8 quality fails and we need the fallback - 'url': 'https://10play.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt', + 'url': 'https://10.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt', 'info_dict': { 'id': '9000000000084116', 'ext': 'mp4', @@ -77,6 +77,7 @@ class TenPlayIE(InfoExtractor): }, 'params': {'skip_download': 'm3u8'}, 'expected_warnings': ['Failed to download m3u8 information: HTTP Error 502'], + 'skip': 'video unavailable', }, { 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', 'only_matching': True, @@ -96,7 +97,7 @@ class TenPlayIE(InfoExtractor): def _real_extract(self, url): content_id = self._match_id(url) data = self._download_json( - 'https://10play.com.au/api/v1/videos/' + content_id, content_id) + 'https://10.com.au/api/v1/videos/' + content_id, content_id) video_data = self._download_json( f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}', @@ -137,21 +138,24 @@ def _real_extract(self, url): class TenPlaySeasonIE(InfoExtractor): IE_NAME = '10play:season' - _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P[^/?#]+)/episodes/(?P[^/?#]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?10(?:play)?\.com\.au/(?P[^/?#]+)/episodes/(?P[^/?#]+)/?(?:$|[?#])' _TESTS = [{ - 'url': 'https://10play.com.au/masterchef/episodes/season-15', + 'url': 'https://10.com.au/masterchef/episodes/season-15', 'info_dict': { 'title': 'Season 15', 'id': 'MTQ2NjMxOQ==', }, 'playlist_mincount': 50, }, { - 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', + 'url': 'https://10.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', 'info_dict': { 'title': 'Season 2024', 'id': 'Mjc0OTIw', }, 'playlist_mincount': 159, + }, { + 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', + 'only_matching': True, }] def _entries(self, load_more_url, display_id=None): @@ -172,7 +176,7 @@ def _entries(self, load_more_url, display_id=None): def _real_extract(self, url): show, season = self._match_valid_url(url).group('show', 'season') season_info = self._download_json( - f'https://10play.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}') + f'https://10.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}') episodes_carousel = traverse_obj(season_info, ( 'content', 0, 'components', ( From f9dff95cb1c138913011417b3bba020c0a691bba Mon Sep 17 00:00:00 2001 From: WouterGordts Date: Sun, 20 Jul 2025 22:12:40 +0200 Subject: [PATCH 34/81] [ie/bandcamp] Extract tags (#13480) Authored by: WouterGordts --- yt_dlp/extractor/bandcamp.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 939c2800e6..d07d6e48b2 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -7,6 +7,7 @@ from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, + clean_html, extract_attributes, float_or_none, int_or_none, @@ -19,7 +20,7 @@ url_or_none, urljoin, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import find_element, find_elements, traverse_obj class 
BandcampIE(InfoExtractor): @@ -70,6 +71,9 @@ class BandcampIE(InfoExtractor): 'album': 'FTL: Advanced Edition Soundtrack', 'uploader_url': 'https://benprunty.bandcamp.com', 'uploader_id': 'benprunty', + 'tags': ['soundtrack', 'chiptunes', 'cinematic', 'electronic', 'video game music', 'California'], + 'artists': ['Ben Prunty'], + 'album_artists': ['Ben Prunty'], }, }, { # no free download, mp3 128 @@ -94,6 +98,9 @@ class BandcampIE(InfoExtractor): 'album': 'Call of the Mastodon', 'uploader_url': 'https://relapsealumni.bandcamp.com', 'uploader_id': 'relapsealumni', + 'tags': ['Philadelphia'], + 'artists': ['Mastodon'], + 'album_artists': ['Mastodon'], }, }, { # track from compilation album (artist/album_artist difference) @@ -118,6 +125,9 @@ class BandcampIE(InfoExtractor): 'album': 'DSK F/W 2016-2017 Free Compilation', 'uploader_url': 'https://diskotopia.bandcamp.com', 'uploader_id': 'diskotopia', + 'tags': ['Japan'], + 'artists': ['submerse'], + 'album_artists': ['Diskotopia'], }, }] @@ -252,6 +262,7 @@ def _real_extract(self, url): 'album': embed.get('album_title'), 'album_artist': album_artist, 'formats': formats, + 'tags': traverse_obj(webpage, ({find_elements(cls='tag')}, ..., {clean_html})), } From 32809eb2da92c649e540a5b714f6235036026161 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:05:43 -0500 Subject: [PATCH 35/81] Allow extractors to designate formats/subtitles for impersonation (#13778) Authored by: bashonly --- yt_dlp/YoutubeDL.py | 37 ++++++++++++++++++++++++++++++++++- yt_dlp/downloader/__init__.py | 2 +- yt_dlp/downloader/http.py | 5 ++++- yt_dlp/extractor/common.py | 30 ++++++++++++---------------- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9c9ee64a8c..68074a5626 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -52,7 +52,7 @@ SSLError, network_exceptions, ) -from .networking.impersonate import ImpersonateRequestHandler +from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget from .plugins import directories as plugin_directories, load_all_plugins from .postprocessor import ( EmbedThumbnailPP, @@ -3231,6 +3231,16 @@ def dl(self, name, info, subtitle=False, test=False): } else: params = self.params + + impersonate = info.pop('impersonate', None) + # Do not override --impersonate with extractor-specified impersonation + if params.get('impersonate') is None: + available_target, requested_targets = self._parse_impersonate_targets(impersonate) + if available_target: + info['impersonate'] = available_target + elif requested_targets: + self.report_warning(self._unavailable_targets_message(requested_targets), only_once=True) + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: @@ -4183,6 +4193,31 @@ def _impersonate_target_available(self, target): for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler)) + def _parse_impersonate_targets(self, impersonate): + if impersonate in (True, ''): + impersonate = ImpersonateTarget() + + requested_targets = [ + t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) + for t in variadic(impersonate) + ] if impersonate else [] + + available_target = next(filter(self._impersonate_target_available, requested_targets), None) + + return available_target, requested_targets + + @staticmethod + def _unavailable_targets_message(requested_targets, 
note=None, is_error=False): + note = note or 'The extractor specified to use impersonation for this download' + specific_targets = ', '.join(filter(None, map(str, requested_targets))) + message = ( + 'no impersonate target is available' if not specific_targets + else f'none of these impersonate targets are available: {specific_targets}') + return ( + f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}' + f' https://github.com/yt-dlp/yt-dlp#impersonation ' + f'for information on installing the required dependencies') + def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 9c34bd289a..17458b9b94 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -99,7 +99,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default): if external_downloader is None: if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): return FFmpegFD - elif external_downloader.lower() != 'native': + elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None: ed = get_external_downloader(external_downloader) if ed.can_download(info_dict, external_downloader): return ed diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 90bfcaf552..073860f6f9 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -27,6 +27,9 @@ class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) + request_extensions = {} + if info_dict.get('impersonate') is not None: + request_extensions['impersonate'] = info_dict['impersonate'] class DownloadContext(dict): __getattr__ = dict.get @@ -109,7 +112,7 @@ def establish_connection(): if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 - request = Request(url, request_data, headers) + request = Request(url, request_data, headers, extensions=request_extensions) has_range = range_start is not None if has_range: request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d601e17514..8a914abf0b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -38,7 +38,6 @@ TransportError, network_exceptions, ) -from ..networking.impersonate import ImpersonateTarget from ..utils import ( IDENTITY, JSON_LD_RE, @@ -259,6 +258,11 @@ class InfoExtractor: * key The key (as hex) used to decrypt fragments. If `key` is given, any key URI will be ignored * iv The IV (as hex) used to decrypt fragments + * impersonate Impersonate target(s). Can be any of the following entities: + * an instance of yt_dlp.networking.impersonate.ImpersonateTarget + * a string in the format of CLIENT[:OS] + * a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances + * a boolean value; True means any impersonate target is sufficient * downloader_options A dictionary of downloader options (For internal use only) * http_chunk_size Chunk size for HTTP downloads @@ -336,6 +340,7 @@ class InfoExtractor: * "name": Name or description of the subtitles * "http_headers": A dictionary of additional HTTP headers to add to the request. 
+ * "impersonate": Impersonate target(s); same as the "formats" field "ext" will be calculated from URL if missing automatic_captions: Like 'subtitles'; contains automatically generated captions instead of normal subtitles @@ -884,26 +889,17 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa extensions = {} - if impersonate in (True, ''): - impersonate = ImpersonateTarget() - requested_targets = [ - t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) - for t in variadic(impersonate) - ] if impersonate else [] - - available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None) + available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate) if available_target: extensions['impersonate'] = available_target elif requested_targets: - message = 'The extractor is attempting impersonation, but ' - message += ( - 'no impersonate target is available' if not str(impersonate) - else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"') - info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation ' - 'for information on installing the required dependencies') + msg = 'The extractor is attempting impersonation' if require_impersonation: - raise ExtractorError(f'{message}; {info_msg}', expected=True) - self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True) + raise ExtractorError( + self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True), + expected=True) + self.report_warning( + self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True) try: return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions)) From a4561c7a66c39d88efe7ae51e7fa1986faf093fb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:20:58 -0500 Subject: [PATCH 36/81] [rh:requests] Refactor default headers (#13785) Authored by: bashonly --- yt_dlp/networking/_requests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 555c21ac33..6582038fcb 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -313,7 +313,7 @@ def _create_instance(self, cookiejar, legacy_ssl_support=None): max_retries=urllib3.util.retry.Retry(False), ) session.adapters.clear() - session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'}) + session.headers = requests.models.CaseInsensitiveDict() session.mount('https://', http_adapter) session.mount('http://', http_adapter) session.cookies = cookiejar @@ -322,6 +322,7 @@ def _create_instance(self, cookiejar, legacy_ssl_support=None): def _prepare_headers(self, _, headers): add_accept_encoding_header(headers, SUPPORTED_ENCODINGS) + headers.setdefault('Connection', 'keep-alive') def _send(self, request): From 8820101aa3152e5f4811541c645f8b5de231ba8c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:22:04 -0500 Subject: [PATCH 37/81] [ie/youtube] Use impersonation for downloading subtitles (#13786) Closes #13770 Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index fc1f087ace..5968edc60e 100644 --- 
a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -4056,6 +4056,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer 'ext': fmt, 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)), 'name': sub_name, + 'impersonate': True, STREAMING_DATA_CLIENT_NAME: client_name, }) From 2ac3eb98373d1c31341c5e918c83872c7ff409c6 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:41:00 -0500 Subject: [PATCH 38/81] Fix `ImpersonateTarget` sanitization (#13791) Fix 32809eb2da92c649e540a5b714f6235036026161 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 68074a5626..14beb3df98 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3716,6 +3716,8 @@ def filter_fn(obj): return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} elif isinstance(obj, (list, tuple, set, LazyList)): return list(map(filter_fn, obj)) + elif isinstance(obj, ImpersonateTarget): + return str(obj) elif obj is None or isinstance(obj, (str, int, float, bool)): return obj else: From 3e49bc8a1bdb4109b857f2c361c358e86fa63405 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:42:21 -0500 Subject: [PATCH 39/81] Make extractor-designated impersonation override `--impersonate` (#13792) Fix 32809eb2da92c649e540a5b714f6235036026161 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 9 --------- yt_dlp/downloader/common.py | 11 +++++++++++ yt_dlp/downloader/http.py | 5 +++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 14beb3df98..e42fa73dd6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3232,15 +3232,6 @@ def dl(self, name, info, subtitle=False, test=False): else: params = self.params - impersonate = info.pop('impersonate', None) - # Do not override --impersonate with extractor-specified impersonation - if params.get('impersonate') is None: - available_target, requested_targets = self._parse_impersonate_targets(impersonate) - if available_target: - info['impersonate'] = available_target - elif requested_targets: - self.report_warning(self._unavailable_targets_message(requested_targets), only_once=True) - fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index bb9303f8a1..7bc70a51a2 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -495,3 +495,14 @@ def _debug_cmd(self, args, exe=None): exe = os.path.basename(args[0]) self.write_debug(f'{exe} command line: {shell_quote(args)}') + + def _get_impersonate_target(self, info_dict): + impersonate = info_dict.get('impersonate') + if impersonate is None: + return None + available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate) + if available_target: + return available_target + elif requested_targets: + self.report_warning(self.ydl._unavailable_targets_message(requested_targets)) + return None diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 073860f6f9..c388deb7ea 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -28,8 +28,9 @@ def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) request_extensions = 
{} - if info_dict.get('impersonate') is not None: - request_extensions['impersonate'] = info_dict['impersonate'] + impersonate_target = self._get_impersonate_target(info_dict) + if impersonate_target is not None: + request_extensions['impersonate'] = impersonate_target class DownloadContext(dict): __getattr__ = dict.get From ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 14:09:52 -0500 Subject: [PATCH 40/81] [ie/hotstar] Fix error handling (#13793) Fix 7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9 Closes #13790 Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index b280fb53ab..2ae527a59e 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -42,6 +42,7 @@ class HotStarBaseIE(InfoExtractor): } def _has_active_subscription(self, cookies, server_time): + server_time = int_or_none(server_time) or int(time.time()) expiry = traverse_obj(cookies, ( self._TOKEN_NAME, 'value', {jwt_decode_hs256}, 'sub', {json.loads}, 'subscriptions', 'in', ..., 'expiry', {parse_iso8601}, all, {max})) or 0 From 6be26626f7cfa71d28e0fac2861eb04758810c5d Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 22 Jul 2025 06:59:13 +0900 Subject: [PATCH 41/81] [utils] `unified_timestamp`: Return `int` values (#13796) Authored by: doe1080 --- yt_dlp/utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index c91a06e9a6..7d79f417fa 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1285,7 +1285,7 @@ def unified_timestamp(date_str, day_first=True): timetuple = email.utils.parsedate_tz(date_str) if timetuple: - return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds() + return calendar.timegm(timetuple) + pm_delta * 3600 - int(timezone.total_seconds()) @partial_application From 060c6a4501a0b8a92f1b9c12788f556d902c83c6 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 22 Jul 2025 07:32:10 +0900 Subject: [PATCH 42/81] [ie/skeb] Rework extractor (#13593) Closes #7440 Authored by: doe1080 --- yt_dlp/extractor/skeb.py | 194 +++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 108 deletions(-) diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py index bc5ec3da7f..70111d0944 100644 --- a/yt_dlp/extractor/skeb.py +++ b/yt_dlp/extractor/skeb.py @@ -1,140 +1,118 @@ from .common import InfoExtractor -from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + clean_html, + int_or_none, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj class SkebIE(InfoExtractor): - _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P\d+)' - + _VALID_URL = r'https?://skeb\.jp/@(?P[^/?#]+)/works/(?P\d+)' _TESTS = [{ 'url': 'https://skeb.jp/@riiru_wm/works/10', 'info_dict': { 'id': '466853', - 'title': '内容はおまかせします! 
by 姫ノ森りぃる@一周年', - 'description': 'md5:1ec50901efc3437cfbfe3790468d532d', - 'uploader': '姫ノ森りぃる@一周年', - 'uploader_id': 'riiru_wm', - 'age_limit': 0, - 'tags': [], - 'url': r're:https://skeb.+', - 'thumbnail': r're:https://skeb.+', - 'subtitles': { - 'jpn': [{ - 'url': r're:https://skeb.+', - 'ext': 'vtt', - }], - }, - 'width': 720, - 'height': 405, - 'duration': 313, - 'fps': 30, 'ext': 'mp4', + 'title': '10-1', + 'description': 'md5:1ec50901efc3437cfbfe3790468d532d', + 'duration': 313, + 'genres': ['video'], + 'thumbnail': r're:https?://.+', + 'uploader': '姫ノ森りぃる@ひとづま', + 'uploader_id': 'riiru_wm', }, }, { 'url': 'https://skeb.jp/@furukawa_nob/works/3', 'info_dict': { 'id': '489408', - 'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...', - 'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2', - 'uploader': '古川ノブ@音楽とVlogのVtuber', - 'uploader_id': 'furukawa_nob', - 'age_limit': 0, - 'tags': [ - 'よろしく', '大丈夫', 'お願い', 'でした', - '是非', 'O', 'バー', '遊び', 'おはよう', - 'オーバ', 'ボイス', - ], - 'url': r're:https://skeb.+', - 'thumbnail': r're:https://skeb.+', - 'subtitles': { - 'jpn': [{ - 'url': r're:https://skeb.+', - 'ext': 'vtt', - }], - }, - 'duration': 98, 'ext': 'mp3', - 'vcodec': 'none', - 'abr': 128, + 'title': '3-1', + 'description': 'md5:6de1f8f876426a6ac321c123848176a8', + 'duration': 98, + 'genres': ['voice'], + 'tags': 'count:11', + 'thumbnail': r're:https?://.+', + 'uploader': '古川ノブ@宮城の動画勢Vtuber', + 'uploader_id': 'furukawa_nob', }, }, { - 'url': 'https://skeb.jp/@mollowmollow/works/6', + 'url': 'https://skeb.jp/@Rizu_panda_cube/works/626', 'info_dict': { - 'id': '6', - 'title': 'ヒロ。\n\n私のキャラク... by 諸々', - 'description': 'md5:aa6cbf2ba320b50bce219632de195f07', - '_type': 'playlist', - 'entries': [{ - 'id': '486430', - 'title': 'ヒロ。\n\n私のキャラク... by 諸々', - 'description': 'md5:aa6cbf2ba320b50bce219632de195f07', - }, { - 'id': '486431', - 'title': 'ヒロ。\n\n私のキャラク... 
by 諸々', - }], + 'id': '626', + 'description': 'md5:834557b39ca56960c5f77dd6ddabe775', + 'uploader': 'りづ100億%', + 'uploader_id': 'Rizu_panda_cube', + 'tags': 'count:57', + 'genres': ['video'], }, + 'playlist_count': 2, + 'expected_warnings': ['Skipping unsupported extension'], }] - def _real_extract(self, url): - video_id = self._match_id(url) - nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id) + def _call_api(self, uploader_id, work_id): + return self._download_json( + f'https://skeb.jp/api/users/{uploader_id}/works/{work_id}', work_id, headers={ + 'Accept': 'application/json', + 'Authorization': 'Bearer null', + }) - parent = { - 'id': video_id, - 'title': nuxt_data.get('title'), - 'description': nuxt_data.get('description'), - 'uploader': traverse_obj(nuxt_data, ('creator', 'name')), - 'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')), - 'age_limit': 18 if nuxt_data.get('nsfw') else 0, - 'tags': nuxt_data.get('tag_list'), + def _real_extract(self, url): + uploader_id, work_id = self._match_valid_url(url).group('uploader_id', 'id') + try: + works = self._call_api(uploader_id, work_id) + except ExtractorError as e: + if not isinstance(e.cause, HTTPError) or e.cause.status != 429: + raise + webpage = e.cause.response.read().decode() + value = self._search_regex( + r'document\.cookie\s*=\s*["\']request_key=([^;"\']+)', webpage, 'request key') + self._set_cookie('skeb.jp', 'request_key', value) + works = self._call_api(uploader_id, work_id) + + info = { + 'uploader_id': uploader_id, + **traverse_obj(works, { + 'age_limit': ('nsfw', {bool}, {lambda x: 18 if x else None}), + 'description': (('source_body', 'body'), {clean_html}, filter, any), + 'genres': ('genre', {str}, filter, all, filter), + 'tags': ('tag_list', ..., {str}, filter, all, filter), + 'uploader': ('creator', 'name', {str}), + }), } entries = [] - for item in nuxt_data.get('previews') or []: - vid_url = item.get('url') - given_ext = traverse_obj(item, ('information', 'extension')) - preview_ext = determine_ext(vid_url, default_ext=None) - if not preview_ext: - content_disposition = parse_qs(vid_url)['response-content-disposition'][0] - preview_ext = self._search_regex( - r'filename="[^"]+\.([^\.]+?)"', content_disposition, - 'preview file extension', fatal=False, group=1) - if preview_ext not in ('mp4', 'mp3'): + for idx, preview in enumerate(traverse_obj(works, ('previews', lambda _, v: url_or_none(v['url']))), 1): + ext = traverse_obj(preview, ('information', 'extension', {str})) + if ext not in ('mp3', 'mp4'): + self.report_warning(f'Skipping unsupported extension "{ext}"') continue - if not vid_url or not item.get('id'): - continue - width, height = traverse_obj(item, ('information', 'width')), traverse_obj(item, ('information', 'height')) - if width is not None and height is not None: - # the longest side is at most 720px for non-client viewers - max_size = max(width, height) - width, height = (x * 720 // max_size for x in (width, height)) + entries.append({ - **parent, - 'id': str(item['id']), - 'url': vid_url, - 'thumbnail': item.get('poster_url'), + 'ext': ext, + 'title': f'{work_id}-{idx}', 'subtitles': { - 'jpn': [{ - 'url': item.get('vtt_url'), + 'ja': [{ 'ext': 'vtt', + 'url': preview['vtt_url'], }], - } if item.get('vtt_url') else None, - 'width': width, - 'height': height, - 'duration': traverse_obj(item, ('information', 'duration')), - 'fps': traverse_obj(item, ('information', 'frame_rate')), - 'ext': preview_ext or given_ext, - 'vcodec': 'none' if preview_ext == 
'mp3' else None, - # you'll always get 128kbps MP3 for non-client viewers - 'abr': 128 if preview_ext == 'mp3' else None, + } if url_or_none(preview.get('vtt_url')) else None, + 'vcodec': 'none' if ext == 'mp3' else None, + **info, + **traverse_obj(preview, { + 'id': ('id', {str_or_none}), + 'thumbnail': ('poster_url', {url_or_none}), + 'url': ('url', {url_or_none}), + }), + **traverse_obj(preview, ('information', { + 'duration': ('duration', {int_or_none}), + 'fps': ('frame_rate', {int_or_none}), + 'height': ('height', {int_or_none}), + 'width': ('width', {int_or_none}), + })), }) - if not entries: - raise ExtractorError('No video/audio attachment found in this commission.', expected=True) - elif len(entries) == 1: - return entries[0] - else: - parent.update({ - '_type': 'playlist', - 'entries': entries, - }) - return parent + return self.playlist_result(entries, work_id, **info) From d3edc5d52a7159eda2331dbc7e14bf40a6585c81 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:04:43 -0400 Subject: [PATCH 43/81] [ie/bilibili] Pass newer user-agent with API requests (#13736) Closes #12887 Authored by: c-basalt --- yt_dlp/extractor/bilibili.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0c6535fc72..3282a11bb7 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,6 +175,13 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): else: note = f'Downloading video formats for cid {cid}' + # TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735 + # playurl requests carrying old UA will be rejected + headers = { + 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36', + **(headers or {}), + } + return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] @@ -353,7 +360,7 @@ class BiliBiliIE(BilibiliBaseIE): 'id': 'BV1bK411W797', 'title': '物语中的人物是如何吐槽自己的OP的', }, - 'playlist_count': 18, + 'playlist_count': 23, 'playlist': [{ 'info_dict': { 'id': 'BV1bK411W797_p1', @@ -373,6 +380,7 @@ class BiliBiliIE(BilibiliBaseIE): '_old_archive_ids': ['bilibili 498159642_part1'], }, }], + 'params': {'playlist_items': '2'}, }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -1002,6 +1010,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1057,6 +1066,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1847,7 +1857,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): 'thumbnail': r're:^https?://.+\.jpg', 'timestamp': 1564836614, 'upload_date': '20190803', - 'uploader': 'tsukimi-つきみぐー', + 'uploader': '十六夜tsukimiつきみぐ', 'view_count': int, }, } @@ -1902,10 +1912,10 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): 'url': 'https://www.bilibili.com/audio/am10624', 'info_dict': { 'id': '10624', - 'title': '每日新曲推荐(每日11:00更新)', + 'title': '新曲推荐', 'description': '每天11:00更新,为你推送最新音乐', }, - 'playlist_count': 19, + 'playlist_count': 16, } def _real_extract(self, url): From 
b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:11:58 -0500 Subject: [PATCH 44/81] [ie/BiliBiliBangumi] Fix extractor (#13800) Closes #13795 Authored by: bashonly --- yt_dlp/extractor/bilibili.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 3282a11bb7..2846702f6a 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -907,13 +907,26 @@ def _real_extract(self, url): 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id}, headers=headers)) - geo_blocked = traverse_obj(play_info, ( - ('result', ('raw', 'data')), 'plugins', - lambda _, v: v['name'] == 'AreaLimitPanel', - 'config', 'is_block', {bool}, any)) - premium_only = play_info.get('code') == -10403 + # play_info can be structured in at least three different ways, e.g.: + # 1.) play_info['result']['video_info'] and play_info['code'] + # 2.) play_info['raw']['data']['video_info'] and play_info['code'] + # 3.) play_info['data']['result']['video_info'] and play_info['data']['code'] + # So we need to transform any of the above into a common structure + status_code = play_info.get('code') + if 'raw' in play_info: + play_info = play_info['raw'] + if 'data' in play_info: + play_info = play_info['data'] + if status_code is None: + status_code = play_info.get('code') + if 'result' in play_info: + play_info = play_info['result'] - video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {} + geo_blocked = traverse_obj(play_info, ( + 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any)) + premium_only = status_code == -10403 + + video_info = traverse_obj(play_info, ('video_info', {dict})) or {} formats = self.extract_formats(video_info) if not formats: @@ -923,8 +936,8 @@ def _real_extract(self, url): self.raise_login_required('This video is for premium members only') if traverse_obj(play_info, (( - ('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' - (('result', ('raw', 'data')), 'play_video_type'), # 'preview' vs 'whole' vs 'none' + ('play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' vs 'PLAY_NONE' + 'play_video_type', # 'preview' vs 'whole' vs 'none' ), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})): self.report_warning( 'Only preview format is available, ' From d88b304d44c599d81acfa4231502270c8b9fe2f8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:15:31 -0500 Subject: [PATCH 45/81] [ie/patreon:campaign] Fix extractor (#13712) Closes #13622 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 2c1436cac1..9038b4a7ff 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -19,7 +19,7 @@ url_or_none, urljoin, ) -from ..utils.traversal import traverse_obj, value +from ..utils.traversal import require, traverse_obj, value class PatreonBaseIE(InfoExtractor): @@ -462,7 +462,7 @@ class PatreonCampaignIE(PatreonBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?patreon\.com/(?: (?:m|api/campaigns)/(?P\d+)| - (?:c/)?(?P(?!creation[?/]|posts/|rss[?/])[\w-]+) + (?:cw?/)?(?P(?!creation[?/]|posts/|rss[?/])[\w-]+) )(?:/posts)?/?(?:$|[?#])''' 
_TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', @@ -531,6 +531,28 @@ class PatreonCampaignIE(PatreonBaseIE): 'age_limit': 0, }, 'playlist_mincount': 331, + 'skip': 'Channel removed', + }, { + # next.js v13 data, see https://github.com/yt-dlp/yt-dlp/issues/13622 + 'url': 'https://www.patreon.com/c/anythingelse/posts', + 'info_dict': { + 'id': '9631148', + 'title': 'Anything Else?', + 'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08', + 'uploader': 'dan ', + 'uploader_id': '13852412', + 'uploader_url': 'https://www.patreon.com/anythingelse', + 'channel': 'Anything Else?', + 'channel_id': '9631148', + 'channel_url': 'https://www.patreon.com/anythingelse', + 'channel_follower_count': int, + 'age_limit': 0, + 'thumbnail': r're:https?://.+/.+', + }, + 'playlist_mincount': 151, + }, { + 'url': 'https://www.patreon.com/cw/anythingelse', + 'only_matching': True, }, { 'url': 'https://www.patreon.com/c/OgSog/posts', 'only_matching': True, @@ -572,8 +594,11 @@ def _real_extract(self, url): campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent}) - campaign_id = self._search_nextjs_data( - webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] + campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), ( + 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str})) + if not campaign_id: + campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), ( + lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')})) params = { 'json-api-use-default-includes': 'false', From 959ac99e98c3215437e573c22d64be42d361e863 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 15 Jul 2025 01:17:34 +0200 Subject: [PATCH 46/81] Fix `--exec` placeholder expansion on Windows See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56 for more details Authored by: Grub4K --- yt_dlp/postprocessor/exec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index 1f0a0015ec..243487dd25 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -18,7 +18,7 @@ def parse_cmd(self, cmd, info): if filepath: if '{}' not in cmd: cmd += ' {}' - cmd = cmd.replace('{}', shell_quote(filepath)) + cmd = cmd.replace('{}', shell_quote(filepath, shell=True)) return cmd def run(self, info): From 9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e Mon Sep 17 00:00:00 2001 From: sepro Date: Tue, 22 Jul 2025 01:43:30 +0200 Subject: [PATCH 47/81] [cleanup] Misc (#13595) Closes #10853, Closes #12436, Closes #13314, Closes #13609 Authored by: seproDev, InvalidUsernameException, doe1080, hseg, bashonly, adamralph Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: InvalidUsernameException Co-authored-by: gesh Co-authored-by: Adam Ralph Co-authored-by: doe1080 <98906116+doe1080@users.noreply.github.com> --- CONTRIBUTING.md | 2 +- README.md | 6 +++--- devscripts/changelog_override.json | 10 ++++++++++ test/test_download.py | 4 ---- yt_dlp/YoutubeDL.py | 1 + yt_dlp/extractor/common.py | 5 ++++- yt_dlp/extractor/mirrativ.py | 2 +- yt_dlp/extractor/newspicks.py | 2 -- yt_dlp/extractor/youtube/_video.py | 4 ++-- 9 files changed, 22 insertions(+), 14 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 
fd7b0f1210..2c58cdfc94 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -126,7 +126,7 @@ ### Are you willing to share account details if needed? While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow. - Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages. -- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator). +- Change the password before sharing the account to something random. - Change the password after receiving the account back. ### Is the website primarily used for piracy? diff --git a/README.md b/README.md index 925ebd8c5b..7a6d1073f4 100644 --- a/README.md +++ b/README.md @@ -277,7 +277,7 @@ # USAGE AND OPTIONS yt-dlp [OPTIONS] [--] URL [URL...] -`Ctrl+F` is your friend :D +Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords @@ -1902,8 +1902,8 @@ #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) #### vimeo -* `client`: Client to extract video data from. One of `android` (default), `ios` or `web`. The `ios` client only works with previously cached OAuth tokens. The `web` client only works when authenticated with credentials or account cookies -* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens +* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index d7296bf309..c22ea94bfc 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -262,5 +262,15 @@ { "action": "remove", "when": "500761e41acb96953a5064e951d41d190c287e46" + }, + { + "action": "add", + "when": "f3008bc5f89d2691f2f8dfc51b406ef4e25281c3", + "short": "[priority] **Default behaviour changed from `--mtime` to `--no-mtime`**\nyt-dlp no longer applies the server modified time to downloaded files by default. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)" + }, + { + "action": "add", + "when": "959ac99e98c3215437e573c22d64be42d361e863", + "short": "[priority] Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)\n - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped" } ] diff --git a/test/test_download.py b/test/test_download.py index c7842735c2..1714cb52ec 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -66,10 +66,6 @@ def _file_md5(fn): @is_download_test class TestDownload(unittest.TestCase): - # Parallel testing in nosetests. See - # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html - _multiprocess_shared_ = True - maxDiff = None COMPLETED_TESTS = {} diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e42fa73dd6..76fd18c338 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -529,6 +529,7 @@ class YoutubeDL: discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. + Argument values must always be a list of string(s). E.g. {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). Only for YouTube diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 8a914abf0b..4a4b5416d0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -397,6 +397,8 @@ class InfoExtractor: chapters: A list of dictionaries, with the following entries: * "start_time" - The start time of the chapter in seconds * "end_time" - The end time of the chapter in seconds + (optional: core code can determine this value from + the next chapter's start_time or the video's duration) * "title" (optional, string) heatmap: A list of dictionaries, with the following entries: * "start_time" - The start time of the data point in seconds @@ -411,7 +413,8 @@ class InfoExtractor: 'unlisted' or 'public'. Use 'InfoExtractor._availability' to set it media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer" - _old_archive_ids: A list of old archive ids needed for backward compatibility + _old_archive_ids: A list of old archive ids needed for backward + compatibility. Use yt_dlp.utils.make_archive_id to generate ids _format_sort_fields: A list of fields to use for sorting formats __post_extractor: A function to be called just before the metadata is written to either disk, logger or console. 
The function diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py index 4e24371a22..36a736a21d 100644 --- a/yt_dlp/extractor/mirrativ.py +++ b/yt_dlp/extractor/mirrativ.py @@ -18,7 +18,7 @@ class MirrativIE(MirrativBaseIE): IE_NAME = 'mirrativ' _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P[^/?#&]+)' - TESTS = [{ + _TESTS = [{ 'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw', 'info_dict': { 'id': 'UQomuS7EMgHoxRHjEhNiHw', diff --git a/yt_dlp/extractor/newspicks.py b/yt_dlp/extractor/newspicks.py index 5f19eed984..25be3c7203 100644 --- a/yt_dlp/extractor/newspicks.py +++ b/yt_dlp/extractor/newspicks.py @@ -18,7 +18,6 @@ class NewsPicksIE(InfoExtractor): 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】', 'cast': 'count:4', 'description': 'md5:09397aad46d6ded6487ff13f138acadf', - 'duration': 2940, 'release_date': '20220117', 'release_timestamp': 1642424400, 'series': 'HORIE ONE', @@ -35,7 +34,6 @@ class NewsPicksIE(InfoExtractor): 'title': '【検証】専門家は、KADOKAWAをどう見るか', 'cast': 'count:3', 'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d', - 'duration': 1320, 'release_date': '20240622', 'release_timestamp': 1719088080, 'series': 'NPレポート', diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 5968edc60e..171aa9b5c4 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2076,7 +2076,7 @@ def _extract_signature_function(self, video_id, player_url, example_sig): assert os.path.basename(func_id) == func_id self.write_debug(f'Extracting signature function {func_id}') - cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.03.31'), None + cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.07.21'), None if not cache_spec: code = self._load_player(video_id, player_url) @@ -2180,7 +2180,7 @@ def _load_player_data_from_cache(self, name, player_url): if data := self._player_cache.get(cache_id): return data - data = self.cache.load(*cache_id, min_ver='2025.03.31') + data = self.cache.load(*cache_id, min_ver='2025.07.21') if data: self._player_cache[cache_id] = data From 035b1ece8f382358f5503bf5011ca098f6c9eaf9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 23:47:12 +0000 Subject: [PATCH 48/81] Release 2025.07.21 Created by: bashonly :ci skip all --- CONTRIBUTORS | 9 +++++ Changelog.md | 91 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +-- supportedsites.md | 22 ++++++------ yt_dlp/version.py | 6 ++-- 5 files changed, 116 insertions(+), 16 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ba23b66dc5..f20b4ce172 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -784,3 +784,12 @@ eason1478 ceandreasen chauhantirth helpimnotdrowning +adamralph +averageFOSSenjoyer +bubo +flanter21 +Georift +moonshinerd +R0hanW +ShockedPlot7560 +swayll diff --git a/Changelog.md b/Changelog.md index 5a5c18cf34..7205b95aa3 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,97 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.07.21 + +#### Important changes +- **Default behaviour changed from `--mtime` to `--no-mtime`** +yt-dlp no longer applies the server modified time to downloaded files by default. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/12780) +- Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56) + - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped + +#### Core changes +- [Allow extractors to designate formats/subtitles for impersonation](https://github.com/yt-dlp/yt-dlp/commit/32809eb2da92c649e540a5b714f6235036026161) ([#13778](https://github.com/yt-dlp/yt-dlp/issues/13778)) by [bashonly](https://github.com/bashonly) (With fixes in [3e49bc8](https://github.com/yt-dlp/yt-dlp/commit/3e49bc8a1bdb4109b857f2c361c358e86fa63405), [2ac3eb9](https://github.com/yt-dlp/yt-dlp/commit/2ac3eb98373d1c31341c5e918c83872c7ff409c6)) +- [Don't let format testing alter the return code](https://github.com/yt-dlp/yt-dlp/commit/4919051e447c7f8ae9df8ba5c4208b6b5c04915a) ([#13767](https://github.com/yt-dlp/yt-dlp/issues/13767)) by [bashonly](https://github.com/bashonly) +- [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/commit/959ac99e98c3215437e573c22d64be42d361e863) by [Grub4K](https://github.com/Grub4K) +- [No longer enable `--mtime` by default](https://github.com/yt-dlp/yt-dlp/commit/f3008bc5f89d2691f2f8dfc51b406ef4e25281c3) ([#12781](https://github.com/yt-dlp/yt-dlp/issues/12781)) by [seproDev](https://github.com/seproDev) +- [Warn when skipping formats](https://github.com/yt-dlp/yt-dlp/commit/1f27a9f8baccb9105f2476154557540efe09a937) ([#13090](https://github.com/yt-dlp/yt-dlp/issues/13090)) by [bashonly](https://github.com/bashonly) +- **jsinterp** + - [Cache undefined variable names](https://github.com/yt-dlp/yt-dlp/commit/b342d27f3f82d913976509ddf5bff539ad8567ec) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly) (With fixes in [805519b](https://github.com/yt-dlp/yt-dlp/commit/805519bfaa7cb5443912dfe45ac774834ba65a16)) + - [Fix variable scoping](https://github.com/yt-dlp/yt-dlp/commit/b6328ca05030d815222b25d208cc59a964623bf9) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) +- **utils** + - `mimetype2ext`: [Always parse `flac` from `audio/flac`](https://github.com/yt-dlp/yt-dlp/commit/b8abd255e454acbe0023cdb946f9eb461ced7eeb) ([#13748](https://github.com/yt-dlp/yt-dlp/issues/13748)) by [bashonly](https://github.com/bashonly) + - `unified_timestamp`: [Return `int` values](https://github.com/yt-dlp/yt-dlp/commit/6be26626f7cfa71d28e0fac2861eb04758810c5d) ([#13796](https://github.com/yt-dlp/yt-dlp/issues/13796)) by [doe1080](https://github.com/doe1080) + - `urlhandle_detect_ext`: [Use `x-amz-meta-file-type` headers](https://github.com/yt-dlp/yt-dlp/commit/28bf46b7dafe2e241137763bf570a2f91ba8a53a) ([#13749](https://github.com/yt-dlp/yt-dlp/issues/13749)) by [bashonly](https://github.com/bashonly) + +#### Extractor changes +- [Add `_search_nextjs_v13_data` helper](https://github.com/yt-dlp/yt-dlp/commit/5245231e4a39ecd5595d4337d46d85e150e2430a) ([#13398](https://github.com/yt-dlp/yt-dlp/issues/13398)) by [bashonly](https://github.com/bashonly) (With fixes in [b5fea53](https://github.com/yt-dlp/yt-dlp/commit/b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec)) +- [Detect invalid m3u8 playlist data](https://github.com/yt-dlp/yt-dlp/commit/e99c0b838a9c5feb40c0dcd291bd7b8620b8d36d) 
([#13601](https://github.com/yt-dlp/yt-dlp/issues/13601)) by [Grub4K](https://github.com/Grub4K) +- **10play**: [Support new site domain](https://github.com/yt-dlp/yt-dlp/commit/790c286ce3e0b534ca2d8f6648ced220d888f139) ([#13611](https://github.com/yt-dlp/yt-dlp/issues/13611)) by [Georift](https://github.com/Georift) +- **9gag**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/0b359b184dee0c7052be482857bf562de67e4928) ([#13678](https://github.com/yt-dlp/yt-dlp/issues/13678)) by [bashonly](https://github.com/bashonly) +- **aenetworks**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/5f951ce929b56a822514f1a02cc06af030855ec7) ([#13747](https://github.com/yt-dlp/yt-dlp/issues/13747)) by [bashonly](https://github.com/bashonly) +- **archive.org**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf) ([#13706](https://github.com/yt-dlp/yt-dlp/issues/13706)) by [rdamas](https://github.com/rdamas) +- **bandaichannel**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/23e9389f936ec5236a87815b8576e5ce567b2f77) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **bandcamp**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/f9dff95cb1c138913011417b3bba020c0a691bba) ([#13480](https://github.com/yt-dlp/yt-dlp/issues/13480)) by [WouterGordts](https://github.com/WouterGordts) +- **bellmedia**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6fb3947c0dc6d0e3eab5077c5bada8402f47a277) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **bilibili**: [Pass newer user-agent with API requests](https://github.com/yt-dlp/yt-dlp/commit/d3edc5d52a7159eda2331dbc7e14bf40a6585c81) ([#13736](https://github.com/yt-dlp/yt-dlp/issues/13736)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7) ([#13800](https://github.com/yt-dlp/yt-dlp/issues/13800)) by [bashonly](https://github.com/bashonly) + - [Fix geo-block detection](https://github.com/yt-dlp/yt-dlp/commit/884f35d54a64f1e6e7be49459842f573fc3a2701) ([#13667](https://github.com/yt-dlp/yt-dlp/issues/13667)) by [bashonly](https://github.com/bashonly) +- **blackboardcollaborate**: [Support subtitles and authwalled videos](https://github.com/yt-dlp/yt-dlp/commit/dcc4cba39e2a79d3efce16afa28dbe245468489f) ([#12473](https://github.com/yt-dlp/yt-dlp/issues/12473)) by [flanter21](https://github.com/flanter21) +- **btvplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae61e0f313dd03a09060abc7a212775c3717818) ([#13541](https://github.com/yt-dlp/yt-dlp/issues/13541)) by [bubo](https://github.com/bubo) +- **ctv**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9f54ea38984788811773ca2ceaca73864acf0e8a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **dangalplay**: [Support other login regions](https://github.com/yt-dlp/yt-dlp/commit/09982bc33e2f1f9a1ff66e6738df44f15b36f6a6) ([#13768](https://github.com/yt-dlp/yt-dlp/issues/13768)) by [bashonly](https://github.com/bashonly) +- **francetv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/ade876efb31d55d3394185ffc56942fdc8d325cc) ([#13726](https://github.com/yt-dlp/yt-dlp/issues/13726)) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Fix support for free 
accounts](https://github.com/yt-dlp/yt-dlp/commit/07d1d85f6387e4bdb107096f0131c7054f078bb9) ([#13700](https://github.com/yt-dlp/yt-dlp/issues/13700)) by [chauhantirth](https://github.com/chauhantirth) + - [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9) ([#13727](https://github.com/yt-dlp/yt-dlp/issues/13727)) by [bashonly](https://github.com/bashonly) (With fixes in [ef103b2](https://github.com/yt-dlp/yt-dlp/commit/ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d)) +- **joqrag**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6d39c420f7774562a106d90253e2ed5b75036321) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **limelight**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/5d693446e882931618c40c99bb593f0b87b30eb9) ([#13267](https://github.com/yt-dlp/yt-dlp/issues/13267)) by [doe1080](https://github.com/doe1080) +- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b4b4486effdcb96bb6b8148171a49ff579b69a4a) ([#13717](https://github.com/yt-dlp/yt-dlp/issues/13717)) by [Pawka](https://github.com/Pawka) +- **mir24.tv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7b4c96e0898db048259ef5fdf12ed14e3605dce3) ([#13651](https://github.com/yt-dlp/yt-dlp/issues/13651)) by [swayll](https://github.com/swayll) +- **mixlr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0f33950c778331bf4803c76e8b0ba1862df93431) ([#13561](https://github.com/yt-dlp/yt-dlp/issues/13561)) by [seproDev](https://github.com/seproDev), [ShockedPlot7560](https://github.com/ShockedPlot7560) +- **mlbtv**: [Make formats downloadable with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226) ([#13761](https://github.com/yt-dlp/yt-dlp/issues/13761)) by [bashonly](https://github.com/bashonly) +- **newspicks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2aaf1aa71d174700859c9ec1a81109b78e34961c) ([#13612](https://github.com/yt-dlp/yt-dlp/issues/13612)) by [doe1080](https://github.com/doe1080) +- **nhkradiru**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7c49a937887756efcfa162abdcf17e48c244cb0c) ([#12708](https://github.com/yt-dlp/yt-dlp/issues/12708)) by [garret1317](https://github.com/garret1317) +- **noovo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/d57a0b5aa78d59324b037d37492fe86aa4fbf58a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d88b304d44c599d81acfa4231502270c8b9fe2f8) ([#13712](https://github.com/yt-dlp/yt-dlp/issues/13712)) by [bashonly](https://github.com/bashonly) +- **playerfm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3) ([#13016](https://github.com/yt-dlp/yt-dlp/issues/13016)) by [R0hanW](https://github.com/R0hanW) +- **rai**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c8329fc572903eeed7edad1642773b2268b71a62) ([#13572](https://github.com/yt-dlp/yt-dlp/issues/13572)) by [moonshinerd](https://github.com/moonshinerd), [seproDev](https://github.com/seproDev) +- **raisudtirol**: [Support alternative domain](https://github.com/yt-dlp/yt-dlp/commit/85c3fa1925a9057ef4ae8af682686d5b3eb8e568) ([#13718](https://github.com/yt-dlp/yt-dlp/issues/13718)) by [barsnick](https://github.com/barsnick) +- **skeb**: [Rework 
extractor](https://github.com/yt-dlp/yt-dlp/commit/060c6a4501a0b8a92f1b9c12788f556d902c83c6) ([#13593](https://github.com/yt-dlp/yt-dlp/issues/13593)) by [doe1080](https://github.com/doe1080) +- **soundcloud**: [Always extract original format extension](https://github.com/yt-dlp/yt-dlp/commit/c1ac543c8166ff031d62e340b3244ca8556e3fb9) ([#13746](https://github.com/yt-dlp/yt-dlp/issues/13746)) by [bashonly](https://github.com/bashonly) +- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0b41746964e1d0470ac286ce09408940a3a51147) ([#13610](https://github.com/yt-dlp/yt-dlp/issues/13610)) by [bashonly](https://github.com/bashonly) +- **thehighwire**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3a84be9d1660ef798ea28f929a20391bef6afda4) ([#13505](https://github.com/yt-dlp/yt-dlp/issues/13505)) by [swayll](https://github.com/swayll) +- **twitch**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/422cc8cb2ff2bd3b4c2bc64e23507b7e6f522c35) ([#13618](https://github.com/yt-dlp/yt-dlp/issues/13618)) by [bashonly](https://github.com/bashonly) +- **unitednationswebtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/630f3389c33f0f7f6ec97e8917d20aeb4e4078da) ([#13538](https://github.com/yt-dlp/yt-dlp/issues/13538)) by [averageFOSSenjoyer](https://github.com/averageFOSSenjoyer) +- **vimeo** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a5d697f62d8be78ffd472acb2f52c8bc32833003) ([#13692](https://github.com/yt-dlp/yt-dlp/issues/13692)) by [bashonly](https://github.com/bashonly) + - [Handle age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/a6db1d297ab40cc346de24aacbeab93112b2f4e1) ([#13719](https://github.com/yt-dlp/yt-dlp/issues/13719)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Do not require PO Token for premium accounts](https://github.com/yt-dlp/yt-dlp/commit/5b57b72c1a7c6bd249ffcebdf5630761ec664c10) ([#13640](https://github.com/yt-dlp/yt-dlp/issues/13640)) by [coletdjnz](https://github.com/coletdjnz) + - [Ensure context params are consistent for web clients](https://github.com/yt-dlp/yt-dlp/commit/6e5bee418bc108565108153fd745c8e7a59f16dd) ([#13701](https://github.com/yt-dlp/yt-dlp/issues/13701)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract global nsig helper functions](https://github.com/yt-dlp/yt-dlp/commit/fca94ac5d63ed6578b5cd9c8129d97a8a713c39a) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/0e68332bcb9fba87c42805b7a051eeb2bed36206) ([#13659](https://github.com/yt-dlp/yt-dlp/issues/13659)) by [bashonly](https://github.com/bashonly) + - [Log bad playability statuses of player responses](https://github.com/yt-dlp/yt-dlp/commit/aa9f1f4d577e99897ac16cd19d4e217d688ea75d) ([#13647](https://github.com/yt-dlp/yt-dlp/issues/13647)) by [coletdjnz](https://github.com/coletdjnz) + - [Use impersonation for downloading subtitles](https://github.com/yt-dlp/yt-dlp/commit/8820101aa3152e5f4811541c645f8b5de231ba8c) ([#13786](https://github.com/yt-dlp/yt-dlp/issues/13786)) by [bashonly](https://github.com/bashonly) + - tab: [Fix subscriptions feed extraction](https://github.com/yt-dlp/yt-dlp/commit/c23d837b6524d1e7a4595948871ba1708cba4dfa) ([#13665](https://github.com/yt-dlp/yt-dlp/issues/13665)) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **hls**: [Do not fall back to ffmpeg when native is 
required](https://github.com/yt-dlp/yt-dlp/commit/a7113722ec33f30fc898caee9242af2b82188a53) ([#13655](https://github.com/yt-dlp/yt-dlp/issues/13655)) by [bashonly](https://github.com/bashonly) + +#### Networking changes +- **Request Handler** + - requests + - [Refactor default headers](https://github.com/yt-dlp/yt-dlp/commit/a4561c7a66c39d88efe7ae51e7fa1986faf093fb) ([#13785](https://github.com/yt-dlp/yt-dlp/issues/13785)) by [bashonly](https://github.com/bashonly) + - [Work around partial read dropping data](https://github.com/yt-dlp/yt-dlp/commit/c2ff2dbaec7929015373fe002e9bd4849931a4ce) ([#13599](https://github.com/yt-dlp/yt-dlp/issues/13599)) by [Grub4K](https://github.com/Grub4K) (With fixes in [c316416](https://github.com/yt-dlp/yt-dlp/commit/c316416b972d1b05e58fbcc21e80428b900ce102)) + +#### Misc. changes +- **cleanup** + - [Bump ruff to 0.12.x](https://github.com/yt-dlp/yt-dlp/commit/ca5cce5b07d51efe7310b449cdefeca8d873e9df) ([#13596](https://github.com/yt-dlp/yt-dlp/issues/13596)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [9951fdd](https://github.com/yt-dlp/yt-dlp/commit/9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e) by [adamralph](https://github.com/adamralph), [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080), [hseg](https://github.com/hseg), [InvalidUsernameException](https://github.com/InvalidUsernameException), [seproDev](https://github.com/seproDev) +- **devscripts**: [Fix filename/directory Bash completions](https://github.com/yt-dlp/yt-dlp/commit/99093e96fd6a26dea9d6e4bd1e4b16283b6ad1ee) ([#13620](https://github.com/yt-dlp/yt-dlp/issues/13620)) by [barsnick](https://github.com/barsnick) +- **test**: download: [Support `playlist_maxcount`](https://github.com/yt-dlp/yt-dlp/commit/fd36b8f31bafbd8096bdb92a446a0c9c6081209c) ([#13433](https://github.com/yt-dlp/yt-dlp/issues/13433)) by [InvalidUsernameException](https://github.com/InvalidUsernameException) + ### 2025.06.30 #### Core changes diff --git a/README.md b/README.md index 7a6d1073f4..f1d119317c 100644 --- a/README.md +++ b/README.md @@ -639,9 +639,9 @@ ## Filesystem Options: --no-part Do not use .part files - write directly into output file --mtime Use the Last-modified header to set the file - modification time (default) + modification time --no-mtime Do not use the Last-modified header to set - the file modification time + the file modification time (default) --write-description Write video description to a .description file --no-write-description Do not write video description (default) --write-info-json Write video metadata to a .info.json file diff --git a/supportedsites.md b/supportedsites.md index 8e48135d22..3e0bef4bcf 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -133,7 +133,6 @@ # Supported sites - **BaiduVideo**: 百度视频 - **BanBye** - **BanByeChannel** - - **bandaichannel** - **Bandcamp** - **Bandcamp:album** - **Bandcamp:user** @@ -157,7 +156,6 @@ # Supported sites - **Beeg** - **BehindKink**: (**Currently broken**) - **Bellator** - - **BellMedia** - **BerufeTV** - **Bet**: (**Currently broken**) - **bfi:player**: (**Currently broken**) @@ -197,6 +195,7 @@ # Supported sites - **BitChute** - **BitChuteChannel** - **BlackboardCollaborate** + - **BlackboardCollaborateLaunch** - **BleacherReport**: (**Currently broken**) - **BleacherReportCMS**: (**Currently broken**) - **blerp** @@ -225,6 +224,7 @@ # Supported sites - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **bt:article**: Bergens Tidende Articles - 
**bt:vestlendingen**: Bergens Tidende - Vestlendingen + - **BTVPlus** - **Bundesliga** - **Bundestag** - **BunnyCdn** @@ -317,7 +317,6 @@ # Supported sites - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **cu.ntv.co.jp**: 日テレ無料TADA! - **CultureUnplugged** @@ -652,7 +651,6 @@ # Supported sites - **jiosaavn:​show:playlist** - **jiosaavn:song** - **Joj** - - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR) - **Jove** - **JStream** - **JTBC**: jtbc.co.kr @@ -723,9 +721,6 @@ # Supported sites - **life:embed** - **likee** - **likee:user** - - **limelight** - - **limelight:channel** - - **limelight:channel_list** - **LinkedIn**: [*linkedin*](## "netrc machine") - **linkedin:events**: [*linkedin*](## "netrc machine") - **linkedin:learning**: [*linkedin*](## "netrc machine") @@ -807,6 +802,7 @@ # Supported sites - **minds:channel** - **minds:group** - **Minoto** + - **mir24.tv** - **mirrativ** - **mirrativ:user** - **MirrorCoUK** @@ -817,6 +813,8 @@ # Supported sites - **mixcloud** - **mixcloud:playlist** - **mixcloud:user** + - **Mixlr** + - **MixlrRecoring** - **MLB** - **MLBArticle** - **MLBTV**: [*mlb*](## "netrc machine") @@ -973,7 +971,6 @@ # Supported sites - **NoicePodcast** - **NonkTube** - **NoodleMagazine** - - **Noovo** - **NOSNLArticle** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** @@ -1097,6 +1094,7 @@ # Supported sites - **Platzi**: [*platzi*](## "netrc machine") - **PlatziCourse**: [*platzi*](## "netrc machine") - **player.sky.it** + - **PlayerFm** - **playeur** - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlaySuisse**: [*playsuisse*](## "netrc machine") @@ -1472,11 +1470,12 @@ # Supported sites - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - **TF1** - - **TFO** + - **TFO**: (**Currently broken**) - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") - **TheGuardianPodcast** - **TheGuardianPodcastPlaylist** + - **TheHighWire** - **TheHoleTv** - **TheIntercept** - **ThePlatform** @@ -1544,8 +1543,8 @@ # Supported sites - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - **TV5MONDE** - - **tv5unis** - - **tv5unis:video** + - **tv5unis**: (**Currently broken**) + - **tv5unis:video**: (**Currently broken**) - **tv8.it** - **tv8.it:live**: TV8 Live - **tv8.it:playlist**: TV8 Playlist @@ -1600,6 +1599,7 @@ # Supported sites - **UlizaPortal**: ulizaportal.jp - **umg:de**: Universal Music Deutschland - **Unistra** + - **UnitedNationsWebTv** - **Unity**: (**Currently broken**) - **uol.com.br** - **uplynk** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 451fee7164..868429ffb2 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2025.06.30' +__version__ = '2025.07.21' -RELEASE_GIT_HEAD = 'b0187844988e557c7e1e6bb1aabd4c1176768d86' +RELEASE_GIT_HEAD = '9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2025.06.30' +_pkg_version = '2025.07.21' From 3e918d825d7ff367812658957b281b8cda8f9ebb Mon Sep 17 00:00:00 2001 From: Roland Crosby Date: Tue, 22 Jul 2025 13:50:42 -0400 Subject: [PATCH 49/81] [pp/XAttrMetadata] Add macOS "Where from" attribute (#12664) Authored by: rolandcrosby --- yt_dlp/postprocessor/xattrpp.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git 
a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index e486b797b7..fd83d783ba 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -33,8 +33,17 @@ class XAttrMetadataPP(PostProcessor): # (e.g., 4kB on ext4), and we don't want to have the other ones fail 'user.dublincore.description': 'description', # 'user.xdg.comment': 'description', + 'com.apple.metadata:kMDItemWhereFroms': 'webpage_url', } + APPLE_PLIST_TEMPLATE = ''' + + + +\t%s + +''' + def run(self, info): mtime = os.stat(info['filepath']).st_mtime self.to_screen('Writing metadata to file\'s xattrs') @@ -44,6 +53,8 @@ def run(self, info): if value: if infoname == 'upload_date': value = hyphenate_date(value) + elif xattrname == 'com.apple.metadata:kMDItemWhereFroms': + value = self.APPLE_PLIST_TEMPLATE % value write_xattr(info['filepath'], xattrname, value.encode()) except XAttrUnavailableError as e: From eed94c7306d4ecdba53ad8783b1463a9af5c97f1 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 22 Jul 2025 20:10:51 +0200 Subject: [PATCH 50/81] [utils] Add `WINDOWS_VT_MODE` to globals (#12460) Authored by: Grub4K --- test/test_compat.py | 3 --- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/compat/_legacy.py | 2 +- yt_dlp/globals.py | 2 ++ yt_dlp/utils/_utils.py | 10 +++------- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index b1cc2a8187..3aa9c0c518 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -21,9 +21,6 @@ def test_compat_passthrough(self): with self.assertWarns(DeprecationWarning): _ = compat.compat_basestring - with self.assertWarns(DeprecationWarning): - _ = compat.WINDOWS_VT_MODE - self.assertEqual(urllib.request.getproxies, getproxies) with self.assertWarns(DeprecationWarning): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 76fd18c338..a9f347bf4a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -36,6 +36,7 @@ from .globals import ( IN_CLI, LAZY_EXTRACTORS, + WINDOWS_VT_MODE, plugin_ies, plugin_ies_overrides, plugin_pps, @@ -4040,8 +4041,7 @@ def get_encoding(stream): if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') if not supports_terminal_sequences(stream): - from .utils import WINDOWS_VT_MODE # Must be imported locally - additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI') + additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI') if additional_info: ret = f'{ret} ({",".join(additional_info)})' return ret diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index dae2c14592..2f3e35d4a8 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -37,7 +37,7 @@ from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..networking.exceptions import HTTPError as compat_HTTPError -passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) +passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',)) # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE diff --git a/yt_dlp/globals.py b/yt_dlp/globals.py index 0cf276cc9e..81ad004480 100644 --- a/yt_dlp/globals.py +++ b/yt_dlp/globals.py @@ -1,3 +1,4 @@ +import os from collections import defaultdict # Please Note: Due to necessary changes and the complex nature involved in the plugin/globals system, @@ -28,3 +29,4 @@ def __repr__(self, /): # Misc IN_CLI = Indirect(False) LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled +WINDOWS_VT_MODE = 
Indirect(False if os.name == 'nt' else None) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 7d79f417fa..1cb62712ba 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -52,7 +52,7 @@ compat_HTMLParseError, ) from ..dependencies import xattr -from ..globals import IN_CLI +from ..globals import IN_CLI, WINDOWS_VT_MODE __name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module @@ -4759,13 +4759,10 @@ def jwt_decode_hs256(jwt): return json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) -WINDOWS_VT_MODE = False if os.name == 'nt' else None - - @functools.cache def supports_terminal_sequences(stream): if os.name == 'nt': - if not WINDOWS_VT_MODE: + if not WINDOWS_VT_MODE.value: return False elif not os.getenv('TERM'): return False @@ -4802,8 +4799,7 @@ def windows_enable_vt_mode(): finally: os.close(handle) - global WINDOWS_VT_MODE - WINDOWS_VT_MODE = True + WINDOWS_VT_MODE.value = True supports_terminal_sequences.cache_clear() From c59ad2b066bbccd3cc4eed580842f961bce7dd4a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:34:03 -0500 Subject: [PATCH 51/81] [utils] `random_user_agent`: Bump versions (#13543) Closes #5362 Authored by: bashonly --- yt_dlp/extractor/adobepass.py | 8 ++---- yt_dlp/extractor/bilibili.py | 7 ----- yt_dlp/extractor/francaisfacile.py | 13 +-------- yt_dlp/extractor/mitele.py | 2 +- yt_dlp/extractor/sproutvideo.py | 2 +- yt_dlp/extractor/telecinco.py | 13 +-------- yt_dlp/utils/networking.py | 46 +++--------------------------- 7 files changed, 10 insertions(+), 81 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 8c2d9d9340..eb45734ec0 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -48,7 +48,6 @@ 'username_field': 'user', 'password_field': 'passwd', 'login_hostname': 'login.xfinity.com', - 'needs_newer_ua': True, }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -1379,11 +1378,8 @@ def _download_webpage_handle(self, *args, **kwargs): @staticmethod def _get_mso_headers(mso_info): - # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - return { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', - } if mso_info.get('needs_newer_ua') else {} + # Not needed currently + return {} @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2846702f6a..d00ac63176 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,13 +175,6 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): else: note = f'Downloading video formats for cid {cid}' - # TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735 - # playurl requests carrying old UA will be rejected - headers = { - 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36', - **(headers or {}), - } - return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] diff --git a/yt_dlp/extractor/francaisfacile.py b/yt_dlp/extractor/francaisfacile.py index d3208c2828..c432cf486c 100644 --- a/yt_dlp/extractor/francaisfacile.py +++ 
b/yt_dlp/extractor/francaisfacile.py @@ -1,9 +1,7 @@ import urllib.parse from .common import InfoExtractor -from ..networking.exceptions import HTTPError from ..utils import ( - ExtractorError, float_or_none, url_or_none, ) @@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor): def _real_extract(self, url): display_id = urllib.parse.unquote(self._match_id(url)) - - try: # yt-dlp's default user-agents are too old and blocked by the site - webpage = self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient - webpage = self._download_webpage(url, display_id, impersonate=True) + webpage = self._download_webpage(url, display_id) data = self._search_json( r']+\bdata-media-id=[^>]+\btype="application/json"[^>]*>', diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index 0dded38c65..76fef337a2 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -79,7 +79,7 @@ class MiTeleIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) pre_player = self._search_json( r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', webpage, 'Pre Player', display_id)['prePlayer'] diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 494042738d..4afa838715 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -99,7 +99,7 @@ def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) webpage = self._download_webpage( - url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True) + url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index 2dbe2a7768..a34f2afd4a 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -63,17 +63,6 @@ def _parse_content(self, content, url): 'http_headers': headers, } - def _download_akamai_webpage(self, url, display_id): - try: # yt-dlp's default user-agents are too old and blocked by akamai - return self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient to bypass akamai - return self._download_webpage(url, display_id, impersonate=True) - class TelecincoIE(TelecincoBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' @@ -151,7 +140,7 @@ class TelecincoIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) article = self._search_json( r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', webpage, 'article', display_id)['article'] diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py index 9fcab6456f..467312ce75 100644 --- 
a/yt_dlp/utils/networking.py +++ b/yt_dlp/utils/networking.py @@ -15,48 +15,10 @@ def random_user_agent(): - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '90.0.4430.212', - '90.0.4430.24', - '90.0.4430.70', - '90.0.4430.72', - '90.0.4430.85', - '90.0.4430.93', - '91.0.4472.101', - '91.0.4472.106', - '91.0.4472.114', - '91.0.4472.124', - '91.0.4472.164', - '91.0.4472.19', - '91.0.4472.77', - '92.0.4515.107', - '92.0.4515.115', - '92.0.4515.131', - '92.0.4515.159', - '92.0.4515.43', - '93.0.4556.0', - '93.0.4577.15', - '93.0.4577.63', - '93.0.4577.82', - '94.0.4606.41', - '94.0.4606.54', - '94.0.4606.61', - '94.0.4606.71', - '94.0.4606.81', - '94.0.4606.85', - '95.0.4638.17', - '95.0.4638.50', - '95.0.4638.54', - '95.0.4638.69', - '95.0.4638.74', - '96.0.4664.18', - '96.0.4664.45', - '96.0.4664.55', - '96.0.4664.93', - '97.0.4692.20', - ) - return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36' + # Target versions released within the last ~6 months + CHROME_MAJOR_VERSION_RANGE = (132, 138) + return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0') class HTTPHeaderDict(dict): From 59765ecbc08d18005de7143fbb1d1caf90239471 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:46:46 -0500 Subject: [PATCH 52/81] [ie/sproutvideo] Fix extractor (#13813) Authored by: bashonly --- yt_dlp/extractor/sproutvideo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 4afa838715..ff9dc7dee2 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -101,8 +101,8 @@ def _real_extract(self, url): webpage = self._download_webpage( url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( - r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, - contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', + r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage, 'player info', + video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode()) # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e] From 7e3f48d64d237281a97b3df1a61980c78a0302fe Mon Sep 17 00:00:00 2001 From: Atsushi2965 <142886283+atsushi2965@users.noreply.github.com> Date: Wed, 23 Jul 2025 06:55:00 +0900 Subject: [PATCH 53/81] [pp/EmbedThumbnail] Fix ffmpeg args for embedding in mp3 (#13720) Authored by: atsushi2965 --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index d8ba220cab..39e8826c6f 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -90,7 +90,7 @@ def run(self, info): if info['ext'] == 'mp3': options = [ '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', - '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] + '-metadata:s:v', 'title=Album cover', '-metadata:s:v', 'comment=Cover (front)'] self._report_run('ffmpeg', filename) self.run_ffmpeg_multiple_files([filename, 
thumbnail_filename], temp_filename, options) From afaf60d9fd5a0c7a85aeb1374fd97fbc13cd652c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 23 Jul 2025 18:27:20 -0500 Subject: [PATCH 54/81] [ie/vimeo] Fix login support and require authentication (#13823) Closes #13822 Authored by: bashonly --- README.md | 2 +- yt_dlp/extractor/vimeo.py | 59 +++++++++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index f1d119317c..e5bd21b9ca 100644 --- a/README.md +++ b/README.md @@ -1902,7 +1902,7 @@ #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) #### vimeo -* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens * `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 7ffe89f227..c45264bb52 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -49,7 +49,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'Cannot download embed-only video without embedding URL. 
Please call yt-dlp ' 'with the URL of the page that embeds this video.') - _DEFAULT_CLIENT = 'android' + _DEFAULT_CLIENT = 'web' _DEFAULT_AUTHED_CLIENT = 'web' _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', @@ -58,7 +58,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): _CLIENT_CONFIGS = { 'android': { 'CACHE_KEY': 'oauth-token-android', - 'CACHE_ONLY': False, + 'CACHE_ONLY': True, 'VIEWER_JWT': False, 'REQUIRES_AUTH': False, 'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==', @@ -88,6 +88,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): ), }, 'web': { + 'CACHE_ONLY': False, 'VIEWER_JWT': True, 'REQUIRES_AUTH': True, 'USER_AGENT': None, @@ -142,7 +143,6 @@ def _perform_login(self, username, password): 'service': 'vimeo', 'token': viewer['xsrft'], } - self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_webpage( self._LOGIN_URL, None, 'Logging in', @@ -151,16 +151,40 @@ def _perform_login(self, username, password): 'Referer': self._LOGIN_URL, }) except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 418: + if isinstance(e.cause, HTTPError) and e.cause.status in (405, 418): raise ExtractorError( 'Unable to log in: bad username or password', expected=True) raise ExtractorError('Unable to log in') + # Clear unauthenticated viewer info + self._viewer_info = None + def _real_initialize(self): - if self._LOGIN_REQUIRED and not self._is_logged_in: + if self._is_logged_in: + return + + if self._LOGIN_REQUIRED: self.raise_login_required() + if self._DEFAULT_CLIENT != 'web': + return + + for client_name, client_config in self._CLIENT_CONFIGS.items(): + if not client_config['CACHE_ONLY']: + continue + + cache_key = client_config['CACHE_KEY'] + if cache_key not in self._oauth_tokens: + if token := self.cache.load(self._NETRC_MACHINE, cache_key): + self._oauth_tokens[cache_key] = token + + if self._oauth_tokens.get(cache_key): + self._DEFAULT_CLIENT = client_name + self.write_debug( + f'Found cached {client_name} token; using {client_name} as default API client') + return + def _get_video_password(self): password = self.get_param('videopassword') if password is None: @@ -200,9 +224,6 @@ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): if vimeo_config: return self._parse_json(vimeo_config, video_id) - def _set_vimeo_cookie(self, name, value): - self._set_cookie('vimeo.com', name, value) - def _parse_config(self, config, video_id): video_data = config['video'] video_title = video_data.get('title') @@ -363,22 +384,26 @@ def _fetch_oauth_token(self, client): return f'Bearer {self._oauth_tokens[cache_key]}' def _get_requested_client(self): - default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT + if client := self._configuration_arg('client', [None], ie_key=VimeoIE)[0]: + if client not in self._CLIENT_CONFIGS: + raise ExtractorError( + f'Unsupported API client "{client}" requested. ' + f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + self.write_debug( + f'Using {client} API client as specified by extractor argument', only_once=True) + return client - client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0] - if client not in self._CLIENT_CONFIGS: - raise ExtractorError( - f'Unsupported API client "{client}" requested. 
' - f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + if self._is_logged_in: + return self._DEFAULT_AUTHED_CLIENT - return client + return self._DEFAULT_CLIENT def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): client = force_client or self._get_requested_client() client_config = self._CLIENT_CONFIGS[client] if client_config['REQUIRES_AUTH'] and not self._is_logged_in: - self.raise_login_required(f'The {client} client requires authentication') + self.raise_login_required(f'The {client} client only works when logged-in') return self._download_json( join_nonempty( @@ -1192,7 +1217,6 @@ def _try_album_password(self, url): raise ExtractorError( 'This album is protected by a password, use the --video-password option', expected=True) - self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_json( f'https://vimeo.com/showcase/{album_id}/auth', @@ -1589,7 +1613,6 @@ def _real_extract(self, url): raise ExtractorError( 'This album is protected by a password, use the --video-password option', expected=True) - self._set_vimeo_cookie('vuid', viewer['vuid']) try: hashed_pass = self._download_json( f'https://vimeo.com/showcase/{album_id}/auth', From 0adeb1e54b2d7e95cd19999e71013877850f8f41 Mon Sep 17 00:00:00 2001 From: ischmidt20 Date: Thu, 24 Jul 2025 18:35:48 -0400 Subject: [PATCH 55/81] [ie/tbs] Fix truTV support (#9683) Closes #3400 Authored by: ischmidt20, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/tbs.py | 113 ++++++++++++++++++++++++++------ yt_dlp/extractor/trutv.py | 71 -------------------- yt_dlp/extractor/turner.py | 5 ++ 4 files changed, 97 insertions(+), 93 deletions(-) delete mode 100644 yt_dlp/extractor/trutv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 59a61e0604..1aa2927f8f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2166,7 +2166,6 @@ from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE -from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import ( TubeTuGrazIE, diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index 80534731e1..f8891671f1 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -5,45 +5,110 @@ from ..utils import ( float_or_none, int_or_none, + make_archive_id, strip_or_none, ) +from ..utils.traversal import traverse_obj class TBSIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?Ptbs|tntdrama)\.com(?P/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P[^/?#]+))' + _SITE_INFO = { + 'tbs': ('TBS', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg'), + 'tntdrama': ('TNT', 
'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA'), + 'trutv': ('truTV', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'), + } + _VALID_URL = fr'''(?x) + https?://(?:www\.)?(?P{"|".join(map(re.escape, _SITE_INFO))})\.com + (?P/(?: + (?Pwatch(?:tnt|tbs|trutv))| + movies|shows/[^/?#]+/(?:clips|season-\d+/episode-\d+) + )/(?P[^/?#]+)) + ''' _TESTS = [{ - 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', + 'url': 'https://www.tbs.com/shows/american-dad/season-6/episode-12/you-debt-your-life', 'info_dict': { - 'id': '8d384cde33b89f3a43ce5329de42903ed5099887', + 'id': '984bdcd8db0cc00dc699927f2a411c8c6e0e48f3', 'ext': 'mp4', - 'title': 'Monster', - 'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.', - 'timestamp': 1508175329, - 'upload_date': '20171016', + 'title': 'You Debt Your Life', + 'description': 'md5:f211cfeb9187fd3cdb53eb0e8930d499', + 'duration': 1231.0, + 'thumbnail': r're:https://images\.tbs\.com/tbs/.+\.(?:jpe?g|png)', + 'chapters': 'count:4', + 'season': 'Season 6', + 'season_number': 6, + 'episode': 'Episode 12', + 'episode_number': 12, + 'timestamp': 1478276239, + 'upload_date': '20161104', }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.tntdrama.com/shows/the-librarians-the-next-chapter/season-1/episode-10/and-going-medieval', + 'info_dict': { + 'id': 'e487b31b663a8001864f62fd20907782f7b8ccb8', + 'ext': 'mp4', + 'title': 'And Going Medieval', + 'description': 'md5:5aed0ae23a6cf148a02fe3c1be8359fa', + 'duration': 2528.0, + 'thumbnail': r're:https://images\.tntdrama\.com/tnt/.+\.(?:jpe?g|png)', + 'chapters': 'count:7', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 10', + 'episode_number': 10, + 'timestamp': 1743107520, + 'upload_date': '20250327', }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/season-1/episode-1/got-the-bug-out', + 'info_dict': { + 'id': 'b457dd7458fd9e64b596355950b13a1ca799dc39', + 'ext': 'mp4', + 'title': 'Got the Bug Out', + 'description': 'md5:9eeddf6248f73517b0e5969b8a43c025', + 'duration': 1283.0, + 'thumbnail': r're:https://images\.trutv\.com/tru/.+\.(?:jpe?g|png)', + 'chapters': 'count:4', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1570040829, + 'upload_date': '20191002', + '_old_archive_ids': ['trutv b457dd7458fd9e64b596355950b13a1ca799dc39'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 
'http://www.tntdrama.com/shows/the-alienist/clips/monster', + 'only_matching': True, }, { 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew', 'only_matching': True, }, { 'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope', 'only_matching': True, + }, { + 'url': 'https://www.trutv.com/shows/impractical-jokers/season-9/episode-1/you-dirty-dog', + 'only_matching': True, + }, { + 'url': 'https://www.trutv.com/watchtrutv/east', + 'only_matching': True, + }, { + 'url': 'https://www.tbs.com/watchtbs/east', + 'only_matching': True, + }, { + 'url': 'https://www.tntdrama.com/watchtnt/east', + 'only_matching': True, }] - _SOFTWARE_STATEMENT_MAP = { - 'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg', - 'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA', - } def _real_extract(self, url): - site, path, display_id = self._match_valid_url(url).groups() + site, path, display_id, watch = self._match_valid_url(url).group('site', 'path', 'id', 'watch') + is_live = bool(watch) webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r']+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})', - webpage, 'drupal setting'), display_id) - is_live = 'watchtnt' in path or 'watchtbs' in path + drupal_settings = self._search_json( + r']+\bdata-drupal-selector="drupal-settings-json"[^>]*>', + webpage, 'drupal settings', display_id) video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path) media_id = video_data['mediaID'] @@ -51,10 +116,14 @@ def _real_extract(self, url): tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse( drupal_settings['ngtv_token_url']).query) + auth_info = traverse_obj(drupal_settings, ('top2', {dict})) or {} + site_name = auth_info.get('siteName') or self._SITE_INFO[site][0] + software_statement = auth_info.get('softwareStatement') or self._SITE_INFO[site][1] + info = self._extract_ngtv_info( - media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], { + media_id, tokenizer_query, software_statement, { 'url': url, - 'site_name': site[:3].upper(), + 'site_name': site_name, 'auth_required': video_data.get('authRequired') == '1' or is_live, 'is_live': is_live, }) @@ -87,4 +156,6 @@ def _real_extract(self, url): 'thumbnails': thumbnails, 'is_live': is_live, }) + if site == 'trutv': + info['_old_archive_ids'] = [make_archive_id(site, media_id)] return info diff --git a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py deleted file mode 100644 index c1d0cb0d14..0000000000 --- 
a/yt_dlp/extractor/trutv.py +++ /dev/null @@ -1,71 +0,0 @@ -from .turner import TurnerBaseIE -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class TruTVIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P[0-9A-Za-z-]+)/(?:videos/(?P[0-9A-Za-z-]+)|(?P\d+))' - _TEST = { - 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html', - 'info_dict': { - 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1', - 'ext': 'mp4', - 'title': 'Sunlight-Activated Flower', - 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.", - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q' - - def _real_extract(self, url): - series_slug, clip_slug, video_id = self._match_valid_url(url).groups() - - if video_id: - path = 'episode' - display_id = video_id - else: - path = 'series/clip' - display_id = clip_slug - - data = self._download_json( - f'https://api.trutv.com/v2/web/{path}/{series_slug}/{display_id}', - display_id) - video_data = data['episode'] if video_id else data['info'] - media_id = video_data['mediaId'] - title = video_data['title'].strip() - - info = self._extract_ngtv_info( - media_id, {}, self._SOFTWARE_STATEMENT, { - 'url': url, - 'site_name': 'truTV', - 'auth_required': video_data.get('isAuthRequired'), - }) - - thumbnails = [] - for image in video_data.get('images', []): - image_url = image.get('srcUrl') - if not image_url: - continue - thumbnails.append({ - 'url': image_url, - 'width': int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) - - info.update({ - 'id': media_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnails': thumbnails, - 'timestamp': parse_iso8601(video_data.get('publicationDate')), - 'series': video_data.get('showTitle'), - 'season_number': int_or_none(video_data.get('seasonNum')), - 'episode_number': int_or_none(video_data.get('episodeNum')), - }) - return info diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 4493705e99..a1a7fd6906 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -251,6 +251,11 @@ def _extract_ngtv_info(self, media_id, tokenizer_query, software_statement, ap_d 'end_time': start_time + chapter_duration, }) + if is_live: + for f in formats: + # Prevent ffmpeg from adding its own http headers or else we get HTTP Error 403 + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-icy', '0']} + return { 'formats': formats, 'chapters': chapters, From 485de69dbfeb7de7bcf9f7fe16d6c6ba9e81e1a0 Mon Sep 17 00:00:00 2001 From: Barry van Oudtshoorn Date: Fri, 25 Jul 2025 12:00:31 +0800 Subject: [PATCH 56/81] [ie/Parlview] Rework extractor (#13788) Closes #13787 Authored by: barryvan --- yt_dlp/extractor/parlview.py | 80 ++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/yt_dlp/extractor/parlview.py 
b/yt_dlp/extractor/parlview.py index b93b5edacd..9c7efc58f4 100644 --- a/yt_dlp/extractor/parlview.py +++ b/yt_dlp/extractor/parlview.py @@ -1,63 +1,63 @@ +import re + from .common import InfoExtractor -from ..utils import ( - int_or_none, - try_get, - unified_timestamp, -) +from ..utils import parse_duration, parse_iso8601, url_or_none +from ..utils.traversal import traverse_obj class ParlviewIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P\d{6})' + _VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P[^/?#]+)' _TESTS = [{ - 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661', + 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614', 'info_dict': { - 'id': '542661', + 'id': '3406614', 'ext': 'mp4', - 'title': "Australia's Family Law System [Part 2]", - 'duration': 5799, - 'description': 'md5:7099883b391619dbae435891ca871a62', - 'timestamp': 1621430700, - 'upload_date': '20210519', - 'uploader': 'Joint Committee', + 'title': 'Senate Chamber', + 'description': 'Official Recording of Senate Proceedings from the Australian Parliament', + 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg', + 'upload_date': '20250325', + 'duration': 17999, + 'timestamp': 1742939400, }, 'params': { 'skip_download': True, }, }, { - 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936', - 'only_matching': True, + 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv', + 'info_dict': { + 'id': 'SV1394.dv', + 'ext': 'mp4', + 'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]', + 'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament', + 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg', + 'upload_date': '19960822', + 'duration': 14765, + 'timestamp': 840754200, + }, + 'params': { + 'skip_download': True, + }, }] - _API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json' - _MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - media = self._download_json(self._API_URL % video_id, video_id).get('media') - timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/' + video_details = self._download_json( + f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails'] - stream = try_get(media, lambda x: x['renditions'][0], dict) - if not stream: - self.raise_no_formats('No streams were detected') - elif stream.get('streamType') != 'VOD': - self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType')))) - formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native') + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + video_details['files']['file']['url'], video_id, 'mp4') - media_info = self._download_webpage( - self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False) + DURATION_RE = re.compile(r'(?P\d+:\d+:\d+):\d+') return { 'id': video_id, - 'url': url, - 'title': self._html_search_regex(r'
<h2>
([^<]+)<', webpage, 'title', fatal=False), 'formats': formats, - 'duration': int_or_none(media.get('duration')), - 'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')), - 'description': self._html_search_regex( - r']+class="descripti?on"[^>]*>[^>]+[^>]+>[^>]+>([^<]+)', - webpage, 'description', fatal=False), - 'uploader': self._html_search_regex( - r'[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False), - 'thumbnail': media.get('staticImage'), + 'subtitles': subtitles, + **traverse_obj(video_details, { + 'title': (('parlViewTitle', 'title'), {str}, any), + 'description': ('parlViewDescription', {str}), + 'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}), + 'timestamp': ('recordingFrom', {parse_iso8601}), + 'thumbnail': ('thumbUrl', {url_or_none}), + }), } From 4385480795acda35667be008d0bf26b46e9d65b4 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sat, 26 Jul 2025 03:41:21 +0900 Subject: [PATCH 57/81] [utils] `parse_resolution`: Support width-only pattern (#13802) Authored by: doe1080 --- test/test_utils.py | 1 + yt_dlp/utils/_utils.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index aedb565ec1..44747efda6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1373,6 +1373,7 @@ def test_parse_resolution(self): self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 1cb62712ba..a5471da4df 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1875,6 +1875,11 @@ def parse_resolution(s, *, lenient=False): if mobj: return {'height': int(mobj.group(1)) * 540} + if lenient: + mobj = re.search(r'(? 
Date: Fri, 25 Jul 2025 20:55:41 +0200 Subject: [PATCH 58/81] [ie/PlyrEmbed] Add extractor (#13836) Closes #13827 Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/plyr.py | 104 ++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 yt_dlp/extractor/plyr.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1aa2927f8f..a2042557d7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1568,6 +1568,7 @@ ) from .plutotv import PlutoTVIE from .plvideo import PlVideoIE +from .plyr import PlyrEmbedIE from .podbayfm import ( PodbayFMChannelIE, PodbayFMIE, diff --git a/yt_dlp/extractor/plyr.py b/yt_dlp/extractor/plyr.py new file mode 100644 index 0000000000..c5f27cfd95 --- /dev/null +++ b/yt_dlp/extractor/plyr.py @@ -0,0 +1,104 @@ +import re + +from .common import InfoExtractor +from .vimeo import VimeoIE + + +class PlyrEmbedIE(InfoExtractor): + _VALID_URL = False + _WEBPAGE_TESTS = [{ + # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1" + 'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/', + 'info_dict': { + 'id': '522319456', + 'ext': 'mp4', + 'title': '200.000.000 Mouths (1950–51)', + 'uploader': 'Zeughauskino', + 'uploader_url': '', + 'comment_count': int, + 'like_count': int, + 'duration': 963, + 'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d', + 'timestamp': 1615467405, + 'upload_date': '20210311', + 'release_timestamp': 1615467405, + 'release_date': '20210311', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + # data-plyr-provider="vimeo" data-plyr-embed-id="803435276" + 'url': 'https://www.inarcassa.it/', + 'info_dict': { + 'id': '803435276', + 'ext': 'mp4', + 'title': 'HOME_Moto_Perpetuo', + 'uploader': 'Inarcassa', + 'uploader_url': '', + 'duration': 38, + 'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI" + 'url': 'https://www.profile.nl', + 'info_dict': { + 'id': 'GF-BjYKoAqI', + 'ext': 'mp4', + 'title': 'PROFILE: Recruitment Profile', + 'description': '', + 'media_type': 'video', + 'uploader': 'Profile Nederland', + 'uploader_id': '@profilenederland', + 'uploader_url': 'https://www.youtube.com/@profilenederland', + 'channel': 'Profile Nederland', + 'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg', + 'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg', + 'channel_follower_count': int, + 'view_count': int, + 'like_count': int, + 'age_limit': 0, + 'duration': 39, + 'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg', + 'categories': ['Autos & Vehicles'], + 'tags': [], + 'timestamp': 1675692990, + 'upload_date': '20230206', + 'playable_in_embed': True, + 'availability': 'public', + 'live_status': 'not_live', + }, + }, { + # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube" + 'url': 'https://www.vnis.edu.vn', + 'info_dict': { + 'id': 'vnis.edu', + 'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ', + 'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e', + 'age_limit': 0, + 'thumbnail': 
'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png', + 'timestamp': 1753233356, + 'upload_date': '20250723', + }, + 'playlist_count': 3, + }] + + @classmethod + def _extract_embed_urls(cls, url, webpage): + plyr_embeds = re.finditer(r'''(?x) + ]+(?: + data-plyr-embed-id="(?P[^"]+)"[^>]+data-plyr-provider="(?P[^"]+)"| + data-plyr-provider="(?P[^"]+)"[^>]+data-plyr-embed-id="(?P[^"]+)" + )[^>]*>''', webpage) + for mobj in plyr_embeds: + embed_id = mobj.group('id1') or mobj.group('id2') + provider = mobj.group('provider1') or mobj.group('provider2') + if provider == 'vimeo': + if not re.match(r'https?://', embed_id): + embed_id = f'https://player.vimeo.com/video/{embed_id}' + yield VimeoIE._smuggle_referrer(embed_id, url) + elif provider == 'youtube': + if not re.match(r'https?://', embed_id): + embed_id = f'https://youtube.com/watch?v={embed_id}' + yield embed_id From d399505fdf8292332bdc91d33859a0b0d08104fd Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 25 Jul 2025 14:44:39 -0500 Subject: [PATCH 59/81] [fd/external] Work around ffmpeg's `file:` URL handling (#13844) Closes #13781 Authored by: bashonly --- yt_dlp/downloader/external.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ee73ac043e..65ed83991b 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -572,7 +572,21 @@ def _call_downloader(self, tmpfilename, info_dict): if end_time: args += ['-t', str(end_time - start_time)] - args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] + url = fmt['url'] + if self.params.get('enable_file_urls') and url.startswith('file:'): + # The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs, + # so only local segments can be read unless we also include 'http,https,tcp,tls' + args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls'] + # ffmpeg incorrectly handles 'file:' URLs by only removing the + # 'file:' prefix and treating the rest as if it's a normal filepath. 
+ # FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs: + # - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:' + # - On *nix, replace 'file://localhost/' with 'file:/' + # Ref: https://github.com/yt-dlp/yt-dlp/issues/13781 + # https://trac.ffmpeg.org/ticket/2702 + url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url) + + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] From 3e609b2cedd285739bf82c7af7853735092070a4 Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:33:49 +0000 Subject: [PATCH 60/81] [ie/FaulioLive] Add extractor (#13421) Authored by: CasperMcFadden95, seproDev Co-authored-by: sepro --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/faulio.py | 92 +++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 yt_dlp/extractor/faulio.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a2042557d7..69389671ed 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -640,6 +640,7 @@ FancodeVodIE, ) from .fathom import FathomIE +from .faulio import FaulioLiveIE from .faz import FazIE from .fc2 import ( FC2IE, diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py new file mode 100644 index 0000000000..393023503e --- /dev/null +++ b/yt_dlp/extractor/faulio.py @@ -0,0 +1,92 @@ +import re +import urllib.parse + +from .common import InfoExtractor +from ..utils import js_to_json, url_or_none +from ..utils.traversal import traverse_obj + + +class FaulioLiveIE(InfoExtractor): + _DOMAINS = ( + 'aloula.sba.sa', + 'maraya.sba.net.ae', + 'sat7plus.org', + ) + _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P[a-zA-Z0-9-]+)' + _TESTS = [{ + 'url': 'https://aloula.sba.sa/live/saudiatv', + 'info_dict': { + 'id': 'aloula.faulio.com_saudiatv', + 'title': str, + 'description': str, + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://maraya.sba.net.ae/live/1', + 'info_dict': { + 'id': 'maraya.faulio.com_1', + 'title': str, + 'description': str, + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://sat7plus.org/live/pars', + 'info_dict': { + 'id': 'sat7.faulio.com_pars', + 'title': str, + 'description': str, + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://sat7plus.org/fa/live/arabic', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + config_data = self._search_json( + r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json) + api_base = config_data['public']['TRANSLATIONS_API_URL'] + + channel = traverse_obj( + self._download_json(f'{api_base}/channels', video_id), + (lambda k, v: v['url'] == video_id, any)) + + formats = [] + subtitles = {} + if hls_url := traverse_obj(channel, ('streams', 'hls', {url_or_none})): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + if 
mpd_url := traverse_obj(channel, ('streams', 'mpd', {url_or_none})): + fmts, subs = self._extract_mpd_formats_and_subtitles( + mpd_url, video_id, mpd_id='dash', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', + **traverse_obj(channel, { + 'title': ('title', {str}), + 'description': ('description', {str}), + }), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + } From 30302df22b7b431ce920e0f7298cd10be9989967 Mon Sep 17 00:00:00 2001 From: InvalidUsernameException Date: Sat, 26 Jul 2025 00:22:32 +0200 Subject: [PATCH 61/81] [ie/sportdeuschland] Support embedded player URLs (#13833) Closes #13766 Authored by: InvalidUsernameException --- yt_dlp/extractor/sportdeutschland.py | 154 +++++++++++++-------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py index 8349d96045..0b7d90a071 100644 --- a/yt_dlp/extractor/sportdeutschland.py +++ b/yt_dlp/extractor/sportdeutschland.py @@ -8,84 +8,9 @@ class SportDeutschlandIE(InfoExtractor): - _VALID_URL = r'https?://sportdeutschland\.tv/(?P(?:[^/]+/)?[^?#/&]+)' + _VALID_URL = r'https?://(?:player\.)?sportdeutschland\.tv/(?P(?:[^/?#]+/)?[^?#/&]+)' _TESTS = [{ - 'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga', - 'info_dict': { - 'id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54', - 'ext': 'mp4', - 'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga', - 'display_id': 'blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga', - 'description': 'md5:a288c794a5ee69e200d8f12982f81a87', - 'live_status': 'was_live', - 'channel': 'Blau-Weiss Buchholz Tanzsport', - 'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport', - 'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3', - 'duration': 32447, - 'upload_date': '20230114', - 'timestamp': 1673733618, - }, - 'skip': 'not found', - }, { - 'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0', - 'info_dict': { - 'id': '95c80c52-6b9a-4ae9-9197-984145adfced', - 'ext': 'mp4', - 'title': 'BWF Tour: 1. 
Runde Feld 1 - YONEX GAINWARD German Open 2022', - 'display_id': 'deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0', - 'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e', - 'live_status': 'was_live', - 'channel': 'Deutscher Badminton Verband', - 'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband', - 'channel_id': '93ca5866-2551-49fc-8424-6db35af58920', - 'duration': 41097, - 'upload_date': '20220309', - 'timestamp': 1646860727.0, - }, - 'skip': 'not found', - }, { - 'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023', - 'info_dict': { - 'id': '9889785e-55b0-4d97-a72a-ce9a9f157cce', - 'title': 'Formationswochenende Latein 2023 - Samstag', - 'display_id': 'ggcbremen/formationswochenende-latein-2023', - 'description': 'md5:6e4060d40ff6a8f8eeb471b51a8f08b2', - 'live_status': 'was_live', - 'channel': 'Grün-Gold-Club Bremen e.V.', - 'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb', - 'channel_url': 'https://sportdeutschland.tv/ggcbremen', - }, - 'playlist_count': 3, - 'playlist': [{ - 'info_dict': { - 'id': '988e1fea-9d44-4fab-8c72-3085fb667547', - 'ext': 'mp4', - 'channel_url': 'https://sportdeutschland.tv/ggcbremen', - 'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb', - 'channel': 'Grün-Gold-Club Bremen e.V.', - 'duration': 86, - 'title': 'Formationswochenende Latein 2023 - Samstag Part 1', - 'upload_date': '20230225', - 'timestamp': 1677349909, - 'live_status': 'was_live', - }, - }], - 'skip': 'not found', - }, { - 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1', - 'info_dict': { - 'id': '95d71b8a-370a-4b87-ad16-94680da18528', - 'ext': 'mp4', - 'title': r're:Gymnastik International - Tag 1 .+', - 'display_id': 'dtb/gymnastik-international-tag-1', - 'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab', - 'channel': 'Deutscher Turner-Bund', - 'channel_url': 'https://sportdeutschland.tv/dtb', - 'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52', - 'live_status': 'is_live', - }, - 'skip': 'live', - }, { + # Single-part video, direct link 'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', 'md5': '35c11a19395c938cdd076b93bda54cde', 'info_dict': { @@ -100,7 +25,82 @@ class SportDeutschlandIE(InfoExtractor): 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', 'timestamp': 1749913117, 'upload_date': '20250614', + 'duration': 12287.0, }, + }, { + # Single-part video, embedded player link + 'url': 'https://player.sportdeutschland.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc', + 'info_dict': { + 'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a', + 'ext': 'mp4', + 'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates', + 'display_id': '9e9619c4-7d77-43c4-926d-49fb57dc06dc', + 'channel': 'Rostock Griffins', + 'channel_url': 'https://sportdeutschland.tv/rostock-griffins', + 'live_status': 'was_live', + 'description': 'md5:60cb00067e55dafa27b0933a43d72862', + 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', + 'timestamp': 1749913117, + 'upload_date': '20250614', + 'duration': 12287.0, + }, + 'params': {'skip_download': True}, + }, { + # Multi-part video + 'url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2', + 'info_dict': { + 'id': '9f63d737-2444-4e3a-a1ea-840df73fd481', + 'display_id': 'rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2', + 'title': 'Volleyball w: Japan vs. 
Braslien - Halbfinale 2', + 'description': 'md5:0a17da15e48a687e6019639c3452572b', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', + 'live_status': 'was_live', + }, + 'playlist_count': 2, + 'playlist': [{ + 'info_dict': { + 'id': '9f725a94-d43e-40ff-859d-13da3081bb04', + 'ext': 'mp4', + 'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 1', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', + 'duration': 14773.0, + 'timestamp': 1753085197, + 'upload_date': '20250721', + 'live_status': 'was_live', + }, + }, { + 'info_dict': { + 'id': '9f725a94-370e-4477-89ac-1751098e3217', + 'ext': 'mp4', + 'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 2', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', + 'duration': 14773.0, + 'timestamp': 1753128421, + 'upload_date': '20250721', + 'live_status': 'was_live', + }, + }], + }, { + # Livestream + 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1', + 'info_dict': { + 'id': '95d71b8a-370a-4b87-ad16-94680da18528', + 'ext': 'mp4', + 'title': r're:Gymnastik International - Tag 1 .+', + 'display_id': 'dtb/gymnastik-international-tag-1', + 'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab', + 'channel': 'Deutscher Turner-Bund', + 'channel_url': 'https://sportdeutschland.tv/dtb', + 'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52', + 'live_status': 'is_live', + }, + 'skip': 'live', }] def _process_video(self, asset_id, video): From 1fe83b0111277a6f214c5ec1819cfbf943508baf Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sun, 27 Jul 2025 00:34:22 +0900 Subject: [PATCH 62/81] [ie/eagleplatform] Remove extractors (#13469) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 4 - yt_dlp/extractor/eagleplatform.py | 215 ------------------------------ yt_dlp/extractor/generic.py | 32 ----- yt_dlp/extractor/livejournal.py | 1 + 4 files changed, 1 insertion(+), 251 deletions(-) delete mode 100644 yt_dlp/extractor/eagleplatform.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 69389671ed..617c2c5ce0 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -571,10 +571,6 @@ DWIE, DWArticleIE, ) -from .eagleplatform import ( - ClipYouEmbedIE, - EaglePlatformIE, -) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py deleted file mode 100644 index 685f8c0590..0000000000 --- a/yt_dlp/extractor/eagleplatform.py +++ /dev/null @@ -1,215 +0,0 @@ -import functools -import re - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - smuggle_url, - unsmuggle_url, - url_or_none, -) - - -class EaglePlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - eagleplatform:(?P[^/]+):| - https?://(?P.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id= - ) - (?P\d+) - ''' - _EMBED_REGEX = 
[r']+src=(["\'])(?P(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1'] - _TESTS = [{ - # http://lenta.ru/news/2015/03/06/navalny/ - 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '227304', - 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, - }, - }, { - # http://muz-tv.ru/play/7129/ - # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true - 'url': 'eagleplatform:media.clipyou.ru:12820', - 'md5': '358597369cf8ba56675c1df15e7af624', - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'skip': 'Georestricted', - }, { - # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', - 'only_matching': True, - }] - - @classmethod - def _extract_embed_urls(cls, url, webpage): - add_referer = functools.partial(smuggle_url, data={'referrer': url}) - - res = tuple(super()._extract_embed_urls(url, webpage)) - if res: - return map(add_referer, res) - - PLAYER_JS_RE = r''' - ]+ - src=(?P["\'])(?:https?:)?//(?P(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) - .+? - ''' - # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) - mobj = re.search( - rf'''(?xs) - {PLAYER_JS_RE} - ]+ - class=(?P["\'])eagleplayer(?P=qclass)[^>]+ - data-id=["\'](?P\d+) - ''', webpage) - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - # Generalization of "Javascript code usage", "Combined usage" and - # "Usage without attaching to DOM" embeddings (see - # http://dultonmedia.github.io/eplayer/) - mobj = re.search( - r'''(?xs) - %s - - ''' % PLAYER_JS_RE, webpage) # noqa: UP031 - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - - @staticmethod - def _handle_error(response): - status = int_or_none(response.get('status', 200)) - if status != 200: - raise ExtractorError(' '.join(response['errors']), expected=True) - - def _download_json(self, url_or_request, video_id, *args, **kwargs): - try: - response = super()._download_json( - url_or_request, video_id, *args, **kwargs) - except ExtractorError as ee: - if isinstance(ee.cause, HTTPError): - response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id) - self._handle_error(response) - raise - return response - - def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): - return self._download_json(url_or_request, video_id, note)['data'][0] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = self._match_valid_url(url) - host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') - - headers = {} - query = { - 'id': video_id, - } - - referrer = smuggled_data.get('referrer') - if referrer: - headers['Referer'] = referrer - query['referrer'] = referrer - - player_data = self._download_json( - f'http://{host}/api/player_data', video_id, - headers=headers, query=query) - - media = player_data['data']['playlist']['viewports'][0]['medialist'][0] - - 
title = media['title'] - description = media.get('description') - thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:') - duration = int_or_none(media.get('duration')) - view_count = int_or_none(media.get('views')) - - age_restriction = media.get('age_restriction') - age_limit = None - if age_restriction: - age_limit = 0 if age_restriction == 'allow_all' else 18 - - secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') - - formats = [] - - m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(m3u8_formats) - - m3u8_formats_dict = {} - for f in m3u8_formats: - if f.get('height') is not None: - m3u8_formats_dict[f['height']] = f - - mp4_data = self._download_json( - # Secure mp4 URL is constructed according to Player.prototype.mp4 from - # http://lentaru.media.eagleplatform.com/player/player.js - re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), - video_id, 'Downloading mp4 JSON', fatal=False) - if mp4_data: - for format_id, format_url in mp4_data.get('data', {}).items(): - if not url_or_none(format_url): - continue - height = int_or_none(format_id) - if height is not None and m3u8_formats_dict.get(height): - f = m3u8_formats_dict[height].copy() - f.update({ - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - else: - f = { - 'format_id': f'http-{format_id}', - 'height': int_or_none(format_id), - } - f['url'] = format_url - formats.append(f) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'age_limit': age_limit, - 'formats': formats, - } - - -class ClipYouEmbedIE(InfoExtractor): - _VALID_URL = False - - @classmethod - def _extract_embed_urls(cls, url, webpage): - mobj = re.search( - r']+src="https?://(?Pmedia\.clipyou\.ru)/index/player\?.*\brecord_id=(?P\d+).*"', webpage) - if mobj is not None: - yield smuggle_url('eagleplatform:{host}:{id}'.format(**mobj.groupdict()), {'referrer': url}) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d9a666f991..652c3b851b 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1010,38 +1010,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # referrer protected EaglePlatform embed - { - 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', - 'info_dict': { - 'id': '582306', - 'ext': 'mp4', - 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 3382, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - }, - # ClipYou (EaglePlatform) embed (custom URL) - { - 'url': 'http://muz-tv.ru/play/7129/', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'This video is unavailable.', - }, # Pladform embed { 'url': 'http://muz-tv.ru/kinozal/view/7400/', diff --git a/yt_dlp/extractor/livejournal.py b/yt_dlp/extractor/livejournal.py index c61f9bec7a..ee2dfca0d0 100644 --- a/yt_dlp/extractor/livejournal.py +++ b/yt_dlp/extractor/livejournal.py @@ -3,6 +3,7 @@ class LiveJournalIE(InfoExtractor): 
+ _WORKING = False _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P\d+)' _TEST = { 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', From e8c2bf798b6707d27fecde66161172da69c7cd72 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:02:56 -0400 Subject: [PATCH 63/81] [ie/neteasemusic] Support XFF (#11044) Closes #11043 Authored by: c-basalt --- yt_dlp/extractor/neteasemusic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 900b8b2a30..6c47086b9b 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -34,7 +34,6 @@ class NetEaseMusicBaseIE(InfoExtractor): 'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac ) _API_BASE = 'http://music.163.com/api/' - _GEO_BYPASS = False def _create_eapi_cipher(self, api_path, query_body, cookies): request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) @@ -64,6 +63,8 @@ def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs): 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}), }), } + if self._x_forwarded_for_ip: + headers.setdefault('X-Real-IP', self._x_forwarded_for_ip) return self._download_json( urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id, data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={ From daa1859be1b0e7d123da8b4e0988f2eb7bd47d15 Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:11:57 +0000 Subject: [PATCH 64/81] [ie/FaulioLive] Support Bahry TV (#13850) Authored by: CasperMcFadden95 --- yt_dlp/extractor/faulio.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index 393023503e..a5d5c750b4 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -9,6 +9,7 @@ class FaulioLiveIE(InfoExtractor): _DOMAINS = ( 'aloula.sba.sa', + 'bahry.com', 'maraya.sba.net.ae', 'sat7plus.org', ) @@ -25,6 +26,18 @@ class FaulioLiveIE(InfoExtractor): 'params': { 'skip_download': 'Livestream', }, + }, { + 'url': 'https://bahry.com/live/1', + 'info_dict': { + 'id': 'bahry.faulio.com_1', + 'title': str, + 'description': str, + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'Livestream', + }, }, { 'url': 'https://maraya.sba.net.ae/live/1', 'info_dict': { From 57186f958f164daa50203adcbf7ec74d541151cf Mon Sep 17 00:00:00 2001 From: Tom Hebb Date: Sat, 26 Jul 2025 14:43:38 -0400 Subject: [PATCH 65/81] [fd/hls] Fix `--hls-split-continuity` support (#13321) Authored by: tchebb --- yt_dlp/downloader/hls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 2256305785..58cfbbf163 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -205,7 +205,7 @@ def is_ad_fragment_end(s): line = line.strip() if line: if not line.startswith('#'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if ad_frag_next: continue @@ -231,7 +231,7 @@ def is_ad_fragment_end(s): byte_range = {} elif line.startswith('#EXT-X-MAP'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if frag_index > 0: 
self.report_error( From 66aa21dc5a3b79059c38f3ad1d05dc9b29187701 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:39:54 -0500 Subject: [PATCH 66/81] [build] Use `macos-14` runner for `macos` builds (#13814) Ref: https://github.blog/changelog/2025-07-11-upcoming-changes-to-macos-hosted-runners-macos-latest-migration-and-xcode-support-policy-updates/#macos-13-is-closing-down Authored by: bashonly --- .github/workflows/build.yml | 4 +++- bundle/pyinstaller.py | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e2411ecfad..b3db8fec1b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -242,7 +242,7 @@ jobs: permissions: contents: read actions: write # For cleaning up cache - runs-on: macos-13 + runs-on: macos-14 steps: - uses: actions/checkout@v4 @@ -261,6 +261,8 @@ jobs: - name: Install Requirements run: | brew install coreutils + # We need to use system Python in order to roll our own universal2 curl_cffi wheel + brew uninstall --ignore-dependencies python3 python3 -m venv ~/yt-dlp-build-venv source ~/yt-dlp-build-venv/bin/activate python3 devscripts/install_deps.py -o --include build diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index c2f6511210..0597f602d0 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -62,16 +62,22 @@ def parse_options(): def exe(onedir): """@returns (name, path)""" + platform_name, machine, extension = { + 'win32': (None, MACHINE, '.exe'), + 'darwin': ('macos', None, None), + }.get(OS_NAME, (OS_NAME, MACHINE, None)) + name = '_'.join(filter(None, ( 'yt-dlp', - {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), - MACHINE, + platform_name, + machine, ))) + return name, ''.join(filter(None, ( 'dist/', onedir and f'{name}/', name, - OS_NAME == 'win32' and '.exe', + extension, ))) From cc5a5caac5fbc0d605b52bde0778d6fd5f97b5ab Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:12:53 -0500 Subject: [PATCH 67/81] Deprecate `darwin_legacy_exe` support (#13857) Ref: https://github.com/yt-dlp/yt-dlp/issues/13856 Authored by: bashonly --- yt_dlp/update.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index de289cb780..f85be2d08f 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -141,6 +141,17 @@ def _get_binary_name(): def _get_system_deprecation(): MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 9) + EXE_MSG_TMPL = ('Support for {} has been deprecated. ' + 'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}') + STOP_MSG = 'You may stop receiving updates on this version at any time!' 
+ variant = detect_variant() + + # Temporary until macos_legacy executable builds are discontinued + if variant == 'darwin_legacy_exe': + return EXE_MSG_TMPL.format( + f'{variant} (the PyInstaller-bundled executable for macOS versions older than 10.15)', + 'issues/13856', STOP_MSG) + if sys.version_info > MIN_RECOMMENDED: return None From 23c658b9cbe34a151f8f921ab1320bb5d4e40a4d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:59:02 -0500 Subject: [PATCH 68/81] Raise minimum recommended Python version to 3.10 (#13859) Ref: https://github.com/yt-dlp/yt-dlp/issues/13858 Authored by: bashonly --- yt_dlp/update.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index f85be2d08f..30cbf538e9 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -139,7 +139,7 @@ def _get_binary_name(): def _get_system_deprecation(): - MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 9) + MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 10) EXE_MSG_TMPL = ('Support for {} has been deprecated. ' 'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}') @@ -161,6 +161,13 @@ def _get_system_deprecation(): if sys.version_info < MIN_SUPPORTED: return f'Python version {major}.{minor} is no longer supported! {PYTHON_MSG}' + # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 22.04 and Python 3.10 + if variant in ('linux_aarch64_exe', 'linux_armv7l_exe'): + libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2]) + if libc_ver < (2, 35): + return EXE_MSG_TMPL.format('system glibc version < 2.35', 'issues/13858', STOP_MSG) + return None + return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}' From b831406a1d3be34c159835079d12bae624c43610 Mon Sep 17 00:00:00 2001 From: Florentin Le Moal Date: Sun, 27 Jul 2025 21:52:05 +0200 Subject: [PATCH 69/81] [ie/rtve.es:program] Add extractor Authored by: meGAmeS1, seproDev Co-authored-by: sepro --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/rtve.py | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 617c2c5ce0..9445270858 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1781,6 +1781,7 @@ RTVEALaCartaIE, RTVEAudioIE, RTVELiveIE, + RTVEProgramIE, RTVETelevisionIE, ) from .rtvs import RTVSIE diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index 2812d93059..c2ccf73ddc 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -6,9 +6,11 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, + InAdvancePagedList, clean_html, determine_ext, float_or_none, + int_or_none, make_archive_id, parse_iso8601, qualities, @@ -371,3 +373,62 @@ def _real_extract(self, url): raise ExtractorError('The webpage doesn\'t contain any video', expected=True) return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key()) + + +class RTVEProgramIE(RTVEBaseIE): + IE_NAME = 'rtve.es:program' + IE_DESC = 'RTVE.es programs' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/(?P[\w-]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.rtve.es/play/videos/saber-vivir/', + 'info_dict': { + 'id': '111570', + 'title': 'Saber vivir - Programa de ciencia y futuro en RTVE Play', + }, + 'playlist_mincount': 400, + }] + _PAGE_SIZE = 60 + + def _fetch_page(self, program_id, page_num): + return self._download_json( + 
f'https://www.rtve.es/api/programas/{program_id}/videos', + program_id, note=f'Downloading page {page_num}', + query={ + 'type': 39816, + 'page': page_num, + 'size': 60, + }) + + def _entries(self, page_data): + for video in traverse_obj(page_data, ('page', 'items', lambda _, v: url_or_none(v['htmlUrl']))): + yield self.url_result( + video['htmlUrl'], RTVEALaCartaIE, url_transparent=True, + **traverse_obj(video, { + 'id': ('id', {str}), + 'title': ('longTitle', {str}), + 'description': ('shortDescription', {str}), + 'duration': ('duration', {float_or_none(scale=1000)}), + 'series': (('programInfo', 'title'), {str}, any), + 'season_number': ('temporadaOrden', {int_or_none}), + 'season_id': ('temporadaId', {str}), + 'season': ('temporada', {str}), + 'episode_number': ('episode', {int_or_none}), + 'episode': ('title', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + }), + ) + + def _real_extract(self, url): + program_slug = self._match_id(url) + program_page = self._download_webpage(url, program_slug) + + program_id = self._html_search_meta('DC.identifier', program_page, 'Program ID', fatal=True) + + first_page = self._fetch_page(program_id, 1) + page_count = traverse_obj(first_page, ('page', 'totalPages', {int})) or 1 + + entries = InAdvancePagedList( + lambda idx: self._entries(self._fetch_page(program_id, idx + 1) if idx else first_page), + page_count, self._PAGE_SIZE) + + return self.playlist_result(entries, program_id, self._html_extract_title(program_page)) From 682334e4b35112f7a5798decdcb5cb12230ef948 Mon Sep 17 00:00:00 2001 From: fries1234 Date: Sun, 27 Jul 2025 13:26:33 -0700 Subject: [PATCH 70/81] [ie/tvw:news] Add extractor (#12907) Authored by: fries1234 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/tvw.py | 56 +++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9445270858..3eea0cdf6b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2235,6 +2235,7 @@ from .tvplayer import TVPlayerIE from .tvw import ( TvwIE, + TvwNewsIE, TvwTvChannelsIE, ) from .tweakers import TweakersIE diff --git a/yt_dlp/extractor/tvw.py b/yt_dlp/extractor/tvw.py index 0ab926dbdd..74d9b6424b 100644 --- a/yt_dlp/extractor/tvw.py +++ b/yt_dlp/extractor/tvw.py @@ -10,12 +10,15 @@ unified_timestamp, url_or_none, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import find_element, find_elements, traverse_obj class TvwIE(InfoExtractor): IE_NAME = 'tvw' - _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P[^/?#]+)' + _VALID_URL = [ + r'https?://(?:www\.)?tvw\.org/video/(?P[^/?#]+)', + r'https?://(?:www\.)?tvw\.org/watch/?\?(?:[^#]+&)?eventID=(?P\d+)', + ] _TESTS = [{ 'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/', 'md5': '9ceb94fe2bb7fd726f74f16356825703', @@ -75,6 +78,20 @@ class TvwIE(InfoExtractor): 'display_id': 'washington-to-washington-a-new-space-race-2022041111', 'categories': ['Washington to Washington', 'General Interest'], }, + }, { + 'url': 'https://tvw.org/watch?eventID=2025041235', + 'md5': '7d697c02f110b37d6a47622ea608ca90', + 'info_dict': { + 'id': '2025041235', + 'ext': 'mp4', + 'title': 'Legislative Review - Medicaid Postpartum Bill Sparks Debate & Senate Approves Automatic Voter Registration', + 'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$', + 'description': 'md5:37d0f3a9187ae520aac261b3959eaee6', + 'timestamp': 1745006400, + 'upload_date': 
'20250418', + 'location': 'Hayner Media Center', + 'categories': ['Legislative Review'], + }, }] def _real_extract(self, url): @@ -125,6 +142,41 @@ def _real_extract(self, url): } +class TvwNewsIE(InfoExtractor): + IE_NAME = 'tvw:news' + _VALID_URL = r'https?://(?:www\.)?tvw\.org/\d{4}/\d{2}/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/', + 'info_dict': { + 'id': 'the-impact-issues-to-watch-in-the-2024-legislative-session', + 'title': 'The Impact - Issues to Watch in the 2024 Legislative Session', + 'description': 'md5:65f0b33ec8f18ff1cd401c5547aa5441', + }, + 'playlist_count': 6, + }, { + 'url': 'https://tvw.org/2024/06/the-impact-water-rights-and-the-skookumchuck-dam-debate/', + 'info_dict': { + 'id': 'the-impact-water-rights-and-the-skookumchuck-dam-debate', + 'title': 'The Impact - Water Rights and the Skookumchuck Dam Debate', + 'description': 'md5:185f3a2350ef81e3fa159ac3e040a94b', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + video_ids = traverse_obj(webpage, ( + {find_elements(cls='invintus-player', html=True)}, ..., {extract_attributes}, 'data-eventid')) + + return self.playlist_from_matches( + video_ids, playlist_id, + playlist_title=remove_end(self._og_search_title(webpage, default=None), ' - TVW'), + playlist_description=self._og_search_description(webpage, default=None), + getter=lambda x: f'https://tvw.org/watch?eventID={x}', ie=TvwIE) + + class TvwTvChannelsIE(InfoExtractor): IE_NAME = 'tvw:tvchannels' _VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P[^/?#]+)' From 28b68f687561468e0c664dcb430707458970019f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 29 Jul 2025 14:47:28 -0500 Subject: [PATCH 71/81] [cookies] Load cookies with float `expires` timestamps (#13873) Authored by: bashonly --- yt_dlp/cookies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 5675445ace..459a4b7de0 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1335,7 +1335,7 @@ def prepare_line(line): if len(cookie_list) != self._ENTRY_LEN: raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) - if cookie.expires_at and not cookie.expires_at.isdigit(): + if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at): raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line From 62e2a9c0d55306906f18da2927e05e1cbc31473c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 29 Jul 2025 16:31:35 -0500 Subject: [PATCH 72/81] [ci] Bump supported PyPy version to 3.11 (#13877) Ref: https://pypy.org/posts/2025/07/pypy-v7320-release.html Authored by: bashonly --- .github/workflows/core.yml | 4 ++-- .github/workflows/download.yml | 4 ++-- .github/workflows/signature-tests.yml | 2 +- CONTRIBUTING.md | 2 +- README.md | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index dd2c6f481e..86036989c0 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -37,7 +37,7 @@ jobs: matrix: os: [ubuntu-latest] # CPython 3.9 is in quick-test - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast 
one of each CPython/PyPy tests must be in windows - os: windows-latest @@ -49,7 +49,7 @@ jobs: - os: windows-latest python-version: '3.13' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 6849fba9b6..594a664c9c 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,13 +28,13 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.9' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml index 203172e0b9..42c65db353 100644 --- a/.github/workflows/signature-tests.yml +++ b/.github/workflows/signature-tests.yml @@ -25,7 +25,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.11] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2c58cdfc94..8822907b79 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -272,7 +272,7 @@ ## Adding support for a new site You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.11. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell diff --git a/README.md b/README.md index e5bd21b9ca..12f68e98d8 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ # To install nightly with pip: ``` ## DEPENDENCIES -Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly.
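For illustration only (not part of the patch series): the `file:` URL fixup added in PATCH 59 can be exercised in isolation. The minimal sketch below, using hypothetical local playlist paths, shows how the `re.sub` rewrite turns RFC-style `file://` URLs into the `file:<path>` form that ffmpeg expects on Windows versus *nix before the URL is passed as an `-i` input.

import re

# Sketch of the 'file:' URL fixup from PATCH 59 (sample paths are hypothetical).
# ffmpeg strips only the literal 'file:' prefix and treats the remainder as a
# plain filesystem path, so the 'file:///' / 'file://localhost/' forms must be
# rewritten before being handed to ffmpeg.
def fixup_file_url(url, windows):
    # Windows/Cygwin: 'file:///C:/seg/index.m3u8'       -> 'file:C:/seg/index.m3u8'
    # *nix:           'file://localhost/tmp/index.m3u8' -> 'file:/tmp/index.m3u8'
    return re.sub(r'^file://(?:localhost)?/', 'file:' if windows else 'file:/', url)

assert fixup_file_url('file:///C:/seg/index.m3u8', windows=True) == 'file:C:/seg/index.m3u8'
assert fixup_file_url('file://localhost/tmp/index.m3u8', windows=False) == 'file:/tmp/index.m3u8'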