From 87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 14:57:20 -0500 Subject: [PATCH 01/23] [ie/mlbtv] Make formats downloadable with ffmpeg (#13761) Authored by: bashonly --- yt_dlp/extractor/mlb.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index 562b93fc7..b2b35a712 100644 --- a/yt_dlp/extractor/mlb.py +++ b/yt_dlp/extractor/mlb.py @@ -457,12 +457,9 @@ def _extract_formats_and_subtitles(self, broadcast, video_id): self.report_warning(f'No formats available for {format_id} broadcast; skipping') return [], {} - cdn_headers = {'x-cdn-token': token} fmts, subs = self._extract_m3u8_formats_and_subtitles( - m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4', - m3u8_id=format_id, fatal=False, headers=cdn_headers) + m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) for fmt in fmts: - fmt['http_headers'] = cdn_headers fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' ')) fmt.setdefault('language', language) if fmt.get('vcodec') == 'none' and fmt['language'] == 'en': From 790c286ce3e0b534ca2d8f6648ced220d888f139 Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 21 Jul 2025 04:00:44 +0800 Subject: [PATCH 02/23] [ie/10play] Support new site domain (#13611) Closes #13577 Authored by: Georift --- yt_dlp/extractor/tenplay.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index 825da6516..dd4ea5658 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -7,11 +7,11 @@ class TenPlayIE(InfoExtractor): IE_NAME = '10play' - _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/?#]+/)+(?Ptpv\d{6}[a-z]{5})' + _VALID_URL = r'https?://(?:www\.)?10(?:play)?\.com\.au/(?:[^/?#]+/)+(?Ptpv\d{6}[a-z]{5})' _NETRC_MACHINE = '10play' _TESTS = [{ # Geo-restricted to Australia 
- 'url': 'https://10play.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf', + 'url': 'https://10.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf', 'info_dict': { 'id': '7440980000013868', 'ext': 'mp4', @@ -32,7 +32,7 @@ class TenPlayIE(InfoExtractor): 'params': {'skip_download': 'm3u8'}, }, { # Geo-restricted to Australia - 'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp', + 'url': 'https://10.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp', 'info_dict': { 'id': '9000000000091177', 'ext': 'mp4', @@ -55,7 +55,7 @@ class TenPlayIE(InfoExtractor): 'params': {'skip_download': 'm3u8'}, }, { # Geo-restricted to Australia; upgrading the m3u8 quality fails and we need the fallback - 'url': 'https://10play.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt', + 'url': 'https://10.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt', 'info_dict': { 'id': '9000000000084116', 'ext': 'mp4', @@ -77,6 +77,7 @@ class TenPlayIE(InfoExtractor): }, 'params': {'skip_download': 'm3u8'}, 'expected_warnings': ['Failed to download m3u8 information: HTTP Error 502'], + 'skip': 'video unavailable', }, { 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', 'only_matching': True, @@ -96,7 +97,7 @@ class TenPlayIE(InfoExtractor): def _real_extract(self, url): content_id = self._match_id(url) data = self._download_json( - 'https://10play.com.au/api/v1/videos/' + content_id, content_id) + 'https://10.com.au/api/v1/videos/' + content_id, content_id) video_data = self._download_json( f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}', @@ -137,21 +138,24 @@ def _real_extract(self, url): class TenPlaySeasonIE(InfoExtractor): IE_NAME = '10play:season' - _VALID_URL = 
r'https?://(?:www\.)?10play\.com\.au/(?P[^/?#]+)/episodes/(?P[^/?#]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?10(?:play)?\.com\.au/(?P[^/?#]+)/episodes/(?P[^/?#]+)/?(?:$|[?#])' _TESTS = [{ - 'url': 'https://10play.com.au/masterchef/episodes/season-15', + 'url': 'https://10.com.au/masterchef/episodes/season-15', 'info_dict': { 'title': 'Season 15', 'id': 'MTQ2NjMxOQ==', }, 'playlist_mincount': 50, }, { - 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', + 'url': 'https://10.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', 'info_dict': { 'title': 'Season 2024', 'id': 'Mjc0OTIw', }, 'playlist_mincount': 159, + }, { + 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', + 'only_matching': True, }] def _entries(self, load_more_url, display_id=None): @@ -172,7 +176,7 @@ def _entries(self, load_more_url, display_id=None): def _real_extract(self, url): show, season = self._match_valid_url(url).group('show', 'season') season_info = self._download_json( - f'https://10play.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}') + f'https://10.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}') episodes_carousel = traverse_obj(season_info, ( 'content', 0, 'components', ( From f9dff95cb1c138913011417b3bba020c0a691bba Mon Sep 17 00:00:00 2001 From: WouterGordts Date: Sun, 20 Jul 2025 22:12:40 +0200 Subject: [PATCH 03/23] [ie/bandcamp] Extract tags (#13480) Authored by: WouterGordts --- yt_dlp/extractor/bandcamp.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 939c2800e..d07d6e48b 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -7,6 +7,7 @@ from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, + clean_html, extract_attributes, float_or_none, int_or_none, @@ -19,7 +20,7 @@ url_or_none, urljoin, ) -from 
..utils.traversal import find_element, traverse_obj +from ..utils.traversal import find_element, find_elements, traverse_obj class BandcampIE(InfoExtractor): @@ -70,6 +71,9 @@ class BandcampIE(InfoExtractor): 'album': 'FTL: Advanced Edition Soundtrack', 'uploader_url': 'https://benprunty.bandcamp.com', 'uploader_id': 'benprunty', + 'tags': ['soundtrack', 'chiptunes', 'cinematic', 'electronic', 'video game music', 'California'], + 'artists': ['Ben Prunty'], + 'album_artists': ['Ben Prunty'], }, }, { # no free download, mp3 128 @@ -94,6 +98,9 @@ class BandcampIE(InfoExtractor): 'album': 'Call of the Mastodon', 'uploader_url': 'https://relapsealumni.bandcamp.com', 'uploader_id': 'relapsealumni', + 'tags': ['Philadelphia'], + 'artists': ['Mastodon'], + 'album_artists': ['Mastodon'], }, }, { # track from compilation album (artist/album_artist difference) @@ -118,6 +125,9 @@ class BandcampIE(InfoExtractor): 'album': 'DSK F/W 2016-2017 Free Compilation', 'uploader_url': 'https://diskotopia.bandcamp.com', 'uploader_id': 'diskotopia', + 'tags': ['Japan'], + 'artists': ['submerse'], + 'album_artists': ['Diskotopia'], }, }] @@ -252,6 +262,7 @@ def _real_extract(self, url): 'album': embed.get('album_title'), 'album_artist': album_artist, 'formats': formats, + 'tags': traverse_obj(webpage, ({find_elements(cls='tag')}, ..., {clean_html})), } From 32809eb2da92c649e540a5b714f6235036026161 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:05:43 -0500 Subject: [PATCH 04/23] Allow extractors to designate formats/subtitles for impersonation (#13778) Authored by: bashonly --- yt_dlp/YoutubeDL.py | 37 ++++++++++++++++++++++++++++++++++- yt_dlp/downloader/__init__.py | 2 +- yt_dlp/downloader/http.py | 5 ++++- yt_dlp/extractor/common.py | 30 ++++++++++++---------------- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9c9ee64a8..68074a562 100644 --- 
a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -52,7 +52,7 @@ SSLError, network_exceptions, ) -from .networking.impersonate import ImpersonateRequestHandler +from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget from .plugins import directories as plugin_directories, load_all_plugins from .postprocessor import ( EmbedThumbnailPP, @@ -3231,6 +3231,16 @@ def dl(self, name, info, subtitle=False, test=False): } else: params = self.params + + impersonate = info.pop('impersonate', None) + # Do not override --impersonate with extractor-specified impersonation + if params.get('impersonate') is None: + available_target, requested_targets = self._parse_impersonate_targets(impersonate) + if available_target: + info['impersonate'] = available_target + elif requested_targets: + self.report_warning(self._unavailable_targets_message(requested_targets), only_once=True) + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: @@ -4183,6 +4193,31 @@ def _impersonate_target_available(self, target): for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler)) + def _parse_impersonate_targets(self, impersonate): + if impersonate in (True, ''): + impersonate = ImpersonateTarget() + + requested_targets = [ + t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) + for t in variadic(impersonate) + ] if impersonate else [] + + available_target = next(filter(self._impersonate_target_available, requested_targets), None) + + return available_target, requested_targets + + @staticmethod + def _unavailable_targets_message(requested_targets, note=None, is_error=False): + note = note or 'The extractor specified to use impersonation for this download' + specific_targets = ', '.join(filter(None, map(str, requested_targets))) + message = ( + 'no impersonate target is available' if not specific_targets + else f'none of these impersonate targets 
are available: {specific_targets}') + return ( + f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}' + f' https://github.com/yt-dlp/yt-dlp#impersonation ' + f'for information on installing the required dependencies') + def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 9c34bd289..17458b9b9 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -99,7 +99,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default): if external_downloader is None: if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): return FFmpegFD - elif external_downloader.lower() != 'native': + elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None: ed = get_external_downloader(external_downloader) if ed.can_download(info_dict, external_downloader): return ed diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 90bfcaf55..073860f6f 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -27,6 +27,9 @@ class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) + request_extensions = {} + if info_dict.get('impersonate') is not None: + request_extensions['impersonate'] = info_dict['impersonate'] class DownloadContext(dict): __getattr__ = dict.get @@ -109,7 +112,7 @@ def establish_connection(): if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 - request = Request(url, request_data, headers) + request = Request(url, request_data, headers, extensions=request_extensions) has_range = range_start is not None if has_range: request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d601e1751..8a914abf0 100644 --- 
a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -38,7 +38,6 @@ TransportError, network_exceptions, ) -from ..networking.impersonate import ImpersonateTarget from ..utils import ( IDENTITY, JSON_LD_RE, @@ -259,6 +258,11 @@ class InfoExtractor: * key The key (as hex) used to decrypt fragments. If `key` is given, any key URI will be ignored * iv The IV (as hex) used to decrypt fragments + * impersonate Impersonate target(s). Can be any of the following entities: + * an instance of yt_dlp.networking.impersonate.ImpersonateTarget + * a string in the format of CLIENT[:OS] + * a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances + * a boolean value; True means any impersonate target is sufficient * downloader_options A dictionary of downloader options (For internal use only) * http_chunk_size Chunk size for HTTP downloads @@ -336,6 +340,7 @@ class InfoExtractor: * "name": Name or description of the subtitles * "http_headers": A dictionary of additional HTTP headers to add to the request. 
+ * "impersonate": Impersonate target(s); same as the "formats" field "ext" will be calculated from URL if missing automatic_captions: Like 'subtitles'; contains automatically generated captions instead of normal subtitles @@ -884,26 +889,17 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa extensions = {} - if impersonate in (True, ''): - impersonate = ImpersonateTarget() - requested_targets = [ - t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) - for t in variadic(impersonate) - ] if impersonate else [] - - available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None) + available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate) if available_target: extensions['impersonate'] = available_target elif requested_targets: - message = 'The extractor is attempting impersonation, but ' - message += ( - 'no impersonate target is available' if not str(impersonate) - else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"') - info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation ' - 'for information on installing the required dependencies') + msg = 'The extractor is attempting impersonation' if require_impersonation: - raise ExtractorError(f'{message}; {info_msg}', expected=True) - self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True) + raise ExtractorError( + self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True), + expected=True) + self.report_warning( + self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True) try: return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions)) From a4561c7a66c39d88efe7ae51e7fa1986faf093fb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:20:58 
-0500 Subject: [PATCH 05/23] [rh:requests] Refactor default headers (#13785) Authored by: bashonly --- yt_dlp/networking/_requests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 555c21ac3..6582038fc 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -313,7 +313,7 @@ def _create_instance(self, cookiejar, legacy_ssl_support=None): max_retries=urllib3.util.retry.Retry(False), ) session.adapters.clear() - session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'}) + session.headers = requests.models.CaseInsensitiveDict() session.mount('https://', http_adapter) session.mount('http://', http_adapter) session.cookies = cookiejar @@ -322,6 +322,7 @@ def _create_instance(self, cookiejar, legacy_ssl_support=None): def _prepare_headers(self, _, headers): add_accept_encoding_header(headers, SUPPORTED_ENCODINGS) + headers.setdefault('Connection', 'keep-alive') def _send(self, request): From 8820101aa3152e5f4811541c645f8b5de231ba8c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 20 Jul 2025 18:22:04 -0500 Subject: [PATCH 06/23] [ie/youtube] Use impersonation for downloading subtitles (#13786) Closes #13770 Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index fc1f087ac..5968edc60 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -4056,6 +4056,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer 'ext': fmt, 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)), 'name': sub_name, + 'impersonate': True, STREAMING_DATA_CLIENT_NAME: client_name, }) From 2ac3eb98373d1c31341c5e918c83872c7ff409c6 Mon Sep 17 00:00:00 2001 From: bashonly 
<88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:41:00 -0500 Subject: [PATCH 07/23] Fix `ImpersonateTarget` sanitization (#13791) Fix 32809eb2da92c649e540a5b714f6235036026161 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 68074a562..14beb3df9 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3716,6 +3716,8 @@ def filter_fn(obj): return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} elif isinstance(obj, (list, tuple, set, LazyList)): return list(map(filter_fn, obj)) + elif isinstance(obj, ImpersonateTarget): + return str(obj) elif obj is None or isinstance(obj, (str, int, float, bool)): return obj else: From 3e49bc8a1bdb4109b857f2c361c358e86fa63405 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:42:21 -0500 Subject: [PATCH 08/23] Make extractor-designated impersonation override `--impersonate` (#13792) Fix 32809eb2da92c649e540a5b714f6235036026161 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 9 --------- yt_dlp/downloader/common.py | 11 +++++++++++ yt_dlp/downloader/http.py | 5 +++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 14beb3df9..e42fa73dd 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3232,15 +3232,6 @@ def dl(self, name, info, subtitle=False, test=False): else: params = self.params - impersonate = info.pop('impersonate', None) - # Do not override --impersonate with extractor-specified impersonation - if params.get('impersonate') is None: - available_target, requested_targets = self._parse_impersonate_targets(impersonate) - if available_target: - info['impersonate'] = available_target - elif requested_targets: - self.report_warning(self._unavailable_targets_message(requested_targets), only_once=True) - fd = get_suitable_downloader(info, params, to_stdout=(name == 
'-'))(self, params) if not test: for ph in self._progress_hooks: diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index bb9303f8a..7bc70a51a 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -495,3 +495,14 @@ def _debug_cmd(self, args, exe=None): exe = os.path.basename(args[0]) self.write_debug(f'{exe} command line: {shell_quote(args)}') + + def _get_impersonate_target(self, info_dict): + impersonate = info_dict.get('impersonate') + if impersonate is None: + return None + available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate) + if available_target: + return available_target + elif requested_targets: + self.report_warning(self.ydl._unavailable_targets_message(requested_targets)) + return None diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 073860f6f..c388deb7e 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -28,8 +28,9 @@ def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) request_extensions = {} - if info_dict.get('impersonate') is not None: - request_extensions['impersonate'] = info_dict['impersonate'] + impersonate_target = self._get_impersonate_target(info_dict) + if impersonate_target is not None: + request_extensions['impersonate'] = impersonate_target class DownloadContext(dict): __getattr__ = dict.get From ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 14:09:52 -0500 Subject: [PATCH 09/23] [ie/hotstar] Fix error handling (#13793) Fix 7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9 Closes #13790 Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index b280fb53a..2ae527a59 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -42,6 +42,7 
@@ class HotStarBaseIE(InfoExtractor): } def _has_active_subscription(self, cookies, server_time): + server_time = int_or_none(server_time) or int(time.time()) expiry = traverse_obj(cookies, ( self._TOKEN_NAME, 'value', {jwt_decode_hs256}, 'sub', {json.loads}, 'subscriptions', 'in', ..., 'expiry', {parse_iso8601}, all, {max})) or 0 From 6be26626f7cfa71d28e0fac2861eb04758810c5d Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 22 Jul 2025 06:59:13 +0900 Subject: [PATCH 10/23] [utils] `unified_timestamp`: Return `int` values (#13796) Authored by: doe1080 --- yt_dlp/utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index c91a06e9a..7d79f417f 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1285,7 +1285,7 @@ def unified_timestamp(date_str, day_first=True): timetuple = email.utils.parsedate_tz(date_str) if timetuple: - return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds() + return calendar.timegm(timetuple) + pm_delta * 3600 - int(timezone.total_seconds()) @partial_application From 060c6a4501a0b8a92f1b9c12788f556d902c83c6 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 22 Jul 2025 07:32:10 +0900 Subject: [PATCH 11/23] [ie/skeb] Rework extractor (#13593) Closes #7440 Authored by: doe1080 --- yt_dlp/extractor/skeb.py | 194 +++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 108 deletions(-) diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py index bc5ec3da7..70111d094 100644 --- a/yt_dlp/extractor/skeb.py +++ b/yt_dlp/extractor/skeb.py @@ -1,140 +1,118 @@ from .common import InfoExtractor -from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + clean_html, + int_or_none, + str_or_none, + url_or_none, +) +from 
..utils.traversal import traverse_obj class SkebIE(InfoExtractor): - _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P\d+)' - + _VALID_URL = r'https?://skeb\.jp/@(?P[^/?#]+)/works/(?P\d+)' _TESTS = [{ 'url': 'https://skeb.jp/@riiru_wm/works/10', 'info_dict': { 'id': '466853', - 'title': '内容はおまかせします! by 姫ノ森りぃる@一周年', - 'description': 'md5:1ec50901efc3437cfbfe3790468d532d', - 'uploader': '姫ノ森りぃる@一周年', - 'uploader_id': 'riiru_wm', - 'age_limit': 0, - 'tags': [], - 'url': r're:https://skeb.+', - 'thumbnail': r're:https://skeb.+', - 'subtitles': { - 'jpn': [{ - 'url': r're:https://skeb.+', - 'ext': 'vtt', - }], - }, - 'width': 720, - 'height': 405, - 'duration': 313, - 'fps': 30, 'ext': 'mp4', + 'title': '10-1', + 'description': 'md5:1ec50901efc3437cfbfe3790468d532d', + 'duration': 313, + 'genres': ['video'], + 'thumbnail': r're:https?://.+', + 'uploader': '姫ノ森りぃる@ひとづま', + 'uploader_id': 'riiru_wm', }, }, { 'url': 'https://skeb.jp/@furukawa_nob/works/3', 'info_dict': { 'id': '489408', - 'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...', - 'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2', - 'uploader': '古川ノブ@音楽とVlogのVtuber', - 'uploader_id': 'furukawa_nob', - 'age_limit': 0, - 'tags': [ - 'よろしく', '大丈夫', 'お願い', 'でした', - '是非', 'O', 'バー', '遊び', 'おはよう', - 'オーバ', 'ボイス', - ], - 'url': r're:https://skeb.+', - 'thumbnail': r're:https://skeb.+', - 'subtitles': { - 'jpn': [{ - 'url': r're:https://skeb.+', - 'ext': 'vtt', - }], - }, - 'duration': 98, 'ext': 'mp3', - 'vcodec': 'none', - 'abr': 128, + 'title': '3-1', + 'description': 'md5:6de1f8f876426a6ac321c123848176a8', + 'duration': 98, + 'genres': ['voice'], + 'tags': 'count:11', + 'thumbnail': r're:https?://.+', + 'uploader': '古川ノブ@宮城の動画勢Vtuber', + 'uploader_id': 'furukawa_nob', }, }, { - 'url': 'https://skeb.jp/@mollowmollow/works/6', + 'url': 'https://skeb.jp/@Rizu_panda_cube/works/626', 'info_dict': { - 'id': '6', - 'title': 'ヒロ。\n\n私のキャラク... 
by 諸々', - 'description': 'md5:aa6cbf2ba320b50bce219632de195f07', - '_type': 'playlist', - 'entries': [{ - 'id': '486430', - 'title': 'ヒロ。\n\n私のキャラク... by 諸々', - 'description': 'md5:aa6cbf2ba320b50bce219632de195f07', - }, { - 'id': '486431', - 'title': 'ヒロ。\n\n私のキャラク... by 諸々', - }], + 'id': '626', + 'description': 'md5:834557b39ca56960c5f77dd6ddabe775', + 'uploader': 'りづ100億%', + 'uploader_id': 'Rizu_panda_cube', + 'tags': 'count:57', + 'genres': ['video'], }, + 'playlist_count': 2, + 'expected_warnings': ['Skipping unsupported extension'], }] - def _real_extract(self, url): - video_id = self._match_id(url) - nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id) + def _call_api(self, uploader_id, work_id): + return self._download_json( + f'https://skeb.jp/api/users/{uploader_id}/works/{work_id}', work_id, headers={ + 'Accept': 'application/json', + 'Authorization': 'Bearer null', + }) - parent = { - 'id': video_id, - 'title': nuxt_data.get('title'), - 'description': nuxt_data.get('description'), - 'uploader': traverse_obj(nuxt_data, ('creator', 'name')), - 'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')), - 'age_limit': 18 if nuxt_data.get('nsfw') else 0, - 'tags': nuxt_data.get('tag_list'), + def _real_extract(self, url): + uploader_id, work_id = self._match_valid_url(url).group('uploader_id', 'id') + try: + works = self._call_api(uploader_id, work_id) + except ExtractorError as e: + if not isinstance(e.cause, HTTPError) or e.cause.status != 429: + raise + webpage = e.cause.response.read().decode() + value = self._search_regex( + r'document\.cookie\s*=\s*["\']request_key=([^;"\']+)', webpage, 'request key') + self._set_cookie('skeb.jp', 'request_key', value) + works = self._call_api(uploader_id, work_id) + + info = { + 'uploader_id': uploader_id, + **traverse_obj(works, { + 'age_limit': ('nsfw', {bool}, {lambda x: 18 if x else None}), + 'description': (('source_body', 'body'), {clean_html}, filter, any), + 'genres': 
('genre', {str}, filter, all, filter), + 'tags': ('tag_list', ..., {str}, filter, all, filter), + 'uploader': ('creator', 'name', {str}), + }), } entries = [] - for item in nuxt_data.get('previews') or []: - vid_url = item.get('url') - given_ext = traverse_obj(item, ('information', 'extension')) - preview_ext = determine_ext(vid_url, default_ext=None) - if not preview_ext: - content_disposition = parse_qs(vid_url)['response-content-disposition'][0] - preview_ext = self._search_regex( - r'filename="[^"]+\.([^\.]+?)"', content_disposition, - 'preview file extension', fatal=False, group=1) - if preview_ext not in ('mp4', 'mp3'): + for idx, preview in enumerate(traverse_obj(works, ('previews', lambda _, v: url_or_none(v['url']))), 1): + ext = traverse_obj(preview, ('information', 'extension', {str})) + if ext not in ('mp3', 'mp4'): + self.report_warning(f'Skipping unsupported extension "{ext}"') continue - if not vid_url or not item.get('id'): - continue - width, height = traverse_obj(item, ('information', 'width')), traverse_obj(item, ('information', 'height')) - if width is not None and height is not None: - # the longest side is at most 720px for non-client viewers - max_size = max(width, height) - width, height = (x * 720 // max_size for x in (width, height)) + entries.append({ - **parent, - 'id': str(item['id']), - 'url': vid_url, - 'thumbnail': item.get('poster_url'), + 'ext': ext, + 'title': f'{work_id}-{idx}', 'subtitles': { - 'jpn': [{ - 'url': item.get('vtt_url'), + 'ja': [{ 'ext': 'vtt', + 'url': preview['vtt_url'], }], - } if item.get('vtt_url') else None, - 'width': width, - 'height': height, - 'duration': traverse_obj(item, ('information', 'duration')), - 'fps': traverse_obj(item, ('information', 'frame_rate')), - 'ext': preview_ext or given_ext, - 'vcodec': 'none' if preview_ext == 'mp3' else None, - # you'll always get 128kbps MP3 for non-client viewers - 'abr': 128 if preview_ext == 'mp3' else None, + } if url_or_none(preview.get('vtt_url')) else None, 
+ 'vcodec': 'none' if ext == 'mp3' else None, + **info, + **traverse_obj(preview, { + 'id': ('id', {str_or_none}), + 'thumbnail': ('poster_url', {url_or_none}), + 'url': ('url', {url_or_none}), + }), + **traverse_obj(preview, ('information', { + 'duration': ('duration', {int_or_none}), + 'fps': ('frame_rate', {int_or_none}), + 'height': ('height', {int_or_none}), + 'width': ('width', {int_or_none}), + })), }) - if not entries: - raise ExtractorError('No video/audio attachment found in this commission.', expected=True) - elif len(entries) == 1: - return entries[0] - else: - parent.update({ - '_type': 'playlist', - 'entries': entries, - }) - return parent + return self.playlist_result(entries, work_id, **info) From d3edc5d52a7159eda2331dbc7e14bf40a6585c81 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:04:43 -0400 Subject: [PATCH 12/23] [ie/bilibili] Pass newer user-agent with API requests (#13736) Closes #12887 Authored by: c-basalt --- yt_dlp/extractor/bilibili.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0c6535fc7..3282a11bb 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,6 +175,13 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): else: note = f'Downloading video formats for cid {cid}' + # TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735 + # playurl requests carrying old UA will be rejected + headers = { + 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36', + **(headers or {}), + } + return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] @@ -353,7 +360,7 @@ class BiliBiliIE(BilibiliBaseIE): 'id': 
'BV1bK411W797', 'title': '物语中的人物是如何吐槽自己的OP的', }, - 'playlist_count': 18, + 'playlist_count': 23, 'playlist': [{ 'info_dict': { 'id': 'BV1bK411W797_p1', @@ -373,6 +380,7 @@ class BiliBiliIE(BilibiliBaseIE): '_old_archive_ids': ['bilibili 498159642_part1'], }, }], + 'params': {'playlist_items': '2'}, }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -1002,6 +1010,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1057,6 +1066,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1847,7 +1857,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): 'thumbnail': r're:^https?://.+\.jpg', 'timestamp': 1564836614, 'upload_date': '20190803', - 'uploader': 'tsukimi-つきみぐー', + 'uploader': '十六夜tsukimiつきみぐ', 'view_count': int, }, } @@ -1902,10 +1912,10 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): 'url': 'https://www.bilibili.com/audio/am10624', 'info_dict': { 'id': '10624', - 'title': '每日新曲推荐(每日11:00更新)', + 'title': '新曲推荐', 'description': '每天11:00更新,为你推送最新音乐', }, - 'playlist_count': 19, + 'playlist_count': 16, } def _real_extract(self, url): From b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:11:58 -0500 Subject: [PATCH 13/23] [ie/BiliBiliBangumi] Fix extractor (#13800) Closes #13795 Authored by: bashonly --- yt_dlp/extractor/bilibili.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 3282a11bb..2846702f6 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -907,13 +907,26 @@ def _real_extract(self, url): 'Extracting episode', 
query={'fnval': 12240, 'ep_id': episode_id}, headers=headers)) - geo_blocked = traverse_obj(play_info, ( - ('result', ('raw', 'data')), 'plugins', - lambda _, v: v['name'] == 'AreaLimitPanel', - 'config', 'is_block', {bool}, any)) - premium_only = play_info.get('code') == -10403 + # play_info can be structured in at least three different ways, e.g.: + # 1.) play_info['result']['video_info'] and play_info['code'] + # 2.) play_info['raw']['data']['video_info'] and play_info['code'] + # 3.) play_info['data']['result']['video_info'] and play_info['data']['code'] + # So we need to transform any of the above into a common structure + status_code = play_info.get('code') + if 'raw' in play_info: + play_info = play_info['raw'] + if 'data' in play_info: + play_info = play_info['data'] + if status_code is None: + status_code = play_info.get('code') + if 'result' in play_info: + play_info = play_info['result'] - video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {} + geo_blocked = traverse_obj(play_info, ( + 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any)) + premium_only = status_code == -10403 + + video_info = traverse_obj(play_info, ('video_info', {dict})) or {} formats = self.extract_formats(video_info) if not formats: @@ -923,8 +936,8 @@ def _real_extract(self, url): self.raise_login_required('This video is for premium members only') if traverse_obj(play_info, (( - ('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' - (('result', ('raw', 'data')), 'play_video_type'), # 'preview' vs 'whole' vs 'none' + ('play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' vs 'PLAY_NONE' + 'play_video_type', # 'preview' vs 'whole' vs 'none' ), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})): self.report_warning( 'Only preview format is available, ' From d88b304d44c599d81acfa4231502270c8b9fe2f8 Mon Sep 17 00:00:00 2001 From: bashonly 
<88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:15:31 -0500 Subject: [PATCH 14/23] [ie/patreon:campaign] Fix extractor (#13712) Closes #13622 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 2c1436cac..9038b4a7f 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -19,7 +19,7 @@ url_or_none, urljoin, ) -from ..utils.traversal import traverse_obj, value +from ..utils.traversal import require, traverse_obj, value class PatreonBaseIE(InfoExtractor): @@ -462,7 +462,7 @@ class PatreonCampaignIE(PatreonBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?patreon\.com/(?: (?:m|api/campaigns)/(?P\d+)| - (?:c/)?(?P(?!creation[?/]|posts/|rss[?/])[\w-]+) + (?:cw?/)?(?P(?!creation[?/]|posts/|rss[?/])[\w-]+) )(?:/posts)?/?(?:$|[?#])''' _TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', @@ -531,6 +531,28 @@ class PatreonCampaignIE(PatreonBaseIE): 'age_limit': 0, }, 'playlist_mincount': 331, + 'skip': 'Channel removed', + }, { + # next.js v13 data, see https://github.com/yt-dlp/yt-dlp/issues/13622 + 'url': 'https://www.patreon.com/c/anythingelse/posts', + 'info_dict': { + 'id': '9631148', + 'title': 'Anything Else?', + 'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08', + 'uploader': 'dan ', + 'uploader_id': '13852412', + 'uploader_url': 'https://www.patreon.com/anythingelse', + 'channel': 'Anything Else?', + 'channel_id': '9631148', + 'channel_url': 'https://www.patreon.com/anythingelse', + 'channel_follower_count': int, + 'age_limit': 0, + 'thumbnail': r're:https?://.+/.+', + }, + 'playlist_mincount': 151, + }, { + 'url': 'https://www.patreon.com/cw/anythingelse', + 'only_matching': True, }, { 'url': 'https://www.patreon.com/c/OgSog/posts', 'only_matching': True, @@ -572,8 +594,11 @@ def _real_extract(self, url): campaign_id, vanity = 
self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent}) - campaign_id = self._search_nextjs_data( - webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] + campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), ( + 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str})) + if not campaign_id: + campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), ( + lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')})) params = { 'json-api-use-default-includes': 'false', From 959ac99e98c3215437e573c22d64be42d361e863 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 15 Jul 2025 01:17:34 +0200 Subject: [PATCH 15/23] Fix `--exec` placeholder expansion on Windows See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56 for more details Authored by: Grub4K --- yt_dlp/postprocessor/exec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index 1f0a0015e..243487dd2 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -18,7 +18,7 @@ def parse_cmd(self, cmd, info): if filepath: if '{}' not in cmd: cmd += ' {}' - cmd = cmd.replace('{}', shell_quote(filepath)) + cmd = cmd.replace('{}', shell_quote(filepath, shell=True)) return cmd def run(self, info): From 9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e Mon Sep 17 00:00:00 2001 From: sepro Date: Tue, 22 Jul 2025 01:43:30 +0200 Subject: [PATCH 16/23] [cleanup] Misc (#13595) Closes #10853, Closes #12436, Closes #13314, Closes #13609 Authored by: seproDev, InvalidUsernameException, doe1080, hseg, bashonly, adamralph Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: InvalidUsernameException Co-authored-by: 
gesh Co-authored-by: Adam Ralph Co-authored-by: doe1080 <98906116+doe1080@users.noreply.github.com> --- CONTRIBUTING.md | 2 +- README.md | 6 +++--- devscripts/changelog_override.json | 10 ++++++++++ test/test_download.py | 4 ---- yt_dlp/YoutubeDL.py | 1 + yt_dlp/extractor/common.py | 5 ++++- yt_dlp/extractor/mirrativ.py | 2 +- yt_dlp/extractor/newspicks.py | 2 -- yt_dlp/extractor/youtube/_video.py | 4 ++-- 9 files changed, 22 insertions(+), 14 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd7b0f121..2c58cdfc9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -126,7 +126,7 @@ ### Are you willing to share account details if needed? While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow. - Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages. -- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator). +- Change the password before sharing the account to something random. - Change the password after receiving the account back. ### Is the website primarily used for piracy? diff --git a/README.md b/README.md index 925ebd8c5..7a6d1073f 100644 --- a/README.md +++ b/README.md @@ -277,7 +277,7 @@ # USAGE AND OPTIONS yt-dlp [OPTIONS] [--] URL [URL...] -`Ctrl+F` is your friend :D +Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords @@ -1902,8 +1902,8 @@ #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) #### vimeo -* `client`: Client to extract video data from. One of `android` (default), `ios` or `web`. The `ios` client only works with previously cached OAuth tokens. 
The `web` client only works when authenticated with credentials or account cookies -* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens +* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index d7296bf30..c22ea94bf 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -262,5 +262,15 @@ { "action": "remove", "when": "500761e41acb96953a5064e951d41d190c287e46" + }, + { + "action": "add", + "when": "f3008bc5f89d2691f2f8dfc51b406ef4e25281c3", + "short": "[priority] **Default behaviour changed from `--mtime` to `--no-mtime`**\nyt-dlp no longer applies the server modified time to downloaded files by default. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)" + }, + { + "action": "add", + "when": "959ac99e98c3215437e573c22d64be42d361e863", + "short": "[priority] Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)\n - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped" } ] diff --git a/test/test_download.py b/test/test_download.py index c7842735c..1714cb52e 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -66,10 +66,6 @@ def _file_md5(fn): @is_download_test class TestDownload(unittest.TestCase): - # Parallel testing in nosetests. See - # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html - _multiprocess_shared_ = True - maxDiff = None COMPLETED_TESTS = {} diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e42fa73dd..76fd18c33 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -529,6 +529,7 @@ class YoutubeDL: discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. + Argument values must always be a list of string(s). E.g. {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). 
Only for YouTube diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 8a914abf0..4a4b5416d 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -397,6 +397,8 @@ class InfoExtractor: chapters: A list of dictionaries, with the following entries: * "start_time" - The start time of the chapter in seconds * "end_time" - The end time of the chapter in seconds + (optional: core code can determine this value from + the next chapter's start_time or the video's duration) * "title" (optional, string) heatmap: A list of dictionaries, with the following entries: * "start_time" - The start time of the data point in seconds @@ -411,7 +413,8 @@ class InfoExtractor: 'unlisted' or 'public'. Use 'InfoExtractor._availability' to set it media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer" - _old_archive_ids: A list of old archive ids needed for backward compatibility + _old_archive_ids: A list of old archive ids needed for backward + compatibility. Use yt_dlp.utils.make_archive_id to generate ids _format_sort_fields: A list of fields to use for sorting formats __post_extractor: A function to be called just before the metadata is written to either disk, logger or console. 
The function diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py index 4e24371a2..36a736a21 100644 --- a/yt_dlp/extractor/mirrativ.py +++ b/yt_dlp/extractor/mirrativ.py @@ -18,7 +18,7 @@ class MirrativIE(MirrativBaseIE): IE_NAME = 'mirrativ' _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P[^/?#&]+)' - TESTS = [{ + _TESTS = [{ 'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw', 'info_dict': { 'id': 'UQomuS7EMgHoxRHjEhNiHw', diff --git a/yt_dlp/extractor/newspicks.py b/yt_dlp/extractor/newspicks.py index 5f19eed98..25be3c720 100644 --- a/yt_dlp/extractor/newspicks.py +++ b/yt_dlp/extractor/newspicks.py @@ -18,7 +18,6 @@ class NewsPicksIE(InfoExtractor): 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】', 'cast': 'count:4', 'description': 'md5:09397aad46d6ded6487ff13f138acadf', - 'duration': 2940, 'release_date': '20220117', 'release_timestamp': 1642424400, 'series': 'HORIE ONE', @@ -35,7 +34,6 @@ class NewsPicksIE(InfoExtractor): 'title': '【検証】専門家は、KADOKAWAをどう見るか', 'cast': 'count:3', 'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d', - 'duration': 1320, 'release_date': '20240622', 'release_timestamp': 1719088080, 'series': 'NPレポート', diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 5968edc60..171aa9b5c 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2076,7 +2076,7 @@ def _extract_signature_function(self, video_id, player_url, example_sig): assert os.path.basename(func_id) == func_id self.write_debug(f'Extracting signature function {func_id}') - cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.03.31'), None + cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.07.21'), None if not cache_spec: code = self._load_player(video_id, player_url) @@ -2180,7 +2180,7 @@ def _load_player_data_from_cache(self, name, player_url): if data := self._player_cache.get(cache_id): return data - data = self.cache.load(*cache_id, 
min_ver='2025.03.31') + data = self.cache.load(*cache_id, min_ver='2025.07.21') if data: self._player_cache[cache_id] = data From 035b1ece8f382358f5503bf5011ca098f6c9eaf9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 23:47:12 +0000 Subject: [PATCH 17/23] Release 2025.07.21 Created by: bashonly :ci skip all --- CONTRIBUTORS | 9 +++++ Changelog.md | 91 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +-- supportedsites.md | 22 ++++++------ yt_dlp/version.py | 6 ++-- 5 files changed, 116 insertions(+), 16 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ba23b66dc..f20b4ce17 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -784,3 +784,12 @@ eason1478 ceandreasen chauhantirth helpimnotdrowning +adamralph +averageFOSSenjoyer +bubo +flanter21 +Georift +moonshinerd +R0hanW +ShockedPlot7560 +swayll diff --git a/Changelog.md b/Changelog.md index 5a5c18cf3..7205b95aa 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,97 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.07.21 + +#### Important changes +- **Default behaviour changed from `--mtime` to `--no-mtime`** +yt-dlp no longer applies the server modified time to downloaded files by default. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/12780) +- Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56) + - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped + +#### Core changes +- [Allow extractors to designate formats/subtitles for impersonation](https://github.com/yt-dlp/yt-dlp/commit/32809eb2da92c649e540a5b714f6235036026161) ([#13778](https://github.com/yt-dlp/yt-dlp/issues/13778)) by [bashonly](https://github.com/bashonly) (With fixes in [3e49bc8](https://github.com/yt-dlp/yt-dlp/commit/3e49bc8a1bdb4109b857f2c361c358e86fa63405), [2ac3eb9](https://github.com/yt-dlp/yt-dlp/commit/2ac3eb98373d1c31341c5e918c83872c7ff409c6)) +- [Don't let format testing alter the return code](https://github.com/yt-dlp/yt-dlp/commit/4919051e447c7f8ae9df8ba5c4208b6b5c04915a) ([#13767](https://github.com/yt-dlp/yt-dlp/issues/13767)) by [bashonly](https://github.com/bashonly) +- [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/commit/959ac99e98c3215437e573c22d64be42d361e863) by [Grub4K](https://github.com/Grub4K) +- [No longer enable `--mtime` by default](https://github.com/yt-dlp/yt-dlp/commit/f3008bc5f89d2691f2f8dfc51b406ef4e25281c3) ([#12781](https://github.com/yt-dlp/yt-dlp/issues/12781)) by [seproDev](https://github.com/seproDev) +- [Warn when skipping formats](https://github.com/yt-dlp/yt-dlp/commit/1f27a9f8baccb9105f2476154557540efe09a937) ([#13090](https://github.com/yt-dlp/yt-dlp/issues/13090)) by [bashonly](https://github.com/bashonly) +- **jsinterp** + - [Cache undefined variable names](https://github.com/yt-dlp/yt-dlp/commit/b342d27f3f82d913976509ddf5bff539ad8567ec) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly) (With fixes in 
[805519b](https://github.com/yt-dlp/yt-dlp/commit/805519bfaa7cb5443912dfe45ac774834ba65a16)) + - [Fix variable scoping](https://github.com/yt-dlp/yt-dlp/commit/b6328ca05030d815222b25d208cc59a964623bf9) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) +- **utils** + - `mimetype2ext`: [Always parse `flac` from `audio/flac`](https://github.com/yt-dlp/yt-dlp/commit/b8abd255e454acbe0023cdb946f9eb461ced7eeb) ([#13748](https://github.com/yt-dlp/yt-dlp/issues/13748)) by [bashonly](https://github.com/bashonly) + - `unified_timestamp`: [Return `int` values](https://github.com/yt-dlp/yt-dlp/commit/6be26626f7cfa71d28e0fac2861eb04758810c5d) ([#13796](https://github.com/yt-dlp/yt-dlp/issues/13796)) by [doe1080](https://github.com/doe1080) + - `urlhandle_detect_ext`: [Use `x-amz-meta-file-type` headers](https://github.com/yt-dlp/yt-dlp/commit/28bf46b7dafe2e241137763bf570a2f91ba8a53a) ([#13749](https://github.com/yt-dlp/yt-dlp/issues/13749)) by [bashonly](https://github.com/bashonly) + +#### Extractor changes +- [Add `_search_nextjs_v13_data` helper](https://github.com/yt-dlp/yt-dlp/commit/5245231e4a39ecd5595d4337d46d85e150e2430a) ([#13398](https://github.com/yt-dlp/yt-dlp/issues/13398)) by [bashonly](https://github.com/bashonly) (With fixes in [b5fea53](https://github.com/yt-dlp/yt-dlp/commit/b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec)) +- [Detect invalid m3u8 playlist data](https://github.com/yt-dlp/yt-dlp/commit/e99c0b838a9c5feb40c0dcd291bd7b8620b8d36d) ([#13601](https://github.com/yt-dlp/yt-dlp/issues/13601)) by [Grub4K](https://github.com/Grub4K) +- **10play**: [Support new site domain](https://github.com/yt-dlp/yt-dlp/commit/790c286ce3e0b534ca2d8f6648ced220d888f139) ([#13611](https://github.com/yt-dlp/yt-dlp/issues/13611)) by [Georift](https://github.com/Georift) +- **9gag**: [Support browser 
impersonation](https://github.com/yt-dlp/yt-dlp/commit/0b359b184dee0c7052be482857bf562de67e4928) ([#13678](https://github.com/yt-dlp/yt-dlp/issues/13678)) by [bashonly](https://github.com/bashonly) +- **aenetworks**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/5f951ce929b56a822514f1a02cc06af030855ec7) ([#13747](https://github.com/yt-dlp/yt-dlp/issues/13747)) by [bashonly](https://github.com/bashonly) +- **archive.org**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf) ([#13706](https://github.com/yt-dlp/yt-dlp/issues/13706)) by [rdamas](https://github.com/rdamas) +- **bandaichannel**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/23e9389f936ec5236a87815b8576e5ce567b2f77) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **bandcamp**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/f9dff95cb1c138913011417b3bba020c0a691bba) ([#13480](https://github.com/yt-dlp/yt-dlp/issues/13480)) by [WouterGordts](https://github.com/WouterGordts) +- **bellmedia**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6fb3947c0dc6d0e3eab5077c5bada8402f47a277) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **bilibili**: [Pass newer user-agent with API requests](https://github.com/yt-dlp/yt-dlp/commit/d3edc5d52a7159eda2331dbc7e14bf40a6585c81) ([#13736](https://github.com/yt-dlp/yt-dlp/issues/13736)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7) ([#13800](https://github.com/yt-dlp/yt-dlp/issues/13800)) by [bashonly](https://github.com/bashonly) + - [Fix geo-block detection](https://github.com/yt-dlp/yt-dlp/commit/884f35d54a64f1e6e7be49459842f573fc3a2701) ([#13667](https://github.com/yt-dlp/yt-dlp/issues/13667)) by [bashonly](https://github.com/bashonly) +- 
**blackboardcollaborate**: [Support subtitles and authwalled videos](https://github.com/yt-dlp/yt-dlp/commit/dcc4cba39e2a79d3efce16afa28dbe245468489f) ([#12473](https://github.com/yt-dlp/yt-dlp/issues/12473)) by [flanter21](https://github.com/flanter21) +- **btvplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae61e0f313dd03a09060abc7a212775c3717818) ([#13541](https://github.com/yt-dlp/yt-dlp/issues/13541)) by [bubo](https://github.com/bubo) +- **ctv**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9f54ea38984788811773ca2ceaca73864acf0e8a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **dangalplay**: [Support other login regions](https://github.com/yt-dlp/yt-dlp/commit/09982bc33e2f1f9a1ff66e6738df44f15b36f6a6) ([#13768](https://github.com/yt-dlp/yt-dlp/issues/13768)) by [bashonly](https://github.com/bashonly) +- **francetv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/ade876efb31d55d3394185ffc56942fdc8d325cc) ([#13726](https://github.com/yt-dlp/yt-dlp/issues/13726)) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Fix support for free accounts](https://github.com/yt-dlp/yt-dlp/commit/07d1d85f6387e4bdb107096f0131c7054f078bb9) ([#13700](https://github.com/yt-dlp/yt-dlp/issues/13700)) by [chauhantirth](https://github.com/chauhantirth) + - [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9) ([#13727](https://github.com/yt-dlp/yt-dlp/issues/13727)) by [bashonly](https://github.com/bashonly) (With fixes in [ef103b2](https://github.com/yt-dlp/yt-dlp/commit/ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d)) +- **joqrag**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6d39c420f7774562a106d90253e2ed5b75036321) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **limelight**: [Remove 
extractors](https://github.com/yt-dlp/yt-dlp/commit/5d693446e882931618c40c99bb593f0b87b30eb9) ([#13267](https://github.com/yt-dlp/yt-dlp/issues/13267)) by [doe1080](https://github.com/doe1080) +- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b4b4486effdcb96bb6b8148171a49ff579b69a4a) ([#13717](https://github.com/yt-dlp/yt-dlp/issues/13717)) by [Pawka](https://github.com/Pawka) +- **mir24.tv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7b4c96e0898db048259ef5fdf12ed14e3605dce3) ([#13651](https://github.com/yt-dlp/yt-dlp/issues/13651)) by [swayll](https://github.com/swayll) +- **mixlr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0f33950c778331bf4803c76e8b0ba1862df93431) ([#13561](https://github.com/yt-dlp/yt-dlp/issues/13561)) by [seproDev](https://github.com/seproDev), [ShockedPlot7560](https://github.com/ShockedPlot7560) +- **mlbtv**: [Make formats downloadable with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226) ([#13761](https://github.com/yt-dlp/yt-dlp/issues/13761)) by [bashonly](https://github.com/bashonly) +- **newspicks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2aaf1aa71d174700859c9ec1a81109b78e34961c) ([#13612](https://github.com/yt-dlp/yt-dlp/issues/13612)) by [doe1080](https://github.com/doe1080) +- **nhkradiru**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7c49a937887756efcfa162abdcf17e48c244cb0c) ([#12708](https://github.com/yt-dlp/yt-dlp/issues/12708)) by [garret1317](https://github.com/garret1317) +- **noovo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/d57a0b5aa78d59324b037d37492fe86aa4fbf58a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d88b304d44c599d81acfa4231502270c8b9fe2f8) ([#13712](https://github.com/yt-dlp/yt-dlp/issues/13712)) by 
[bashonly](https://github.com/bashonly) +- **playerfm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3) ([#13016](https://github.com/yt-dlp/yt-dlp/issues/13016)) by [R0hanW](https://github.com/R0hanW) +- **rai**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c8329fc572903eeed7edad1642773b2268b71a62) ([#13572](https://github.com/yt-dlp/yt-dlp/issues/13572)) by [moonshinerd](https://github.com/moonshinerd), [seproDev](https://github.com/seproDev) +- **raisudtirol**: [Support alternative domain](https://github.com/yt-dlp/yt-dlp/commit/85c3fa1925a9057ef4ae8af682686d5b3eb8e568) ([#13718](https://github.com/yt-dlp/yt-dlp/issues/13718)) by [barsnick](https://github.com/barsnick) +- **skeb**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/060c6a4501a0b8a92f1b9c12788f556d902c83c6) ([#13593](https://github.com/yt-dlp/yt-dlp/issues/13593)) by [doe1080](https://github.com/doe1080) +- **soundcloud**: [Always extract original format extension](https://github.com/yt-dlp/yt-dlp/commit/c1ac543c8166ff031d62e340b3244ca8556e3fb9) ([#13746](https://github.com/yt-dlp/yt-dlp/issues/13746)) by [bashonly](https://github.com/bashonly) +- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0b41746964e1d0470ac286ce09408940a3a51147) ([#13610](https://github.com/yt-dlp/yt-dlp/issues/13610)) by [bashonly](https://github.com/bashonly) +- **thehighwire**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3a84be9d1660ef798ea28f929a20391bef6afda4) ([#13505](https://github.com/yt-dlp/yt-dlp/issues/13505)) by [swayll](https://github.com/swayll) +- **twitch**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/422cc8cb2ff2bd3b4c2bc64e23507b7e6f522c35) ([#13618](https://github.com/yt-dlp/yt-dlp/issues/13618)) by [bashonly](https://github.com/bashonly) +- **unitednationswebtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/630f3389c33f0f7f6ec97e8917d20aeb4e4078da) 
([#13538](https://github.com/yt-dlp/yt-dlp/issues/13538)) by [averageFOSSenjoyer](https://github.com/averageFOSSenjoyer) +- **vimeo** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a5d697f62d8be78ffd472acb2f52c8bc32833003) ([#13692](https://github.com/yt-dlp/yt-dlp/issues/13692)) by [bashonly](https://github.com/bashonly) + - [Handle age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/a6db1d297ab40cc346de24aacbeab93112b2f4e1) ([#13719](https://github.com/yt-dlp/yt-dlp/issues/13719)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Do not require PO Token for premium accounts](https://github.com/yt-dlp/yt-dlp/commit/5b57b72c1a7c6bd249ffcebdf5630761ec664c10) ([#13640](https://github.com/yt-dlp/yt-dlp/issues/13640)) by [coletdjnz](https://github.com/coletdjnz) + - [Ensure context params are consistent for web clients](https://github.com/yt-dlp/yt-dlp/commit/6e5bee418bc108565108153fd745c8e7a59f16dd) ([#13701](https://github.com/yt-dlp/yt-dlp/issues/13701)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract global nsig helper functions](https://github.com/yt-dlp/yt-dlp/commit/fca94ac5d63ed6578b5cd9c8129d97a8a713c39a) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/0e68332bcb9fba87c42805b7a051eeb2bed36206) ([#13659](https://github.com/yt-dlp/yt-dlp/issues/13659)) by [bashonly](https://github.com/bashonly) + - [Log bad playability statuses of player responses](https://github.com/yt-dlp/yt-dlp/commit/aa9f1f4d577e99897ac16cd19d4e217d688ea75d) ([#13647](https://github.com/yt-dlp/yt-dlp/issues/13647)) by [coletdjnz](https://github.com/coletdjnz) + - [Use impersonation for downloading subtitles](https://github.com/yt-dlp/yt-dlp/commit/8820101aa3152e5f4811541c645f8b5de231ba8c) ([#13786](https://github.com/yt-dlp/yt-dlp/issues/13786)) by 
[bashonly](https://github.com/bashonly) + - tab: [Fix subscriptions feed extraction](https://github.com/yt-dlp/yt-dlp/commit/c23d837b6524d1e7a4595948871ba1708cba4dfa) ([#13665](https://github.com/yt-dlp/yt-dlp/issues/13665)) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **hls**: [Do not fall back to ffmpeg when native is required](https://github.com/yt-dlp/yt-dlp/commit/a7113722ec33f30fc898caee9242af2b82188a53) ([#13655](https://github.com/yt-dlp/yt-dlp/issues/13655)) by [bashonly](https://github.com/bashonly) + +#### Networking changes +- **Request Handler** + - requests + - [Refactor default headers](https://github.com/yt-dlp/yt-dlp/commit/a4561c7a66c39d88efe7ae51e7fa1986faf093fb) ([#13785](https://github.com/yt-dlp/yt-dlp/issues/13785)) by [bashonly](https://github.com/bashonly) + - [Work around partial read dropping data](https://github.com/yt-dlp/yt-dlp/commit/c2ff2dbaec7929015373fe002e9bd4849931a4ce) ([#13599](https://github.com/yt-dlp/yt-dlp/issues/13599)) by [Grub4K](https://github.com/Grub4K) (With fixes in [c316416](https://github.com/yt-dlp/yt-dlp/commit/c316416b972d1b05e58fbcc21e80428b900ce102)) + +#### Misc. 
changes +- **cleanup** + - [Bump ruff to 0.12.x](https://github.com/yt-dlp/yt-dlp/commit/ca5cce5b07d51efe7310b449cdefeca8d873e9df) ([#13596](https://github.com/yt-dlp/yt-dlp/issues/13596)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [9951fdd](https://github.com/yt-dlp/yt-dlp/commit/9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e) by [adamralph](https://github.com/adamralph), [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080), [hseg](https://github.com/hseg), [InvalidUsernameException](https://github.com/InvalidUsernameException), [seproDev](https://github.com/seproDev) +- **devscripts**: [Fix filename/directory Bash completions](https://github.com/yt-dlp/yt-dlp/commit/99093e96fd6a26dea9d6e4bd1e4b16283b6ad1ee) ([#13620](https://github.com/yt-dlp/yt-dlp/issues/13620)) by [barsnick](https://github.com/barsnick) +- **test**: download: [Support `playlist_maxcount`](https://github.com/yt-dlp/yt-dlp/commit/fd36b8f31bafbd8096bdb92a446a0c9c6081209c) ([#13433](https://github.com/yt-dlp/yt-dlp/issues/13433)) by [InvalidUsernameException](https://github.com/InvalidUsernameException) + ### 2025.06.30 #### Core changes diff --git a/README.md b/README.md index 7a6d1073f..f1d119317 100644 --- a/README.md +++ b/README.md @@ -639,9 +639,9 @@ ## Filesystem Options: --no-part Do not use .part files - write directly into output file --mtime Use the Last-modified header to set the file - modification time (default) + modification time --no-mtime Do not use the Last-modified header to set - the file modification time + the file modification time (default) --write-description Write video description to a .description file --no-write-description Do not write video description (default) --write-info-json Write video metadata to a .info.json file diff --git a/supportedsites.md b/supportedsites.md index 8e48135d2..3e0bef4bc 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -133,7 +133,6 @@ # Supported sites - **BaiduVideo**: 百度视频 - **BanBye** - 
**BanByeChannel** - - **bandaichannel** - **Bandcamp** - **Bandcamp:album** - **Bandcamp:user** @@ -157,7 +156,6 @@ # Supported sites - **Beeg** - **BehindKink**: (**Currently broken**) - **Bellator** - - **BellMedia** - **BerufeTV** - **Bet**: (**Currently broken**) - **bfi:player**: (**Currently broken**) @@ -197,6 +195,7 @@ # Supported sites - **BitChute** - **BitChuteChannel** - **BlackboardCollaborate** + - **BlackboardCollaborateLaunch** - **BleacherReport**: (**Currently broken**) - **BleacherReportCMS**: (**Currently broken**) - **blerp** @@ -225,6 +224,7 @@ # Supported sites - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen + - **BTVPlus** - **Bundesliga** - **Bundestag** - **BunnyCdn** @@ -317,7 +317,6 @@ # Supported sites - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **cu.ntv.co.jp**: 日テレ無料TADA! - **CultureUnplugged** @@ -652,7 +651,6 @@ # Supported sites - **jiosaavn:​show:playlist** - **jiosaavn:song** - **Joj** - - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. 
(JOQR) - **Jove** - **JStream** - **JTBC**: jtbc.co.kr @@ -723,9 +721,6 @@ # Supported sites - **life:embed** - **likee** - **likee:user** - - **limelight** - - **limelight:channel** - - **limelight:channel_list** - **LinkedIn**: [*linkedin*](## "netrc machine") - **linkedin:events**: [*linkedin*](## "netrc machine") - **linkedin:learning**: [*linkedin*](## "netrc machine") @@ -807,6 +802,7 @@ # Supported sites - **minds:channel** - **minds:group** - **Minoto** + - **mir24.tv** - **mirrativ** - **mirrativ:user** - **MirrorCoUK** @@ -817,6 +813,8 @@ # Supported sites - **mixcloud** - **mixcloud:playlist** - **mixcloud:user** + - **Mixlr** + - **MixlrRecoring** - **MLB** - **MLBArticle** - **MLBTV**: [*mlb*](## "netrc machine") @@ -973,7 +971,6 @@ # Supported sites - **NoicePodcast** - **NonkTube** - **NoodleMagazine** - - **Noovo** - **NOSNLArticle** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** @@ -1097,6 +1094,7 @@ # Supported sites - **Platzi**: [*platzi*](## "netrc machine") - **PlatziCourse**: [*platzi*](## "netrc machine") - **player.sky.it** + - **PlayerFm** - **playeur** - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlaySuisse**: [*playsuisse*](## "netrc machine") @@ -1472,11 +1470,12 @@ # Supported sites - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - **TF1** - - **TFO** + - **TFO**: (**Currently broken**) - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") - **TheGuardianPodcast** - **TheGuardianPodcastPlaylist** + - **TheHighWire** - **TheHoleTv** - **TheIntercept** - **ThePlatform** @@ -1544,8 +1543,8 @@ # Supported sites - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - **TV5MONDE** - - **tv5unis** - - **tv5unis:video** + - **tv5unis**: (**Currently broken**) + - **tv5unis:video**: (**Currently broken**) - **tv8.it** - **tv8.it:live**: TV8 Live - **tv8.it:playlist**: TV8 
Playlist @@ -1600,6 +1599,7 @@ # Supported sites - **UlizaPortal**: ulizaportal.jp - **umg:de**: Universal Music Deutschland - **Unistra** + - **UnitedNationsWebTv** - **Unity**: (**Currently broken**) - **uol.com.br** - **uplynk** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 451fee716..868429ffb 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2025.06.30' +__version__ = '2025.07.21' -RELEASE_GIT_HEAD = 'b0187844988e557c7e1e6bb1aabd4c1176768d86' +RELEASE_GIT_HEAD = '9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2025.06.30' +_pkg_version = '2025.07.21' From 3e918d825d7ff367812658957b281b8cda8f9ebb Mon Sep 17 00:00:00 2001 From: Roland Crosby Date: Tue, 22 Jul 2025 13:50:42 -0400 Subject: [PATCH 18/23] [pp/XAttrMetadata] Add macOS "Where from" attribute (#12664) Authored by: rolandcrosby --- yt_dlp/postprocessor/xattrpp.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index e486b797b..fd83d783b 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -33,8 +33,17 @@ class XAttrMetadataPP(PostProcessor): # (e.g., 4kB on ext4), and we don't want to have the other ones fail 'user.dublincore.description': 'description', # 'user.xdg.comment': 'description', + 'com.apple.metadata:kMDItemWhereFroms': 'webpage_url', } + APPLE_PLIST_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<array> +\t<string>%s</string> +</array> +</plist>''' + def run(self, info): mtime = os.stat(info['filepath']).st_mtime self.to_screen('Writing metadata to file\'s xattrs') @@ -44,6 +53,8 @@ def run(self, info): if value: if infoname == 'upload_date': value = hyphenate_date(value) + elif xattrname == 'com.apple.metadata:kMDItemWhereFroms': + value = self.APPLE_PLIST_TEMPLATE % value write_xattr(info['filepath'], xattrname, value.encode()) except XAttrUnavailableError as e: From
eed94c7306d4ecdba53ad8783b1463a9af5c97f1 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 22 Jul 2025 20:10:51 +0200 Subject: [PATCH 19/23] [utils] Add `WINDOWS_VT_MODE` to globals (#12460) Authored by: Grub4K --- test/test_compat.py | 3 --- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/compat/_legacy.py | 2 +- yt_dlp/globals.py | 2 ++ yt_dlp/utils/_utils.py | 10 +++------- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index b1cc2a818..3aa9c0c51 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -21,9 +21,6 @@ def test_compat_passthrough(self): with self.assertWarns(DeprecationWarning): _ = compat.compat_basestring - with self.assertWarns(DeprecationWarning): - _ = compat.WINDOWS_VT_MODE - self.assertEqual(urllib.request.getproxies, getproxies) with self.assertWarns(DeprecationWarning): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 76fd18c33..a9f347bf4 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -36,6 +36,7 @@ from .globals import ( IN_CLI, LAZY_EXTRACTORS, + WINDOWS_VT_MODE, plugin_ies, plugin_ies_overrides, plugin_pps, @@ -4040,8 +4041,7 @@ def get_encoding(stream): if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') if not supports_terminal_sequences(stream): - from .utils import WINDOWS_VT_MODE # Must be imported locally - additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI') + additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI') if additional_info: ret = f'{ret} ({",".join(additional_info)})' return ret diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index dae2c1459..2f3e35d4a 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -37,7 +37,7 @@ from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..networking.exceptions import HTTPError as compat_HTTPError -passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 
'windows_enable_vt_mode')) +passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',)) # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE diff --git a/yt_dlp/globals.py b/yt_dlp/globals.py index 0cf276cc9..81ad00448 100644 --- a/yt_dlp/globals.py +++ b/yt_dlp/globals.py @@ -1,3 +1,4 @@ +import os from collections import defaultdict # Please Note: Due to necessary changes and the complex nature involved in the plugin/globals system, @@ -28,3 +29,4 @@ def __repr__(self, /): # Misc IN_CLI = Indirect(False) LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled +WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 7d79f417f..1cb62712b 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -52,7 +52,7 @@ compat_HTMLParseError, ) from ..dependencies import xattr -from ..globals import IN_CLI +from ..globals import IN_CLI, WINDOWS_VT_MODE __name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module @@ -4759,13 +4759,10 @@ def jwt_decode_hs256(jwt): return json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) -WINDOWS_VT_MODE = False if os.name == 'nt' else None - - @functools.cache def supports_terminal_sequences(stream): if os.name == 'nt': - if not WINDOWS_VT_MODE: + if not WINDOWS_VT_MODE.value: return False elif not os.getenv('TERM'): return False @@ -4802,8 +4799,7 @@ def windows_enable_vt_mode(): finally: os.close(handle) - global WINDOWS_VT_MODE - WINDOWS_VT_MODE = True + WINDOWS_VT_MODE.value = True supports_terminal_sequences.cache_clear() From c59ad2b066bbccd3cc4eed580842f961bce7dd4a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:34:03 -0500 Subject: [PATCH 20/23] [utils] `random_user_agent`: Bump versions (#13543) Closes #5362 Authored by: bashonly --- yt_dlp/extractor/adobepass.py | 8 ++---- yt_dlp/extractor/bilibili.py | 7 ----- 
yt_dlp/extractor/francaisfacile.py | 13 +-------- yt_dlp/extractor/mitele.py | 2 +- yt_dlp/extractor/sproutvideo.py | 2 +- yt_dlp/extractor/telecinco.py | 13 +-------- yt_dlp/utils/networking.py | 46 +++--------------------------- 7 files changed, 10 insertions(+), 81 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 8c2d9d934..eb45734ec 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -48,7 +48,6 @@ 'username_field': 'user', 'password_field': 'passwd', 'login_hostname': 'login.xfinity.com', - 'needs_newer_ua': True, }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -1379,11 +1378,8 @@ def _download_webpage_handle(self, *args, **kwargs): @staticmethod def _get_mso_headers(mso_info): - # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - return { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', - } if mso_info.get('needs_newer_ua') else {} + # Not needed currently + return {} @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2846702f6..d00ac6317 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,13 +175,6 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): else: note = f'Downloading video formats for cid {cid}' - # TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735 - # playurl requests carrying old UA will be rejected - headers = { - 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36', - **(headers or {}), - } - return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] diff --git 
a/yt_dlp/extractor/francaisfacile.py b/yt_dlp/extractor/francaisfacile.py index d3208c282..c432cf486 100644 --- a/yt_dlp/extractor/francaisfacile.py +++ b/yt_dlp/extractor/francaisfacile.py @@ -1,9 +1,7 @@ import urllib.parse from .common import InfoExtractor -from ..networking.exceptions import HTTPError from ..utils import ( - ExtractorError, float_or_none, url_or_none, ) @@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor): def _real_extract(self, url): display_id = urllib.parse.unquote(self._match_id(url)) - - try: # yt-dlp's default user-agents are too old and blocked by the site - webpage = self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient - webpage = self._download_webpage(url, display_id, impersonate=True) + webpage = self._download_webpage(url, display_id) data = self._search_json( r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>', diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index 0dded38c6..76fef337a 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -79,7 +79,7 @@ class MiTeleIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) pre_player = self._search_json( r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', webpage, 'Pre Player', display_id)['prePlayer'] diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 494042738..4afa83871 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -99,7 +99,7 @@ def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) webpage = self._download_webpage( -
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True) + url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index 2dbe2a776..a34f2afd4 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -63,17 +63,6 @@ def _parse_content(self, content, url): 'http_headers': headers, } - def _download_akamai_webpage(self, url, display_id): - try: # yt-dlp's default user-agents are too old and blocked by akamai - return self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient to bypass akamai - return self._download_webpage(url, display_id, impersonate=True) - class TelecincoIE(TelecincoBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' @@ -151,7 +140,7 @@ class TelecincoIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) article = self._search_json( r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', webpage, 'article', display_id)['article'] diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py index 9fcab6456..467312ce7 100644 --- a/yt_dlp/utils/networking.py +++ b/yt_dlp/utils/networking.py @@ -15,48 +15,10 @@ def random_user_agent(): - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '90.0.4430.212', - '90.0.4430.24', - 
'90.0.4430.70', - '90.0.4430.72', - '90.0.4430.85', - '90.0.4430.93', - '91.0.4472.101', - '91.0.4472.106', - '91.0.4472.114', - '91.0.4472.124', - '91.0.4472.164', - '91.0.4472.19', - '91.0.4472.77', - '92.0.4515.107', - '92.0.4515.115', - '92.0.4515.131', - '92.0.4515.159', - '92.0.4515.43', - '93.0.4556.0', - '93.0.4577.15', - '93.0.4577.63', - '93.0.4577.82', - '94.0.4606.41', - '94.0.4606.54', - '94.0.4606.61', - '94.0.4606.71', - '94.0.4606.81', - '94.0.4606.85', - '95.0.4638.17', - '95.0.4638.50', - '95.0.4638.54', - '95.0.4638.69', - '95.0.4638.74', - '96.0.4664.18', - '96.0.4664.45', - '96.0.4664.55', - '96.0.4664.93', - '97.0.4692.20', - ) - return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36' + # Target versions released within the last ~6 months + CHROME_MAJOR_VERSION_RANGE = (132, 138) + return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0') class HTTPHeaderDict(dict): From 59765ecbc08d18005de7143fbb1d1caf90239471 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:46:46 -0500 Subject: [PATCH 21/23] [ie/sproutvideo] Fix extractor (#13813) Authored by: bashonly --- yt_dlp/extractor/sproutvideo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 4afa83871..ff9dc7dee 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -101,8 +101,8 @@ def _real_extract(self, url): webpage = self._download_webpage( url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( - r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, - contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', + r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', 
webpage, 'player info', + video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode()) # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e] From 7e3f48d64d237281a97b3df1a61980c78a0302fe Mon Sep 17 00:00:00 2001 From: Atsushi2965 <142886283+atsushi2965@users.noreply.github.com> Date: Wed, 23 Jul 2025 06:55:00 +0900 Subject: [PATCH 22/23] [pp/EmbedThumbnail] Fix ffmpeg args for embedding in mp3 (#13720) Authored by: atsushi2965 --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index d8ba220ca..39e8826c6 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -90,7 +90,7 @@ def run(self, info): if info['ext'] == 'mp3': options = [ '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', - '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] + '-metadata:s:v', 'title=Album cover', '-metadata:s:v', 'comment=Cover (front)'] self._report_run('ffmpeg', filename) self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) From afaf60d9fd5a0c7a85aeb1374fd97fbc13cd652c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 23 Jul 2025 18:27:20 -0500 Subject: [PATCH 23/23] [ie/vimeo] Fix login support and require authentication (#13823) Closes #13822 Authored by: bashonly --- README.md | 2 +- yt_dlp/extractor/vimeo.py | 59 +++++++++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index f1d119317..e5bd21b9c 100644 --- a/README.md +++ b/README.md @@ -1902,7 +1902,7 @@ #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) #### vimeo -* `client`: 
Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens * `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 7ffe89f22..c45264bb5 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -49,7 +49,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'Cannot download embed-only video without embedding URL. 
Please call yt-dlp ' 'with the URL of the page that embeds this video.') - _DEFAULT_CLIENT = 'android' + _DEFAULT_CLIENT = 'web' _DEFAULT_AUTHED_CLIENT = 'web' _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', @@ -58,7 +58,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): _CLIENT_CONFIGS = { 'android': { 'CACHE_KEY': 'oauth-token-android', - 'CACHE_ONLY': False, + 'CACHE_ONLY': True, 'VIEWER_JWT': False, 'REQUIRES_AUTH': False, 'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==', @@ -88,6 +88,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): ), }, 'web': { + 'CACHE_ONLY': False, 'VIEWER_JWT': True, 'REQUIRES_AUTH': True, 'USER_AGENT': None, @@ -142,7 +143,6 @@ def _perform_login(self, username, password): 'service': 'vimeo', 'token': viewer['xsrft'], } - self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_webpage( self._LOGIN_URL, None, 'Logging in', @@ -151,16 +151,40 @@ def _perform_login(self, username, password): 'Referer': self._LOGIN_URL, }) except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 418: + if isinstance(e.cause, HTTPError) and e.cause.status in (405, 418): raise ExtractorError( 'Unable to log in: bad username or password', expected=True) raise ExtractorError('Unable to log in') + # Clear unauthenticated viewer info + self._viewer_info = None + def _real_initialize(self): - if self._LOGIN_REQUIRED and not self._is_logged_in: + if self._is_logged_in: + return + + if self._LOGIN_REQUIRED: self.raise_login_required() + if self._DEFAULT_CLIENT != 'web': + return + + for client_name, client_config in self._CLIENT_CONFIGS.items(): + if not client_config['CACHE_ONLY']: + continue + + cache_key = client_config['CACHE_KEY'] + if cache_key not in self._oauth_tokens: + if token := 
self.cache.load(self._NETRC_MACHINE, cache_key): + self._oauth_tokens[cache_key] = token + + if self._oauth_tokens.get(cache_key): + self._DEFAULT_CLIENT = client_name + self.write_debug( + f'Found cached {client_name} token; using {client_name} as default API client') + return + def _get_video_password(self): password = self.get_param('videopassword') if password is None: @@ -200,9 +224,6 @@ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): if vimeo_config: return self._parse_json(vimeo_config, video_id) - def _set_vimeo_cookie(self, name, value): - self._set_cookie('vimeo.com', name, value) - def _parse_config(self, config, video_id): video_data = config['video'] video_title = video_data.get('title') @@ -363,22 +384,26 @@ def _fetch_oauth_token(self, client): return f'Bearer {self._oauth_tokens[cache_key]}' def _get_requested_client(self): - default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT + if client := self._configuration_arg('client', [None], ie_key=VimeoIE)[0]: + if client not in self._CLIENT_CONFIGS: + raise ExtractorError( + f'Unsupported API client "{client}" requested. ' + f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + self.write_debug( + f'Using {client} API client as specified by extractor argument', only_once=True) + return client - client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0] - if client not in self._CLIENT_CONFIGS: - raise ExtractorError( - f'Unsupported API client "{client}" requested. 
' - f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + if self._is_logged_in: + return self._DEFAULT_AUTHED_CLIENT - return client + return self._DEFAULT_CLIENT def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): client = force_client or self._get_requested_client() client_config = self._CLIENT_CONFIGS[client] if client_config['REQUIRES_AUTH'] and not self._is_logged_in: - self.raise_login_required(f'The {client} client requires authentication') + self.raise_login_required(f'The {client} client only works when logged-in') return self._download_json( join_nonempty( @@ -1192,7 +1217,6 @@ def _try_album_password(self, url): raise ExtractorError( 'This album is protected by a password, use the --video-password option', expected=True) - self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_json( f'https://vimeo.com/showcase/{album_id}/auth', @@ -1589,7 +1613,6 @@ def _real_extract(self, url): raise ExtractorError( 'This album is protected by a password, use the --video-password option', expected=True) - self._set_vimeo_cookie('vuid', viewer['vuid']) try: hashed_pass = self._download_json( f'https://vimeo.com/showcase/{album_id}/auth',