From 3e918d825d7ff367812658957b281b8cda8f9ebb Mon Sep 17 00:00:00 2001
From: Roland Crosby
Date: Tue, 22 Jul 2025 13:50:42 -0400
Subject: [PATCH 01/27] [pp/XAttrMetadata] Add macOS "Where from" attribute (#12664)

Authored by: rolandcrosby
---
 yt_dlp/postprocessor/xattrpp.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py
index e486b797b..fd83d783b 100644
--- a/yt_dlp/postprocessor/xattrpp.py
+++ b/yt_dlp/postprocessor/xattrpp.py
@@ -33,8 +33,17 @@ class XAttrMetadataPP(PostProcessor):
         # (e.g., 4kB on ext4), and we don't want to have the other ones fail
         'user.dublincore.description': 'description',
         # 'user.xdg.comment': 'description',
+        'com.apple.metadata:kMDItemWhereFroms': 'webpage_url',
     }
 
+    APPLE_PLIST_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<array>
+\t<string>%s</string>
+</array>
+</plist>'''
+
     def run(self, info):
         mtime = os.stat(info['filepath']).st_mtime
         self.to_screen('Writing metadata to file\'s xattrs')
@@ -44,6 +53,8 @@ def run(self, info):
                 if value:
                     if infoname == 'upload_date':
                         value = hyphenate_date(value)
+                    elif xattrname == 'com.apple.metadata:kMDItemWhereFroms':
+                        value = self.APPLE_PLIST_TEMPLATE % value
 
                     write_xattr(info['filepath'], xattrname, value.encode())
         except XAttrUnavailableError as e:

From eed94c7306d4ecdba53ad8783b1463a9af5c97f1 Mon Sep 17 00:00:00 2001
From: Simon Sawicki
Date: Tue, 22 Jul 2025 20:10:51 +0200
Subject: [PATCH 02/27] [utils] Add `WINDOWS_VT_MODE` to globals (#12460)

Authored by: Grub4K
---
 test/test_compat.py      |  3 ---
 yt_dlp/YoutubeDL.py      |  4 ++--
 yt_dlp/compat/_legacy.py |  2 +-
 yt_dlp/globals.py        |  2 ++
 yt_dlp/utils/_utils.py   | 10 +++-------
 5 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index b1cc2a818..3aa9c0c51 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -21,9 +21,6 @@ def test_compat_passthrough(self):
         with self.assertWarns(DeprecationWarning):
             _ = compat.compat_basestring
 
-        with self.assertWarns(DeprecationWarning):
-            _ = compat.WINDOWS_VT_MODE
-
         self.assertEqual(urllib.request.getproxies, getproxies)
 
         with self.assertWarns(DeprecationWarning):
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 76fd18c33..a9f347bf4 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -36,6 +36,7 @@ from .globals import (
     IN_CLI,
     LAZY_EXTRACTORS,
+    WINDOWS_VT_MODE,
     plugin_ies,
     plugin_ies_overrides,
     plugin_pps,
@@ -4040,8 +4041,7 @@ def get_encoding(stream):
             if os.environ.get('TERM', '').lower() == 'dumb':
                 additional_info.append('dumb')
             if not supports_terminal_sequences(stream):
-                from .utils import WINDOWS_VT_MODE  # Must be imported locally
-                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
+                additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI')
             if additional_info:
                 ret = f'{ret} ({",".join(additional_info)})'
             return ret
diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py
index dae2c1459..2f3e35d4a 100644
--- a/yt_dlp/compat/_legacy.py
+++ b/yt_dlp/compat/_legacy.py
@@ -37,7 +37,7 @@
 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES  # noqa: F401
 from ..networking.exceptions import HTTPError as compat_HTTPError
 
-passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
+passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',))
 
 
 # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
diff --git a/yt_dlp/globals.py b/yt_dlp/globals.py
index 0cf276cc9..81ad00448 100644
--- a/yt_dlp/globals.py
+++ b/yt_dlp/globals.py
@@ -1,3 +1,4 @@
+import os from collections import defaultdict # Please Note: Due to necessary changes and the complex nature involved in the plugin/globals system, @@ -28,3 +29,4 @@ def __repr__(self, /): # Misc IN_CLI = Indirect(False) LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled +WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 7d79f417f..1cb62712b 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -52,7 +52,7 @@ compat_HTMLParseError, ) from ..dependencies import xattr -from ..globals import IN_CLI +from ..globals import IN_CLI, WINDOWS_VT_MODE __name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module @@ -4759,13 +4759,10 @@ def jwt_decode_hs256(jwt): return json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) -WINDOWS_VT_MODE = False if os.name == 'nt' else None - - @functools.cache def supports_terminal_sequences(stream): if os.name == 'nt': - if not WINDOWS_VT_MODE: + if not WINDOWS_VT_MODE.value: return False elif not os.getenv('TERM'): return False @@ -4802,8 +4799,7 @@ def windows_enable_vt_mode(): finally: os.close(handle) - global WINDOWS_VT_MODE - WINDOWS_VT_MODE = True + WINDOWS_VT_MODE.value = True supports_terminal_sequences.cache_clear() From c59ad2b066bbccd3cc4eed580842f961bce7dd4a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:34:03 -0500 Subject: [PATCH 03/27] [utils] `random_user_agent`: Bump versions (#13543) Closes #5362 Authored by: bashonly --- yt_dlp/extractor/adobepass.py | 8 ++---- yt_dlp/extractor/bilibili.py | 7 ----- yt_dlp/extractor/francaisfacile.py | 13 +-------- yt_dlp/extractor/mitele.py | 2 +- yt_dlp/extractor/sproutvideo.py | 2 +- yt_dlp/extractor/telecinco.py | 13 +-------- yt_dlp/utils/networking.py | 46 +++--------------------------- 7 files changed, 10 insertions(+), 81 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 8c2d9d934..eb45734ec 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -48,7 +48,6 @@ 'username_field': 'user', 'password_field': 'passwd', 'login_hostname': 'login.xfinity.com', - 'needs_newer_ua': True, }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -1379,11 +1378,8 @@ def _download_webpage_handle(self, *args, **kwargs): @staticmethod def _get_mso_headers(mso_info): - # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - return { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', - } if mso_info.get('needs_newer_ua') else {} + # Not needed currently + return {} @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2846702f6..d00ac6317 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,13 +175,6 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): else: note = f'Downloading video formats for cid {cid}' - # TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735 - # playurl requests carrying old UA will be rejected - headers = { - 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36', - **(headers or {}), - } - return self._download_json( 
'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] diff --git a/yt_dlp/extractor/francaisfacile.py b/yt_dlp/extractor/francaisfacile.py index d3208c282..c432cf486 100644 --- a/yt_dlp/extractor/francaisfacile.py +++ b/yt_dlp/extractor/francaisfacile.py @@ -1,9 +1,7 @@ import urllib.parse from .common import InfoExtractor -from ..networking.exceptions import HTTPError from ..utils import ( - ExtractorError, float_or_none, url_or_none, ) @@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor): def _real_extract(self, url): display_id = urllib.parse.unquote(self._match_id(url)) - - try: # yt-dlp's default user-agents are too old and blocked by the site - webpage = self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient - webpage = self._download_webpage(url, display_id, impersonate=True) + webpage = self._download_webpage(url, display_id) data = self._search_json( r']+\bdata-media-id=[^>]+\btype="application/json"[^>]*>', diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index 0dded38c6..76fef337a 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -79,7 +79,7 @@ class MiTeleIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) pre_player = self._search_json( r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', webpage, 'Pre Player', display_id)['prePlayer'] diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 494042738..4afa83871 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -99,7 +99,7 @@ def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) webpage = self._download_webpage( - url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True) + url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index 2dbe2a776..a34f2afd4 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -63,17 +63,6 @@ def _parse_content(self, content, url): 'http_headers': headers, } - def _download_akamai_webpage(self, url, display_id): - try: # yt-dlp's default user-agents are too old and blocked by akamai - return self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient to bypass akamai - return self._download_webpage(url, display_id, impersonate=True) - class TelecincoIE(TelecincoBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' @@ -151,7 +140,7 @@ class TelecincoIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage 
= self._download_webpage(url, display_id) article = self._search_json( r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', webpage, 'article', display_id)['article'] diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py index 9fcab6456..467312ce7 100644 --- a/yt_dlp/utils/networking.py +++ b/yt_dlp/utils/networking.py @@ -15,48 +15,10 @@ def random_user_agent(): - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '90.0.4430.212', - '90.0.4430.24', - '90.0.4430.70', - '90.0.4430.72', - '90.0.4430.85', - '90.0.4430.93', - '91.0.4472.101', - '91.0.4472.106', - '91.0.4472.114', - '91.0.4472.124', - '91.0.4472.164', - '91.0.4472.19', - '91.0.4472.77', - '92.0.4515.107', - '92.0.4515.115', - '92.0.4515.131', - '92.0.4515.159', - '92.0.4515.43', - '93.0.4556.0', - '93.0.4577.15', - '93.0.4577.63', - '93.0.4577.82', - '94.0.4606.41', - '94.0.4606.54', - '94.0.4606.61', - '94.0.4606.71', - '94.0.4606.81', - '94.0.4606.85', - '95.0.4638.17', - '95.0.4638.50', - '95.0.4638.54', - '95.0.4638.69', - '95.0.4638.74', - '96.0.4664.18', - '96.0.4664.45', - '96.0.4664.55', - '96.0.4664.93', - '97.0.4692.20', - ) - return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36' + # Target versions released within the last ~6 months + CHROME_MAJOR_VERSION_RANGE = (132, 138) + return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0') class HTTPHeaderDict(dict): From 59765ecbc08d18005de7143fbb1d1caf90239471 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:46:46 -0500 Subject: [PATCH 04/27] [ie/sproutvideo] Fix extractor (#13813) Authored by: bashonly --- yt_dlp/extractor/sproutvideo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 4afa83871..ff9dc7dee 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -101,8 +101,8 @@ def _real_extract(self, url): webpage = self._download_webpage( url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( - r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, - contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', + r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage, 'player info', + video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode()) # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e] From 7e3f48d64d237281a97b3df1a61980c78a0302fe Mon Sep 17 00:00:00 2001 From: Atsushi2965 <142886283+atsushi2965@users.noreply.github.com> Date: Wed, 23 Jul 2025 06:55:00 +0900 Subject: [PATCH 05/27] [pp/EmbedThumbnail] Fix ffmpeg args for embedding in mp3 (#13720) Authored by: atsushi2965 --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index d8ba220ca..39e8826c6 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -90,7 +90,7 @@ def run(self, info): if info['ext'] == 'mp3': options = [ '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', 
'1', '-id3v2_version', '3', - '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] + '-metadata:s:v', 'title=Album cover', '-metadata:s:v', 'comment=Cover (front)'] self._report_run('ffmpeg', filename) self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) From afaf60d9fd5a0c7a85aeb1374fd97fbc13cd652c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 23 Jul 2025 18:27:20 -0500 Subject: [PATCH 06/27] [ie/vimeo] Fix login support and require authentication (#13823) Closes #13822 Authored by: bashonly --- README.md | 2 +- yt_dlp/extractor/vimeo.py | 59 +++++++++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index f1d119317..e5bd21b9c 100644 --- a/README.md +++ b/README.md @@ -1902,7 +1902,7 @@ #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) #### vimeo -* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens * `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 7ffe89f22..c45264bb5 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -49,7 +49,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'Cannot download embed-only video without embedding URL. 
Please call yt-dlp ' 'with the URL of the page that embeds this video.') - _DEFAULT_CLIENT = 'android' + _DEFAULT_CLIENT = 'web' _DEFAULT_AUTHED_CLIENT = 'web' _CLIENT_HEADERS = { 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', @@ -58,7 +58,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): _CLIENT_CONFIGS = { 'android': { 'CACHE_KEY': 'oauth-token-android', - 'CACHE_ONLY': False, + 'CACHE_ONLY': True, 'VIEWER_JWT': False, 'REQUIRES_AUTH': False, 'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==', @@ -88,6 +88,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): ), }, 'web': { + 'CACHE_ONLY': False, 'VIEWER_JWT': True, 'REQUIRES_AUTH': True, 'USER_AGENT': None, @@ -142,7 +143,6 @@ def _perform_login(self, username, password): 'service': 'vimeo', 'token': viewer['xsrft'], } - self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_webpage( self._LOGIN_URL, None, 'Logging in', @@ -151,16 +151,40 @@ def _perform_login(self, username, password): 'Referer': self._LOGIN_URL, }) except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 418: + if isinstance(e.cause, HTTPError) and e.cause.status in (405, 418): raise ExtractorError( 'Unable to log in: bad username or password', expected=True) raise ExtractorError('Unable to log in') + # Clear unauthenticated viewer info + self._viewer_info = None + def _real_initialize(self): - if self._LOGIN_REQUIRED and not self._is_logged_in: + if self._is_logged_in: + return + + if self._LOGIN_REQUIRED: self.raise_login_required() + if self._DEFAULT_CLIENT != 'web': + return + + for client_name, client_config in self._CLIENT_CONFIGS.items(): + if not client_config['CACHE_ONLY']: + continue + + cache_key = client_config['CACHE_KEY'] + if cache_key not in self._oauth_tokens: + if token := self.cache.load(self._NETRC_MACHINE, cache_key): + self._oauth_tokens[cache_key] = token + + if self._oauth_tokens.get(cache_key): + self._DEFAULT_CLIENT = client_name + self.write_debug( + f'Found cached {client_name} token; using {client_name} as default API client') + return + def _get_video_password(self): password = self.get_param('videopassword') if password is None: @@ -200,9 +224,6 @@ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): if vimeo_config: return self._parse_json(vimeo_config, video_id) - def _set_vimeo_cookie(self, name, value): - self._set_cookie('vimeo.com', name, value) - def _parse_config(self, config, video_id): video_data = config['video'] video_title = video_data.get('title') @@ -363,22 +384,26 @@ def _fetch_oauth_token(self, client): return f'Bearer {self._oauth_tokens[cache_key]}' def _get_requested_client(self): - default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT + if client := self._configuration_arg('client', [None], ie_key=VimeoIE)[0]: + if client not in self._CLIENT_CONFIGS: + raise ExtractorError( + f'Unsupported API client "{client}" requested. ' + f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + self.write_debug( + f'Using {client} API client as specified by extractor argument', only_once=True) + return client - client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0] - if client not in self._CLIENT_CONFIGS: - raise ExtractorError( - f'Unsupported API client "{client}" requested. 
' - f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True) + if self._is_logged_in: + return self._DEFAULT_AUTHED_CLIENT - return client + return self._DEFAULT_CLIENT def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): client = force_client or self._get_requested_client() client_config = self._CLIENT_CONFIGS[client] if client_config['REQUIRES_AUTH'] and not self._is_logged_in: - self.raise_login_required(f'The {client} client requires authentication') + self.raise_login_required(f'The {client} client only works when logged-in') return self._download_json( join_nonempty( @@ -1192,7 +1217,6 @@ def _try_album_password(self, url): raise ExtractorError( 'This album is protected by a password, use the --video-password option', expected=True) - self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_json( f'https://vimeo.com/showcase/{album_id}/auth', @@ -1589,7 +1613,6 @@ def _real_extract(self, url): raise ExtractorError( 'This album is protected by a password, use the --video-password option', expected=True) - self._set_vimeo_cookie('vuid', viewer['vuid']) try: hashed_pass = self._download_json( f'https://vimeo.com/showcase/{album_id}/auth', From 0adeb1e54b2d7e95cd19999e71013877850f8f41 Mon Sep 17 00:00:00 2001 From: ischmidt20 Date: Thu, 24 Jul 2025 18:35:48 -0400 Subject: [PATCH 07/27] [ie/tbs] Fix truTV support (#9683) Closes #3400 Authored by: ischmidt20, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/tbs.py | 113 ++++++++++++++++++++++++++------ yt_dlp/extractor/trutv.py | 71 -------------------- yt_dlp/extractor/turner.py | 5 ++ 4 files changed, 97 insertions(+), 93 deletions(-) delete mode 100644 yt_dlp/extractor/trutv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 59a61e060..1aa2927f8 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2166,7 +2166,6 @@ from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE -from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import ( TubeTuGrazIE, diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index 80534731e..f8891671f 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -5,45 +5,110 @@ from ..utils import ( float_or_none, int_or_none, + make_archive_id, strip_or_none, ) +from ..utils.traversal import traverse_obj class TBSIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?Ptbs|tntdrama)\.com(?P/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P[^/?#]+))' + _SITE_INFO = { + 'tbs': ('TBS', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg'), + 'tntdrama': ('TNT', 
'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA'), + 'trutv': ('truTV', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'), + } + _VALID_URL = fr'''(?x) + https?://(?:www\.)?(?P{"|".join(map(re.escape, _SITE_INFO))})\.com + (?P/(?: + (?Pwatch(?:tnt|tbs|trutv))| + movies|shows/[^/?#]+/(?:clips|season-\d+/episode-\d+) + )/(?P[^/?#]+)) + ''' _TESTS = [{ - 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', + 'url': 'https://www.tbs.com/shows/american-dad/season-6/episode-12/you-debt-your-life', 'info_dict': { - 'id': '8d384cde33b89f3a43ce5329de42903ed5099887', + 'id': '984bdcd8db0cc00dc699927f2a411c8c6e0e48f3', 'ext': 'mp4', - 'title': 'Monster', - 'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.', - 'timestamp': 1508175329, - 'upload_date': '20171016', + 'title': 'You Debt Your Life', + 'description': 'md5:f211cfeb9187fd3cdb53eb0e8930d499', + 'duration': 1231.0, + 'thumbnail': r're:https://images\.tbs\.com/tbs/.+\.(?:jpe?g|png)', + 'chapters': 'count:4', + 'season': 'Season 6', + 'season_number': 6, + 'episode': 'Episode 12', + 'episode_number': 12, + 'timestamp': 1478276239, + 'upload_date': '20161104', }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.tntdrama.com/shows/the-librarians-the-next-chapter/season-1/episode-10/and-going-medieval', + 'info_dict': { + 'id': 'e487b31b663a8001864f62fd20907782f7b8ccb8', + 'ext': 'mp4', + 'title': 'And Going Medieval', + 'description': 'md5:5aed0ae23a6cf148a02fe3c1be8359fa', + 'duration': 2528.0, + 'thumbnail': r're:https://images\.tntdrama\.com/tnt/.+\.(?:jpe?g|png)', + 'chapters': 'count:7', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 10', + 'episode_number': 10, + 'timestamp': 1743107520, + 'upload_date': '20250327', }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/season-1/episode-1/got-the-bug-out', + 'info_dict': { + 'id': 'b457dd7458fd9e64b596355950b13a1ca799dc39', + 'ext': 'mp4', + 'title': 'Got the Bug Out', + 'description': 'md5:9eeddf6248f73517b0e5969b8a43c025', + 'duration': 1283.0, + 'thumbnail': r're:https://images\.trutv\.com/tru/.+\.(?:jpe?g|png)', + 'chapters': 'count:4', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1570040829, + 'upload_date': '20191002', + '_old_archive_ids': ['trutv b457dd7458fd9e64b596355950b13a1ca799dc39'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 
'http://www.tntdrama.com/shows/the-alienist/clips/monster', + 'only_matching': True, }, { 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew', 'only_matching': True, }, { 'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope', 'only_matching': True, + }, { + 'url': 'https://www.trutv.com/shows/impractical-jokers/season-9/episode-1/you-dirty-dog', + 'only_matching': True, + }, { + 'url': 'https://www.trutv.com/watchtrutv/east', + 'only_matching': True, + }, { + 'url': 'https://www.tbs.com/watchtbs/east', + 'only_matching': True, + }, { + 'url': 'https://www.tntdrama.com/watchtnt/east', + 'only_matching': True, }] - _SOFTWARE_STATEMENT_MAP = { - 'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg', - 'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA', - } def _real_extract(self, url): - site, path, display_id = self._match_valid_url(url).groups() + site, path, display_id, watch = self._match_valid_url(url).group('site', 'path', 'id', 'watch') + is_live = bool(watch) webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r']+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})', - webpage, 'drupal setting'), display_id) - is_live = 'watchtnt' in path or 'watchtbs' in path + drupal_settings = self._search_json( + r']+\bdata-drupal-selector="drupal-settings-json"[^>]*>', + webpage, 'drupal settings', display_id) video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path) media_id = video_data['mediaID'] @@ -51,10 +116,14 @@ def _real_extract(self, url): tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse( drupal_settings['ngtv_token_url']).query) + auth_info = traverse_obj(drupal_settings, ('top2', {dict})) or {} + site_name = auth_info.get('siteName') or self._SITE_INFO[site][0] + software_statement = auth_info.get('softwareStatement') or self._SITE_INFO[site][1] + info = self._extract_ngtv_info( - media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], { + media_id, tokenizer_query, software_statement, { 'url': url, - 'site_name': site[:3].upper(), + 'site_name': site_name, 'auth_required': video_data.get('authRequired') == '1' or is_live, 'is_live': is_live, }) @@ -87,4 +156,6 @@ def _real_extract(self, url): 'thumbnails': thumbnails, 'is_live': is_live, }) + if site == 'trutv': + info['_old_archive_ids'] = [make_archive_id(site, media_id)] return info diff --git a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py deleted file mode 100644 index c1d0cb0d1..000000000 --- 
a/yt_dlp/extractor/trutv.py +++ /dev/null @@ -1,71 +0,0 @@ -from .turner import TurnerBaseIE -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class TruTVIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P[0-9A-Za-z-]+)/(?:videos/(?P[0-9A-Za-z-]+)|(?P\d+))' - _TEST = { - 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html', - 'info_dict': { - 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1', - 'ext': 'mp4', - 'title': 'Sunlight-Activated Flower', - 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.", - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q' - - def _real_extract(self, url): - series_slug, clip_slug, video_id = self._match_valid_url(url).groups() - - if video_id: - path = 'episode' - display_id = video_id - else: - path = 'series/clip' - display_id = clip_slug - - data = self._download_json( - f'https://api.trutv.com/v2/web/{path}/{series_slug}/{display_id}', - display_id) - video_data = data['episode'] if video_id else data['info'] - media_id = video_data['mediaId'] - title = video_data['title'].strip() - - info = self._extract_ngtv_info( - media_id, {}, self._SOFTWARE_STATEMENT, { - 'url': url, - 'site_name': 'truTV', - 'auth_required': video_data.get('isAuthRequired'), - }) - - thumbnails = [] - for image in video_data.get('images', []): - image_url = image.get('srcUrl') - if not image_url: - continue - thumbnails.append({ - 'url': image_url, - 'width': int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) - - info.update({ - 'id': media_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnails': thumbnails, - 'timestamp': parse_iso8601(video_data.get('publicationDate')), - 'series': video_data.get('showTitle'), - 'season_number': int_or_none(video_data.get('seasonNum')), - 'episode_number': int_or_none(video_data.get('episodeNum')), - }) - return info diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 4493705e9..a1a7fd690 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -251,6 +251,11 @@ def _extract_ngtv_info(self, media_id, tokenizer_query, software_statement, ap_d 'end_time': start_time + chapter_duration, }) + if is_live: + for f in formats: + # Prevent ffmpeg from adding its own http headers or else we get HTTP Error 403 + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-icy', '0']} + return { 'formats': formats, 'chapters': chapters, From 485de69dbfeb7de7bcf9f7fe16d6c6ba9e81e1a0 Mon Sep 17 00:00:00 2001 From: Barry van Oudtshoorn Date: Fri, 25 Jul 2025 12:00:31 +0800 Subject: [PATCH 08/27] [ie/Parlview] Rework extractor (#13788) Closes #13787 Authored by: barryvan --- yt_dlp/extractor/parlview.py | 80 ++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/yt_dlp/extractor/parlview.py 
b/yt_dlp/extractor/parlview.py
index b93b5edac..9c7efc58f 100644
--- a/yt_dlp/extractor/parlview.py
+++ b/yt_dlp/extractor/parlview.py
@@ -1,63 +1,63 @@
+import re
+
 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    try_get,
-    unified_timestamp,
-)
+from ..utils import parse_duration, parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj
 
 
 class ParlviewIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
+    _VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P<id>[^/?#]+)'
     _TESTS = [{
-        'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661',
+        'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614',
         'info_dict': {
-            'id': '542661',
+            'id': '3406614',
             'ext': 'mp4',
-            'title': "Australia's Family Law System [Part 2]",
-            'duration': 5799,
-            'description': 'md5:7099883b391619dbae435891ca871a62',
-            'timestamp': 1621430700,
-            'upload_date': '20210519',
-            'uploader': 'Joint Committee',
+            'title': 'Senate Chamber',
+            'description': 'Official Recording of Senate Proceedings from the Australian Parliament',
+            'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg',
+            'upload_date': '20250325',
+            'duration': 17999,
+            'timestamp': 1742939400,
         },
         'params': {
             'skip_download': True,
         },
     }, {
-        'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936',
-        'only_matching': True,
+        'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv',
+        'info_dict': {
+            'id': 'SV1394.dv',
+            'ext': 'mp4',
+            'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]',
+            'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament',
+            'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg',
+            'upload_date': '19960822',
+            'duration': 14765,
+            'timestamp': 840754200,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
-    _API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
-    _MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        media = self._download_json(self._API_URL % video_id, video_id).get('media')
-        timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/'
+        video_details = self._download_json(
+            f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails']
 
-        stream = try_get(media, lambda x: x['renditions'][0], dict)
-        if not stream:
-            self.raise_no_formats('No streams were detected')
-        elif stream.get('streamType') != 'VOD':
-            self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType'))))
-        formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            video_details['files']['file']['url'], video_id, 'mp4')
 
-        media_info = self._download_webpage(
-            self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
+        DURATION_RE = re.compile(r'(?P<duration>\d+:\d+:\d+):\d+')
 
         return {
             'id': video_id,
-            'url': url,
-            'title': self._html_search_regex(r'

([^<]+)<', webpage, 'title', fatal=False), 'formats': formats, - 'duration': int_or_none(media.get('duration')), - 'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')), - 'description': self._html_search_regex( - r']+class="descripti?on"[^>]*>[^>]+[^>]+>[^>]+>([^<]+)', - webpage, 'description', fatal=False), - 'uploader': self._html_search_regex( - r'[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False), - 'thumbnail': media.get('staticImage'), + 'subtitles': subtitles, + **traverse_obj(video_details, { + 'title': (('parlViewTitle', 'title'), {str}, any), + 'description': ('parlViewDescription', {str}), + 'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}), + 'timestamp': ('recordingFrom', {parse_iso8601}), + 'thumbnail': ('thumbUrl', {url_or_none}), + }), } From 4385480795acda35667be008d0bf26b46e9d65b4 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sat, 26 Jul 2025 03:41:21 +0900 Subject: [PATCH 09/27] [utils] `parse_resolution`: Support width-only pattern (#13802) Authored by: doe1080 --- test/test_utils.py | 1 + yt_dlp/utils/_utils.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index aedb565ec..44747efda 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1373,6 +1373,7 @@ def test_parse_resolution(self): self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 1cb62712b..a5471da4d 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1875,6 +1875,11 @@ def parse_resolution(s, *, lenient=False): if mobj: return {'height': int(mobj.group(1)) * 540} + if lenient: + mobj = re.search(r'(? 
Date: Fri, 25 Jul 2025 20:55:41 +0200 Subject: [PATCH 10/27] [ie/PlyrEmbed] Add extractor (#13836) Closes #13827 Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/plyr.py | 104 ++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 yt_dlp/extractor/plyr.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1aa2927f8..a2042557d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1568,6 +1568,7 @@ ) from .plutotv import PlutoTVIE from .plvideo import PlVideoIE +from .plyr import PlyrEmbedIE from .podbayfm import ( PodbayFMChannelIE, PodbayFMIE, diff --git a/yt_dlp/extractor/plyr.py b/yt_dlp/extractor/plyr.py new file mode 100644 index 000000000..c5f27cfd9 --- /dev/null +++ b/yt_dlp/extractor/plyr.py @@ -0,0 +1,104 @@ +import re + +from .common import InfoExtractor +from .vimeo import VimeoIE + + +class PlyrEmbedIE(InfoExtractor): + _VALID_URL = False + _WEBPAGE_TESTS = [{ + # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1" + 'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/', + 'info_dict': { + 'id': '522319456', + 'ext': 'mp4', + 'title': '200.000.000 Mouths (1950–51)', + 'uploader': 'Zeughauskino', + 'uploader_url': '', + 'comment_count': int, + 'like_count': int, + 'duration': 963, + 'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d', + 'timestamp': 1615467405, + 'upload_date': '20210311', + 'release_timestamp': 1615467405, + 'release_date': '20210311', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + # data-plyr-provider="vimeo" data-plyr-embed-id="803435276" + 'url': 'https://www.inarcassa.it/', + 'info_dict': { + 'id': '803435276', + 'ext': 'mp4', + 'title': 'HOME_Moto_Perpetuo', + 'uploader': 'Inarcassa', + 'uploader_url': '', + 'duration': 38, + 'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI" + 'url': 'https://www.profile.nl', + 'info_dict': { + 'id': 'GF-BjYKoAqI', + 'ext': 'mp4', + 'title': 'PROFILE: Recruitment Profile', + 'description': '', + 'media_type': 'video', + 'uploader': 'Profile Nederland', + 'uploader_id': '@profilenederland', + 'uploader_url': 'https://www.youtube.com/@profilenederland', + 'channel': 'Profile Nederland', + 'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg', + 'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg', + 'channel_follower_count': int, + 'view_count': int, + 'like_count': int, + 'age_limit': 0, + 'duration': 39, + 'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg', + 'categories': ['Autos & Vehicles'], + 'tags': [], + 'timestamp': 1675692990, + 'upload_date': '20230206', + 'playable_in_embed': True, + 'availability': 'public', + 'live_status': 'not_live', + }, + }, { + # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube" + 'url': 'https://www.vnis.edu.vn', + 'info_dict': { + 'id': 'vnis.edu', + 'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ', + 'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e', + 'age_limit': 0, + 'thumbnail': 
'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png',
+            'timestamp': 1753233356,
+            'upload_date': '20250723',
+        },
+        'playlist_count': 3,
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        plyr_embeds = re.finditer(r'''(?x)
+            <div[^>]+(?:
+                data-plyr-embed-id="(?P<id1>[^"]+)"[^>]+data-plyr-provider="(?P<provider1>[^"]+)"|
+                data-plyr-provider="(?P<provider2>[^"]+)"[^>]+data-plyr-embed-id="(?P<id2>[^"]+)"
+            )[^>]*>''', webpage)
+        for mobj in plyr_embeds:
+            embed_id = mobj.group('id1') or mobj.group('id2')
+            provider = mobj.group('provider1') or mobj.group('provider2')
+            if provider == 'vimeo':
+                if not re.match(r'https?://', embed_id):
+                    embed_id = f'https://player.vimeo.com/video/{embed_id}'
+                yield VimeoIE._smuggle_referrer(embed_id, url)
+            elif provider == 'youtube':
+                if not re.match(r'https?://', embed_id):
+                    embed_id = f'https://youtube.com/watch?v={embed_id}'
+                yield embed_id

From d399505fdf8292332bdc91d33859a0b0d08104fd Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 25 Jul 2025 14:44:39 -0500
Subject: [PATCH 11/27] [fd/external] Work around ffmpeg's `file:` URL handling (#13844)

Closes #13781

Authored by: bashonly
---
 yt_dlp/downloader/external.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index ee73ac043..65ed83991 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -572,7 +572,21 @@ def _call_downloader(self, tmpfilename, info_dict):
             if end_time:
                 args += ['-t', str(end_time - start_time)]
 
-            args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
+            url = fmt['url']
+            if self.params.get('enable_file_urls') and url.startswith('file:'):
+                # The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
+                # so only local segments can be read unless we also include 'http,https,tcp,tls'
+                args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls']
+                # ffmpeg incorrectly handles 'file:' URLs by only removing the
+                # 'file:' prefix and treating the rest as if it's a normal filepath.
+                # FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs:
+                # - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:'
+                # - On *nix, replace 'file://localhost/' with 'file:/'
+                # Ref: https://github.com/yt-dlp/yt-dlp/issues/13781
+                #      https://trac.ffmpeg.org/ticket/2702
+                url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
+
+            args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
 
             if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
                 args += ['-c', 'copy']

From 3e609b2cedd285739bf82c7af7853735092070a4 Mon Sep 17 00:00:00 2001
From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com>
Date: Fri, 25 Jul 2025 21:33:49 +0000
Subject: [PATCH 12/27] [ie/FaulioLive] Add extractor (#13421)

Authored by: CasperMcFadden95, seproDev

Co-authored-by: sepro
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/faulio.py      | 92 +++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 yt_dlp/extractor/faulio.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index a2042557d..69389671e 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -640,6 +640,7 @@
     FancodeVodIE,
 )
 from .fathom import FathomIE
+from .faulio import FaulioLiveIE
 from .faz import FazIE
 from .fc2 import (
     FC2IE,
diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py
new file mode 100644
index 000000000..393023503
--- /dev/null
+++ b/yt_dlp/extractor/faulio.py
@@ -0,0 +1,92 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import js_to_json, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class FaulioLiveIE(InfoExtractor):
+    _DOMAINS = (
+        'aloula.sba.sa',
+        'maraya.sba.net.ae',
+        'sat7plus.org',
+    )
+    _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P<id>[a-zA-Z0-9-]+)'
+    _TESTS = [{
+        'url': 'https://aloula.sba.sa/live/saudiatv',
+        'info_dict': {
+            'id': 'aloula.faulio.com_saudiatv',
+            'title': str,
+            'description': str,
+            'ext': 'mp4',
+            'live_status': 'is_live',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://maraya.sba.net.ae/live/1',
+        'info_dict': {
+            'id': 'maraya.faulio.com_1',
+            'title': str,
+            'description': str,
+            'ext': 'mp4',
+            'live_status': 'is_live',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://sat7plus.org/live/pars',
+        'info_dict': {
+            'id': 'sat7.faulio.com_pars',
+            'title': str,
+            'description': str,
+            'ext': 'mp4',
+            'live_status': 'is_live',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://sat7plus.org/fa/live/arabic',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        config_data = self._search_json(
+            r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json)
+        api_base = config_data['public']['TRANSLATIONS_API_URL']
+
+        channel = traverse_obj(
+            self._download_json(f'{api_base}/channels', video_id),
+            (lambda k, v: v['url'] == video_id, any))
+
+        formats = []
+        subtitles = {}
+        if hls_url := traverse_obj(channel, ('streams', 'hls', {url_or_none})):
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+        if mpd_url := traverse_obj(channel, ('streams', 'mpd', {url_or_none})):
+            fmts, subs = self._extract_mpd_formats_and_subtitles(
+                mpd_url, video_id, mpd_id='dash', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+        return {
+            'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}',
+            **traverse_obj(channel, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+            }),
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': True,
+        }

From 30302df22b7b431ce920e0f7298cd10be9989967 Mon Sep 17 00:00:00 2001
From: InvalidUsernameException
Date: Sat, 26 Jul 2025 00:22:32 +0200
Subject: [PATCH 13/27] [ie/sportdeuschland] Support embedded player URLs (#13833)

Closes #13766

Authored by: InvalidUsernameException
---
 yt_dlp/extractor/sportdeutschland.py | 154 +++++++++++++--------------
 1 file changed, 77 insertions(+), 77 deletions(-)

diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py
index 8349d9604..0b7d90a07 100644
--- a/yt_dlp/extractor/sportdeutschland.py
+++ b/yt_dlp/extractor/sportdeutschland.py
@@ -8,84 +8,9 @@
 
 
 class SportDeutschlandIE(InfoExtractor):
-    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
+    _VALID_URL = r'https?://(?:player\.)?sportdeutschland\.tv/(?P<id>(?:[^/?#]+/)?[^?#/&]+)'
     _TESTS = [{
-        'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
-        'info_dict': {
-            'id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54',
-            'ext': 'mp4',
-            'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga',
-            'display_id': 'blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
-            'description': 'md5:a288c794a5ee69e200d8f12982f81a87',
-            'live_status': 'was_live',
-            'channel': 'Blau-Weiss Buchholz Tanzsport',
-            'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport',
-            'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3',
-            'duration': 32447,
-            'upload_date': '20230114',
-            'timestamp': 1673733618,
-        },
-        'skip': 'not found',
-    }, {
-        'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
-        'info_dict': {
-            'id': '95c80c52-6b9a-4ae9-9197-984145adfced',
-            'ext': 'mp4',
-            'title': 'BWF Tour: 1.
Runde Feld 1 - YONEX GAINWARD German Open 2022', - 'display_id': 'deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0', - 'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e', - 'live_status': 'was_live', - 'channel': 'Deutscher Badminton Verband', - 'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband', - 'channel_id': '93ca5866-2551-49fc-8424-6db35af58920', - 'duration': 41097, - 'upload_date': '20220309', - 'timestamp': 1646860727.0, - }, - 'skip': 'not found', - }, { - 'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023', - 'info_dict': { - 'id': '9889785e-55b0-4d97-a72a-ce9a9f157cce', - 'title': 'Formationswochenende Latein 2023 - Samstag', - 'display_id': 'ggcbremen/formationswochenende-latein-2023', - 'description': 'md5:6e4060d40ff6a8f8eeb471b51a8f08b2', - 'live_status': 'was_live', - 'channel': 'Grün-Gold-Club Bremen e.V.', - 'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb', - 'channel_url': 'https://sportdeutschland.tv/ggcbremen', - }, - 'playlist_count': 3, - 'playlist': [{ - 'info_dict': { - 'id': '988e1fea-9d44-4fab-8c72-3085fb667547', - 'ext': 'mp4', - 'channel_url': 'https://sportdeutschland.tv/ggcbremen', - 'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb', - 'channel': 'Grün-Gold-Club Bremen e.V.', - 'duration': 86, - 'title': 'Formationswochenende Latein 2023 - Samstag Part 1', - 'upload_date': '20230225', - 'timestamp': 1677349909, - 'live_status': 'was_live', - }, - }], - 'skip': 'not found', - }, { - 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1', - 'info_dict': { - 'id': '95d71b8a-370a-4b87-ad16-94680da18528', - 'ext': 'mp4', - 'title': r're:Gymnastik International - Tag 1 .+', - 'display_id': 'dtb/gymnastik-international-tag-1', - 'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab', - 'channel': 'Deutscher Turner-Bund', - 'channel_url': 'https://sportdeutschland.tv/dtb', - 'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52', - 'live_status': 'is_live', - }, - 'skip': 'live', - }, { + # Single-part video, direct link 'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', 'md5': '35c11a19395c938cdd076b93bda54cde', 'info_dict': { @@ -100,7 +25,82 @@ class SportDeutschlandIE(InfoExtractor): 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', 'timestamp': 1749913117, 'upload_date': '20250614', + 'duration': 12287.0, }, + }, { + # Single-part video, embedded player link + 'url': 'https://player.sportdeutschland.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc', + 'info_dict': { + 'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a', + 'ext': 'mp4', + 'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates', + 'display_id': '9e9619c4-7d77-43c4-926d-49fb57dc06dc', + 'channel': 'Rostock Griffins', + 'channel_url': 'https://sportdeutschland.tv/rostock-griffins', + 'live_status': 'was_live', + 'description': 'md5:60cb00067e55dafa27b0933a43d72862', + 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', + 'timestamp': 1749913117, + 'upload_date': '20250614', + 'duration': 12287.0, + }, + 'params': {'skip_download': True}, + }, { + # Multi-part video + 'url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2', + 'info_dict': { + 'id': '9f63d737-2444-4e3a-a1ea-840df73fd481', + 'display_id': 'rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2', + 'title': 'Volleyball w: Japan vs. 
Braslien - Halbfinale 2', + 'description': 'md5:0a17da15e48a687e6019639c3452572b', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', + 'live_status': 'was_live', + }, + 'playlist_count': 2, + 'playlist': [{ + 'info_dict': { + 'id': '9f725a94-d43e-40ff-859d-13da3081bb04', + 'ext': 'mp4', + 'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 1', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', + 'duration': 14773.0, + 'timestamp': 1753085197, + 'upload_date': '20250721', + 'live_status': 'was_live', + }, + }, { + 'info_dict': { + 'id': '9f725a94-370e-4477-89ac-1751098e3217', + 'ext': 'mp4', + 'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 2', + 'channel': 'Rhine-Ruhr 2025 FISU World University Games', + 'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334', + 'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games', + 'duration': 14773.0, + 'timestamp': 1753128421, + 'upload_date': '20250721', + 'live_status': 'was_live', + }, + }], + }, { + # Livestream + 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1', + 'info_dict': { + 'id': '95d71b8a-370a-4b87-ad16-94680da18528', + 'ext': 'mp4', + 'title': r're:Gymnastik International - Tag 1 .+', + 'display_id': 'dtb/gymnastik-international-tag-1', + 'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab', + 'channel': 'Deutscher Turner-Bund', + 'channel_url': 'https://sportdeutschland.tv/dtb', + 'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52', + 'live_status': 'is_live', + }, + 'skip': 'live', }] def _process_video(self, asset_id, video): From 1fe83b0111277a6f214c5ec1819cfbf943508baf Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sun, 27 Jul 2025 00:34:22 +0900 Subject: [PATCH 14/27] [ie/eagleplatform] Remove extractors (#13469) Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 4 - yt_dlp/extractor/eagleplatform.py | 215 ------------------------------ yt_dlp/extractor/generic.py | 32 ----- yt_dlp/extractor/livejournal.py | 1 + 4 files changed, 1 insertion(+), 251 deletions(-) delete mode 100644 yt_dlp/extractor/eagleplatform.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 69389671e..617c2c5ce 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -571,10 +571,6 @@ DWIE, DWArticleIE, ) -from .eagleplatform import ( - ClipYouEmbedIE, - EaglePlatformIE, -) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py deleted file mode 100644 index 685f8c059..000000000 --- a/yt_dlp/extractor/eagleplatform.py +++ /dev/null @@ -1,215 +0,0 @@ -import functools -import re - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - smuggle_url, - unsmuggle_url, - url_or_none, -) - - -class EaglePlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - eagleplatform:(?P[^/]+):| - https?://(?P.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id= - ) - (?P\d+) - ''' - _EMBED_REGEX = 
[r']+src=(["\'])(?P(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1'] - _TESTS = [{ - # http://lenta.ru/news/2015/03/06/navalny/ - 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '227304', - 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, - }, - }, { - # http://muz-tv.ru/play/7129/ - # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true - 'url': 'eagleplatform:media.clipyou.ru:12820', - 'md5': '358597369cf8ba56675c1df15e7af624', - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'skip': 'Georestricted', - }, { - # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', - 'only_matching': True, - }] - - @classmethod - def _extract_embed_urls(cls, url, webpage): - add_referer = functools.partial(smuggle_url, data={'referrer': url}) - - res = tuple(super()._extract_embed_urls(url, webpage)) - if res: - return map(add_referer, res) - - PLAYER_JS_RE = r''' - ]+ - src=(?P["\'])(?:https?:)?//(?P(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) - .+? - ''' - # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) - mobj = re.search( - rf'''(?xs) - {PLAYER_JS_RE} - ]+ - class=(?P["\'])eagleplayer(?P=qclass)[^>]+ - data-id=["\'](?P\d+) - ''', webpage) - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - # Generalization of "Javascript code usage", "Combined usage" and - # "Usage without attaching to DOM" embeddings (see - # http://dultonmedia.github.io/eplayer/) - mobj = re.search( - r'''(?xs) - %s - - ''' % PLAYER_JS_RE, webpage) # noqa: UP031 - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - - @staticmethod - def _handle_error(response): - status = int_or_none(response.get('status', 200)) - if status != 200: - raise ExtractorError(' '.join(response['errors']), expected=True) - - def _download_json(self, url_or_request, video_id, *args, **kwargs): - try: - response = super()._download_json( - url_or_request, video_id, *args, **kwargs) - except ExtractorError as ee: - if isinstance(ee.cause, HTTPError): - response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id) - self._handle_error(response) - raise - return response - - def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): - return self._download_json(url_or_request, video_id, note)['data'][0] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = self._match_valid_url(url) - host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') - - headers = {} - query = { - 'id': video_id, - } - - referrer = smuggled_data.get('referrer') - if referrer: - headers['Referer'] = referrer - query['referrer'] = referrer - - player_data = self._download_json( - f'http://{host}/api/player_data', video_id, - headers=headers, query=query) - - media = player_data['data']['playlist']['viewports'][0]['medialist'][0] - - 
title = media['title']
-        description = media.get('description')
-        thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
-        duration = int_or_none(media.get('duration'))
-        view_count = int_or_none(media.get('views'))
-
-        age_restriction = media.get('age_restriction')
-        age_limit = None
-        if age_restriction:
-            age_limit = 0 if age_restriction == 'allow_all' else 18
-
-        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
-
-        formats = []
-
-        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
-        m3u8_formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
-            m3u8_id='hls', fatal=False)
-        formats.extend(m3u8_formats)
-
-        m3u8_formats_dict = {}
-        for f in m3u8_formats:
-            if f.get('height') is not None:
-                m3u8_formats_dict[f['height']] = f
-
-        mp4_data = self._download_json(
-            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
-            # http://lentaru.media.eagleplatform.com/player/player.js
-            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
-            video_id, 'Downloading mp4 JSON', fatal=False)
-        if mp4_data:
-            for format_id, format_url in mp4_data.get('data', {}).items():
-                if not url_or_none(format_url):
-                    continue
-                height = int_or_none(format_id)
-                if height is not None and m3u8_formats_dict.get(height):
-                    f = m3u8_formats_dict[height].copy()
-                    f.update({
-                        'format_id': f['format_id'].replace('hls', 'http'),
-                        'protocol': 'http',
-                    })
-                else:
-                    f = {
-                        'format_id': f'http-{format_id}',
-                        'height': int_or_none(format_id),
-                    }
-                f['url'] = format_url
-                formats.append(f)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'view_count': view_count,
-            'age_limit': age_limit,
-            'formats': formats,
-        }
-
-
-class ClipYouEmbedIE(InfoExtractor):
-    _VALID_URL = False
-
-    @classmethod
-    def _extract_embed_urls(cls, url, webpage):
-        mobj = re.search(
-            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
-        if mobj is not None:
-            yield smuggle_url('eagleplatform:{host}:{id}'.format(**mobj.groupdict()), {'referrer': url})
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index d9a666f99..652c3b851 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1010,38 +1010,6 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
-        # referrer protected EaglePlatform embed
-        {
-            'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
-            'info_dict': {
-                'id': '582306',
-                'ext': 'mp4',
-                'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'duration': 3382,
-                'view_count': int,
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        # ClipYou (EaglePlatform) embed (custom URL)
-        {
-            'url': 'http://muz-tv.ru/play/7129/',
-            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
-            'info_dict': {
-                'id': '12820',
-                'ext': 'mp4',
-                'title': "'O Sole Mio",
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'duration': 216,
-                'view_count': int,
-            },
-            'params': {
-                'skip_download': True,
-            },
-            'skip': 'This video is unavailable.',
-        },
         # Pladform embed
         {
             'url': 'http://muz-tv.ru/kinozal/view/7400/',
diff --git a/yt_dlp/extractor/livejournal.py b/yt_dlp/extractor/livejournal.py
index c61f9bec7..ee2dfca0d 100644
--- a/yt_dlp/extractor/livejournal.py
+++ b/yt_dlp/extractor/livejournal.py
@@ -3,6 +3,7 @@
 
 
 class LiveJournalIE(InfoExtractor):
+    
_WORKING = False
     _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
     _TEST = {
         'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',

From e8c2bf798b6707d27fecde66161172da69c7cd72 Mon Sep 17 00:00:00 2001
From: c-basalt <117849907+c-basalt@users.noreply.github.com>
Date: Sat, 26 Jul 2025 14:02:56 -0400
Subject: [PATCH 15/27] [ie/neteasemusic] Support XFF (#11044)

Closes #11043
Authored by: c-basalt
---
 yt_dlp/extractor/neteasemusic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py
index 900b8b2a3..6c47086b9 100644
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@@ -34,7 +34,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
         'sky',  # SVIP tier; 沉浸环绕声 (Surround Audio); flac
     )
     _API_BASE = 'http://music.163.com/api/'
-    _GEO_BYPASS = False
 
     def _create_eapi_cipher(self, api_path, query_body, cookies):
         request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
@@ -64,6 +63,8 @@ def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
                 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
             }),
         }
+        if self._x_forwarded_for_ip:
+            headers.setdefault('X-Real-IP', self._x_forwarded_for_ip)
         return self._download_json(
             urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id,
             data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={

From daa1859be1b0e7d123da8b4e0988f2eb7bd47d15 Mon Sep 17 00:00:00 2001
From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com>
Date: Sat, 26 Jul 2025 18:11:57 +0000
Subject: [PATCH 16/27] [ie/FaulioLive] Support Bahry TV (#13850)

Authored by: CasperMcFadden95
---
 yt_dlp/extractor/faulio.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py
index 393023503..a5d5c750b 100644
--- a/yt_dlp/extractor/faulio.py
+++ b/yt_dlp/extractor/faulio.py
@@ -9,6 +9,7 @@ class FaulioLiveIE(InfoExtractor):
     _DOMAINS = (
         'aloula.sba.sa',
+        'bahry.com',
         'maraya.sba.net.ae',
         'sat7plus.org',
     )
@@ -25,6 +26,18 @@ class FaulioLiveIE(InfoExtractor):
         'params': {
             'skip_download': 'Livestream',
         },
+    }, {
+        'url': 'https://bahry.com/live/1',
+        'info_dict': {
+            'id': 'bahry.faulio.com_1',
+            'title': str,
+            'description': str,
+            'ext': 'mp4',
+            'live_status': 'is_live',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
     }, {
         'url': 'https://maraya.sba.net.ae/live/1',
         'info_dict': {

From 57186f958f164daa50203adcbf7ec74d541151cf Mon Sep 17 00:00:00 2001
From: Tom Hebb
Date: Sat, 26 Jul 2025 14:43:38 -0400
Subject: [PATCH 17/27] [fd/hls] Fix `--hls-split-continuity` support (#13321)

Authored by: tchebb
---
 yt_dlp/downloader/hls.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index 225630578..58cfbbf16 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -205,7 +205,7 @@ def is_ad_fragment_end(s):
                 line = line.strip()
                 if line:
                     if not line.startswith('#'):
-                        if format_index and discontinuity_count != format_index:
+                        if format_index is not None and discontinuity_count != format_index:
                             continue
                         if ad_frag_next:
                             continue
@@ -231,7 +231,7 @@ def is_ad_fragment_end(s):
                             byte_range = {}
 
                 elif line.startswith('#EXT-X-MAP'):
-                    if format_index and discontinuity_count != format_index:
+                    if format_index is not None and discontinuity_count != format_index:
                         continue
 
                     if frag_index > 0:
self.report_error( From 66aa21dc5a3b79059c38f3ad1d05dc9b29187701 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:39:54 -0500 Subject: [PATCH 18/27] [build] Use `macos-14` runner for `macos` builds (#13814) Ref: https://github.blog/changelog/2025-07-11-upcoming-changes-to-macos-hosted-runners-macos-latest-migration-and-xcode-support-policy-updates/#macos-13-is-closing-down Authored by: bashonly --- .github/workflows/build.yml | 4 +++- bundle/pyinstaller.py | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e2411ecfa..b3db8fec1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -242,7 +242,7 @@ jobs: permissions: contents: read actions: write # For cleaning up cache - runs-on: macos-13 + runs-on: macos-14 steps: - uses: actions/checkout@v4 @@ -261,6 +261,8 @@ jobs: - name: Install Requirements run: | brew install coreutils + # We need to use system Python in order to roll our own universal2 curl_cffi wheel + brew uninstall --ignore-dependencies python3 python3 -m venv ~/yt-dlp-build-venv source ~/yt-dlp-build-venv/bin/activate python3 devscripts/install_deps.py -o --include build diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index c2f651121..0597f602d 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -62,16 +62,22 @@ def parse_options(): def exe(onedir): """@returns (name, path)""" + platform_name, machine, extension = { + 'win32': (None, MACHINE, '.exe'), + 'darwin': ('macos', None, None), + }.get(OS_NAME, (OS_NAME, MACHINE, None)) + name = '_'.join(filter(None, ( 'yt-dlp', - {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), - MACHINE, + platform_name, + machine, ))) + return name, ''.join(filter(None, ( 'dist/', onedir and f'{name}/', name, - OS_NAME == 'win32' and '.exe', + extension, ))) From cc5a5caac5fbc0d605b52bde0778d6fd5f97b5ab Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:12:53 -0500 Subject: [PATCH 19/27] Deprecate `darwin_legacy_exe` support (#13857) Ref: https://github.com/yt-dlp/yt-dlp/issues/13856 Authored by: bashonly --- yt_dlp/update.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index de289cb78..f85be2d08 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -141,6 +141,17 @@ def _get_binary_name(): def _get_system_deprecation(): MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 9) + EXE_MSG_TMPL = ('Support for {} has been deprecated. ' + 'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}') + STOP_MSG = 'You may stop receiving updates on this version at any time!' 
+
+    variant = detect_variant()
+
+    # Temporary until macos_legacy executable builds are discontinued
+    if variant == 'darwin_legacy_exe':
+        return EXE_MSG_TMPL.format(
+            f'{variant} (the PyInstaller-bundled executable for macOS versions older than 10.15)',
+            'issues/13856', STOP_MSG)
+
     if sys.version_info > MIN_RECOMMENDED:
         return None

From 23c658b9cbe34a151f8f921ab1320bb5d4e40a4d Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sat, 26 Jul 2025 17:59:02 -0500
Subject: [PATCH 20/27] Raise minimum recommended Python version to 3.10 (#13859)

Ref: https://github.com/yt-dlp/yt-dlp/issues/13858

Authored by: bashonly
---
 yt_dlp/update.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/update.py b/yt_dlp/update.py
index f85be2d08..30cbf538e 100644
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@@ -139,7 +139,7 @@ def _get_binary_name():
 
 
 def _get_system_deprecation():
-    MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 9)
+    MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 10)
 
     EXE_MSG_TMPL = ('Support for {} has been deprecated. '
                     'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}')
@@ -161,6 +161,13 @@ def _get_system_deprecation():
     if sys.version_info < MIN_SUPPORTED:
         return f'Python version {major}.{minor} is no longer supported! {PYTHON_MSG}'
 
+    # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 22.04 and Python 3.10
+    if variant in ('linux_aarch64_exe', 'linux_armv7l_exe'):
+        libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2])
+        if libc_ver < (2, 35):
+            return EXE_MSG_TMPL.format('system glibc version < 2.35', 'issues/13858', STOP_MSG)
+        return None
+
     return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}'

From b831406a1d3be34c159835079d12bae624c43610 Mon Sep 17 00:00:00 2001
From: Florentin Le Moal
Date: Sun, 27 Jul 2025 21:52:05 +0200
Subject: [PATCH 21/27] [ie/rtve.es:program] Add extractor

Authored by: meGAmeS1, seproDev

Co-authored-by: sepro
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/rtve.py        | 61 +++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 617c2c5ce..944527085 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1781,6 +1781,7 @@
     RTVEALaCartaIE,
     RTVEAudioIE,
     RTVELiveIE,
+    RTVEProgramIE,
     RTVETelevisionIE,
 )
 from .rtvs import RTVSIE
diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index 2812d9305..c2ccf73dd 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -6,9 +6,11 @@
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    InAdvancePagedList,
     clean_html,
     determine_ext,
     float_or_none,
+    int_or_none,
     make_archive_id,
     parse_iso8601,
     qualities,
@@ -371,3 +373,62 @@ def _real_extract(self, url):
             raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
 
         return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
+
+
+class RTVEProgramIE(RTVEBaseIE):
+    IE_NAME = 'rtve.es:program'
+    IE_DESC = 'RTVE.es programs'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/(?P<id>[\w-]+)/?(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://www.rtve.es/play/videos/saber-vivir/',
+        'info_dict': {
+            'id': '111570',
+            'title': 'Saber vivir - Programa de ciencia y futuro en RTVE Play',
+        },
+        'playlist_mincount': 400,
+    }]
+    _PAGE_SIZE = 60
+
+    def _fetch_page(self, program_id, page_num):
+        return self._download_json(
f'https://www.rtve.es/api/programas/{program_id}/videos', + program_id, note=f'Downloading page {page_num}', + query={ + 'type': 39816, + 'page': page_num, + 'size': 60, + }) + + def _entries(self, page_data): + for video in traverse_obj(page_data, ('page', 'items', lambda _, v: url_or_none(v['htmlUrl']))): + yield self.url_result( + video['htmlUrl'], RTVEALaCartaIE, url_transparent=True, + **traverse_obj(video, { + 'id': ('id', {str}), + 'title': ('longTitle', {str}), + 'description': ('shortDescription', {str}), + 'duration': ('duration', {float_or_none(scale=1000)}), + 'series': (('programInfo', 'title'), {str}, any), + 'season_number': ('temporadaOrden', {int_or_none}), + 'season_id': ('temporadaId', {str}), + 'season': ('temporada', {str}), + 'episode_number': ('episode', {int_or_none}), + 'episode': ('title', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + }), + ) + + def _real_extract(self, url): + program_slug = self._match_id(url) + program_page = self._download_webpage(url, program_slug) + + program_id = self._html_search_meta('DC.identifier', program_page, 'Program ID', fatal=True) + + first_page = self._fetch_page(program_id, 1) + page_count = traverse_obj(first_page, ('page', 'totalPages', {int})) or 1 + + entries = InAdvancePagedList( + lambda idx: self._entries(self._fetch_page(program_id, idx + 1) if idx else first_page), + page_count, self._PAGE_SIZE) + + return self.playlist_result(entries, program_id, self._html_extract_title(program_page)) From 682334e4b35112f7a5798decdcb5cb12230ef948 Mon Sep 17 00:00:00 2001 From: fries1234 Date: Sun, 27 Jul 2025 13:26:33 -0700 Subject: [PATCH 22/27] [ie/tvw:news] Add extractor (#12907) Authored by: fries1234 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/tvw.py | 56 +++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 944527085..3eea0cdf6 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2235,6 +2235,7 @@ from .tvplayer import TVPlayerIE from .tvw import ( TvwIE, + TvwNewsIE, TvwTvChannelsIE, ) from .tweakers import TweakersIE diff --git a/yt_dlp/extractor/tvw.py b/yt_dlp/extractor/tvw.py index 0ab926dbd..74d9b6424 100644 --- a/yt_dlp/extractor/tvw.py +++ b/yt_dlp/extractor/tvw.py @@ -10,12 +10,15 @@ unified_timestamp, url_or_none, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import find_element, find_elements, traverse_obj class TvwIE(InfoExtractor): IE_NAME = 'tvw' - _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P[^/?#]+)' + _VALID_URL = [ + r'https?://(?:www\.)?tvw\.org/video/(?P[^/?#]+)', + r'https?://(?:www\.)?tvw\.org/watch/?\?(?:[^#]+&)?eventID=(?P\d+)', + ] _TESTS = [{ 'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/', 'md5': '9ceb94fe2bb7fd726f74f16356825703', @@ -75,6 +78,20 @@ class TvwIE(InfoExtractor): 'display_id': 'washington-to-washington-a-new-space-race-2022041111', 'categories': ['Washington to Washington', 'General Interest'], }, + }, { + 'url': 'https://tvw.org/watch?eventID=2025041235', + 'md5': '7d697c02f110b37d6a47622ea608ca90', + 'info_dict': { + 'id': '2025041235', + 'ext': 'mp4', + 'title': 'Legislative Review - Medicaid Postpartum Bill Sparks Debate & Senate Approves Automatic Voter Registration', + 'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$', + 'description': 'md5:37d0f3a9187ae520aac261b3959eaee6', + 'timestamp': 1745006400, + 'upload_date': 
'20250418',
+            'location': 'Hayner Media Center',
+            'categories': ['Legislative Review'],
+        },
     }]
 
     def _real_extract(self, url):
@@ -125,6 +142,41 @@ def _real_extract(self, url):
         }
 
 
+class TvwNewsIE(InfoExtractor):
+    IE_NAME = 'tvw:news'
+    _VALID_URL = r'https?://(?:www\.)?tvw\.org/\d{4}/\d{2}/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/',
+        'info_dict': {
+            'id': 'the-impact-issues-to-watch-in-the-2024-legislative-session',
+            'title': 'The Impact - Issues to Watch in the 2024 Legislative Session',
+            'description': 'md5:65f0b33ec8f18ff1cd401c5547aa5441',
+        },
+        'playlist_count': 6,
+    }, {
+        'url': 'https://tvw.org/2024/06/the-impact-water-rights-and-the-skookumchuck-dam-debate/',
+        'info_dict': {
+            'id': 'the-impact-water-rights-and-the-skookumchuck-dam-debate',
+            'title': 'The Impact - Water Rights and the Skookumchuck Dam Debate',
+            'description': 'md5:185f3a2350ef81e3fa159ac3e040a94b',
+        },
+        'playlist_count': 1,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        video_ids = traverse_obj(webpage, (
+            {find_elements(cls='invintus-player', html=True)}, ..., {extract_attributes}, 'data-eventid'))
+
+        return self.playlist_from_matches(
+            video_ids, playlist_id,
+            playlist_title=remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
+            playlist_description=self._og_search_description(webpage, default=None),
+            getter=lambda x: f'https://tvw.org/watch?eventID={x}', ie=TvwIE)
+
+
 class TvwTvChannelsIE(InfoExtractor):
     IE_NAME = 'tvw:tvchannels'
     _VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'

From 28b68f687561468e0c664dcb430707458970019f Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Tue, 29 Jul 2025 14:47:28 -0500
Subject: [PATCH 23/27] [cookies] Load cookies with float `expires` timestamps (#13873)

Authored by: bashonly
---
 yt_dlp/cookies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 5675445ac..459a4b7de 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -1335,7 +1335,7 @@ def prepare_line(line):
             if len(cookie_list) != self._ENTRY_LEN:
                 raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
             cookie = self._CookieFileEntry(*cookie_list)
-            if cookie.expires_at and not cookie.expires_at.isdigit():
+            if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at):
                 raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
             return line

From 62e2a9c0d55306906f18da2927e05e1cbc31473c Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Tue, 29 Jul 2025 16:31:35 -0500
Subject: [PATCH 24/27] [ci] Bump supported PyPy version to 3.11 (#13877)

Ref: https://pypy.org/posts/2025/07/pypy-v7320-release.html

Authored by: bashonly
---
 .github/workflows/core.yml            | 4 ++--
 .github/workflows/download.yml        | 4 ++--
 .github/workflows/signature-tests.yml | 2 +-
 CONTRIBUTING.md                       | 2 +-
 README.md                             | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index dd2c6f481..86036989c 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -37,7 +37,7 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         # CPython 3.9 is in quick-test
-        python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10]
+        python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11]
         include:
         # atleast one 
of each CPython/PyPy tests must be in windows - os: windows-latest @@ -49,7 +49,7 @@ jobs: - os: windows-latest python-version: '3.13' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 6849fba9b..594a664c9 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,13 +28,13 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.9' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml index 203172e0b..42c65db35 100644 --- a/.github/workflows/signature-tests.yml +++ b/.github/workflows/signature-tests.yml @@ -25,7 +25,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.11] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2c58cdfc9..8822907b7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -272,7 +272,7 @@ ## Adding support for a new site You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.11. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell diff --git a/README.md b/README.md index e5bd21b9c..12f68e98d 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ # To install nightly with pip: ``` ## DEPENDENCIES -Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly.