From ff6f94041aeee19c5559e1c1cd693960a1c1dd14 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 25 Jun 2025 12:10:00 -0500 Subject: [PATCH 01/51] [ie/youtube] Improve player context payloads (#13539) Closes #12563 Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 55ebdce1b..081ed6ac8 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2820,6 +2820,10 @@ def _generate_player_context(cls, sts=None): context['signatureTimestamp'] = sts return { 'playbackContext': { + 'adPlaybackContext': { + 'pyv': True, + 'adType': 'AD_TYPE_INSTREAM', + }, 'contentPlaybackContext': context, }, **cls._get_checkok_params(), From 8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 25 Jun 2025 13:32:57 -0500 Subject: [PATCH 02/51] [ie/youtube] Check any `ios` m3u8 formats prior to download (#13524) Closes #13511 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 2 ++ yt_dlp/extractor/youtube/_video.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 309489672..67ca90349 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2219,6 +2219,7 @@ def _check_formats(self, formats): self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: + f.pop('__needs_testing', None) yield f else: self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) @@ -3963,6 +3964,7 @@ def simplified_codec(f, field): self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None), + self._format_out('Untested', self.Styles.WARNING) if f.get('__needs_testing') else None, format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), delim=', '), delim=' '), diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 081ed6ac8..5ccc33fa3 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -3556,6 +3556,11 @@ def process_manifest_format(f, proto, client_name, itag, po_token): f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') f['source_preference'] -= 20 + # XXX: Check if IOS HLS formats are affected by player PO token enforcement; temporary + # See https://github.com/yt-dlp/yt-dlp/issues/13511 + if proto == 'hls' and client_name == 'ios': + f['__needs_testing'] = True + itags[itag].add(key) if itag and all_formats: @@ -4284,6 +4289,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): # Newly uploaded videos' HLS formats are potentially problematic and need to be checked + # XXX: This is redundant for as long as we are already checking all IOS HLS formats upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc) if upload_datetime >= datetime_from_str('today-2days'): for fmt in info['formats']: From 5b559d0072b7164daf06bacdc41c6f11283452c8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: 
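For reference, a minimal sketch of the request-body shape the patched _generate_player_context() from PATCH 01 now produces; only the adPlaybackContext block and the signatureTimestamp handling are taken from the diff, and the real baseline contentPlaybackContext carries additional fields not shown here.

    import json

    def build_playback_context(sts=None):
        # Sketch of the dict the patched _generate_player_context() returns;
        # the real contentPlaybackContext baseline has more fields than this.
        context = {}
        if sts is not None:
            context['signatureTimestamp'] = sts
        return {
            'playbackContext': {
                'adPlaybackContext': {
                    'pyv': True,
                    'adType': 'AD_TYPE_INSTREAM',
                },
                'contentPlaybackContext': context,
            },
        }

    print(json.dumps(build_playback_context(sts=20184), indent=2))  # arbitrary sts for illustration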
Wed, 25 Jun 2025 14:02:37 -0500 Subject: [PATCH 03/51] [ie/sproutvideo] Fix extractor (#13544) Closes #13540 Authored by: bashonly --- yt_dlp/extractor/sproutvideo.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index c0923594e..764c78f1e 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -41,6 +41,7 @@ class SproutVideoIE(InfoExtractor): 'duration': 703, 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', }, + 'skip': 'Account Disabled', }, { # http formats 'sd' and 'hd' are available 'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90', @@ -97,11 +98,21 @@ def _extract_embed_urls(cls, url, webpage): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) + webpage = self._download_webpage(url, video_id, headers={ + **traverse_obj(smuggled_data, {'Referer': 'referer'}), + # yt-dlp's default Chrome user-agents are too old + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:140.0) Gecko/20100101 Firefox/140.0', + }) data = self._search_json( - r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', - end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode()) + r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, + contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', + transform_source=lambda x: base64.b64decode(x).decode()) + + # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e] + # e.g. if the user-agent we used with the webpage request is too old + video_uid = data['videoUid'] + if video_id != video_uid: + raise ExtractorError(f'{self.IE_NAME} sent the wrong video data ({video_uid})') formats, subtitles = [], {} headers = { From c57412d1f9cf0124adc972a47858ac42b740c61d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A6sim?= Date: Wed, 25 Jun 2025 22:24:20 +0300 Subject: [PATCH 04/51] [ie/lsm] Fix extractors (#13126) Closes #12298 Authored by: Caesim404 --- yt_dlp/extractor/cloudycdn.py | 26 +++++++++++++++------ yt_dlp/extractor/lsm.py | 43 ++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/yt_dlp/extractor/cloudycdn.py b/yt_dlp/extractor/cloudycdn.py index 6e757d79e..a9a539274 100644 --- a/yt_dlp/extractor/cloudycdn.py +++ b/yt_dlp/extractor/cloudycdn.py @@ -11,7 +11,7 @@ class CloudyCDNIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P[^/?#]+)/media/(?P[\w-]+)' + _VALID_URL = r'(?:https?:)?//embed\.(?Pcloudycdn\.services|backscreen\.com)/(?P[^/?#]+)/media/(?P[\w-]+)' _EMBED_REGEX = [rf']+\bsrc=[\'"](?P{_VALID_URL})'] _TESTS = [{ 'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?', @@ -23,7 +23,7 @@ class CloudyCDNIE(InfoExtractor): 'duration': 1442, 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', - 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg', }, }, { 'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1', @@ -33,7 +33,7 @@ class CloudyCDNIE(InfoExtractor): 'ext': 'mp4', 'title': 'LV-8-5-1', 'timestamp': 1669767167, - 'thumbnail': 
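A standalone sketch of the page-side lookup the PATCH 03 SproutVideo fix performs: the player info is a base64-encoded JSON blob assigned to either `var dat` or `var playerInfo`, and the decoded dict carries the videoUid that is compared against the requested ID. The sample blob below is synthetic; real pages include the full stream description.

    import base64
    import json
    import re

    def decode_player_info(webpage):
        # Same idea as the patched _search_json call: grab the base64 blob and decode it.
        blob = re.search(
            r'var\s+(?:dat|playerInfo)\s*=\s*["\']([A-Za-z0-9+/=]+)["\'];', webpage).group(1)
        return json.loads(base64.b64decode(blob).decode())

    sample = 'var playerInfo = "{}";'.format(
        base64.b64encode(b'{"videoUid": "30751a1761ae5b90"}').decode())
    print(decode_player_info(sample))  # {'videoUid': '30751a1761ae5b90'}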
'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/488306/placeholder1679423604.jpg', 'duration': 1205, 'upload_date': '20221130', }, @@ -48,9 +48,21 @@ class CloudyCDNIE(InfoExtractor): 'duration': 1673, 'title': 'D24-6000-074-cetstud', 'timestamp': 1718902233, - 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/788392/placeholder1718903938.jpg', }, 'params': {'format': 'bv'}, + }, { + 'url': 'https://embed.backscreen.com/ltv/media/32j_z25-0600-127?', + 'md5': '9b6fa09ac1a4de53d4f42b94affc3b42', + 'info_dict': { + 'id': '32j_z25-0600-127', + 'ext': 'mp4', + 'title': 'Z25-0600-127-DZ', + 'duration': 1906, + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/977427/placeholder1746633646.jpg', + 'timestamp': 1746632402, + 'upload_date': '20250507', + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/', @@ -60,17 +72,17 @@ class CloudyCDNIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20230223', 'duration': 629, - 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/518407/placeholder1678748124.jpg', 'timestamp': 1677181513, 'title': 'LIB-2', }, }] def _real_extract(self, url): - site_id, video_id = self._match_valid_url(url).group('site_id', 'id') + domain, site_id, video_id = self._match_valid_url(url).group('domain', 'site_id', 'id') data = self._download_json( - f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/', + f'https://player.{domain}/player/{site_id}/media/{video_id}/', video_id, data=urlencode_postdata({ 'version': '6.4.0', 'referer': url, diff --git a/yt_dlp/extractor/lsm.py b/yt_dlp/extractor/lsm.py index 56c06d745..93ea2e35d 100644 --- a/yt_dlp/extractor/lsm.py +++ b/yt_dlp/extractor/lsm.py @@ -167,11 +167,11 @@ class LSMLTVEmbedIE(InfoExtractor): 'duration': 1442, 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', - 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg', }, }, { 'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=', - 'md5': 'a1711e190fe680fdb68fd8413b378e87', + 'md5': 'f236cef2fd5953612754e4e66be51e7a', 'info_dict': { 'id': 'wUnFArIPDSY', 'ext': 'mp4', @@ -198,6 +198,8 @@ class LSMLTVEmbedIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/@LTV16plus', 'like_count': int, 'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5', + 'media_type': 'livestream', + 'timestamp': 1652550741, }, }] @@ -208,7 +210,7 @@ def _real_extract(self, url): r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id) embed_type = traverse_obj(data, ('source', 'name', {str})) - if embed_type == 'telia': + if embed_type in ('backscreen', 'telia'): # 'telia' only for backwards compat ie_key = 'CloudyCDN' embed_url = traverse_obj(data, ('source', 'embed_url', 
{url_or_none})) elif embed_type == 'youtube': @@ -226,9 +228,9 @@ def _real_extract(self, url): class LSMReplayIE(InfoExtractor): - _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P\d+)' + _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:skaties/|klausies/)?(?:ieraksts|statja)/[^/?#]+/(?P\d+)' _TESTS = [{ - 'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', + 'url': 'https://replay.lsm.lv/lv/skaties/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', 'md5': '64f72a360ca530d5ed89c77646c9eee5', 'info_dict': { 'id': '46k_d23-6000-105', @@ -241,20 +243,23 @@ class LSMReplayIE(InfoExtractor): 'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg', }, }, { - 'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam', - 'md5': '719b33875cd1429846eeeaeec6df2830', + 'url': 'https://replay.lsm.lv/lv/klausies/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam', + 'md5': '84feb80fd7e6ec07744726a9f01cda4d', 'info_dict': { - 'id': 'a342781', - 'ext': 'mp3', + 'id': '183522', + 'ext': 'm4a', 'duration': 1823, 'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām', 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg', 'upload_date': '20231102', - 'timestamp': 1698921060, + 'timestamp': 1698913860, 'description': 'md5:7bac3b2dd41e44325032943251c357b1', }, }, { - 'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', + 'url': 'https://replay.lsm.lv/ru/skaties/statja/ltv/355067/v-kengaragse-nacalas-ukladka-relsov', + 'only_matching': True, + }, { + 'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', 'only_matching': True, }] @@ -267,12 +272,24 @@ def _real_extract(self, url): data = self._search_nuxt_data( self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__') + playback_type = data['playback']['type'] + + if playback_type == 'playable_audio_lr': + playback_data = { + 'formats': self._extract_m3u8_formats(data['playback']['service']['hls_url'], video_id), + } + elif playback_type == 'embed': + playback_data = { + '_type': 'url_transparent', + 'url': data['playback']['service']['url'], + } + else: + raise ExtractorError(f'Unsupported playback type "{playback_type}"') return { - '_type': 'url_transparent', 'id': video_id, + **playback_data, **traverse_obj(data, { - 'url': ('playback', 'service', 'url', {url_or_none}), 'title': ('mediaItem', 'title'), 'description': ('mediaItem', ('lead', 'body')), 'duration': ('mediaItem', 'duration', {int_or_none}), From a4ce4327c9836691d3b6b00e44a90b6741601ed8 Mon Sep 17 00:00:00 2001 From: D Trombett Date: Thu, 26 Jun 2025 01:24:39 +0200 Subject: [PATCH 05/51] [ie/SportDeutschland] Fix extractor (#13519) Closes #13518 Authored by: DTrombett --- yt_dlp/extractor/sportdeutschland.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py index 2d6acb876..8349d9604 100644 --- a/yt_dlp/extractor/sportdeutschland.py +++ b/yt_dlp/extractor/sportdeutschland.py @@ -25,6 +25,7 @@ class SportDeutschlandIE(InfoExtractor): 'upload_date': '20230114', 'timestamp': 1673733618, }, + 'skip': 'not found', }, { 'url': 
'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0', 'info_dict': { @@ -41,6 +42,7 @@ class SportDeutschlandIE(InfoExtractor): 'upload_date': '20220309', 'timestamp': 1646860727.0, }, + 'skip': 'not found', }, { 'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023', 'info_dict': { @@ -68,6 +70,7 @@ class SportDeutschlandIE(InfoExtractor): 'live_status': 'was_live', }, }], + 'skip': 'not found', }, { 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1', 'info_dict': { @@ -82,13 +85,30 @@ class SportDeutschlandIE(InfoExtractor): 'live_status': 'is_live', }, 'skip': 'live', + }, { + 'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', + 'md5': '35c11a19395c938cdd076b93bda54cde', + 'info_dict': { + 'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a', + 'ext': 'mp4', + 'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates', + 'display_id': 'rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', + 'channel': 'Rostock Griffins', + 'channel_url': 'https://sportdeutschland.tv/rostock-griffins', + 'live_status': 'was_live', + 'description': 'md5:60cb00067e55dafa27b0933a43d72862', + 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', + 'timestamp': 1749913117, + 'upload_date': '20250614', + }, }] def _process_video(self, asset_id, video): is_live = video['type'] == 'mux_live' token = self._download_json( - f'https://api.sportdeutschland.tv/api/frontend/asset-token/{asset_id}', - video['id'], query={'type': video['type'], 'playback_id': video['src']})['token'] + f'https://api.sportdeutschland.tv/api/web/personal/asset-token/{asset_id}', + video['id'], query={'type': video['type'], 'playback_id': video['src']}, + headers={'Referer': 'https://sportdeutschland.tv/'})['token'] formats, subtitles = self._extract_m3u8_formats_and_subtitles( f'https://stream.mux.com/{video["src"]}.m3u8?token={token}', video['id'], live=is_live) From 3bd30291601c47fa4a257983473884103ecab0c7 Mon Sep 17 00:00:00 2001 From: D Trombett Date: Thu, 26 Jun 2025 01:26:23 +0200 Subject: [PATCH 06/51] [ie/tv8.it] Support slugless URLs (#13478) Authored by: DTrombett --- yt_dlp/extractor/skyit.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py index 0013d2621..fe45be774 100644 --- a/yt_dlp/extractor/skyit.py +++ b/yt_dlp/extractor/skyit.py @@ -213,7 +213,7 @@ class CieloTVItIE(SkyItIE): # XXX: Do not subclass from concrete IE class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE IE_NAME = 'tv8.it' - _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/(?:[0-9a-z-]+-)?(?P\d+)' _TESTS = [{ 'url': 'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529', 'md5': '9ab906a3f75ea342ed928442f9dabd21', @@ -227,6 +227,19 @@ class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE 'thumbnail': 'https://videoplatform.sky.it/still/2020/11/18/1605717753954_ogni-mattina-ucciso-asino-di-andrea-lo-cicero_videostill_1.jpg', }, 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.tv8.it/video/964361', + 'md5': '1e58e807154658a16edc29e45be38107', + 'info_dict': { + 'id': '964361', + 'ext': 'mp4', + 'title': 'GialappaShow - S.4 Ep.2', + 'description': 'md5:60bb4ff5af18bbeeaedabc1de5f9e1e2', + 'duration': 8030, + 'thumbnail': 
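A condensed sketch of the token flow that PATCH 05 switches SportDeutschland to: the asset-token endpoint moved under /api/web/personal/ and now wants a Referer header, and the returned token is appended to the mux.com playlist URL as before. Error handling and the live/VOD distinction are left out.

    import json
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen

    def build_mux_hls_url(asset_id, video_type, playback_id):
        # Fetch the per-asset token, then build the tokenised mux HLS URL.
        query = urlencode({'type': video_type, 'playback_id': playback_id})
        req = Request(
            f'https://api.sportdeutschland.tv/api/web/personal/asset-token/{asset_id}?{query}',
            headers={'Referer': 'https://sportdeutschland.tv/'})
        token = json.load(urlopen(req))['token']
        return f'https://stream.mux.com/{playback_id}.m3u8?token={token}'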
'https://videoplatform.sky.it/captures/494/2024/11/06/964361/964361_1730888412914_thumb_494.jpg', + 'timestamp': 1730821499, + 'upload_date': '20241105', + }, }] _DOMAIN = 'mtv8' From 2600849badb0d08c55b58dcc77a13af6ba423da6 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Thu, 26 Jun 2025 08:37:49 +0900 Subject: [PATCH 07/51] [ie/huya:live] Fix extractor (#13520) Authored by: doe1080 --- yt_dlp/extractor/huya.py | 86 +++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index f79e032e4..65099b062 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -7,12 +7,13 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, + clean_html, int_or_none, parse_duration, str_or_none, try_get, unescapeHTML, - unified_strdate, + update_url, update_url_query, url_or_none, ) @@ -22,8 +23,8 @@ class HuyaLiveIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P[^/#?&]+)(?:\D|$)' IE_NAME = 'huya:live' - IE_DESC = 'huya.com' - TESTS = [{ + IE_DESC = '虎牙直播' + _TESTS = [{ 'url': 'https://www.huya.com/572329', 'info_dict': { 'id': '572329', @@ -149,63 +150,94 @@ class HuyaVideoIE(InfoExtractor): 'id': '1002412640', 'ext': 'mp4', 'title': '8月3日', - 'thumbnail': r're:https?://.*\.jpg', - 'duration': 14, + 'categories': ['主机游戏'], + 'duration': 14.0, 'uploader': '虎牙-ATS欧卡车队青木', 'uploader_id': '1564376151', 'upload_date': '20240803', 'view_count': int, 'comment_count': int, 'like_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1722675433, }, - }, - { + }, { 'url': 'https://www.huya.com/video/play/556054543.html', 'info_dict': { 'id': '556054543', 'ext': 'mp4', 'title': '我不挑事 也不怕事', - 'thumbnail': r're:https?://.*\.jpg', - 'duration': 1864, + 'categories': ['英雄联盟'], + 'description': 'md5:58184869687d18ce62dc7b4b2ad21201', + 'duration': 1864.0, 'uploader': '卡尔', 'uploader_id': '367138632', 'upload_date': '20210811', 'view_count': int, 'comment_count': int, 'like_count': int, + 'tags': 'count:4', + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1628675950, + }, + }, { + # Only m3u8 available + 'url': 'https://www.huya.com/video/play/1063345618.html', + 'info_dict': { + 'id': '1063345618', + 'ext': 'mp4', + 'title': '峡谷第一中!黑铁上钻石顶级教学对抗elo', + 'categories': ['英雄联盟'], + 'comment_count': int, + 'duration': 21603.0, + 'like_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1749668803, + 'upload_date': '20250611', + 'uploader': '北枫CC', + 'uploader_id': '2183525275', + 'view_count': int, }, }] def _real_extract(self, url: str): video_id = self._match_id(url) - video_data = self._download_json( - 'https://liveapi.huya.com/moment/getMomentContent', video_id, - query={'videoId': video_id})['data']['moment']['videoInfo'] + moment = self._download_json( + 'https://liveapi.huya.com/moment/getMomentContent', + video_id, query={'videoId': video_id})['data']['moment'] formats = [] - for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))): - formats.append({ - 'url': definition['url'], - **traverse_obj(definition, { - 'format_id': ('defName', {str}), - 'width': ('width', {int_or_none}), - 'height': ('height', {int_or_none}), + for definition in traverse_obj(moment, ( + 'videoInfo', 'definitions', lambda _, v: url_or_none(v['m3u8']), + )): + fmts = self._extract_m3u8_formats(definition['m3u8'], video_id, 'mp4', fatal=False) + for fmt in fmts: + 
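The tv8.it change in PATCH 06 boils down to making the slug optional in the ID regex; a quick check (with the named <id> group written out in full) that both URL shapes from the tests resolve to the numeric ID:

    import re

    TV8_VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/(?:[0-9a-z-]+-)?(?P<id>\d+)'

    for url in (
        'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529',
        'https://www.tv8.it/video/964361',  # slugless form added by this patch
    ):
        print(re.match(TV8_VALID_URL, url).group('id'))  # 630529, then 964361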
fmt.update(**traverse_obj(definition, { 'filesize': ('size', {int_or_none}), - }), - }) + 'format_id': ('defName', {str}), + 'height': ('height', {int_or_none}), + 'quality': ('definition', {int_or_none}), + 'width': ('width', {int_or_none}), + })) + formats.extend(fmts) return { 'id': video_id, 'formats': formats, - **traverse_obj(video_data, { + **traverse_obj(moment, { + 'comment_count': ('commentCount', {int_or_none}), + 'description': ('content', {clean_html}, filter), + 'like_count': ('favorCount', {int_or_none}), + 'timestamp': ('cTime', {int_or_none}), + }), + **traverse_obj(moment, ('videoInfo', { 'title': ('videoTitle', {str}), - 'thumbnail': ('videoCover', {url_or_none}), + 'categories': ('category', {str}, filter, all, filter), 'duration': ('videoDuration', {parse_duration}), + 'tags': ('tags', ..., {str}, filter, all, filter), + 'thumbnail': (('videoBigCover', 'videoCover'), {url_or_none}, {update_url(query=None)}, any), 'uploader': ('nickName', {str}), 'uploader_id': ('uid', {str_or_none}), - 'upload_date': ('videoUploadTime', {unified_strdate}), 'view_count': ('videoPlayNum', {int_or_none}), - 'comment_count': ('videoCommentNum', {int_or_none}), - 'like_count': ('favorCount', {int_or_none}), - }), + })), } From 1838a1ce5d4ade80770ba9162eaffc9a1607dc70 Mon Sep 17 00:00:00 2001 From: Anton Larionov <11796525+anlar@users.noreply.github.com> Date: Thu, 26 Jun 2025 01:51:20 +0200 Subject: [PATCH 08/51] [ie/mave] Add extractor (#13380) Authored by: anlar --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/mave.py | 107 ++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 yt_dlp/extractor/mave.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 34c98b537..fbbd9571f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1107,6 +1107,7 @@ from .massengeschmacktv import MassengeschmackTVIE from .masters import MastersIE from .matchtv import MatchTVIE +from .mave import MaveIE from .mbn import MBNIE from .mdr import MDRIE from .medaltv import MedalTVIE diff --git a/yt_dlp/extractor/mave.py b/yt_dlp/extractor/mave.py new file mode 100644 index 000000000..86d8d8b7c --- /dev/null +++ b/yt_dlp/extractor/mave.py @@ -0,0 +1,107 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + parse_iso8601, + urljoin, +) +from ..utils.traversal import require, traverse_obj + + +class MaveIE(InfoExtractor): + _VALID_URL = r'https?://(?P[\w-]+)\.mave\.digital/(?Pep-\d+)' + _TESTS = [{ + 'url': 'https://ochenlichnoe.mave.digital/ep-25', + 'md5': 'aa3e513ef588b4366df1520657cbc10c', + 'info_dict': { + 'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2', + 'ext': 'mp3', + 'display_id': 'ochenlichnoe-ep-25', + 'title': 'Между мной и миром: психология самооценки', + 'description': 'md5:4b7463baaccb6982f326bce5c700382a', + 'uploader': 'Самарский университет', + 'channel': 'Очень личное', + 'channel_id': 'ochenlichnoe', + 'channel_url': 'https://ochenlichnoe.mave.digital/', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'duration': 3744, + 'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg', + 'series': 'Очень личное', + 'series_id': '2e0c3749-6df2-4946-82f4-50691419c065', + 'season': 'Season 3', + 'season_number': 3, + 'episode': 'Episode 3', + 'episode_number': 3, + 'timestamp': 1747817300, + 'upload_date': '20250521', + }, + }, { + 'url': 'https://budem.mave.digital/ep-12', + 'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f', + 'info_dict': { + 
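A rough standalone sketch of the data path the patched HuyaVideoIE walks, assuming the moment API needs no extra headers: each entry under data.moment.videoInfo.definitions now carries an m3u8 URL plus defName/height/size metadata that gets merged into the extracted formats. The yt-dlp plumbing and error handling are omitted.

    import json
    from urllib.parse import urlencode
    from urllib.request import urlopen

    def list_huya_m3u8(video_id):
        # Walk data.moment.videoInfo.definitions as the patched extractor does.
        url = ('https://liveapi.huya.com/moment/getMomentContent?'
               + urlencode({'videoId': video_id}))
        moment = json.load(urlopen(url))['data']['moment']
        return [
            {'m3u8': d['m3u8'], 'format_id': d.get('defName'), 'height': d.get('height')}
            for d in moment['videoInfo']['definitions'] if d.get('m3u8')
        ]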
'id': '41898bb5-ff57-4797-9236-37a8e537aa21', + 'ext': 'mp3', + 'display_id': 'budem-ep-12', + 'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана', + 'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19', + 'uploader': 'Полина Цветкова+Евгения Акопова', + 'channel': 'Все там будем', + 'channel_id': 'budem', + 'channel_url': 'https://budem.mave.digital/', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'age_limit': 18, + 'duration': 3664, + 'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg', + 'series': 'Все там будем', + 'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746', + 'season': 'Season 2', + 'season_number': 2, + 'episode': 'Episode 5', + 'episode_number': 5, + 'timestamp': 1735538400, + 'upload_date': '20241230', + }, + }] + _API_BASE_URL = 'https://api.mave.digital/' + + def _real_extract(self, url): + channel_id, slug = self._match_valid_url(url).group('channel', 'id') + display_id = f'{channel_id}-{slug}' + webpage = self._download_webpage(url, display_id) + data = traverse_obj( + self._search_nuxt_json(webpage, display_id), + ('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')})) + + return { + 'display_id': display_id, + 'channel_id': channel_id, + 'channel_url': f'https://{channel_id}.mave.digital/', + 'vcodec': 'none', + 'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None, + **traverse_obj(data, ('activeEpisodeData', { + 'url': ('audio', {urljoin(self._API_BASE_URL)}), + 'id': ('id', {str}), + 'title': ('title', {str}), + 'description': ('description', {clean_html}), + 'duration': ('duration', {int_or_none}), + 'season_number': ('season', {int_or_none}), + 'episode_number': ('number', {int_or_none}), + 'view_count': ('listenings', {int_or_none}), + 'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any), + 'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any), + 'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}), + 'timestamp': ('publish_date', {parse_iso8601}), + })), + **traverse_obj(data, ('podcast', 'podcast', { + 'series_id': ('id', {str}), + 'series': ('title', {str}), + 'channel': ('title', {str}), + 'uploader': ('author', {str}), + })), + } From eff0759705ffcb0c9b9bb59433e9b3acbbd36ddc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 25 Jun 2025 23:53:38 +0000 Subject: [PATCH 09/51] Release 2025.06.25 Created by: bashonly :ci skip all --- CONTRIBUTORS | 2 ++ Changelog.md | 19 +++++++++++++++++++ supportedsites.md | 5 +++-- yt_dlp/version.py | 6 +++--- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ea391bc15..00d4d15aa 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -779,3 +779,5 @@ brian6932 iednod55 maxbin123 nullpos +anlar +eason1478 diff --git a/Changelog.md b/Changelog.md index dd95abc86..d37852658 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,25 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.06.25 + +#### Extractor changes +- [Add `_search_nuxt_json` helper](https://github.com/yt-dlp/yt-dlp/commit/51887484e46ab6015c041cb1ab626a55f25a03bd) ([#13386](https://github.com/yt-dlp/yt-dlp/issues/13386)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- **brightcove**: new: [Improve metadata 
extraction](https://github.com/yt-dlp/yt-dlp/commit/e6bd4a3da295b760ab20b39c18ce8934d312c2bf) ([#13461](https://github.com/yt-dlp/yt-dlp/issues/13461)) by [doe1080](https://github.com/doe1080) +- **huya**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2600849badb0d08c55b58dcc77a13af6ba423da6) ([#13520](https://github.com/yt-dlp/yt-dlp/issues/13520)) by [doe1080](https://github.com/doe1080) +- **hypergryph**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1722c55400ff30bb5aee5dd7a262f0b7e9ce2f0e) ([#13415](https://github.com/yt-dlp/yt-dlp/issues/13415)) by [doe1080](https://github.com/doe1080), [eason1478](https://github.com/eason1478) +- **lsm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c57412d1f9cf0124adc972a47858ac42b740c61d) ([#13126](https://github.com/yt-dlp/yt-dlp/issues/13126)) by [Caesim404](https://github.com/Caesim404) +- **mave**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1838a1ce5d4ade80770ba9162eaffc9a1607dc70) ([#13380](https://github.com/yt-dlp/yt-dlp/issues/13380)) by [anlar](https://github.com/anlar) +- **sportdeutschland**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ce4327c9836691d3b6b00e44a90b6741601ed8) ([#13519](https://github.com/yt-dlp/yt-dlp/issues/13519)) by [DTrombett](https://github.com/DTrombett) +- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5b559d0072b7164daf06bacdc41c6f11283452c8) ([#13544](https://github.com/yt-dlp/yt-dlp/issues/13544)) by [bashonly](https://github.com/bashonly) +- **tv8.it**: [Support slugless URLs](https://github.com/yt-dlp/yt-dlp/commit/3bd30291601c47fa4a257983473884103ecab0c7) ([#13478](https://github.com/yt-dlp/yt-dlp/issues/13478)) by [DTrombett](https://github.com/DTrombett) +- **youtube** + - [Check any `ios` m3u8 formats prior to download](https://github.com/yt-dlp/yt-dlp/commit/8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f) ([#13524](https://github.com/yt-dlp/yt-dlp/issues/13524)) by [bashonly](https://github.com/bashonly) + - [Improve player context payloads](https://github.com/yt-dlp/yt-dlp/commit/ff6f94041aeee19c5559e1c1cd693960a1c1dd14) ([#13539](https://github.com/yt-dlp/yt-dlp/issues/13539)) by [bashonly](https://github.com/bashonly) + +#### Misc. 
changes +- **test**: `traversal`: [Fix morsel tests for Python 3.14](https://github.com/yt-dlp/yt-dlp/commit/73bf10211668e4a59ccafd790e06ee82d9fea9ea) ([#13471](https://github.com/yt-dlp/yt-dlp/issues/13471)) by [Grub4K](https://github.com/Grub4K) + ### 2025.06.09 #### Extractor changes diff --git a/supportedsites.md b/supportedsites.md index 1fe381603..b3fe01173 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -590,7 +590,7 @@ # Supported sites - **Hungama** - **HungamaAlbumPlaylist** - **HungamaSong** - - **huya:live**: huya.com + - **huya:live**: 虎牙直播 - **huya:video**: 虎牙视频 - **Hypem** - **Hytale** @@ -776,6 +776,7 @@ # Supported sites - **massengeschmack.tv** - **Masters** - **MatchTV** + - **Mave** - **MBN**: mbn.co.kr (매일방송) - **MDR**: MDR.DE - **MedalTV** @@ -832,7 +833,7 @@ # Supported sites - **Mojevideo**: mojevideo.sk - **Mojvideo** - **Monstercat** - - **MonsterSirenHypergryphMusic** + - **monstersiren**: 塞壬唱片 - **Motherless** - **MotherlessGallery** - **MotherlessGroup** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index b97c01499..020a0299c 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2025.06.09' +__version__ = '2025.06.25' -RELEASE_GIT_HEAD = '339614a173c74b42d63e858c446a9cae262a13af' +RELEASE_GIT_HEAD = '1838a1ce5d4ade80770ba9162eaffc9a1607dc70' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2025.06.09' +_pkg_version = '2025.06.25' From 99b85ac102047446e6adf5b62bfc3c8d80b53778 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Thu, 26 Jun 2025 13:42:41 -0400 Subject: [PATCH 10/51] [ie/BilibiliSpaceVideo] Extract hidden-mode collections as playlists (#13533) Closes #13435 Authored by: c-basalt --- yt_dlp/extractor/bilibili.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 43c9000ce..0f5c2c97e 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1226,6 +1226,26 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): 'id': '313580179', }, 'playlist_mincount': 92, + }, { + # Hidden-mode collection + 'url': 'https://space.bilibili.com/3669403/video', + 'info_dict': { + 'id': '3669403', + }, + 'playlist': [{ + 'info_dict': { + '_type': 'playlist', + 'id': '3669403_3958082', + 'title': '合集·直播回放', + 'description': '', + 'uploader': '月路Yuel', + 'uploader_id': '3669403', + 'timestamp': int, + 'upload_date': str, + 'thumbnail': str, + }, + }], + 'params': {'playlist_items': '7'}, }] def _real_extract(self, url): @@ -1282,8 +1302,14 @@ def get_metadata(page_data): } def get_entries(page_data): - for entry in traverse_obj(page_data, ('list', 'vlist')) or []: - yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid']) + for entry in traverse_obj(page_data, ('list', 'vlist', ..., {dict})): + if traverse_obj(entry, ('meta', 'attribute')) == 156: + # hidden-mode collection doesn't show its videos in uploads; extract as playlist instead + yield self.url_result( + f'https://space.bilibili.com/{entry["mid"]}/lists/{entry["meta"]["id"]}?type=season', + BilibiliCollectionListIE, f'{entry["mid"]}_{entry["meta"]["id"]}') + else: + yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid']) metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries) return 
self.playlist_result(paged_list, playlist_id) From 06c1a8cdffe14050206683253726875144192ef5 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Fri, 27 Jun 2025 02:45:03 +0900 Subject: [PATCH 11/51] [ie/niconico:live] Fix extractor and downloader (#13158) Authored by: doe1080 --- yt_dlp/downloader/niconico.py | 53 +++++++++++------------ yt_dlp/extractor/common.py | 3 ++ yt_dlp/extractor/niconico.py | 79 +++++++++++++++++------------------ 3 files changed, 66 insertions(+), 69 deletions(-) diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 33cf15df8..35a12b555 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -5,47 +5,46 @@ from .common import FileDownloader from .external import FFmpegFD from ..networking import Request -from ..utils import DownloadError, str_or_none, try_get +from ..networking.websocket import WebSocketResponse +from ..utils import DownloadError, str_or_none, truncate_string +from ..utils.traversal import traverse_obj class NiconicoLiveFD(FileDownloader): """ Downloads niconico live without being stopped """ def real_download(self, filename, info_dict): - video_id = info_dict['video_id'] - ws_url = info_dict['url'] - ws_extractor = info_dict['ws'] - ws_origin_host = info_dict['origin'] - live_quality = info_dict.get('live_quality', 'high') - live_latency = info_dict.get('live_latency', 'high') + video_id = info_dict['id'] + opts = info_dict['downloader_options'] + quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url'] dl = FFmpegFD(self.ydl, self.params or {}) new_info_dict = info_dict.copy() - new_info_dict.update({ - 'protocol': 'm3u8', - }) + new_info_dict['protocol'] = 'm3u8' def communicate_ws(reconnect): - if reconnect: - ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'})) + # Support --load-info-json as if it is a reconnect attempt + if reconnect or not isinstance(ws_extractor, WebSocketResponse): + ws = self.ydl.urlopen(Request( + ws_url, headers={'Origin': 'https://live.nicovideo.jp'})) if self.ydl.params.get('verbose', False): - self.to_screen('[debug] Sending startWatching request') + self.write_debug('Sending startWatching request') ws.send(json.dumps({ - 'type': 'startWatching', 'data': { + 'reconnect': True, + 'room': { + 'commentable': True, + 'protocol': 'webSocket', + }, 'stream': { - 'quality': live_quality, - 'protocol': 'hls+fmp4', - 'latency': live_latency, 'accessRightMethod': 'single_cookie', 'chasePlay': False, + 'latency': 'high', + 'protocol': 'hls', + 'quality': quality, }, - 'room': { - 'protocol': 'webSocket', - 'commentable': True, - }, - 'reconnect': True, }, + 'type': 'startWatching', })) else: ws = ws_extractor @@ -58,7 +57,6 @@ def communicate_ws(reconnect): if not data or not isinstance(data, dict): continue if data.get('type') == 'ping': - # pong back ws.send(r'{"type":"pong"}') ws.send(r'{"type":"keepSeat"}') elif data.get('type') == 'disconnect': @@ -66,12 +64,10 @@ def communicate_ws(reconnect): return True elif data.get('type') == 'error': self.write_debug(data) - message = try_get(data, lambda x: x['body']['code'], str) or recv + message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv) return DownloadError(message) elif self.ydl.params.get('verbose', False): - if len(recv) > 100: - recv = recv[:100] + '...' 
- self.to_screen(f'[debug] Server said: {recv}') + self.write_debug(f'Server response: {truncate_string(recv, 100)}') def ws_main(): reconnect = False @@ -81,7 +77,8 @@ def ws_main(): if ret is True: return except BaseException as e: - self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) + self.to_screen( + f'[niconico:live] {video_id}: Connection error occured, reconnecting after 10 seconds: {e}') time.sleep(10) continue finally: diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 6058f66ae..32b4680b7 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -263,6 +263,9 @@ class InfoExtractor: * http_chunk_size Chunk size for HTTP downloads * ffmpeg_args Extra arguments for ffmpeg downloader (input) * ffmpeg_args_out Extra arguments for ffmpeg downloader (output) + * ws (NiconicoLiveFD only) WebSocketResponse + * ws_url (NiconicoLiveFD only) Websockets URL + * max_quality (NiconicoLiveFD only) Max stream quality string * is_dash_periods Whether the format is a result of merging multiple DASH periods. RTMP formats can also have the additional fields: page_url, diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 0d0f7ceef..a20e570e6 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -4,16 +4,15 @@ import json import re import time -import urllib.parse from .common import InfoExtractor, SearchInfoExtractor -from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, OnDemandPagedList, clean_html, determine_ext, + extract_attributes, float_or_none, int_or_none, parse_bitrate, @@ -22,9 +21,8 @@ parse_qs, parse_resolution, qualities, - remove_start, str_or_none, - unescapeHTML, + truncate_string, unified_timestamp, update_url_query, url_basename, @@ -32,7 +30,11 @@ urlencode_postdata, urljoin, ) -from ..utils.traversal import find_element, require, traverse_obj +from ..utils.traversal import ( + find_element, + require, + traverse_obj, +) class NiconicoBaseIE(InfoExtractor): @@ -806,41 +808,39 @@ class NiconicoLiveIE(NiconicoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id) + webpage = self._download_webpage(url, video_id, expected_status=404) + if err_msg := traverse_obj(webpage, ({find_element(cls='message')}, {clean_html})): + raise ExtractorError(err_msg, expected=True) - embedded_data = self._parse_json(unescapeHTML(self._search_regex( - r' 100: - recv = recv[:100] + '...' 
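For reference, the reconnect handshake the reworked NiconicoLiveFD sends, written as a standalone payload builder; the quality value comes from downloader_options['max_quality'] and everything else is fixed by the diff above.

    import json

    def start_watching_payload(quality='normal', reconnect=True):
        # The startWatching message sent over the WebSocket: HLS, single_cookie
        # access rights, high latency, as in the patched downloader.
        return json.dumps({
            'data': {
                'reconnect': reconnect,
                'room': {'commentable': True, 'protocol': 'webSocket'},
                'stream': {
                    'accessRightMethod': 'single_cookie',
                    'chasePlay': False,
                    'latency': 'high',
                    'protocol': 'hls',
                    'quality': quality,
                },
            },
            'type': 'startWatching',
        })

    print(start_watching_payload())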
- self.write_debug(f'Server said: {recv}') + self.write_debug(f'Server response: {truncate_string(recv, 100)}') title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta( ('og:title', 'twitter:title'), webpage, 'live title', fatal=False) - raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {} + raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail', {dict})) or {} thumbnails = [] for name, value in raw_thumbs.items(): if not isinstance(value, dict): @@ -897,31 +895,30 @@ def _real_extract(self, url): cookie['domain'], cookie['name'], cookie['value'], expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure']) - fmt_common = { - 'live_latency': 'high', - 'origin': hostname, - 'protocol': 'niconico_live', - 'video_id': video_id, - 'ws': ws, - } q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr' a_map = {96: 'audio_low', 192: 'audio_high'} formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True) for fmt in formats: + fmt['protocol'] = 'niconico_live' if fmt.get('acodec') == 'none': fmt['format_id'] = next(q_iter, fmt['format_id']) elif fmt.get('vcodec') == 'none': abr = parse_bitrate(fmt['url'].lower()) fmt.update({ 'abr': abr, + 'acodec': 'mp4a.40.2', 'format_id': a_map.get(abr, fmt['format_id']), }) - fmt.update(fmt_common) return { 'id': video_id, 'title': title, + 'downloader_options': { + 'max_quality': traverse_obj(embedded_data, ('program', 'stream', 'maxQuality', {str})) or 'normal', + 'ws': ws, + 'ws_url': ws_url, + }, **traverse_obj(embedded_data, { 'view_count': ('program', 'statistics', 'watchCount'), 'comment_count': ('program', 'statistics', 'commentCount'), From 0a6b1044899f452cd10b6c7a6b00fa985a9a8b97 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:29:37 -0500 Subject: [PATCH 12/51] [ie/hotstar] Fix metadata extraction (#13560) Closes #7946 Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index e97740c90..341786929 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -232,10 +232,15 @@ def _real_extract(self, url): video_type = self._TYPE.get(video_type, video_type) cookies = self._get_cookies(url) # Cookies before any request - video_data = traverse_obj( - self._call_api_v1( - f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}), - ('body', 'results', 'item', {dict})) or {} + # tas=10000 can cause HTTP Error 504, see https://github.com/yt-dlp/yt-dlp/issues/7946 + for tas in (10000, 0): + query = {'tas': tas, 'contentId': video_id} + video_data = traverse_obj( + self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query=query), + ('body', 'results', 'item', {dict})) or {} + if video_data: + break + if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): self.report_drm(video_id) From 5e292baad62c749b6c340621ab2d0f904165ddfb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:31:06 -0500 Subject: [PATCH 13/51] [ie/hotstar] Raise for login required (#10405) Closes #10366 Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py 
b/yt_dlp/extractor/hotstar.py index 341786929..e9e3d1926 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,6 +1,5 @@ import hashlib import hmac -import json import re import time import uuid @@ -29,31 +28,20 @@ def _call_api_v1(self, path, *args, **kwargs): headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'}) def _call_api_impl(self, path, video_id, query, st=None, cookies=None): + if not cookies or not cookies.get('userUP'): + self.raise_login_required() + st = int_or_none(st) or int(time.time()) exp = st + 6000 auth = f'st={st}~exp={exp}~acl=/*' auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() - - if cookies and cookies.get('userUP'): - token = cookies.get('userUP').value - else: - token = self._download_json( - f'{self._API_URL}/um/v3/users', - video_id, note='Downloading token', - data=json.dumps({'device_ids': [{'id': str(uuid.uuid4()), 'type': 'device_id'}]}).encode(), - headers={ - 'hotstarauth': auth, - 'x-hs-platform': 'PCTV', # or 'web' - 'Content-Type': 'application/json', - })['user_identity'] - response = self._download_json( f'{self._API_URL}/{path}', video_id, query=query, headers={ 'hotstarauth': auth, 'x-hs-appversion': '6.72.2', 'x-hs-platform': 'web', - 'x-hs-usertoken': token, + 'x-hs-usertoken': cookies['userUP'].value, }) if response['message'] != "Playback URL's fetched successfully": From b5bd057fe86550f3aa67f2fc8790d1c6a251c57b Mon Sep 17 00:00:00 2001 From: chauhantirth <92777505+chauhantirth@users.noreply.github.com> Date: Sat, 28 Jun 2025 07:59:43 +0530 Subject: [PATCH 14/51] [ie/hotstar] Fix extractor (#13530) Closes #11195 Authored by: chauhantirth, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/hotstar.py | 127 +++++++++++++++++++++++------------- 1 file changed, 82 insertions(+), 45 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index e9e3d1926..358b5ce75 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,5 +1,6 @@ import hashlib import hmac +import json import re import time import uuid @@ -12,14 +13,15 @@ int_or_none, join_nonempty, str_or_none, - traverse_obj, url_or_none, ) +from ..utils.traversal import require, traverse_obj class HotStarBaseIE(InfoExtractor): _BASE_URL = 'https://www.hotstar.com' _API_URL = 'https://api.hotstar.com' + _API_URL_V2 = 'https://apix.hotstar.com/v2' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' def _call_api_v1(self, path, *args, **kwargs): @@ -36,27 +38,38 @@ def _call_api_impl(self, path, video_id, query, st=None, cookies=None): auth = f'st={st}~exp={exp}~acl=/*' auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() response = self._download_json( - f'{self._API_URL}/{path}', video_id, query=query, + f'{self._API_URL_V2}/{path}', video_id, query=query, headers={ + 'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)', 'hotstarauth': auth, - 'x-hs-appversion': '6.72.2', - 'x-hs-platform': 'web', 'x-hs-usertoken': cookies['userUP'].value, + 'x-hs-device-id': traverse_obj(cookies, ('deviceId', 'value')) or str(uuid.uuid4()), + 'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970', + 'x-hs-platform': 'androidtv', + 'content-type': 'application/json', }) - if response['message'] != "Playback URL's fetched 
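The hotstarauth header construction kept by PATCH 13 (now gated on the userUP login cookie) is self-contained enough to show in isolation; the Akamai key below is the one already hard-coded in the extractor.

    import hashlib
    import hmac
    import time

    AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'

    def hotstar_auth(st=None):
        # st/exp window of 6000 seconds, signed with HMAC-SHA256 as in _call_api_impl()
        st = int(st or time.time())
        token = f'st={st}~exp={st + 6000}~acl=/*'
        return token + '~hmac=' + hmac.new(
            AKAMAI_ENCRYPTION_KEY, token.encode(), hashlib.sha256).hexdigest()

    print(hotstar_auth(1719619200))  # arbitrary server time for illustration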
successfully": - raise ExtractorError( - response['message'], expected=True) - return response['data'] + if not traverse_obj(response, ('success', {dict})): + raise ExtractorError('API call was unsuccessful') + return response['success'] - def _call_api_v2(self, path, video_id, st=None, cookies=None): - return self._call_api_impl( - f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={ - 'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265', - 'device-id': cookies.get('device_id').value if cookies.get('device_id') else str(uuid.uuid4()), - 'os-name': 'Windows', - 'os-version': '10', - }) + def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None): + return self._call_api_impl(f'{path}', video_id, query={ + 'content_id': video_id, + 'filters': f'content_type={content_type}', + 'client_capabilities': json.dumps({ + 'package': ['dash', 'hls'], + 'container': ['fmp4br', 'fmp4'], + 'ads': ['non_ssai', 'ssai'], + 'audio_channel': ['atmos', 'dolby51', 'stereo'], + 'encryption': ['plain'], + 'video_codec': ['h265'], # or ['h264'] + 'ladder': ['tv', 'full'], + 'resolution': ['4k'], # or ['hd'] + 'true_resolution': ['4k'], # or ['hd'] + 'dynamic_range': ['hdr'], # or ['sdr'] + }, separators=(',', ':')), + }, st=st, cookies=cookies) def _playlist_entries(self, path, item_id, root=None, **kwargs): results = self._call_api_v1(path, item_id, **kwargs)['body']['results'] @@ -68,6 +81,7 @@ def _playlist_entries(self, path, item_id, root=None, **kwargs): class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' + IE_DESC = 'JioHotstar' _VALID_URL = r'''(?x) https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/) (?: @@ -102,15 +116,16 @@ class HotStarIE(HotStarBaseIE): 'upload_date': '20190501', 'duration': 1219, 'channel': 'StarPlus', - 'channel_id': '3', + 'channel_id': '821', 'series': 'Ek Bhram - Sarvagun Sampanna', 'season': 'Chapter 1', 'season_number': 1, - 'season_id': '6771', + 'season_id': '1260004607', 'episode': 'Janhvi Targets Suman', 'episode_number': 8, }, - }, { + 'params': {'skip_download': 'm3u8'}, + }, { # Metadata call gets HTTP Error 504 with tas=10000 'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843', 'info_dict': { 'id': '1000282843', @@ -122,14 +137,14 @@ class HotStarIE(HotStarBaseIE): 'channel': 'StarPlus', 'series': 'Anupama', 'season_number': 1, - 'season_id': '7399', + 'season_id': '1260022018', 'upload_date': '20230307', 'episode': 'Anupama, Anuj Share a Moment', 'episode_number': 853, - 'duration': 1272, - 'channel_id': '3', + 'duration': 1266, + 'channel_id': '821', }, - 'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320', 'info_dict': { @@ -142,14 +157,15 @@ class HotStarIE(HotStarBaseIE): 'channel': 'Hotstar Specials', 'series': 'Kana Kaanum Kaalangal', 'season_number': 1, - 'season_id': '9441', + 'season_id': '1260097089', 'upload_date': '20220421', 'episode': 'Back To School', 'episode_number': 1, 'duration': 1810, - 'channel_id': '54', + 'channel_id': '1260003991', }, - }, { + 'params': {'skip_download': 'm3u8'}, + }, { # Metadata call gets HTTP Error 504 with tas=10000 'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286', 'info_dict': { 'id': '1000262286', @@ -161,6 +177,7 @@ class 
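The v2 playback call added in PATCH 14 hinges on the client_capabilities blob; a sketch of just that query construction, compact-separated as in the diff and using the h265/4k/hdr defaults (the h264/hd/sdr alternatives noted in the diff comments are left out):

    import json

    def client_capabilities():
        return json.dumps({
            'package': ['dash', 'hls'],
            'container': ['fmp4br', 'fmp4'],
            'ads': ['non_ssai', 'ssai'],
            'audio_channel': ['atmos', 'dolby51', 'stereo'],
            'encryption': ['plain'],
            'video_codec': ['h265'],
            'ladder': ['tv', 'full'],
            'resolution': ['4k'],
            'true_resolution': ['4k'],
            'dynamic_range': ['hdr'],
        }, separators=(',', ':'))

    # e.g. for an episode page (content types per _CONTENT_TYPE in the diff):
    query = {'content_id': '1000282843', 'filters': 'content_type=EPISODE',
             'client_capabilities': client_capabilities()}
    print(query)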
HotStarIE(HotStarBaseIE): 'timestamp': 1622943900, 'duration': 5395, }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.hotstar.com/in/movies/premam/1000091195', 'info_dict': { @@ -168,12 +185,13 @@ class HotStarIE(HotStarBaseIE): 'ext': 'mp4', 'title': 'Premam', 'release_year': 2015, - 'description': 'md5:d833c654e4187b5e34757eafb5b72d7f', + 'description': 'md5:096cd8aaae8dab56524823dc19dfa9f7', 'timestamp': 1462149000, 'upload_date': '20160502', 'episode': 'Premam', 'duration': 8994, }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', 'only_matching': True, @@ -196,6 +214,13 @@ class HotStarIE(HotStarBaseIE): None: 'content', } + _CONTENT_TYPE = { + 'movie': 'MOVIE', + 'episode': 'EPISODE', + 'match': 'SPORT', + 'content': 'CLIPS', + } + _IGNORE_MAP = { 'res': 'resolution', 'vcodec': 'video_codec', @@ -217,43 +242,46 @@ def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None): def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') - video_type = self._TYPE.get(video_type, video_type) + video_type = self._TYPE[video_type] cookies = self._get_cookies(url) # Cookies before any request # tas=10000 can cause HTTP Error 504, see https://github.com/yt-dlp/yt-dlp/issues/7946 - for tas in (10000, 0): + for tas, err in [(10000, False), (0, None)]: query = {'tas': tas, 'contentId': video_id} video_data = traverse_obj( - self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query=query), + self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, errnote=err, query=query), ('body', 'results', 'item', {dict})) or {} if video_data: break - if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): + if video_data.get('drmProtected'): self.report_drm(video_id) - # See https://github.com/yt-dlp/yt-dlp/issues/396 - st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date') - geo_restricted = False formats, subs = [], {} headers = {'Referer': f'{self._BASE_URL}/in'} + content_type = traverse_obj(video_data, ('contentType', {str})) or self._CONTENT_TYPE[video_type] - # change to v2 in the future - playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets'] - for playback_set in playback_sets: - if not isinstance(playback_set, dict): - continue - tags = str_or_none(playback_set.get('tagsCombination')) or '' + # See https://github.com/yt-dlp/yt-dlp/issues/396 + st = self._request_webpage( + f'{self._BASE_URL}/in', video_id, 'Fetching server time').get_header('x-origin-date') + watch = self._call_api_v2('pages/watch', video_id, content_type, cookies=cookies, st=st) + player_config = traverse_obj(watch, ( + 'page', 'spaces', 'player', 'widget_wrappers', lambda _, v: v['template'] == 'PlayerWidget', + 'widget', 'data', 'player_config', {dict}, any, {require('player config')})) + + for playback_set in traverse_obj(player_config, ( + ('media_asset', 'media_asset_v2'), + ('primary', 'fallback'), + all, lambda _, v: url_or_none(v['content_url']), + )): + tags = str_or_none(playback_set.get('playback_tags')) or '' if any(f'{prefix}:{ignore}' in tags for key, prefix in self._IGNORE_MAP.items() for ignore in self._configuration_arg(key)): continue - format_url = url_or_none(playback_set.get('playbackUrl')) - if not format_url: - continue - format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url) + format_url = 
re.sub(r'(?<=//staragvod)(\d)', r'web\1', playback_set['content_url']) ext = determine_ext(format_url) current_formats, current_subs = [], {} @@ -273,8 +301,10 @@ def _real_extract(self, url): 'height': int_or_none(playback_set.get('height')), }] except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 403: + if isinstance(e.cause, HTTPError) and e.cause.status in (403, 474): geo_restricted = True + else: + self.write_debug(e) continue tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';')) @@ -292,6 +322,11 @@ def _real_extract(self, url): 'stereo': 2, 'dolby51': 6, }.get(tag_dict.get('audio_channel')) + if ( + 'Audio_Description' in f['format_id'] + or 'Audio Description' in (f.get('format_note') or '') + ): + f['source_preference'] = -99 + (f.get('source_preference') or -1) f['format_note'] = join_nonempty( tag_dict.get('ladder'), tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None, @@ -387,6 +422,7 @@ class HotStarPlaylistIE(HotStarBaseIE): def _real_extract(self, url): id_ = self._match_id(url) return self.playlist_result( + # XXX: If receiving HTTP Error 504, try with tas=0 self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_) @@ -457,4 +493,5 @@ def _real_extract(self, url): 'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id'] return self.playlist_result(self._playlist_entries( + # XXX: If receiving HTTP Error 504, try with tas=0 'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id) From 4bd9a7ade7e0508b9795b3e72a69eeb40788b62b Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 28 Jun 2025 18:30:51 -0500 Subject: [PATCH 15/51] [ie/hotstar:series] Fix extractor (#13564) * Removes HotStarSeasonIE and HotStarPlaylistIE Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 2 - yt_dlp/extractor/hotstar.py | 143 +++++++++++--------------------- 2 files changed, 49 insertions(+), 96 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fbbd9571f..a5a343447 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -805,9 +805,7 @@ from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, - HotStarPlaylistIE, HotStarPrefixIE, - HotStarSeasonIE, HotStarSeriesIE, ) from .hrefli import HrefLiRedirectIE diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 358b5ce75..c4fae00a9 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,3 +1,4 @@ +import functools import hashlib import hmac import json @@ -9,6 +10,7 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + OnDemandPagedList, determine_ext, int_or_none, join_nonempty, @@ -71,12 +73,36 @@ def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None): }, separators=(',', ':')), }, st=st, cookies=cookies) - def _playlist_entries(self, path, item_id, root=None, **kwargs): - results = self._call_api_v1(path, item_id, **kwargs)['body']['results'] - for video in traverse_obj(results, (('assets', None), 'items', ...)): - if video.get('contentId'): - yield self.url_result( - HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId']) + @staticmethod + def _parse_metadata_v1(video_data): + return traverse_obj(video_data, { + 'id': ('contentId', {str}), + 'title': ('title', {str}), + 'description': ('description', {str}), 
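The one-line tag parser in PATCH 14's format loop is terse; an isolated version with a sample of the ladder/audio_channel tags it feeds into format_note:

    def parse_playback_tags(tags):
        # Split 'key:value' pairs on ';', tolerating bare keys with no value,
        # the same trick as the tag_dict one-liner in the diff.
        return dict((*t.split(':', 1), None)[:2] for t in tags.split(';'))

    print(parse_playback_tags('ladder:tv;audio_channel:stereo;encryption:plain;video_codec:h265'))
    # {'ladder': 'tv', 'audio_channel': 'stereo', 'encryption': 'plain', 'video_codec': 'h265'}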
+ 'duration': ('duration', {int_or_none}), + 'timestamp': (('broadcastDate', 'startDate'), {int_or_none}, any), + 'release_year': ('year', {int_or_none}), + 'channel': ('channelName', {str}), + 'channel_id': ('channelId', {int}, {str_or_none}), + 'series': ('showName', {str}), + 'season': ('seasonName', {str}), + 'season_number': ('seasonNo', {int_or_none}), + 'season_id': ('seasonId', {int}, {str_or_none}), + 'episode': ('title', {str}), + 'episode_number': ('episodeNo', {int_or_none}), + }) + + def _fetch_page(self, path, item_id, name, query, root, page): + results = self._call_api_v1( + path, item_id, note=f'Downloading {name} page {page + 1} JSON', query={ + **query, + 'tao': page * self._PAGE_SIZE, + 'tas': self._PAGE_SIZE, + })['body']['results'] + + for video in traverse_obj(results, (('assets', None), 'items', lambda _, v: v['contentId'])): + yield self.url_result( + HotStarIE._video_url(video['contentId'], root=root), HotStarIE, **self._parse_metadata_v1(video)) class HotStarIE(HotStarBaseIE): @@ -245,14 +271,11 @@ def _real_extract(self, url): video_type = self._TYPE[video_type] cookies = self._get_cookies(url) # Cookies before any request - # tas=10000 can cause HTTP Error 504, see https://github.com/yt-dlp/yt-dlp/issues/7946 - for tas, err in [(10000, False), (0, None)]: - query = {'tas': tas, 'contentId': video_id} - video_data = traverse_obj( - self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, errnote=err, query=query), - ('body', 'results', 'item', {dict})) or {} - if video_data: - break + video_data = traverse_obj( + self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query={ + 'tas': 5, # See https://github.com/yt-dlp/yt-dlp/issues/7946 + 'contentId': video_id, + }), ('body', 'results', 'item', {dict})) or {} if video_data.get('drmProtected'): self.report_drm(video_id) @@ -343,22 +366,10 @@ def _real_extract(self, url): f.setdefault('http_headers', {}).update(headers) return { + **self._parse_metadata_v1(video_data), 'id': video_id, - 'title': video_data.get('title'), - 'description': video_data.get('description'), - 'duration': int_or_none(video_data.get('duration')), - 'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')), - 'release_year': int_or_none(video_data.get('year')), 'formats': formats, 'subtitles': subs, - 'channel': video_data.get('channelName'), - 'channel_id': str_or_none(video_data.get('channelId')), - 'series': video_data.get('showName'), - 'season': video_data.get('seasonName'), - 'season_number': int_or_none(video_data.get('seasonNo')), - 'season_id': str_or_none(video_data.get('seasonId')), - 'episode': video_data.get('title'), - 'episode_number': int_or_none(video_data.get('episodeNo')), } @@ -399,65 +410,6 @@ def _real_extract(self, url): return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id) -class HotStarPlaylistIE(HotStarBaseIE): - IE_NAME = 'hotstar:playlist' - _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P\w+)' - _TESTS = [{ - 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', - 'info_dict': { - 'id': '3_2_26', - }, - 'playlist_mincount': 20, - }, { - 'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272', - 
'only_matching': True, - }] - - def _real_extract(self, url): - id_ = self._match_id(url) - return self.playlist_result( - # XXX: If receiving HTTP Error 504, try with tas=0 - self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_) - - -class HotStarSeasonIE(HotStarBaseIE): - IE_NAME = 'hotstar:season' - _VALID_URL = r'(?Phttps?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P\w+)' - _TESTS = [{ - 'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028', - 'info_dict': { - 'id': '8028', - }, - 'playlist_mincount': 35, - }, { - 'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357', - 'info_dict': { - 'id': '4357', - }, - 'playlist_mincount': 30, - }, { - 'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/', - 'info_dict': { - 'id': '8208', - }, - 'playlist_mincount': 19, - }, { - 'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/', - 'only_matching': True, - }] - - def _real_extract(self, url): - url, season_id = self._match_valid_url(url).groups() - return self.playlist_result(self._playlist_entries( - 'season/asset', season_id, url, query={'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}), season_id) - - class HotStarSeriesIE(HotStarBaseIE): IE_NAME = 'hotstar:series' _VALID_URL = r'(?Phttps?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P\d+))/?(?:[#?]|$)' @@ -472,26 +424,29 @@ class HotStarSeriesIE(HotStarBaseIE): 'info_dict': { 'id': '1260050431', }, - 'playlist_mincount': 43, + 'playlist_mincount': 42, }, { 'url': 'https://www.hotstar.com/in/tv/mahabharat/435/', 'info_dict': { 'id': '435', }, 'playlist_mincount': 267, - }, { + }, { # HTTP Error 504 with tas=10000 (possibly because total size is over 1000 items?) 
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/', 'info_dict': { 'id': '1260022017', }, - 'playlist_mincount': 940, + 'playlist_mincount': 1601, }] + _PAGE_SIZE = 100 def _real_extract(self, url): - url, series_id = self._match_valid_url(url).groups() - id_ = self._call_api_v1( + url, series_id = self._match_valid_url(url).group('url', 'id') + eid = self._call_api_v1( 'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id'] - return self.playlist_result(self._playlist_entries( - # XXX: If receiving HTTP Error 504, try with tas=0 - 'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id) + entries = OnDemandPagedList(functools.partial( + self._fetch_page, 'tray/g/1/items', series_id, + 'series', {'etid': 0, 'eid': eid}, url), self._PAGE_SIZE) + + return self.playlist_result(entries, series_id) From 7e2504f941a11ea2b0dba00de3f0295cdc253e79 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 28 Jun 2025 18:32:21 -0500 Subject: [PATCH 16/51] [ie/jiocinema] Remove extractors (#13565) Closes #10123, Closes #10144, Closes #10225, Closes #10240, Closes #10508 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 - yt_dlp/extractor/jiocinema.py | 408 -------------------------------- 2 files changed, 412 deletions(-) delete mode 100644 yt_dlp/extractor/jiocinema.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a5a343447..61cc05d31 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -919,10 +919,6 @@ ShugiinItvVodIE, ) from .jeuxvideo import JeuxVideoIE -from .jiocinema import ( - JioCinemaIE, - JioCinemaSeriesIE, -) from .jiosaavn import ( JioSaavnAlbumIE, JioSaavnArtistIE, diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py deleted file mode 100644 index 94c85064e..000000000 --- a/yt_dlp/extractor/jiocinema.py +++ /dev/null @@ -1,408 +0,0 @@ -import base64 -import itertools -import json -import random -import re -import string -import time - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - jwt_decode_hs256, - parse_age_limit, - try_call, - url_or_none, -) -from ..utils.traversal import traverse_obj - - -class JioCinemaBaseIE(InfoExtractor): - _NETRC_MACHINE = 'jiocinema' - _GEO_BYPASS = False - _ACCESS_TOKEN = None - _REFRESH_TOKEN = None - _GUEST_TOKEN = None - _USER_ID = None - _DEVICE_ID = None - _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'} - _APP_NAME = {'appName': 'RJIL_JioCinema'} - _APP_VERSION = {'appVersion': '5.0.0'} - _API_SIGNATURES = 'o668nxgzwff' - _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi' - _ACCESS_HINT = 'the `accessToken` from your browser local storage' - _LOGIN_HINT = ( - 'Log in with "-u phone -p " to authenticate with OTP, ' - f'or use "-u token -p " to log in with {_ACCESS_HINT}. 
' - 'If you have previously logged in with yt-dlp and your session ' - 'has been cached, you can use "-u device -p "') - - def _cache_token(self, token_type): - assert token_type in ('access', 'refresh', 'all') - if token_type in ('access', 'all'): - self.cache.store( - JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN) - if token_type in ('refresh', 'all'): - self.cache.store( - JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN) - - def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}): - return self._download_json( - url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={ - 'Content-Type': 'application/json', - 'Accept': 'application/json', - **self._API_HEADERS, - **headers, - }, expected_status=(400, 403, 474)) - - def _call_auth_api(self, service, endpoint, note, headers={}, data={}): - return self._call_api( - f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}', - None, note=note, headers=headers, data=data) - - def _refresh_token(self): - if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID: - raise ExtractorError('User token has expired', expected=True) - response = self._call_auth_api( - 'token', 'refreshtoken', 'Refreshing token', - headers={'accesstoken': self._ACCESS_TOKEN}, data={ - **self._APP_NAME, - 'deviceId': self._DEVICE_ID, - 'refreshToken': self._REFRESH_TOKEN, - **self._APP_VERSION, - }) - refresh_token = response.get('refreshTokenId') - if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN: - JioCinemaBaseIE._REFRESH_TOKEN = refresh_token - self._cache_token('refresh') - JioCinemaBaseIE._ACCESS_TOKEN = response['authToken'] - self._cache_token('access') - - def _fetch_guest_token(self): - JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10)) - guest_token = self._call_auth_api( - 'token', 'guest', 'Downloading guest token', data={ - **self._APP_NAME, - 'deviceType': 'phone', - 'os': 'ios', - 'deviceId': self._DEVICE_ID, - 'freshLaunch': False, - 'adId': self._DEVICE_ID, - **self._APP_VERSION, - }) - self._GUEST_TOKEN = guest_token['authToken'] - self._USER_ID = guest_token['userId'] - - def _call_login_api(self, endpoint, guest_token, data, note): - return self._call_auth_api( - 'user', f'loginotp/{endpoint}', note, headers={ - **self.geo_verification_headers(), - 'accesstoken': self._GUEST_TOKEN, - **self._APP_NAME, - **traverse_obj(guest_token, 'data', { - 'deviceType': ('deviceType', {str}), - 'os': ('os', {str}), - })}, data=data) - - def _is_token_expired(self, token): - return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180) - - def _perform_login(self, username, password): - if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN): - return - - UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - - if username.lower() == 'token': - if try_call(lambda: jwt_decode_hs256(password)): - JioCinemaBaseIE._ACCESS_TOKEN = password - refresh_hint = 'the `refreshToken` UUID from your browser local storage' - refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0] - if not refresh_token: - self.to_screen( - 'To extend the life of your login session, in addition to your access token, ' - 'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" ' - f'where REFRESH_TOKEN is {refresh_hint}') - elif re.fullmatch(UUID_RE, refresh_token): - 
JioCinemaBaseIE._REFRESH_TOKEN = refresh_token - else: - self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}') - else: - raise ExtractorError( - f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True) - - elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password): - JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh') - JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access') - if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN: - raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True) - - elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password): - self._fetch_guest_token() - guest_token = jwt_decode_hs256(self._GUEST_TOKEN) - initial_data = { - 'number': base64.b64encode(password.encode()).decode(), - **self._APP_VERSION, - } - response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP') - if not traverse_obj(response, ('OTPInfo', {dict})): - raise ExtractorError('There was a problem with the phone number login attempt') - - is_iphone = guest_token.get('os') == 'ios' - response = self._call_login_api('verify', guest_token, { - 'deviceInfo': { - 'consumptionDeviceName': 'iPhone' if is_iphone else 'Android', - 'info': { - 'platform': {'name': 'iPhone OS' if is_iphone else 'Android'}, - 'androidId': self._DEVICE_ID, - 'type': 'iOS' if is_iphone else 'Android', - }, - }, - **initial_data, - 'otp': self._get_tfa_info('the one-time password sent to your phone'), - }, 'Submitting OTP') - if traverse_obj(response, 'code') == 1043: - raise ExtractorError('Wrong OTP', expected=True) - JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken'] - JioCinemaBaseIE._ACCESS_TOKEN = response['authToken'] - - else: - raise ExtractorError(self._LOGIN_HINT, expected=True) - - user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data'] - JioCinemaBaseIE._USER_ID = user_token['userId'] - JioCinemaBaseIE._DEVICE_ID = user_token['deviceId'] - if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device': - self._cache_token('all') - if self.get_param('cachedir') is not False: - self.to_screen( - f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"') - elif not JioCinemaBaseIE._REFRESH_TOKEN: - JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load( - JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh') - if JioCinemaBaseIE._REFRESH_TOKEN: - self._cache_token('access') - self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"') - if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN): - self._refresh_token() - - -class JioCinemaIE(JioCinemaBaseIE): - IE_NAME = 'jiocinema' - _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P\d{3,})' - _TESTS = [{ - 'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931', - 'info_dict': { - 'id': '3759931', - 'ext': 'mp4', - 'title': 'Pradeep to stop the wedding?', - 'description': 'md5:75f72d1d1a66976633345a3de6d672b1', - 'episode': 'Pradeep to stop the wedding?', - 'episode_number': 89, - 'season': 'Agnisakshi…Ek Samjhauta-S1', - 'season_number': 1, - 'series': 'Agnisakshi Ek Samjhauta', - 'duration': 1238.0, - 'thumbnail': r're:https?://.+\.jpg', - 'age_limit': 13, - 'season_id': '3698031', - 'upload_date': '20230606', - 'timestamp': 
1686009600, - 'release_date': '20230607', - 'genres': ['Drama'], - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch', - 'info_dict': { - 'id': '3754021', - 'ext': 'mp4', - 'title': 'Bhediya', - 'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0', - 'episode': 'Bhediya', - 'duration': 8500.0, - 'thumbnail': r're:https?://.+\.jpg', - 'age_limit': 13, - 'upload_date': '20230525', - 'timestamp': 1685026200, - 'release_date': '20230524', - 'genres': ['Comedy'], - }, - 'params': {'skip_download': 'm3u8'}, - }] - - def _extract_formats_and_subtitles(self, playback, video_id): - m3u8_url = traverse_obj(playback, ( - 'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any)) - if not m3u8_url: # DRM-only content only serves dash urls - self.report_drm(video_id) - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls') - self._remove_duplicate_formats(formats) - - return { - # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p - 'formats': traverse_obj(formats, ( - lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)), - 'subtitles': subtitles, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN): - self._fetch_guest_token() - elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN): - self._refresh_token() - - playback = self._call_api( - f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id, - 'Downloading playback JSON', headers={ - **self.geo_verification_headers(), - 'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN, - **self._APP_NAME, - 'deviceid': self._DEVICE_ID, - 'uniqueid': self._USER_ID, - 'x-apisignatures': self._API_SIGNATURES, - 'x-platform': 'androidweb', - 'x-platform-token': 'web', - }, data={ - '4k': False, - 'ageGroup': '18+', - 'appVersion': '3.4.0', - 'bitrateProfile': 'xhdpi', - 'capability': { - 'drmCapability': { - 'aesSupport': 'yes', - 'fairPlayDrmSupport': 'none', - 'playreadyDrmSupport': 'none', - 'widevineDRMSupport': 'none', - }, - 'frameRateCapability': [{ - 'frameRateSupport': '30fps', - 'videoQuality': '1440p', - }], - }, - 'continueWatchingRequired': False, - 'dolby': False, - 'downloadRequest': False, - 'hevc': False, - 'kidsSafe': False, - 'manufacturer': 'Windows', - 'model': 'Windows', - 'multiAudioRequired': True, - 'osVersion': '10', - 'parentalPinValid': True, - 'x-apisignatures': self._API_SIGNATURES, - }) - - status_code = traverse_obj(playback, ('code', {int})) - if status_code == 474: - self.raise_geo_restricted(countries=['IN']) - elif status_code == 1008: - error_msg = 'This content is only available for premium users' - if self._ACCESS_TOKEN: - raise ExtractorError(error_msg, expected=True) - self.raise_login_required(f'{error_msg}. 
{self._LOGIN_HINT}', method=None) - elif status_code == 400: - raise ExtractorError('The requested content is not available', expected=True) - elif status_code is not None and status_code != 200: - raise ExtractorError( - f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}') - - metadata = self._download_json( - f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details', - video_id, fatal=False, query={ - 'ids': f'include:{video_id}', - 'responseType': 'common', - 'devicePlatformType': 'desktop', - }) - - return { - 'id': video_id, - 'http_headers': self._API_HEADERS, - **self._extract_formats_and_subtitles(playback, video_id), - **traverse_obj(playback, ('data', { - # fallback metadata - 'title': ('name', {str}), - 'description': ('fullSynopsis', {str}), - 'series': ('show', 'name', {str}, filter), - 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}), - 'season_number': ('episode', 'season', {int_or_none}, filter), - 'episode': ('fullTitle', {str}), - 'episode_number': ('episode', 'episodeNo', {int_or_none}, filter), - 'age_limit': ('ageNemonic', {parse_age_limit}), - 'duration': ('totalDuration', {float_or_none}), - 'thumbnail': ('images', {url_or_none}), - })), - **traverse_obj(metadata, ('result', 0, { - 'title': ('fullTitle', {str}), - 'description': ('fullSynopsis', {str}), - 'series': ('showName', {str}, filter), - 'season': ('seasonName', {str}, filter), - 'season_number': ('season', {int_or_none}), - 'season_id': ('seasonId', {str}, filter), - 'episode': ('fullTitle', {str}), - 'episode_number': ('episode', {int_or_none}), - 'timestamp': ('uploadTime', {int_or_none}), - 'release_date': ('telecastDate', {str}), - 'age_limit': ('ageNemonic', {parse_age_limit}), - 'duration': ('duration', {float_or_none}), - 'genres': ('genres', ..., {str}), - 'thumbnail': ('seo', 'ogImage', {url_or_none}), - })), - } - - -class JioCinemaSeriesIE(JioCinemaBaseIE): - IE_NAME = 'jiocinema:series' - _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P[\w-]+)/(?P\d{3,})' - _TESTS = [{ - 'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917', - 'info_dict': { - 'id': '3499917', - 'title': 'naagin', - }, - 'playlist_mincount': 120, - }, { - 'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820', - 'info_dict': { - 'id': '3499820', - 'title': 'mtv-splitsvilla-x5', - }, - 'playlist_mincount': 310, - }] - - def _entries(self, series_id): - seasons = traverse_obj(self._download_json( - f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id, - 'Downloading series metadata JSON', query={'responseType': 'common'}), ( - 'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter', - 'trayTabs', lambda _, v: v['id'])) - - for season_num, season in enumerate(seasons, start=1): - season_id = season['id'] - label = season.get('label') or season_num - for page_num in itertools.count(1): - episodes = traverse_obj(self._download_json( - f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode', - season_id, f'Downloading season {label} page {page_num} JSON', query={ - 'sort': 'episode:asc', - 'id': season_id, - 'responseType': 'common', - 'page': page_num, - }), ('result', lambda _, v: v['id'] and url_or_none(v['slug']))) - if not episodes: - break - for episode in episodes: - yield self.url_result( - episode['slug'], JioCinemaIE, **traverse_obj(episode, { - 'video_id': 'id', - 'video_title': ('fullTitle', {str}), - 'season_number': ('season', {int_or_none}), - 
'episode_number': ('episode', {int_or_none}), - })) - - def _real_extract(self, url): - slug, series_id = self._match_valid_url(url).group('slug', 'id') - return self.playlist_result(self._entries(series_id), series_id, slug) From 7b81634fb1d15999757e7a9883daa6ef09ea785b Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 29 Jun 2025 18:49:27 +0200 Subject: [PATCH 17/51] [ie] Detect invalid m3u8 playlist data (#13563) Authored by: Grub4K --- test/test_InfoExtractor.py | 52 ++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/common.py | 33 ++++++++++++++++-------- 2 files changed, 75 insertions(+), 10 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index e6c8d574e..c9f70431f 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -36,6 +36,18 @@ def do_GET(self): self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + elif self.path == '/fake.m3u8': + self.send_response(200) + self.send_header('Content-Length', '1024') + self.end_headers() + self.wfile.write(1024 * b'\x00') + elif self.path == '/bipbop.m3u8': + with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f: + data = f.read() + self.send_response(200) + self.send_header('Content-Length', str(len(data))) + self.end_headers() + self.wfile.write(data) else: assert False @@ -2079,5 +2091,45 @@ def test_search_nuxt_json(self): self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) +class TestInfoExtractorNetwork(unittest.TestCase): + def setUp(self, /): + self.httpd = http.server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + self.port = http_server_port(self.httpd) + + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + self.called = False + + def require_warning(*args, **kwargs): + self.called = True + + self.ydl = FakeYDL() + self.ydl.report_warning = require_warning + self.ie = DummyIE(self.ydl) + + def tearDown(self, /): + self.ydl.close() + self.httpd.shutdown() + self.httpd.server_close() + self.server_thread.join(1) + + def test_extract_m3u8_formats(self): + formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( + f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False) + self.assertFalse(self.called) + self.assertTrue(formats) + self.assertTrue(subtitles) + + def test_extract_m3u8_formats_warning(self): + formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( + f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False) + self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file') + self.assertFalse(formats) + self.assertFalse(subtitles) + + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 32b4680b7..b75e80623 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,5 +1,6 @@ import base64 import collections +import contextlib import functools import getpass import http.client @@ -2129,21 +2130,33 @@ def _extract_m3u8_formats_and_subtitles( raise ExtractorError(errnote, video_id=video_id) self.report_warning(f'{errnote}{bug_reports_message()}') return [], {} - - res = self._download_webpage_handle( - m3u8_url, video_id, - note='Downloading m3u8 information' if note is None else note, - errnote='Failed to download m3u8 information' if errnote is None else errnote, + if note is None: + note = 'Downloading m3u8 information' + 
if errnote is None: + errnote = 'Failed to download m3u8 information' + response = self._request_webpage( + m3u8_url, video_id, note=note, errnote=errnote, fatal=fatal, data=data, headers=headers, query=query) - - if res is False: + if response is False: return [], {} - m3u8_doc, urlh = res - m3u8_url = urlh.url + with contextlib.closing(response): + prefix = response.read(512) + if not prefix.startswith(b'#EXTM3U'): + msg = 'Response data has no m3u header' + if fatal: + raise ExtractorError(msg, video_id=video_id) + self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id) + return [], {} + + content = self._webpage_read_content( + response, m3u8_url, video_id, note=note, errnote=errnote, + fatal=fatal, prefix=prefix, data=data) + if content is False: + return [], {} return self._parse_m3u8_formats_and_subtitles( - m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, + content, response.url, ext=ext, entry_protocol=entry_protocol, preference=preference, quality=quality, m3u8_id=m3u8_id, note=note, errnote=errnote, fatal=fatal, live=live, data=data, headers=headers, query=query, video_id=video_id) From 1b883846347addeab12663fd74317fd544341a1c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 08:05:52 -0500 Subject: [PATCH 18/51] [ci] Add signature tests (#13582) Authored by: bashonly --- .github/workflows/signature-tests.yml | 41 +++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/signature-tests.yml diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml new file mode 100644 index 000000000..203172e0b --- /dev/null +++ b/.github/workflows/signature-tests.yml @@ -0,0 +1,41 @@ +name: Signature Tests +on: + push: + paths: + - .github/workflows/signature-tests.yml + - test/test_youtube_signature.py + - yt_dlp/jsinterp.py + pull_request: + paths: + - .github/workflows/signature-tests.yml + - test/test_youtube_signature.py + - yt_dlp/jsinterp.py +permissions: + contents: read + +concurrency: + group: signature-tests-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + tests: + name: Signature Tests + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install test requirements + run: python3 ./devscripts/install_deps.py --only-optional --include test + - name: Run tests + timeout-minutes: 15 + run: | + python3 -m yt_dlp -v || true # Print debug head + python3 ./devscripts/run_tests.py test/test_youtube_signature.py From 958153a226214c86879e36211ac191bf78289578 Mon Sep 17 00:00:00 2001 From: sepro Date: Mon, 30 Jun 2025 15:50:33 +0200 Subject: [PATCH 19/51] [jsinterp] Fix `extract_object` (#13580) Fixes sig extraction for YouTube player `e12fbea4` Authored by: seproDev --- test/test_jsinterp.py | 4 ++++ test/test_youtube_signature.py | 5 +++++ yt_dlp/jsinterp.py | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 2e3cdc2a5..4268e890b 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -478,6 +478,10 @@ def test_extract_function_with_global_stack(self): func = 
jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000}) self.assertEqual(func([1]), 1111) + def test_extract_object(self): + jsi = JSInterpreter('var a={};a.xy={};var xy;var zxy={};xy={z:function(){return "abc"}};') + self.assertTrue('z' in jsi.extract_object('xy', None)) + def test_increment_decrement(self): self._test('function f() { var x = 1; return ++x; }', 2) self._test('function f() { var x = 1; return x++; }', 1) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 3336b6bff..5e6792679 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -133,6 +133,11 @@ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0', ), + ( + 'https://www.youtube.com/s/player/e12fbea4/player_ias.vflset/en_US/base.js', + 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', + 'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a', + ), ] _NSIG_TESTS = [ diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 45aeffa22..b49f0cf30 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -857,7 +857,7 @@ def extract_object(self, objname, *global_stack): obj = {} obj_m = re.search( r'''(?x) - (?(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*) }\s*; ''' % (re.escape(objname), _FUNC_NAME_RE), From e9f157669e24953a88d15ce22053649db7a8e81e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 14:19:43 -0500 Subject: [PATCH 20/51] [ie/hotstar] Fix formats extraction (#13585) Fix b5bd057fe86550f3aa67f2fc8790d1c6a251c57b Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index c4fae00a9..891bcc873 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -64,12 +64,16 @@ def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None): 'container': ['fmp4br', 'fmp4'], 'ads': ['non_ssai', 'ssai'], 'audio_channel': ['atmos', 'dolby51', 'stereo'], - 'encryption': ['plain'], - 'video_codec': ['h265'], # or ['h264'] + 'encryption': ['plain', 'widevine'], # wv only so we can raise appropriate error + 'video_codec': ['h265', 'h264'], 'ladder': ['tv', 'full'], - 'resolution': ['4k'], # or ['hd'] - 'true_resolution': ['4k'], # or ['hd'] - 'dynamic_range': ['hdr'], # or ['sdr'] + 'resolution': ['4k', 'hd'], + 'true_resolution': ['4k', 'hd'], + 'dynamic_range': ['hdr', 'sdr'], + }, separators=(',', ':')), + 'drm_parameters': json.dumps({ + 'widevine_security_level': ['SW_SECURE_DECODE', 'SW_SECURE_CRYPTO'], + 'hdcp_version': ['HDCP_V2_2', 'HDCP_V2_1', 'HDCP_V2', 'HDCP_V1'], }, separators=(',', ':')), }, st=st, cookies=cookies) @@ -281,7 +285,7 @@ def _real_extract(self, url): self.report_drm(video_id) geo_restricted = False - formats, subs = [], {} + formats, subs, has_drm = [], {}, False headers = {'Referer': f'{self._BASE_URL}/in'} content_type = traverse_obj(video_data, ('contentType', {str})) or self._CONTENT_TYPE[video_type] @@ -304,6 +308,11 @@ def _real_extract(self, url): for ignore in self._configuration_arg(key)): continue + tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';')) + if tag_dict.get('encryption') not in ('plain', None): + 
has_drm = True + continue + format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', playback_set['content_url']) ext = determine_ext(format_url) @@ -330,10 +339,6 @@ def _real_extract(self, url): self.write_debug(e) continue - tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';')) - if tag_dict.get('encryption') not in ('plain', None): - for f in current_formats: - f['has_drm'] = True for f in current_formats: for k, v in self._TAG_FIELDS.items(): if not f.get(k): @@ -361,6 +366,8 @@ def _real_extract(self, url): if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) + elif not formats and has_drm: + self.report_drm(video_id) self._remove_duplicate_formats(formats) for f in formats: f.setdefault('http_headers', {}).update(headers) From 2ba5391cd68ed4f2415c827d2cecbcbc75ace10b Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:02:59 -0500 Subject: [PATCH 21/51] [ie/youtube] Fix premium formats extraction (#13586) Fix ff6f94041aeee19c5559e1c1cd693960a1c1dd14 Closes #13545 Authored by: bashonly --- yt_dlp/extractor/youtube/_base.py | 2 ++ yt_dlp/extractor/youtube/_video.py | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 90e392715..5aee89b91 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -63,6 +63,7 @@ class _PoTokenContext(enum.Enum): 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS], 'SUPPORTS_COOKIES': True, + 'PLAYER_PARAMS': '8AEB', }, 'web_embedded': { 'INNERTUBE_CONTEXT': { @@ -174,6 +175,7 @@ class _PoTokenContext(enum.Enum): }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'SUPPORTS_COOKIES': True, + 'PLAYER_PARAMS': '8AEB', }, 'tv_simply': { 'INNERTUBE_CONTEXT': { diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 5ccc33fa3..4689c55db 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2820,10 +2820,6 @@ def _generate_player_context(cls, sts=None): context['signatureTimestamp'] = sts return { 'playbackContext': { - 'adPlaybackContext': { - 'pyv': True, - 'adType': 'AD_TYPE_INSTREAM', - }, 'contentPlaybackContext': context, }, **cls._get_checkok_params(), From 500761e41acb96953a5064e951d41d190c287e46 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:06:22 -0500 Subject: [PATCH 22/51] [ie] Fix m3u8 playlist data corruption (#13588) Revert 7b81634fb1d15999757e7a9883daa6ef09ea785b Closes #13581 Authored by: bashonly --- test/test_InfoExtractor.py | 52 -------------------------------------- yt_dlp/extractor/common.py | 33 ++++++++---------------- 2 files changed, 10 insertions(+), 75 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c9f70431f..e6c8d574e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -36,18 +36,6 @@ def do_GET(self): self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) - elif self.path == '/fake.m3u8': - self.send_response(200) - self.send_header('Content-Length', '1024') - self.end_headers() - self.wfile.write(1024 * b'\x00') - elif self.path == '/bipbop.m3u8': - with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f: - data = f.read() - self.send_response(200) - self.send_header('Content-Length', 
str(len(data))) - self.end_headers() - self.wfile.write(data) else: assert False @@ -2091,45 +2079,5 @@ def test_search_nuxt_json(self): self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) -class TestInfoExtractorNetwork(unittest.TestCase): - def setUp(self, /): - self.httpd = http.server.HTTPServer( - ('127.0.0.1', 0), InfoExtractorTestRequestHandler) - self.port = http_server_port(self.httpd) - - self.server_thread = threading.Thread(target=self.httpd.serve_forever) - self.server_thread.daemon = True - self.server_thread.start() - - self.called = False - - def require_warning(*args, **kwargs): - self.called = True - - self.ydl = FakeYDL() - self.ydl.report_warning = require_warning - self.ie = DummyIE(self.ydl) - - def tearDown(self, /): - self.ydl.close() - self.httpd.shutdown() - self.httpd.server_close() - self.server_thread.join(1) - - def test_extract_m3u8_formats(self): - formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( - f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False) - self.assertFalse(self.called) - self.assertTrue(formats) - self.assertTrue(subtitles) - - def test_extract_m3u8_formats_warning(self): - formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( - f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False) - self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file') - self.assertFalse(formats) - self.assertFalse(subtitles) - - if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b75e80623..32b4680b7 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,6 +1,5 @@ import base64 import collections -import contextlib import functools import getpass import http.client @@ -2130,33 +2129,21 @@ def _extract_m3u8_formats_and_subtitles( raise ExtractorError(errnote, video_id=video_id) self.report_warning(f'{errnote}{bug_reports_message()}') return [], {} - if note is None: - note = 'Downloading m3u8 information' - if errnote is None: - errnote = 'Failed to download m3u8 information' - response = self._request_webpage( - m3u8_url, video_id, note=note, errnote=errnote, + + res = self._download_webpage_handle( + m3u8_url, video_id, + note='Downloading m3u8 information' if note is None else note, + errnote='Failed to download m3u8 information' if errnote is None else errnote, fatal=fatal, data=data, headers=headers, query=query) - if response is False: + + if res is False: return [], {} - with contextlib.closing(response): - prefix = response.read(512) - if not prefix.startswith(b'#EXTM3U'): - msg = 'Response data has no m3u header' - if fatal: - raise ExtractorError(msg, video_id=video_id) - self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id) - return [], {} - - content = self._webpage_read_content( - response, m3u8_url, video_id, note=note, errnote=errnote, - fatal=fatal, prefix=prefix, data=data) - if content is False: - return [], {} + m3u8_doc, urlh = res + m3u8_url = urlh.url return self._parse_m3u8_formats_and_subtitles( - content, response.url, ext=ext, entry_protocol=entry_protocol, + m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, preference=preference, quality=quality, m3u8_id=m3u8_id, note=note, errnote=errnote, fatal=fatal, live=live, data=data, headers=headers, query=query, video_id=video_id) From b16722ede83377f77ea8352dcd0a6ca8e83b8f0f Mon Sep 17 00:00:00 2001 From: helpimnotdrowning <35247379+helpimnotdrowning@users.noreply.github.com> Date: Mon, 30 Jun 
2025 18:24:04 -0500 Subject: [PATCH 23/51] [ie/kick] Support subscriber-only content (#13550) Closes #13442 Authored by: helpimnotdrowning --- yt_dlp/extractor/kick.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py index 1f001d421..8049e1e34 100644 --- a/yt_dlp/extractor/kick.py +++ b/yt_dlp/extractor/kick.py @@ -1,12 +1,12 @@ +import functools +import urllib.parse from .common import InfoExtractor -from ..networking import HEADRequest from ..utils import ( UserNotLive, determine_ext, float_or_none, int_or_none, - merge_dicts, parse_iso8601, str_or_none, traverse_obj, @@ -16,21 +16,17 @@ class KickBaseIE(InfoExtractor): - def _real_initialize(self): - self._request_webpage( - HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False, impersonate=True) - xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN') - if not xsrf_token: - self.write_debug('kick.com did not set XSRF-TOKEN cookie') - KickBaseIE._API_HEADERS = { - 'Authorization': f'Bearer {xsrf_token.value}', - 'X-XSRF-TOKEN': xsrf_token.value, - } if xsrf_token else {} + @functools.cached_property + def _api_headers(self): + token = traverse_obj( + self._get_cookies('https://kick.com/'), + ('session_token', 'value', {urllib.parse.unquote})) + return {'Authorization': f'Bearer {token}'} if token else {} def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs): return self._download_json( f'https://kick.com/api/{path}', display_id, note=note, - headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs) + headers={**self._api_headers, **headers}, impersonate=True, **kwargs) class KickIE(KickBaseIE): From 35fc33fbc51c7f5392fb2300f65abf6cf107ef90 Mon Sep 17 00:00:00 2001 From: Clark Date: Mon, 30 Jun 2025 18:25:28 -0500 Subject: [PATCH 24/51] [ie/sauceplus] Add extractor (#13567) Authored by: ceandreasen, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/floatplane.py | 232 ++++++++++++++++++-------------- yt_dlp/extractor/sauceplus.py | 41 ++++++ 3 files changed, 176 insertions(+), 98 deletions(-) create mode 100644 yt_dlp/extractor/sauceplus.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 61cc05d31..ada12b3a8 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1824,6 +1824,7 @@ from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE +from .sauceplus import SaucePlusIE from .sbs import SBSIE from .sbscokr import ( SBSCoKrAllvodProgramIE, diff --git a/yt_dlp/extractor/floatplane.py b/yt_dlp/extractor/floatplane.py index b7ee160a4..7dd3b0eb2 100644 --- a/yt_dlp/extractor/floatplane.py +++ b/yt_dlp/extractor/floatplane.py @@ -17,8 +17,140 @@ from ..utils.traversal import traverse_obj -class FloatplaneIE(InfoExtractor): +class FloatplaneBaseIE(InfoExtractor): + def _real_extract(self, url): + post_id = self._match_id(url) + + post_data = self._download_json( + f'{self._BASE_URL}/api/v3/content/post', post_id, query={'id': post_id}, + note='Downloading post data', errnote='Unable to download post data', + impersonate=self._IMPERSONATE_TARGET) + + if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))): + raise ExtractorError('Post does not contain a video or audio track', expected=True) + + uploader_url = format_field( + post_data, [('creator', 
'urlname')], f'{self._BASE_URL}/channel/%s/home') or None + + common_info = { + 'uploader_url': uploader_url, + 'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))), + 'availability': self._availability(needs_subscription=True), + **traverse_obj(post_data, { + 'uploader': ('creator', 'title', {str}), + 'uploader_id': ('creator', 'id', {str}), + 'channel': ('channel', 'title', {str}), + 'channel_id': ('channel', 'id', {str}), + 'release_timestamp': ('releaseDate', {parse_iso8601}), + }), + } + + items = [] + for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)): + media_id = media['id'] + media_typ = media.get('type') or 'video' + + metadata = self._download_json( + f'{self._BASE_URL}/api/v3/content/{media_typ}', media_id, query={'id': media_id}, + note=f'Downloading {media_typ} metadata', impersonate=self._IMPERSONATE_TARGET) + + stream = self._download_json( + f'{self._BASE_URL}/api/v2/cdn/delivery', media_id, query={ + 'type': 'vod' if media_typ == 'video' else 'aod', + 'guid': metadata['guid'], + }, note=f'Downloading {media_typ} stream data', + impersonate=self._IMPERSONATE_TARGET) + + path_template = traverse_obj(stream, ('resource', 'uri', {str})) + + def format_path(params): + path = path_template + for i, val in (params or {}).items(): + path = path.replace(f'{{qualityLevelParams.{i}}}', val) + return path + + formats = [] + for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)): + url = urljoin(stream['cdn'], format_path(traverse_obj( + stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict})))) + format_id = traverse_obj(quality, ('name', {str})) + hls_aes = {} + m3u8_data = None + + # If we need impersonation for the API, then we need it for HLS keys too: extract in advance + if self._IMPERSONATE_TARGET is not None: + m3u8_data = self._download_webpage( + url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS, + note=join_nonempty('Downloading', format_id, 'm3u8 information', delim=' '), + errnote=join_nonempty('Failed to download', format_id, 'm3u8 information', delim=' ')) + if not m3u8_data: + continue + + key_url = self._search_regex( + r'#EXT-X-KEY:METHOD=AES-128,URI="(https?://[^"]+)"', + m3u8_data, 'HLS AES key URI', default=None) + if key_url: + urlh = self._request_webpage( + key_url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS, + note=join_nonempty('Downloading', format_id, 'HLS AES key', delim=' '), + errnote=join_nonempty('Failed to download', format_id, 'HLS AES key', delim=' ')) + if urlh: + hls_aes['key'] = urlh.read().hex() + + formats.append({ + **traverse_obj(quality, { + 'format_note': ('label', {str}), + 'width': ('width', {int}), + 'height': ('height', {int}), + }), + **parse_codecs(quality.get('codecs')), + 'url': url, + 'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'), + 'format_id': format_id, + 'hls_media_playlist_data': m3u8_data, + 'hls_aes': hls_aes or None, + }) + items.append({ + **common_info, + 'id': media_id, + **traverse_obj(metadata, { + 'title': ('title', {str}), + 'duration': ('duration', {int_or_none}), + 'thumbnail': ('thumbnail', 'path', {url_or_none}), + }), + 'formats': formats, + }) + + post_info = { + **common_info, + 'id': post_id, + 'display_id': post_id, + **traverse_obj(post_data, { + 'title': ('title', {str}), + 'description': ('text', {clean_html}), + 'like_count': ('likes', {int_or_none}), + 'dislike_count': ('dislikes', 
{int_or_none}), + 'comment_count': ('comments', {int_or_none}), + 'thumbnail': ('thumbnail', 'path', {url_or_none}), + }), + 'http_headers': self._HEADERS, + } + + if len(items) > 1: + return self.playlist_result(items, **post_info) + + post_info.update(items[0]) + return post_info + + +class FloatplaneIE(FloatplaneBaseIE): _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P\w+)' + _BASE_URL = 'https://www.floatplane.com' + _IMPERSONATE_TARGET = None + _HEADERS = { + 'Origin': _BASE_URL, + 'Referer': f'{_BASE_URL}/', + } _TESTS = [{ 'url': 'https://www.floatplane.com/post/2Yf3UedF7C', 'info_dict': { @@ -170,105 +302,9 @@ class FloatplaneIE(InfoExtractor): }] def _real_initialize(self): - if not self._get_cookies('https://www.floatplane.com').get('sails.sid'): + if not self._get_cookies(self._BASE_URL).get('sails.sid'): self.raise_login_required() - def _real_extract(self, url): - post_id = self._match_id(url) - - post_data = self._download_json( - 'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id}, - note='Downloading post data', errnote='Unable to download post data') - - if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))): - raise ExtractorError('Post does not contain a video or audio track', expected=True) - - uploader_url = format_field( - post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None - - common_info = { - 'uploader_url': uploader_url, - 'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))), - 'availability': self._availability(needs_subscription=True), - **traverse_obj(post_data, { - 'uploader': ('creator', 'title', {str}), - 'uploader_id': ('creator', 'id', {str}), - 'channel': ('channel', 'title', {str}), - 'channel_id': ('channel', 'id', {str}), - 'release_timestamp': ('releaseDate', {parse_iso8601}), - }), - } - - items = [] - for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)): - media_id = media['id'] - media_typ = media.get('type') or 'video' - - metadata = self._download_json( - f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id}, - note=f'Downloading {media_typ} metadata') - - stream = self._download_json( - 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={ - 'type': 'vod' if media_typ == 'video' else 'aod', - 'guid': metadata['guid'], - }, note=f'Downloading {media_typ} stream data') - - path_template = traverse_obj(stream, ('resource', 'uri', {str})) - - def format_path(params): - path = path_template - for i, val in (params or {}).items(): - path = path.replace(f'{{qualityLevelParams.{i}}}', val) - return path - - formats = [] - for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)): - url = urljoin(stream['cdn'], format_path(traverse_obj( - stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict})))) - formats.append({ - **traverse_obj(quality, { - 'format_id': ('name', {str}), - 'format_note': ('label', {str}), - 'width': ('width', {int}), - 'height': ('height', {int}), - }), - **parse_codecs(quality.get('codecs')), - 'url': url, - 'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'), - }) - - items.append({ - **common_info, - 'id': media_id, - **traverse_obj(metadata, { - 'title': ('title', {str}), - 'duration': ('duration', {int_or_none}), - 'thumbnail': ('thumbnail', 'path', {url_or_none}), - }), - 'formats': formats, - }) - - post_info = { - **common_info, - 'id': post_id, - 
'display_id': post_id, - **traverse_obj(post_data, { - 'title': ('title', {str}), - 'description': ('text', {clean_html}), - 'like_count': ('likes', {int_or_none}), - 'dislike_count': ('dislikes', {int_or_none}), - 'comment_count': ('comments', {int_or_none}), - 'thumbnail': ('thumbnail', 'path', {url_or_none}), - }), - } - - if len(items) > 1: - return self.playlist_result(items, **post_info) - - post_info.update(items[0]) - return post_info - class FloatplaneChannelIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P[\w-]+)/home(?:/(?P[\w-]+))?' diff --git a/yt_dlp/extractor/sauceplus.py b/yt_dlp/extractor/sauceplus.py new file mode 100644 index 000000000..75d7022d3 --- /dev/null +++ b/yt_dlp/extractor/sauceplus.py @@ -0,0 +1,41 @@ +from .floatplane import FloatplaneBaseIE + + +class SaucePlusIE(FloatplaneBaseIE): + IE_DESC = 'Sauce+' + _VALID_URL = r'https?://(?:(?:www|beta)\.)?sauceplus\.com/post/(?P\w+)' + _BASE_URL = 'https://www.sauceplus.com' + _HEADERS = { + 'Origin': _BASE_URL, + 'Referer': f'{_BASE_URL}/', + } + _IMPERSONATE_TARGET = True + _TESTS = [{ + 'url': 'https://www.sauceplus.com/post/YbBwIa2A5g', + 'info_dict': { + 'id': 'eit4Ugu5TL', + 'ext': 'mp4', + 'display_id': 'YbBwIa2A5g', + 'title': 'Scare the Coyote - Episode 3', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'duration': 2975, + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'release_date': '20250627', + 'release_timestamp': 1750993500, + 'uploader': 'Scare The Coyote', + 'uploader_id': '683e0a3269688656a5a49a44', + 'uploader_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home', + 'channel': 'Scare The Coyote', + 'channel_id': '683e0a326968866ceba49a45', + 'channel_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home/main', + 'availability': 'subscriber_only', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_initialize(self): + if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'): + self.raise_login_required() From 11b9416e10cff7513167d76d6c47774fcdd3e26a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:37:56 -0500 Subject: [PATCH 25/51] [ie/sproutvideo] Support browser impersonation (#13589) Closes #13576 Authored by: bashonly --- yt_dlp/extractor/sproutvideo.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 764c78f1e..b5af90541 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -98,11 +98,8 @@ def _extract_embed_urls(cls, url, webpage): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, headers={ - **traverse_obj(smuggled_data, {'Referer': 'referer'}), - # yt-dlp's default Chrome user-agents are too old - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:140.0) Gecko/20100101 Firefox/140.0', - }) + webpage = self._download_webpage( + url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True) data = self._search_json( r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', From b0187844988e557c7e1e6bb1aabd4c1176768d86 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:44:42 -0500 Subject: [PATCH 26/51] [cleanup] Misc (#13590) 

Authored by: bashonly
---
 devscripts/changelog_override.json | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json
index 269de2c68..d7296bf30 100644
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@@ -254,5 +254,13 @@
     {
         "action": "remove",
         "when": "d596824c2f8428362c072518856065070616e348"
+    },
+    {
+        "action": "remove",
+        "when": "7b81634fb1d15999757e7a9883daa6ef09ea785b"
+    },
+    {
+        "action": "remove",
+        "when": "500761e41acb96953a5064e951d41d190c287e46"
     }
 ]
From 30fa54280b363265d0235b0aab3b1725eb0f61b8 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 30 Jun 2025 23:47:20 +0000
Subject: [PATCH 27/51] Release 2025.06.30

Created by: bashonly

:ci skip all
---
 CONTRIBUTORS      |  3 +++
 Changelog.md      | 23 +++++++++++++++++++++++
 supportedsites.md |  7 ++-----
 yt_dlp/version.py |  6 +++---
 4 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 00d4d15aa..ba23b66dc 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -781,3 +781,6 @@ maxbin123
 nullpos
 anlar
 eason1478
+ceandreasen
+chauhantirth
+helpimnotdrowning
diff --git a/Changelog.md b/Changelog.md
index d37852658..5a5c18cf3 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -4,6 +4,29 @@ # Changelog
 # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
 -->
 
+### 2025.06.30
+
+#### Core changes
+- **jsinterp**: [Fix `extract_object`](https://github.com/yt-dlp/yt-dlp/commit/958153a226214c86879e36211ac191bf78289578) ([#13580](https://github.com/yt-dlp/yt-dlp/issues/13580)) by [seproDev](https://github.com/seproDev)
+
+#### Extractor changes
+- **bilibilispacevideo**: [Extract hidden-mode collections as playlists](https://github.com/yt-dlp/yt-dlp/commit/99b85ac102047446e6adf5b62bfc3c8d80b53778) ([#13533](https://github.com/yt-dlp/yt-dlp/issues/13533)) by [c-basalt](https://github.com/c-basalt)
+- **hotstar**
+    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b5bd057fe86550f3aa67f2fc8790d1c6a251c57b) ([#13530](https://github.com/yt-dlp/yt-dlp/issues/13530)) by [bashonly](https://github.com/bashonly), [chauhantirth](https://github.com/chauhantirth) (With fixes in [e9f1576](https://github.com/yt-dlp/yt-dlp/commit/e9f157669e24953a88d15ce22053649db7a8e81e) by [bashonly](https://github.com/bashonly))
+    - [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0a6b1044899f452cd10b6c7a6b00fa985a9a8b97) ([#13560](https://github.com/yt-dlp/yt-dlp/issues/13560)) by [bashonly](https://github.com/bashonly)
+    - [Raise for login required](https://github.com/yt-dlp/yt-dlp/commit/5e292baad62c749b6c340621ab2d0f904165ddfb) ([#10405](https://github.com/yt-dlp/yt-dlp/issues/10405)) by [bashonly](https://github.com/bashonly)
+    - series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4bd9a7ade7e0508b9795b3e72a69eeb40788b62b) ([#13564](https://github.com/yt-dlp/yt-dlp/issues/13564)) by [bashonly](https://github.com/bashonly)
+- **jiocinema**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/7e2504f941a11ea2b0dba00de3f0295cdc253e79) ([#13565](https://github.com/yt-dlp/yt-dlp/issues/13565)) by [bashonly](https://github.com/bashonly)
+- **kick**: [Support subscriber-only content](https://github.com/yt-dlp/yt-dlp/commit/b16722ede83377f77ea8352dcd0a6ca8e83b8f0f) ([#13550](https://github.com/yt-dlp/yt-dlp/issues/13550)) by [helpimnotdrowning](https://github.com/helpimnotdrowning)
+- **niconico**: live: [Fix extractor and downloader](https://github.com/yt-dlp/yt-dlp/commit/06c1a8cdffe14050206683253726875144192ef5) ([#13158](https://github.com/yt-dlp/yt-dlp/issues/13158)) by [doe1080](https://github.com/doe1080)
+- **sauceplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/35fc33fbc51c7f5392fb2300f65abf6cf107ef90) ([#13567](https://github.com/yt-dlp/yt-dlp/issues/13567)) by [bashonly](https://github.com/bashonly), [ceandreasen](https://github.com/ceandreasen)
+- **sproutvideo**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/11b9416e10cff7513167d76d6c47774fcdd3e26a) ([#13589](https://github.com/yt-dlp/yt-dlp/issues/13589)) by [bashonly](https://github.com/bashonly)
+- **youtube**: [Fix premium formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2ba5391cd68ed4f2415c827d2cecbcbc75ace10b) ([#13586](https://github.com/yt-dlp/yt-dlp/issues/13586)) by [bashonly](https://github.com/bashonly)
+
+#### Misc. changes
+- **ci**: [Add signature tests](https://github.com/yt-dlp/yt-dlp/commit/1b883846347addeab12663fd74317fd544341a1c) ([#13582](https://github.com/yt-dlp/yt-dlp/issues/13582)) by [bashonly](https://github.com/bashonly)
+- **cleanup**: Miscellaneous: [b018784](https://github.com/yt-dlp/yt-dlp/commit/b0187844988e557c7e1e6bb1aabd4c1176768d86) by [bashonly](https://github.com/bashonly)
+
 ### 2025.06.25
 
 #### Extractor changes
diff --git a/supportedsites.md b/supportedsites.md
index b3fe01173..8e48135d2 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -575,9 +575,7 @@ # Supported sites
  - **HollywoodReporterPlaylist**
  - **Holodex**
  - **HotNewHipHop**: (**Currently broken**)
- - **hotstar**
- - **hotstar:playlist**
- - **hotstar:season**
+ - **hotstar**: JioHotstar
  - **hotstar:series**
  - **hrfernsehen**
  - **HRTi**: [*hrti*](## "netrc machine")
@@ -647,8 +645,6 @@ # Supported sites
  - **Jamendo**
  - **JamendoAlbum**
  - **JeuxVideo**: (**Currently broken**)
- - **jiocinema**: [*jiocinema*](## "netrc machine")
- - **jiocinema:series**: [*jiocinema*](## "netrc machine")
  - **jiosaavn:album**
  - **jiosaavn:artist**
  - **jiosaavn:playlist**
@@ -1299,6 +1295,7 @@ # Supported sites
  - **SampleFocus**
  - **Sangiin**: 参議院インターネット審議中継 (archive)
  - **Sapo**: SAPO Vídeos
+ - **SaucePlus**: Sauce+
  - **SBS**: sbs.com.au
  - **sbs.co.kr**
  - **sbs.co.kr:allvod_program**
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 020a0299c..451fee716 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,8 +1,8 @@
 # Autogenerated by devscripts/update-version.py
 
-__version__ = '2025.06.25'
+__version__ = '2025.06.30'
 
-RELEASE_GIT_HEAD = '1838a1ce5d4ade80770ba9162eaffc9a1607dc70'
+RELEASE_GIT_HEAD = 'b0187844988e557c7e1e6bb1aabd4c1176768d86'
 
 VARIANT = None
 
@@ -12,4 +12,4 @@
 
 ORIGIN = 'yt-dlp/yt-dlp'
 
-_pkg_version = '2025.06.25'
+_pkg_version = '2025.06.30'
From f3008bc5f89d2691f2f8dfc51b406ef4e25281c3 Mon Sep 17 00:00:00 2001
From: sepro
Date: Tue, 1 Jul 2025 13:23:53 +0200
Subject: [PATCH 28/51] No longer enable `--mtime` by default (#12781)

Closes #12780

Authored by: seproDev
---
 README.md                     |  9 +++++----
 yt_dlp/YoutubeDL.py           |  3 ++-
 yt_dlp/__init__.py            |  6 ++++++
 yt_dlp/downloader/fragment.py |  2 +-
 yt_dlp/downloader/http.py     |  2 +-
 yt_dlp/options.py             | 10 +++++-----
 6 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 0f9a7d556..e476c0084 100644
--- a/README.md
+++ b/README.md
@@ -1156,15 +1156,15 @@ # CONFIGURATION
 * `/etc/yt-dlp/config`
 * `/etc/yt-dlp/config.txt`
 
-E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
+E.g. with the following configuration file, yt-dlp will always extract the audio, copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
 ```
 # Lines starting with # are comments
 
 # Always extract audio
 -x
 
-# Do not copy the mtime
---no-mtime
+# Copy the mtime
+--mtime
 
 # Use this proxy
 --proxy 127.0.0.1:3128
@@ -2262,6 +2262,7 @@ ### Differences in default behavior
 * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
 * The sub-modules `swfinterp`, `casefold` are removed.
 * Passing `--simulate` (or calling `extract_info` with `download=False`) no longer alters the default format selection. See [#9843](https://github.com/yt-dlp/yt-dlp/issues/9843) for details.
+* yt-dlp no longer applies the server modified time to downloaded files by default. Use `--mtime` or `--compat-options mtime-by-default` to revert this.
 
 For ease of use, a few more compat options are available:
 
@@ -2271,7 +2272,7 @@ ### Differences in default behavior
 * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization`
 * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
 * `--compat-options 2023`: Same as `--compat-options 2024,prefer-vp9-sort`
-* `--compat-options 2024`: Currently does nothing. Use this to enable all future compat options
+* `--compat-options 2024`: Same as `--compat-options mtime-by-default`. Use this to enable all future compat options
 
 The following compat options restore vulnerable behavior from before security patches:
 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 67ca90349..44a6696c0 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -482,7 +482,8 @@ class YoutubeDL:
                        The following options do not work when used through the API:
                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
                        no-clean-infojson, no-playlist-metafiles,
-                       no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
+                       no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort,
+                       mtime-by-default.
                        Refer __init__.py for their implementation
     progress_template: Dictionary of templates for progress outputs.
                        Allowed keys are 'download', 'postprocess',
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 714d9ad5c..2e7646b7e 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -159,6 +159,12 @@ def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
     elif 'prefer-vp9-sort' in opts.compat_opts:
         opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
 
+    if 'mtime-by-default' in opts.compat_opts:
+        if opts.updatetime is None:
+            opts.updatetime = True
+        else:
+            _unused_compat_opt('mtime-by-default')
+
     _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
     _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
     if _video_multistreams_set is False and _audio_multistreams_set is False:
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 98784e703..7852ae90d 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -302,7 +302,7 @@ def _finish_frag_download(self, ctx, info_dict):
         elif to_file:
             self.try_rename(ctx['tmpfilename'], ctx['filename'])
         filetime = ctx.get('fragment_filetime')
-        if self.params.get('updatetime', True) and filetime:
+        if self.params.get('updatetime') and filetime:
             with contextlib.suppress(Exception):
                 os.utime(ctx['filename'], (time.time(), filetime))
 
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index 9c6dd8b79..90bfcaf55 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -348,7 +348,7 @@ def retry(e):
             self.try_rename(ctx.tmpfilename, ctx.filename)
 
             # Update file modification time
-            if self.params.get('updatetime', True):
+            if self.params.get('updatetime'):
                 info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
 
             self._hook_progress({
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index b4d3d4d66..13ba445df 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -529,14 +529,14 @@ def _preset_alias_callback(option, opt_str, value, parser):
             'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
             'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
             'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
-            'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort',
+            'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort', 'mtime-by-default',
         }, 'aliases': {
             'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
             'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
             '2021': ['2022', 'no-certifi', 'filename-sanitization'],
             '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
             '2023': ['2024', 'prefer-vp9-sort'],
-            '2024': [],
+            '2024': ['mtime-by-default'],
         },
     }, help=(
         'Options that can help keep compatibility with youtube-dl or youtube-dlc '
@@ -1466,12 +1466,12 @@ def _preset_alias_callback(option, opt_str, value, parser):
         help='Do not use .part files - write directly into output file')
     filesystem.add_option(
         '--mtime',
-        action='store_true', dest='updatetime', default=True,
-        help='Use the Last-modified header to set the file modification time (default)')
+        action='store_true', dest='updatetime', default=None,
+        help='Use the Last-modified header to set the file modification time')
     filesystem.add_option(
         '--no-mtime',
         action='store_false', dest='updatetime',
-        help='Do not use the Last-modified header to set the file modification time')
+        help='Do not use the Last-modified header to set the file modification time (default)')
     filesystem.add_option(
         '--write-description',
         action='store_true', dest='writedescription', default=False,
From ca5cce5b07d51efe7310b449cdefeca8d873e9df Mon Sep 17 00:00:00 2001
From: sepro
Date: Tue, 1 Jul 2025 21:17:11 +0200
Subject: [PATCH 29/51] [cleanup] Bump ruff to 0.12.x (#13596)

Authored by: seproDev
---
 pyproject.toml          | 4 +++-
 yt_dlp/aes.py           | 2 +-
 yt_dlp/extractor/nhk.py | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3775251e1..41d5ec3b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,7 +75,7 @@ dev = [
 ]
 static-analysis = [
     "autopep8~=2.0",
-    "ruff~=0.11.0",
+    "ruff~=0.12.0",
 ]
 test = [
     "pytest~=8.1",
@@ -210,10 +210,12 @@ ignore = [
     "TD001",  # invalid-todo-tag
     "TD002",  # missing-todo-author
     "TD003",  # missing-todo-link
+    "PLC0415",  # import-outside-top-level
     "PLE0604",  # invalid-all-object (false positives)
     "PLE0643",  # potential-index-error (false positives)
     "PLW0603",  # global-statement
     "PLW1510",  # subprocess-run-without-check
+    "PLW1641",  # eq-without-hash
     "PLW2901",  # redefined-loop-name
     "RUF001",  # ambiguous-unicode-character-string
     "RUF012",  # mutable-class-default
diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py
index 065901d68..600cb12a8 100644
--- a/yt_dlp/aes.py
+++ b/yt_dlp/aes.py
@@ -435,7 +435,7 @@ def sub_bytes_inv(data):
 
 
 def rotate(data):
-    return data[1:] + [data[0]]
+    return [*data[1:], data[0]]
 
 
 def key_schedule_core(data, rcon_iteration):
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index 0bd6edfcb..0d5e5b0e7 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -495,7 +495,7 @@ def _real_extract(self, url):
         chapters = None
         if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
             start_time = chapter_durations
-            end_time = chapter_durations[1:] + [duration]
+            end_time = [*chapter_durations[1:], duration]
             chapters = [{
                 'start_time': s,
                 'end_time': e,
From c2ff2dbaec7929015373fe002e9bd4849931a4ce Mon Sep 17 00:00:00 2001
From: Simon Sawicki
Date: Wed, 2 Jul 2025 00:12:43 +0200
Subject: [PATCH 30/51] [rh:requests] Work around partial read dropping data (#13599)

Authored by: Grub4K
---
 test/test_networking.py        | 17 ++++++++++++-----
 yt_dlp/networking/_requests.py |  4 ++++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/test/test_networking.py b/test/test_networking.py
index 2f441fced..afdd0c7aa 100644
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -22,7 +22,6 @@
 import tempfile
 import threading
 import time
-import urllib.error
 import urllib.request
 import warnings
 import zlib
@@ -223,10 +222,7 @@ def do_GET(self):
             if encoding == 'br' and brotli:
                 payload = brotli.compress(payload)
             elif encoding == 'gzip':
-                buf = io.BytesIO()
-                with gzip.GzipFile(fileobj=buf, mode='wb') as f:
-                    f.write(payload)
-                payload = buf.getvalue()
+                payload = gzip.compress(payload, mtime=0)
             elif encoding == 'deflate':
                 payload = zlib.compress(payload)
             elif encoding == 'unsupported':
@@ -729,6 +725,17 @@ def test_keep_header_casing(self, handler):
 
         assert 'X-test-heaDer: test' in res
 
+    def test_partial_read_then_full_read(self, handler):
+        with handler() as rh:
+            for encoding in ('', 'gzip', 'deflate'):
+                res = validate_and_send(rh, Request(
+                    f'http://127.0.0.1:{self.http_port}/content-encoding',
+                    headers={'ytdl-encoding': encoding}))
+                assert res.headers.get('Content-Encoding') == encoding
+                assert res.read(6) == b'<html>'
+                assert res.read(0) == b''
+                assert res.read() == b'