From 0a6b1044899f452cd10b6c7a6b00fa985a9a8b97 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:29:37 -0500 Subject: [PATCH 01/28] [ie/hotstar] Fix metadata extraction (#13560) Closes #7946 Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index e97740c90b..3417869298 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -232,10 +232,15 @@ def _real_extract(self, url): video_type = self._TYPE.get(video_type, video_type) cookies = self._get_cookies(url) # Cookies before any request - video_data = traverse_obj( - self._call_api_v1( - f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}), - ('body', 'results', 'item', {dict})) or {} + # tas=10000 can cause HTTP Error 504, see https://github.com/yt-dlp/yt-dlp/issues/7946 + for tas in (10000, 0): + query = {'tas': tas, 'contentId': video_id} + video_data = traverse_obj( + self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query=query), + ('body', 'results', 'item', {dict})) or {} + if video_data: + break + if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): self.report_drm(video_id) From 5e292baad62c749b6c340621ab2d0f904165ddfb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:31:06 -0500 Subject: [PATCH 02/28] [ie/hotstar] Raise for login required (#10405) Closes #10366 Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 3417869298..e9e3d1926b 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,6 +1,5 @@ import hashlib import hmac -import json import re import time import uuid @@ -29,31 +28,20 @@ def _call_api_v1(self, path, *args, **kwargs): headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'}) def _call_api_impl(self, path, video_id, query, st=None, cookies=None): + if not cookies or not cookies.get('userUP'): + self.raise_login_required() + st = int_or_none(st) or int(time.time()) exp = st + 6000 auth = f'st={st}~exp={exp}~acl=/*' auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() - - if cookies and cookies.get('userUP'): - token = cookies.get('userUP').value - else: - token = self._download_json( - f'{self._API_URL}/um/v3/users', - video_id, note='Downloading token', - data=json.dumps({'device_ids': [{'id': str(uuid.uuid4()), 'type': 'device_id'}]}).encode(), - headers={ - 'hotstarauth': auth, - 'x-hs-platform': 'PCTV', # or 'web' - 'Content-Type': 'application/json', - })['user_identity'] - response = self._download_json( f'{self._API_URL}/{path}', video_id, query=query, headers={ 'hotstarauth': auth, 'x-hs-appversion': '6.72.2', 'x-hs-platform': 'web', - 'x-hs-usertoken': token, + 'x-hs-usertoken': cookies['userUP'].value, }) if response['message'] != "Playback URL's fetched successfully": From b5bd057fe86550f3aa67f2fc8790d1c6a251c57b Mon Sep 17 00:00:00 2001 From: chauhantirth <92777505+chauhantirth@users.noreply.github.com> Date: Sat, 28 Jun 2025 07:59:43 +0530 Subject: [PATCH 03/28] [ie/hotstar] Fix extractor (#13530) Closes #11195 Authored by: chauhantirth, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/hotstar.py | 127 +++++++++++++++++++++++------------- 1 file changed, 82 insertions(+), 45 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index e9e3d1926b..358b5ce757 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,5 +1,6 @@ import hashlib import hmac +import json import re import time import uuid @@ -12,14 +13,15 @@ int_or_none, join_nonempty, str_or_none, - traverse_obj, url_or_none, ) +from ..utils.traversal import require, traverse_obj class HotStarBaseIE(InfoExtractor): _BASE_URL = 'https://www.hotstar.com' _API_URL = 'https://api.hotstar.com' + _API_URL_V2 = 'https://apix.hotstar.com/v2' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' def _call_api_v1(self, path, *args, **kwargs): @@ -36,27 +38,38 @@ def _call_api_impl(self, path, video_id, query, st=None, cookies=None): auth = f'st={st}~exp={exp}~acl=/*' auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() response = self._download_json( - f'{self._API_URL}/{path}', video_id, query=query, + f'{self._API_URL_V2}/{path}', video_id, query=query, headers={ + 'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)', 'hotstarauth': auth, - 'x-hs-appversion': '6.72.2', - 'x-hs-platform': 'web', 'x-hs-usertoken': cookies['userUP'].value, + 'x-hs-device-id': traverse_obj(cookies, ('deviceId', 'value')) or str(uuid.uuid4()), + 'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970', + 'x-hs-platform': 'androidtv', + 'content-type': 'application/json', }) - if response['message'] != "Playback URL's fetched successfully": - raise ExtractorError( - response['message'], expected=True) - return response['data'] + if not traverse_obj(response, ('success', {dict})): + raise ExtractorError('API call was unsuccessful') + return response['success'] - def _call_api_v2(self, path, video_id, st=None, cookies=None): - return self._call_api_impl( - f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={ - 'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265', - 'device-id': cookies.get('device_id').value if cookies.get('device_id') else str(uuid.uuid4()), - 'os-name': 'Windows', - 'os-version': '10', - }) + def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None): + return self._call_api_impl(f'{path}', video_id, query={ + 'content_id': video_id, + 'filters': f'content_type={content_type}', + 'client_capabilities': json.dumps({ + 'package': ['dash', 'hls'], + 'container': ['fmp4br', 'fmp4'], + 'ads': ['non_ssai', 'ssai'], + 'audio_channel': ['atmos', 'dolby51', 'stereo'], + 'encryption': ['plain'], + 'video_codec': ['h265'], # or ['h264'] + 'ladder': ['tv', 'full'], + 'resolution': ['4k'], # or ['hd'] + 'true_resolution': ['4k'], # or ['hd'] + 'dynamic_range': ['hdr'], # or ['sdr'] + }, separators=(',', ':')), + }, st=st, cookies=cookies) def _playlist_entries(self, path, item_id, root=None, **kwargs): results = self._call_api_v1(path, item_id, **kwargs)['body']['results'] @@ -68,6 +81,7 @@ def _playlist_entries(self, path, item_id, root=None, **kwargs): class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' + IE_DESC = 'JioHotstar' _VALID_URL = r'''(?x) https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/) (?: @@ -102,15 +116,16 @@ class HotStarIE(HotStarBaseIE): 'upload_date': '20190501', 'duration': 1219, 'channel': 'StarPlus', - 'channel_id': '3', + 'channel_id': '821', 'series': 'Ek Bhram - Sarvagun Sampanna', 'season': 'Chapter 1', 'season_number': 1, - 'season_id': '6771', + 'season_id': '1260004607', 'episode': 'Janhvi Targets Suman', 'episode_number': 8, }, - }, { + 'params': {'skip_download': 'm3u8'}, + }, { # Metadata call gets HTTP Error 504 with tas=10000 'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843', 'info_dict': { 'id': '1000282843', @@ -122,14 +137,14 @@ class HotStarIE(HotStarBaseIE): 'channel': 'StarPlus', 'series': 'Anupama', 'season_number': 1, - 'season_id': '7399', + 'season_id': '1260022018', 'upload_date': '20230307', 'episode': 'Anupama, Anuj Share a Moment', 'episode_number': 853, - 'duration': 1272, - 'channel_id': '3', + 'duration': 1266, + 'channel_id': '821', }, - 'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320', 'info_dict': { @@ -142,14 +157,15 @@ class HotStarIE(HotStarBaseIE): 'channel': 'Hotstar Specials', 'series': 'Kana Kaanum Kaalangal', 'season_number': 1, - 'season_id': '9441', + 'season_id': '1260097089', 'upload_date': '20220421', 'episode': 'Back To School', 'episode_number': 1, 'duration': 1810, - 'channel_id': '54', + 'channel_id': '1260003991', }, - }, { + 'params': {'skip_download': 'm3u8'}, + }, { # Metadata call gets HTTP Error 504 with tas=10000 'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286', 'info_dict': { 'id': '1000262286', @@ -161,6 +177,7 @@ class HotStarIE(HotStarBaseIE): 'timestamp': 1622943900, 'duration': 5395, }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.hotstar.com/in/movies/premam/1000091195', 'info_dict': { @@ -168,12 +185,13 @@ class HotStarIE(HotStarBaseIE): 'ext': 'mp4', 'title': 'Premam', 'release_year': 2015, - 'description': 'md5:d833c654e4187b5e34757eafb5b72d7f', + 'description': 'md5:096cd8aaae8dab56524823dc19dfa9f7', 'timestamp': 1462149000, 'upload_date': '20160502', 'episode': 'Premam', 'duration': 8994, }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', 'only_matching': True, @@ -196,6 +214,13 @@ class HotStarIE(HotStarBaseIE): None: 'content', } + _CONTENT_TYPE = { + 'movie': 'MOVIE', + 'episode': 'EPISODE', + 'match': 'SPORT', + 'content': 'CLIPS', + } + _IGNORE_MAP = { 'res': 'resolution', 'vcodec': 'video_codec', @@ -217,43 +242,46 @@ def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None): def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') - video_type = self._TYPE.get(video_type, video_type) + video_type = self._TYPE[video_type] cookies = self._get_cookies(url) # Cookies before any request # tas=10000 can cause HTTP Error 504, see https://github.com/yt-dlp/yt-dlp/issues/7946 - for tas in (10000, 0): + for tas, err in [(10000, False), (0, None)]: query = {'tas': tas, 'contentId': video_id} video_data = traverse_obj( - self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query=query), + self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, errnote=err, query=query), ('body', 'results', 'item', {dict})) or {} if video_data: break - if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): + if video_data.get('drmProtected'): self.report_drm(video_id) - # See https://github.com/yt-dlp/yt-dlp/issues/396 - st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date') - geo_restricted = False formats, subs = [], {} headers = {'Referer': f'{self._BASE_URL}/in'} + content_type = traverse_obj(video_data, ('contentType', {str})) or self._CONTENT_TYPE[video_type] - # change to v2 in the future - playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets'] - for playback_set in playback_sets: - if not isinstance(playback_set, dict): - continue - tags = str_or_none(playback_set.get('tagsCombination')) or '' + # See https://github.com/yt-dlp/yt-dlp/issues/396 + st = self._request_webpage( + f'{self._BASE_URL}/in', video_id, 'Fetching server time').get_header('x-origin-date') + watch = self._call_api_v2('pages/watch', video_id, content_type, cookies=cookies, st=st) + player_config = traverse_obj(watch, ( + 'page', 'spaces', 'player', 'widget_wrappers', lambda _, v: v['template'] == 'PlayerWidget', + 'widget', 'data', 'player_config', {dict}, any, {require('player config')})) + + for playback_set in traverse_obj(player_config, ( + ('media_asset', 'media_asset_v2'), + ('primary', 'fallback'), + all, lambda _, v: url_or_none(v['content_url']), + )): + tags = str_or_none(playback_set.get('playback_tags')) or '' if any(f'{prefix}:{ignore}' in tags for key, prefix in self._IGNORE_MAP.items() for ignore in self._configuration_arg(key)): continue - format_url = url_or_none(playback_set.get('playbackUrl')) - if not format_url: - continue - format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url) + format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', playback_set['content_url']) ext = determine_ext(format_url) current_formats, current_subs = [], {} @@ -273,8 +301,10 @@ def _real_extract(self, url): 'height': int_or_none(playback_set.get('height')), }] except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 403: + if isinstance(e.cause, HTTPError) and e.cause.status in (403, 474): geo_restricted = True + else: + self.write_debug(e) continue tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';')) @@ -292,6 +322,11 @@ def _real_extract(self, url): 'stereo': 2, 'dolby51': 6, }.get(tag_dict.get('audio_channel')) + if ( + 'Audio_Description' in f['format_id'] + or 'Audio Description' in (f.get('format_note') or '') + ): + f['source_preference'] = -99 + (f.get('source_preference') or -1) f['format_note'] = join_nonempty( tag_dict.get('ladder'), tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None, @@ -387,6 +422,7 @@ class HotStarPlaylistIE(HotStarBaseIE): def _real_extract(self, url): id_ = self._match_id(url) return self.playlist_result( + # XXX: If receiving HTTP Error 504, try with tas=0 self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_) @@ -457,4 +493,5 @@ def _real_extract(self, url): 'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id'] return self.playlist_result(self._playlist_entries( + # XXX: If receiving HTTP Error 504, try with tas=0 'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id) From 4bd9a7ade7e0508b9795b3e72a69eeb40788b62b Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 28 Jun 2025 18:30:51 -0500 Subject: [PATCH 04/28] [ie/hotstar:series] Fix extractor (#13564) * Removes HotStarSeasonIE and HotStarPlaylistIE Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 2 - yt_dlp/extractor/hotstar.py | 143 +++++++++++--------------------- 2 files changed, 49 insertions(+), 96 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fbbd9571f7..a5a3434477 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -805,9 +805,7 @@ from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, - HotStarPlaylistIE, HotStarPrefixIE, - HotStarSeasonIE, HotStarSeriesIE, ) from .hrefli import HrefLiRedirectIE diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 358b5ce757..c4fae00a97 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,3 +1,4 @@ +import functools import hashlib import hmac import json @@ -9,6 +10,7 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + OnDemandPagedList, determine_ext, int_or_none, join_nonempty, @@ -71,12 +73,36 @@ def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None): }, separators=(',', ':')), }, st=st, cookies=cookies) - def _playlist_entries(self, path, item_id, root=None, **kwargs): - results = self._call_api_v1(path, item_id, **kwargs)['body']['results'] - for video in traverse_obj(results, (('assets', None), 'items', ...)): - if video.get('contentId'): - yield self.url_result( - HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId']) + @staticmethod + def _parse_metadata_v1(video_data): + return traverse_obj(video_data, { + 'id': ('contentId', {str}), + 'title': ('title', {str}), + 'description': ('description', {str}), + 'duration': ('duration', {int_or_none}), + 'timestamp': (('broadcastDate', 'startDate'), {int_or_none}, any), + 'release_year': ('year', {int_or_none}), + 'channel': ('channelName', {str}), + 'channel_id': ('channelId', {int}, {str_or_none}), + 'series': ('showName', {str}), + 'season': ('seasonName', {str}), + 'season_number': ('seasonNo', {int_or_none}), + 'season_id': ('seasonId', {int}, {str_or_none}), + 'episode': ('title', {str}), + 'episode_number': ('episodeNo', {int_or_none}), + }) + + def _fetch_page(self, path, item_id, name, query, root, page): + results = self._call_api_v1( + path, item_id, note=f'Downloading {name} page {page + 1} JSON', query={ + **query, + 'tao': page * self._PAGE_SIZE, + 'tas': self._PAGE_SIZE, + })['body']['results'] + + for video in traverse_obj(results, (('assets', None), 'items', lambda _, v: v['contentId'])): + yield self.url_result( + HotStarIE._video_url(video['contentId'], root=root), HotStarIE, **self._parse_metadata_v1(video)) class HotStarIE(HotStarBaseIE): @@ -245,14 +271,11 @@ def _real_extract(self, url): video_type = self._TYPE[video_type] cookies = self._get_cookies(url) # Cookies before any request - # tas=10000 can cause HTTP Error 504, see https://github.com/yt-dlp/yt-dlp/issues/7946 - for tas, err in [(10000, False), (0, None)]: - query = {'tas': tas, 'contentId': video_id} - video_data = traverse_obj( - self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, errnote=err, query=query), - ('body', 'results', 'item', {dict})) or {} - if video_data: - break + video_data = traverse_obj( + self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query={ + 'tas': 5, # See https://github.com/yt-dlp/yt-dlp/issues/7946 + 'contentId': video_id, + }), ('body', 'results', 'item', {dict})) or {} if video_data.get('drmProtected'): self.report_drm(video_id) @@ -343,22 +366,10 @@ def _real_extract(self, url): f.setdefault('http_headers', {}).update(headers) return { + **self._parse_metadata_v1(video_data), 'id': video_id, - 'title': video_data.get('title'), - 'description': video_data.get('description'), - 'duration': int_or_none(video_data.get('duration')), - 'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')), - 'release_year': int_or_none(video_data.get('year')), 'formats': formats, 'subtitles': subs, - 'channel': video_data.get('channelName'), - 'channel_id': str_or_none(video_data.get('channelId')), - 'series': video_data.get('showName'), - 'season': video_data.get('seasonName'), - 'season_number': int_or_none(video_data.get('seasonNo')), - 'season_id': str_or_none(video_data.get('seasonId')), - 'episode': video_data.get('title'), - 'episode_number': int_or_none(video_data.get('episodeNo')), } @@ -399,65 +410,6 @@ def _real_extract(self, url): return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id) -class HotStarPlaylistIE(HotStarBaseIE): - IE_NAME = 'hotstar:playlist' - _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P\w+)' - _TESTS = [{ - 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', - 'info_dict': { - 'id': '3_2_26', - }, - 'playlist_mincount': 20, - }, { - 'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272', - 'only_matching': True, - }] - - def _real_extract(self, url): - id_ = self._match_id(url) - return self.playlist_result( - # XXX: If receiving HTTP Error 504, try with tas=0 - self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_) - - -class HotStarSeasonIE(HotStarBaseIE): - IE_NAME = 'hotstar:season' - _VALID_URL = r'(?Phttps?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P\w+)' - _TESTS = [{ - 'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028', - 'info_dict': { - 'id': '8028', - }, - 'playlist_mincount': 35, - }, { - 'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357', - 'info_dict': { - 'id': '4357', - }, - 'playlist_mincount': 30, - }, { - 'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/', - 'info_dict': { - 'id': '8208', - }, - 'playlist_mincount': 19, - }, { - 'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/', - 'only_matching': True, - }] - - def _real_extract(self, url): - url, season_id = self._match_valid_url(url).groups() - return self.playlist_result(self._playlist_entries( - 'season/asset', season_id, url, query={'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}), season_id) - - class HotStarSeriesIE(HotStarBaseIE): IE_NAME = 'hotstar:series' _VALID_URL = r'(?Phttps?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P\d+))/?(?:[#?]|$)' @@ -472,26 +424,29 @@ class HotStarSeriesIE(HotStarBaseIE): 'info_dict': { 'id': '1260050431', }, - 'playlist_mincount': 43, + 'playlist_mincount': 42, }, { 'url': 'https://www.hotstar.com/in/tv/mahabharat/435/', 'info_dict': { 'id': '435', }, 'playlist_mincount': 267, - }, { + }, { # HTTP Error 504 with tas=10000 (possibly because total size is over 1000 items?) 'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/', 'info_dict': { 'id': '1260022017', }, - 'playlist_mincount': 940, + 'playlist_mincount': 1601, }] + _PAGE_SIZE = 100 def _real_extract(self, url): - url, series_id = self._match_valid_url(url).groups() - id_ = self._call_api_v1( + url, series_id = self._match_valid_url(url).group('url', 'id') + eid = self._call_api_v1( 'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id'] - return self.playlist_result(self._playlist_entries( - # XXX: If receiving HTTP Error 504, try with tas=0 - 'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id) + entries = OnDemandPagedList(functools.partial( + self._fetch_page, 'tray/g/1/items', series_id, + 'series', {'etid': 0, 'eid': eid}, url), self._PAGE_SIZE) + + return self.playlist_result(entries, series_id) From 7e2504f941a11ea2b0dba00de3f0295cdc253e79 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 28 Jun 2025 18:32:21 -0500 Subject: [PATCH 05/28] [ie/jiocinema] Remove extractors (#13565) Closes #10123, Closes #10144, Closes #10225, Closes #10240, Closes #10508 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 - yt_dlp/extractor/jiocinema.py | 408 -------------------------------- 2 files changed, 412 deletions(-) delete mode 100644 yt_dlp/extractor/jiocinema.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a5a3434477..61cc05d313 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -919,10 +919,6 @@ ShugiinItvVodIE, ) from .jeuxvideo import JeuxVideoIE -from .jiocinema import ( - JioCinemaIE, - JioCinemaSeriesIE, -) from .jiosaavn import ( JioSaavnAlbumIE, JioSaavnArtistIE, diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py deleted file mode 100644 index 94c85064ef..0000000000 --- a/yt_dlp/extractor/jiocinema.py +++ /dev/null @@ -1,408 +0,0 @@ -import base64 -import itertools -import json -import random -import re -import string -import time - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - jwt_decode_hs256, - parse_age_limit, - try_call, - url_or_none, -) -from ..utils.traversal import traverse_obj - - -class JioCinemaBaseIE(InfoExtractor): - _NETRC_MACHINE = 'jiocinema' - _GEO_BYPASS = False - _ACCESS_TOKEN = None - _REFRESH_TOKEN = None - _GUEST_TOKEN = None - _USER_ID = None - _DEVICE_ID = None - _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'} - _APP_NAME = {'appName': 'RJIL_JioCinema'} - _APP_VERSION = {'appVersion': '5.0.0'} - _API_SIGNATURES = 'o668nxgzwff' - _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi' - _ACCESS_HINT = 'the `accessToken` from your browser local storage' - _LOGIN_HINT = ( - 'Log in with "-u phone -p " to authenticate with OTP, ' - f'or use "-u token -p " to log in with {_ACCESS_HINT}. ' - 'If you have previously logged in with yt-dlp and your session ' - 'has been cached, you can use "-u device -p "') - - def _cache_token(self, token_type): - assert token_type in ('access', 'refresh', 'all') - if token_type in ('access', 'all'): - self.cache.store( - JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN) - if token_type in ('refresh', 'all'): - self.cache.store( - JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN) - - def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}): - return self._download_json( - url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={ - 'Content-Type': 'application/json', - 'Accept': 'application/json', - **self._API_HEADERS, - **headers, - }, expected_status=(400, 403, 474)) - - def _call_auth_api(self, service, endpoint, note, headers={}, data={}): - return self._call_api( - f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}', - None, note=note, headers=headers, data=data) - - def _refresh_token(self): - if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID: - raise ExtractorError('User token has expired', expected=True) - response = self._call_auth_api( - 'token', 'refreshtoken', 'Refreshing token', - headers={'accesstoken': self._ACCESS_TOKEN}, data={ - **self._APP_NAME, - 'deviceId': self._DEVICE_ID, - 'refreshToken': self._REFRESH_TOKEN, - **self._APP_VERSION, - }) - refresh_token = response.get('refreshTokenId') - if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN: - JioCinemaBaseIE._REFRESH_TOKEN = refresh_token - self._cache_token('refresh') - JioCinemaBaseIE._ACCESS_TOKEN = response['authToken'] - self._cache_token('access') - - def _fetch_guest_token(self): - JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10)) - guest_token = self._call_auth_api( - 'token', 'guest', 'Downloading guest token', data={ - **self._APP_NAME, - 'deviceType': 'phone', - 'os': 'ios', - 'deviceId': self._DEVICE_ID, - 'freshLaunch': False, - 'adId': self._DEVICE_ID, - **self._APP_VERSION, - }) - self._GUEST_TOKEN = guest_token['authToken'] - self._USER_ID = guest_token['userId'] - - def _call_login_api(self, endpoint, guest_token, data, note): - return self._call_auth_api( - 'user', f'loginotp/{endpoint}', note, headers={ - **self.geo_verification_headers(), - 'accesstoken': self._GUEST_TOKEN, - **self._APP_NAME, - **traverse_obj(guest_token, 'data', { - 'deviceType': ('deviceType', {str}), - 'os': ('os', {str}), - })}, data=data) - - def _is_token_expired(self, token): - return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180) - - def _perform_login(self, username, password): - if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN): - return - - UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - - if username.lower() == 'token': - if try_call(lambda: jwt_decode_hs256(password)): - JioCinemaBaseIE._ACCESS_TOKEN = password - refresh_hint = 'the `refreshToken` UUID from your browser local storage' - refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0] - if not refresh_token: - self.to_screen( - 'To extend the life of your login session, in addition to your access token, ' - 'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" ' - f'where REFRESH_TOKEN is {refresh_hint}') - elif re.fullmatch(UUID_RE, refresh_token): - JioCinemaBaseIE._REFRESH_TOKEN = refresh_token - else: - self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}') - else: - raise ExtractorError( - f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True) - - elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password): - JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh') - JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access') - if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN: - raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True) - - elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password): - self._fetch_guest_token() - guest_token = jwt_decode_hs256(self._GUEST_TOKEN) - initial_data = { - 'number': base64.b64encode(password.encode()).decode(), - **self._APP_VERSION, - } - response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP') - if not traverse_obj(response, ('OTPInfo', {dict})): - raise ExtractorError('There was a problem with the phone number login attempt') - - is_iphone = guest_token.get('os') == 'ios' - response = self._call_login_api('verify', guest_token, { - 'deviceInfo': { - 'consumptionDeviceName': 'iPhone' if is_iphone else 'Android', - 'info': { - 'platform': {'name': 'iPhone OS' if is_iphone else 'Android'}, - 'androidId': self._DEVICE_ID, - 'type': 'iOS' if is_iphone else 'Android', - }, - }, - **initial_data, - 'otp': self._get_tfa_info('the one-time password sent to your phone'), - }, 'Submitting OTP') - if traverse_obj(response, 'code') == 1043: - raise ExtractorError('Wrong OTP', expected=True) - JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken'] - JioCinemaBaseIE._ACCESS_TOKEN = response['authToken'] - - else: - raise ExtractorError(self._LOGIN_HINT, expected=True) - - user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data'] - JioCinemaBaseIE._USER_ID = user_token['userId'] - JioCinemaBaseIE._DEVICE_ID = user_token['deviceId'] - if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device': - self._cache_token('all') - if self.get_param('cachedir') is not False: - self.to_screen( - f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"') - elif not JioCinemaBaseIE._REFRESH_TOKEN: - JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load( - JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh') - if JioCinemaBaseIE._REFRESH_TOKEN: - self._cache_token('access') - self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"') - if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN): - self._refresh_token() - - -class JioCinemaIE(JioCinemaBaseIE): - IE_NAME = 'jiocinema' - _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P\d{3,})' - _TESTS = [{ - 'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931', - 'info_dict': { - 'id': '3759931', - 'ext': 'mp4', - 'title': 'Pradeep to stop the wedding?', - 'description': 'md5:75f72d1d1a66976633345a3de6d672b1', - 'episode': 'Pradeep to stop the wedding?', - 'episode_number': 89, - 'season': 'Agnisakshi…Ek Samjhauta-S1', - 'season_number': 1, - 'series': 'Agnisakshi Ek Samjhauta', - 'duration': 1238.0, - 'thumbnail': r're:https?://.+\.jpg', - 'age_limit': 13, - 'season_id': '3698031', - 'upload_date': '20230606', - 'timestamp': 1686009600, - 'release_date': '20230607', - 'genres': ['Drama'], - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch', - 'info_dict': { - 'id': '3754021', - 'ext': 'mp4', - 'title': 'Bhediya', - 'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0', - 'episode': 'Bhediya', - 'duration': 8500.0, - 'thumbnail': r're:https?://.+\.jpg', - 'age_limit': 13, - 'upload_date': '20230525', - 'timestamp': 1685026200, - 'release_date': '20230524', - 'genres': ['Comedy'], - }, - 'params': {'skip_download': 'm3u8'}, - }] - - def _extract_formats_and_subtitles(self, playback, video_id): - m3u8_url = traverse_obj(playback, ( - 'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any)) - if not m3u8_url: # DRM-only content only serves dash urls - self.report_drm(video_id) - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls') - self._remove_duplicate_formats(formats) - - return { - # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p - 'formats': traverse_obj(formats, ( - lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)), - 'subtitles': subtitles, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN): - self._fetch_guest_token() - elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN): - self._refresh_token() - - playback = self._call_api( - f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id, - 'Downloading playback JSON', headers={ - **self.geo_verification_headers(), - 'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN, - **self._APP_NAME, - 'deviceid': self._DEVICE_ID, - 'uniqueid': self._USER_ID, - 'x-apisignatures': self._API_SIGNATURES, - 'x-platform': 'androidweb', - 'x-platform-token': 'web', - }, data={ - '4k': False, - 'ageGroup': '18+', - 'appVersion': '3.4.0', - 'bitrateProfile': 'xhdpi', - 'capability': { - 'drmCapability': { - 'aesSupport': 'yes', - 'fairPlayDrmSupport': 'none', - 'playreadyDrmSupport': 'none', - 'widevineDRMSupport': 'none', - }, - 'frameRateCapability': [{ - 'frameRateSupport': '30fps', - 'videoQuality': '1440p', - }], - }, - 'continueWatchingRequired': False, - 'dolby': False, - 'downloadRequest': False, - 'hevc': False, - 'kidsSafe': False, - 'manufacturer': 'Windows', - 'model': 'Windows', - 'multiAudioRequired': True, - 'osVersion': '10', - 'parentalPinValid': True, - 'x-apisignatures': self._API_SIGNATURES, - }) - - status_code = traverse_obj(playback, ('code', {int})) - if status_code == 474: - self.raise_geo_restricted(countries=['IN']) - elif status_code == 1008: - error_msg = 'This content is only available for premium users' - if self._ACCESS_TOKEN: - raise ExtractorError(error_msg, expected=True) - self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None) - elif status_code == 400: - raise ExtractorError('The requested content is not available', expected=True) - elif status_code is not None and status_code != 200: - raise ExtractorError( - f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}') - - metadata = self._download_json( - f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details', - video_id, fatal=False, query={ - 'ids': f'include:{video_id}', - 'responseType': 'common', - 'devicePlatformType': 'desktop', - }) - - return { - 'id': video_id, - 'http_headers': self._API_HEADERS, - **self._extract_formats_and_subtitles(playback, video_id), - **traverse_obj(playback, ('data', { - # fallback metadata - 'title': ('name', {str}), - 'description': ('fullSynopsis', {str}), - 'series': ('show', 'name', {str}, filter), - 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}), - 'season_number': ('episode', 'season', {int_or_none}, filter), - 'episode': ('fullTitle', {str}), - 'episode_number': ('episode', 'episodeNo', {int_or_none}, filter), - 'age_limit': ('ageNemonic', {parse_age_limit}), - 'duration': ('totalDuration', {float_or_none}), - 'thumbnail': ('images', {url_or_none}), - })), - **traverse_obj(metadata, ('result', 0, { - 'title': ('fullTitle', {str}), - 'description': ('fullSynopsis', {str}), - 'series': ('showName', {str}, filter), - 'season': ('seasonName', {str}, filter), - 'season_number': ('season', {int_or_none}), - 'season_id': ('seasonId', {str}, filter), - 'episode': ('fullTitle', {str}), - 'episode_number': ('episode', {int_or_none}), - 'timestamp': ('uploadTime', {int_or_none}), - 'release_date': ('telecastDate', {str}), - 'age_limit': ('ageNemonic', {parse_age_limit}), - 'duration': ('duration', {float_or_none}), - 'genres': ('genres', ..., {str}), - 'thumbnail': ('seo', 'ogImage', {url_or_none}), - })), - } - - -class JioCinemaSeriesIE(JioCinemaBaseIE): - IE_NAME = 'jiocinema:series' - _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P[\w-]+)/(?P\d{3,})' - _TESTS = [{ - 'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917', - 'info_dict': { - 'id': '3499917', - 'title': 'naagin', - }, - 'playlist_mincount': 120, - }, { - 'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820', - 'info_dict': { - 'id': '3499820', - 'title': 'mtv-splitsvilla-x5', - }, - 'playlist_mincount': 310, - }] - - def _entries(self, series_id): - seasons = traverse_obj(self._download_json( - f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id, - 'Downloading series metadata JSON', query={'responseType': 'common'}), ( - 'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter', - 'trayTabs', lambda _, v: v['id'])) - - for season_num, season in enumerate(seasons, start=1): - season_id = season['id'] - label = season.get('label') or season_num - for page_num in itertools.count(1): - episodes = traverse_obj(self._download_json( - f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode', - season_id, f'Downloading season {label} page {page_num} JSON', query={ - 'sort': 'episode:asc', - 'id': season_id, - 'responseType': 'common', - 'page': page_num, - }), ('result', lambda _, v: v['id'] and url_or_none(v['slug']))) - if not episodes: - break - for episode in episodes: - yield self.url_result( - episode['slug'], JioCinemaIE, **traverse_obj(episode, { - 'video_id': 'id', - 'video_title': ('fullTitle', {str}), - 'season_number': ('season', {int_or_none}), - 'episode_number': ('episode', {int_or_none}), - })) - - def _real_extract(self, url): - slug, series_id = self._match_valid_url(url).group('slug', 'id') - return self.playlist_result(self._entries(series_id), series_id, slug) From 7b81634fb1d15999757e7a9883daa6ef09ea785b Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 29 Jun 2025 18:49:27 +0200 Subject: [PATCH 06/28] [ie] Detect invalid m3u8 playlist data (#13563) Authored by: Grub4K --- test/test_InfoExtractor.py | 52 ++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/common.py | 33 ++++++++++++++++-------- 2 files changed, 75 insertions(+), 10 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index e6c8d574e0..c9f70431f7 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -36,6 +36,18 @@ def do_GET(self): self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + elif self.path == '/fake.m3u8': + self.send_response(200) + self.send_header('Content-Length', '1024') + self.end_headers() + self.wfile.write(1024 * b'\x00') + elif self.path == '/bipbop.m3u8': + with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f: + data = f.read() + self.send_response(200) + self.send_header('Content-Length', str(len(data))) + self.end_headers() + self.wfile.write(data) else: assert False @@ -2079,5 +2091,45 @@ def test_search_nuxt_json(self): self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) +class TestInfoExtractorNetwork(unittest.TestCase): + def setUp(self, /): + self.httpd = http.server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + self.port = http_server_port(self.httpd) + + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + self.called = False + + def require_warning(*args, **kwargs): + self.called = True + + self.ydl = FakeYDL() + self.ydl.report_warning = require_warning + self.ie = DummyIE(self.ydl) + + def tearDown(self, /): + self.ydl.close() + self.httpd.shutdown() + self.httpd.server_close() + self.server_thread.join(1) + + def test_extract_m3u8_formats(self): + formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( + f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False) + self.assertFalse(self.called) + self.assertTrue(formats) + self.assertTrue(subtitles) + + def test_extract_m3u8_formats_warning(self): + formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( + f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False) + self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file') + self.assertFalse(formats) + self.assertFalse(subtitles) + + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 32b4680b73..b75e806233 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,5 +1,6 @@ import base64 import collections +import contextlib import functools import getpass import http.client @@ -2129,21 +2130,33 @@ def _extract_m3u8_formats_and_subtitles( raise ExtractorError(errnote, video_id=video_id) self.report_warning(f'{errnote}{bug_reports_message()}') return [], {} - - res = self._download_webpage_handle( - m3u8_url, video_id, - note='Downloading m3u8 information' if note is None else note, - errnote='Failed to download m3u8 information' if errnote is None else errnote, + if note is None: + note = 'Downloading m3u8 information' + if errnote is None: + errnote = 'Failed to download m3u8 information' + response = self._request_webpage( + m3u8_url, video_id, note=note, errnote=errnote, fatal=fatal, data=data, headers=headers, query=query) - - if res is False: + if response is False: return [], {} - m3u8_doc, urlh = res - m3u8_url = urlh.url + with contextlib.closing(response): + prefix = response.read(512) + if not prefix.startswith(b'#EXTM3U'): + msg = 'Response data has no m3u header' + if fatal: + raise ExtractorError(msg, video_id=video_id) + self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id) + return [], {} + + content = self._webpage_read_content( + response, m3u8_url, video_id, note=note, errnote=errnote, + fatal=fatal, prefix=prefix, data=data) + if content is False: + return [], {} return self._parse_m3u8_formats_and_subtitles( - m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, + content, response.url, ext=ext, entry_protocol=entry_protocol, preference=preference, quality=quality, m3u8_id=m3u8_id, note=note, errnote=errnote, fatal=fatal, live=live, data=data, headers=headers, query=query, video_id=video_id) From 1b883846347addeab12663fd74317fd544341a1c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 08:05:52 -0500 Subject: [PATCH 07/28] [ci] Add signature tests (#13582) Authored by: bashonly --- .github/workflows/signature-tests.yml | 41 +++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/signature-tests.yml diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml new file mode 100644 index 0000000000..203172e0b9 --- /dev/null +++ b/.github/workflows/signature-tests.yml @@ -0,0 +1,41 @@ +name: Signature Tests +on: + push: + paths: + - .github/workflows/signature-tests.yml + - test/test_youtube_signature.py + - yt_dlp/jsinterp.py + pull_request: + paths: + - .github/workflows/signature-tests.yml + - test/test_youtube_signature.py + - yt_dlp/jsinterp.py +permissions: + contents: read + +concurrency: + group: signature-tests-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + tests: + name: Signature Tests + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install test requirements + run: python3 ./devscripts/install_deps.py --only-optional --include test + - name: Run tests + timeout-minutes: 15 + run: | + python3 -m yt_dlp -v || true # Print debug head + python3 ./devscripts/run_tests.py test/test_youtube_signature.py From 958153a226214c86879e36211ac191bf78289578 Mon Sep 17 00:00:00 2001 From: sepro Date: Mon, 30 Jun 2025 15:50:33 +0200 Subject: [PATCH 08/28] [jsinterp] Fix `extract_object` (#13580) Fixes sig extraction for YouTube player `e12fbea4` Authored by: seproDev --- test/test_jsinterp.py | 4 ++++ test/test_youtube_signature.py | 5 +++++ yt_dlp/jsinterp.py | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 2e3cdc2a59..4268e890b8 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -478,6 +478,10 @@ def test_extract_function_with_global_stack(self): func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000}) self.assertEqual(func([1]), 1111) + def test_extract_object(self): + jsi = JSInterpreter('var a={};a.xy={};var xy;var zxy={};xy={z:function(){return "abc"}};') + self.assertTrue('z' in jsi.extract_object('xy', None)) + def test_increment_decrement(self): self._test('function f() { var x = 1; return ++x; }', 2) self._test('function f() { var x = 1; return x++; }', 1) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 3336b6bfff..5e67926798 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -133,6 +133,11 @@ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0', ), + ( + 'https://www.youtube.com/s/player/e12fbea4/player_ias.vflset/en_US/base.js', + 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', + 'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a', + ), ] _NSIG_TESTS = [ diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 45aeffa229..b49f0cf30a 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -857,7 +857,7 @@ def extract_object(self, objname, *global_stack): obj = {} obj_m = re.search( r'''(?x) - (?(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*) }\s*; ''' % (re.escape(objname), _FUNC_NAME_RE), From e9f157669e24953a88d15ce22053649db7a8e81e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 14:19:43 -0500 Subject: [PATCH 09/28] [ie/hotstar] Fix formats extraction (#13585) Fix b5bd057fe86550f3aa67f2fc8790d1c6a251c57b Authored by: bashonly --- yt_dlp/extractor/hotstar.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index c4fae00a97..891bcc8731 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -64,12 +64,16 @@ def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None): 'container': ['fmp4br', 'fmp4'], 'ads': ['non_ssai', 'ssai'], 'audio_channel': ['atmos', 'dolby51', 'stereo'], - 'encryption': ['plain'], - 'video_codec': ['h265'], # or ['h264'] + 'encryption': ['plain', 'widevine'], # wv only so we can raise appropriate error + 'video_codec': ['h265', 'h264'], 'ladder': ['tv', 'full'], - 'resolution': ['4k'], # or ['hd'] - 'true_resolution': ['4k'], # or ['hd'] - 'dynamic_range': ['hdr'], # or ['sdr'] + 'resolution': ['4k', 'hd'], + 'true_resolution': ['4k', 'hd'], + 'dynamic_range': ['hdr', 'sdr'], + }, separators=(',', ':')), + 'drm_parameters': json.dumps({ + 'widevine_security_level': ['SW_SECURE_DECODE', 'SW_SECURE_CRYPTO'], + 'hdcp_version': ['HDCP_V2_2', 'HDCP_V2_1', 'HDCP_V2', 'HDCP_V1'], }, separators=(',', ':')), }, st=st, cookies=cookies) @@ -281,7 +285,7 @@ def _real_extract(self, url): self.report_drm(video_id) geo_restricted = False - formats, subs = [], {} + formats, subs, has_drm = [], {}, False headers = {'Referer': f'{self._BASE_URL}/in'} content_type = traverse_obj(video_data, ('contentType', {str})) or self._CONTENT_TYPE[video_type] @@ -304,6 +308,11 @@ def _real_extract(self, url): for ignore in self._configuration_arg(key)): continue + tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';')) + if tag_dict.get('encryption') not in ('plain', None): + has_drm = True + continue + format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', playback_set['content_url']) ext = determine_ext(format_url) @@ -330,10 +339,6 @@ def _real_extract(self, url): self.write_debug(e) continue - tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';')) - if tag_dict.get('encryption') not in ('plain', None): - for f in current_formats: - f['has_drm'] = True for f in current_formats: for k, v in self._TAG_FIELDS.items(): if not f.get(k): @@ -361,6 +366,8 @@ def _real_extract(self, url): if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) + elif not formats and has_drm: + self.report_drm(video_id) self._remove_duplicate_formats(formats) for f in formats: f.setdefault('http_headers', {}).update(headers) From 2ba5391cd68ed4f2415c827d2cecbcbc75ace10b Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:02:59 -0500 Subject: [PATCH 10/28] [ie/youtube] Fix premium formats extraction (#13586) Fix ff6f94041aeee19c5559e1c1cd693960a1c1dd14 Closes #13545 Authored by: bashonly --- yt_dlp/extractor/youtube/_base.py | 2 ++ yt_dlp/extractor/youtube/_video.py | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 90e3927153..5aee89b917 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -63,6 +63,7 @@ class _PoTokenContext(enum.Enum): 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS], 'SUPPORTS_COOKIES': True, + 'PLAYER_PARAMS': '8AEB', }, 'web_embedded': { 'INNERTUBE_CONTEXT': { @@ -174,6 +175,7 @@ class _PoTokenContext(enum.Enum): }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'SUPPORTS_COOKIES': True, + 'PLAYER_PARAMS': '8AEB', }, 'tv_simply': { 'INNERTUBE_CONTEXT': { diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 5ccc33fa33..4689c55db7 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2820,10 +2820,6 @@ def _generate_player_context(cls, sts=None): context['signatureTimestamp'] = sts return { 'playbackContext': { - 'adPlaybackContext': { - 'pyv': True, - 'adType': 'AD_TYPE_INSTREAM', - }, 'contentPlaybackContext': context, }, **cls._get_checkok_params(), From 500761e41acb96953a5064e951d41d190c287e46 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:06:22 -0500 Subject: [PATCH 11/28] [ie] Fix m3u8 playlist data corruption (#13588) Revert 7b81634fb1d15999757e7a9883daa6ef09ea785b Closes #13581 Authored by: bashonly --- test/test_InfoExtractor.py | 52 -------------------------------------- yt_dlp/extractor/common.py | 33 ++++++++---------------- 2 files changed, 10 insertions(+), 75 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c9f70431f7..e6c8d574e0 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -36,18 +36,6 @@ def do_GET(self): self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) - elif self.path == '/fake.m3u8': - self.send_response(200) - self.send_header('Content-Length', '1024') - self.end_headers() - self.wfile.write(1024 * b'\x00') - elif self.path == '/bipbop.m3u8': - with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f: - data = f.read() - self.send_response(200) - self.send_header('Content-Length', str(len(data))) - self.end_headers() - self.wfile.write(data) else: assert False @@ -2091,45 +2079,5 @@ def test_search_nuxt_json(self): self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) -class TestInfoExtractorNetwork(unittest.TestCase): - def setUp(self, /): - self.httpd = http.server.HTTPServer( - ('127.0.0.1', 0), InfoExtractorTestRequestHandler) - self.port = http_server_port(self.httpd) - - self.server_thread = threading.Thread(target=self.httpd.serve_forever) - self.server_thread.daemon = True - self.server_thread.start() - - self.called = False - - def require_warning(*args, **kwargs): - self.called = True - - self.ydl = FakeYDL() - self.ydl.report_warning = require_warning - self.ie = DummyIE(self.ydl) - - def tearDown(self, /): - self.ydl.close() - self.httpd.shutdown() - self.httpd.server_close() - self.server_thread.join(1) - - def test_extract_m3u8_formats(self): - formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( - f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False) - self.assertFalse(self.called) - self.assertTrue(formats) - self.assertTrue(subtitles) - - def test_extract_m3u8_formats_warning(self): - formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( - f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False) - self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file') - self.assertFalse(formats) - self.assertFalse(subtitles) - - if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b75e806233..32b4680b73 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,6 +1,5 @@ import base64 import collections -import contextlib import functools import getpass import http.client @@ -2130,33 +2129,21 @@ def _extract_m3u8_formats_and_subtitles( raise ExtractorError(errnote, video_id=video_id) self.report_warning(f'{errnote}{bug_reports_message()}') return [], {} - if note is None: - note = 'Downloading m3u8 information' - if errnote is None: - errnote = 'Failed to download m3u8 information' - response = self._request_webpage( - m3u8_url, video_id, note=note, errnote=errnote, + + res = self._download_webpage_handle( + m3u8_url, video_id, + note='Downloading m3u8 information' if note is None else note, + errnote='Failed to download m3u8 information' if errnote is None else errnote, fatal=fatal, data=data, headers=headers, query=query) - if response is False: + + if res is False: return [], {} - with contextlib.closing(response): - prefix = response.read(512) - if not prefix.startswith(b'#EXTM3U'): - msg = 'Response data has no m3u header' - if fatal: - raise ExtractorError(msg, video_id=video_id) - self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id) - return [], {} - - content = self._webpage_read_content( - response, m3u8_url, video_id, note=note, errnote=errnote, - fatal=fatal, prefix=prefix, data=data) - if content is False: - return [], {} + m3u8_doc, urlh = res + m3u8_url = urlh.url return self._parse_m3u8_formats_and_subtitles( - content, response.url, ext=ext, entry_protocol=entry_protocol, + m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, preference=preference, quality=quality, m3u8_id=m3u8_id, note=note, errnote=errnote, fatal=fatal, live=live, data=data, headers=headers, query=query, video_id=video_id) From b16722ede83377f77ea8352dcd0a6ca8e83b8f0f Mon Sep 17 00:00:00 2001 From: helpimnotdrowning <35247379+helpimnotdrowning@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:24:04 -0500 Subject: [PATCH 12/28] [ie/kick] Support subscriber-only content (#13550) Closes #13442 Authored by: helpimnotdrowning --- yt_dlp/extractor/kick.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py index 1f001d421a..8049e1e342 100644 --- a/yt_dlp/extractor/kick.py +++ b/yt_dlp/extractor/kick.py @@ -1,12 +1,12 @@ +import functools +import urllib.parse from .common import InfoExtractor -from ..networking import HEADRequest from ..utils import ( UserNotLive, determine_ext, float_or_none, int_or_none, - merge_dicts, parse_iso8601, str_or_none, traverse_obj, @@ -16,21 +16,17 @@ class KickBaseIE(InfoExtractor): - def _real_initialize(self): - self._request_webpage( - HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False, impersonate=True) - xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN') - if not xsrf_token: - self.write_debug('kick.com did not set XSRF-TOKEN cookie') - KickBaseIE._API_HEADERS = { - 'Authorization': f'Bearer {xsrf_token.value}', - 'X-XSRF-TOKEN': xsrf_token.value, - } if xsrf_token else {} + @functools.cached_property + def _api_headers(self): + token = traverse_obj( + self._get_cookies('https://kick.com/'), + ('session_token', 'value', {urllib.parse.unquote})) + return {'Authorization': f'Bearer {token}'} if token else {} def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs): return self._download_json( f'https://kick.com/api/{path}', display_id, note=note, - headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs) + headers={**self._api_headers, **headers}, impersonate=True, **kwargs) class KickIE(KickBaseIE): From 35fc33fbc51c7f5392fb2300f65abf6cf107ef90 Mon Sep 17 00:00:00 2001 From: Clark Date: Mon, 30 Jun 2025 18:25:28 -0500 Subject: [PATCH 13/28] [ie/sauceplus] Add extractor (#13567) Authored by: ceandreasen, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/floatplane.py | 232 ++++++++++++++++++-------------- yt_dlp/extractor/sauceplus.py | 41 ++++++ 3 files changed, 176 insertions(+), 98 deletions(-) create mode 100644 yt_dlp/extractor/sauceplus.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 61cc05d313..ada12b3a8a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1824,6 +1824,7 @@ from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE +from .sauceplus import SaucePlusIE from .sbs import SBSIE from .sbscokr import ( SBSCoKrAllvodProgramIE, diff --git a/yt_dlp/extractor/floatplane.py b/yt_dlp/extractor/floatplane.py index b7ee160a44..7dd3b0eb2d 100644 --- a/yt_dlp/extractor/floatplane.py +++ b/yt_dlp/extractor/floatplane.py @@ -17,8 +17,140 @@ from ..utils.traversal import traverse_obj -class FloatplaneIE(InfoExtractor): +class FloatplaneBaseIE(InfoExtractor): + def _real_extract(self, url): + post_id = self._match_id(url) + + post_data = self._download_json( + f'{self._BASE_URL}/api/v3/content/post', post_id, query={'id': post_id}, + note='Downloading post data', errnote='Unable to download post data', + impersonate=self._IMPERSONATE_TARGET) + + if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))): + raise ExtractorError('Post does not contain a video or audio track', expected=True) + + uploader_url = format_field( + post_data, [('creator', 'urlname')], f'{self._BASE_URL}/channel/%s/home') or None + + common_info = { + 'uploader_url': uploader_url, + 'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))), + 'availability': self._availability(needs_subscription=True), + **traverse_obj(post_data, { + 'uploader': ('creator', 'title', {str}), + 'uploader_id': ('creator', 'id', {str}), + 'channel': ('channel', 'title', {str}), + 'channel_id': ('channel', 'id', {str}), + 'release_timestamp': ('releaseDate', {parse_iso8601}), + }), + } + + items = [] + for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)): + media_id = media['id'] + media_typ = media.get('type') or 'video' + + metadata = self._download_json( + f'{self._BASE_URL}/api/v3/content/{media_typ}', media_id, query={'id': media_id}, + note=f'Downloading {media_typ} metadata', impersonate=self._IMPERSONATE_TARGET) + + stream = self._download_json( + f'{self._BASE_URL}/api/v2/cdn/delivery', media_id, query={ + 'type': 'vod' if media_typ == 'video' else 'aod', + 'guid': metadata['guid'], + }, note=f'Downloading {media_typ} stream data', + impersonate=self._IMPERSONATE_TARGET) + + path_template = traverse_obj(stream, ('resource', 'uri', {str})) + + def format_path(params): + path = path_template + for i, val in (params or {}).items(): + path = path.replace(f'{{qualityLevelParams.{i}}}', val) + return path + + formats = [] + for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)): + url = urljoin(stream['cdn'], format_path(traverse_obj( + stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict})))) + format_id = traverse_obj(quality, ('name', {str})) + hls_aes = {} + m3u8_data = None + + # If we need impersonation for the API, then we need it for HLS keys too: extract in advance + if self._IMPERSONATE_TARGET is not None: + m3u8_data = self._download_webpage( + url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS, + note=join_nonempty('Downloading', format_id, 'm3u8 information', delim=' '), + errnote=join_nonempty('Failed to download', format_id, 'm3u8 information', delim=' ')) + if not m3u8_data: + continue + + key_url = self._search_regex( + r'#EXT-X-KEY:METHOD=AES-128,URI="(https?://[^"]+)"', + m3u8_data, 'HLS AES key URI', default=None) + if key_url: + urlh = self._request_webpage( + key_url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS, + note=join_nonempty('Downloading', format_id, 'HLS AES key', delim=' '), + errnote=join_nonempty('Failed to download', format_id, 'HLS AES key', delim=' ')) + if urlh: + hls_aes['key'] = urlh.read().hex() + + formats.append({ + **traverse_obj(quality, { + 'format_note': ('label', {str}), + 'width': ('width', {int}), + 'height': ('height', {int}), + }), + **parse_codecs(quality.get('codecs')), + 'url': url, + 'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'), + 'format_id': format_id, + 'hls_media_playlist_data': m3u8_data, + 'hls_aes': hls_aes or None, + }) + items.append({ + **common_info, + 'id': media_id, + **traverse_obj(metadata, { + 'title': ('title', {str}), + 'duration': ('duration', {int_or_none}), + 'thumbnail': ('thumbnail', 'path', {url_or_none}), + }), + 'formats': formats, + }) + + post_info = { + **common_info, + 'id': post_id, + 'display_id': post_id, + **traverse_obj(post_data, { + 'title': ('title', {str}), + 'description': ('text', {clean_html}), + 'like_count': ('likes', {int_or_none}), + 'dislike_count': ('dislikes', {int_or_none}), + 'comment_count': ('comments', {int_or_none}), + 'thumbnail': ('thumbnail', 'path', {url_or_none}), + }), + 'http_headers': self._HEADERS, + } + + if len(items) > 1: + return self.playlist_result(items, **post_info) + + post_info.update(items[0]) + return post_info + + +class FloatplaneIE(FloatplaneBaseIE): _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P\w+)' + _BASE_URL = 'https://www.floatplane.com' + _IMPERSONATE_TARGET = None + _HEADERS = { + 'Origin': _BASE_URL, + 'Referer': f'{_BASE_URL}/', + } _TESTS = [{ 'url': 'https://www.floatplane.com/post/2Yf3UedF7C', 'info_dict': { @@ -170,105 +302,9 @@ class FloatplaneIE(InfoExtractor): }] def _real_initialize(self): - if not self._get_cookies('https://www.floatplane.com').get('sails.sid'): + if not self._get_cookies(self._BASE_URL).get('sails.sid'): self.raise_login_required() - def _real_extract(self, url): - post_id = self._match_id(url) - - post_data = self._download_json( - 'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id}, - note='Downloading post data', errnote='Unable to download post data') - - if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))): - raise ExtractorError('Post does not contain a video or audio track', expected=True) - - uploader_url = format_field( - post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None - - common_info = { - 'uploader_url': uploader_url, - 'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))), - 'availability': self._availability(needs_subscription=True), - **traverse_obj(post_data, { - 'uploader': ('creator', 'title', {str}), - 'uploader_id': ('creator', 'id', {str}), - 'channel': ('channel', 'title', {str}), - 'channel_id': ('channel', 'id', {str}), - 'release_timestamp': ('releaseDate', {parse_iso8601}), - }), - } - - items = [] - for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)): - media_id = media['id'] - media_typ = media.get('type') or 'video' - - metadata = self._download_json( - f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id}, - note=f'Downloading {media_typ} metadata') - - stream = self._download_json( - 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={ - 'type': 'vod' if media_typ == 'video' else 'aod', - 'guid': metadata['guid'], - }, note=f'Downloading {media_typ} stream data') - - path_template = traverse_obj(stream, ('resource', 'uri', {str})) - - def format_path(params): - path = path_template - for i, val in (params or {}).items(): - path = path.replace(f'{{qualityLevelParams.{i}}}', val) - return path - - formats = [] - for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)): - url = urljoin(stream['cdn'], format_path(traverse_obj( - stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict})))) - formats.append({ - **traverse_obj(quality, { - 'format_id': ('name', {str}), - 'format_note': ('label', {str}), - 'width': ('width', {int}), - 'height': ('height', {int}), - }), - **parse_codecs(quality.get('codecs')), - 'url': url, - 'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'), - }) - - items.append({ - **common_info, - 'id': media_id, - **traverse_obj(metadata, { - 'title': ('title', {str}), - 'duration': ('duration', {int_or_none}), - 'thumbnail': ('thumbnail', 'path', {url_or_none}), - }), - 'formats': formats, - }) - - post_info = { - **common_info, - 'id': post_id, - 'display_id': post_id, - **traverse_obj(post_data, { - 'title': ('title', {str}), - 'description': ('text', {clean_html}), - 'like_count': ('likes', {int_or_none}), - 'dislike_count': ('dislikes', {int_or_none}), - 'comment_count': ('comments', {int_or_none}), - 'thumbnail': ('thumbnail', 'path', {url_or_none}), - }), - } - - if len(items) > 1: - return self.playlist_result(items, **post_info) - - post_info.update(items[0]) - return post_info - class FloatplaneChannelIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P[\w-]+)/home(?:/(?P[\w-]+))?' diff --git a/yt_dlp/extractor/sauceplus.py b/yt_dlp/extractor/sauceplus.py new file mode 100644 index 0000000000..75d7022d3c --- /dev/null +++ b/yt_dlp/extractor/sauceplus.py @@ -0,0 +1,41 @@ +from .floatplane import FloatplaneBaseIE + + +class SaucePlusIE(FloatplaneBaseIE): + IE_DESC = 'Sauce+' + _VALID_URL = r'https?://(?:(?:www|beta)\.)?sauceplus\.com/post/(?P\w+)' + _BASE_URL = 'https://www.sauceplus.com' + _HEADERS = { + 'Origin': _BASE_URL, + 'Referer': f'{_BASE_URL}/', + } + _IMPERSONATE_TARGET = True + _TESTS = [{ + 'url': 'https://www.sauceplus.com/post/YbBwIa2A5g', + 'info_dict': { + 'id': 'eit4Ugu5TL', + 'ext': 'mp4', + 'display_id': 'YbBwIa2A5g', + 'title': 'Scare the Coyote - Episode 3', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'duration': 2975, + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'release_date': '20250627', + 'release_timestamp': 1750993500, + 'uploader': 'Scare The Coyote', + 'uploader_id': '683e0a3269688656a5a49a44', + 'uploader_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home', + 'channel': 'Scare The Coyote', + 'channel_id': '683e0a326968866ceba49a45', + 'channel_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home/main', + 'availability': 'subscriber_only', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_initialize(self): + if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'): + self.raise_login_required() From 11b9416e10cff7513167d76d6c47774fcdd3e26a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:37:56 -0500 Subject: [PATCH 14/28] [ie/sproutvideo] Support browser impersonation (#13589) Closes #13576 Authored by: bashonly --- yt_dlp/extractor/sproutvideo.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 764c78f1e5..b5af905414 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -98,11 +98,8 @@ def _extract_embed_urls(cls, url, webpage): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, headers={ - **traverse_obj(smuggled_data, {'Referer': 'referer'}), - # yt-dlp's default Chrome user-agents are too old - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:140.0) Gecko/20100101 Firefox/140.0', - }) + webpage = self._download_webpage( + url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True) data = self._search_json( r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', From b0187844988e557c7e1e6bb1aabd4c1176768d86 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:44:42 -0500 Subject: [PATCH 15/28] [cleanup] Misc (#13590) Authored by: bashonly --- devscripts/changelog_override.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 269de2c682..d7296bf309 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -254,5 +254,13 @@ { "action": "remove", "when": "d596824c2f8428362c072518856065070616e348" + }, + { + "action": "remove", + "when": "7b81634fb1d15999757e7a9883daa6ef09ea785b" + }, + { + "action": "remove", + "when": "500761e41acb96953a5064e951d41d190c287e46" } ] From 30fa54280b363265d0235b0aab3b1725eb0f61b8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 30 Jun 2025 23:47:20 +0000 Subject: [PATCH 16/28] Release 2025.06.30 Created by: bashonly :ci skip all --- CONTRIBUTORS | 3 +++ Changelog.md | 23 +++++++++++++++++++++++ supportedsites.md | 7 ++----- yt_dlp/version.py | 6 +++--- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 00d4d15aab..ba23b66dc5 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -781,3 +781,6 @@ maxbin123 nullpos anlar eason1478 +ceandreasen +chauhantirth +helpimnotdrowning diff --git a/Changelog.md b/Changelog.md index d37852658f..5a5c18cf34 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,29 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.06.30 + +#### Core changes +- **jsinterp**: [Fix `extract_object`](https://github.com/yt-dlp/yt-dlp/commit/958153a226214c86879e36211ac191bf78289578) ([#13580](https://github.com/yt-dlp/yt-dlp/issues/13580)) by [seproDev](https://github.com/seproDev) + +#### Extractor changes +- **bilibilispacevideo**: [Extract hidden-mode collections as playlists](https://github.com/yt-dlp/yt-dlp/commit/99b85ac102047446e6adf5b62bfc3c8d80b53778) ([#13533](https://github.com/yt-dlp/yt-dlp/issues/13533)) by [c-basalt](https://github.com/c-basalt) +- **hotstar** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b5bd057fe86550f3aa67f2fc8790d1c6a251c57b) ([#13530](https://github.com/yt-dlp/yt-dlp/issues/13530)) by [bashonly](https://github.com/bashonly), [chauhantirth](https://github.com/chauhantirth) (With fixes in [e9f1576](https://github.com/yt-dlp/yt-dlp/commit/e9f157669e24953a88d15ce22053649db7a8e81e) by [bashonly](https://github.com/bashonly)) + - [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0a6b1044899f452cd10b6c7a6b00fa985a9a8b97) ([#13560](https://github.com/yt-dlp/yt-dlp/issues/13560)) by [bashonly](https://github.com/bashonly) + - [Raise for login required](https://github.com/yt-dlp/yt-dlp/commit/5e292baad62c749b6c340621ab2d0f904165ddfb) ([#10405](https://github.com/yt-dlp/yt-dlp/issues/10405)) by [bashonly](https://github.com/bashonly) + - series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4bd9a7ade7e0508b9795b3e72a69eeb40788b62b) ([#13564](https://github.com/yt-dlp/yt-dlp/issues/13564)) by [bashonly](https://github.com/bashonly) +- **jiocinema**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/7e2504f941a11ea2b0dba00de3f0295cdc253e79) ([#13565](https://github.com/yt-dlp/yt-dlp/issues/13565)) by [bashonly](https://github.com/bashonly) +- **kick**: [Support subscriber-only content](https://github.com/yt-dlp/yt-dlp/commit/b16722ede83377f77ea8352dcd0a6ca8e83b8f0f) ([#13550](https://github.com/yt-dlp/yt-dlp/issues/13550)) by [helpimnotdrowning](https://github.com/helpimnotdrowning) +- **niconico**: live: [Fix extractor and downloader](https://github.com/yt-dlp/yt-dlp/commit/06c1a8cdffe14050206683253726875144192ef5) ([#13158](https://github.com/yt-dlp/yt-dlp/issues/13158)) by [doe1080](https://github.com/doe1080) +- **sauceplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/35fc33fbc51c7f5392fb2300f65abf6cf107ef90) ([#13567](https://github.com/yt-dlp/yt-dlp/issues/13567)) by [bashonly](https://github.com/bashonly), [ceandreasen](https://github.com/ceandreasen) +- **sproutvideo**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/11b9416e10cff7513167d76d6c47774fcdd3e26a) ([#13589](https://github.com/yt-dlp/yt-dlp/issues/13589)) by [bashonly](https://github.com/bashonly) +- **youtube**: [Fix premium formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2ba5391cd68ed4f2415c827d2cecbcbc75ace10b) ([#13586](https://github.com/yt-dlp/yt-dlp/issues/13586)) by [bashonly](https://github.com/bashonly) + +#### Misc. changes +- **ci**: [Add signature tests](https://github.com/yt-dlp/yt-dlp/commit/1b883846347addeab12663fd74317fd544341a1c) ([#13582](https://github.com/yt-dlp/yt-dlp/issues/13582)) by [bashonly](https://github.com/bashonly) +- **cleanup**: Miscellaneous: [b018784](https://github.com/yt-dlp/yt-dlp/commit/b0187844988e557c7e1e6bb1aabd4c1176768d86) by [bashonly](https://github.com/bashonly) + ### 2025.06.25 #### Extractor changes diff --git a/supportedsites.md b/supportedsites.md index b3fe011739..8e48135d22 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -575,9 +575,7 @@ # Supported sites - **HollywoodReporterPlaylist** - **Holodex** - **HotNewHipHop**: (**Currently broken**) - - **hotstar** - - **hotstar:playlist** - - **hotstar:season** + - **hotstar**: JioHotstar - **hotstar:series** - **hrfernsehen** - **HRTi**: [*hrti*](## "netrc machine") @@ -647,8 +645,6 @@ # Supported sites - **Jamendo** - **JamendoAlbum** - **JeuxVideo**: (**Currently broken**) - - **jiocinema**: [*jiocinema*](## "netrc machine") - - **jiocinema:series**: [*jiocinema*](## "netrc machine") - **jiosaavn:album** - **jiosaavn:artist** - **jiosaavn:playlist** @@ -1299,6 +1295,7 @@ # Supported sites - **SampleFocus** - **Sangiin**: 参議院インターネット審議中継 (archive) - **Sapo**: SAPO Vídeos + - **SaucePlus**: Sauce+ - **SBS**: sbs.com.au - **sbs.co.kr** - **sbs.co.kr:allvod_program** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 020a0299c0..451fee7164 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2025.06.25' +__version__ = '2025.06.30' -RELEASE_GIT_HEAD = '1838a1ce5d4ade80770ba9162eaffc9a1607dc70' +RELEASE_GIT_HEAD = 'b0187844988e557c7e1e6bb1aabd4c1176768d86' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2025.06.25' +_pkg_version = '2025.06.30' From f3008bc5f89d2691f2f8dfc51b406ef4e25281c3 Mon Sep 17 00:00:00 2001 From: sepro Date: Tue, 1 Jul 2025 13:23:53 +0200 Subject: [PATCH 17/28] No longer enable `--mtime` by default (#12781) Closes #12780 Authored by: seproDev --- README.md | 9 +++++---- yt_dlp/YoutubeDL.py | 3 ++- yt_dlp/__init__.py | 6 ++++++ yt_dlp/downloader/fragment.py | 2 +- yt_dlp/downloader/http.py | 2 +- yt_dlp/options.py | 10 +++++----- 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 0f9a7d5564..e476c0084b 100644 --- a/README.md +++ b/README.md @@ -1156,15 +1156,15 @@ # CONFIGURATION * `/etc/yt-dlp/config` * `/etc/yt-dlp/config.txt` -E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: +E.g. with the following configuration file, yt-dlp will always extract the audio, copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: ``` # Lines starting with # are comments # Always extract audio -x -# Do not copy the mtime ---no-mtime +# Copy the mtime +--mtime # Use this proxy --proxy 127.0.0.1:3128 @@ -2262,6 +2262,7 @@ ### Differences in default behavior * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests. * The sub-modules `swfinterp`, `casefold` are removed. * Passing `--simulate` (or calling `extract_info` with `download=False`) no longer alters the default format selection. See [#9843](https://github.com/yt-dlp/yt-dlp/issues/9843) for details. +* yt-dlp no longer applies the server modified time to downloaded files by default. Use `--mtime` or `--compat-options mtime-by-default` to revert this. For ease of use, a few more compat options are available: @@ -2271,7 +2272,7 @@ ### Differences in default behavior * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization` * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx` * `--compat-options 2023`: Same as `--compat-options 2024,prefer-vp9-sort` -* `--compat-options 2024`: Currently does nothing. Use this to enable all future compat options +* `--compat-options 2024`: Same as `--compat-options mtime-by-default`. Use this to enable all future compat options The following compat options restore vulnerable behavior from before security patches: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 67ca90349f..44a6696c02 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -482,7 +482,8 @@ class YoutubeDL: The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort, no-clean-infojson, no-playlist-metafiles, - no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort. + no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort, + mtime-by-default. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. Allowed keys are 'download', 'postprocess', diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 714d9ad5c2..2e7646b7ec 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -159,6 +159,12 @@ def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): elif 'prefer-vp9-sort' in opts.compat_opts: opts.format_sort.extend(FormatSorter._prefer_vp9_sort) + if 'mtime-by-default' in opts.compat_opts: + if opts.updatetime is None: + opts.updatetime = True + else: + _unused_compat_opt('mtime-by-default') + _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) if _video_multistreams_set is False and _audio_multistreams_set is False: diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 98784e7039..7852ae90d0 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -302,7 +302,7 @@ def _finish_frag_download(self, ctx, info_dict): elif to_file: self.try_rename(ctx['tmpfilename'], ctx['filename']) filetime = ctx.get('fragment_filetime') - if self.params.get('updatetime', True) and filetime: + if self.params.get('updatetime') and filetime: with contextlib.suppress(Exception): os.utime(ctx['filename'], (time.time(), filetime)) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 9c6dd8b799..90bfcaf552 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -348,7 +348,7 @@ def retry(e): self.try_rename(ctx.tmpfilename, ctx.filename) # Update file modification time - if self.params.get('updatetime', True): + if self.params.get('updatetime'): info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None)) self._hook_progress({ diff --git a/yt_dlp/options.py b/yt_dlp/options.py index b4d3d4d668..13ba445df3 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -529,14 +529,14 @@ def _preset_alias_callback(option, opt_str, value, parser): 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress', 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', - 'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort', + 'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort', 'mtime-by-default', }, 'aliases': { 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'], '2021': ['2022', 'no-certifi', 'filename-sanitization'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'], '2023': ['2024', 'prefer-vp9-sort'], - '2024': [], + '2024': ['mtime-by-default'], }, }, help=( 'Options that can help keep compatibility with youtube-dl or youtube-dlc ' @@ -1466,12 +1466,12 @@ def _preset_alias_callback(option, opt_str, value, parser): help='Do not use .part files - write directly into output file') filesystem.add_option( '--mtime', - action='store_true', dest='updatetime', default=True, - help='Use the Last-modified header to set the file modification time (default)') + action='store_true', dest='updatetime', default=None, + help='Use the Last-modified header to set the file modification time') filesystem.add_option( '--no-mtime', action='store_false', dest='updatetime', - help='Do not use the Last-modified header to set the file modification time') + help='Do not use the Last-modified header to set the file modification time (default)') filesystem.add_option( '--write-description', action='store_true', dest='writedescription', default=False, From ca5cce5b07d51efe7310b449cdefeca8d873e9df Mon Sep 17 00:00:00 2001 From: sepro Date: Tue, 1 Jul 2025 21:17:11 +0200 Subject: [PATCH 18/28] [cleanup] Bump ruff to 0.12.x (#13596) Authored by: seproDev --- pyproject.toml | 4 +++- yt_dlp/aes.py | 2 +- yt_dlp/extractor/nhk.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3775251e10..41d5ec3b0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ dev = [ ] static-analysis = [ "autopep8~=2.0", - "ruff~=0.11.0", + "ruff~=0.12.0", ] test = [ "pytest~=8.1", @@ -210,10 +210,12 @@ ignore = [ "TD001", # invalid-todo-tag "TD002", # missing-todo-author "TD003", # missing-todo-link + "PLC0415", # import-outside-top-level "PLE0604", # invalid-all-object (false positives) "PLE0643", # potential-index-error (false positives) "PLW0603", # global-statement "PLW1510", # subprocess-run-without-check + "PLW1641", # eq-without-hash "PLW2901", # redefined-loop-name "RUF001", # ambiguous-unicode-character-string "RUF012", # mutable-class-default diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index 065901d68d..600cb12a89 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -435,7 +435,7 @@ def sub_bytes_inv(data): def rotate(data): - return data[1:] + [data[0]] + return [*data[1:], data[0]] def key_schedule_core(data, rcon_iteration): diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 0bd6edfcba..0d5e5b0e7e 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -495,7 +495,7 @@ def _real_extract(self, url): chapters = None if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles): start_time = chapter_durations - end_time = chapter_durations[1:] + [duration] + end_time = [*chapter_durations[1:], duration] chapters = [{ 'start_time': s, 'end_time': e, From c2ff2dbaec7929015373fe002e9bd4849931a4ce Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Wed, 2 Jul 2025 00:12:43 +0200 Subject: [PATCH 19/28] [rh:requests] Work around partial read dropping data (#13599) Authored by: Grub4K --- test/test_networking.py | 17 ++++++++++++----- yt_dlp/networking/_requests.py | 4 ++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/test/test_networking.py b/test/test_networking.py index 2f441fced2..afdd0c7aa7 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -22,7 +22,6 @@ import tempfile import threading import time -import urllib.error import urllib.request import warnings import zlib @@ -223,10 +222,7 @@ def do_GET(self): if encoding == 'br' and brotli: payload = brotli.compress(payload) elif encoding == 'gzip': - buf = io.BytesIO() - with gzip.GzipFile(fileobj=buf, mode='wb') as f: - f.write(payload) - payload = buf.getvalue() + payload = gzip.compress(payload, mtime=0) elif encoding == 'deflate': payload = zlib.compress(payload) elif encoding == 'unsupported': @@ -729,6 +725,17 @@ def test_keep_header_casing(self, handler): assert 'X-test-heaDer: test' in res + def test_partial_read_then_full_read(self, handler): + with handler() as rh: + for encoding in ('', 'gzip', 'deflate'): + res = validate_and_send(rh, Request( + f'http://127.0.0.1:{self.http_port}/content-encoding', + headers={'ytdl-encoding': encoding})) + assert res.headers.get('Content-Encoding') == encoding + assert res.read(6) == b'' + assert res.read(0) == b'' + assert res.read() == b'