diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 309489672e..67ca90349f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2219,6 +2219,7 @@ def _check_formats(self, formats): self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: + f.pop('__needs_testing', None) yield f else: self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) @@ -3963,6 +3964,7 @@ def simplified_codec(f, field): self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None), + self._format_out('Untested', self.Styles.WARNING) if f.get('__needs_testing') else None, format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), delim=', '), delim=' '), diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index c0923594e5..764c78f1e5 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -41,6 +41,7 @@ class SproutVideoIE(InfoExtractor): 'duration': 703, 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', }, + 'skip': 'Account Disabled', }, { # http formats 'sd' and 'hd' are available 'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90', @@ -97,11 +98,21 @@ def _extract_embed_urls(cls, url, webpage): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) + webpage = self._download_webpage(url, video_id, headers={ + **traverse_obj(smuggled_data, {'Referer': 'referer'}), + # yt-dlp's default Chrome user-agents are too old + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:140.0) Gecko/20100101 Firefox/140.0', + }) data = self._search_json( - r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', - end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode()) + r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, + contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', + transform_source=lambda x: base64.b64decode(x).decode()) + + # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e] + # e.g. if the user-agent we used with the webpage request is too old + video_uid = data['videoUid'] + if video_id != video_uid: + raise ExtractorError(f'{self.IE_NAME} sent the wrong video data ({video_uid})') formats, subtitles = [], {} headers = { diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 55ebdce1ba..5ccc33fa33 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2820,6 +2820,10 @@ def _generate_player_context(cls, sts=None): context['signatureTimestamp'] = sts return { 'playbackContext': { + 'adPlaybackContext': { + 'pyv': True, + 'adType': 'AD_TYPE_INSTREAM', + }, 'contentPlaybackContext': context, }, **cls._get_checkok_params(), @@ -3552,6 +3556,11 @@ def process_manifest_format(f, proto, client_name, itag, po_token): f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') f['source_preference'] -= 20 + # XXX: Check if IOS HLS formats are affected by player PO token enforcement; temporary + # See https://github.com/yt-dlp/yt-dlp/issues/13511 + if proto == 'hls' and client_name == 'ios': + f['__needs_testing'] = True + itags[itag].add(key) if itag and all_formats: @@ -4280,6 +4289,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): # Newly uploaded videos' HLS formats are potentially problematic and need to be checked + # XXX: This is redundant for as long as we are already checking all IOS HLS formats upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc) if upload_datetime >= datetime_from_str('today-2days'): for fmt in info['formats']: