diff --git a/README.md b/README.md index 0f9a7d556..bfbdb572e 100644 --- a/README.md +++ b/README.md @@ -2349,7 +2349,7 @@ #### Developer options --test Download only part of video for testing extractors --load-pages Load pages dumped by --write-pages --youtube-print-sig-code For testing youtube signatures - --allow-unplayable-formats List unplayable formats also + --allow-unplayable-formats List unplayable formats. Implies `--simulate` and `--list-formats`. --no-allow-unplayable-formats Default #### Old aliases diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 309489672..ef0ca9c60 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -248,7 +248,7 @@ class YoutubeDL: You can also pass a function. The function takes 'ctx' as argument and returns the formats to download. See "build_format_selector" for an implementation - allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. + allow_unplayable_formats: Allow unplayable formats to be extracted. ignore_no_formats_error: Ignore "No video formats" error. Usefull for extracting metadata even if the video is not actually available for download (experimental) @@ -702,11 +702,14 @@ def process_color_policy(stream): self.deprecated_feature(system_deprecation.replace('\n', '\n ')) if self.params.get('allow_unplayable_formats'): + from . import _IN_CLI + + switch = '--allow-unplayable-formats' if _IN_CLI else 'allow_unplayable_formats' self.report_warning( - f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. ' - 'This is a developer option intended for debugging. \n' - ' If you experience any issues while using this option, ' - f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') + f'{switch} is a {self._format_err("developer option", self.Styles.EMPHASIS)} intended for {self._format_err("debugging", self.Styles.EMPHASIS)}. \n' + f' If you experience issues {self._format_err("DO NOT", self.Styles.ERROR)} open a bug report.') + self.params['listformats'] = True + self.params['simulate'] = True if self.params.get('bidi_workaround', False): try: @@ -2849,6 +2852,7 @@ def sanitize_numeric_fields(info): info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None if not self.params.get('allow_unplayable_formats'): + # Allow bypassing flaky `has_drm` detection formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe'] if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats): @@ -3464,12 +3468,7 @@ def correct_ext(filename, ext=new_ext): success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download else: - if self.params.get('allow_unplayable_formats'): - self.report_warning( - 'You have requested merging of multiple formats ' - 'while also allowing unplayable formats to be downloaded. ' - 'The formats won\'t be merged to prevent data corruption.') - elif not merger.available: + if not merger.available: msg = 'You have requested merging of multiple formats but ffmpeg is not installed' if not self.params.get('ignoreerrors'): self.report_error(f'{msg}. 
Aborting due to --abort-on-error') @@ -3500,7 +3499,7 @@ def correct_ext(filename, ext=new_ext): info_dict['__real_download'] = info_dict['__real_download'] or real_download success = success and partial_success - if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): + if downloaded and merger.available: info_dict['__postprocessors'].append(merger) info_dict['__files_to_merge'] = downloaded # Even if there were no downloads, it is being merged only now diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 714d9ad5c..4612bfca3 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -511,8 +511,7 @@ def report_args_compat(name, value, key1, key2=None, where=None): opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') opts.postprocessor_args.setdefault('sponskrub', []) - def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats', - val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): + def report_conflict(arg1, opt1, arg2, opt2, val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): if val2 is NO_DEFAULT: val2 = getattr(opts, opt2) if not val2: @@ -540,21 +539,6 @@ def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_u report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', val1=opts.sponskrub and opts.sponskrub_cut) - # Conflicts with --allow-unplayable-formats - report_conflict('--embed-metadata', 'addmetadata') - report_conflict('--embed-chapters', 'addchapters') - report_conflict('--embed-info-json', 'embed_infojson') - report_conflict('--embed-subs', 'embedsubtitles') - report_conflict('--embed-thumbnail', 'embedthumbnail') - report_conflict('--extract-audio', 'extractaudio') - report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), default='never') - report_conflict('--recode-video', 'recodevideo') - report_conflict('--remove-chapters', 'remove_chapters', default=[]) - report_conflict('--remux-video', 'remuxvideo') - report_conflict('--sponskrub', 'sponskrub') - report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set()) - report_conflict('--xattrs', 'xattrs') - # Fully deprecated options def report_deprecation(val, old, new=None): if not val: diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ee73ac043..77fafc36b 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -468,7 +468,6 @@ def can_merge_formats(cls, info_dict, params): return ( info_dict.get('requested_formats') and info_dict.get('protocol') - and not params.get('allow_unplayable_formats') and 'no-direct-merge' not in params.get('compat_opts', []) and cls.can_download(info_dict)) diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 22d0ebd26..9d3fb4a46 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -256,14 +256,13 @@ def _get_unencrypted_media(self, doc): media = doc.findall(_add_ns('media')) if not media: self.report_error('No media found') - if not self.params.get('allow_unplayable_formats'): - for e in (doc.findall(_add_ns('drmAdditionalHeader')) - + doc.findall(_add_ns('drmAdditionalHeaderSet'))): - # If id attribute is missing it's valid for all media nodes - # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute - if 'id' not in e.attrib: - self.report_error('Missing ID in f4m DRM') - media = remove_encrypted_media(media) + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + 
doc.findall(_add_ns('drmAdditionalHeaderSet'))): + # If id attribute is missing it's valid for all media nodes + # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute + if 'id' not in e.attrib: + self.report_error('Missing ID in f4m DRM') + media = remove_encrypted_media(media) if not media: self.report_error('Unsupported DRM') return media diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 1f36a07f5..97a0380b4 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -15,6 +15,7 @@ traverse_obj, update_url_query, urljoin, + deprecation_warning, ) from ..utils._utils import _request_dump_filename @@ -39,6 +40,8 @@ def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039 @classmethod def can_download(cls, manifest, info_dict, allow_unplayable_formats=False): + if allow_unplayable_formats: + deprecation_warning('allow_unplayable_formats is not supported', stacklevel=1) UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] @@ -58,17 +61,15 @@ def can_download(cls, manifest, info_dict, allow_unplayable_formats=False): # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5 ] - if not allow_unplayable_formats: - UNSUPPORTED_FEATURES += [ - r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM - ] + UNSUPPORTED_FEATURES += [ + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM + ] def check_results(): yield not info_dict.get('is_live') for feature in UNSUPPORTED_FEATURES: yield not re.search(feature, manifest) - if not allow_unplayable_formats: - yield not cls._has_drm(manifest) + yield not cls._has_drm(manifest) return all(check_results()) def real_download(self, filename, info_dict): @@ -91,7 +92,7 @@ def real_download(self, filename, info_dict): outf.write(s_bytes) s = s_bytes.decode('utf-8', 'ignore') - can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None + can_download, message = self.can_download(s, info_dict), None if can_download: has_ffmpeg = FFmpegFD.available() no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s @@ -105,7 +106,7 @@ def real_download(self, filename, info_dict): message = ('Live HLS streams are not supported by the native downloader. 
If this is a livestream, ' f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command') if not can_download: - if self._has_drm(s) and not self.params.get('allow_unplayable_formats'): + if self._has_drm(s): if info_dict.get('has_drm') and self.params.get('test'): self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True) else: diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index e97740c90..57831aad7 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -236,7 +236,7 @@ def _real_extract(self, url): self._call_api_v1( f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}), ('body', 'results', 'item', {dict})) or {} - if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): + if video_data.get('drmProtected'): self.report_drm(video_id) # See https://github.com/yt-dlp/yt-dlp/issues/396 diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 57c276a67..171b1f796 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -140,13 +140,14 @@ def _real_extract(self, url): quality = qualities(self._KNOWN_FORMATS) formats = [] + has_drm = False for f in result.get('files', []): f_url = f.get('url') content_format = f.get('content_format') if not f_url: continue - if (not self.get_param('allow_unplayable_formats') - and ('-MDRM-' in content_format or '-FPS-' in content_format)): + if '-MDRM-' in content_format or '-FPS-' in content_format: + has_drm = True continue formats.append({ 'url': f_url, @@ -154,6 +155,8 @@ def _real_extract(self, url): 'quality': quality(content_format), 'filesize': int_or_none(f.get('size_in_bytes')), }) + if not formats and has_drm: + self.report_drm(video_id) compilation = result.get('compilation') episode = title if compilation else None diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index 6d51e32f6..cd2832466 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -464,6 +464,7 @@ def sign_url(unsigned_url): formats = [] subtitles = {} + has_drm = False for f in flavor_assets: # Continue if asset is not ready if f.get('status') != 2: @@ -473,7 +474,8 @@ def sign_url(unsigned_url): if f.get('fileExt') == 'chun': continue # DRM-protected video, cannot be decrypted - if not self.get_param('allow_unplayable_formats') and f.get('fileExt') == 'wvm': + if f.get('fileExt') == 'wvm': + has_drm = True continue if not f.get('fileExt'): # QT indicates QuickTime; some videos have broken fileExt @@ -513,6 +515,9 @@ def sign_url(unsigned_url): formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) + if not formats and has_drm: + self.report_drm(entry_id) + if captions: for caption in captions.get('objects', []): # Continue if caption is not ready diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index 763a01448..522e0409f 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -91,11 +91,13 @@ def _extract_info(self, pc, mobile, i, referer): formats = [] urls = [] + has_drm = False for stream in pc_item.get('streams', []): stream_url = stream.get('url') if not stream_url or stream_url in urls: continue - if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'): + if stream.get('drmProtected'): + has_drm = True continue urls.append(stream_url) ext = determine_ext(stream_url) @@ -159,8 +161,8 @@ def _extract_info(self, pc, mobile, i, referer): format_id = 
mobile_url.get('targetMediaPlatform') if not media_url or media_url in urls: continue - if (format_id in ('Widevine', 'SmoothStreaming') - and not self.get_param('allow_unplayable_formats', False)): + if format_id in ('Widevine', 'SmoothStreaming'): + has_drm = True continue urls.append(media_url) ext = determine_ext(media_url) @@ -179,6 +181,9 @@ def _extract_info(self, pc, mobile, i, referer): 'ext': ext, }) + if not formats and has_drm: + self.report_drm(video_id) + subtitles = {} for flag in mobile_item.get('flags'): if flag == 'ClosedCaptions': diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index 93e28624b..cc2da0875 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -29,8 +29,7 @@ def _real_extract(self, url): '$include': '[HasClosedCaptions]', }) - if (not self.get_param('allow_unplayable_formats') - and try_get(content_package, lambda x: x['Constraints']['Security']['Type'])): + if try_get(content_package, lambda x: x['Constraints']['Security']['Type']): self.report_drm(content_id) manifest_base_url = content_package_url + 'manifest.' diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index e7b69e370..544609ec3 100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -65,8 +65,7 @@ def process_format_list(format_list, format_id=''): for format_dict in format_list: if not isinstance(format_dict, dict): continue - if (not self.get_param('allow_unplayable_formats') - and traverse_obj(format_dict, ('drm', 'keySystem'))): + if traverse_obj(format_dict, ('drm', 'keySystem')): has_drm = True continue format_url = url_or_none(format_dict.get('src')) diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py index 178fd98bf..765256bd1 100644 --- a/yt_dlp/extractor/npo.py +++ b/yt_dlp/extractor/npo.py @@ -275,9 +275,8 @@ def _real_extract(self, url): 'url': stream_url, }) - if not formats: - if not self.get_param('allow_unplayable_formats') and drm: - self.report_drm(video_id) + if not formats and drm: + self.report_drm(video_id) info = { 'id': video_id, diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py index e8a471205..a651a3892 100644 --- a/yt_dlp/extractor/prosiebensat1.py +++ b/yt_dlp/extractor/prosiebensat1.py @@ -31,7 +31,7 @@ def _extract_video_info(self, url, clip_id): 'ids': clip_id, })[0] - if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True: + if video.get('is_protected') is True: self.report_drm(clip_id) formats = [] diff --git a/yt_dlp/extractor/redbee.py b/yt_dlp/extractor/redbee.py index d43bb0bab..dbc6f1e45 100644 --- a/yt_dlp/extractor/redbee.py +++ b/yt_dlp/extractor/redbee.py @@ -337,12 +337,16 @@ def _real_extract(self, url): 'height': height, }) + has_drm = False mpd_url = None if data.get('isLive') else data.get('urlDash') - if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')): - fmts, subs = self._extract_mpd_formats_and_subtitles( - mpd_url, media_id, mpd_id='dash', fatal=False) - formats.extend(fmts) - self._merge_subtitles(subs, target=subtitles) + if mpd_url: + if data.get('drm'): + has_drm = True + else: + fmts, subs = self._extract_mpd_formats_and_subtitles( + mpd_url, media_id, mpd_id='dash', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) audio_url = data.get('urlAudio') if audio_url: @@ -365,6 +369,9 @@ def _real_extract(self, url): formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) + if not formats 
and has_drm: + self.report_drm(media_id) + return { 'id': media_id, 'formats': formats, diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index 2dddb39f9..b35c46a28 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -237,8 +237,7 @@ def pv(name): return value or None if not formats: - if (not self.get_param('allow_unplayable_formats') - and xpath_text(video_xml, './Clip/DRM', default=None)): + if xpath_text(video_xml, './Clip/DRM', default=None): self.report_drm(video_id) ns_st_cds = pv('ns_st_cds') if ns_st_cds != 'free': diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index f0a3b6b7d..cc3afec9f 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -111,7 +111,7 @@ def _real_extract(self, url): playout = self._call_api( 'playout/new/url/' + video_id, video_id)['playout'] - if not self.get_param('allow_unplayable_formats') and playout.get('drm'): + if playout.get('drm'): self.report_drm(video_id) formats = self._extract_m3u8_formats(re.sub( diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py index 0cd914cbb..e5fe920b6 100644 --- a/yt_dlp/extractor/sonyliv.py +++ b/yt_dlp/extractor/sonyliv.py @@ -142,7 +142,7 @@ def _real_extract(self, url): video_id = self._match_id(url) content = self._call_api( '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id) - if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'): + if content.get('isEncrypted'): self.report_drm(video_id) dash_url = content['videoURL'] headers = { diff --git a/yt_dlp/extractor/toggle.py b/yt_dlp/extractor/toggle.py index fbef7cc0f..793cfd138 100644 --- a/yt_dlp/extractor/toggle.py +++ b/yt_dlp/extractor/toggle.py @@ -126,8 +126,7 @@ def _real_extract(self, url): }) if not formats: for meta in (info.get('Metas') or []): - if (not self.get_param('allow_unplayable_formats') - and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'): + if meta.get('Key') == 'Encryption' and meta.get('Value') == '1': self.report_drm(video_id) # Most likely because geo-blocked if no formats and no DRM diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py index cbd2c9c26..6350a363f 100644 --- a/yt_dlp/extractor/toutv.py +++ b/yt_dlp/extractor/toutv.py @@ -68,7 +68,7 @@ def _real_extract(self, url): }) # IsDrm does not necessarily mean the video is DRM protected (see # https://github.com/ytdl-org/youtube-dl/issues/13994). 
- if not self.get_param('allow_unplayable_formats') and metadata.get('IsDrm'): + if metadata.get('IsDrm'): self.report_warning('This video is probably DRM protected.', path) video_id = metadata['IdMedia'] details = metadata['Details'] diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 955a11647..8ad1b7da1 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -261,8 +261,7 @@ def _real_extract(self, url): formats = [] if stream_meta.get('is_drm'): - if not self.get_param('allow_unplayable_formats'): - self.report_drm(video_id) + self.report_drm(video_id) if stream_meta.get('is_premium'): sources = self._download_json( f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=livestreamings', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py new file mode 100644 index 000000000..bbd18fad9 --- /dev/null +++ b/yt_dlp/extractor/youtube.py @@ -0,0 +1,7812 @@ +import base64 +import calendar +import collections +import copy +import datetime as dt +import enum +import functools +import hashlib +import itertools +import json +import math +import os.path +import random +import re +import shlex +import sys +import threading +import time +import traceback +import urllib.parse + +from .common import InfoExtractor, SearchInfoExtractor +from .openload import PhantomJSwrapper +from ..jsinterp import JSInterpreter +from ..networking.exceptions import HTTPError, network_exceptions +from ..utils import ( + NO_DEFAULT, + ExtractorError, + LazyList, + UserNotLive, + bug_reports_message, + classproperty, + clean_html, + datetime_from_str, + dict_get, + filesize_from_tbr, + filter_dict, + float_or_none, + format_field, + get_first, + int_or_none, + is_html, + join_nonempty, + js_to_json, + mimetype2ext, + orderedSet, + parse_codecs, + parse_count, + parse_duration, + parse_iso8601, + parse_qs, + qualities, + remove_end, + remove_start, + smuggle_url, + str_or_none, + str_to_int, + strftime_or_none, + traverse_obj, + try_call, + try_get, + unescapeHTML, + unified_strdate, + unified_timestamp, + unsmuggle_url, + update_url_query, + url_or_none, + urljoin, + variadic, +) + +STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client' +STREAMING_DATA_PO_TOKEN = '__yt_dlp_po_token' + +# any clients starting with _ cannot be explicitly requested by the user +INNERTUBE_CLIENTS = { + 'web': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20241126.01.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, + 'REQUIRE_PO_TOKEN': True, + 'SUPPORTS_COOKIES': True, + }, + # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats + 'web_safari': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20241126.01.00', + 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, + 'REQUIRE_PO_TOKEN': True, + 'SUPPORTS_COOKIES': True, + }, + 'web_embedded': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB_EMBEDDED_PLAYER', + 'clientVersion': '1.20241201.00.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, + 'SUPPORTS_COOKIES': True, + }, + 'web_music': { + 'INNERTUBE_HOST': 'music.youtube.com', + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB_REMIX', + 'clientVersion': '1.20241127.01.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, + 'SUPPORTS_COOKIES': True, + }, + # This client now requires sign-in for every video + 
'web_creator': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB_CREATOR', + 'clientVersion': '1.20241203.01.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, + 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, + }, + 'android': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID', + 'clientVersion': '19.44.38', + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.youtube/19.44.38 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, + 'REQUIRE_JS_PLAYER': False, + 'REQUIRE_PO_TOKEN': True, + }, + # This client now requires sign-in for every video + 'android_music': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID_MUSIC', + 'clientVersion': '7.27.52', + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.apps.youtube.music/7.27.52 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, + 'REQUIRE_JS_PLAYER': False, + 'REQUIRE_PO_TOKEN': True, + 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, + }, + # This client now requires sign-in for every video + 'android_creator': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID_CREATOR', + 'clientVersion': '24.45.100', + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.apps.youtube.creator/24.45.100 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, + 'REQUIRE_JS_PLAYER': False, + 'REQUIRE_PO_TOKEN': True, + 'REQUIRE_AUTH': True, + }, + # YouTube Kids videos aren't returned on this client for some reason + 'android_vr': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID_VR', + 'clientVersion': '1.60.19', + 'deviceMake': 'Oculus', + 'deviceModel': 'Quest 3', + 'androidSdkVersion': 32, + 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.60.19 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip', + 'osName': 'Android', + 'osVersion': '12L', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, + 'REQUIRE_JS_PLAYER': False, + 'SUPPORTS_COOKIES': True, + }, + # iOS clients have HLS live streams. Setting device model to get 60fps formats. 
+ # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 + 'ios': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'IOS', + 'clientVersion': '19.45.4', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '18.1.0.22B83', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, + 'REQUIRE_JS_PLAYER': False, + }, + # This client now requires sign-in for every video + 'ios_music': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'IOS_MUSIC', + 'clientVersion': '7.27.0', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.youtubemusic/7.27.0 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '18.1.0.22B83', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, + 'REQUIRE_JS_PLAYER': False, + 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, + }, + # This client now requires sign-in for every video + 'ios_creator': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'IOS_CREATOR', + 'clientVersion': '24.45.100', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.ytcreator/24.45.100 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '18.1.0.22B83', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, + 'REQUIRE_JS_PLAYER': False, + 'REQUIRE_AUTH': True, + }, + # mweb has 'ultralow' formats + # See: https://github.com/yt-dlp/yt-dlp/pull/557 + 'mweb': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'MWEB', + 'clientVersion': '2.20241202.07.00', + 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, + 'SUPPORTS_COOKIES': True, + }, + 'tv': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5', + 'clientVersion': '7.20241201.18.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + 'SUPPORTS_COOKIES': True, + }, + # This client now requires sign-in for every video + # It was previously an age-gate workaround for videos that were `playable_in_embed` + # It may still be useful if signed into an EU account that is not age-verified + 'tv_embedded': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', + 'clientVersion': '2.0', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, + 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, + }, +} + + +def _split_innertube_client(client_name): + variant, *base = client_name.rsplit('.', 1) + if base: + return variant, base[0], variant + base, *variant = client_name.split('_', 1) + return client_name, base, variant[0] if variant else None + + +def short_client_name(client_name): + main, *parts = _split_innertube_client(client_name)[0].split('_') + return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper() + + +def build_innertube_clients(): + THIRD_PARTY = { + 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL + } + BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android') + priority = qualities(BASE_CLIENTS[::-1]) + + for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): + ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') + ytcfg.setdefault('REQUIRE_JS_PLAYER', True) + ytcfg.setdefault('REQUIRE_PO_TOKEN', False) + ytcfg.setdefault('REQUIRE_AUTH', False) + ytcfg.setdefault('SUPPORTS_COOKIES', False) + 
ytcfg.setdefault('PLAYER_PARAMS', None) + ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') + + _, base_client, variant = _split_innertube_client(client) + ytcfg['priority'] = 10 * priority(base_client) + + if variant == 'embedded': + ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY + ytcfg['priority'] -= 2 + elif variant: + ytcfg['priority'] -= 3 + + +build_innertube_clients() + + +class BadgeType(enum.Enum): + AVAILABILITY_UNLISTED = enum.auto() + AVAILABILITY_PRIVATE = enum.auto() + AVAILABILITY_PUBLIC = enum.auto() + AVAILABILITY_PREMIUM = enum.auto() + AVAILABILITY_SUBSCRIPTION = enum.auto() + LIVE_NOW = enum.auto() + VERIFIED = enum.auto() + + +class YoutubeBaseInfoExtractor(InfoExtractor): + """Provide base functions for Youtube extractors""" + + _RESERVED_NAMES = ( + r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|' + r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|' + r'browse|oembed|get_video_info|iframe_api|s/player|source|' + r'storefront|oops|index|account|t/terms|about|upload|signin|logout') + + _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' + + # _NETRC_MACHINE = 'youtube' + + # If True it will raise an error if no login info is provided + _LOGIN_REQUIRED = False + + _INVIDIOUS_SITES = ( + # invidious-redirect websites + r'(?:www\.)?redirect\.invidious\.io', + r'(?:(?:www|dev)\.)?invidio\.us', + # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md + r'(?:www\.)?invidious\.pussthecat\.org', + r'(?:www\.)?invidious\.zee\.li', + r'(?:www\.)?invidious\.ethibox\.fr', + r'(?:www\.)?iv\.ggtyler\.dev', + r'(?:www\.)?inv\.vern\.i2p', + r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion', + r'(?:www\.)?inv\.riverside\.rocks', + r'(?:www\.)?invidious\.silur\.me', + r'(?:www\.)?inv\.bp\.projectsegfau\.lt', + r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion', + r'(?:www\.)?invidious\.slipfox\.xyz', + r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion', + r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion', + r'(?:www\.)?invidious\.tiekoetter\.com', + r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion', + r'(?:www\.)?invidious\.nerdvpn\.de', + r'(?:www\.)?invidious\.weblibre\.org', + r'(?:www\.)?inv\.odyssey346\.dev', + r'(?:www\.)?invidious\.dhusch\.de', + r'(?:www\.)?iv\.melmac\.space', + r'(?:www\.)?watch\.thekitty\.zone', + r'(?:www\.)?invidious\.privacydev\.net', + r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion', + r'(?:www\.)?invidious\.drivet\.xyz', + r'(?:www\.)?vid\.priv\.au', + r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion', + r'(?:www\.)?inv\.vern\.cc', + r'(?:www\.)?invidious\.esmailelbob\.xyz', + r'(?:www\.)?invidious\.sethforprivacy\.com', + r'(?:www\.)?yt\.oelrichsgarcia\.de', + r'(?:www\.)?yt\.artemislena\.eu', + r'(?:www\.)?invidious\.flokinet\.to', + r'(?:www\.)?invidious\.baczek\.me', + r'(?:www\.)?y\.com\.sb', + r'(?:www\.)?invidious\.epicsite\.xyz', + r'(?:www\.)?invidious\.lidarshield\.cloud', + r'(?:www\.)?yt\.funami\.tech', + r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion', + r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion', + r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion', + # youtube-dl invidious 
instances list + r'(?:(?:www|no)\.)?invidiou\.sh', + r'(?:(?:www|fi)\.)?invidious\.snopyta\.org', + r'(?:www\.)?invidious\.kabi\.tk', + r'(?:www\.)?invidious\.mastodon\.host', + r'(?:www\.)?invidious\.zapashcanon\.fr', + r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks', + r'(?:www\.)?invidious\.tinfoil-hat\.net', + r'(?:www\.)?invidious\.himiko\.cloud', + r'(?:www\.)?invidious\.reallyancient\.tech', + r'(?:www\.)?invidious\.tube', + r'(?:www\.)?invidiou\.site', + r'(?:www\.)?invidious\.site', + r'(?:www\.)?invidious\.xyz', + r'(?:www\.)?invidious\.nixnet\.xyz', + r'(?:www\.)?invidious\.048596\.xyz', + r'(?:www\.)?invidious\.drycat\.fr', + r'(?:www\.)?inv\.skyn3t\.in', + r'(?:www\.)?tube\.poal\.co', + r'(?:www\.)?tube\.connect\.cafe', + r'(?:www\.)?vid\.wxzm\.sx', + r'(?:www\.)?vid\.mint\.lgbt', + r'(?:www\.)?vid\.puffyan\.us', + r'(?:www\.)?yewtu\.be', + r'(?:www\.)?yt\.elukerio\.org', + r'(?:www\.)?yt\.lelux\.fi', + r'(?:www\.)?invidious\.ggc-project\.de', + r'(?:www\.)?yt\.maisputain\.ovh', + r'(?:www\.)?ytprivate\.com', + r'(?:www\.)?invidious\.13ad\.de', + r'(?:www\.)?invidious\.toot\.koeln', + r'(?:www\.)?invidious\.fdn\.fr', + r'(?:www\.)?watch\.nettohikari\.com', + r'(?:www\.)?invidious\.namazso\.eu', + r'(?:www\.)?invidious\.silkky\.cloud', + r'(?:www\.)?invidious\.exonip\.de', + r'(?:www\.)?invidious\.riverside\.rocks', + r'(?:www\.)?invidious\.blamefran\.net', + r'(?:www\.)?invidious\.moomoo\.de', + r'(?:www\.)?ytb\.trom\.tf', + r'(?:www\.)?yt\.cyberhost\.uk', + r'(?:www\.)?kgg2m7yk5aybusll\.onion', + r'(?:www\.)?qklhadlycap4cnod\.onion', + r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion', + r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion', + r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion', + r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion', + r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p', + r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion', + r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion', + r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', + r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion', + r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', + # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances + r'(?:www\.)?piped\.kavin\.rocks', + r'(?:www\.)?piped\.tokhmi\.xyz', + r'(?:www\.)?piped\.syncpundit\.io', + r'(?:www\.)?piped\.mha\.fi', + r'(?:www\.)?watch\.whatever\.social', + r'(?:www\.)?piped\.garudalinux\.org', + r'(?:www\.)?piped\.rivo\.lol', + r'(?:www\.)?piped-libre\.kavin\.rocks', + r'(?:www\.)?yt\.jae\.fi', + r'(?:www\.)?piped\.mint\.lgbt', + r'(?:www\.)?il\.ax', + r'(?:www\.)?piped\.esmailelbob\.xyz', + r'(?:www\.)?piped\.projectsegfau\.lt', + r'(?:www\.)?piped\.privacydev\.net', + r'(?:www\.)?piped\.palveluntarjoaja\.eu', + r'(?:www\.)?piped\.smnz\.de', + r'(?:www\.)?piped\.adminforge\.de', + r'(?:www\.)?watch\.whatevertinfoil\.de', + r'(?:www\.)?piped\.qdi\.fi', + r'(?:(?:www|cf)\.)?piped\.video', + r'(?:www\.)?piped\.aeong\.one', + r'(?:www\.)?piped\.moomoo\.me', + r'(?:www\.)?piped\.chauvet\.pro', + r'(?:www\.)?watch\.leptons\.xyz', + r'(?:www\.)?pd\.vern\.cc', + r'(?:www\.)?piped\.hostux\.net', + r'(?:www\.)?piped\.lunar\.icu', + # Hyperpipe instances from https://hyperpipe.codeberg.page/ + r'(?:www\.)?hyperpipe\.surge\.sh', + 
r'(?:www\.)?hyperpipe\.esmailelbob\.xyz', + r'(?:www\.)?listen\.whatever\.social', + r'(?:www\.)?music\.adminforge\.de', + ) + + # extracted from account/account_menu ep + # XXX: These are the supported YouTube UI and API languages, + # which is slightly different from languages supported for translation in YouTube studio + _SUPPORTED_LANG_CODES = [ + 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es', + 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv', + 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi', + 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw', + 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml', + 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko', + ] + + _IGNORED_WARNINGS = { + 'Unavailable videos will be hidden during playback', + 'Unavailable videos are hidden', + } + + _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en + _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' + + _NETRC_MACHINE = 'youtube' + + def ucid_or_none(self, ucid): + return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None) + + def handle_or_none(self, handle): + return self._search_regex(rf'^({self._YT_HANDLE_RE})$', urllib.parse.unquote(handle or ''), + '@-handle', default=None) + + def handle_from_url(self, url): + return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})', + urllib.parse.unquote(url or ''), 'channel handle', default=None) + + def ucid_from_url(self, url): + return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})', + url, 'channel id', default=None) + + @functools.cached_property + def _preferred_lang(self): + """ + Returns a language code supported by YouTube for the user preferred language. + Returns None if no preferred language set. + """ + preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0] + if not preferred_lang: + return + if preferred_lang not in self._SUPPORTED_LANG_CODES: + raise ExtractorError( + f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.', + expected=True) + elif preferred_lang != 'en': + self.report_warning( + f'Preferring "{preferred_lang}" translated fields. 
Note that some metadata extraction may fail or be incorrect.') + return preferred_lang + + def _initialize_consent(self): + cookies = self._get_cookies('https://www.youtube.com/') + if cookies.get('__Secure-3PSID'): + return + socs = cookies.get('SOCS') + if socs and not socs.value.startswith('CAA'): # not consented + return + self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes) + + def _initialize_pref(self): + cookies = self._get_cookies('https://www.youtube.com/') + pref_cookie = cookies.get('PREF') + pref = {} + if pref_cookie: + try: + pref = dict(urllib.parse.parse_qsl(pref_cookie.value)) + except ValueError: + self.report_warning('Failed to parse user PREF cookie' + bug_reports_message()) + pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'}) + self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref)) + + def _real_initialize(self): + self._initialize_pref() + self._initialize_consent() + self._check_login_required() + + def _perform_login(self, username, password): + if username.startswith('oauth'): + raise ExtractorError( + f'Login with OAuth is no longer supported. {self._youtube_login_hint}', expected=True) + + self.report_warning( + f'Login with password is not supported for YouTube. {self._youtube_login_hint}') + + @property + def _youtube_login_hint(self): + return (f'{self._login_hint(method="cookies")}. Also see ' + 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies ' + 'for tips on effectively exporting YouTube cookies') + + def _check_login_required(self): + if self._LOGIN_REQUIRED and not self.is_authenticated: + self.raise_login_required( + f'Login details are needed to download this content. {self._youtube_login_hint}', method=None) + + _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=' + _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=' + + def _get_default_ytcfg(self, client='web'): + return copy.deepcopy(INNERTUBE_CLIENTS[client]) + + def _get_innertube_host(self, client='web'): + return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST'] + + def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'): + # try_get but with fallback to default ytcfg client values when present + _func = lambda y: try_get(y, getter, expected_type) + return _func(ytcfg) or _func(self._get_default_ytcfg(default_client)) + + def _extract_client_name(self, ytcfg, default_client='web'): + return self._ytcfg_get_safe( + ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'], + lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client) + + def _extract_client_version(self, ytcfg, default_client='web'): + return self._ytcfg_get_safe( + ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'], + lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client) + + def _select_api_hostname(self, req_api_hostname, default_client=None): + return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0] + or req_api_hostname or self._get_innertube_host(default_client or 'web')) + + def _extract_context(self, ytcfg=None, default_client='web'): + context = get_first( + (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict) + # Enforce language and tz for extraction + client_context = traverse_obj(context, 'client', expected_type=dict, default={}) + client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}) + return 
context + + _SAPISID = None + + def _generate_sapisidhash_header(self, origin='https://www.youtube.com'): + time_now = round(time.time()) + if self._SAPISID is None: + yt_cookies = self._get_cookies('https://www.youtube.com') + # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is. + # See: https://github.com/yt-dlp/yt-dlp/issues/393 + sapisid_cookie = dict_get( + yt_cookies, ('__Secure-3PAPISID', 'SAPISID')) + if sapisid_cookie and sapisid_cookie.value: + self._SAPISID = sapisid_cookie.value + self.write_debug('Extracted SAPISID cookie') + # SAPISID cookie is required if not already present + if not yt_cookies.get('SAPISID'): + self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie') + self._set_cookie( + '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600) + else: + self._SAPISID = False + if not self._SAPISID: + return None + # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323 + sapisidhash = hashlib.sha1( + f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest() + return f'SAPISIDHASH {time_now}_{sapisidhash}' + + def _call_api(self, ep, query, video_id, fatal=True, headers=None, + note='Downloading API JSON', errnote='Unable to download API page', + context=None, api_key=None, api_hostname=None, default_client='web'): + + data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)} + data.update(query) + real_headers = self.generate_api_headers(default_client=default_client) + real_headers.update({'content-type': 'application/json'}) + if headers: + real_headers.update(headers) + return self._download_json( + f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}', + video_id=video_id, fatal=fatal, note=note, errnote=errnote, + data=json.dumps(data).encode('utf8'), headers=real_headers, + query=filter_dict({ + 'key': self._configuration_arg( + 'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0], + 'prettyPrint': 'false', + }, cndn=lambda _, v: v)) + + def extract_yt_initial_data(self, item_id, webpage, fatal=True): + return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal) + + @staticmethod + def _extract_session_index(*data): + """ + Index of current account in account list. + See: https://github.com/yt-dlp/yt-dlp/pull/519 + """ + for ytcfg in data: + session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) + if session_index is not None: + return session_index + + def _data_sync_id_to_delegated_session_id(self, data_sync_id): + if not data_sync_id: + return + # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel + # and just "user_syncid||" for primary channel. We only want the channel_syncid + channel_syncid, _, user_syncid = data_sync_id.partition('||') + if user_syncid: + return channel_syncid + + def _extract_account_syncid(self, *args): + """ + Extract current session ID required to download private playlists of secondary channels + @params response and/or ytcfg + """ + # ytcfg includes channel_syncid if on secondary channel + if delegated_sid := traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any)): + return delegated_sid + + data_sync_id = self._extract_data_sync_id(*args) + return self._data_sync_id_to_delegated_session_id(data_sync_id) + + def _extract_data_sync_id(self, *args): + """ + Extract current account dataSyncId. 
+ In the format DELEGATED_SESSION_ID||USER_SESSION_ID or USER_SESSION_ID|| + @params response and/or ytcfg + """ + if data_sync_id := self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]: + return data_sync_id + + return traverse_obj( + args, (..., ('DATASYNC_ID', ('responseContext', 'mainAppWebResponseContext', 'datasyncId')), {str}, any)) + + def _extract_visitor_data(self, *args): + """ + Extracts visitorData from an API response or ytcfg + Appears to be used to track session state + """ + if visitor_data := self._configuration_arg('visitor_data', [None], ie_key=YoutubeIE, casesense=True)[0]: + return visitor_data + return get_first( + args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))], + expected_type=str) + + @functools.cached_property + def is_authenticated(self): + return bool(self._generate_sapisidhash_header()) + + def extract_ytcfg(self, video_id, webpage): + if not webpage: + return {} + return self._parse_json( + self._search_regex( + r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', + default='{}'), video_id, fatal=False) or {} + + def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs): + headers = {} + account_syncid = account_syncid or self._extract_account_syncid(ytcfg) + if account_syncid: + headers['X-Goog-PageId'] = account_syncid + if session_index is None: + session_index = self._extract_session_index(ytcfg) + if account_syncid or session_index is not None: + headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0 + + auth = self._generate_sapisidhash_header(origin) + if auth is not None: + headers['Authorization'] = auth + headers['X-Origin'] = origin + + return headers + + def generate_api_headers( + self, *, ytcfg=None, account_syncid=None, session_index=None, + visitor_data=None, api_hostname=None, default_client='web', **kwargs): + + origin = 'https://' + (self._select_api_hostname(api_hostname, default_client)) + headers = { + 'X-YouTube-Client-Name': str( + self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), + 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client), + 'Origin': origin, + 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), + 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), + **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin), + } + return filter_dict(headers) + + def _download_ytcfg(self, client, video_id): + url = { + 'web': 'https://www.youtube.com', + 'web_music': 'https://music.youtube.com', + 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1', + }.get(client) + if not url: + return {} + webpage = self._download_webpage( + url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config') + return self.extract_ytcfg(video_id, webpage) or {} + + @staticmethod + def _build_api_continuation_query(continuation, ctp=None): + query = { + 'continuation': continuation, + } + # TODO: Inconsistency with clickTrackingParams. + # Currently we have a fixed ctp contained within context (from ytcfg) + # and a ctp in root query for continuation. 
+ if ctp: + query['clickTracking'] = {'clickTrackingParams': ctp} + return query + + @classmethod + def _extract_next_continuation_data(cls, renderer): + next_continuation = try_get( + renderer, (lambda x: x['continuations'][0]['nextContinuationData'], + lambda x: x['continuation']['reloadContinuationData']), dict) + if not next_continuation: + return + continuation = next_continuation.get('continuation') + if not continuation: + return + ctp = next_continuation.get('clickTrackingParams') + return cls._build_api_continuation_query(continuation, ctp) + + @classmethod + def _extract_continuation_ep_data(cls, continuation_ep: dict): + if isinstance(continuation_ep, dict): + continuation = try_get( + continuation_ep, lambda x: x['continuationCommand']['token'], str) + if not continuation: + return + ctp = continuation_ep.get('clickTrackingParams') + return cls._build_api_continuation_query(continuation, ctp) + + @classmethod + def _extract_continuation(cls, renderer): + next_continuation = cls._extract_next_continuation_data(renderer) + if next_continuation: + return next_continuation + + return traverse_obj(renderer, ( + ('contents', 'items', 'rows'), ..., 'continuationItemRenderer', + ('continuationEndpoint', ('button', 'buttonRenderer', 'command')), + ), get_all=False, expected_type=cls._extract_continuation_ep_data) + + @classmethod + def _extract_alerts(cls, data): + for alert_dict in try_get(data, lambda x: x['alerts'], list) or []: + if not isinstance(alert_dict, dict): + continue + for alert in alert_dict.values(): + alert_type = alert.get('type') + if not alert_type: + continue + message = cls._get_text(alert, 'text') + if message: + yield alert_type, message + + def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False): + errors, warnings = [], [] + for alert_type, alert_message in alerts: + if alert_type.lower() == 'error' and fatal: + errors.append([alert_type, alert_message]) + elif alert_message not in self._IGNORED_WARNINGS: + warnings.append([alert_type, alert_message]) + + for alert_type, alert_message in (warnings + errors[:-1]): + self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once) + if errors: + raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected) + + def _extract_and_report_alerts(self, data, *args, **kwargs): + return self._report_alerts(self._extract_alerts(data), *args, **kwargs) + + def _extract_badges(self, badge_list: list): + """ + Extract known BadgeType's from a list of badge renderers. 
+ @returns [{'type': BadgeType}] + """ + icon_type_map = { + 'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED, + 'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE, + 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC, + 'CHECK_CIRCLE_THICK': BadgeType.VERIFIED, + 'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED, + 'CHECK': BadgeType.VERIFIED, + } + + badge_style_map = { + 'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION, + 'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM, + 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW, + 'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED, + 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED, + } + + label_map = { + 'unlisted': BadgeType.AVAILABILITY_UNLISTED, + 'private': BadgeType.AVAILABILITY_PRIVATE, + 'members only': BadgeType.AVAILABILITY_SUBSCRIPTION, + 'live': BadgeType.LIVE_NOW, + 'premium': BadgeType.AVAILABILITY_PREMIUM, + 'verified': BadgeType.VERIFIED, + 'official artist channel': BadgeType.VERIFIED, + } + + badges = [] + for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))): + badge_type = ( + icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str)) + or badge_style_map.get(traverse_obj(badge, 'style')) + ) + if badge_type: + badges.append({'type': badge_type}) + continue + + # fallback, won't work in some languages + label = traverse_obj( + badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='') + for match, label_badge_type in label_map.items(): + if match in label.lower(): + badges.append({'type': label_badge_type}) + break + + return badges + + @staticmethod + def _has_badge(badges, badge_type): + return bool(traverse_obj(badges, lambda _, v: v['type'] == badge_type)) + + @staticmethod + def _get_text(data, *path_list, max_runs=None): + for path in path_list or [None]: + if path is None: + obj = [data] + else: + obj = traverse_obj(data, path, default=[]) + if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)): + obj = [obj] + for item in obj: + text = try_get(item, lambda x: x['simpleText'], str) + if text: + return text + runs = try_get(item, lambda x: x['runs'], list) or [] + if not runs and isinstance(item, list): + runs = item + + runs = runs[:min(len(runs), max_runs or len(runs))] + text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str)) + if text: + return text + + def _get_count(self, data, *path_list): + count_text = self._get_text(data, *path_list) or '' + count = parse_count(count_text) + if count is None: + count = str_to_int( + self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)) + return count + + @staticmethod + def _extract_thumbnails(data, *path_list, final_key='thumbnails'): + """ + Extract thumbnails from thumbnails dict + @param path_list: path list to level that contains 'thumbnails' key + """ + thumbnails = [] + for path in path_list or [()]: + for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)): + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + # Sometimes youtube gives a wrong thumbnail URL. 
See: + # https://github.com/yt-dlp/yt-dlp/issues/233 + # https://github.com/ytdl-org/youtube-dl/issues/28023 + if 'maxresdefault' in thumbnail_url: + thumbnail_url = thumbnail_url.split('?')[0] + thumbnails.append({ + 'url': thumbnail_url, + 'height': int_or_none(thumbnail.get('height')), + 'width': int_or_none(thumbnail.get('width')), + }) + return thumbnails + + @staticmethod + def extract_relative_time(relative_time_text): + """ + Extracts a relative time from string and converts to dt object + e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago' + """ + + # XXX: this could be moved to a general function in utils/_utils.py + # The relative time text strings are roughly the same as what + # Javascript's Intl.RelativeTimeFormat function generates. + # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat + mobj = re.search( + r'(?P<start>today|yesterday|now)|(?P