1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-17 18:58:35 +00:00

Merge remote-tracking branch 'upstream/master' into feat/youtube/sabr

# Conflicts:
#	yt_dlp/extractor/youtube/_video.py
This commit is contained in:
coletdjnz 2025-07-12 11:47:06 +12:00
commit 951a2995c6
No known key found for this signature in database
GPG Key ID: 91984263BB39894A
15 changed files with 703 additions and 325 deletions

View File

@ -1800,6 +1800,7 @@ #### youtube
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)

View File

@ -10,9 +10,13 @@ __yt_dlp()
diropts="--cache-dir"
if [[ ${prev} =~ ${fileopts} ]]; then
local IFS=$'\n'
type compopt &>/dev/null && compopt -o filenames
COMPREPLY=( $(compgen -f -- ${cur}) )
return 0
elif [[ ${prev} =~ ${diropts} ]]; then
local IFS=$'\n'
type compopt &>/dev/null && compopt -o dirnames
COMPREPLY=( $(compgen -d -- ${cur}) )
return 0
fi

View File

@ -14,6 +14,7 @@
from test.helper import (
assertGreaterEqual,
assertLessEqual,
expect_info_dict,
expect_warnings,
get_params,
@ -121,10 +122,13 @@ def print_skipping(reason):
params = get_params(test_case.get('params', {}))
params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist')
params.setdefault('playlistend', test_case.get(
'playlist_mincount', test_case.get('playlist_count', -2) + 1))
params.setdefault('playlistend', max(
test_case.get('playlist_mincount', -1),
test_case.get('playlist_count', -2) + 1,
test_case.get('playlist_maxcount', -2) + 1))
params.setdefault('skip_download', True)
if 'playlist_duration_sum' not in test_case:
params.setdefault('extract_flat', 'in_playlist')
ydl = YoutubeDL(params, auto_init=False)
ydl.add_default_info_extractors()
@ -159,6 +163,7 @@ def try_rm_tcs_files(tcs=None):
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
try_rm_tcs_files()
try:
test_url = test_case['url']
try_num = 1
while True:
try:
@ -166,7 +171,7 @@ def try_rm_tcs_files(tcs=None):
# for outside error handling, and returns the exit code
# instead of the result dict.
res_dict = ydl.extract_info(
test_case['url'],
test_url,
force_generic_extractor=params.get('force_generic_extractor', False))
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
@ -194,23 +199,23 @@ def try_rm_tcs_files(tcs=None):
self.assertTrue('entries' in res_dict)
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
num_entries = len(res_dict.get('entries', []))
if 'playlist_mincount' in test_case:
mincount = test_case['playlist_mincount']
assertGreaterEqual(
self,
len(res_dict['entries']),
test_case['playlist_mincount'],
'Expected at least %d in playlist %s, but got only %d' % (
test_case['playlist_mincount'], test_case['url'],
len(res_dict['entries'])))
self, num_entries, mincount,
f'Expected at least {mincount} entries in playlist {test_url}, but got only {num_entries}')
if 'playlist_count' in test_case:
count = test_case['playlist_count']
got = num_entries if num_entries <= count else 'more'
self.assertEqual(
len(res_dict['entries']),
test_case['playlist_count'],
'Expected %d entries in playlist %s, but got %d.' % (
test_case['playlist_count'],
test_case['url'],
len(res_dict['entries']),
))
num_entries, count,
f'Expected exactly {count} entries in playlist {test_url}, but got {got}')
if 'playlist_maxcount' in test_case:
maxcount = test_case['playlist_maxcount']
assertLessEqual(
self, num_entries, maxcount,
f'Expected at most {maxcount} entries in playlist {test_url}, but got more')
if 'playlist_duration_sum' in test_case:
got_duration = sum(e['duration'] for e in res_dict['entries'])
self.assertEqual(

View File

@ -536,6 +536,11 @@ def test_nested_function_scoping(self):
}
''', 31)
def test_undefined_varnames(self):
jsi = JSInterpreter('function f(){ var a; return [a, b]; }')
self._test(jsi, [JS_Undefined, JS_Undefined])
self.assertEqual(jsi._undefined_varnames, {'b'})
if __name__ == '__main__':
unittest.main()

View File

@ -373,6 +373,10 @@
'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
),
(
'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js',
'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA',
),
]

View File

@ -1147,6 +1147,7 @@
MindsIE,
)
from .minoto import MinotoIE
from .mir24tv import Mir24TvIE
from .mirrativ import (
MirrativIE,
MirrativUserIE,

View File

@ -900,7 +900,9 @@ def _real_extract(self, url):
headers=headers))
geo_blocked = traverse_obj(play_info, (
'raw', 'data', 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
('result', ('raw', 'data')), 'plugins',
lambda _, v: v['name'] == 'AreaLimitPanel',
'config', 'is_block', {bool}, any))
premium_only = play_info.get('code') == -10403
video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {}
@ -914,7 +916,7 @@ def _real_extract(self, url):
if traverse_obj(play_info, ((
('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE'
('raw', 'data', 'play_video_type'), # 'preview' vs 'whole'
(('result', ('raw', 'data')), 'play_video_type'), # 'preview' vs 'whole' vs 'none'
), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
self.report_warning(
'Only preview format is available, '

View File

@ -0,0 +1,37 @@
from .common import InfoExtractor
from ..utils import parse_qs, url_or_none
from ..utils.traversal import require, traverse_obj
class Mir24TvIE(InfoExtractor):
IE_NAME = 'mir24.tv'
_VALID_URL = r'https?://(?:www\.)?mir24\.tv/news/(?P<id>[0-9]+)/[^/?#]+'
_TESTS = [{
'url': 'https://mir24.tv/news/16635210/dni-kultury-rossii-otkrylis-v-uzbekistane.-na-prazdnichnom-koncerte-vystupili-zvezdy-rossijskoj-estrada',
'info_dict': {
'id': '16635210',
'title': 'Дни культуры России открылись в Узбекистане. На праздничном концерте выступили звезды российской эстрады',
'ext': 'mp4',
'thumbnail': r're:https://images\.mir24\.tv/.+\.jpg',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, impersonate=True)
iframe_url = self._search_regex(
r'<iframe\b[^>]+\bsrc=["\'](https?://mir24\.tv/players/[^"\']+)',
webpage, 'iframe URL')
m3u8_url = traverse_obj(iframe_url, (
{parse_qs}, 'source', -1, {self._proto_relative_url}, {url_or_none}, {require('m3u8 URL')}))
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
return {
'id': video_id,
'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'formats': formats,
'subtitles': subtitles,
}

View File

@ -1,53 +1,72 @@
import re
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
clean_html,
parse_iso8601,
parse_qs,
url_or_none,
)
from ..utils.traversal import require, traverse_obj
class NewsPicksIE(InfoExtractor):
_VALID_URL = r'https?://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
_VALID_URL = r'https?://newspicks\.com/movie-series/(?P<id>[^?/#]+)'
_TESTS = [{
'url': 'https://newspicks.com/movie-series/11?movieId=1813',
'url': 'https://newspicks.com/movie-series/11/?movieId=1813',
'info_dict': {
'id': '1813',
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
'description': 'md5:09397aad46d6ded6487ff13f138acadf',
'channel': 'HORIE ONE',
'channel_id': '11',
'release_date': '20220117',
'thumbnail': r're:https://.+jpg',
'ext': 'mp4',
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
'cast': 'count:4',
'description': 'md5:09397aad46d6ded6487ff13f138acadf',
'duration': 2940,
'release_date': '20220117',
'release_timestamp': 1642424400,
'series': 'HORIE ONE',
'series_id': '11',
'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
'timestamp': 1642424420,
'upload_date': '20220117',
},
}, {
'url': 'https://newspicks.com/movie-series/158/?movieId=3932',
'info_dict': {
'id': '3932',
'ext': 'mp4',
'title': '【検証】専門家は、KADOKAWAをどう見るか',
'cast': 'count:3',
'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d',
'duration': 1320,
'release_date': '20240622',
'release_timestamp': 1719088080,
'series': 'NPレポート',
'series_id': '158',
'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
'timestamp': 1719086400,
'upload_date': '20240622',
},
}]
def _real_extract(self, url):
video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
series_id = self._match_id(url)
video_id = traverse_obj(parse_qs(url), ('movieId', -1, {str}, {require('movie ID')}))
webpage = self._download_webpage(url, video_id)
entries = self._parse_html5_media_entries(
url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
if not entries:
raise ExtractorError('No HTML5 media elements found')
info = entries[0]
title = self._html_search_meta('og:title', webpage, fatal=False)
description = self._html_search_meta(
('og:description', 'twitter:title'), webpage, fatal=False)
channel = self._html_search_regex(
r'value="11".+?<div\s+class="title">(.+?)</div', webpage, 'channel name', fatal=False)
if not title or not channel:
title, channel = re.split(r'\s*|\s*', self._html_extract_title(webpage))
fragment = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['fragment']
m3u8_url = traverse_obj(fragment, ('movie', 'movieUrl', {url_or_none}, {require('m3u8 URL')}))
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
release_date = self._search_regex(
r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
webpage, 'release date', fatal=False, group=(1, 2, 3))
info.update({
return {
'id': video_id,
'title': title,
'description': description,
'channel': channel,
'channel_id': channel_id,
'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
})
return info
'formats': formats,
'series': traverse_obj(fragment, ('series', 'title', {str})),
'series_id': series_id,
'subtitles': subtitles,
**traverse_obj(fragment, ('movie', {
'title': ('title', {str}),
'cast': ('relatedUsers', ..., 'displayName', {str}, filter, all, filter),
'description': ('explanation', {clean_html}),
'release_timestamp': ('onAirStartDate', {parse_iso8601}),
'thumbnail': (('image', 'coverImageUrl'), {url_or_none}, any),
'timestamp': ('published', {parse_iso8601}),
})),
}

View File

@ -8,6 +8,8 @@
get_element_by_class,
int_or_none,
join_nonempty,
make_archive_id,
orderedSet,
parse_duration,
remove_end,
traverse_obj,
@ -16,6 +18,7 @@
unified_timestamp,
url_or_none,
urljoin,
variadic,
)
@ -591,102 +594,179 @@ class NhkRadiruIE(InfoExtractor):
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
_TESTS = [{
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239',
'skip': 'Episode expired on 2024-06-09',
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=LG96ZW5KZ4_01_4251382',
'skip': 'Episode expires on 2025-07-14',
'info_dict': {
'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集',
'id': '0449_01_4003239',
'title': 'クラシックの庭\u3000特集「ドボルザークを聴く」(1)交響曲を中心に',
'id': 'LG96ZW5KZ4_01_4251382',
'ext': 'm4a',
'uploader': 'NHK FM 東京',
'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc',
'series': 'ジャズ・トゥナイト',
'channel': 'NHK FM 東京',
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
'upload_date': '20240601',
'series_id': '0449_01',
'release_date': '20240601',
'timestamp': 1717257600,
'release_timestamp': 1717250400,
'description': 'md5:652d3c38a25b77959c716421eba1617a',
'uploader': 'NHK FM・東京',
'channel': 'NHK FM・東京',
'duration': 6597.0,
'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/LG96ZW5KZ4/LG96ZW5KZ4-eyecatch_a67c6e949325016c0724f2ed3eec8a2f.jpg',
'categories': ['音楽', 'クラシック・オペラ'],
'cast': ['田添菜穂子'],
'series': 'クラシックの庭',
'series_id': 'LG96ZW5KZ4',
'episode': '特集「ドボルザークを聴く」(1)交響曲を中心に',
'episode_id': 'QP1Q2ZXZY3',
'timestamp': 1751871000,
'upload_date': '20250707',
'release_timestamp': 1751864403,
'release_date': '20250707',
},
}, {
# playlist, airs every weekday so it should _hopefully_ be okay forever
'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=Z9L1V2M24L_01',
'info_dict': {
'id': '0458_01',
'id': 'Z9L1V2M24L_01',
'title': 'ベストオブクラシック',
'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
'series_id': '0458_01',
'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/Z9L1V2M24L/Z9L1V2M24L-eyecatch_83ed28b4782907998875965fee60a351.jpg',
'series_id': 'Z9L1V2M24L_01',
'uploader': 'NHK FM',
'channel': 'NHK FM',
'series': 'ベストオブクラシック',
},
'playlist_mincount': 3,
}, {
# one with letters in the id
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688',
'note': 'Expires on 2025-03-31',
'info_dict': {
'id': 'F683_01_3910688',
'ext': 'm4a',
'title': '夏目漱石「文鳥」第1回',
'series': '【らじる文庫】夏目漱石「文鳥」全4回',
'series_id': 'F683_01',
'description': '朗読:浅井理アナウンサー',
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg',
'upload_date': '20240106',
'release_date': '20240106',
'uploader': 'NHK R1',
'release_timestamp': 1704511800,
'channel': 'NHK R1',
'timestamp': 1704512700,
},
'expected_warnings': ['Unable to download JSON metadata',
'Failed to get extended metadata. API returned Error 1: Invalid parameters'],
}, {
# news
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173',
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=18439M2W42_02_4251212',
'skip': 'Expires on 2025-07-15',
'info_dict': {
'id': 'F261_01_4012173',
'id': '18439M2W42_02_4251212',
'ext': 'm4a',
'channel': 'NHKラジオ第1',
'title': 'マイあさ! 午前5時のNHKニュース 2025年7月8日',
'uploader': 'NHKラジオ第1',
'channel': 'NHKラジオ第1',
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
'series': 'NHKラジオニュース',
'title': '午前時のNHKニュース',
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
'release_timestamp': 1718290800,
'release_date': '20240613',
'timestamp': 1718291400,
'upload_date': '20240613',
'timestamp': 1751919420,
'upload_date': '20250707',
'release_timestamp': 1751918400,
'release_date': '20250707',
},
}, {
# fallback when extended metadata fails
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298',
'skip': 'Expires on 2024-06-07',
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=J8792PY43V_20_4253945',
'skip': 'Expires on 2025-09-01',
'info_dict': {
'id': '2834_01_4009298',
'title': 'まち☆キラ!開成町特集',
'id': 'J8792PY43V_20_4253945',
'ext': 'm4a',
'release_date': '20240531',
'upload_date': '20240531',
'series': 'はま☆キラ!',
'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg',
'channel': 'NHK R1,FM',
'description': '',
'timestamp': 1717123800,
'uploader': 'NHK R1,FM',
'release_timestamp': 1717120800,
'series_id': '2834_01',
'title': '「後絶たない筋肉増強剤の使用」ワールドリポート',
'description': '大濱 敦(ソウル支局)',
'uploader': 'NHK R1',
'channel': 'NHK R1',
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/J8792PY43V/img/corner/box_31_thumbnail.jpg',
'series': 'マイあさ! ワールドリポート',
'series_id': 'J8792PY43V_20',
'timestamp': 1751837100,
'upload_date': '20250706',
'release_timestamp': 1751835600,
'release_date': '20250706',
},
'expected_warnings': ['Failed to get extended metadata. API returned empty list.'],
'expected_warnings': ['Failed to download extended metadata: HTTP Error 404: Not Found'],
}]
_API_URL_TMPL = None
# The `_format_*` and `_make_*` functions are ported from: https://www.nhk.or.jp/radio/assets/js/timetable_detail_new.js
def _format_act_list(self, act_list):
role_groups = {}
for act in traverse_obj(act_list, (..., {dict})):
role = act.get('role')
if role not in role_groups:
role_groups[role] = []
role_groups[role].append(act)
formatted_roles = []
for role, acts in role_groups.items():
for i, act in enumerate(acts):
res = f'{role}' if i == 0 and role is not None else ''
if title := act.get('title'):
res += f'{title}'
formatted_roles.append(join_nonempty(res, act.get('name'), delim=''))
return join_nonempty(*formatted_roles, delim='')
def _make_artists(self, track, key):
artists = []
for artist in traverse_obj(track, (key, ..., {dict})):
if res := join_nonempty(*traverse_obj(artist, ((
('role', filter, {'{}'.format}),
('part', filter, {'{}'.format}),
('name', filter),
), {str})), delim=''):
artists.append(res)
return ''.join(artists) or None
def _make_duration(self, track, key):
d = traverse_obj(track, (key, {parse_duration}))
if d is None:
return None
hours, remainder = divmod(d, 3600)
minutes, seconds = divmod(remainder, 60)
res = ''
if hours > 0:
res += f'{int(hours)}時間'
if minutes > 0:
res += f'{int(minutes)}'
res += f'{int(seconds):02}秒)'
return res
def _format_music_list(self, music_list):
tracks = []
for track in traverse_obj(music_list, (..., {dict})):
track_details = traverse_obj(track, ((
('name', filter, {'{}'.format}),
('lyricist', filter, {'{}:作詞'.format}),
('composer', filter, {'{}:作曲'.format}),
('arranger', filter, {'{}:編曲'.format}),
), {str}))
track_details.append(self._make_artists(track, 'byArtist'))
track_details.append(self._make_duration(track, 'duration'))
if label := join_nonempty('label', 'code', delim=' ', from_dict=track):
track_details.append(f'{label}')
if location := traverse_obj(track, ('location', {str})):
track_details.append(f'{location}')
tracks.append(join_nonempty(*track_details, delim='\n'))
return '\n\n'.join(tracks)
def _format_description(self, response):
detailed_description = traverse_obj(response, ('detailedDescription', {dict})) or {}
return join_nonempty(
join_nonempty('epg80', 'epg200', delim='\n\n', from_dict=detailed_description),
traverse_obj(response, ('misc', 'actList', {self._format_act_list})),
traverse_obj(response, ('misc', 'musicList', {self._format_music_list})),
delim='\n\n')
def _get_thumbnails(self, data, keys, name=None, preference=-1):
thumbnails = []
for size, thumb in traverse_obj(data, (
*variadic(keys, (str, bytes, dict, set)), {dict.items},
lambda _, v: v[0] != 'copyright' and url_or_none(v[1]['url']),
)):
thumbnails.append({
'url': thumb['url'],
'width': int_or_none(thumb.get('width')),
'height': int_or_none(thumb.get('height')),
'preference': preference,
'id': join_nonempty(name, size),
})
preference -= 1
return thumbnails
def _extract_extended_metadata(self, episode_id, aa_vinfo):
service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')}))
date_id = aa_vinfo[3]
detail_url = try_call(
lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3]))
lambda: self._API_URL_TMPL.format(broadcastEventId=join_nonempty(service, area, date_id)))
if not detail_url:
return {}
@ -699,36 +779,37 @@ def _extract_extended_metadata(self, episode_id, aa_vinfo):
if error := traverse_obj(response, ('error', {dict})):
self.report_warning(
'Failed to get extended metadata. API returned '
f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
f'Error {join_nonempty("statuscode", "message", from_dict=error, delim=": ")}')
return {}
full_meta = traverse_obj(response, ('list', service, 0, {dict}))
if not full_meta:
self.report_warning('Failed to get extended metadata. API returned empty list.')
return {}
station = traverse_obj(response, ('publishedOn', 'broadcastDisplayName', {str}))
station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None
thumbnails = [{
'id': str(id_),
'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1,
**traverse_obj(thumb, {
'url': 'url',
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
} for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))]
thumbnails = []
thumbnails.extend(self._get_thumbnails(response, ('about', 'eyecatch')))
for num, dct in enumerate(traverse_obj(response, ('about', 'eyecatchList', ...))):
thumbnails.extend(self._get_thumbnails(dct, None, join_nonempty('list', num), -2))
thumbnails.extend(
self._get_thumbnails(response, ('about', 'partOfSeries', 'eyecatch'), 'series', -3))
return filter_dict({
'description': self._format_description(response),
'cast': traverse_obj(response, ('misc', 'actList', ..., 'name', {str})),
'thumbnails': thumbnails,
**traverse_obj(response, {
'title': ('name', {str}),
'timestamp': ('endDate', {unified_timestamp}),
'release_timestamp': ('startDate', {unified_timestamp}),
'duration': ('duration', {parse_duration}),
}),
**traverse_obj(response, ('identifierGroup', {
'series': ('radioSeriesName', {str}),
'series_id': ('radioSeriesId', {str}),
'episode': ('radioEpisodeName', {str}),
'episode_id': ('radioEpisodeId', {str}),
'categories': ('genre', ..., ['name1', 'name2'], {str}, all, {orderedSet}),
})),
'channel': station,
'uploader': station,
'description': join_nonempty(
'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta),
'thumbnails': thumbnails,
**traverse_obj(full_meta, {
'title': ('title', {str}),
'timestamp': ('end_time', {unified_timestamp}),
'release_timestamp': ('start_time', {unified_timestamp}),
}),
})
def _extract_episode_info(self, episode, programme_id, series_meta):
@ -782,7 +863,9 @@ def _real_extract(self, url):
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
programme_id = f'{site_id}_{corner_id}'
if site_id == 'F261': # XXX: News programmes use old API (for now?)
# XXX: News programmes use the old API
# Can't move this to NhkRadioNewsPageIE because news items still use the normal URL format
if site_id == '18439M2W42':
meta = self._download_json(
'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main']
series_meta = traverse_obj(meta, {
@ -843,8 +926,8 @@ class NhkRadioNewsPageIE(InfoExtractor):
'url': 'https://www.nhk.or.jp/radionews/',
'playlist_mincount': 5,
'info_dict': {
'id': 'F261_01',
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
'id': '18439M2W42_01',
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
'channel': 'NHKラジオ第1',
'uploader': 'NHKラジオ第1',
@ -853,7 +936,7 @@ class NhkRadioNewsPageIE(InfoExtractor):
}]
def _real_extract(self, url):
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=18439M2W42_01', NhkRadiruIE)
class NhkRadiruLiveIE(InfoExtractor):
@ -863,11 +946,12 @@ class NhkRadiruLiveIE(InfoExtractor):
# radio 1, no area specified
'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
'info_dict': {
'id': 'r1-tokyo',
'title': 're:^ネットラジオ第1 東京.+$',
'id': 'bs-r1-130',
'title': 're:^NHKラジオ第1・東京.+$',
'ext': 'm4a',
'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r1/r1-logo.svg',
'live_status': 'is_live',
'_old_archive_ids': ['nhkradirulive r1-tokyo'],
},
}, {
# radio 2, area specified
@ -875,26 +959,28 @@ class NhkRadiruLiveIE(InfoExtractor):
'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
'info_dict': {
'id': 'r2-fukuoka',
'title': 're:^ネットラジオ第2 福岡.+$',
'id': 'bs-r2-400',
'title': 're:^NHKラジオ第2.+$',
'ext': 'm4a',
'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r2/r2-logo.svg',
'live_status': 'is_live',
'_old_archive_ids': ['nhkradirulive r2-fukuoka'],
},
}, {
# fm, area specified
'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
'info_dict': {
'id': 'fm-sapporo',
'title': 're:^NHKネットラジオFM 札幌.+$',
'id': 'bs-r3-010',
'title': 're:^NHK FM・札幌.+$',
'ext': 'm4a',
'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r3/r3-logo.svg',
'live_status': 'is_live',
'_old_archive_ids': ['nhkradirulive fm-sapporo'],
},
}]
_NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
_NOA_STATION_IDS = {'r1': 'r1', 'r2': 'r2', 'fm': 'r3'}
def _real_extract(self, url):
station = self._match_id(url)
@ -911,12 +997,15 @@ def _real_extract(self, url):
noa_info = self._download_json(
f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
station, note=f'Downloading {area} station metadata', fatal=False)
present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
broadcast_service = traverse_obj(noa_info, (self._NOA_STATION_IDS.get(station), 'publishedOn'))
return {
'title': ' '.join(traverse_obj(present_info, (('service', 'area'), 'name', {str}))),
'id': join_nonempty(station, area),
'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
**traverse_obj(broadcast_service, {
'title': ('broadcastDisplayName', {str}),
'id': ('id', {str}),
}),
'_old_archive_ids': [make_archive_id(self, join_nonempty(station, area))],
'thumbnails': traverse_obj(broadcast_service, ('logo', ..., {
'url': 'url',
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),

View File

@ -1,6 +1,5 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
traverse_obj,
@ -61,10 +60,10 @@ def _real_extract(self, url):
post = self._download_json(
'https://9gag.com/v1/post', post_id, query={
'id': post_id,
})['data']['post']
}, impersonate=True)['data']['post']
if post.get('type') != 'Animated':
raise ExtractorError(
self.raise_no_formats(
'The given url does not contain a video',
expected=True)

View File

@ -1,5 +1,6 @@
import calendar
import copy
import dataclasses
import datetime as dt
import enum
import functools
@ -38,6 +39,61 @@ class _PoTokenContext(enum.Enum):
SUBS = 'subs'
class StreamingProtocol(enum.Enum):
HTTPS = 'https'
DASH = 'dash'
HLS = 'hls'
SABR = 'sabr'
@dataclasses.dataclass
class BasePoTokenPolicy:
required: bool = False
# Try to fetch a PO Token even if it is not required.
recommended: bool = False
not_required_for_premium: bool = False
@dataclasses.dataclass
class GvsPoTokenPolicy(BasePoTokenPolicy):
not_required_with_player_token: bool = False
@dataclasses.dataclass
class PlayerPoTokenPolicy(BasePoTokenPolicy):
pass
@dataclasses.dataclass
class SubsPoTokenPolicy(BasePoTokenPolicy):
pass
WEB_PO_TOKEN_POLICIES = {
'GVS_PO_TOKEN_POLICY': {
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_for_premium=True,
not_required_with_player_token=False,
),
StreamingProtocol.DASH: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_for_premium=True,
not_required_with_player_token=False,
),
StreamingProtocol.HLS: GvsPoTokenPolicy(
required=False,
recommended=True,
),
},
'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False),
# In rollout, currently detected via experiment
# Premium users DO require a PO Token for subtitles
'SUBS_PO_TOKEN_POLICY': SubsPoTokenPolicy(required=False),
}
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {
@ -48,8 +104,8 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'SUPPORTS_COOKIES': True,
**WEB_PO_TOKEN_POLICIES,
},
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
'web_safari': {
@ -61,8 +117,8 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'SUPPORTS_COOKIES': True,
**WEB_PO_TOKEN_POLICIES,
'PLAYER_PARAMS': '8AEB',
},
'web_embedded': {
@ -84,7 +140,24 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'GVS_PO_TOKEN_POLICY': {
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_for_premium=True,
not_required_with_player_token=False,
),
StreamingProtocol.DASH: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_for_premium=True,
not_required_with_player_token=False,
),
StreamingProtocol.HLS: GvsPoTokenPolicy(
required=False,
recommended=True,
),
},
'SUPPORTS_COOKIES': True,
},
# This client now requires sign-in for every video
@ -96,7 +169,24 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'GVS_PO_TOKEN_POLICY': {
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_for_premium=True,
not_required_with_player_token=False,
),
StreamingProtocol.DASH: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_for_premium=True,
not_required_with_player_token=False,
),
StreamingProtocol.HLS: GvsPoTokenPolicy(
required=False,
recommended=True,
),
},
'REQUIRE_AUTH': True,
'SUPPORTS_COOKIES': True,
},
@ -113,7 +203,24 @@ class _PoTokenContext(enum.Enum):
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'REQUIRE_JS_PLAYER': False,
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'GVS_PO_TOKEN_POLICY': {
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_with_player_token=True,
),
StreamingProtocol.DASH: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_with_player_token=True,
),
StreamingProtocol.HLS: GvsPoTokenPolicy(
required=False,
recommended=True,
not_required_with_player_token=True,
),
},
'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True),
},
# YouTube Kids videos aren't returned on this client for some reason
'android_vr': {
@ -147,7 +254,21 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'GVS_PO_TOKEN_POLICY': {
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_with_player_token=True,
),
# HLS Livestreams require POT 30 seconds in
# TODO: Rolling out
StreamingProtocol.HLS: GvsPoTokenPolicy(
required=False,
recommended=True,
not_required_with_player_token=True,
),
},
'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True),
'REQUIRE_JS_PLAYER': False,
},
# mweb has 'ultralow' formats
@ -162,7 +283,24 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'GVS_PO_TOKEN_POLICY': {
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_for_premium=True,
not_required_with_player_token=False,
),
StreamingProtocol.DASH: GvsPoTokenPolicy(
required=True,
recommended=True,
not_required_for_premium=True,
not_required_with_player_token=False,
),
StreamingProtocol.HLS: GvsPoTokenPolicy(
required=False,
recommended=True,
),
},
'SUPPORTS_COOKIES': True,
},
'tv': {
@ -226,7 +364,11 @@ def build_innertube_clients():
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg.setdefault('PO_TOKEN_REQUIRED_CONTEXTS', [])
ytcfg.setdefault('GVS_PO_TOKEN_POLICY', {})
for protocol in StreamingProtocol:
ytcfg['GVS_PO_TOKEN_POLICY'].setdefault(protocol, GvsPoTokenPolicy())
ytcfg.setdefault('PLAYER_PO_TOKEN_POLICY', PlayerPoTokenPolicy())
ytcfg.setdefault('SUBS_PO_TOKEN_POLICY', SubsPoTokenPolicy())
ytcfg.setdefault('REQUIRE_AUTH', False)
ytcfg.setdefault('SUPPORTS_COOKIES', False)
ytcfg.setdefault('PLAYER_PARAMS', None)

View File

@ -317,17 +317,31 @@ def _extract_lockup_view_model(self, view_model):
content_id = view_model.get('contentId')
if not content_id:
return
content_type = view_model.get('contentType')
if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
if content_type == 'LOCKUP_CONTENT_TYPE_VIDEO':
ie = YoutubeIE
url = f'https://www.youtube.com/watch?v={content_id}'
thumb_keys = (None,)
elif content_type in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
ie = YoutubeTabIE
url = f'https://www.youtube.com/playlist?list={content_id}'
thumb_keys = ('collectionThumbnailViewModel', 'primaryThumbnail')
else:
self.report_warning(
f'Unsupported lockup view model content type "{content_type}"{bug_reports_message()}', only_once=True)
f'Unsupported lockup view model content type "{content_type}"{bug_reports_message()}',
only_once=True)
return
return self.url_result(
f'https://www.youtube.com/playlist?list={content_id}', ie=YoutubeTabIE, video_id=content_id,
url, ie, content_id,
title=traverse_obj(view_model, (
'metadata', 'lockupMetadataViewModel', 'title', 'content', {str})),
thumbnails=self._extract_thumbnails(view_model, (
'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
duration=traverse_obj(view_model, (
'contentImage', 'thumbnailViewModel', 'overlays', ..., 'thumbnailOverlayBadgeViewModel',
'thumbnailBadges', ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)))
def _rich_entries(self, rich_grid_renderer):
if lockup_view_model := traverse_obj(rich_grid_renderer, ('content', 'lockupViewModel', {dict})):

View File

@ -18,6 +18,9 @@
from ._base import (
INNERTUBE_CLIENTS,
BadgeType,
GvsPoTokenPolicy,
PlayerPoTokenPolicy,
StreamingProtocol,
YoutubeBaseInfoExtractor,
_PoTokenContext,
_split_innertube_client,
@ -72,10 +75,11 @@
from ...utils.networking import clean_headers, clean_proxies, select_proxy
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
STREAMING_DATA_INITIAL_PO_TOKEN = '__yt_dlp_po_token'
STREAMING_DATA_FETCH_PO_TOKEN = '__yt_dlp_fetch_po_token'
STREAMING_DATA_FETCH_SUBS_PO_TOKEN = '__yt_dlp_fetch_subs_po_token'
STREAMING_DATA_FETCH_GVS_PO_TOKEN = '__yt_dlp_fetch_gvs_po_token'
STREAMING_DATA_PLAYER_TOKEN_PROVIDED = '__yt_dlp_player_token_provided'
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
STREAMING_DATA_IS_PREMIUM_SUBSCRIBER = '__yt_dlp_is_premium_subscriber'
PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'
@ -255,6 +259,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
_DEFAULT_CLIENTS = ('tv', 'ios', 'web')
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web')
# Premium does not require POT (except for subtitles)
_DEFAULT_PREMIUM_CLIENTS = ('tv', 'web')
_GEO_BYPASS = False
@ -1836,7 +1842,8 @@ def refetch_manifest(format_id, delay):
if time.time() <= start_time + delay:
return
_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
_, _, _, _, prs, player_url = self._initial_extract(
url, smuggled_data, webpage_url, 'web', video_id)
video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
microformats = traverse_obj(
prs, (..., 'microformat', 'playerMicroformatRenderer'),
@ -2903,7 +2910,7 @@ def _get_config_po_token(self, client: str, context: _PoTokenContext):
only_once=True)
continue
def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None, visitor_data=None,
def fetch_po_token(self, client='web', context: _PoTokenContext = _PoTokenContext.GVS, ytcfg=None, visitor_data=None,
data_sync_id=None, session_index=None, player_url=None, video_id=None, webpage=None,
required=False, bypass_cache=None, **kwargs):
"""
@ -2990,7 +2997,6 @@ def _fetch_po_token(self, client, **kwargs):
fetch_pot_policy == 'never'
or (
fetch_pot_policy == 'auto'
and _PoTokenContext(context) not in self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']
and not kwargs.get('required', False)
)
):
@ -3049,19 +3055,19 @@ def _is_agegated(player_response):
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token, reload_playback_token):
def _extract_player_response(self, client, video_id, webpage_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token, reload_playback_token):
headers = self.generate_api_headers(
ytcfg=player_ytcfg,
default_client=client,
visitor_data=visitor_data,
session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
session_index=self._extract_session_index(webpage_ytcfg, player_ytcfg),
delegated_session_id=(
self._parse_data_sync_id(data_sync_id)[0]
or self._extract_delegated_session_id(master_ytcfg, initial_pr, player_ytcfg)
or self._extract_delegated_session_id(webpage_ytcfg, initial_pr, player_ytcfg)
),
user_session_id=(
self._parse_data_sync_id(data_sync_id)[1]
or self._extract_user_session_id(master_ytcfg, initial_pr, player_ytcfg)
or self._extract_user_session_id(webpage_ytcfg, initial_pr, player_ytcfg)
),
)
@ -3077,9 +3083,8 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
if po_token:
yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
sts = self._extract_signature_timestamp(video_id, player_url, webpage_ytcfg, fatal=False) if player_url else None
yt_query.update(self._generate_player_context(sts, reload_playback_token))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
ytcfg=player_ytcfg, headers=headers, fatal=True,
@ -3087,10 +3092,14 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()),
) or None
def _get_requested_clients(self, url, smuggled_data):
def _get_requested_clients(self, url, smuggled_data, is_premium_subscriber):
requested_clients = []
excluded_clients = []
default_clients = self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated else self._DEFAULT_CLIENTS
default_clients = (
self._DEFAULT_PREMIUM_CLIENTS if is_premium_subscriber
else self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated
else self._DEFAULT_CLIENTS
)
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@ -3132,11 +3141,12 @@ def _invalid_player_response(self, pr, video_id):
if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
return pr_id
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, reload_playback_token):
def _extract_player_responses(self, clients, video_id, webpage, webpage_client, webpage_ytcfg, is_premium_subscriber, reload_playback_token):
initial_pr = None
if webpage:
initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage,
f'{webpage_client} client initial player response', video_id, fatal=False)
prs = []
deprioritized_prs = []
@ -3167,11 +3177,11 @@ def append_client(*client_names):
while clients:
deprioritize_pr = False
client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {}
if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
player_ytcfg = webpage_ytcfg if client == webpage_client else {}
if 'configs' not in self._configuration_arg('player_skip') and client != webpage_client:
player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
player_url = player_url or self._extract_player_url(webpage_ytcfg, player_ytcfg, webpage=webpage)
require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
if 'js' in self._configuration_arg('player_skip'):
require_js_player = False
@ -3181,10 +3191,16 @@ def append_client(*client_names):
player_url = self._download_player_url(video_id)
tried_iframe_fallback = True
pr = initial_pr if client == 'web' and not reload_playback_token else None
pr = None
if (
client == webpage_client
and 'player_response' not in self._configuration_arg('webpage_skip')
and not reload_playback_token
):
pr = initial_pr
visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
visitor_data = visitor_data or self._extract_visitor_data(webpage_ytcfg, initial_pr, player_ytcfg)
data_sync_id = data_sync_id or self._extract_data_sync_id(webpage_ytcfg, initial_pr, player_ytcfg)
fetch_po_token_args = {
'client': client,
@ -3193,58 +3209,26 @@ def append_client(*client_names):
'data_sync_id': data_sync_id if self.is_authenticated else None,
'player_url': player_url if require_js_player else None,
'webpage': webpage,
'session_index': self._extract_session_index(master_ytcfg, player_ytcfg),
'session_index': self._extract_session_index(webpage_ytcfg, player_ytcfg),
'ytcfg': player_ytcfg or self._get_default_ytcfg(client),
}
# Don't need a player PO token for WEB if using player response from webpage
player_pot_policy: PlayerPoTokenPolicy = self._get_default_ytcfg(client)['PLAYER_PO_TOKEN_POLICY']
player_po_token = None if pr else self.fetch_po_token(
context=_PoTokenContext.PLAYER, **fetch_po_token_args)
context=_PoTokenContext.PLAYER, **fetch_po_token_args,
required=player_pot_policy.required or player_pot_policy.recommended)
fetch_gvs_po_token_func = functools.partial(
self.fetch_po_token,
context=_PoTokenContext.GVS,
**fetch_po_token_args,
)
gvs_po_token = fetch_gvs_po_token_func()
self.fetch_po_token, context=_PoTokenContext.GVS, **fetch_po_token_args)
fetch_subs_po_token_func = functools.partial(
self.fetch_po_token,
context=_PoTokenContext.SUBS,
**fetch_po_token_args,
)
required_pot_contexts = self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']
if (
not player_po_token
and _PoTokenContext.PLAYER in required_pot_contexts
):
# TODO: may need to skip player response request. Unsure yet..
self.report_warning(
f'No Player PO Token provided for {client} client, '
f'which may be required for working {client} formats. This client will be deprioritized'
f'You can manually pass a Player PO Token for this client with --extractor-args "youtube:po_token={client}.player+XXX". '
f'For more information, refer to {PO_TOKEN_GUIDE_URL} .', only_once=True)
deprioritize_pr = True
if (
not gvs_po_token
and _PoTokenContext.GVS in required_pot_contexts
and 'missing_pot' in self._configuration_arg('formats')
):
# note: warning with help message is provided later during format processing
self.report_warning(
f'No GVS PO Token provided for {client} client, '
f'which may be required for working {client} formats. This client will be deprioritized',
only_once=True)
deprioritize_pr = True
self.fetch_po_token, context=_PoTokenContext.SUBS, **fetch_po_token_args)
try:
pr = pr or self._extract_player_response(
client, video_id,
master_ytcfg=player_ytcfg or master_ytcfg,
webpage_ytcfg=player_ytcfg or webpage_ytcfg,
player_ytcfg=player_ytcfg,
player_url=player_url,
initial_pr=initial_pr,
@ -3263,15 +3247,16 @@ def append_client(*client_names):
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
sd = pr.setdefault('streamingData', {})
sd[STREAMING_DATA_CLIENT_NAME] = client
sd[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
sd[STREAMING_DATA_FETCH_PO_TOKEN] = fetch_gvs_po_token_func
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = client
f[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
f[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
f[STREAMING_DATA_FETCH_PO_TOKEN] = fetch_gvs_po_token_func
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
f[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
f[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
if deprioritize_pr:
deprioritized_prs.append(pr)
else:
@ -3297,6 +3282,10 @@ def append_client(*client_names):
# web_creator may work around age-verification for all videos but requires PO token
append_client('tv_embedded', 'web_creator')
status = traverse_obj(pr, ('playabilityStatus', 'status', {str}))
if status not in ('OK', 'LIVE_STREAM_OFFLINE', 'AGE_CHECK_REQUIRED', 'AGE_VERIFICATION_REQUIRED'):
self.write_debug(f'{video_id}: {client} player response playability status: {status}')
prs.extend(deprioritized_prs)
if skipped_clients:
@ -3375,7 +3364,7 @@ def _process_n_param(self, gvs_url, video_id, player_url, proto='https'):
def _reload_sabr_config(self, video_id, client_name, reload_playback_token):
# xxx: may also update client info?
url = 'https://www.youtube.com/watch?v=' + video_id
_, _, prs, player_url = self._download_player_responses(url, {}, video_id, url, reload_playback_token)
_, _, _, _, prs, player_url = self._initial_extract(url, {}, url, 'web', video_id, reload_playback_token)
video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
microformats = traverse_obj(
prs, (..., 'microformat', 'playerMicroformatRenderer'),
@ -3418,21 +3407,31 @@ def build_fragments(f):
}),
} for range_start in range(0, f['filesize'], CHUNK_SIZE))
def gvs_pot_required(policy, is_premium_subscriber, has_player_token):
return (
policy.required
and not (policy.not_required_with_player_token and has_player_token)
and not (policy.not_required_for_premium and is_premium_subscriber))
# save pots per client to avoid fetching again
gvs_pots = {}
for pr in player_responses:
streaming_data = traverse_obj(pr, 'streamingData')
if not streaming_data:
continue
fetch_po_token_func = streaming_data[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
po_token = streaming_data.get(STREAMING_DATA_INITIAL_PO_TOKEN)
fetch_po_token = streaming_data.get(STREAMING_DATA_FETCH_PO_TOKEN)
innertube_context = streaming_data.get(STREAMING_DATA_INNERTUBE_CONTEXT)
streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
def get_stream_id(fmt_stream):
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
def process_format_stream(fmt_stream, proto):
nonlocal itag_qualities, res_qualities, original_language
def process_format_stream(fmt_stream, proto, missing_pot):
nonlocal original_language
itag = str_or_none(fmt_stream.get('itag'))
audio_track = fmt_stream.get('audioTrack') or {}
quality = fmt_stream.get('quality')
@ -3480,13 +3479,7 @@ def process_format_stream(fmt_stream, proto):
self.report_warning(
f'Some {client_name} client {proto} formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
# Clients that require PO Token return videoplayback URLs that may return 403
require_po_token = (
not po_token
and _PoTokenContext.GVS in self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
and itag not in ['18']) # these formats do not require PO Token
if require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
if missing_pot and 'missing_pot' not in self._configuration_arg('formats'):
self._report_pot_format_skipped(video_id, client_name, proto)
return None
@ -3501,7 +3494,7 @@ def process_format_stream(fmt_stream, proto):
name, fmt_stream.get('isDrc') and 'DRC',
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
is_damaged and 'DAMAGED', require_po_token and 'MISSING POT',
is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
@ -3547,10 +3540,21 @@ def process_sabr_formats_and_subtitles():
only_once=True)
return
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.SABR]
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
po_token = (
gvs_pots.get(client_name)
or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
if client_name not in gvs_pots:
gvs_pots[client_name] = po_token
sabr_config = {
'video_playback_ustreamer_config': video_playback_ustreamer_config,
'po_token': po_token,
'fetch_po_token_fn': fetch_po_token,
'fetch_po_token_fn': fetch_po_token_func,
'client_name': client_name,
'client_info': traverse_obj(innertube_context, 'client'),
'reload_config_fn': functools.partial(self._reload_sabr_config, video_id, client_name),
@ -3564,7 +3568,7 @@ def process_sabr_formats_and_subtitles():
if stream_id in stream_ids:
continue
fmt = process_format_stream(fmt_stream, proto)
fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
if not fmt:
continue
@ -3613,7 +3617,17 @@ def process_https_formats():
if stream_id in stream_ids:
continue
fmt = process_format_stream(fmt_stream, proto)
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
require_po_token = (
stream_id[0] not in ['18']
and gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided))
po_token = (
gvs_pots.get(client_name)
or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
if not fmt:
continue
@ -3648,6 +3662,8 @@ def process_https_formats():
if po_token:
fmt_url = update_url_query(fmt_url, {'pot': po_token})
if client_name not in gvs_pots:
gvs_pots[client_name] = po_token
fmt['url'] = fmt_url
@ -3690,7 +3706,7 @@ def process_https_formats():
elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
skip_manifests.add('dash')
def process_manifest_format(f, proto, client_name, itag, po_token):
def process_manifest_format(f, proto, client_name, itag, missing_pot):
key = (proto, f.get('language'))
if not all_formats and key in itags[itag]:
return False
@ -3698,20 +3714,11 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
if f.get('source_preference') is None:
f['source_preference'] = -1
# Clients that require PO Token return videoplayback URLs that may return 403
# hls does not currently require PO Token
if (
not po_token
and _PoTokenContext.GVS in self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
and proto != 'hls'
):
if 'missing_pot' not in self._configuration_arg('formats'):
self._report_pot_format_skipped(video_id, client_name, proto)
return False
if missing_pot:
f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
f['source_preference'] -= 20
# XXX: Check if IOS HLS formats are affected by player PO token enforcement; temporary
# XXX: Check if IOS HLS formats are affected by PO token enforcement; temporary
# See https://github.com/yt-dlp/yt-dlp/issues/13511
if proto == 'hls' and client_name == 'ios':
f['__needs_testing'] = True
@ -3749,36 +3756,56 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
hls_manifest_url = 'hls' not in skip_manifests and streaming_data.get('hlsManifestUrl')
if hls_manifest_url:
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS]
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
fmts, subs = self._extract_m3u8_formats_and_subtitles(
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
for sub in traverse_obj(subs, (..., ..., {dict})):
# HLS subs (m3u8) do not need a PO token; save client name for debugging
sub[STREAMING_DATA_CLIENT_NAME] = client_name
subtitles = self._merge_subtitles(subs, subtitles)
for f in fmts:
if process_manifest_format(f, 'hls', client_name, self._search_regex(
r'/itag/(\d+)', f['url'], 'itag', default=None), po_token):
yield f
if client_name not in gvs_pots:
gvs_pots[client_name] = po_token
if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'):
self._report_pot_format_skipped(video_id, client_name, 'hls')
else:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
for sub in traverse_obj(subs, (..., ..., {dict})):
# TODO: If HLS video requires a PO Token, do the subs also require pot?
# Save client name for debugging
sub[STREAMING_DATA_CLIENT_NAME] = client_name
subtitles = self._merge_subtitles(subs, subtitles)
for f in fmts:
if process_manifest_format(f, 'hls', client_name, self._search_regex(
r'/itag/(\d+)', f['url'], 'itag', default=None), require_po_token and not po_token):
yield f
dash_manifest_url = 'dash' not in skip_manifests and streaming_data.get('dashManifestUrl')
if dash_manifest_url:
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH]
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
for sub in traverse_obj(subs, (..., ..., {dict})):
# TODO: Investigate if DASH subs ever need a PO token; save client name for debugging
sub[STREAMING_DATA_CLIENT_NAME] = client_name
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
for f in formats:
if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
f['filesize'] = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
if needs_live_processing:
f['is_from_start'] = True
if client_name not in gvs_pots:
gvs_pots[client_name] = po_token
if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'):
self._report_pot_format_skipped(video_id, client_name, 'dash')
else:
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
for sub in traverse_obj(subs, (..., ..., {dict})):
# TODO: If DASH video requires a PO Token, do the subs also require pot?
# Save client name for debugging
sub[STREAMING_DATA_CLIENT_NAME] = client_name
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
for f in formats:
if process_manifest_format(f, 'dash', client_name, f['format_id'], require_po_token and not po_token):
f['filesize'] = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
if needs_live_processing:
f['is_from_start'] = True
yield f
yield f
yield subtitles
def _extract_storyboard(self, player_responses, duration):
@ -3819,25 +3846,22 @@ def _extract_storyboard(self, player_responses, duration):
} for j in range(math.ceil(fragment_count))],
}
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url, reload_playback_token=None):
def _download_initial_webpage(self, webpage_url, webpage_client, video_id):
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
if webpage_url and 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
pp = (
self._configuration_arg('player_params', [None], casesense=True)[0]
or traverse_obj(INNERTUBE_CLIENTS, ('web', 'PLAYER_PARAMS', {str}))
or traverse_obj(INNERTUBE_CLIENTS, (webpage_client, 'PLAYER_PARAMS', {str}))
)
if pp:
query['pp'] = pp
webpage = self._download_webpage_with_retries(webpage_url, video_id, query=query)
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
player_responses, player_url = self._extract_player_responses(
self._get_requested_clients(url, smuggled_data),
video_id, webpage, master_ytcfg, reload_playback_token)
return webpage, master_ytcfg, player_responses, player_url
webpage = self._download_webpage_with_retries(
webpage_url, video_id, query=query,
headers=traverse_obj(self._get_default_ytcfg(webpage_client), {
'User-Agent': ('INNERTUBE_CONTEXT', 'client', 'userAgent', {str}),
}))
return webpage
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
@ -3862,14 +3886,60 @@ def _list_formats(self, video_id, microformats, video_details, player_responses,
return live_broadcast_details, live_status, formats, subtitles
def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg):
initial_data = None
if webpage and 'initial_data' not in self._configuration_arg('webpage_skip'):
initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
if not traverse_obj(initial_data, 'contents'):
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
initial_data = None
if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
query = {'videoId': video_id}
query.update(self._get_checkok_params())
initial_data = self._extract_response(
item_id=video_id, ep='next', fatal=False,
ytcfg=webpage_ytcfg, query=query, check_get_keys='contents',
note='Downloading initial data API JSON', default_client=webpage_client)
return initial_data
def _is_premium_subscriber(self, initial_data):
if not self.is_authenticated or not initial_data:
return False
tlr = traverse_obj(
initial_data, ('topbar', 'desktopTopbarRenderer', 'logo', 'topbarLogoRenderer'))
return (
traverse_obj(tlr, ('iconImage', 'iconType')) == 'YOUTUBE_PREMIUM_LOGO'
or 'premium' in (self._get_text(tlr, 'tooltipText') or '').lower()
)
def _initial_extract(self, url, smuggled_data, webpage_url, webpage_client, video_id, reload_playback_token=None):
# This function is also used by live-from-start refresh
webpage = self._download_initial_webpage(webpage_url, webpage_client, video_id)
webpage_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg(webpage_client)
initial_data = self._download_initial_data(video_id, webpage, webpage_client, webpage_ytcfg)
is_premium_subscriber = self._is_premium_subscriber(initial_data)
if is_premium_subscriber:
self.write_debug('Detected YouTube Premium subscription')
player_responses, player_url = self._extract_player_responses(
self._get_requested_clients(url, smuggled_data, is_premium_subscriber),
video_id, webpage, webpage_client, webpage_ytcfg, is_premium_subscriber, reload_playback_token)
return webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
base_url = self.http_scheme() + '//www.youtube.com/'
webpage_url = base_url + 'watch?v=' + video_id
webpage_client = 'web'
webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url = self._initial_extract(
url, smuggled_data, webpage_url, webpage_client, video_id)
playability_statuses = traverse_obj(
player_responses, (..., 'playabilityStatus'), expected_type=dict)
@ -4145,7 +4215,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
pctr = pr['captions']['playerCaptionsTracklistRenderer']
client_name = pr['streamingData'][STREAMING_DATA_CLIENT_NAME]
innertube_client_name = pr['streamingData'][STREAMING_DATA_INNERTUBE_CONTEXT]['client']['clientName']
required_contexts = self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['SUBS_PO_TOKEN_POLICY']
fetch_subs_po_token_func = pr['streamingData'][STREAMING_DATA_FETCH_SUBS_PO_TOKEN]
pot_params = {}
@ -4158,11 +4228,11 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
requires_pot = (
# We can detect the experiment for now
any(e in traverse_obj(qs, ('exp', ...)) for e in ('xpe', 'xpv'))
or _PoTokenContext.SUBS in required_contexts)
or (pot_policy.required and not (pot_policy.not_required_for_premium and is_premium_subscriber)))
if not already_fetched_pot:
already_fetched_pot = True
if subs_po_token := fetch_subs_po_token_func(required=requires_pot):
if subs_po_token := fetch_subs_po_token_func(required=requires_pot or pot_policy.recommended):
pot_params.update({
'pot': subs_po_token,
'potc': '1',
@ -4265,21 +4335,6 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
'release_year': int_or_none(release_year),
})
initial_data = None
if webpage:
initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
if not traverse_obj(initial_data, 'contents'):
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
initial_data = None
if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
query = {'videoId': video_id}
query.update(self._get_checkok_params())
initial_data = self._extract_response(
item_id=video_id, ep='next', fatal=False,
ytcfg=master_ytcfg, query=query, check_get_keys='contents',
headers=self.generate_api_headers(ytcfg=master_ytcfg),
note='Downloading initial data API JSON')
COMMENTS_SECTION_IDS = ('comment-item-section', 'engagement-panel-comments-section')
info['comment_count'] = traverse_obj(initial_data, (
'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
@ -4478,7 +4533,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
or get_first(microformats, 'isUnlisted', expected_type=bool))))
info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
info['__post_extractor'] = self.extract_comments(webpage_ytcfg, video_id, contents, webpage)
self.mark_watched(video_id, player_responses)

View File

@ -677,8 +677,9 @@ def dict_item(key, val):
# Set value as JS_Undefined or its pre-existing value
local_vars.set_local(var, ret)
else:
ret = local_vars.get(var, JS_Undefined)
if ret is JS_Undefined:
ret = local_vars.get(var, NO_DEFAULT)
if ret is NO_DEFAULT:
ret = JS_Undefined
self._undefined_varnames.add(var)
return ret, should_return