- _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
- _TESTS = [{
- # clip
- 'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
- 'info_dict': {
- 'id': '5386045029001',
- 'ext': 'mp4',
- 'title': 'Chrysler Imperial',
- 'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
- 'timestamp': 1491399228,
- 'upload_date': '20170405',
- 'uploader_id': '618566855001',
- 'series': 'RPM+',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # episode
- 'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
- 'info_dict': {
- 'id': '5395865725001',
- 'title': 'Épisode 13 : Les retrouvailles',
- 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
- 'ext': 'mp4',
- 'timestamp': 1492019320,
- 'upload_date': '20170412',
- 'uploader_id': '618566855001',
- 'series': "L'amour est dans le pré",
- 'season_number': 5,
- 'episode': 'Épisode 13',
- 'episode_number': 13,
- },
- 'params': {
- 'skip_download': True,
- },
- }]
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- brightcove_id = self._search_regex(
- r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
-
- data = self._parse_json(
- self._search_regex(
- r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
- default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
-
- title = try_get(
- data, lambda x: x['video']['nom'],
- str) or self._html_search_meta(
- 'dcterms.Title', webpage, 'title', fatal=True)
-
- description = self._html_search_meta(
- ('dcterms.Description', 'description'), webpage, 'description')
-
- series = try_get(
- data, lambda x: x['emission']['nom']) or self._search_regex(
- r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
- webpage, 'series', default=None)
-
- season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
- season = try_get(season_el, lambda x: x['nom'], str)
- season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
-
- episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
- episode = try_get(episode_el, lambda x: x['nom'], str)
- episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
-
- return {
- '_type': 'url_transparent',
- 'ie_key': BrightcoveNewIE.ie_key(),
- 'url': smuggle_url(
- self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
- {'geo_countries': ['CA']}),
- 'id': brightcove_id,
- 'title': title,
- 'description': description,
- 'series': series,
- 'season': season,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- }
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index c489dc7312..027f7a7b6f 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -765,7 +765,7 @@ class RaiCulturaIE(RaiNewsIE): # XXX: Do not subclass from concrete IE
class RaiSudtirolIE(RaiBaseIE):
- _VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P<id>\w+)'
+ _VALID_URL = r'https?://rai(?:bz|sudtirol)\.rai\.it/.+media=(?P<id>\w+)'
_TESTS = [{
# mp4 file
'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
@@ -791,6 +791,9 @@ class RaiSudtirolIE(RaiBaseIE):
'formats': 'count:6',
},
'params': {'skip_download': True},
+ }, {
+ 'url': 'https://raibz.rai.it/de/index.php?media=Ptv1751660400',
+ 'only_matching': True,
}]
def _real_extract(self, url):
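
The widened alternation keeps both hosts on one extractor; a quick standalone check of the new pattern against the test URLs above:

import re

_VALID_URL = r'https?://rai(?:bz|sudtirol)\.rai\.it/.+media=(?P<id>\w+)'

for url in (
    'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
    'https://raibz.rai.it/de/index.php?media=Ptv1751660400',
):
    mobj = re.match(_VALID_URL, url)
    assert mobj, url
    print(mobj.group('id'))  # Ptv1619729460, then Ptv1751660400
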
diff --git a/yt_dlp/extractor/tfo.py b/yt_dlp/extractor/tfo.py
index 0d1b252175..1884ab2e8e 100644
--- a/yt_dlp/extractor/tfo.py
+++ b/yt_dlp/extractor/tfo.py
@@ -6,6 +6,7 @@
class TFOIE(InfoExtractor):
+ _WORKING = False
_GEO_COUNTRIES = ['CA']
_VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
_TEST = {
diff --git a/yt_dlp/extractor/tv5unis.py b/yt_dlp/extractor/tv5unis.py
index 88fd334822..fe7fd0325b 100644
--- a/yt_dlp/extractor/tv5unis.py
+++ b/yt_dlp/extractor/tv5unis.py
@@ -51,6 +51,7 @@ def _real_extract(self, url):
class TV5UnisVideoIE(TV5UnisBaseIE):
+ _WORKING = False
IE_NAME = 'tv5unis:video'
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/]+/(?P<id>\d+)'
_TEST = {
@@ -71,6 +72,7 @@ def _gql_args(groups):
class TV5UnisIE(TV5UnisBaseIE):
+ _WORKING = False
IE_NAME = 'tv5unis'
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^/]+)(?:/saisons/(?P<season_number>\d+)/episodes/(?P<episode_number>\d+))?/?(?:[?#&]|$)'
_TESTS = [{
diff --git a/yt_dlp/extractor/unitednations.py b/yt_dlp/extractor/unitednations.py
new file mode 100644
index 0000000000..f9283fd6c1
--- /dev/null
+++ b/yt_dlp/extractor/unitednations.py
@@ -0,0 +1,32 @@
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+
+
+class UnitedNationsWebTvIE(InfoExtractor):
+ _VALID_URL = r'https?://webtv\.un\.org/(?:ar|zh|en|fr|ru|es)/asset/\w+/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://webtv.un.org/en/asset/k1o/k1o7stmi6p',
+ 'md5': 'b2f8b3030063298ae841b4b7ddc01477',
+ 'info_dict': {
+ 'id': '1_o7stmi6p',
+ 'ext': 'mp4',
+ 'title': 'António Guterres (Secretary-General) on Israel and Iran - Security Council, 9939th meeting',
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2503451/sp/250345100/thumbnail/entry_id/1_o7stmi6p/version/100021',
+ 'uploader_id': 'evgeniia.alisova@un.org',
+ 'upload_date': '20250620',
+ 'timestamp': 1750430976,
+ 'duration': 234,
+ 'view_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ partner_id = self._html_search_regex(
+ r'partnerId:\s*(\d+)', webpage, 'partner_id')
+ entry_id = self._html_search_regex(
+ r'const\s+kentryID\s*=\s*["\'](\w+)["\']', webpage, 'kentry_id')
+
+ return self.url_result(f'kaltura:{partner_id}:{entry_id}', KalturaIE)
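
The two page regexes assume the Kaltura player is configured inline on the watch page. A self-contained check against an illustrative HTML fragment (the markup below is hypothetical; the patterns and the kaltura:<partner>:<entry> URL form handed to KalturaIE are from the extractor above):

import re

webpage = '''
<script>
    kWidget.embed({ partnerId: 2503451 });
    const kentryID = "1_o7stmi6p";
</script>
'''  # illustrative fragment; the real page markup may differ

partner_id = re.search(r'partnerId:\s*(\d+)', webpage).group(1)
entry_id = re.search(r'const\s+kentryID\s*=\s*["\'](\w+)["\']', webpage).group(1)
print(f'kaltura:{partner_id}:{entry_id}')  # kaltura:2503451:1_o7stmi6p
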
diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py
index 31393b02a4..05ae4dd18a 100644
--- a/yt_dlp/extractor/unsupported.py
+++ b/yt_dlp/extractor/unsupported.py
@@ -53,6 +53,10 @@ class KnownDRMIE(UnsupportedInfoExtractor):
r'(?:beta\.)?crunchyroll\.com',
r'viki\.com',
r'deezer\.com',
+ r'b-ch\.com',
+ r'ctv\.ca',
+ r'noovo\.ca',
+ r'tsn\.ca',
)
_TESTS = [{
@@ -168,6 +172,18 @@ class KnownDRMIE(UnsupportedInfoExtractor):
}, {
'url': 'http://www.deezer.com/playlist/176747451',
'only_matching': True,
+ }, {
+ 'url': 'https://www.b-ch.com/titles/8203/001',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ctv.ca/shows/masterchef-53506/the-audition-battles-s15e1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.noovo.ca/emissions/lamour-est-dans-le-pre/prets-pour-lamour-s10e1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tsn.ca/video/relaxed-oilers-look-to-put-emotional-game-2-loss-in-the-rearview%7E3148747',
+ 'only_matching': True,
}]
def _real_extract(self, url):
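
Each URLS entry is a bare host regex fragment; the UnsupportedInfoExtractor base class joins them into a single _VALID_URL. Roughly how that works (a sketch, not the verbatim base-class code):

import re

URLS = (
    r'b-ch\.com',
    r'ctv\.ca',
    r'noovo\.ca',
    r'tsn\.ca',
)
# approximate shape of the combined pattern the base class builds
valid_url = rf'https?://(?:www\.)?(?:{"|".join(URLS)})'
assert re.match(valid_url, 'https://www.noovo.ca/emissions/lamour-est-dans-le-pre/prets-pour-lamour-s10e1')
assert not re.match(valid_url, 'https://example.com/video/123')
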
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index b268fad56d..7ffe89f227 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -21,6 +21,7 @@
js_to_json,
jwt_decode_hs256,
merge_dicts,
+ mimetype2ext,
parse_filesize,
parse_iso8601,
parse_qs,
@@ -28,9 +29,11 @@
smuggle_url,
str_or_none,
traverse_obj,
+ try_call,
try_get,
unified_timestamp,
unsmuggle_url,
+ url_basename,
url_or_none,
urlencode_postdata,
urlhandle_detect_ext,
@@ -45,14 +48,57 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_REFERER_HINT = (
'Cannot download embed-only video without embedding URL. Please call yt-dlp '
'with the URL of the page that embeds this video.')
- _IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw=='
- _IOS_CLIENT_HEADERS = {
+
+ _DEFAULT_CLIENT = 'android'
+ _DEFAULT_AUTHED_CLIENT = 'web'
+ _CLIENT_HEADERS = {
'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
'Accept-Language': 'en',
- 'User-Agent': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0',
}
- _IOS_OAUTH_CACHE_KEY = 'oauth-token-ios'
- _ios_oauth_token = None
+ _CLIENT_CONFIGS = {
+ 'android': {
+ 'CACHE_KEY': 'oauth-token-android',
+ 'CACHE_ONLY': False,
+ 'VIEWER_JWT': False,
+ 'REQUIRES_AUTH': False,
+ 'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==',
+ 'USER_AGENT': 'com.vimeo.android.videoapp (OnePlus, ONEPLUS A6003, OnePlus, Android 14/34 Version 11.8.1) Kotlin VimeoNetworking/3.12.0',
+ 'VIDEOS_FIELDS': (
+ 'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration', 'width',
+ 'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time', 'content_rating',
+ 'content_rating_class', 'rating_mod_locked', 'license', 'privacy', 'pictures', 'tags', 'stats',
+ 'categories', 'uploader', 'metadata', 'user', 'files', 'download', 'app', 'play', 'status',
+ 'resource_key', 'badge', 'upload', 'transcode', 'is_playable', 'has_audio',
+ ),
+ },
+ 'ios': {
+ 'CACHE_KEY': 'oauth-token-ios',
+ 'CACHE_ONLY': True,
+ 'VIEWER_JWT': False,
+ 'REQUIRES_AUTH': False,
+ 'AUTH': 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw==',
+ 'USER_AGENT': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0',
+ 'VIDEOS_FIELDS': (
+ 'uri', 'name', 'description', 'type', 'link', 'player_embed_url', 'duration',
+ 'width', 'language', 'height', 'embed', 'created_time', 'modified_time', 'release_time',
+ 'content_rating', 'content_rating_class', 'rating_mod_locked', 'license', 'config_url',
+ 'embed_player_config_url', 'privacy', 'pictures', 'tags', 'stats', 'categories', 'uploader',
+ 'metadata', 'user', 'files', 'download', 'app', 'play', 'status', 'resource_key', 'badge',
+ 'upload', 'transcode', 'is_playable', 'has_audio',
+ ),
+ },
+ 'web': {
+ 'VIEWER_JWT': True,
+ 'REQUIRES_AUTH': True,
+ 'USER_AGENT': None,
+ 'VIDEOS_FIELDS': (
+ 'config_url', 'created_time', 'description', 'license',
+ 'metadata.connections.comments.total', 'metadata.connections.likes.total',
+ 'release_time', 'stats.plays',
+ ),
+ },
+ }
+ _oauth_tokens = {}
_viewer_info = None
@staticmethod
@@ -80,7 +126,14 @@ def _fetch_viewer_info(self, display_id=None, fatal=True):
return self._viewer_info
+ @property
+ def _is_logged_in(self):
+ return 'vimeo' in self._get_cookies('https://vimeo.com')
+
def _perform_login(self, username, password):
+ if self._is_logged_in:
+ return
+
viewer = self._fetch_viewer_info()
data = {
'action': 'login',
@@ -105,8 +158,8 @@ def _perform_login(self, username, password):
raise ExtractorError('Unable to log in')
def _real_initialize(self):
- if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vuid'):
- self._raise_login_required()
+ if self._LOGIN_REQUIRED and not self._is_logged_in:
+ self.raise_login_required()
def _get_video_password(self):
password = self.get_param('videopassword')
@@ -277,52 +330,95 @@ def _parse_config(self, config, video_id):
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
}
- def _fetch_oauth_token(self):
- if not self._ios_oauth_token:
- self._ios_oauth_token = self.cache.load(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY)
+ def _fetch_oauth_token(self, client):
+ client_config = self._CLIENT_CONFIGS[client]
- if not self._ios_oauth_token:
- self._ios_oauth_token = self._download_json(
+ if client_config['VIEWER_JWT']:
+ return f'jwt {self._fetch_viewer_info()["jwt"]}'
+
+ cache_key = client_config['CACHE_KEY']
+
+ if not self._oauth_tokens.get(cache_key):
+ self._oauth_tokens[cache_key] = self.cache.load(self._NETRC_MACHINE, cache_key)
+
+ if not self._oauth_tokens.get(cache_key):
+ if client_config['CACHE_ONLY']:
+ raise ExtractorError(
+ f'The {client} client is unable to fetch new OAuth tokens '
+ f'and is only intended for use with previously cached tokens', expected=True)
+
+ self._oauth_tokens[cache_key] = self._download_json(
'https://api.vimeo.com/oauth/authorize/client', None,
- 'Fetching OAuth token', 'Failed to fetch OAuth token',
+ f'Fetching {client} OAuth token', f'Failed to fetch {client} OAuth token',
headers={
- 'Authorization': f'Basic {self._IOS_CLIENT_AUTH}',
- **self._IOS_CLIENT_HEADERS,
+ 'Authorization': f'Basic {client_config["AUTH"]}',
+ 'User-Agent': client_config['USER_AGENT'],
+ **self._CLIENT_HEADERS,
}, data=urlencode_postdata({
'grant_type': 'client_credentials',
- 'scope': 'private public create edit delete interact upload purchased stats',
+ 'scope': 'private public create edit delete interact upload purchased stats video_files',
}, quote_via=urllib.parse.quote))['access_token']
- self.cache.store(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY, self._ios_oauth_token)
+ self.cache.store(self._NETRC_MACHINE, cache_key, self._oauth_tokens[cache_key])
- return self._ios_oauth_token
+ return f'Bearer {self._oauth_tokens[cache_key]}'
+
+ def _get_requested_client(self):
+ default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT
+
+ client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0]
+ if client not in self._CLIENT_CONFIGS:
+ raise ExtractorError(
+ f'Unsupported API client "{client}" requested. '
+ f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True)
+
+ return client
+
+ def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs):
+ client = force_client or self._get_requested_client()
+
+ client_config = self._CLIENT_CONFIGS[client]
+ if client_config['REQUIRES_AUTH'] and not self._is_logged_in:
+ self.raise_login_required(f'The {client} client requires authentication')
- def _call_videos_api(self, video_id, unlisted_hash=None, **kwargs):
return self._download_json(
- join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
- video_id, 'Downloading API JSON', headers={
- 'Authorization': f'Bearer {self._fetch_oauth_token()}',
- **self._IOS_CLIENT_HEADERS,
- }, query={
- 'fields': ','.join((
- 'config_url', 'embed_player_config_url', 'player_embed_url', 'download', 'play',
- 'files', 'description', 'license', 'release_time', 'created_time', 'stats.plays',
- 'metadata.connections.comments.total', 'metadata.connections.likes.total')),
+ join_nonempty(
+ 'https://api.vimeo.com/videos',
+ join_nonempty(video_id, unlisted_hash, delim=':'),
+ path, delim='/'),
+ video_id, f'Downloading {client} API JSON', f'Unable to download {client} API JSON',
+ headers=filter_dict({
+ 'Authorization': self._fetch_oauth_token(client),
+ 'User-Agent': client_config['USER_AGENT'],
+ **self._CLIENT_HEADERS,
+ }), query={
+ 'fields': ','.join(client_config['VIDEOS_FIELDS']),
+ **(query or {}),
}, **kwargs)
- def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=None):
+ def _extract_original_format(self, url, video_id, unlisted_hash=None):
# Original/source formats are only available when logged in
- if not self._get_cookies('https://vimeo.com/').get('vimeo'):
- return
+ if not self._is_logged_in:
+ return None
- query = {'action': 'load_download_config'}
- if unlisted_hash:
- query['unlisted_hash'] = unlisted_hash
- download_data = self._download_json(
- url, video_id, 'Loading download config JSON', fatal=False,
- query=query, headers={'X-Requested-With': 'XMLHttpRequest'},
- expected_status=(403, 404)) or {}
- source_file = download_data.get('source_file')
- download_url = try_get(source_file, lambda x: x['download_url'])
+ policy = self._configuration_arg('original_format_policy', ['auto'], ie_key=VimeoIE)[0]
+ if policy == 'never':
+ return None
+
+ try:
+ download_data = self._download_json(
+ url, video_id, 'Loading download config JSON', query=filter_dict({
+ 'action': 'load_download_config',
+ 'unlisted_hash': unlisted_hash,
+ }), headers={
+ 'Accept': 'application/json',
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+ except ExtractorError as error:
+ self.write_debug(f'Unable to load download config JSON: {error.cause}')
+ download_data = None
+
+ source_file = traverse_obj(download_data, ('source_file', {dict})) or {}
+ download_url = traverse_obj(source_file, ('download_url', {url_or_none}))
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
source_name = source_file.get('public_name', 'Original')
if self._is_valid_url(download_url, video_id, f'{source_name} video'):
@@ -340,8 +436,27 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=N
'quality': 1,
}
- original_response = api_data or self._call_videos_api(
- video_id, unlisted_hash, fatal=False, expected_status=(403, 404))
+ # Most web client API requests are subject to rate-limiting (429) when logged-in.
+ # Requesting only the 'privacy' field is NOT rate-limited,
+ # so first we should check if video even has 'download' formats available
+ try:
+ privacy_info = self._call_videos_api(
+ video_id, unlisted_hash, force_client='web', query={'fields': 'privacy'})
+ except ExtractorError as error:
+ self.write_debug(f'Unable to download privacy info: {error.cause}')
+ return None
+
+ if not traverse_obj(privacy_info, ('privacy', 'download', {bool})):
+ msg = f'{video_id}: Vimeo says this video is not downloadable'
+ if policy != 'always':
+ self.write_debug(
+ f'{msg}, so yt-dlp is not attempting to extract the original/source format. '
+ f'To try anyways, use --extractor-args "vimeo:original_format_policy=always"')
+ return None
+ self.write_debug(f'{msg}; attempting to extract original/source format anyways')
+
+ original_response = self._call_videos_api(
+ video_id, unlisted_hash, force_client='web', query={'fields': 'download'}, fatal=False)
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
download_url = download_data.get('link')
if not download_url or download_data.get('quality') != 'source':
@@ -919,25 +1034,125 @@ def _verify_player_video_password(self, url, video_id, headers):
raise ExtractorError('Wrong video password', expected=True)
return checked
+ def _get_subtitles(self, video_id, unlisted_hash):
+ subs = {}
+ text_tracks = self._call_videos_api(
+ video_id, unlisted_hash, path='texttracks', query={
+ 'include_transcript': 'true',
+ 'fields': ','.join((
+ 'active', 'display_language', 'id', 'language', 'link', 'name', 'type', 'uri',
+ )),
+ }, fatal=False)
+ for tt in traverse_obj(text_tracks, ('data', lambda _, v: url_or_none(v['link']))):
+ subs.setdefault(tt.get('language'), []).append({
+ 'url': tt['link'],
+ 'ext': 'vtt',
+ 'name': tt.get('display_language'),
+ })
+ return subs
+
+ def _parse_api_response(self, video, video_id, unlisted_hash=None):
+ formats, subtitles = [], {}
+ seen_urls = set()
+ duration = traverse_obj(video, ('duration', {int_or_none}))
+
+ for file in traverse_obj(video, (
+ (('play', (None, 'progressive')), 'files', 'download'), lambda _, v: url_or_none(v['link']),
+ )):
+ format_url = file['link']
+ if format_url in seen_urls:
+ continue
+ seen_urls.add(format_url)
+ quality = file.get('quality')
+ ext = determine_ext(format_url)
+ if quality == 'hls' or ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ elif quality == 'dash' or ext == 'mpd':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id='dash', fatal=False)
+ for fmt in fmts:
+ fmt['format_id'] = join_nonempty(
+ *fmt['format_id'].split('-', 2)[:2], int_or_none(fmt.get('tbr')))
+ else:
+ fmt = traverse_obj(file, {
+ 'ext': ('type', {mimetype2ext(default='mp4')}),
+ 'vcodec': ('codec', {str.lower}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'filesize': ('size', {int_or_none}),
+ 'fps': ('fps', {int_or_none}),
+ })
+ fmt.update({
+ 'url': format_url,
+ 'format_id': join_nonempty(
+ 'http', traverse_obj(file, 'public_name', 'rendition'), quality),
+ 'tbr': try_call(lambda: fmt['filesize'] * 8 / duration / 1024),
+ })
+ formats.append(fmt)
+ continue
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ if traverse_obj(video, ('metadata', 'connections', 'texttracks', 'total', {int})):
+ self._merge_subtitles(self.extract_subtitles(video_id, unlisted_hash), target=subtitles)
+
+ return {
+ **traverse_obj(video, {
+ 'title': ('name', {str}),
+ 'uploader': ('user', 'name', {str}),
+ 'uploader_id': ('user', 'link', {url_basename}),
+ 'uploader_url': ('user', 'link', {url_or_none}),
+ 'release_timestamp': ('live', 'scheduled_start_time', {int_or_none}),
+ 'thumbnails': ('pictures', 'sizes', lambda _, v: url_or_none(v['link']), {
+ 'url': 'link',
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }),
+ }),
+ 'id': video_id,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'live_status': {
+ 'streaming': 'is_live',
+ 'done': 'was_live',
+ }.get(traverse_obj(video, ('live', 'status', {str}))),
+ }
+
def _extract_from_api(self, video_id, unlisted_hash=None):
for retry in (False, True):
try:
video = self._call_videos_api(video_id, unlisted_hash)
break
except ExtractorError as e:
- if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
- and 'password' in traverse_obj(
- self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
- ({json.loads}, 'invalid_parameters', ..., 'field'),
- )):
+ if not isinstance(e.cause, HTTPError):
+ raise
+ response = traverse_obj(
+ self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
+ ({json.loads}, {dict})) or {}
+ if (
+ not retry and e.cause.status == 400
+ and 'password' in traverse_obj(response, ('invalid_parameters', ..., 'field'))
+ ):
self._verify_video_password(video_id)
- continue
- raise
+ elif e.cause.status == 404 and response.get('error_code') == 5460:
+ self.raise_login_required(join_nonempty(
+ traverse_obj(response, ('error', {str.strip})),
+ 'Authentication may be needed due to your location.',
+ 'If your IP address is located in Europe you could try using a VPN/proxy,',
+ f'or else u{self._login_hint()[1:]}',
+ delim=' '), method=None)
+ else:
+ raise
+
+ if config_url := traverse_obj(video, ('config_url', {url_or_none})):
+ info = self._parse_config(self._download_json(config_url, video_id), video_id)
+ else:
+ info = self._parse_api_response(video, video_id, unlisted_hash)
- info = self._parse_config(self._download_json(
- video['config_url'], video_id), video_id)
source_format = self._extract_original_format(
- f'https://vimeo.com/{video_id}', video_id, unlisted_hash, api_data=video)
+ f'https://vimeo.com/{video_id}', video_id, unlisted_hash)
if source_format:
info['formats'].append(source_format)
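
Outside yt-dlp, the client-credentials grant that _fetch_oauth_token performs can be sketched with the standard library alone. The AUTH placeholder stands for a base64 client_id:client_secret blob like those in _CLIENT_CONFIGS, and Vimeo's endpoint behaviour is not guaranteed stable:

import json
import urllib.parse
import urllib.request

AUTH = '...'  # base64 client_id:client_secret blob, as in _CLIENT_CONFIGS['android']['AUTH']
req = urllib.request.Request(
    'https://api.vimeo.com/oauth/authorize/client',
    data=urllib.parse.urlencode({
        'grant_type': 'client_credentials',
        'scope': 'private public create edit delete interact upload purchased stats video_files',
    }, quote_via=urllib.parse.quote).encode(),
    headers={
        'Authorization': f'Basic {AUTH}',
        'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
        'Accept-Language': 'en',
    })
token = json.load(urllib.request.urlopen(req))['access_token']

From the CLI, client selection surfaces as --extractor-args "vimeo:client=android" (or ios/web, per _get_requested_client), and forcing source-format extraction as --extractor-args "vimeo:original_format_policy=always", as quoted in _extract_original_format above.
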
diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py
index 5aee89b917..0a9b510c7d 100644
--- a/yt_dlp/extractor/youtube/_base.py
+++ b/yt_dlp/extractor/youtube/_base.py
@@ -1,5 +1,6 @@
import calendar
import copy
+import dataclasses
import datetime as dt
import enum
import functools
@@ -38,6 +39,60 @@ class _PoTokenContext(enum.Enum):
SUBS = 'subs'
+class StreamingProtocol(enum.Enum):
+ HTTPS = 'https'
+ DASH = 'dash'
+ HLS = 'hls'
+
+
+@dataclasses.dataclass
+class BasePoTokenPolicy:
+ required: bool = False
+ # Try to fetch a PO Token even if it is not required.
+ recommended: bool = False
+ not_required_for_premium: bool = False
+
+
+@dataclasses.dataclass
+class GvsPoTokenPolicy(BasePoTokenPolicy):
+ not_required_with_player_token: bool = False
+
+
+@dataclasses.dataclass
+class PlayerPoTokenPolicy(BasePoTokenPolicy):
+ pass
+
+
+@dataclasses.dataclass
+class SubsPoTokenPolicy(BasePoTokenPolicy):
+ pass
+
+
+WEB_PO_TOKEN_POLICIES = {
+ 'GVS_PO_TOKEN_POLICY': {
+ StreamingProtocol.HTTPS: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_for_premium=True,
+ not_required_with_player_token=False,
+ ),
+ StreamingProtocol.DASH: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_for_premium=True,
+ not_required_with_player_token=False,
+ ),
+ StreamingProtocol.HLS: GvsPoTokenPolicy(
+ required=False,
+ recommended=True,
+ ),
+ },
+ 'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False),
+ # In rollout, currently detected via experiment
+ # Premium users DO require a PO Token for subtitles
+ 'SUBS_PO_TOKEN_POLICY': SubsPoTokenPolicy(required=False),
+}
+
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {
@@ -48,8 +103,9 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
- 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'SUPPORTS_COOKIES': True,
+ **WEB_PO_TOKEN_POLICIES,
+ 'PLAYER_PARAMS': '8AEB',
},
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
'web_safari': {
@@ -61,8 +117,8 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
- 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
'SUPPORTS_COOKIES': True,
+ **WEB_PO_TOKEN_POLICIES,
'PLAYER_PARAMS': '8AEB',
},
'web_embedded': {
@@ -84,7 +140,24 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
- 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
+ 'GVS_PO_TOKEN_POLICY': {
+ StreamingProtocol.HTTPS: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_for_premium=True,
+ not_required_with_player_token=False,
+ ),
+ StreamingProtocol.DASH: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_for_premium=True,
+ not_required_with_player_token=False,
+ ),
+ StreamingProtocol.HLS: GvsPoTokenPolicy(
+ required=False,
+ recommended=True,
+ ),
+ },
'SUPPORTS_COOKIES': True,
},
# This client now requires sign-in for every video
@@ -96,7 +169,24 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
- 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
+ 'GVS_PO_TOKEN_POLICY': {
+ StreamingProtocol.HTTPS: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_for_premium=True,
+ not_required_with_player_token=False,
+ ),
+ StreamingProtocol.DASH: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_for_premium=True,
+ not_required_with_player_token=False,
+ ),
+ StreamingProtocol.HLS: GvsPoTokenPolicy(
+ required=False,
+ recommended=True,
+ ),
+ },
'REQUIRE_AUTH': True,
'SUPPORTS_COOKIES': True,
},
@@ -113,7 +203,24 @@ class _PoTokenContext(enum.Enum):
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'REQUIRE_JS_PLAYER': False,
- 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
+ 'GVS_PO_TOKEN_POLICY': {
+ StreamingProtocol.HTTPS: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_with_player_token=True,
+ ),
+ StreamingProtocol.DASH: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_with_player_token=True,
+ ),
+ StreamingProtocol.HLS: GvsPoTokenPolicy(
+ required=False,
+ recommended=True,
+ not_required_with_player_token=True,
+ ),
+ },
+ 'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True),
},
# YouTube Kids videos aren't returned on this client for some reason
'android_vr': {
@@ -147,7 +254,21 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
- 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
+ 'GVS_PO_TOKEN_POLICY': {
+ StreamingProtocol.HTTPS: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_with_player_token=True,
+ ),
+ # HLS Livestreams require POT 30 seconds in
+ # TODO: Rolling out
+ StreamingProtocol.HLS: GvsPoTokenPolicy(
+ required=False,
+ recommended=True,
+ not_required_with_player_token=True,
+ ),
+ },
+ 'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True),
'REQUIRE_JS_PLAYER': False,
},
# mweb has 'ultralow' formats
@@ -162,7 +283,24 @@ class _PoTokenContext(enum.Enum):
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
- 'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
+ 'GVS_PO_TOKEN_POLICY': {
+ StreamingProtocol.HTTPS: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_for_premium=True,
+ not_required_with_player_token=False,
+ ),
+ StreamingProtocol.DASH: GvsPoTokenPolicy(
+ required=True,
+ recommended=True,
+ not_required_for_premium=True,
+ not_required_with_player_token=False,
+ ),
+ StreamingProtocol.HLS: GvsPoTokenPolicy(
+ required=False,
+ recommended=True,
+ ),
+ },
'SUPPORTS_COOKIES': True,
},
'tv': {
@@ -226,7 +364,11 @@ def build_innertube_clients():
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
- ytcfg.setdefault('PO_TOKEN_REQUIRED_CONTEXTS', [])
+ ytcfg.setdefault('GVS_PO_TOKEN_POLICY', {})
+ for protocol in StreamingProtocol:
+ ytcfg['GVS_PO_TOKEN_POLICY'].setdefault(protocol, GvsPoTokenPolicy())
+ ytcfg.setdefault('PLAYER_PO_TOKEN_POLICY', PlayerPoTokenPolicy())
+ ytcfg.setdefault('SUBS_PO_TOKEN_POLICY', SubsPoTokenPolicy())
ytcfg.setdefault('REQUIRE_AUTH', False)
ytcfg.setdefault('SUPPORTS_COOKIES', False)
ytcfg.setdefault('PLAYER_PARAMS', None)
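
The dataclass defaults let build_innertube_clients backfill a permissive GvsPoTokenPolicy for any protocol a client does not declare. A standalone sketch of that setdefault pass (types re-declared from the hunks above so it runs on its own):

import dataclasses
import enum


class StreamingProtocol(enum.Enum):  # re-declared from the diff above
    HTTPS = 'https'
    DASH = 'dash'
    HLS = 'hls'


@dataclasses.dataclass
class GvsPoTokenPolicy:  # re-declared from the diff above
    required: bool = False
    recommended: bool = False
    not_required_for_premium: bool = False
    not_required_with_player_token: bool = False


ytcfg = {}  # a client entry that declared no GVS policies
ytcfg.setdefault('GVS_PO_TOKEN_POLICY', {})
for protocol in StreamingProtocol:
    ytcfg['GVS_PO_TOKEN_POLICY'].setdefault(protocol, GvsPoTokenPolicy())
# undeclared protocols fall back to "not required, not recommended"
assert not ytcfg['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS].required
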
diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py
index 208abee937..fc1f087ace 100644
--- a/yt_dlp/extractor/youtube/_video.py
+++ b/yt_dlp/extractor/youtube/_video.py
@@ -18,6 +18,9 @@
from ._base import (
INNERTUBE_CLIENTS,
BadgeType,
+ GvsPoTokenPolicy,
+ PlayerPoTokenPolicy,
+ StreamingProtocol,
YoutubeBaseInfoExtractor,
_PoTokenContext,
_split_innertube_client,
@@ -71,9 +74,11 @@
from ...utils.networking import clean_headers, clean_proxies, select_proxy
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
-STREAMING_DATA_INITIAL_PO_TOKEN = '__yt_dlp_po_token'
STREAMING_DATA_FETCH_SUBS_PO_TOKEN = '__yt_dlp_fetch_subs_po_token'
+STREAMING_DATA_FETCH_GVS_PO_TOKEN = '__yt_dlp_fetch_gvs_po_token'
+STREAMING_DATA_PLAYER_TOKEN_PROVIDED = '__yt_dlp_player_token_provided'
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
+STREAMING_DATA_IS_PREMIUM_SUBSCRIBER = '__yt_dlp_is_premium_subscriber'
PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'
@@ -253,6 +258,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
_DEFAULT_CLIENTS = ('tv', 'ios', 'web')
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web')
+ # Premium does not require POT (except for subtitles)
+ _DEFAULT_PREMIUM_CLIENTS = ('tv', 'web')
_GEO_BYPASS = False
@@ -1833,7 +1840,8 @@ def refetch_manifest(format_id, delay):
if time.time() <= start_time + delay:
return
- _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
+ _, _, _, _, prs, player_url = self._initial_extract(
+ url, smuggled_data, webpage_url, 'web', video_id)
video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
microformats = traverse_obj(
prs, (..., 'microformat', 'playerMicroformatRenderer'),
@@ -2891,7 +2899,7 @@ def _get_config_po_token(self, client: str, context: _PoTokenContext):
only_once=True)
continue
- def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None, visitor_data=None,
+ def fetch_po_token(self, client='web', context: _PoTokenContext = _PoTokenContext.GVS, ytcfg=None, visitor_data=None,
data_sync_id=None, session_index=None, player_url=None, video_id=None, webpage=None,
required=False, **kwargs):
"""
@@ -2976,7 +2984,6 @@ def _fetch_po_token(self, client, **kwargs):
fetch_pot_policy == 'never'
or (
fetch_pot_policy == 'auto'
- and _PoTokenContext(context) not in self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']
and not kwargs.get('required', False)
)
):
@@ -3035,19 +3042,19 @@ def _is_agegated(player_response):
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
- def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
+ def _extract_player_response(self, client, video_id, webpage_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
headers = self.generate_api_headers(
ytcfg=player_ytcfg,
default_client=client,
visitor_data=visitor_data,
- session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
+ session_index=self._extract_session_index(webpage_ytcfg, player_ytcfg),
delegated_session_id=(
self._parse_data_sync_id(data_sync_id)[0]
- or self._extract_delegated_session_id(master_ytcfg, initial_pr, player_ytcfg)
+ or self._extract_delegated_session_id(webpage_ytcfg, initial_pr, player_ytcfg)
),
user_session_id=(
self._parse_data_sync_id(data_sync_id)[1]
- or self._extract_user_session_id(master_ytcfg, initial_pr, player_ytcfg)
+ or self._extract_user_session_id(webpage_ytcfg, initial_pr, player_ytcfg)
),
)
@@ -3063,7 +3070,7 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
if po_token:
yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}
- sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
+ sts = self._extract_signature_timestamp(video_id, player_url, webpage_ytcfg, fatal=False) if player_url else None
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
@@ -3072,10 +3079,14 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()),
) or None
- def _get_requested_clients(self, url, smuggled_data):
+ def _get_requested_clients(self, url, smuggled_data, is_premium_subscriber):
requested_clients = []
excluded_clients = []
- default_clients = self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated else self._DEFAULT_CLIENTS
+ default_clients = (
+ self._DEFAULT_PREMIUM_CLIENTS if is_premium_subscriber
+ else self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated
+ else self._DEFAULT_CLIENTS
+ )
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@@ -3117,11 +3128,12 @@ def _invalid_player_response(self, pr, video_id):
if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
return pr_id
- def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
+ def _extract_player_responses(self, clients, video_id, webpage, webpage_client, webpage_ytcfg, is_premium_subscriber):
initial_pr = None
if webpage:
initial_pr = self._search_json(
- self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage,
+ f'{webpage_client} client initial player response', video_id, fatal=False)
prs = []
deprioritized_prs = []
@@ -3152,11 +3164,11 @@ def append_client(*client_names):
while clients:
deprioritize_pr = False
client, base_client, variant = _split_innertube_client(clients.pop())
- player_ytcfg = master_ytcfg if client == 'web' else {}
- if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
+ player_ytcfg = webpage_ytcfg if client == webpage_client else {}
+ if 'configs' not in self._configuration_arg('player_skip') and client != webpage_client:
player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
- player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
+ player_url = player_url or self._extract_player_url(webpage_ytcfg, player_ytcfg, webpage=webpage)
require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
if 'js' in self._configuration_arg('player_skip'):
require_js_player = False
@@ -3166,10 +3178,12 @@ def append_client(*client_names):
player_url = self._download_player_url(video_id)
tried_iframe_fallback = True
- pr = initial_pr if client == 'web' else None
+ pr = None
+ if client == webpage_client and 'player_response' not in self._configuration_arg('webpage_skip'):
+ pr = initial_pr
- visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
- data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
+ visitor_data = visitor_data or self._extract_visitor_data(webpage_ytcfg, initial_pr, player_ytcfg)
+ data_sync_id = data_sync_id or self._extract_data_sync_id(webpage_ytcfg, initial_pr, player_ytcfg)
fetch_po_token_args = {
'client': client,
@@ -3178,53 +3192,26 @@ def append_client(*client_names):
'data_sync_id': data_sync_id if self.is_authenticated else None,
'player_url': player_url if require_js_player else None,
'webpage': webpage,
- 'session_index': self._extract_session_index(master_ytcfg, player_ytcfg),
+ 'session_index': self._extract_session_index(webpage_ytcfg, player_ytcfg),
'ytcfg': player_ytcfg or self._get_default_ytcfg(client),
}
# Don't need a player PO token for WEB if using player response from webpage
+ player_pot_policy: PlayerPoTokenPolicy = self._get_default_ytcfg(client)['PLAYER_PO_TOKEN_POLICY']
player_po_token = None if pr else self.fetch_po_token(
- context=_PoTokenContext.PLAYER, **fetch_po_token_args)
+ context=_PoTokenContext.PLAYER, **fetch_po_token_args,
+ required=player_pot_policy.required or player_pot_policy.recommended)
- gvs_po_token = self.fetch_po_token(
- context=_PoTokenContext.GVS, **fetch_po_token_args)
+ fetch_gvs_po_token_func = functools.partial(
+ self.fetch_po_token, context=_PoTokenContext.GVS, **fetch_po_token_args)
fetch_subs_po_token_func = functools.partial(
- self.fetch_po_token,
- context=_PoTokenContext.SUBS,
- **fetch_po_token_args,
- )
-
- required_pot_contexts = self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']
-
- if (
- not player_po_token
- and _PoTokenContext.PLAYER in required_pot_contexts
- ):
- # TODO: may need to skip player response request. Unsure yet..
- self.report_warning(
- f'No Player PO Token provided for {client} client, '
- f'which may be required for working {client} formats. This client will be deprioritized'
- f'You can manually pass a Player PO Token for this client with --extractor-args "youtube:po_token={client}.player+XXX". '
- f'For more information, refer to {PO_TOKEN_GUIDE_URL} .', only_once=True)
- deprioritize_pr = True
-
- if (
- not gvs_po_token
- and _PoTokenContext.GVS in required_pot_contexts
- and 'missing_pot' in self._configuration_arg('formats')
- ):
- # note: warning with help message is provided later during format processing
- self.report_warning(
- f'No GVS PO Token provided for {client} client, '
- f'which may be required for working {client} formats. This client will be deprioritized',
- only_once=True)
- deprioritize_pr = True
+ self.fetch_po_token, context=_PoTokenContext.SUBS, **fetch_po_token_args)
try:
pr = pr or self._extract_player_response(
client, video_id,
- master_ytcfg=player_ytcfg or master_ytcfg,
+ webpage_ytcfg=player_ytcfg or webpage_ytcfg,
player_ytcfg=player_ytcfg,
player_url=player_url,
initial_pr=initial_pr,
@@ -3242,12 +3229,16 @@ def append_client(*client_names):
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
sd = pr.setdefault('streamingData', {})
sd[STREAMING_DATA_CLIENT_NAME] = client
- sd[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
+ sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
+ sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
+ sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = client
- f[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
+ f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
+ f[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
+ f[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
if deprioritize_pr:
deprioritized_prs.append(pr)
else:
@@ -3357,6 +3348,15 @@ def build_fragments(f):
}),
} for range_start in range(0, f['filesize'], CHUNK_SIZE))
+ def gvs_pot_required(policy, is_premium_subscriber, has_player_token):
+ return (
+ policy.required
+ and not (policy.not_required_with_player_token and has_player_token)
+ and not (policy.not_required_for_premium and is_premium_subscriber))
+
+ # save pots per client to avoid fetching again
+ gvs_pots = {}
+
for fmt in streaming_formats:
client_name = fmt[STREAMING_DATA_CLIENT_NAME]
if fmt.get('targetDurationSec'):
@@ -3416,7 +3416,7 @@ def build_fragments(f):
encrypted_sig = try_get(sc, lambda x: x['s'][0])
if not all((sc, fmt_url, player_url, encrypted_sig)):
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
- if client_name == 'web':
+ if client_name in ('web', 'web_safari'):
msg += 'YouTube is forcing SABR streaming for this client. '
else:
msg += (
@@ -3476,18 +3476,25 @@ def build_fragments(f):
self.report_warning(
'Some formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
- po_token = fmt.get(STREAMING_DATA_INITIAL_PO_TOKEN)
+ fetch_po_token_func = fmt[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
+ pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
+
+ require_po_token = (
+ itag not in ['18']
+ and gvs_pot_required(
+ pot_policy, fmt[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER],
+ fmt[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]))
+
+ po_token = (
+ gvs_pots.get(client_name)
+ or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
fmt_url = update_url_query(fmt_url, {'pot': po_token})
+ if client_name not in gvs_pots:
+ gvs_pots[client_name] = po_token
- # Clients that require PO Token return videoplayback URLs that may return 403
- require_po_token = (
- not po_token
- and _PoTokenContext.GVS in self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
- and itag not in ['18']) # these formats do not require PO Token
-
- if require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
+ if not po_token and require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
self._report_pot_format_skipped(video_id, client_name, 'https')
continue
@@ -3502,7 +3509,7 @@ def build_fragments(f):
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
- is_damaged and 'DAMAGED', require_po_token and 'MISSING POT',
+ is_damaged and 'DAMAGED', require_po_token and not po_token and 'MISSING POT',
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
@@ -3565,7 +3572,7 @@ def build_fragments(f):
elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
skip_manifests.add('dash')
- def process_manifest_format(f, proto, client_name, itag, po_token):
+ def process_manifest_format(f, proto, client_name, itag, missing_pot):
key = (proto, f.get('language'))
if not all_formats and key in itags[itag]:
return False
@@ -3573,20 +3580,11 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
if f.get('source_preference') is None:
f['source_preference'] = -1
- # Clients that require PO Token return videoplayback URLs that may return 403
- # hls does not currently require PO Token
- if (
- not po_token
- and _PoTokenContext.GVS in self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
- and proto != 'hls'
- ):
- if 'missing_pot' not in self._configuration_arg('formats'):
- self._report_pot_format_skipped(video_id, client_name, proto)
- return False
+ if missing_pot:
f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
f['source_preference'] -= 20
- # XXX: Check if IOS HLS formats are affected by player PO token enforcement; temporary
+ # XXX: Check if IOS HLS formats are affected by PO token enforcement; temporary
# See https://github.com/yt-dlp/yt-dlp/issues/13511
if proto == 'hls' and client_name == 'ios':
f['__needs_testing'] = True
@@ -3625,39 +3623,62 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
subtitles = {}
for sd in streaming_data:
client_name = sd[STREAMING_DATA_CLIENT_NAME]
- po_token = sd.get(STREAMING_DATA_INITIAL_PO_TOKEN)
+ fetch_pot_func = sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
+ is_premium_subscriber = sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
+ has_player_token = sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
+
hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
if hls_manifest_url:
+ pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
+ client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS]
+ require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
+ po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
if po_token:
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
- fmts, subs = self._extract_m3u8_formats_and_subtitles(
- hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
- for sub in traverse_obj(subs, (..., ..., {dict})):
- # HLS subs (m3u8) do not need a PO token; save client name for debugging
- sub[STREAMING_DATA_CLIENT_NAME] = client_name
- subtitles = self._merge_subtitles(subs, subtitles)
- for f in fmts:
- if process_manifest_format(f, 'hls', client_name, self._search_regex(
- r'/itag/(\d+)', f['url'], 'itag', default=None), po_token):
- yield f
+ if client_name not in gvs_pots:
+ gvs_pots[client_name] = po_token
+ if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'):
+ self._report_pot_format_skipped(video_id, client_name, 'hls')
+ else:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
+ for sub in traverse_obj(subs, (..., ..., {dict})):
+ # TODO: If HLS video requires a PO Token, do the subs also require pot?
+ # Save client name for debugging
+ sub[STREAMING_DATA_CLIENT_NAME] = client_name
+ subtitles = self._merge_subtitles(subs, subtitles)
+ for f in fmts:
+ if process_manifest_format(f, 'hls', client_name, self._search_regex(
+ r'/itag/(\d+)', f['url'], 'itag', default=None), require_po_token and not po_token):
+ yield f
dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
if dash_manifest_url:
+ pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
+ client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH]
+ require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
+ po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
if po_token:
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
- formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
- for sub in traverse_obj(subs, (..., ..., {dict})):
- # TODO: Investigate if DASH subs ever need a PO token; save client name for debugging
- sub[STREAMING_DATA_CLIENT_NAME] = client_name
- subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
- for f in formats:
- if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
- f['filesize'] = int_or_none(self._search_regex(
- r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
- if needs_live_processing:
- f['is_from_start'] = True
+ if client_name not in gvs_pots:
+ gvs_pots[client_name] = po_token
+ if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'):
+ self._report_pot_format_skipped(video_id, client_name, 'dash')
+ else:
+ formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
+ for sub in traverse_obj(subs, (..., ..., {dict})):
+ # TODO: If DASH video requires a PO Token, do the subs also require pot?
+ # Save client name for debugging
+ sub[STREAMING_DATA_CLIENT_NAME] = client_name
+ subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
+ for f in formats:
+ if process_manifest_format(f, 'dash', client_name, f['format_id'], require_po_token and not po_token):
+ f['filesize'] = int_or_none(self._search_regex(
+ r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
+ if needs_live_processing:
+ f['is_from_start'] = True
- yield f
+ yield f
yield subtitles
def _extract_storyboard(self, player_responses, duration):
@@ -3698,22 +3719,22 @@ def _extract_storyboard(self, player_responses, duration):
} for j in range(math.ceil(fragment_count))],
}
- def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
+ def _download_initial_webpage(self, webpage_url, webpage_client, video_id):
webpage = None
- if 'webpage' not in self._configuration_arg('player_skip'):
+ if webpage_url and 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
- pp = self._configuration_arg('player_params', [None], casesense=True)[0]
+ pp = (
+ self._configuration_arg('player_params', [None], casesense=True)[0]
+ or traverse_obj(INNERTUBE_CLIENTS, (webpage_client, 'PLAYER_PARAMS', {str}))
+ )
if pp:
query['pp'] = pp
- webpage = self._download_webpage_with_retries(webpage_url, video_id, query=query)
-
- master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
-
- player_responses, player_url = self._extract_player_responses(
- self._get_requested_clients(url, smuggled_data),
- video_id, webpage, master_ytcfg, smuggled_data)
-
- return webpage, master_ytcfg, player_responses, player_url
+ webpage = self._download_webpage_with_retries(
+ webpage_url, video_id, query=query,
+ headers=traverse_obj(self._get_default_ytcfg(webpage_client), {
+ 'User-Agent': ('INNERTUBE_CONTEXT', 'client', 'userAgent', {str}),
+ }))
+ return webpage
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
@@ -3738,14 +3759,60 @@ def _list_formats(self, video_id, microformats, video_details, player_responses,
return live_broadcast_details, live_status, streaming_data, formats, subtitles
+ def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg):
+ initial_data = None
+ if webpage and 'initial_data' not in self._configuration_arg('webpage_skip'):
+ initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
+ if not traverse_obj(initial_data, 'contents'):
+ self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
+ initial_data = None
+ if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
+ query = {'videoId': video_id}
+ query.update(self._get_checkok_params())
+ initial_data = self._extract_response(
+ item_id=video_id, ep='next', fatal=False,
+ ytcfg=webpage_ytcfg, query=query, check_get_keys='contents',
+ note='Downloading initial data API JSON', default_client=webpage_client)
+ return initial_data
+
+ def _is_premium_subscriber(self, initial_data):
+ if not self.is_authenticated or not initial_data:
+ return False
+
+ tlr = traverse_obj(
+ initial_data, ('topbar', 'desktopTopbarRenderer', 'logo', 'topbarLogoRenderer'))
+ return (
+ traverse_obj(tlr, ('iconImage', 'iconType')) == 'YOUTUBE_PREMIUM_LOGO'
+ or 'premium' in (self._get_text(tlr, 'tooltipText') or '').lower()
+ )
+
+ def _initial_extract(self, url, smuggled_data, webpage_url, webpage_client, video_id):
+ # This function is also used by live-from-start refresh
+ webpage = self._download_initial_webpage(webpage_url, webpage_client, video_id)
+ webpage_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg(webpage_client)
+
+ initial_data = self._download_initial_data(video_id, webpage, webpage_client, webpage_ytcfg)
+
+ is_premium_subscriber = self._is_premium_subscriber(initial_data)
+ if is_premium_subscriber:
+ self.write_debug('Detected YouTube Premium subscription')
+
+ player_responses, player_url = self._extract_player_responses(
+ self._get_requested_clients(url, smuggled_data, is_premium_subscriber),
+ video_id, webpage, webpage_client, webpage_ytcfg, is_premium_subscriber)
+
+ return webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url
+
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
base_url = self.http_scheme() + '//www.youtube.com/'
webpage_url = base_url + 'watch?v=' + video_id
+ webpage_client = 'web'
- webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
+ webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url = self._initial_extract(
+ url, smuggled_data, webpage_url, webpage_client, video_id)
playability_statuses = traverse_obj(
player_responses, (..., 'playabilityStatus'), expected_type=dict)
@@ -4020,7 +4087,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
pctr = pr['captions']['playerCaptionsTracklistRenderer']
client_name = pr['streamingData'][STREAMING_DATA_CLIENT_NAME]
innertube_client_name = pr['streamingData'][STREAMING_DATA_INNERTUBE_CONTEXT]['client']['clientName']
- required_contexts = self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
+ pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['SUBS_PO_TOKEN_POLICY']
fetch_subs_po_token_func = pr['streamingData'][STREAMING_DATA_FETCH_SUBS_PO_TOKEN]
pot_params = {}
@@ -4033,11 +4100,11 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
requires_pot = (
# We can detect the experiment for now
any(e in traverse_obj(qs, ('exp', ...)) for e in ('xpe', 'xpv'))
- or _PoTokenContext.SUBS in required_contexts)
+ or (pot_policy.required and not (pot_policy.not_required_for_premium and is_premium_subscriber)))
if not already_fetched_pot:
already_fetched_pot = True
- if subs_po_token := fetch_subs_po_token_func(required=requires_pot):
+ if subs_po_token := fetch_subs_po_token_func(required=requires_pot or pot_policy.recommended):
pot_params.update({
'pot': subs_po_token,
'potc': '1',
@@ -4140,21 +4207,6 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
'release_year': int_or_none(release_year),
})
- initial_data = None
- if webpage:
- initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
- if not traverse_obj(initial_data, 'contents'):
- self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
- initial_data = None
- if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
- query = {'videoId': video_id}
- query.update(self._get_checkok_params())
- initial_data = self._extract_response(
- item_id=video_id, ep='next', fatal=False,
- ytcfg=master_ytcfg, query=query, check_get_keys='contents',
- headers=self.generate_api_headers(ytcfg=master_ytcfg),
- note='Downloading initial data API JSON')
-
COMMENTS_SECTION_IDS = ('comment-item-section', 'engagement-panel-comments-section')
info['comment_count'] = traverse_obj(initial_data, (
'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
@@ -4353,7 +4405,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
or get_first(microformats, 'isUnlisted', expected_type=bool))))
- info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
+ info['__post_extractor'] = self.extract_comments(webpage_ytcfg, video_id, contents, webpage)
self.mark_watched(video_id, player_responses)
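
Net effect of gvs_pot_required above: a GVS PO Token is mandatory only when the policy requires it and neither the player-token exemption nor the Premium exemption applies. A standalone restatement with spot checks against the web and android HTTPS policies defined in _base.py:

import dataclasses


@dataclasses.dataclass
class GvsPoTokenPolicy:  # re-declared from _base.py for a runnable sketch
    required: bool = False
    recommended: bool = False
    not_required_for_premium: bool = False
    not_required_with_player_token: bool = False


def gvs_pot_required(policy, is_premium_subscriber, has_player_token):
    return (
        policy.required
        and not (policy.not_required_with_player_token and has_player_token)
        and not (policy.not_required_for_premium and is_premium_subscriber))


web_https = GvsPoTokenPolicy(required=True, recommended=True, not_required_for_premium=True)
android_https = GvsPoTokenPolicy(required=True, recommended=True, not_required_with_player_token=True)

assert gvs_pot_required(web_https, False, False)         # anonymous web: POT mandatory
assert not gvs_pot_required(web_https, True, False)      # Premium web: exempt
assert not gvs_pot_required(android_https, False, True)  # android with player token: exempt
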