mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[youtube] Improvements to JS player extraction (See desc) (#860)
* fallback player url extraction when it fails to be extracted from the webpage
* don't download js player unnecessarily for clients that don't require it
* try to extract js player url from any additional client configs
* ability to skip the js player usage/download using `player_skip=js`
* ability to skip the initial webpage download using `player_skip=webpage`

Known issues:
* authentication for multi-channel accounts and multi-account cookies may not work correctly if the webpage or client configs are skipped
* formats from the web client requiring signature decryption will be skipped if player js extraction is skipped

Authored by: coletdjnz
This commit is contained in:
		| @@ -117,6 +117,7 @@ INNERTUBE_CLIENTS = { | ||||
|             } | ||||
|         }, | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 3, | ||||
|         'REQUIRE_JS_PLAYER': False | ||||
|     }, | ||||
|     'android_embedded': { | ||||
|         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', | ||||
| @@ -126,7 +127,8 @@ INNERTUBE_CLIENTS = { | ||||
|                 'clientVersion': '16.20', | ||||
|             }, | ||||
|         }, | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 55 | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 55, | ||||
|         'REQUIRE_JS_PLAYER': False | ||||
|     }, | ||||
|     'android_music': { | ||||
|         'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', | ||||
| @@ -138,6 +140,7 @@ INNERTUBE_CLIENTS = { | ||||
|             } | ||||
|         }, | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 21, | ||||
|         'REQUIRE_JS_PLAYER': False | ||||
|     }, | ||||
|     'android_creator': { | ||||
|         'INNERTUBE_CONTEXT': { | ||||
| @@ -146,7 +149,8 @@ INNERTUBE_CLIENTS = { | ||||
|                 'clientVersion': '21.24.100', | ||||
|             }, | ||||
|         }, | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 14 | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 14, | ||||
|         'REQUIRE_JS_PLAYER': False | ||||
|     }, | ||||
|     # ios has HLS live streams | ||||
|     # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680 | ||||
| @@ -158,7 +162,8 @@ INNERTUBE_CLIENTS = { | ||||
|                 'clientVersion': '16.20', | ||||
|             } | ||||
|         }, | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 5 | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 5, | ||||
|         'REQUIRE_JS_PLAYER': False | ||||
|     }, | ||||
|     'ios_embedded': { | ||||
|         'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8', | ||||
| @@ -168,7 +173,8 @@ INNERTUBE_CLIENTS = { | ||||
|                 'clientVersion': '16.20', | ||||
|             }, | ||||
|         }, | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 66 | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 66, | ||||
|         'REQUIRE_JS_PLAYER': False | ||||
|     }, | ||||
|     'ios_music': { | ||||
|         'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og', | ||||
| @@ -179,7 +185,8 @@ INNERTUBE_CLIENTS = { | ||||
|                 'clientVersion': '4.32', | ||||
|             }, | ||||
|         }, | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 26 | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 26, | ||||
|         'REQUIRE_JS_PLAYER': False | ||||
|     }, | ||||
|     'ios_creator': { | ||||
|         'INNERTUBE_CONTEXT': { | ||||
| @@ -188,7 +195,8 @@ INNERTUBE_CLIENTS = { | ||||
|                 'clientVersion': '21.24.100', | ||||
|             }, | ||||
|         }, | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 15 | ||||
|         'INNERTUBE_CONTEXT_CLIENT_NAME': 15, | ||||
|         'REQUIRE_JS_PLAYER': False | ||||
|     }, | ||||
|     # mweb has 'ultralow' formats | ||||
|     # See: https://github.com/yt-dlp/yt-dlp/pull/557 | ||||
| @@ -215,6 +223,7 @@ def build_innertube_clients(): | ||||
|     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): | ||||
|         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8') | ||||
|         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') | ||||
|         ytcfg.setdefault('REQUIRE_JS_PLAYER', True) | ||||
|         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') | ||||
|         ytcfg['priority'] = 10 * priority(client.split('_', 1)[0]) | ||||
|  | ||||
| @@ -1858,14 +1867,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         self._code_cache = {} | ||||
|         self._player_cache = {} | ||||
|  | ||||
|     def _extract_player_url(self, ytcfg=None, webpage=None): | ||||
|         player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str) | ||||
|         if not player_url and webpage: | ||||
|             player_url = self._search_regex( | ||||
|                 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"', | ||||
|                 webpage, 'player URL', fatal=False) | ||||
|     def _extract_player_url(self, *ytcfgs, webpage=None): | ||||
|         player_url = traverse_obj( | ||||
|             ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'), | ||||
|             get_all=False, expected_type=compat_str) | ||||
|         if not player_url: | ||||
|             return None | ||||
|             return | ||||
|         if player_url.startswith('//'): | ||||
|             player_url = 'https:' + player_url | ||||
|         elif not re.match(r'https?://', player_url): | ||||
| @@ -1873,6 +1880,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'https://www.youtube.com', player_url) | ||||
|         return player_url | ||||
|  | ||||
|     def _download_player_url(self, video_id, fatal=False): | ||||
|         res = self._download_webpage( | ||||
|             'https://www.youtube.com/iframe_api', | ||||
|             note='Downloading iframe API JS', video_id=video_id, fatal=fatal) | ||||
|         if res: | ||||
|             player_version = self._search_regex( | ||||
|                 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal) | ||||
|             if player_version: | ||||
|                 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js' | ||||
|  | ||||
|     def _signature_cache_id(self, example_sig): | ||||
|         """ Return a string representation of a signature """ | ||||
|         return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) | ||||
| @@ -2462,7 +2479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|  | ||||
|         session_index = self._extract_session_index(player_ytcfg, master_ytcfg) | ||||
|         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr) | ||||
|         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) | ||||
|         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None | ||||
|         headers = self.generate_api_headers( | ||||
|             player_ytcfg, identity_token, syncid, | ||||
|             default_client=client, session_index=session_index) | ||||
| @@ -2507,7 +2524,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config') | ||||
|         return self.extract_ytcfg(video_id, webpage) or {} | ||||
|  | ||||
|     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token): | ||||
|     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, identity_token): | ||||
|         initial_pr = None | ||||
|         if webpage: | ||||
|             initial_pr = self._extract_yt_initial_variable( | ||||
| @@ -2516,6 +2533,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|  | ||||
|         original_clients = clients | ||||
|         clients = clients[::-1] | ||||
|         prs = [] | ||||
|  | ||||
|         def append_client(client_name): | ||||
|             if client_name in INNERTUBE_CLIENTS and client_name not in original_clients: | ||||
| @@ -2525,23 +2543,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         # extraction of some data. So we return the initial_pr with formats | ||||
|         # stripped out even if not requested by the user | ||||
|         # See: https://github.com/yt-dlp/yt-dlp/issues/501 | ||||
|         yielded_pr = False | ||||
|         if initial_pr: | ||||
|             pr = dict(initial_pr) | ||||
|             pr['streamingData'] = None | ||||
|             yielded_pr = True | ||||
|             yield pr | ||||
|             prs.append(pr) | ||||
|  | ||||
|         last_error = None | ||||
|         tried_iframe_fallback = False | ||||
|         player_url = None | ||||
|         while clients: | ||||
|             client = clients.pop() | ||||
|             player_ytcfg = master_ytcfg if client == 'web' else {} | ||||
|             if 'configs' not in self._configuration_arg('player_skip'): | ||||
|                 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg | ||||
|  | ||||
|             player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) | ||||
|             require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER') | ||||
|             if 'js' in self._configuration_arg('player_skip'): | ||||
|                 require_js_player = False | ||||
|                 player_url = None | ||||
|  | ||||
|             if not player_url and not tried_iframe_fallback and require_js_player: | ||||
|                 player_url = self._download_player_url(video_id) | ||||
|                 tried_iframe_fallback = True | ||||
|  | ||||
|             try: | ||||
|                 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( | ||||
|                     client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr) | ||||
|                     client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url if require_js_player else None, initial_pr) | ||||
|             except ExtractorError as e: | ||||
|                 if last_error: | ||||
|                     self.report_warning(last_error) | ||||
| @@ -2549,8 +2577,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 continue | ||||
|  | ||||
|             if pr: | ||||
|                 yielded_pr = True | ||||
|                 yield pr | ||||
|                 prs.append(pr) | ||||
|  | ||||
|             # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in | ||||
|             if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header(): | ||||
| @@ -2559,9 +2586,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 append_client(f'{client}_agegate') | ||||
|  | ||||
|         if last_error: | ||||
|             if not yielded_pr: | ||||
|             if not len(prs): | ||||
|                 raise last_error | ||||
|             self.report_warning(last_error) | ||||
|         return prs, player_url | ||||
|  | ||||
|     def _extract_formats(self, streaming_data, video_id, player_url, is_live): | ||||
|         itags, stream_ids = [], [] | ||||
| @@ -2708,16 +2736,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|  | ||||
|         base_url = self.http_scheme() + '//www.youtube.com/' | ||||
|         webpage_url = base_url + 'watch?v=' + video_id | ||||
|         webpage = self._download_webpage( | ||||
|             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) | ||||
|         webpage = None | ||||
|         if 'webpage' not in self._configuration_arg('player_skip'): | ||||
|             webpage = self._download_webpage( | ||||
|                 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) | ||||
|  | ||||
|         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() | ||||
|         player_url = self._extract_player_url(master_ytcfg, webpage) | ||||
|         identity_token = self._extract_identity_token(webpage, video_id) | ||||
|  | ||||
|         player_responses = list(self._extract_player_responses( | ||||
|         player_responses, player_url = self._extract_player_responses( | ||||
|             self._get_requested_clients(url, smuggled_data), | ||||
|             video_id, webpage, master_ytcfg, player_url, identity_token)) | ||||
|             video_id, webpage, master_ytcfg, identity_token) | ||||
|  | ||||
|         get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 coletdjnz
					coletdjnz