mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[crunchyroll:playlist] Implement beta API (#2955)
Closes #3121, #2930 Authored by: tejing1
This commit is contained in:
		| @@ -86,6 +86,22 @@ class CrunchyrollBaseIE(InfoExtractor): | ||||
|         if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): | ||||
|             raise ExtractorError('Login succeeded but did not set etp_rt cookie') | ||||
|  | ||||
|     # Beta-specific, but needed for redirects | ||||
|     def _get_beta_embedded_json(self, webpage, display_id): | ||||
|         initial_state = self._parse_json(self._search_regex( | ||||
|             r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id) | ||||
|         app_config = self._parse_json(self._search_regex( | ||||
|             r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id) | ||||
|         return initial_state, app_config | ||||
|  | ||||
|     def _redirect_to_beta(self, webpage, iekey, video_id): | ||||
|         if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): | ||||
|             raise ExtractorError('Received a beta page from non-beta url when not logged in.') | ||||
|         initial_state, app_config = self._get_beta_embedded_json(webpage, video_id) | ||||
|         url = app_config['baseSiteUrl'] + initial_state['router']['locations']['current']['pathname'] | ||||
|         self.to_screen(f'{video_id}: Redirected to beta site - {url}') | ||||
|         return self.url_result(f'{url}', iekey, video_id) | ||||
|  | ||||
|     @staticmethod | ||||
|     def _add_skip_wall(url): | ||||
|         parsed_url = compat_urlparse.urlparse(url) | ||||
| @@ -406,6 +422,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|         webpage = self._download_webpage( | ||||
|             self._add_skip_wall(webpage_url), video_id, | ||||
|             headers=self.geo_verification_headers()) | ||||
|         if re.search(r'<div id="preload-data">', webpage): | ||||
|             return self._redirect_to_beta(webpage, CrunchyrollBetaIE.ie_key(), video_id) | ||||
|         note_m = self._html_search_regex( | ||||
|             r'<div class="showmedia-trailer-notice">(.+?)</div>', | ||||
|             webpage, 'trailer-notice', default='') | ||||
| @@ -670,6 +688,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): | ||||
|             # https:// gives a 403, but http:// does not | ||||
|             self._add_skip_wall(url).replace('https://', 'http://'), show_id, | ||||
|             headers=self.geo_verification_headers()) | ||||
|         if re.search(r'<div id="preload-data">', webpage): | ||||
|             return self._redirect_to_beta(webpage, CrunchyrollBetaShowIE.ie_key(), show_id) | ||||
|         title = self._html_search_meta('name', webpage, default=None) | ||||
|  | ||||
|         episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"' | ||||
| @@ -692,9 +712,56 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CrunchyrollBetaIE(CrunchyrollBaseIE): | ||||
| class CrunchyrollBetaBaseIE(CrunchyrollBaseIE): | ||||
|     params = None | ||||
|  | ||||
|     def _get_params(self, lang): | ||||
|         if not CrunchyrollBetaBaseIE.params: | ||||
|             initial_state, app_config = self._get_beta_embedded_json(self._download_webpage( | ||||
|                 f'https://beta.crunchyroll.com/{lang}', None, note='Retrieving main page'), None) | ||||
|             api_domain = app_config['cxApiParams']['apiDomain'] | ||||
|             basic_token = str(base64.b64encode(('%s:' % app_config['cxApiParams']['accountAuthClientId']).encode('ascii')), 'ascii') | ||||
|             auth_response = self._download_json( | ||||
|                 f'{api_domain}/auth/v1/token', None, note='Authenticating with cookie', | ||||
|                 headers={ | ||||
|                     'Authorization': 'Basic ' + basic_token | ||||
|                 }, data='grant_type=etp_rt_cookie'.encode('ascii')) | ||||
|             policy_response = self._download_json( | ||||
|                 f'{api_domain}/index/v2', None, note='Retrieving signed policy', | ||||
|                 headers={ | ||||
|                     'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'] | ||||
|                 }) | ||||
|             bucket = policy_response['cms']['bucket'] | ||||
|             params = { | ||||
|                 'Policy': policy_response['cms']['policy'], | ||||
|                 'Signature': policy_response['cms']['signature'], | ||||
|                 'Key-Pair-Id': policy_response['cms']['key_pair_id'] | ||||
|             } | ||||
|             locale = traverse_obj(initial_state, ('localization', 'locale')) | ||||
|             if locale: | ||||
|                 params['locale'] = locale | ||||
|             CrunchyrollBetaBaseIE.params = (api_domain, bucket, params) | ||||
|         return CrunchyrollBetaBaseIE.params | ||||
|  | ||||
|     def _redirect_from_beta(self, url, lang, internal_id, display_id, is_episode, iekey): | ||||
|         initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(url, display_id), display_id) | ||||
|         content_data = initial_state['content']['byId'][internal_id] | ||||
|         if is_episode: | ||||
|             video_id = content_data['external_id'].split('.')[1] | ||||
|             series_id = content_data['episode_metadata']['series_slug_title'] | ||||
|         else: | ||||
|             series_id = content_data['slug_title'] | ||||
|         series_id = re.sub(r'-{2,}', '-', series_id) | ||||
|         url = f'https://www.crunchyroll.com/{lang}{series_id}' | ||||
|         if is_episode: | ||||
|             url = url + f'/{display_id}-{video_id}' | ||||
|         self.to_screen(f'{display_id}: Not logged in. Redirecting to non-beta site - {url}') | ||||
|         return self.url_result(url, iekey, display_id) | ||||
|  | ||||
|  | ||||
| class CrunchyrollBetaIE(CrunchyrollBetaBaseIE): | ||||
|     IE_NAME = 'crunchyroll:beta' | ||||
|     _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<internal_id>\w+)/(?P<id>[\w\-]+)/?(?:\?|$)' | ||||
|     _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<id>\w+)/(?P<display_id>[\w\-]*)/?(?:\?|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future', | ||||
|         'info_dict': { | ||||
| @@ -705,51 +772,49 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE): | ||||
|             'uploader': 'Toei Animation', | ||||
|             'title': 'World Trigger Episode 73 – To the Future', | ||||
|             'upload_date': '20160402', | ||||
|             'episode_number': 73, | ||||
|             'series': 'World Trigger', | ||||
|             'average_rating': 4.9, | ||||
|             'episode': 'To the Future', | ||||
|             'season': 'World Trigger', | ||||
|             'thumbnail': 'https://img1.ak.crunchyroll.com/i/spire3-tmb/c870dedca1a83137c2d3d144984155ed1459527119_main.jpg', | ||||
|             'season_number': 1, | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|         'expected_warnings': ['Unable to download XML'] | ||||
|     }, { | ||||
|         'url': 'https://beta.crunchyroll.com/watch/GYK53DMPR/wicked-lord-shingan-reborn', | ||||
|         'info_dict': { | ||||
|             'id': '648781', | ||||
|             'ext': 'mp4', | ||||
|             'episode_number': 1, | ||||
|             'timestamp': 1389173400, | ||||
|             'series': 'Love, Chunibyo & Other Delusions - Heart Throb -', | ||||
|             'description': 'md5:5579d1a0355cc618558ba23d27067a62', | ||||
|             'uploader': 'TBS', | ||||
|             'episode': 'Wicked Lord Shingan... Reborn', | ||||
|             'average_rating': 4.9, | ||||
|             'season': 'Love, Chunibyo & Other Delusions - Heart Throb -', | ||||
|             'thumbnail': 'https://img1.ak.crunchyroll.com/i/spire3-tmb/2ba0384e225a5370d5f0ee9496d91ea51389046521_main.jpg', | ||||
|             'title': 'Love, Chunibyo & Other Delusions - Heart Throb - Episode 1 – Wicked Lord Shingan... Reborn', | ||||
|             'season_number': 2, | ||||
|             'upload_date': '20140108', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|         'expected_warnings': ['Unable to download XML'] | ||||
|     }, { | ||||
|         'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         initial_state = self._parse_json( | ||||
|             self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), | ||||
|             display_id) | ||||
|         episode_data = initial_state['content']['byId'][internal_id] | ||||
|         if not self._get_cookies(url).get('etp_rt'): | ||||
|             video_id = episode_data['external_id'].split('.')[1] | ||||
|             series_id = episode_data['episode_metadata']['series_slug_title'] | ||||
|             return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}', | ||||
|                                    CrunchyrollIE.ie_key(), video_id) | ||||
|         lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id') | ||||
|  | ||||
|         if not self._get_cookies(url).get('etp_rt'): | ||||
|             return self._redirect_from_beta(url, lang, internal_id, display_id, True, CrunchyrollIE.ie_key()) | ||||
|  | ||||
|         api_domain, bucket, params = self._get_params(lang) | ||||
|  | ||||
|         app_config = self._parse_json( | ||||
|             self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), | ||||
|             display_id) | ||||
|         client_id = app_config['cxApiParams']['accountAuthClientId'] | ||||
|         api_domain = app_config['cxApiParams']['apiDomain'] | ||||
|         basic_token = str(base64.b64encode(('%s:' % client_id).encode('ascii')), 'ascii') | ||||
|         auth_response = self._download_json( | ||||
|             f'{api_domain}/auth/v1/token', display_id, | ||||
|             note='Authenticating with cookie', | ||||
|             headers={ | ||||
|                 'Authorization': 'Basic ' + basic_token | ||||
|             }, data='grant_type=etp_rt_cookie'.encode('ascii')) | ||||
|         policy_response = self._download_json( | ||||
|             f'{api_domain}/index/v2', display_id, | ||||
|             note='Retrieving signed policy', | ||||
|             headers={ | ||||
|                 'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'] | ||||
|             }) | ||||
|         bucket = policy_response['cms']['bucket'] | ||||
|         params = { | ||||
|             'Policy': policy_response['cms']['policy'], | ||||
|             'Signature': policy_response['cms']['signature'], | ||||
|             'Key-Pair-Id': policy_response['cms']['key_pair_id'] | ||||
|         } | ||||
|         locale = traverse_obj(initial_state, ('localization', 'locale')) | ||||
|         if locale: | ||||
|             params['locale'] = locale | ||||
|         episode_response = self._download_json( | ||||
|             f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id, | ||||
|             note='Retrieving episode metadata', | ||||
| @@ -827,9 +892,9 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CrunchyrollBetaShowIE(CrunchyrollBaseIE): | ||||
| class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE): | ||||
|     IE_NAME = 'crunchyroll:playlist:beta' | ||||
|     _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/\w+/(?P<id>[\w\-]+)/?(?:\?|$)' | ||||
|     _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/(?P<id>\w+)/(?P<display_id>[\w\-]*)/?(?:\?|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA', | ||||
|         'info_dict': { | ||||
| @@ -837,12 +902,57 @@ class CrunchyrollBetaShowIE(CrunchyrollBaseIE): | ||||
|             'title': 'Girl Friend BETA', | ||||
|         }, | ||||
|         'playlist_mincount': 10, | ||||
|     }, { | ||||
|         'url': 'https://beta.crunchyroll.com/series/GYJQV73V6/love-chunibyo--other-delusions---heart-throb--', | ||||
|         'info_dict': { | ||||
|             'id': 'love-chunibyo-other-delusions-heart-throb-', | ||||
|             'title': 'Love, Chunibyo & Other Delusions - Heart Throb -', | ||||
|         }, | ||||
|         'playlist_mincount': 10, | ||||
|     }, { | ||||
|         'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         lang, series_id = self._match_valid_url(url).group('lang', 'id') | ||||
|         return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id.lower()}', | ||||
|                                CrunchyrollShowPlaylistIE.ie_key(), series_id) | ||||
|         lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id') | ||||
|  | ||||
|         if not self._get_cookies(url).get('etp_rt'): | ||||
|             return self._redirect_from_beta(url, lang, internal_id, display_id, False, CrunchyrollShowPlaylistIE.ie_key()) | ||||
|  | ||||
|         api_domain, bucket, params = self._get_params(lang) | ||||
|  | ||||
|         series_response = self._download_json( | ||||
|             f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id, | ||||
|             note='Retrieving series metadata', query=params) | ||||
|  | ||||
|         seasons_response = self._download_json( | ||||
|             f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id, | ||||
|             note='Retrieving season list', query=params) | ||||
|  | ||||
|         def entries(): | ||||
|             for season in seasons_response['items']: | ||||
|                 episodes_response = self._download_json( | ||||
|                     f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id, | ||||
|                     note=f'Retrieving episode list for {season.get("slug_title")}', query=params) | ||||
|                 for episode in episodes_response['items']: | ||||
|                     episode_id = episode['id'] | ||||
|                     episode_display_id = episode['slug_title'] | ||||
|                     yield { | ||||
|                         '_type': 'url', | ||||
|                         'url': f'https://beta.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}', | ||||
|                         'ie_key': CrunchyrollBetaIE.ie_key(), | ||||
|                         'id': episode_id, | ||||
|                         'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')), | ||||
|                         'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')), | ||||
|                         'duration': float_or_none(episode.get('duration_ms'), 1000), | ||||
|                         'series': episode.get('series_title'), | ||||
|                         'series_id': episode.get('series_id'), | ||||
|                         'season': episode.get('season_title'), | ||||
|                         'season_id': episode.get('season_id'), | ||||
|                         'season_number': episode.get('season_number'), | ||||
|                         'episode': episode.get('title'), | ||||
|                         'episode_number': episode.get('sequence_number') | ||||
|                     } | ||||
|  | ||||
|         return self.playlist_result(entries(), internal_id, series_response.get('title')) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jeff Huffman
					Jeff Huffman