mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[crunchyroll:beta] Add cookies support (#2506)
* Extract directly from the beta API when cookies are passed. If login cookie is absent, the extraction is delegated to `CrunchyrollIE`. This causes different metadata to be extracted (including formats and video id) and therefore results in a different archive entry. For now, this issue is unavoidable since the browser also redirects to the old site when not logged in. * Adds extractor-args `format` and `hardsub` to control the source and subtitles of the extracted formats Closes #1911 Authored by: tejing1
This commit is contained in:
		| @@ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import re | ||||
| import json | ||||
| import zlib | ||||
| @@ -23,13 +24,16 @@ from ..utils import ( | ||||
|     bytes_to_intlist, | ||||
|     extract_attributes, | ||||
|     float_or_none, | ||||
|     format_field, | ||||
|     intlist_to_bytes, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     lowercase_escape, | ||||
|     merge_dicts, | ||||
|     qualities, | ||||
|     remove_end, | ||||
|     sanitized_Request, | ||||
|     traverse_obj, | ||||
|     try_get, | ||||
|     urlencode_postdata, | ||||
|     xpath_text, | ||||
| @@ -733,13 +737,118 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE): | ||||
|     def _real_extract(self, url): | ||||
|         lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         episode_data = self._parse_json( | ||||
|             self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'), | ||||
|             display_id)['content']['byId'][internal_id] | ||||
|         video_id = episode_data['external_id'].split('.')[1] | ||||
|         series_id = episode_data['episode_metadata']['series_slug_title'] | ||||
|         return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}', | ||||
|                                CrunchyrollIE.ie_key(), video_id) | ||||
|         initial_state = self._parse_json( | ||||
|             self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), | ||||
|             display_id) | ||||
|         episode_data = initial_state['content']['byId'][internal_id] | ||||
|         if not self._get_cookies(url).get('etp_rt'): | ||||
|             video_id = episode_data['external_id'].split('.')[1] | ||||
|             series_id = episode_data['episode_metadata']['series_slug_title'] | ||||
|             return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}', | ||||
|                                    CrunchyrollIE.ie_key(), video_id) | ||||
|  | ||||
|         app_config = self._parse_json( | ||||
|             self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), | ||||
|             display_id) | ||||
|         client_id = app_config['cxApiParams']['accountAuthClientId'] | ||||
|         api_domain = app_config['cxApiParams']['apiDomain'] | ||||
|         basic_token = str(base64.b64encode(('%s:' % client_id).encode('ascii')), 'ascii') | ||||
|         auth_response = self._download_json( | ||||
|             f'{api_domain}/auth/v1/token', display_id, | ||||
|             note='Authenticating with cookie', | ||||
|             headers={ | ||||
|                 'Authorization': 'Basic ' + basic_token | ||||
|             }, data='grant_type=etp_rt_cookie'.encode('ascii')) | ||||
|         policy_response = self._download_json( | ||||
|             f'{api_domain}/index/v2', display_id, | ||||
|             note='Retrieving signed policy', | ||||
|             headers={ | ||||
|                 'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'] | ||||
|             }) | ||||
|         bucket = policy_response['cms']['bucket'] | ||||
|         params = { | ||||
|             'Policy': policy_response['cms']['policy'], | ||||
|             'Signature': policy_response['cms']['signature'], | ||||
|             'Key-Pair-Id': policy_response['cms']['key_pair_id'] | ||||
|         } | ||||
|         locale = traverse_obj(initial_state, ('localization', 'locale')) | ||||
|         if locale: | ||||
|             params['locale'] = locale | ||||
|         episode_response = self._download_json( | ||||
|             f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id, | ||||
|             note='Retrieving episode metadata', | ||||
|             query=params) | ||||
|         if episode_response.get('is_premium_only') and not episode_response.get('playback'): | ||||
|             raise ExtractorError('This video is for premium members only.', expected=True) | ||||
|         stream_response = self._download_json( | ||||
|             episode_response['playback'], display_id, | ||||
|             note='Retrieving stream info') | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')): | ||||
|             for thumbnail_data in thumbnails_data: | ||||
|                 thumbnails.append({ | ||||
|                     'url': thumbnail_data.get('source'), | ||||
|                     'width': thumbnail_data.get('width'), | ||||
|                     'height': thumbnail_data.get('height'), | ||||
|                 }) | ||||
|         subtitles = {} | ||||
|         for lang, subtitle_data in stream_response.get('subtitles').items(): | ||||
|             subtitles[lang] = [{ | ||||
|                 'url': subtitle_data.get('url'), | ||||
|                 'ext': subtitle_data.get('format') | ||||
|             }] | ||||
|  | ||||
|         requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])] | ||||
|         hardsub_preference = qualities(requested_hardsubs[::-1]) | ||||
|         requested_formats = self._configuration_arg('format') or ['adaptive_hls'] | ||||
|  | ||||
|         formats = [] | ||||
|         for stream_type, streams in stream_response.get('streams', {}).items(): | ||||
|             if stream_type not in requested_formats: | ||||
|                 continue | ||||
|             for stream in streams.values(): | ||||
|                 hardsub_lang = stream.get('hardsub_locale') or '' | ||||
|                 if hardsub_lang.lower() not in requested_hardsubs: | ||||
|                     continue | ||||
|                 format_id = join_nonempty( | ||||
|                     stream_type, | ||||
|                     format_field(stream, 'hardsub_locale', 'hardsub-%s')) | ||||
|                 if not stream.get('url'): | ||||
|                     continue | ||||
|                 if stream_type.split('_')[-1] == 'hls': | ||||
|                     adaptive_formats = self._extract_m3u8_formats( | ||||
|                         stream['url'], display_id, 'mp4', m3u8_id=format_id, | ||||
|                         note='Downloading %s information' % format_id, | ||||
|                         fatal=False) | ||||
|                 elif stream_type.split('_')[-1] == 'dash': | ||||
|                     adaptive_formats = self._extract_mpd_formats( | ||||
|                         stream['url'], display_id, mpd_id=format_id, | ||||
|                         note='Downloading %s information' % format_id, | ||||
|                         fatal=False) | ||||
|                 for f in adaptive_formats: | ||||
|                     if f.get('acodec') != 'none': | ||||
|                         f['language'] = stream_response.get('audio_locale') | ||||
|                     f['quality'] = hardsub_preference(hardsub_lang.lower()) | ||||
|                 formats.extend(adaptive_formats) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': internal_id, | ||||
|             'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')), | ||||
|             'description': episode_response.get('description').replace(r'\r\n', '\n'), | ||||
|             'duration': float_or_none(episode_response.get('duration_ms'), 1000), | ||||
|             'thumbnails': thumbnails, | ||||
|             'series': episode_response.get('series_title'), | ||||
|             'series_id': episode_response.get('series_id'), | ||||
|             'season': episode_response.get('season_title'), | ||||
|             'season_id': episode_response.get('season_id'), | ||||
|             'season_number': episode_response.get('season_number'), | ||||
|             'episode': episode_response.get('title'), | ||||
|             'episode_number': episode_response.get('sequence_number'), | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CrunchyrollBetaShowIE(CrunchyrollBaseIE): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jeff Huffman
					Jeff Huffman