mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[roosterteeth] Add series extractor
This commit is contained in:
		| @@ -1,74 +1,31 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
| ) | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     LazyList, | ||||
|     parse_qs, | ||||
|     str_or_none, | ||||
|     traverse_obj, | ||||
|     url_or_none, | ||||
|     urlencode_postdata, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RoosterTeethIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' | ||||
| class RoosterTeethBaseIE(InfoExtractor): | ||||
|     _NETRC_MACHINE = 'roosterteeth' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', | ||||
|         'md5': 'e2bd7764732d785ef797700a2489f212', | ||||
|         'info_dict': { | ||||
|             'id': '9156', | ||||
|             'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Million Dollars, But... The Game Announcement', | ||||
|             'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', | ||||
|             'thumbnail': r're:^https?://.*\.png$', | ||||
|             'series': 'Million Dollars, But...', | ||||
|             'episode': 'Million Dollars, But... The Game Announcement', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://roosterteeth.com/watch/rwby-bonus-25', | ||||
|         'md5': 'fe8d9d976b272c18a24fe7f1f5830084', | ||||
|         'info_dict': { | ||||
|             'id': '31', | ||||
|             'display_id': 'rwby-bonus-25', | ||||
|             'title': 'Volume 2, World of Remnant 3', | ||||
|             'description': 'md5:8d58d3270292ea11da00ea712bbfb009', | ||||
|             'episode': 'Volume 2, World of Remnant 3', | ||||
|             'channel_id': 'fab60c1c-29cb-43bc-9383-5c3538d9e246', | ||||
|             'thumbnail': r're:^https?://.*\.(png|jpe?g)$', | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # only available for FIRST members | ||||
|         'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/watch/' | ||||
|     _API_BASE = 'https://svod-be.roosterteeth.com' | ||||
|     _API_BASE_URL = f'{_API_BASE}/api/v1' | ||||
|  | ||||
|     def _login(self): | ||||
|         username, password = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|         if self._get_cookies(self._API_BASE_URL).get('rt_access_token'): | ||||
|             return | ||||
|  | ||||
|         try: | ||||
|             self._download_json( | ||||
| @@ -90,13 +47,95 @@ class RoosterTeethIE(InfoExtractor): | ||||
|             self.report_warning(msg) | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         """Log in unless an access-token cookie is already present.""" | ||||
|         # An 'rt_access_token' cookie stored for the episode API URL means there is nothing to do | ||||
|         if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'): | ||||
|             return | ||||
|         self._login() | ||||
|  | ||||
|     def _extract_video_info(self, data): | ||||
|         thumbnails = [] | ||||
|         for image in traverse_obj(data, ('included', 'images')): | ||||
|             if image.get('type') not in ('episode_image', 'bonus_feature_image'): | ||||
|                 continue | ||||
|             thumbnails.extend([{ | ||||
|                 'id': name, | ||||
|                 'url': url, | ||||
|             } for name, url in (image.get('attributes') or {}).items() if url_or_none(url)]) | ||||
|  | ||||
|         attributes = data.get('attributes') or {} | ||||
|         title = traverse_obj(attributes, 'title', 'display_title') | ||||
|         sub_only = attributes.get('is_sponsors_only') | ||||
|  | ||||
|         return { | ||||
|             'id': str(data.get('id')), | ||||
|             'display_id': attributes.get('slug'), | ||||
|             'title': title, | ||||
|             'description': traverse_obj(attributes, 'description', 'caption'), | ||||
|             'series': attributes.get('show_title'), | ||||
|             'season_number': int_or_none(attributes.get('season_number')), | ||||
|             'season_id': attributes.get('season_id'), | ||||
|             'episode': title, | ||||
|             'episode_number': int_or_none(attributes.get('number')), | ||||
|             'episode_id': str_or_none(data.get('uuid')), | ||||
|             'channel_id': attributes.get('channel_id'), | ||||
|             'duration': int_or_none(attributes.get('length')), | ||||
|             'thumbnails': thumbnails, | ||||
|             'availability': self._availability( | ||||
|                 needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only, | ||||
|                 is_private=False, is_unlisted=False), | ||||
|             'tags': attributes.get('genres') | ||||
|         } | ||||
|  | ||||
|  | ||||
| class RoosterTeethIE(RoosterTeethBaseIE): | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', | ||||
|         'info_dict': { | ||||
|             'id': '9156', | ||||
|             'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Million Dollars, But... The Game Announcement', | ||||
|             'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', | ||||
|             'thumbnail': r're:^https?://.*\.png$', | ||||
|             'series': 'Million Dollars, But...', | ||||
|             'episode': 'Million Dollars, But... The Game Announcement', | ||||
|         }, | ||||
|         'skip_download': 'm3u8', | ||||
|     }, { | ||||
|         'url': 'https://roosterteeth.com/watch/rwby-bonus-25', | ||||
|         'info_dict': { | ||||
|             'id': '40432', | ||||
|             'display_id': 'rwby-bonus-25', | ||||
|             'title': 'Grimm', | ||||
|             'description': 'md5:f30ff570741213418a8d2c19868b93ab', | ||||
|             'episode': 'Grimm', | ||||
|             'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1', | ||||
|             'thumbnail': r're:^https?://.*\.(png|jpe?g)$', | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|         'skip_download': 'm3u8', | ||||
|     }, { | ||||
|         'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # only available for FIRST members | ||||
|         'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         api_episode_url = self._EPISODE_BASE_URL + display_id | ||||
|         api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}' | ||||
|  | ||||
|         try: | ||||
|             video_data = self._download_json( | ||||
| @@ -118,36 +157,55 @@ class RoosterTeethIE(InfoExtractor): | ||||
|         episode = self._download_json( | ||||
|             api_episode_url, display_id, | ||||
|             'Downloading episode JSON metadata')['data'][0] | ||||
|         attributes = episode['attributes'] | ||||
|         title = attributes.get('title') or attributes['display_title'] | ||||
|         video_id = compat_str(episode['id']) | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for image in episode.get('included', {}).get('images', []): | ||||
|             if image.get('type') in ('episode_image', 'bonus_feature_image'): | ||||
|                 img_attributes = image.get('attributes') or {} | ||||
|                 for k in ('thumb', 'small', 'medium', 'large'): | ||||
|                     img_url = img_attributes.get(k) | ||||
|                     if img_url: | ||||
|                         thumbnails.append({ | ||||
|                             'id': k, | ||||
|                             'url': img_url, | ||||
|                         }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': attributes.get('description') or attributes.get('caption'), | ||||
|             'thumbnails': thumbnails, | ||||
|             'series': attributes.get('show_title'), | ||||
|             'season_number': int_or_none(attributes.get('season_number')), | ||||
|             'season_id': attributes.get('season_id'), | ||||
|             'episode': title, | ||||
|             'episode_number': int_or_none(attributes.get('number')), | ||||
|             'episode_id': str_or_none(episode.get('uuid')), | ||||
|             'formats': formats, | ||||
|             'channel_id': attributes.get('channel_id'), | ||||
|             'duration': int_or_none(attributes.get('length')), | ||||
|             'subtitles': subtitles | ||||
|             'subtitles': subtitles, | ||||
|             **self._extract_video_info(episode) | ||||
|         } | ||||
|  | ||||
|  | ||||
| class RoosterTeethSeriesIE(RoosterTeethBaseIE): | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://roosterteeth.com/series/rwby?season=7', | ||||
|         'playlist_count': 13, | ||||
|         'info_dict': { | ||||
|             'id': 'rwby-7', | ||||
|             'title': 'RWBY - Season 7', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://roosterteeth.com/series/role-initiative', | ||||
|         'playlist_mincount': 16, | ||||
|         'info_dict': { | ||||
|             'id': 'role-initiative', | ||||
|             'title': 'Role Initiative', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _entries(self, series_id, season_number): | ||||
|         display_id = join_nonempty(series_id, season_number) | ||||
|         # TODO: extract bonus material | ||||
|         for data in self._download_json( | ||||
|                 f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)['data']: | ||||
|             idx = traverse_obj(data, ('attributes', 'number')) | ||||
|             if season_number and idx != season_number: | ||||
|                 continue | ||||
|             season_url = urljoin(self._API_BASE, data['links']['episodes']) | ||||
|             season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data'] | ||||
|             for episode in season: | ||||
|                 yield self.url_result( | ||||
|                     f'https://www.roosterteeth.com{episode["canonical_links"]["self"]}', | ||||
|                     RoosterTeethIE.ie_key(), | ||||
|                     **self._extract_video_info(episode)) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         series_id = self._match_id(url) | ||||
|         season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none) | ||||
|  | ||||
|         entries = LazyList(self._entries(series_id, season_number)) | ||||
|         return self.playlist_result( | ||||
|             entries, | ||||
|             join_nonempty(series_id, season_number), | ||||
|             join_nonempty(entries[0].get('series'), season_number, delim=' - Season ')) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan