mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[ie/gem.cbc.ca] Fix extractors (#12404)
Does not fix broken login support. Closes #11848. Authored by: bashonly, dirkf. Co-authored-by: dirkf <fieldhouse@gmx.net>
This commit is contained in:
		| @@ -14,16 +14,18 @@ from ..utils import ( | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     orderedSet, | ||||
|     parse_age_limit, | ||||
|     parse_iso8601, | ||||
|     replace_extension, | ||||
|     smuggle_url, | ||||
|     strip_or_none, | ||||
|     traverse_obj, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     update_url, | ||||
|     url_basename, | ||||
|     url_or_none, | ||||
| ) | ||||
| from ..utils.traversal import require, traverse_obj, trim_str | ||||
| 
 | ||||
| 
 | ||||
| class CBCIE(InfoExtractor): | ||||
| @@ -516,9 +518,43 @@ class CBCPlayerPlaylistIE(InfoExtractor): | ||||
|         return self.playlist_result(entries(), playlist_id) | ||||
| 
 | ||||
| 
 | ||||
| class CBCGemIE(InfoExtractor): | ||||
| class CBCGemBaseIE(InfoExtractor): | ||||
|     _NETRC_MACHINE = 'cbcgem' | ||||
|     _GEO_COUNTRIES = ['CA'] | ||||
| 
 | ||||
|     def _call_show_api(self, item_id, display_id=None): | ||||
|         return self._download_json( | ||||
|             f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}', | ||||
|             display_id or item_id, query={'device': 'web'}) | ||||
| 
 | ||||
|     def _extract_item_info(self, item_info): | ||||
|         episode_number = None | ||||
|         title = traverse_obj(item_info, ('title', {str})) | ||||
|         if title and (mobj := re.match(r'(?P<episode>\d+)\. (?P<title>.+)', title)): | ||||
|             episode_number = int_or_none(mobj.group('episode')) | ||||
|             title = mobj.group('title') | ||||
| 
 | ||||
|         return { | ||||
|             'episode_number': episode_number, | ||||
|             **traverse_obj(item_info, { | ||||
|                 'id': ('url', {str}), | ||||
|                 'episode_id': ('url', {str}), | ||||
|                 'description': ('description', {str}), | ||||
|                 'thumbnail': ('images', 'card', 'url', {url_or_none}, {update_url(query=None)}), | ||||
|                 'episode_number': ('episodeNumber', {int_or_none}), | ||||
|                 'duration': ('metadata', 'duration', {int_or_none}), | ||||
|                 'release_timestamp': ('metadata', 'airDate', {unified_timestamp}), | ||||
|                 'timestamp': ('metadata', 'availabilityDate', {unified_timestamp}), | ||||
|                 'age_limit': ('metadata', 'rating', {trim_str(start='C')}, {parse_age_limit}), | ||||
|             }), | ||||
|             'episode': title, | ||||
|             'title': title, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| class CBCGemIE(CBCGemBaseIE): | ||||
|     IE_NAME = 'gem.cbc.ca' | ||||
|     _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' | ||||
|     _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         # This is a normal, public, TV show video | ||||
|         'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', | ||||
| @@ -529,7 +565,7 @@ class CBCGemIE(InfoExtractor): | ||||
|             'description': 'md5:929868d20021c924020641769eb3e7f1', | ||||
|             'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg', | ||||
|             'duration': 1324, | ||||
|             'categories': ['comedy'], | ||||
|             'genres': ['Comédie et humour'], | ||||
|             'series': 'Schitt\'s Creek', | ||||
|             'season': 'Season 6', | ||||
|             'season_number': 6, | ||||
| @@ -537,9 +573,10 @@ class CBCGemIE(InfoExtractor): | ||||
|             'episode_number': 1, | ||||
|             'episode_id': 'schitts-creek/s06e01', | ||||
|             'upload_date': '20210618', | ||||
|             'timestamp': 1623988800, | ||||
|             'timestamp': 1623974400, | ||||
|             'release_date': '20200107', | ||||
|             'release_timestamp': 1578427200, | ||||
|             'release_timestamp': 1578355200, | ||||
|             'age_limit': 14, | ||||
|         }, | ||||
|         'params': {'format': 'bv'}, | ||||
|     }, { | ||||
| @@ -557,12 +594,13 @@ class CBCGemIE(InfoExtractor): | ||||
|             'episode_number': 1, | ||||
|             'episode': 'The Cup Runneth Over', | ||||
|             'episode_id': 'schitts-creek/s01e01', | ||||
|             'duration': 1309, | ||||
|             'categories': ['comedy'], | ||||
|             'duration': 1308, | ||||
|             'genres': ['Comédie et humour'], | ||||
|             'upload_date': '20210617', | ||||
|             'timestamp': 1623902400, | ||||
|             'release_date': '20151124', | ||||
|             'release_timestamp': 1448323200, | ||||
|             'timestamp': 1623888000, | ||||
|             'release_date': '20151123', | ||||
|             'release_timestamp': 1448236800, | ||||
|             'age_limit': 14, | ||||
|         }, | ||||
|         'params': {'format': 'bv'}, | ||||
|     }, { | ||||
| @@ -570,9 +608,7 @@ class CBCGemIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
|     _GEO_COUNTRIES = ['CA'] | ||||
|     _TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37' | ||||
|     _NETRC_MACHINE = 'cbcgem' | ||||
|     _claims_token = None | ||||
| 
 | ||||
|     def _new_claims_token(self, email, password): | ||||
| @@ -634,10 +670,12 @@ class CBCGemIE(InfoExtractor): | ||||
|         self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token') | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         video_info = self._download_json( | ||||
|             f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}', | ||||
|             video_id, expected_status=426) | ||||
|         video_id, season_number = self._match_valid_url(url).group('id', 'season') | ||||
|         video_info = self._call_show_api(video_id) | ||||
|         item_info = traverse_obj(video_info, ( | ||||
|             'content', ..., 'lineups', ..., 'items', | ||||
|             lambda _, v: v['url'] == video_id, any, {require('item info')})) | ||||
|         media_id = item_info['idMedia'] | ||||
| 
 | ||||
|         email, password = self._get_login_info() | ||||
|         if email and password: | ||||
| @@ -645,7 +683,20 @@ class CBCGemIE(InfoExtractor): | ||||
|             headers = {'x-claims-token': claims_token} | ||||
|         else: | ||||
|             headers = {} | ||||
|         m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers) | ||||
| 
 | ||||
|         m3u8_info = self._download_json( | ||||
|             'https://services.radio-canada.ca/media/validation/v2/', | ||||
|             video_id, headers=headers, query={ | ||||
|                 'appCode': 'gem', | ||||
|                 'connectionType': 'hd', | ||||
|                 'deviceType': 'ipad', | ||||
|                 'multibitrate': 'true', | ||||
|                 'output': 'json', | ||||
|                 'tech': 'hls', | ||||
|                 'manifestVersion': '2', | ||||
|                 'manifestType': 'desktop', | ||||
|                 'idMedia': media_id, | ||||
|             }) | ||||
| 
 | ||||
|         if m3u8_info.get('errorCode') == 1: | ||||
|             self.raise_geo_restricted(countries=['CA']) | ||||
| @@ -671,26 +722,20 @@ class CBCGemIE(InfoExtractor): | ||||
|                     fmt['preference'] = -2 | ||||
| 
 | ||||
|         return { | ||||
|             'season_number': int_or_none(season_number), | ||||
|             **traverse_obj(video_info, { | ||||
|                 'series': ('title', {str}), | ||||
|                 'season_number': ('structuredMetadata', 'partofSeason', 'seasonNumber', {int_or_none}), | ||||
|                 'genres': ('structuredMetadata', 'genre', ..., {str}), | ||||
|             }), | ||||
|             **self._extract_item_info(item_info), | ||||
|             'id': video_id, | ||||
|             'episode_id': video_id, | ||||
|             'formats': formats, | ||||
|             **traverse_obj(video_info, { | ||||
|                 'title': ('title', {str}), | ||||
|                 'episode': ('title', {str}), | ||||
|                 'description': ('description', {str}), | ||||
|                 'thumbnail': ('image', {url_or_none}), | ||||
|                 'series': ('series', {str}), | ||||
|                 'season_number': ('season', {int_or_none}), | ||||
|                 'episode_number': ('episode', {int_or_none}), | ||||
|                 'duration': ('duration', {int_or_none}), | ||||
|                 'categories': ('category', {str}, all), | ||||
|                 'release_timestamp': ('airDate', {int_or_none(scale=1000)}), | ||||
|                 'timestamp': ('availableDate', {int_or_none(scale=1000)}), | ||||
|             }), | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| class CBCGemPlaylistIE(InfoExtractor): | ||||
| class CBCGemPlaylistIE(CBCGemBaseIE): | ||||
|     IE_NAME = 'gem.cbc.ca:playlist' | ||||
|     _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)' | ||||
|     _TESTS = [{ | ||||
| @@ -700,70 +745,35 @@ class CBCGemPlaylistIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': 'schitts-creek/s06', | ||||
|             'title': 'Season 6', | ||||
|             'description': 'md5:6a92104a56cbeb5818cc47884d4326a2', | ||||
|             'series': 'Schitt\'s Creek', | ||||
|             'season_number': 6, | ||||
|             'season': 'Season 6', | ||||
|             'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://gem.cbc.ca/schitts-creek/s06', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/' | ||||
| 
 | ||||
|     def _entries(self, season_info): | ||||
|         for episode in traverse_obj(season_info, ('items', lambda _, v: v['url'])): | ||||
|             yield self.url_result( | ||||
|                 f'https://gem.cbc.ca/media/{episode["url"]}', CBCGemIE, | ||||
|                 **self._extract_item_info(episode)) | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         match = self._match_valid_url(url) | ||||
|         season_id = match.group('id') | ||||
|         show = match.group('show') | ||||
|         show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426) | ||||
|         season = int(match.group('season')) | ||||
|         season_id, show, season = self._match_valid_url(url).group('id', 'show', 'season') | ||||
|         show_info = self._call_show_api(show, display_id=season_id) | ||||
|         season_info = traverse_obj(show_info, ( | ||||
|             'content', ..., 'lineups', | ||||
|             lambda _, v: v['seasonNumber'] == int(season), any, {require('season info')})) | ||||
| 
 | ||||
|         season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None) | ||||
| 
 | ||||
|         if season_info is None: | ||||
|             raise ExtractorError(f'Couldn\'t find season {season} of {show}') | ||||
| 
 | ||||
|         episodes = [] | ||||
|         for episode in season_info['assets']: | ||||
|             episodes.append({ | ||||
|                 '_type': 'url_transparent', | ||||
|                 'ie_key': 'CBCGem', | ||||
|                 'url': 'https://gem.cbc.ca/media/' + episode['id'], | ||||
|                 'id': episode['id'], | ||||
|                 'title': episode.get('title'), | ||||
|                 'description': episode.get('description'), | ||||
|                 'thumbnail': episode.get('image'), | ||||
|                 'series': episode.get('series'), | ||||
|                 'season_number': episode.get('season'), | ||||
|                 'season': season_info['title'], | ||||
|                 'season_id': season_info.get('id'), | ||||
|                 'episode_number': episode.get('episode'), | ||||
|                 'episode': episode.get('title'), | ||||
|                 'episode_id': episode['id'], | ||||
|                 'duration': episode.get('duration'), | ||||
|                 'categories': [episode.get('category')], | ||||
|             }) | ||||
| 
 | ||||
|         thumbnail = None | ||||
|         tn_uri = season_info.get('image') | ||||
|         # the-national was observed to use a "data:image/png;base64" | ||||
|         # URI for their 'image' value. The image was 1x1, and is | ||||
|         # probably just a placeholder, so it is ignored. | ||||
|         if tn_uri is not None and not tn_uri.startswith('data:'): | ||||
|             thumbnail = tn_uri | ||||
| 
 | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'entries': episodes, | ||||
|             'id': season_id, | ||||
|             'title': season_info['title'], | ||||
|             'description': season_info.get('description'), | ||||
|             'thumbnail': thumbnail, | ||||
|             'series': show_info.get('title'), | ||||
|             'season_number': season_info.get('season'), | ||||
|             'season': season_info['title'], | ||||
|         } | ||||
|         return self.playlist_result( | ||||
|             self._entries(season_info), season_id, | ||||
|             **traverse_obj(season_info, { | ||||
|                 'title': ('title', {str}), | ||||
|                 'season': ('title', {str}), | ||||
|                 'season_number': ('seasonNumber', {int_or_none}), | ||||
|             }), series=traverse_obj(show_info, ('title', {str}))) | ||||
| 
 | ||||
| 
 | ||||
| class CBCGemLiveIE(InfoExtractor): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bashonly
					bashonly