mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[extractor/cbc] Fix live extractor, playlist _VALID_URL (#6625)
				
					
				
			Authored by: makew0rld
This commit is contained in:
		| @@ -8,14 +8,16 @@ from ..compat import ( | ||||
|     compat_str, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     js_to_json, | ||||
|     orderedSet, | ||||
|     parse_iso8601, | ||||
|     smuggle_url, | ||||
|     strip_or_none, | ||||
|     traverse_obj, | ||||
|     try_get, | ||||
|     ExtractorError, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| @@ -404,7 +406,7 @@ class CBCGemIE(InfoExtractor): | ||||
| 
 | ||||
| class CBCGemPlaylistIE(InfoExtractor): | ||||
|     IE_NAME = 'gem.cbc.ca:playlist' | ||||
|     _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)' | ||||
|     _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)' | ||||
|     _TESTS = [{ | ||||
|         # TV show playlist, all public videos | ||||
|         'url': 'https://gem.cbc.ca/media/schitts-creek/s06', | ||||
| @@ -414,6 +416,9 @@ class CBCGemPlaylistIE(InfoExtractor): | ||||
|             'title': 'Season 6', | ||||
|             'description': 'md5:6a92104a56cbeb5818cc47884d4326a2', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://gem.cbc.ca/schitts-creek/s06', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/' | ||||
| 
 | ||||
| @@ -473,49 +478,90 @@ class CBCGemPlaylistIE(InfoExtractor): | ||||
| 
 | ||||
| class CBCGemLiveIE(InfoExtractor): | ||||
|     IE_NAME = 'gem.cbc.ca:live' | ||||
|     _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://gem.cbc.ca/live/920604739687', | ||||
|         'info_dict': { | ||||
|             'title': 'Ottawa', | ||||
|             'description': 'The live TV channel and local programming from Ottawa', | ||||
|             'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg', | ||||
|             'is_live': True, | ||||
|             'id': 'AyqZwxRqh8EH', | ||||
|             'ext': 'mp4', | ||||
|             'timestamp': 1492106160, | ||||
|             'upload_date': '20170413', | ||||
|             'uploader': 'CBCC-NEW', | ||||
|     _VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'https://gem.cbc.ca/live/920604739687', | ||||
|             'info_dict': { | ||||
|                 'title': 'Ottawa', | ||||
|                 'description': 'The live TV channel and local programming from Ottawa', | ||||
|                 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg', | ||||
|                 'is_live': True, | ||||
|                 'id': 'AyqZwxRqh8EH', | ||||
|                 'ext': 'mp4', | ||||
|                 'timestamp': 1492106160, | ||||
|                 'upload_date': '20170413', | ||||
|                 'uploader': 'CBCC-NEW', | ||||
|             }, | ||||
|             'skip': 'Live might have ended', | ||||
|         }, | ||||
|         'skip': 'Live might have ended', | ||||
|     } | ||||
| 
 | ||||
|     # It's unclear where the chars at the end come from, but they appear to be | ||||
|     # constant. Might need updating in the future. | ||||
|     # There are two URLs, some livestreams are in one, and some | ||||
|     # in the other. The JSON schema is the same for both. | ||||
|     _API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT'] | ||||
|         { | ||||
|             'url': 'https://gem.cbc.ca/live/44', | ||||
|             'info_dict': { | ||||
|                 'id': '44', | ||||
|                 'ext': 'mp4', | ||||
|                 'is_live': True, | ||||
|                 'title': r're:^Ottawa [0-9\-: ]+', | ||||
|                 'description': 'The live TV channel and local programming from Ottawa', | ||||
|                 'live_status': 'is_live', | ||||
|                 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*' | ||||
|             }, | ||||
|             'params': {'skip_download': True}, | ||||
|             'skip': 'Live might have ended', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://gem.cbc.ca/live-event/10835', | ||||
|             'info_dict': { | ||||
|                 'id': '10835', | ||||
|                 'ext': 'mp4', | ||||
|                 'is_live': True, | ||||
|                 'title': r're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+', | ||||
|                 'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.', | ||||
|                 'live_status': 'is_live', | ||||
|                 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*', | ||||
|                 'timestamp': 1679706000, | ||||
|                 'upload_date': '20230325', | ||||
|             }, | ||||
|             'params': {'skip_download': True}, | ||||
|             'skip': 'Live might have ended', | ||||
|         } | ||||
|     ] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data'] | ||||
| 
 | ||||
|         for api_url in self._API_URLS: | ||||
|             video_info = next(( | ||||
|                 stream for stream in self._download_json(api_url, video_id)['entries'] | ||||
|                 if stream.get('guid') == video_id), None) | ||||
|             if video_info: | ||||
|                 break | ||||
|         else: | ||||
|         # Two types of metadata JSON | ||||
|         if not video_info.get('formattedIdMedia'): | ||||
|             video_info = traverse_obj( | ||||
|                 video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}), | ||||
|                 get_all=False, default={}) | ||||
| 
 | ||||
|         video_stream_id = video_info.get('formattedIdMedia') | ||||
|         if not video_stream_id: | ||||
|             raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True) | ||||
| 
 | ||||
|         stream_data = self._download_json( | ||||
|             'https://services.radio-canada.ca/media/validation/v2/', video_id, query={ | ||||
|                 'appCode': 'mpx', | ||||
|                 'connectionType': 'hd', | ||||
|                 'deviceType': 'ipad', | ||||
|                 'idMedia': video_stream_id, | ||||
|                 'multibitrate': 'true', | ||||
|                 'output': 'json', | ||||
|                 'tech': 'hls', | ||||
|                 'manifestType': 'desktop', | ||||
|             }) | ||||
| 
 | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'ThePlatform', | ||||
|             'url': video_info['content'][0]['url'], | ||||
|             'id': video_id, | ||||
|             'title': video_info.get('title'), | ||||
|             'description': video_info.get('description'), | ||||
|             'tags': try_get(video_info, lambda x: x['keywords'].split(', ')), | ||||
|             'thumbnail': video_info.get('cbc$staticImage'), | ||||
|             'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True), | ||||
|             'is_live': True, | ||||
|             **traverse_obj(video_info, { | ||||
|                 'title': 'title', | ||||
|                 'description': 'description', | ||||
|                 'thumbnail': ('images', 'card', 'url'), | ||||
|                 'timestamp': ('airDate', {parse_iso8601}), | ||||
|             }) | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 makeworld
					makeworld