mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[cbc] add new extractor for olympics.cbc.ca(closes #15535)
This commit is contained in:
		| @@ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -13,6 +14,7 @@ from ..utils import ( | ||||
|     xpath_element, | ||||
|     xpath_with_ns, | ||||
|     find_xpath_attr, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     parse_age_limit, | ||||
|     int_or_none, | ||||
| @@ -359,3 +361,63 @@ class CBCWatchIE(CBCWatchBaseIE): | ||||
|         video_id = self._match_id(url) | ||||
|         rss = self._call_api('web/browse/' + video_id, video_id) | ||||
|         return self._parse_rss_feed(rss) | ||||
|  | ||||
|  | ||||
class CBCOlympicsIE(InfoExtractor):
    """Extractor for video pages hosted on olympics.cbc.ca.

    The page exposes a ``videoId`` hidden input; that id selects an XML
    metadata document listing one or more video sources.  Each source URI
    must be exchanged for a playable URL through the site's tokenize
    endpoint before its formats (ISM or HLS) can be extracted.
    """
    IE_NAME = 'cbc.ca:olympics'
    _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        Raises ``KeyError`` if the page carries no ``videoId`` hidden input,
        and whatever the download helpers raise for the fatal requests
        (web page, metadata XML, missing title).  Failures while resolving
        an individual video source are non-fatal: that source is skipped.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._hidden_inputs(webpage)['videoId']
        video_doc = self._download_xml(
            'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
        title = xpath_text(video_doc, 'title', fatal=True)
        is_live = xpath_text(video_doc, 'kind') == 'Live'
        if is_live:
            title = self._live_title(title)

        formats = []
        for video_source in video_doc.findall('videoSources/videoSource'):
            uri = xpath_text(video_source, 'uri')
            if not uri:
                continue
            tokenize = self._download_json(
                'https://olympics.cbc.ca/api/api-akamai/tokenize',
                video_id, data=json.dumps({
                    'VideoSource': uri,
                }).encode(), headers={
                    'Content-Type': 'application/json',
                    'Referer': url,
                    # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
                    'Cookie': '_dvp=TK:C0ObxjerU',  # AKAMAI CDN cookie
                }, fatal=False)
            # The request is best-effort (fatal=False), so a failed or
            # malformed response must only skip this source rather than
            # abort the whole extraction with KeyError/TypeError.
            if not isinstance(tokenize, dict):
                continue
            content_url = tokenize.get('ContentUrl')
            if not content_url:
                continue
            video_source_format = video_source.get('format')
            if video_source_format == 'IIS':
                formats.extend(self._extract_ism_formats(
                    content_url, video_id, ism_id=video_source_format, fatal=False))
            else:
                # Every non-IIS source is handled as HLS; live streams use
                # the 'm3u8' entry protocol, everything else 'm3u8_native'.
                formats.extend(self._extract_m3u8_formats(
                    content_url, video_id, 'mp4',
                    'm3u8' if is_live else 'm3u8_native',
                    m3u8_id=video_source_format, fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': xpath_text(video_doc, 'description'),
            'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
            'duration': parse_duration(xpath_text(video_doc, 'duration')),
            'formats': formats,
            'is_live': is_live,
        }
|   | ||||
| @@ -162,6 +162,7 @@ from .cbc import ( | ||||
|     CBCPlayerIE, | ||||
|     CBCWatchVideoIE, | ||||
|     CBCWatchIE, | ||||
|     CBCOlympicsIE, | ||||
| ) | ||||
| from .cbs import CBSIE | ||||
| from .cbslocal import CBSLocalIE | ||||
|   | ||||
| @@ -82,7 +82,7 @@ def register_socks_protocols(): | ||||
| compiled_regex_type = type(re.compile('')) | ||||
|  | ||||
| std_headers = { | ||||
|     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', | ||||
|     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)', | ||||
|     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', | ||||
|     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', | ||||
|     'Accept-Encoding': 'gzip, deflate', | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine