mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[cbc] add new extractor for olympics.cbc.ca (closes #15535)
This commit is contained in:
		| @@ -1,6 +1,7 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import json | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -13,6 +14,7 @@ from ..utils import ( | |||||||
|     xpath_element, |     xpath_element, | ||||||
|     xpath_with_ns, |     xpath_with_ns, | ||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|  |     parse_duration, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     parse_age_limit, |     parse_age_limit, | ||||||
|     int_or_none, |     int_or_none, | ||||||
| @@ -359,3 +361,63 @@ class CBCWatchIE(CBCWatchBaseIE): | |||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         rss = self._call_api('web/browse/' + video_id, video_id) |         rss = self._call_api('web/browse/' + video_id, video_id) | ||||||
|         return self._parse_rss_feed(rss) |         return self._parse_rss_feed(rss) | ||||||
|  |  | ||||||
|  |  | ||||||
class CBCOlympicsIE(InfoExtractor):
    """Extractor for video pages on olympics.cbc.ca.

    The page's hidden ``videoId`` input names an XML document under
    ``/videodata/`` that holds the metadata and the list of video sources.
    Each source URI is exchanged for a playable URL through the site's
    ``api-akamai/tokenize`` endpoint before its formats are extracted.
    """

    IE_NAME = 'cbc.ca:olympics'
    _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Returns a dict with ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``duration``, ``formats`` and
        ``is_live``.  A video source that cannot be tokenized is skipped
        rather than aborting the whole extraction.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The numeric/opaque video id is only available as a hidden form
        # input on the page; the URL slug is kept as display_id.
        video_id = self._hidden_inputs(webpage)['videoId']
        video_doc = self._download_xml(
            'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
        title = xpath_text(video_doc, 'title', fatal=True)
        is_live = xpath_text(video_doc, 'kind') == 'Live'
        if is_live:
            title = self._live_title(title)

        formats = []
        for video_source in video_doc.findall('videoSources/videoSource'):
            uri = xpath_text(video_source, 'uri')
            if not uri:
                continue
            tokenize = self._download_json(
                'https://olympics.cbc.ca/api/api-akamai/tokenize',
                video_id, data=json.dumps({
                    'VideoSource': uri,
                }).encode(), headers={
                    'Content-Type': 'application/json',
                    'Referer': url,
                    # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
                    'Cookie': '_dvp=TK:C0ObxjerU',  # AKAMAI CDN cookie
                }, fatal=False)
            # The request is non-fatal, so treat an unusable response the
            # same way as a failed one: skip this source instead of raising
            # KeyError and losing the formats of the remaining sources.
            content_url = (
                tokenize.get('ContentUrl')
                if isinstance(tokenize, dict) else None)
            if not content_url:
                continue
            video_source_format = video_source.get('format')
            if video_source_format == 'IIS':
                formats.extend(self._extract_ism_formats(
                    content_url, video_id, ism_id=video_source_format, fatal=False))
            else:
                formats.extend(self._extract_m3u8_formats(
                    content_url, video_id, 'mp4',
                    'm3u8' if is_live else 'm3u8_native',
                    m3u8_id=video_source_format, fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': xpath_text(video_doc, 'description'),
            'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
            'duration': parse_duration(xpath_text(video_doc, 'duration')),
            'formats': formats,
            'is_live': is_live,
        }
|   | |||||||
| @@ -162,6 +162,7 @@ from .cbc import ( | |||||||
|     CBCPlayerIE, |     CBCPlayerIE, | ||||||
|     CBCWatchVideoIE, |     CBCWatchVideoIE, | ||||||
|     CBCWatchIE, |     CBCWatchIE, | ||||||
|  |     CBCOlympicsIE, | ||||||
| ) | ) | ||||||
| from .cbs import CBSIE | from .cbs import CBSIE | ||||||
| from .cbslocal import CBSLocalIE | from .cbslocal import CBSLocalIE | ||||||
|   | |||||||
| @@ -82,7 +82,7 @@ def register_socks_protocols(): | |||||||
| compiled_regex_type = type(re.compile('')) | compiled_regex_type = type(re.compile('')) | ||||||
|  |  | ||||||
| std_headers = { | std_headers = { | ||||||
|     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', |     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)', | ||||||
|     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', |     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', | ||||||
|     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', |     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', | ||||||
|     'Accept-Encoding': 'gzip, deflate', |     'Accept-Encoding': 'gzip, deflate', | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine