mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-24 09:25:46 +00:00
[ie/gem.cbc.ca] Support standalone, series & Olympics URLs (#15878)
Closes #8382, Closes #8790, Closes #15850 Authored by: bashonly, makew0rld, 0xvd Co-authored-by: makeworld <makeworld@protonmail.com> Co-authored-by: 0xvd <0xvd12@gmail.com>
This commit is contained in:
@@ -311,8 +311,10 @@ from .canalsurmas import CanalsurmasIE
|
|||||||
from .caracoltv import CaracolTvPlayIE
|
from .caracoltv import CaracolTvPlayIE
|
||||||
from .cbc import (
|
from .cbc import (
|
||||||
CBCIE,
|
CBCIE,
|
||||||
|
CBCGemContentIE,
|
||||||
CBCGemIE,
|
CBCGemIE,
|
||||||
CBCGemLiveIE,
|
CBCGemLiveIE,
|
||||||
|
CBCGemOlympicsIE,
|
||||||
CBCGemPlaylistIE,
|
CBCGemPlaylistIE,
|
||||||
CBCListenIE,
|
CBCListenIE,
|
||||||
CBCPlayerIE,
|
CBCPlayerIE,
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
jwt_decode_hs256,
|
jwt_decode_hs256,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
@@ -25,6 +26,7 @@ from ..utils import (
|
|||||||
url_basename,
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import require, traverse_obj, trim_str
|
from ..utils.traversal import require, traverse_obj, trim_str
|
||||||
|
|
||||||
@@ -540,6 +542,32 @@ class CBCGemBaseIE(InfoExtractor):
|
|||||||
f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}',
|
f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}',
|
||||||
display_id or item_id, query={'device': 'web'})
|
display_id or item_id, query={'device': 'web'})
|
||||||
|
|
||||||
|
def _call_media_api(self, media_id, app_code='gem', display_id=None, headers=None):
|
||||||
|
media_data = self._download_json(
|
||||||
|
'https://services.radio-canada.ca/media/validation/v2/',
|
||||||
|
display_id or media_id, headers=headers, query={
|
||||||
|
'appCode': app_code,
|
||||||
|
'connectionType': 'hd',
|
||||||
|
'deviceType': 'ipad',
|
||||||
|
'multibitrate': 'true',
|
||||||
|
'output': 'json',
|
||||||
|
'tech': 'hls',
|
||||||
|
'manifestVersion': '2',
|
||||||
|
'manifestType': 'desktop',
|
||||||
|
'idMedia': media_id,
|
||||||
|
})
|
||||||
|
|
||||||
|
error_code = traverse_obj(media_data, ('errorCode', {int}))
|
||||||
|
if error_code == 1:
|
||||||
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
if error_code == 35:
|
||||||
|
self.raise_login_required(method='password')
|
||||||
|
if error_code != 0:
|
||||||
|
error_message = join_nonempty(error_code, media_data.get('message'), delim=' - ')
|
||||||
|
raise ExtractorError(f'{self.IE_NAME} said: {error_message}')
|
||||||
|
|
||||||
|
return media_data
|
||||||
|
|
||||||
def _extract_item_info(self, item_info):
|
def _extract_item_info(self, item_info):
|
||||||
episode_number = None
|
episode_number = None
|
||||||
title = traverse_obj(item_info, ('title', {str}))
|
title = traverse_obj(item_info, ('title', {str}))
|
||||||
@@ -567,7 +595,7 @@ class CBCGemBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class CBCGemIE(CBCGemBaseIE):
|
class CBCGemIE(CBCGemBaseIE):
|
||||||
IE_NAME = 'gem.cbc.ca'
|
IE_NAME = 'gem.cbc.ca'
|
||||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)'
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]{2,4})/?(?:[?#]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# This is a normal, public, TV show video
|
# This is a normal, public, TV show video
|
||||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||||
@@ -709,29 +737,10 @@ class CBCGemIE(CBCGemBaseIE):
|
|||||||
if claims_token := self._fetch_claims_token():
|
if claims_token := self._fetch_claims_token():
|
||||||
headers['x-claims-token'] = claims_token
|
headers['x-claims-token'] = claims_token
|
||||||
|
|
||||||
m3u8_info = self._download_json(
|
m3u8_url = self._call_media_api(
|
||||||
'https://services.radio-canada.ca/media/validation/v2/',
|
item_info['idMedia'], display_id=video_id, headers=headers)['url']
|
||||||
video_id, headers=headers, query={
|
|
||||||
'appCode': 'gem',
|
|
||||||
'connectionType': 'hd',
|
|
||||||
'deviceType': 'ipad',
|
|
||||||
'multibitrate': 'true',
|
|
||||||
'output': 'json',
|
|
||||||
'tech': 'hls',
|
|
||||||
'manifestVersion': '2',
|
|
||||||
'manifestType': 'desktop',
|
|
||||||
'idMedia': item_info['idMedia'],
|
|
||||||
})
|
|
||||||
|
|
||||||
if m3u8_info.get('errorCode') == 1:
|
|
||||||
self.raise_geo_restricted(countries=['CA'])
|
|
||||||
elif m3u8_info.get('errorCode') == 35:
|
|
||||||
self.raise_login_required(method='password')
|
|
||||||
elif m3u8_info.get('errorCode') != 0:
|
|
||||||
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
m3u8_url, video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
||||||
self._remove_duplicate_formats(formats)
|
self._remove_duplicate_formats(formats)
|
||||||
|
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
@@ -801,7 +810,128 @@ class CBCGemPlaylistIE(CBCGemBaseIE):
|
|||||||
}), series=traverse_obj(show_info, ('title', {str})))
|
}), series=traverse_obj(show_info, ('title', {str})))
|
||||||
|
|
||||||
|
|
||||||
class CBCGemLiveIE(InfoExtractor):
|
class CBCGemContentIE(CBCGemBaseIE):
|
||||||
|
IE_NAME = 'gem.cbc.ca:content'
|
||||||
|
IE_DESC = False # Do not list
|
||||||
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?P<id>[0-9a-z-]+)/?(?:[?#]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
# Series URL; content_type == 'Season'
|
||||||
|
'url': 'https://gem.cbc.ca/the-tunnel',
|
||||||
|
'playlist_count': 3,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'the-tunnel',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Miniseries URL; content_type == 'Parts'
|
||||||
|
'url': 'https://gem.cbc.ca/summit-72',
|
||||||
|
'playlist_count': 1,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'summit-72',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Olympics URL; content_type == 'Standalone'
|
||||||
|
'url': 'https://gem.cbc.ca/ski-jumping-nh-individual-womens-final-30086',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ski-jumping-nh-individual-womens-final-30086',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ski Jumping: NH Individual (Women\'s) - Final',
|
||||||
|
'description': 'md5:411c07c8a9a4a36344530b0c726bf8ab',
|
||||||
|
'duration': 12793,
|
||||||
|
'thumbnail': r're:https://[^.]+\.cbc\.ca/.+\.jpg',
|
||||||
|
'release_timestamp': 1770482100,
|
||||||
|
'release_date': '20260207',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Movie URL; content_type == 'Standalone'; requires authentication
|
||||||
|
'url': 'https://gem.cbc.ca/copa-71',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data']
|
||||||
|
content_type = data['contentType']
|
||||||
|
self.write_debug(f'Routing for content type "{content_type}"')
|
||||||
|
|
||||||
|
if content_type == 'Standalone':
|
||||||
|
new_url = traverse_obj(data, (
|
||||||
|
'header', 'cta', 'media', 'url', {urljoin('https://gem.cbc.ca/')}))
|
||||||
|
if CBCGemOlympicsIE.suitable(new_url):
|
||||||
|
return self.url_result(new_url, CBCGemOlympicsIE)
|
||||||
|
|
||||||
|
# Manually construct non-Olympics standalone URLs to avoid returning trailer URLs
|
||||||
|
return self.url_result(f'https://gem.cbc.ca/{display_id}/s01e01', CBCGemIE)
|
||||||
|
|
||||||
|
# Handle series URLs (content_type == 'Season') and miniseries URLs (content_type == 'Parts')
|
||||||
|
def entries():
|
||||||
|
for playlist_url in traverse_obj(data, (
|
||||||
|
'content', ..., 'lineups', ..., 'url', {urljoin('https://gem.cbc.ca/')},
|
||||||
|
{lambda x: x if CBCGemPlaylistIE.suitable(x) else None},
|
||||||
|
)):
|
||||||
|
yield self.url_result(playlist_url, CBCGemPlaylistIE)
|
||||||
|
|
||||||
|
return self.playlist_result(entries(), display_id)
|
||||||
|
|
||||||
|
|
||||||
|
class CBCGemOlympicsIE(CBCGemBaseIE):
|
||||||
|
IE_NAME = 'gem.cbc.ca:olympics'
|
||||||
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?P<id>(?:[0-9a-z]+-)+[0-9]{5,})/s01e(?P<media_id>[0-9]{5,})'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://gem.cbc.ca/ski-jumping-nh-individual-womens-final-30086/s01e30086',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ski-jumping-nh-individual-womens-final-30086',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ski Jumping: NH Individual (Women\'s) - Final',
|
||||||
|
'description': 'md5:411c07c8a9a4a36344530b0c726bf8ab',
|
||||||
|
'duration': 12793,
|
||||||
|
'thumbnail': r're:https://[^.]+\.cbc\.ca/.+\.jpg',
|
||||||
|
'release_timestamp': 1770482100,
|
||||||
|
'release_date': '20260207',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id, media_id = self._match_valid_url(url).group('id', 'media_id')
|
||||||
|
|
||||||
|
video_info = self._call_show_api(video_id)
|
||||||
|
item_info = traverse_obj(video_info, (
|
||||||
|
'content', ..., 'lineups', ..., 'items',
|
||||||
|
lambda _, v: v['formattedIdMedia'] == media_id, any, {require('item info')}))
|
||||||
|
|
||||||
|
live_status = {
|
||||||
|
'LiveEvent': 'is_live',
|
||||||
|
'Replay': 'was_live',
|
||||||
|
}.get(item_info.get('type'))
|
||||||
|
|
||||||
|
release_timestamp = traverse_obj(item_info, (
|
||||||
|
'metadata', (('live', 'startDate'), ('replay', 'airDate')), {parse_iso8601}, any))
|
||||||
|
|
||||||
|
if live_status == 'is_live' and release_timestamp and release_timestamp > time.time():
|
||||||
|
formats = []
|
||||||
|
live_status = 'is_upcoming'
|
||||||
|
self.raise_no_formats('This livestream has not yet started', expected=True)
|
||||||
|
else:
|
||||||
|
m3u8_url = self._call_media_api(media_id, 'medianetlive', video_id)['url']
|
||||||
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=live_status == 'is_live')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'live_status': live_status,
|
||||||
|
'release_timestamp': release_timestamp,
|
||||||
|
**traverse_obj(item_info, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': ('description', {str}),
|
||||||
|
'thumbnail': ('images', 'card', 'url', {url_or_none}),
|
||||||
|
'duration': ('metadata', 'replay', 'duration', {int_or_none}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CBCGemLiveIE(CBCGemBaseIE):
|
||||||
IE_NAME = 'gem.cbc.ca:live'
|
IE_NAME = 'gem.cbc.ca:live'
|
||||||
_VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
|
_VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@@ -871,7 +1001,6 @@ class CBCGemLiveIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
_GEO_COUNTRIES = ['CA']
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -900,19 +1029,8 @@ class CBCGemLiveIE(InfoExtractor):
|
|||||||
live_status = 'is_upcoming'
|
live_status = 'is_upcoming'
|
||||||
self.raise_no_formats('This livestream has not yet started', expected=True)
|
self.raise_no_formats('This livestream has not yet started', expected=True)
|
||||||
else:
|
else:
|
||||||
stream_data = self._download_json(
|
m3u8_url = self._call_media_api(video_stream_id, 'medianetlive', video_id)['url']
|
||||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=live_status == 'is_live')
|
||||||
'appCode': 'medianetlive',
|
|
||||||
'connectionType': 'hd',
|
|
||||||
'deviceType': 'ipad',
|
|
||||||
'idMedia': video_stream_id,
|
|
||||||
'multibitrate': 'true',
|
|
||||||
'output': 'json',
|
|
||||||
'tech': 'hls',
|
|
||||||
'manifestType': 'desktop',
|
|
||||||
})
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
stream_data['url'], video_id, 'mp4', live=live_status == 'is_live')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|||||||
Reference in New Issue
Block a user