mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 01:18:30 +00:00
279 lines
17 KiB
Python
279 lines
17 KiB
Python
import json
|
|
import re
|
|
import urllib.parse
|
|
|
|
from yt_dlp.extractor.common import ExtractorError
|
|
from yt_dlp.utils import int_or_none, traverse_obj
|
|
|
|
from .common import InfoExtractor
|
|
|
|
|
|
class RokuChannelIE(InfoExtractor):
    """Extractor for The Roku Channel (therokuchannel.roku.com).

    Supported URL shapes:

    * ``/watch/<id>`` - a single video;
    * ``/details/<id>/<slug>`` - a single video or, when the API reports
      seasons for ``<id>``, every episode of the series;
    * ``/details/<id>/<slug>/season-<n>`` - the episodes of one season.
    """

    # The regex captures either /watch/<id> or /details/<series_id>/<slug>[/season-<season>]
    _VALID_URL = r'https?://(?:www\.)?therokuchannel\.roku\.com/(?:(?:watch/(?P<id>[0-9a-f]{32}))|(?:details/(?P<series_id>[0-9a-f]{32})/(?P<slug>[^/]+)(?:/season-(?P<season>\d+))?))'
    _TESTS = [{
        # Single episode test (using a details URL with an episode slug)
        'url': 'https://therokuchannel.roku.com/details/a9474f67937c5986aa1ac0747f5bb615/beastmaster-s1-e1-the-legend-continues',
        'md5': 'b8a683e430a79e20295cff9848bea865',
        'info_dict': {
            'id': 'a9474f67937c5986aa1ac0747f5bb615',
            'ext': 'mp4',
            'title': 'The Legend Continues',
            'description': 'Dar begins his quest to rescue his love, Kyra, after the Terron warriors abduct her.',
            'episode_number': 1,
            'season_number': 1,
            'series': 'BeastMaster',
            'release_date': '19991004',  # from releaseDate "1999-10-04T00:00:00Z"
            'duration': 3600.0,
        },
        'skip': 'Requires live website and valid cookies',
    }, {
        # Season playlist test.
        'url': 'https://therokuchannel.roku.com/details/48af1a617b1654a8a73cddefddedc7b8/beastmaster/season-2',
        'playlist_count': 22,
        'info_dict': {
            'id': '48af1a617b1654a8a73cddefddedc7b8',
            'title': 'BeastMaster - Season 2',
        },
        'skip': 'Requires live website and valid cookies',
    }, {
        # Full series playlist test.
        'url': 'https://therokuchannel.roku.com/details/48af1a617b1654a8a73cddefddedc7b8/beastmaster',
        'playlist_count': 64,
        'info_dict': {
            'id': '48af1a617b1654a8a73cddefddedc7b8',
            'title': 'BeastMaster',
        },
        'skip': 'Requires live website and valid cookies',
    }, {
        # Only-matching test for a DRM-protected movie.
        'url': 'https://therokuchannel.roku.com/details/b1f983c03f27531388474c46372b956c/friday-after-next',
        'only_matching': True,
    }]

    # Matches both "-s1e1" and "-s1-e1" (the hyphenated form is the one used
    # by the episode slug in the first test URL above).
    _EPISODE_SLUG_RE = re.compile(r'-s\d+-?e\d+', re.IGNORECASE)

    _SITE_URL = 'https://therokuchannel.roku.com/'
    _API_BASE = 'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F'

    # Single source of truth for the content-details query; passed via
    # ``query=`` so that spaces and parentheses in "filter" get URL-encoded.
    _DETAILS_QUERY = {
        'expand': 'credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes',
        'include': (
            'type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,categoryObjects,runTimeSeconds,castAndCrew,'
            'savable,stationDma,kidsDirected,releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,credits.meta,'
            'credits.order,credits.name,credits.role,credits.personId,credits.images,parentalRatings,reverseChronological,contentRatingClass,'
            'languageDialogBody,detailScreenOptions,viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,'
            'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,seasons.releaseYear,seasons.castAndCrew,'
            'seasons.credits.birthDate,seasons.credits.meta,seasons.credits.order,seasons.credits.name,seasons.credits.role,'
            'seasons.credits.personId,seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,'
            'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,'
            'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,'
            'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,'
            'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions'
        ),
        'filter': 'categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all',
        'featureInclude': 'bookmark,watchlist,linearSchedule',
    }

    def _real_extract(self, url):
        """Dispatch *url* to single-video, season or full-series extraction."""
        mobj = self._match_valid_url(url)
        series_id = mobj.group('series_id')

        # /watch/ URLs and URLs carrying an "-s#-e#" marker are single videos;
        # no API round-trip is needed to decide that.
        if not series_id or self._EPISODE_SLUG_RE.search(url):
            return self._real_extract_single(url, mobj)

        # For the remaining /details/ URLs the API response decides: without
        # a "seasons" key the ID is treated as a single video.
        details = self._get_details(series_id)
        if 'seasons' not in details:
            return self._real_extract_single(url, mobj, details=details)
        if mobj.group('season'):
            return self._real_extract_playlist(url, mobj, details=details)
        return self._real_extract_series(url, mobj, details=details)

    def _get_details(self, video_id, fatal=False, note='Downloading detailed content info'):
        """Download the content-details JSON for *video_id*.

        Returns the decoded dict. With ``fatal=False`` (the default) any
        failure yields ``{}``; with ``fatal=True`` download errors propagate.
        """
        try:
            details = self._download_json(
                f'{self._API_BASE}{video_id}', video_id,
                note=note, fatal=fatal, query=self._DETAILS_QUERY)
        except ExtractorError:
            if fatal:
                raise
            details = None
        return details if isinstance(details, dict) else {}

    @staticmethod
    def _release_date(value):
        """Convert an ISO timestamp such as "1999-10-04T00:00:00Z" to "19991004"."""
        if not isinstance(value, str):
            return None
        date_match = re.match(r'(\d{4})-(\d{2})-(\d{2})', value)
        return ''.join(date_match.groups()) if date_match else None

    def _extract_mpd_url_from_api(self, video_id):
        """Obtain the DASH manifest URL for *video_id* via the playback API.

        Raises ExtractorError when the response lists no DASH stream.
        """
        # Load the landing page first, presumably so the session cookies the
        # CSRF endpoint relies on are set.
        self._download_webpage(
            self._SITE_URL, video_id, note='Initializing session', fatal=False)
        csrf_info = self._download_json(
            'https://therokuchannel.roku.com/api/v1/csrf', video_id,
            note='Downloading CSRF token', fatal=False)
        csrf_token = csrf_info.get('csrf') if isinstance(csrf_info, dict) else None

        headers = {
            'authority': 'therokuchannel.roku.com',
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'user-agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                           'AppleWebKit/537.36 (KHTML, like Gecko) '
                           'Chrome/102.0.5005.63 Safari/537.36'),
            'referer': self._SITE_URL,
            'Content-Type': 'application/json',
        }
        if csrf_token:
            headers['csrf-token'] = csrf_token

        playback_json = self._download_json(
            'https://therokuchannel.roku.com/api/v3/playback', video_id,
            data=json.dumps({
                'rokuId': video_id,
                'mediaFormat': 'mpeg-dash',
                'drmType': 'widevine',
                'quality': 'fhd',
                'providerId': 'rokuavod',
            }).encode('utf-8'),
            headers=headers, note='Downloading playback JSON', fatal=True)

        videos = traverse_obj(playback_json, ('playbackMedia', 'videos'), expected_type=list) or []
        dash_url = next((
            video['url'] for video in videos
            if isinstance(video, dict) and video.get('streamFormat') == 'dash' and video.get('url')
        ), None)
        if not dash_url:
            raise ExtractorError('Unable to extract dash URL from API', expected=True)

        # The playback URL may wrap the real manifest in an "origin" query
        # parameter; when present, use that manifest without its own query.
        origin = urllib.parse.parse_qs(urllib.parse.urlparse(dash_url).query).get('origin')
        if origin:
            return urllib.parse.unquote(origin[0]).split('?')[0]
        return dash_url

    def _real_extract_single(self, url, mobj, details=None):
        """Extract a single video.

        *details* may carry an already-downloaded details dict to avoid a
        second request; when None it is fetched (non-fatally) here.
        """
        video_id = mobj.group('id') or mobj.group('series_id')
        if details is None:
            details = self._get_details(video_id)

        webpage = self._download_webpage(url, video_id)
        # default=None: a missing manifest URL is expected here, the playback
        # API below is the fallback.
        mpd_url = self._search_regex(
            r'(https?://vod-playlist\.sr\.roku\.com/1\.mpd\?[^\'" >]+)',
            webpage, 'mpd URL', default=None)
        if not mpd_url:
            mpd_url = self._extract_mpd_url_from_api(video_id)

        return {
            'id': video_id,
            # "or ''" guards against keys that are present but null.
            'title': (details.get('title') or '').strip() or mobj.group('slug'),
            'description': (details.get('description') or '').strip() or None,
            'series': traverse_obj(details, ('series', 'title'), expected_type=str),
            'season_number': int_or_none(details.get('seasonNumber')),
            'episode_number': int_or_none(details.get('episodeNumber')),
            'duration': int_or_none(details.get('runTimeSeconds')),
            'release_date': self._release_date(details.get('releaseDate')),
            'formats': self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash'),
        }

    def _episode_entries(self, series_title, episodes, season_number=None):
        """Yield url_result entries for *episodes*, skipping those without an ID.

        *season_number* overrides the per-episode "seasonNumber" when given.
        """
        for episode in episodes or []:
            if not isinstance(episode, dict):
                continue
            episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id'))
            if not episode_id:
                continue
            season_no = season_number if season_number is not None else episode.get('seasonNumber')
            episode_no = episode.get('episodeNumber')
            entry = self.url_result(
                f'https://therokuchannel.roku.com/watch/{episode_id}',
                ie_key=self.ie_key(), video_id=episode_id)
            entry.update({
                'title': f'{series_title} - S{season_no}E{episode_no} - {episode.get("title") or ""}',
                'season_number': int_or_none(season_no),
                'episode_number': int_or_none(episode_no),
            })
            yield entry

    def _real_extract_playlist(self, url, mobj, details=None):
        """Extract the episodes of the season named in the URL as a playlist.

        Raises ExtractorError when the season has no extractable episodes.
        """
        series_id = mobj.group('series_id')
        # default=1 (rather than "or 1") keeps an explicit "season-0" as 0.
        season_num = int_or_none(mobj.group('season'), default=1)
        series_data = details if details is not None else self._get_details(
            series_id, fatal=True, note='Downloading series data')
        series_title = series_data.get('title') or mobj.group('slug')

        season = next((
            candidate for candidate in series_data.get('seasons') or []
            if isinstance(candidate, dict) and int_or_none(candidate.get('seasonNumber')) == season_num
        ), None)
        entries = list(self._episode_entries(
            series_title, season.get('episodes'), season.get('seasonNumber'))) if season else []
        if not entries:
            raise ExtractorError(f'No episodes found for season {season_num}', expected=True)
        return self.playlist_result(entries, series_id, f'{series_title} - Season {season_num}')

    def _real_extract_series(self, url, mobj, details=None):
        """Extract every episode of every season as one playlist.

        Falls back to the flat "episodes" list when the response carries no
        seasons. Raises ExtractorError when nothing could be extracted.
        """
        series_id = mobj.group('series_id')
        series_data = details if details is not None else self._get_details(
            series_id, fatal=True, note='Downloading series data')
        series_title = series_data.get('title') or mobj.group('slug')

        seasons = series_data.get('seasons')
        entries = []
        if seasons:
            for season in seasons:
                if not isinstance(season, dict):
                    continue
                entries.extend(self._episode_entries(
                    series_title, season.get('episodes'), season.get('seasonNumber')))
        else:
            entries.extend(self._episode_entries(series_title, series_data.get('episodes')))

        if not entries:
            raise ExtractorError('No episodes found for series', expected=True)
        return self.playlist_result(entries, series_id, series_title)
|