1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00
This commit is contained in:
Sipherdrakon 2025-06-17 21:21:43 +02:00 committed by GitHub
commit cc460344bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 279 additions and 0 deletions

View File

@ -1747,6 +1747,7 @@
RokfinSearchIE,
RokfinStackIE,
)
from .roku import RokuChannelIE
from .roosterteeth import (
RoosterTeethIE,
RoosterTeethSeriesIE,

278
yt_dlp/extractor/roku.py Normal file
View File

@ -0,0 +1,278 @@
import json
import re
import urllib.parse
from yt_dlp.extractor.common import ExtractorError
from yt_dlp.utils import int_or_none, traverse_obj
from .common import InfoExtractor
class RokuChannelIE(InfoExtractor):
# The regex captures either /watch/<id> or /details/<series_id>/<slug>[/season-<season>]
_VALID_URL = r'https?://(?:www\.)?therokuchannel\.roku\.com/(?:(?:watch/(?P<id>[0-9a-f]{32}))|(?:details/(?P<series_id>[0-9a-f]{32})/(?P<slug>[^/]+)(?:/season-(?P<season>\d+))?))'
_TESTS = [{
# Single episode test (using a details URL with an episode slug)
'url': 'https://therokuchannel.roku.com/details/a9474f67937c5986aa1ac0747f5bb615/beastmaster-s1-e1-the-legend-continues',
'md5': 'b8a683e430a79e20295cff9848bea865',
'info_dict': {
'id': 'a9474f67937c5986aa1ac0747f5bb615',
'ext': 'mp4',
'title': 'The Legend Continues',
'description': 'Dar begins his quest to rescue his love, Kyra, after the Terron warriors abduct her.',
'episode_number': 1,
'season_number': 1,
'series': 'BeastMaster',
'release_date': '19991004', # from releaseDate "1999-10-04T00:00:00Z"
'duration': 3600.0,
},
'skip': 'Requires live website and valid cookies',
}, {
# Season playlist test.
'url': 'https://therokuchannel.roku.com/details/48af1a617b1654a8a73cddefddedc7b8/beastmaster/season-2',
'playlist_count': 22,
'info_dict': {
'id': '48af1a617b1654a8a73cddefddedc7b8',
'title': 'BeastMaster - Season 2',
},
'skip': 'Requires live website and valid cookies',
}, {
# Full series playlist test.
'url': 'https://therokuchannel.roku.com/details/48af1a617b1654a8a73cddefddedc7b8/beastmaster',
'playlist_count': 64,
'info_dict': {
'id': '48af1a617b1654a8a73cddefddedc7b8',
'title': 'BeastMaster',
},
'skip': 'Requires live website and valid cookies',
}, {
# Only-matching test for a DRM-protected movie.
'url': 'https://therokuchannel.roku.com/details/b1f983c03f27531388474c46372b956c/friday-after-next',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
# If the URL contains a "-s#-e#" pattern anywhere, treat it as a single episode extraction.
if re.search(r'-s\d+e\d+', url, re.IGNORECASE):
return self._real_extract_single(url, mobj)
# For /details/ URLs, decide based on presence of season info:
if mobj.group('series_id'):
# Query the API details using the series_id.
details = self._get_details(mobj.group('series_id'))
# If no "seasons" key is present, assume it's a single episode.
if 'seasons' not in details:
return self._real_extract_single(url, mobj)
# Otherwise, if a season number is provided, extract that season's episodes.
if mobj.group('season'):
return self._real_extract_playlist(url, mobj)
# Otherwise treat the URL as representing the full series.
return self._real_extract_series(url, mobj)
# Otherwise, if the URL is of /watch/ type, extract single video.
return self._real_extract_single(url, mobj)
def _get_details(self, video_id):
# Build the full API URL with detailed query parameters.
base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{video_id}'
query = (
'?expand=credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes'
'&include=type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,categoryObjects,runTimeSeconds,castAndCrew,'
'savable,stationDma,kidsDirected,releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,credits.meta,'
'credits.order,credits.name,credits.role,credits.personId,credits.images,parentalRatings,reverseChronological,contentRatingClass,'
'languageDialogBody,detailScreenOptions,viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,'
'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,seasons.releaseYear,seasons.castAndCrew,'
'seasons.credits.birthDate,seasons.credits.meta,seasons.credits.order,seasons.credits.name,seasons.credits.role,'
'seasons.credits.personId,seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,'
'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,'
'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,'
'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,'
'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions'
'&filter=categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all'
'&featureInclude=bookmark,watchlist,linearSchedule'
)
full_url = base_url + query
try:
details = self._download_json(full_url, video_id,
note='Downloading detailed content info',
fatal=False)
return details or {}
except ExtractorError:
return {}
def _real_extract_single(self, url, mobj):
# Single episode extraction using the API details.
video_id = mobj.group('id') or mobj.group('series_id')
details = self._get_details(video_id)
title = details.get('title', '').strip()
description = details.get('description', '').strip()
webpage = self._download_webpage(url, video_id)
mpd_url = self._search_regex(
r'(https?://vod-playlist\.sr\.roku\.com/1\.mpd\?[^\'" >]+)',
webpage, 'mpd URL', fatal=False)
if not mpd_url:
# Fallback: use CSRF token and playback API.
self._download_webpage('https://therokuchannel.roku.com/', video_id,
note='Initializing session', fatal=False)
csrf_info = self._download_json('https://therokuchannel.roku.com/api/v1/csrf',
video_id, note='Downloading CSRF token',
fatal=False)
csrf_token = csrf_info.get('csrf') if csrf_info else None
headers = {
'authority': 'therokuchannel.roku.com',
'accept': '*/*',
'accept-language': 'en-US,en;q=0.9',
'user-agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
'AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/102.0.5005.63 Safari/537.36'),
'referer': 'https://therokuchannel.roku.com/',
'Content-Type': 'application/json',
}
if csrf_token:
headers['csrf-token'] = csrf_token
playback_payload = {
'rokuId': video_id,
'mediaFormat': 'mpeg-dash',
'drmType': 'widevine',
'quality': 'fhd',
'providerId': 'rokuavod',
}
playback_json = self._download_json(
'https://therokuchannel.roku.com/api/v3/playback',
video_id,
data=json.dumps(playback_payload).encode('utf-8'),
headers=headers,
note='Downloading playback JSON',
fatal=True)
videos = traverse_obj(playback_json, ('playbackMedia', 'videos'), expected_type=list) or []
dash_url = None
for video in videos:
if video.get('streamFormat') == 'dash':
dash_url = video.get('url')
break
if not dash_url:
raise ExtractorError('Unable to extract dash URL from API', expected=True)
parsed = urllib.parse.urlparse(dash_url)
query_params = urllib.parse.parse_qs(parsed.query)
if 'origin' in query_params:
mpd_url = urllib.parse.unquote(query_params['origin'][0]).split('?')[0]
else:
mpd_url = dash_url
formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
}
def _real_extract_playlist(self, url, mobj):
# Extract episodes for a specific season.
series_id = mobj.group('series_id')
season_num = int_or_none(mobj.group('season')) or 1
base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{series_id}'
params = {
'expand': 'credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes',
'include': (
'type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,'
'categoryObjects,runTimeSeconds,castAndCrew,savable,stationDma,kidsDirected,'
'releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,'
'credits.meta,credits.order,credits.name,credits.role,seasons.credits.personId,credits.images,'
'parentalRatings,reverseChronological,contentRatingClass,languageDialogBody,detailScreenOptions,'
'viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,'
'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,'
'seasons.releaseYear,seasons.castAndCrew,seasons.credits.birthDate,seasons.credits.meta,'
'seasons.credits.order,seasons.credits.name,seasons.credits.role,seasons.credits.personId,'
'seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,'
'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,'
'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,'
'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,'
'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions'
),
'filter': 'categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all',
'featureInclude': 'bookmark,watchlist,linearSchedule',
}
series_data = self._download_json(base_url, series_id,
note='Downloading series data',
fatal=True, query=params)
series_title = series_data.get('title') or mobj.group('slug')
entries = []
if series_data.get('seasons'):
for season in series_data.get('seasons', []):
if int_or_none(season.get('seasonNumber')) == season_num:
for episode in season.get('episodes') or []:
episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id'))
if not episode_id:
continue
episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}'
entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id)
entry.update({
'title': f'{series_title} - S{season.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}',
'season_number': int_or_none(season.get('seasonNumber')),
'episode_number': int_or_none(episode.get('episodeNumber')),
})
entries.append(entry)
break
if not entries:
raise ExtractorError(f'No episodes found for season {season_num}', expected=True)
return self.playlist_result(entries, series_id, f'{series_title} - Season {season_num}')
def _real_extract_series(self, url, mobj):
# Extract all episodes across all seasons.
series_id = mobj.group('series_id')
base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{series_id}'
params = {
'expand': 'credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes',
'include': (
'type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,'
'categoryObjects,runTimeSeconds,castAndCrew,savable,stationDma,kidsDirected,'
'releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,'
'credits.meta,credits.order,credits.name,credits.role,seasons.credits.personId,credits.images,'
'parentalRatings,reverseChronological,contentRatingClass,languageDialogBody,detailScreenOptions,'
'viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,'
'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,'
'seasons.releaseYear,seasons.castAndCrew,seasons.credits.birthDate,seasons.credits.meta,'
'seasons.credits.order,seasons.credits.name,seasons.credits.role,seasons.credits.personId,'
'seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,'
'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,'
'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,'
'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,'
'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions'
),
'filter': 'categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all',
'featureInclude': 'bookmark,watchlist,linearSchedule',
}
series_data = self._download_json(base_url, series_id,
note='Downloading series data',
fatal=True, query=params)
series_title = series_data.get('title') or mobj.group('slug')
entries = []
if series_data.get('seasons'):
for season in series_data.get('seasons', []):
for episode in season.get('episodes') or []:
episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id'))
if not episode_id:
continue
episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}'
entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id)
entry.update({
'title': f'{series_title} - S{season.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}',
'season_number': int_or_none(season.get('seasonNumber')),
'episode_number': int_or_none(episode.get('episodeNumber')),
})
entries.append(entry)
else:
for episode in series_data.get('episodes', []):
episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id'))
if not episode_id:
continue
episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}'
entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id)
entry.update({
'title': f'{series_title} - S{episode.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}',
'season_number': int_or_none(episode.get('seasonNumber')),
'episode_number': int_or_none(episode.get('episodeNumber')),
})
entries.append(entry)
if not entries:
raise ExtractorError('No episodes found for series', expected=True)
return self.playlist_result(entries, series_id, series_title)