mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
Merge 62d364b93d
into 73bf102116
This commit is contained in:
commit
cc460344bc
@ -1747,6 +1747,7 @@
|
||||
RokfinSearchIE,
|
||||
RokfinStackIE,
|
||||
)
|
||||
from .roku import RokuChannelIE
|
||||
from .roosterteeth import (
|
||||
RoosterTeethIE,
|
||||
RoosterTeethSeriesIE,
|
||||
|
278
yt_dlp/extractor/roku.py
Normal file
278
yt_dlp/extractor/roku.py
Normal file
@ -0,0 +1,278 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from yt_dlp.extractor.common import ExtractorError
|
||||
from yt_dlp.utils import int_or_none, traverse_obj
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RokuChannelIE(InfoExtractor):
|
||||
# The regex captures either /watch/<id> or /details/<series_id>/<slug>[/season-<season>]
|
||||
_VALID_URL = r'https?://(?:www\.)?therokuchannel\.roku\.com/(?:(?:watch/(?P<id>[0-9a-f]{32}))|(?:details/(?P<series_id>[0-9a-f]{32})/(?P<slug>[^/]+)(?:/season-(?P<season>\d+))?))'
|
||||
_TESTS = [{
|
||||
# Single episode test (using a details URL with an episode slug)
|
||||
'url': 'https://therokuchannel.roku.com/details/a9474f67937c5986aa1ac0747f5bb615/beastmaster-s1-e1-the-legend-continues',
|
||||
'md5': 'b8a683e430a79e20295cff9848bea865',
|
||||
'info_dict': {
|
||||
'id': 'a9474f67937c5986aa1ac0747f5bb615',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Legend Continues',
|
||||
'description': 'Dar begins his quest to rescue his love, Kyra, after the Terron warriors abduct her.',
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
'series': 'BeastMaster',
|
||||
'release_date': '19991004', # from releaseDate "1999-10-04T00:00:00Z"
|
||||
'duration': 3600.0,
|
||||
},
|
||||
'skip': 'Requires live website and valid cookies',
|
||||
}, {
|
||||
# Season playlist test.
|
||||
'url': 'https://therokuchannel.roku.com/details/48af1a617b1654a8a73cddefddedc7b8/beastmaster/season-2',
|
||||
'playlist_count': 22,
|
||||
'info_dict': {
|
||||
'id': '48af1a617b1654a8a73cddefddedc7b8',
|
||||
'title': 'BeastMaster - Season 2',
|
||||
},
|
||||
'skip': 'Requires live website and valid cookies',
|
||||
}, {
|
||||
# Full series playlist test.
|
||||
'url': 'https://therokuchannel.roku.com/details/48af1a617b1654a8a73cddefddedc7b8/beastmaster',
|
||||
'playlist_count': 64,
|
||||
'info_dict': {
|
||||
'id': '48af1a617b1654a8a73cddefddedc7b8',
|
||||
'title': 'BeastMaster',
|
||||
},
|
||||
'skip': 'Requires live website and valid cookies',
|
||||
}, {
|
||||
# Only-matching test for a DRM-protected movie.
|
||||
'url': 'https://therokuchannel.roku.com/details/b1f983c03f27531388474c46372b956c/friday-after-next',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
# If the URL contains a "-s#-e#" pattern anywhere, treat it as a single episode extraction.
|
||||
if re.search(r'-s\d+e\d+', url, re.IGNORECASE):
|
||||
return self._real_extract_single(url, mobj)
|
||||
# For /details/ URLs, decide based on presence of season info:
|
||||
if mobj.group('series_id'):
|
||||
# Query the API details using the series_id.
|
||||
details = self._get_details(mobj.group('series_id'))
|
||||
# If no "seasons" key is present, assume it's a single episode.
|
||||
if 'seasons' not in details:
|
||||
return self._real_extract_single(url, mobj)
|
||||
# Otherwise, if a season number is provided, extract that season's episodes.
|
||||
if mobj.group('season'):
|
||||
return self._real_extract_playlist(url, mobj)
|
||||
# Otherwise treat the URL as representing the full series.
|
||||
return self._real_extract_series(url, mobj)
|
||||
# Otherwise, if the URL is of /watch/ type, extract single video.
|
||||
return self._real_extract_single(url, mobj)
|
||||
|
||||
def _get_details(self, video_id):
|
||||
# Build the full API URL with detailed query parameters.
|
||||
base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{video_id}'
|
||||
query = (
|
||||
'?expand=credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes'
|
||||
'&include=type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,categoryObjects,runTimeSeconds,castAndCrew,'
|
||||
'savable,stationDma,kidsDirected,releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,credits.meta,'
|
||||
'credits.order,credits.name,credits.role,credits.personId,credits.images,parentalRatings,reverseChronological,contentRatingClass,'
|
||||
'languageDialogBody,detailScreenOptions,viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,'
|
||||
'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,seasons.releaseYear,seasons.castAndCrew,'
|
||||
'seasons.credits.birthDate,seasons.credits.meta,seasons.credits.order,seasons.credits.name,seasons.credits.role,'
|
||||
'seasons.credits.personId,seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,'
|
||||
'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,'
|
||||
'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,'
|
||||
'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,'
|
||||
'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions'
|
||||
'&filter=categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all'
|
||||
'&featureInclude=bookmark,watchlist,linearSchedule'
|
||||
)
|
||||
full_url = base_url + query
|
||||
try:
|
||||
details = self._download_json(full_url, video_id,
|
||||
note='Downloading detailed content info',
|
||||
fatal=False)
|
||||
return details or {}
|
||||
except ExtractorError:
|
||||
return {}
|
||||
|
||||
def _real_extract_single(self, url, mobj):
|
||||
# Single episode extraction using the API details.
|
||||
video_id = mobj.group('id') or mobj.group('series_id')
|
||||
details = self._get_details(video_id)
|
||||
title = details.get('title', '').strip()
|
||||
description = details.get('description', '').strip()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
mpd_url = self._search_regex(
|
||||
r'(https?://vod-playlist\.sr\.roku\.com/1\.mpd\?[^\'" >]+)',
|
||||
webpage, 'mpd URL', fatal=False)
|
||||
if not mpd_url:
|
||||
# Fallback: use CSRF token and playback API.
|
||||
self._download_webpage('https://therokuchannel.roku.com/', video_id,
|
||||
note='Initializing session', fatal=False)
|
||||
csrf_info = self._download_json('https://therokuchannel.roku.com/api/v1/csrf',
|
||||
video_id, note='Downloading CSRF token',
|
||||
fatal=False)
|
||||
csrf_token = csrf_info.get('csrf') if csrf_info else None
|
||||
headers = {
|
||||
'authority': 'therokuchannel.roku.com',
|
||||
'accept': '*/*',
|
||||
'accept-language': 'en-US,en;q=0.9',
|
||||
'user-agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
|
||||
'AppleWebKit/537.36 (KHTML, like Gecko) '
|
||||
'Chrome/102.0.5005.63 Safari/537.36'),
|
||||
'referer': 'https://therokuchannel.roku.com/',
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
if csrf_token:
|
||||
headers['csrf-token'] = csrf_token
|
||||
playback_payload = {
|
||||
'rokuId': video_id,
|
||||
'mediaFormat': 'mpeg-dash',
|
||||
'drmType': 'widevine',
|
||||
'quality': 'fhd',
|
||||
'providerId': 'rokuavod',
|
||||
}
|
||||
playback_json = self._download_json(
|
||||
'https://therokuchannel.roku.com/api/v3/playback',
|
||||
video_id,
|
||||
data=json.dumps(playback_payload).encode('utf-8'),
|
||||
headers=headers,
|
||||
note='Downloading playback JSON',
|
||||
fatal=True)
|
||||
videos = traverse_obj(playback_json, ('playbackMedia', 'videos'), expected_type=list) or []
|
||||
dash_url = None
|
||||
for video in videos:
|
||||
if video.get('streamFormat') == 'dash':
|
||||
dash_url = video.get('url')
|
||||
break
|
||||
if not dash_url:
|
||||
raise ExtractorError('Unable to extract dash URL from API', expected=True)
|
||||
parsed = urllib.parse.urlparse(dash_url)
|
||||
query_params = urllib.parse.parse_qs(parsed.query)
|
||||
if 'origin' in query_params:
|
||||
mpd_url = urllib.parse.unquote(query_params['origin'][0]).split('?')[0]
|
||||
else:
|
||||
mpd_url = dash_url
|
||||
formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract_playlist(self, url, mobj):
|
||||
# Extract episodes for a specific season.
|
||||
series_id = mobj.group('series_id')
|
||||
season_num = int_or_none(mobj.group('season')) or 1
|
||||
base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{series_id}'
|
||||
params = {
|
||||
'expand': 'credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes',
|
||||
'include': (
|
||||
'type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,'
|
||||
'categoryObjects,runTimeSeconds,castAndCrew,savable,stationDma,kidsDirected,'
|
||||
'releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,'
|
||||
'credits.meta,credits.order,credits.name,credits.role,seasons.credits.personId,credits.images,'
|
||||
'parentalRatings,reverseChronological,contentRatingClass,languageDialogBody,detailScreenOptions,'
|
||||
'viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,'
|
||||
'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,'
|
||||
'seasons.releaseYear,seasons.castAndCrew,seasons.credits.birthDate,seasons.credits.meta,'
|
||||
'seasons.credits.order,seasons.credits.name,seasons.credits.role,seasons.credits.personId,'
|
||||
'seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,'
|
||||
'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,'
|
||||
'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,'
|
||||
'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,'
|
||||
'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions'
|
||||
),
|
||||
'filter': 'categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all',
|
||||
'featureInclude': 'bookmark,watchlist,linearSchedule',
|
||||
}
|
||||
series_data = self._download_json(base_url, series_id,
|
||||
note='Downloading series data',
|
||||
fatal=True, query=params)
|
||||
series_title = series_data.get('title') or mobj.group('slug')
|
||||
entries = []
|
||||
if series_data.get('seasons'):
|
||||
for season in series_data.get('seasons', []):
|
||||
if int_or_none(season.get('seasonNumber')) == season_num:
|
||||
for episode in season.get('episodes') or []:
|
||||
episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id'))
|
||||
if not episode_id:
|
||||
continue
|
||||
episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}'
|
||||
entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id)
|
||||
entry.update({
|
||||
'title': f'{series_title} - S{season.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}',
|
||||
'season_number': int_or_none(season.get('seasonNumber')),
|
||||
'episode_number': int_or_none(episode.get('episodeNumber')),
|
||||
})
|
||||
entries.append(entry)
|
||||
break
|
||||
if not entries:
|
||||
raise ExtractorError(f'No episodes found for season {season_num}', expected=True)
|
||||
return self.playlist_result(entries, series_id, f'{series_title} - Season {season_num}')
|
||||
|
||||
def _real_extract_series(self, url, mobj):
|
||||
# Extract all episodes across all seasons.
|
||||
series_id = mobj.group('series_id')
|
||||
base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{series_id}'
|
||||
params = {
|
||||
'expand': 'credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes',
|
||||
'include': (
|
||||
'type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,'
|
||||
'categoryObjects,runTimeSeconds,castAndCrew,savable,stationDma,kidsDirected,'
|
||||
'releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,'
|
||||
'credits.meta,credits.order,credits.name,credits.role,seasons.credits.personId,credits.images,'
|
||||
'parentalRatings,reverseChronological,contentRatingClass,languageDialogBody,detailScreenOptions,'
|
||||
'viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,'
|
||||
'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,'
|
||||
'seasons.releaseYear,seasons.castAndCrew,seasons.credits.birthDate,seasons.credits.meta,'
|
||||
'seasons.credits.order,seasons.credits.name,seasons.credits.role,seasons.credits.personId,'
|
||||
'seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,'
|
||||
'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,'
|
||||
'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,'
|
||||
'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,'
|
||||
'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions'
|
||||
),
|
||||
'filter': 'categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all',
|
||||
'featureInclude': 'bookmark,watchlist,linearSchedule',
|
||||
}
|
||||
series_data = self._download_json(base_url, series_id,
|
||||
note='Downloading series data',
|
||||
fatal=True, query=params)
|
||||
series_title = series_data.get('title') or mobj.group('slug')
|
||||
entries = []
|
||||
if series_data.get('seasons'):
|
||||
for season in series_data.get('seasons', []):
|
||||
for episode in season.get('episodes') or []:
|
||||
episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id'))
|
||||
if not episode_id:
|
||||
continue
|
||||
episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}'
|
||||
entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id)
|
||||
entry.update({
|
||||
'title': f'{series_title} - S{season.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}',
|
||||
'season_number': int_or_none(season.get('seasonNumber')),
|
||||
'episode_number': int_or_none(episode.get('episodeNumber')),
|
||||
})
|
||||
entries.append(entry)
|
||||
else:
|
||||
for episode in series_data.get('episodes', []):
|
||||
episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id'))
|
||||
if not episode_id:
|
||||
continue
|
||||
episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}'
|
||||
entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id)
|
||||
entry.update({
|
||||
'title': f'{series_title} - S{episode.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}',
|
||||
'season_number': int_or_none(episode.get('seasonNumber')),
|
||||
'episode_number': int_or_none(episode.get('episodeNumber')),
|
||||
})
|
||||
entries.append(entry)
|
||||
if not entries:
|
||||
raise ExtractorError('No episodes found for series', expected=True)
|
||||
return self.playlist_result(entries, series_id, series_title)
|
Loading…
Reference in New Issue
Block a user