diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 34c98b537..a79c959f6 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1747,6 +1747,7 @@ RokfinSearchIE, RokfinStackIE, ) +from .roku import RokuChannelIE from .roosterteeth import ( RoosterTeethIE, RoosterTeethSeriesIE, diff --git a/yt_dlp/extractor/roku.py b/yt_dlp/extractor/roku.py new file mode 100644 index 000000000..4067ce467 --- /dev/null +++ b/yt_dlp/extractor/roku.py @@ -0,0 +1,278 @@ +import json +import re +import urllib.parse + +from yt_dlp.extractor.common import ExtractorError +from yt_dlp.utils import int_or_none, traverse_obj + +from .common import InfoExtractor + + +class RokuChannelIE(InfoExtractor): + # The regex captures either /watch/ or /details//[/season-] + _VALID_URL = r'https?://(?:www\.)?therokuchannel\.roku\.com/(?:(?:watch/(?P[0-9a-f]{32}))|(?:details/(?P[0-9a-f]{32})/(?P[^/]+)(?:/season-(?P\d+))?))' + _TESTS = [{ + # Single episode test (using a details URL with an episode slug) + 'url': 'https://therokuchannel.roku.com/details/a9474f67937c5986aa1ac0747f5bb615/beastmaster-s1-e1-the-legend-continues', + 'md5': 'b8a683e430a79e20295cff9848bea865', + 'info_dict': { + 'id': 'a9474f67937c5986aa1ac0747f5bb615', + 'ext': 'mp4', + 'title': 'The Legend Continues', + 'description': 'Dar begins his quest to rescue his love, Kyra, after the Terron warriors abduct her.', + 'episode_number': 1, + 'season_number': 1, + 'series': 'BeastMaster', + 'release_date': '19991004', # from releaseDate "1999-10-04T00:00:00Z" + 'duration': 3600.0, + }, + 'skip': 'Requires live website and valid cookies', + }, { + # Season playlist test. + 'url': 'https://therokuchannel.roku.com/details/48af1a617b1654a8a73cddefddedc7b8/beastmaster/season-2', + 'playlist_count': 22, + 'info_dict': { + 'id': '48af1a617b1654a8a73cddefddedc7b8', + 'title': 'BeastMaster - Season 2', + }, + 'skip': 'Requires live website and valid cookies', + }, { + # Full series playlist test. + 'url': 'https://therokuchannel.roku.com/details/48af1a617b1654a8a73cddefddedc7b8/beastmaster', + 'playlist_count': 64, + 'info_dict': { + 'id': '48af1a617b1654a8a73cddefddedc7b8', + 'title': 'BeastMaster', + }, + 'skip': 'Requires live website and valid cookies', + }, { + # Only-matching test for a DRM-protected movie. + 'url': 'https://therokuchannel.roku.com/details/b1f983c03f27531388474c46372b956c/friday-after-next', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + # If the URL contains a "-s#-e#" pattern anywhere, treat it as a single episode extraction. + if re.search(r'-s\d+e\d+', url, re.IGNORECASE): + return self._real_extract_single(url, mobj) + # For /details/ URLs, decide based on presence of season info: + if mobj.group('series_id'): + # Query the API details using the series_id. + details = self._get_details(mobj.group('series_id')) + # If no "seasons" key is present, assume it's a single episode. + if 'seasons' not in details: + return self._real_extract_single(url, mobj) + # Otherwise, if a season number is provided, extract that season's episodes. + if mobj.group('season'): + return self._real_extract_playlist(url, mobj) + # Otherwise treat the URL as representing the full series. + return self._real_extract_series(url, mobj) + # Otherwise, if the URL is of /watch/ type, extract single video. + return self._real_extract_single(url, mobj) + + def _get_details(self, video_id): + # Build the full API URL with detailed query parameters. + base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{video_id}' + query = ( + '?expand=credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes' + '&include=type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,categoryObjects,runTimeSeconds,castAndCrew,' + 'savable,stationDma,kidsDirected,releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,credits.meta,' + 'credits.order,credits.name,credits.role,credits.personId,credits.images,parentalRatings,reverseChronological,contentRatingClass,' + 'languageDialogBody,detailScreenOptions,viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,' + 'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,seasons.releaseYear,seasons.castAndCrew,' + 'seasons.credits.birthDate,seasons.credits.meta,seasons.credits.order,seasons.credits.name,seasons.credits.role,' + 'seasons.credits.personId,seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,' + 'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,' + 'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,' + 'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,' + 'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions' + '&filter=categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all' + '&featureInclude=bookmark,watchlist,linearSchedule' + ) + full_url = base_url + query + try: + details = self._download_json(full_url, video_id, + note='Downloading detailed content info', + fatal=False) + return details or {} + except ExtractorError: + return {} + + def _real_extract_single(self, url, mobj): + # Single episode extraction using the API details. + video_id = mobj.group('id') or mobj.group('series_id') + details = self._get_details(video_id) + title = details.get('title', '').strip() + description = details.get('description', '').strip() + webpage = self._download_webpage(url, video_id) + mpd_url = self._search_regex( + r'(https?://vod-playlist\.sr\.roku\.com/1\.mpd\?[^\'" >]+)', + webpage, 'mpd URL', fatal=False) + if not mpd_url: + # Fallback: use CSRF token and playback API. + self._download_webpage('https://therokuchannel.roku.com/', video_id, + note='Initializing session', fatal=False) + csrf_info = self._download_json('https://therokuchannel.roku.com/api/v1/csrf', + video_id, note='Downloading CSRF token', + fatal=False) + csrf_token = csrf_info.get('csrf') if csrf_info else None + headers = { + 'authority': 'therokuchannel.roku.com', + 'accept': '*/*', + 'accept-language': 'en-US,en;q=0.9', + 'user-agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/102.0.5005.63 Safari/537.36'), + 'referer': 'https://therokuchannel.roku.com/', + 'Content-Type': 'application/json', + } + if csrf_token: + headers['csrf-token'] = csrf_token + playback_payload = { + 'rokuId': video_id, + 'mediaFormat': 'mpeg-dash', + 'drmType': 'widevine', + 'quality': 'fhd', + 'providerId': 'rokuavod', + } + playback_json = self._download_json( + 'https://therokuchannel.roku.com/api/v3/playback', + video_id, + data=json.dumps(playback_payload).encode('utf-8'), + headers=headers, + note='Downloading playback JSON', + fatal=True) + videos = traverse_obj(playback_json, ('playbackMedia', 'videos'), expected_type=list) or [] + dash_url = None + for video in videos: + if video.get('streamFormat') == 'dash': + dash_url = video.get('url') + break + if not dash_url: + raise ExtractorError('Unable to extract dash URL from API', expected=True) + parsed = urllib.parse.urlparse(dash_url) + query_params = urllib.parse.parse_qs(parsed.query) + if 'origin' in query_params: + mpd_url = urllib.parse.unquote(query_params['origin'][0]).split('?')[0] + else: + mpd_url = dash_url + formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash') + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + } + + def _real_extract_playlist(self, url, mobj): + # Extract episodes for a specific season. + series_id = mobj.group('series_id') + season_num = int_or_none(mobj.group('season')) or 1 + base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{series_id}' + params = { + 'expand': 'credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes', + 'include': ( + 'type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,' + 'categoryObjects,runTimeSeconds,castAndCrew,savable,stationDma,kidsDirected,' + 'releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,' + 'credits.meta,credits.order,credits.name,credits.role,seasons.credits.personId,credits.images,' + 'parentalRatings,reverseChronological,contentRatingClass,languageDialogBody,detailScreenOptions,' + 'viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,' + 'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,' + 'seasons.releaseYear,seasons.castAndCrew,seasons.credits.birthDate,seasons.credits.meta,' + 'seasons.credits.order,seasons.credits.name,seasons.credits.role,seasons.credits.personId,' + 'seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,' + 'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,' + 'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,' + 'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,' + 'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions' + ), + 'filter': 'categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all', + 'featureInclude': 'bookmark,watchlist,linearSchedule', + } + series_data = self._download_json(base_url, series_id, + note='Downloading series data', + fatal=True, query=params) + series_title = series_data.get('title') or mobj.group('slug') + entries = [] + if series_data.get('seasons'): + for season in series_data.get('seasons', []): + if int_or_none(season.get('seasonNumber')) == season_num: + for episode in season.get('episodes') or []: + episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id')) + if not episode_id: + continue + episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}' + entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id) + entry.update({ + 'title': f'{series_title} - S{season.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}', + 'season_number': int_or_none(season.get('seasonNumber')), + 'episode_number': int_or_none(episode.get('episodeNumber')), + }) + entries.append(entry) + break + if not entries: + raise ExtractorError(f'No episodes found for season {season_num}', expected=True) + return self.playlist_result(entries, series_id, f'{series_title} - Season {season_num}') + + def _real_extract_series(self, url, mobj): + # Extract all episodes across all seasons. + series_id = mobj.group('series_id') + base_url = f'https://therokuchannel.roku.com/api/v2/homescreen/content/https%3A%2F%2Fcontent.sr.roku.com%2Fcontent%2Fv1%2Froku-trc%2F{series_id}' + params = { + 'expand': 'credits,viewOptions,categoryObjects,viewOptions.providerDetails,series,season,season.episodes,next,episodes,seasons,seasons.episodes', + 'include': ( + 'type,title,imageMap.detailPoster,imageMap.detailBackground,bobs.detailScreen,' + 'categoryObjects,runTimeSeconds,castAndCrew,savable,stationDma,kidsDirected,' + 'releaseDate,releaseYear,description,descriptions,indicators,genres,credits.birthDate,' + 'credits.meta,credits.order,credits.name,credits.role,seasons.credits.personId,credits.images,' + 'parentalRatings,reverseChronological,contentRatingClass,languageDialogBody,detailScreenOptions,' + 'viewOptions,episodeNumber,seasonNumber,sportInfo,eventState,series.title,season,' + 'seasons.title,seasons.seasonNumber,seasons.description,seasons.descriptions,' + 'seasons.releaseYear,seasons.castAndCrew,seasons.credits.birthDate,seasons.credits.meta,' + 'seasons.credits.order,seasons.credits.name,seasons.credits.role,seasons.credits.personId,' + 'seasons.credits.images,seasons.imageMap.detailBackground,seasons.episodes.title,' + 'seasons.episodes.description,seasons.episodes.descriptions.40,seasons.episodes.descriptions.60,' + 'seasons.episodes.episodeNumber,seasons.episodes.seasonNumber,seasons.episodes.images,' + 'seasons.episodes.imageMap.grid,seasons.episodes.indicators,seasons.episodes.releaseDate,' + 'seasons.episodes.viewOptions,episodes.episodeNumber,episodes.seasonNumber,episodes.viewOptions' + ), + 'filter': 'categoryObjects:genreAppropriate eq true,seasons.episodes:(not empty(viewOptions)):all', + 'featureInclude': 'bookmark,watchlist,linearSchedule', + } + series_data = self._download_json(base_url, series_id, + note='Downloading series data', + fatal=True, query=params) + series_title = series_data.get('title') or mobj.group('slug') + entries = [] + if series_data.get('seasons'): + for season in series_data.get('seasons', []): + for episode in season.get('episodes') or []: + episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id')) + if not episode_id: + continue + episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}' + entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id) + entry.update({ + 'title': f'{series_title} - S{season.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}', + 'season_number': int_or_none(season.get('seasonNumber')), + 'episode_number': int_or_none(episode.get('episodeNumber')), + }) + entries.append(entry) + else: + for episode in series_data.get('episodes', []): + episode_id = episode.get('id') or traverse_obj(episode, ('meta', 'id')) + if not episode_id: + continue + episode_url = f'https://therokuchannel.roku.com/watch/{episode_id}' + entry = self.url_result(episode_url, ie_key=self.ie_key(), video_id=episode_id) + entry.update({ + 'title': f'{series_title} - S{episode.get("seasonNumber")}E{episode.get("episodeNumber")} - {episode.get("title", "")}', + 'season_number': int_or_none(episode.get('seasonNumber')), + 'episode_number': int_or_none(episode.get('episodeNumber')), + }) + entries.append(entry) + if not entries: + raise ExtractorError('No episodes found for series', expected=True) + return self.playlist_result(entries, series_id, series_title)