From fdfac32149d5a4ab0365a02159057b94f15044fd Mon Sep 17 00:00:00 2001
From: Randalix
Date: Tue, 12 Aug 2025 23:55:21 +0200
Subject: [PATCH] feat: Update SouthParkDeIE to use new API extraction logic

---
Note: rebuilt from a copy whose line breaks had been collapsed; the commit
hash and the post-image blob hash predate this revision of the patch.

 yt_dlp/extractor/southpark.py | 67 +++++++++++++++++++++++++++++++----
 1 file changed, 60 insertions(+), 7 deletions(-)

diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py
index 3d661a86ac..61251a010a 100644
--- a/yt_dlp/extractor/southpark.py
+++ b/yt_dlp/extractor/southpark.py
@@ -1,4 +1,11 @@
 from .mtv import MTVServicesInfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    random_uuidv4,
+    traverse_obj,
+)
 
 
 class SouthParkIE(MTVServicesInfoExtractor):
@@ -99,14 +106,60 @@ class SouthParkDeIE(SouthParkIE):  # XXX: Do not subclass from concrete IE
         },
     }]
 
-    def _get_feed_url(self, uri, url=None):
-        video_id = self._id_from_uri(uri)
-        config = self._download_json(
-            f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}', video_id)
-        return self._remove_template_parameter(config['feedWithQueryParams'])
+    def _real_extract(self, url):
+        """Extract one video via the page's embedded data and its video service API.
+
+        Returns an info dict; raises ExtractorError when the page data or the
+        API response lacks the expected video details or stream URL.
+        """
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
 
-    def _get_feed_query(self, uri):
-        return
+        # _search_json decodes the object with a real JSON parser, so a '};' inside
+        # a string value or newlines in the blob cannot truncate it like '{.+?};' could
+        data = self._search_json(
+            r'window\.__DATA__\s*=', webpage, 'data', display_id)
+
+        # Find the videoDetail object by first finding the MainContainer component
+        video_detail = traverse_obj(data, (
+            'children', lambda _, v: v.get('type') == 'MainContainer',
+            'children', 0, 'children', 0, 'props', 'videoDetail',
+        ), get_all=False)
+
+        # Fallback for a simpler data structure found on some pages
+        if not video_detail:
+            video_detail = traverse_obj(data, ('children', 0, 'videoDetail'), get_all=False)
+        if not video_detail:
+            raise ExtractorError('Unable to find video details in page data')
+
+        # Call the Topaz API to get the final stream URL
+        api_data = self._download_json(
+            video_detail['videoServiceUrl'], display_id, 'Fetching video metadata', query={
+                'ssus': random_uuidv4(),
+                'clientPlatform': 'mobile',
+            })
+
+        hls_url = traverse_obj(api_data, ('stitchedstream', 'source'))
+        if not hls_url:
+            raise ExtractorError('Unable to find stream URL in API response')
+        # NOTE(review): source is presumably an HLS manifest; expand it into formats
+        # rather than passing the manifest URL on as a single direct download
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, display_id, 'mp4')
+
+        return {
+            'id': video_detail['id'],
+            'display_id': display_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'title': video_detail.get('title'),
+            'description': video_detail.get('description'),
+            # *_or_none helpers yield None for missing/odd values instead of raising
+            'duration': float_or_none(traverse_obj(video_detail, ('duration', 'milliseconds')), 1000),
+            'season_number': int_or_none(video_detail.get('seasonNumber')),
+            'episode_number': int_or_none(video_detail.get('episodeAiringOrder')),
+            'timestamp': int_or_none(traverse_obj(video_detail, ('publishDate', 'timestamp'))),
+            'series': traverse_obj(video_detail, ('parentEntity', 'title')),
+        }
 
 
 class SouthParkLatIE(SouthParkIE):  # XXX: Do not subclass from concrete IE