From d3de29389dab6c40bcd48e2bdd32b9c71d3f0a3b Mon Sep 17 00:00:00 2001 From: Randalix Date: Wed, 13 Aug 2025 00:14:45 +0200 Subject: [PATCH] fix: Correct SouthParkDeIE _real_extract method --- yt_dlp/extractor/southpark.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 9b79040a22..5d70c9d830 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -111,14 +111,10 @@ def _real_extract(self, url): data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id) - # Try multiple paths and, crucially, get only the FIRST match, not a list - video_detail = traverse_obj(data, - # Path for regular episodes - ('children', lambda _, v: v.get('type') == 'MainContainer', - 'children', 0, 'children', 0, 'props', 'videoDetail'), - # Fallback path for special episodes - ('children', 0, 'videoDetail'), - get_all=False) + video_detail = traverse_obj(data, ( + 'children', lambda _, v: v.get('type') == 'MainContainer', + 'children', 0, 'children', 0, 'props', 'videoDetail' + ), ('children', 0, 'videoDetail'), get_all=False) if not video_detail: raise ExtractorError('Could not find video data in page') @@ -131,12 +127,11 @@ def _real_extract(self, url): 'clientPlatform': 'mobile', }) - hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str, get_all=False) + hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str) - return { + info = { 'id': video_detail['id'], 'display_id': display_id, - 'url': hls_url, 'title': video_detail.get('title'), 'description': video_detail.get('description'), 'duration': traverse_obj(video_detail, ('duration', 'milliseconds'), expected_type=int) / 1000, @@ -145,6 +140,9 @@ def _real_extract(self, url): 'timestamp': traverse_obj(video_detail, ('publishDate', 'timestamp')), 'series': traverse_obj(video_detail, ('parentEntity', 'title')), } + info['formats'] = self._extract_m3u8_formats( + hls_url, display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + return info class SouthParkLatIE(SouthParkIE): # XXX: Do not subclass from concrete IE