1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-13 16:08:29 +00:00

fix: Correct SouthParkDeIE _real_extract method

This commit is contained in:
Randalix 2025-08-13 00:14:45 +02:00 committed by Randalix (aider)
parent 8207f3b824
commit d3de29389d

View File

@ -111,14 +111,10 @@ def _real_extract(self, url):
data = self._parse_json(self._search_regex(
r'window\.__DATA__\s*=\s*({.+?});', webpage, 'data'), display_id)
# Try multiple paths and, crucially, get only the FIRST match, not a list
video_detail = traverse_obj(data,
# Path for regular episodes
('children', lambda _, v: v.get('type') == 'MainContainer',
'children', 0, 'children', 0, 'props', 'videoDetail'),
# Fallback path for special episodes
('children', 0, 'videoDetail'),
get_all=False)
video_detail = traverse_obj(data, (
'children', lambda _, v: v.get('type') == 'MainContainer',
'children', 0, 'children', 0, 'props', 'videoDetail'
), ('children', 0, 'videoDetail'), get_all=False)
if not video_detail:
raise ExtractorError('Could not find video data in page')
@ -131,12 +127,11 @@ def _real_extract(self, url):
'clientPlatform': 'mobile',
})
hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str, get_all=False)
hls_url = traverse_obj(api_data, ('stitchedstream', 'source'), expected_type=str)
return {
info = {
'id': video_detail['id'],
'display_id': display_id,
'url': hls_url,
'title': video_detail.get('title'),
'description': video_detail.get('description'),
'duration': traverse_obj(video_detail, ('duration', 'milliseconds'), expected_type=int) / 1000,
@ -145,6 +140,9 @@ def _real_extract(self, url):
'timestamp': traverse_obj(video_detail, ('publishDate', 'timestamp')),
'series': traverse_obj(video_detail, ('parentEntity', 'title')),
}
info['formats'] = self._extract_m3u8_formats(
hls_url, display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
return info
class SouthParkLatIE(SouthParkIE): # XXX: Do not subclass from concrete IE