1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-05 21:08:33 +00:00

further simplify, improve comments

Authored by: bashonly
This commit is contained in:
bashonly 2025-06-09 14:10:56 -05:00
parent e6133732c7
commit ce6b8db516
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0
2 changed files with 17 additions and 28 deletions

View File

@ -1952,14 +1952,14 @@ def test_search_nuxt_json(self):
<script data-ssr="true" id="__NUXT_DATA__" type="application/json"> <script data-ssr="true" id="__NUXT_DATA__" type="application/json">
[ [
["ShallowReactive",1], ["ShallowReactive",1],
{"data":2,"state":22,"once":26}, {"data":2,"state":21,"once":25},
["ShallowReactive",3], ["ShallowReactive",3],
{"$abcdef123456":4}, {"$abcdef123456":4},
{"podcast":5,"activeEpisodeData":7}, {"podcast":5,"activeEpisodeData":7},
{"podcast":6,"seasons":14}, {"podcast":6,"seasons":14},
{"title":10,"id":11}, {"title":10,"id":11},
["Reactive",8], ["Reactive",8],
{"episode":9,"creators":18,"trick_data":19,"empty_list":21}, {"episode":9,"creators":18,"empty_list":20},
{"title":12,"id":13}, {"title":12,"id":13},
"Series Title", "Series Title",
"podcast-id-01", "podcast-id-01",
@ -1969,12 +1969,11 @@ def test_search_nuxt_json(self):
1, 1,
2, 2,
3, 3,
[20], [19],
[99,"gotcha"],
"Podcast Creator", "Podcast Creator",
[], [],
{"$ssite-config":23}, {"$ssite-config":22},
{"env":24,"name":25}, {"env":23,"name":24},
"production", "production",
"podcast-website", "podcast-website",
["Set"] ["Set"]
@ -1994,7 +1993,6 @@ def test_search_nuxt_json(self):
'id': 'episode-id-99', 'id': 'episode-id-99',
}, },
'creators': ['Podcast Creator'], 'creators': ['Podcast Creator'],
'trick_data': [99, 'gotcha'],
'empty_list': [], 'empty_list': [],
}, },
} }
@ -2024,9 +2022,6 @@ def test_search_nuxt_json(self):
self.assertEqual(self.ie._search_nuxt_json(HTML, None, traverse=None), FULL) self.assertEqual(self.ie._search_nuxt_json(HTML, None, traverse=None), FULL)
self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {}) self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
self.assertEqual(self.ie._search_nuxt_json(BAD_HTML, None, fatal=False), {}) self.assertEqual(self.ie._search_nuxt_json(BAD_HTML, None, fatal=False), {})
self.assertEqual(self.ie._search_nuxt_json(HTML, None, fatal=False, allow_recursion=1), {})
with self.assertRaisesRegex(ExtractorError, r'recursion limit reached'):
self.ie._search_nuxt_json(HTML, None, allow_recursion=1)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1795,45 +1795,39 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal) ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
return traverse_obj(ret, traverse) or {} return traverse_obj(ret, traverse) or {}
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, traverse=('data', ..., {dict}, any), allow_recursion=100): def _search_nuxt_json(self, webpage, video_id, *, fatal=True, traverse=('data', ..., {dict}, any)):
"""Parses Nuxt.js metadata when it has already been rendered into a JSON array""" """Parses metadata from Nuxt rich JSON payload arrays"""
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
ERROR_MSG = 'Unable to extract NUXT JSON data' # https://github.com/nuxt/nuxt/pull/19205
array = self._search_json( array = self._search_json(
r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage, 'nuxt data', video_id, r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage, 'nuxt data', video_id,
contains_pattern=r'\[(?s:.+)\]', default=NO_DEFAULT if fatal else [{}]) contains_pattern=r'\[(?s:.+)\]', default=NO_DEFAULT if fatal else [{}])
def extract_element(element, allow_recursion): def extract_element(element):
if allow_recursion < 0:
msg = f'{ERROR_MSG}: recursion limit reached'
if fatal:
raise ExtractorError(msg)
self.report_warning(msg, video_id=video_id, only_once=True)
return None
allow_recursion -= 1
try: try:
if isinstance(element, list) and element: if isinstance(element, list) and element:
if element[0] in ('ShallowReactive', 'Reactive') and isinstance(element[1], int): if element[0] in ('ShallowReactive', 'Reactive') and isinstance(element[1], int):
return extract_element(array[element[1]], allow_recursion) return extract_element(array[element[1]])
if all(isinstance(ele, int) for ele in element): if all(isinstance(ele, int) for ele in element):
return [extract_element(array[ele], allow_recursion) for ele in element] return [extract_element(array[ele]) for ele in element]
if isinstance(element, dict): if isinstance(element, dict):
ret = {} ret = {}
for k, v in element.items(): for k, v in element.items():
if isinstance(v, int): if isinstance(v, int):
ret[k] = extract_element(array[v], allow_recursion) ret[k] = extract_element(array[v])
else: else:
ret[k] = v ret[k] = v
return ret return ret
except IndexError as e: except IndexError as e:
error_msg = f'Unable to extract NUXT JSON data: {e}'
if not fatal: if not fatal:
self.report_warning(error_msg, video_id=video_id, only_once=True)
return None return None
raise ExtractorError(f'{ERROR_MSG}: {e}') raise ExtractorError(error_msg)
return element return element
return traverse_obj(extract_element(array[0], allow_recursion), traverse) or {} return traverse_obj(extract_element(array[0]), traverse) or {}
@staticmethod @staticmethod
def _hidden_inputs(html): def _hidden_inputs(html):