1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-28 01:18:30 +00:00

split into 2 methods, support Set and Map, add test data

Authored by: bashonly
This commit is contained in:
bashonly 2025-06-09 17:54:10 -05:00
parent 0beb5faf3c
commit 593e3ec151
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0
2 changed files with 35 additions and 21 deletions

View File

@ -1952,7 +1952,7 @@ def test_search_nuxt_json(self):
<script data-ssr="true" id="__NUXT_DATA__" type="application/json"> <script data-ssr="true" id="__NUXT_DATA__" type="application/json">
[ [
["ShallowReactive",1], ["ShallowReactive",1],
{"data":2,"state":21,"once":25}, {"data":2,"state":21,"once":25,"_errors":28},
["ShallowReactive",3], ["ShallowReactive",3],
{"$abcdef123456":4}, {"$abcdef123456":4},
{"podcast":5,"activeEpisodeData":7}, {"podcast":5,"activeEpisodeData":7},
@ -1973,10 +1973,14 @@ def test_search_nuxt_json(self):
"Podcast Creator", "Podcast Creator",
[], [],
{"$ssite-config":22}, {"$ssite-config":22},
{"env":23,"name":24}, {"env":23,"name":24,"map":26},
"production", "production",
"podcast-website", "podcast-website",
["Set"] ["Set"],
["Reactive",27],
["Map"],
["ShallowReactive",29],
{}
] ]
</script>''' </script>'''
PAYLOAD = { PAYLOAD = {
@ -2003,9 +2007,11 @@ def test_search_nuxt_json(self):
'$ssite-config': { '$ssite-config': {
'env': 'production', 'env': 'production',
'name': 'podcast-website', 'name': 'podcast-website',
'map': {},
}, },
}, },
'once': None, 'once': [],
'_errors': {},
} }
BAD_HTML = ''' BAD_HTML = '''
<script data-ssr="true" id="__NUXT_DATA__" type="application/json"> <script data-ssr="true" id="__NUXT_DATA__" type="application/json">

View File

@ -1795,29 +1795,21 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal) ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
return traverse_obj(ret, traverse) or {} return traverse_obj(ret, traverse) or {}
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT): def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
"""Parses metadata from Nuxt rich JSON payload arrays""" """Resolves Nuxt rich JSON payload arrays"""
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57 # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
# https://github.com/nuxt/nuxt/pull/19205 # https://github.com/nuxt/nuxt/pull/19205
IGNORED_TYPES = ('Map', 'Set', 'EmptyRef', 'EmptyShallowRef', 'NuxtError')
ERROR_MSG = 'Unable to extract Nuxt JSON data' ERROR_MSG = 'Unable to extract Nuxt JSON data'
if default is not NO_DEFAULT:
fatal = False
array = self._search_json(
r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage, 'Nuxt JSON data', video_id,
contains_pattern=r'\[(?s:.+)\]', default=NO_DEFAULT if fatal else [])
result = [None] result = [None]
stack = [(result, 0, 0)] stack = [(result, 0, 0)]
while stack: while stack:
target, index, source = stack.pop() target, index, source = stack.pop()
if 0 <= source < len(array): if 0 <= source < len(array):
element = array[source] element = array[source]
elif fatal:
raise ExtractorError(ERROR_MSG, video_id=video_id)
elif default is NO_DEFAULT: elif default is NO_DEFAULT:
if fatal:
raise ExtractorError(ERROR_MSG, video_id=video_id)
self.report_warning(ERROR_MSG, video_id=video_id) self.report_warning(ERROR_MSG, video_id=video_id)
return {} return {}
else: else:
@ -1827,11 +1819,16 @@ def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT
if element[0] in ('ShallowReactive', 'Reactive', 'ShallowRef', 'Ref'): if element[0] in ('ShallowReactive', 'Reactive', 'ShallowRef', 'Ref'):
stack.append((target, index, element[1])) stack.append((target, index, element[1]))
continue continue
if element[0] not in IGNORED_TYPES: if element[0] == 'Map':
target[index] = {}
elif element[0] == 'Set':
target[index] = []
else:
target[index] = None
if element[0] not in ('EmptyRef', 'EmptyShallowRef', 'NuxtError'):
self.write_debug( self.write_debug(
f'{video_id}: Discarding unsupported type in Nuxt payload: {element[0]}', f'{video_id}: Discarding unsupported type in Nuxt payload: {element[0]}',
only_once=True) only_once=True)
target[index] = None
continue continue
if isinstance(element, list): if isinstance(element, list):
@ -1850,6 +1847,17 @@ def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT
return result[0] return result[0]
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
    """Parses metadata from Nuxt rich JSON payloads embedded in HTML

    Searches `webpage` for the JSON array that follows the opening
    <script> tag carrying id="__NUXT_DATA__" and passes that array on to
    `_resolve_nuxt_array`, whose result is returned unchanged.

    @param webpage      HTML text to search
    @param video_id     identifier forwarded to the search/resolve helpers
    @param fatal        forwarded to `_resolve_nuxt_array`; forced to False
                        whenever `default` is supplied
    @param default      forwarded to `_resolve_nuxt_array`
    """
    # Supplying an explicit default always makes the lookup non-fatal
    fatal = False if default is not NO_DEFAULT else fatal

    # In non-fatal mode the search falls back to an empty array, leaving the
    # choice of the final return value to the resolver
    search_fallback = NO_DEFAULT if fatal else []
    nuxt_array = self._search_json(
        r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage, 'Nuxt JSON data', video_id,
        contains_pattern=r'\[(?s:.+)\]', default=search_fallback)

    return self._resolve_nuxt_array(nuxt_array, video_id, fatal=fatal, default=default)
@staticmethod @staticmethod
def _hidden_inputs(html): def _hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)