1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-28 01:18:30 +00:00

[ie] rework _resolve_nuxt_array to return partial results

Authored by: bashonly
This commit is contained in:
bashonly 2025-06-11 17:33:38 -05:00
parent f4acb5726d
commit 1dbd7250b4
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0
2 changed files with 66 additions and 42 deletions

View File

@@ -2032,38 +2032,51 @@ def test_search_nuxt_json(self):
'message': 'Service Unavailable', 'message': 'Service Unavailable',
}, },
} }
INVALID_LIST = [ PARTIALLY_INVALID = [(
''' '''
{"data":1}, {"data":1},
{"invalid_raw_list":2}, {"invalid_raw_list":2},
[15,16,17] [15,16,17]
''',
{'data': {'invalid_raw_list': [None, None, None]}},
), (
'''
{"data":1},
["EmptyRef",2],
"not valid JSON"
''',
{'data': None},
), (
'''
{"data":1},
["EmptyShallowRef",2],
"not valid JSON"
''',
{'data': None},
)]
INVALID = [
'''
[]
''', ''',
''' '''
{"data":1}, ["unsupported",1],
["EmptyRef",2], {"data":2},
"not valid JSON"
''',
'''
{"data":1},
["EmptyShallowRef",2],
"not valid JSON"
''',
'''
{"data":1},
["unsupported",2],
{} {}
''', ''',
] ]
DEFAULT = {'default': 'works'} DEFAULT = object()
self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD) self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD)
self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {}) self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
self.assertEqual(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT) self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT)
self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(INVALID_LIST[0]), None, fatal=False), {})
for invalid_data in INVALID_LIST[1:]: for data, expected in PARTIALLY_INVALID:
self.assertEqual( self.assertEqual(
self.ie._search_nuxt_json(HTML_TMPL.format(invalid_data), None, default=DEFAULT), self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected)
DEFAULT)
for data in INVALID:
self.assertIs(
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -1798,35 +1798,46 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT): def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
"""Resolves Nuxt rich JSON payload arrays""" """Resolves Nuxt rich JSON payload arrays"""
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
# https://github.com/nuxt/nuxt/pull/19205
if default is not NO_DEFAULT: if default is not NO_DEFAULT:
fatal = False fatal = False
if not isinstance(array, list) or not array:
error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
if fatal:
raise ExtractorError(error_msg, video_id=video_id)
elif default is NO_DEFAULT:
self.report_warning(error_msg, video_id=video_id)
return {} if default is NO_DEFAULT else default
def indirect_reviver(data): def indirect_reviver(data):
return data return data
def json_reviver(data): def json_reviver(data):
return json.loads(data) return json.loads(data)
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57 gen = devalue.parse_iter(array, revivers={
# https://github.com/nuxt/nuxt/pull/19205 'NuxtError': indirect_reviver,
try: 'EmptyShallowRef': json_reviver,
return devalue.parse(array, revivers={ 'EmptyRef': json_reviver,
'NuxtError': indirect_reviver, 'ShallowRef': indirect_reviver,
'EmptyShallowRef': json_reviver, 'ShallowReactive': indirect_reviver,
'EmptyRef': json_reviver, 'Ref': indirect_reviver,
'ShallowRef': indirect_reviver, 'Reactive': indirect_reviver,
'ShallowReactive': indirect_reviver, })
'Ref': indirect_reviver,
'Reactive': indirect_reviver, while True:
}) try:
except (IndexError, TypeError, ValueError) as e: error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}'
if default is not NO_DEFAULT: if fatal:
return default raise ExtractorError(error_msg, video_id=video_id)
error_msg = f'Unable to resolve Nuxt JSON data: {e}' elif default is NO_DEFAULT:
if fatal: self.report_warning(error_msg, video_id=video_id, only_once=True)
raise ExtractorError(error_msg, video_id=video_id) else:
self.report_warning(error_msg, video_id=video_id) self.write_debug(f'{video_id}: {error_msg}', only_once=True)
return {} except StopIteration as error:
return error.value or ({} if default is NO_DEFAULT else default)
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT): def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
"""Parses metadata from Nuxt rich JSON payloads embedded in HTML""" """Parses metadata from Nuxt rich JSON payloads embedded in HTML"""