1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-28 01:18:30 +00:00

[ie] rework _resolve_nuxt_array to return partial results

Authored by: bashonly
This commit is contained in:
bashonly 2025-06-11 17:33:38 -05:00
parent f4acb5726d
commit 1dbd7250b4
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0
2 changed files with 66 additions and 42 deletions

View File

@@ -2032,38 +2032,51 @@ def test_search_nuxt_json(self):
'message': 'Service Unavailable',
},
}
INVALID_LIST = [
PARTIALLY_INVALID = [(
'''
{"data":1},
{"invalid_raw_list":2},
[15,16,17]
''',
{'data': {'invalid_raw_list': [None, None, None]}},
), (
'''
{"data":1},
["EmptyRef",2],
"not valid JSON"
''',
{'data': None},
), (
'''
{"data":1},
["EmptyShallowRef",2],
"not valid JSON"
''',
{'data': None},
)]
INVALID = [
'''
{"data":1},
["unsupported",2],
[]
''',
'''
["unsupported",1],
{"data":2},
{}
''',
]
DEFAULT = {'default': 'works'}
DEFAULT = object()
self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD)
self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
self.assertEqual(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT)
self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(INVALID_LIST[0]), None, fatal=False), {})
for invalid_data in INVALID_LIST[1:]:
self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT)
for data, expected in PARTIALLY_INVALID:
self.assertEqual(
self.ie._search_nuxt_json(HTML_TMPL.format(invalid_data), None, default=DEFAULT),
DEFAULT)
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected)
for data in INVALID:
self.assertIs(
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
if __name__ == '__main__':

View File

@@ -1798,19 +1798,26 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
"""Resolves Nuxt rich JSON payload arrays"""
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
# https://github.com/nuxt/nuxt/pull/19205
if default is not NO_DEFAULT:
fatal = False
if not isinstance(array, list) or not array:
error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
if fatal:
raise ExtractorError(error_msg, video_id=video_id)
elif default is NO_DEFAULT:
self.report_warning(error_msg, video_id=video_id)
return {} if default is NO_DEFAULT else default
def indirect_reviver(data):
return data
def json_reviver(data):
return json.loads(data)
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
# https://github.com/nuxt/nuxt/pull/19205
try:
return devalue.parse(array, revivers={
gen = devalue.parse_iter(array, revivers={
'NuxtError': indirect_reviver,
'EmptyShallowRef': json_reviver,
'EmptyRef': json_reviver,
@@ -1819,14 +1826,18 @@ def json_reviver(data):
'Ref': indirect_reviver,
'Reactive': indirect_reviver,
})
except (IndexError, TypeError, ValueError) as e:
if default is not NO_DEFAULT:
return default
error_msg = f'Unable to resolve Nuxt JSON data: {e}'
while True:
try:
error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}'
if fatal:
raise ExtractorError(error_msg, video_id=video_id)
self.report_warning(error_msg, video_id=video_id)
return {}
elif default is NO_DEFAULT:
self.report_warning(error_msg, video_id=video_id, only_once=True)
else:
self.write_debug(f'{video_id}: {error_msg}', only_once=True)
except StopIteration as error:
return error.value or ({} if default is NO_DEFAULT else default)
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
"""Parses metadata from Nuxt rich JSON payloads embedded in HTML"""