mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 01:18:30 +00:00
[ie] rework _resolve_nuxt_array
to return partial results
Authored by: bashonly
This commit is contained in:
parent
f4acb5726d
commit
1dbd7250b4
@ -2032,38 +2032,51 @@ def test_search_nuxt_json(self):
|
||||
'message': 'Service Unavailable',
|
||||
},
|
||||
}
|
||||
INVALID_LIST = [
|
||||
PARTIALLY_INVALID = [(
|
||||
'''
|
||||
{"data":1},
|
||||
{"invalid_raw_list":2},
|
||||
[15,16,17]
|
||||
{"data":1},
|
||||
{"invalid_raw_list":2},
|
||||
[15,16,17]
|
||||
''',
|
||||
{'data': {'invalid_raw_list': [None, None, None]}},
|
||||
), (
|
||||
'''
|
||||
{"data":1},
|
||||
["EmptyRef",2],
|
||||
"not valid JSON"
|
||||
''',
|
||||
{'data': None},
|
||||
), (
|
||||
'''
|
||||
{"data":1},
|
||||
["EmptyShallowRef",2],
|
||||
"not valid JSON"
|
||||
''',
|
||||
{'data': None},
|
||||
)]
|
||||
INVALID = [
|
||||
'''
|
||||
[]
|
||||
''',
|
||||
'''
|
||||
{"data":1},
|
||||
["EmptyRef",2],
|
||||
"not valid JSON"
|
||||
''',
|
||||
'''
|
||||
{"data":1},
|
||||
["EmptyShallowRef",2],
|
||||
"not valid JSON"
|
||||
''',
|
||||
'''
|
||||
{"data":1},
|
||||
["unsupported",2],
|
||||
["unsupported",1],
|
||||
{"data":2},
|
||||
{}
|
||||
''',
|
||||
]
|
||||
DEFAULT = {'default': 'works'}
|
||||
DEFAULT = object()
|
||||
|
||||
self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD)
|
||||
self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
|
||||
self.assertEqual(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT)
|
||||
self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(INVALID_LIST[0]), None, fatal=False), {})
|
||||
for invalid_data in INVALID_LIST[1:]:
|
||||
self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT)
|
||||
|
||||
for data, expected in PARTIALLY_INVALID:
|
||||
self.assertEqual(
|
||||
self.ie._search_nuxt_json(HTML_TMPL.format(invalid_data), None, default=DEFAULT),
|
||||
DEFAULT)
|
||||
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected)
|
||||
|
||||
for data in INVALID:
|
||||
self.assertIs(
|
||||
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1798,35 +1798,46 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
|
||||
|
||||
def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
|
||||
"""Resolves Nuxt rich JSON payload arrays"""
|
||||
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
|
||||
# https://github.com/nuxt/nuxt/pull/19205
|
||||
if default is not NO_DEFAULT:
|
||||
fatal = False
|
||||
|
||||
if not isinstance(array, list) or not array:
|
||||
error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
|
||||
if fatal:
|
||||
raise ExtractorError(error_msg, video_id=video_id)
|
||||
elif default is NO_DEFAULT:
|
||||
self.report_warning(error_msg, video_id=video_id)
|
||||
return {} if default is NO_DEFAULT else default
|
||||
|
||||
def indirect_reviver(data):
|
||||
return data
|
||||
|
||||
def json_reviver(data):
|
||||
return json.loads(data)
|
||||
|
||||
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
|
||||
# https://github.com/nuxt/nuxt/pull/19205
|
||||
try:
|
||||
return devalue.parse(array, revivers={
|
||||
'NuxtError': indirect_reviver,
|
||||
'EmptyShallowRef': json_reviver,
|
||||
'EmptyRef': json_reviver,
|
||||
'ShallowRef': indirect_reviver,
|
||||
'ShallowReactive': indirect_reviver,
|
||||
'Ref': indirect_reviver,
|
||||
'Reactive': indirect_reviver,
|
||||
})
|
||||
except (IndexError, TypeError, ValueError) as e:
|
||||
if default is not NO_DEFAULT:
|
||||
return default
|
||||
error_msg = f'Unable to resolve Nuxt JSON data: {e}'
|
||||
if fatal:
|
||||
raise ExtractorError(error_msg, video_id=video_id)
|
||||
self.report_warning(error_msg, video_id=video_id)
|
||||
return {}
|
||||
gen = devalue.parse_iter(array, revivers={
|
||||
'NuxtError': indirect_reviver,
|
||||
'EmptyShallowRef': json_reviver,
|
||||
'EmptyRef': json_reviver,
|
||||
'ShallowRef': indirect_reviver,
|
||||
'ShallowReactive': indirect_reviver,
|
||||
'Ref': indirect_reviver,
|
||||
'Reactive': indirect_reviver,
|
||||
})
|
||||
|
||||
while True:
|
||||
try:
|
||||
error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}'
|
||||
if fatal:
|
||||
raise ExtractorError(error_msg, video_id=video_id)
|
||||
elif default is NO_DEFAULT:
|
||||
self.report_warning(error_msg, video_id=video_id, only_once=True)
|
||||
else:
|
||||
self.write_debug(f'{video_id}: {error_msg}', only_once=True)
|
||||
except StopIteration as error:
|
||||
return error.value or ({} if default is NO_DEFAULT else default)
|
||||
|
||||
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
|
||||
"""Parses metadata from Nuxt rich JSON payloads embedded in HTML"""
|
||||
|
Loading…
Reference in New Issue
Block a user