From 1dbd7250b4f247cdd9ceb15e0ec2e9337566f50e Mon Sep 17 00:00:00 2001 From: bashonly Date: Wed, 11 Jun 2025 17:33:38 -0500 Subject: [PATCH] [ie] rework `_resolve_nuxt_array` to return partial results Authored by: bashonly --- test/test_InfoExtractor.py | 57 +++++++++++++++++++++++--------------- yt_dlp/extractor/common.py | 51 +++++++++++++++++++++------------- 2 files changed, 66 insertions(+), 42 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 962c93959..e6c8d574e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -2032,38 +2032,51 @@ def test_search_nuxt_json(self): 'message': 'Service Unavailable', }, } - INVALID_LIST = [ + PARTIALLY_INVALID = [( ''' - {"data":1}, - {"invalid_raw_list":2}, - [15,16,17] + {"data":1}, + {"invalid_raw_list":2}, + [15,16,17] + ''', + {'data': {'invalid_raw_list': [None, None, None]}}, + ), ( + ''' + {"data":1}, + ["EmptyRef",2], + "not valid JSON" + ''', + {'data': None}, + ), ( + ''' + {"data":1}, + ["EmptyShallowRef",2], + "not valid JSON" + ''', + {'data': None}, + )] + INVALID = [ + ''' + [] ''', ''' - {"data":1}, - ["EmptyRef",2], - "not valid JSON" - ''', - ''' - {"data":1}, - ["EmptyShallowRef",2], - "not valid JSON" - ''', - ''' - {"data":1}, - ["unsupported",2], + ["unsupported",1], + {"data":2}, {} ''', ] - DEFAULT = {'default': 'works'} + DEFAULT = object() self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD) self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {}) - self.assertEqual(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT) - self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(INVALID_LIST[0]), None, fatal=False), {}) - for invalid_data in INVALID_LIST[1:]: + self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT) + + for data, expected in PARTIALLY_INVALID: self.assertEqual( - self.ie._search_nuxt_json(HTML_TMPL.format(invalid_data), None, default=DEFAULT), - DEFAULT) + self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected) + + for data in INVALID: + self.assertIs( + self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) if __name__ == '__main__': diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ca9077299..6058f66ae 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1798,35 +1798,46 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT): """Resolves Nuxt rich JSON payload arrays""" + # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57 + # https://github.com/nuxt/nuxt/pull/19205 if default is not NO_DEFAULT: fatal = False + if not isinstance(array, list) or not array: + error_msg = 'Unable to resolve Nuxt JSON data: invalid input' + if fatal: + raise ExtractorError(error_msg, video_id=video_id) + elif default is NO_DEFAULT: + self.report_warning(error_msg, video_id=video_id) + return {} if default is NO_DEFAULT else default + def indirect_reviver(data): return data def json_reviver(data): return json.loads(data) - # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57 - # https://github.com/nuxt/nuxt/pull/19205 - try: - return devalue.parse(array, revivers={ - 'NuxtError': indirect_reviver, - 'EmptyShallowRef': json_reviver, - 'EmptyRef': json_reviver, - 'ShallowRef': indirect_reviver, - 'ShallowReactive': indirect_reviver, - 'Ref': indirect_reviver, - 'Reactive': indirect_reviver, - }) - except (IndexError, TypeError, ValueError) as e: - if default is not NO_DEFAULT: - return default - error_msg = f'Unable to resolve Nuxt JSON data: {e}' - if fatal: - raise ExtractorError(error_msg, video_id=video_id) - self.report_warning(error_msg, video_id=video_id) - return {} + gen = devalue.parse_iter(array, revivers={ + 'NuxtError': indirect_reviver, + 'EmptyShallowRef': json_reviver, + 'EmptyRef': json_reviver, + 'ShallowRef': indirect_reviver, + 'ShallowReactive': indirect_reviver, + 'Ref': indirect_reviver, + 'Reactive': indirect_reviver, + }) + + while True: + try: + error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}' + if fatal: + raise ExtractorError(error_msg, video_id=video_id) + elif default is NO_DEFAULT: + self.report_warning(error_msg, video_id=video_id, only_once=True) + else: + self.write_debug(f'{video_id}: {error_msg}', only_once=True) + except StopIteration as error: + return error.value or ({} if default is NO_DEFAULT else default) def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT): """Parses metadata from Nuxt rich JSON payloads embedded in HTML"""