mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 01:18:30 +00:00
[ie] Rewrite Nuxt JSON helpers to use devalue
Authored by: bashonly
This commit is contained in:
parent
8a57b3d3db
commit
0ac5e97eb0
@ -101,6 +101,7 @@
|
|||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
from ..utils._utils import _request_dump_filename
|
from ..utils._utils import _request_dump_filename
|
||||||
|
from ..utils.web import devalue
|
||||||
|
|
||||||
|
|
||||||
class InfoExtractor:
|
class InfoExtractor:
|
||||||
@ -1797,63 +1798,47 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
|
|||||||
|
|
||||||
def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
|
def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
|
||||||
"""Resolves Nuxt rich JSON payload arrays"""
|
"""Resolves Nuxt rich JSON payload arrays"""
|
||||||
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
|
|
||||||
# https://github.com/nuxt/nuxt/pull/19205
|
|
||||||
ERROR_MSG = 'Unable to extract Nuxt JSON data'
|
|
||||||
|
|
||||||
result = [None]
|
|
||||||
stack = [(result, 0, 0)]
|
|
||||||
while stack:
|
|
||||||
target, index, source = stack.pop()
|
|
||||||
if 0 <= source < len(array):
|
|
||||||
element = array[source]
|
|
||||||
elif default is NO_DEFAULT:
|
|
||||||
if fatal:
|
|
||||||
raise ExtractorError(ERROR_MSG, video_id=video_id)
|
|
||||||
self.report_warning(ERROR_MSG, video_id=video_id)
|
|
||||||
return {}
|
|
||||||
else:
|
|
||||||
return default
|
|
||||||
|
|
||||||
if isinstance(element, list) and element and isinstance(element[0], str):
|
|
||||||
if element[0] in ('ShallowReactive', 'Reactive', 'ShallowRef', 'Ref'):
|
|
||||||
stack.append((target, index, element[1]))
|
|
||||||
elif element[0] == 'Map':
|
|
||||||
target[index] = {}
|
|
||||||
elif element[0] == 'Set':
|
|
||||||
target[index] = []
|
|
||||||
else:
|
|
||||||
target[index] = None
|
|
||||||
if element[0] not in ('EmptyRef', 'EmptyShallowRef', 'NuxtError'):
|
|
||||||
self.write_debug(
|
|
||||||
f'{video_id}: Discarding unsupported type in Nuxt payload: {element[0]}',
|
|
||||||
only_once=True)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if isinstance(element, list):
|
|
||||||
target[index] = element.copy()
|
|
||||||
for offset, val in enumerate(element):
|
|
||||||
stack.append((target[index], offset, val))
|
|
||||||
|
|
||||||
elif isinstance(element, dict):
|
|
||||||
target[index] = element.copy()
|
|
||||||
for key, val in element.items():
|
|
||||||
if isinstance(val, int):
|
|
||||||
stack.append((target[index], key, val))
|
|
||||||
|
|
||||||
else:
|
|
||||||
target[index] = element
|
|
||||||
|
|
||||||
return result[0]
|
|
||||||
|
|
||||||
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
|
|
||||||
"""Parses metadata from Nuxt rich JSON payloads embedded in HTML"""
|
|
||||||
if default is not NO_DEFAULT:
|
if default is not NO_DEFAULT:
|
||||||
fatal = False
|
fatal = False
|
||||||
|
|
||||||
|
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
|
||||||
|
# https://github.com/nuxt/nuxt/pull/19205
|
||||||
|
def simple_reviver(data):
|
||||||
|
return data
|
||||||
|
|
||||||
|
def empty_reviver(data):
|
||||||
|
return self._parse_json(data, video_id, fatal=fatal, errnote=None if fatal else False)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return devalue.parse(array, revivers={
|
||||||
|
'NuxtError': simple_reviver,
|
||||||
|
'EmptyShallowRef': empty_reviver,
|
||||||
|
'EmptyRef': empty_reviver,
|
||||||
|
'ShallowRef': simple_reviver,
|
||||||
|
'ShallowReactive': simple_reviver,
|
||||||
|
'Ref': simple_reviver,
|
||||||
|
'Reactive': simple_reviver,
|
||||||
|
})
|
||||||
|
except (IndexError, TypeError, ValueError) as e:
|
||||||
|
if default is not NO_DEFAULT:
|
||||||
|
return default
|
||||||
|
error_msg = f'Unable to resolve Nuxt JSON data: {e}'
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(error_msg, video_id=video_id)
|
||||||
|
self.report_warning(error_msg, video_id=video_id)
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
|
||||||
|
"""Parses metadata from Nuxt rich JSON payloads embedded in HTML"""
|
||||||
|
passed_default = default is not NO_DEFAULT
|
||||||
|
|
||||||
array = self._search_json(
|
array = self._search_json(
|
||||||
r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage, 'Nuxt JSON data', video_id,
|
r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage,
|
||||||
contains_pattern=r'\[(?s:.+)\]', default=NO_DEFAULT if fatal else [])
|
'Nuxt JSON data', video_id, contains_pattern=r'\[(?s:.+)\]',
|
||||||
|
fatal=fatal, default=NO_DEFAULT if not passed_default else None)
|
||||||
|
|
||||||
|
if not array:
|
||||||
|
return default if passed_default else {}
|
||||||
|
|
||||||
return self._resolve_nuxt_array(array, video_id, fatal=fatal, default=default)
|
return self._resolve_nuxt_array(array, video_id, fatal=fatal, default=default)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user