mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
improvements?
Authored by: bashonly
This commit is contained in:
parent
ce6b8db516
commit
11801ca7b3
@ -1979,26 +1979,25 @@ def test_search_nuxt_json(self):
|
|||||||
["Set"]
|
["Set"]
|
||||||
]
|
]
|
||||||
</script>'''
|
</script>'''
|
||||||
DATA = {
|
PAYLOAD = {
|
||||||
'podcast': {
|
|
||||||
'podcast': {
|
|
||||||
'title': 'Series Title',
|
|
||||||
'id': 'podcast-id-01',
|
|
||||||
},
|
|
||||||
'seasons': [1, 2, 3],
|
|
||||||
},
|
|
||||||
'activeEpisodeData': {
|
|
||||||
'episode': {
|
|
||||||
'title': 'Episode Title',
|
|
||||||
'id': 'episode-id-99',
|
|
||||||
},
|
|
||||||
'creators': ['Podcast Creator'],
|
|
||||||
'empty_list': [],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
FULL = {
|
|
||||||
'data': {
|
'data': {
|
||||||
'$abcdef123456': DATA,
|
'$abcdef123456': {
|
||||||
|
'podcast': {
|
||||||
|
'podcast': {
|
||||||
|
'title': 'Series Title',
|
||||||
|
'id': 'podcast-id-01',
|
||||||
|
},
|
||||||
|
'seasons': [1, 2, 3],
|
||||||
|
},
|
||||||
|
'activeEpisodeData': {
|
||||||
|
'episode': {
|
||||||
|
'title': 'Episode Title',
|
||||||
|
'id': 'episode-id-99',
|
||||||
|
},
|
||||||
|
'creators': ['Podcast Creator'],
|
||||||
|
'empty_list': [],
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
'state': {
|
'state': {
|
||||||
'$ssite-config': {
|
'$ssite-config': {
|
||||||
@ -2006,7 +2005,7 @@ def test_search_nuxt_json(self):
|
|||||||
'name': 'podcast-website',
|
'name': 'podcast-website',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'once': ['Set'],
|
'once': None,
|
||||||
}
|
}
|
||||||
BAD_HTML = '''
|
BAD_HTML = '''
|
||||||
<script data-ssr="true" id="__NUXT_DATA__" type="application/json">
|
<script data-ssr="true" id="__NUXT_DATA__" type="application/json">
|
||||||
@ -2018,8 +2017,7 @@ def test_search_nuxt_json(self):
|
|||||||
]
|
]
|
||||||
</script>'''
|
</script>'''
|
||||||
|
|
||||||
self.assertEqual(self.ie._search_nuxt_json(HTML, None), DATA)
|
self.assertEqual(self.ie._search_nuxt_json(HTML, 'id'), PAYLOAD)
|
||||||
self.assertEqual(self.ie._search_nuxt_json(HTML, None, traverse=None), FULL)
|
|
||||||
self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
|
self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
|
||||||
self.assertEqual(self.ie._search_nuxt_json(BAD_HTML, None, fatal=False), {})
|
self.assertEqual(self.ie._search_nuxt_json(BAD_HTML, None, fatal=False), {})
|
||||||
|
|
||||||
|
@ -1795,39 +1795,57 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
|
|||||||
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
|
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
|
||||||
return traverse_obj(ret, traverse) or {}
|
return traverse_obj(ret, traverse) or {}
|
||||||
|
|
||||||
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, traverse=('data', ..., {dict}, any)):
|
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
|
||||||
"""Parses metadata from Nuxt rich JSON payload arrays"""
|
"""Parses metadata from Nuxt rich JSON payload arrays"""
|
||||||
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
|
# Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
|
||||||
# https://github.com/nuxt/nuxt/pull/19205
|
# https://github.com/nuxt/nuxt/pull/19205
|
||||||
array = self._search_json(
|
try:
|
||||||
r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage, 'nuxt data', video_id,
|
array = self._search_json(
|
||||||
contains_pattern=r'\[(?s:.+)\]', default=NO_DEFAULT if fatal else [{}])
|
r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage,
|
||||||
|
'Nuxt JSON data', video_id, contains_pattern=r'\[(?s:.+)\]')
|
||||||
|
except ExtractorError as e:
|
||||||
|
if fatal:
|
||||||
|
raise
|
||||||
|
if default is NO_DEFAULT:
|
||||||
|
self.report_warning(e.orig_msg)
|
||||||
|
return {}
|
||||||
|
return default
|
||||||
|
|
||||||
|
IGNORED_TYPES = ('Map', 'Set', 'Ref', 'ShallowRef', 'EmptyRef', 'EmptyShallowRef', 'NuxtError')
|
||||||
|
|
||||||
def extract_element(element):
|
def extract_element(element):
|
||||||
try:
|
if isinstance(element, list):
|
||||||
if isinstance(element, list) and element:
|
if element and isinstance(element[0], str):
|
||||||
if element[0] in ('ShallowReactive', 'Reactive') and isinstance(element[1], int):
|
if element[0] in ('ShallowReactive', 'Reactive') and isinstance(element[1], int):
|
||||||
return extract_element(array[element[1]])
|
return extract_element(array[element[1]])
|
||||||
if all(isinstance(ele, int) for ele in element):
|
if element[0] not in IGNORED_TYPES:
|
||||||
return [extract_element(array[ele]) for ele in element]
|
self.write_debug(
|
||||||
if isinstance(element, dict):
|
f'{video_id}: Discarding unsupported type in Nuxt payload: {element[0]}',
|
||||||
ret = {}
|
only_once=True)
|
||||||
for k, v in element.items():
|
|
||||||
if isinstance(v, int):
|
|
||||||
ret[k] = extract_element(array[v])
|
|
||||||
else:
|
|
||||||
ret[k] = v
|
|
||||||
return ret
|
|
||||||
except IndexError as e:
|
|
||||||
error_msg = f'Unable to extract NUXT JSON data: {e}'
|
|
||||||
if not fatal:
|
|
||||||
self.report_warning(error_msg, video_id=video_id, only_once=True)
|
|
||||||
return None
|
return None
|
||||||
raise ExtractorError(error_msg)
|
return [extract_element(array[ele]) for ele in element]
|
||||||
|
if isinstance(element, dict):
|
||||||
|
ret = {}
|
||||||
|
for k, v in element.items():
|
||||||
|
ret[k] = extract_element(array[v])
|
||||||
|
return ret
|
||||||
return element
|
return element
|
||||||
|
|
||||||
return traverse_obj(extract_element(array[0]), traverse) or {}
|
try:
|
||||||
|
payload = extract_element(array[0])
|
||||||
|
except IndexError as e:
|
||||||
|
error_msg = f'Unable to extract Nuxt JSON data: {e}'
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(error_msg)
|
||||||
|
if default is NO_DEFAULT:
|
||||||
|
self.report_warning(error_msg, video_id=video_id)
|
||||||
|
return {}
|
||||||
|
return default
|
||||||
|
|
||||||
|
if default is NO_DEFAULT:
|
||||||
|
default = {}
|
||||||
|
|
||||||
|
return payload or default
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _hidden_inputs(html):
|
def _hidden_inputs(html):
|
||||||
|
Loading…
Reference in New Issue
Block a user