mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-07-19 19:58:30 +00:00
[ie] Rework _search_nextjs_v13_data helper (#13711)
Fix 5245231e4a
Authored by: bashonly
This commit is contained in:
parent 5245231e4a
commit b5fea53f20
@ -1969,21 +1969,26 @@ def test_search_nextjs_v13_data(self):
|
|||||||
<script>self.__next_f.push([1,"df:[\"$undefined\",[\"$\",\"div\",null,{\"children\":[\"$\",\"$L17\",null,{}],\"do_not_include_this_field\":\"fail\"}],[\"$\",\"div\",null,{\"children\":[[\"$\",\"$L19\",null,{\"duplicated_field_name\":{\"x\":1}}],[\"$\",\"$L20\",null,{\"duplicated_field_name\":{\"y\":2}}]]}],\"$undefined\"]\n"])</script>
|
<script>self.__next_f.push([1,"df:[\"$undefined\",[\"$\",\"div\",null,{\"children\":[\"$\",\"$L17\",null,{}],\"do_not_include_this_field\":\"fail\"}],[\"$\",\"div\",null,{\"children\":[[\"$\",\"$L19\",null,{\"duplicated_field_name\":{\"x\":1}}],[\"$\",\"$L20\",null,{\"duplicated_field_name\":{\"y\":2}}]]}],\"$undefined\"]\n"])</script>
|
||||||
<script>self.__next_f.push([3,"MzM6WyIkIiwiJEwzMiIsbnVsbCx7ImRlY29kZWQiOiJzdWNjZXNzIn1d"])</script>
|
<script>self.__next_f.push([3,"MzM6WyIkIiwiJEwzMiIsbnVsbCx7ImRlY29kZWQiOiJzdWNjZXNzIn1d"])</script>
|
||||||
'''
|
'''
|
||||||
EXPECTED = [{
|
EXPECTED = {
|
||||||
'foo': 'bar',
|
'18': {
|
||||||
}, {
|
'foo': 'bar',
|
||||||
'meta': {
|
|
||||||
'dateCreated': 1730489700,
|
|
||||||
'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
|
|
||||||
},
|
},
|
||||||
}, {
|
'16': {
|
||||||
'duplicated_field_name': {'x': 1},
|
'meta': {
|
||||||
}, {
|
'dateCreated': 1730489700,
|
||||||
'duplicated_field_name': {'y': 2},
|
'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
|
||||||
}]
|
},
|
||||||
|
},
|
||||||
|
'19': {
|
||||||
|
'duplicated_field_name': {'x': 1},
|
||||||
|
},
|
||||||
|
'20': {
|
||||||
|
'duplicated_field_name': {'y': 2},
|
||||||
|
},
|
||||||
|
}
|
||||||
self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
|
self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
|
||||||
self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), [])
|
self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {})
|
||||||
self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), [])
|
self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {})
|
||||||
|
|
||||||
def test_search_nuxt_json(self):
|
def test_search_nuxt_json(self):
|
||||||
HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>'
|
HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>'
|
||||||
|
@ -1785,7 +1785,7 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU
|
|||||||
|
|
||||||
def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
|
def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
|
||||||
"""Parses Next.js app router flight data that was introduced in Next.js v13"""
|
"""Parses Next.js app router flight data that was introduced in Next.js v13"""
|
||||||
nextjs_data = []
|
nextjs_data = {}
|
||||||
if not fatal and not isinstance(webpage, str):
|
if not fatal and not isinstance(webpage, str):
|
||||||
return nextjs_data
|
return nextjs_data
|
||||||
|
|
||||||
@ -1797,9 +1797,9 @@ def flatten(flight_data):
|
|||||||
if not isinstance(data, dict):
|
if not isinstance(data, dict):
|
||||||
return
|
return
|
||||||
children = data.pop('children', None)
|
children = data.pop('children', None)
|
||||||
if data and isinstance(name, str) and name.startswith('$'):
|
if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name):
|
||||||
# It is useful hydration JSON data
|
# It is useful hydration JSON data
|
||||||
nextjs_data.append(data)
|
nextjs_data[name[2:]] = data
|
||||||
flatten(children)
|
flatten(children)
|
||||||
return
|
return
|
||||||
for f in flight_data:
|
for f in flight_data:
|
||||||
@ -1823,10 +1823,16 @@ def flatten(flight_data):
|
|||||||
flight_text += chunk
|
flight_text += chunk
|
||||||
|
|
||||||
for f in flight_text.splitlines():
|
for f in flight_text.splitlines():
|
||||||
prefix, _, body = f.partition(':')
|
prefix, _, body = f.lstrip().partition(':')
|
||||||
if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix.lstrip()):
|
if not re.fullmatch(r'[0-9a-f]+', prefix):
|
||||||
# The body isn't necessarily valid JSON, so this should always be non-fatal
|
continue
|
||||||
|
# The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal
|
||||||
|
if body.startswith('[') and body.endswith(']'):
|
||||||
flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
|
flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
|
||||||
|
elif body.startswith('{') and body.endswith('}'):
|
||||||
|
data = self._parse_json(body, video_id, fatal=False, errnote=False)
|
||||||
|
if data is not None:
|
||||||
|
nextjs_data[prefix] = data
|
||||||
|
|
||||||
return nextjs_data
|
return nextjs_data
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user