mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-07-29 16:48:30 +00:00
Smaller rearrangements
This commit is contained in:
parent
f383c0b600
commit
d3d29be050
53
changed.diff
Normal file
53
changed.diff
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
|
||||||
|
index b2af2a67b..e2b94c89a 100644
|
||||||
|
--- a/yt_dlp/extractor/common.py
|
||||||
|
+++ b/yt_dlp/extractor/common.py
|
||||||
|
@@ -1807,9 +1807,8 @@ def flatten(flight_data):
|
||||||
|
flatten(f)
|
||||||
|
|
||||||
|
flight_text = ''
|
||||||
|
- # The flight segments regex pattern can afford to be (and should be) strict
|
||||||
|
- # Ref: https://github.com/vercel/next.js/commit/5a4a08fdce91a038f2ed3a70568d3ed040403150
|
||||||
|
- # /packages/next/src/server/app-render/use-flight-response.tsx
|
||||||
|
+ # The non JSON part is written as string in the next.js source and should be matched strictly
|
||||||
|
+ # Ref: https://github.com/vercel/next.js/blob/5a4a08fdce91a038f2ed3a70568d3ed040403150/packages/next/src/server/app-render/use-flight-response.tsx#L189
|
||||||
|
for flight_segment in re.findall(r'<script[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
|
||||||
|
segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
|
||||||
|
# Some earlier versions of next.js "optimized" away this array structure; this is unsupported
|
||||||
|
@@ -1818,27 +1817,26 @@ def flatten(flight_data):
|
||||||
|
self.write_debug(
|
||||||
|
f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
|
||||||
|
continue
|
||||||
|
+ # Ignore useless payload types (1: data, 2: base64)
|
||||||
|
+ # Ref: https://github.com/vercel/next.js/blob/5a4a08fdce91a038f2ed3a70568d3ed040403150/packages/next/src/server/app-render/use-flight-response.tsx#L11-#L14
|
||||||
|
payload_type, chunk = segment
|
||||||
|
- if payload_type == 3:
|
||||||
|
+ if payload_type == 1:
|
||||||
|
+ flight_text += chunk
|
||||||
|
+ elif payload_type == 3:
|
||||||
|
try:
|
||||||
|
- chunk = base64.b64decode(chunk).decode()
|
||||||
|
+ flight_text += base64.b64decode(chunk).decode()
|
||||||
|
except (ValueError, binascii.Error):
|
||||||
|
msg = 'Unable to parse next.js data: unable to decode flight data'
|
||||||
|
if not fatal:
|
||||||
|
self.report_warning(msg, video_id=video_id, only_once=True)
|
||||||
|
continue
|
||||||
|
raise ExtractorError(msg)
|
||||||
|
- elif payload_type != 1:
|
||||||
|
- # Ignore useless payload types (0: bootstrap, 2: form state)
|
||||||
|
- continue
|
||||||
|
- flight_text += chunk
|
||||||
|
|
||||||
|
for f in flight_text.splitlines():
|
||||||
|
prefix, _, body = f.partition(':')
|
||||||
|
- if not (body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix)):
|
||||||
|
- continue
|
||||||
|
- # The body isn't necessarily valid JSON; this should always be non-fatal
|
||||||
|
- flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
|
||||||
|
+ if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix):
|
||||||
|
+ # The body isn't necessarily valid JSON; this should always be non-fatal
|
||||||
|
+ flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
|
||||||
|
|
||||||
|
return nextjs_data
|
||||||
|
|
@ -1980,8 +1980,6 @@ def test_search_nextjs_v13_data(self):
|
|||||||
'duplicated_field_name': {'x': 1},
|
'duplicated_field_name': {'x': 1},
|
||||||
}, {
|
}, {
|
||||||
'duplicated_field_name': {'y': 2},
|
'duplicated_field_name': {'y': 2},
|
||||||
}, {
|
|
||||||
'decoded': 'success',
|
|
||||||
}]
|
}]
|
||||||
self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
|
self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
|
||||||
self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), [])
|
self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), [])
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import base64
|
import base64
|
||||||
import binascii
|
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
import functools
|
import functools
|
||||||
@ -1807,9 +1806,8 @@ def flatten(flight_data):
|
|||||||
flatten(f)
|
flatten(f)
|
||||||
|
|
||||||
flight_text = ''
|
flight_text = ''
|
||||||
# The flight segments regex pattern can afford to be (and should be) strict
|
# The script part is written as a string in the next.js source and should be matched strictly
|
||||||
# Ref: https://github.com/vercel/next.js/commit/5a4a08fdce91a038f2ed3a70568d3ed040403150
|
# Ref: https://github.com/vercel/next.js/blob/5a4a08fdce91a038f2ed3a70568d3ed040403150/packages/next/src/server/app-render/use-flight-response.tsx#L189
|
||||||
# /packages/next/src/server/app-render/use-flight-response.tsx
|
|
||||||
for flight_segment in re.findall(r'<script[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
|
for flight_segment in re.findall(r'<script[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
|
||||||
segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
|
segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
|
||||||
# Some earlier versions of next.js "optimized" away this array structure; this is unsupported
|
# Some earlier versions of next.js "optimized" away this array structure; this is unsupported
|
||||||
@ -1818,27 +1816,17 @@ def flatten(flight_data):
|
|||||||
self.write_debug(
|
self.write_debug(
|
||||||
f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
|
f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
|
||||||
continue
|
continue
|
||||||
|
# Use only relevant payload type (1 == data)
|
||||||
|
# Ref: https://github.com/vercel/next.js/blob/5a4a08fdce91a038f2ed3a70568d3ed040403150/packages/next/src/server/app-render/use-flight-response.tsx#L11-#L14
|
||||||
payload_type, chunk = segment
|
payload_type, chunk = segment
|
||||||
if payload_type == 3:
|
if payload_type == 1:
|
||||||
try:
|
flight_text += chunk
|
||||||
chunk = base64.b64decode(chunk).decode()
|
|
||||||
except (ValueError, binascii.Error):
|
|
||||||
msg = 'Unable to parse next.js data: unable to decode flight data'
|
|
||||||
if not fatal:
|
|
||||||
self.report_warning(msg, video_id=video_id, only_once=True)
|
|
||||||
continue
|
|
||||||
raise ExtractorError(msg)
|
|
||||||
elif payload_type != 1:
|
|
||||||
# Ignore useless payload types (0: bootstrap, 2: form state)
|
|
||||||
continue
|
|
||||||
flight_text += chunk
|
|
||||||
|
|
||||||
for f in flight_text.splitlines():
|
for f in flight_text.splitlines():
|
||||||
prefix, _, body = f.partition(':')
|
prefix, _, body = f.partition(':')
|
||||||
if not (body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix)):
|
if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix):
|
||||||
continue
|
# The body isn't necessarily valid JSON; this should always be non-fatal
|
||||||
# The body isn't necessarily valid JSON; this should always be non-fatal
|
flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
|
||||||
flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
|
|
||||||
|
|
||||||
return nextjs_data
|
return nextjs_data
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user