diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index bc89b2955e..b1f6ef255c 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1947,6 +1947,34 @@ def test_search_nextjs_data(self):
with self.assertWarns(DeprecationWarning):
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
+ def test_search_nextjs_v13_data(self):
+ HTML = R'''
+
+
+
+
+
+
+
+ '''
+ EXPECTED = [{
+ 'foo': 'bar',
+ }, {
+ 'meta': {
+ 'dateCreated': 1730489700,
+ 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
+ },
+ }, {
+ 'duplicated_field_name': {'x': 1},
+ }, {
+ 'duplicated_field_name': {'y': 2},
+ }, {
+ 'decoded': 'success',
+ }]
+ self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
+ self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), [])
+ self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), [])
+
if __name__ == '__main__':
unittest.main()
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 1174bd4f5e..1255683054 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,4 +1,5 @@
import base64
+import binascii
import collections
import functools
import getpass
@@ -1778,6 +1779,60 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU
r'', fatal=fatal, default=default, **kw)
+ def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
+ """Parses Next.js app router flight data that was introduced in Next.js v13"""
+ nextjs_data = []
+ if not fatal and not isinstance(webpage, str):
+ return nextjs_data
+ # This regex pattern can afford to be and should be strict
+ # Ref: https://github.com/vercel/next.js/commit/5a4a08fdce91a038f2ed3a70568d3ed040403150
+ # /packages/next/src/server/app-render/use-flight-response.tsx
+ flight_segments = re.findall(r'', webpage)
+
+ def flatten(flight_data):
+ if not isinstance(flight_data, list) or not flight_data:
+ return
+ if len(flight_data) == 4 and flight_data[0] == '$':
+ _, name, _, data = flight_data
+ if not isinstance(data, dict):
+ return
+ children = data.pop('children', None)
+ if data and name and name[0] == '$':
+ # It is useful hydration JSON data
+ nextjs_data.append(data)
+ flatten(children)
+ return
+ for f in flight_data:
+ flatten(f)
+
+ for flight_segment in flight_segments:
+ segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
+ # Some earlier versions of next.js "optimized" away this array structure; this is unsupported
+ # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761
+ if not isinstance(segment, list) or len(segment) != 2:
+ self.write_debug(
+ f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
+ continue
+ payload_type, chunk = segment
+ if payload_type == 3:
+ try:
+ chunk = base64.b64decode(chunk).decode()
+ except (ValueError, binascii.Error):
+ msg = 'Unable to parse next.js data: unable to decode flight data'
+ if not fatal:
+ self.report_warning(msg, video_id=video_id, only_once=True)
+ continue
+ raise ExtractorError(msg)
+ elif payload_type != 1:
+ # Ignore useless payload types (0: bootstrap, 2: form state)
+ continue
+ # Not all chunks are complete JSON data; this should always be non-fatal
+ flatten(self._search_json(
+ r'^[\da-f]+:', chunk, 'flight data', video_id,
+ default=None, contains_pattern=r'\[.+\]'))
+
+ return nextjs_data
+
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name)