From 767c099c1e95bbc1491799949f32a8700c7ba0e4 Mon Sep 17 00:00:00 2001
From: bashonly <bashonly@protonmail.com>
Date: Sat, 12 Jul 2025 17:48:15 -0500
Subject: [PATCH] [ie] Rework `_search_nextjs_v13_data`

Authored by: bashonly
---
 yt_dlp/extractor/common.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index a3ff5a1c0..9829aee81 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1785,7 +1785,7 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU
 
     def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
         """Parses Next.js app router flight data that was introduced in Next.js v13"""
-        nextjs_data = []
+        nextjs_data = {}
         if not fatal and not isinstance(webpage, str):
             return nextjs_data
 
@@ -1797,9 +1797,9 @@ def flatten(flight_data):
                 if not isinstance(data, dict):
                     return
                 children = data.pop('children', None)
-                if data and isinstance(name, str) and name.startswith('$'):
+                if data and isinstance(name, str) and name.startswith('$L'):
                     # It is useful hydration JSON data
-                    nextjs_data.append(data)
+                    nextjs_data[name[2:]] = data
                 flatten(children)
                 return
             for f in flight_data:
@@ -1823,10 +1823,16 @@ def flatten(flight_data):
                 flight_text += chunk
 
         for f in flight_text.splitlines():
-            prefix, _, body = f.partition(':')
-            if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix.lstrip()):
-                # The body isn't necessarily valid JSON, so this should always be non-fatal
+            prefix, _, body = f.lstrip().partition(':')
+            if not re.fullmatch(r'[0-9a-f]+', prefix):
+                continue
+            # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal
+            if body.startswith('[') and body.endswith(']'):
                 flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
+            elif body.startswith('{') and body.endswith('}'):
+                data = self._parse_json(body, video_id, fatal=False, errnote=False)
+                if data is not None:
+                    nextjs_data[prefix] = data
 
         return nextjs_data