From 767c099c1e95bbc1491799949f32a8700c7ba0e4 Mon Sep 17 00:00:00 2001 From: bashonly Date: Sat, 12 Jul 2025 17:48:15 -0500 Subject: [PATCH] [ie] Rework `_search_nextjs_v13_data` Authored by: bashonly --- yt_dlp/extractor/common.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a3ff5a1c0..9829aee81 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1785,7 +1785,7 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU def _search_nextjs_v13_data(self, webpage, video_id, fatal=True): """Parses Next.js app router flight data that was introduced in Next.js v13""" - nextjs_data = [] + nextjs_data = {} if not fatal and not isinstance(webpage, str): return nextjs_data @@ -1797,9 +1797,9 @@ def flatten(flight_data): if not isinstance(data, dict): return children = data.pop('children', None) - if data and isinstance(name, str) and name.startswith('$'): + if data and isinstance(name, str) and name.startswith('$L'): # It is useful hydration JSON data - nextjs_data.append(data) + nextjs_data[name[2:]] = data flatten(children) return for f in flight_data: @@ -1823,10 +1823,16 @@ def flatten(flight_data): flight_text += chunk for f in flight_text.splitlines(): - prefix, _, body = f.partition(':') - if body.startswith('[') and body.endswith(']') and re.fullmatch(r'[0-9a-f]{1,3}', prefix.lstrip()): - # The body isn't necessarily valid JSON, so this should always be non-fatal + prefix, _, body = f.lstrip().partition(':') + if not re.fullmatch(r'[0-9a-f]+', prefix): + continue + # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal + if body.startswith('[') and body.endswith(']'): flatten(self._parse_json(body, video_id, fatal=False, errnote=False)) + elif body.startswith('{') and body.endswith('}'): + data = self._parse_json(body, video_id, fatal=False, errnote=False) + if data is not None: + 
nextjs_data[prefix] = data return nextjs_data