From c840feeba121628708f14722cd3b12e5885106fa Mon Sep 17 00:00:00 2001
From: bashonly
Date: Thu, 5 Jun 2025 02:04:15 -0500
Subject: [PATCH 1/7] [ie] Add `_search_nextjs_v13_data` helper

Authored by: bashonly
---
 test/test_InfoExtractor.py | 28 +++++++++++++++++++
 yt_dlp/extractor/common.py | 55 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index bc89b2955..b1f6ef255 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1947,6 +1947,34 @@ def test_search_nextjs_data(self):
         with self.assertWarns(DeprecationWarning):
             self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})

+    def test_search_nextjs_v13_data(self):
+        HTML = R'''
+
+
+
+
+
+
+
+        '''
+        EXPECTED = [{
+            'foo': 'bar',
+        }, {
+            'meta': {
+                'dateCreated': 1730489700,
+                'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
+            },
+        }, {
+            'duplicated_field_name': {'x': 1},
+        }, {
+            'duplicated_field_name': {'y': 2},
+        }, {
+            'decoded': 'success',
+        }]
+        self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
+        self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), [])
+        self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), [])
+

 if __name__ == '__main__':
     unittest.main()
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 1174bd4f5..125568305 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,4 +1,5 @@
 import base64
+import binascii
 import collections
 import functools
 import getpass
@@ -1778,6 +1779,60 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU
             r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data', video_id,
             end_pattern='</script>', fatal=fatal, default=default, **kw)

+    def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
+        """Parses Next.js app router flight data that was introduced in Next.js v13"""
+        nextjs_data = []
+        if not fatal and not isinstance(webpage, str):
+            return nextjs_data
+        # This regex pattern can afford to be and should be strict
+        # Ref: https://github.com/vercel/next.js/commit/5a4a08fdce91a038f2ed3a70568d3ed040403150
+        # /packages/next/src/server/app-render/use-flight-response.tsx
+        flight_segments = re.findall(r'<script\b[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage)
+
+        def flatten(flight_data):
+            if not isinstance(flight_data, list) or not flight_data:
+                return
+            if len(flight_data) == 4 and flight_data[0] == '$':
+                _, name, _, data = flight_data
+                if not isinstance(data, dict):
+                    return
+                children = data.pop('children', None)
+                if data and name and name[0] == '$':
+                    # It is useful hydration JSON data
+                    nextjs_data.append(data)
+                flatten(children)
+                return
+            for f in flight_data:
+                flatten(f)
+
+        for flight_segment in flight_segments:
+            segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
+            # Some earlier versions of next.js "optimized" away this array structure; this is unsupported
+            # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761
+            if not isinstance(segment, list) or len(segment) != 2:
+                self.write_debug(
+                    f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
+                continue
+            payload_type, chunk = segment
+            if payload_type == 3:
+                try:
+                    chunk = base64.b64decode(chunk).decode()
+                except (ValueError, binascii.Error):
+                    msg = 'Unable to parse next.js data: unable to decode flight data'
+                    if not fatal:
+                        self.report_warning(msg, video_id=video_id, only_once=True)
+                        continue
+                    raise ExtractorError(msg)
+            elif payload_type != 1:
+                # Ignore useless payload types (0: bootstrap, 2: form state)
+                continue
+            # Not all chunks are complete JSON data; this should always be non-fatal
+            flatten(self._search_json(
+                r'^[\da-f]+:', chunk, 'flight data', video_id,
+                default=None, contains_pattern=r'\[.+\]'))
+
+        return nextjs_data
+
     def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
         """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
         rectx = re.escape(context_name)

From 7f91e4df8cbf1fc33c434fc6747e94381d7507c0 Mon Sep 17 00:00:00 2001
From: bashonly
Date: Thu, 5 Jun 2025 02:07:27 -0500
Subject: [PATCH 2/7] [ie/9now.com.au] Refactor extractor

Authored by: bashonly
---
 yt_dlp/extractor/ninenow.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py
index 7b0cb77a7..2f3a4ed28 100644
--- a/yt_dlp/extractor/ninenow.py
+++ b/yt_dlp/extractor/ninenow.py
@@ -1,6 +1,3 @@
-import json
-import re
-
 from .brightcove import BrightcoveNewIE
 from .common import InfoExtractor
 from ..utils import (
@@ -11,7 +8,12 @@
     str_or_none,
     url_or_none,
 )
-from ..utils.traversal import require, traverse_obj, value
+from ..utils.traversal import (
+    get_first,
+    require,
+    traverse_obj,
+    value,
+)


 class NineNowIE(InfoExtractor):
@@ -101,20 +103,11 @@ class NineNowIE(InfoExtractor):
     }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}'

-    # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv and yt_dlp.extractor.goplay
-    def _find_json(self, s):
-        return self._search_json(
-            r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
-
     def _real_extract(self, url):
         display_id, video_type = self._match_valid_url(url).group('id', 'type')
         webpage = self._download_webpage(url, display_id)
-        common_data = traverse_obj(
-            re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
-            (..., {json.loads}, ..., {self._find_json},
-             lambda _, v: v['payload'][video_type]['slug'] == display_id,
-             'payload', any, {require('video data')}))
+        common_data = get_first(self._search_nextjs_v13_data(webpage, display_id), ('payload', {dict}))

         if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})):
             self.report_drm(display_id)

From 0539ea2971ea9418a25ea00d983d0daa99735a66 Mon Sep 17 00:00:00 2001
From: bashonly
Date: Thu, 5 Jun 2025 02:07:40 -0500
Subject: [PATCH 3/7] [ie/goplay] Refactor and fix extractor

Authored by: bashonly
---
 yt_dlp/extractor/goplay.py | 44 +++++++++++++-----------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py
index c654c757c..2e959cead 100644
--- a/yt_dlp/extractor/goplay.py
+++ b/yt_dlp/extractor/goplay.py
@@ -5,16 +5,11 @@
 import hmac
 import json
 import os
-import re
 import urllib.parse

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-    remove_end,
-    traverse_obj,
-)
+from ..utils import ExtractorError, int_or_none
+from ..utils.traversal import get_first, traverse_obj


 class GoPlayIE(InfoExtractor):
@@ -27,10 +22,10 @@ class GoPlayIE(InfoExtractor):
         'info_dict': {
             'id': '2baa4560-87a0-421b-bffc-359914e3c387',
             'ext': 'mp4',
-            'title': 'S22 - Aflevering 1',
+            'title': 'De Slimste Mens ter Wereld - S22 - Aflevering 1',
             'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
             'series': 'De Slimste Mens ter Wereld',
-            'episode': 'Episode 1',
+            'episode': 'Wordt aangekondigd',
             'season_number': 22,
             'episode_number': 1,
             'season': 'Season 22',
@@ -52,7 +47,7 @@ class GoPlayIE(InfoExtractor):
         'info_dict': {
             'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
             'ext': 'mp4',
-            'title': 'S11 - Aflevering 1',
+            'title': 'De Mol - S11 - Aflevering 1',
             'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
             'episode': 'Episode 1',
             'series': 'De Mol',
@@ -75,21 +70,13 @@ def _real_initialize(self):
         if not self._id_token:
             raise self.raise_login_required(method='password')

-    # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv
-    def _find_json(self, s):
-        return self._search_json(
-            r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
-
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        nextjs_data = traverse_obj(
-            re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
-            (..., {json.loads}, ..., {self._find_json}, ...))
-        meta = traverse_obj(nextjs_data, (
-            ..., ..., 'children', ..., ..., 'children',
-            lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any))
+        nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
+        meta = get_first(nextjs_data, (
+            lambda k, v: k in ('video', 'meta') and v['path'] == urllib.parse.urlparse(url).path))

         video_id = meta['uuid']
         info_dict = traverse_obj(meta, {
@@ -98,19 +85,18 @@ def _real_extract(self, url):
         })

         if traverse_obj(meta, ('program', 'subtype')) != 'movie':
-            for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
-                episode_data = traverse_obj(
-                    season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
+            for season_data in traverse_obj(nextjs_data, (..., 'playlists', ..., {dict})):
+                episode_data = traverse_obj(season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
                 if not episode_data:
                     continue
-                episode_title = traverse_obj(
-                    episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
+                season_number = traverse_obj(season_data, ('season', {int_or_none}))
                 info_dict.update({
-                    'title': episode_title or info_dict.get('title'),
-                    'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
-                    'season_number': traverse_obj(season_data, ('season', {int_or_none})),
+                    'episode': traverse_obj(episode_data, ('episodeTitle', {str})),
                     'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
+                    'season_number': season_number,
+                    'series': self._search_regex(
+                        fr'^(.+)? 
- S{season_number} - ', info_dict.get('title'), 'series', default=None), }) break From 7894a0e98b84fbc85738f7e665371c57567cfbce Mon Sep 17 00:00:00 2001 From: bashonly Date: Thu, 5 Jun 2025 02:07:52 -0500 Subject: [PATCH 4/7] [ie/francetv:site] Refactor and fix extractor Authored by: bashonly --- yt_dlp/extractor/francetv.py | 48 +++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 5c9f8e36d..edf6708a0 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -1,4 +1,3 @@ -import json import re import urllib.parse @@ -19,7 +18,11 @@ unsmuggle_url, url_or_none, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import ( + find_element, + get_first, + traverse_obj, +) class FranceTVBaseInfoExtractor(InfoExtractor): @@ -258,7 +261,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba' + 'id': 'b2cf9fd8-e971-4757-8651-848f2772df61', # old: ec217ecc-0733-48cf-ac06-af1347b849d1 'ext': 'mp4', 'title': '13h15, le dimanche... - Les mystères de Jésus', 'timestamp': 1502623500, @@ -269,7 +272,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'params': { 'skip_download': True, }, - 'add_ie': [FranceTVIE.ie_key()], + 'skip': 'Unfortunately, this video is no longer available', }, { # geo-restricted 'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html', @@ -287,7 +290,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1441, }, - 'skip': 'No longer available', + 'skip': 'Unfortunately, this video is no longer available', }, { # geo-restricted livestream (workflow == 'token-akamai') 'url': 'https://www.france.tv/france-4/direct.html', @@ -308,6 +311,19 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'live_status': 'is_live', }, 'params': {'skip_download': 'livestream'}, + }, { + # Not geo-restricted + 'url': 'https://www.france.tv/france-2/la-maison-des-maternelles/5574051-nous-sommes-amis-et-nous-avons-fait-un-enfant-ensemble.html', + 'info_dict': { + 'id': 'b448bfe4-9fe7-11ee-97d8-2ba3426fa3df', + 'ext': 'mp4', + 'title': 'Nous sommes amis et nous avons fait un enfant ensemble - Émission du jeudi 21 décembre 2023', + 'duration': 1065, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'timestamp': 1703147921, + 'upload_date': '20231221', + }, + 'params': {'skip_download': 'm3u8'}, }, { # france3 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', @@ -342,30 +358,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): 'only_matching': True, }] - # XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay - def _find_json(self, s): - return self._search_json( - r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None) - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + nextjs_data = self._search_nextjs_v13_data(webpage, display_id) - nextjs_data = traverse_obj( - re.findall(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), - (..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children')) - - if 
traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)):
+        if get_first(nextjs_data, ('isLive', {bool})):
             # For livestreams we need the id of the stream instead of the currently airing episode id
-            video_id = traverse_obj(nextjs_data, (
-                ..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ...,
-                'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)),
-                'options', 'id', {str}, any))
+            video_id = get_first(nextjs_data, ('options', 'id', {str}))
         else:
-            video_id = traverse_obj(nextjs_data, (
-                ..., ..., ..., 'children',
-                lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path,
-                'video', ('playerReplayId', 'siId'), {str}, any))
+            video_id = get_first(nextjs_data, ('video', ('playerReplayId', 'siId'), {str}))

         if not video_id:
             raise ExtractorError('Unable to extract video ID')

From 9986c49ac6952de619a887b116fc94a58306356d Mon Sep 17 00:00:00 2001
From: bashonly
Date: Sun, 22 Jun 2025 16:18:11 -0500
Subject: [PATCH 5/7] cleanup

Authored by: bashonly
---
 yt_dlp/extractor/common.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 125568305..10ca1257f 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1784,10 +1784,6 @@ def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
         nextjs_data = []
         if not fatal and not isinstance(webpage, str):
             return nextjs_data
-        # This regex pattern can afford to be and should be strict
-        # Ref: https://github.com/vercel/next.js/commit/5a4a08fdce91a038f2ed3a70568d3ed040403150
-        # /packages/next/src/server/app-render/use-flight-response.tsx
-        flight_segments = re.findall(r'<script\b[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage)

         def flatten(flight_data):
             if not isinstance(flight_data, list) or not flight_data:
@@ -1805,7 +1801,10 @@ def flatten(flight_data):
             for f in flight_data:
                 flatten(f)

-        for flight_segment in flight_segments:
+        # The flight segments regex pattern can afford to be (and should be) strict
+        # Ref: https://github.com/vercel/next.js/commit/5a4a08fdce91a038f2ed3a70568d3ed040403150
+        # /packages/next/src/server/app-render/use-flight-response.tsx
+        for flight_segment in re.findall(r'<script\b[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
             segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
             # Some earlier versions of next.js "optimized" away this array structure; this is unsupported
             # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761

From 1ded2a3faa50ae0b51aae1ea606d867ef4e78278 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sun, 22 Jun 2025 22:25:51 +0000
Subject: [PATCH 6/7] precision

---
 yt_dlp/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 53023b642..893b7f41f 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1794,7 +1794,7 @@ def flatten(flight_data):
                 if not isinstance(data, dict):
                     return
                 children = data.pop('children', None)
-                if data and name and name[0] == '$':
+                if data and isinstance(name, str) and name[:1] == '$':
                     # It is useful hydration JSON data
                     nextjs_data.append(data)
                 flatten(children)

From 3f9542f4e8dffc8d14ae88ac1851c167cd20f44d Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sun, 22 Jun 2025 22:26:45 +0000
Subject: [PATCH 7/7] simplify

---
 yt_dlp/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 893b7f41f..9c17f721d 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1787,7 +1787,7 @@ def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
             return nextjs_data

         def flatten(flight_data):
-            if not isinstance(flight_data, list) or not flight_data:
+            if not isinstance(flight_data, list):
                 return
             if len(flight_data) == 4 and flight_data[0] == '$':
                 _, name, _, data = flight_data
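
For context, here is a minimal sketch of how an extractor might consume the new helper once this series lands, modeled on the ninenow and goplay changes above. The ExampleIE class, its URL pattern, and the 'video', 'id', 'title' and 'url' keys are hypothetical placeholders for illustration; they are not part of the patches.

    from .common import InfoExtractor
    from ..utils.traversal import get_first, traverse_obj


    class ExampleIE(InfoExtractor):  # hypothetical extractor, for illustration only
        _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>[\w-]+)'

        def _real_extract(self, url):
            display_id = self._match_id(url)
            webpage = self._download_webpage(url, display_id)
            # Returns a flat list of hydration dicts parsed from the page's
            # self.__next_f.push(...) flight segments (empty list if fatal=False and nothing matches)
            nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
            # Pick the first dict that carries the assumed 'video' payload
            video_data = get_first(nextjs_data, ('video', {dict}))
            return {
                'id': video_data['id'],
                'title': traverse_obj(video_data, ('title', {str})),
                'url': video_data['url'],
            }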