[RadioFrance] fix profile pagination detection

2026-02-04 13:07:00 +00:00 · 2024-10-15 14:44:48 +01:00
parent 867bf965bb
commit e01fab7041
1 changed files with 12 additions and 18 deletions
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -392,7 +392,7 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
    _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
    _TESTS = [{
-        'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
+        'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet',
        'info_dict': {
            'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
            'display_id': 'thomas-pesquet',
@@ -422,30 +422,24 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
        webpage = self._download_webpage(url, profile_id, note=f'Downloading {profile_id} page {cursor}')
        resp = dict()
        # On profile pages, the data is stored in a javascript array in the final <script>
        # Each episode is stored as
        # a[0] = { id: ... }; a[1] = [ id: ... ]; on page 2->
        # If a page had a thumbnail, the a variable contains image data,
        # and episode data is stored in b[0]...
        resp['items'] = []
        podcastindex = 0
        nextmatch = True
        while nextmatch:
            nextmatch = self._search_json(r'\w+\[' + str(podcastindex) + r'\]\s*=\s*', webpage, profile_id,
                                          profile_id, transform_source=js_to_json, fatal=False, default=None)
            podcastindex += 1
            if nextmatch is not None:
                resp['items'].append(nextmatch)
-        # There is more than one pagination key in the final <script>
+        # get episode data from page
        # We should pick the pagination object which is within a documents object
        pagedata = self._search_json(r'documents\s*:\s*', webpage, profile_id, profile_id,
                                     transform_source=js_to_json)
-        lastPage = traverse_obj(pagedata, ('pagination', 'lastPage'))
+
        # get the page data
        pagekey = pagedata['pagination']
        hasMorePages = False
        lastPage = int(self._search_regex(pagekey+'\.lastPage=(\d+);', webpage, profile_id, '0'))
        hasMorePages = cursor < lastPage
        resp['next'] = cursor + 1 if hasMorePages else None
        # get episode data, note, not all will be A/V, so filter for 'expression'
        for item in pagedata['items']:
            if item['model']=='Expression':
                resp['items'].append(item)
        resp['metadata'] = self._search_json(r'content:\s*', webpage, profile_id, profile_id,
                                             transform_source=js_to_json)
        # If the image data is stored separately rather than in the main content area