mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-04 13:07:00 +00:00
[RadioFrance] fix profile pagination detection
This commit is contained in:
@@ -392,7 +392,7 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
|
|||||||
_VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
|
_VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
|
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
|
'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
|
||||||
'display_id': 'thomas-pesquet',
|
'display_id': 'thomas-pesquet',
|
||||||
@@ -422,30 +422,24 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
|
|||||||
webpage = self._download_webpage(url, profile_id, note=f'Downloading {profile_id} page {cursor}')
|
webpage = self._download_webpage(url, profile_id, note=f'Downloading {profile_id} page {cursor}')
|
||||||
|
|
||||||
resp = dict()
|
resp = dict()
|
||||||
|
|
||||||
# On profile pages, the data is stored in a javascript array in the final <script>
|
|
||||||
# Each episode is stored as
|
|
||||||
# a[0] = { id: ... }; a[1] = [ id: ... ]; on page 2->
|
|
||||||
# If a page had a thumbnail, the a variable contains image data,
|
|
||||||
# and episode data is stored in b[0]...
|
|
||||||
resp['items'] = []
|
resp['items'] = []
|
||||||
podcastindex = 0
|
|
||||||
nextmatch = True
|
|
||||||
while nextmatch:
|
|
||||||
nextmatch = self._search_json(r'\w+\[' + str(podcastindex) + r'\]\s*=\s*', webpage, profile_id,
|
|
||||||
profile_id, transform_source=js_to_json, fatal=False, default=None)
|
|
||||||
podcastindex += 1
|
|
||||||
if nextmatch is not None:
|
|
||||||
resp['items'].append(nextmatch)
|
|
||||||
|
|
||||||
# There is more than one pagination key in the final <script>
|
# get episode data from page
|
||||||
# We should pick the pagination object which is within a documents object
|
|
||||||
pagedata = self._search_json(r'documents\s*:\s*', webpage, profile_id, profile_id,
|
pagedata = self._search_json(r'documents\s*:\s*', webpage, profile_id, profile_id,
|
||||||
transform_source=js_to_json)
|
transform_source=js_to_json)
|
||||||
lastPage = traverse_obj(pagedata, ('pagination', 'lastPage'))
|
|
||||||
|
# get the page data
|
||||||
|
pagekey = pagedata['pagination']
|
||||||
|
hasMorePages = False
|
||||||
|
lastPage = int(self._search_regex(pagekey+'\.lastPage=(\d+);', webpage, profile_id, '0'))
|
||||||
hasMorePages = cursor < lastPage
|
hasMorePages = cursor < lastPage
|
||||||
resp['next'] = cursor + 1 if hasMorePages else None
|
resp['next'] = cursor + 1 if hasMorePages else None
|
||||||
|
|
||||||
|
# get episode data, note, not all will be A/V, so filter for 'expression'
|
||||||
|
for item in pagedata['items']:
|
||||||
|
if item['model']=='Expression':
|
||||||
|
resp['items'].append(item)
|
||||||
|
|
||||||
resp['metadata'] = self._search_json(r'content:\s*', webpage, profile_id, profile_id,
|
resp['metadata'] = self._search_json(r'content:\s*', webpage, profile_id, profile_id,
|
||||||
transform_source=js_to_json)
|
transform_source=js_to_json)
|
||||||
# If the image data is stored separately rather than in the main content area
|
# If the image data is stored separately rather than in the main content area
|
||||||
|
|||||||
Reference in New Issue
Block a user