From e10926d1d26615cd2b0a9db74370f9db8d4c9734 Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Fri, 16 May 2025 12:08:16 +0200 Subject: [PATCH 1/2] Fix playlist support for arte.tv --- yt_dlp/extractor/arte.py | 42 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 142d4b066..a6e12c2ac 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -268,33 +268,55 @@ class ArteTVPlaylistIE(ArteTVBaseIE): 'only_matching': True, }, { 'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/', - 'playlist_mincount': 100, + 'playlist_mincount': 20, 'info_dict': { 'description': 'md5:84e7bf1feda248bc325ebfac818c476e', 'id': 'RC-014123', 'title': 'ARTE Reportage - najlepsze reportaże', }, + }, { + 'url': 'https://www.arte.tv/de/videos/RC-025470/ramy/', + 'playlist_mincount': 30, + 'info_dict': { + 'description': 'md5:8766d73504ddccd12dbd1395a1d56815', + 'id': 'RC-025470', + 'title': 'Ramy', + }, }] def _real_extract(self, url): lang, playlist_id = self._match_valid_url(url).group('lang', 'id') - playlist = self._download_json( - f'{self._API_BASE}/playlist/{lang}/{playlist_id}', playlist_id)['data']['attributes'] + webpage = self._download_webpage(url, playlist_id) + + unescape_func = lambda jstring: jstring.replace('\\"', '"').replace('\\\\', '\\') + playlist_data = self._search_json(r'\$L23.+?', webpage, 'series data', + playlist_id, + end_pattern=r'\],\[\[', + transform_source=unescape_func) + + playlist_item_filter = lambda _, v: re.match(rf'collection_(?:videos|subcollection)_{playlist_id}', v['code']) + collections = traverse_obj(playlist_data, + ('data', + 'zones', + playlist_item_filter, + 'content', + 'data', + ...)) entries = [{ '_type': 'url_transparent', - 'url': video['config']['url'], + 'url': f'https://www.arte.tv{video['url']}', 'ie_key': ArteTVIE.ie_key(), - 'id': video.get('providerId'), + 'id': video.get('id'), 'title': video.get('title'), 'alt_title': video.get('subtitle'), - 'thumbnail': url_or_none(traverse_obj(video, ('mainImage', 'url'))), - 'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))), - } for video in traverse_obj(playlist, ('items', lambda _, v: v['config']['url']))] + 'duration': int_or_none(traverse_obj(video, ('duration'))), + 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), + } for video in collections] return self.playlist_result(entries, playlist_id, - traverse_obj(playlist, ('metadata', 'title')), - traverse_obj(playlist, ('metadata', 'description'))) + traverse_obj(playlist_data, ('data', 'metadata', 'title')), + traverse_obj(playlist_data, ('data', 'metadata', 'description'))) class ArteTVCategoryIE(ArteTVBaseIE): From 096fd691bb73776c386ef689a56ecd55da0a28d9 Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Fri, 16 May 2025 13:04:42 +0200 Subject: [PATCH 2/2] Refactored to work with Python 3.9 --- yt_dlp/extractor/arte.py | 49 +++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index a6e12c2ac..387c28ae3 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -284,16 +284,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): }, }] - def _real_extract(self, url): - lang, playlist_id = self._match_valid_url(url).group('lang', 'id') - webpage = self._download_webpage(url, playlist_id) - - unescape_func = lambda jstring: jstring.replace('\\"', '"').replace('\\\\', '\\') - playlist_data = self._search_json(r'\$L23.+?', webpage, 'series data', - playlist_id, - end_pattern=r'\],\[\[', - transform_source=unescape_func) - + def _entries(self, playlist_data, playlist_id): playlist_item_filter = lambda _, v: re.match(rf'collection_(?:videos|subcollection)_{playlist_id}', v['code']) collections = traverse_obj(playlist_data, ('data', @@ -303,20 +294,32 @@ def _real_extract(self, url): 'data', ...)) - entries = [{ - '_type': 'url_transparent', - 'url': f'https://www.arte.tv{video['url']}', - 'ie_key': ArteTVIE.ie_key(), - 'id': video.get('id'), - 'title': video.get('title'), - 'alt_title': video.get('subtitle'), - 'duration': int_or_none(traverse_obj(video, ('duration'))), - 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), - } for video in collections] + for video in collections: + yield { + '_type': 'url_transparent', + 'url': 'https://www.arte.tv' + video['url'], + 'ie_key': ArteTVIE.ie_key(), + 'id': video['id'], + 'title': video.get('title'), + 'alt_title': video.get('subtitle'), + 'duration': int_or_none(traverse_obj(video, ('duration'))), + 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), + } - return self.playlist_result(entries, playlist_id, - traverse_obj(playlist_data, ('data', 'metadata', 'title')), - traverse_obj(playlist_data, ('data', 'metadata', 'description'))) + def _real_extract(self, url): + lang, playlist_id = self._match_valid_url(url).group('lang', 'id') + webpage = self._download_webpage(url, playlist_id) + + unescape_func = lambda jstring: jstring.replace('\\"', '"').replace('\\\\', '\\') + json_data = self._search_json(r'\$L23.+?', webpage, 'series data', + playlist_id, + end_pattern=r'\],\[\[', + transform_source=unescape_func) + + return self.playlist_result(self._entries(json_data, playlist_id), + playlist_id, + traverse_obj(json_data, ('data', 'metadata', 'title')), + traverse_obj(json_data, ('data', 'metadata', 'description'))) class ArteTVCategoryIE(ArteTVBaseIE):