From 441daea6e168220acafd5f08eddc0d329bbc2c28 Mon Sep 17 00:00:00 2001 From: Subrat Lima Date: Sun, 6 Apr 2025 22:09:09 +0530 Subject: [PATCH] [ie/reverbnation] clean up extractor code --- yt_dlp/extractor/reverbnation.py | 84 +++++++++++--------------------- 1 file changed, 29 insertions(+), 55 deletions(-) diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index 4ddc6c5b9f..2aa4c11f5e 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -3,9 +3,7 @@ from .common import InfoExtractor from ..utils import ( InAdvancePagedList, - float_or_none, int_or_none, - qualities, str_or_none, traverse_obj, url_or_none, @@ -21,15 +19,30 @@ class ReverbNationIE(InfoExtractor): 'info_dict': { 'id': '16965047', 'ext': 'mp3', + 'vcodec': 'none', 'tbr': 192, 'duration': 217, 'title': 'MONA LISA', - 'uploader': 'ALKILADOS', - 'uploader_id': '216429', + 'artists': ['ALKILADOS'], 'thumbnail': r're:^https?://.*\.jpg', }, }] + def _extract_song(self, json_data): + return { + 'ext': 'mp3', + 'vcodec': 'none', + **traverse_obj(json_data, { + 'id': ('id', {str_or_none}), + 'title': ('name', {str}), + 'artists': ('artist', 'name', all), + 'thumbnail': ('image', {url_or_none}), + 'duration': ('duration', {int_or_none}), + 'tbr': ('bitrate', {int_or_none}), + 'url': ('url', {url_or_none}), + }), + } + def _real_extract(self, url): song_id = self._match_id(url) @@ -39,33 +52,10 @@ def _real_extract(self, url): note=f'Downloading information of song {song_id}', ) - THUMBNAILS = ('thumbnail', 'image') - quality = qualities(THUMBNAILS) - thumbnails = [] - for thumb_key in THUMBNAILS: - if api_res.get(thumb_key): - thumbnails.append({ - 'url': api_res[thumb_key], - 'preference': quality(thumb_key), - }) - - return { - 'id': str_or_none(song_id), - 'ext': 'mp3', - 'vcodec': 'none', - 'thumbnails': thumbnails, - **traverse_obj(api_res, { - 'title': ('name', {str_or_none}), - 'url': ('url', {url_or_none}), - 'uploader': ('artist', 
'name', {str_or_none}), - 'uploader_id': ('artist', 'id', {str_or_none}), - 'duration': ('duration', {float_or_none}), - 'tbr': ('bitrate', {int_or_none}), - }), - } + return self._extract_song(api_res) -class ReverbNationArtistIE(InfoExtractor): +class ReverbNationArtistIE(ReverbNationIE): IE_NAME = 'reverbnation:artist' _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$' _TESTS = [{ @@ -85,38 +75,22 @@ class ReverbNationArtistIE(InfoExtractor): }] _PAGE_SIZE = 25 - def _yield_songs(self, json_data): - for song in json_data.get('results'): - yield { - 'ext': 'mp3', - 'vcodec': 'none', - **traverse_obj(song, { - 'id': ('id', {str_or_none}), - 'title': ('name', {str_or_none}), - 'url': ('url', {url_or_none}), - 'uploader': ('artist', 'name', {str_or_none}), - 'uploader_id': ('artist', 'id', {str_or_none}), - 'duration': ('duration', {float_or_none}), - 'tbr': ('bitrate', {int_or_none}), - 'thumbnail': ('thumbnail', {url_or_none}), - }), - } - - def _fetch_page(self, artist_id, page): - return self._download_json(f'https://www.reverbnation.com/api/artist/{artist_id}/songs?page={page}&per_page={self._PAGE_SIZE}', f'{artist_id}_{page}') - - def _entries(self, token, first_page_data, page): - page_data = first_page_data if not page else self._fetch_page(token, page + 1) - yield from self._yield_songs(page_data) + def _entries(self, artist_id, page): + page_data = self._download_json( + f'https://www.reverbnation.com/api/artist/{artist_id}/songs', + f'{artist_id}_{page + 1}', query={'page': page + 1, 'per_page': self._PAGE_SIZE}) + for song_data in page_data['results']: + yield self._extract_song(song_data) def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) artist_url = self._html_search_meta('twitter:player', webpage, 'player url') artist_id = self._search_regex(r'artist_(?P<id>\d+)', artist_url, 'artist id') - playlist_data = self._fetch_page(artist_id, 1) - total_pages = 
traverse_obj(playlist_data, ('pagination', 'page_count', {int})) + page_data = self._search_json('"SONGS_WITH_PAGINATION":', webpage, 'json_data', display_id) + total_pages = traverse_obj(page_data, ('pagination', 'page_count', {int})) + self._PAGE_SIZE = traverse_obj(page_data, ('pagination', 'per_page', {int})) return self.playlist_result(InAdvancePagedList( - functools.partial(self._entries, artist_id, playlist_data), + functools.partial(self._entries, artist_id), total_pages, self._PAGE_SIZE), artist_id, display_id)