diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index c9172fef78..a4c58ff853 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1723,7 +1723,10 @@
 )
 from .restudy import RestudyIE
 from .reuters import ReutersIE
-from .reverbnation import ReverbNationIE
+from .reverbnation import (
+    ReverbNationArtistIE,
+    ReverbNationIE,
+)
 from .rheinmaintv import RheinMainTVIE
 from .ridehome import RideHomeIE
 from .rinsefm import (
diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py
index f3bcc2c328..2aa4c11f5e 100644
--- a/yt_dlp/extractor/reverbnation.py
+++ b/yt_dlp/extractor/reverbnation.py
@@ -1,11 +1,17 @@
+import functools
+
 from .common import InfoExtractor
 from ..utils import (
-    qualities,
+    InAdvancePagedList,
+    int_or_none,
     str_or_none,
+    traverse_obj,
+    url_or_none,
 )
 
 
 class ReverbNationIE(InfoExtractor):
+    IE_NAME = 'reverbnation:song'
     _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
     _TESTS = [{
         'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
@@ -13,13 +19,31 @@ class ReverbNationIE(InfoExtractor):
         'info_dict': {
             'id': '16965047',
             'ext': 'mp3',
+            'vcodec': 'none',
+            'tbr': 192,
+            'duration': 217,
             'title': 'MONA LISA',
-            'uploader': 'ALKILADOS',
-            'uploader_id': '216429',
+            'artists': ['ALKILADOS'],
             'thumbnail': r're:^https?://.*\.jpg',
         },
     }]
 
+    def _extract_song(self, json_data):
+        """Map one song JSON object to an info dict (audio-only mp3)."""
+        return {
+            'ext': 'mp3',
+            'vcodec': 'none',
+            **traverse_obj(json_data, {
+                'id': ('id', {str_or_none}),
+                'title': ('name', {str}),
+                'artists': ('artist', 'name', all),
+                'thumbnail': ('image', {url_or_none}),
+                'duration': ('duration', {int_or_none}),
+                'tbr': ('bitrate', {int_or_none}),
+                'url': ('url', {url_or_none}),
+            }),
+        }
+
     def _real_extract(self, url):
         song_id = self._match_id(url)
 
@@ -29,23 +53,48 @@ def _real_extract(self, url):
             note=f'Downloading information of song {song_id}',
         )
 
-        THUMBNAILS = ('thumbnail', 'image')
-        quality = qualities(THUMBNAILS)
-        thumbnails = []
-        for thumb_key in THUMBNAILS:
-            if api_res.get(thumb_key):
-                thumbnails.append({
-                    'url': api_res[thumb_key],
-                    'preference': quality(thumb_key),
-                })
+        return self._extract_song(api_res)
 
-        return {
-            'id': song_id,
-            'title': api_res['name'],
-            'url': api_res['url'],
-            'uploader': api_res.get('artist', {}).get('name'),
-            'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
-            'thumbnails': thumbnails,
-            'ext': 'mp3',
-            'vcodec': 'none',
-        }
+
+class ReverbNationArtistIE(ReverbNationIE):
+    IE_NAME = 'reverbnation:artist'
+    _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$'
+    _TESTS = [{
+        'url': 'https://www.reverbnation.com/morganandersson',
+        'info_dict': {
+            'id': '1078497',
+            'title': 'morganandersson',
+        },
+        'playlist_mincount': 8,
+    }, {
+        'url': 'https://www.reverbnation.com/monogem/songs',
+        'info_dict': {
+            'id': '3716672',
+            'title': 'monogem',
+        },
+        'playlist_mincount': 10,
+    }]
+    _PAGE_SIZE = 25
+
+    def _entries(self, artist_id, page_size, page):
+        """Yield the info dicts of one page of the artist's songs (`page` is zero-based)."""
+        page_data = self._download_json(
+            f'https://www.reverbnation.com/api/artist/{artist_id}/songs',
+            f'{artist_id}_{page + 1}', query={'page': page + 1, 'per_page': page_size})
+        for song_data in traverse_obj(page_data, ('results', ..., {dict})):
+            yield self._extract_song(song_data)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        artist_url = self._html_search_meta('twitter:player', webpage, 'player url')
+        artist_id = self._search_regex(r'artist_(?P<id>\d+)', artist_url, 'artist id')
+        page_data = self._search_json('"SONGS_WITH_PAGINATION":', webpage, 'json_data', display_id)
+        total_pages = traverse_obj(page_data, ('pagination', 'page_count', {int}))
+        # Keep the page size local rather than overwriting self._PAGE_SIZE, and fall
+        # back to the class default when the page does not report `per_page`
+        page_size = traverse_obj(page_data, ('pagination', 'per_page', {int})) or self._PAGE_SIZE
+
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, artist_id, page_size),
+            total_pages, page_size), artist_id, display_id)