From 300de372012bce9ca4614a2ec540b9c702d45a4c Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Wed, 11 Sep 2024 15:51:09 +0530 Subject: [PATCH] [ie/reverbnation] enhancement: added support for artist page --- yt_dlp/extractor/_extractors.py | 2 +- yt_dlp/extractor/reverbnation.py | 68 ++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e7b162512f..3540ff7f80 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1704,7 +1704,7 @@ ) from .restudy import RestudyIE from .reuters import ReutersIE -from .reverbnation import ReverbNationIE +from .reverbnation import ReverbNationArtistIE, ReverbNationIE from .rheinmaintv import RheinMainTVIE from .ridehome import RideHomeIE from .rinsefm import ( diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index ddf8c3753f..a7a236ecf8 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -1,11 +1,11 @@ +import functools + from .common import InfoExtractor -from ..utils import ( - qualities, - str_or_none, -) +from ..utils import InAdvancePagedList, int_or_none, qualities, str_or_none, traverse_obj class ReverbNationIE(InfoExtractor): + IE_NAME = 'reverbnation:song' _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', @@ -13,6 +13,8 @@ class ReverbNationIE(InfoExtractor): 'info_dict': { 'id': '16965047', 'ext': 'mp3', + 'tbr': 192, + 'duration': 217, 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', @@ -46,6 +48,64 @@ def _real_extract(self, url): 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), 'thumbnails': thumbnails, + 'duration': api_res.get('duration'), + 'tbr': api_res.get('bitrate'), 'ext': 'mp3', 'vcodec': 'none', } + + 
class ReverbNationArtistIE(InfoExtractor):
    """Playlist extractor for a ReverbNation artist page.

    The artist page is downloaded to discover the numeric artist id, then the
    paginated ``/api/artist/<id>/songs`` endpoint is walked, ``_PAGE_SIZE``
    songs per request, to produce one entry per song.
    """

    IE_NAME = 'reverbnation:artist'
    # `_match_id` reads the group named `id`, so the slug must be captured under that name.
    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$'
    _TESTS = [{
        'url': 'https://www.reverbnation.com/morganandersson',
        'info_dict': {
            'id': '1078497',
            'title': 'morganandersson',
        },
        'playlist_mincount': 8,
    }, {
        'url': 'https://www.reverbnation.com/monogem/songs',
        'info_dict': {
            'id': '3716672',
            'title': 'monogem',
        },
        'playlist_mincount': 10,
    }]
    _PAGE_SIZE = 25

    def _yield_songs(self, json_data):
        """Yield one info dict per song object found under ``json_data['results']``.

        A missing or null ``results`` value yields nothing, and items that are
        not dicts are skipped.
        """
        for song in traverse_obj(json_data, ('results', ..., {dict})):
            yield {
                'id': str_or_none(song.get('id')),
                'title': song.get('name'),
                'url': song.get('url'),
                # traverse_obj tolerates an absent or null `artist` object
                'uploader': traverse_obj(song, ('artist', 'name')),
                'uploader_id': traverse_obj(song, ('artist', 'id', {str_or_none})),
                'thumbnail': song.get('thumbnail'),
                'duration': int_or_none(song.get('duration')),
                'tbr': int_or_none(song.get('bitrate')),
                'ext': 'mp3',
                'vcodec': 'none',
            }

    def _fetch_page(self, artist_id, page):
        """Download and return the JSON for the 1-based ``page`` of the artist's song list."""
        return self._download_json(
            f'https://www.reverbnation.com/api/artist/{artist_id}/songs',
            f'{artist_id}_{page}',
            query={'page': page, 'per_page': self._PAGE_SIZE})

    def _entries(self, token, first_page_data, page):
        """Yield the songs of the 0-based ``page``.

        ``token`` is the artist id. Page 0 is served from ``first_page_data``,
        which the caller has already downloaded; any other page is fetched
        from the API, whose page numbers start at 1.
        """
        page_data = first_page_data if not page else self._fetch_page(token, page + 1)
        yield from self._yield_songs(page_data)

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The numeric artist id is taken from the `twitter:player` meta URL
        artist_url = self._html_search_meta('twitter:player', webpage, 'player url', fatal=True)
        artist_id = self._search_regex(r'artist_(?P<id>\d+)', artist_url, 'artist id', group='id')
        playlist_data = self._fetch_page(artist_id, 1)
        # The first page is already in hand, so fall back to a single page
        # when the API response carries no usable page count.
        total_pages = traverse_obj(playlist_data, ('pagination', 'page_count', {int_or_none})) or 1

        return self.playlist_result(InAdvancePagedList(
            functools.partial(self._entries, artist_id, playlist_data),
            total_pages, self._PAGE_SIZE), artist_id, display_id)