From 300de372012bce9ca4614a2ec540b9c702d45a4c Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Wed, 11 Sep 2024 15:51:09 +0530 Subject: [PATCH 1/4] [ie/reverbnation] enhancement: added support for artist page --- yt_dlp/extractor/_extractors.py | 2 +- yt_dlp/extractor/reverbnation.py | 68 ++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e7b162512f..3540ff7f80 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1704,7 +1704,7 @@ ) from .restudy import RestudyIE from .reuters import ReutersIE -from .reverbnation import ReverbNationIE +from .reverbnation import ReverbNationArtistIE, ReverbNationIE from .rheinmaintv import RheinMainTVIE from .ridehome import RideHomeIE from .rinsefm import ( diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index ddf8c3753f..a7a236ecf8 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -1,11 +1,11 @@ +import functools + from .common import InfoExtractor -from ..utils import ( - qualities, - str_or_none, -) +from ..utils import InAdvancePagedList, int_or_none, qualities, str_or_none, traverse_obj class ReverbNationIE(InfoExtractor): + IE_NAME = 'reverbnation:song' _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', @@ -13,6 +13,8 @@ class ReverbNationIE(InfoExtractor): 'info_dict': { 'id': '16965047', 'ext': 'mp3', + 'tbr': 192, + 'duration': 217, 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', @@ -46,6 +48,64 @@ def _real_extract(self, url): 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), 'thumbnails': thumbnails, + 'duration': api_res.get('duration'), + 'tbr': api_res.get('bitrate'), 'ext': 'mp3', 'vcodec': 'none', } + + 
+class ReverbNationArtistIE(InfoExtractor): + IE_NAME = 'reverbnation:artist' + _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$' + _TESTS = [{ + 'url': 'https://www.reverbnation.com/morganandersson', + 'info_dict': { + 'id': '1078497', + 'title': 'morganandersson', + }, + 'playlist_mincount': 8, + }, { + 'url': 'https://www.reverbnation.com/monogem/songs', + 'info_dict': { + 'id': '3716672', + 'title': 'monogem', + }, + 'playlist_mincount': 10, + }] + _PAGE_SIZE = 25 + + def _yield_songs(self, json_data): + for song in json_data.get('results'): + data = { + 'id': str_or_none(song.get('id')), + 'title': song.get('name'), + 'url': song.get('url'), + 'uploader': song.get('artist', {}).get('name'), + 'uploader_id': str_or_none(song.get('artist', {}).get('id')), + 'thumbnail': song.get('thumbnail'), + 'duration': int_or_none(song.get('duration')), + 'tbr': int_or_none(song.get('bitrate')), + 'ext': 'mp3', + 'vcodec': 'none', + } + yield data + + def _fetch_page(self, artist_id, page): + return self._download_json(f'https://www.reverbnation.com/api/artist/{artist_id}/songs?page={page}&per_page={self._PAGE_SIZE}', f'{artist_id}_{page}') + + def _entries(self, token, first_page_data, page): + page_data = first_page_data if not page else self._fetch_page(token, page + 1) + yield from self._yield_songs(page_data) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + artist_url = self._html_search_meta('twitter:player', webpage, 'player url') + artist_id = self._search_regex(r'artist_(?P<id>\d+)', artist_url, 'artist id') + playlist_data = self._fetch_page(artist_id, 1) + total_pages = traverse_obj(playlist_data, ('pagination', 'page_count', {int})) + + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, artist_id, playlist_data), + total_pages, self._PAGE_SIZE), artist_id, display_id) From 8cdec2d4709df9602fd29453bf2430354dd75578 Mon Sep 17 00:00:00 
2001 From: subrat-lima Date: Thu, 12 Sep 2024 10:45:32 +0530 Subject: [PATCH 2/4] [ie/reverbnation] removed starting position from url regex and updated extraction code to use traverse_obj --- yt_dlp/extractor/reverbnation.py | 45 +++++++++++++++++--------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index a7a236ecf8..876040051d 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -1,12 +1,12 @@ import functools from .common import InfoExtractor -from ..utils import InAdvancePagedList, int_or_none, qualities, str_or_none, traverse_obj +from ..utils import InAdvancePagedList, float_or_none, int_or_none, qualities, str_or_none, traverse_obj, url_or_none class ReverbNationIE(InfoExtractor): IE_NAME = 'reverbnation:song' - _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' + _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', @@ -42,22 +42,24 @@ def _real_extract(self, url): }) return { - 'id': song_id, - 'title': api_res['name'], - 'url': api_res['url'], - 'uploader': api_res.get('artist', {}).get('name'), - 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), - 'thumbnails': thumbnails, - 'duration': api_res.get('duration'), - 'tbr': api_res.get('bitrate'), + 'id': str_or_none(song_id), 'ext': 'mp3', 'vcodec': 'none', + 'thumbnails': thumbnails, + **traverse_obj(api_res, { + 'title': ('name', {str_or_none}), + 'url': ('url', {url_or_none}), + 'uploader': ('artist', 'name', {str_or_none}), + 'uploader_id': ('artist', 'id', {str_or_none}), + 'duration': ('duration', {float_or_none}), + 'tbr': ('bitrate', {int_or_none}), + }), } class ReverbNationArtistIE(InfoExtractor): IE_NAME = 'reverbnation:artist' - _VALID_URL = 
r'^https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$' + _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$' _TESTS = [{ 'url': 'https://www.reverbnation.com/morganandersson', 'info_dict': { @@ -77,19 +79,20 @@ class ReverbNationArtistIE(InfoExtractor): def _yield_songs(self, json_data): for song in json_data.get('results'): - data = { - 'id': str_or_none(song.get('id')), - 'title': song.get('name'), - 'url': song.get('url'), - 'uploader': song.get('artist', {}).get('name'), - 'uploader_id': str_or_none(song.get('artist', {}).get('id')), - 'thumbnail': song.get('thumbnail'), - 'duration': int_or_none(song.get('duration')), - 'tbr': int_or_none(song.get('bitrate')), + yield { 'ext': 'mp3', 'vcodec': 'none', + **traverse_obj(song, { + 'id': ('id', {str_or_none}), + 'title': ('name', {str_or_none}), + 'url': ('url', {url_or_none}), + 'uploader': ('artist', 'name', {str_or_none}), + 'uploader_id': ('artist', 'id', {str_or_none}), + 'duration': ('duration', {float_or_none}), + 'tbr': ('bitrate', {int_or_none}), + 'thumbnail': ('thumbnail', {url_or_none}), + }), } - yield data def _fetch_page(self, artist_id, page): return self._download_json(f'https://www.reverbnation.com/api/artist/{artist_id}/songs?page={page}&per_page={self._PAGE_SIZE}', f'{artist_id}_{page}') From c700c5cfd225e1033f83277b505169afcdf2d657 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Thu, 12 Sep 2024 15:02:40 +0530 Subject: [PATCH 3/4] [ie/reverbnation] adjusted formatting for imports --- yt_dlp/extractor/_extractors.py | 5 ++++- yt_dlp/extractor/reverbnation.py | 10 +++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3540ff7f80..80784091e8 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1704,7 +1704,10 @@ ) from .restudy import RestudyIE from .reuters import ReutersIE -from .reverbnation import ReverbNationArtistIE, ReverbNationIE 
+from .reverbnation import ( + ReverbNationArtistIE, + ReverbNationIE, +) from .rheinmaintv import RheinMainTVIE from .ridehome import RideHomeIE from .rinsefm import ( diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index 876040051d..4ddc6c5b9f 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -1,7 +1,15 @@ import functools from .common import InfoExtractor -from ..utils import InAdvancePagedList, float_or_none, int_or_none, qualities, str_or_none, traverse_obj, url_or_none +from ..utils import ( + InAdvancePagedList, + float_or_none, + int_or_none, + qualities, + str_or_none, + traverse_obj, + url_or_none, +) class ReverbNationIE(InfoExtractor): From 441daea6e168220acafd5f08eddc0d329bbc2c28 Mon Sep 17 00:00:00 2001 From: Subrat Lima Date: Sun, 6 Apr 2025 22:09:09 +0530 Subject: [PATCH 4/4] [ie/reverbnation] clean up extractor code --- yt_dlp/extractor/reverbnation.py | 84 +++++++++++--------------------- 1 file changed, 29 insertions(+), 55 deletions(-) diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index 4ddc6c5b9f..2aa4c11f5e 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -3,9 +3,7 @@ from .common import InfoExtractor from ..utils import ( InAdvancePagedList, - float_or_none, int_or_none, - qualities, str_or_none, traverse_obj, url_or_none, @@ -21,15 +19,30 @@ class ReverbNationIE(InfoExtractor): 'info_dict': { 'id': '16965047', 'ext': 'mp3', + 'vcodec': 'none', 'tbr': 192, 'duration': 217, 'title': 'MONA LISA', - 'uploader': 'ALKILADOS', - 'uploader_id': '216429', + 'artists': ['ALKILADOS'], 'thumbnail': r're:^https?://.*\.jpg', }, }] + def _extract_song(self, json_data): + return { + 'ext': 'mp3', + 'vcodec': 'none', + **traverse_obj(json_data, { + 'id': ('id', {str_or_none}), + 'title': ('name', {str}), + 'artists': ('artist', 'name', all), + 'thumbnail': ('image', {url_or_none}), + 'duration': ('duration', 
{int_or_none}), + 'tbr': ('bitrate', {int_or_none}), + 'url': ('url', {url_or_none}), + }), + } + def _real_extract(self, url): song_id = self._match_id(url) @@ -39,33 +52,10 @@ def _real_extract(self, url): note=f'Downloading information of song {song_id}', ) - THUMBNAILS = ('thumbnail', 'image') - quality = qualities(THUMBNAILS) - thumbnails = [] - for thumb_key in THUMBNAILS: - if api_res.get(thumb_key): - thumbnails.append({ - 'url': api_res[thumb_key], - 'preference': quality(thumb_key), - }) - - return { - 'id': str_or_none(song_id), - 'ext': 'mp3', - 'vcodec': 'none', - 'thumbnails': thumbnails, - **traverse_obj(api_res, { - 'title': ('name', {str_or_none}), - 'url': ('url', {url_or_none}), - 'uploader': ('artist', 'name', {str_or_none}), - 'uploader_id': ('artist', 'id', {str_or_none}), - 'duration': ('duration', {float_or_none}), - 'tbr': ('bitrate', {int_or_none}), - }), - } + return self._extract_song(api_res) -class ReverbNationArtistIE(InfoExtractor): +class ReverbNationArtistIE(ReverbNationIE): IE_NAME = 'reverbnation:artist' _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$' _TESTS = [{ @@ -85,38 +75,22 @@ class ReverbNationArtistIE(InfoExtractor): }] _PAGE_SIZE = 25 - def _yield_songs(self, json_data): - for song in json_data.get('results'): - yield { - 'ext': 'mp3', - 'vcodec': 'none', - **traverse_obj(song, { - 'id': ('id', {str_or_none}), - 'title': ('name', {str_or_none}), - 'url': ('url', {url_or_none}), - 'uploader': ('artist', 'name', {str_or_none}), - 'uploader_id': ('artist', 'id', {str_or_none}), - 'duration': ('duration', {float_or_none}), - 'tbr': ('bitrate', {int_or_none}), - 'thumbnail': ('thumbnail', {url_or_none}), - }), - } - - def _fetch_page(self, artist_id, page): - return self._download_json(f'https://www.reverbnation.com/api/artist/{artist_id}/songs?page={page}&per_page={self._PAGE_SIZE}', f'{artist_id}_{page}') - - def _entries(self, token, first_page_data, page): - page_data = first_page_data if not 
page else self._fetch_page(token, page + 1) - yield from self._yield_songs(page_data) + def _entries(self, artist_id, page): + page_data = self._download_json( + f'https://www.reverbnation.com/api/artist/{artist_id}/songs', + f'{artist_id}_{page + 1}', query={'page': page + 1, 'per_page': self._PAGE_SIZE}) + for song_data in page_data['results']: + yield self._extract_song(song_data) def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) artist_url = self._html_search_meta('twitter:player', webpage, 'player url') artist_id = self._search_regex(r'artist_(?P<id>\d+)', artist_url, 'artist id') - playlist_data = self._fetch_page(artist_id, 1) - total_pages = traverse_obj(playlist_data, ('pagination', 'page_count', {int})) + page_data = self._search_json('"SONGS_WITH_PAGINATION":', webpage, 'json_data', display_id) + total_pages = traverse_obj(page_data, ('pagination', 'page_count', {int})) + self._PAGE_SIZE = traverse_obj(page_data, ('pagination', 'per_page', {int})) return self.playlist_result(InAdvancePagedList( - functools.partial(self._entries, artist_id, playlist_data), + functools.partial(self._entries, artist_id), total_pages, self._PAGE_SIZE), artist_id, display_id)