1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-18 19:28:31 +00:00

[ie/reverbnation] clean up extractor code

This commit is contained in:
Subrat Lima 2025-04-06 22:09:09 +05:30
parent c700c5cfd2
commit 441daea6e1

View File

@ -3,9 +3,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
InAdvancePagedList, InAdvancePagedList,
float_or_none,
int_or_none, int_or_none,
qualities,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
url_or_none, url_or_none,
@ -21,15 +19,30 @@ class ReverbNationIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '16965047', 'id': '16965047',
'ext': 'mp3', 'ext': 'mp3',
'vcodec': 'none',
'tbr': 192, 'tbr': 192,
'duration': 217, 'duration': 217,
'title': 'MONA LISA', 'title': 'MONA LISA',
'uploader': 'ALKILADOS', 'artists': ['ALKILADOS'],
'uploader_id': '216429',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
}, },
}] }]
def _extract_song(self, json_data):
return {
'ext': 'mp3',
'vcodec': 'none',
**traverse_obj(json_data, {
'id': ('id', {str_or_none}),
'title': ('name', {str}),
'artists': ('artist', 'name', all),
'thumbnail': ('image', {url_or_none}),
'duration': ('duration', {int_or_none}),
'tbr': ('bitrate', {int_or_none}),
'url': ('url', {url_or_none}),
}),
}
def _real_extract(self, url): def _real_extract(self, url):
song_id = self._match_id(url) song_id = self._match_id(url)
@ -39,33 +52,10 @@ def _real_extract(self, url):
note=f'Downloading information of song {song_id}', note=f'Downloading information of song {song_id}',
) )
THUMBNAILS = ('thumbnail', 'image') return self._extract_song(api_res)
quality = qualities(THUMBNAILS)
thumbnails = []
for thumb_key in THUMBNAILS:
if api_res.get(thumb_key):
thumbnails.append({
'url': api_res[thumb_key],
'preference': quality(thumb_key),
})
return {
'id': str_or_none(song_id),
'ext': 'mp3',
'vcodec': 'none',
'thumbnails': thumbnails,
**traverse_obj(api_res, {
'title': ('name', {str_or_none}),
'url': ('url', {url_or_none}),
'uploader': ('artist', 'name', {str_or_none}),
'uploader_id': ('artist', 'id', {str_or_none}),
'duration': ('duration', {float_or_none}),
'tbr': ('bitrate', {int_or_none}),
}),
}
class ReverbNationArtistIE(InfoExtractor): class ReverbNationArtistIE(ReverbNationIE):
IE_NAME = 'reverbnation:artist' IE_NAME = 'reverbnation:artist'
_VALID_URL = r'https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$' _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/(?P<id>[\w-]+)(?:/songs)?$'
_TESTS = [{ _TESTS = [{
@ -85,38 +75,22 @@ class ReverbNationArtistIE(InfoExtractor):
}] }]
_PAGE_SIZE = 25 _PAGE_SIZE = 25
def _yield_songs(self, json_data): def _entries(self, artist_id, page):
for song in json_data.get('results'): page_data = self._download_json(
yield { f'https://www.reverbnation.com/api/artist/{artist_id}/songs',
'ext': 'mp3', f'{artist_id}_{page + 1}', query={'page': page + 1, 'per_page': self._PAGE_SIZE})
'vcodec': 'none', for song_data in page_data['results']:
**traverse_obj(song, { yield self._extract_song(song_data)
'id': ('id', {str_or_none}),
'title': ('name', {str_or_none}),
'url': ('url', {url_or_none}),
'uploader': ('artist', 'name', {str_or_none}),
'uploader_id': ('artist', 'id', {str_or_none}),
'duration': ('duration', {float_or_none}),
'tbr': ('bitrate', {int_or_none}),
'thumbnail': ('thumbnail', {url_or_none}),
}),
}
def _fetch_page(self, artist_id, page):
return self._download_json(f'https://www.reverbnation.com/api/artist/{artist_id}/songs?page={page}&per_page={self._PAGE_SIZE}', f'{artist_id}_{page}')
def _entries(self, token, first_page_data, page):
page_data = first_page_data if not page else self._fetch_page(token, page + 1)
yield from self._yield_songs(page_data)
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
artist_url = self._html_search_meta('twitter:player', webpage, 'player url') artist_url = self._html_search_meta('twitter:player', webpage, 'player url')
artist_id = self._search_regex(r'artist_(?P<artist>\d+)', artist_url, 'artist id') artist_id = self._search_regex(r'artist_(?P<artist>\d+)', artist_url, 'artist id')
playlist_data = self._fetch_page(artist_id, 1) page_data = self._search_json('"SONGS_WITH_PAGINATION":', webpage, 'json_data', display_id)
total_pages = traverse_obj(playlist_data, ('pagination', 'page_count', {int})) total_pages = traverse_obj(page_data, ('pagination', 'page_count', {int}))
self._PAGE_SIZE = traverse_obj(page_data, ('pagination', 'per_page', {int}))
return self.playlist_result(InAdvancePagedList( return self.playlist_result(InAdvancePagedList(
functools.partial(self._entries, artist_id, playlist_data), functools.partial(self._entries, artist_id),
total_pages, self._PAGE_SIZE), artist_id, display_id) total_pages, self._PAGE_SIZE), artist_id, display_id)