mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 01:18:30 +00:00
[ie/difm] enhance extractor, fix test and clean up code
This commit is contained in:
parent
e4b1fd68f8
commit
a1694a14c3
@@ -8,40 +8,32 @@
|
|||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
def clean_url(x):
    """Return ``x`` passed through ``sanitize_url`` with ``scheme='https'``, then HTML-unescaped.

    Written as a named function rather than a lambda bound to a name so that
    tracebacks and reprs show ``clean_url`` instead of ``<lambda>``.
    """
    return unescapeHTML(sanitize_url(x, scheme='https'))
|
||||||
|
|
||||||
|
|
||||||
def extract_episode(episode):
    """Map one episode metadata object onto an info dict.

    Only the first entry of ``episode['tracks']`` (and the first entry of that
    track's ``content.assets``) is consulted. Lookups go through
    ``traverse_obj``, so the returned dict holds only the keys whose paths
    resolved.

    :param episode: episode metadata as decoded from JSON
    :return: dict with any of ``id``, ``timestamp``, ``duration``, ``artist``,
        ``title``, ``thumbnail``, ``url`` and ``filesize``
    """
    track = ('tracks', 0)
    asset = (*track, 'content', 'assets', 0)
    return traverse_obj(episode, {
        'id': ('id', {str_or_none}),
        # Previously unified_strdate + int_or_none, which produced a
        # date-shaped integer such as 20200404 instead of an epoch value.
        'timestamp': ('start_at', {unified_timestamp}),
        'duration': (*track, 'length', {int_or_none}),
        'artist': (*track, 'display_artist', {str_or_none}),
        'title': (*track, 'display_title', {str_or_none}),
        'thumbnail': (*track, 'asset_url', {clean_url}),
        'url': (*asset, 'url', {clean_url}),
        'filesize': (*asset, 'size', {int_or_none}),
    })
|
|
||||||
|
|
||||||
|
|
||||||
class DIFMShowEpisodeIE(InfoExtractor):
|
class DIFMShowEpisodeIE(InfoExtractor):
|
||||||
IE_NAME = 'difm:showepisode'
|
IE_NAME = 'difm:showepisode'
|
||||||
_VALID_URL = r'https?://www\.di\.fm/shows/(?P<show_name>[\w-]+)/episodes/(?P<episode_id>[\w-]+)'
|
_VALID_URL = r'https?://www\.di\.fm/shows/(?P<show_id>[\w-]+)/episodes/(?P<episode_id>[\w-]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://www.di.fm/shows/airwaves-progressions-radio/episodes/001',
|
'url': 'https://www.di.fm/shows/airwaves-progressions-radio/episodes/001',
|
||||||
'md5': '5725ec4226aed05c58b6460df5e4b4df',
|
'md5': '5725ec4226aed05c58b6460df5e4b4df',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '130151',
|
'id': '130151',
|
||||||
|
'ext': 'm4a',
|
||||||
'title': 'Progressions 001 (04 April 2020)',
|
'title': 'Progressions 001 (04 April 2020)',
|
||||||
|
'description': '',
|
||||||
'duration': 7456,
|
'duration': 7456,
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'timestamp': 20200404,
|
'upload_date': '20200404',
|
||||||
'artist': 'Airwave',
|
'timestamp': 1586008800,
|
||||||
|
'artists': ['Airwave'],
|
||||||
'filesize': 120584191,
|
'filesize': 120584191,
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.di.fm/shows/the-global-warm-up/episodes/1095',
|
'url': 'https://www.di.fm/shows/the-global-warm-up/episodes/1095',
|
||||||
@@ -49,17 +41,35 @@ class DIFMShowEpisodeIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def _extract_data(self, episode):
    """Build the info dict for a single episode metadata object.

    The extension is fixed to ``m4a``; every other field is looked up with
    ``traverse_obj`` in the first track of ``episode['tracks']`` (and the
    first asset of that track), so fields whose path does not resolve are
    simply absent from the result.

    :param episode: episode metadata as decoded from JSON
    :return: info dict for the episode
    """
    first_track = ('tracks', 0)
    first_asset = (*first_track, 'content', 'assets', 0)

    info = {'ext': 'm4a'}
    info.update(traverse_obj(episode, {
        'id': ('id', {str_or_none}),
        'timestamp': ('start_at', {unified_timestamp}),
        'description': ('description', {str}),
        'duration': (*first_track, 'length', {int_or_none}),
        'artist': (*first_track, 'display_artist', {str}),
        'title': (*first_track, 'display_title', {str}),
        'like_count': (*first_track, 'votes', 'up', {int_or_none}),
        'dislike_count': (*first_track, 'votes', 'down', {int_or_none}),
        'thumbnail': (*first_track, 'asset_url', {clean_url}),
        'url': (*first_asset, 'url', {clean_url}),
        'filesize': (*first_asset, 'size', {int_or_none}),
    }))
    return info
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract one show episode from its page.

    Downloads the episode page, locates the JSON object that follows the
    ``"EpisodeDetail.LayoutEngine",`` marker and converts its ``episode``
    member with ``_extract_data``.
    """
    mobj = self._match_valid_url(url)
    show_id = mobj.group('show_id')
    episode_id = mobj.group('episode_id')
    video_id = f'{show_id}-{episode_id}'

    webpage = self._download_webpage(url, video_id, fatal=True, impersonate=True)
    layout_data = self._search_json(
        '"EpisodeDetail.LayoutEngine",', webpage, 'json_data', video_id)
    return self._extract_data(layout_data['episode'])
|
||||||
|
|
||||||
|
|
||||||
class DIFMShowIE(InfoExtractor):
|
class DIFMShowIE(DIFMShowEpisodeIE):
|
||||||
IE_NAME = 'difm:show'
|
IE_NAME = 'difm:show'
|
||||||
_VALID_URL = r'https?://www\.di\.fm/shows/(?P<show_name>[\w-]+)$'
|
_VALID_URL = r'https?://www\.di\.fm/shows/(?P<id>[\w-]+)$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.di.fm/shows/the-global-warm-up',
|
'url': 'https://www.di.fm/shows/the-global-warm-up',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -71,15 +81,19 @@ class DIFMShowIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 5
|
_PAGE_SIZE = 5
|
||||||
|
|
||||||
def _entries(self, show_id, session_key, page):
    """Yield info dicts for one page of a show's episodes.

    :param show_id: show slug used in the API path
    :param session_key: value sent in the ``X-Session-Key`` request header
    :param page: zero-based page index; the API is queried with ``page + 1``
    """
    page_number = page + 1
    episodes = self._download_json(
        f'https://api.audioaddict.com/v1/di/shows/{show_id}/episodes',
        f'{show_id}-{page_number}',
        headers={'X-Session-Key': session_key},
        query={'page': str(page_number), 'per_page': str(self._PAGE_SIZE)},
    )
    yield from map(self._extract_data, episodes)
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract a show page as a lazily paged playlist of its episodes.

    The show page supplies the playlist title (the HTML title with a trailing
    `` - DI.FM`` removed) and the ``session_key`` that ``_entries`` sends with
    each API request. Episodes are fetched on demand, ``_PAGE_SIZE`` per page.
    """
    show_id = self._match_id(url)
    webpage = self._download_webpage(url, show_id, fatal=True, impersonate=True)

    # NOTE(review): _html_extract_title is presumably non-fatal and may return
    # None when the page has no title - confirm against InfoExtractor. Guard so
    # a missing title yields an untitled playlist instead of an AttributeError.
    page_title = self._html_extract_title(webpage)
    show_title = page_title.removesuffix(' - DI.FM') if page_title is not None else None

    session_key = self._search_regex(r'"session_key"\s*:\s*"(?P<session_key>\w+)"', webpage, 'session_key')
    entries = OnDemandPagedList(functools.partial(self._entries, show_id, session_key), self._PAGE_SIZE)
    return self.playlist_result(entries, show_id, show_title)
|
||||||
|
Loading…
Reference in New Issue
Block a user