mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 01:18:30 +00:00
refactor: split into base class, track ie, playlist ie
This commit is contained in:
parent
705bb0ba5f
commit
cd282aae39
@ -756,11 +756,102 @@ def _real_extract(self, url):
|
||||
clean_html(get_element_by_class('wall_post_text', webpage)))
|
||||
|
||||
|
||||
class VKMusicIE(VKBaseIE):
|
||||
IE_NAME = 'vk:music'
|
||||
class VKMusicBaseIE(VKBaseIE):
|
||||
_BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
|
||||
|
||||
# Debug and test on https://regexr.com/8dlot
|
||||
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/(?:audio(?P<track_id>-?\d+_\d+)|(?:.*[\?&](?:act|z)=audio_playlist|music/[a-z]+/)(?P<playlist_id>(?P<pl_oid>-?\d+)_(?P<pl_id>\d+)))(?:(?:%2F|_|[?&]access_hash=)(?P<access_hash>[0-9a-f]+))?'
|
||||
def _b64_decode(self, enc):
    """Decode `enc` with the custom alphabet stored in `self._BASE64_CHARS`.

    Symbols are consumed in groups of four. The first symbol of a group only
    restarts the accumulator; each of the following three appends one decoded
    character to the result.
    """
    decoded = []
    acc = 0
    for pos, symbol in enumerate(enc):
        sextet = self._BASE64_CHARS.index(symbol)
        if pos % 4 == 0:
            # start of a new group: nothing to emit yet
            acc = sextet
            continue
        acc = 64 * acc + sextet
        # the shift cycles through 4, 2, 0 for the 2nd, 3rd and 4th symbol
        shift = -2 * (pos + 1) & 6
        decoded.append(chr(255 & acc >> shift))
    return ''.join(decoded)
|
||||
|
||||
def _unmask_url(self, mask_url, vk_id):
    """Return the real audio URL hidden in a masked one.

    URLs that do not contain 'audio_api_unavailable' are returned untouched.
    Otherwise the text after '?extra=' is split on '#': the first part
    decodes to the shuffled URL, the second part decodes to two values
    separated by chr(11), of which the second is the numeric shuffle base.
    The base is XORed with `vk_id` to seed the index sequence that undoes
    the shuffle.
    """
    if 'audio_api_unavailable' not in mask_url:
        return mask_url

    parts = mask_url.split('?extra=')[1].split('#')
    _op, seed = self._b64_decode(parts[1]).split(chr(11))
    chars = list(self._b64_decode(parts[0]))
    size = len(chars)

    # build the swap targets from the last position down to the first
    cursor = int(seed) ^ vk_id
    targets = [None] * size
    for pos in reversed(range(size)):
        cursor = (size * (pos + 1) ^ cursor + pos) % size
        targets[pos] = cursor

    # position 0 is never the left-hand side of a swap
    for pos in range(1, size):
        other = targets[size - 1 - pos]
        chars[pos], chars[other] = chars[other], chars[pos]

    return ''.join(chars)
|
||||
|
||||
def _parse_track_meta(self, meta, track_id=None):
    """Build a partial info dict from a positional track metadata list.

    Fields read from `meta` by index: 0 and 1 (joined into the id as
    '<meta[1]>_<meta[0]>'), 3 (title), 4 (artist string), 5 (duration),
    14 (thumbnail URL), 17 and 18 (artist objects with a 'name') and
    30 (flags, see below). Lists that are too short to carry a field are
    tolerated; that field is then left empty.

    `track_id` is used as the id when `meta` does not provide one.
    """
    # meta[i] only exists when len(meta) > i, hence the strict comparisons
    len_ = len(meta)
    info = {}

    info['id'] = (
        f'{meta[1]}_{meta[0]}'
        if len_ >= 2 and meta[1] and meta[0]
        else track_id)

    title = unescapeHTML(meta[3]) if len_ > 3 else None  # TODO: fallback
    artist = unescapeHTML(meta[4]) if len_ > 4 else None  # artists in one string, may include "feat."
    info['title'] = join_nonempty(artist, title, delim=' - ')
    info['track'] = title
    info['uploader'] = artist

    # artists as list
    artists = None
    if len_ > 18:
        # not htmlescaped unlike meta[4];
        # `or ()` keeps a falsy (missing) field from breaking the unpacking
        artists = traverse_obj(
            (*(meta[17] or ()), *(meta[18] or ())), (..., 'name'))
    # fall back to the combined artist string, but never produce [None]
    info['artists'] = artists or ([artist] if artist else None)

    info['duration'] = int_or_none(meta[5]) if len_ > 5 else None
    # skip empty URLs so no thumbnail entry without a usable 'url' is emitted
    info['thumbnails'] = [{'url': meta[14]}] if len_ > 14 and meta[14] else []

    # meta[30] is 2 bits
    # most significant: isExplicit
    # least significant: isForeignAgent
    # i. e.
    # 00 = safe
    # 01 = marked by RKN as "foreign agent"
    # 10 = explicit lyrics
    # 11 = both E lyrics and "foreign agent"
    if len_ > 30 and meta[30]:
        info['age_limit'] = 18

    return info
|
||||
|
||||
def _raise_if_blocked(self, meta, track_id):
    """Raise if the track metadata says the track cannot be played.

    meta[12], when present and non-empty, is parsed as JSON and its
    'claim' -> 'reason' value is inspected:
      * 'geo' is reported through `raise_geo_restricted()`;
      * any other value that is not None (an empty string included)
        raises ExtractorError quoting the reason code.
    Returns None when there is nothing blocking the track.
    """
    # meta[12] only exists when len(meta) > 12
    extra = meta[12] if len(meta) > 12 else None
    if not extra:
        # nothing to inspect; also avoids handing None to the JSON parser
        return

    reason = traverse_obj(
        self._parse_json(extra, track_id, fatal=False),
        ('claim', 'reason'))

    if reason == 'geo':
        self.raise_geo_restricted()
    # can be an empty string
    elif reason is not None:
        # `!r` is the repr conversion; `:r` would be an invalid format spec
        raise ExtractorError(
            'This track is unavailable. '
            f'Reason code: {reason!r}', expected=True)
|
||||
|
||||
|
||||
class VKMusicTrackIE(VKMusicBaseIE):
|
||||
IE_NAME = 'vkmusic:track'
|
||||
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:m|new)\.)?vk\.(?:com|ru)/
|
||||
audio(?P<id>-?\d+_\d+)
|
||||
(?:(?:%2F|_)(?P<hash>[0-9a-f]+))?
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@ -814,27 +905,6 @@ class VKMusicIE(VKBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://vk.com/artist/linkinpark/releases?z=audio_playlist-2000984503_984503%2Fc468f3a862b6f73b55',
|
||||
'info_dict': {
|
||||
'id': '-2000984503_984503',
|
||||
'title': 'Linkin Park - One More Light',
|
||||
'description': '',
|
||||
'album': 'One More Light',
|
||||
'uploader': 'Linkin Park',
|
||||
'artists': ['Linkin Park'],
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'genres': ['Alternative'],
|
||||
'release_year': 2017,
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'view_count': int,
|
||||
},
|
||||
'playlist_count': 10,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'note': 'special symbols in title and artist must be unescaped',
|
||||
'url': 'https://vk.com/audio-2001069891_6069891',
|
||||
@ -852,84 +922,13 @@ class VKMusicIE(VKBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://vk.com/audios877252112?block=playlists&section=general&z=audio_playlist-147845620_2390',
|
||||
'info_dict': {
|
||||
'id': '-147845620_2390',
|
||||
'title': 'VK Fest 2024: Белая сцена',
|
||||
'description': 'md5:6d652551bb1faaddbcd46321a77fa8d0',
|
||||
'uploader': 'VK Музыка',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'view_count': int,
|
||||
},
|
||||
'playlist_count': 18,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _parse_track_meta(self, meta, track_id=None):
    """Build a partial info dict from a positional track metadata list.

    Fields read from `meta` by index: 0 and 1 (joined into the id as
    '<meta[1]>_<meta[0]>'), 3 (title), 4 (artist string), 5 (duration),
    14 (thumbnail URL), 17 and 18 (artist objects with a 'name') and
    30 (flags, see below). Lists that are too short to carry a field are
    tolerated; that field is then left empty.

    `track_id` is used as the id when `meta` does not provide one.
    """
    # meta[i] only exists when len(meta) > i, hence the strict comparisons
    len_ = len(meta)
    info = {}

    info['id'] = (
        f'{meta[1]}_{meta[0]}'
        if len_ >= 2 and meta[1] and meta[0]
        else track_id)

    title = unescapeHTML(meta[3]) if len_ > 3 else None  # TODO: fallback
    artist = unescapeHTML(meta[4]) if len_ > 4 else None  # artists in one string, may include "feat."
    info['title'] = join_nonempty(artist, title, delim=' - ')
    info['track'] = title
    info['uploader'] = artist

    # artists as list
    artists = None
    if len_ > 18:
        # not htmlescaped unlike meta[4];
        # `or ()` keeps a falsy (missing) field from breaking the unpacking
        artists = traverse_obj(
            (*(meta[17] or ()), *(meta[18] or ())), (..., 'name'))
    # fall back to the combined artist string, but never produce [None]
    info['artists'] = artists or ([artist] if artist else None)

    info['duration'] = int_or_none(meta[5]) if len_ > 5 else None
    # skip empty URLs so no thumbnail entry without a usable 'url' is emitted
    info['thumbnails'] = [{'url': meta[14]}] if len_ > 14 and meta[14] else []

    # meta[30] is 2 bits
    # most significant: isExplicit
    # least significant: isForeignAgent
    # i. e.
    # 00 = safe
    # 01 = marked by RKN as "foreign agent"
    # 10 = explicit lyrics
    # 11 = both E lyrics and "foreign agent"
    if len_ > 30 and meta[30]:
        info['age_limit'] = 18

    return info
|
||||
|
||||
def _raise_if_blocked(self, meta, track_id):
    """Raise if the track metadata says the track cannot be played.

    meta[12], when present and non-empty, is parsed as JSON and its
    'claim' -> 'reason' value is inspected:
      * 'geo' is reported through `raise_geo_restricted()`;
      * any other value that is not None (an empty string included)
        raises ExtractorError quoting the reason code.
    Returns None when there is nothing blocking the track.
    """
    # meta[12] only exists when len(meta) > 12
    extra = meta[12] if len(meta) > 12 else None
    if not extra:
        # nothing to inspect; also avoids handing None to the JSON parser
        return

    reason = traverse_obj(
        self._parse_json(extra, track_id, fatal=False),
        ('claim', 'reason'))

    if reason == 'geo':
        self.raise_geo_restricted()
    # can be an empty string
    elif reason is not None:
        # `!r` is the repr conversion; `:r` would be an invalid format spec
        raise ExtractorError(
            'This track is unavailable. '
            f'Reason code: {reason!r}', expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
track_id = mobj.group('track_id')
|
||||
playlist_id = mobj.group('playlist_id')
|
||||
access_hash = mobj.group('access_hash')
|
||||
track_id = mobj.group('id')
|
||||
access_hash = mobj.group('hash')
|
||||
|
||||
if track_id:
|
||||
if not access_hash:
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
@ -972,10 +971,8 @@ def _real_extract(self, url):
|
||||
self.raise_login_required()
|
||||
|
||||
meta = meta[0]
|
||||
|
||||
self._raise_if_blocked(meta, track_id)
|
||||
|
||||
url = _unmask_url(meta[2], self._parse_vk_id())
|
||||
url = self._unmask_url(meta[2], self._parse_vk_id())
|
||||
|
||||
return {
|
||||
**self._parse_track_meta(meta, track_id),
|
||||
@ -988,35 +985,93 @@ def _real_extract(self, url):
|
||||
}],
|
||||
}
|
||||
|
||||
elif playlist_id:
|
||||
|
||||
class VKMusicPlaylistIE(VKMusicBaseIE):
|
||||
IE_NAME = 'vkmusic:playlist'
|
||||
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:m|new)\.)?vk\.(?:com|ru)/
|
||||
(?:
|
||||
music/(?:album|playlist)/|
|
||||
.*[?&](?:act|z)=audio_playlist
|
||||
)
|
||||
(?P<full_id>(?P<oid>-?\d+)_(?P<id>\d+))
|
||||
(?:(?:%2F|_|[?&]access_hash=)(?P<hash>[0-9a-f]+))?
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://vk.com/artist/linkinpark/releases?z=audio_playlist-2000984503_984503%2Fc468f3a862b6f73b55',
|
||||
'info_dict': {
|
||||
'id': '-2000984503_984503',
|
||||
'title': 'Linkin Park - One More Light',
|
||||
'description': '',
|
||||
'album': 'One More Light',
|
||||
'uploader': 'Linkin Park',
|
||||
'artists': ['Linkin Park'],
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'genres': ['Alternative'],
|
||||
'release_year': 2017,
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'view_count': int,
|
||||
},
|
||||
'playlist_count': 10,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://vk.com/audios877252112?block=playlists&section=general&z=audio_playlist-147845620_2390',
|
||||
'info_dict': {
|
||||
'id': '-147845620_2390',
|
||||
'title': 'VK Fest 2024: Белая сцена',
|
||||
'description': 'md5:6d652551bb1faaddbcd46321a77fa8d0',
|
||||
'uploader': 'VK Музыка',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'view_count': int,
|
||||
},
|
||||
'playlist_count': 18,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
playlist_id = mobj.group('full_id')
|
||||
|
||||
meta = self._download_payload('al_audio', playlist_id, {
|
||||
'act': 'load_section',
|
||||
'access_hash': access_hash or '',
|
||||
'access_hash': mobj.group('hash') or '',
|
||||
'claim': '0',
|
||||
'context': '',
|
||||
'from_id': self._parse_vk_id(),
|
||||
'is_loading_all': '1',
|
||||
'is_preload': '0',
|
||||
'offset': '0',
|
||||
'owner_id': mobj.group('pl_oid'),
|
||||
'playlist_id': mobj.group('pl_id'),
|
||||
'owner_id': mobj.group('oid'),
|
||||
'playlist_id': mobj.group('id'),
|
||||
'ref': '',
|
||||
'type': 'playlist',
|
||||
})[0]
|
||||
|
||||
tracks = meta['list']
|
||||
|
||||
entries = []
|
||||
|
||||
for ent in tracks:
|
||||
info = self._parse_track_meta(ent)
|
||||
ent_access = f'_{ent[24]}' if len(ent) >= 24 and ent[24] else ''
|
||||
track_id = info.pop('id')
|
||||
title = info.pop('title')
|
||||
audio_url = f'https://vk.com/audio{track_id}{ent_access}'
|
||||
|
||||
ent_hash = f'_{ent[24]}' if len(ent) >= 24 and ent[24] else ''
|
||||
audio_url = f'https://vk.com/audio{track_id}{ent_hash}'
|
||||
|
||||
entries.append(self.url_result(
|
||||
audio_url, VKMusicIE, track_id, title, **info))
|
||||
audio_url, VKMusicTrackIE, track_id, title, **info))
|
||||
|
||||
title = unescapeHTML(meta.get('title')) # TODO: fallback
|
||||
artist = unescapeHTML(meta.get('authorName'))
|
||||
@ -1187,39 +1242,3 @@ def _real_extract(self, url):
|
||||
**self._extract_common_meta(stream_info),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
_BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
|
||||
|
||||
|
||||
def _b64_decode(enc):
    """Decode `enc` with the custom alphabet in `_BASE64_CHARS`.

    Symbols are consumed in groups of four. The first symbol of a group only
    restarts the accumulator; each of the following three appends one decoded
    character to the result.
    """
    decoded = []
    acc = 0
    for pos, symbol in enumerate(enc):
        sextet = _BASE64_CHARS.index(symbol)
        if pos % 4 == 0:
            # start of a new group: nothing to emit yet
            acc = sextet
            continue
        acc = 64 * acc + sextet
        # the shift cycles through 4, 2, 0 for the 2nd, 3rd and 4th symbol
        shift = -2 * (pos + 1) & 6
        decoded.append(chr(255 & acc >> shift))
    return ''.join(decoded)
|
||||
|
||||
|
||||
def _unmask_url(mask_url, vk_id):
    """Return the real audio URL hidden in a masked one.

    URLs that do not contain 'audio_api_unavailable' are returned untouched.
    Otherwise the text after '?extra=' is split on '#': the first part
    decodes to the shuffled URL, the second part decodes to two values
    separated by chr(11), of which the second is the numeric shuffle base.
    The base is XORed with `vk_id` to seed the index sequence that undoes
    the shuffle.
    """
    if 'audio_api_unavailable' not in mask_url:
        return mask_url

    parts = mask_url.split('?extra=')[1].split('#')
    _op, seed = _b64_decode(parts[1]).split(chr(11))
    chars = list(_b64_decode(parts[0]))
    size = len(chars)

    # build the swap targets from the last position down to the first
    cursor = int(seed) ^ vk_id
    targets = [None] * size
    for pos in reversed(range(size)):
        cursor = (size * (pos + 1) ^ cursor + pos) % size
        targets[pos] = cursor

    # position 0 is never the left-hand side of a swap
    for pos in range(1, size):
        other = targets[size - 1 - pos]
        chars[pos], chars[other] = chars[other], chars[pos]

    return ''.join(chars)
|
||||
|
Loading…
Reference in New Issue
Block a user