1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-28 01:18:30 +00:00

feat: parse access_hash from url for tracks too

This commit is contained in:
DarkCat09 2025-04-01 16:55:50 +04:00
parent c71008ed60
commit df89f7643d
No known key found for this signature in database

View File

@ -758,7 +758,10 @@ def _real_extract(self, url):
class VKMusicIE(VKBaseIE): class VKMusicIE(VKBaseIE):
IE_NAME = 'vk:music' IE_NAME = 'vk:music'
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/(?:audio(?P<track_id>-?\d+_\d+)|(?:.*[\?&](?:act|z)=audio_playlist|music/[a-z]+/)(?P<playlist_id>(?P<pl_oid>-?\d+)_(?P<pl_id>\d+))(?:(?:%2F|_|[?&]access_hash=)(?P<access_hash>[0-9a-f]+))?)'
# Debug and test on https://regexr.com/8dlot
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/(?:audio(?P<track_id>-?\d+_\d+)|(?:.*[\?&](?:act|z)=audio_playlist|music/[a-z]+/)(?P<playlist_id>(?P<pl_oid>-?\d+)_(?P<pl_id>\d+)))(?:(?:%2F|_|[?&]access_hash=)(?P<access_hash>[0-9a-f]+))?'
_TESTS = [ _TESTS = [
{ {
'url': 'https://vk.com/audio-2001746599_34746599', 'url': 'https://vk.com/audio-2001746599_34746599',
@ -909,47 +912,49 @@ def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
track_id = mobj.group('track_id') track_id = mobj.group('track_id')
playlist_id = mobj.group('playlist_id') playlist_id = mobj.group('playlist_id')
access_hash = mobj.group('access_hash')
if track_id: if track_id:
webpage = self._download_webpage(url, track_id) if not access_hash:
webpage = self._download_webpage(url, track_id)
# copied regex from VKWallPostIE data_audio = self._search_regex(
# XXX: common code should be unified, moved to a class r'data-audio="([^"]+)', webpage, 'data-audio attr',
data_audio = self._search_regex( default=None, group=1)
r'data-audio="([^"]+)', webpage, 'data-audio attr',
default=None, group=1)
if data_audio: if data_audio:
meta = self._parse_json(unescapeHTML(data_audio), track_id) meta = self._parse_json(unescapeHTML(data_audio), track_id)
else: else:
if self._parse_vk_id() == 0: if self._parse_vk_id() == 0:
self.raise_login_required( self.raise_login_required(
'This track is unavailable. ' 'This track is unavailable. '
'Log in or provide a link with access hash') 'Log in or provide a link with access hash')
data_exec = self._search_regex( data_exec = self._search_regex(
r'class="AudioPlayerBlock__root"[^>]+data-exec="([^"]+)', r'class="AudioPlayerBlock__root"[^>]+data-exec="([^"]+)',
webpage, 'AudioPlayerBlock data-exec', group=1) webpage, 'AudioPlayerBlock data-exec', group=1)
meta = traverse_obj( meta = traverse_obj(
self._parse_json(unescapeHTML(data_exec), track_id), self._parse_json(unescapeHTML(data_exec), track_id),
('AudioPlayerBlock/init', 'firstAudio')) ('AudioPlayerBlock/init', 'firstAudio'))
one_more_id = meta[24] del data_exec
block_reason = traverse_obj( del data_audio
self._parse_json(meta[12], track_id, fatal=False), del webpage
('claim', 'reason'))
if block_reason == 'geo': access_hash = meta[24]
self.raise_geo_restricted()
del data_audio block_reason = traverse_obj(
del webpage self._parse_json(meta[12], track_id, fatal=False),
('claim', 'reason'))
if block_reason == 'geo':
self.raise_geo_restricted()
meta = self._download_payload('al_audio', track_id, { meta = self._download_payload('al_audio', track_id, {
'act': 'reload_audios', 'act': 'reload_audios',
'audio_ids': f'{track_id}_{one_more_id}', 'audio_ids': f'{track_id}_{access_hash}',
})[0][0] })[0][0]
url = _unmask_url(meta[2], self._parse_vk_id()) url = _unmask_url(meta[2], self._parse_vk_id())
@ -968,7 +973,7 @@ def _real_extract(self, url):
elif playlist_id: elif playlist_id:
meta = self._download_payload('al_audio', playlist_id, { meta = self._download_payload('al_audio', playlist_id, {
'act': 'load_section', 'act': 'load_section',
'access_hash': mobj.group('access_hash') or '', 'access_hash': access_hash or '',
'claim': '0', 'claim': '0',
'context': '', 'context': '',
'from_id': self._parse_vk_id(), 'from_id': self._parse_vk_id(),