1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-09 23:08:32 +00:00

fix(playlist): description, htmlescape, url regex, +test

This commit is contained in:
DarkCat09 2025-03-30 15:41:45 +04:00
parent 40b039df2c
commit 90a026e648
No known key found for this signature in database

View File

@ -760,7 +760,7 @@ def _real_extract(self, url):
class VKMusicIE(VKBaseIE):
IE_NAME = 'vk:music'
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/(?:audio(?P<track_id>-?\d+_\d+)|(?:.*\?(?:act|z)=audio_playlist|music/[a-z]+/)(?P<playlist_id>(?P<pl_oid>-?\d+)_(?P<pl_id>\d+))(?:(?:%2F|_|[?&]access_hash=)(?P<access_hash>[0-9a-f]+))?)'
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/(?:audio(?P<track_id>-?\d+_\d+)|(?:.*[\?&](?:act|z)=audio_playlist|music/[a-z]+/)(?P<playlist_id>(?P<pl_oid>-?\d+)_(?P<pl_id>\d+))(?:(?:%2F|_|[?&]access_hash=)(?P<access_hash>[0-9a-f]+))?)'
_TESTS = [
{
'url': 'https://vk.com/audio-2001746599_34746599',
@ -850,6 +850,25 @@ class VKMusicIE(VKBaseIE):
'params': {
'skip_download': True,
}
},
{
'url': 'https://vk.com/audios877252112?block=playlists&section=general&z=audio_playlist-147845620_2390',
'info_dict': {
'id': '-147845620_2390',
'title': 'VK Музыка - VK Fest 2024: Белая сцена',
'description': 'md5:6d652551bb1faaddbcd46321a77fa8d0',
'album': 'VK Fest 2024: Белая сцена', # XXX: not an album (but who cares actually)
'uploader': 'VK Музыка',
'artists': ['VK Музыка'], # XXX: not actually a list of all artists
'thumbnail': r're:https?://.*\.jpg',
'modified_timestamp': int,
'modified_date': str,
'view_count': int,
},
'playlist_count': 18,
'params': {
'skip_download': True,
}
}
]
@ -955,8 +974,8 @@ def _real_extract(self, url):
entries.append(self.url_result(
audio_url, VKMusicIE, track_id, title, **info))
title = meta.get('title') # TODO: fallback
artist = meta.get('authorName')
title = unescapeHTML(meta.get('title')) # TODO: fallback
artist = unescapeHTML(meta.get('authorName'))
genre, year = self._search_regex(
r'^([^<]+)<\s*span[^>]*>[^<]*</\s*span\s*>(\d+)$',
meta.get('infoLine1'), 'genre and release year',
@ -966,13 +985,13 @@ def _real_extract(self, url):
entries,
playlist_id,
join_nonempty(artist, title, delim=' - '),
meta.get('description'),
unescapeHTML(meta.get('rawDescription')),
album=title,
uploader=artist,
artists=[artist],
thumbnail=meta.get('coverUrl'), # XXX: should i also specify `thumbnails`?
genres=[genre] if genre else [],
release_year=int_or_none(year), # XXX: is None ok here?
genres=[unescapeHTML(genre)] if genre else None,
release_year=int_or_none(year),
modified_timestamp=int_or_none(meta.get('lastUpdated')),
view_count=int_or_none(meta.get('listens')))