mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-07-12 16:28:31 +00:00
fix: custom regexes -> yt-dlp HTML helpers (for reliability)
This commit is contained in:
parent
70e0c591be
commit
cdbfe3a793
@ -13,6 +13,7 @@
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
get_element_html_by_class,
|
get_element_html_by_class,
|
||||||
get_element_html_by_id,
|
get_element_html_by_id,
|
||||||
@ -1083,6 +1084,7 @@ def _real_extract(self, url):
|
|||||||
playlist_id)
|
playlist_id)
|
||||||
del hash_in_url
|
del hash_in_url
|
||||||
|
|
||||||
|
# to remove big scripts and other elements not used by parser
|
||||||
html = get_element_html_by_class('AudioPlaylistSnippet', webpage)
|
html = get_element_html_by_class('AudioPlaylistSnippet', webpage)
|
||||||
del webpage
|
del webpage
|
||||||
|
|
||||||
@ -1105,29 +1107,29 @@ def _real_extract(self, url):
|
|||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
audio_url, VKMusicTrackIE, track_id, title, **info))
|
audio_url, VKMusicTrackIE, track_id, title, **info))
|
||||||
|
|
||||||
title = self._html_search_regex(
|
header = get_element_html_by_class('AudioPlaylistSnippet__header', html)
|
||||||
r'class="[^"]*AudioPlaylistSnippet__title--main[^"]*"[^>]*>([^<]+)',
|
|
||||||
html, 'playlist title', fatal=False, group=1)
|
|
||||||
|
|
||||||
artist = self._html_search_regex(
|
title = clean_html(get_element_by_class('AudioPlaylistSnippet__title', header))
|
||||||
r'class="[^"]*AudioPlaylistSnippet__author[^"]*"[^>]*>\s*<a(?:\s[^>]*)?>([^<]+)',
|
artist = clean_html(get_element_by_class('AudioPlaylistSnippet__author', header))
|
||||||
html, 'playlist author', fatal=False, group=1)
|
|
||||||
|
|
||||||
description = clean_html(get_element_by_class(
|
info_text = clean_html(get_element_by_class('AudioPlaylistSnippet__info', header))
|
||||||
'AudioPlaylistSnippet__description', html))
|
info_sep = info_text.find('·')
|
||||||
# description = self._html_search_regex(
|
|
||||||
# r'div\s[^>]*class="[^"]*AudioPlaylistSnippet__description[^"]*">??????',
|
|
||||||
# html, 'playlist description', fatal=False, group=1)
|
|
||||||
|
|
||||||
genre, year = self._html_search_regex(
|
|
||||||
r'class="[^"]*AudioPlaylistSnippet__info[^"]*"[^>]*>\s*(.+) .*;(\d+)\s*</',
|
|
||||||
html, 'genre and release year', default=(None, None), group=(1, 2))
|
|
||||||
|
|
||||||
|
year = int_or_none(info_text[info_sep + 1:]) if info_sep != -1 else None
|
||||||
is_album = year is not None
|
is_album = year is not None
|
||||||
|
genre = info_text[:info_sep].rstrip() if is_album else None
|
||||||
|
|
||||||
thumbnail = url_or_none(self._html_search_regex(
|
del header
|
||||||
r'class="[^"]*AudioPlaylistSnippet__cover[^"]*"[^>]*style="background-image\s*:\s*url\s*\(\s*\'([^\']+)',
|
|
||||||
html, 'playlist thumbnail', fatal=False, group=1))
|
description = clean_html(get_element_by_class('AudioPlaylistSnippet__description', html))
|
||||||
|
|
||||||
|
thumbnail = url_or_none(self._search_regex(
|
||||||
|
r'background[^:;]*:\s*url\s*\(\s*\'([^\']+)',
|
||||||
|
extract_attributes(
|
||||||
|
get_element_html_by_class(
|
||||||
|
'AudioPlaylistSnippet__cover',
|
||||||
|
html)).get('style'),
|
||||||
|
'playlist thumbnail', fatal=False, group=1))
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id,
|
entries, playlist_id,
|
||||||
@ -1138,7 +1140,7 @@ def _real_extract(self, url):
|
|||||||
artists=[artist] if is_album else None,
|
artists=[artist] if is_album else None,
|
||||||
thumbnails=[{'url': thumbnail}] if thumbnail else [],
|
thumbnails=[{'url': thumbnail}] if thumbnail else [],
|
||||||
genres=[genre] if genre else None,
|
genres=[genre] if genre else None,
|
||||||
release_year=int_or_none(year))
|
release_year=year)
|
||||||
|
|
||||||
|
|
||||||
class VKPlayBaseIE(InfoExtractor):
|
class VKPlayBaseIE(InfoExtractor):
|
||||||
|
Loading…
Reference in New Issue
Block a user