Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-08-13 16:08:29 +00:00)
fixes to extractor regex, artist splitting, and half·alive test
This commit is contained in:
parent
13e6a52074
commit
223b3b75ef
@@ -1671,12 +1671,37 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://music.youtube.com/watch?v=DbCvuSGfR3Y',
|
||||
'info_dict': {
|
||||
'id': 'DbCvuSGfR3Y',
|
||||
'ext': 'mp4',
|
||||
'title': 'Back Around',
|
||||
'artists': ['half·alive'],
|
||||
'track': 'Back Around',
|
||||
'album': 'Conditions Of A Punk',
|
||||
'release_date': '20221202',
|
||||
'release_year': 2021,
|
||||
'alt_title': 'Back Around',
|
||||
'description': 'md5:bfc0e2b3cc903a608d8a85a13cb50f95',
|
||||
'media_type': 'video',
|
||||
'uploader': 'half•alive',
|
||||
'channel': 'half•alive',
|
||||
'channel_id': 'UCYQrYophdVI3nVDPOnXyIng',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCYQrYophdVI3nVDPOnXyIng',
|
||||
'channel_is_verified': True,
|
||||
'channel_follower_count': int,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
'duration': 223,
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/DbCvuSGfR3Y/maxresdefault.webp',
|
||||
'heatmap': 'count:100',
|
||||
'categories': ['Music'],
|
||||
'tags': ['half·alive', 'Conditions Of A Punk', 'Back Around'],
|
||||
'creators': ['half·alive'],
|
||||
'timestamp': 1669889281,
|
||||
'upload_date': '20221201',
|
||||
'playable_in_embed': True,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -4207,15 +4232,14 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
|
||||
# Youtube Music Auto-generated description
|
||||
if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
|
||||
# Before you change this, learn how regexes work. The last guy didn't.
|
||||
mobj = re.search(
|
||||
r'''(?xs)
|
||||
(?:\n|^)(?P<track>[^\n·]+)\ ·\ (?P<artist>[^\n]+)\n+
|
||||
(?P<album>[^\n]+)\n+
|
||||
(?:℗\s*(?P<release_year>\d{4})[^\n]+\n+)?
|
||||
(?:Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
|
||||
(?:℗\s*(?P<release_year>\d{4}))?
|
||||
(?:.+?\nReleased\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
|
||||
(?:.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+)\n)?
|
||||
.+Auto-generated\ by\ YouTube\.\s*$
|
||||
.+\nAuto-generated\ by\ YouTube\.\s*$
|
||||
''', video_description)
|
||||
if mobj:
|
||||
release_year = mobj.group('release_year')
|
||||
@@ -4227,7 +4251,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
info.update({
|
||||
'album': mobj.group('album'.strip()),
|
||||
'artists': ([a] if (a := mobj.group('clean_artist'))
|
||||
else [a.strip() for a in mobj.group('artist').split('·')]),
|
||||
else [a.strip() for a in mobj.group('artist').split(' · ')]),
|
||||
'track': mobj.group('track').strip(),
|
||||
'release_date': release_date,
|
||||
'release_year': int_or_none(release_year),
|
||||
|
Loading…
Reference in New Issue
Block a user