mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-11-02 07:35:13 +00:00
Updated to release 2020.11.26
This commit is contained in:
@@ -1335,44 +1335,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return self._parse_json(
|
||||
uppercase_escape(config), video_id, fatal=False)
|
||||
|
||||
def _get_music_metadata_from_yt_initial(self, yt_initial):
    """Collect music metadata rows from the parsed initial-data structure.

    Walks ``contents.twoColumnWatchNextResults.results.results.contents``
    of ``yt_initial`` and, for every entry that carries a
    ``videoSecondaryInfoRenderer``, reads its metadata rows.  Rows titled
    'Album', 'Artist' or 'Song' are stored under the keys 'album',
    'artist' and 'track' respectively; all other rows are ignored.

    Returns a list of dicts, one per detected track (empty when nothing
    usable is found).  A new track dict is started whenever a field that
    is already set on the current track shows up again.
    """
    key_map = {
        'Album': 'album',
        'Artist': 'artist',
        'Song': 'track',
    }
    music_metadata = []

    # try_get swallows lookup errors and enforces the expected type, so any
    # missing or malformed level simply yields an empty iteration.
    contents = try_get(
        yt_initial,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []
    for content in contents:
        rows = try_get(
            content,
            lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        music_track = {}
        for row in rows:
            renderer = try_get(row, lambda x: x['metadataRowRenderer'], dict)
            if not renderer:
                continue
            # compat_str instead of an exact `type(...) is str` test: decoded
            # JSON text is `unicode` on Python 2, which the exact test
            # rejected, dropping every row.
            key = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            # The row value is either a plain text node or the first "run".
            value = (
                try_get(renderer, lambda x: x['contents'][0]['simpleText'], compat_str)
                or try_get(renderer, lambda x: x['contents'][0]['runs'][0]['text'], compat_str))
            field = key_map.get(key)
            if field is None or value is None:
                continue
            if field in music_track:
                # A repeated field means the rows have moved on to a new track.
                music_metadata.append(music_track)
                music_track = {}
            music_track[field] = value
        if music_track:
            music_metadata.append(music_track)
    return music_metadata
|
||||
|
||||
def _get_automatic_captions(self, video_id, webpage):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
@@ -2295,7 +2257,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Youtube Music Auto-generated description
|
||||
release_date = release_year = None
|
||||
if video_description:
|
||||
mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
|
||||
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
|
||||
if mobj:
|
||||
if not track:
|
||||
track = mobj.group('track').strip()
|
||||
@@ -2312,13 +2274,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if release_year:
|
||||
release_year = int(release_year)
|
||||
|
||||
yt_initial = self._get_yt_initial_data(video_id, video_webpage)
|
||||
if yt_initial:
|
||||
music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
|
||||
if len(music_metadata):
|
||||
album = music_metadata[0].get('album')
|
||||
artist = music_metadata[0].get('artist')
|
||||
track = music_metadata[0].get('track')
|
||||
yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
|
||||
contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
|
||||
for content in contents:
|
||||
rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
|
||||
multiple_songs = False
|
||||
for row in rows:
|
||||
if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
|
||||
multiple_songs = True
|
||||
break
|
||||
for row in rows:
|
||||
mrr = row.get('metadataRowRenderer') or {}
|
||||
mrr_title = try_get(
|
||||
mrr, lambda x: x['title']['simpleText'], compat_str)
|
||||
mrr_contents = try_get(
|
||||
mrr, lambda x: x['contents'][0], dict) or {}
|
||||
mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
|
||||
if not (mrr_title and mrr_contents_text):
|
||||
continue
|
||||
if mrr_title == 'License':
|
||||
video_license = mrr_contents_text
|
||||
elif not multiple_songs:
|
||||
if mrr_title == 'Album':
|
||||
album = mrr_contents_text
|
||||
elif mrr_title == 'Artist':
|
||||
artist = mrr_contents_text
|
||||
elif mrr_title == 'Song':
|
||||
track = mrr_contents_text
|
||||
|
||||
m_episode = re.search(
|
||||
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
|
||||
|
||||
Reference in New Issue
Block a user