mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[youtube] Add support for downloading top lists (fixes #1868)
The extractor needs to know the channel and the title of the list, because the list ids change every time you browse the channels and are tied to the 'VISITOR_INFO1_LIVE' cookie.
This commit is contained in:
		| @@ -15,6 +15,7 @@ from youtube_dl.extractor import ( | |||||||
|     YoutubeIE, |     YoutubeIE, | ||||||
|     YoutubeChannelIE, |     YoutubeChannelIE, | ||||||
|     YoutubeShowIE, |     YoutubeShowIE, | ||||||
|  |     YoutubeTopListIE, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -116,5 +117,12 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|         original_video = entries[0] |         original_video = entries[0] | ||||||
|         self.assertEqual(original_video['id'], 'rjFaenf1T-Y') |         self.assertEqual(original_video['id'], 'rjFaenf1T-Y') | ||||||
|  |  | ||||||
|  |     def test_youtube_toplist(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = YoutubeTopListIE(dl) | ||||||
|  |         result = ie.extract('yttoplist:music:Top Tracks') | ||||||
|  |         entries = result['entries'] | ||||||
|  |         self.assertTrue(len(entries) >= 9) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -194,6 +194,7 @@ from .youtube import ( | |||||||
|     YoutubeWatchLaterIE, |     YoutubeWatchLaterIE, | ||||||
|     YoutubeFavouritesIE, |     YoutubeFavouritesIE, | ||||||
|     YoutubeHistoryIE, |     YoutubeHistoryIE, | ||||||
|  |     YoutubeTopListIE, | ||||||
| ) | ) | ||||||
| from .zdf import ZDFIE | from .zdf import ZDFIE | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1576,6 +1576,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | |||||||
|         if len(playlist_id) == 13:  # 'RD' + 11 characters for the video id |         if len(playlist_id) == 13:  # 'RD' + 11 characters for the video id | ||||||
|             # Mixes require a custom extraction process |             # Mixes require a custom extraction process | ||||||
|             return self._extract_mix(playlist_id) |             return self._extract_mix(playlist_id) | ||||||
|  |         if playlist_id.startswith('TL'): | ||||||
|  |             raise ExtractorError(u'For downloading YouTube.com top lists, use ' | ||||||
|  |                 u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) | ||||||
|  |  | ||||||
|         # Extract the video ids from the playlist pages |         # Extract the video ids from the playlist pages | ||||||
|         ids = [] |         ids = [] | ||||||
| @@ -1598,6 +1601,38 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | |||||||
|         return self.playlist_result(url_results, playlist_id, playlist_title) |         return self.playlist_result(url_results, playlist_id, playlist_title) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class YoutubeTopListIE(YoutubePlaylistIE): | ||||||
|  |     IE_NAME = u'youtube:toplist' | ||||||
|  |     IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"' | ||||||
|  |         u' (Example: "yttoplist:music:Top Tracks")') | ||||||
|  |     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         channel = mobj.group('chann') | ||||||
|  |         title = mobj.group('title') | ||||||
|  |         query = compat_urllib_parse.urlencode({'title': title}) | ||||||
|  |         playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query) | ||||||
|  |         channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) | ||||||
|  |         link = self._html_search_regex(playlist_re, channel_page, u'list') | ||||||
|  |         url = compat_urlparse.urljoin('https://www.youtube.com/', link) | ||||||
|  |          | ||||||
|  |         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' | ||||||
|  |         ids = [] | ||||||
|  |         # sometimes the webpage doesn't contain the videos | ||||||
|  |         # retry until we get them | ||||||
|  |         for i in itertools.count(0): | ||||||
|  |             msg = u'Downloading Youtube mix' | ||||||
|  |             if i > 0: | ||||||
|  |                 msg += ', retry #%d' % i | ||||||
|  |             webpage = self._download_webpage(url, title, msg) | ||||||
|  |             ids = orderedSet(re.findall(video_re, webpage)) | ||||||
|  |             if ids: | ||||||
|  |                 break | ||||||
|  |         url_results = self._ids_to_results(ids) | ||||||
|  |         return self.playlist_result(url_results, playlist_title=title) | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeChannelIE(InfoExtractor): | class YoutubeChannelIE(InfoExtractor): | ||||||
|     IE_DESC = u'YouTube.com channels' |     IE_DESC = u'YouTube.com channels' | ||||||
|     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" |     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz