mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[youtube:playlist] Fetch all the videos in a mix (fixes #3837)
Since the page doesn't seem to give any indication of where the mix ends, the extractor keeps fetching pages and stops when a downloaded webpage contains no new videos.
This commit is contained in:
		| @@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') |         result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') | ||||||
|         entries = result['entries'] |         entries = result['entries'] | ||||||
|         self.assertTrue(len(entries) >= 20) |         self.assertTrue(len(entries) >= 50) | ||||||
|         original_video = entries[0] |         original_video = entries[0] | ||||||
|         self.assertEqual(original_video['id'], 'OQpdSVF_k_w') |         self.assertEqual(original_video['id'], 'OQpdSVF_k_w') | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1818,20 +1818,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | |||||||
|     def _extract_mix(self, playlist_id): |     def _extract_mix(self, playlist_id): | ||||||
|         # The mixes are generated from a single video |         # The mixes are generated from a single video | ||||||
|         # the id of the playlist is just 'RD' + video_id |         # the id of the playlist is just 'RD' + video_id | ||||||
|         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) |         ids = [] | ||||||
|         webpage = self._download_webpage( |         last_id = playlist_id[-11:] | ||||||
|             url, playlist_id, 'Downloading Youtube mix') |         for n in itertools.count(1): | ||||||
|  |             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id) | ||||||
|  |             webpage = self._download_webpage( | ||||||
|  |                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n)) | ||||||
|  |             new_ids = orderedSet(re.findall( | ||||||
|  |                 r'''(?xs)data-video-username=".*?".*? | ||||||
|  |                            href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), | ||||||
|  |                 webpage)) | ||||||
|  |             # Fetch new pages until all the videos are repeated, it seems that | ||||||
|  |             # there are always 51 unique videos. | ||||||
|  |             new_ids = [_id for _id in new_ids if _id not in ids] | ||||||
|  |             if not new_ids: | ||||||
|  |                 break | ||||||
|  |             ids.extend(new_ids) | ||||||
|  |             last_id = ids[-1] | ||||||
|  |  | ||||||
|  |         url_results = self._ids_to_results(ids) | ||||||
|  |  | ||||||
|         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) |         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) | ||||||
|         title_span = ( |         title_span = ( | ||||||
|             search_title('playlist-title') or |             search_title('playlist-title') or | ||||||
|             search_title('title long-title') or |             search_title('title long-title') or | ||||||
|             search_title('title')) |             search_title('title')) | ||||||
|         title = clean_html(title_span) |         title = clean_html(title_span) | ||||||
|         ids = orderedSet(re.findall( |  | ||||||
|             r'''(?xs)data-video-username=".*?".*? |  | ||||||
|                        href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), |  | ||||||
|             webpage)) |  | ||||||
|         url_results = self._ids_to_results(ids) |  | ||||||
|  |  | ||||||
|         return self.playlist_result(url_results, playlist_id, title) |         return self.playlist_result(url_results, playlist_id, title) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz