mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[youtube:playlist] Fetch all the videos in a mix (fixes #3837)
Since the page doesn't seem to give any indication of where the mix ends, fetching stops when a downloaded page contains no new videos.
This commit is contained in:
		| @@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') | ||||
|         entries = result['entries'] | ||||
|         self.assertTrue(len(entries) >= 20) | ||||
|         self.assertTrue(len(entries) >= 50) | ||||
|         original_video = entries[0] | ||||
|         self.assertEqual(original_video['id'], 'OQpdSVF_k_w') | ||||
|  | ||||
|   | ||||
| @@ -1818,20 +1818,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | ||||
|     def _extract_mix(self, playlist_id): | ||||
|         # The mixes are generated from a single video | ||||
|         # the id of the playlist is just 'RD' + video_id | ||||
|         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) | ||||
|         ids = [] | ||||
|         last_id = playlist_id[-11:] | ||||
|         for n in itertools.count(1): | ||||
|             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id) | ||||
|             webpage = self._download_webpage( | ||||
|             url, playlist_id, 'Downloading Youtube mix') | ||||
|                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n)) | ||||
|             new_ids = orderedSet(re.findall( | ||||
|                 r'''(?xs)data-video-username=".*?".*? | ||||
|                            href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), | ||||
|                 webpage)) | ||||
|             # Fetch new pages until a page contains only videos that have already | ||||
|             # been seen; it seems that there are always 51 unique videos. | ||||
|             new_ids = [_id for _id in new_ids if _id not in ids] | ||||
|             if not new_ids: | ||||
|                 break | ||||
|             ids.extend(new_ids) | ||||
|             last_id = ids[-1] | ||||
|  | ||||
|         url_results = self._ids_to_results(ids) | ||||
|  | ||||
|         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) | ||||
|         title_span = ( | ||||
|             search_title('playlist-title') or | ||||
|             search_title('title long-title') or | ||||
|             search_title('title')) | ||||
|         title = clean_html(title_span) | ||||
|         ids = orderedSet(re.findall( | ||||
|             r'''(?xs)data-video-username=".*?".*? | ||||
|                        href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), | ||||
|             webpage)) | ||||
|         url_results = self._ids_to_results(ids) | ||||
|  | ||||
|         return self.playlist_result(url_results, playlist_id, title) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz