mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[youtube:playlist] Don't use the gdata api (closes #1508)
Parse the playlist pages instead
This commit is contained in:
		| @@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|     def test_youtube_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], 'ytdl test PL') | ||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||
| @@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|     def test_issue_673(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('PLBB231211A4F62143')[0] | ||||
|         result = ie.extract('PLBB231211A4F62143') | ||||
|         self.assertTrue(len(result['entries']) > 25) | ||||
|  | ||||
|     def test_youtube_playlist_long(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertTrue(len(result['entries']) >= 799) | ||||
|  | ||||
| @@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         #651 | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') | ||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||
|         self.assertFalse('pElCt5oNDuI' in ytie_results) | ||||
|         self.assertFalse('KdPEApIVdWM' in ytie_results) | ||||
| @@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|     def test_youtube_playlist_empty(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0] | ||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(len(result['entries']), 0) | ||||
|  | ||||
| @@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         # TODO find a > 100 (paginating?) videos course | ||||
|         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] | ||||
|         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         entries = result['entries'] | ||||
|         self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') | ||||
|         self.assertEqual(len(entries), 25) | ||||
| @@ -99,7 +99,7 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|     def test_youtube_safe_search(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] | ||||
|         result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl') | ||||
|         self.assertEqual(len(result['entries']), 2) | ||||
|  | ||||
|     def test_youtube_show(self): | ||||
|   | ||||
| @@ -1506,8 +1506,9 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|                      | | ||||
|                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,}) | ||||
|                      )""" | ||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none' | ||||
|     _MAX_RESULTS = 50 | ||||
|     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' | ||||
|     _MORE_PAGES_INDICATOR = r'data-link-type="next"' | ||||
|     _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&' | ||||
|     IE_NAME = u'youtube:playlist' | ||||
|  | ||||
|     @classmethod | ||||
| @@ -1532,41 +1533,23 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|             else: | ||||
|                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|  | ||||
|         # Download playlist videos from API | ||||
|         videos = [] | ||||
|         # Extract the video ids from the playlist pages | ||||
|         ids = [] | ||||
|  | ||||
|         for page_num in itertools.count(1): | ||||
|             start_index = self._MAX_RESULTS * (page_num - 1) + 1 | ||||
|             if start_index >= 1000: | ||||
|                 self._downloader.report_warning(u'Max number of results reached') | ||||
|                 break | ||||
|             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index) | ||||
|             url = self._TEMPLATE_URL % (playlist_id, page_num) | ||||
|             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) | ||||
|             # The ids are duplicated | ||||
|             new_ids = orderedSet(re.findall(self._VIDEO_RE, page)) | ||||
|             ids.extend(new_ids) | ||||
|  | ||||
|             try: | ||||
|                 response = json.loads(page) | ||||
|             except ValueError as err: | ||||
|                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) | ||||
|  | ||||
|             if 'feed' not in response: | ||||
|                 raise ExtractorError(u'Got a malformed response from YouTube API') | ||||
|             playlist_title = response['feed']['title']['$t'] | ||||
|             if 'entry' not in response['feed']: | ||||
|                 # Number of videos is a multiple of self._MAX_RESULTS | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, page) is None: | ||||
|                 break | ||||
|  | ||||
|             for entry in response['feed']['entry']: | ||||
|                 index = entry['yt$position']['$t'] | ||||
|                 if 'media$group' in entry and 'yt$videoid' in entry['media$group']: | ||||
|                     videos.append(( | ||||
|                         index, | ||||
|                         'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t'] | ||||
|                     )) | ||||
|         playlist_title = self._og_search_title(page) | ||||
|  | ||||
|         videos = [v[1] for v in sorted(videos)] | ||||
|  | ||||
|         url_results = [self.url_result(vurl, 'Youtube') for vurl in videos] | ||||
|         return [self.playlist_result(url_results, playlist_id, playlist_title)] | ||||
|         url_results = [self.url_result(vid, 'Youtube') for vid in ids] | ||||
|         return self.playlist_result(url_results, playlist_id, playlist_title) | ||||
|  | ||||
|  | ||||
| class YoutubeChannelIE(InfoExtractor): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz