mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[youtube:playlist] Don't use the gdata api (closes #1508)
Parse the playlist pages instead
This commit is contained in:
		| @@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|     def test_youtube_playlist(self): |     def test_youtube_playlist(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] |         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') | ||||||
|         self.assertIsPlaylist(result) |         self.assertIsPlaylist(result) | ||||||
|         self.assertEqual(result['title'], 'ytdl test PL') |         self.assertEqual(result['title'], 'ytdl test PL') | ||||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] |         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||||
| @@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|     def test_issue_673(self): |     def test_issue_673(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         result = ie.extract('PLBB231211A4F62143')[0] |         result = ie.extract('PLBB231211A4F62143') | ||||||
|         self.assertTrue(len(result['entries']) > 25) |         self.assertTrue(len(result['entries']) > 25) | ||||||
|  |  | ||||||
|     def test_youtube_playlist_long(self): |     def test_youtube_playlist_long(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] |         result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | ||||||
|         self.assertIsPlaylist(result) |         self.assertIsPlaylist(result) | ||||||
|         self.assertTrue(len(result['entries']) >= 799) |         self.assertTrue(len(result['entries']) >= 799) | ||||||
|  |  | ||||||
| @@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|         #651 |         #651 | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] |         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') | ||||||
|         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] |         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] | ||||||
|         self.assertFalse('pElCt5oNDuI' in ytie_results) |         self.assertFalse('pElCt5oNDuI' in ytie_results) | ||||||
|         self.assertFalse('KdPEApIVdWM' in ytie_results) |         self.assertFalse('KdPEApIVdWM' in ytie_results) | ||||||
| @@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|     def test_youtube_playlist_empty(self): |     def test_youtube_playlist_empty(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0] |         result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx') | ||||||
|         self.assertIsPlaylist(result) |         self.assertIsPlaylist(result) | ||||||
|         self.assertEqual(len(result['entries']), 0) |         self.assertEqual(len(result['entries']), 0) | ||||||
|  |  | ||||||
| @@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         # TODO find a > 100 (paginating?) videos course |         # TODO find a > 100 (paginating?) videos course | ||||||
|         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] |         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||||
|         entries = result['entries'] |         entries = result['entries'] | ||||||
|         self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') |         self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') | ||||||
|         self.assertEqual(len(entries), 25) |         self.assertEqual(len(entries), 25) | ||||||
| @@ -99,7 +99,7 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|     def test_youtube_safe_search(self): |     def test_youtube_safe_search(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = YoutubePlaylistIE(dl) |         ie = YoutubePlaylistIE(dl) | ||||||
|         result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] |         result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl') | ||||||
|         self.assertEqual(len(result['entries']), 2) |         self.assertEqual(len(result['entries']), 2) | ||||||
|  |  | ||||||
|     def test_youtube_show(self): |     def test_youtube_show(self): | ||||||
|   | |||||||
| @@ -1506,8 +1506,9 @@ class YoutubePlaylistIE(InfoExtractor): | |||||||
|                      | |                      | | ||||||
|                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,}) |                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,}) | ||||||
|                      )""" |                      )""" | ||||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none' |     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' | ||||||
|     _MAX_RESULTS = 50 |     _MORE_PAGES_INDICATOR = r'data-link-type="next"' | ||||||
|  |     _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&' | ||||||
|     IE_NAME = u'youtube:playlist' |     IE_NAME = u'youtube:playlist' | ||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
| @@ -1532,41 +1533,23 @@ class YoutubePlaylistIE(InfoExtractor): | |||||||
|             else: |             else: | ||||||
|                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) |                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||||
|  |  | ||||||
|         # Download playlist videos from API |         # Extract the video ids from the playlist pages | ||||||
|         videos = [] |         ids = [] | ||||||
|  |  | ||||||
|         for page_num in itertools.count(1): |         for page_num in itertools.count(1): | ||||||
|             start_index = self._MAX_RESULTS * (page_num - 1) + 1 |             url = self._TEMPLATE_URL % (playlist_id, page_num) | ||||||
|             if start_index >= 1000: |  | ||||||
|                 self._downloader.report_warning(u'Max number of results reached') |  | ||||||
|                 break |  | ||||||
|             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index) |  | ||||||
|             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) |             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) | ||||||
|  |             # The ids are duplicated | ||||||
|  |             new_ids = orderedSet(re.findall(self._VIDEO_RE, page)) | ||||||
|  |             ids.extend(new_ids) | ||||||
|  |  | ||||||
|             try: |             if re.search(self._MORE_PAGES_INDICATOR, page) is None: | ||||||
|                 response = json.loads(page) |  | ||||||
|             except ValueError as err: |  | ||||||
|                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) |  | ||||||
|  |  | ||||||
|             if 'feed' not in response: |  | ||||||
|                 raise ExtractorError(u'Got a malformed response from YouTube API') |  | ||||||
|             playlist_title = response['feed']['title']['$t'] |  | ||||||
|             if 'entry' not in response['feed']: |  | ||||||
|                 # Number of videos is a multiple of self._MAX_RESULTS |  | ||||||
|                 break |                 break | ||||||
|  |  | ||||||
|             for entry in response['feed']['entry']: |         playlist_title = self._og_search_title(page) | ||||||
|                 index = entry['yt$position']['$t'] |  | ||||||
|                 if 'media$group' in entry and 'yt$videoid' in entry['media$group']: |  | ||||||
|                     videos.append(( |  | ||||||
|                         index, |  | ||||||
|                         'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t'] |  | ||||||
|                     )) |  | ||||||
|  |  | ||||||
|         videos = [v[1] for v in sorted(videos)] |         url_results = [self.url_result(vid, 'Youtube') for vid in ids] | ||||||
|  |         return self.playlist_result(url_results, playlist_id, playlist_title) | ||||||
|         url_results = [self.url_result(vurl, 'Youtube') for vurl in videos] |  | ||||||
|         return [self.playlist_result(url_results, playlist_id, playlist_title)] |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeChannelIE(InfoExtractor): | class YoutubeChannelIE(InfoExtractor): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz