mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[youtube] Add support for search result URLs (Fixes #2495)
This commit is contained in:
		| @@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase): | |||||||
|     def test_youtube_truncated(self): |     def test_youtube_truncated(self): | ||||||
|         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) |         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) | ||||||
|  |  | ||||||
|  |     def test_youtube_search_matching(self): | ||||||
|  |         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) | ||||||
|  |         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | ||||||
|  |  | ||||||
|     def test_justin_tv_channelid_matching(self): |     def test_justin_tv_channelid_matching(self): | ||||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) |         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||||
|         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) |         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ from youtube_dl.extractor import ( | |||||||
|     YoutubeChannelIE, |     YoutubeChannelIE, | ||||||
|     YoutubeShowIE, |     YoutubeShowIE, | ||||||
|     YoutubeTopListIE, |     YoutubeTopListIE, | ||||||
|  |     YoutubeSearchURLIE, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase): | |||||||
|         entries = result['entries'] |         entries = result['entries'] | ||||||
|         self.assertTrue(len(entries) >= 5) |         self.assertTrue(len(entries) >= 5) | ||||||
|  |  | ||||||
|  |     def test_youtube_search_url(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = YoutubeSearchURLIE(dl) | ||||||
|  |         result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') | ||||||
|  |         entries = result['entries'] | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['title'], 'youtube-dl test video') | ||||||
|  |         self.assertTrue(len(entries) >= 5) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -675,7 +675,7 @@ class YoutubeDL(object): | |||||||
|             info_dict['playlist'] = None |             info_dict['playlist'] = None | ||||||
|             info_dict['playlist_index'] = None |             info_dict['playlist_index'] = None | ||||||
|  |  | ||||||
|         if 'display_id' not in info_dict: |         if 'display_id' not in info_dict and 'id' in info_dict: | ||||||
|             info_dict['display_id'] = info_dict['id'] |             info_dict['display_id'] = info_dict['id'] | ||||||
|  |  | ||||||
|         # This extractors handle format selection themselves |         # This extractors handle format selection themselves | ||||||
|   | |||||||
| @@ -285,19 +285,20 @@ from .youku import YoukuIE | |||||||
| from .youporn import YouPornIE | from .youporn import YouPornIE | ||||||
| from .youtube import ( | from .youtube import ( | ||||||
|     YoutubeIE, |     YoutubeIE, | ||||||
|     YoutubePlaylistIE, |  | ||||||
|     YoutubeSearchIE, |  | ||||||
|     YoutubeSearchDateIE, |  | ||||||
|     YoutubeUserIE, |  | ||||||
|     YoutubeChannelIE, |     YoutubeChannelIE, | ||||||
|     YoutubeShowIE, |  | ||||||
|     YoutubeSubscriptionsIE, |  | ||||||
|     YoutubeRecommendedIE, |  | ||||||
|     YoutubeTruncatedURLIE, |  | ||||||
|     YoutubeWatchLaterIE, |  | ||||||
|     YoutubeFavouritesIE, |     YoutubeFavouritesIE, | ||||||
|     YoutubeHistoryIE, |     YoutubeHistoryIE, | ||||||
|  |     YoutubePlaylistIE, | ||||||
|  |     YoutubeRecommendedIE, | ||||||
|  |     YoutubeSearchDateIE, | ||||||
|  |     YoutubeSearchIE, | ||||||
|  |     YoutubeSearchURLIE, | ||||||
|  |     YoutubeShowIE, | ||||||
|  |     YoutubeSubscriptionsIE, | ||||||
|     YoutubeTopListIE, |     YoutubeTopListIE, | ||||||
|  |     YoutubeTruncatedURLIE, | ||||||
|  |     YoutubeUserIE, | ||||||
|  |     YoutubeWatchLaterIE, | ||||||
| ) | ) | ||||||
| from .zdf import ZDFIE | from .zdf import ZDFIE | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1645,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor): | |||||||
|  |  | ||||||
| class YoutubeUserIE(InfoExtractor): | class YoutubeUserIE(InfoExtractor): | ||||||
|     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' |     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' | ||||||
|     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' |     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' | ||||||
|     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' |     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' | ||||||
|     _GDATA_PAGE_SIZE = 50 |     _GDATA_PAGE_SIZE = 50 | ||||||
|     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' |     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' | ||||||
| @@ -1744,12 +1744,50 @@ class YoutubeSearchIE(SearchInfoExtractor): | |||||||
|                   for video_id in video_ids] |                   for video_id in video_ids] | ||||||
|         return self.playlist_result(videos, query) |         return self.playlist_result(videos, query) | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeSearchDateIE(YoutubeSearchIE): | class YoutubeSearchDateIE(YoutubeSearchIE): | ||||||
|     IE_NAME = YoutubeSearchIE.IE_NAME + ':date' |     IE_NAME = YoutubeSearchIE.IE_NAME + ':date' | ||||||
|     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' |     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' | ||||||
|     _SEARCH_KEY = 'ytsearchdate' |     _SEARCH_KEY = 'ytsearchdate' | ||||||
|     IE_DESC = u'YouTube.com searches, newest videos first' |     IE_DESC = u'YouTube.com searches, newest videos first' | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class YoutubeSearchURLIE(InfoExtractor): | ||||||
|  |     IE_DESC = u'YouTube.com search URLs' | ||||||
|  |     IE_NAME = u'youtube:search_url' | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         query = compat_urllib_parse.unquote_plus(mobj.group('query')) | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, query) | ||||||
|  |         result_code = self._search_regex( | ||||||
|  |             r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML') | ||||||
|  |  | ||||||
|  |         part_codes = re.findall( | ||||||
|  |             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code) | ||||||
|  |         entries = [] | ||||||
|  |         for part_code in part_codes: | ||||||
|  |             part_title = self._html_search_regex( | ||||||
|  |                 r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) | ||||||
|  |             part_url_snippet = self._html_search_regex( | ||||||
|  |                 r'(?s)href="([^"]+)"', part_code, 'item URL') | ||||||
|  |             part_url = compat_urlparse.urljoin( | ||||||
|  |                 'https://www.youtube.com/', part_url_snippet) | ||||||
|  |             entries.append({ | ||||||
|  |                 '_type': 'url', | ||||||
|  |                 'url': part_url, | ||||||
|  |                 'title': part_title, | ||||||
|  |             }) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'entries': entries, | ||||||
|  |             'title': query, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeShowIE(InfoExtractor): | class YoutubeShowIE(InfoExtractor): | ||||||
|     IE_DESC = u'YouTube.com (multi-season) shows' |     IE_DESC = u'YouTube.com (multi-season) shows' | ||||||
|     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' |     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister