mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 08:35:12 +00:00 
			
		
		
		
	[youtube] Add support for search result URLs (Fixes #2495)
This commit is contained in:
		@@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):
 | 
				
			|||||||
    def test_youtube_truncated(self):
 | 
					    def test_youtube_truncated(self):
 | 
				
			||||||
        self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
 | 
					        self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_youtube_search_matching(self):
 | 
				
			||||||
 | 
					        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
 | 
				
			||||||
 | 
					        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_justin_tv_channelid_matching(self):
 | 
					    def test_justin_tv_channelid_matching(self):
 | 
				
			||||||
        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
 | 
					        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
 | 
				
			||||||
        self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
 | 
					        self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -16,6 +16,7 @@ from youtube_dl.extractor import (
 | 
				
			|||||||
    YoutubeChannelIE,
 | 
					    YoutubeChannelIE,
 | 
				
			||||||
    YoutubeShowIE,
 | 
					    YoutubeShowIE,
 | 
				
			||||||
    YoutubeTopListIE,
 | 
					    YoutubeTopListIE,
 | 
				
			||||||
 | 
					    YoutubeSearchURLIE,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
 | 
				
			|||||||
        entries = result['entries']
 | 
					        entries = result['entries']
 | 
				
			||||||
        self.assertTrue(len(entries) >= 5)
 | 
					        self.assertTrue(len(entries) >= 5)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_youtube_search_url(self):
 | 
				
			||||||
 | 
					        dl = FakeYDL()
 | 
				
			||||||
 | 
					        ie = YoutubeSearchURLIE(dl)
 | 
				
			||||||
 | 
					        result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
 | 
				
			||||||
 | 
					        entries = result['entries']
 | 
				
			||||||
 | 
					        self.assertIsPlaylist(result)
 | 
				
			||||||
 | 
					        self.assertEqual(result['title'], 'youtube-dl test video')
 | 
				
			||||||
 | 
					        self.assertTrue(len(entries) >= 5)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    unittest.main()
 | 
					    unittest.main()
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -675,7 +675,7 @@ class YoutubeDL(object):
 | 
				
			|||||||
            info_dict['playlist'] = None
 | 
					            info_dict['playlist'] = None
 | 
				
			||||||
            info_dict['playlist_index'] = None
 | 
					            info_dict['playlist_index'] = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if 'display_id' not in info_dict:
 | 
					        if 'display_id' not in info_dict and 'id' in info_dict:
 | 
				
			||||||
            info_dict['display_id'] = info_dict['id']
 | 
					            info_dict['display_id'] = info_dict['id']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # This extractors handle format selection themselves
 | 
					        # This extractors handle format selection themselves
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -285,19 +285,20 @@ from .youku import YoukuIE
 | 
				
			|||||||
from .youporn import YouPornIE
 | 
					from .youporn import YouPornIE
 | 
				
			||||||
from .youtube import (
 | 
					from .youtube import (
 | 
				
			||||||
    YoutubeIE,
 | 
					    YoutubeIE,
 | 
				
			||||||
    YoutubePlaylistIE,
 | 
					 | 
				
			||||||
    YoutubeSearchIE,
 | 
					 | 
				
			||||||
    YoutubeSearchDateIE,
 | 
					 | 
				
			||||||
    YoutubeUserIE,
 | 
					 | 
				
			||||||
    YoutubeChannelIE,
 | 
					    YoutubeChannelIE,
 | 
				
			||||||
    YoutubeShowIE,
 | 
					 | 
				
			||||||
    YoutubeSubscriptionsIE,
 | 
					 | 
				
			||||||
    YoutubeRecommendedIE,
 | 
					 | 
				
			||||||
    YoutubeTruncatedURLIE,
 | 
					 | 
				
			||||||
    YoutubeWatchLaterIE,
 | 
					 | 
				
			||||||
    YoutubeFavouritesIE,
 | 
					    YoutubeFavouritesIE,
 | 
				
			||||||
    YoutubeHistoryIE,
 | 
					    YoutubeHistoryIE,
 | 
				
			||||||
 | 
					    YoutubePlaylistIE,
 | 
				
			||||||
 | 
					    YoutubeRecommendedIE,
 | 
				
			||||||
 | 
					    YoutubeSearchDateIE,
 | 
				
			||||||
 | 
					    YoutubeSearchIE,
 | 
				
			||||||
 | 
					    YoutubeSearchURLIE,
 | 
				
			||||||
 | 
					    YoutubeShowIE,
 | 
				
			||||||
 | 
					    YoutubeSubscriptionsIE,
 | 
				
			||||||
    YoutubeTopListIE,
 | 
					    YoutubeTopListIE,
 | 
				
			||||||
 | 
					    YoutubeTruncatedURLIE,
 | 
				
			||||||
 | 
					    YoutubeUserIE,
 | 
				
			||||||
 | 
					    YoutubeWatchLaterIE,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
from .zdf import ZDFIE
 | 
					from .zdf import ZDFIE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1645,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class YoutubeUserIE(InfoExtractor):
 | 
					class YoutubeUserIE(InfoExtractor):
 | 
				
			||||||
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
 | 
					    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
 | 
				
			||||||
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
 | 
					    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
 | 
				
			||||||
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
 | 
					    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
 | 
				
			||||||
    _GDATA_PAGE_SIZE = 50
 | 
					    _GDATA_PAGE_SIZE = 50
 | 
				
			||||||
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
 | 
					    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
 | 
				
			||||||
@@ -1744,12 +1744,50 @@ class YoutubeSearchIE(SearchInfoExtractor):
 | 
				
			|||||||
                  for video_id in video_ids]
 | 
					                  for video_id in video_ids]
 | 
				
			||||||
        return self.playlist_result(videos, query)
 | 
					        return self.playlist_result(videos, query)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
 | 
					class YoutubeSearchDateIE(YoutubeSearchIE):
 | 
				
			||||||
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
 | 
					    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
 | 
				
			||||||
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
 | 
					    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
 | 
				
			||||||
    _SEARCH_KEY = 'ytsearchdate'
 | 
					    _SEARCH_KEY = 'ytsearchdate'
 | 
				
			||||||
    IE_DESC = u'YouTube.com searches, newest videos first'
 | 
					    IE_DESC = u'YouTube.com searches, newest videos first'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class YoutubeSearchURLIE(InfoExtractor):
 | 
				
			||||||
 | 
					    IE_DESC = u'YouTube.com search URLs'
 | 
				
			||||||
 | 
					    IE_NAME = u'youtube:search_url'
 | 
				
			||||||
 | 
					    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
 | 
					        mobj = re.match(self._VALID_URL, url)
 | 
				
			||||||
 | 
					        query = compat_urllib_parse.unquote_plus(mobj.group('query'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        webpage = self._download_webpage(url, query)
 | 
				
			||||||
 | 
					        result_code = self._search_regex(
 | 
				
			||||||
 | 
					            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        part_codes = re.findall(
 | 
				
			||||||
 | 
					            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
 | 
				
			||||||
 | 
					        entries = []
 | 
				
			||||||
 | 
					        for part_code in part_codes:
 | 
				
			||||||
 | 
					            part_title = self._html_search_regex(
 | 
				
			||||||
 | 
					                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
 | 
				
			||||||
 | 
					            part_url_snippet = self._html_search_regex(
 | 
				
			||||||
 | 
					                r'(?s)href="([^"]+)"', part_code, 'item URL')
 | 
				
			||||||
 | 
					            part_url = compat_urlparse.urljoin(
 | 
				
			||||||
 | 
					                'https://www.youtube.com/', part_url_snippet)
 | 
				
			||||||
 | 
					            entries.append({
 | 
				
			||||||
 | 
					                '_type': 'url',
 | 
				
			||||||
 | 
					                'url': part_url,
 | 
				
			||||||
 | 
					                'title': part_title,
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return {
 | 
				
			||||||
 | 
					            '_type': 'playlist',
 | 
				
			||||||
 | 
					            'entries': entries,
 | 
				
			||||||
 | 
					            'title': query,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class YoutubeShowIE(InfoExtractor):
 | 
					class YoutubeShowIE(InfoExtractor):
 | 
				
			||||||
    IE_DESC = u'YouTube.com (multi-season) shows'
 | 
					    IE_DESC = u'YouTube.com (multi-season) shows'
 | 
				
			||||||
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
 | 
					    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user