mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[generic] Support embedded vimeo videos (#1602)
This commit is contained in:
		| @@ -1,4 +1,5 @@ | |||||||
| #!/usr/bin/env python | #!/usr/bin/env python | ||||||
|  | # coding: utf-8 | ||||||
|  |  | ||||||
| # Allow direct execution | # Allow direct execution | ||||||
| import os | import os | ||||||
| @@ -21,6 +22,8 @@ from youtube_dl.utils import ( | |||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|     get_meta_content, |     get_meta_content, | ||||||
|     xpath_with_ns, |     xpath_with_ns, | ||||||
|  |     smuggle_url, | ||||||
|  |     unsmuggle_url, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| if sys.version_info < (3, 0): | if sys.version_info < (3, 0): | ||||||
| @@ -155,5 +158,18 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual(find('media:song/media:author').text, u'The Author') |         self.assertEqual(find('media:song/media:author').text, u'The Author') | ||||||
|         self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') |         self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') | ||||||
|  |  | ||||||
|  |     def test_smuggle_url(self): | ||||||
|  |         data = {u"ö": u"ö", u"abc": [3]} | ||||||
|  |         url = 'https://foo.bar/baz?x=y#a' | ||||||
|  |         smug_url = smuggle_url(url, data) | ||||||
|  |         unsmug_url, unsmug_data = unsmuggle_url(smug_url) | ||||||
|  |         self.assertEqual(url, unsmug_url) | ||||||
|  |         self.assertEqual(data, unsmug_data) | ||||||
|  |  | ||||||
|  |         res_url, res_data = unsmuggle_url(url) | ||||||
|  |         self.assertEqual(res_url, url) | ||||||
|  |         self.assertEqual(res_data, None) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -11,6 +11,8 @@ from ..utils import ( | |||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
|  |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     smuggle_url, | ||||||
|  |     unescapeHTML, | ||||||
| ) | ) | ||||||
| from .brightcove import BrightcoveIE | from .brightcove import BrightcoveIE | ||||||
|  |  | ||||||
| @@ -29,6 +31,17 @@ class GenericIE(InfoExtractor): | |||||||
|                 u"title": u"R\u00e9gis plante sa Jeep" |                 u"title": u"R\u00e9gis plante sa Jeep" | ||||||
|             } |             } | ||||||
|         }, |         }, | ||||||
|  |         # embedded vimeo video | ||||||
|  |         { | ||||||
|  |             u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references', | ||||||
|  |             u'file': u'22444065.mp4', | ||||||
|  |             u'md5': u'2903896e23df39722c33f015af0666e2', | ||||||
|  |             u'info_dict': { | ||||||
|  |                 u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011', | ||||||
|  |                 u"uploader_id": u"skillsmatter", | ||||||
|  |                 u"uploader": u"Skills Matter", | ||||||
|  |             } | ||||||
|  |         } | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def report_download_webpage(self, video_id): |     def report_download_webpage(self, video_id): | ||||||
| @@ -127,6 +140,14 @@ class GenericIE(InfoExtractor): | |||||||
|             bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) |             bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) | ||||||
|             return self.url_result(bc_url, 'Brightcove') |             return self.url_result(bc_url, 'Brightcove') | ||||||
|  |  | ||||||
|  |         # Look for embedded Vimeo player | ||||||
|  |         mobj = re.search( | ||||||
|  |             r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage) | ||||||
|  |         if mobj: | ||||||
|  |             player_url = unescapeHTML(mobj.group(1)) | ||||||
|  |             surl = smuggle_url(player_url, {'Referer': url}) | ||||||
|  |             return self.url_result(surl, 'Vimeo') | ||||||
|  |  | ||||||
|         # Start with something easy: JW Player in SWFObject |         # Start with something easy: JW Player in SWFObject | ||||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) |         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||||
|         if mobj is None: |         if mobj is None: | ||||||
|   | |||||||
| @@ -11,6 +11,7 @@ from ..utils import ( | |||||||
|     get_element_by_attribute, |     get_element_by_attribute, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     std_headers, |     std_headers, | ||||||
|  |     unsmuggle_url, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| class VimeoIE(InfoExtractor): | class VimeoIE(InfoExtractor): | ||||||
| @@ -53,7 +54,7 @@ class VimeoIE(InfoExtractor): | |||||||
|                 u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software', |                 u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software', | ||||||
|                 u'uploader': u'The BLN & Business of Software', |                 u'uploader': u'The BLN & Business of Software', | ||||||
|             }, |             }, | ||||||
|         }, |         } | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def _login(self): |     def _login(self): | ||||||
| @@ -98,6 +99,12 @@ class VimeoIE(InfoExtractor): | |||||||
|         self._login() |         self._login() | ||||||
|  |  | ||||||
|     def _real_extract(self, url, new_video=True): |     def _real_extract(self, url, new_video=True): | ||||||
|  |         url, data = unsmuggle_url(url) | ||||||
|  |         headers = std_headers | ||||||
|  |         if data is not None: | ||||||
|  |             headers = headers.copy() | ||||||
|  |             headers.update(data) | ||||||
|  |  | ||||||
|         # Extract ID from URL |         # Extract ID from URL | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         if mobj is None: |         if mobj is None: | ||||||
| @@ -112,7 +119,7 @@ class VimeoIE(InfoExtractor): | |||||||
|             url = 'https://vimeo.com/' + video_id |             url = 'https://vimeo.com/' + video_id | ||||||
|  |  | ||||||
|         # Retrieve video webpage to extract further information |         # Retrieve video webpage to extract further information | ||||||
|         request = compat_urllib_request.Request(url, None, std_headers) |         request = compat_urllib_request.Request(url, None, headers) | ||||||
|         webpage = self._download_webpage(request, video_id) |         webpage = self._download_webpage(request, video_id) | ||||||
|  |  | ||||||
|         # Now we begin extracting as much information as we can from what we |         # Now we begin extracting as much information as we can from what we | ||||||
|   | |||||||
| @@ -945,3 +945,20 @@ class locked_file(object): | |||||||
|  |  | ||||||
| def shell_quote(args): | def shell_quote(args): | ||||||
|     return ' '.join(map(pipes.quote, args)) |     return ' '.join(map(pipes.quote, args)) | ||||||
|  |  | ||||||
|  |  | ||||||
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    The data is JSON-encoded and appended to the URL as a
    '#__youtubedl_smuggle=...' fragment; unsmuggle_url() reverses this.

    If url already carries smuggled data, that fragment is removed first so
    the result never contains two smuggle fragments. When both the old and
    the new payload are dicts they are merged, with previously smuggled
    keys taking precedence; otherwise the new payload replaces the old one.
    The caller's data object is never modified.

    url  -- the URL to extend
    data -- any JSON-serializable object

    Returns the URL with the smuggled payload appended.
    """
    url, existing = unsmuggle_url(url)
    if isinstance(existing, dict) and isinstance(data, dict):
        # Merge into a copy so the caller's dict stays untouched.
        merged = dict(data)
        merged.update(existing)
        data = merged

    sdata = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + sdata


def unsmuggle_url(smug_url, default=None):
    """ Split a URL produced by smuggle_url() into (url, data).

    smug_url -- a URL, with or without a smuggled payload
    default  -- value returned as data when nothing was smuggled

    Returns a tuple (url, data): the URL without the smuggle fragment and
    the decoded payload, or (smug_url, default) if no payload is present.
    """
    if u'#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    # The payload is url-encoded, so it cannot contain a literal '#';
    # the last '#' therefore always starts the smuggle fragment, even when
    # the original URL has a fragment of its own.
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister