mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	Add support for direct links to a video (#1973)
This commit is contained in:
		| @@ -13,6 +13,8 @@ from ..utils import ( | |||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|  |     unified_strdate, | ||||||
|  |     url_basename, | ||||||
| ) | ) | ||||||
| from .brightcove import BrightcoveIE | from .brightcove import BrightcoveIE | ||||||
|  |  | ||||||
| @@ -71,6 +73,17 @@ class GenericIE(InfoExtractor): | |||||||
|                 u'skip_download': True, |                 u'skip_download': True, | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
|  |         # Direct link to a video | ||||||
|  |         { | ||||||
|  |             u'url': u'http://media.w3.org/2010/05/sintel/trailer.mp4', | ||||||
|  |             u'file': u'trailer.mp4', | ||||||
|  |             u'md5': u'67d406c2bcb6af27fa886f31aa934bbe', | ||||||
|  |             u'info_dict': { | ||||||
|  |                 u'id': u'trailer', | ||||||
|  |                 u'title': u'trailer', | ||||||
|  |                 u'upload_date': u'20100513', | ||||||
|  |             } | ||||||
|  |         } | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def report_download_webpage(self, video_id): |     def report_download_webpage(self, video_id): | ||||||
| @@ -83,7 +96,7 @@ class GenericIE(InfoExtractor): | |||||||
|         """Report information extraction.""" |         """Report information extraction.""" | ||||||
|         self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url) |         self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url) | ||||||
|  |  | ||||||
|     def _test_redirect(self, url): |     def _send_head(self, url): | ||||||
|         """Check if it is a redirect, like url shorteners, in case return the new url.""" |         """Send a HEAD request to url and return the response; callers compare response.geturl() with url to detect a redirect.""" | ||||||
|         class HeadRequest(compat_urllib_request.Request): |         class HeadRequest(compat_urllib_request.Request): | ||||||
|             def get_method(self): |             def get_method(self): | ||||||
| @@ -131,29 +144,46 @@ class GenericIE(InfoExtractor): | |||||||
|         response = opener.open(HeadRequest(url)) |         response = opener.open(HeadRequest(url)) | ||||||
|         if response is None: |         if response is None: | ||||||
|             raise ExtractorError(u'Invalid URL protocol') |             raise ExtractorError(u'Invalid URL protocol') | ||||||
|         new_url = response.geturl() |         return response | ||||||
|  |  | ||||||
|         if url == new_url: |  | ||||||
|             return False |  | ||||||
|  |  | ||||||
|         self.report_following_redirect(new_url) |  | ||||||
|         return new_url |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         parsed_url = compat_urlparse.urlparse(url) |         parsed_url = compat_urlparse.urlparse(url) | ||||||
|         if not parsed_url.scheme: |         if not parsed_url.scheme: | ||||||
|             self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') |             self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') | ||||||
|             return self.url_result('http://' + url) |             return self.url_result('http://' + url) | ||||||
|  |         video_id = os.path.splitext(url.split('/')[-1])[0] | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             new_url = self._test_redirect(url) |             response = self._send_head(url) | ||||||
|             if new_url: |  | ||||||
|  |             # Check for redirect | ||||||
|  |             new_url = response.geturl() | ||||||
|  |             if url != new_url: | ||||||
|  |                 self.report_following_redirect(new_url) | ||||||
|                 return self.url_result(new_url) |                 return self.url_result(new_url) | ||||||
|  |  | ||||||
|  |             # Check for direct link to a video | ||||||
|  |             content_type = response.headers.get('Content-Type', '') | ||||||
|  |             m = re.match(r'^(?:audio|video)/(?P<format_id>.+)$', content_type) | ||||||
|  |             if m: | ||||||
|  |                 upload_date = response.headers.get('Last-Modified') | ||||||
|  |                 if upload_date: | ||||||
|  |                     upload_date = unified_strdate(upload_date) | ||||||
|  |                 assert (url_basename(url) == 'trailer.mp4') | ||||||
|  |                 return { | ||||||
|  |                     'id': video_id, | ||||||
|  |                     'title': os.path.splitext(url_basename(url))[0], | ||||||
|  |                     'formats': [{ | ||||||
|  |                         'format_id': m.group('format_id'), | ||||||
|  |                         'url': url, | ||||||
|  |                     }], | ||||||
|  |                     'upload_date': upload_date, | ||||||
|  |                 } | ||||||
|  |  | ||||||
|         except compat_urllib_error.HTTPError: |         except compat_urllib_error.HTTPError: | ||||||
|             # This may be a stupid server that doesn't like HEAD, our UA, or so |             # This may be a stupid server that doesn't like HEAD, our UA, or so | ||||||
|             pass |             pass | ||||||
|  |  | ||||||
|         video_id = url.split('/')[-1] |  | ||||||
|         try: |         try: | ||||||
|             webpage = self._download_webpage(url, video_id) |             webpage = self._download_webpage(url, video_id) | ||||||
|         except ValueError: |         except ValueError: | ||||||
|   | |||||||
| @@ -761,12 +761,17 @@ def unified_strdate(date_str): | |||||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', |         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', |         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||||
|         '%Y-%m-%dT%H:%M:%S', |         '%Y-%m-%dT%H:%M:%S', | ||||||
|  |         '%Y-%m-%dT%H:%M:%S', | ||||||
|     ] |     ] | ||||||
|     for expression in format_expressions: |     for expression in format_expressions: | ||||||
|         try: |         try: | ||||||
|             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') |             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||||
|         except: |         except: | ||||||
|             pass |             pass | ||||||
|  |     if upload_date is None: | ||||||
|  |         timetuple = email.utils.parsedate_tz(date_str) | ||||||
|  |         if timetuple: | ||||||
|  |             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') | ||||||
|     return upload_date |     return upload_date | ||||||
|  |  | ||||||
| def determine_ext(url, default_ext=u'unknown_video'): | def determine_ext(url, default_ext=u'unknown_video'): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister