mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	| @@ -296,9 +296,11 @@ class InfoExtractor(object): | ||||
|         content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal) | ||||
|         return (content, urlh) | ||||
|  | ||||
|     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||
|     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None): | ||||
|         content_type = urlh.headers.get('Content-Type', '') | ||||
|         webpage_bytes = urlh.read() | ||||
|         if prefix is not None: | ||||
|             webpage_bytes = prefix + webpage_bytes | ||||
|         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) | ||||
|         if m: | ||||
|             encoding = m.group(1) | ||||
|   | ||||
| @@ -452,7 +452,23 @@ class GenericIE(InfoExtractor): | ||||
|                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', | ||||
|             }, | ||||
|             'playlist_mincount': 2, | ||||
|         }, | ||||
|         # Direct link with incorrect MIME type | ||||
|         { | ||||
|             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', | ||||
|             'md5': '4ccbebe5f36706d85221f204d7eb5913', | ||||
|             'info_dict': { | ||||
|                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', | ||||
|                 'id': '5_Lennart_Poettering_-_Systemd', | ||||
|                 'ext': 'webm', | ||||
|                 'title': '5_Lennart_Poettering_-_Systemd', | ||||
|                 'upload_date': '20141120', | ||||
|             }, | ||||
|             'expected_warnings': [ | ||||
|                 'URL could be a direct video link, returning it as such.' | ||||
|             ] | ||||
|         } | ||||
|  | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -606,10 +622,28 @@ class GenericIE(InfoExtractor): | ||||
|         if not self._downloader.params.get('test', False) and not is_intentional: | ||||
|             self._downloader.report_warning('Falling back on generic information extractor.') | ||||
|  | ||||
|         if full_response: | ||||
|             webpage = self._webpage_read_content(full_response, url, video_id) | ||||
|         else: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|         if not full_response: | ||||
|             full_response = self._request_webpage(url, video_id) | ||||
|  | ||||
|         # Maybe it's a direct link to a video? | ||||
|         # Be careful not to download the whole thing! | ||||
|         first_bytes = full_response.read(512) | ||||
|         if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')): | ||||
|             self._downloader.report_warning( | ||||
|                 'URL could be a direct video link, returning it as such.') | ||||
|             upload_date = unified_strdate( | ||||
|                 head_response.headers.get('Last-Modified')) | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'title': os.path.splitext(url_basename(url))[0], | ||||
|                 'direct': True, | ||||
|                 'url': url, | ||||
|                 'upload_date': upload_date, | ||||
|             } | ||||
|  | ||||
|         webpage = self._webpage_read_content( | ||||
|             full_response, url, video_id, prefix=first_bytes) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # Is it an RSS feed? | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister