Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-30 22:25:19 +00:00)
			
		
		
		
	[generic] Handle audio streams that do not implement HEAD (Fixes #4032)
This commit is contained in:
		| @@ -242,7 +242,6 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||
|         """ Returns a tuple (page content as string, URL handle) """ | ||||
|  | ||||
|         # Strip hashes from the URL (#1038) | ||||
|         if isinstance(url_or_request, (compat_str, str)): | ||||
|             url_or_request = url_or_request.partition('#')[0] | ||||
| @@ -251,6 +250,10 @@ class InfoExtractor(object): | ||||
|         if urlh is False: | ||||
|             assert not fatal | ||||
|             return False | ||||
|         content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal) | ||||
|         return (content, urlh) | ||||
|  | ||||
|     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||
|         content_type = urlh.headers.get('Content-Type', '') | ||||
|         webpage_bytes = urlh.read() | ||||
|         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) | ||||
| @@ -309,7 +312,7 @@ class InfoExtractor(object): | ||||
|                 msg += ' Visit %s for more details' % blocked_iframe | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         return (content, urlh) | ||||
|         return content | ||||
|  | ||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||
|         """ Returns the data of the page as a string """ | ||||
|   | ||||
| @@ -503,14 +503,14 @@ class GenericIE(InfoExtractor): | ||||
|         self.to_screen('%s: Requesting header' % video_id) | ||||
|  | ||||
|         head_req = HEADRequest(url) | ||||
|         response = self._request_webpage( | ||||
|         head_response = self._request_webpage( | ||||
|             head_req, video_id, | ||||
|             note=False, errnote='Could not send HEAD request to %s' % url, | ||||
|             fatal=False) | ||||
|  | ||||
|         if response is not False: | ||||
|         if head_response is not False: | ||||
|             # Check for redirect | ||||
|             new_url = response.geturl() | ||||
|             new_url = head_response.geturl() | ||||
|             if url != new_url: | ||||
|                 self.report_following_redirect(new_url) | ||||
|                 if force_videoid: | ||||
| @@ -518,13 +518,17 @@ class GenericIE(InfoExtractor): | ||||
|                         new_url, {'force_videoid': force_videoid}) | ||||
|                 return self.url_result(new_url) | ||||
|  | ||||
|         full_response = None | ||||
|         if head_response is False: | ||||
|             full_response = self._request_webpage(url, video_id) | ||||
|             head_response = full_response | ||||
|  | ||||
|         # Check for direct link to a video | ||||
|             content_type = response.headers.get('Content-Type', '') | ||||
|         content_type = head_response.headers.get('Content-Type', '') | ||||
|         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type) | ||||
|         if m: | ||||
|                 upload_date = response.headers.get('Last-Modified') | ||||
|                 if upload_date: | ||||
|                     upload_date = unified_strdate(upload_date) | ||||
|             upload_date = unified_strdate( | ||||
|                 head_response.headers.get('Last-Modified')) | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'title': os.path.splitext(url_basename(url))[0], | ||||
| @@ -539,13 +543,10 @@ class GenericIE(InfoExtractor): | ||||
|         if not self._downloader.params.get('test', False) and not is_intentional: | ||||
|             self._downloader.report_warning('Falling back on generic information extractor.') | ||||
|  | ||||
|         try: | ||||
|         if full_response: | ||||
|             webpage = _webpage_read_content(url, video_id) | ||||
|         else: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|         except ValueError: | ||||
|             # since this is the last-resort InfoExtractor, if | ||||
|             # this error is thrown, it'll be thrown here | ||||
|             raise ExtractorError('Failed to download URL: %s' % url) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # Is it an RSS feed? | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister