mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[generic] Handle audio streams that do not implement HEAD (Fixes #4032)
This commit is contained in:
		| @@ -242,7 +242,6 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): |     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||||
|         """ Returns a tuple (page content as string, URL handle) """ |         """ Returns a tuple (page content as string, URL handle) """ | ||||||
|  |  | ||||||
|         # Strip hashes from the URL (#1038) |         # Strip hashes from the URL (#1038) | ||||||
|         if isinstance(url_or_request, (compat_str, str)): |         if isinstance(url_or_request, (compat_str, str)): | ||||||
|             url_or_request = url_or_request.partition('#')[0] |             url_or_request = url_or_request.partition('#')[0] | ||||||
| @@ -251,6 +250,10 @@ class InfoExtractor(object): | |||||||
|         if urlh is False: |         if urlh is False: | ||||||
|             assert not fatal |             assert not fatal | ||||||
|             return False |             return False | ||||||
|  |         content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal) | ||||||
|  |         return (content, urlh) | ||||||
|  |  | ||||||
|  |     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||||
|         content_type = urlh.headers.get('Content-Type', '') |         content_type = urlh.headers.get('Content-Type', '') | ||||||
|         webpage_bytes = urlh.read() |         webpage_bytes = urlh.read() | ||||||
|         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) |         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) | ||||||
| @@ -309,7 +312,7 @@ class InfoExtractor(object): | |||||||
|                 msg += ' Visit %s for more details' % blocked_iframe |                 msg += ' Visit %s for more details' % blocked_iframe | ||||||
|             raise ExtractorError(msg, expected=True) |             raise ExtractorError(msg, expected=True) | ||||||
|  |  | ||||||
|         return (content, urlh) |         return content | ||||||
|  |  | ||||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): |     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||||
|         """ Returns the data of the page as a string """ |         """ Returns the data of the page as a string """ | ||||||
|   | |||||||
| @@ -503,14 +503,14 @@ class GenericIE(InfoExtractor): | |||||||
|         self.to_screen('%s: Requesting header' % video_id) |         self.to_screen('%s: Requesting header' % video_id) | ||||||
|  |  | ||||||
|         head_req = HEADRequest(url) |         head_req = HEADRequest(url) | ||||||
|         response = self._request_webpage( |         head_response = self._request_webpage( | ||||||
|             head_req, video_id, |             head_req, video_id, | ||||||
|             note=False, errnote='Could not send HEAD request to %s' % url, |             note=False, errnote='Could not send HEAD request to %s' % url, | ||||||
|             fatal=False) |             fatal=False) | ||||||
|  |  | ||||||
|         if response is not False: |         if head_response is not False: | ||||||
|             # Check for redirect |             # Check for redirect | ||||||
|             new_url = response.geturl() |             new_url = head_response.geturl() | ||||||
|             if url != new_url: |             if url != new_url: | ||||||
|                 self.report_following_redirect(new_url) |                 self.report_following_redirect(new_url) | ||||||
|                 if force_videoid: |                 if force_videoid: | ||||||
| @@ -518,13 +518,17 @@ class GenericIE(InfoExtractor): | |||||||
|                         new_url, {'force_videoid': force_videoid}) |                         new_url, {'force_videoid': force_videoid}) | ||||||
|                 return self.url_result(new_url) |                 return self.url_result(new_url) | ||||||
|  |  | ||||||
|  |         full_response = None | ||||||
|  |         if head_response is False: | ||||||
|  |             full_response = self._request_webpage(url, video_id) | ||||||
|  |             head_response = full_response | ||||||
|  |  | ||||||
|         # Check for direct link to a video |         # Check for direct link to a video | ||||||
|             content_type = response.headers.get('Content-Type', '') |         content_type = head_response.headers.get('Content-Type', '') | ||||||
|         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type) |         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type) | ||||||
|         if m: |         if m: | ||||||
|                 upload_date = response.headers.get('Last-Modified') |             upload_date = unified_strdate( | ||||||
|                 if upload_date: |                 head_response.headers.get('Last-Modified')) | ||||||
|                     upload_date = unified_strdate(upload_date) |  | ||||||
|             return { |             return { | ||||||
|                 'id': video_id, |                 'id': video_id, | ||||||
|                 'title': os.path.splitext(url_basename(url))[0], |                 'title': os.path.splitext(url_basename(url))[0], | ||||||
| @@ -539,13 +543,10 @@ class GenericIE(InfoExtractor): | |||||||
|         if not self._downloader.params.get('test', False) and not is_intentional: |         if not self._downloader.params.get('test', False) and not is_intentional: | ||||||
|             self._downloader.report_warning('Falling back on generic information extractor.') |             self._downloader.report_warning('Falling back on generic information extractor.') | ||||||
|  |  | ||||||
|         try: |         if full_response: | ||||||
|  |             webpage = _webpage_read_content(url, video_id) | ||||||
|  |         else: | ||||||
|             webpage = self._download_webpage(url, video_id) |             webpage = self._download_webpage(url, video_id) | ||||||
|         except ValueError: |  | ||||||
|             # since this is the last-resort InfoExtractor, if |  | ||||||
|             # this error is thrown, it'll be thrown here |  | ||||||
|             raise ExtractorError('Failed to download URL: %s' % url) |  | ||||||
|  |  | ||||||
|         self.report_extraction(video_id) |         self.report_extraction(video_id) | ||||||
|  |  | ||||||
|         # Is it an RSS feed? |         # Is it an RSS feed? | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister