mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	[extractor/generic] Remove HEAD request
This commit is contained in:
		| @@ -111,7 +111,6 @@ from ..compat import compat_etree_fromstring | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     KNOWN_EXTENSIONS, |     KNOWN_EXTENSIONS, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     HEADRequest, |  | ||||||
|     UnsupportedError, |     UnsupportedError, | ||||||
|     determine_ext, |     determine_ext, | ||||||
|     dict_get, |     dict_get, | ||||||
| @@ -124,7 +123,6 @@ from ..utils import ( | |||||||
|     orderedSet, |     orderedSet, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|     parse_resolution, |     parse_resolution, | ||||||
|     sanitized_Request, |  | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
|     str_or_none, |     str_or_none, | ||||||
|     try_call, |     try_call, | ||||||
| @@ -2807,49 +2805,30 @@ class GenericIE(InfoExtractor): | |||||||
|         else: |         else: | ||||||
|             video_id = self._generic_id(url) |             video_id = self._generic_id(url) | ||||||
| 
 | 
 | ||||||
|         self.to_screen('%s: Requesting header' % video_id) |         # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) | ||||||
| 
 |         # making it impossible to download only chunk of the file (yet we need only 512kB to | ||||||
|         head_req = HEADRequest(url) |         # test whether it's HTML or not). According to yt-dlp default Accept-Encoding | ||||||
|         head_response = self._request_webpage( |         # that will always result in downloading the whole file that is not desirable. | ||||||
|             head_req, video_id, |         # Therefore for extraction pass we have to override Accept-Encoding to any in order | ||||||
|             note=False, errnote='Could not send HEAD request to %s' % url, |         # to accept raw bytes and be able to download only a chunk. | ||||||
|             fatal=False) |         # It may probably be better to solve this by checking Content-Type for application/octet-stream | ||||||
| 
 |         # after a HEAD request, but not sure if we can rely on this. | ||||||
|         if head_response is not False: |         full_response = self._request_webpage(url, video_id, headers={'Accept-Encoding': '*'}) | ||||||
|             # Check for redirect |         new_url = full_response.geturl() | ||||||
|             new_url = head_response.geturl() |         if url != new_url: | ||||||
|             if url != new_url: |             self.report_following_redirect(new_url) | ||||||
|                 self.report_following_redirect(new_url) |             if force_videoid: | ||||||
|                 if force_videoid: |                 new_url = smuggle_url(new_url, {'force_videoid': force_videoid}) | ||||||
|                     new_url = smuggle_url( |             return self.url_result(new_url) | ||||||
|                         new_url, {'force_videoid': force_videoid}) |  | ||||||
|                 return self.url_result(new_url) |  | ||||||
| 
 |  | ||||||
|         def request_webpage(): |  | ||||||
|             request = sanitized_Request(url) |  | ||||||
|             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) |  | ||||||
|             # making it impossible to download only chunk of the file (yet we need only 512kB to |  | ||||||
|             # test whether it's HTML or not). According to yt-dlp default Accept-Encoding |  | ||||||
|             # that will always result in downloading the whole file that is not desirable. |  | ||||||
|             # Therefore for extraction pass we have to override Accept-Encoding to any in order |  | ||||||
|             # to accept raw bytes and being able to download only a chunk. |  | ||||||
|             # It may probably better to solve this by checking Content-Type for application/octet-stream |  | ||||||
|             # after HEAD request finishes, but not sure if we can rely on this. |  | ||||||
|             request.add_header('Accept-Encoding', '*') |  | ||||||
|             return self._request_webpage(request, video_id) |  | ||||||
| 
 |  | ||||||
|         full_response = None |  | ||||||
|         if head_response is False: |  | ||||||
|             head_response = full_response = request_webpage() |  | ||||||
| 
 | 
 | ||||||
|         info_dict = { |         info_dict = { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': self._generic_title(url), |             'title': self._generic_title(url), | ||||||
|             'timestamp': unified_timestamp(head_response.headers.get('Last-Modified')) |             'timestamp': unified_timestamp(full_response.headers.get('Last-Modified')) | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         # Check for direct link to a video |         # Check for direct link to a video | ||||||
|         content_type = head_response.headers.get('Content-Type', '').lower() |         content_type = full_response.headers.get('Content-Type', '').lower() | ||||||
|         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type) |         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type) | ||||||
|         if m: |         if m: | ||||||
|             self.report_detected('direct video link') |             self.report_detected('direct video link') | ||||||
| @@ -2878,7 +2857,6 @@ class GenericIE(InfoExtractor): | |||||||
|             self.report_warning( |             self.report_warning( | ||||||
|                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back')) |                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back')) | ||||||
| 
 | 
 | ||||||
|         full_response = full_response or request_webpage() |  | ||||||
|         first_bytes = full_response.read(512) |         first_bytes = full_response.read(512) | ||||||
| 
 | 
 | ||||||
|         # Is it an M3U playlist? |         # Is it an M3U playlist? | ||||||
| @@ -4103,7 +4081,7 @@ class GenericIE(InfoExtractor): | |||||||
|                 webpage) |                 webpage) | ||||||
|             if not found: |             if not found: | ||||||
|                 # Look also in Refresh HTTP header |                 # Look also in Refresh HTTP header | ||||||
|                 refresh_header = head_response.headers.get('Refresh') |                 refresh_header = full_response.headers.get('Refresh') | ||||||
|                 if refresh_header: |                 if refresh_header: | ||||||
|                     found = re.search(REDIRECT_REGEX, refresh_header) |                     found = re.search(REDIRECT_REGEX, refresh_header) | ||||||
|             if found: |             if found: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan