mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Merge pull request #8819 from remitamine/simple-webpage-requests
[extractor/common] simplify using data, headers and query params with _download_* methods
This commit is contained in:
		| @@ -48,6 +48,7 @@ from ..utils import ( | |||||||
|     determine_protocol, |     determine_protocol, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|     mimetype2ext, |     mimetype2ext, | ||||||
|  |     update_url_query, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -345,7 +346,7 @@ class InfoExtractor(object): | |||||||
|     def IE_NAME(self): |     def IE_NAME(self): | ||||||
|         return compat_str(type(self).__name__[:-2]) |         return compat_str(type(self).__name__[:-2]) | ||||||
|  |  | ||||||
|     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): |     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None): | ||||||
|         """ Returns the response handle """ |         """ Returns the response handle """ | ||||||
|         if note is None: |         if note is None: | ||||||
|             self.report_download_webpage(video_id) |             self.report_download_webpage(video_id) | ||||||
| @@ -354,6 +355,12 @@ class InfoExtractor(object): | |||||||
|                 self.to_screen('%s' % (note,)) |                 self.to_screen('%s' % (note,)) | ||||||
|             else: |             else: | ||||||
|                 self.to_screen('%s: %s' % (video_id, note)) |                 self.to_screen('%s: %s' % (video_id, note)) | ||||||
|  |         # data, headers and query params will be ignored for `Request` objects | ||||||
|  |         if isinstance(url_or_request, compat_str): | ||||||
|  |             if query: | ||||||
|  |                 url_or_request = update_url_query(url_or_request, query) | ||||||
|  |             if data or headers: | ||||||
|  |                 url_or_request = sanitized_Request(url_or_request, data, headers or {}) | ||||||
|         try: |         try: | ||||||
|             return self._downloader.urlopen(url_or_request) |             return self._downloader.urlopen(url_or_request) | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
| @@ -369,13 +376,13 @@ class InfoExtractor(object): | |||||||
|                 self._downloader.report_warning(errmsg) |                 self._downloader.report_warning(errmsg) | ||||||
|                 return False |                 return False | ||||||
|  |  | ||||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None): |     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None): | ||||||
|         """ Returns a tuple (page content as string, URL handle) """ |         """ Returns a tuple (page content as string, URL handle) """ | ||||||
|         # Strip hashes from the URL (#1038) |         # Strip hashes from the URL (#1038) | ||||||
|         if isinstance(url_or_request, (compat_str, str)): |         if isinstance(url_or_request, (compat_str, str)): | ||||||
|             url_or_request = url_or_request.partition('#')[0] |             url_or_request = url_or_request.partition('#')[0] | ||||||
|  |  | ||||||
|         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) |         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) | ||||||
|         if urlh is False: |         if urlh is False: | ||||||
|             assert not fatal |             assert not fatal | ||||||
|             return False |             return False | ||||||
| @@ -462,13 +469,13 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|         return content |         return content | ||||||
|  |  | ||||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): |     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None): | ||||||
|         """ Returns the data of the page as a string """ |         """ Returns the data of the page as a string """ | ||||||
|         success = False |         success = False | ||||||
|         try_count = 0 |         try_count = 0 | ||||||
|         while success is False: |         while success is False: | ||||||
|             try: |             try: | ||||||
|                 res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding) |                 res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query) | ||||||
|                 success = True |                 success = True | ||||||
|             except compat_http_client.IncompleteRead as e: |             except compat_http_client.IncompleteRead as e: | ||||||
|                 try_count += 1 |                 try_count += 1 | ||||||
| @@ -483,10 +490,10 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|     def _download_xml(self, url_or_request, video_id, |     def _download_xml(self, url_or_request, video_id, | ||||||
|                       note='Downloading XML', errnote='Unable to download XML', |                       note='Downloading XML', errnote='Unable to download XML', | ||||||
|                       transform_source=None, fatal=True, encoding=None): |                       transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None): | ||||||
|         """Return the xml as an xml.etree.ElementTree.Element""" |         """Return the xml as an xml.etree.ElementTree.Element""" | ||||||
|         xml_string = self._download_webpage( |         xml_string = self._download_webpage( | ||||||
|             url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding) |             url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query) | ||||||
|         if xml_string is False: |         if xml_string is False: | ||||||
|             return xml_string |             return xml_string | ||||||
|         if transform_source: |         if transform_source: | ||||||
| @@ -497,10 +504,10 @@ class InfoExtractor(object): | |||||||
|                        note='Downloading JSON metadata', |                        note='Downloading JSON metadata', | ||||||
|                        errnote='Unable to download JSON metadata', |                        errnote='Unable to download JSON metadata', | ||||||
|                        transform_source=None, |                        transform_source=None, | ||||||
|                        fatal=True, encoding=None): |                        fatal=True, encoding=None, data=None, headers=None, query=None): | ||||||
|         json_string = self._download_webpage( |         json_string = self._download_webpage( | ||||||
|             url_or_request, video_id, note, errnote, fatal=fatal, |             url_or_request, video_id, note, errnote, fatal=fatal, | ||||||
|             encoding=encoding) |             encoding=encoding, data=data, headers=headers, query=query) | ||||||
|         if (not fatal) and json_string is False: |         if (not fatal) and json_string is False: | ||||||
|             return None |             return None | ||||||
|         return self._parse_json( |         return self._parse_json( | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 remitamine
					remitamine