Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 14:45:14 +00:00)
	[networking] Add module (#2861)
No actual changes - code is only moved around
Makefile | 2
@@ -74,7 +74,7 @@ offlinetest: codetest
 	$(PYTHON) -m pytest -k "not download"
 
 # XXX: This is hard to maintain
-CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies
+CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking
 yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
 	mkdir -p zip
 	for d in $(CODE_FOLDERS) ; do \
devscripts/make_changelog.py | 1

@@ -54,6 +54,7 @@ class CommitGroup(enum.Enum):
                     'core',
                     'dependencies',
                     'jsinterp',
+                    'networking',
                     'outtmpl',
                     'formats',
                     'plugins',
test/test_utils.py | 18

@@ -258,15 +258,6 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
         self.assertEqual(sanitize_url('foo bar'), 'foo bar')
 
-    def test_extract_basic_auth(self):
-        auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
-        self.assertFalse(auth_header('http://foo.bar'))
-        self.assertFalse(auth_header('http://:foo.bar'))
-        self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
-        self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
-        self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
-        self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
-
     def test_expand_path(self):
         def env(var):
             return f'%{var}%' if sys.platform == 'win32' else f'${var}'
@@ -2324,6 +2315,15 @@ Line 1
         self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
                          msg='function on a `re.Match` should give group name as well')
 
+    def test_extract_basic_auth(self):
+        auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
+        self.assertFalse(auth_header('http://foo.bar'))
+        self.assertFalse(auth_header('http://:foo.bar'))
+        self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
+        self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
+        self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
+        self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
+
 
 if __name__ == '__main__':
     unittest.main()
yt_dlp/YoutubeDL.py

@@ -151,6 +151,7 @@ from .utils import (
     write_json_file,
     write_string,
 )
+from .utils.networking import clean_headers
 from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
 
 
 if compat_os_name == 'nt':
@@ -672,6 +673,7 @@ class YoutubeDL:
                     raise
 
         self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
+        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
         if auto_init and auto_init != 'no_verbose_header':
             self.print_debug_header()
 
@@ -745,9 +747,6 @@ class YoutubeDL:
             else self.params['format'] if callable(self.params['format'])
             else self.build_format_selector(self.params['format']))
 
-        # Set http_headers defaults according to std_headers
-        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
-
         hooks = {
             'post_hooks': self.add_post_hook,
             'progress_hooks': self.add_progress_hook,
@@ -941,12 +940,14 @@ class YoutubeDL:
         self.save_console_title()
         return self
 
-    def __exit__(self, *args):
-        self.restore_console_title()
-
+    def save_cookies(self):
         if self.params.get('cookiefile') is not None:
             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 
+    def __exit__(self, *args):
+        self.restore_console_title()
+        self.save_cookies()
+
     def trouble(self, message=None, tb=None, is_error=True):
         """Determine action to take when a download problem appears.
 
@@ -2468,9 +2469,7 @@ class YoutubeDL:
 
     def _calc_headers(self, info_dict):
         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
-        if 'Youtubedl-No-Compression' in res:  # deprecated
-            res.pop('Youtubedl-No-Compression', None)
-            res['Accept-Encoding'] = 'identity'
+        clean_headers(res)
         cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
         if cookies:
             encoder = LenientSimpleCookie()
@@ -3856,12 +3855,6 @@ class YoutubeDL:
     def list_subtitles(self, video_id, subtitles, name='subtitles'):
         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
 
-    def urlopen(self, req):
-        """ Start an HTTP download """
-        if isinstance(req, str):
-            req = sanitized_Request(req)
-        return self._opener.open(req, timeout=self._socket_timeout)
-
     def print_debug_header(self):
         if not self.params.get('verbose'):
             return
@@ -3989,13 +3982,8 @@ class YoutubeDL:
             return
         timeout_val = self.params.get('socket_timeout')
         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
-
-        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
-        opts_cookiefile = self.params.get('cookiefile')
         opts_proxy = self.params.get('proxy')
 
-        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
-
         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
         if opts_proxy is not None:
             if opts_proxy == '':
@@ -4037,6 +4025,18 @@ class YoutubeDL:
         opener.addheaders = []
         self._opener = opener
 
+    @functools.cached_property
+    def cookiejar(self):
+        """Global cookiejar instance"""
+        return load_cookies(
+            self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
+
+    def urlopen(self, req):
+        """ Start an HTTP download """
+        if isinstance(req, str):
+            req = sanitized_Request(req)
+        return self._opener.open(req, timeout=self._socket_timeout)
+
     def encode(self, s):
         if isinstance(s, bytes):
             return s  # Already encoded
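Note on the hunks above: `_calc_headers` now delegates the deprecated `Youtubedl-No-Compression` handling to `clean_headers` from `yt_dlp.utils.networking` (a file not shown in this diff), and `cookiejar` becomes a lazily initialized `functools.cached_property`, so cookies load on first access rather than at opener construction. A minimal sketch of the behaviour `clean_headers` replaces, reconstructed from the removed lines (the real helper may differ in detail, e.g. case-insensitive header handling):

    # Hedged sketch, not the actual yt_dlp/utils/networking.py implementation:
    def clean_headers(headers):
        if 'Youtubedl-No-Compression' in headers:  # deprecated compat header
            headers.pop('Youtubedl-No-Compression', None)
            headers['Accept-Encoding'] = 'identity'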
yt_dlp/networking/__init__.py | 0 (new file)
yt_dlp/networking/_helper.py | 139 (new file)
@@ -0,0 +1,139 @@
+from __future__ import annotations
+
+import contextlib
+import ssl
+import sys
+import urllib.parse
+
+from ..dependencies import certifi
+from ..socks import ProxyType
+from ..utils import YoutubeDLError
+
+
+def ssl_load_certs(context: ssl.SSLContext, use_certifi=True):
+    if certifi and use_certifi:
+        context.load_verify_locations(cafile=certifi.where())
+    else:
+        try:
+            context.load_default_certs()
+        # Work around the issue in load_default_certs when there are bad certificates. See:
+        # https://github.com/yt-dlp/yt-dlp/issues/1060,
+        # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
+        except ssl.SSLError:
+            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
+            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
+                for storename in ('CA', 'ROOT'):
+                    _ssl_load_windows_store_certs(context, storename)
+            context.set_default_verify_paths()
+
+
+def _ssl_load_windows_store_certs(ssl_context, storename):
+    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
+    try:
+        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
+                 if encoding == 'x509_asn' and (
+                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
+    except PermissionError:
+        return
+    for cert in certs:
+        with contextlib.suppress(ssl.SSLError):
+            ssl_context.load_verify_locations(cadata=cert)
+
+
+def make_socks_proxy_opts(socks_proxy):
+    url_components = urllib.parse.urlparse(socks_proxy)
+    if url_components.scheme.lower() == 'socks5':
+        socks_type = ProxyType.SOCKS5
+    elif url_components.scheme.lower() in ('socks', 'socks4'):
+        socks_type = ProxyType.SOCKS4
+    elif url_components.scheme.lower() == 'socks4a':
+        socks_type = ProxyType.SOCKS4A
+
+    def unquote_if_non_empty(s):
+        if not s:
+            return s
+        return urllib.parse.unquote_plus(s)
+    return {
+        'proxytype': socks_type,
+        'addr': url_components.hostname,
+        'port': url_components.port or 1080,
+        'rdns': True,
+        'username': unquote_if_non_empty(url_components.username),
+        'password': unquote_if_non_empty(url_components.password),
+    }
+
+
+def get_redirect_method(method, status):
+    """Unified redirect method handling"""
+
+    # A 303 must either use GET or HEAD for subsequent request
+    # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
+    if status == 303 and method != 'HEAD':
+        method = 'GET'
+    # 301 and 302 redirects are commonly turned into a GET from a POST
+    # for subsequent requests by browsers, so we'll do the same.
+    # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
+    # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
+    if status in (301, 302) and method == 'POST':
+        method = 'GET'
+    return method
+
+
+def make_ssl_context(
+    verify=True,
+    client_certificate=None,
+    client_certificate_key=None,
+    client_certificate_password=None,
+    legacy_support=False,
+    use_certifi=True,
+):
+    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+    context.check_hostname = verify
+    context.verify_mode = ssl.CERT_REQUIRED if verify else ssl.CERT_NONE
+
+    # Some servers may reject requests if ALPN extension is not sent. See:
+    # https://github.com/python/cpython/issues/85140
+    # https://github.com/yt-dlp/yt-dlp/issues/3878
+    with contextlib.suppress(NotImplementedError):
+        context.set_alpn_protocols(['http/1.1'])
+    if verify:
+        ssl_load_certs(context, use_certifi)
+
+    if legacy_support:
+        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
+        context.set_ciphers('DEFAULT')  # compat
+
+    elif ssl.OPENSSL_VERSION_INFO >= (1, 1, 1) and not ssl.OPENSSL_VERSION.startswith('LibreSSL'):
+        # Use the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
+        # This is to ensure consistent behavior across Python versions and libraries, and help avoid fingerprinting
+        # in some situations [2][3].
+        # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
+        # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
+        # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
+        # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
+        # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
+        # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
+        # 4. https://peps.python.org/pep-0644/
+        # 5. https://peps.python.org/pep-0644/#libressl-support
+        # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
+        context.set_ciphers(
+            '@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
+        context.minimum_version = ssl.TLSVersion.TLSv1_2
+
+    if client_certificate:
+        try:
+            context.load_cert_chain(
+                client_certificate, keyfile=client_certificate_key,
+                password=client_certificate_password)
+        except ssl.SSLError:
+            raise YoutubeDLError('Unable to load client certificate')
+
+    return context
+
+
+def add_accept_encoding_header(headers, supported_encodings):
+    if supported_encodings and 'Accept-Encoding' not in headers:
+        headers['Accept-Encoding'] = ', '.join(supported_encodings)
+
+    elif 'Accept-Encoding' not in headers:
+        headers['Accept-Encoding'] = 'identity'
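Since `get_redirect_method` is moved verbatim above and is a pure function, its contract is easy to illustrate; a quick usage sketch:

    from yt_dlp.networking._helper import get_redirect_method

    # 303 See Other: any non-HEAD method becomes GET (RFC 7231 section 6.4.4)
    assert get_redirect_method('POST', 303) == 'GET'
    assert get_redirect_method('HEAD', 303) == 'HEAD'
    # 301/302: POST turns into GET, matching common browser behaviour
    assert get_redirect_method('POST', 302) == 'GET'
    # Everything else passes through unchanged
    assert get_redirect_method('PUT', 307) == 'PUT'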
yt_dlp/networking/_urllib.py | 315 (new file)
@@ -0,0 +1,315 @@
+import functools
+import gzip
+import http.client
+import io
+import socket
+import ssl
+import urllib.error
+import urllib.parse
+import urllib.request
+import urllib.response
+import zlib
+
+from ._helper import (
+    add_accept_encoding_header,
+    get_redirect_method,
+    make_socks_proxy_opts,
+)
+from ..dependencies import brotli
+from ..socks import sockssocket
+from ..utils import escape_url, update_url_query
+from ..utils.networking import clean_headers, std_headers
+
+SUPPORTED_ENCODINGS = ['gzip', 'deflate']
+
+if brotli:
+    SUPPORTED_ENCODINGS.append('br')
+
+
+def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+    hc = http_class(*args, **kwargs)
+    source_address = ydl_handler._params.get('source_address')
+
+    if source_address is not None:
+        # This is to workaround _create_connection() from socket where it will try all
+        # address data from getaddrinfo() including IPv6. This filters the result from
+        # getaddrinfo() based on the source_address value.
+        # This is based on the cpython socket.create_connection() function.
+        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
+        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+            host, port = address
+            err = None
+            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
+            ip_addrs = [addr for addr in addrs if addr[0] == af]
+            if addrs and not ip_addrs:
+                ip_version = 'v4' if af == socket.AF_INET else 'v6'
+                raise OSError(
+                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
+                    % (ip_version, source_address[0]))
+            for res in ip_addrs:
+                af, socktype, proto, canonname, sa = res
+                sock = None
+                try:
+                    sock = socket.socket(af, socktype, proto)
+                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+                        sock.settimeout(timeout)
+                    sock.bind(source_address)
+                    sock.connect(sa)
+                    err = None  # Explicitly break reference cycle
+                    return sock
+                except OSError as _:
+                    err = _
+                    if sock is not None:
+                        sock.close()
+            if err is not None:
+                raise err
+            else:
+                raise OSError('getaddrinfo returns an empty list')
+        if hasattr(hc, '_create_connection'):
+            hc._create_connection = _create_connection
+        hc.source_address = (source_address, 0)
+
+    return hc
+
+
+class HTTPHandler(urllib.request.HTTPHandler):
+    """Handler for HTTP requests and responses.
+
+    This class, when installed with an OpenerDirector, automatically adds
+    the standard headers to every HTTP request and handles gzipped, deflated and
+    brotli responses from web servers.
+
+    Part of this code was copied from:
+
+    http://techknack.net/python-urllib2-handlers/
+
+    Andrew Rowls, the author of that code, agreed to release it to the
+    public domain.
+    """
+
+    def __init__(self, params, *args, **kwargs):
+        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
+        self._params = params
+
+    def http_open(self, req):
+        conn_class = http.client.HTTPConnection
+
+        socks_proxy = req.headers.get('Ytdl-socks-proxy')
+        if socks_proxy:
+            conn_class = make_socks_conn_class(conn_class, socks_proxy)
+            del req.headers['Ytdl-socks-proxy']
+
+        return self.do_open(functools.partial(
+            _create_http_connection, self, conn_class, False),
+            req)
+
+    @staticmethod
+    def deflate(data):
+        if not data:
+            return data
+        try:
+            return zlib.decompress(data, -zlib.MAX_WBITS)
+        except zlib.error:
+            return zlib.decompress(data)
+
+    @staticmethod
+    def brotli(data):
+        if not data:
+            return data
+        return brotli.decompress(data)
+
+    @staticmethod
+    def gz(data):
+        gz = gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb')
+        try:
+            return gz.read()
+        except OSError as original_oserror:
+            # There may be junk add the end of the file
+            # See http://stackoverflow.com/q/4928560/35070 for details
+            for i in range(1, 1024):
+                try:
+                    gz = gzip.GzipFile(fileobj=io.BytesIO(data[:-i]), mode='rb')
+                    return gz.read()
+                except OSError:
+                    continue
+            else:
+                raise original_oserror
+
+    def http_request(self, req):
+        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
+        # always respected by websites, some tend to give out URLs with non percent-encoded
+        # non-ASCII characters (see telemb.py, ard.py [#3412])
+        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
+        # To work around aforementioned issue we will replace request's original URL with
+        # percent-encoded one
+        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
+        # the code of this workaround has been moved here from YoutubeDL.urlopen()
+        url = req.get_full_url()
+        url_escaped = escape_url(url)
+
+        # Substitute URL if any change after escaping
+        if url != url_escaped:
+            req = update_Request(req, url=url_escaped)
+
+        for h, v in self._params.get('http_headers', std_headers).items():
+            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+            # The dict keys are capitalized because of this bug by urllib
+            if h.capitalize() not in req.headers:
+                req.add_header(h, v)
+
+        clean_headers(req.headers)
+        add_accept_encoding_header(req.headers, SUPPORTED_ENCODINGS)
+        return super().do_request_(req)
+
+    def http_response(self, req, resp):
+        old_resp = resp
+
+        # Content-Encoding header lists the encodings in order that they were applied [1].
+        # To decompress, we simply do the reverse.
+        # [1]: https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding
+        decoded_response = None
+        for encoding in (e.strip() for e in reversed(resp.headers.get('Content-encoding', '').split(','))):
+            if encoding == 'gzip':
+                decoded_response = self.gz(decoded_response or resp.read())
+            elif encoding == 'deflate':
+                decoded_response = self.deflate(decoded_response or resp.read())
+            elif encoding == 'br' and brotli:
+                decoded_response = self.brotli(decoded_response or resp.read())
+
+        if decoded_response is not None:
+            resp = urllib.request.addinfourl(io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code)
+            resp.msg = old_resp.msg
+        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
+        # https://github.com/ytdl-org/youtube-dl/issues/6457).
+        if 300 <= resp.code < 400:
+            location = resp.headers.get('Location')
+            if location:
+                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
+                location = location.encode('iso-8859-1').decode()
+                location_escaped = escape_url(location)
+                if location != location_escaped:
+                    del resp.headers['Location']
+                    resp.headers['Location'] = location_escaped
+        return resp
+
+    https_request = http_request
+    https_response = http_response
+
+
+def make_socks_conn_class(base_class, socks_proxy):
+    assert issubclass(base_class, (
+        http.client.HTTPConnection, http.client.HTTPSConnection))
+
+    proxy_args = make_socks_proxy_opts(socks_proxy)
+
+    class SocksConnection(base_class):
+        def connect(self):
+            self.sock = sockssocket()
+            self.sock.setproxy(**proxy_args)
+            if isinstance(self.timeout, (int, float)):
+                self.sock.settimeout(self.timeout)
+            self.sock.connect((self.host, self.port))
+
+            if isinstance(self, http.client.HTTPSConnection):
+                if hasattr(self, '_context'):  # Python > 2.6
+                    self.sock = self._context.wrap_socket(
+                        self.sock, server_hostname=self.host)
+                else:
+                    self.sock = ssl.wrap_socket(self.sock)
+
+    return SocksConnection
+
+
+class RedirectHandler(urllib.request.HTTPRedirectHandler):
+    """YoutubeDL redirect handler
+
+    The code is based on HTTPRedirectHandler implementation from CPython [1].
+
+    This redirect handler fixes and improves the logic to better align with RFC7261
+     and what browsers tend to do [2][3]
+
+    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
+    2. https://datatracker.ietf.org/doc/html/rfc7231
+    3. https://github.com/python/cpython/issues/91306
+    """
+
+    http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        if code not in (301, 302, 303, 307, 308):
+            raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
+
+        new_data = req.data
+
+        # Technically the Cookie header should be in unredirected_hdrs,
+        # however in practice some may set it in normal headers anyway.
+        # We will remove it here to prevent any leaks.
+        remove_headers = ['Cookie']
+
+        new_method = get_redirect_method(req.get_method(), code)
+        # only remove payload if method changed (e.g. POST to GET)
+        if new_method != req.get_method():
+            new_data = None
+            remove_headers.extend(['Content-Length', 'Content-Type'])
+
+        new_headers = {k: v for k, v in req.headers.items() if k.title() not in remove_headers}
+
+        return urllib.request.Request(
+            newurl, headers=new_headers, origin_req_host=req.origin_req_host,
+            unverifiable=True, method=new_method, data=new_data)
+
+
+class ProxyHandler(urllib.request.ProxyHandler):
+    def __init__(self, proxies=None):
+        # Set default handlers
+        for type in ('http', 'https'):
+            setattr(self, '%s_open' % type,
+                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
+                        meth(r, proxy, type))
+        urllib.request.ProxyHandler.__init__(self, proxies)
+
+    def proxy_open(self, req, proxy, type):
+        req_proxy = req.headers.get('Ytdl-request-proxy')
+        if req_proxy is not None:
+            proxy = req_proxy
+            del req.headers['Ytdl-request-proxy']
+
+        if proxy == '__noproxy__':
+            return None  # No Proxy
+        if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+            req.add_header('Ytdl-socks-proxy', proxy)
+            # yt-dlp's http/https handlers do wrapping the socket with socks
+            return None
+        return urllib.request.ProxyHandler.proxy_open(
+            self, req, proxy, type)
+
+
+class PUTRequest(urllib.request.Request):
+    def get_method(self):
+        return 'PUT'
+
+
+class HEADRequest(urllib.request.Request):
+    def get_method(self):
+        return 'HEAD'
+
+
+def update_Request(req, url=None, data=None, headers=None, query=None):
+    req_headers = req.headers.copy()
+    req_headers.update(headers or {})
+    req_data = data or req.data
+    req_url = update_url_query(url or req.get_full_url(), query)
+    req_get_method = req.get_method()
+    if req_get_method == 'HEAD':
+        req_type = HEADRequest
+    elif req_get_method == 'PUT':
+        req_type = PUTRequest
+    else:
+        req_type = urllib.request.Request
+    new_req = req_type(
+        req_url, data=req_data, headers=req_headers,
+        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+    if hasattr(req, 'timeout'):
+        new_req.timeout = req.timeout
+    return new_req
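`make_socks_proxy_opts` (defined in _helper.py above and consumed here by `make_socks_conn_class`) is likewise moved verbatim; a usage sketch of how a SOCKS proxy URL decomposes:

    from yt_dlp.networking._helper import make_socks_proxy_opts

    opts = make_socks_proxy_opts('socks5://user:p%40ss@127.0.0.1:9050')
    # {'proxytype': ProxyType.SOCKS5, 'addr': '127.0.0.1', 'port': 9050,
    #  'rdns': True, 'username': 'user', 'password': 'p@ss'}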
yt_dlp/networking/exceptions.py | 9 (new file)
@@ -0,0 +1,9 @@
+import http.client
+import socket
+import ssl
+import urllib.error
+
+network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
+if hasattr(ssl, 'CertificateError'):
+    network_exceptions.append(ssl.CertificateError)
+network_exceptions = tuple(network_exceptions)
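The moved `network_exceptions` tuple keeps its role as the catch-all for transport-level errors; a usage sketch:

    import urllib.request
    from yt_dlp.networking.exceptions import network_exceptions

    try:
        urllib.request.urlopen('https://example.invalid', timeout=5)
    except network_exceptions as e:
        print(f'Network-level failure: {e}')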
yt_dlp/utils/__init__.py

@@ -3,13 +3,10 @@ import warnings
 
 from ..compat.compat_utils import passthrough_module
 
-# XXX: Implement this the same way as other DeprecationWarnings without circular import
-passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
-    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
+passthrough_module(__name__, '._deprecated')
 del passthrough_module
 
 # isort: off
 from .traversal import *
 from ._utils import *
 from ._utils import _configuration_args, _get_exe_version_output
-from ._deprecated import *
| @@ -1,7 +1,26 @@ | |||||||
| """Deprecated - New code should avoid these""" | """Deprecated - New code should avoid these""" | ||||||
|  | import warnings | ||||||
|  | 
 | ||||||
|  | from ..compat.compat_utils import passthrough_module | ||||||
|  | 
 | ||||||
|  | # XXX: Implement this the same way as other DeprecationWarnings without circular import | ||||||
|  | passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn( | ||||||
|  |     DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) | ||||||
|  | del passthrough_module | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| from ._utils import preferredencoding | from ._utils import preferredencoding | ||||||
| 
 | 
 | ||||||
|  | # isort: split | ||||||
|  | from ..networking._urllib import PUTRequest  # noqa: F401 | ||||||
|  | from ..networking._urllib import SUPPORTED_ENCODINGS, HEADRequest  # noqa: F401 | ||||||
|  | from ..networking._urllib import HTTPHandler as YoutubeDLHandler  # noqa: F401 | ||||||
|  | from ..networking._urllib import ProxyHandler as PerRequestProxyHandler  # noqa: F401 | ||||||
|  | from ..networking._urllib import RedirectHandler as YoutubeDLRedirectHandler  # noqa: F401 | ||||||
|  | from ..networking._urllib import make_socks_conn_class, update_Request  # noqa: F401 | ||||||
|  | from ..networking.exceptions import network_exceptions  # noqa: F401 | ||||||
|  | from .networking import random_user_agent, std_headers  # noqa: F401 | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def encodeFilename(s, for_subprocess=False): | def encodeFilename(s, for_subprocess=False): | ||||||
|     assert isinstance(s, str) |     assert isinstance(s, str) | ||||||
|   | |||||||
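Net effect of the two hunks above: `yt_dlp.utils` forwards unknown attributes to `_deprecated`, which re-exports the moved names, so old import sites keep resolving to the same objects. A quick sketch, assuming this commit's layout:

    # Old-style imports still work via the passthrough and re-exports...
    from yt_dlp.utils import HEADRequest, std_headers

    # ...but new code should target the new homes directly:
    from yt_dlp.networking._urllib import HEADRequest as _HEADRequest
    from yt_dlp.utils.networking import std_headers as _std_headers

    assert HEADRequest is _HEADRequest
    assert std_headers is _std_headers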
| @@ -11,7 +11,6 @@ import datetime | |||||||
| import email.header | import email.header | ||||||
| import email.utils | import email.utils | ||||||
| import errno | import errno | ||||||
| import gzip |  | ||||||
| import hashlib | import hashlib | ||||||
| import hmac | import hmac | ||||||
| import html.entities | import html.entities | ||||||
| @@ -46,7 +45,6 @@ import urllib.error | |||||||
| import urllib.parse | import urllib.parse | ||||||
| import urllib.request | import urllib.request | ||||||
| import xml.etree.ElementTree | import xml.etree.ElementTree | ||||||
| import zlib |  | ||||||
| 
 | 
 | ||||||
| from . import traversal | from . import traversal | ||||||
| 
 | 
 | ||||||
| @@ -58,8 +56,7 @@ from ..compat import ( | |||||||
|     compat_os_name, |     compat_os_name, | ||||||
|     compat_shlex_quote, |     compat_shlex_quote, | ||||||
| ) | ) | ||||||
| from ..dependencies import brotli, certifi, websockets, xattr | from ..dependencies import websockets, xattr | ||||||
| from ..socks import ProxyType, sockssocket |  | ||||||
| 
 | 
 | ||||||
| __name__ = __name__.rsplit('.', 1)[0]  # Pretend to be the parent module | __name__ = __name__.rsplit('.', 1)[0]  # Pretend to be the parent module | ||||||
| 
 | 
 | ||||||
| @@ -67,65 +64,6 @@ __name__ = __name__.rsplit('.', 1)[0]  # Pretend to be the parent module | |||||||
| compiled_regex_type = type(re.compile('')) | compiled_regex_type = type(re.compile('')) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def random_user_agent(): |  | ||||||
|     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' |  | ||||||
|     _CHROME_VERSIONS = ( |  | ||||||
|         '90.0.4430.212', |  | ||||||
|         '90.0.4430.24', |  | ||||||
|         '90.0.4430.70', |  | ||||||
|         '90.0.4430.72', |  | ||||||
|         '90.0.4430.85', |  | ||||||
|         '90.0.4430.93', |  | ||||||
|         '91.0.4472.101', |  | ||||||
|         '91.0.4472.106', |  | ||||||
|         '91.0.4472.114', |  | ||||||
|         '91.0.4472.124', |  | ||||||
|         '91.0.4472.164', |  | ||||||
|         '91.0.4472.19', |  | ||||||
|         '91.0.4472.77', |  | ||||||
|         '92.0.4515.107', |  | ||||||
|         '92.0.4515.115', |  | ||||||
|         '92.0.4515.131', |  | ||||||
|         '92.0.4515.159', |  | ||||||
|         '92.0.4515.43', |  | ||||||
|         '93.0.4556.0', |  | ||||||
|         '93.0.4577.15', |  | ||||||
|         '93.0.4577.63', |  | ||||||
|         '93.0.4577.82', |  | ||||||
|         '94.0.4606.41', |  | ||||||
|         '94.0.4606.54', |  | ||||||
|         '94.0.4606.61', |  | ||||||
|         '94.0.4606.71', |  | ||||||
|         '94.0.4606.81', |  | ||||||
|         '94.0.4606.85', |  | ||||||
|         '95.0.4638.17', |  | ||||||
|         '95.0.4638.50', |  | ||||||
|         '95.0.4638.54', |  | ||||||
|         '95.0.4638.69', |  | ||||||
|         '95.0.4638.74', |  | ||||||
|         '96.0.4664.18', |  | ||||||
|         '96.0.4664.45', |  | ||||||
|         '96.0.4664.55', |  | ||||||
|         '96.0.4664.93', |  | ||||||
|         '97.0.4692.20', |  | ||||||
|     ) |  | ||||||
|     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| SUPPORTED_ENCODINGS = [ |  | ||||||
|     'gzip', 'deflate' |  | ||||||
| ] |  | ||||||
| if brotli: |  | ||||||
|     SUPPORTED_ENCODINGS.append('br') |  | ||||||
| 
 |  | ||||||
| std_headers = { |  | ||||||
|     'User-Agent': random_user_agent(), |  | ||||||
|     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', |  | ||||||
|     'Accept-Language': 'en-us,en;q=0.5', |  | ||||||
|     'Sec-Fetch-Mode': 'navigate', |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| USER_AGENTS = { | USER_AGENTS = { | ||||||
|     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', |     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', | ||||||
| } | } | ||||||
| @@ -958,80 +896,16 @@ def formatSeconds(secs, delim=':', msec=False): | |||||||
|     return '%s.%03d' % (ret, time.milliseconds) if msec else ret |     return '%s.%03d' % (ret, time.milliseconds) if msec else ret | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _ssl_load_windows_store_certs(ssl_context, storename): |  | ||||||
|     # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py |  | ||||||
|     try: |  | ||||||
|         certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename) |  | ||||||
|                  if encoding == 'x509_asn' and ( |  | ||||||
|                      trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)] |  | ||||||
|     except PermissionError: |  | ||||||
|         return |  | ||||||
|     for cert in certs: |  | ||||||
|         with contextlib.suppress(ssl.SSLError): |  | ||||||
|             ssl_context.load_verify_locations(cadata=cert) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def make_HTTPS_handler(params, **kwargs): | def make_HTTPS_handler(params, **kwargs): | ||||||
|     opts_check_certificate = not params.get('nocheckcertificate') |     from ..networking._helper import make_ssl_context | ||||||
|     context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) |     return YoutubeDLHTTPSHandler(params, context=make_ssl_context( | ||||||
|     context.check_hostname = opts_check_certificate |         verify=not params.get('nocheckcertificate'), | ||||||
|     if params.get('legacyserverconnect'): |         client_certificate=params.get('client_certificate'), | ||||||
|         context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT |         client_certificate_key=params.get('client_certificate_key'), | ||||||
|         # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998 |         client_certificate_password=params.get('client_certificate_password'), | ||||||
|         context.set_ciphers('DEFAULT') |         legacy_support=params.get('legacyserverconnect'), | ||||||
|     elif ( |         use_certifi='no-certifi' not in params.get('compat_opts', []), | ||||||
|         sys.version_info < (3, 10) |     ), **kwargs) | ||||||
|         and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1) |  | ||||||
|         and not ssl.OPENSSL_VERSION.startswith('LibreSSL') |  | ||||||
|     ): |  | ||||||
|         # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1]. |  | ||||||
|         # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting |  | ||||||
|         # in some situations [2][3]. |  | ||||||
|         # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely |  | ||||||
|         # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe. |  | ||||||
|         # LibreSSL is excluded until further investigation due to cipher support issues [5][6]. |  | ||||||
|         # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536 |  | ||||||
|         # 2. https://github.com/yt-dlp/yt-dlp/issues/4627 |  | ||||||
|         # 3. https://github.com/yt-dlp/yt-dlp/pull/5294 |  | ||||||
|         # 4. https://peps.python.org/pep-0644/ |  | ||||||
|         # 5. https://peps.python.org/pep-0644/#libressl-support |  | ||||||
|         # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368 |  | ||||||
|         context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM') |  | ||||||
|         context.minimum_version = ssl.TLSVersion.TLSv1_2 |  | ||||||
| 
 |  | ||||||
|     context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE |  | ||||||
|     if opts_check_certificate: |  | ||||||
|         if certifi and 'no-certifi' not in params.get('compat_opts', []): |  | ||||||
|             context.load_verify_locations(cafile=certifi.where()) |  | ||||||
|         else: |  | ||||||
|             try: |  | ||||||
|                 context.load_default_certs() |  | ||||||
|                 # Work around the issue in load_default_certs when there are bad certificates. See: |  | ||||||
|                 # https://github.com/yt-dlp/yt-dlp/issues/1060, |  | ||||||
|                 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312 |  | ||||||
|             except ssl.SSLError: |  | ||||||
|                 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151 |  | ||||||
|                 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): |  | ||||||
|                     for storename in ('CA', 'ROOT'): |  | ||||||
|                         _ssl_load_windows_store_certs(context, storename) |  | ||||||
|                 context.set_default_verify_paths() |  | ||||||
| 
 |  | ||||||
|     client_certfile = params.get('client_certificate') |  | ||||||
|     if client_certfile: |  | ||||||
|         try: |  | ||||||
|             context.load_cert_chain( |  | ||||||
|                 client_certfile, keyfile=params.get('client_certificate_key'), |  | ||||||
|                 password=params.get('client_certificate_password')) |  | ||||||
|         except ssl.SSLError: |  | ||||||
|             raise YoutubeDLError('Unable to load client certificate') |  | ||||||
| 
 |  | ||||||
|     # Some servers may reject requests if ALPN extension is not sent. See: |  | ||||||
|     # https://github.com/python/cpython/issues/85140 |  | ||||||
|     # https://github.com/yt-dlp/yt-dlp/issues/3878 |  | ||||||
|     with contextlib.suppress(NotImplementedError): |  | ||||||
|         context.set_alpn_protocols(['http/1.1']) |  | ||||||
| 
 |  | ||||||
|     return YoutubeDLHTTPSHandler(params, context=context, **kwargs) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def bug_reports_message(before=';'): | def bug_reports_message(before=';'): | ||||||
| @@ -1059,12 +933,6 @@ class YoutubeDLError(Exception): | |||||||
|         super().__init__(self.msg) |         super().__init__(self.msg) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error] |  | ||||||
| if hasattr(ssl, 'CertificateError'): |  | ||||||
|     network_exceptions.append(ssl.CertificateError) |  | ||||||
| network_exceptions = tuple(network_exceptions) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class ExtractorError(YoutubeDLError): | class ExtractorError(YoutubeDLError): | ||||||
|     """Error during info extraction.""" |     """Error during info extraction.""" | ||||||
| 
 | 
 | ||||||
| @@ -1072,6 +940,7 @@ class ExtractorError(YoutubeDLError): | |||||||
|         """ tb, if given, is the original traceback (so that it can be printed out). |         """ tb, if given, is the original traceback (so that it can be printed out). | ||||||
|         If expected is set, this is a normal error message and most likely not a bug in yt-dlp. |         If expected is set, this is a normal error message and most likely not a bug in yt-dlp. | ||||||
|         """ |         """ | ||||||
|  |         from ..networking.exceptions import network_exceptions | ||||||
|         if sys.exc_info()[0] in network_exceptions: |         if sys.exc_info()[0] in network_exceptions: | ||||||
|             expected = True |             expected = True | ||||||
| 
 | 
 | ||||||
| @@ -1271,225 +1140,6 @@ class XAttrUnavailableError(YoutubeDLError): | |||||||
|     pass |     pass | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): |  | ||||||
|     hc = http_class(*args, **kwargs) |  | ||||||
|     source_address = ydl_handler._params.get('source_address') |  | ||||||
| 
 |  | ||||||
|     if source_address is not None: |  | ||||||
|         # This is to workaround _create_connection() from socket where it will try all |  | ||||||
|         # address data from getaddrinfo() including IPv6. This filters the result from |  | ||||||
|         # getaddrinfo() based on the source_address value. |  | ||||||
|         # This is based on the cpython socket.create_connection() function. |  | ||||||
|         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691 |  | ||||||
|         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): |  | ||||||
|             host, port = address |  | ||||||
|             err = None |  | ||||||
|             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) |  | ||||||
|             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6 |  | ||||||
|             ip_addrs = [addr for addr in addrs if addr[0] == af] |  | ||||||
|             if addrs and not ip_addrs: |  | ||||||
|                 ip_version = 'v4' if af == socket.AF_INET else 'v6' |  | ||||||
|                 raise OSError( |  | ||||||
|                     "No remote IP%s addresses available for connect, can't use '%s' as source address" |  | ||||||
|                     % (ip_version, source_address[0])) |  | ||||||
|             for res in ip_addrs: |  | ||||||
|                 af, socktype, proto, canonname, sa = res |  | ||||||
|                 sock = None |  | ||||||
|                 try: |  | ||||||
|                     sock = socket.socket(af, socktype, proto) |  | ||||||
|                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: |  | ||||||
|                         sock.settimeout(timeout) |  | ||||||
|                     sock.bind(source_address) |  | ||||||
|                     sock.connect(sa) |  | ||||||
|                     err = None  # Explicitly break reference cycle |  | ||||||
|                     return sock |  | ||||||
|                 except OSError as _: |  | ||||||
|                     err = _ |  | ||||||
|                     if sock is not None: |  | ||||||
|                         sock.close() |  | ||||||
|             if err is not None: |  | ||||||
|                 raise err |  | ||||||
|             else: |  | ||||||
|                 raise OSError('getaddrinfo returns an empty list') |  | ||||||
|         if hasattr(hc, '_create_connection'): |  | ||||||
|             hc._create_connection = _create_connection |  | ||||||
|         hc.source_address = (source_address, 0) |  | ||||||
| 
 |  | ||||||
|     return hc |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class YoutubeDLHandler(urllib.request.HTTPHandler): |  | ||||||
|     """Handler for HTTP requests and responses. |  | ||||||
| 
 |  | ||||||
|     This class, when installed with an OpenerDirector, automatically adds |  | ||||||
|     the standard headers to every HTTP request and handles gzipped, deflated and |  | ||||||
|     brotli responses from web servers. |  | ||||||
| 
 |  | ||||||
|     Part of this code was copied from: |  | ||||||
| 
 |  | ||||||
|     http://techknack.net/python-urllib2-handlers/ |  | ||||||
| 
 |  | ||||||
|     Andrew Rowls, the author of that code, agreed to release it to the |  | ||||||
|     public domain. |  | ||||||
|     """ |  | ||||||
| 
 |  | ||||||
|     def __init__(self, params, *args, **kwargs): |  | ||||||
|         urllib.request.HTTPHandler.__init__(self, *args, **kwargs) |  | ||||||
|         self._params = params |  | ||||||
| 
 |  | ||||||
|     def http_open(self, req): |  | ||||||
|         conn_class = http.client.HTTPConnection |  | ||||||
| 
 |  | ||||||
|         socks_proxy = req.headers.get('Ytdl-socks-proxy') |  | ||||||
|         if socks_proxy: |  | ||||||
|             conn_class = make_socks_conn_class(conn_class, socks_proxy) |  | ||||||
|             del req.headers['Ytdl-socks-proxy'] |  | ||||||
| 
 |  | ||||||
|         return self.do_open(functools.partial( |  | ||||||
|             _create_http_connection, self, conn_class, False), |  | ||||||
|             req) |  | ||||||
| 
 |  | ||||||
|     @staticmethod |  | ||||||
|     def deflate(data): |  | ||||||
|         if not data: |  | ||||||
|             return data |  | ||||||
|         try: |  | ||||||
|             return zlib.decompress(data, -zlib.MAX_WBITS) |  | ||||||
|         except zlib.error: |  | ||||||
|             return zlib.decompress(data) |  | ||||||
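The deflate helper above exists because servers disagree on what Content-Encoding: deflate means: some send a raw DEFLATE stream, others a zlib-wrapped one. Decompression is therefore tried raw first (-zlib.MAX_WBITS), then wrapped. An illustrative round-trip (the slicing trick for building a raw stream is a shortcut, not part of the patch):

import zlib

payload = b'some response body'
wrapped = zlib.compress(payload)  # zlib-wrapped DEFLATE (2-byte header + 4-byte checksum)
raw = wrapped[2:-4]               # strip header and checksum -> raw DEFLATE

assert YoutubeDLHandler.deflate(raw) == payload
assert YoutubeDLHandler.deflate(wrapped) == payload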
| 
 |  | ||||||
|     @staticmethod |  | ||||||
|     def brotli(data): |  | ||||||
|         if not data: |  | ||||||
|             return data |  | ||||||
|         return brotli.decompress(data) |  | ||||||
| 
 |  | ||||||
|     @staticmethod |  | ||||||
|     def gz(data): |  | ||||||
|         gz = gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb') |  | ||||||
|         try: |  | ||||||
|             return gz.read() |  | ||||||
|         except OSError as original_oserror: |  | ||||||
|             # There may be junk at the end of the file |  | ||||||
|             # See http://stackoverflow.com/q/4928560/35070 for details |  | ||||||
|             for i in range(1, 1024): |  | ||||||
|                 try: |  | ||||||
|                     gz = gzip.GzipFile(fileobj=io.BytesIO(data[:-i]), mode='rb') |  | ||||||
|                     return gz.read() |  | ||||||
|                 except OSError: |  | ||||||
|                     continue |  | ||||||
|             else: |  | ||||||
|                 raise original_oserror |  | ||||||
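The retry loop above trims up to 1 KiB off the tail because some servers append junk after the gzip member, which makes GzipFile.read() raise an OSError. An illustrative case (the junk bytes are made up):

import gzip

data = gzip.compress(b'payload') + b'\x00 trailing junk from a buggy server'
assert YoutubeDLHandler.gz(data) == b'payload'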
| 
 |  | ||||||
|     def http_request(self, req): |  | ||||||
|         # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not |  | ||||||
|         # always respected by websites: some tend to give out URLs with non-percent-encoded |  | ||||||
|         # non-ASCII characters (see telemb.py, ard.py [#3412]). |  | ||||||
|         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991), |  | ||||||
|         # so to work around the issue we replace the request's original URL with a |  | ||||||
|         # percent-encoded one |  | ||||||
|         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) |  | ||||||
|         # the code of this workaround has been moved here from YoutubeDL.urlopen() |  | ||||||
|         url = req.get_full_url() |  | ||||||
|         url_escaped = escape_url(url) |  | ||||||
| 
 |  | ||||||
|         # Substitute the URL if it changed after escaping |  | ||||||
|         if url != url_escaped: |  | ||||||
|             req = update_Request(req, url=url_escaped) |  | ||||||
| 
 |  | ||||||
|         for h, v in self._params.get('http_headers', std_headers).items(): |  | ||||||
|             # .capitalize() is needed because of Python bug 2275: http://bugs.python.org/issue2275 |  | ||||||
|             # (urllib capitalizes the dict keys because of this bug) |  | ||||||
|             if h.capitalize() not in req.headers: |  | ||||||
|                 req.add_header(h, v) |  | ||||||
| 
 |  | ||||||
|         if 'Youtubedl-no-compression' in req.headers:  # deprecated |  | ||||||
|             req.headers.pop('Youtubedl-no-compression', None) |  | ||||||
|             req.add_header('Accept-encoding', 'identity') |  | ||||||
| 
 |  | ||||||
|         if 'Accept-encoding' not in req.headers: |  | ||||||
|             req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS)) |  | ||||||
| 
 |  | ||||||
|         return super().do_request_(req) |  | ||||||
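The net effect of http_request on the URL itself: any non-ASCII characters are percent-encoded via escape_url before urllib ever sees them. Roughly (the URL is made up; the output shown is what escape_url should produce for it):

# escape_url is the module helper used above
assert escape_url('http://example.com/pväth?q=ä') == 'http://example.com/pv%C3%A4th?q=%C3%A4'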
| 
 |  | ||||||
|     def http_response(self, req, resp): |  | ||||||
|         old_resp = resp |  | ||||||
| 
 |  | ||||||
|         # The Content-Encoding header lists the encodings in the order they were applied [1]. |  | ||||||
|         # To decompress, we simply do the reverse. |  | ||||||
|         # [1]: https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding |  | ||||||
|         decoded_response = None |  | ||||||
|         for encoding in (e.strip() for e in reversed(resp.headers.get('Content-encoding', '').split(','))): |  | ||||||
|             if encoding == 'gzip': |  | ||||||
|                 decoded_response = self.gz(decoded_response or resp.read()) |  | ||||||
|             elif encoding == 'deflate': |  | ||||||
|                 decoded_response = self.deflate(decoded_response or resp.read()) |  | ||||||
|             elif encoding == 'br' and brotli: |  | ||||||
|                 decoded_response = self.brotli(decoded_response or resp.read()) |  | ||||||
| 
 |  | ||||||
|         if decoded_response is not None: |  | ||||||
|             resp = urllib.request.addinfourl(io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code) |  | ||||||
|             resp.msg = old_resp.msg |  | ||||||
|         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see |  | ||||||
|         # https://github.com/ytdl-org/youtube-dl/issues/6457). |  | ||||||
|         if 300 <= resp.code < 400: |  | ||||||
|             location = resp.headers.get('Location') |  | ||||||
|             if location: |  | ||||||
|                 # Per RFC 2616, the default charset is iso-8859-1, which Python 3 respects |  | ||||||
|                 location = location.encode('iso-8859-1').decode() |  | ||||||
|                 location_escaped = escape_url(location) |  | ||||||
|                 if location != location_escaped: |  | ||||||
|                     del resp.headers['Location'] |  | ||||||
|                     resp.headers['Location'] = location_escaped |  | ||||||
|         return resp |  | ||||||
| 
 |  | ||||||
|     https_request = http_request |  | ||||||
|     https_response = http_response |  | ||||||
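Since Content-Encoding lists encodings in application order, a header of gzip, br means brotli was applied last and must be undone first. A standalone sketch of the same reverse walk (decode_body is illustrative; the brotli package is assumed to be installed):

import gzip
import zlib

import brotli  # optional dependency, assumed available here

def decode_body(body, content_encoding):
    # Undo the encodings in reverse application order, as http_response does
    decoders = {'gzip': gzip.decompress, 'deflate': zlib.decompress, 'br': brotli.decompress}
    for enc in reversed([e.strip() for e in content_encoding.split(',')]):
        if enc in decoders:
            body = decoders[enc](body)
    return body

body = brotli.compress(gzip.compress(b'data'))  # 'Content-Encoding: gzip, br'
assert decode_body(body, 'gzip, br') == b'data'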
| 
 |  | ||||||
| 
 |  | ||||||
| def make_socks_conn_class(base_class, socks_proxy): |  | ||||||
|     assert issubclass(base_class, ( |  | ||||||
|         http.client.HTTPConnection, http.client.HTTPSConnection)) |  | ||||||
| 
 |  | ||||||
|     url_components = urllib.parse.urlparse(socks_proxy) |  | ||||||
|     if url_components.scheme.lower() == 'socks5': |  | ||||||
|         socks_type = ProxyType.SOCKS5 |  | ||||||
|     elif url_components.scheme.lower() in ('socks', 'socks4'): |  | ||||||
|         socks_type = ProxyType.SOCKS4 |  | ||||||
|     elif url_components.scheme.lower() == 'socks4a': |  | ||||||
|         socks_type = ProxyType.SOCKS4A |  | ||||||
| 
 |  | ||||||
|     def unquote_if_non_empty(s): |  | ||||||
|         if not s: |  | ||||||
|             return s |  | ||||||
|         return urllib.parse.unquote_plus(s) |  | ||||||
| 
 |  | ||||||
|     proxy_args = ( |  | ||||||
|         socks_type, |  | ||||||
|         url_components.hostname, url_components.port or 1080, |  | ||||||
|         True,  # Remote DNS |  | ||||||
|         unquote_if_non_empty(url_components.username), |  | ||||||
|         unquote_if_non_empty(url_components.password), |  | ||||||
|     ) |  | ||||||
| 
 |  | ||||||
|     class SocksConnection(base_class): |  | ||||||
|         def connect(self): |  | ||||||
|             self.sock = sockssocket() |  | ||||||
|             self.sock.setproxy(*proxy_args) |  | ||||||
|             if isinstance(self.timeout, (int, float)): |  | ||||||
|                 self.sock.settimeout(self.timeout) |  | ||||||
|             self.sock.connect((self.host, self.port)) |  | ||||||
| 
 |  | ||||||
|             if isinstance(self, http.client.HTTPSConnection): |  | ||||||
|                 if hasattr(self, '_context'):  # Python > 2.6 |  | ||||||
|                     self.sock = self._context.wrap_socket( |  | ||||||
|                         self.sock, server_hostname=self.host) |  | ||||||
|                 else: |  | ||||||
|                     self.sock = ssl.wrap_socket(self.sock) |  | ||||||
| 
 |  | ||||||
|     return SocksConnection |  | ||||||
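As a usage sketch: given socks5://user:p%40ss@127.0.0.1:1080, setproxy() ends up called with (ProxyType.SOCKS5, '127.0.0.1', 1080, True, 'user', 'p@ss'), the port defaulting to 1080 and the credentials URL-unquoted. Illustrative, assuming a SOCKS5 proxy is actually listening at that address:

conn_class = make_socks_conn_class(
    http.client.HTTPSConnection, 'socks5://user:p%40ss@127.0.0.1:1080')
conn = conn_class('example.com', 443)
conn.request('GET', '/')  # TCP goes through the SOCKS5 proxy, then TLS is wrapped on top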
| 
 |  | ||||||
| 
 |  | ||||||
| class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler): | class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler): | ||||||
|     def __init__(self, params, https_conn_class=None, *args, **kwargs): |     def __init__(self, params, https_conn_class=None, *args, **kwargs): | ||||||
|         urllib.request.HTTPSHandler.__init__(self, *args, **kwargs) |         urllib.request.HTTPSHandler.__init__(self, *args, **kwargs) | ||||||
| @@ -1507,9 +1157,11 @@ class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler): | |||||||
| 
 | 
 | ||||||
|         socks_proxy = req.headers.get('Ytdl-socks-proxy') |         socks_proxy = req.headers.get('Ytdl-socks-proxy') | ||||||
|         if socks_proxy: |         if socks_proxy: | ||||||
|  |             from ..networking._urllib import make_socks_conn_class | ||||||
|             conn_class = make_socks_conn_class(conn_class, socks_proxy) |             conn_class = make_socks_conn_class(conn_class, socks_proxy) | ||||||
|             del req.headers['Ytdl-socks-proxy'] |             del req.headers['Ytdl-socks-proxy'] | ||||||
| 
 | 
 | ||||||
|  |         from ..networking._urllib import _create_http_connection | ||||||
|         try: |         try: | ||||||
|             return self.do_open( |             return self.do_open( | ||||||
|                 functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs) |                 functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs) | ||||||
| @@ -1535,56 +1187,6 @@ class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor): | |||||||
|     https_response = http_response |     https_response = http_response | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler): |  | ||||||
|     """YoutubeDL redirect handler |  | ||||||
| 
 |  | ||||||
|     The code is based on HTTPRedirectHandler implementation from CPython [1]. |  | ||||||
| 
 |  | ||||||
|     This redirect handler fixes and improves the logic to better align with RFC 7231 |  | ||||||
|     and what browsers tend to do [2][3] |  | ||||||
| 
 |  | ||||||
|     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py |  | ||||||
|     2. https://datatracker.ietf.org/doc/html/rfc7231 |  | ||||||
|     3. https://github.com/python/cpython/issues/91306 |  | ||||||
|     """ |  | ||||||
| 
 |  | ||||||
|     http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302 |  | ||||||
| 
 |  | ||||||
|     def redirect_request(self, req, fp, code, msg, headers, newurl): |  | ||||||
|         if code not in (301, 302, 303, 307, 308): |  | ||||||
|             raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp) |  | ||||||
| 
 |  | ||||||
|         new_method = req.get_method() |  | ||||||
|         new_data = req.data |  | ||||||
| 
 |  | ||||||
|         # Technically the Cookie header should be in unredirected_hdrs, |  | ||||||
|         # however in practice some may set it in normal headers anyway. |  | ||||||
|         # We will remove it here to prevent any leaks. |  | ||||||
|         remove_headers = ['Cookie'] |  | ||||||
| 
 |  | ||||||
|         # A 303 must either use GET or HEAD for subsequent request |  | ||||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 |  | ||||||
|         if code == 303 and req.get_method() != 'HEAD': |  | ||||||
|             new_method = 'GET' |  | ||||||
|         # 301 and 302 redirects are commonly turned into a GET from a POST |  | ||||||
|         # for subsequent requests by browsers, so we'll do the same. |  | ||||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 |  | ||||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 |  | ||||||
|         elif code in (301, 302) and req.get_method() == 'POST': |  | ||||||
|             new_method = 'GET' |  | ||||||
| 
 |  | ||||||
|         # only remove payload if method changed (e.g. POST to GET) |  | ||||||
|         if new_method != req.get_method(): |  | ||||||
|             new_data = None |  | ||||||
|             remove_headers.extend(['Content-Length', 'Content-Type']) |  | ||||||
| 
 |  | ||||||
|         new_headers = {k: v for k, v in req.headers.items() if k.title() not in remove_headers} |  | ||||||
| 
 |  | ||||||
|         return urllib.request.Request( |  | ||||||
|             newurl, headers=new_headers, origin_req_host=req.origin_req_host, |  | ||||||
|             unverifiable=True, method=new_method, data=new_data) |  | ||||||
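The method rules above boil down to a small decision table; a sketch (redirected_method is illustrative, not code from the patch):

def redirected_method(code, method):
    # 303: anything except HEAD becomes GET (RFC 7231 section 6.4.4)
    if code == 303 and method != 'HEAD':
        return 'GET'
    # 301/302: browsers turn POST into GET, so do the same
    if code in (301, 302) and method == 'POST':
        return 'GET'
    # 307/308 preserve the method (and hence the payload)
    return method

assert redirected_method(303, 'POST') == 'GET'
assert redirected_method(302, 'POST') == 'GET'
assert redirected_method(307, 'POST') == 'POST'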
| 
 |  | ||||||
| 
 |  | ||||||
| def extract_timezone(date_str): | def extract_timezone(date_str): | ||||||
|     m = re.search( |     m = re.search( | ||||||
|         r'''(?x) |         r'''(?x) | ||||||
| @@ -2390,16 +1992,6 @@ def urljoin(base, path): | |||||||
|     return urllib.parse.urljoin(base, path) |     return urllib.parse.urljoin(base, path) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class HEADRequest(urllib.request.Request): |  | ||||||
|     def get_method(self): |  | ||||||
|         return 'HEAD' |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class PUTRequest(urllib.request.Request): |  | ||||||
|     def get_method(self): |  | ||||||
|         return 'PUT' |  | ||||||
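These tiny subclasses exist because urllib.request.Request infers its method from whether data is present (POST if set, GET otherwise); overriding get_method() is the simplest way to force another verb:

req = HEADRequest('https://example.com/video.mp4')
assert req.get_method() == 'HEAD'  # a plain Request would report GET here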
| 
 |  | ||||||
| 
 |  | ||||||
| def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): | def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): | ||||||
|     if get_attr and v is not None: |     if get_attr and v is not None: | ||||||
|         v = getattr(v, get_attr, None) |         v = getattr(v, get_attr, None) | ||||||
| @@ -3016,26 +2608,6 @@ def update_url_query(url, query): | |||||||
|     return update_url(url, query_update=query) |     return update_url(url, query_update=query) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def update_Request(req, url=None, data=None, headers=None, query=None): |  | ||||||
|     req_headers = req.headers.copy() |  | ||||||
|     req_headers.update(headers or {}) |  | ||||||
|     req_data = data or req.data |  | ||||||
|     req_url = update_url_query(url or req.get_full_url(), query) |  | ||||||
|     req_get_method = req.get_method() |  | ||||||
|     if req_get_method == 'HEAD': |  | ||||||
|         req_type = HEADRequest |  | ||||||
|     elif req_get_method == 'PUT': |  | ||||||
|         req_type = PUTRequest |  | ||||||
|     else: |  | ||||||
|         req_type = urllib.request.Request |  | ||||||
|     new_req = req_type( |  | ||||||
|         req_url, data=req_data, headers=req_headers, |  | ||||||
|         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) |  | ||||||
|     if hasattr(req, 'timeout'): |  | ||||||
|         new_req.timeout = req.timeout |  | ||||||
|     return new_req |  | ||||||
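update_Request rebuilds the request rather than mutating it, carrying over headers, origin host, timeout and, via the subclasses above, non-standard methods. An illustrative round-trip (the URLs are made up):

req = PUTRequest('http://example.com/a', data=b'body')
new = update_Request(req, url='http://example.com/b', query={'page': '2'})
assert new.get_method() == 'PUT'
assert new.get_full_url() == 'http://example.com/b?page=2'
assert new.data == b'body'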
| 
 |  | ||||||
| 
 |  | ||||||
| def _multipart_encode_impl(data, boundary): | def _multipart_encode_impl(data, boundary): | ||||||
|     content_type = 'multipart/form-data; boundary=%s' % boundary |     content_type = 'multipart/form-data; boundary=%s' % boundary | ||||||
| 
 | 
 | ||||||
| @@ -4769,31 +4341,6 @@ class GeoUtils: | |||||||
|             struct.pack('!L', random.randint(addr_min, addr_max)))) |             struct.pack('!L', random.randint(addr_min, addr_max)))) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class PerRequestProxyHandler(urllib.request.ProxyHandler): |  | ||||||
|     def __init__(self, proxies=None): |  | ||||||
|         # Set default handlers |  | ||||||
|         for type in ('http', 'https'): |  | ||||||
|             setattr(self, '%s_open' % type, |  | ||||||
|                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open: |  | ||||||
|                         meth(r, proxy, type)) |  | ||||||
|         urllib.request.ProxyHandler.__init__(self, proxies) |  | ||||||
| 
 |  | ||||||
|     def proxy_open(self, req, proxy, type): |  | ||||||
|         req_proxy = req.headers.get('Ytdl-request-proxy') |  | ||||||
|         if req_proxy is not None: |  | ||||||
|             proxy = req_proxy |  | ||||||
|             del req.headers['Ytdl-request-proxy'] |  | ||||||
| 
 |  | ||||||
|         if proxy == '__noproxy__': |  | ||||||
|             return None  # No Proxy |  | ||||||
|         if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): |  | ||||||
|             req.add_header('Ytdl-socks-proxy', proxy) |  | ||||||
|             # yt-dlp's http/https handlers take care of wrapping the socket with socks |  | ||||||
|             return None |  | ||||||
|         return urllib.request.ProxyHandler.proxy_open( |  | ||||||
|             self, req, proxy, type) |  | ||||||
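This handler lets a single opener route individual requests through different proxies via the Ytdl-request-proxy header. Illustrative (the proxy address is hypothetical):

opener = urllib.request.build_opener(
    PerRequestProxyHandler({'http': 'http://default-proxy:3128'}))  # hypothetical proxy

req = urllib.request.Request('http://example.com')
req.add_header('Ytdl-request-proxy', '__noproxy__')  # this one bypasses the default proxy
# opener.open(req) would now connect directly instead of via default-proxy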
| 
 |  | ||||||
| 
 |  | ||||||
| # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is | # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is | ||||||
| # released into Public Domain | # released into Public Domain | ||||||
| # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 | # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 | ||||||
|   | |||||||
							
								
								
									
yt_dlp/utils/networking.py (new file, 60 lines)
							| @@ -0,0 +1,60 @@ | |||||||
|  | import random | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def random_user_agent(): | ||||||
|  |     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' | ||||||
|  |     _CHROME_VERSIONS = ( | ||||||
|  |         '90.0.4430.212', | ||||||
|  |         '90.0.4430.24', | ||||||
|  |         '90.0.4430.70', | ||||||
|  |         '90.0.4430.72', | ||||||
|  |         '90.0.4430.85', | ||||||
|  |         '90.0.4430.93', | ||||||
|  |         '91.0.4472.101', | ||||||
|  |         '91.0.4472.106', | ||||||
|  |         '91.0.4472.114', | ||||||
|  |         '91.0.4472.124', | ||||||
|  |         '91.0.4472.164', | ||||||
|  |         '91.0.4472.19', | ||||||
|  |         '91.0.4472.77', | ||||||
|  |         '92.0.4515.107', | ||||||
|  |         '92.0.4515.115', | ||||||
|  |         '92.0.4515.131', | ||||||
|  |         '92.0.4515.159', | ||||||
|  |         '92.0.4515.43', | ||||||
|  |         '93.0.4556.0', | ||||||
|  |         '93.0.4577.15', | ||||||
|  |         '93.0.4577.63', | ||||||
|  |         '93.0.4577.82', | ||||||
|  |         '94.0.4606.41', | ||||||
|  |         '94.0.4606.54', | ||||||
|  |         '94.0.4606.61', | ||||||
|  |         '94.0.4606.71', | ||||||
|  |         '94.0.4606.81', | ||||||
|  |         '94.0.4606.85', | ||||||
|  |         '95.0.4638.17', | ||||||
|  |         '95.0.4638.50', | ||||||
|  |         '95.0.4638.54', | ||||||
|  |         '95.0.4638.69', | ||||||
|  |         '95.0.4638.74', | ||||||
|  |         '96.0.4664.18', | ||||||
|  |         '96.0.4664.45', | ||||||
|  |         '96.0.4664.55', | ||||||
|  |         '96.0.4664.93', | ||||||
|  |         '97.0.4692.20', | ||||||
|  |     ) | ||||||
|  |     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | std_headers = { | ||||||
|  |     'User-Agent': random_user_agent(), | ||||||
|  |     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', | ||||||
|  |     'Accept-Language': 'en-us,en;q=0.5', | ||||||
|  |     'Sec-Fetch-Mode': 'navigate', | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def clean_headers(headers): | ||||||
|  |     if 'Youtubedl-no-compression' in headers:  # compat | ||||||
|  |         del headers['Youtubedl-no-compression'] | ||||||
|  |         headers['Accept-Encoding'] = 'identity' | ||||||
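clean_headers translates the deprecated Youtubedl-no-compression marker into a real Accept-Encoding: identity header, mutating the dict in place:

headers = {**std_headers, 'Youtubedl-no-compression': '1'}
clean_headers(headers)
assert 'Youtubedl-no-compression' not in headers
assert headers['Accept-Encoding'] == 'identity'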