mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 00:25:15 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			419 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			419 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import annotations
 | 
						|
 | 
						|
import contextlib
 | 
						|
import functools
 | 
						|
import http.client
 | 
						|
import logging
 | 
						|
import re
 | 
						|
import socket
 | 
						|
import warnings
 | 
						|
 | 
						|
from ..dependencies import brotli, requests, urllib3
 | 
						|
from ..utils import bug_reports_message, int_or_none, variadic
 | 
						|
from ..utils.networking import normalize_url
 | 
						|
 | 
						|
if requests is None:
 | 
						|
    raise ImportError('requests module is not installed')
 | 
						|
 | 
						|
if urllib3 is None:
 | 
						|
    raise ImportError('urllib3 module is not installed')
 | 
						|
 | 
						|
urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))
 | 
						|
 | 
						|
if urllib3_version < (1, 26, 17):
 | 
						|
    raise ImportError('Only urllib3 >= 1.26.17 is supported')
 | 
						|
 | 
						|
if requests.__build__ < 0x023202:
 | 
						|
    raise ImportError('Only requests >= 2.32.2 is supported')
 | 
						|
 | 
						|
import requests.adapters
 | 
						|
import requests.utils
 | 
						|
import urllib3.connection
 | 
						|
import urllib3.exceptions
 | 
						|
import urllib3.util
 | 
						|
 | 
						|
from ._helper import (
 | 
						|
    InstanceStoreMixin,
 | 
						|
    add_accept_encoding_header,
 | 
						|
    create_connection,
 | 
						|
    create_socks_proxy_socket,
 | 
						|
    get_redirect_method,
 | 
						|
    make_socks_proxy_opts,
 | 
						|
    select_proxy,
 | 
						|
)
 | 
						|
from .common import (
 | 
						|
    Features,
 | 
						|
    RequestHandler,
 | 
						|
    Response,
 | 
						|
    register_preference,
 | 
						|
    register_rh,
 | 
						|
)
 | 
						|
from .exceptions import (
 | 
						|
    CertificateVerifyError,
 | 
						|
    HTTPError,
 | 
						|
    IncompleteRead,
 | 
						|
    ProxyError,
 | 
						|
    RequestError,
 | 
						|
    SSLError,
 | 
						|
    TransportError,
 | 
						|
)
 | 
						|
from ..socks import ProxyError as SocksProxyError
 | 
						|
 | 
						|
SUPPORTED_ENCODINGS = [
 | 
						|
    'gzip', 'deflate',
 | 
						|
]
 | 
						|
 | 
						|
if brotli is not None:
 | 
						|
    SUPPORTED_ENCODINGS.append('br')
 | 
						|
 | 
						|
'''
 | 
						|
Override urllib3's behavior to not convert lower-case percent-encoded characters
 | 
						|
to upper-case during url normalization process.
 | 
						|
 | 
						|
RFC3986 defines that the lower or upper case percent-encoded hexidecimal characters are equivalent
 | 
						|
and normalizers should convert them to uppercase for consistency [1].
 | 
						|
 | 
						|
However, some sites may have an incorrect implementation where they provide
 | 
						|
a percent-encoded url that is then compared case-sensitively.[2]
 | 
						|
 | 
						|
While this is a very rare case, since urllib does not do this normalization step, it
 | 
						|
is best to avoid it in requests too for compatability reasons.
 | 
						|
 | 
						|
1: https://tools.ietf.org/html/rfc3986#section-2.1
 | 
						|
2: https://github.com/streamlink/streamlink/pull/4003
 | 
						|
'''
 | 
						|
 | 
						|
 | 
						|
class Urllib3PercentREOverride:
 | 
						|
    def __init__(self, r: re.Pattern):
 | 
						|
        self.re = r
 | 
						|
 | 
						|
    # pass through all other attribute calls to the original re
 | 
						|
    def __getattr__(self, item):
 | 
						|
        return self.re.__getattribute__(item)
 | 
						|
 | 
						|
    def subn(self, repl, string, *args, **kwargs):
 | 
						|
        return string, self.re.subn(repl, string, *args, **kwargs)[1]
 | 
						|
 | 
						|
 | 
						|
# urllib3 >= 1.25.8 uses subn:
 | 
						|
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
 | 
						|
import urllib3.util.url
 | 
						|
 | 
						|
if hasattr(urllib3.util.url, 'PERCENT_RE'):
 | 
						|
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
 | 
						|
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
 | 
						|
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
 | 
						|
else:
 | 
						|
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
 | 
						|
 | 
						|
'''
 | 
						|
Workaround for issue in urllib.util.ssl_.py: ssl_wrap_context does not pass
 | 
						|
server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
 | 
						|
however this is an issue because we set check_hostname to True in our SSLContext.
 | 
						|
 | 
						|
Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_context to pass server_hostname regardless.
 | 
						|
 | 
						|
This has been fixed in urllib3 2.0+.
 | 
						|
See: https://github.com/urllib3/urllib3/issues/517
 | 
						|
'''
 | 
						|
 | 
						|
if urllib3_version < (2, 0, 0):
 | 
						|
    with contextlib.suppress(Exception):
 | 
						|
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
 | 
						|
 | 
						|
 | 
						|
# Requests will not automatically handle no_proxy by default
 | 
						|
# due to buggy no_proxy handling with proxy dict [1].
 | 
						|
# 1. https://github.com/psf/requests/issues/5000
 | 
						|
requests.adapters.select_proxy = select_proxy
 | 
						|
 | 
						|
 | 
						|
class RequestsResponseAdapter(Response):
 | 
						|
    def __init__(self, res: requests.models.Response):
 | 
						|
        super().__init__(
 | 
						|
            fp=res.raw, headers=res.headers, url=res.url,
 | 
						|
            status=res.status_code, reason=res.reason)
 | 
						|
 | 
						|
        self._requests_response = res
 | 
						|
 | 
						|
    def read(self, amt: int | None = None):
 | 
						|
        try:
 | 
						|
            # Interact with urllib3 response directly.
 | 
						|
            return self.fp.read(amt, decode_content=True)
 | 
						|
 | 
						|
        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
 | 
						|
        except urllib3.exceptions.SSLError as e:
 | 
						|
            raise SSLError(cause=e) from e
 | 
						|
 | 
						|
        except urllib3.exceptions.ProtocolError as e:
 | 
						|
            # IncompleteRead is always contained within ProtocolError
 | 
						|
            # See urllib3.response.HTTPResponse._error_catcher()
 | 
						|
            ir_err = next(
 | 
						|
                (err for err in (e.__context__, e.__cause__, *variadic(e.args))
 | 
						|
                 if isinstance(err, http.client.IncompleteRead)), None)
 | 
						|
            if ir_err is not None:
 | 
						|
                # `urllib3.exceptions.IncompleteRead` is subclass of `http.client.IncompleteRead`
 | 
						|
                # but uses an `int` for its `partial` property.
 | 
						|
                partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial)
 | 
						|
                raise IncompleteRead(partial=partial, expected=ir_err.expected) from e
 | 
						|
            raise TransportError(cause=e) from e
 | 
						|
 | 
						|
        except urllib3.exceptions.HTTPError as e:
 | 
						|
            # catch-all for any other urllib3 response exceptions
 | 
						|
            raise TransportError(cause=e) from e
 | 
						|
 | 
						|
 | 
						|
class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
 | 
						|
    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
 | 
						|
        self._pm_args = {}
 | 
						|
        if ssl_context:
 | 
						|
            self._pm_args['ssl_context'] = ssl_context
 | 
						|
        if source_address:
 | 
						|
            self._pm_args['source_address'] = (source_address, 0)
 | 
						|
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
 | 
						|
        super().__init__(**kwargs)
 | 
						|
 | 
						|
    def init_poolmanager(self, *args, **kwargs):
 | 
						|
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)
 | 
						|
 | 
						|
    def proxy_manager_for(self, proxy, **proxy_kwargs):
 | 
						|
        extra_kwargs = {}
 | 
						|
        if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
 | 
						|
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
 | 
						|
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)
 | 
						|
 | 
						|
    # Skip `requests` internal verification; we use our own SSLContext
 | 
						|
    def cert_verify(*args, **kwargs):
 | 
						|
        pass
 | 
						|
 | 
						|
    # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
 | 
						|
    def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
 | 
						|
        url = urllib3.util.parse_url(request.url).url
 | 
						|
 | 
						|
        manager = self.poolmanager
 | 
						|
        if proxy := select_proxy(url, proxies):
 | 
						|
            manager = self.proxy_manager_for(proxy)
 | 
						|
 | 
						|
        return manager.connection_from_url(url)
 | 
						|
 | 
						|
 | 
						|
class RequestsSession(requests.sessions.Session):
 | 
						|
    """
 | 
						|
    Ensure unified redirect method handling with our urllib redirect handler.
 | 
						|
    """
 | 
						|
 | 
						|
    def rebuild_method(self, prepared_request, response):
 | 
						|
        new_method = get_redirect_method(prepared_request.method, response.status_code)
 | 
						|
 | 
						|
        # HACK: requests removes headers/body on redirect unless code was a 307/308.
 | 
						|
        if new_method == prepared_request.method:
 | 
						|
            response._real_status_code = response.status_code
 | 
						|
            response.status_code = 308
 | 
						|
 | 
						|
        prepared_request.method = new_method
 | 
						|
 | 
						|
        # Requests fails to resolve dot segments on absolute redirect locations
 | 
						|
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
 | 
						|
        prepared_request.url = normalize_url(prepared_request.url)
 | 
						|
 | 
						|
    def rebuild_auth(self, prepared_request, response):
 | 
						|
        # HACK: undo status code change from rebuild_method, if applicable.
 | 
						|
        # rebuild_auth runs after requests would remove headers/body based on status code
 | 
						|
        if hasattr(response, '_real_status_code'):
 | 
						|
            response.status_code = response._real_status_code
 | 
						|
            del response._real_status_code
 | 
						|
        return super().rebuild_auth(prepared_request, response)
 | 
						|
 | 
						|
 | 
						|
class Urllib3LoggingFilter(logging.Filter):
 | 
						|
 | 
						|
    def filter(self, record):
 | 
						|
        # Ignore HTTP request messages since HTTPConnection prints those
 | 
						|
        return record.msg != '%s://%s:%s "%s %s %s" %s %s'
 | 
						|
 | 
						|
 | 
						|
class Urllib3LoggingHandler(logging.Handler):
 | 
						|
    """Redirect urllib3 logs to our logger"""
 | 
						|
 | 
						|
    def __init__(self, logger, *args, **kwargs):
 | 
						|
        super().__init__(*args, **kwargs)
 | 
						|
        self._logger = logger
 | 
						|
 | 
						|
    def emit(self, record):
 | 
						|
        try:
 | 
						|
            msg = self.format(record)
 | 
						|
            if record.levelno >= logging.ERROR:
 | 
						|
                self._logger.error(msg)
 | 
						|
            else:
 | 
						|
                self._logger.stdout(msg)
 | 
						|
 | 
						|
        except Exception:
 | 
						|
            self.handleError(record)
 | 
						|
 | 
						|
 | 
						|
@register_rh
 | 
						|
class RequestsRH(RequestHandler, InstanceStoreMixin):
 | 
						|
 | 
						|
    """Requests RequestHandler
 | 
						|
    https://github.com/psf/requests
 | 
						|
    """
 | 
						|
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
 | 
						|
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
 | 
						|
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
 | 
						|
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
 | 
						|
    RH_NAME = 'requests'
 | 
						|
 | 
						|
    def __init__(self, *args, **kwargs):
 | 
						|
        super().__init__(*args, **kwargs)
 | 
						|
 | 
						|
        # Forward urllib3 debug messages to our logger
 | 
						|
        logger = logging.getLogger('urllib3')
 | 
						|
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
 | 
						|
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
 | 
						|
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
 | 
						|
        logger.addHandler(self.__logging_handler)
 | 
						|
        # TODO: Use a logger filter to suppress pool reuse warning instead
 | 
						|
        logger.setLevel(logging.ERROR)
 | 
						|
 | 
						|
        if self.verbose:
 | 
						|
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
 | 
						|
            # It could technically be problematic for scripts embedding yt-dlp.
 | 
						|
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
 | 
						|
            urllib3.connection.HTTPConnection.debuglevel = 1
 | 
						|
            logger.setLevel(logging.DEBUG)
 | 
						|
        # this is expected if we are using --no-check-certificate
 | 
						|
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 | 
						|
 | 
						|
    def close(self):
 | 
						|
        self._clear_instances()
 | 
						|
        # Remove the logging handler that contains a reference to our logger
 | 
						|
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
 | 
						|
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)
 | 
						|
 | 
						|
    def _check_extensions(self, extensions):
 | 
						|
        super()._check_extensions(extensions)
 | 
						|
        extensions.pop('cookiejar', None)
 | 
						|
        extensions.pop('timeout', None)
 | 
						|
 | 
						|
    def _create_instance(self, cookiejar):
 | 
						|
        session = RequestsSession()
 | 
						|
        http_adapter = RequestsHTTPAdapter(
 | 
						|
            ssl_context=self._make_sslcontext(),
 | 
						|
            source_address=self.source_address,
 | 
						|
            max_retries=urllib3.util.retry.Retry(False),
 | 
						|
        )
 | 
						|
        session.adapters.clear()
 | 
						|
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
 | 
						|
        session.mount('https://', http_adapter)
 | 
						|
        session.mount('http://', http_adapter)
 | 
						|
        session.cookies = cookiejar
 | 
						|
        session.trust_env = False  # no need, we already load proxies from env
 | 
						|
        return session
 | 
						|
 | 
						|
    def _send(self, request):
 | 
						|
 | 
						|
        headers = self._merge_headers(request.headers)
 | 
						|
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)
 | 
						|
 | 
						|
        max_redirects_exceeded = False
 | 
						|
 | 
						|
        session = self._get_instance(cookiejar=self._get_cookiejar(request))
 | 
						|
 | 
						|
        try:
 | 
						|
            requests_res = session.request(
 | 
						|
                method=request.method,
 | 
						|
                url=request.url,
 | 
						|
                data=request.data,
 | 
						|
                headers=headers,
 | 
						|
                timeout=self._calculate_timeout(request),
 | 
						|
                proxies=self._get_proxies(request),
 | 
						|
                allow_redirects=True,
 | 
						|
                stream=True,
 | 
						|
            )
 | 
						|
 | 
						|
        except requests.exceptions.TooManyRedirects as e:
 | 
						|
            max_redirects_exceeded = True
 | 
						|
            requests_res = e.response
 | 
						|
 | 
						|
        except requests.exceptions.SSLError as e:
 | 
						|
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
 | 
						|
                raise CertificateVerifyError(cause=e) from e
 | 
						|
            raise SSLError(cause=e) from e
 | 
						|
 | 
						|
        except requests.exceptions.ProxyError as e:
 | 
						|
            raise ProxyError(cause=e) from e
 | 
						|
 | 
						|
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
 | 
						|
            raise TransportError(cause=e) from e
 | 
						|
 | 
						|
        except urllib3.exceptions.HTTPError as e:
 | 
						|
            # Catch any urllib3 exceptions that may leak through
 | 
						|
            raise TransportError(cause=e) from e
 | 
						|
 | 
						|
        except requests.exceptions.RequestException as e:
 | 
						|
            # Miscellaneous Requests exceptions. May not necessary be network related e.g. InvalidURL
 | 
						|
            raise RequestError(cause=e) from e
 | 
						|
 | 
						|
        res = RequestsResponseAdapter(requests_res)
 | 
						|
 | 
						|
        if not 200 <= res.status < 300:
 | 
						|
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)
 | 
						|
 | 
						|
        return res
 | 
						|
 | 
						|
 | 
						|
@register_preference(RequestsRH)
 | 
						|
def requests_preference(rh, request):
 | 
						|
    return 100
 | 
						|
 | 
						|
 | 
						|
# Use our socks proxy implementation with requests to avoid an extra dependency.
 | 
						|
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
 | 
						|
    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
 | 
						|
        self._proxy_args = _socks_options
 | 
						|
        super().__init__(*args, **kwargs)
 | 
						|
 | 
						|
    def _new_conn(self):
 | 
						|
        try:
 | 
						|
            return create_connection(
 | 
						|
                address=(self._proxy_args['addr'], self._proxy_args['port']),
 | 
						|
                timeout=self.timeout,
 | 
						|
                source_address=self.source_address,
 | 
						|
                _create_socket_func=functools.partial(
 | 
						|
                    create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
 | 
						|
        except (socket.timeout, TimeoutError) as e:
 | 
						|
            raise urllib3.exceptions.ConnectTimeoutError(
 | 
						|
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
 | 
						|
        except SocksProxyError as e:
 | 
						|
            raise urllib3.exceptions.ProxyError(str(e), e) from e
 | 
						|
        except OSError as e:
 | 
						|
            raise urllib3.exceptions.NewConnectionError(
 | 
						|
                self, f'Failed to establish a new connection: {e}') from e
 | 
						|
 | 
						|
 | 
						|
class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
 | 
						|
    pass
 | 
						|
 | 
						|
 | 
						|
class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
 | 
						|
    ConnectionCls = SocksHTTPConnection
 | 
						|
 | 
						|
 | 
						|
class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
 | 
						|
    ConnectionCls = SocksHTTPSConnection
 | 
						|
 | 
						|
 | 
						|
class SocksProxyManager(urllib3.PoolManager):
 | 
						|
 | 
						|
    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
 | 
						|
        connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
 | 
						|
        super().__init__(num_pools, headers, **connection_pool_kw)
 | 
						|
        self.pool_classes_by_scheme = {
 | 
						|
            'http': SocksHTTPConnectionPool,
 | 
						|
            'https': SocksHTTPSConnectionPool,
 | 
						|
        }
 | 
						|
 | 
						|
 | 
						|
requests.adapters.SOCKSProxyManager = SocksProxyManager
 |