mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[core] Improve HTTP redirect handling (#7094)
Aligns HTTP redirect handling with what browsers commonly do and RFC standards. 
Fixes issues afac4caa7d missed.
Authored by: coletdjnz
			
			
This commit is contained in:
		| @@ -1664,61 +1664,44 @@ class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler): | ||||
| 
 | ||||
|     The code is based on HTTPRedirectHandler implementation from CPython [1]. | ||||
| 
 | ||||
|     This redirect handler solves two issues: | ||||
|      - ensures redirect URL is always unicode under python 2 | ||||
|      - introduces support for experimental HTTP response status code | ||||
|        308 Permanent Redirect [2] used by some sites [3] | ||||
|     This redirect handler fixes and improves the logic to better align with RFC 7231 | ||||
|      and what browsers tend to do [2][3] | ||||
| 
 | ||||
|     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py | ||||
|     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308 | ||||
|     3. https://github.com/ytdl-org/youtube-dl/issues/28768 | ||||
|     2. https://datatracker.ietf.org/doc/html/rfc7231 | ||||
|     3. https://github.com/python/cpython/issues/91306 | ||||
|     """ | ||||
| 
 | ||||
|     http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302 | ||||
| 
 | ||||
|     def redirect_request(self, req, fp, code, msg, headers, newurl): | ||||
|         """Return a Request or None in response to a redirect. | ||||
| 
 | ||||
|         This is called by the http_error_30x methods when a | ||||
|         redirection response is received.  If a redirection should | ||||
|         take place, return a new Request to allow http_error_30x to | ||||
|         perform the redirect.  Otherwise, raise HTTPError if no-one | ||||
|         else should try to handle this url.  Return None if you can't | ||||
|         but another Handler might. | ||||
|         """ | ||||
|         m = req.get_method() | ||||
|         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD") | ||||
|                  or code in (301, 302, 303) and m == "POST")): | ||||
|         if code not in (301, 302, 303, 307, 308): | ||||
|             raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp) | ||||
|         # Strictly (according to RFC 2616), 301 or 302 in response to | ||||
|         # a POST MUST NOT cause a redirection without confirmation | ||||
|         # from the user (of urllib.request, in this case).  In practice, | ||||
|         # essentially all clients do redirect in this case, so we do | ||||
|         # the same. | ||||
| 
 | ||||
|         # Be lenient with URIs containing a space.  This is mainly | ||||
|         # redundant with the more complete encoding done in http_error_302(), | ||||
|         # but it is kept for compatibility with other callers. | ||||
|         newurl = newurl.replace(' ', '%20') | ||||
| 
 | ||||
|         CONTENT_HEADERS = ("content-length", "content-type") | ||||
|         # NB: don't use dict comprehension for python 2.6 compatibility | ||||
|         newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS} | ||||
| 
 | ||||
|         new_method = req.get_method() | ||||
|         new_data = req.data | ||||
|         remove_headers = [] | ||||
|         # A 303 must either use GET or HEAD for subsequent request | ||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 | ||||
|         if code == 303 and m != 'HEAD': | ||||
|             m = 'GET' | ||||
|         if code == 303 and req.get_method() != 'HEAD': | ||||
|             new_method = 'GET' | ||||
|         # 301 and 302 redirects are commonly turned into a GET from a POST | ||||
|         # for subsequent requests by browsers, so we'll do the same. | ||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 | ||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 | ||||
|         if code in (301, 302) and m == 'POST': | ||||
|             m = 'GET' | ||||
|         elif code in (301, 302) and req.get_method() == 'POST': | ||||
|             new_method = 'GET' | ||||
| 
 | ||||
|         # only remove payload if method changed (e.g. POST to GET) | ||||
|         if new_method != req.get_method(): | ||||
|             new_data = None | ||||
|             remove_headers.extend(['Content-Length', 'Content-Type']) | ||||
| 
 | ||||
|         new_headers = {k: v for k, v in req.headers.items() if k.lower() not in remove_headers} | ||||
| 
 | ||||
|         return urllib.request.Request( | ||||
|             newurl, headers=newheaders, origin_req_host=req.origin_req_host, | ||||
|             unverifiable=True, method=m) | ||||
|             newurl, headers=new_headers, origin_req_host=req.origin_req_host, | ||||
|             unverifiable=True, method=new_method, data=new_data) | ||||
| 
 | ||||
| 
 | ||||
| def extract_timezone(date_str): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 coletdjnz
					coletdjnz