1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-10-29 21:57:50 +00:00

[networking] Remove dot segments during URL normalization (#7662)

This implements RFC3986 5.2.4 remove_dot_segments during the URL normalization process.

Closes #3355, #6526

Authored by: coletdjnz
This commit is contained in:
coletdjnz
2023-07-29 10:40:20 +12:00
committed by GitHub
parent a15fcd299e
commit 4bf912282a
8 changed files with 104 additions and 36 deletions

View File

@@ -41,7 +41,8 @@ from .exceptions import (
from ..dependencies import brotli
from ..socks import ProxyError as SocksProxyError
from ..socks import sockssocket
from ..utils import escape_url, update_url_query
from ..utils import update_url_query
from ..utils.networking import normalize_url
SUPPORTED_ENCODINGS = ['gzip', 'deflate']
CONTENT_DECODE_ERRORS = [zlib.error, OSError]
@@ -179,7 +180,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler):
# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
# the code of this workaround has been moved here from YoutubeDL.urlopen()
url = req.get_full_url()
url_escaped = escape_url(url)
url_escaped = normalize_url(url)
# Substitute URL if any change after escaping
if url != url_escaped:
@@ -212,7 +213,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler):
if location:
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
location = location.encode('iso-8859-1').decode()
location_escaped = escape_url(location)
location_escaped = normalize_url(location)
if location != location_escaped:
del resp.headers['Location']
resp.headers['Location'] = location_escaped