1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-17 10:48:29 +00:00

Update _utils.py

Add sanitization for Windows legacy device names
This commit is contained in:
Alan Xiao 2025-04-23 19:35:51 -04:00 committed by GitHub
parent 2381881fe5
commit 968d8e3613
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -167,6 +167,18 @@ def IDENTITY(x):
NUMBER_RE = r'\d+(?:\.\d+)?'
# Regex source with exactly one capturing group that matches a legacy DOS device
# name reserved by Windows: CON, PRN, AUX, CLOCK$, NUL, and COM/LPT followed by
# a digit 0-9 or by one of the superscript digits 1-3.
# Every name is passed through `re.escape` so that the `$` in `CLOCK$` is matched
# literally instead of acting as an end-of-string anchor.
# Built with `str.format` rather than an f-string with nested quotes, since
# reusing the enclosing quote inside an f-string is only valid on Python 3.12+.
WINDOWS_RESERVED_NAMES_RE = '({})'.format('|'.join(map(re.escape, (
    'CON', 'PRN', 'AUX', 'CLOCK$', 'NUL',
    *(f'{name}{num}' for name, num in itertools.product(('COM', 'LPT'), range(10))),
    *(
        f'{name}{ssd}'
        for name, ssd in itertools.product(
            ('COM', 'LPT'),
            ('\N{SUPERSCRIPT ONE}', '\N{SUPERSCRIPT TWO}', '\N{SUPERSCRIPT THREE}'),
        )
    ),
))))
@functools.cache
def preferredencoding():
@ -679,6 +691,19 @@ def replace_insane(char):
return result
def _sanitize_windows_reserved_names(s):
    """Neutralize a path component whose base name is a reserved Windows device name.

    Windows resolves legacy device names (`CON`, `NUL`, `COM1`, ...) to the device
    irrespective of letter case and of any extension that follows, so `_res` is
    appended to the reserved portion only. This keeps the result easily
    recognizable when a user accidentally targets a device file:
    - `CON.opus` => `CON_res.opus`
    - `nul` => `nul_res`

    Components that merely start with or contain a reserved name
    (`CONTENT.mp4`, `BACON.mp3`) are returned unchanged.

    @param s    A single path component (not a whole path)
    @returns    `s`, with `_res` inserted directly after a reserved base name
    """
    # `^` restricts the match to the start of the component, and the lookahead
    # requires the name to be the entire base name (followed by `.` or the end)
    # so that only the reserved portion is suffixed and the rest is untouched
    return re.sub(
        fr'^{WINDOWS_RESERVED_NAMES_RE}(?=\.|\Z)', r'\g<1>_res', s, flags=re.IGNORECASE)
def _sanitize_path_parts(parts):
sanitized_parts = []
for part in parts:
@ -694,6 +719,7 @@ def _sanitize_path_parts(parts):
# - trailing dots and spaces (`asdf...` => `asdf..#`)
# - invalid chars (`<>` => `##`)
sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
sanitized_part = _sanitize_windows_reserved_names(sanitized_part)
sanitized_parts.append(sanitized_part)
return sanitized_parts
@ -713,6 +739,11 @@ def sanitize_path(s, force=False):
if normed.startswith('\\\\'):
# UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
parts = normed.split('\\')
# allow user to write to explicitly declared legacy devices
if len(parts) == 4 and re.fullmatch(WINDOWS_RESERVED_NAMES_RE, parts[3]):
return '\\'.join(parts[:4])
# sanitize legacy name device otherwise
parts[3] = _sanitize_windows_reserved_names(parts[3])
root = '\\'.join(parts[:4]) + '\\'
parts = parts[4:]
elif normed[1:2] == ':':