mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-07-22 05:08:30 +00:00
Update _utils.py
Add sanitization for Windows legacy device names
This commit is contained in:
parent
2381881fe5
commit
968d8e3613
@ -167,6 +167,18 @@ def IDENTITY(x):
|
|||||||
|
|
||||||
NUMBER_RE = r'\d+(?:\.\d+)?'
|
NUMBER_RE = r'\d+(?:\.\d+)?'
|
||||||
|
|
||||||
|
# Regex source (one capturing group) matching the legacy Windows device names:
# CON, PRN, AUX, CLOCK$, NUL and COM/LPT followed by 0-9 or a superscript 1-3.
# It is deliberately unanchored and carries no flags, so callers decide how to
# apply it (`re.fullmatch`, embedding in a larger pattern, `re.IGNORECASE`, ...).
# Every name goes through `re.escape` so that the `$` in `CLOCK$` is matched
# literally instead of acting as an end-of-string anchor, and the pattern is
# assembled with `str.format` rather than a nested f-string so that the module
# still parses on interpreters older than Python 3.12.
WINDOWS_RESERVED_NAMES_RE = '({})'.format('|'.join(map(re.escape, (
    'CON', 'PRN', 'AUX', 'CLOCK$', 'NUL',
    *(f'{name}{num}' for name, num in itertools.product(('COM', 'LPT'), range(10))),
    *(f'{name}{ssd}' for name, ssd in itertools.product(
        ('COM', 'LPT'),
        ('\N{SUPERSCRIPT ONE}', '\N{SUPERSCRIPT TWO}', '\N{SUPERSCRIPT THREE}'),
    )),
))))
|
||||||
|
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
def preferredencoding():
|
def preferredencoding():
|
||||||
@ -679,6 +691,19 @@ def replace_insane(char):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_windows_reserved_names(s):
    """Rename a path component that would resolve to a legacy Windows device.

    `_res` is appended to the reserved portion only, so the name stays easy to
    recognize and the extension is kept:
      - CON.opus => CON_res.opus
      - nul      => nul_res

    A component is only treated as reserved when the device name is the whole
    base name, i.e. it starts the component and is followed by the end of the
    string or by a dot (optionally preceded by spaces). Names that merely
    contain a device name (`ICON.png`, `CONTENT.mp4`, `COM10`) are returned
    unchanged. Matching ignores case; the original casing is preserved.

    @param s    A single path component (no separators)
    @returns    The component, with `_res` inserted after a reserved base name
    """
    return re.sub(
        # The lookahead keeps the dot/extension out of the match so that only
        # the reserved portion is rewritten
        fr'^{WINDOWS_RESERVED_NAMES_RE}(?=[ ]*(?:\.|$))',
        lambda m: f'{m.group(1)}_res',
        s, flags=re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_path_parts(parts):
|
def _sanitize_path_parts(parts):
|
||||||
sanitized_parts = []
|
sanitized_parts = []
|
||||||
for part in parts:
|
for part in parts:
|
||||||
@ -694,6 +719,7 @@ def _sanitize_path_parts(parts):
|
|||||||
# - trailing dots and spaces (`asdf...` => `asdf..#`)
|
# - trailing dots and spaces (`asdf...` => `asdf..#`)
|
||||||
# - invalid chars (`<>` => `##`)
|
# - invalid chars (`<>` => `##`)
|
||||||
sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
|
sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
|
||||||
|
sanitized_part = _sanitize_windows_reserved_names(sanitized_part)
|
||||||
sanitized_parts.append(sanitized_part)
|
sanitized_parts.append(sanitized_part)
|
||||||
|
|
||||||
return sanitized_parts
|
return sanitized_parts
|
||||||
@ -713,6 +739,11 @@ def sanitize_path(s, force=False):
|
|||||||
if normed.startswith('\\\\'):
|
if normed.startswith('\\\\'):
|
||||||
# UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
|
# UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
|
||||||
parts = normed.split('\\')
|
parts = normed.split('\\')
|
||||||
|
# allow user to write to explicitly declared legacy devices
|
||||||
|
if len(parts) == 4 and re.fullmatch(WINDOWS_RESERVED_NAMES_RE, parts[3]):
|
||||||
|
return '\\'.join(parts[:4])
|
||||||
|
# sanitize legacy name device otherwise
|
||||||
|
parts[3] = _sanitize_windows_reserved_names(parts[3])
|
||||||
root = '\\'.join(parts[:4]) + '\\'
|
root = '\\'.join(parts[:4]) + '\\'
|
||||||
parts = parts[4:]
|
parts = parts[4:]
|
||||||
elif normed[1:2] == ':':
|
elif normed[1:2] == ':':
|
||||||
|
Loading…
Reference in New Issue
Block a user