1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00
This commit is contained in:
Alan Xiao 2025-06-17 09:59:42 +02:00 committed by GitHub
commit 9ebdd82986
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 40 additions and 0 deletions

View File

@ -249,6 +249,14 @@ def test_sanitize_path(self):
self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s')
self.assertEqual(sanitize_path('CON.opus'), 'CON_res.opus')
self.assertEqual(sanitize_path('abc\\CON\\def'), 'abc\\CON_res\\def')
self.assertEqual(sanitize_path('CON\\abc'), 'CON_res\\abc')
self.assertEqual(sanitize_path('CON.'), 'CON#')
self.assertEqual(sanitize_path('CON..'), 'CON_res.#')
self.assertEqual(sanitize_path('\\\\.\\CON'), '\\\\.\\CON')
self.assertEqual(sanitize_path('\\\\.\\CON\\abc'), '\\\\.\\CON_res\\abc')
# Check with nt._path_normpath if available
try:
from nt import _path_normpath as nt_path_normpath

View File

@ -167,6 +167,19 @@ def IDENTITY(x):
NUMBER_RE = r'\d+(?:\.\d+)?'
# Legacy device names that get special treatment in Windows paths:
# the console/printer/aux/null devices, plus COM0-COM9 and LPT0-LPT9 and their
# superscript-digit variants (COM¹, COM², COM³, LPT¹, LPT², LPT³).
WINDOWS_RESERVED_NAMES = (
    'CON', 'CONOUT$', 'CONIN$', 'PRN', 'AUX', 'NUL',
    *(f'{name:s}{num:d}' for name, num in itertools.product(('COM', 'LPT'), range(10))),
    *(
        f'{name:s}{ssd:s}'
        for name, ssd in itertools.product(
            ('COM', 'LPT'),
            ('\N{SUPERSCRIPT ONE}', '\N{SUPERSCRIPT TWO}', '\N{SUPERSCRIPT THREE}'),
        )
    ),
)
# Single capturing group that matches any one reserved name.
# - Every name is escaped so that the `$` of `CONOUT$`/`CONIN$` is matched
#   literally instead of acting as an end-of-string anchor.
# - Longer names come first so that a prefix match picks `CONOUT$` rather than
#   stopping at `CON`.
# NOTE(review): matching is case-sensitive, whereas Windows device names are
# generally treated case-insensitively - confirm whether lower-case forms
# (`con`, `nul`, ...) should be covered as well.
WINDOWS_RESERVED_NAMES_RE = '({})'.format(
    '|'.join(map(re.escape, sorted(WINDOWS_RESERVED_NAMES, key=len, reverse=True))))
@functools.cache
def preferredencoding():
@ -679,6 +692,19 @@ def replace_insane(char):
return result
def _sanitize_windows_reserved_names(s):
    """Append `_res` to a path component that is a Windows reserved device name

    The suffix keeps the original name recognizable when a user accidentally
    targets a device file. Only a reserved name at the very start of the
    component counts; a reserved name that merely occurs inside a longer name
    is left alone.
        - `CON`        => `CON_res`
        - `CON.opus`   => `CON_res.opus`
        - `CONTENT`    => `CONTENT`     (reserved name is only a prefix)
        - `BACON.opus` => `BACON.opus`  (reserved name is not at the start)

    @param s    A single path component (no path separators)
    @returns    The component, with `_res` inserted after the reserved name if needed
    """
    def suffix_sanitize(match):
        name, dots, rest = match.groups()
        if rest and not dots:
            # The name runs straight into other characters (`COM10`, `CON#`),
            # so the component as a whole is not a reserved name
            return match.group(0)
        return f'{name}_res{dots}{rest}'  # suffix the reserved portion only

    # `\A` anchors the match to the start of the component; without it the
    # pattern would also fire on names that merely contain a reserved name
    return re.sub(fr'\A{WINDOWS_RESERVED_NAMES_RE}(\.*)(.*$)', suffix_sanitize, s)
def _sanitize_path_parts(parts):
sanitized_parts = []
for part in parts:
@ -694,6 +720,7 @@ def _sanitize_path_parts(parts):
# - trailing dots and spaces (`asdf...` => `asdf..#`)
# - invalid chars (`<>` => `##`)
sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
sanitized_part = _sanitize_windows_reserved_names(sanitized_part)
sanitized_parts.append(sanitized_part)
return sanitized_parts
@ -713,6 +740,11 @@ def sanitize_path(s, force=False):
if normed.startswith('\\\\'):
# UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
parts = normed.split('\\')
# allow user to write to explicitly declared legacy devices
if len(parts) == 4 and re.fullmatch(WINDOWS_RESERVED_NAMES_RE, parts[3]):
return '\\'.join(parts[:4])
# sanitize legacy name device otherwise
parts[3] = _sanitize_windows_reserved_names(parts[3])
root = '\\'.join(parts[:4]) + '\\'
parts = parts[4:]
elif normed[1:2] == ':':