diff --git a/test/test_utils.py b/test/test_utils.py
index aedb565ec..2efac25d8 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -249,6 +249,20 @@ def test_sanitize_path(self):
         self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
         self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s')
 
+        self.assertEqual(sanitize_path('CON.opus'), 'CON_res.opus')
+        self.assertEqual(sanitize_path('abc\\CON\\def'), 'abc\\CON_res\\def')
+        self.assertEqual(sanitize_path('CON\\abc'), 'CON_res\\abc')
+        self.assertEqual(sanitize_path('CON.'), 'CON#')
+        self.assertEqual(sanitize_path('CON..'), 'CON_res.#')
+        self.assertEqual(sanitize_path('\\\\.\\CON'), '\\\\.\\CON')
+        self.assertEqual(sanitize_path('\\\\.\\CON\\abc'), '\\\\.\\CON_res\\abc')
+        self.assertEqual(sanitize_path('con.opus'), 'con_res.opus')
+        self.assertEqual(sanitize_path('CONOUT$'), 'CONOUT$_res')
+        self.assertEqual(sanitize_path('ICON.png'), 'ICON.png')
+        self.assertEqual(sanitize_path('abc.CON'), 'abc.CON')
+        self.assertEqual(sanitize_path('CONTENT.mp4'), 'CONTENT.mp4')
+        self.assertEqual(sanitize_path('\\\\.\\con'), '\\\\.\\con')
+
         # Check with nt._path_normpath if available
         try:
             from nt import _path_normpath as nt_path_normpath
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 20aa341ca..e0944f015 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -167,6 +167,21 @@ def IDENTITY(x):
 
 NUMBER_RE = r'\d+(?:\.\d+)?'
 
+# Legacy device names; compare against upper-cased input, since callers match them case-insensitively
+WINDOWS_RESERVED_NAMES = (
+    'CON', 'CONOUT$', 'CONIN$', 'PRN', 'AUX', 'NUL',
+    *tuple(f'{name:s}{num:d}' for name, num in itertools.product(('COM', 'LPT'), range(10))),
+    *tuple(
+        f'{name:s}{ssd:s}'
+        for name, ssd in itertools.product(
+            ('COM', 'LPT'),
+            ('\N{SUPERSCRIPT ONE}', '\N{SUPERSCRIPT TWO}', '\N{SUPERSCRIPT THREE}'),
+        )
+    ),
+)
+# Names are escaped because `CONOUT$`/`CONIN$` contain a regex metacharacter; matching ignores case
+WINDOWS_RESERVED_NAMES_RE = fr'(?i:({"|".join(map(re.escape, WINDOWS_RESERVED_NAMES))}))'
+
 
 @functools.cache
 def preferredencoding():
@@ -679,6 +694,20 @@ def replace_insane(char):
     return result
 
 
+def _sanitize_windows_reserved_names(s):
+    """Append `_res` to the stem of a path part whose stem is a reserved device name
+
+    The stem is the text before the first dot with trailing spaces removed; it is
+    compared case-insensitively against WINDOWS_RESERVED_NAMES (`CON.opus` => `CON_res.opus`).
+    The name stays recognizable to the user. Any other part is returned unchanged.
+    """
+    stem = s.partition('.')[0].rstrip(' ')
+    if stem.upper() not in WINDOWS_RESERVED_NAMES:
+        return s
+    # suffix the reserved portion only, keeping any spaces/extension that followed it
+    return f'{stem}_res{s[len(stem):]}'
+
+
 def _sanitize_path_parts(parts):
     sanitized_parts = []
     for part in parts:
@@ -694,6 +723,7 @@ def _sanitize_path_parts(parts):
         # - trailing dots and spaces (`asdf...` => `asdf..#`)
         # - invalid chars (`<>` => `##`)
         sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
+        sanitized_part = _sanitize_windows_reserved_names(sanitized_part)
         sanitized_parts.append(sanitized_part)
 
     return sanitized_parts
@@ -713,6 +743,13 @@ def sanitize_path(s, force=False):
     if normed.startswith('\\\\'):
         # UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
         parts = normed.split('\\')
+        # shorter inputs (`\\SERVER`) have no fourth component to inspect
+        if len(parts) > 3:
+            # allow user to write to explicitly declared legacy devices
+            if len(parts) == 4 and re.fullmatch(WINDOWS_RESERVED_NAMES_RE, parts[3]):
+                return '\\'.join(parts[:4])
+            # sanitize legacy name device otherwise
+            parts[3] = _sanitize_windows_reserved_names(parts[3])
         root = '\\'.join(parts[:4]) + '\\'
         parts = parts[4:]
     elif normed[1:2] == ':':