mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-21 07:55:56 +00:00
[cookies] Ignore cookies with control characters (#15862)
http.cookies.Morsel was patched in Python 3.14.3 and 3.13.12 to raise a CookieError if the cookie name, value or any attribute of its input contains a control character.

yt_dlp.cookies.LenientSimpleCookie now preemptively discards any cookies containing control characters, which is consistent with its more lenient parsing.

Ref: https://github.com/python/cpython/issues/143919
Closes #15849
Authored by: bashonly, syphyr
Co-authored-by: syphyr <syphyr@gmail.com>
This commit is contained in:
@@ -205,8 +205,8 @@ class TestLenientSimpleCookie(unittest.TestCase):
|
|||||||
),
|
),
|
||||||
(
|
(
|
||||||
'Test quoted cookie',
|
'Test quoted cookie',
|
||||||
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
|
'keebler="E=mc2; L=\\"Loves\\"; fudge=;"',
|
||||||
{'keebler': 'E=mc2; L="Loves"; fudge=\012;'},
|
{'keebler': 'E=mc2; L="Loves"; fudge=;'},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"Allow '=' in an unquoted value",
|
"Allow '=' in an unquoted value",
|
||||||
@@ -328,4 +328,30 @@ class TestLenientSimpleCookie(unittest.TestCase):
|
|||||||
'Key=Value; [Invalid]=Value; Another=Value',
|
'Key=Value; [Invalid]=Value; Another=Value',
|
||||||
{'Key': 'Value', 'Another': 'Value'},
|
{'Key': 'Value', 'Another': 'Value'},
|
||||||
),
|
),
|
||||||
|
# Ref: https://github.com/python/cpython/issues/143919
|
||||||
|
(
|
||||||
|
'Test invalid cookie name w/ control character',
|
||||||
|
'foo\012=bar;',
|
||||||
|
{},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'Test invalid cookie name w/ control character 2',
|
||||||
|
'foo\015baz=bar',
|
||||||
|
{},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'Test invalid cookie name w/ control character followed by valid cookie',
|
||||||
|
'foo\015=bar; x=y;',
|
||||||
|
{'x': 'y'},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'Test invalid cookie value w/ control character',
|
||||||
|
'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
|
||||||
|
{},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'Test invalid quoted attribute value w/ control character',
|
||||||
|
'Customer="WILE_E_COYOTE"; Version="1\\012"; Path="/acme"',
|
||||||
|
{},
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1168,6 +1168,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
|||||||
# We use Morsel's legal key chars to avoid errors on setting values
|
# We use Morsel's legal key chars to avoid errors on setting values
|
||||||
_LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
|
_LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
|
||||||
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
|
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
|
||||||
|
_LEGAL_KEY_RE = re.compile(rf'[{_LEGAL_KEY_CHARS}]+', re.ASCII)
|
||||||
|
|
||||||
_RESERVED = {
|
_RESERVED = {
|
||||||
'expires',
|
'expires',
|
||||||
@@ -1185,17 +1186,17 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
|||||||
|
|
||||||
# Added 'bad' group to catch the remaining value
|
# Added 'bad' group to catch the remaining value
|
||||||
_COOKIE_PATTERN = re.compile(r'''
|
_COOKIE_PATTERN = re.compile(r'''
|
||||||
\s* # Optional whitespace at start of cookie
|
[ ]* # Optional whitespace at start of cookie
|
||||||
(?P<key> # Start of group 'key'
|
(?P<key> # Start of group 'key'
|
||||||
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
|
[^ =;]+ # Match almost anything here for now and validate later
|
||||||
) # End of group 'key'
|
) # End of group 'key'
|
||||||
( # Optional group: there may not be a value.
|
( # Optional group: there may not be a value.
|
||||||
\s*=\s* # Equal Sign
|
[ ]*=[ ]* # Equal Sign
|
||||||
( # Start of potential value
|
( # Start of potential value
|
||||||
(?P<val> # Start of group 'val'
|
(?P<val> # Start of group 'val'
|
||||||
"(?:[^\\"]|\\.)*" # Any doublequoted string
|
"(?:[^\\"]|\\.)*" # Any doublequoted string
|
||||||
| # or
|
| # or
|
||||||
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
|
\w{3},\ [\w\d -]{9,11}\ [\d:]{8}\ GMT # Special case for "expires" attr
|
||||||
| # or
|
| # or
|
||||||
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
|
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
|
||||||
) # End of group 'val'
|
) # End of group 'val'
|
||||||
@@ -1203,10 +1204,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
|||||||
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
|
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
|
||||||
) # End of potential value
|
) # End of potential value
|
||||||
)? # End of optional value group
|
)? # End of optional value group
|
||||||
\s* # Any number of spaces.
|
[ ]* # Any number of spaces.
|
||||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
([ ]+|;|$) # Ending either at space, semicolon, or EOS.
|
||||||
''', re.ASCII | re.VERBOSE)
|
''', re.ASCII | re.VERBOSE)
|
||||||
|
|
||||||
|
# http.cookies.Morsel raises on values w/ control characters in Python 3.14.3+ & 3.13.12+
|
||||||
|
# Ref: https://github.com/python/cpython/issues/143919
|
||||||
|
_CONTROL_CHARACTER_RE = re.compile(r'[\x00-\x1F\x7F]')
|
||||||
|
|
||||||
def load(self, data):
|
def load(self, data):
|
||||||
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
|
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
|
||||||
if not isinstance(data, str):
|
if not isinstance(data, str):
|
||||||
@@ -1219,6 +1224,9 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
key, value = match.group('key', 'val')
|
key, value = match.group('key', 'val')
|
||||||
|
if not self._LEGAL_KEY_RE.fullmatch(key):
|
||||||
|
morsel = None
|
||||||
|
continue
|
||||||
|
|
||||||
is_attribute = False
|
is_attribute = False
|
||||||
if key.startswith('$'):
|
if key.startswith('$'):
|
||||||
@@ -1237,6 +1245,14 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
|||||||
value = True
|
value = True
|
||||||
else:
|
else:
|
||||||
value, _ = self.value_decode(value)
|
value, _ = self.value_decode(value)
|
||||||
|
# Guard against control characters in quoted attribute values
|
||||||
|
if self._CONTROL_CHARACTER_RE.search(value):
|
||||||
|
# While discarding the entire morsel is not very lenient,
|
||||||
|
# it's better than http.cookies.Morsel raising a CookieError
|
||||||
|
# and it's probably better to err on the side of caution
|
||||||
|
self.pop(morsel.key, None)
|
||||||
|
morsel = None
|
||||||
|
continue
|
||||||
|
|
||||||
morsel[key] = value
|
morsel[key] = value
|
||||||
|
|
||||||
@@ -1246,6 +1262,10 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
|||||||
elif value is not None:
|
elif value is not None:
|
||||||
morsel = self.get(key, http.cookies.Morsel())
|
morsel = self.get(key, http.cookies.Morsel())
|
||||||
real_value, coded_value = self.value_decode(value)
|
real_value, coded_value = self.value_decode(value)
|
||||||
|
# Guard against control characters in quoted cookie values
|
||||||
|
if self._CONTROL_CHARACTER_RE.search(real_value):
|
||||||
|
morsel = None
|
||||||
|
continue
|
||||||
morsel.set(key, real_value, coded_value)
|
morsel.set(key, real_value, coded_value)
|
||||||
self[key] = morsel
|
self[key] = morsel
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user