mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[cookies] Move YoutubeDLCookieJar to cookies module (#7091)
				
					
				
			Authored by: coletdjnz
This commit is contained in:
		| @@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||||||
| import re | import re | ||||||
| import tempfile | import tempfile | ||||||
| 
 | 
 | ||||||
| from yt_dlp.utils import YoutubeDLCookieJar | from yt_dlp.cookies import YoutubeDLCookieJar | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TestYoutubeDLCookieJar(unittest.TestCase): | class TestYoutubeDLCookieJar(unittest.TestCase): | ||||||
| @@ -47,6 +47,12 @@ class TestYoutubeDLCookieJar(unittest.TestCase): | |||||||
|         # will be ignored |         # will be ignored | ||||||
|         self.assertFalse(cookiejar._cookies) |         self.assertFalse(cookiejar._cookies) | ||||||
| 
 | 
 | ||||||
|  |     def test_get_cookie_header(self): | ||||||
|  |         cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') | ||||||
|  |         cookiejar.load(ignore_discard=True, ignore_expires=True) | ||||||
|  |         header = cookiejar.get_cookie_header('https://www.foobar.foobar') | ||||||
|  |         self.assertIn('HTTPONLY_COOKIE', header) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -2404,7 +2404,7 @@ class YoutubeDL: | |||||||
|         if 'Youtubedl-No-Compression' in res:  # deprecated |         if 'Youtubedl-No-Compression' in res:  # deprecated | ||||||
|             res.pop('Youtubedl-No-Compression', None) |             res.pop('Youtubedl-No-Compression', None) | ||||||
|             res['Accept-Encoding'] = 'identity' |             res['Accept-Encoding'] = 'identity' | ||||||
|         cookies = self._calc_cookies(info_dict['url']) |         cookies = self.cookiejar.get_cookie_header(info_dict['url']) | ||||||
|         if cookies: |         if cookies: | ||||||
|             res['Cookie'] = cookies |             res['Cookie'] = cookies | ||||||
| 
 | 
 | ||||||
| @@ -2416,9 +2416,8 @@ class YoutubeDL: | |||||||
|         return res |         return res | ||||||
| 
 | 
 | ||||||
|     def _calc_cookies(self, url): |     def _calc_cookies(self, url): | ||||||
|         pr = sanitized_Request(url) |         self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version') | ||||||
|         self.cookiejar.add_cookie_header(pr) |         return self.cookiejar.get_cookie_header(url) | ||||||
|         return pr.get_header('Cookie') |  | ||||||
| 
 | 
 | ||||||
|     def _sort_thumbnails(self, thumbnails): |     def _sort_thumbnails(self, thumbnails): | ||||||
|         thumbnails.sort(key=lambda t: ( |         thumbnails.sort(key=lambda t: ( | ||||||
|   | |||||||
| @@ -1,7 +1,9 @@ | |||||||
| import base64 | import base64 | ||||||
|  | import collections | ||||||
| import contextlib | import contextlib | ||||||
| import http.cookiejar | import http.cookiejar | ||||||
| import http.cookies | import http.cookies | ||||||
|  | import io | ||||||
| import json | import json | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| @@ -11,6 +13,7 @@ import subprocess | |||||||
| import sys | import sys | ||||||
| import tempfile | import tempfile | ||||||
| import time | import time | ||||||
|  | import urllib.request | ||||||
| from datetime import datetime, timedelta, timezone | from datetime import datetime, timedelta, timezone | ||||||
| from enum import Enum, auto | from enum import Enum, auto | ||||||
| from hashlib import pbkdf2_hmac | from hashlib import pbkdf2_hmac | ||||||
| @@ -29,11 +32,14 @@ from .dependencies import ( | |||||||
| from .minicurses import MultilinePrinter, QuietMultilinePrinter | from .minicurses import MultilinePrinter, QuietMultilinePrinter | ||||||
| from .utils import ( | from .utils import ( | ||||||
|     Popen, |     Popen, | ||||||
|     YoutubeDLCookieJar, |  | ||||||
|     error_to_str, |     error_to_str, | ||||||
|  |     escape_url, | ||||||
|     expand_path, |     expand_path, | ||||||
|     is_path_like, |     is_path_like, | ||||||
|  |     sanitize_url, | ||||||
|  |     str_or_none, | ||||||
|     try_call, |     try_call, | ||||||
|  |     write_string, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} | CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} | ||||||
| @@ -1091,3 +1097,139 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | |||||||
| 
 | 
 | ||||||
|             else: |             else: | ||||||
|                 morsel = None |                 morsel = None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): | ||||||
|  |     """ | ||||||
|  |     See [1] for cookie file format. | ||||||
|  | 
 | ||||||
|  |     1. https://curl.haxx.se/docs/http-cookies.html | ||||||
|  |     """ | ||||||
|  |     _HTTPONLY_PREFIX = '#HttpOnly_' | ||||||
|  |     _ENTRY_LEN = 7 | ||||||
|  |     _HEADER = '''# Netscape HTTP Cookie File | ||||||
|  | # This file is generated by yt-dlp.  Do not edit. | ||||||
|  | 
 | ||||||
|  | ''' | ||||||
|  |     _CookieFileEntry = collections.namedtuple( | ||||||
|  |         'CookieFileEntry', | ||||||
|  |         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) | ||||||
|  | 
 | ||||||
|  |     def __init__(self, filename=None, *args, **kwargs): | ||||||
|  |         super().__init__(None, *args, **kwargs) | ||||||
|  |         if is_path_like(filename): | ||||||
|  |             filename = os.fspath(filename) | ||||||
|  |         self.filename = filename | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def _true_or_false(cndn): | ||||||
|  |         return 'TRUE' if cndn else 'FALSE' | ||||||
|  | 
 | ||||||
|  |     @contextlib.contextmanager | ||||||
|  |     def open(self, file, *, write=False): | ||||||
|  |         if is_path_like(file): | ||||||
|  |             with open(file, 'w' if write else 'r', encoding='utf-8') as f: | ||||||
|  |                 yield f | ||||||
|  |         else: | ||||||
|  |             if write: | ||||||
|  |                 file.truncate(0) | ||||||
|  |             yield file | ||||||
|  | 
 | ||||||
|  |     def _really_save(self, f, ignore_discard=False, ignore_expires=False): | ||||||
|  |         now = time.time() | ||||||
|  |         for cookie in self: | ||||||
|  |             if (not ignore_discard and cookie.discard | ||||||
|  |                     or not ignore_expires and cookie.is_expired(now)): | ||||||
|  |                 continue | ||||||
|  |             name, value = cookie.name, cookie.value | ||||||
|  |             if value is None: | ||||||
|  |                 # cookies.txt regards 'Set-Cookie: foo' as a cookie | ||||||
|  |                 # with no name, whereas http.cookiejar regards it as a | ||||||
|  |                 # cookie with no value. | ||||||
|  |                 name, value = '', name | ||||||
|  |             f.write('%s\n' % '\t'.join(( | ||||||
|  |                 cookie.domain, | ||||||
|  |                 self._true_or_false(cookie.domain.startswith('.')), | ||||||
|  |                 cookie.path, | ||||||
|  |                 self._true_or_false(cookie.secure), | ||||||
|  |                 str_or_none(cookie.expires, default=''), | ||||||
|  |                 name, value | ||||||
|  |             ))) | ||||||
|  | 
 | ||||||
|  |     def save(self, filename=None, *args, **kwargs): | ||||||
|  |         """ | ||||||
|  |         Save cookies to a file. | ||||||
|  |         Code is taken from CPython 3.6 | ||||||
|  |         https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """ | ||||||
|  | 
 | ||||||
|  |         if filename is None: | ||||||
|  |             if self.filename is not None: | ||||||
|  |                 filename = self.filename | ||||||
|  |             else: | ||||||
|  |                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) | ||||||
|  | 
 | ||||||
|  |         # Store session cookies with `expires` set to 0 instead of an empty string | ||||||
|  |         for cookie in self: | ||||||
|  |             if cookie.expires is None: | ||||||
|  |                 cookie.expires = 0 | ||||||
|  | 
 | ||||||
|  |         with self.open(filename, write=True) as f: | ||||||
|  |             f.write(self._HEADER) | ||||||
|  |             self._really_save(f, *args, **kwargs) | ||||||
|  | 
 | ||||||
|  |     def load(self, filename=None, ignore_discard=False, ignore_expires=False): | ||||||
|  |         """Load cookies from a file.""" | ||||||
|  |         if filename is None: | ||||||
|  |             if self.filename is not None: | ||||||
|  |                 filename = self.filename | ||||||
|  |             else: | ||||||
|  |                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) | ||||||
|  | 
 | ||||||
|  |         def prepare_line(line): | ||||||
|  |             if line.startswith(self._HTTPONLY_PREFIX): | ||||||
|  |                 line = line[len(self._HTTPONLY_PREFIX):] | ||||||
|  |             # comments and empty lines are fine | ||||||
|  |             if line.startswith('#') or not line.strip(): | ||||||
|  |                 return line | ||||||
|  |             cookie_list = line.split('\t') | ||||||
|  |             if len(cookie_list) != self._ENTRY_LEN: | ||||||
|  |                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) | ||||||
|  |             cookie = self._CookieFileEntry(*cookie_list) | ||||||
|  |             if cookie.expires_at and not cookie.expires_at.isdigit(): | ||||||
|  |                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) | ||||||
|  |             return line | ||||||
|  | 
 | ||||||
|  |         cf = io.StringIO() | ||||||
|  |         with self.open(filename) as f: | ||||||
|  |             for line in f: | ||||||
|  |                 try: | ||||||
|  |                     cf.write(prepare_line(line)) | ||||||
|  |                 except http.cookiejar.LoadError as e: | ||||||
|  |                     if f'{line.strip()} '[0] in '[{"': | ||||||
|  |                         raise http.cookiejar.LoadError( | ||||||
|  |                             'Cookies file must be Netscape formatted, not JSON. See  ' | ||||||
|  |                             'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp') | ||||||
|  |                     write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') | ||||||
|  |                     continue | ||||||
|  |         cf.seek(0) | ||||||
|  |         self._really_load(cf, filename, ignore_discard, ignore_expires) | ||||||
|  |         # Session cookies are denoted by the `expires` field being set to either | ||||||
|  |         # an empty string or 0. MozillaCookieJar only recognizes the former | ||||||
|  |         # (see [1]), so we need to force the latter to be recognized as session | ||||||
|  |         # cookies on our own. | ||||||
|  |         # Session cookies may be important for cookie-based authentication: | ||||||
|  |         # e.g. when a user does not check the 'Remember me' check box while | ||||||
|  |         # logging in on a site, some important cookies are usually stored as | ||||||
|  |         # session cookies, so not recognizing them will result in a failed login. | ||||||
|  |         # 1. https://bugs.python.org/issue17164 | ||||||
|  |         for cookie in self: | ||||||
|  |             # Treat `expires=0` cookies as session cookies | ||||||
|  |             if cookie.expires == 0: | ||||||
|  |                 cookie.expires = None | ||||||
|  |                 cookie.discard = True | ||||||
|  | 
 | ||||||
|  |     def get_cookie_header(self, url): | ||||||
|  |         """Generate a Cookie HTTP header for a given url""" | ||||||
|  |         cookie_req = urllib.request.Request(escape_url(sanitize_url(url))) | ||||||
|  |         self.add_cookie_header(cookie_req) | ||||||
|  |         return cookie_req.get_header('Cookie') | ||||||
|   | |||||||
| @@ -3444,7 +3444,7 @@ class InfoExtractor: | |||||||
| 
 | 
 | ||||||
|     def _get_cookies(self, url): |     def _get_cookies(self, url): | ||||||
|         """ Return a http.cookies.SimpleCookie with the cookies for the url """ |         """ Return a http.cookies.SimpleCookie with the cookies for the url """ | ||||||
|         return LenientSimpleCookie(self._downloader._calc_cookies(url)) |         return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url)) | ||||||
| 
 | 
 | ||||||
|     def _apply_first_set_cookie_header(self, url_handle, cookie): |     def _apply_first_set_cookie_header(self, url_handle, cookie): | ||||||
|         """ |         """ | ||||||
|   | |||||||
| @@ -10,6 +10,9 @@ from ._utils import decode_base_n, preferredencoding | |||||||
| from .traversal import traverse_obj | from .traversal import traverse_obj | ||||||
| from ..dependencies import certifi, websockets | from ..dependencies import certifi, websockets | ||||||
| 
 | 
 | ||||||
|  | # isort: split | ||||||
|  | from ..cookies import YoutubeDLCookieJar  # noqa: F401 | ||||||
|  | 
 | ||||||
| has_certifi = bool(certifi) | has_certifi = bool(certifi) | ||||||
| has_websockets = bool(websockets) | has_websockets = bool(websockets) | ||||||
| 
 | 
 | ||||||
|   | |||||||
| @@ -1518,136 +1518,6 @@ def is_path_like(f): | |||||||
|     return isinstance(f, (str, bytes, os.PathLike)) |     return isinstance(f, (str, bytes, os.PathLike)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): |  | ||||||
|     """ |  | ||||||
|     See [1] for cookie file format. |  | ||||||
| 
 |  | ||||||
|     1. https://curl.haxx.se/docs/http-cookies.html |  | ||||||
|     """ |  | ||||||
|     _HTTPONLY_PREFIX = '#HttpOnly_' |  | ||||||
|     _ENTRY_LEN = 7 |  | ||||||
|     _HEADER = '''# Netscape HTTP Cookie File |  | ||||||
| # This file is generated by yt-dlp.  Do not edit. |  | ||||||
| 
 |  | ||||||
| ''' |  | ||||||
|     _CookieFileEntry = collections.namedtuple( |  | ||||||
|         'CookieFileEntry', |  | ||||||
|         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) |  | ||||||
| 
 |  | ||||||
|     def __init__(self, filename=None, *args, **kwargs): |  | ||||||
|         super().__init__(None, *args, **kwargs) |  | ||||||
|         if is_path_like(filename): |  | ||||||
|             filename = os.fspath(filename) |  | ||||||
|         self.filename = filename |  | ||||||
| 
 |  | ||||||
|     @staticmethod |  | ||||||
|     def _true_or_false(cndn): |  | ||||||
|         return 'TRUE' if cndn else 'FALSE' |  | ||||||
| 
 |  | ||||||
|     @contextlib.contextmanager |  | ||||||
|     def open(self, file, *, write=False): |  | ||||||
|         if is_path_like(file): |  | ||||||
|             with open(file, 'w' if write else 'r', encoding='utf-8') as f: |  | ||||||
|                 yield f |  | ||||||
|         else: |  | ||||||
|             if write: |  | ||||||
|                 file.truncate(0) |  | ||||||
|             yield file |  | ||||||
| 
 |  | ||||||
|     def _really_save(self, f, ignore_discard=False, ignore_expires=False): |  | ||||||
|         now = time.time() |  | ||||||
|         for cookie in self: |  | ||||||
|             if (not ignore_discard and cookie.discard |  | ||||||
|                     or not ignore_expires and cookie.is_expired(now)): |  | ||||||
|                 continue |  | ||||||
|             name, value = cookie.name, cookie.value |  | ||||||
|             if value is None: |  | ||||||
|                 # cookies.txt regards 'Set-Cookie: foo' as a cookie |  | ||||||
|                 # with no name, whereas http.cookiejar regards it as a |  | ||||||
|                 # cookie with no value. |  | ||||||
|                 name, value = '', name |  | ||||||
|             f.write('%s\n' % '\t'.join(( |  | ||||||
|                 cookie.domain, |  | ||||||
|                 self._true_or_false(cookie.domain.startswith('.')), |  | ||||||
|                 cookie.path, |  | ||||||
|                 self._true_or_false(cookie.secure), |  | ||||||
|                 str_or_none(cookie.expires, default=''), |  | ||||||
|                 name, value |  | ||||||
|             ))) |  | ||||||
| 
 |  | ||||||
|     def save(self, filename=None, *args, **kwargs): |  | ||||||
|         """ |  | ||||||
|         Save cookies to a file. |  | ||||||
|         Code is taken from CPython 3.6 |  | ||||||
|         https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """ |  | ||||||
| 
 |  | ||||||
|         if filename is None: |  | ||||||
|             if self.filename is not None: |  | ||||||
|                 filename = self.filename |  | ||||||
|             else: |  | ||||||
|                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) |  | ||||||
| 
 |  | ||||||
|         # Store session cookies with `expires` set to 0 instead of an empty string |  | ||||||
|         for cookie in self: |  | ||||||
|             if cookie.expires is None: |  | ||||||
|                 cookie.expires = 0 |  | ||||||
| 
 |  | ||||||
|         with self.open(filename, write=True) as f: |  | ||||||
|             f.write(self._HEADER) |  | ||||||
|             self._really_save(f, *args, **kwargs) |  | ||||||
| 
 |  | ||||||
|     def load(self, filename=None, ignore_discard=False, ignore_expires=False): |  | ||||||
|         """Load cookies from a file.""" |  | ||||||
|         if filename is None: |  | ||||||
|             if self.filename is not None: |  | ||||||
|                 filename = self.filename |  | ||||||
|             else: |  | ||||||
|                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) |  | ||||||
| 
 |  | ||||||
|         def prepare_line(line): |  | ||||||
|             if line.startswith(self._HTTPONLY_PREFIX): |  | ||||||
|                 line = line[len(self._HTTPONLY_PREFIX):] |  | ||||||
|             # comments and empty lines are fine |  | ||||||
|             if line.startswith('#') or not line.strip(): |  | ||||||
|                 return line |  | ||||||
|             cookie_list = line.split('\t') |  | ||||||
|             if len(cookie_list) != self._ENTRY_LEN: |  | ||||||
|                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) |  | ||||||
|             cookie = self._CookieFileEntry(*cookie_list) |  | ||||||
|             if cookie.expires_at and not cookie.expires_at.isdigit(): |  | ||||||
|                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) |  | ||||||
|             return line |  | ||||||
| 
 |  | ||||||
|         cf = io.StringIO() |  | ||||||
|         with self.open(filename) as f: |  | ||||||
|             for line in f: |  | ||||||
|                 try: |  | ||||||
|                     cf.write(prepare_line(line)) |  | ||||||
|                 except http.cookiejar.LoadError as e: |  | ||||||
|                     if f'{line.strip()} '[0] in '[{"': |  | ||||||
|                         raise http.cookiejar.LoadError( |  | ||||||
|                             'Cookies file must be Netscape formatted, not JSON. See  ' |  | ||||||
|                             'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp') |  | ||||||
|                     write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') |  | ||||||
|                     continue |  | ||||||
|         cf.seek(0) |  | ||||||
|         self._really_load(cf, filename, ignore_discard, ignore_expires) |  | ||||||
|         # Session cookies are denoted by the `expires` field being set to either |  | ||||||
|         # an empty string or 0. MozillaCookieJar only recognizes the former |  | ||||||
|         # (see [1]), so we need to force the latter to be recognized as session |  | ||||||
|         # cookies on our own. |  | ||||||
|         # Session cookies may be important for cookie-based authentication: |  | ||||||
|         # e.g. when a user does not check the 'Remember me' check box while |  | ||||||
|         # logging in on a site, some important cookies are usually stored as |  | ||||||
|         # session cookies, so not recognizing them will result in a failed login. |  | ||||||
|         # 1. https://bugs.python.org/issue17164 |  | ||||||
|         for cookie in self: |  | ||||||
|             # Treat `expires=0` cookies as session cookies |  | ||||||
|             if cookie.expires == 0: |  | ||||||
|                 cookie.expires = None |  | ||||||
|                 cookie.discard = True |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor): | class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor): | ||||||
|     def __init__(self, cookiejar=None): |     def __init__(self, cookiejar=None): | ||||||
|         urllib.request.HTTPCookieProcessor.__init__(self, cookiejar) |         urllib.request.HTTPCookieProcessor.__init__(self, cookiejar) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 coletdjnz
					coletdjnz