mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[cookies] Move YoutubeDLCookieJar to cookies module (#7091)
				
					
				
			Authored by: coletdjnz
This commit is contained in:
		| @@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
| import re | ||||
| import tempfile | ||||
| 
 | ||||
| from yt_dlp.utils import YoutubeDLCookieJar | ||||
| from yt_dlp.cookies import YoutubeDLCookieJar | ||||
| 
 | ||||
| 
 | ||||
| class TestYoutubeDLCookieJar(unittest.TestCase): | ||||
| @@ -47,6 +47,12 @@ class TestYoutubeDLCookieJar(unittest.TestCase): | ||||
|         # will be ignored | ||||
|         self.assertFalse(cookiejar._cookies) | ||||
| 
 | ||||
|     def test_get_cookie_header(self): | ||||
|         cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') | ||||
|         cookiejar.load(ignore_discard=True, ignore_expires=True) | ||||
|         header = cookiejar.get_cookie_header('https://www.foobar.foobar') | ||||
|         self.assertIn('HTTPONLY_COOKIE', header) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -2404,7 +2404,7 @@ class YoutubeDL: | ||||
|         if 'Youtubedl-No-Compression' in res:  # deprecated | ||||
|             res.pop('Youtubedl-No-Compression', None) | ||||
|             res['Accept-Encoding'] = 'identity' | ||||
|         cookies = self._calc_cookies(info_dict['url']) | ||||
|         cookies = self.cookiejar.get_cookie_header(info_dict['url']) | ||||
|         if cookies: | ||||
|             res['Cookie'] = cookies | ||||
| 
 | ||||
| @@ -2416,9 +2416,8 @@ class YoutubeDL: | ||||
|         return res | ||||
| 
 | ||||
|     def _calc_cookies(self, url): | ||||
|         pr = sanitized_Request(url) | ||||
|         self.cookiejar.add_cookie_header(pr) | ||||
|         return pr.get_header('Cookie') | ||||
|         self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version') | ||||
|         return self.cookiejar.get_cookie_header(url) | ||||
| 
 | ||||
|     def _sort_thumbnails(self, thumbnails): | ||||
|         thumbnails.sort(key=lambda t: ( | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| import base64 | ||||
| import collections | ||||
| import contextlib | ||||
| import http.cookiejar | ||||
| import http.cookies | ||||
| import io | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| @@ -11,6 +13,7 @@ import subprocess | ||||
| import sys | ||||
| import tempfile | ||||
| import time | ||||
| import urllib.request | ||||
| from datetime import datetime, timedelta, timezone | ||||
| from enum import Enum, auto | ||||
| from hashlib import pbkdf2_hmac | ||||
| @@ -29,11 +32,14 @@ from .dependencies import ( | ||||
| from .minicurses import MultilinePrinter, QuietMultilinePrinter | ||||
| from .utils import ( | ||||
|     Popen, | ||||
|     YoutubeDLCookieJar, | ||||
|     error_to_str, | ||||
|     escape_url, | ||||
|     expand_path, | ||||
|     is_path_like, | ||||
|     sanitize_url, | ||||
|     str_or_none, | ||||
|     try_call, | ||||
|     write_string, | ||||
| ) | ||||
| 
 | ||||
| CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} | ||||
| @@ -1091,3 +1097,139 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | ||||
| 
 | ||||
|             else: | ||||
|                 morsel = None | ||||
| 
 | ||||
| 
 | ||||
| class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): | ||||
|     """ | ||||
|     See [1] for cookie file format. | ||||
| 
 | ||||
|     1. https://curl.haxx.se/docs/http-cookies.html | ||||
|     """ | ||||
|     _HTTPONLY_PREFIX = '#HttpOnly_' | ||||
|     _ENTRY_LEN = 7 | ||||
|     _HEADER = '''# Netscape HTTP Cookie File | ||||
| # This file is generated by yt-dlp.  Do not edit. | ||||
| 
 | ||||
| ''' | ||||
|     _CookieFileEntry = collections.namedtuple( | ||||
|         'CookieFileEntry', | ||||
|         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) | ||||
| 
 | ||||
|     def __init__(self, filename=None, *args, **kwargs): | ||||
|         super().__init__(None, *args, **kwargs) | ||||
|         if is_path_like(filename): | ||||
|             filename = os.fspath(filename) | ||||
|         self.filename = filename | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _true_or_false(cndn): | ||||
|         return 'TRUE' if cndn else 'FALSE' | ||||
| 
 | ||||
|     @contextlib.contextmanager | ||||
|     def open(self, file, *, write=False): | ||||
|         if is_path_like(file): | ||||
|             with open(file, 'w' if write else 'r', encoding='utf-8') as f: | ||||
|                 yield f | ||||
|         else: | ||||
|             if write: | ||||
|                 file.truncate(0) | ||||
|             yield file | ||||
| 
 | ||||
|     def _really_save(self, f, ignore_discard=False, ignore_expires=False): | ||||
|         now = time.time() | ||||
|         for cookie in self: | ||||
|             if (not ignore_discard and cookie.discard | ||||
|                     or not ignore_expires and cookie.is_expired(now)): | ||||
|                 continue | ||||
|             name, value = cookie.name, cookie.value | ||||
|             if value is None: | ||||
|                 # cookies.txt regards 'Set-Cookie: foo' as a cookie | ||||
|                 # with no name, whereas http.cookiejar regards it as a | ||||
|                 # cookie with no value. | ||||
|                 name, value = '', name | ||||
|             f.write('%s\n' % '\t'.join(( | ||||
|                 cookie.domain, | ||||
|                 self._true_or_false(cookie.domain.startswith('.')), | ||||
|                 cookie.path, | ||||
|                 self._true_or_false(cookie.secure), | ||||
|                 str_or_none(cookie.expires, default=''), | ||||
|                 name, value | ||||
|             ))) | ||||
| 
 | ||||
|     def save(self, filename=None, *args, **kwargs): | ||||
|         """ | ||||
|         Save cookies to a file. | ||||
|         Code is taken from CPython 3.6 | ||||
|         https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """ | ||||
| 
 | ||||
|         if filename is None: | ||||
|             if self.filename is not None: | ||||
|                 filename = self.filename | ||||
|             else: | ||||
|                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) | ||||
| 
 | ||||
|         # Store session cookies with `expires` set to 0 instead of an empty string | ||||
|         for cookie in self: | ||||
|             if cookie.expires is None: | ||||
|                 cookie.expires = 0 | ||||
| 
 | ||||
|         with self.open(filename, write=True) as f: | ||||
|             f.write(self._HEADER) | ||||
|             self._really_save(f, *args, **kwargs) | ||||
| 
 | ||||
|     def load(self, filename=None, ignore_discard=False, ignore_expires=False): | ||||
|         """Load cookies from a file.""" | ||||
|         if filename is None: | ||||
|             if self.filename is not None: | ||||
|                 filename = self.filename | ||||
|             else: | ||||
|                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) | ||||
| 
 | ||||
|         def prepare_line(line): | ||||
|             if line.startswith(self._HTTPONLY_PREFIX): | ||||
|                 line = line[len(self._HTTPONLY_PREFIX):] | ||||
|             # comments and empty lines are fine | ||||
|             if line.startswith('#') or not line.strip(): | ||||
|                 return line | ||||
|             cookie_list = line.split('\t') | ||||
|             if len(cookie_list) != self._ENTRY_LEN: | ||||
|                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) | ||||
|             cookie = self._CookieFileEntry(*cookie_list) | ||||
|             if cookie.expires_at and not cookie.expires_at.isdigit(): | ||||
|                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) | ||||
|             return line | ||||
| 
 | ||||
|         cf = io.StringIO() | ||||
|         with self.open(filename) as f: | ||||
|             for line in f: | ||||
|                 try: | ||||
|                     cf.write(prepare_line(line)) | ||||
|                 except http.cookiejar.LoadError as e: | ||||
|                     if f'{line.strip()} '[0] in '[{"': | ||||
|                         raise http.cookiejar.LoadError( | ||||
|                             'Cookies file must be Netscape formatted, not JSON. See  ' | ||||
|                             'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp') | ||||
|                     write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') | ||||
|                     continue | ||||
|         cf.seek(0) | ||||
|         self._really_load(cf, filename, ignore_discard, ignore_expires) | ||||
|         # Session cookies are denoted by the `expires` field being set to | ||||
|         # either an empty string or 0. MozillaCookieJar only recognizes the former | ||||
|         # (see [1]). So we need to force the latter to be recognized as session | ||||
|         # cookies on our own. | ||||
|         # Session cookies may be important for cookies-based authentication, | ||||
|         # e.g. usually, when a user does not check the 'Remember me' check box while | ||||
|         # logging in on a site, some important cookies are stored as session | ||||
|         # cookies, so not recognizing them will result in a failed login. | ||||
|         # 1. https://bugs.python.org/issue17164 | ||||
|         for cookie in self: | ||||
|             # Treat `expires=0` cookies as session cookies | ||||
|             if cookie.expires == 0: | ||||
|                 cookie.expires = None | ||||
|                 cookie.discard = True | ||||
| 
 | ||||
|     def get_cookie_header(self, url): | ||||
|         """Generate a Cookie HTTP header for a given url""" | ||||
|         cookie_req = urllib.request.Request(escape_url(sanitize_url(url))) | ||||
|         self.add_cookie_header(cookie_req) | ||||
|         return cookie_req.get_header('Cookie') | ||||
|   | ||||
| @@ -3444,7 +3444,7 @@ class InfoExtractor: | ||||
| 
 | ||||
|     def _get_cookies(self, url): | ||||
|         """ Return a http.cookies.SimpleCookie with the cookies for the url """ | ||||
|         return LenientSimpleCookie(self._downloader._calc_cookies(url)) | ||||
|         return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url)) | ||||
| 
 | ||||
|     def _apply_first_set_cookie_header(self, url_handle, cookie): | ||||
|         """ | ||||
|   | ||||
| @@ -10,6 +10,9 @@ from ._utils import decode_base_n, preferredencoding | ||||
| from .traversal import traverse_obj | ||||
| from ..dependencies import certifi, websockets | ||||
| 
 | ||||
| # isort: split | ||||
| from ..cookies import YoutubeDLCookieJar  # noqa: F401 | ||||
| 
 | ||||
| has_certifi = bool(certifi) | ||||
| has_websockets = bool(websockets) | ||||
| 
 | ||||
|   | ||||
| @@ -1518,136 +1518,6 @@ def is_path_like(f): | ||||
|     return isinstance(f, (str, bytes, os.PathLike)) | ||||
| 
 | ||||
| 
 | ||||
| class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): | ||||
|     """ | ||||
|     See [1] for cookie file format. | ||||
| 
 | ||||
|     1. https://curl.haxx.se/docs/http-cookies.html | ||||
|     """ | ||||
|     _HTTPONLY_PREFIX = '#HttpOnly_' | ||||
|     _ENTRY_LEN = 7 | ||||
|     _HEADER = '''# Netscape HTTP Cookie File | ||||
| # This file is generated by yt-dlp.  Do not edit. | ||||
| 
 | ||||
| ''' | ||||
|     _CookieFileEntry = collections.namedtuple( | ||||
|         'CookieFileEntry', | ||||
|         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) | ||||
| 
 | ||||
|     def __init__(self, filename=None, *args, **kwargs): | ||||
|         super().__init__(None, *args, **kwargs) | ||||
|         if is_path_like(filename): | ||||
|             filename = os.fspath(filename) | ||||
|         self.filename = filename | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _true_or_false(cndn): | ||||
|         return 'TRUE' if cndn else 'FALSE' | ||||
| 
 | ||||
|     @contextlib.contextmanager | ||||
|     def open(self, file, *, write=False): | ||||
|         if is_path_like(file): | ||||
|             with open(file, 'w' if write else 'r', encoding='utf-8') as f: | ||||
|                 yield f | ||||
|         else: | ||||
|             if write: | ||||
|                 file.truncate(0) | ||||
|             yield file | ||||
| 
 | ||||
|     def _really_save(self, f, ignore_discard=False, ignore_expires=False): | ||||
|         now = time.time() | ||||
|         for cookie in self: | ||||
|             if (not ignore_discard and cookie.discard | ||||
|                     or not ignore_expires and cookie.is_expired(now)): | ||||
|                 continue | ||||
|             name, value = cookie.name, cookie.value | ||||
|             if value is None: | ||||
|                 # cookies.txt regards 'Set-Cookie: foo' as a cookie | ||||
|                 # with no name, whereas http.cookiejar regards it as a | ||||
|                 # cookie with no value. | ||||
|                 name, value = '', name | ||||
|             f.write('%s\n' % '\t'.join(( | ||||
|                 cookie.domain, | ||||
|                 self._true_or_false(cookie.domain.startswith('.')), | ||||
|                 cookie.path, | ||||
|                 self._true_or_false(cookie.secure), | ||||
|                 str_or_none(cookie.expires, default=''), | ||||
|                 name, value | ||||
|             ))) | ||||
| 
 | ||||
|     def save(self, filename=None, *args, **kwargs): | ||||
|         """ | ||||
|         Save cookies to a file. | ||||
|         Code is taken from CPython 3.6 | ||||
|         https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """ | ||||
| 
 | ||||
|         if filename is None: | ||||
|             if self.filename is not None: | ||||
|                 filename = self.filename | ||||
|             else: | ||||
|                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) | ||||
| 
 | ||||
|         # Store session cookies with `expires` set to 0 instead of an empty string | ||||
|         for cookie in self: | ||||
|             if cookie.expires is None: | ||||
|                 cookie.expires = 0 | ||||
| 
 | ||||
|         with self.open(filename, write=True) as f: | ||||
|             f.write(self._HEADER) | ||||
|             self._really_save(f, *args, **kwargs) | ||||
| 
 | ||||
|     def load(self, filename=None, ignore_discard=False, ignore_expires=False): | ||||
|         """Load cookies from a file.""" | ||||
|         if filename is None: | ||||
|             if self.filename is not None: | ||||
|                 filename = self.filename | ||||
|             else: | ||||
|                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) | ||||
| 
 | ||||
|         def prepare_line(line): | ||||
|             if line.startswith(self._HTTPONLY_PREFIX): | ||||
|                 line = line[len(self._HTTPONLY_PREFIX):] | ||||
|             # comments and empty lines are fine | ||||
|             if line.startswith('#') or not line.strip(): | ||||
|                 return line | ||||
|             cookie_list = line.split('\t') | ||||
|             if len(cookie_list) != self._ENTRY_LEN: | ||||
|                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) | ||||
|             cookie = self._CookieFileEntry(*cookie_list) | ||||
|             if cookie.expires_at and not cookie.expires_at.isdigit(): | ||||
|                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) | ||||
|             return line | ||||
| 
 | ||||
|         cf = io.StringIO() | ||||
|         with self.open(filename) as f: | ||||
|             for line in f: | ||||
|                 try: | ||||
|                     cf.write(prepare_line(line)) | ||||
|                 except http.cookiejar.LoadError as e: | ||||
|                     if f'{line.strip()} '[0] in '[{"': | ||||
|                         raise http.cookiejar.LoadError( | ||||
|                             'Cookies file must be Netscape formatted, not JSON. See  ' | ||||
|                             'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp') | ||||
|                     write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') | ||||
|                     continue | ||||
|         cf.seek(0) | ||||
|         self._really_load(cf, filename, ignore_discard, ignore_expires) | ||||
|         # Session cookies are denoted by the `expires` field being set to | ||||
|         # either an empty string or 0. MozillaCookieJar only recognizes the former | ||||
|         # (see [1]). So we need to force the latter to be recognized as session | ||||
|         # cookies on our own. | ||||
|         # Session cookies may be important for cookies-based authentication, | ||||
|         # e.g. usually, when a user does not check the 'Remember me' check box while | ||||
|         # logging in on a site, some important cookies are stored as session | ||||
|         # cookies, so not recognizing them will result in a failed login. | ||||
|         # 1. https://bugs.python.org/issue17164 | ||||
|         for cookie in self: | ||||
|             # Treat `expires=0` cookies as session cookies | ||||
|             if cookie.expires == 0: | ||||
|                 cookie.expires = None | ||||
|                 cookie.discard = True | ||||
| 
 | ||||
| 
 | ||||
| class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor): | ||||
|     def __init__(self, cookiejar=None): | ||||
|         urllib.request.HTTPCookieProcessor.__init__(self, cookiejar) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 coletdjnz
					coletdjnz