mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 00:25:15 +00:00 
			
		
		
		
	[cookies] Move YoutubeDLCookieJar to cookies module (#7091)
				
					
				
			Authored by: coletdjnz
This commit is contained in:
		@@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
import re
 | 
			
		||||
import tempfile
 | 
			
		||||
 | 
			
		||||
from yt_dlp.utils import YoutubeDLCookieJar
 | 
			
		||||
from yt_dlp.cookies import YoutubeDLCookieJar
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestYoutubeDLCookieJar(unittest.TestCase):
 | 
			
		||||
@@ -47,6 +47,12 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
 | 
			
		||||
        # will be ignored
 | 
			
		||||
        self.assertFalse(cookiejar._cookies)
 | 
			
		||||
 | 
			
		||||
    def test_get_cookie_header(self):
        """The Cookie header built for the test URL contains 'HTTPONLY_COOKIE'."""
        jar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
        jar.load(ignore_discard=True, ignore_expires=True)
        # Build the header once and check the expected cookie name is present in it
        self.assertIn('HTTPONLY_COOKIE', jar.get_cookie_header('https://www.foobar.foobar'))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -2404,7 +2404,7 @@ class YoutubeDL:
 | 
			
		||||
        if 'Youtubedl-No-Compression' in res:  # deprecated
 | 
			
		||||
            res.pop('Youtubedl-No-Compression', None)
 | 
			
		||||
            res['Accept-Encoding'] = 'identity'
 | 
			
		||||
        cookies = self._calc_cookies(info_dict['url'])
 | 
			
		||||
        cookies = self.cookiejar.get_cookie_header(info_dict['url'])
 | 
			
		||||
        if cookies:
 | 
			
		||||
            res['Cookie'] = cookies
 | 
			
		||||
 | 
			
		||||
@@ -2416,9 +2416,8 @@ class YoutubeDL:
 | 
			
		||||
        return res
 | 
			
		||||
 | 
			
		||||
    def _calc_cookies(self, url):
 | 
			
		||||
        pr = sanitized_Request(url)
 | 
			
		||||
        self.cookiejar.add_cookie_header(pr)
 | 
			
		||||
        return pr.get_header('Cookie')
 | 
			
		||||
        self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
 | 
			
		||||
        return self.cookiejar.get_cookie_header(url)
 | 
			
		||||
 | 
			
		||||
    def _sort_thumbnails(self, thumbnails):
 | 
			
		||||
        thumbnails.sort(key=lambda t: (
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,9 @@
 | 
			
		||||
import base64
 | 
			
		||||
import collections
 | 
			
		||||
import contextlib
 | 
			
		||||
import http.cookiejar
 | 
			
		||||
import http.cookies
 | 
			
		||||
import io
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
@@ -11,6 +13,7 @@ import subprocess
 | 
			
		||||
import sys
 | 
			
		||||
import tempfile
 | 
			
		||||
import time
 | 
			
		||||
import urllib.request
 | 
			
		||||
from datetime import datetime, timedelta, timezone
 | 
			
		||||
from enum import Enum, auto
 | 
			
		||||
from hashlib import pbkdf2_hmac
 | 
			
		||||
@@ -29,11 +32,14 @@ from .dependencies import (
 | 
			
		||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
 | 
			
		||||
from .utils import (
 | 
			
		||||
    Popen,
 | 
			
		||||
    YoutubeDLCookieJar,
 | 
			
		||||
    error_to_str,
 | 
			
		||||
    escape_url,
 | 
			
		||||
    expand_path,
 | 
			
		||||
    is_path_like,
 | 
			
		||||
    sanitize_url,
 | 
			
		||||
    str_or_none,
 | 
			
		||||
    try_call,
 | 
			
		||||
    write_string,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
 | 
			
		||||
@@ -1091,3 +1097,139 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
                morsel = None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Netscape-format cookie jar that can be backed by a path or an open text stream.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly entries; it is stripped from a line before validation
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename  Path-like object or file-like object used as the default
                         target of load()/save(); may be None.
        Remaining arguments are forwarded to MozillaCookieJar.
        """
        # The parent is initialised without a filename so that a file-like
        # object can be stored in self.filename afterwards.
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie-file spelling ('TRUE'/'FALSE') of a truthy/falsy value."""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        Path-like values are opened (UTF-8) and closed on exit. Anything else is
        treated as an already-open stream and is yielded as-is without being closed.

        @param file   Path-like object or file-like object
        @param write  If true, the target is emptied before being yielded
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position, so rewind first.
                # Otherwise a stream that was previously read (e.g. by load())
                # is written at its old offset and the gap is padded with NULs.
                file.seek(0)
                file.truncate()
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        """
        Write one tab-separated line per cookie to the stream `f`.

        Cookies marked `discard` are skipped unless `ignore_discard` is set, and
        expired cookies are skipped unless `ignore_expires` is set.
        """
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.

        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        @param filename  Target path or stream; defaults to self.filename
        Remaining arguments are forwarded to _really_save().
        Raises ValueError if no target is available.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        Each line is validated first. Malformed entries are skipped with a
        warning, except that a line which looks like JSON aborts the load.

        @param filename  Source path or stream; defaults to self.filename
        Raises ValueError if no source is available, and
        http.cookiejar.LoadError if a rejected line looks like JSON.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line (minus any HttpOnly prefix) or raises LoadError
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Validated lines are collected in memory and handed to the stock parser
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # The trailing space keeps the [0] lookup safe for blank lines
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See  '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """
        Generate a Cookie HTTP header for a given url

        @returns  The value of the 'Cookie' header that add_cookie_header() put
                  on a request for `url`, or None if no such header was added.
        """
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')
 | 
			
		||||
 
 | 
			
		||||
@@ -3444,7 +3444,7 @@ class InfoExtractor:
 | 
			
		||||
 | 
			
		||||
    def _get_cookies(self, url):
        """ Return a http.cookies.SimpleCookie with the cookies for the url """
        # Ask the cookie jar directly instead of going through the downloader's
        # `_calc_cookies` wrapper; only a single return statement is kept.
        return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
 | 
			
		||||
 | 
			
		||||
    def _apply_first_set_cookie_header(self, url_handle, cookie):
 | 
			
		||||
        """
 | 
			
		||||
 
 | 
			
		||||
@@ -10,6 +10,9 @@ from ._utils import decode_base_n, preferredencoding
 | 
			
		||||
from .traversal import traverse_obj
 | 
			
		||||
from ..dependencies import certifi, websockets
 | 
			
		||||
 | 
			
		||||
# isort: split
 | 
			
		||||
from ..cookies import YoutubeDLCookieJar  # noqa: F401
 | 
			
		||||
 | 
			
		||||
has_certifi = bool(certifi)
 | 
			
		||||
has_websockets = bool(websockets)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1518,136 +1518,6 @@ def is_path_like(f):
 | 
			
		||||
    return isinstance(f, (str, bytes, os.PathLike))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Netscape-format cookie jar that can be backed by a path or an open text stream.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly entries; it is stripped from a line before validation
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename  Path-like object or file-like object used as the default
                         target of load()/save(); may be None.
        Remaining arguments are forwarded to MozillaCookieJar.
        """
        # The parent gets no filename so a file-like object can be kept in self.filename
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return 'TRUE' for a truthy value and 'FALSE' otherwise."""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        Path-like values are opened (UTF-8) and closed on exit. Other objects are
        assumed to be open streams and are yielded unchanged and left open.

        @param file   Path-like object or file-like object
        @param write  If true, the target is emptied before being yielded
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # Rewind before truncating: truncate() keeps the stream position,
                # so writing to a previously-read stream would otherwise start at
                # the old offset and pad the beginning with NULs.
                file.seek(0)
                file.truncate()
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        """
        Write one tab-separated line per cookie to the stream `f`.

        Cookies marked `discard` are skipped unless `ignore_discard` is set, and
        expired cookies are skipped unless `ignore_expires` is set.
        """
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.

        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        @param filename  Target path or stream; defaults to self.filename
        Remaining arguments are forwarded to _really_save().
        Raises ValueError if no target is available.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        Each line is validated first. Malformed entries are skipped with a
        warning, except that a line which looks like JSON aborts the load.

        @param filename  Source path or stream; defaults to self.filename
        Raises ValueError if no source is available, and
        http.cookiejar.LoadError if a rejected line looks like JSON.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line (minus any HttpOnly prefix) or raises LoadError
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Validated lines are buffered in memory and then parsed by the base class
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # The trailing space keeps the [0] lookup safe for blank lines
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See  '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
 | 
			
		||||
    def __init__(self, cookiejar=None):
 | 
			
		||||
        urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user