1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-09 23:08:32 +00:00

test cookie

This commit is contained in:
c-basalt 2024-12-31 04:34:27 -05:00
parent b87a0582c5
commit f0c1da2528
4 changed files with 148 additions and 36 deletions

View File

@ -1,9 +1,14 @@
#!/usr/bin/env python3
# Allow direct execution
from __future__ import annotations
import os
import dataclasses
import datetime
import time
import sys
import unittest
import http.cookiejar
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@ -11,14 +16,43 @@
from test.helper import (
FakeYDL,
)
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.jsinterp.common import ExternalJSI
from yt_dlp.jsinterp._deno import DenoJSI, DenoJITlessJSI, DenoJSDomJSI
from yt_dlp.jsinterp._phantomjs import PhantomJSJSI
@dataclasses.dataclass
class NetscapeFields:
    """Plain description of a cookie used to build and compare test cookies.

    Instances can be converted to ``http.cookiejar.Cookie`` objects and can be
    compared against either another ``NetscapeFields`` or a ``Cookie``; only
    the six fields declared below take part in the comparison.
    """

    name: str
    value: str
    domain: str
    path: str
    secure: bool
    # Expiry as a POSIX timestamp (seconds); ``None`` means no expiry is set.
    expires: int | None

    def to_cookie(self):
        """Return an ``http.cookiejar.Cookie`` carrying these fields.

        Keyword arguments are used so the mapping to the (long) ``Cookie``
        constructor signature is explicit.
        """
        return http.cookiejar.Cookie(
            version=0,
            name=self.name,
            value=self.value,
            port=None,
            port_specified=False,
            domain=self.domain,
            domain_specified=True,
            # A leading dot marks the cookie as valid for subdomains too.
            domain_initial_dot=self.domain.startswith('.'),
            path=self.path,
            path_specified=True,
            secure=self.secure,
            expires=self.expires,
            discard=False,
            comment=None,
            comment_url=None,
            rest={},
        )

    def expire_str(self):
        """Format ``expires`` as a UTC date string for a cookie ``expires=`` attribute.

        ``expires`` must be set; ``None`` is rejected by ``fromtimestamp``
        with a ``TypeError``.
        """
        return datetime.datetime.fromtimestamp(
            self.expires, datetime.timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')

    def __eq__(self, other: object):
        """Compare field by field with anything exposing the same six attributes.

        Returns ``NotImplemented`` for objects that lack any of the compared
        attributes, so unrelated types compare unequal instead of raising
        ``AttributeError``.
        """
        compared = ('name', 'value', 'domain', 'path', 'secure', 'expires')
        if not all(hasattr(other, attr) for attr in compared):
            return NotImplemented
        return all(getattr(self, attr) == getattr(other, attr) for attr in compared)
class Base:
class TestExternalJSI(unittest.TestCase):
_JSI_CLASS: type[ExternalJSI] = None
maxDiff = 2000
def setUp(self):
self.ydl = FakeYDL()
@ -52,6 +86,7 @@ def test_execute_dom_script(self):
<script src="https://example.com/script.js"></script>
<script type="text/javascript">
document.getElementById("test-div").innerHTML = "Hello, world!"
console.log('this should not show up');
</script>
</body></html>'''),
'Hello, world!')
@ -59,8 +94,6 @@ def test_execute_dom_script(self):
def test_execute_dom_script_with_error(self):
if 'dom' not in self.jsi._SUPPORTED_FEATURES:
self.skipTest('DOM not supported')
if self.jsi.JSI_KEY == 'PhantomJS':
self.skipTest('PhantomJS does not catch errors')
self.assertEqual(self.jsi.execute(
'console.log(document.getElementById("test-div").innerHTML);',
location='https://example.com',
@ -69,11 +102,67 @@ def test_execute_dom_script_with_error(self):
<script src="https://example.com/script.js"></script>
<script type="text/javascript">
document.getElementById("test-div").innerHTML = "Hello, world!"
console.log('this should not show up');
a = b; // Undefined variable assignment
</script>
</body></html>'''),
'Hello, world!')
def assert_cookiejar_equal(self, cookiejar: http.cookiejar.CookieJar, ref_cookiejar: http.cookiejar.CookieJar):
    """Assert every cookie in ``cookiejar`` has an identical twin in ``ref_cookiejar``.

    Cookies are paired by ``(name, domain)`` and compared via ``repr``. A
    cookie with no counterpart is compared against ``repr(None)`` and fails.
    Only ``cookiejar`` is walked, so extra cookies in ``ref_cookiejar`` are
    not reported.
    """
    for actual in cookiejar:
        expected = None
        for candidate in ref_cookiejar:
            if candidate.name == actual.name and candidate.domain == actual.domain:
                # First match wins, mirroring a ``next(...)`` lookup.
                expected = candidate
                break
        self.assertEqual(repr(actual), repr(expected))
def assert_cookie_str_equal(self, cookie_str, ref_cookie_str):
    """Assert two ``document.cookie``-style strings hold the same pairs.

    Both strings are split on ``'; '`` and compared as sets, so the order of
    the ``name=value`` pairs (and any duplicates) is ignored. The raw strings
    are included in the failure message instead of being printed on every
    call.
    """
    self.assertEqual(
        set(cookie_str.split('; ')), set(ref_cookie_str.split('; ')),
        f'cookie strings differ: {cookie_str!r} != {ref_cookie_str!r}')
def test_execute_cookiejar(self):
    """Check that cookies travel from the cookiejar into the JS runtime and back.

    Skipped when the interpreter under test does not list ``'cookies'`` in
    ``_SUPPORTED_FEATURES``. Two jars are filled identically: ``cookiejar``
    is handed to the interpreter, ``ref_cookiejar`` records what is expected.
    """
    if 'cookies' not in self.jsi._SUPPORTED_FEATURES:
        self.skipTest('Cookies not supported')

    cookiejar = YoutubeDLCookieJar()
    ref_cookiejar = YoutubeDLCookieJar()
    initial_cookies = [
        NetscapeFields('test1', 'test1', '.example.com', '/', False, int(time.time()) + 1000),
        NetscapeFields('test2', 'test2', '.example.com', '/', True, int(time.time()) + 1000),
        NetscapeFields('test3', 'test3', '.example.com', '/123', False, int(time.time()) + 1000),
        NetscapeFields('test4', 'test4', '.example.com', '/456', False, int(time.time()) + 1000),
        NetscapeFields('test5', 'test5', '.example.com', '/123', True, int(time.time()) + 1000),
        NetscapeFields('test6', 'test6', '.example.com', '/456', True, int(time.time()) + 1000),
        NetscapeFields('test1', 'other1', '.other.com', '/', False, int(time.time()) + 1000),
        NetscapeFields('test2', 'other2', '.other.com', '/', False, int(time.time()) + 1000),
        NetscapeFields('test7', 'other7', '.other.com', '/', False, int(time.time()) + 1000),
    ]
    for fields in initial_cookies:
        cookiejar.set_cookie(fields.to_cookie())
        ref_cookiejar.set_cookie(fields.to_cookie())

    # Step 1: a script that only reads document.cookie must leave the jar
    # untouched. Over plain http at /123/456 only test1 and test3 are expected
    # (presumably due to secure/path matching -- confirm against the runtime).
    read_only_output = self.jsi.execute(
        'console.log(document.cookie);',
        location='http://example.com/123/456',
        html='<html><body><div id="test-div">Hello, world!</div></body></html>',
        cookiejar=cookiejar)
    self.assert_cookie_str_equal(read_only_output, 'test1=test1; test3=test3')
    self.assert_cookiejar_equal(cookiejar, ref_cookiejar)

    # Step 2: overwrite test1 from the executed code and test2 from an inline
    # <script> of the page; both changes must be written back to the jar.
    new_cookie_1 = NetscapeFields('test1', 'new1', '.example.com', '/', True, int(time.time()) + 900)
    new_cookie_2 = NetscapeFields('test2', 'new2', '.example.com', '/', True, int(time.time()) + 900)
    ref_cookiejar.set_cookie(new_cookie_1.to_cookie())
    ref_cookiejar.set_cookie(new_cookie_2.to_cookie())
    modifying_js = f'''document.cookie = "test1=new1; secure; expires={new_cookie_1.expire_str()}; domain=.example.com; path=/";
console.log(document.cookie);'''
    modifying_html = f'''<html><body><div id="test-div">Hello, world!</div>
<script>
document.cookie = "test2=new2; secure; expires={new_cookie_2.expire_str()}; domain=.example.com; path=/";
</script>
</body></html>'''
    modified_output = self.jsi.execute(
        modifying_js,
        location='https://example.com/123/456',
        html=modifying_html,
        cookiejar=cookiejar)
    self.assert_cookie_str_equal(modified_output, 'test1=new1; test2=new2; test3=test3; test5=test5')
    self.assert_cookiejar_equal(cookiejar, ref_cookiejar)
class TestDeno(Base.TestExternalJSI):
_JSI_CLASS = DenoJSI

View File

@ -2,7 +2,6 @@
import http.cookiejar
import json
import re
import subprocess
import typing
import urllib.parse
@ -16,7 +15,7 @@
shell_quote,
unified_timestamp,
)
from ._helper import TempFileWrapper, random_string
from ._helper import TempFileWrapper, random_string, override_navigator_js, extract_script_tags
from .common import ExternalJSI, register_jsi
@ -36,15 +35,7 @@ def __init__(self, *args, flags=[], replace_flags=False, init_script=None, **kwa
@property
def _override_navigator_js(self):
return '\n'.join([
'Object.defineProperty(navigator, "%s", { value: %s, configurable: true });' % (k, json.dumps(v))
for k, v in {
'userAgent': self.user_agent,
'language': 'en-US',
'languages': ['en-US'],
'webdriver': False,
}.items()
])
return override_navigator_js(self.user_agent)
def _run_deno(self, cmd):
self.write_debug(f'Deno command line: {shell_quote(cmd)}')
@ -137,21 +128,13 @@ def _ensure_jsdom(self):
self._run_deno(cmd)
self._JSDOM_IMPORT_CHECKED = True
def _parse_script_tags(self, html: str):
for match_start in re.finditer(r'<script[^>]*>', html, re.DOTALL):
end = html.find('</script>', match_start.end())
if end > match_start.end():
yield html[match_start.end():end]
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='', html='', cookiejar=None):
self.report_note(video_id, note)
self._ensure_jsdom()
callback_varname = f'__callback_{random_string()}'
inline_scripts = '\n'.join([
'try { %s } catch (e) {}' % script
for script in self._parse_script_tags(html)
])
html, inline_scripts = extract_script_tags(html)
wrapper_scripts = '\n'.join(['try { %s } catch (e) {}' % script for script in inline_scripts])
script = f'''{self._init_script};
{self._override_navigator_js};
@ -164,27 +147,33 @@ def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='
}});
Object.keys(dom.window).forEach((key) => {{try {{window[key] = dom.window[key]}} catch (e) {{}}}});
delete window.jsdom;
const origLog = console.log;
console.log = () => {{}};
console.info = () => {{}};
return () => {{
const stdout = [];
const origLog = console.log;
console.log = (...msg) => stdout.push(msg.map(m => m.toString()).join(' '));
return () => {{ origLog(JSON.stringify({{
stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
}}
}})();
await (async () => {{
{inline_scripts}
}})();
{callback_varname} = {callback_varname}();
await (async () => {{
{wrapper_scripts}
{callback_varname} = {callback_varname}(); // begin to capture console.log
try {{
{jscode}
}})().finally({callback_varname});
}} finally {{
{callback_varname}();
}}
'''
location_args = ['--location', location] if location else []
with TempFileWrapper(script, suffix='.js') as js_file:
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
data = json.loads(self._run_deno(cmd))
result = self._run_deno(cmd)
try:
data = json.loads(result)
except json.JSONDecodeError as e:
raise ExtractorError(f'Failed to parse JSON output from Deno: {result}', cause=e)
self.apply_cookies(cookiejar, data['cookies'])
return data['stdout']

View File

@ -1,7 +1,9 @@
from __future__ import annotations
import contextlib
import json
import os
import random
import re
import string
import tempfile
@ -69,3 +71,31 @@ def __exit__(self, exc_type, exc_value, traceback):
def random_string(length: int = 10) -> str:
    """Return ``length`` ASCII letters picked at random (with replacement)."""
    picked = random.choices(string.ascii_letters, k=length)
    return ''.join(picked)
def override_navigator_js(user_agent: str) -> str:
    """Build JS that redefines a fixed set of ``navigator`` properties.

    One ``Object.defineProperty`` statement is emitted per property
    (``userAgent``, ``language``, ``languages``, ``webdriver``), joined by
    newlines. Values are JSON-encoded so they are valid JS literals.
    """
    overrides = {
        'userAgent': user_agent,
        'language': 'en-US',
        'languages': ['en-US'],
        'webdriver': False,
    }
    statements = []
    for prop, value in overrides.items():
        statements.append(
            'Object.defineProperty(navigator, "%s", { value: %s, configurable: true });' % (prop, json.dumps(value)))
    return '\n'.join(statements)
def extract_script_tags(html: str) -> tuple[str, list[str]]:
    """Split inline ``<script>`` elements out of ``html``.

    Returns ``(stripped_html, inline_scripts)`` where ``inline_scripts`` holds
    the bodies of all non-empty ``<script ...>...</script>`` elements in
    document order, and ``stripped_html`` is ``html`` with exactly those
    elements removed.

    Script elements with an empty body (e.g. ``<script src=...></script>``)
    or without a closing ``</script>`` are left in the HTML untouched. The
    closing tag is matched case-sensitively.
    """
    closing_tag = '</script>'
    kept_parts = []
    inline_scripts = []
    cursor = 0  # end of the last removed element, in original-string offsets
    for opening in re.finditer(r'<script[^>]*>', html, re.DOTALL):
        if opening.start() < cursor:
            # This "<script" text lies inside a script body that was already
            # extracted (e.g. within a JS string literal); it is not a tag.
            continue
        body_start = opening.end()
        body_end = html.find(closing_tag, body_start)
        if body_end > body_start:
            # Collect the untouched HTML before this element instead of
            # slicing `html` in place, which would invalidate later offsets.
            kept_parts.append(html[cursor:opening.start()])
            inline_scripts.append(html[body_start:body_end])
            cursor = body_end + len(closing_tag)
    kept_parts.append(html[cursor:])
    return ''.join(kept_parts), inline_scripts

View File

@ -16,7 +16,7 @@
is_outdated_version,
shell_quote,
)
from ._helper import TempFileWrapper, random_string
from ._helper import TempFileWrapper, random_string, extract_script_tags
from .common import ExternalJSI, register_jsi
@ -135,19 +135,23 @@ def _execute_html(self, jscode: str, url: str, html: str, cookiejar, video_id=No
if 'saveAndExit();' not in jscode:
raise ExtractorError('`saveAndExit();` not found in `jscode`')
html, inline_scripts = extract_script_tags(html)
wrapped_scripts = '\n'.join([
'page.evaluate(function() { try { %s } catch (e) {} });' % inline for inline in inline_scripts])
html_file = TempFileWrapper(html, suffix='.html')
cookie_file = TempFileWrapper(self._save_cookies(url, cookiejar), suffix='.json')
jscode = self._TEMPLATE.format_map({
script = self._TEMPLATE.format_map({
'url': json.dumps(str(url)),
'ua': json.dumps(str(self.user_agent)),
'jscode': jscode,
'jscode': f'{wrapped_scripts}\n{jscode}',
'html_fn': json.dumps(html_file.name),
'cookies_fn': json.dumps(cookie_file.name),
'timeout': int(self.timeout * 1000),
})
stdout = self._execute(jscode, video_id, note=note)
stdout = self._execute(script, video_id, note=note)
self._load_cookies(cookie_file.read(), cookiejar)
new_html = html_file.read()