mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-07-10 07:18:33 +00:00
test cookie
This commit is contained in:
parent
b87a0582c5
commit
f0c1da2528
@ -1,9 +1,14 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
# Allow direct execution
|
from __future__ import annotations
|
||||||
import os
|
import os
|
||||||
|
import dataclasses
|
||||||
|
import datetime
|
||||||
|
import time
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
import http.cookiejar
|
||||||
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
@ -11,14 +16,43 @@
|
|||||||
from test.helper import (
|
from test.helper import (
|
||||||
FakeYDL,
|
FakeYDL,
|
||||||
)
|
)
|
||||||
|
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||||
from yt_dlp.jsinterp.common import ExternalJSI
|
from yt_dlp.jsinterp.common import ExternalJSI
|
||||||
from yt_dlp.jsinterp._deno import DenoJSI, DenoJITlessJSI, DenoJSDomJSI
|
from yt_dlp.jsinterp._deno import DenoJSI, DenoJITlessJSI, DenoJSDomJSI
|
||||||
from yt_dlp.jsinterp._phantomjs import PhantomJSJSI
|
from yt_dlp.jsinterp._phantomjs import PhantomJSJSI
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
class NetscapeFields:
    """Plain description of a cookie used to build and compare test cookies.

    Holds the six attributes that the tests care about and can be converted
    into an ``http.cookiejar.Cookie`` or compared against one.
    """

    name: str
    value: str
    domain: str
    path: str
    secure: bool
    expires: int | None

    # Attributes taken into account by ``__eq__`` (not a dataclass field,
    # because it carries no annotation).
    _COMPARED_FIELDS = ('name', 'value', 'domain', 'path', 'secure', 'expires')

    def to_cookie(self):
        """Build an ``http.cookiejar.Cookie`` carrying these fields."""
        return http.cookiejar.Cookie(
            version=0, name=self.name, value=self.value,
            port=None, port_specified=False,
            domain=self.domain, domain_specified=True,
            domain_initial_dot=self.domain.startswith('.'),
            path=self.path, path_specified=True,
            secure=self.secure, expires=self.expires, discard=False,
            comment=None, comment_url=None, rest={},
        )

    def expire_str(self):
        """Format ``expires`` (a UTC timestamp) as a cookie ``expires=`` date.

        Raises:
            ValueError: if ``expires`` is ``None``, i.e. there is no date to format.
        """
        if self.expires is None:
            raise ValueError('cookie has no expiry timestamp to format')
        return datetime.datetime.fromtimestamp(
            self.expires, datetime.timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')

    def __eq__(self, other: object):
        """Compare field by field with another instance or a cookie-like object.

        Objects that lack any of the compared attributes are not comparable;
        ``NotImplemented`` is returned so Python falls back to its default
        handling instead of raising ``AttributeError``.
        """
        if not all(hasattr(other, attr) for attr in self._COMPARED_FIELDS):
            return NotImplemented
        return all(getattr(self, attr) == getattr(other, attr) for attr in self._COMPARED_FIELDS)
|
||||||
|
|
||||||
|
|
||||||
class Base:
|
class Base:
|
||||||
class TestExternalJSI(unittest.TestCase):
|
class TestExternalJSI(unittest.TestCase):
|
||||||
_JSI_CLASS: type[ExternalJSI] = None
|
_JSI_CLASS: type[ExternalJSI] = None
|
||||||
|
maxDiff = 2000
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.ydl = FakeYDL()
|
self.ydl = FakeYDL()
|
||||||
@ -52,6 +86,7 @@ def test_execute_dom_script(self):
|
|||||||
<script src="https://example.com/script.js"></script>
|
<script src="https://example.com/script.js"></script>
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
document.getElementById("test-div").innerHTML = "Hello, world!"
|
document.getElementById("test-div").innerHTML = "Hello, world!"
|
||||||
|
console.log('this should not show up');
|
||||||
</script>
|
</script>
|
||||||
</body></html>'''),
|
</body></html>'''),
|
||||||
'Hello, world!')
|
'Hello, world!')
|
||||||
@ -59,8 +94,6 @@ def test_execute_dom_script(self):
|
|||||||
def test_execute_dom_script_with_error(self):
|
def test_execute_dom_script_with_error(self):
|
||||||
if 'dom' not in self.jsi._SUPPORTED_FEATURES:
|
if 'dom' not in self.jsi._SUPPORTED_FEATURES:
|
||||||
self.skipTest('DOM not supported')
|
self.skipTest('DOM not supported')
|
||||||
if self.jsi.JSI_KEY == 'PhantomJS':
|
|
||||||
self.skipTest('PhantomJS does not catch errors')
|
|
||||||
self.assertEqual(self.jsi.execute(
|
self.assertEqual(self.jsi.execute(
|
||||||
'console.log(document.getElementById("test-div").innerHTML);',
|
'console.log(document.getElementById("test-div").innerHTML);',
|
||||||
location='https://example.com',
|
location='https://example.com',
|
||||||
@ -69,11 +102,67 @@ def test_execute_dom_script_with_error(self):
|
|||||||
<script src="https://example.com/script.js"></script>
|
<script src="https://example.com/script.js"></script>
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
document.getElementById("test-div").innerHTML = "Hello, world!"
|
document.getElementById("test-div").innerHTML = "Hello, world!"
|
||||||
|
console.log('this should not show up');
|
||||||
a = b; // Undefined variable assignment
|
a = b; // Undefined variable assignment
|
||||||
</script>
|
</script>
|
||||||
</body></html>'''),
|
</body></html>'''),
|
||||||
'Hello, world!')
|
'Hello, world!')
|
||||||
|
|
||||||
|
def assert_cookiejar_equal(self, cookiejar: http.cookiejar.CookieJar, ref_cookiejar: http.cookiejar.CookieJar):
    """Assert that both jars hold the same cookies.

    Cookies are paired by ``(name, domain)`` and compared through their
    ``repr``. The check runs in both directions so that a cookie missing
    from ``cookiejar`` is reported as well, not only unexpected extras.
    """
    def counterpart(jar, wanted):
        # First cookie in `jar` with the same name and domain, or None
        return next((c for c in jar if c.name == wanted.name and c.domain == wanted.domain), None)

    for cookie in cookiejar:
        self.assertEqual(repr(cookie), repr(counterpart(ref_cookiejar, cookie)))

    for ref_cookie in ref_cookiejar:
        self.assertIsNotNone(
            counterpart(cookiejar, ref_cookie),
            f'cookie missing from cookiejar: {ref_cookie!r}')
|
||||||
|
|
||||||
|
def assert_cookie_str_equal(self, cookie_str, ref_cookie_str):
    """Assert that two ``document.cookie`` style strings hold the same pairs.

    Both strings are split on ``'; '`` and compared as sets, so the order of
    the ``name=value`` pairs does not matter.
    """
    actual_pairs = set(cookie_str.split('; '))
    expected_pairs = set(ref_cookie_str.split('; '))
    self.assertEqual(actual_pairs, expected_pairs)
|
||||||
|
|
||||||
|
def test_execute_cookiejar(self):
    """Check that cookies are exposed to JS and that JS changes reach the jar."""
    if 'cookies' not in self.jsi._SUPPORTED_FEATURES:
        self.skipTest('Cookies not supported')

    def seconds_from_now(offset):
        return int(time.time()) + offset

    cookiejar = YoutubeDLCookieJar()
    ref_cookiejar = YoutubeDLCookieJar()

    # (name, value, domain, path, secure) of the cookies both jars start with
    initial_specs = (
        ('test1', 'test1', '.example.com', '/', False),
        ('test2', 'test2', '.example.com', '/', True),
        ('test3', 'test3', '.example.com', '/123', False),
        ('test4', 'test4', '.example.com', '/456', False),
        ('test5', 'test5', '.example.com', '/123', True),
        ('test6', 'test6', '.example.com', '/456', True),
        ('test1', 'other1', '.other.com', '/', False),
        ('test2', 'other2', '.other.com', '/', False),
        ('test7', 'other7', '.other.com', '/', False),
    )
    initial_cookies = [
        NetscapeFields(name, value, domain, path, secure, seconds_from_now(1000))
        for name, value, domain, path, secure in initial_specs
    ]
    for fields in initial_cookies:
        cookiejar.set_cookie(fields.to_cookie())
        ref_cookiejar.set_cookie(fields.to_cookie())

    # test identity without modification from js
    visible_cookies = self.jsi.execute(
        'console.log(document.cookie);',
        location='http://example.com/123/456',
        html='<html><body><div id="test-div">Hello, world!</div></body></html>',
        cookiejar=cookiejar)
    self.assert_cookie_str_equal(visible_cookies, 'test1=test1; test3=test3')
    self.assert_cookiejar_equal(cookiejar, ref_cookiejar)

    # test modification of existing cookie from js
    new_cookie_1 = NetscapeFields('test1', 'new1', '.example.com', '/', True, seconds_from_now(900))
    new_cookie_2 = NetscapeFields('test2', 'new2', '.example.com', '/', True, seconds_from_now(900))
    for replacement in (new_cookie_1, new_cookie_2):
        ref_cookiejar.set_cookie(replacement.to_cookie())

    # test1 is overwritten by the executed code, test2 by an inline page script
    js_code = (
        f'document.cookie = "test1=new1; secure; expires={new_cookie_1.expire_str()}; domain=.example.com; path=/";\n'
        'console.log(document.cookie);'
    )
    page_html = (
        '<html><body><div id="test-div">Hello, world!</div>\n'
        '<script>\n'
        f'document.cookie = "test2=new2; secure; expires={new_cookie_2.expire_str()}; domain=.example.com; path=/";\n'
        '</script>\n'
        '</body></html>'
    )
    visible_cookies = self.jsi.execute(
        js_code,
        location='https://example.com/123/456',
        html=page_html,
        cookiejar=cookiejar)
    self.assert_cookie_str_equal(visible_cookies, 'test1=new1; test2=new2; test3=test3; test5=test5')
    self.assert_cookiejar_equal(cookiejar, ref_cookiejar)
|
||||||
|
|
||||||
|
|
||||||
class TestDeno(Base.TestExternalJSI):
|
class TestDeno(Base.TestExternalJSI):
|
||||||
_JSI_CLASS = DenoJSI
|
_JSI_CLASS = DenoJSI
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import typing
|
import typing
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
@ -16,7 +15,7 @@
|
|||||||
shell_quote,
|
shell_quote,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
from ._helper import TempFileWrapper, random_string
|
from ._helper import TempFileWrapper, random_string, override_navigator_js, extract_script_tags
|
||||||
from .common import ExternalJSI, register_jsi
|
from .common import ExternalJSI, register_jsi
|
||||||
|
|
||||||
|
|
||||||
@ -36,15 +35,7 @@ def __init__(self, *args, flags=[], replace_flags=False, init_script=None, **kwa
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def _override_navigator_js(self):
|
def _override_navigator_js(self):
|
||||||
return '\n'.join([
|
return override_navigator_js(self.user_agent)
|
||||||
'Object.defineProperty(navigator, "%s", { value: %s, configurable: true });' % (k, json.dumps(v))
|
|
||||||
for k, v in {
|
|
||||||
'userAgent': self.user_agent,
|
|
||||||
'language': 'en-US',
|
|
||||||
'languages': ['en-US'],
|
|
||||||
'webdriver': False,
|
|
||||||
}.items()
|
|
||||||
])
|
|
||||||
|
|
||||||
def _run_deno(self, cmd):
|
def _run_deno(self, cmd):
|
||||||
self.write_debug(f'Deno command line: {shell_quote(cmd)}')
|
self.write_debug(f'Deno command line: {shell_quote(cmd)}')
|
||||||
@ -137,21 +128,13 @@ def _ensure_jsdom(self):
|
|||||||
self._run_deno(cmd)
|
self._run_deno(cmd)
|
||||||
self._JSDOM_IMPORT_CHECKED = True
|
self._JSDOM_IMPORT_CHECKED = True
|
||||||
|
|
||||||
def _parse_script_tags(self, html: str):
|
|
||||||
for match_start in re.finditer(r'<script[^>]*>', html, re.DOTALL):
|
|
||||||
end = html.find('</script>', match_start.end())
|
|
||||||
if end > match_start.end():
|
|
||||||
yield html[match_start.end():end]
|
|
||||||
|
|
||||||
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='', html='', cookiejar=None):
|
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='', html='', cookiejar=None):
|
||||||
self.report_note(video_id, note)
|
self.report_note(video_id, note)
|
||||||
self._ensure_jsdom()
|
self._ensure_jsdom()
|
||||||
callback_varname = f'__callback_{random_string()}'
|
callback_varname = f'__callback_{random_string()}'
|
||||||
|
|
||||||
inline_scripts = '\n'.join([
|
html, inline_scripts = extract_script_tags(html)
|
||||||
'try { %s } catch (e) {}' % script
|
wrapper_scripts = '\n'.join(['try { %s } catch (e) {}' % script for script in inline_scripts])
|
||||||
for script in self._parse_script_tags(html)
|
|
||||||
])
|
|
||||||
|
|
||||||
script = f'''{self._init_script};
|
script = f'''{self._init_script};
|
||||||
{self._override_navigator_js};
|
{self._override_navigator_js};
|
||||||
@ -164,27 +147,33 @@ def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='
|
|||||||
}});
|
}});
|
||||||
Object.keys(dom.window).forEach((key) => {{try {{window[key] = dom.window[key]}} catch (e) {{}}}});
|
Object.keys(dom.window).forEach((key) => {{try {{window[key] = dom.window[key]}} catch (e) {{}}}});
|
||||||
delete window.jsdom;
|
delete window.jsdom;
|
||||||
|
const origLog = console.log;
|
||||||
|
console.log = () => {{}};
|
||||||
|
console.info = () => {{}};
|
||||||
return () => {{
|
return () => {{
|
||||||
const stdout = [];
|
const stdout = [];
|
||||||
const origLog = console.log;
|
|
||||||
console.log = (...msg) => stdout.push(msg.map(m => m.toString()).join(' '));
|
console.log = (...msg) => stdout.push(msg.map(m => m.toString()).join(' '));
|
||||||
return () => {{ origLog(JSON.stringify({{
|
return () => {{ origLog(JSON.stringify({{
|
||||||
stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
|
stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
|
||||||
}}
|
}}
|
||||||
}})();
|
}})();
|
||||||
await (async () => {{
|
{wrapper_scripts}
|
||||||
{inline_scripts}
|
{callback_varname} = {callback_varname}(); // begin to capture console.log
|
||||||
}})();
|
try {{
|
||||||
{callback_varname} = {callback_varname}();
|
|
||||||
await (async () => {{
|
|
||||||
{jscode}
|
{jscode}
|
||||||
}})().finally({callback_varname});
|
}} finally {{
|
||||||
|
{callback_varname}();
|
||||||
|
}}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
location_args = ['--location', location] if location else []
|
location_args = ['--location', location] if location else []
|
||||||
with TempFileWrapper(script, suffix='.js') as js_file:
|
with TempFileWrapper(script, suffix='.js') as js_file:
|
||||||
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
|
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
|
||||||
data = json.loads(self._run_deno(cmd))
|
result = self._run_deno(cmd)
|
||||||
|
try:
|
||||||
|
data = json.loads(result)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
raise ExtractorError(f'Failed to parse JSON output from Deno: {result}', cause=e)
|
||||||
self.apply_cookies(cookiejar, data['cookies'])
|
self.apply_cookies(cookiejar, data['cookies'])
|
||||||
return data['stdout']
|
return data['stdout']
|
||||||
|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
import string
|
import string
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
@ -69,3 +71,31 @@ def __exit__(self, exc_type, exc_value, traceback):
|
|||||||
|
|
||||||
def random_string(length: int = 10) -> str:
|
def random_string(length: int = 10) -> str:
|
||||||
return ''.join(random.choices(string.ascii_letters, k=length))
|
return ''.join(random.choices(string.ascii_letters, k=length))
|
||||||
|
|
||||||
|
|
||||||
|
def override_navigator_js(user_agent: str) -> str:
    """Return JS statements that redefine selected ``navigator`` properties.

    One ``Object.defineProperty`` statement is emitted per property, joined
    with newlines. Both the property name and its value are serialised with
    ``json.dumps`` so they are always valid JavaScript literals.

    Args:
        user_agent: value to expose as ``navigator.userAgent``.
    """
    overrides = {
        'userAgent': user_agent,
        'language': 'en-US',
        'languages': ['en-US'],
        'webdriver': False,
    }
    return '\n'.join(
        f'Object.defineProperty(navigator, {json.dumps(prop)}, '
        f'{{ value: {json.dumps(value)}, configurable: true }});'
        for prop, value in overrides.items())
|
||||||
|
|
||||||
|
|
||||||
|
def extract_script_tags(html: str) -> tuple[str, list[str]]:
    """Split inline ``<script>`` elements out of an HTML string.

    An element is extracted only when its opening tag is followed by a
    ``</script>`` with a non-empty body in between; empty elements (such as
    ``<script src=...></script>``) and unclosed tags are left in place.

    Args:
        html: the HTML source to scan.

    Returns:
        A tuple ``(remaining_html, inline_scripts)`` where ``remaining_html``
        is ``html`` without the extracted elements and ``inline_scripts``
        lists the extracted script bodies in document order.
    """
    closing_tag = '</script>'
    inline_scripts = []
    kept_parts = []
    cursor = 0  # offset in `html` up to which the input has been consumed

    for opening in re.finditer(r'<script[^>]*>', html, re.DOTALL):
        if opening.start() < cursor:
            # This "tag" lies inside a script body that was already extracted
            continue
        body_start = opening.end()
        body_end = html.find(closing_tag, body_start)
        if body_end <= body_start:
            # No closing tag (-1) or empty body: keep the markup untouched
            continue
        inline_scripts.append(html[body_start:body_end])
        # Assemble the output from slices of the ORIGINAL string so that
        # earlier removals can never shift the offsets of later ones
        kept_parts.append(html[cursor:opening.start()])
        cursor = body_end + len(closing_tag)

    kept_parts.append(html[cursor:])
    return ''.join(kept_parts), inline_scripts
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
is_outdated_version,
|
is_outdated_version,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
)
|
)
|
||||||
from ._helper import TempFileWrapper, random_string
|
from ._helper import TempFileWrapper, random_string, extract_script_tags
|
||||||
from .common import ExternalJSI, register_jsi
|
from .common import ExternalJSI, register_jsi
|
||||||
|
|
||||||
|
|
||||||
@ -135,19 +135,23 @@ def _execute_html(self, jscode: str, url: str, html: str, cookiejar, video_id=No
|
|||||||
if 'saveAndExit();' not in jscode:
|
if 'saveAndExit();' not in jscode:
|
||||||
raise ExtractorError('`saveAndExit();` not found in `jscode`')
|
raise ExtractorError('`saveAndExit();` not found in `jscode`')
|
||||||
|
|
||||||
|
html, inline_scripts = extract_script_tags(html)
|
||||||
|
wrapped_scripts = '\n'.join([
|
||||||
|
'page.evaluate(function() { try { %s } catch (e) {} });' % inline for inline in inline_scripts])
|
||||||
|
|
||||||
html_file = TempFileWrapper(html, suffix='.html')
|
html_file = TempFileWrapper(html, suffix='.html')
|
||||||
cookie_file = TempFileWrapper(self._save_cookies(url, cookiejar), suffix='.json')
|
cookie_file = TempFileWrapper(self._save_cookies(url, cookiejar), suffix='.json')
|
||||||
|
|
||||||
jscode = self._TEMPLATE.format_map({
|
script = self._TEMPLATE.format_map({
|
||||||
'url': json.dumps(str(url)),
|
'url': json.dumps(str(url)),
|
||||||
'ua': json.dumps(str(self.user_agent)),
|
'ua': json.dumps(str(self.user_agent)),
|
||||||
'jscode': jscode,
|
'jscode': f'{wrapped_scripts}\n{jscode}',
|
||||||
'html_fn': json.dumps(html_file.name),
|
'html_fn': json.dumps(html_file.name),
|
||||||
'cookies_fn': json.dumps(cookie_file.name),
|
'cookies_fn': json.dumps(cookie_file.name),
|
||||||
'timeout': int(self.timeout * 1000),
|
'timeout': int(self.timeout * 1000),
|
||||||
})
|
})
|
||||||
|
|
||||||
stdout = self._execute(jscode, video_id, note=note)
|
stdout = self._execute(script, video_id, note=note)
|
||||||
self._load_cookies(cookie_file.read(), cookiejar)
|
self._load_cookies(cookie_file.read(), cookiejar)
|
||||||
new_html = html_file.read()
|
new_html = html_file.read()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user