diff --git a/README.md b/README.md
index 0f9a7d556..ce1a09313 100644
--- a/README.md
+++ b/README.md
@@ -213,7 +213,7 @@ ### Metadata
### Misc
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
-* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
+* [**phantomjs**](https://github.com/ariya/phantomjs), [**deno**](https://github.com/denoland/deno/) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) and [MIT](https://github.com/denoland/deno/blob/main/LICENSE.md) respectively
* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* Any external downloader that you want to use with `--downloader`
@@ -798,6 +798,9 @@ ## Workarounds:
be used along with --min-sleep-interval
--sleep-subtitles SECONDS Number of seconds to sleep before each
subtitle download
+ --jsi-preference JSI Preferred JS interpreters to use during
+ extraction. Can be given as comma-separated
+ values
## Video Format Options:
-f, --format FORMAT Video format code, see "FORMAT SELECTION"
diff --git a/test/test_download.py b/test/test_download.py
index 3f36869d9..8bc5658ef 100755
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -25,12 +25,14 @@
import yt_dlp.YoutubeDL # isort: split
from yt_dlp.extractor import get_info_extractor
+from yt_dlp.jsinterp.common import get_included_jsi
from yt_dlp.networking.exceptions import HTTPError, TransportError
from yt_dlp.utils import (
DownloadError,
ExtractorError,
UnavailableVideoError,
YoutubeDLError,
+ filter_dict,
format_bytes,
join_nonempty,
)
@@ -82,6 +84,29 @@ def __str__(self):
# Dynamically generate tests
def generator(test_case, tname):
+
+ # setting `jsi_matrix` to True, or `jsi_matrix_only_include`, `jsi_matrix_exclude` to non-empty list
+ # to trigger matrix behavior for JSI
+ if any(test_case.get(key) for key in [
+ 'jsi_matrix', 'jsi_matrix_only_include', 'jsi_matrix_exclude',
+ ]):
+ jsi_keys = list(get_included_jsi(only_include=test_case.get('jsi_matrix_only_include'),
+ exclude=test_case.get('jsi_matrix_exclude')))
+
+ # use jsi_preference here, instead of force blocking other jsi runtimes
+ # exclusion, if needed, should be specified in test case to optimize testing
+ def generate_jsi_sub_case(jsi_key):
+ sub_case = filter_dict(test_case, lambda k, _: not k.startswith('jsi_matrix'))
+ sub_case['params'] = {**test_case.get('params', {}), 'jsi_preference': [jsi_key]}
+ return generator(sub_case, f'{tname}_{jsi_key}')
+
+ def run_sub_cases(self):
+ for i, jsi_key in enumerate(jsi_keys):
+ with self.subTest(jsi_key):
+ print(f'Running case {tname} using JSI: {jsi_key} ({i + 1}/{len(jsi_keys)})')
+ generate_jsi_sub_case(jsi_key)(self)
+ return run_sub_cases
+
def test_template(self):
if self.COMPLETED_TESTS.get(tname):
return
diff --git a/test/test_jsi_external.py b/test/test_jsi_external.py
new file mode 100644
index 000000000..1a7793e48
--- /dev/null
+++ b/test/test_jsi_external.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+import os
+import dataclasses
+import datetime
+import time
+import sys
+import unittest
+import http.cookiejar
+import functools
+import typing
+
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import FakeYDL
+from yt_dlp.cookies import YoutubeDLCookieJar
+from yt_dlp.jsinterp.common import get_included_jsi
+from yt_dlp.jsinterp._helper import prepare_wasm_jsmodule
+
+if typing.TYPE_CHECKING:
+ from yt_dlp.jsinterp.common import JSI
+
+
+@dataclasses.dataclass
+class NetscapeFields:
+ name: str
+ value: str
+ domain: str
+ path: str
+ secure: bool
+ expires: int | None
+
+ def to_cookie(self):
+ return http.cookiejar.Cookie(
+ 0, self.name, self.value,
+ None, False,
+ self.domain, True, self.domain.startswith('.'),
+ self.path, True,
+ self.secure, self.expires, False,
+ None, None, {},
+ )
+
+ def expire_str(self):
+ return datetime.datetime.fromtimestamp(
+ self.expires, datetime.timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')
+
+ def __eq__(self, other: NetscapeFields | http.cookiejar.Cookie):
+ return all(getattr(self, attr) == getattr(other, attr) for attr in ['name', 'value', 'domain', 'path', 'secure', 'expires'])
+
+
+def use_jsi_rumtimes(exclude=None):
+    """Decorator factory: run the decorated test once per included JSI runtime.
+
+    For every ``key, jsi`` pair from ``get_included_jsi(exclude=...)`` the test is called
+    inside ``self.subTest(key)``. In place of the runtime class it receives a factory with
+    the same call signature, which calls ``self.skipTest`` when the runtime is marked
+    ``TEST_DATA_PLUGIN`` or when the created instance reports it is not available.
+
+    exclude -- runtime keys forwarded to ``get_included_jsi``; defaults to an empty list
+    """
+    # Build the default here instead of in the signature, so no mutable default is shared
+    exclude = [] if exclude is None else exclude
+
+    def inner(func: typing.Callable[[unittest.TestCase, type[JSI]], None]):
+        @functools.wraps(func)
+        def wrapper(self: unittest.TestCase):
+            for key, jsi in get_included_jsi(exclude=exclude).items():
+                # The factory is only called within this iteration, so closing over
+                # the loop variables `key` and `jsi` is safe
+                def wrapped_jsi_with_unavailable_auto_skip(*args, **kwargs):
+                    if getattr(jsi, 'TEST_DATA_PLUGIN', False):
+                        self.skipTest('Testdata plugin')
+                    instance = jsi(*args, **kwargs)
+                    if not instance.is_available():
+                        self.skipTest(f'{key} is not available')
+                    return instance
+
+                with self.subTest(key):
+                    func(self, wrapped_jsi_with_unavailable_auto_skip)
+        return wrapper
+    return inner
+
+
+class TestExternalJSI(unittest.TestCase):
+ _TESTDATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'testdata', 'jsi_external')
+ maxDiff = 2000
+
+ def setUp(self):
+ self.ydl = FakeYDL()
+
+ @use_jsi_rumtimes()
+ def test_execute(self, jsi_cls: type[JSI]):
+ jsi = jsi_cls(self.ydl, '', 10)
+ self.assertEqual(jsi.execute('console.log("Hello, world!");'), 'Hello, world!')
+
+ @use_jsi_rumtimes()
+ def test_user_agent(self, jsi_cls: type[JSI]):
+ ua = self.ydl.params['http_headers']['User-Agent']
+
+ jsi = jsi_cls(self.ydl, '', 10)
+ self.assertEqual(jsi.execute('console.log(navigator.userAgent);'), ua)
+ self.assertNotEqual(jsi.execute('console.log(JSON.stringify(navigator.webdriver));'), 'true')
+
+ jsi = jsi_cls(self.ydl, '', 10, user_agent='test/ua')
+ self.assertEqual(jsi.execute('console.log(navigator.userAgent);'), 'test/ua')
+
+ @use_jsi_rumtimes()
+ def test_location(self, jsi_cls: type[JSI]):
+ jsi = jsi_cls(self.ydl, 'https://example.com/123/456', 10)
+ self.assertEqual(jsi.execute('console.log(JSON.stringify([location.href, location.hostname]));'),
+ '["https://example.com/123/456","example.com"]')
+
+    @use_jsi_rumtimes(exclude=['Deno'])
+    def test_execute_dom_parse(self, jsi_cls: type[JSI]):
+        """The script can read an element of the document passed via ``html``."""
+        jsi = jsi_cls(self.ydl, '', 10)
+        # NOTE(review): the markup of this literal was lost; this is a minimal document
+        # that provides the `test-div` element the script reads - confirm against upstream
+        self.assertEqual(jsi.execute(
+            'console.log(document.getElementById("test-div").innerHTML);',
+            html='<html><body><div id="test-div">Hello, world!</div></body></html>'),
+            'Hello, world!')
+
+ @use_jsi_rumtimes(exclude=['Deno'])
+ def test_execute_dom_script(self, jsi_cls: type[JSI]):
+ jsi = jsi_cls(self.ydl, '', 10)
+ self.assertEqual(jsi.execute(
+ 'console.log(document.getElementById("test-div").innerHTML);',
+ html='''Hello, world!
+
+
+
+ '''),
+ 'Hello, world!')
+
+ @use_jsi_rumtimes(exclude=['Deno'])
+ def test_dom_location(self, jsi_cls: type[JSI]):
+ jsi = jsi_cls(self.ydl, 'https://example.com/123/456', 10)
+ self.assertEqual(jsi.execute(
+ 'console.log(document.getElementById("test-div").innerHTML);',
+ html='''
+ Hello, world!
'''),
+ 'example.com')
+
+ @use_jsi_rumtimes(exclude=['Deno'])
+ def test_execute_cookiejar(self, jsi_cls: type[JSI]):
+ cookiejar = YoutubeDLCookieJar()
+ ref_cookiejar = YoutubeDLCookieJar()
+
+ def _assert_expected_execute(cookie_str, ref_cookie_str):
+ self.assertEqual(set(cookie_str.split('; ')), set(ref_cookie_str.split('; ')))
+ for cookie in cookiejar:
+ ref_cookie = next((c for c in ref_cookiejar if c.name == cookie.name
+ and c.domain == cookie.domain), None)
+ self.assertEqual(repr(cookie), repr(ref_cookie))
+
+ for test_cookie in [
+ NetscapeFields('test1', 'test1', '.example.com', '/', False, int(time.time()) + 1000),
+ NetscapeFields('test2', 'test2', '.example.com', '/', True, int(time.time()) + 1000),
+ NetscapeFields('test3', 'test3', '.example.com', '/123', False, int(time.time()) + 1000),
+ NetscapeFields('test4', 'test4', '.example.com', '/456', False, int(time.time()) + 1000),
+ NetscapeFields('test5', 'test5', '.example.com', '/123', True, int(time.time()) + 1000),
+ NetscapeFields('test6', 'test6', '.example.com', '/456', True, int(time.time()) + 1000),
+ NetscapeFields('test1', 'other1', '.other.com', '/', False, int(time.time()) + 1000),
+ NetscapeFields('test2', 'other2', '.other.com', '/', False, int(time.time()) + 1000),
+ NetscapeFields('test7', 'other7', '.other.com', '/', False, int(time.time()) + 1000),
+ ]:
+ cookiejar.set_cookie(test_cookie.to_cookie())
+ ref_cookiejar.set_cookie(test_cookie.to_cookie())
+
+ # test identity without modification from js
+ jsi = jsi_cls(self.ydl, 'http://example.com/123/456', 10)
+ _assert_expected_execute(jsi.execute(
+ 'console.log(document.cookie);', cookiejar=cookiejar),
+ 'test1=test1; test3=test3')
+
+ # test modification of existing cookie from js
+ new_cookie_1 = NetscapeFields('test1', 'new1', '.example.com', '/', True, int(time.time()) + 900)
+ new_cookie_2 = NetscapeFields('test2', 'new2', '.example.com', '/', True, int(time.time()) + 900)
+ ref_cookiejar.set_cookie(new_cookie_1.to_cookie())
+ ref_cookiejar.set_cookie(new_cookie_2.to_cookie())
+
+ # change to https url to test secure-domain behavior
+ jsi = jsi_cls(self.ydl, 'https://example.com/123/456', 10)
+ _assert_expected_execute(jsi.execute(
+ f'''document.cookie = "test1=new1; secure; expires={new_cookie_1.expire_str()}; domain=.example.com; path=/";
+ console.log(document.cookie);''',
+ html=f'''Hello, world!
+
+ ''',
+ cookiejar=cookiejar),
+ 'test1=new1; test2=new2; test3=test3; test5=test5')
+
+ @use_jsi_rumtimes(exclude=['PhantomJS'])
+ def test_wasm(self, jsi_cls: type[JSI]):
+ with open(os.path.join(self._TESTDATA_DIR, 'hello_wasm.js')) as f:
+ js_mod = f.read()
+ with open(os.path.join(self._TESTDATA_DIR, 'hello_wasm_bg.wasm'), 'rb') as f:
+ wasm = f.read()
+
+ js_base = prepare_wasm_jsmodule(js_mod, wasm)
+
+ js_code = js_base + ''';
+ console.log(add(1, 2));
+ greet('world');
+ '''
+
+ jsi = jsi_cls(self.ydl, '', 10)
+ self.assertEqual(jsi.execute(js_code), '3\nHello, world!')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 2e3cdc2a5..ef3c68170 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -9,7 +9,7 @@
import math
-from yt_dlp.jsinterp import JS_Undefined, JSInterpreter, js_number_to_string
+from yt_dlp.jsinterp.native import JS_Undefined, JSInterpreter, js_number_to_string
class NaN:
diff --git a/test/test_plugins.py b/test/test_plugins.py
index 195726b18..8299bfaff 100644
--- a/test/test_plugins.py
+++ b/test/test_plugins.py
@@ -22,9 +22,11 @@
from yt_dlp.globals import (
extractors,
postprocessors,
+ jsi_runtimes,
plugin_dirs,
plugin_ies,
plugin_pps,
+ plugin_jsis,
all_plugins_loaded,
plugin_specs,
)
@@ -44,16 +46,24 @@
plugin_destination=plugin_pps,
)
+JSI_PLUGIN_SPEC = PluginSpec(
+ module_name='jsinterp',
+ suffix='JSI',
+ destination=jsi_runtimes,
+ plugin_destination=plugin_jsis,
+)
+
def reset_plugins():
plugin_ies.value = {}
plugin_pps.value = {}
+ plugin_jsis.value = {}
plugin_dirs.value = ['default']
plugin_specs.value = {}
all_plugins_loaded.value = False
# Clearing override plugins is probably difficult
for module_name in tuple(sys.modules):
- for plugin_type in ('extractor', 'postprocessor'):
+ for plugin_type in ('extractor', 'postprocessor', 'jsinterp'):
if module_name.startswith(f'{PACKAGE_NAME}.{plugin_type}.'):
del sys.modules[module_name]
@@ -108,6 +118,17 @@ def test_postprocessor_classes(self):
self.assertIn(f'{PACKAGE_NAME}.postprocessor.normal', sys.modules.keys())
self.assertIn('NormalPluginPP', plugin_pps.value)
+ def test_jsi_runtime_classes(self):
+ plugins_jsi = load_plugins(JSI_PLUGIN_SPEC)
+ self.assertIn('NormalPluginJSI', plugins_jsi.keys())
+ self.assertIn(f'{PACKAGE_NAME}.jsinterp.normal', sys.modules.keys())
+ self.assertIn('NormalPluginJSI', plugin_jsis.value)
+
+ self.assertNotIn('OverrideDenoJSI', plugins_jsi.keys())
+ self.assertNotIn('OverrideDenoJSI', plugin_jsis.value)
+ self.assertNotIn('_UnderscoreOverrideDenoJSI', plugins_jsi.keys())
+ self.assertNotIn('_UnderscoreOverrideDenoJSI', plugin_jsis.value)
+
def test_importing_zipped_module(self):
zip_path = TEST_DATA_DIR / 'zipped_plugins.zip'
shutil.make_archive(str(zip_path)[:-4], 'zip', str(zip_path)[:-4])
@@ -125,6 +146,9 @@ def test_importing_zipped_module(self):
plugins_pp = load_plugins(POSTPROCESSOR_PLUGIN_SPEC)
self.assertIn('ZippedPluginPP', plugins_pp.keys())
+ plugins_jsi = load_plugins(JSI_PLUGIN_SPEC)
+ self.assertIn('ZippedPluginJSI', plugins_jsi.keys())
+
finally:
sys.path.remove(str(zip_path))
os.remove(zip_path)
@@ -134,13 +158,14 @@ def test_reloading_plugins(self):
reload_plugins_path = TEST_DATA_DIR / 'reload_plugins'
load_plugins(EXTRACTOR_PLUGIN_SPEC)
load_plugins(POSTPROCESSOR_PLUGIN_SPEC)
+ load_plugins(JSI_PLUGIN_SPEC)
# Remove default folder and add reload_plugin path
sys.path.remove(str(TEST_DATA_DIR))
sys.path.append(str(reload_plugins_path))
importlib.invalidate_caches()
try:
- for plugin_type in ('extractor', 'postprocessor'):
+ for plugin_type in ('extractor', 'postprocessor', 'jsinterp'):
package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}')
self.assertIn(reload_plugins_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__))
@@ -161,6 +186,14 @@ def test_reloading_plugins(self):
postprocessors.value['NormalPluginPP'].REPLACED,
msg='Reloading has not replaced original postprocessor plugin globally')
+            # The reloaded JSI plugin must be visible both in the mapping returned by
+            # load_plugins and in the global jsi_runtimes registry
+            plugins_jsi = load_plugins(JSI_PLUGIN_SPEC)
+            self.assertIn('NormalPluginJSI', plugins_jsi.keys())
+            self.assertTrue(plugins_jsi['NormalPluginJSI'].REPLACED,
+                            msg='Reloading has not replaced original JSI plugin')
+            self.assertTrue(
+                jsi_runtimes.value['NormalPluginJSI'].REPLACED,
+                msg='Reloading has not replaced original JSI plugin globally')
+
finally:
sys.path.remove(str(reload_plugins_path))
sys.path.append(str(TEST_DATA_DIR))
@@ -181,6 +214,24 @@ def test_extractor_override_plugin(self):
from yt_dlp.extractor.generic import GenericIE
self.assertEqual(GenericIE.IE_NAME, 'generic+override+underscore-override')
+    def test_jsi_override_plugin(self):
+        """Override plugins replace DenoJSI in ``jsi_runtimes`` and are applied only once."""
+        load_plugins(JSI_PLUGIN_SPEC)
+
+        from yt_dlp.jsinterp._deno import DenoJSI
+
+        # test that jsi_runtimes is updated with override jsi
+        self.assertIs(DenoJSI, jsi_runtimes.value['Deno'])
+        self.assertEqual(jsi_runtimes.value['Deno'].TEST_FIELD, 'override')
+        self.assertEqual(jsi_runtimes.value['Deno'].SECONDARY_TEST_FIELD, 'underscore-override')
+
+        self.assertEqual(jsi_runtimes.value['Deno'].JSI_NAME, 'Deno+override+underscore-override')
+        importlib.invalidate_caches()
+        # test that loading a second time doesn't wrap a second time
+        # (the JSI spec itself has to be reloaded; reloading the extractor spec
+        # would leave the JSI override path untouched)
+        load_plugins(JSI_PLUGIN_SPEC)
+        from yt_dlp.jsinterp._deno import DenoJSI
+        self.assertIs(DenoJSI, jsi_runtimes.value['Deno'])
+        self.assertEqual(jsi_runtimes.value['Deno'].JSI_NAME, 'Deno+override+underscore-override')
+
def test_load_all_plugin_types(self):
# no plugin specs registered
@@ -188,24 +239,29 @@ def test_load_all_plugin_types(self):
self.assertNotIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys())
self.assertNotIn(f'{PACKAGE_NAME}.postprocessor.normal', sys.modules.keys())
+ self.assertNotIn(f'{PACKAGE_NAME}.jsinterp.normal', sys.modules.keys())
register_plugin_spec(EXTRACTOR_PLUGIN_SPEC)
register_plugin_spec(POSTPROCESSOR_PLUGIN_SPEC)
+ register_plugin_spec(JSI_PLUGIN_SPEC)
load_all_plugins()
self.assertTrue(all_plugins_loaded.value)
self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys())
self.assertIn(f'{PACKAGE_NAME}.postprocessor.normal', sys.modules.keys())
+ self.assertIn(f'{PACKAGE_NAME}.jsinterp.normal', sys.modules.keys())
def test_no_plugin_dirs(self):
register_plugin_spec(EXTRACTOR_PLUGIN_SPEC)
register_plugin_spec(POSTPROCESSOR_PLUGIN_SPEC)
+ register_plugin_spec(JSI_PLUGIN_SPEC)
plugin_dirs.value = []
load_all_plugins()
self.assertNotIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys())
self.assertNotIn(f'{PACKAGE_NAME}.postprocessor.normal', sys.modules.keys())
+ self.assertNotIn(f'{PACKAGE_NAME}.jsinterp.normal', sys.modules.keys())
def test_set_plugin_dirs(self):
custom_plugin_dir = str(TEST_DATA_DIR / 'plugin_packages')
@@ -236,9 +292,11 @@ def test_append_plugin_dirs(self):
def test_get_plugin_spec(self):
register_plugin_spec(EXTRACTOR_PLUGIN_SPEC)
register_plugin_spec(POSTPROCESSOR_PLUGIN_SPEC)
+ register_plugin_spec(JSI_PLUGIN_SPEC)
self.assertEqual(plugin_specs.value.get('extractor'), EXTRACTOR_PLUGIN_SPEC)
self.assertEqual(plugin_specs.value.get('postprocessor'), POSTPROCESSOR_PLUGIN_SPEC)
+ self.assertEqual(plugin_specs.value.get('jsinterp'), JSI_PLUGIN_SPEC)
self.assertIsNone(plugin_specs.value.get('invalid'))
diff --git a/test/testdata/jsi_external/hello_wasm.js b/test/testdata/jsi_external/hello_wasm.js
new file mode 100644
index 000000000..1a3a31c46
--- /dev/null
+++ b/test/testdata/jsi_external/hello_wasm.js
@@ -0,0 +1,234 @@
+// wasm-pack build --target web
+/* lib.rs
+use wasm_bindgen::prelude::*;
+
+#[wasm_bindgen]
+extern "C" {
+ pub fn eval(s: &str);
+}
+
+#[wasm_bindgen]
+pub fn greet(name: &str) {
+ eval(&format!("console.log('Hello, {}!')", name));
+}
+
+#[wasm_bindgen]
+pub fn add(left: i32, right: i32) -> i32 {
+ left + right
+}
+*/
+
+let wasm;
+
+const cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
+
+if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
+
+let cachedUint8ArrayMemory0 = null;
+
+function getUint8ArrayMemory0() {
+ if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
+ cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
+ }
+ return cachedUint8ArrayMemory0;
+}
+
+function getStringFromWasm0(ptr, len) {
+ ptr = ptr >>> 0;
+ return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
+}
+
+let WASM_VECTOR_LEN = 0;
+
+const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
+
+const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
+ ? function (arg, view) {
+ return cachedTextEncoder.encodeInto(arg, view);
+}
+ : function (arg, view) {
+ const buf = cachedTextEncoder.encode(arg);
+ view.set(buf);
+ return {
+ read: arg.length,
+ written: buf.length
+ };
+});
+
+function passStringToWasm0(arg, malloc, realloc) {
+
+ if (realloc === undefined) {
+ const buf = cachedTextEncoder.encode(arg);
+ const ptr = malloc(buf.length, 1) >>> 0;
+ getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
+ WASM_VECTOR_LEN = buf.length;
+ return ptr;
+ }
+
+ let len = arg.length;
+ let ptr = malloc(len, 1) >>> 0;
+
+ const mem = getUint8ArrayMemory0();
+
+ let offset = 0;
+
+ for (; offset < len; offset++) {
+ const code = arg.charCodeAt(offset);
+ if (code > 0x7F) break;
+ mem[ptr + offset] = code;
+ }
+
+ if (offset !== len) {
+ if (offset !== 0) {
+ arg = arg.slice(offset);
+ }
+ ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
+ const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
+ const ret = encodeString(arg, view);
+
+ offset += ret.written;
+ ptr = realloc(ptr, len, offset, 1) >>> 0;
+ }
+
+ WASM_VECTOR_LEN = offset;
+ return ptr;
+}
+/**
+ * @param {string} name
+ */
+export function greet(name) {
+ const ptr0 = passStringToWasm0(name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
+ const len0 = WASM_VECTOR_LEN;
+ wasm.greet(ptr0, len0);
+}
+
+/**
+ * @param {number} left
+ * @param {number} right
+ * @returns {number}
+ */
+export function add(left, right) {
+ const ret = wasm.add(left, right);
+ return ret;
+}
+
+async function __wbg_load(module, imports) {
+ if (typeof Response === 'function' && module instanceof Response) {
+ if (typeof WebAssembly.instantiateStreaming === 'function') {
+ try {
+ return await WebAssembly.instantiateStreaming(module, imports);
+
+ } catch (e) {
+ if (module.headers.get('Content-Type') != 'application/wasm') {
+ console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
+
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ const bytes = await module.arrayBuffer();
+ return await WebAssembly.instantiate(bytes, imports);
+
+ } else {
+ const instance = await WebAssembly.instantiate(module, imports);
+
+ if (instance instanceof WebAssembly.Instance) {
+ return { instance, module };
+
+ } else {
+ return instance;
+ }
+ }
+}
+
+function __wbg_get_imports() {
+ const imports = {};
+ imports.wbg = {};
+ imports.wbg.__wbg_eval_d1c6d8ede79fdfce = function(arg0, arg1) {
+ eval(getStringFromWasm0(arg0, arg1));
+ };
+ imports.wbg.__wbindgen_init_externref_table = function() {
+ const table = wasm.__wbindgen_export_0;
+ const offset = table.grow(4);
+ table.set(0, undefined);
+ table.set(offset + 0, undefined);
+ table.set(offset + 1, null);
+ table.set(offset + 2, true);
+ table.set(offset + 3, false);
+ ;
+ };
+
+ return imports;
+}
+
+function __wbg_init_memory(imports, memory) {
+
+}
+
+function __wbg_finalize_init(instance, module) {
+ wasm = instance.exports;
+ __wbg_init.__wbindgen_wasm_module = module;
+ cachedUint8ArrayMemory0 = null;
+
+
+ wasm.__wbindgen_start();
+ return wasm;
+}
+
+function initSync(module) {
+ if (wasm !== undefined) return wasm;
+
+
+ if (typeof module !== 'undefined') {
+ if (Object.getPrototypeOf(module) === Object.prototype) {
+ ({module} = module)
+ } else {
+ console.warn('using deprecated parameters for `initSync()`; pass a single object instead')
+ }
+ }
+
+ const imports = __wbg_get_imports();
+
+ __wbg_init_memory(imports);
+
+ if (!(module instanceof WebAssembly.Module)) {
+ module = new WebAssembly.Module(module);
+ }
+
+ const instance = new WebAssembly.Instance(module, imports);
+
+ return __wbg_finalize_init(instance, module);
+}
+
+async function __wbg_init(module_or_path) {
+ if (wasm !== undefined) return wasm;
+
+
+ if (typeof module_or_path !== 'undefined') {
+ if (Object.getPrototypeOf(module_or_path) === Object.prototype) {
+ ({module_or_path} = module_or_path)
+ } else {
+ console.warn('using deprecated parameters for the initialization function; pass a single object instead')
+ }
+ }
+
+ if (typeof module_or_path === 'undefined') {
+ module_or_path = new URL('hello_wasm_bg.wasm', import.meta.url);
+ }
+ const imports = __wbg_get_imports();
+
+ if (typeof module_or_path === 'string' || (typeof Request === 'function' && module_or_path instanceof Request) || (typeof URL === 'function' && module_or_path instanceof URL)) {
+ module_or_path = fetch(module_or_path);
+ }
+
+ __wbg_init_memory(imports);
+
+ const { instance, module } = await __wbg_load(await module_or_path, imports);
+
+ return __wbg_finalize_init(instance, module);
+}
+
+export { initSync };
+export default __wbg_init;
diff --git a/test/testdata/jsi_external/hello_wasm_bg.wasm b/test/testdata/jsi_external/hello_wasm_bg.wasm
new file mode 100644
index 000000000..d8f32c44c
Binary files /dev/null and b/test/testdata/jsi_external/hello_wasm_bg.wasm differ
diff --git a/test/testdata/reload_plugins/yt_dlp_plugins/jsinterp/normal.py b/test/testdata/reload_plugins/yt_dlp_plugins/jsinterp/normal.py
new file mode 100644
index 000000000..936555830
--- /dev/null
+++ b/test/testdata/reload_plugins/yt_dlp_plugins/jsinterp/normal.py
@@ -0,0 +1,5 @@
+from yt_dlp.jsinterp.common import JSI
+
+
+class NormalPluginJSI(JSI):
+ REPLACED = True
diff --git a/test/testdata/yt_dlp_plugins/jsinterp/normal.py b/test/testdata/yt_dlp_plugins/jsinterp/normal.py
new file mode 100644
index 000000000..329f1a8df
--- /dev/null
+++ b/test/testdata/yt_dlp_plugins/jsinterp/normal.py
@@ -0,0 +1,6 @@
+from yt_dlp.jsinterp.common import JSI
+
+
+class NormalPluginJSI(JSI):
+ TEST_DATA_PLUGIN = True
+ REPLACED = False
diff --git a/test/testdata/yt_dlp_plugins/jsinterp/override.py b/test/testdata/yt_dlp_plugins/jsinterp/override.py
new file mode 100644
index 000000000..a55836427
--- /dev/null
+++ b/test/testdata/yt_dlp_plugins/jsinterp/override.py
@@ -0,0 +1,5 @@
+from yt_dlp.jsinterp._deno import DenoJSI
+
+
+class OverrideDenoJSI(DenoJSI, plugin_name='override'):
+ TEST_FIELD = 'override'
diff --git a/test/testdata/yt_dlp_plugins/jsinterp/overridetwo.py b/test/testdata/yt_dlp_plugins/jsinterp/overridetwo.py
new file mode 100644
index 000000000..63e6a721d
--- /dev/null
+++ b/test/testdata/yt_dlp_plugins/jsinterp/overridetwo.py
@@ -0,0 +1,5 @@
+from yt_dlp.jsinterp._deno import DenoJSI
+
+
+class _UnderscoreOverrideDenoJSI(DenoJSI, plugin_name='underscore-override'):
+ SECONDARY_TEST_FIELD = 'underscore-override'
diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/jsinterp/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/jsinterp/zipped.py
new file mode 100644
index 000000000..cb081c33e
--- /dev/null
+++ b/test/testdata/zipped_plugins/yt_dlp_plugins/jsinterp/zipped.py
@@ -0,0 +1,5 @@
+from yt_dlp.jsinterp.common import JSI
+
+
+class ZippedPluginJSI(JSI):
+ pass
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 309489672..33c1c46d7 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -32,13 +32,15 @@
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor, import_extractors
from .extractor.common import UnsupportedURLIE
-from .extractor.openload import PhantomJSwrapper
+from .jsinterp import PhantomJSwrapper
from .globals import (
IN_CLI,
LAZY_EXTRACTORS,
plugin_ies,
plugin_ies_overrides,
plugin_pps,
+ plugin_jsis,
+ plugin_jsis_overrides,
all_plugins_loaded,
plugin_dirs,
)
@@ -445,6 +447,8 @@ class YoutubeDL:
Actual sleep time will be a random float from range
[sleep_interval; max_sleep_interval].
sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
+    jsi_preference:    Preferred JS interpreters to use during extraction, given as
+                       a list of interpreter names, e.g. ['Deno']
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
match_filter: A function that gets called for every video with the signature
@@ -4097,13 +4101,17 @@ def get_encoding(stream):
write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
- for plugin_type, plugins in (('Extractor', plugin_ies), ('Post-Processor', plugin_pps)):
+ for plugin_type, plugins in (('Extractor', plugin_ies), ('Post-Processor', plugin_pps),
+ ('JSI-Runtime', plugin_jsis)):
display_list = [
klass.__name__ if klass.__name__ == name else f'{klass.__name__} as {name}'
for name, klass in plugins.value.items()]
if plugin_type == 'Extractor':
display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
for parent, plugins in plugin_ies_overrides.value.items())
+ elif plugin_type == 'JSI-Runtime':
+ display_list.extend(f'{plugins[-1].JSI_NAME.partition("+")[2]} ({parent.__name__})'
+ for parent, plugins in plugin_jsis_overrides.value.items())
if not display_list:
continue
write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 714d9ad5c..2459409ab 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -946,6 +946,7 @@ def parse_options(argv=None):
'sleep_interval': opts.sleep_interval,
'max_sleep_interval': opts.max_sleep_interval,
'sleep_interval_subtitles': opts.sleep_interval_subtitles,
+ 'jsi_preference': opts.jsi_preference,
'external_downloader': opts.external_downloader,
'download_ranges': opts.download_ranges,
'force_keyframes_at_cuts': opts.force_keyframes_at_cuts,
diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py
index 68ace240c..f2abde781 100644
--- a/yt_dlp/extractor/douyutv.py
+++ b/yt_dlp/extractor/douyutv.py
@@ -4,7 +4,7 @@
import uuid
from .common import InfoExtractor
-from .openload import PhantomJSwrapper
+from ..jsinterp import PhantomJSwrapper
from ..utils import (
ExtractorError,
UserNotLive,
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index 735b44637..81b18e3e4 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -5,7 +5,7 @@
import urllib.parse
from .common import InfoExtractor
-from .openload import PhantomJSwrapper
+from ..jsinterp import JSIWrapper
from ..utils import (
ExtractorError,
clean_html,
@@ -398,6 +398,27 @@ class IqIE(InfoExtractor):
IE_DESC = 'International version of iQiyi'
# The `id` group captures the last dash-separated component of the slug (e.g. 'xmk7546rfw')
_VALID_URL = r'https?://(?:www\.)?iq\.com/play/(?:[\w%-]*-)?(?P<id>\w+)'
_TESTS = [{
+ 'url': 'https://www.iq.com/play/sangmin-dinneaw-episode-1-xmk7546rfw',
+ 'md5': '63fcb4b7d4863472fe0a9be75d9e9d60',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': 'xmk7546rfw',
+ 'title': '尚岷与丁尼奥 第1集',
+ 'description': 'md5:e8fe4a8da25f4b8c86bc5506b1c3faaa',
+ 'duration': 3092,
+ 'timestamp': 1735520401,
+ 'upload_date': '20241230',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'series': 'Sangmin Dinneaw',
+ 'age_limit': 18,
+ 'average_rating': float,
+ 'categories': [],
+ 'cast': ['Sangmin Choi', 'Ratana Aiamsaart'],
+ },
+ 'expected_warnings': ['format is restricted'],
+ 'jsi_matrix': True,
+ }, {
'url': 'https://www.iq.com/play/one-piece-episode-1000-1ma1i6ferf4',
'md5': '2d7caf6eeca8a32b407094b33b757d39',
'info_dict': {
@@ -418,6 +439,7 @@ class IqIE(InfoExtractor):
'format': '500',
},
'expected_warnings': ['format is restricted'],
+ 'skip': 'geo-restricted',
}, {
# VIP-restricted video
'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4',
@@ -449,7 +471,6 @@ class IqIE(InfoExtractor):
}
_DASH_JS = '''
- console.log(page.evaluate(function() {
var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s";
var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s";
var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime();
@@ -515,9 +536,7 @@ class IqIE(InfoExtractor):
var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path);
dash_paths[bid] = dash_path;
});
- return JSON.stringify(dash_paths);
- }));
- saveAndExit();
+ console.log(JSON.stringify(dash_paths));
'''
def _extract_vms_player_js(self, webpage, video_id):
@@ -597,22 +616,22 @@ def _real_extract(self, url):
else:
ut_list = ['0']
+ jsi = JSIWrapper(self, url, timeout=120)
+
# bid 0 as an initial format checker
- dash_paths = self._parse_json(PhantomJSwrapper(self, timeout=120_000).get(
- url, note2='Executing signature code (this may take a couple minutes)',
- html='', video_id=video_id, jscode=self._DASH_JS % {
- 'tvid': video_info['tvId'],
- 'vid': video_info['vid'],
- 'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
- expected_type=str, default='04022001010011000000'),
- 'uid': uid,
- 'dfp': self._get_cookie('dfp', ''),
- 'mode': self._get_cookie('mod', 'intl'),
- 'lang': self._get_cookie('lang', 'en_us'),
- 'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
- 'ut_list': '[' + ','.join(ut_list) + ']',
- 'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
- })[1].strip(), video_id)
+ dash_paths = self._parse_json(jsi.execute(self._DASH_JS % {
+ 'tvid': video_info['tvId'],
+ 'vid': video_info['vid'],
+ 'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
+ expected_type=str, default='04022001010011000000'),
+ 'uid': uid,
+ 'dfp': self._get_cookie('dfp', ''),
+ 'mode': self._get_cookie('mod', 'intl'),
+ 'lang': self._get_cookie('lang', 'en_us'),
+ 'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
+ 'ut_list': '[' + ','.join(ut_list) + ']',
+ 'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
+ }, video_id, html=''), video_id)
formats, subtitles = [], {}
initial_format_data = self._download_json(
diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py
deleted file mode 100644
index 2d56252b1..000000000
--- a/yt_dlp/extractor/openload.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import collections
-import contextlib
-import json
-import os
-import subprocess
-import tempfile
-import urllib.parse
-
-from ..utils import (
- ExtractorError,
- Popen,
- check_executable,
- format_field,
- get_exe_version,
- is_outdated_version,
- shell_quote,
-)
-
-
-def cookie_to_dict(cookie):
- cookie_dict = {
- 'name': cookie.name,
- 'value': cookie.value,
- }
- if cookie.port_specified:
- cookie_dict['port'] = cookie.port
- if cookie.domain_specified:
- cookie_dict['domain'] = cookie.domain
- if cookie.path_specified:
- cookie_dict['path'] = cookie.path
- if cookie.expires is not None:
- cookie_dict['expires'] = cookie.expires
- if cookie.secure is not None:
- cookie_dict['secure'] = cookie.secure
- if cookie.discard is not None:
- cookie_dict['discard'] = cookie.discard
- with contextlib.suppress(TypeError):
- if (cookie.has_nonstandard_attr('httpOnly')
- or cookie.has_nonstandard_attr('httponly')
- or cookie.has_nonstandard_attr('HttpOnly')):
- cookie_dict['httponly'] = True
- return cookie_dict
-
-
-def cookie_jar_to_list(cookie_jar):
- return [cookie_to_dict(cookie) for cookie in cookie_jar]
-
-
-class PhantomJSwrapper:
- """PhantomJS wrapper class
-
- This class is experimental.
- """
-
- INSTALL_HINT = 'Please download it from https://phantomjs.org/download.html'
-
- _BASE_JS = R'''
- phantom.onError = function(msg, trace) {{
- var msgStack = ['PHANTOM ERROR: ' + msg];
- if(trace && trace.length) {{
- msgStack.push('TRACE:');
- trace.forEach(function(t) {{
- msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
- + (t.function ? ' (in function ' + t.function +')' : ''));
- }});
- }}
- console.error(msgStack.join('\n'));
- phantom.exit(1);
- }};
- '''
-
- _TEMPLATE = R'''
- var page = require('webpage').create();
- var fs = require('fs');
- var read = {{ mode: 'r', charset: 'utf-8' }};
- var write = {{ mode: 'w', charset: 'utf-8' }};
- JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
- phantom.addCookie(x);
- }});
- page.settings.resourceTimeout = {timeout};
- page.settings.userAgent = "{ua}";
- page.onLoadStarted = function() {{
- page.evaluate(function() {{
- delete window._phantom;
- delete window.callPhantom;
- }});
- }};
- var saveAndExit = function() {{
- fs.write("{html}", page.content, write);
- fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
- phantom.exit();
- }};
- page.onLoadFinished = function(status) {{
- if(page.url === "") {{
- page.setContent(fs.read("{html}", read), "{url}");
- }}
- else {{
- {jscode}
- }}
- }};
- page.open("");
- '''
-
- _TMP_FILE_NAMES = ['script', 'html', 'cookies']
-
- @staticmethod
- def _version():
- return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
-
- def __init__(self, extractor, required_version=None, timeout=10000):
- self._TMP_FILES = {}
-
- self.exe = check_executable('phantomjs', ['-v'])
- if not self.exe:
- raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True)
-
- self.extractor = extractor
-
- if required_version:
- version = self._version()
- if is_outdated_version(version, required_version):
- self.extractor._downloader.report_warning(
- 'Your copy of PhantomJS is outdated, update it to version '
- f'{required_version} or newer if you encounter any errors.')
-
- for name in self._TMP_FILE_NAMES:
- tmp = tempfile.NamedTemporaryFile(delete=False)
- tmp.close()
- self._TMP_FILES[name] = tmp
-
- self.options = collections.ChainMap({
- 'timeout': timeout,
- }, {
- x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
- for x in self._TMP_FILE_NAMES
- })
-
- def __del__(self):
- for name in self._TMP_FILE_NAMES:
- with contextlib.suppress(OSError, KeyError):
- os.remove(self._TMP_FILES[name].name)
-
- def _save_cookies(self, url):
- cookies = cookie_jar_to_list(self.extractor.cookiejar)
- for cookie in cookies:
- if 'path' not in cookie:
- cookie['path'] = '/'
- if 'domain' not in cookie:
- cookie['domain'] = urllib.parse.urlparse(url).netloc
- with open(self._TMP_FILES['cookies'].name, 'wb') as f:
- f.write(json.dumps(cookies).encode())
-
- def _load_cookies(self):
- with open(self._TMP_FILES['cookies'].name, 'rb') as f:
- cookies = json.loads(f.read().decode('utf-8'))
- for cookie in cookies:
- if cookie['httponly'] is True:
- cookie['rest'] = {'httpOnly': None}
- if 'expiry' in cookie:
- cookie['expire_time'] = cookie['expiry']
- self.extractor._set_cookie(**cookie)
-
- def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
- """
- Downloads webpage (if needed) and executes JS
-
- Params:
- url: website url
- html: optional, html code of website
- video_id: video id
- note: optional, displayed when downloading webpage
- note2: optional, displayed when executing JS
- headers: custom http headers
- jscode: code to be executed when page is loaded
-
- Returns tuple with:
- * downloaded website (after JS execution)
- * anything you print with `console.log` (but not inside `page.execute`!)
-
- In most cases you don't need to add any `jscode`.
- It is executed in `page.onLoadFinished`.
- `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
- It is possible to wait for some element on the webpage, e.g.
- var check = function() {
- var elementFound = page.evaluate(function() {
- return document.querySelector('#b.done') !== null;
- });
- if(elementFound)
- saveAndExit();
- else
- window.setTimeout(check, 500);
- }
-
- page.evaluate(function(){
- document.querySelector('#a').click();
- });
- check();
- """
- if 'saveAndExit();' not in jscode:
- raise ExtractorError('`saveAndExit();` not found in `jscode`')
- if not html:
- html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
- with open(self._TMP_FILES['html'].name, 'wb') as f:
- f.write(html.encode())
-
- self._save_cookies(url)
-
- user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent']
- jscode = self._TEMPLATE.format_map(self.options.new_child({
- 'url': url,
- 'ua': user_agent.replace('"', '\\"'),
- 'jscode': jscode,
- }))
-
- stdout = self.execute(jscode, video_id, note=note2)
-
- with open(self._TMP_FILES['html'].name, 'rb') as f:
- html = f.read().decode('utf-8')
- self._load_cookies()
-
- return html, stdout
-
- def execute(self, jscode, video_id=None, *, note='Executing JS'):
- """Execute JS and return stdout"""
- if 'phantom.exit();' not in jscode:
- jscode += ';\nphantom.exit();'
- jscode = self._BASE_JS + jscode
-
- with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f:
- f.write(jscode)
- self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}')
-
- cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name]
- self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}')
- try:
- stdout, stderr, returncode = Popen.run(cmd, timeout=self.options['timeout'] / 1000,
- text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- except Exception as e:
- raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e)
- if returncode:
- raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}')
-
- return stdout
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index e1e9777e8..a6180d4b4 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -5,7 +5,7 @@
import re
from .common import InfoExtractor
-from .openload import PhantomJSwrapper
+from ..jsinterp import PhantomJSwrapper
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index 65182b971..9a683ae8f 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -6,7 +6,7 @@
from .common import InfoExtractor
from .periscope import PeriscopeBaseIE, PeriscopeIE
-from ..jsinterp import js_number_to_string
+from ..jsinterp.native import js_number_to_string
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py
index 55ebdce1b..148efcb3a 100644
--- a/yt_dlp/extractor/youtube/_video.py
+++ b/yt_dlp/extractor/youtube/_video.py
@@ -25,8 +25,7 @@
)
from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest
-from ..openload import PhantomJSwrapper
-from ...jsinterp import JSInterpreter
+from ...jsinterp import JSInterpreter, PhantomJSwrapper
from ...networking.exceptions import HTTPError
from ...utils import (
NO_DEFAULT,
diff --git a/yt_dlp/globals.py b/yt_dlp/globals.py
index 0cf276cc9..917b1fa44 100644
--- a/yt_dlp/globals.py
+++ b/yt_dlp/globals.py
@@ -15,6 +15,7 @@ def __repr__(self, /):
postprocessors = Indirect({})
extractors = Indirect({})
+jsi_runtimes = Indirect({})
# Plugins
all_plugins_loaded = Indirect(False)
@@ -23,7 +24,9 @@ def __repr__(self, /):
plugin_ies = Indirect({})
plugin_pps = Indirect({})
+plugin_jsis = Indirect({})
plugin_ies_overrides = Indirect(defaultdict(list))
+plugin_jsis_overrides = Indirect(defaultdict(list))
# Misc
IN_CLI = Indirect(False)
diff --git a/yt_dlp/jsinterp/__init__.py b/yt_dlp/jsinterp/__init__.py
new file mode 100644
index 000000000..4052924a3
--- /dev/null
+++ b/yt_dlp/jsinterp/__init__.py
@@ -0,0 +1,27 @@
+# flake8: noqa: F401
+from .native import JSInterpreter
+from .common import _JSI_PREFERENCES, JSIWrapper
+from ._phantomjs import PhantomJSJSI, PhantomJSwrapper
+from ._deno import DenoJSI, DenoJSDomJSI
+from ..globals import jsi_runtimes, plugin_jsis
+from ..plugins import PluginSpec, register_plugin_spec
+
+jsi_runtimes.value.update({
+ name: value
+ for name, value in globals().items()
+ if name.endswith('JSI')
+})
+
+register_plugin_spec(PluginSpec(
+ module_name='jsinterp',
+ suffix='JSI',
+ destination=jsi_runtimes,
+ plugin_destination=plugin_jsis,
+))
+
+__all__ = [  # entries must be str; `from yt_dlp.jsinterp import *` raises TypeError on objects
+    'JSInterpreter',
+    'PhantomJSwrapper',
+    '_JSI_PREFERENCES',
+    'JSIWrapper',
+]
diff --git a/yt_dlp/jsinterp/_deno.py b/yt_dlp/jsinterp/_deno.py
new file mode 100644
index 000000000..72264998a
--- /dev/null
+++ b/yt_dlp/jsinterp/_deno.py
@@ -0,0 +1,204 @@
+from __future__ import annotations
+
+import http.cookiejar
+import json
+import platform
+import re
+import subprocess
+import typing
+import urllib.parse
+
+
+from ..utils import (
+ ExtractorError,
+ Popen,
+ int_or_none,
+ shell_quote,
+ unified_timestamp,
+)
+from ._helper import TempFileWrapper, random_string, override_navigator_js, extract_script_tags
+from .common import ExternalJSI
+
+
+class DenoJSI(ExternalJSI):
+ """JS interpreter class using Deno binary"""
+ _BASE_PREFERENCE = 5
+ _EXE_NAME = 'deno'
+ _DENO_FLAGS = ['--cached-only', '--no-prompt', '--no-check']
+ _INIT_SCRIPT = 'localStorage.clear(); delete window.Deno; global = window = globalThis;\n'
+
+ def __init__(self, *args, flags=[], replace_flags=False, init_script=None, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._flags = flags if replace_flags else [*self._DENO_FLAGS, *flags]
+ self._init_script = self._INIT_SCRIPT if init_script is None else init_script
+
+ @property
+ def _override_navigator_js(self):
+ return override_navigator_js(self.user_agent)
+
+ def _run_deno(self, cmd):
+ self.write_debug(f'Deno command line: {shell_quote(cmd)}')
+ try:
+ stdout, stderr, returncode = Popen.run(
+ cmd, timeout=self.timeout, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ except Exception as e:
+ raise ExtractorError('Unable to run Deno binary', cause=e)
+ if returncode:
+ raise ExtractorError(f'Failed with returncode {returncode}:\n{stderr}')
+ elif stderr:
+ self.report_warning(f'JS console error msg:\n{stderr.strip()}')
+ return stdout.strip()
+
+ def execute(self, jscode, video_id=None, note='Executing JS in Deno'):
+ self.report_note(video_id, note)
+ location_args = ['--location', self._url] if self._url else []
+ with TempFileWrapper(f'{self._init_script};\n{self._override_navigator_js}\n{jscode}', suffix='.js') as js_file:
+ cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
+ return self._run_deno(cmd)
+
+
+class DenoJSDomJSI(DenoJSI):
+ _BASE_PREFERENCE = 4
+ _DENO_FLAGS = ['--cached-only', '--no-prompt', '--no-check']
+ _JSDOM_VERSION = None
+ _JSDOM_URL = 'https://esm.sh/v135/jsdom' # force use esm v135, see esm-dev/esm.sh #1034
+
+ @staticmethod
+ def serialize_cookie(cookiejar: YoutubeDLCookieJar | None, url: str):
+ """serialize netscape-compatible fields from cookiejar for tough-cookie loading"""
+ # JSDOM use tough-cookie as its CookieJar https://github.com/jsdom/jsdom/blob/main/lib/api.js
+ # tough-cookie use Cookie.fromJSON and Cookie.toJSON for cookie serialization
+ # https://github.com/salesforce/tough-cookie/blob/master/lib/cookie/cookie.ts
+ if not cookiejar:
+ return json.dumps({'cookies': []})
+ cookies: list[http.cookiejar.Cookie] = list(cookiejar.get_cookies_for_url(url))
+ return json.dumps({'cookies': [{
+ 'key': cookie.name,
+ 'value': cookie.value,
+ # leading dot of domain must be removed, otherwise will fail to match
+ 'domain': cookie.domain.lstrip('.') or urllib.parse.urlparse(url).hostname,
+ 'expires': int_or_none(cookie.expires, invscale=1000),
+ 'hostOnly': not cookie.domain_initial_dot,
+ 'secure': bool(cookie.secure),
+ 'path': cookie.path,
+ } for cookie in cookies if cookie.value]})
+
+ @staticmethod
+ def apply_cookies(cookiejar: YoutubeDLCookieJar | None, cookies: list[dict]):
+ """apply cookies from serialized tough-cookie"""
+ # see serialize_cookie
+ if not cookiejar:
+ return
+ for cookie_dict in cookies:
+ if not all(cookie_dict.get(k) for k in ('key', 'value', 'domain')):
+ continue
+ if cookie_dict.get('hostOnly'):
+ cookie_dict['domain'] = cookie_dict['domain'].lstrip('.')
+ else:
+ cookie_dict['domain'] = '.' + cookie_dict['domain'].lstrip('.')
+
+ cookiejar.set_cookie(http.cookiejar.Cookie(
+ 0, cookie_dict['key'], cookie_dict['value'],
+ None, False,
+ cookie_dict['domain'], True, not cookie_dict.get('hostOnly'),
+ cookie_dict.get('path', '/'), True,
+ bool(cookie_dict.get('secure')),
+ unified_timestamp(cookie_dict.get('expires')),
+ False, None, None, {}))
+
+ def _ensure_jsdom(self):
+ if self._JSDOM_VERSION:
+ return
+ # `--allow-import` is unsupported in v1, and esm.sh:443 is default allowed remote host for v2
+ result = self._run_deno([self.exe, 'info', self._JSDOM_URL])
+ version_line = next((line for line in result.splitlines() if self._JSDOM_URL in line), '')
+ if m := re.search(r'@([\d\.]+)', version_line):
+ self._JSDOM_VERSION = m[1]
+
+ def report_version(self):
+ super().report_version()
+ self._ensure_jsdom()
+ self.write_debug(f'JSDOM lib version {self._JSDOM_VERSION}')
+
+ def execute(self, jscode, video_id=None, note='Executing JS in Deno with jsdom', html='', cookiejar=None):
+ self.report_note(video_id, note)
+ self._ensure_jsdom()
+
+ if cookiejar and not self._url:
+ self.report_warning('No valid url scope provided, cookiejar is not applied')
+ cookiejar = None
+
+ html, inline_scripts = extract_script_tags(html)
+ wrapper_scripts = '\n'.join(['try { %s } catch (e) {}' % script for script in inline_scripts])
+
+ callback_varname = f'__callback_{random_string()}'
+ script = f'''{self._init_script};
+ import jsdom from "{self._JSDOM_URL}";
+ let {callback_varname} = (() => {{
+ const jar = jsdom.CookieJar.deserializeSync({json.dumps(self.serialize_cookie(cookiejar, self._url))});
+ const dom = new jsdom.JSDOM({json.dumps(str(html))}, {{
+ {'url: %s,' % json.dumps(str(self._url)) if self._url else ''}
+ cookieJar: jar,
+ pretendToBeVisual: true,
+ }});
+ Object.keys(dom.window).filter(key => !['atob', 'btoa', 'crypto', 'location'].includes(key))
+ .filter(key => !(window.location? [] : ['sessionStorage', 'localStorage']).includes(key))
+ .forEach((key) => {{
+ try {{window[key] = dom.window[key]}} catch (e) {{ console.error(e) }}
+ }});
+ {self._override_navigator_js};
+
+ window.screen = {{
+ availWidth: 1920,
+ availHeight: 1040,
+ width: 1920,
+ height: 1080,
+ colorDepth: 24,
+ isExtended: true,
+ onchange: null,
+ orientation: {{angle: 0, type: 'landscape-primary', onchange: null}},
+ pixelDepth: 24,
+ }}
+ Object.defineProperty(document.body, 'clientWidth', {{value: 1903}});
+ Object.defineProperty(document.body, 'clientHeight', {{value: 2000}});
+ document.domain = location?.hostname;
+
+ delete window.jsdom;
+ const origLog = console.log;
+ console.log = () => {{}};
+ console.info = () => {{}};
+ return () => {{
+ const stdout = [];
+ console.log = (...msg) => stdout.push(msg.map(m => '' + m).join(' '));
+ return () => {{ origLog(JSON.stringify({{
+ stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
+ }}
+ }})();
+ {wrapper_scripts}
+ {callback_varname} = {callback_varname}(); // begin to capture console.log
+ try {{
+ {jscode}
+ }} finally {{
+ {callback_varname}();
+ }}
+ '''
+
+ # https://github.com/prebuild/node-gyp-build/blob/6822ec5/node-gyp-build.js#L196-L198
+ # This jsdom dependency raises fatal error on linux unless read for this file is allowed
+ read_flag = ['--allow-read=/etc/alpine-release'] if platform.system() == 'Linux' else []
+
+ location_args = ['--location', self._url] if self._url else []
+
+ with TempFileWrapper(script, suffix='.js') as js_file:
+ cmd = [self.exe, 'run', *self._flags, *read_flag, *location_args, js_file.name]
+ result = self._run_deno(cmd)
+ try:
+ data = json.loads(result)
+ except json.JSONDecodeError as e:
+ raise ExtractorError(f'Failed to parse JSON output from Deno: {result}', cause=e)
+ self.apply_cookies(cookiejar, data['cookies'])
+ return data['stdout']
+
+
+if typing.TYPE_CHECKING:
+ from ..cookies import YoutubeDLCookieJar
diff --git a/yt_dlp/jsinterp/_helper.py b/yt_dlp/jsinterp/_helper.py
new file mode 100644
index 000000000..811366466
--- /dev/null
+++ b/yt_dlp/jsinterp/_helper.py
@@ -0,0 +1,135 @@
+from __future__ import annotations
+import contextlib
+import json
+import os
+import random
+import re
+import string
+import tempfile
+
+
+class TempFileWrapper:
+    """
+    Wrapper for NamedTemporaryFile; the file is closed after each io and deleted on cleanup/context exit/gc
+
+    @param {str | bytes | None} content: content to write to file upon creation
+    @param {bool} text: whether to open file in text mode
+    @param {str} encoding: encoding to use for text mode
+    @param {str | None} suffix: suffix for filename of temporary file
+    """
+
+    def __init__(self, content: str | bytes | None = None, text: bool = True,
+                 encoding='utf-8', suffix: str | None = None):
+        self.encoding = None if not text else encoding
+        self.text = text
+        self._file = tempfile.NamedTemporaryFile('w' if text else 'wb', encoding=self.encoding,
+                                                 suffix=suffix, delete=False)
+        if content:
+            self._file.write(content)
+        self._file.close()
+
+    @property
+    def name(self):
+        return self._file.name
+
+    @contextlib.contextmanager
+    def opened_file(self, mode, *, seek=None, seek_whence=0):
+        mode = mode if (self.text or 'b' in mode) else mode + 'b'
+        with open(self._file.name, mode, encoding=self.encoding) as f:
+            if seek is not None:
+                f.seek(seek, seek_whence)  # seek the re-opened handle; self._file was closed in __init__
+            yield f
+
+    def write(self, s, seek=None, seek_whence=0):
+        """re-open file in write mode and write, optionally seek to position first"""
+        with self.opened_file('w', seek=seek, seek_whence=seek_whence) as f:
+            return f.write(s)
+
+    def append_write(self, s, seek=None, seek_whence=0):
+        """re-open file in append mode and write, optionally seek to position first"""
+        with self.opened_file('a', seek=seek, seek_whence=seek_whence) as f:
+            return f.write(s)
+
+    def read(self, n=-1, seek=None, seek_whence=0):
+        """re-open file and read, optionally seek to position first"""
+        with self.opened_file('r', seek=seek, seek_whence=seek_whence) as f:
+            return f.read(n)
+
+    def cleanup(self):
+        with contextlib.suppress(OSError):
+            os.remove(self._file.name)
+
+    def __del__(self):
+        self.cleanup()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.cleanup()
+
+
+def random_string(length: int = 10) -> str:
+ return ''.join(random.choices(string.ascii_letters, k=length))
+
+
+def override_navigator_js(user_agent: str) -> str:
+ """Generate js snippet to override navigator properties based on user_agent string"""
+ return '\n'.join([
+ 'Object.defineProperty(navigator, "%s", { value: %s, configurable: true });' % (k, json.dumps(v))
+ for k, v in {
+ 'userAgent': user_agent,
+ 'language': 'en-US',
+ 'languages': ['en-US'],
+ 'webdriver': False,
+ 'cookieEnabled': True,
+ 'appCodeName': user_agent.split('/', maxsplit=1)[0],
+ 'appName': 'Netscape',
+ 'appVersion': user_agent.split('/', maxsplit=1)[-1],
+ 'platform': 'Win32',
+ 'product': 'Gecko',
+ 'productSub': '20030107',
+ 'vendor': 'Google Inc.',
+ 'vendorSub': '',
+ 'onLine': True,
+ }.items()
+ ])
+
+
+def extract_script_tags(html: str) -> tuple[str, list[str]]:
+    """Strip non-empty inline <script> elements from html; returns (stripped html, script bodies)"""
+    script_indicies, inline_scripts = [], []
+    # NOTE: opening tags match case-insensitively, but the closing tag lookup is case-sensitive
+    for match_start in re.finditer(r'<script[^>]*>', html, re.DOTALL | re.IGNORECASE):
+        end = html.find('</script>', match_start.end())
+        if end > match_start.end():
+            script_indicies.append((match_start.start(), end + len('</script>')))
+            inline_scripts.append(html[match_start.end():end])
+    # Remove spans back-to-front so the offsets of earlier spans stay valid
+    for start, end in reversed(script_indicies):
+        html = html[:start] + html[end:]
+
+    return html, inline_scripts
+
+
+def prepare_wasm_jsmodule(js_mod: str, wasm: bytes) -> str:
+ """
+ Sanitize js wrapper module generated by rust wasm-pack for wasm init
+ Removes export and import.meta, and inlines wasm binary as Uint8Array
+ See test/test_data/jsi_external/hello_wasm.js for example
+
+ @param {str} js_mod: js wrapper module generated by rust wasm-pack
+ @param {bytes} wasm: wasm binary
+ """
+
+ js_mod = re.sub(r'export(?:\s+default)?([\s{])', r'\1', js_mod)
+ js_mod = js_mod.replace('import.meta', '{}')
+
+ return js_mod + ''';
+ await (async () => {
+ const t = __wbg_get_imports();
+ __wbg_init_memory(t);
+ const {module, instance} = await WebAssembly.instantiate(Uint8Array.from(%s), t);
+ __wbg_finalize_init(instance, module);
+ })();
+ ''' % list(wasm)
diff --git a/yt_dlp/jsinterp/_phantomjs.py b/yt_dlp/jsinterp/_phantomjs.py
new file mode 100644
index 000000000..ccd255052
--- /dev/null
+++ b/yt_dlp/jsinterp/_phantomjs.py
@@ -0,0 +1,262 @@
+from __future__ import annotations
+
+import contextlib
+import http.cookiejar
+import json
+import subprocess
+import typing
+import urllib.parse
+
+
+from ..utils import (
+ ExtractorError,
+ Popen,
+ filter_dict,
+ int_or_none,
+ is_outdated_version,
+ shell_quote,
+)
+from ._helper import TempFileWrapper, random_string, extract_script_tags
+from .common import ExternalJSI
+
+
+class PhantomJSJSI(ExternalJSI):
+ _EXE_NAME = 'phantomjs'
+ _BASE_PREFERENCE = 3
+
+ _BASE_JS = R'''
+ phantom.onError = function(msg, trace) {{
+ var msgStack = ['PHANTOM ERROR: ' + msg];
+ if(trace && trace.length) {{
+ msgStack.push('TRACE:');
+ trace.forEach(function(t) {{
+ msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+ + (t.function ? ' (in function ' + t.function +')' : ''));
+ }});
+ }}
+ console.error(msgStack.join('\n'));
+ phantom.exit(1);
+ }};
+ '''
+
+ _TEMPLATE = R'''
+ var page = require('webpage').create();
+ var fs = require('fs');
+ var read = {{ mode: 'r', charset: 'utf-8' }};
+ var write = {{ mode: 'w', charset: 'utf-8' }};
+ page.settings.resourceTimeout = {timeout};
+ page.settings.userAgent = {ua};
+ page.onLoadStarted = function() {{
+ page.evaluate(function() {{
+ delete window._phantom;
+ delete window.callPhantom;
+ }});
+ }};
+ var saveAndExit = function() {{
+ fs.write({html_fn}, page.content, write);
+ fs.write({cookies_fn}, JSON.stringify(phantom.cookies), write);
+ phantom.exit();
+ }};
+ var loaded = false;
+ page.onLoadFinished = function(status) {{
+ if(page.url === "" && !loaded) {{
+ page.setContent(fs.read({html_fn}, read), {url});
+ loaded = true;
+ }}
+ else {{
+ JSON.parse(fs.read({cookies_fn}, read)).forEach(function(x) {{
+ phantom.addCookie(x);
+ }});
+ {jscode}
+ }}
+ }};
+ page.open("");
+ '''
+
+ def _save_cookies(self, url, cookiejar: YoutubeDLCookieJar | None):
+ def _cookie_to_dict(cookie: http.cookiejar.Cookie):
+ cookie_dict = {
+ 'name': cookie.name,
+ 'value': cookie.value,
+ 'port': cookie.port,
+ 'domain': cookie.domain,
+ 'path': cookie.path or '/',
+ 'expires': int_or_none(cookie.expires, invscale=1000),
+ 'secure': cookie.secure,
+ 'discard': cookie.discard,
+ }
+ if not cookie_dict['domain']:
+ cookie_dict['domain'] = urllib.parse.urlparse(url).hostname
+ cookie_dict['port'] = urllib.parse.urlparse(url).port
+ with contextlib.suppress(TypeError):
+ if (cookie.has_nonstandard_attr('httpOnly')
+ or cookie.has_nonstandard_attr('httponly')
+ or cookie.has_nonstandard_attr('HttpOnly')):
+ cookie_dict['httponly'] = True
+ return filter_dict(cookie_dict)
+
+ cookies = cookiejar.get_cookies_for_url(url) if cookiejar else []
+ return json.dumps([_cookie_to_dict(cookie) for cookie in cookies])
+
+ def _load_cookies(self, cookies_json: str, cookiejar: YoutubeDLCookieJar | None):
+ if not cookiejar:
+ return
+ cookies = json.loads(cookies_json)
+ for cookie in cookies:
+ cookiejar.set_cookie(http.cookiejar.Cookie(
+ 0, cookie['name'], cookie['value'], cookie.get('port'), cookie.get('port') is not None,
+ cookie['domain'], True, cookie['domain'].startswith('.'),
+ cookie.get('path', '/'), True,
+ cookie.get('secure', False), cookie.get('expiry'),
+ cookie.get('discard', False), None, None,
+ {'httpOnly': None} if cookie.get('httponly') is True else {},
+ ))
+
+ def _execute(self, jscode: str, video_id=None, *, note='Executing JS in PhantomJS'):
+ """Execute JS and return stdout"""
+ if 'phantom.exit();' not in jscode:
+ jscode += ';\nphantom.exit();'
+ jscode = self._BASE_JS + jscode
+
+ self.report_note(video_id, note)
+ with TempFileWrapper(jscode, suffix='.js') as js_file:
+ cmd = [self.exe, '--ssl-protocol=any', js_file.name]
+ self.write_debug(f'PhantomJS command line: {shell_quote(cmd)}')
+ try:
+ stdout, stderr, returncode = Popen.run(
+ cmd, timeout=self.timeout, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ except Exception as e:
+ raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e)
+ if returncode:
+ raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}')
+ return stdout
+
+ def _execute_html(self, jscode: str, url: str, html: str, cookiejar, video_id=None, note='Executing JS on webpage'):
+ if 'saveAndExit();' not in jscode:
+ raise ExtractorError('`saveAndExit();` not found in `jscode`')
+
+ if cookiejar and not url:
+ self.report_warning('No valid url scope provided, cookiejar is not applied')
+ cookiejar = None
+
+ html, inline_scripts = extract_script_tags(html)
+ wrapped_scripts = '\n'.join([
+ 'page.evaluate(function() { try { %s } catch (e) {} });' % inline for inline in inline_scripts])
+
+ html_file = TempFileWrapper(html, suffix='.html')
+ cookie_file = TempFileWrapper(self._save_cookies(url, cookiejar), suffix='.json')
+
+ script = self._TEMPLATE.format_map({
+ 'url': json.dumps(str(url)),
+ 'ua': json.dumps(str(self.user_agent)),
+ 'jscode': f'{wrapped_scripts}\n{jscode}',
+ 'html_fn': json.dumps(html_file.name),
+ 'cookies_fn': json.dumps(cookie_file.name),
+ 'timeout': int(self.timeout * 1000),
+ })
+
+ stdout = self._execute(script, video_id, note=note)
+ self._load_cookies(cookie_file.read(), cookiejar)
+ new_html = html_file.read()
+
+ return new_html, stdout
+
+ def execute(self, jscode, video_id=None, note='Executing JS in PhantomJS', html='', cookiejar=None):
+ jscode = '''console.log(page.evaluate(function() {
+ var %(std_var)s = [];
+ console.log = function() {
+ var values = '';
+ for (var i = 0; i < arguments.length; i++) {
+ values += arguments[i] + ' ';
+ }
+ %(std_var)s.push(values);
+ }
+ %(jscode)s;
+ return %(std_var)s.join('\\n');
+
+ }));
+ saveAndExit();''' % {
+ 'std_var': f'__stdout__values_{random_string()}',
+ 'jscode': jscode,
+ }
+ return self._execute_html(jscode, self._url, html, cookiejar, video_id=video_id, note=note)[1].strip()
+
+
+class PhantomJSwrapper:
+ """PhantomJS wrapper class
+
+ This class is experimental.
+ """
+ INSTALL_HINT = 'Please download PhantomJS from https://phantomjs.org/download.html'
+
+ @classmethod
+ def _version(cls):
+ return PhantomJSJSI.exe_version
+
+ def __init__(self, extractor: InfoExtractor, required_version=None, timeout=10000):
+ self._jsi = PhantomJSJSI(extractor._downloader, '', timeout / 1000, {})
+
+ if not self._jsi.is_available():
+ raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True)
+
+ self.extractor = extractor
+
+ if required_version:
+ if is_outdated_version(self._jsi.exe_version, required_version):
+ self._jsi.report_warning(
+ 'Your copy of PhantomJS is outdated, update it to version '
+ f'{required_version} or newer if you encounter any errors.')
+
+ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
+ """
+ Downloads webpage (if needed) and executes JS
+
+ Params:
+ url: website url
+ html: optional, html code of website
+ video_id: video id
+ note: optional, displayed when downloading webpage
+ note2: optional, displayed when executing JS
+ headers: custom http headers
+ jscode: code to be executed when page is loaded
+
+ Returns tuple with:
+ * downloaded website (after JS execution)
+ * anything you print with `console.log` (but not inside `page.execute`!)
+
+ In most cases you don't need to add any `jscode`.
+ It is executed in `page.onLoadFinished`.
+ `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
+ It is possible to wait for some element on the webpage, e.g.
+ var check = function() {
+ var elementFound = page.evaluate(function() {
+ return document.querySelector('#b.done') !== null;
+ });
+ if(elementFound)
+ saveAndExit();
+ else
+ window.setTimeout(check, 500);
+ }
+
+ page.evaluate(function(){
+ document.querySelector('#a').click();
+ });
+ check();
+ """
+ if 'saveAndExit();' not in jscode:
+ raise ExtractorError('`saveAndExit();` not found in `jscode`')
+ if not html:
+ html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
+
+ self._jsi.user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent']
+
+ return self._jsi._execute_html(jscode, url, html, self.extractor.cookiejar, video_id=video_id, note=note2)
+
+ def execute(self, jscode, video_id=None, *, note='Executing JS in PhantomJS'):
+ """Execute JS and return stdout"""
+ return self._jsi.execute(jscode, video_id=video_id, note=note)
+
+
+if typing.TYPE_CHECKING:
+ from ..extractor.common import InfoExtractor
+ from ..cookies import YoutubeDLCookieJar
diff --git a/yt_dlp/jsinterp/common.py b/yt_dlp/jsinterp/common.py
new file mode 100644
index 000000000..f8ac233fb
--- /dev/null
+++ b/yt_dlp/jsinterp/common.py
@@ -0,0 +1,315 @@
+from __future__ import annotations
+
+import abc
+import inspect
+import sys
+import typing
+
+from ..globals import jsi_runtimes, plugin_jsis_overrides
+from ..extractor.common import InfoExtractor
+from ..utils import (
+ classproperty,
+ format_field,
+ filter_dict,
+ get_exe_version,
+ url_or_none,
+ sanitize_url,
+ ExtractorError,
+)
+
+_JSI_PREFERENCES: set[JSIPreference] = set()  # filled by register_jsi_preference(); merged into every JSIWrapper's preferences
+
+
+def get_all_handlers() -> dict[str, type[JSI]]:  # every class in `jsi_runtimes`, keyed by its JSI_KEY
+    return {jsi.JSI_KEY: jsi for jsi in jsi_runtimes.value.values()}
+
+
+def to_jsi_keys(jsi_or_keys: typing.Iterable[str | type[JSI] | JSI]) -> list[str]:  # str items pass through, others map to JSI_KEY
+    return [jok if isinstance(jok, str) else jok.JSI_KEY for jok in jsi_or_keys]
+
+
+def get_included_jsi(only_include=None, exclude=None):
+    """Return `{JSI_KEY: class}` of registered JSIs in `only_include` (all if empty) and not in `exclude`"""
+    included = set(to_jsi_keys(only_include or ()))  # resolved once, so one-shot iterables work too
+    excluded = set(to_jsi_keys(exclude or ()))
+    return {key: value for key, value in get_all_handlers().items()
+            if (not included or key in included) and key not in excluded}
+
+
+def order_to_pref(jsi_order: typing.Iterable[str | type[JSI] | JSI], multiplier: int) -> JSIPreference:
+    """Build a preference function: the last listed JSI scores `multiplier`, each earlier one a further multiple; unlisted score 0"""
+    keys = to_jsi_keys(jsi_order)
+    scores = {key: rank * multiplier for rank, key in enumerate(reversed(keys), start=1)}
+
+    def _pref(jsi: JSI, *args):
+        return scores.get(jsi.JSI_KEY, 0)
+    return _pref
+
+
+class JSIWrapper:
+    """
+    Helper class to forward JS interp request to a JSI that supports it.
+
+    Usage:
+    ```
+    def _real_extract(self, url):
+        ...
+        jsi = JSIWrapper(self, url)
+        result = jsi.execute(jscode, video_id)
+        ...
+    ```
+
+    @param dl_or_ie: `YoutubeDL` or `InfoExtractor` instance.
+    @param url: setting url context
+    @param only_include: limit JSI to choose from.
+    @param exclude: JSI to avoid using.
+    @param jsi_params: extra kwargs to pass to `JSI.__init__()` for each JSI, using jsi key as dict key.
+    @param preferred_order: list of JSI to try before others. First in list is tried first. The `jsi_preference` option outweighs it.
+    @param timeout: timeout parameter for all chosen JSI
+    @param user_agent: specify user-agent to use, default to downloader UA
+    """
+
+    def __init__(
+        self,
+        dl_or_ie: YoutubeDL | InfoExtractor,
+        url: str = '',
+        only_include: typing.Iterable[str | type[JSI]] = [],
+        exclude: typing.Iterable[str | type[JSI]] = [],
+        jsi_params: dict[str, dict] = {},
+        preferred_order: typing.Iterable[str | type[JSI]] = [],
+        timeout: float | int = 10,
+        user_agent: str | None = None,
+    ):
+        if isinstance(dl_or_ie, InfoExtractor):
+            self._downloader = dl_or_ie._downloader
+            self._ie_key = dl_or_ie.ie_key()
+        else:
+            self._downloader = dl_or_ie
+            self._ie_key = None
+
+        self._url = self._sanitize_url(url)
+        self.preferences: set[JSIPreference] = {
+            order_to_pref(self._load_jsi_keys_from_option('jsi_preference'), 10000),
+            order_to_pref(preferred_order, 100),
+        } | _JSI_PREFERENCES
+
+        handler_classes = self._load_allowed_jsi_cls(only_include, exclude)
+        if not handler_classes:
+            raise ExtractorError('No JSI is allowed to use')
+
+        user_agent = user_agent or self._downloader.params['http_headers']['User-Agent']
+        self._handler_dict = {cls.JSI_KEY: cls(
+            self._downloader, url=self._url, timeout=timeout,
+            user_agent=user_agent, **jsi_params.get(cls.JSI_KEY, {}),
+        ) for cls in handler_classes.values()}
+
+        self._is_test = self._downloader.params.get('test', False)
+
+    def _sanitize_url(self, url):
+        sanitized = sanitize_url(url_or_none(url)) or ''
+        if url and not sanitized:  # a non-empty url was rejected: warn and continue with ''
+            self.report_warning(f'Invalid URL: "{url}", using empty string instead')
+        return sanitized
+
+    def _load_jsi_keys_from_option(self, option_key):
+        """Return the valid JSI keys of `params[option_key]` as a new list (params untouched), warning about unknown keys"""
+        valid_handlers = get_all_handlers()
+        jsi_keys = list(self._downloader.params.get(option_key) or [])
+        for invalid_key in (key for key in jsi_keys if key not in valid_handlers):
+            self.report_warning(f'{option_key}: `{invalid_key}` is not a valid JSI', only_once=True)
+        return [key for key in jsi_keys if key in valid_handlers]
+
+    def _load_allowed_jsi_cls(self, only_include, exclude):
+        self.write_debug(f'Loaded JSI runtimes: {get_all_handlers()}')
+        handler_classes = filter_dict(  # keep only the JSIs that accept this extractor
+            get_included_jsi(only_include, exclude),
+            lambda _, v: v.supports_extractor(self._ie_key))
+        self.write_debug(f'Select JSI {"for " + self._ie_key if self._ie_key else ""}: {to_jsi_keys(handler_classes)}, '
+                         f'included: {to_jsi_keys(only_include) or "all"}, excluded: {to_jsi_keys(exclude)}')
+        return handler_classes
+
+    def write_debug(self, message, only_once=False):
+        return self._downloader.write_debug(f'[JSIDirector] {message}', only_once=only_once)
+
+    def report_warning(self, message, only_once=False):
+        return self._downloader.report_warning(f'[JSIDirector] {message}', only_once=only_once)
+
+    def _get_handlers(self, method_name: str, *args, **kwargs) -> list[JSI]:
+        """Return the handlers whose `method_name` names every given kwarg, highest summed preference first"""
+        def _supports_method_with_params(jsi: JSI):
+            if not callable(method := getattr(jsi, method_name, None)):
+                return False
+            method_params = inspect.signature(method).parameters
+            return all(key in method_params for key in kwargs)
+
+        handlers = [h for h in self._handler_dict.values() if _supports_method_with_params(h)]
+        self.write_debug(f'Choosing handlers for method `{method_name}` with kwargs {list(kwargs)}'
+                         f': {to_jsi_keys(handlers)}')
+        if not handlers:
+            raise ExtractorError(f'No JSI supports method `{method_name}` with kwargs {list(kwargs)}, '
+                                 f'included handlers: {to_jsi_keys(self._handler_dict.values())}')
+
+        preferences = {
+            handler.JSI_KEY: sum(pref_func(handler, method_name, args, kwargs) for pref_func in self.preferences)
+            for handler in handlers
+        }
+        self.write_debug('JSI preferences for `{}` request: {}'.format(
+            method_name, ', '.join(f'{key}={pref}' for key, pref in preferences.items())))
+
+        return sorted(handlers, key=lambda h: preferences[h.JSI_KEY], reverse=True)
+
+    def _dispatch_request(self, method_name: str, *args, **kwargs):
+        """Return `method_name(*args, **kwargs)` from the first available handler that succeeds, else raise ExtractorError"""
+        handlers = self._get_handlers(method_name, *args, **kwargs)
+        unavailable: list[str] = []
+        exceptions: list[tuple[JSI, Exception]] = []
+
+        for handler in handlers:
+            if not handler.is_available():
+                if self._is_test:
+                    raise ExtractorError(f'{handler.JSI_NAME} is not available for testing, '
+                                         f'add "{handler.JSI_KEY}" in `exclude` if it should not be used')
+                self.write_debug(f'{handler.JSI_KEY} is not available')
+                unavailable.append(handler.JSI_NAME)
+                continue
+
+            try:
+                self.write_debug(f'Dispatching `{method_name}` task to {handler.JSI_NAME}')
+                handler.report_version()
+                return getattr(handler, method_name)(*args, **kwargs)
+            except ExtractorError as e:
+                if self._is_test:  # tests must not silently fall back; keep the real error as the cause
+                    raise ExtractorError(f'{handler.JSI_NAME} got error while evaluating js, '
+                                         f'add "{handler.JSI_KEY}" in `exclude` if it should not be used') from e
+                exceptions.append((handler, e))
+                self.write_debug(f'{handler.JSI_NAME} encountered error, fallback to next handler: {e}')
+
+        if not exceptions:
+            msg = f'No available JSI installed, please install one of: {", ".join(unavailable)}'
+        else:
+            msg = f'Failed to perform {method_name}, total {len(exceptions)} errors'
+            if unavailable:
+                msg = f'{msg}. You may try installing one of unavailable JSI: {", ".join(unavailable)}'
+        raise ExtractorError(msg)
+
+    def execute(self, jscode: str, video_id: str | None, note: str | None = None,
+                html: str | None = None, cookiejar: YoutubeDLCookieJar | None = None) -> str:
+        """
+        Execute JS code and return stdout from console.log
+
+        @param jscode: JS code to execute
+        @param video_id
+        @param note
+        @param html: html to load as document
+        @param cookiejar: cookiejar to read and set cookies, pass `InfoExtractor.cookiejar` if you want to read and write cookies
+        """
+        return self._dispatch_request('execute', jscode, video_id, **filter_dict({
+            'note': note, 'html': html, 'cookiejar': cookiejar}))
+
+
+class JSI(abc.ABC):  # abstract base class of the JS interpreter handlers used by JSIWrapper
+    _BASE_PREFERENCE: int = 0  # read by `_base_preference`, which caps the value at 10
+
+    def __init__(self, downloader: YoutubeDL, url: str, timeout: float | int, user_agent=None):
+        self._downloader = downloader
+        self._url = url
+        self.timeout = timeout
+        self.user_agent: str = user_agent or self._downloader.params['http_headers']['User-Agent']
+
+    @classmethod
+    def __init_subclass__(cls, *, plugin_name=None, **kwargs):
+        if plugin_name:  # only subclasses declared with `plugin_name=...` act as overrides
+            mro = inspect.getmro(cls)
+            next_mro_class = super_class = mro[mro.index(cls) + 1]  # the class right after `cls` in its MRO
+
+            while getattr(super_class, '__wrapped__', None):  # follow `__wrapped__` links set by earlier overrides
+                super_class = super_class.__wrapped__
+
+            if not any(override.PLUGIN_NAME == plugin_name for override in plugin_jsis_overrides.value[super_class]):
+                cls.__wrapped__ = next_mro_class
+                cls.PLUGIN_NAME, cls.JSI_KEY = plugin_name, next_mro_class.JSI_KEY  # keep the overridden class's key
+                cls.JSI_NAME = f'{next_mro_class.JSI_NAME}+{plugin_name}'
+
+                setattr(sys.modules[super_class.__module__], super_class.__name__, cls)  # rebind the name in its module
+                # additional update jsi_runtime because jsis are not further loaded like extractors
+                jsi_runtimes.value[super_class.JSI_KEY] = cls
+                plugin_jsis_overrides.value[super_class].append(cls)
+        return super().__init_subclass__(**kwargs)
+
+    @abc.abstractmethod
+    def is_available(self) -> bool:  # JSIWrapper skips handlers for which this is falsy
+        raise NotImplementedError
+
+    def write_debug(self, msg, *args, **kwargs):
+        self._downloader.write_debug(f'[{self.JSI_NAME}] {msg}', *args, **kwargs)
+
+    def report_warning(self, msg, *args, **kwargs):
+        self._downloader.report_warning(f'[{self.JSI_NAME}] {msg}', *args, **kwargs)
+
+    def to_screen(self, msg, *args, **kwargs):
+        self._downloader.to_screen(f'[{self.JSI_NAME}] {msg}', *args, **kwargs)
+
+    def report_note(self, video_id, note):
+        self.to_screen(f'{format_field(video_id, None, "%s: ")}{note}')
+
+    def report_version(self):  # no-op here; called by JSIWrapper before each dispatch
+        return
+
+    @classmethod
+    def supports_extractor(cls, ie_key: str):  # base implementation accepts every extractor
+        return True
+
+    @classproperty
+    def JSI_NAME(cls) -> str:  # class name minus its last 3 characters
+        return cls.__name__[:-3]
+
+    @classproperty
+    def JSI_KEY(cls) -> str:  # class name minus the mandatory `JSI` suffix
+        assert cls.__name__.endswith('JSI'), 'JSI class names must end with "JSI"'
+        return cls.__name__[:-3]
+
+
+class ExternalJSI(JSI, abc.ABC):  # JSI whose availability depends on an external executable
+    _EXE_NAME: str  # executable that is probed for its version
+
+    @classproperty(cache=True)
+    def exe_version(cls):  # probes `_EXE_NAME` with `V_ARGS` (default `--version`) via get_exe_version
+        return get_exe_version(cls._EXE_NAME, args=getattr(cls, 'V_ARGS', ['--version']), version_re=r'([0-9.]+)')
+
+    @classproperty
+    def exe(cls):  # `_EXE_NAME` when a version was detected, else None
+        return cls._EXE_NAME if cls.exe_version else None
+
+    @classmethod
+    def is_available(cls):
+        return bool(cls.exe)
+
+    def report_version(self):  # logs the detected version as a debug message
+        self.write_debug(f'{self._EXE_NAME} version {self.exe_version}')
+
+
+def register_jsi_preference(*handlers: type[JSI]):
+    """Decorator factory: register a preference function that only scores instances of `handlers` (every JSI if none given)"""
+    assert all(issubclass(handler, JSI) for handler in handlers), f'{handlers} must all be a subclass of JSI'
+
+    def outer(pref_func: JSIPreference) -> JSIPreference:
+        def inner(handler: JSI, *args):
+            applies = not handlers or isinstance(handler, handlers)
+            return pref_func(handler, *args) if applies else 0
+        _JSI_PREFERENCES.add(inner)
+        return inner
+    return outer
+
+
+@register_jsi_preference()  # no handler classes given, so it is applied to every JSI
+def _base_preference(handler: JSI, *args):
+    return min(10, getattr(handler, '_BASE_PREFERENCE', 0))  # a handler's own preference, capped at 10
+
+
+if typing.TYPE_CHECKING:  # the names below are referenced by annotations in this module
+    from ..YoutubeDL import YoutubeDL
+    from ..cookies import YoutubeDLCookieJar
+
+    class JSIPreference(typing.Protocol):  # a preference function; JSIWrapper sums their results per handler
+        def __call__(self, handler: JSI, method_name: str, *args, **kwargs) -> int:
+            ...
diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp/native.py
similarity index 99%
rename from yt_dlp/jsinterp.py
rename to yt_dlp/jsinterp/native.py
index 45aeffa22..2812d28c1 100644
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp/native.py
@@ -6,7 +6,7 @@
import operator
import re
-from .utils import (
+from ..utils import (
NO_DEFAULT,
ExtractorError,
function_with_repr,
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index b4d3d4d66..f347696a1 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1192,6 +1192,11 @@ def _preset_alias_callback(option, opt_str, value, parser):
'--sleep-subtitles', metavar='SECONDS',
dest='sleep_interval_subtitles', default=0, type=int,
help='Number of seconds to sleep before each subtitle download')
+ workarounds.add_option(
+ '--jsi-preference',
+ metavar='JSI', dest='jsi_preference', default=[], type='str', action='callback',
+ callback=_list_from_options_callback,
+ help='Preferred JS interpreters to use during extraction. Can be given as comma-separated values.')
verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
verbosity.add_option(