From 6327703ea81ce94e652541c5f2b2648c308bec88 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Thu, 8 Aug 2024 11:07:27 -0400 Subject: [PATCH] creating external --- test/test_jsinterp.py | 2 +- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/extractor/douyutv.py | 2 +- yt_dlp/extractor/iqiyi.py | 2 +- yt_dlp/extractor/pornhub.py | 2 +- yt_dlp/extractor/rplay.py | 6 +- yt_dlp/extractor/youtube.py | 2 +- yt_dlp/jsinterp/__init__.py | 9 ++ .../openload.py => jsinterp/external.py} | 119 ++++++++++++++---- yt_dlp/{ => jsinterp}/jsinterp.py | 2 +- 10 files changed, 114 insertions(+), 34 deletions(-) create mode 100644 yt_dlp/jsinterp/__init__.py rename yt_dlp/{extractor/openload.py => jsinterp/external.py} (73%) rename yt_dlp/{ => jsinterp}/jsinterp.py (99%) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index df92c8315b..34b145959a 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -9,7 +9,7 @@ import math -from yt_dlp.jsinterp import JS_Undefined, JSInterpreter +from yt_dlp.jsinterp.jsinterp import JS_Undefined, JSInterpreter class NaN: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9691a1ea7c..6ddf3f51bf 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -32,7 +32,7 @@ from .downloader.rtmp import rtmpdump_version from .extractor import gen_extractor_classes, get_info_extractor from .extractor.common import UnsupportedURLIE -from .extractor.openload import PhantomJSwrapper +from .jsinterp import PhantomJSwrapper from .minicurses import format_text from .networking import HEADRequest, Request, RequestDirector from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index 8932f914e4..6417c63a50 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -4,7 +4,7 @@ import uuid from .common import InfoExtractor -from .openload import DenoWrapper, PhantomJSwrapper +from ..jsinterp import 
DenoWrapper, PhantomJSwrapper from ..utils import ( ExtractorError, UserNotLive, diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 735b44637c..18ee3d2de1 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -5,7 +5,7 @@ import urllib.parse from .common import InfoExtractor -from .openload import PhantomJSwrapper +from ..jsinterp import PhantomJSwrapper from ..utils import ( ExtractorError, clean_html, diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index 679dc63234..6e67265a09 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor -from .openload import PhantomJSwrapper +from ..jsinterp import PhantomJSwrapper from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/rplay.py b/yt_dlp/extractor/rplay.py index 0b9a00e680..98bba2364a 100644 --- a/yt_dlp/extractor/rplay.py +++ b/yt_dlp/extractor/rplay.py @@ -8,7 +8,7 @@ import time from .common import InfoExtractor -from .openload import DenoWrapper +from ..jsinterp import DenoWrapper from ..utils import ( ExtractorError, UserNotLive, @@ -121,7 +121,7 @@ def _calc_butter_token(self): butter_js += '__new_init().then(() => console.log((new ButterFactory()).generate_butter()));' jsi = DenoWrapper(self) - return jsi.execute(butter_js) + return jsi.execute(butter_js, jit_less=False) def get_butter_token(self): cache = self.cache.load('rplay', 'butter-token') or {} @@ -229,7 +229,7 @@ def _real_extract(self, url): 'contentOid': video_id, 'creatorOid': metainfo.get('uploader_id'), **self.requestor_query, - }, fatal=False)) + }, errnote='Failed to get thumbnail url', fatal=False)) formats = self._extract_m3u8_formats(m3u8_url, video_id, headers={ 'Referer': 'https://rplay.live/', 'Butter': self.get_butter_token()}) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 
53aca3816b..257e9767de 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -20,7 +20,7 @@ import urllib.parse from .common import InfoExtractor, SearchInfoExtractor -from .openload import PhantomJSwrapper +from ..jsinterp import PhantomJSwrapper from ..jsinterp import JSInterpreter from ..networking.exceptions import HTTPError, network_exceptions from ..utils import ( diff --git a/yt_dlp/jsinterp/__init__.py b/yt_dlp/jsinterp/__init__.py new file mode 100644 index 0000000000..944d73fca6 --- /dev/null +++ b/yt_dlp/jsinterp/__init__.py @@ -0,0 +1,9 @@ +from .jsinterp import JSInterpreter +from .external import PhantomJSwrapper, DenoWrapper + + +__all__ = [ + 'JSInterpreter', + 'PhantomJSwrapper', + 'DenoWrapper', +] diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/jsinterp/external.py similarity index 73% rename from yt_dlp/extractor/openload.py rename to yt_dlp/jsinterp/external.py index 525be0e6dc..1eb21701c7 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/jsinterp/external.py @@ -6,7 +6,7 @@ import tempfile import urllib.parse -from .common import InfoExtractor +from ..extractor.common import InfoExtractor from ..utils import ( ExtractorError, Popen, @@ -47,14 +47,29 @@ def cookie_jar_to_list(cookie_jar): return [cookie_to_dict(cookie) for cookie in cookie_jar] +@contextlib.contextmanager +def _temp_file(content, *, mode='wt', encoding='utf-8', suffix=None, close=True): + if 'b' in mode: + encoding = None + temp_file_handle = tempfile.NamedTemporaryFile(mode, encoding=encoding, suffix=suffix, delete=False) + try: + temp_file_handle.write(content) + if close: + temp_file_handle.close() + yield temp_file_handle + finally: + with contextlib.suppress(OSError): + os.remove(temp_file_handle.name) + + class ExternalJSI: @classproperty(cache=True) def version(cls): - return get_exe_version(cls.EXE_NAME, args=getattr(cls, 'V_ARGS', ['--version']), version_re=r'([0-9.]+)') + return get_exe_version(cls._EXE_NAME, args=getattr(cls, 'V_ARGS', 
['--version']), version_re=r'([0-9.]+)') @classproperty def exe(cls): - return cls.EXE_NAME if cls.version else None + return cls._EXE_NAME if cls.version else None @classproperty def is_available(cls): @@ -62,7 +77,7 @@ def is_available(cls): class DenoWrapper(ExternalJSI): - EXE_NAME = 'deno' + _EXE_NAME = 'deno' INSTALL_HINT = 'Please install deno following https://docs.deno.com/runtime/manual/getting_started/installation/ or download its binary from https://github.com/denoland/deno/releases' def __init__(self, extractor: InfoExtractor, required_version=None, timeout=10000): @@ -76,31 +91,19 @@ def __init__(self, extractor: InfoExtractor, required_version=None, timeout=1000 self.extractor.report_warning( f'Deno is outdated, update it to version {required_version} or newer if you encounter any errors.') - @contextlib.contextmanager - def _create_temp_js(self, jscode): - js_file = tempfile.NamedTemporaryFile('wt', encoding='utf-8', suffix='.js', delete=False) - try: - js_file.write(jscode) - js_file.close() - yield js_file - finally: - with contextlib.suppress(OSError): - os.remove(js_file.name) - - def execute(self, jscode, video_id=None, *, note='Executing JS in Deno', - allow_net=None, jit_less=True, base_js=None): + def execute(self, jscode, video_id=None, *, note='Executing JS in Deno', flags=None, jit_less=True, base_js=None): """Execute JS directly in Deno runtime and return stdout""" base_js = base_js if base_js is not None else 'delete window.Deno; global = window;' - with self._create_temp_js(base_js + jscode) as js_file: - self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}') + with _temp_file(base_js + jscode, suffix='.js') as js_file: + if note: + self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}') - cmd = [self.exe, 'run', js_file.name] - if allow_net: - cmd.append('--allow-net' if isinstance(allow_net, bool) else f'--allow-net={allow_net}') + cmd = [self.exe, 'run', *(flags or [])] if jit_less: 
cmd.append('--v8-flags=--jitless') + cmd.append(js_file.name) self.extractor.write_debug(f'Deno command line: {shell_quote(cmd)}') try: @@ -116,12 +119,80 @@ def execute(self, jscode, video_id=None, *, note='Executing JS in Deno', return stdout.strip() +class PuppeteerWrapper: + version = '16.2.0' + _HEADLESS = False + + @classproperty + def is_available(cls): + return DenoWrapper.is_available + + @classproperty + def INSTALL_HINT(cls): + msg = 'Run "deno run -A https://deno.land/x/puppeteer@16.2.0/install.ts" to install puppeteer' + if not DenoWrapper.is_available: + msg = f'{DenoWrapper.INSTALL_HINT}. Then {msg}' + return msg + + def __init__(self, extractor: InfoExtractor, required_version=None, timeout=10000): + self.deno = DenoWrapper(extractor, timeout=(timeout + 30000)) + self.timeout = timeout + self.extractor = extractor + + if required_version: + self.extractor.report_warning(f'required_version is not supported on {self.__class__.__name__}') + + def _deno_execute(self, jscode, note=None): + return self.deno.execute(f''' + import puppeteer from "https://deno.land/x/puppeteer@16.2.0/mod.ts"; + const browser = await puppeteer.launch({{ + headless: {json.dumps(bool(self._HEADLESS))}, args: ["--disable-web-security"]}}); + try {{ + {jscode} + }} finally {{ + await browser.close(); + }}''', note=note, flags=['--allow-all'], jit_less=False, base_js='') + + def evaluate(self, jscode, video_id=None, note='Executing JS in Puppeteer', url='about:blank'): + self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}') + return json.loads(self._deno_execute(f''' + const page = await browser.newPage(); + window.setTimeout(async () => {{ + console.error('Puppeteer execution timed out'); + await browser.close(); + Deno.exit(1); + }}, {int(self.timeout)}); + page.resourceTimeout = {int(self.timeout)}; + + await page.setRequestInterception(true); + page.on("request", request => request.abort()); + + const url = {json.dumps(str(url))}; + await 
page.evaluate(`window.history.replaceState('', '', ${{JSON.stringify(url)}})`); + + console.log(JSON.stringify(await page.evaluate({json.dumps(str(jscode))}))); + await browser.close(); + Deno.exit(0); + ''')) + + def execute(self, jscode, **args): + return self.evaluate(f''' + (async () => {{ + const results = []; + const console = new Proxy(window.console, {{ get: (target, prop) => ( + prop === 'log' ? (...data) => data.forEach(i => results.push(i)) : target[prop]) }}); + {jscode} + return results; + }})(); + ''', **args) + + +class PhantomJSwrapper(ExternalJSI): """PhantomJS wrapper class This class is experimental. """ - EXE_NAME = 'phantomjs' + _EXE_NAME = 'phantomjs' INSTALL_HINT = 'Please download PhantomJS from https://phantomjs.org/download.html' _BASE_JS = R''' @@ -288,7 +359,7 @@ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on w return html, stdout - def execute(self, jscode, video_id=None, *, note='Executing JS'): + def execute(self, jscode, video_id=None, *, note='Executing JS in PhantomJS'): """Execute JS and return stdout""" if 'phantom.exit();' not in jscode: jscode += ';\nphantom.exit();' diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp/jsinterp.py similarity index 99% rename from yt_dlp/jsinterp.py rename to yt_dlp/jsinterp/jsinterp.py index 851d4dc7bf..8c2b5a7251 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp/jsinterp.py @@ -6,7 +6,7 @@ import operator import re -from .utils import ( +from ..utils import ( NO_DEFAULT, ExtractorError, function_with_repr,