1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-04 20:38:38 +00:00

creating external

This commit is contained in:
c-basalt 2024-08-08 11:07:27 -04:00
parent e13c4573d9
commit 6327703ea8
10 changed files with 114 additions and 34 deletions

View File

@ -9,7 +9,7 @@
import math import math
from yt_dlp.jsinterp import JS_Undefined, JSInterpreter from yt_dlp.jsinterp.jsinterp import JS_Undefined, JSInterpreter
class NaN: class NaN:

View File

@ -32,7 +32,7 @@
from .downloader.rtmp import rtmpdump_version from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper from .jsinterp import PhantomJSwrapper
from .minicurses import format_text from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES

View File

@ -4,7 +4,7 @@
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from .openload import DenoWrapper, PhantomJSwrapper from ..jsinterp import DenoWrapper, PhantomJSwrapper
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
UserNotLive, UserNotLive,

View File

@ -5,7 +5,7 @@
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from .openload import PhantomJSwrapper from ..jsinterp import PhantomJSwrapper
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,

View File

@ -5,7 +5,7 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .openload import PhantomJSwrapper from ..jsinterp import PhantomJSwrapper
from ..networking import Request from ..networking import Request
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (

View File

@ -8,7 +8,7 @@
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from .openload import DenoWrapper from ..jsinterp import DenoWrapper
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
UserNotLive, UserNotLive,
@ -121,7 +121,7 @@ def _calc_butter_token(self):
butter_js += '__new_init().then(() => console.log((new ButterFactory()).generate_butter()));' butter_js += '__new_init().then(() => console.log((new ButterFactory()).generate_butter()));'
jsi = DenoWrapper(self) jsi = DenoWrapper(self)
return jsi.execute(butter_js) return jsi.execute(butter_js, jit_less=False)
def get_butter_token(self): def get_butter_token(self):
cache = self.cache.load('rplay', 'butter-token') or {} cache = self.cache.load('rplay', 'butter-token') or {}
@ -229,7 +229,7 @@ def _real_extract(self, url):
'contentOid': video_id, 'contentOid': video_id,
'creatorOid': metainfo.get('uploader_id'), 'creatorOid': metainfo.get('uploader_id'),
**self.requestor_query, **self.requestor_query,
}, fatal=False)) }, errnote='Failed to get thumbnail url', fatal=False))
formats = self._extract_m3u8_formats(m3u8_url, video_id, headers={ formats = self._extract_m3u8_formats(m3u8_url, video_id, headers={
'Referer': 'https://rplay.live/', 'Butter': self.get_butter_token()}) 'Referer': 'https://rplay.live/', 'Butter': self.get_butter_token()})

View File

@ -20,7 +20,7 @@
import urllib.parse import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper from ..jsinterp import PhantomJSwrapper
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..networking.exceptions import HTTPError, network_exceptions from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import ( from ..utils import (

View File

@ -0,0 +1,9 @@
from .jsinterp import JSInterpreter
from .external import PhantomJSwrapper, DenoWrapper
# Public names of the package. Entries must be strings: the import machinery
# raises TypeError on `from ... import *` if __all__ contains non-str items.
__all__ = [
    'JSInterpreter',
    'PhantomJSwrapper',
    'DenoWrapper',
]

View File

@ -6,7 +6,7 @@
import tempfile import tempfile
import urllib.parse import urllib.parse
from .common import InfoExtractor from ..extractor.common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
Popen, Popen,
@ -47,14 +47,29 @@ def cookie_jar_to_list(cookie_jar):
return [cookie_to_dict(cookie) for cookie in cookie_jar] return [cookie_to_dict(cookie) for cookie in cookie_jar]
@contextlib.contextmanager
def _temp_file(content, *, mode='wt', encoding='utf-8', suffix=None, close=True):
    """Write content to a named temporary file and delete the file on exit

    @param content    Data to write: str for text modes, bytes for binary modes
    @param mode       Mode passed to tempfile.NamedTemporaryFile
    @param encoding   Text encoding; ignored when mode is binary
    @param suffix     File name suffix, e.g. '.js'
    @param close      Close the handle before yielding it. When False, the
                      content is flushed and the handle is yielded still open
    @yields           The NamedTemporaryFile wrapper; its path is `.name`
    """
    if 'b' in mode:
        # Binary mode does not take an encoding argument
        encoding = None
    temp_file_handle = tempfile.NamedTemporaryFile(mode, encoding=encoding, suffix=suffix, delete=False)
    try:
        temp_file_handle.write(content)
        if close:
            temp_file_handle.close()
        else:
            # Make the content visible to anything that opens the file by name
            temp_file_handle.flush()
        yield temp_file_handle
    finally:
        # Closing an already closed file is a no-op. The handle must be closed
        # before removal, since an open file cannot be deleted on Windows
        with contextlib.suppress(OSError):
            temp_file_handle.close()
        with contextlib.suppress(OSError):
            os.remove(temp_file_handle.name)
class ExternalJSI: class ExternalJSI:
@classproperty(cache=True) @classproperty(cache=True)
def version(cls): def version(cls):
return get_exe_version(cls.EXE_NAME, args=getattr(cls, 'V_ARGS', ['--version']), version_re=r'([0-9.]+)') return get_exe_version(cls._EXE_NAME, args=getattr(cls, 'V_ARGS', ['--version']), version_re=r'([0-9.]+)')
@classproperty @classproperty
def exe(cls): def exe(cls):
return cls.EXE_NAME if cls.version else None return cls._EXE_NAME if cls.version else None
@classproperty @classproperty
def is_available(cls): def is_available(cls):
@ -62,7 +77,7 @@ def is_available(cls):
class DenoWrapper(ExternalJSI): class DenoWrapper(ExternalJSI):
EXE_NAME = 'deno' _EXE_NAME = 'deno'
INSTALL_HINT = 'Please install deno following https://docs.deno.com/runtime/manual/getting_started/installation/ or download its binary from https://github.com/denoland/deno/releases' INSTALL_HINT = 'Please install deno following https://docs.deno.com/runtime/manual/getting_started/installation/ or download its binary from https://github.com/denoland/deno/releases'
def __init__(self, extractor: InfoExtractor, required_version=None, timeout=10000): def __init__(self, extractor: InfoExtractor, required_version=None, timeout=10000):
@ -76,31 +91,19 @@ def __init__(self, extractor: InfoExtractor, required_version=None, timeout=1000
self.extractor.report_warning( self.extractor.report_warning(
f'Deno is outdated, update it to version {required_version} or newer if you encounter any errors.') f'Deno is outdated, update it to version {required_version} or newer if you encounter any errors.')
@contextlib.contextmanager def execute(self, jscode, video_id=None, *, note='Executing JS in Deno', flags=[], jit_less=True, base_js=None):
def _create_temp_js(self, jscode):
js_file = tempfile.NamedTemporaryFile('wt', encoding='utf-8', suffix='.js', delete=False)
try:
js_file.write(jscode)
js_file.close()
yield js_file
finally:
with contextlib.suppress(OSError):
os.remove(js_file.name)
def execute(self, jscode, video_id=None, *, note='Executing JS in Deno',
allow_net=None, jit_less=True, base_js=None):
"""Execute JS directly in Deno runtime and return stdout""" """Execute JS directly in Deno runtime and return stdout"""
base_js = base_js if base_js is not None else 'delete window.Deno; global = window;' base_js = base_js if base_js is not None else 'delete window.Deno; global = window;'
with self._create_temp_js(base_js + jscode) as js_file: with _temp_file(base_js + jscode, suffix='.js') as js_file:
self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}') if note:
self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}')
cmd = [self.exe, 'run', js_file.name] cmd = [self.exe, 'run'] + flags
if allow_net:
cmd.append('--allow-net' if isinstance(allow_net, bool) else f'--allow-net={allow_net}')
if jit_less: if jit_less:
cmd.append('--v8-flags=--jitless') cmd.append('--v8-flags=--jitless')
cmd.append(js_file.name)
self.extractor.write_debug(f'Deno command line: {shell_quote(cmd)}') self.extractor.write_debug(f'Deno command line: {shell_quote(cmd)}')
try: try:
@ -116,12 +119,80 @@ def execute(self, jscode, video_id=None, *, note='Executing JS in Deno',
return stdout.strip() return stdout.strip()
class PuppeteerWrapper:
    """Run JS in a browser page controlled by puppeteer, launched through Deno

    The puppeteer module is imported by URL inside a script executed with DenoWrapper,
    so availability of this wrapper is tied to availability of Deno.
    """
    # Matches the puppeteer release pinned in the deno.land URLs below
    version = '16.2.0'
    # Passed as the `headless` option of puppeteer.launch()
    _HEADLESS = False

    @classproperty
    def is_available(cls):
        return DenoWrapper.is_available

    @classproperty
    def INSTALL_HINT(cls):
        msg = 'Run "deno run -A https://deno.land/x/puppeteer@16.2.0/install.ts" to install puppeteer'
        if not DenoWrapper.is_available:
            msg = f'{DenoWrapper.INSTALL_HINT}. Then {msg}'
        return msg

    def __init__(self, extractor: InfoExtractor, required_version=None, timeout=10000):
        """
        @param extractor         Extractor used for screen output and warnings
        @param required_version  Not supported by this wrapper; a warning is reported if given
        @param timeout           Timeout in milliseconds applied inside the puppeteer script
        """
        # The Deno process gets 30s more than the page timeout
        # NOTE(review): presumably headroom for browser startup/shutdown - confirm
        self.deno = DenoWrapper(extractor, timeout=(timeout + 30000))
        self.timeout = timeout
        self.extractor = extractor
        if required_version:
            self.extractor.report_warning(f'required_version is not supported on {self.__class__.__name__}')

    def _deno_execute(self, jscode, note=None):
        """Run jscode in Deno with a launched puppeteer `browser` in scope and return stdout

        The browser is closed in a `finally` clause of the generated script.
        """
        return self.deno.execute(f'''
            import puppeteer from "https://deno.land/x/puppeteer@16.2.0/mod.ts";
            const browser = await puppeteer.launch({{
                headless: {json.dumps(bool(self._HEADLESS))}, args: ["--disable-web-security"]}});
            try {{
                {jscode}
            }} finally {{
                await browser.close();
            }}''', note=note, flags=['--allow-all'], jit_less=False, base_js='')

    def evaluate(self, jscode, video_id=None, note='Executing JS in Puppeteer', url='about:blank'):
        """Evaluate jscode as an expression in a new page and return its JSON-decoded value

        @param jscode    JS expression passed as a string to page.evaluate()
        @param video_id  Optional id used as prefix of the screen message
        @param note      Screen message; nothing is printed if falsy
        @param url       URL the page history is rewritten to before evaluation
        @returns         Value of the expression; None if it evaluates to undefined
        """
        if note:
            self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}')
        # All network requests of the page are aborted; only the passed JS is run.
        # `?? null` is needed since JSON.stringify(undefined) is undefined,
        # which would be printed as the non-JSON text "undefined"
        return json.loads(self._deno_execute(f'''
            const page = await browser.newPage();
            window.setTimeout(async () => {{
                console.error('Puppeteer execution timed out');
                await browser.close();
                Deno.exit(1);
            }}, {int(self.timeout)});
            page.resourceTimeout = {int(self.timeout)};

            await page.setRequestInterception(true);
            page.on("request", request => request.abort());

            const url = {json.dumps(str(url))};
            await page.evaluate(`window.history.replaceState('', '', ${{JSON.stringify(url)}})`);

            console.log(JSON.stringify((await page.evaluate({json.dumps(str(jscode))})) ?? null));
            await browser.close();
            Deno.exit(0);
        '''))

    def execute(self, jscode, **args):
        """Execute JS in a page and return what it printed with console.log

        @param jscode  JS code to run in the page
        @param args    Keyword arguments forwarded to evaluate() (video_id, note, url)
        @returns       Logged messages joined with newlines
        """
        # console.log of the page is replaced while jscode runs so that its output is
        # collected. The code is run with indirect eval, i.e. in the global scope of the page
        return self.evaluate(f'''
            (async () => {{
                const results = [];
                const origLog = console.log;
                console.log = (...data) => results.push(data.map(String).join(' '));
                try {{
                    await (0, eval)({json.dumps(str(jscode))});
                }} finally {{
                    console.log = origLog;
                }}
                return results.join('\\n');
            }})()
        ''', **args)
class PhantomJSwrapper(ExternalJSI): class PhantomJSwrapper(ExternalJSI):
"""PhantomJS wrapper class """PhantomJS wrapper class
This class is experimental. This class is experimental.
""" """
EXE_NAME = 'phantomjs' _EXE_NAME = 'phantomjs'
INSTALL_HINT = 'Please download PhantomJS from https://phantomjs.org/download.html' INSTALL_HINT = 'Please download PhantomJS from https://phantomjs.org/download.html'
_BASE_JS = R''' _BASE_JS = R'''
@ -288,7 +359,7 @@ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on w
return html, stdout return html, stdout
def execute(self, jscode, video_id=None, *, note='Executing JS'): def execute(self, jscode, video_id=None, *, note='Executing JS in PhantomJS'):
"""Execute JS and return stdout""" """Execute JS and return stdout"""
if 'phantom.exit();' not in jscode: if 'phantom.exit();' not in jscode:
jscode += ';\nphantom.exit();' jscode += ';\nphantom.exit();'

View File

@ -6,7 +6,7 @@
import operator import operator
import re import re
from .utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
ExtractorError, ExtractorError,
function_with_repr, function_with_repr,