1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-01 01:58:28 +00:00

Merge branch 'yt-dlp:master' into master

This commit is contained in:
Nikolay Fedorov 2025-07-06 13:52:40 +03:00 committed by GitHub
commit d58e75c06f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 151 additions and 9 deletions

View File

@ -490,6 +490,52 @@ def test_increment_decrement(self):
self._test('function f() { var a = "test--"; return a; }', 'test--') self._test('function f() { var a = "test--"; return a; }', 'test--')
self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--') self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--')
def test_nested_function_scoping(self):
    # Each case hands `self._test` the JS source of a function `f` together
    # with the value `f` is expected to return. All three cases exercise
    # `var` declarations inside a nested function that reuse names which
    # the enclosing function also declares.

    # Case 1: the inner function declares and returns its own `P` (= 2).
    # The outer `P` is declared after `g` is defined and set to 1; calling
    # `g()` must leave it untouched, so `f` returns 1.
    self._test(R'''
function f() {
var g = function() {
var P = 2;
return P;
};
var P = 1;
g();
return P;
}
''', 1)
    # Case 2: the inner `for` initializer declares `w` and `M` with `var`,
    # while the outer function declares its own `w` ("c") and `M` ("d")
    # before calling `x()`. The `switch` has no `break`, so `case 1` falls
    # through into `case 2` and both "a" and "b" are pushed. The outer
    # values are appended afterwards, giving ['a', 'b', 'c', 'd'].
    self._test(R'''
function f() {
var x = function() {
for (var w = 1, M = []; w < 2; w++) switch (w) {
case 1:
M.push("a");
case 2:
M.push("b");
}
return M
};
var w = "c";
var M = "d";
var y = x();
y.push(w);
y.push(M);
return y;
}
''', ['a', 'b', 'c', 'd'])
    # Case 3: `var P, Q;` without initializers inside `g` must still create
    # inner variables, so the following `P = 2; Q = 15` do not change the
    # outer `P`/`Q` (later set to 1 and 10). `z` is NOT redeclared in `g`,
    # so `z = 0` is expected to overwrite the outer `z` (initially 100).
    # Expected result: P + Q + x + y + z = 1 + 10 + 17 + 3 + 0 = 31.
    self._test(R'''
function f() {
var P, Q;
var z = 100;
var g = function() {
var P, Q; P = 2; Q = 15;
z = 0;
return P+Q;
};
P = 1; Q = 10;
var x = g(), y = 3;
return P+Q+x+y+z;
}
''', 31)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -333,6 +333,46 @@
'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js', 'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u', 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
), ),
(
'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
),
(
'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
),
(
'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
),
(
'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
),
(
'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
),
(
'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
),
(
'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
),
(
'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
),
(
'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
),
(
'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
),
] ]

View File

@ -26,7 +26,7 @@
from .pot._director import initialize_pot_director from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest from .pot.provider import PoTokenContext, PoTokenRequest
from ..openload import PhantomJSwrapper from ..openload import PhantomJSwrapper
from ...jsinterp import JSInterpreter from ...jsinterp import JSInterpreter, LocalNameSpace
from ...networking.exceptions import HTTPError from ...networking.exceptions import HTTPError
from ...utils import ( from ...utils import (
NO_DEFAULT, NO_DEFAULT,
@ -1801,6 +1801,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js', 'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
} }
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()} _INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
_NSIG_FUNC_CACHE_ID = 'nsig func'
_DUMMY_STRING = 'dlp_wins'
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
@ -2204,7 +2206,7 @@ def _decrypt_nsig(self, s, video_id, player_url):
self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n') self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
try: try:
extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url) extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
ret = extract_nsig(jsi, func_code)(s) ret = extract_nsig(jsi, func_code)(s)
except JSInterpreter.Exception as e: except JSInterpreter.Exception as e:
try: try:
@ -2312,16 +2314,18 @@ def _interpret_player_js_global_var(self, jscode, player_url):
jsi = JSInterpreter(varcode) jsi = JSInterpreter(varcode)
interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url) interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
return varname, interpret_global_var(varvalue, {}, allow_recursion=10) return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url): def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
# Fixup global array
varname, global_list = self._interpret_player_js_global_var(jscode, player_url) varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
if varname and global_list: if varname and global_list:
nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}' nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
else: else:
varname = 'dlp_wins' varname = self._DUMMY_STRING
global_list = [] global_list = []
# Fixup typeof check
undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+' undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
fixed_code = re.sub( fixed_code = re.sub(
fr'''(?x) fr'''(?x)
@ -2334,6 +2338,32 @@ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
self.write_debug(join_nonempty( self.write_debug(join_nonempty(
'No typeof statement found in nsig function code', 'No typeof statement found in nsig function code',
player_url and f' player = {player_url}', delim='\n'), only_once=True) player_url and f' player = {player_url}', delim='\n'), only_once=True)
# Fixup global funcs
jsi = JSInterpreter(fixed_code)
cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
try:
self._cached(
self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
except JSInterpreter.Exception:
self._player_cache.pop(cache_id, None)
global_funcnames = jsi._undefined_varnames
debug_names = []
jsi = JSInterpreter(jscode)
for func_name in global_funcnames:
try:
func_args, func_code = jsi.extract_function_code(func_name)
fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
debug_names.append(func_name)
except Exception:
self.report_warning(join_nonempty(
f'Unable to extract global nsig function {func_name} from player JS',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
if debug_names:
self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
return argnames, fixed_code return argnames, fixed_code
def _extract_n_function_code(self, video_id, player_url): def _extract_n_function_code(self, video_id, player_url):
@ -2347,7 +2377,7 @@ def _extract_n_function_code(self, video_id, player_url):
func_name = self._extract_n_function_name(jscode, player_url=player_url) func_name = self._extract_n_function_name(jscode, player_url=player_url)
# XXX: Workaround for the global array variable and lack of `typeof` implementation # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url) func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
return jsi, player_id, func_code return jsi, player_id, func_code

View File

@ -222,6 +222,14 @@ def __setitem__(self, key, value):
def __delitem__(self, key): def __delitem__(self, key):
raise NotImplementedError('Deleting is not supported') raise NotImplementedError('Deleting is not supported')
def set_local(self, key, value):
    """Bind *key* to *value* in the first mapping (``self.maps[0]``) only.

    The write goes straight to ``self.maps[0]``; the remaining mappings in
    ``self.maps`` are neither read nor modified.
    """
    innermost = self.maps[0]
    innermost[key] = value
def get_local(self, key):
    """Look up *key* in the first mapping (``self.maps[0]``) only.

    Returns the value stored there, or ``JS_Undefined`` when that mapping
    has no such key. The remaining mappings in ``self.maps`` are not
    consulted.
    """
    innermost = self.maps[0]
    return innermost[key] if key in innermost else JS_Undefined
class Debugger: class Debugger:
import sys import sys
@ -271,6 +279,7 @@ class JSInterpreter:
def __init__(self, code, objects=None): def __init__(self, code, objects=None):
self.code, self._functions = code, {} self.code, self._functions = code, {}
self._objects = {} if objects is None else objects self._objects = {} if objects is None else objects
self._undefined_varnames = set()
class Exception(ExtractorError): # noqa: A001 class Exception(ExtractorError): # noqa: A001
def __init__(self, msg, expr=None, *args, **kwargs): def __init__(self, msg, expr=None, *args, **kwargs):
@ -381,7 +390,7 @@ def _dump(self, obj, namespace):
return self._named_object(namespace, obj) return self._named_object(namespace, obj)
@Debugger.wrap_interpreter @Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100): def interpret_statement(self, stmt, local_vars, allow_recursion=100, _is_var_declaration=False):
if allow_recursion < 0: if allow_recursion < 0:
raise self.Exception('Recursion limit reached') raise self.Exception('Recursion limit reached')
allow_recursion -= 1 allow_recursion -= 1
@ -401,6 +410,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if m.group('throw'): if m.group('throw'):
raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion)) raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
should_return = not m.group('var') should_return = not m.group('var')
_is_var_declaration = _is_var_declaration or bool(m.group('var'))
if not expr: if not expr:
return None, should_return return None, should_return
@ -585,7 +595,8 @@ def dict_item(key, val):
sub_expressions = list(self._separate(expr)) sub_expressions = list(self._separate(expr))
if len(sub_expressions) > 1: if len(sub_expressions) > 1:
for sub_expr in sub_expressions: for sub_expr in sub_expressions:
ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion) ret, should_abort = self.interpret_statement(
sub_expr, local_vars, allow_recursion, _is_var_declaration=_is_var_declaration)
if should_abort: if should_abort:
return ret, True return ret, True
return ret, False return ret, False
@ -599,8 +610,12 @@ def dict_item(key, val):
left_val = local_vars.get(m.group('out')) left_val = local_vars.get(m.group('out'))
if not m.group('index'): if not m.group('index'):
local_vars[m.group('out')] = self._operator( eval_result = self._operator(
m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion) m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
if _is_var_declaration:
local_vars.set_local(m.group('out'), eval_result)
else:
local_vars[m.group('out')] = eval_result
return local_vars[m.group('out')], should_return return local_vars[m.group('out')], should_return
elif left_val in (None, JS_Undefined): elif left_val in (None, JS_Undefined):
raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr) raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
@ -654,7 +669,18 @@ def dict_item(key, val):
return float('NaN'), should_return return float('NaN'), should_return
elif m and m.group('return'): elif m and m.group('return'):
return local_vars.get(m.group('name'), JS_Undefined), should_return var = m.group('name')
# Declared variables
if _is_var_declaration:
ret = local_vars.get_local(var)
# Register varname in local namespace
# Set value as JS_Undefined or its pre-existing value
local_vars.set_local(var, ret)
else:
ret = local_vars.get(var, JS_Undefined)
if ret is JS_Undefined:
self._undefined_varnames.add(var)
return ret, should_return
with contextlib.suppress(ValueError): with contextlib.suppress(ValueError):
return json.loads(js_to_json(expr, strict=True)), should_return return json.loads(js_to_json(expr, strict=True)), should_return