1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-29 00:28:29 +00:00

Merge branch 'yt-dlp:master' into master

This commit is contained in:
Nikolay Fedorov 2025-07-06 13:52:40 +03:00 committed by GitHub
commit d58e75c06f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 151 additions and 9 deletions

View File

@ -490,6 +490,52 @@ def test_increment_decrement(self):
self._test('function f() { var a = "test--"; return a; }', 'test--')
self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--')
def test_nested_function_scoping(self):
    """Check that `var` declarations in a nested function stay local to it.

    Each case pairs a JS snippet defining `f` with the value handed to
    `self._test` as the expected result.
    """
    scoping_cases = (
        # The inner `var P = 2` must leave the outer P (assigned 1) untouched
        (R'''
function f() {
var g = function() {
var P = 2;
return P;
};
var P = 1;
g();
return P;
}
''', 1),
        # The inner function declares w and M in a `for` initializer and falls
        # through both `case` labels; the outer w and M are pushed afterwards
        (R'''
function f() {
var x = function() {
for (var w = 1, M = []; w < 2; w++) switch (w) {
case 1:
M.push("a");
case 2:
M.push("b");
}
return M
};
var w = "c";
var M = "d";
var y = x();
y.push(w);
y.push(M);
return y;
}
''', ['a', 'b', 'c', 'd']),
        # `var P, Q` inside g shadows the outer pair, whereas the undeclared
        # `z = 0` writes to the outer z: 1 + 10 + 17 + 3 + 0 == 31
        (R'''
function f() {
var P, Q;
var z = 100;
var g = function() {
var P, Q; P = 2; Q = 15;
z = 0;
return P+Q;
};
P = 1; Q = 10;
var x = g(), y = 3;
return P+Q+x+y+z;
}
''', 31),
    )
    for js_code, expected in scoping_cases:
        self._test(js_code, expected)
if __name__ == '__main__':
unittest.main()

View File

@ -333,6 +333,46 @@
'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
),
(
'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
),
(
'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
),
(
'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
),
(
'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
),
(
'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
),
(
'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
),
(
'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
),
(
'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
),
(
'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
),
(
'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
),
]

View File

@ -26,7 +26,7 @@
from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest
from ..openload import PhantomJSwrapper
from ...jsinterp import JSInterpreter
from ...jsinterp import JSInterpreter, LocalNameSpace
from ...networking.exceptions import HTTPError
from ...utils import (
NO_DEFAULT,
@ -1801,6 +1801,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
}
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
_NSIG_FUNC_CACHE_ID = 'nsig func'
_DUMMY_STRING = 'dlp_wins'
@classmethod
def suitable(cls, url):
@ -2204,7 +2206,7 @@ def _decrypt_nsig(self, s, video_id, player_url):
self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
try:
extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
ret = extract_nsig(jsi, func_code)(s)
except JSInterpreter.Exception as e:
try:
@ -2312,16 +2314,18 @@ def _interpret_player_js_global_var(self, jscode, player_url):
jsi = JSInterpreter(varcode)
interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
return varname, interpret_global_var(varvalue, {}, allow_recursion=10)
return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
# Fixup global array
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
if varname and global_list:
nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
else:
varname = 'dlp_wins'
varname = self._DUMMY_STRING
global_list = []
# Fixup typeof check
undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
fixed_code = re.sub(
fr'''(?x)
@ -2334,6 +2338,32 @@ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
self.write_debug(join_nonempty(
'No typeof statement found in nsig function code',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
# Fixup global funcs
jsi = JSInterpreter(fixed_code)
cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
try:
self._cached(
self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
except JSInterpreter.Exception:
self._player_cache.pop(cache_id, None)
global_funcnames = jsi._undefined_varnames
debug_names = []
jsi = JSInterpreter(jscode)
for func_name in global_funcnames:
try:
func_args, func_code = jsi.extract_function_code(func_name)
fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
debug_names.append(func_name)
except Exception:
self.report_warning(join_nonempty(
f'Unable to extract global nsig function {func_name} from player JS',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
if debug_names:
self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
return argnames, fixed_code
def _extract_n_function_code(self, video_id, player_url):
@ -2347,7 +2377,7 @@ def _extract_n_function_code(self, video_id, player_url):
func_name = self._extract_n_function_name(jscode, player_url=player_url)
# XXX: Workaround for the global array variable and lack of `typeof` implementation
# XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
return jsi, player_id, func_code

View File

@ -222,6 +222,14 @@ def __setitem__(self, key, value):
def __delitem__(self, key):
raise NotImplementedError('Deleting is not supported')
def set_local(self, key, value):
    """Store `value` under `key` in the first mapping of `self.maps`.

    Only `self.maps[0]` is written; any later mappings are left untouched.
    NOTE(review): presumably `self.maps[0]` is the innermost scope of a
    ChainMap-style namespace - the class header is not visible here.
    """
    first_scope = self.maps[0]
    first_scope[key] = value
def get_local(self, key):
    """Look up `key` in the first mapping of `self.maps` only.

    Returns the stored value when `self.maps[0]` contains `key`, otherwise
    `JS_Undefined`. Later mappings are never consulted.
    """
    first_scope = self.maps[0]
    return first_scope[key] if key in first_scope else JS_Undefined
class Debugger:
import sys
@ -271,6 +279,7 @@ class JSInterpreter:
def __init__(self, code, objects=None):
    """Set up interpreter state for the given JS source.

    `code` is stored as-is. `objects`, when given, is used directly as the
    object cache (not copied); otherwise a fresh dict is created.
    """
    self.code = code
    self._functions = {}
    self._objects = objects if objects is not None else {}
    # Names of bare variables that evaluated to JS_Undefined when looked up
    self._undefined_varnames = set()
class Exception(ExtractorError): # noqa: A001
def __init__(self, msg, expr=None, *args, **kwargs):
@ -381,7 +390,7 @@ def _dump(self, obj, namespace):
return self._named_object(namespace, obj)
@Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
def interpret_statement(self, stmt, local_vars, allow_recursion=100, _is_var_declaration=False):
if allow_recursion < 0:
raise self.Exception('Recursion limit reached')
allow_recursion -= 1
@ -401,6 +410,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if m.group('throw'):
raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
should_return = not m.group('var')
_is_var_declaration = _is_var_declaration or bool(m.group('var'))
if not expr:
return None, should_return
@ -585,7 +595,8 @@ def dict_item(key, val):
sub_expressions = list(self._separate(expr))
if len(sub_expressions) > 1:
for sub_expr in sub_expressions:
ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
ret, should_abort = self.interpret_statement(
sub_expr, local_vars, allow_recursion, _is_var_declaration=_is_var_declaration)
if should_abort:
return ret, True
return ret, False
@ -599,8 +610,12 @@ def dict_item(key, val):
left_val = local_vars.get(m.group('out'))
if not m.group('index'):
local_vars[m.group('out')] = self._operator(
eval_result = self._operator(
m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
if _is_var_declaration:
local_vars.set_local(m.group('out'), eval_result)
else:
local_vars[m.group('out')] = eval_result
return local_vars[m.group('out')], should_return
elif left_val in (None, JS_Undefined):
raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
@ -654,7 +669,18 @@ def dict_item(key, val):
return float('NaN'), should_return
elif m and m.group('return'):
return local_vars.get(m.group('name'), JS_Undefined), should_return
var = m.group('name')
# Declared variables
if _is_var_declaration:
ret = local_vars.get_local(var)
# Register varname in local namespace
# Set value as JS_Undefined or its pre-existing value
local_vars.set_local(var, ret)
else:
ret = local_vars.get(var, JS_Undefined)
if ret is JS_Undefined:
self._undefined_varnames.add(var)
return ret, should_return
with contextlib.suppress(ValueError):
return json.loads(js_to_json(expr, strict=True)), should_return