diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 4268e890b8..a1088cea49 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -490,6 +490,52 @@ def test_increment_decrement(self): self._test('function f() { var a = "test--"; return a; }', 'test--') self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--') + def test_nested_function_scoping(self): + self._test(R''' + function f() { + var g = function() { + var P = 2; + return P; + }; + var P = 1; + g(); + return P; + } + ''', 1) + self._test(R''' + function f() { + var x = function() { + for (var w = 1, M = []; w < 2; w++) switch (w) { + case 1: + M.push("a"); + case 2: + M.push("b"); + } + return M + }; + var w = "c"; + var M = "d"; + var y = x(); + y.push(w); + y.push(M); + return y; + } + ''', ['a', 'b', 'c', 'd']) + self._test(R''' + function f() { + var P, Q; + var z = 100; + var g = function() { + var P, Q; P = 2; Q = 15; + z = 0; + return P+Q; + }; + P = 1; Q = 10; + var x = g(), y = 3; + return P+Q+x+y+z; + } + ''', 31) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 5e67926798..98607df55e 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -333,6 +333,46 @@ 'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js', 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u', ), + ( + 'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq', + ), + ( + 'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A', + ), + ( + 'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A', + ), + ( + 'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy', + ), + ( + 'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf', + ), + ( + 'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa', + ), + ( + 'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF', + ), + ( + 'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD', + ), + ( + 'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba', + ), + ( + 'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js', + 'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg', + ), ] diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 4689c55db7..f13dbb3161 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -26,7 +26,7 @@ from .pot._director import initialize_pot_director from .pot.provider import PoTokenContext, PoTokenRequest from ..openload import PhantomJSwrapper -from ...jsinterp import JSInterpreter +from ...jsinterp import JSInterpreter, LocalNameSpace from ...networking.exceptions import HTTPError from ...utils import ( NO_DEFAULT, @@ -1801,6 +1801,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js', } _INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()} + _NSIG_FUNC_CACHE_ID = 'nsig func' + _DUMMY_STRING = 'dlp_wins' @classmethod def suitable(cls, url): @@ -2204,7 +2206,7 @@ def _decrypt_nsig(self, s, video_id, player_url): self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n') try: - extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url) + extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url) ret = extract_nsig(jsi, func_code)(s) except JSInterpreter.Exception as e: try: @@ -2312,16 +2314,18 @@ def _interpret_player_js_global_var(self, jscode, player_url): jsi = JSInterpreter(varcode) interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url) - return varname, interpret_global_var(varvalue, {}, allow_recursion=10) + return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10) def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url): + # Fixup global array varname, global_list = self._interpret_player_js_global_var(jscode, player_url) if varname and global_list: nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}' else: - varname = 'dlp_wins' + varname = self._DUMMY_STRING global_list = [] + # Fixup typeof check undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+' fixed_code = re.sub( fr'''(?x) @@ -2334,6 +2338,32 @@ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url): self.write_debug(join_nonempty( 'No typeof statement found in nsig function code', player_url and f' player = {player_url}', delim='\n'), only_once=True) + + # Fixup global funcs + jsi = JSInterpreter(fixed_code) + cache_id = (self._NSIG_FUNC_CACHE_ID, player_url) + try: + self._cached( + self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING) + except JSInterpreter.Exception: + self._player_cache.pop(cache_id, None) + + global_funcnames = jsi._undefined_varnames + debug_names = [] + jsi = JSInterpreter(jscode) + for func_name in global_funcnames: + try: + func_args, func_code = jsi.extract_function_code(func_name) + fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}' + debug_names.append(func_name) + except Exception: + self.report_warning(join_nonempty( + f'Unable to extract global nsig function {func_name} from player JS', + player_url and f' player = {player_url}', delim='\n'), only_once=True) + + if debug_names: + self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}') + return argnames, fixed_code def _extract_n_function_code(self, video_id, player_url): @@ -2347,7 +2377,7 @@ def _extract_n_function_code(self, video_id, player_url): func_name = self._extract_n_function_name(jscode, player_url=player_url) - # XXX: Workaround for the global array variable and lack of `typeof` implementation + # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url) return jsi, player_id, func_code diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index b49f0cf30a..f06d96832f 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -222,6 +222,14 @@ def __setitem__(self, key, value): def __delitem__(self, key): raise NotImplementedError('Deleting is not supported') + def set_local(self, key, value): + self.maps[0][key] = value + + def get_local(self, key): + if key in self.maps[0]: + return self.maps[0][key] + return JS_Undefined + class Debugger: import sys @@ -271,6 +279,7 @@ class JSInterpreter: def __init__(self, code, objects=None): self.code, self._functions = code, {} self._objects = {} if objects is None else objects + self._undefined_varnames = set() class Exception(ExtractorError): # noqa: A001 def __init__(self, msg, expr=None, *args, **kwargs): @@ -381,7 +390,7 @@ def _dump(self, obj, namespace): return self._named_object(namespace, obj) @Debugger.wrap_interpreter - def interpret_statement(self, stmt, local_vars, allow_recursion=100): + def interpret_statement(self, stmt, local_vars, allow_recursion=100, _is_var_declaration=False): if allow_recursion < 0: raise self.Exception('Recursion limit reached') allow_recursion -= 1 @@ -401,6 +410,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100): if m.group('throw'): raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion)) should_return = not m.group('var') + _is_var_declaration = _is_var_declaration or bool(m.group('var')) if not expr: return None, should_return @@ -585,7 +595,8 @@ def dict_item(key, val): sub_expressions = list(self._separate(expr)) if len(sub_expressions) > 1: for sub_expr in sub_expressions: - ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion) + ret, should_abort = self.interpret_statement( + sub_expr, local_vars, allow_recursion, _is_var_declaration=_is_var_declaration) if should_abort: return ret, True return ret, False @@ -599,8 +610,12 @@ def dict_item(key, val): left_val = local_vars.get(m.group('out')) if not m.group('index'): - local_vars[m.group('out')] = self._operator( + eval_result = self._operator( m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion) + if _is_var_declaration: + local_vars.set_local(m.group('out'), eval_result) + else: + local_vars[m.group('out')] = eval_result return local_vars[m.group('out')], should_return elif left_val in (None, JS_Undefined): raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr) @@ -654,7 +669,18 @@ def dict_item(key, val): return float('NaN'), should_return elif m and m.group('return'): - return local_vars.get(m.group('name'), JS_Undefined), should_return + var = m.group('name') + # Declared variables + if _is_var_declaration: + ret = local_vars.get_local(var) + # Register varname in local namespace + # Set value as JS_Undefined or its pre-existing value + local_vars.set_local(var, ret) + else: + ret = local_vars.get(var, JS_Undefined) + if ret is JS_Undefined: + self._undefined_varnames.add(var) + return ret, should_return with contextlib.suppress(ValueError): return json.loads(js_to_json(expr, strict=True)), should_return