mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	Completely change project name to yt-dlp (#85)
* All modules and binary names are changed
* All documentation references changed
* yt-dlp no longer loads youtube-dlc config files
* All URLs changed to point to organization account

Co-authored-by: Pccode66
Co-authored-by: pukkandan
This commit is contained in:
		
							
								
								
									
										262
									
								
								yt_dlp/jsinterp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										262
									
								
								yt_dlp/jsinterp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,262 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import operator | ||||
| import re | ||||
|  | ||||
| from .utils import ( | ||||
|     ExtractorError, | ||||
|     remove_quotes, | ||||
| ) | ||||
|  | ||||
def _js_mod(a, b):
    """Return ``a % b`` using JavaScript remainder semantics.

    In JavaScript the result of ``%`` takes the sign of the dividend
    (``-7 % 3 == -1``), whereas Python's ``%`` takes the sign of the divisor
    (``-7 % 3 == 2``).  A zero divisor still raises ``ZeroDivisionError``,
    exactly as ``operator.mod`` did.
    """
    remainder = abs(a) % abs(b)
    return -remainder if a < 0 else remainder


# Binary operators as (JS token, implementation) pairs.
# The order is significant: JSInterpreter.interpret_expression walks this list
# from the top and splits the expression on the first operator that matches,
# so looser-binding operators are listed before tighter-binding ones.
_OPERATORS = [
    ('|', operator.or_),
    ('^', operator.xor),
    ('&', operator.and_),
    ('>>', operator.rshift),
    ('<<', operator.lshift),
    ('-', operator.sub),
    ('+', operator.add),
    ('%', _js_mod),
    ('/', operator.truediv),
    ('*', operator.mul),
]

# Compound assignments ('|=', '+=', ...) reuse the binary implementations.
# Plain '=' comes last and simply discards the current value.
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))

# Identifier pattern used throughout the interpreter's regular expressions.
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
|  | ||||
|  | ||||
class JSInterpreter(object):
    """Interpreter for the small subset of JavaScript this module understands.

    Functions and objects are located in ``code`` with regular expressions and
    evaluated statement by statement.  Supported constructs are: ``var`` and
    ``return`` statements, (compound) assignments, integer/JSON literals,
    variable reads, indexing, member access, a handful of built-in methods
    (``split``, ``join``, ``reverse``, ``slice``, ``splice``), the binary
    operators in ``_OPERATORS`` and calls to functions defined in ``code``.
    Anything else raises ``ExtractorError``.
    """

    def __init__(self, code, objects=None):
        """Store the JS source and set up the function/object caches.

        ``objects`` optionally pre-populates the object table (name -> dict
        of callables).  When given, that same dict is used directly and is
        extended as further objects are extracted.
        """
        if objects is None:
            objects = {}
        self.code = code
        self._functions = {}
        self._objects = objects

    @staticmethod
    def _split_argnames(arg_str):
        """Split a JS parameter list into names, ignoring blanks and whitespace."""
        names = (name.strip() for name in arg_str.split(','))
        return [name for name in names if name]

    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
        """Evaluate one statement and return ``(value, should_abort)``.

        ``should_abort`` is True only for ``return`` statements, signalling
        the caller to stop executing the enclosing function body.  A leading
        ``var`` keyword is dropped and the rest is evaluated as an expression.

        Raises ExtractorError once ``allow_recursion`` drops below zero.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')

        should_abort = False
        stmt = stmt.lstrip()
        stmt_m = re.match(r'var\s', stmt)
        if stmt_m:
            expr = stmt[len(stmt_m.group(0)):]
        else:
            return_m = re.match(r'return(?:\s+|$)', stmt)
            if return_m:
                expr = stmt[len(return_m.group(0)):]
                should_abort = True
            else:
                # Try interpreting it as an expression
                expr = stmt

        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return v, should_abort

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Evaluate ``expr`` against ``local_vars`` and return its value.

        ``local_vars`` is mutated by assignments.  The expression forms are
        tried in a fixed order (parenthesised prefix, assignment, integer,
        variable, JSON literal, index, member access/call, binary operator,
        function call); the first one that matches decides the result.

        Raises ExtractorError for unbalanced parentheses, unsupported
        built-in method arguments and unsupported expressions.  Unknown
        variables surface as KeyError from the ``local_vars`` lookup.
        """
        expr = expr.strip()
        if expr == '':  # Empty expression
            return None

        if expr.startswith('('):
            # Evaluate the leading parenthesised group, then splice its value
            # back in (JSON-encoded) in front of whatever follows it.
            parens_count = 0
            for m in re.finditer(r'[()]', expr):
                if m.group(0) == '(':
                    parens_count += 1
                    continue
                parens_count -= 1
                if parens_count != 0:
                    continue
                sub_result = self.interpret_expression(
                    expr[1:m.start()], local_vars, allow_recursion)
                remaining_expr = expr[m.end():].strip()
                if not remaining_expr:
                    return sub_result
                expr = json.dumps(sub_result) + remaining_expr
                break
            else:
                raise ExtractorError('Premature end of parens in %r' % expr)

        for op, opfunc in _ASSIGN_OPERATORS:
            m = re.match(r'''(?x)
                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
                \s*%s
                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
            if not m:
                continue
            right_val = self.interpret_expression(
                m.group('expr'), local_vars, allow_recursion - 1)

            if m.groupdict().get('index'):
                # Indexed target: ``name[index] op= value``
                lvar = local_vars[m.group('out')]
                idx = self.interpret_expression(
                    m.group('index'), local_vars, allow_recursion)
                if not isinstance(idx, int):
                    raise ExtractorError(
                        'Non-integer index %r in assignment %r' % (idx, expr))
                cur = lvar[idx]
                val = opfunc(cur, right_val)
                lvar[idx] = val
                return val
            else:
                # Plain target; a not-yet-defined variable reads as None
                cur = local_vars.get(m.group('out'))
                val = opfunc(cur, right_val)
                local_vars[m.group('out')] = val
                return val

        if expr.isdigit():
            return int(expr)

        # The lookahead is anchored so that only the exact keywords are
        # excluded; identifiers that merely start with one (e.g. ``iffy``)
        # are still treated as variables.
        var_m = re.match(
            r'(?!(?:if|return|true|false)$)(?P<name>%s)$' % _NAME_RE,
            expr)
        if var_m:
            return local_vars[var_m.group('name')]

        try:
            return json.loads(expr)
        except ValueError:
            pass

        m = re.match(
            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]

        m = re.match(
            r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
            expr)
        if m:
            variable = m.group('var')
            member = remove_quotes(m.group('member') or m.group('member2'))
            arg_str = m.group('args')

            if variable in local_vars:
                obj = local_vars[variable]
            else:
                # Not a local: treat it as an object defined in the JS code
                # and extract it once, caching the result.
                if variable not in self._objects:
                    self._objects[variable] = self.extract_object(variable)
                obj = self._objects[variable]

            if arg_str is None:
                # Member access
                if member == 'length':
                    return len(obj)
                return obj[member]

            # Function call (the regex guarantees expr ends with ')')
            if arg_str == '':
                argvals = tuple()
            else:
                argvals = tuple([
                    self.interpret_expression(v, local_vars, allow_recursion)
                    for v in arg_str.split(',')])

            return self._call_method(obj, member, argvals)

        for op, opfunc in _OPERATORS:
            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
            if not m:
                continue
            x, abort = self.interpret_statement(
                m.group('x'), local_vars, allow_recursion - 1)
            if abort:
                raise ExtractorError(
                    'Premature left-side return of %s in %r' % (op, expr))
            y, abort = self.interpret_statement(
                m.group('y'), local_vars, allow_recursion - 1)
            if abort:
                raise ExtractorError(
                    'Premature right-side return of %s in %r' % (op, expr))
            return opfunc(x, y)

        m = re.match(
            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
        if m:
            fname = m.group('func')
            argvals = tuple([
                int(v) if v.isdigit() else local_vars[v]
                for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            return self._functions[fname](argvals)

        raise ExtractorError('Unsupported JS expression %r' % expr)

    def _call_method(self, obj, member, argvals):
        """Call ``obj.member(*argvals)``: emulated built-in or extracted function.

        ``split``, ``join``, ``reverse``, ``slice`` and ``splice`` are
        emulated on Python strings/lists; any other name is looked up in
        ``obj`` and called with the argument tuple.  Argument shapes that the
        emulation does not cover raise ExtractorError (explicit checks rather
        than ``assert`` so they survive ``python -O``).
        """
        if member == 'split':
            if argvals != ('',):
                raise ExtractorError(
                    'Unsupported arguments %r for split(); only split("") is supported' % (argvals,))
            return list(obj)
        if member == 'join':
            if len(argvals) != 1:
                raise ExtractorError(
                    'join() expects exactly one argument, got %r' % (argvals,))
            return argvals[0].join(obj)
        if member == 'reverse':
            if argvals:
                raise ExtractorError(
                    'reverse() expects no arguments, got %r' % (argvals,))
            # In-place, like Array.prototype.reverse
            obj.reverse()
            return obj
        if member == 'slice':
            if len(argvals) != 1:
                raise ExtractorError(
                    'slice() expects exactly one argument, got %r' % (argvals,))
            return obj[argvals[0]:]
        if member == 'splice':
            if not isinstance(obj, list):
                raise ExtractorError(
                    'splice() is only supported on arrays, got %r' % (obj,))
            index, how_many = argvals
            removed = []
            # Popping at the same position repeatedly removes a contiguous run
            for _ in range(index, min(index + how_many, len(obj))):
                removed.append(obj.pop(index))
            return removed

        return obj[member](argvals)

    def extract_object(self, objname):
        """Find ``objname = {...};`` in the code and return its function members.

        Only ``key: function(args){code}`` fields are supported.  The result
        maps each (unquoted) key to a callable built by ``build_function``.

        Raises ExtractorError if the object definition cannot be found.
        """
        _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
        obj = {}
        obj_m = re.search(
            r'''(?x)
                (?<!this\.)%s\s*=\s*{\s*
                    (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
                }\s*;
            ''' % (re.escape(objname), _FUNC_NAME_RE),
            self.code)
        if obj_m is None:
            raise ExtractorError('Could not find JS object %r' % objname)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions.
        # The parameter list and the optional whitespace before the body are
        # matched as permissively as in the object regex above, so no field
        # accepted there is silently dropped here.
        fields_m = re.finditer(
            r'''(?x)
                (?P<key>%s)\s*:\s*function\s*\((?P<args>[^)]*)\)\s*{(?P<code>[^}]+)}
            ''' % _FUNC_NAME_RE,
            fields)
        for f in fields_m:
            argnames = self._split_argnames(f.group('args'))
            obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))

        return obj

    def extract_function(self, funcname):
        """Find the definition of ``funcname`` in the code and return a callable.

        Recognises ``function name(...) {...}``, ``name = function(...) {...}``
        (after ``{``, ``;`` or ``,``) and ``var name = function(...) {...}``.
        The body is matched up to the first ``}``, so nested blocks are not
        supported.  The returned callable takes one sequence of argument values.

        Raises ExtractorError if no definition is found.
        """
        func_m = re.search(
            r'''(?x)
                (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
                \((?P<args>[^)]*)\)\s*
                \{(?P<code>[^}]+)\}''' % (
                re.escape(funcname), re.escape(funcname), re.escape(funcname)),
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        argnames = self._split_argnames(func_m.group('args'))

        return self.build_function(argnames, func_m.group('code'))

    def call_function(self, funcname, *args):
        """Extract ``funcname`` from the code and call it with ``args``."""
        f = self.extract_function(funcname)
        return f(args)

    def build_function(self, argnames, code):
        """Return a callable that runs ``code`` with ``argnames`` bound to its args.

        The callable takes a single sequence of argument values, executes the
        ``;``-separated statements in order, stops after a ``return`` and
        yields the value of the last statement executed.
        """
        def resf(args):
            local_vars = dict(zip(argnames, args))
            res = None
            for stmt in code.split(';'):
                res, abort = self.interpret_statement(stmt, local_vars)
                if abort:
                    break
            return res
        return resf
		Reference in New Issue
	
	Block a user
	 Pccode66
					Pccode66