Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 22:55:18 +00:00)
			
		
		
		
	[youtube] Move swfinterp into its own file
This commit is contained in:
		| @@ -14,6 +14,7 @@ import zlib | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..jsinterp import JSInterpreter | ||||
| from ..swfinterp import SWFInterpreter | ||||
| from ..utils import ( | ||||
|     compat_chr, | ||||
|     compat_parse_qs, | ||||
| @@ -450,457 +451,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         return lambda s: initial_function([s]) | ||||
|  | ||||
|     def _parse_sig_swf(self, file_contents): | ||||
|         if file_contents[1:3] != b'WS': | ||||
|             raise ExtractorError( | ||||
|                 u'Not an SWF file; header is %r' % file_contents[:3]) | ||||
|         if file_contents[:1] == b'C': | ||||
|             content = zlib.decompress(file_contents[8:]) | ||||
|         else: | ||||
|             raise NotImplementedError(u'Unsupported compression format %r' % | ||||
|                                       file_contents[:1]) | ||||
|  | ||||
|         def extract_tags(content): | ||||
|             pos = 0 | ||||
|             while pos < len(content): | ||||
|                 header16 = struct.unpack('<H', content[pos:pos+2])[0] | ||||
|                 pos += 2 | ||||
|                 tag_code = header16 >> 6 | ||||
|                 tag_len = header16 & 0x3f | ||||
|                 if tag_len == 0x3f: | ||||
|                     tag_len = struct.unpack('<I', content[pos:pos+4])[0] | ||||
|                     pos += 4 | ||||
|                 assert pos+tag_len <= len(content) | ||||
|                 yield (tag_code, content[pos:pos+tag_len]) | ||||
|                 pos += tag_len | ||||
|  | ||||
|         code_tag = next(tag | ||||
|                         for tag_code, tag in extract_tags(content) | ||||
|                         if tag_code == 82) | ||||
|         p = code_tag.index(b'\0', 4) + 1 | ||||
|         code_reader = io.BytesIO(code_tag[p:]) | ||||
|  | ||||
|         # Parse ABC (AVM2 ByteCode) | ||||
|         def read_int(reader=None): | ||||
|             if reader is None: | ||||
|                 reader = code_reader | ||||
|             res = 0 | ||||
|             shift = 0 | ||||
|             for _ in range(5): | ||||
|                 buf = reader.read(1) | ||||
|                 assert len(buf) == 1 | ||||
|                 b = struct.unpack('<B', buf)[0] | ||||
|                 res = res | ((b & 0x7f) << shift) | ||||
|                 if b & 0x80 == 0: | ||||
|                     break | ||||
|                 shift += 7 | ||||
|             return res | ||||
|  | ||||
|         def u30(reader=None): | ||||
|             res = read_int(reader) | ||||
|             assert res & 0xf0000000 == 0 | ||||
|             return res | ||||
|         u32 = read_int | ||||
|  | ||||
|         def s32(reader=None): | ||||
|             v = read_int(reader) | ||||
|             if v & 0x80000000 != 0: | ||||
|                 v = - ((v ^ 0xffffffff) + 1) | ||||
|             return v | ||||
|  | ||||
|         def s24(reader): | ||||
|             bs = reader.read(3) | ||||
|             assert len(bs) == 3 | ||||
|             first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00' | ||||
|             return struct.unpack('!i', first_byte + bs) | ||||
|  | ||||
|         def read_string(reader=None): | ||||
|             if reader is None: | ||||
|                 reader = code_reader | ||||
|             slen = u30(reader) | ||||
|             resb = reader.read(slen) | ||||
|             assert len(resb) == slen | ||||
|             return resb.decode('utf-8') | ||||
|  | ||||
|         def read_bytes(count, reader=None): | ||||
|             if reader is None: | ||||
|                 reader = code_reader | ||||
|             resb = reader.read(count) | ||||
|             assert len(resb) == count | ||||
|             return resb | ||||
|  | ||||
|         def read_byte(reader=None): | ||||
|             resb = read_bytes(1, reader=reader) | ||||
|             res = struct.unpack('<B', resb)[0] | ||||
|             return res | ||||
|  | ||||
|         # minor_version + major_version | ||||
|         read_bytes(2 + 2) | ||||
|  | ||||
|         # Constant pool | ||||
|         int_count = u30() | ||||
|         for _c in range(1, int_count): | ||||
|             s32() | ||||
|         uint_count = u30() | ||||
|         for _c in range(1, uint_count): | ||||
|             u32() | ||||
|         double_count = u30() | ||||
|         read_bytes((double_count-1) * 8) | ||||
|         string_count = u30() | ||||
|         constant_strings = [u''] | ||||
|         for _c in range(1, string_count): | ||||
|             s = read_string() | ||||
|             constant_strings.append(s) | ||||
|         namespace_count = u30() | ||||
|         for _c in range(1, namespace_count): | ||||
|             read_bytes(1)  # kind | ||||
|             u30()  # name | ||||
|         ns_set_count = u30() | ||||
|         for _c in range(1, ns_set_count): | ||||
|             count = u30() | ||||
|             for _c2 in range(count): | ||||
|                 u30() | ||||
|         multiname_count = u30() | ||||
|         MULTINAME_SIZES = { | ||||
|             0x07: 2,  # QName | ||||
|             0x0d: 2,  # QNameA | ||||
|             0x0f: 1,  # RTQName | ||||
|             0x10: 1,  # RTQNameA | ||||
|             0x11: 0,  # RTQNameL | ||||
|             0x12: 0,  # RTQNameLA | ||||
|             0x09: 2,  # Multiname | ||||
|             0x0e: 2,  # MultinameA | ||||
|             0x1b: 1,  # MultinameL | ||||
|             0x1c: 1,  # MultinameLA | ||||
|         } | ||||
|         multinames = [u''] | ||||
|         for _c in range(1, multiname_count): | ||||
|             kind = u30() | ||||
|             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind | ||||
|             if kind == 0x07: | ||||
|                 u30()  # namespace_idx | ||||
|                 name_idx = u30() | ||||
|                 multinames.append(constant_strings[name_idx]) | ||||
|             else: | ||||
|                 multinames.append('[MULTINAME kind: %d]' % kind) | ||||
|                 for _c2 in range(MULTINAME_SIZES[kind]): | ||||
|                     u30() | ||||
|  | ||||
|         # Methods | ||||
|         method_count = u30() | ||||
|         MethodInfo = collections.namedtuple( | ||||
|             'MethodInfo', | ||||
|             ['NEED_ARGUMENTS', 'NEED_REST']) | ||||
|         method_infos = [] | ||||
|         for method_id in range(method_count): | ||||
|             param_count = u30() | ||||
|             u30()  # return type | ||||
|             for _ in range(param_count): | ||||
|                 u30()  # param type | ||||
|             u30()  # name index (always 0 for youtube) | ||||
|             flags = read_byte() | ||||
|             if flags & 0x08 != 0: | ||||
|                 # Options present | ||||
|                 option_count = u30() | ||||
|                 for c in range(option_count): | ||||
|                     u30()  # val | ||||
|                     read_bytes(1)  # kind | ||||
|             if flags & 0x80 != 0: | ||||
|                 # Param names present | ||||
|                 for _ in range(param_count): | ||||
|                     u30()  # param name | ||||
|             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) | ||||
|             method_infos.append(mi) | ||||
|  | ||||
|         # Metadata | ||||
|         metadata_count = u30() | ||||
|         for _c in range(metadata_count): | ||||
|             u30()  # name | ||||
|             item_count = u30() | ||||
|             for _c2 in range(item_count): | ||||
|                 u30()  # key | ||||
|                 u30()  # value | ||||
|  | ||||
|         def parse_traits_info(): | ||||
|             trait_name_idx = u30() | ||||
|             kind_full = read_byte() | ||||
|             kind = kind_full & 0x0f | ||||
|             attrs = kind_full >> 4 | ||||
|             methods = {} | ||||
|             if kind in [0x00, 0x06]:  # Slot or Const | ||||
|                 u30()  # Slot id | ||||
|                 u30()  # type_name_idx | ||||
|                 vindex = u30() | ||||
|                 if vindex != 0: | ||||
|                     read_byte()  # vkind | ||||
|             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter | ||||
|                 u30()  # disp_id | ||||
|                 method_idx = u30() | ||||
|                 methods[multinames[trait_name_idx]] = method_idx | ||||
|             elif kind == 0x04:  # Class | ||||
|                 u30()  # slot_id | ||||
|                 u30()  # classi | ||||
|             elif kind == 0x05:  # Function | ||||
|                 u30()  # slot_id | ||||
|                 function_idx = u30() | ||||
|                 methods[function_idx] = multinames[trait_name_idx] | ||||
|             else: | ||||
|                 raise ExtractorError(u'Unsupported trait kind %d' % kind) | ||||
|  | ||||
|             if attrs & 0x4 != 0:  # Metadata present | ||||
|                 metadata_count = u30() | ||||
|                 for _c3 in range(metadata_count): | ||||
|                     u30()  # metadata index | ||||
|  | ||||
|             return methods | ||||
|  | ||||
|         class AVMClass(object): | ||||
|             def __init__(self, name_idx): | ||||
|                 self.name_idx = name_idx | ||||
|                 self.method_names = {} | ||||
|                 self.method_idxs = {} | ||||
|                 self.methods = {} | ||||
|                 self.method_pyfunctions = {} | ||||
|                 self.variables = {} | ||||
|  | ||||
|             @property | ||||
|             def name(self): | ||||
|                 return multinames[self.name_idx] | ||||
|  | ||||
|         # Classes | ||||
|         class_count = u30() | ||||
|         classes = [] | ||||
|         for class_id in range(class_count): | ||||
|             name_idx = u30() | ||||
|             classes.append(AVMClass(name_idx)) | ||||
|             u30()  # super_name idx | ||||
|             flags = read_byte() | ||||
|             if flags & 0x08 != 0:  # Protected namespace is present | ||||
|                 u30()  # protected_ns_idx | ||||
|             intrf_count = u30() | ||||
|             for _c2 in range(intrf_count): | ||||
|                 u30() | ||||
|             u30()  # iinit | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|         assert len(classes) == class_count | ||||
|  | ||||
|         swfi = SWFInterpreter(file_contents) | ||||
|         TARGET_CLASSNAME = u'SignatureDecipher' | ||||
|         searched_class = next( | ||||
|             c for c in classes if c.name == TARGET_CLASSNAME) | ||||
|         if searched_class is None: | ||||
|             raise ExtractorError(u'Target class %r not found' % | ||||
|                                  TARGET_CLASSNAME) | ||||
|  | ||||
|         for avm_class in classes: | ||||
|             u30()  # cinit | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 trait_methods = parse_traits_info() | ||||
|                 avm_class.method_names.update(trait_methods.items()) | ||||
|                 avm_class.method_idxs.update(dict( | ||||
|                     (idx, name) | ||||
|                     for name, idx in trait_methods.items())) | ||||
|  | ||||
|         # Scripts | ||||
|         script_count = u30() | ||||
|         for _c in range(script_count): | ||||
|             u30()  # init | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|  | ||||
|         # Method bodies | ||||
|         method_body_count = u30() | ||||
|         Method = collections.namedtuple('Method', ['code', 'local_count']) | ||||
|         for _c in range(method_body_count): | ||||
|             method_idx = u30() | ||||
|             u30()  # max_stack | ||||
|             local_count = u30() | ||||
|             u30()  # init_scope_depth | ||||
|             u30()  # max_scope_depth | ||||
|             code_length = u30() | ||||
|             code = read_bytes(code_length) | ||||
|             for avm_class in classes: | ||||
|                 if method_idx in avm_class.method_idxs: | ||||
|                     m = Method(code, local_count) | ||||
|                     avm_class.methods[avm_class.method_idxs[method_idx]] = m | ||||
|             exception_count = u30() | ||||
|             for _c2 in range(exception_count): | ||||
|                 u30()  # from | ||||
|                 u30()  # to | ||||
|                 u30()  # target | ||||
|                 u30()  # exc_type | ||||
|                 u30()  # var_name | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|  | ||||
|         assert p + code_reader.tell() == len(code_tag) | ||||
|  | ||||
|         def extract_function(avm_class, func_name): | ||||
|             if func_name in avm_class.method_pyfunctions: | ||||
|                 return avm_class.method_pyfunctions[func_name] | ||||
|             if func_name not in avm_class.methods: | ||||
|                 raise ExtractorError(u'Cannot find function %r' % func_name) | ||||
|             m = avm_class.methods[func_name] | ||||
|  | ||||
|             def resfunc(args): | ||||
|                 registers = ['(this)'] + list(args) + [None] * m.local_count | ||||
|                 stack = [] | ||||
|                 coder = io.BytesIO(m.code) | ||||
|                 while True: | ||||
|                     opcode = struct.unpack('!B', coder.read(1))[0] | ||||
|                     if opcode == 17:  # iftrue | ||||
|                         offset = s24(coder) | ||||
|                         value = stack.pop() | ||||
|                         if value: | ||||
|                             coder.seek(coder.tell() + offset) | ||||
|                     elif opcode == 36:  # pushbyte | ||||
|                         v = struct.unpack('!B', coder.read(1))[0] | ||||
|                         stack.append(v) | ||||
|                     elif opcode == 44:  # pushstring | ||||
|                         idx = u30(coder) | ||||
|                         stack.append(constant_strings[idx]) | ||||
|                     elif opcode == 48:  # pushscope | ||||
|                         # We don't implement the scope register, so we'll just | ||||
|                         # ignore the popped value | ||||
|                         stack.pop() | ||||
|                     elif opcode == 70:  # callproperty | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         arg_count = u30(coder) | ||||
|                         args = list(reversed( | ||||
|                             [stack.pop() for _ in range(arg_count)])) | ||||
|                         obj = stack.pop() | ||||
|                         if mname == u'split': | ||||
|                             assert len(args) == 1 | ||||
|                             assert isinstance(args[0], compat_str) | ||||
|                             assert isinstance(obj, compat_str) | ||||
|                             if args[0] == u'': | ||||
|                                 res = list(obj) | ||||
|                             else: | ||||
|                                 res = obj.split(args[0]) | ||||
|                             stack.append(res) | ||||
|                         elif mname == u'slice': | ||||
|                             assert len(args) == 1 | ||||
|                             assert isinstance(args[0], int) | ||||
|                             assert isinstance(obj, list) | ||||
|                             res = obj[args[0]:] | ||||
|                             stack.append(res) | ||||
|                         elif mname == u'join': | ||||
|                             assert len(args) == 1 | ||||
|                             assert isinstance(args[0], compat_str) | ||||
|                             assert isinstance(obj, list) | ||||
|                             res = args[0].join(obj) | ||||
|                             stack.append(res) | ||||
|                         elif mname in avm_class.method_pyfunctions: | ||||
|                             stack.append(avm_class.method_pyfunctions[mname](args)) | ||||
|                         else: | ||||
|                             raise NotImplementedError( | ||||
|                                 u'Unsupported property %r on %r' | ||||
|                                 % (mname, obj)) | ||||
|                     elif opcode == 72:  # returnvalue | ||||
|                         res = stack.pop() | ||||
|                         return res | ||||
|                     elif opcode == 79:  # callpropvoid | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         arg_count = u30(coder) | ||||
|                         args = list(reversed( | ||||
|                             [stack.pop() for _ in range(arg_count)])) | ||||
|                         obj = stack.pop() | ||||
|                         if mname == u'reverse': | ||||
|                             assert isinstance(obj, list) | ||||
|                             obj.reverse() | ||||
|                         else: | ||||
|                             raise NotImplementedError( | ||||
|                                 u'Unsupported (void) property %r on %r' | ||||
|                                 % (mname, obj)) | ||||
|                     elif opcode == 86:  # newarray | ||||
|                         arg_count = u30(coder) | ||||
|                         arr = [] | ||||
|                         for i in range(arg_count): | ||||
|                             arr.append(stack.pop()) | ||||
|                         arr = arr[::-1] | ||||
|                         stack.append(arr) | ||||
|                     elif opcode == 93:  # findpropstrict | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         res = extract_function(avm_class, mname) | ||||
|                         stack.append(res) | ||||
|                     elif opcode == 94:  # findproperty | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         res = avm_class.variables.get(mname) | ||||
|                         stack.append(res) | ||||
|                     elif opcode == 96:  # getlex | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         res = avm_class.variables.get(mname) | ||||
|                         stack.append(res) | ||||
|                     elif opcode == 97:  # setproperty | ||||
|                         index = u30(coder) | ||||
|                         value = stack.pop() | ||||
|                         idx = stack.pop() | ||||
|                         obj = stack.pop() | ||||
|                         assert isinstance(obj, list) | ||||
|                         assert isinstance(idx, int) | ||||
|                         obj[idx] = value | ||||
|                     elif opcode == 98:  # getlocal | ||||
|                         index = u30(coder) | ||||
|                         stack.append(registers[index]) | ||||
|                     elif opcode == 99:  # setlocal | ||||
|                         index = u30(coder) | ||||
|                         value = stack.pop() | ||||
|                         registers[index] = value | ||||
|                     elif opcode == 102:  # getproperty | ||||
|                         index = u30(coder) | ||||
|                         pname = multinames[index] | ||||
|                         if pname == u'length': | ||||
|                             obj = stack.pop() | ||||
|                             assert isinstance(obj, list) | ||||
|                             stack.append(len(obj)) | ||||
|                         else:  # Assume attribute access | ||||
|                             idx = stack.pop() | ||||
|                             assert isinstance(idx, int) | ||||
|                             obj = stack.pop() | ||||
|                             assert isinstance(obj, list) | ||||
|                             stack.append(obj[idx]) | ||||
|                     elif opcode == 128:  # coerce | ||||
|                         u30(coder) | ||||
|                     elif opcode == 133:  # coerce_s | ||||
|                         assert isinstance(stack[-1], (type(None), compat_str)) | ||||
|                     elif opcode == 164:  # modulo | ||||
|                         value2 = stack.pop() | ||||
|                         value1 = stack.pop() | ||||
|                         res = value1 % value2 | ||||
|                         stack.append(res) | ||||
|                     elif opcode == 175:  # greaterequals | ||||
|                         value2 = stack.pop() | ||||
|                         value1 = stack.pop() | ||||
|                         result = value1 >= value2 | ||||
|                         stack.append(result) | ||||
|                     elif opcode == 208:  # getlocal_0 | ||||
|                         stack.append(registers[0]) | ||||
|                     elif opcode == 209:  # getlocal_1 | ||||
|                         stack.append(registers[1]) | ||||
|                     elif opcode == 210:  # getlocal_2 | ||||
|                         stack.append(registers[2]) | ||||
|                     elif opcode == 211:  # getlocal_3 | ||||
|                         stack.append(registers[3]) | ||||
|                     elif opcode == 214:  # setlocal_2 | ||||
|                         registers[2] = stack.pop() | ||||
|                     elif opcode == 215:  # setlocal_3 | ||||
|                         registers[3] = stack.pop() | ||||
|                     else: | ||||
|                         raise NotImplementedError( | ||||
|                             u'Unsupported opcode %d' % opcode) | ||||
|  | ||||
|             avm_class.method_pyfunctions[func_name] = resfunc | ||||
|             return resfunc | ||||
|  | ||||
|         initial_function = extract_function(searched_class, u'decipher') | ||||
|         searched_class = swfi.extract_class(TARGET_CLASSNAME) | ||||
|         initial_function = swfi.extract_function(searched_class, u'decipher') | ||||
|         return lambda s: initial_function([s]) | ||||
|  | ||||
|     def _decrypt_signature(self, s, video_id, player_url, age_gate=False): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister