mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[youtube] Move swfinterp into its own file
This commit is contained in:
		| @@ -45,6 +45,12 @@ _TESTS = [ | ||||
|         u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', | ||||
|         u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', | ||||
|     ), | ||||
|     ( | ||||
|         u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', | ||||
|         u'swf', | ||||
|         86, | ||||
|         u'23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?#$%&\'()*+,-./:;<=>"' | ||||
|     ), | ||||
| ] | ||||
|  | ||||
|  | ||||
| @@ -57,12 +63,12 @@ class TestSignature(unittest.TestCase): | ||||
|  | ||||
|  | ||||
| def make_tfunc(url, stype, sig_input, expected_sig): | ||||
|     basename = url.rpartition('/')[2] | ||||
|     m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) | ||||
|     assert m, '%r should follow URL format' % basename | ||||
|     m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url) | ||||
|     assert m, '%r should follow URL format' % url | ||||
|     test_id = m.group(1) | ||||
|  | ||||
|     def test_func(self): | ||||
|         basename = 'player-%s.%s' % (test_id, stype) | ||||
|         fn = os.path.join(self.TESTDATA_DIR, basename) | ||||
|  | ||||
|         if not os.path.exists(fn): | ||||
|   | ||||
| @@ -14,6 +14,7 @@ import zlib | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..jsinterp import JSInterpreter | ||||
| from ..swfinterp import SWFInterpreter | ||||
| from ..utils import ( | ||||
|     compat_chr, | ||||
|     compat_parse_qs, | ||||
| @@ -450,457 +451,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         return lambda s: initial_function([s]) | ||||
|  | ||||
|     def _parse_sig_swf(self, file_contents): | ||||
|         if file_contents[1:3] != b'WS': | ||||
|             raise ExtractorError( | ||||
|                 u'Not an SWF file; header is %r' % file_contents[:3]) | ||||
|         if file_contents[:1] == b'C': | ||||
|             content = zlib.decompress(file_contents[8:]) | ||||
|         else: | ||||
|             raise NotImplementedError(u'Unsupported compression format %r' % | ||||
|                                       file_contents[:1]) | ||||
|  | ||||
|         def extract_tags(content): | ||||
|             pos = 0 | ||||
|             while pos < len(content): | ||||
|                 header16 = struct.unpack('<H', content[pos:pos+2])[0] | ||||
|                 pos += 2 | ||||
|                 tag_code = header16 >> 6 | ||||
|                 tag_len = header16 & 0x3f | ||||
|                 if tag_len == 0x3f: | ||||
|                     tag_len = struct.unpack('<I', content[pos:pos+4])[0] | ||||
|                     pos += 4 | ||||
|                 assert pos+tag_len <= len(content) | ||||
|                 yield (tag_code, content[pos:pos+tag_len]) | ||||
|                 pos += tag_len | ||||
|  | ||||
|         code_tag = next(tag | ||||
|                         for tag_code, tag in extract_tags(content) | ||||
|                         if tag_code == 82) | ||||
|         p = code_tag.index(b'\0', 4) + 1 | ||||
|         code_reader = io.BytesIO(code_tag[p:]) | ||||
|  | ||||
|         # Parse ABC (AVM2 ByteCode) | ||||
|         def read_int(reader=None): | ||||
|             if reader is None: | ||||
|                 reader = code_reader | ||||
|             res = 0 | ||||
|             shift = 0 | ||||
|             for _ in range(5): | ||||
|                 buf = reader.read(1) | ||||
|                 assert len(buf) == 1 | ||||
|                 b = struct.unpack('<B', buf)[0] | ||||
|                 res = res | ((b & 0x7f) << shift) | ||||
|                 if b & 0x80 == 0: | ||||
|                     break | ||||
|                 shift += 7 | ||||
|             return res | ||||
|  | ||||
|         def u30(reader=None): | ||||
|             res = read_int(reader) | ||||
|             assert res & 0xf0000000 == 0 | ||||
|             return res | ||||
|         u32 = read_int | ||||
|  | ||||
|         def s32(reader=None): | ||||
|             v = read_int(reader) | ||||
|             if v & 0x80000000 != 0: | ||||
|                 v = - ((v ^ 0xffffffff) + 1) | ||||
|             return v | ||||
|  | ||||
|         def s24(reader): | ||||
|             bs = reader.read(3) | ||||
|             assert len(bs) == 3 | ||||
|             first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00' | ||||
|             return struct.unpack('!i', first_byte + bs) | ||||
|  | ||||
|         def read_string(reader=None): | ||||
|             if reader is None: | ||||
|                 reader = code_reader | ||||
|             slen = u30(reader) | ||||
|             resb = reader.read(slen) | ||||
|             assert len(resb) == slen | ||||
|             return resb.decode('utf-8') | ||||
|  | ||||
|         def read_bytes(count, reader=None): | ||||
|             if reader is None: | ||||
|                 reader = code_reader | ||||
|             resb = reader.read(count) | ||||
|             assert len(resb) == count | ||||
|             return resb | ||||
|  | ||||
|         def read_byte(reader=None): | ||||
|             resb = read_bytes(1, reader=reader) | ||||
|             res = struct.unpack('<B', resb)[0] | ||||
|             return res | ||||
|  | ||||
|         # minor_version + major_version | ||||
|         read_bytes(2 + 2) | ||||
|  | ||||
|         # Constant pool | ||||
|         int_count = u30() | ||||
|         for _c in range(1, int_count): | ||||
|             s32() | ||||
|         uint_count = u30() | ||||
|         for _c in range(1, uint_count): | ||||
|             u32() | ||||
|         double_count = u30() | ||||
|         read_bytes((double_count-1) * 8) | ||||
|         string_count = u30() | ||||
|         constant_strings = [u''] | ||||
|         for _c in range(1, string_count): | ||||
|             s = read_string() | ||||
|             constant_strings.append(s) | ||||
|         namespace_count = u30() | ||||
|         for _c in range(1, namespace_count): | ||||
|             read_bytes(1)  # kind | ||||
|             u30()  # name | ||||
|         ns_set_count = u30() | ||||
|         for _c in range(1, ns_set_count): | ||||
|             count = u30() | ||||
|             for _c2 in range(count): | ||||
|                 u30() | ||||
|         multiname_count = u30() | ||||
|         MULTINAME_SIZES = { | ||||
|             0x07: 2,  # QName | ||||
|             0x0d: 2,  # QNameA | ||||
|             0x0f: 1,  # RTQName | ||||
|             0x10: 1,  # RTQNameA | ||||
|             0x11: 0,  # RTQNameL | ||||
|             0x12: 0,  # RTQNameLA | ||||
|             0x09: 2,  # Multiname | ||||
|             0x0e: 2,  # MultinameA | ||||
|             0x1b: 1,  # MultinameL | ||||
|             0x1c: 1,  # MultinameLA | ||||
|         } | ||||
|         multinames = [u''] | ||||
|         for _c in range(1, multiname_count): | ||||
|             kind = u30() | ||||
|             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind | ||||
|             if kind == 0x07: | ||||
|                 u30()  # namespace_idx | ||||
|                 name_idx = u30() | ||||
|                 multinames.append(constant_strings[name_idx]) | ||||
|             else: | ||||
|                 multinames.append('[MULTINAME kind: %d]' % kind) | ||||
|                 for _c2 in range(MULTINAME_SIZES[kind]): | ||||
|                     u30() | ||||
|  | ||||
|         # Methods | ||||
|         method_count = u30() | ||||
|         MethodInfo = collections.namedtuple( | ||||
|             'MethodInfo', | ||||
|             ['NEED_ARGUMENTS', 'NEED_REST']) | ||||
|         method_infos = [] | ||||
|         for method_id in range(method_count): | ||||
|             param_count = u30() | ||||
|             u30()  # return type | ||||
|             for _ in range(param_count): | ||||
|                 u30()  # param type | ||||
|             u30()  # name index (always 0 for youtube) | ||||
|             flags = read_byte() | ||||
|             if flags & 0x08 != 0: | ||||
|                 # Options present | ||||
|                 option_count = u30() | ||||
|                 for c in range(option_count): | ||||
|                     u30()  # val | ||||
|                     read_bytes(1)  # kind | ||||
|             if flags & 0x80 != 0: | ||||
|                 # Param names present | ||||
|                 for _ in range(param_count): | ||||
|                     u30()  # param name | ||||
|             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) | ||||
|             method_infos.append(mi) | ||||
|  | ||||
|         # Metadata | ||||
|         metadata_count = u30() | ||||
|         for _c in range(metadata_count): | ||||
|             u30()  # name | ||||
|             item_count = u30() | ||||
|             for _c2 in range(item_count): | ||||
|                 u30()  # key | ||||
|                 u30()  # value | ||||
|  | ||||
|         def parse_traits_info(): | ||||
|             trait_name_idx = u30() | ||||
|             kind_full = read_byte() | ||||
|             kind = kind_full & 0x0f | ||||
|             attrs = kind_full >> 4 | ||||
|             methods = {} | ||||
|             if kind in [0x00, 0x06]:  # Slot or Const | ||||
|                 u30()  # Slot id | ||||
|                 u30()  # type_name_idx | ||||
|                 vindex = u30() | ||||
|                 if vindex != 0: | ||||
|                     read_byte()  # vkind | ||||
|             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter | ||||
|                 u30()  # disp_id | ||||
|                 method_idx = u30() | ||||
|                 methods[multinames[trait_name_idx]] = method_idx | ||||
|             elif kind == 0x04:  # Class | ||||
|                 u30()  # slot_id | ||||
|                 u30()  # classi | ||||
|             elif kind == 0x05:  # Function | ||||
|                 u30()  # slot_id | ||||
|                 function_idx = u30() | ||||
|                 methods[function_idx] = multinames[trait_name_idx] | ||||
|             else: | ||||
|                 raise ExtractorError(u'Unsupported trait kind %d' % kind) | ||||
|  | ||||
|             if attrs & 0x4 != 0:  # Metadata present | ||||
|                 metadata_count = u30() | ||||
|                 for _c3 in range(metadata_count): | ||||
|                     u30()  # metadata index | ||||
|  | ||||
|             return methods | ||||
|  | ||||
|         class AVMClass(object): | ||||
|             def __init__(self, name_idx): | ||||
|                 self.name_idx = name_idx | ||||
|                 self.method_names = {} | ||||
|                 self.method_idxs = {} | ||||
|                 self.methods = {} | ||||
|                 self.method_pyfunctions = {} | ||||
|                 self.variables = {} | ||||
|  | ||||
|             @property | ||||
|             def name(self): | ||||
|                 return multinames[self.name_idx] | ||||
|  | ||||
|         # Classes | ||||
|         class_count = u30() | ||||
|         classes = [] | ||||
|         for class_id in range(class_count): | ||||
|             name_idx = u30() | ||||
|             classes.append(AVMClass(name_idx)) | ||||
|             u30()  # super_name idx | ||||
|             flags = read_byte() | ||||
|             if flags & 0x08 != 0:  # Protected namespace is present | ||||
|                 u30()  # protected_ns_idx | ||||
|             intrf_count = u30() | ||||
|             for _c2 in range(intrf_count): | ||||
|                 u30() | ||||
|             u30()  # iinit | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|         assert len(classes) == class_count | ||||
|  | ||||
|         swfi = SWFInterpreter(file_contents) | ||||
|         TARGET_CLASSNAME = u'SignatureDecipher' | ||||
|         searched_class = next( | ||||
|             c for c in classes if c.name == TARGET_CLASSNAME) | ||||
|         if searched_class is None: | ||||
|             raise ExtractorError(u'Target class %r not found' % | ||||
|                                  TARGET_CLASSNAME) | ||||
|  | ||||
|         for avm_class in classes: | ||||
|             u30()  # cinit | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 trait_methods = parse_traits_info() | ||||
|                 avm_class.method_names.update(trait_methods.items()) | ||||
|                 avm_class.method_idxs.update(dict( | ||||
|                     (idx, name) | ||||
|                     for name, idx in trait_methods.items())) | ||||
|  | ||||
|         # Scripts | ||||
|         script_count = u30() | ||||
|         for _c in range(script_count): | ||||
|             u30()  # init | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|  | ||||
|         # Method bodies | ||||
|         method_body_count = u30() | ||||
|         Method = collections.namedtuple('Method', ['code', 'local_count']) | ||||
|         for _c in range(method_body_count): | ||||
|             method_idx = u30() | ||||
|             u30()  # max_stack | ||||
|             local_count = u30() | ||||
|             u30()  # init_scope_depth | ||||
|             u30()  # max_scope_depth | ||||
|             code_length = u30() | ||||
|             code = read_bytes(code_length) | ||||
|             for avm_class in classes: | ||||
|                 if method_idx in avm_class.method_idxs: | ||||
|                     m = Method(code, local_count) | ||||
|                     avm_class.methods[avm_class.method_idxs[method_idx]] = m | ||||
|             exception_count = u30() | ||||
|             for _c2 in range(exception_count): | ||||
|                 u30()  # from | ||||
|                 u30()  # to | ||||
|                 u30()  # target | ||||
|                 u30()  # exc_type | ||||
|                 u30()  # var_name | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|  | ||||
|         assert p + code_reader.tell() == len(code_tag) | ||||
|  | ||||
|         def extract_function(avm_class, func_name): | ||||
|             if func_name in avm_class.method_pyfunctions: | ||||
|                 return avm_class.method_pyfunctions[func_name] | ||||
|             if func_name not in avm_class.methods: | ||||
|                 raise ExtractorError(u'Cannot find function %r' % func_name) | ||||
|             m = avm_class.methods[func_name] | ||||
|  | ||||
|             def resfunc(args): | ||||
|                 registers = ['(this)'] + list(args) + [None] * m.local_count | ||||
|                 stack = [] | ||||
|                 coder = io.BytesIO(m.code) | ||||
|                 while True: | ||||
|                     opcode = struct.unpack('!B', coder.read(1))[0] | ||||
|                     if opcode == 17:  # iftrue | ||||
|                         offset = s24(coder) | ||||
|                         value = stack.pop() | ||||
|                         if value: | ||||
|                             coder.seek(coder.tell() + offset) | ||||
|                     elif opcode == 36:  # pushbyte | ||||
|                         v = struct.unpack('!B', coder.read(1))[0] | ||||
|                         stack.append(v) | ||||
|                     elif opcode == 44:  # pushstring | ||||
|                         idx = u30(coder) | ||||
|                         stack.append(constant_strings[idx]) | ||||
|                     elif opcode == 48:  # pushscope | ||||
|                         # We don't implement the scope register, so we'll just | ||||
|                         # ignore the popped value | ||||
|                         stack.pop() | ||||
|                     elif opcode == 70:  # callproperty | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         arg_count = u30(coder) | ||||
|                         args = list(reversed( | ||||
|                             [stack.pop() for _ in range(arg_count)])) | ||||
|                         obj = stack.pop() | ||||
|                         if mname == u'split': | ||||
|                             assert len(args) == 1 | ||||
|                             assert isinstance(args[0], compat_str) | ||||
|                             assert isinstance(obj, compat_str) | ||||
|                             if args[0] == u'': | ||||
|                                 res = list(obj) | ||||
|                             else: | ||||
|                                 res = obj.split(args[0]) | ||||
|                             stack.append(res) | ||||
|                         elif mname == u'slice': | ||||
|                             assert len(args) == 1 | ||||
|                             assert isinstance(args[0], int) | ||||
|                             assert isinstance(obj, list) | ||||
|                             res = obj[args[0]:] | ||||
|                             stack.append(res) | ||||
|                         elif mname == u'join': | ||||
|                             assert len(args) == 1 | ||||
|                             assert isinstance(args[0], compat_str) | ||||
|                             assert isinstance(obj, list) | ||||
|                             res = args[0].join(obj) | ||||
|                             stack.append(res) | ||||
|                         elif mname in avm_class.method_pyfunctions: | ||||
|                             stack.append(avm_class.method_pyfunctions[mname](args)) | ||||
|                         else: | ||||
|                             raise NotImplementedError( | ||||
|                                 u'Unsupported property %r on %r' | ||||
|                                 % (mname, obj)) | ||||
|                     elif opcode == 72:  # returnvalue | ||||
|                         res = stack.pop() | ||||
|                         return res | ||||
|                     elif opcode == 79:  # callpropvoid | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         arg_count = u30(coder) | ||||
|                         args = list(reversed( | ||||
|                             [stack.pop() for _ in range(arg_count)])) | ||||
|                         obj = stack.pop() | ||||
|                         if mname == u'reverse': | ||||
|                             assert isinstance(obj, list) | ||||
|                             obj.reverse() | ||||
|                         else: | ||||
|                             raise NotImplementedError( | ||||
|                                 u'Unsupported (void) property %r on %r' | ||||
|                                 % (mname, obj)) | ||||
|                     elif opcode == 86:  # newarray | ||||
|                         arg_count = u30(coder) | ||||
|                         arr = [] | ||||
|                         for i in range(arg_count): | ||||
|                             arr.append(stack.pop()) | ||||
|                         arr = arr[::-1] | ||||
|                         stack.append(arr) | ||||
|                     elif opcode == 93:  # findpropstrict | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         res = extract_function(avm_class, mname) | ||||
|                         stack.append(res) | ||||
|                     elif opcode == 94:  # findproperty | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         res = avm_class.variables.get(mname) | ||||
|                         stack.append(res) | ||||
|                     elif opcode == 96:  # getlex | ||||
|                         index = u30(coder) | ||||
|                         mname = multinames[index] | ||||
|                         res = avm_class.variables.get(mname) | ||||
|                         stack.append(res) | ||||
|                     elif opcode == 97:  # setproperty | ||||
|                         index = u30(coder) | ||||
|                         value = stack.pop() | ||||
|                         idx = stack.pop() | ||||
|                         obj = stack.pop() | ||||
|                         assert isinstance(obj, list) | ||||
|                         assert isinstance(idx, int) | ||||
|                         obj[idx] = value | ||||
|                     elif opcode == 98:  # getlocal | ||||
|                         index = u30(coder) | ||||
|                         stack.append(registers[index]) | ||||
|                     elif opcode == 99:  # setlocal | ||||
|                         index = u30(coder) | ||||
|                         value = stack.pop() | ||||
|                         registers[index] = value | ||||
|                     elif opcode == 102:  # getproperty | ||||
|                         index = u30(coder) | ||||
|                         pname = multinames[index] | ||||
|                         if pname == u'length': | ||||
|                             obj = stack.pop() | ||||
|                             assert isinstance(obj, list) | ||||
|                             stack.append(len(obj)) | ||||
|                         else:  # Assume attribute access | ||||
|                             idx = stack.pop() | ||||
|                             assert isinstance(idx, int) | ||||
|                             obj = stack.pop() | ||||
|                             assert isinstance(obj, list) | ||||
|                             stack.append(obj[idx]) | ||||
|                     elif opcode == 128:  # coerce | ||||
|                         u30(coder) | ||||
|                     elif opcode == 133:  # coerce_s | ||||
|                         assert isinstance(stack[-1], (type(None), compat_str)) | ||||
|                     elif opcode == 164:  # modulo | ||||
|                         value2 = stack.pop() | ||||
|                         value1 = stack.pop() | ||||
|                         res = value1 % value2 | ||||
|                         stack.append(res) | ||||
|                     elif opcode == 175:  # greaterequals | ||||
|                         value2 = stack.pop() | ||||
|                         value1 = stack.pop() | ||||
|                         result = value1 >= value2 | ||||
|                         stack.append(result) | ||||
|                     elif opcode == 208:  # getlocal_0 | ||||
|                         stack.append(registers[0]) | ||||
|                     elif opcode == 209:  # getlocal_1 | ||||
|                         stack.append(registers[1]) | ||||
|                     elif opcode == 210:  # getlocal_2 | ||||
|                         stack.append(registers[2]) | ||||
|                     elif opcode == 211:  # getlocal_3 | ||||
|                         stack.append(registers[3]) | ||||
|                     elif opcode == 214:  # setlocal_2 | ||||
|                         registers[2] = stack.pop() | ||||
|                     elif opcode == 215:  # setlocal_3 | ||||
|                         registers[3] = stack.pop() | ||||
|                     else: | ||||
|                         raise NotImplementedError( | ||||
|                             u'Unsupported opcode %d' % opcode) | ||||
|  | ||||
|             avm_class.method_pyfunctions[func_name] = resfunc | ||||
|             return resfunc | ||||
|  | ||||
|         initial_function = extract_function(searched_class, u'decipher') | ||||
|         searched_class = swfi.extract_class(TARGET_CLASSNAME) | ||||
|         initial_function = swfi.extract_function(searched_class, u'decipher') | ||||
|         return lambda s: initial_function([s]) | ||||
|  | ||||
|     def _decrypt_signature(self, s, video_id, player_url, age_gate=False): | ||||
|   | ||||
							
								
								
									
										503
									
								
								youtube_dl/swfinterp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										503
									
								
								youtube_dl/swfinterp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,503 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import collections | ||||
| import io | ||||
| import struct | ||||
| import zlib | ||||
|  | ||||
| from .utils import ExtractorError | ||||
|  | ||||
|  | ||||
| def _extract_tags(content): | ||||
|     pos = 0 | ||||
|     while pos < len(content): | ||||
|         header16 = struct.unpack('<H', content[pos:pos + 2])[0] | ||||
|         pos += 2 | ||||
|         tag_code = header16 >> 6 | ||||
|         tag_len = header16 & 0x3f | ||||
|         if tag_len == 0x3f: | ||||
|             tag_len = struct.unpack('<I', content[pos:pos + 4])[0] | ||||
|             pos += 4 | ||||
|         assert pos + tag_len <= len(content) | ||||
|         yield (tag_code, content[pos:pos + tag_len]) | ||||
|         pos += tag_len | ||||
|  | ||||
|  | ||||
| class _AVMClass_Object(object): | ||||
|     def __init__(self, avm_class): | ||||
|         self.avm_class = avm_class | ||||
|  | ||||
|     def __repr__(self): | ||||
|         return '%s#%x' % (self.avm_class.name, id(self)) | ||||
|  | ||||
|  | ||||
| class _AVMClass(object): | ||||
|     def __init__(self, name_idx, name): | ||||
|         self.name_idx = name_idx | ||||
|         self.name = name | ||||
|         self.method_names = {} | ||||
|         self.method_idxs = {} | ||||
|         self.methods = {} | ||||
|         self.method_pyfunctions = {} | ||||
|         self.variables = {} | ||||
|  | ||||
|     def make_object(self): | ||||
|         return _AVMClass_Object(self) | ||||
|  | ||||
|  | ||||
| def _read_int(reader): | ||||
|     res = 0 | ||||
|     shift = 0 | ||||
|     for _ in range(5): | ||||
|         buf = reader.read(1) | ||||
|         assert len(buf) == 1 | ||||
|         b = struct.unpack('<B', buf)[0] | ||||
|         res = res | ((b & 0x7f) << shift) | ||||
|         if b & 0x80 == 0: | ||||
|             break | ||||
|         shift += 7 | ||||
|     return res | ||||
|  | ||||
|  | ||||
| def _u30(reader): | ||||
|     res = _read_int(reader) | ||||
|     assert res & 0xf0000000 == 0 | ||||
|     return res | ||||
| u32 = _read_int | ||||
|  | ||||
|  | ||||
def _s32(reader):
    """Read a variable-length integer and reinterpret it as signed 32-bit.

    When bit 31 of the decoded value is set, the value is converted with
    the two's-complement rule; otherwise it is returned unchanged.
    """
    raw = _read_int(reader)
    if not raw & 0x80000000:
        return raw
    return - ((raw ^ 0xffffffff) + 1)
|  | ||||
|  | ||||
| def _s24(reader): | ||||
|     bs = reader.read(3) | ||||
|     assert len(bs) == 3 | ||||
|     first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00' | ||||
|     return struct.unpack('!i', first_byte + bs) | ||||
|  | ||||
|  | ||||
def _read_string(reader):
    """Read a length-prefixed UTF-8 string from *reader*.

    The length is read with ``_u30`` and counts bytes, not characters.
    Raises AssertionError if the stream holds fewer bytes than announced.
    """
    byte_length = _u30(reader)
    raw = reader.read(byte_length)
    assert len(raw) == byte_length
    return raw.decode('utf-8')
|  | ||||
|  | ||||
| def _read_bytes(count, reader): | ||||
|     if reader is None: | ||||
|         reader = code_reader | ||||
|     resb = reader.read(count) | ||||
|     assert len(resb) == count | ||||
|     return resb | ||||
|  | ||||
|  | ||||
def _read_byte(reader):
    """Read one byte from *reader* and return it as an unsigned integer."""
    (value,) = struct.unpack('<B', _read_bytes(1, reader=reader))
    return value
|  | ||||
|  | ||||
| class SWFInterpreter(object): | ||||
|     def __init__(self, file_contents): | ||||
|         if file_contents[1:3] != b'WS': | ||||
|             raise ExtractorError( | ||||
|                 'Not an SWF file; header is %r' % file_contents[:3]) | ||||
|         if file_contents[:1] == b'C': | ||||
|             content = zlib.decompress(file_contents[8:]) | ||||
|         else: | ||||
|             raise NotImplementedError( | ||||
|                 'Unsupported compression format %r' % | ||||
|                 file_contents[:1]) | ||||
|  | ||||
|         code_tag = next(tag | ||||
|                         for tag_code, tag in _extract_tags(content) | ||||
|                         if tag_code == 82) | ||||
|         p = code_tag.index(b'\0', 4) + 1 | ||||
|         code_reader = io.BytesIO(code_tag[p:]) | ||||
|  | ||||
|         # Parse ABC (AVM2 ByteCode) | ||||
|  | ||||
|         # Define a couple convenience methods | ||||
|         u30 = lambda *args: _u30(*args, reader=code_reader) | ||||
|         s32 = lambda *args: _s32(*args, reader=code_reader) | ||||
|         u32 = lambda *args: _u32(*args, reader=code_reader) | ||||
|         read_bytes = lambda *args: _read_bytes(*args, reader=code_reader) | ||||
|         read_byte = lambda *args: _read_byte(*args, reader=code_reader) | ||||
|  | ||||
|         # minor_version + major_version | ||||
|         read_bytes(2 + 2) | ||||
|  | ||||
|         # Constant pool | ||||
|         int_count = u30() | ||||
|         for _c in range(1, int_count): | ||||
|             s32() | ||||
|         uint_count = u30() | ||||
|         for _c in range(1, uint_count): | ||||
|             u32() | ||||
|         double_count = u30() | ||||
|         read_bytes((double_count - 1) * 8) | ||||
|         string_count = u30() | ||||
|         constant_strings = [''] | ||||
|         for _c in range(1, string_count): | ||||
|             s = _read_string(code_reader) | ||||
|             constant_strings.append(s) | ||||
|         namespace_count = u30() | ||||
|         for _c in range(1, namespace_count): | ||||
|             read_bytes(1)  # kind | ||||
|             u30()  # name | ||||
|         ns_set_count = u30() | ||||
|         for _c in range(1, ns_set_count): | ||||
|             count = u30() | ||||
|             for _c2 in range(count): | ||||
|                 u30() | ||||
|         multiname_count = u30() | ||||
|         MULTINAME_SIZES = { | ||||
|             0x07: 2,  # QName | ||||
|             0x0d: 2,  # QNameA | ||||
|             0x0f: 1,  # RTQName | ||||
|             0x10: 1,  # RTQNameA | ||||
|             0x11: 0,  # RTQNameL | ||||
|             0x12: 0,  # RTQNameLA | ||||
|             0x09: 2,  # Multiname | ||||
|             0x0e: 2,  # MultinameA | ||||
|             0x1b: 1,  # MultinameL | ||||
|             0x1c: 1,  # MultinameLA | ||||
|         } | ||||
|         self.multinames = [''] | ||||
|         for _c in range(1, multiname_count): | ||||
|             kind = u30() | ||||
|             assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind | ||||
|             if kind == 0x07: | ||||
|                 u30()  # namespace_idx | ||||
|                 name_idx = u30() | ||||
|                 self.multinames.append(constant_strings[name_idx]) | ||||
|             else: | ||||
|                 self.multinames.append('[MULTINAME kind: %d]' % kind) | ||||
|                 for _c2 in range(MULTINAME_SIZES[kind]): | ||||
|                     u30() | ||||
|  | ||||
|         # Methods | ||||
|         method_count = u30() | ||||
|         MethodInfo = collections.namedtuple( | ||||
|             'MethodInfo', | ||||
|             ['NEED_ARGUMENTS', 'NEED_REST']) | ||||
|         method_infos = [] | ||||
|         for method_id in range(method_count): | ||||
|             param_count = u30() | ||||
|             u30()  # return type | ||||
|             for _ in range(param_count): | ||||
|                 u30()  # param type | ||||
|             u30()  # name index (always 0 for youtube) | ||||
|             flags = read_byte() | ||||
|             if flags & 0x08 != 0: | ||||
|                 # Options present | ||||
|                 option_count = u30() | ||||
|                 for c in range(option_count): | ||||
|                     u30()  # val | ||||
|                     read_bytes(1)  # kind | ||||
|             if flags & 0x80 != 0: | ||||
|                 # Param names present | ||||
|                 for _ in range(param_count): | ||||
|                     u30()  # param name | ||||
|             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) | ||||
|             method_infos.append(mi) | ||||
|  | ||||
|         # Metadata | ||||
|         metadata_count = u30() | ||||
|         for _c in range(metadata_count): | ||||
|             u30()  # name | ||||
|             item_count = u30() | ||||
|             for _c2 in range(item_count): | ||||
|                 u30()  # key | ||||
|                 u30()  # value | ||||
|  | ||||
|         def parse_traits_info(): | ||||
|             trait_name_idx = u30() | ||||
|             kind_full = read_byte() | ||||
|             kind = kind_full & 0x0f | ||||
|             attrs = kind_full >> 4 | ||||
|             methods = {} | ||||
|             if kind in [0x00, 0x06]:  # Slot or Const | ||||
|                 u30()  # Slot id | ||||
|                 u30()  # type_name_idx | ||||
|                 vindex = u30() | ||||
|                 if vindex != 0: | ||||
|                     read_byte()  # vkind | ||||
|             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter | ||||
|                 u30()  # disp_id | ||||
|                 method_idx = u30() | ||||
|                 methods[self.multinames[trait_name_idx]] = method_idx | ||||
|             elif kind == 0x04:  # Class | ||||
|                 u30()  # slot_id | ||||
|                 u30()  # classi | ||||
|             elif kind == 0x05:  # Function | ||||
|                 u30()  # slot_id | ||||
|                 function_idx = u30() | ||||
|                 methods[function_idx] = self.multinames[trait_name_idx] | ||||
|             else: | ||||
|                 raise ExtractorError('Unsupported trait kind %d' % kind) | ||||
|  | ||||
|             if attrs & 0x4 != 0:  # Metadata present | ||||
|                 metadata_count = u30() | ||||
|                 for _c3 in range(metadata_count): | ||||
|                     u30()  # metadata index | ||||
|  | ||||
|             return methods | ||||
|  | ||||
|         # Classes | ||||
|         class_count = u30() | ||||
|         classes = [] | ||||
|         for class_id in range(class_count): | ||||
|             name_idx = u30() | ||||
|             classes.append(_AVMClass(name_idx, self.multinames[name_idx])) | ||||
|             u30()  # super_name idx | ||||
|             flags = read_byte() | ||||
|             if flags & 0x08 != 0:  # Protected namespace is present | ||||
|                 u30()  # protected_ns_idx | ||||
|             intrf_count = u30() | ||||
|             for _c2 in range(intrf_count): | ||||
|                 u30() | ||||
|             u30()  # iinit | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|         assert len(classes) == class_count | ||||
|         self._classes_by_name = dict((c.name, c) for c in classes) | ||||
|  | ||||
|         for avm_class in classes: | ||||
|             u30()  # cinit | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 trait_methods = parse_traits_info() | ||||
|                 avm_class.method_names.update(trait_methods.items()) | ||||
|                 avm_class.method_idxs.update(dict( | ||||
|                     (idx, name) | ||||
|                     for name, idx in trait_methods.items())) | ||||
|  | ||||
|         # Scripts | ||||
|         script_count = u30() | ||||
|         for _c in range(script_count): | ||||
|             u30()  # init | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|  | ||||
|         # Method bodies | ||||
|         method_body_count = u30() | ||||
|         Method = collections.namedtuple('Method', ['code', 'local_count']) | ||||
|         for _c in range(method_body_count): | ||||
|             method_idx = u30() | ||||
|             u30()  # max_stack | ||||
|             local_count = u30() | ||||
|             u30()  # init_scope_depth | ||||
|             u30()  # max_scope_depth | ||||
|             code_length = u30() | ||||
|             code = read_bytes(code_length) | ||||
|             for avm_class in classes: | ||||
|                 if method_idx in avm_class.method_idxs: | ||||
|                     m = Method(code, local_count) | ||||
|                     avm_class.methods[avm_class.method_idxs[method_idx]] = m | ||||
|             exception_count = u30() | ||||
|             for _c2 in range(exception_count): | ||||
|                 u30()  # from | ||||
|                 u30()  # to | ||||
|                 u30()  # target | ||||
|                 u30()  # exc_type | ||||
|                 u30()  # var_name | ||||
|             trait_count = u30() | ||||
|             for _c2 in range(trait_count): | ||||
|                 parse_traits_info() | ||||
|  | ||||
|         assert p + code_reader.tell() == len(code_tag) | ||||
|  | ||||
|     def extract_class(self, class_name): | ||||
|         try: | ||||
|             return self._classes_by_name[class_name] | ||||
|         except KeyError: | ||||
|             raise ExtractorError('Class %r not found' % class_name) | ||||
|  | ||||
|     def extract_function(self, avm_class, func_name): | ||||
|         if func_name in avm_class.method_pyfunctions: | ||||
|             return avm_class.method_pyfunctions[func_name] | ||||
|         if func_name in self._classes_by_name: | ||||
|             return self._classes_by_name[func_name].make_object() | ||||
|         if func_name not in avm_class.methods: | ||||
|             raise ExtractorError('Cannot find function %r' % func_name) | ||||
|         m = avm_class.methods[func_name] | ||||
|  | ||||
|         def resfunc(args): | ||||
|             # Helper functions | ||||
|             coder = io.BytesIO(m.code) | ||||
|             s24 = lambda: _s24(coder) | ||||
|             u30 = lambda: _u30(coder) | ||||
|  | ||||
|             print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) | ||||
|             registers = ['(this)'] + list(args) + [None] * m.local_count | ||||
|             stack = [] | ||||
|             while True: | ||||
|                 opcode = _read_byte(coder) | ||||
|                 print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) | ||||
|                 if opcode == 17:  # iftrue | ||||
|                     offset = s24() | ||||
|                     value = stack.pop() | ||||
|                     if value: | ||||
|                         coder.seek(coder.tell() + offset) | ||||
|                 elif opcode == 36:  # pushbyte | ||||
|                     v = _read_byte(coder) | ||||
|                     stack.append(v) | ||||
|                 elif opcode == 44:  # pushstring | ||||
|                     idx = u30() | ||||
|                     stack.append(constant_strings[idx]) | ||||
|                 elif opcode == 48:  # pushscope | ||||
|                     # We don't implement the scope register, so we'll just | ||||
|                     # ignore the popped value | ||||
|                     new_scope = stack.pop() | ||||
|                 elif opcode == 70:  # callproperty | ||||
|                     index = u30() | ||||
|                     mname = self.multinames[index] | ||||
|                     arg_count = u30() | ||||
|                     args = list(reversed( | ||||
|                         [stack.pop() for _ in range(arg_count)])) | ||||
|                     obj = stack.pop() | ||||
|                     if mname == 'split': | ||||
|                         assert len(args) == 1 | ||||
|                         assert isinstance(args[0], compat_str) | ||||
|                         assert isinstance(obj, compat_str) | ||||
|                         if args[0] == '': | ||||
|                             res = list(obj) | ||||
|                         else: | ||||
|                             res = obj.split(args[0]) | ||||
|                         stack.append(res) | ||||
|                     elif mname == 'slice': | ||||
|                         assert len(args) == 1 | ||||
|                         assert isinstance(args[0], int) | ||||
|                         assert isinstance(obj, list) | ||||
|                         res = obj[args[0]:] | ||||
|                         stack.append(res) | ||||
|                     elif mname == 'join': | ||||
|                         assert len(args) == 1 | ||||
|                         assert isinstance(args[0], compat_str) | ||||
|                         assert isinstance(obj, list) | ||||
|                         res = args[0].join(obj) | ||||
|                         stack.append(res) | ||||
|                     elif mname in avm_class.method_pyfunctions: | ||||
|                         stack.append(avm_class.method_pyfunctions[mname](args)) | ||||
|                     else: | ||||
|                         raise NotImplementedError( | ||||
|                             'Unsupported property %r on %r' | ||||
|                             % (mname, obj)) | ||||
|                 elif opcode == 72:  # returnvalue | ||||
|                     res = stack.pop() | ||||
|                     return res | ||||
|                 elif opcode == 74:  # constructproperty | ||||
|                     index = u30() | ||||
|                     arg_count = u30() | ||||
|                     args = list(reversed( | ||||
|                         [stack.pop() for _ in range(arg_count)])) | ||||
|                     obj = stack.pop() | ||||
|  | ||||
|                     mname = self.multinames[index] | ||||
|                     construct_method = self.extract_function( | ||||
|                         obj.avm_class, mname) | ||||
|                     # We do not actually call the constructor for now; | ||||
|                     # we just pretend it does nothing | ||||
|                     stack.append(obj) | ||||
|                 elif opcode == 79:  # callpropvoid | ||||
|                     index = u30() | ||||
|                     mname = self.multinames[index] | ||||
|                     arg_count = u30() | ||||
|                     args = list(reversed( | ||||
|                         [stack.pop() for _ in range(arg_count)])) | ||||
|                     obj = stack.pop() | ||||
|                     if mname == 'reverse': | ||||
|                         assert isinstance(obj, list) | ||||
|                         obj.reverse() | ||||
|                     else: | ||||
|                         raise NotImplementedError( | ||||
|                             'Unsupported (void) property %r on %r' | ||||
|                             % (mname, obj)) | ||||
|                 elif opcode == 86:  # newarray | ||||
|                     arg_count = u30() | ||||
|                     arr = [] | ||||
|                     for i in range(arg_count): | ||||
|                         arr.append(stack.pop()) | ||||
|                     arr = arr[::-1] | ||||
|                     stack.append(arr) | ||||
|                 elif opcode == 93:  # findpropstrict | ||||
|                     index = u30() | ||||
|                     mname = self.multinames[index] | ||||
|                     res = self.extract_function(avm_class, mname) | ||||
|                     stack.append(res) | ||||
|                 elif opcode == 94:  # findproperty | ||||
|                     index = u30() | ||||
|                     mname = self.multinames[index] | ||||
|                     res = avm_class.variables.get(mname) | ||||
|                     stack.append(res) | ||||
|                 elif opcode == 96:  # getlex | ||||
|                     index = u30() | ||||
|                     mname = self.multinames[index] | ||||
|                     res = avm_class.variables.get(mname, None) | ||||
|                     stack.append(res) | ||||
|                 elif opcode == 97:  # setproperty | ||||
|                     index = u30() | ||||
|                     value = stack.pop() | ||||
|                     idx = self.multinames[index] | ||||
|                     obj = stack.pop() | ||||
|                     obj[idx] = value | ||||
|                 elif opcode == 98:  # getlocal | ||||
|                     index = u30() | ||||
|                     stack.append(registers[index]) | ||||
|                 elif opcode == 99:  # setlocal | ||||
|                     index = u30() | ||||
|                     value = stack.pop() | ||||
|                     registers[index] = value | ||||
|                 elif opcode == 102:  # getproperty | ||||
|                     index = u30() | ||||
|                     pname = self.multinames[index] | ||||
|                     if pname == 'length': | ||||
|                         obj = stack.pop() | ||||
|                         assert isinstance(obj, list) | ||||
|                         stack.append(len(obj)) | ||||
|                     else:  # Assume attribute access | ||||
|                         idx = stack.pop() | ||||
|                         assert isinstance(idx, int) | ||||
|                         obj = stack.pop() | ||||
|                         assert isinstance(obj, list) | ||||
|                         stack.append(obj[idx]) | ||||
|                 elif opcode == 128:  # coerce | ||||
|                     u30() | ||||
|                 elif opcode == 133:  # coerce_s | ||||
|                     assert isinstance(stack[-1], (type(None), compat_str)) | ||||
|                 elif opcode == 164:  # modulo | ||||
|                     value2 = stack.pop() | ||||
|                     value1 = stack.pop() | ||||
|                     res = value1 % value2 | ||||
|                     stack.append(res) | ||||
|                 elif opcode == 175:  # greaterequals | ||||
|                     value2 = stack.pop() | ||||
|                     value1 = stack.pop() | ||||
|                     result = value1 >= value2 | ||||
|                     stack.append(result) | ||||
|                 elif opcode == 208:  # getlocal_0 | ||||
|                     stack.append(registers[0]) | ||||
|                 elif opcode == 209:  # getlocal_1 | ||||
|                     stack.append(registers[1]) | ||||
|                 elif opcode == 210:  # getlocal_2 | ||||
|                     stack.append(registers[2]) | ||||
|                 elif opcode == 211:  # getlocal_3 | ||||
|                     stack.append(registers[3]) | ||||
|                 elif opcode == 214:  # setlocal_2 | ||||
|                     registers[2] = stack.pop() | ||||
|                 elif opcode == 215:  # setlocal_3 | ||||
|                     registers[3] = stack.pop() | ||||
|                 else: | ||||
|                     raise NotImplementedError( | ||||
|                         'Unsupported opcode %d' % opcode) | ||||
|  | ||||
|         avm_class.method_pyfunctions[func_name] = resfunc | ||||
|         return resfunc | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister