class BinaryReader:
    """Reads big-endian binary data out of a RAM dump held in memory.

    The whole dump is loaded once in the constructor; every accessor takes
    an address, which is translated to a dump offset by `addr_to_offs`.
    All accessors raise IndexError when the requested range is not fully
    inside the dump, instead of silently returning truncated or wrapped data.
    """

    def __init__(self, path: str):
        with open(path, 'rb') as f:
            self.dat = f.read()

    # Address to ram dump offset
    def addr_to_offs(self, addr: int) -> int:
        return addr - 0x80000000

    # Validates that [offs, offs + size) lies inside the dump
    def _check_range(self, addr: int, offs: int, size: int) -> None:
        # A negative offset would otherwise make slicing count from the end
        # of the dump, and an overlong range would be silently shortened
        if offs < 0 or size < 0 or offs + size > len(self.dat):
            raise IndexError(
                f"read of {size} byte(s) at {addr:#x} is outside the RAM dump"
            )

    # Reads bytes at an address
    def read(self, addr: int, size: int) -> bytes:
        offs = self.addr_to_offs(addr)
        self._check_range(addr, offs, size)
        return self.dat[offs:offs + size]

    # Reads a null-terminated SJIS string at an address
    def read_str(self, addr: int) -> str:
        offs = self.addr_to_offs(addr)
        if offs < 0 or offs >= len(self.dat):
            raise IndexError(f"string address {addr:#x} is outside the RAM dump")

        # Locate the terminator in one pass rather than byte by byte
        end = self.dat.find(b"\x00", offs)
        if end == -1:
            raise IndexError(f"unterminated string at {addr:#x}")
        return self.dat[offs:end].decode("shift-jis")

    # Reads an integer at an address
    def read_int(self, addr: int, size: int) -> int:
        return int.from_bytes(self.read(addr, size), "big")

    # Reads a halfword at an address
    def read_half(self, addr: int) -> int:
        return self.read_int(addr, 2)

    # Reads a word at an address
    def read_word(self, addr: int) -> int:
        return self.read_int(addr, 4)

    # Reads a word array at an address
    def read_word_array(self, addr: int, length: int) -> List[int]:
        return [self.read_word(addr + (i * 4)) for i in range(length)]
class Config:
    """Command line arguments for the program.

    After construction the following attributes are set:
    dump_path, addr, map_path, show_strings, show_line_addrs, no_pointer,
    spm and cpp_macros.
    """

    def __init__(self, argv=None):
        """Parse the command line.

        argv: list of argument strings to parse; None (the default) makes
              argparse read sys.argv[1:], as before.
        Exits through argparse's usage error if the address is not valid hex.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("--ramfile", "-r", default="ram.raw")
        parser.add_argument("--address", "-a")
        parser.add_argument("--map", "-m")
        parser.add_argument("--showstrings", "-s", action="store_true")
        parser.add_argument("--lineaddrs", "-l", action="store_true")
        parser.add_argument("--nopointer", "-n", action="store_true")
        parser.add_argument("--ttyd", "-t", action="store_true")
        parser.add_argument("--cpp", "-c", action="store_true")
        args = parser.parse_args(argv)

        # --ramfile path, -r path
        # Path to the MEM1 RAM dump, "ram.raw" if not given
        self.dump_path = args.ramfile

        # --address addr, -a addr
        # Address of the script to disassemble, prompted for if not given
        # Ex. 80e4a688 for aa1_01_init_evt
        # int(..., 16) accepts the value with or without a leading "0x"
        raw_addr = args.address if args.address is not None else input("addr: ")
        try:
            self.addr = int(raw_addr, 16)
        except ValueError:
            parser.error(f"invalid hexadecimal address: {raw_addr!r}")

        # --map path, -m path
        # Path to a symbol map, None if no map should be used
        self.map_path = args.map

        # --showstrings, -s
        # Prints the contents of a string instead of its address for supported instructions, currently can't re-assemble
        # enabled: debug_put_msg "aa1_01_init_evt"
        # disabled: debug_put_msg 80CAC958
        self.show_strings = args.showstrings

        # --lineaddrs, -l
        # Prints the memory address of an instruction at the start of the line
        # enabled: 80e4a688: debug_put_msg 0x80CAC958
        # disabled: debug_put_msg 0x80CAC958
        self.show_line_addrs = args.lineaddrs

        # --nopointer, -n
        # Prints 'ptr' instead of actual addresses, useful for comparing code from different builds
        # enabled: user_func ptr, 1, 1, ptr, 1073741824
        # disabled: user_func 0x800eb72c, 1, 1, 0x80caa0d0, 1073741824
        self.no_pointer = args.nopointer

        # --ttyd, -t
        # Changes to TTYD values for opcodes and variables
        self.spm = not args.ttyd

        # --cpp, -c
        # Changes output format to C++ preprocessor macros
        self.cpp_macros = args.cpp
class Indenter:
    """Tells how much an instruction changes the indentation of the listing."""

    def __init__(self, namer: "OpcodeNamer"):
        self.opc = namer

        # Indentation definitions
        # Unindentation happens before instruction, indentation after
        # "Middle" indents unindent before and indent after
        self.indents = {
            "do", "if_str_equal", "if_str_not_equal", "if_str_small", "if_str_large",
            "if_str_small_equal", "if_str_large_equal", "iff_equal", "iff_not_equal",
            "iff_small", "iff_large", "iff_small_equal", "iff_large_equal", "if_equal",
            "if_not_equal", "if_small", "if_large", "if_small_equal", "if_large_equal",
            "if_flag", "if_not_flag", "inline_evt", "inline_evt_id", "brother_evt",
            "brother_evt_id"
        }
        self.double_indents = {
            "switch", "switchi"
        }
        self.middle_indents = {
            "else", "case", "case_equal", "case_not_equal", "case_small", "case_large",
            "case_small_equal", "case_large_equal", "case_etc", "case_or", "case_and",
            "case_flag", "case_between"
        }
        self.unindents = {
            "end_if", "end_inline", "while", "end_brother"
        }
        self.double_unindents = {
            "end_switch"
        }
        self.indents |= self.middle_indents
        self.unindents |= self.middle_indents

    # Opcode to indentation difference before instruction
    def get_unindent(self, opc: int) -> int:
        name = self.opc.opc_to_name(opc)
        if name in self.double_unindents:
            return 2
        if name in self.unindents:
            return 1
        return 0

    # Opcode to indentation difference after instruction
    def get_indent(self, opc: int) -> int:
        name = self.opc.opc_to_name(opc)
        if name in self.double_indents:
            return 2
        if name in self.indents:
            return 1
        return 0


class OpType(Enum):
    """How a single operand of an instruction should be printed."""
    NORMAL = 0
    STRING = 1
    HEX = 2
    FUNC = 3


class Disassembler:
    """Converts binary evt scripts read from a RAM dump to text."""

    # Text emitted once per indentation level
    # NOTE(review): reproduced exactly as it appears in the reviewed text, where
    # runs of whitespace may have been collapsed - confirm the intended width
    INDENT_UNIT = " "

    def __init__(self, ctx: "Config"):
        self.ctx = ctx
        self.opc = OpcodeNamer(self.ctx.spm, self.ctx.cpp_macros)
        self.ram = BinaryReader(self.ctx.dump_path)
        self.sym = SymbolMap(self.ctx.map_path, self.ctx.cpp_macros)
        self.idt = Indenter(self.opc)

        # Special disassembly for certain operands
        self.operand_type_defs = self._build_operand_type_defs()

        # Data type definitions
        self.type_bases = self._build_type_bases(self.ctx.spm)

    # Operand types of the leading operands of instructions that need special printing
    @staticmethod
    def _build_operand_type_defs() -> dict:
        return {
            "if_str_equal" : [OpType.STRING, OpType.STRING],
            "if_str_not_equal" : [OpType.STRING, OpType.STRING],
            "if_str_small" : [OpType.STRING, OpType.STRING],
            "if_str_large" : [OpType.STRING, OpType.STRING],
            "if_str_small_equal" : [OpType.STRING, OpType.STRING],
            "if_str_large_equal" : [OpType.STRING, OpType.STRING],
            "if_flag" : [OpType.HEX, OpType.HEX],
            "if_not_flag" : [OpType.HEX, OpType.HEX],
            "case_flag" : [OpType.HEX],
            "debug_put_msg" : [OpType.STRING],
            "user_func" : [OpType.FUNC]
        }

    # Base value of every data type; insertion order matters to get_type
    @staticmethod
    def _build_type_bases(spm: bool) -> dict:
        bases = {
            "Address" : -270000000,
            "Float" : -240000000,
            "UF" : -210000000,
            "UW" : -190000000,
            "GSW" : -170000000,
            "LSW" : -150000000,
            "GSWF" : -130000000,
            "LSWF" : -110000000,
            "GF" : -90000000,
            "LF" : -70000000,
            "GW" : -50000000,
            "LW" : -30000000
        }
        if not spm:
            bases["Address"] = -250000000
            bases["Float"] = -230000000
        return bases

    # Disassembles a script at an address
    def disassemble(self, addr: int) -> str:
        # Indent inside EVT_BEGIN block for macro mode
        if self.ctx.cpp_macros:
            lines = ["EVT_BEGIN()"]
            min_indent = 1
        else:
            lines = []
            min_indent = 0
        indent = min_indent

        ptr = addr
        while True:
            # halfword cmdn
            # halfword cmd
            # word[cmdn] data
            count = self.ram.read_half(ptr)
            opc = self.ram.read_half(ptr + 2)
            data = self.ram.read_word_array(ptr + 4, count)
            is_end = self.opc.opc_to_name(opc) == "end_script"

            # Convert line to text
            line = self.disassemble_line(opc, data)

            # Unindent before instruction
            # Limited with min_indent since the game sometimes puts too many end_if opcodes
            indent = max(min_indent, indent - self.idt.get_unindent(opc))

            # Macro mode needs to fully unindent when terminating
            if self.ctx.cpp_macros and is_end:
                indent = 0

            # Apply indentation to line
            line = self.INDENT_UNIT * indent + line

            # Indent after instruction
            indent += self.idt.get_indent(opc)

            # Add address if enabled
            if self.ctx.show_line_addrs:
                line = f"{ptr:x}: {line}"

            lines.append(line)

            # Move to next instruction
            ptr += 4 + (count * 4)

            # end_script is the last instruction of a script
            if is_end:
                break

        return '\n'.join(lines)

    # Int to datatype name
    def get_type(self, val: int) -> str:
        for t, base in self.type_bases.items():
            # Special extent for address and float
            if t == "Address":
                if val <= base:
                    return t
            elif t == "Float":
                if val < self.type_bases["UF"]:
                    return t
            elif base <= val <= base + 10000000:
                return t
        return "Immediate"

    # Uint to int
    def sign(self, val: int) -> int:
        return struct.unpack(">i", val.to_bytes(4, "big"))[0]

    # Whether a word is ADDR(0), which is used as a null pointer for some functions
    def _is_null(self, addr: int) -> bool:
        # Operands arrive as unsigned words while the base is a signed value,
        # so both sides are compared as 32-bit patterns
        return (addr & 0xFFFFFFFF) == (self.type_bases["Address"] & 0xFFFFFFFF)

    # Prints an address
    def format_addr(self, addr: int) -> str:
        if self._is_null(addr):
            # NOTE(review): token choice per mode kept as found - confirm which
            # spelling the macro header expects
            if self.ctx.cpp_macros:
                return "nullptr"
            return "EVT_NULLPTR"
        if self.ctx.no_pointer and not self.sym.has_name(addr):
            # Hide bare addresses in noPointer mode
            return "ptr"
        # Return symbol instead of address if known
        return self.sym.get_name(addr)

    # Prints an operand
    def operand_normal(self, val: int) -> str:
        sval = self.sign(val)
        t = self.get_type(sval)
        if t == "Address":
            return self.format_addr(val)
        if t == "Float":
            return str((sval - self.type_bases["Float"]) / 1024)
        if t == "Immediate":
            return str(sval)
        return f"{t}({sval - self.type_bases[t]})"

    # Prints a string address as its value if enabled
    def operand_string(self, addr: int) -> str:
        t = self.get_type(self.sign(addr))
        # The null pointer has no string behind it, so it is never dereferenced
        if t == "Address" and self.ctx.show_strings and not self._is_null(addr):
            return f'"{self.ram.read_str(addr)}"'
        return self.operand_normal(addr)

    # Prints an immediate in hex (for flags)
    def operand_hex(self, val: int) -> str:
        if self.get_type(self.sign(val)) == "Immediate":
            return hex(val)
        return self.operand_normal(val)

    # Disassembles one operand of a line
    def disassemble_operand(self, val: int, op_t: OpType) -> str:
        # Format based on type
        if op_t == OpType.STRING:
            ret = self.operand_string(val)
        elif op_t == OpType.HEX:
            ret = self.operand_hex(val)
        else: # including OpType.FUNC, handled later
            ret = self.operand_normal(val)

        # Add extra macro formatting if needed
        if self.ctx.cpp_macros and op_t != OpType.FUNC:
            val_t = self.get_type(self.sign(val))
            if val_t == "Address":
                # Strings and the null token are passed as they are, everything
                # else is a symbol whose address is taken
                if op_t == OpType.STRING or self._is_null(val):
                    ret = f"PTR({ret})"
                else:
                    ret = f"PTR(&{ret})"
            elif val_t == "Float":
                ret = f"FLOAT({ret})"

        return ret

    # Disassembles one line of a script
    def disassemble_line(self, opc, data):
        # Add instruction name
        instr = self.opc.opc_to_name(opc)
        line = f"{self.opc.name_to_printing_name(instr)}"

        # Get operand type definitions, defaulting to normal operands where not defined
        # (slicing copies, so the shared definition list is never modified)
        types = self.operand_type_defs.get(instr, [])[:len(data)]
        types += [OpType.NORMAL] * (len(data) - len(types))

        # Format each operand by type
        ops = [self.disassemble_operand(val, t) for val, t in zip(data, types)]

        if self.ctx.cpp_macros:
            line += '(' + ", ".join(ops) + ')'
        else:
            line += ' ' + ", ".join(ops)

        return line.strip()
def main():
    """Disassemble the script selected on the command line and print it."""
    config = Config()
    disassembler = Disassembler(config)
    print(disassembler.disassemble(config.addr))


if __name__ == "__main__":
    main()
"switch_break", - 0x31: "end_switch", - 0x32: "set", - 0x33: "seti", - 0x34: "setf", - 0x35: "add", - 0x36: "sub", - 0x37: "mul", - 0x38: "div", - 0x39: "mod", - 0x3a: "addf", - 0x3b: "subf", - 0x3c: "mulf", - 0x3d: "divf", - 0x3e: "set_read", - 0x3f: "read", - 0x40: "read2", - 0x41: "read3", - 0x42: "read4", - 0x43: "read_n", - 0x44: "set_readf", - 0x45: "readf", - 0x46: "readf2", - 0x47: "readf3", - 0x48: "readf4", - 0x49: "readf_n", - 0x4a: "clamp_int", - 0x4b: "set_user_wrk", - 0x4c: "set_user_flg", - 0x4d: "alloc_user_wrk", - 0x4e: "and", - 0x4f: "andi", - 0x50: "or", - 0x51: "ori", - 0x52: "set_frame_from_msec", - 0x53: "set_msec_from_frame", - 0x54: "set_ram", - 0x55: "set_ramf", - 0x56: "get_ram", - 0x57: "get_ramf", - 0x58: "setr", - 0x59: "setrf", - 0x5a: "getr", - 0x5b: "getrf", - 0x5c: "user_func", - 0x5d: "run_evt", - 0x5e: "run_evt_id", - 0x5f: "run_child_evt", - 0x60: "delete_evt", - 0x61: "restart_evt", - 0x62: "set_pri", - 0x63: "set_spd", - 0x64: "set_type", - 0x65: "stop_all", - 0x66: "start_all", - 0x67: "stop_other", - 0x68: "start_other", - 0x69: "stop_id", - 0x6a: "start_id", - 0x6b: "chk_evt", - 0x6c: "inline_evt", - 0x6d: "inline_evt_id", - 0x6e: "end_inline", - 0x6f: "brother_evt", - 0x70: "brother_evt_id", - 0x71: "end_brother", - 0x72: "debug_put_msg", - 0x73: "debug_msg_clear", - 0x74: "debug_put_reg", - 0x75: "debug_name", - 0x76: "debug_rem", - 0x77: "debug_bp" -} +class OpcodeNamer: + def __init__(self, spm: bool, cpp_macros: bool): + # Base list, starting from 1 + opcodes = [ + "end_script", "end_evt", "lbl", "goto", "do", "while", "do_break", + "do_continue", "wait_frm", "wait_msec", "halt", "if_str_equal", + "if_str_not_equal", "if_str_small", "if_str_large", "if_str_small_equal", + "if_str_large_equal", "iff_equal", "iff_not_equal", "iff_small", + "iff_large", "iff_small_equal", "iff_large_equal", "if_equal", + "if_not_equal", "if_small", "if_large", "if_small_equal", "if_large_equal", + "if_flag", "if_not_flag", "else", 
"end_if", "switch", "switchi", + "case_equal", "case_not_equal", "case_small", "case_large", "case_small_equal", + "case_large_equal", "case_etc", "case_or", "case_and", "case_flag", + "case_end", "case_between", "switch_break", "end_switch", "set", "seti", + "setf", "add", "sub", "mul", "div", "mod", "addf", "subf", "mulf", "divf", + "set_read", "read", "read2", "read3", "read4", "read_n", "set_readf", + "readf", "readf2", "readf3", "readf4", "readf_n", "clamp_int", "set_user_wrk", + "set_user_flg", "alloc_user_wrk", "and", "andi", "or", "ori", "set_frame_from_msec", + "set_msec_from_frame", "set_ram", "set_ramf", "get_ram", "get_ramf", "setr", + "setrf", "getr", "getrf", "user_func", "run_evt", "run_evt_id", "run_child_evt", + "delete_evt", "restart_evt", "set_pri", "set_spd", "set_type", "stop_all", + "start_all", "stop_other", "start_other", "stop_id", "start_id", "chk_evt", + "inline_evt", "inline_evt_id", "end_inline", "brother_evt", "brother_evt_id", + "end_brother", "debug_put_msg", "debug_msg_clear", "debug_put_reg", + "debug_name", "debug_rem", "debug_bp" + ] -if not config.spm: - del opcodes[0x4a] - for i in range(0x4a, 0x77): - opcodes[i] = opcodes[i+1] - del opcodes[0x77] + # Remove SPM-only opcode for TTYD + if not spm: + opcodes.remove("clamp_int") -opcodesR = {} -for opc in opcodes: - name = opcodes[opc] - opcodesR[name] = opc + # Build name and opcode dicts + self.names = {} + for i, name in enumerate(opcodes): + opc = i + 1 + self.names[opc] = name + + # Store macro flag + self.cpp_macros = cpp_macros + + # Gets the name for an opcode + def opc_to_name(self, opc: int) -> str: + return self.names[opc] + + # Considers macro mode for a name + def name_to_printing_name(self, name: str) -> str: + if self.cpp_macros: + if name == "end_evt": + return "RETURN" + elif name == "end_script": + return "EVT_END" + else: + return name.upper() + else: + return name diff --git a/parsers.py b/parsers.py deleted file mode 100644 index da8d89f..0000000 --- 
a/parsers.py +++ /dev/null @@ -1,167 +0,0 @@ -import struct -from enum import Enum -from config import config -from binread import ramReader -from opcodes import opcodes, opcodesR -from symbols import symbolMap - -# Indentation definitions -indents = ["do", "if_str_equal", "if_str_not_equal", "if_str_small", "if_str_large", "if_str_small_equal", "if_str_large_equal", "iff_equal", "iff_not_equal", "iff_small", "iff_large", "iff_small_equal", "iff_large_equal", "if_equal", "if_not_equal", "if_small", "if_large", "if_small_equal", "if_large_equal", "if_flag", "if_not_flag", "inline_evt", "inline_evt_id", "brother_evt", "brother_evt_id"] -doubleIndents = ["switch", "switchi"] -middleIndents = ["else", "case", "case_equal", "case_not_equal", "case_small", "case_large", "case_small_equal", "case_large_equal", "case_etc", "case_or", "case_and", "case_flag", "case_between"] -unindents = ["end_if", "end_inline", "while", "end_brother"] -doubleUnindents = ["end_switch"] -indents += middleIndents -unindents += middleIndents - -# Special disassembly for certain operands -class OpType(Enum): - NORMAL = 0 - STRING = 1 - HEX = 2 -operandTypeDefs = { - "if_str_equal" : [OpType.STRING, OpType.STRING], - "if_str_not_equal" : [OpType.STRING, OpType.STRING], - "if_str_small" : [OpType.STRING, OpType.STRING], - "if_str_large" : [OpType.STRING, OpType.STRING], - "if_str_small_equal" : [OpType.STRING, OpType.STRING], - "if_str_large_equal" : [OpType.STRING, OpType.STRING], - "if_flag" : [OpType.HEX, OpType.HEX], - "if_not_flag" : [OpType.HEX, OpType.HEX], - "case_flag" : [OpType.HEX], - "debug_put_msg" : [OpType.STRING] -} - -# Data type definitions -if config.spm: - typeBases = { - 'Address': -270000000, - 'Float': -240000000, - 'UF': -210000000, - 'UW': -190000000, - 'GSW': -170000000, - 'LSW': -150000000, - 'GSWF': -130000000, - 'LSWF': -110000000, - 'GF': -90000000, - 'LF': -70000000, - 'GW': -50000000, - 'LW': -30000000 - } -else: - typeBases = { - 'Address': -250000000, - 'Float': 
-230000000, - 'UF': -210000000, - 'UW': -190000000, - 'GSW': -170000000, - 'LSW': -150000000, - 'GSWF': -130000000, - 'LSWF': -110000000, - 'GF': -90000000, - 'LF': -70000000, - 'GW': -50000000, - 'LW': -30000000 - } - -# Opcode to indentation difference before instruction (-2, -1, 0) -def getUnindent(opc): - if opcodes[opc] in doubleUnindents: - return -2 - else: - return -1 * (opcodes[opc] in unindents) - -# Opcode to indentation difference after instruction (0, 1, 2) -def getIndent(opc): - if opcodes[opc] in doubleIndents: - return 2 - else: - return opcodes[opc] in indents - -# Uint to int -def sign(val): - return struct.unpack(">i", int.to_bytes(val, 4, 'big'))[0] - -# Int to datatype -def getType(val): - for t in typeBases: - if t == 'Address': - if val <= typeBases[t]: - return t - elif t == 'Float': - if val < typeBases['UF']: - return t - else: - base = typeBases[t] - if base <= val <= base + 10000000: - return t - return "Immediate" - -# Normal disassembler for operands -def normalOperand(val): - sval = sign(val) - t = getType(sval) - if t == 'Address': - if sval == typeBases[t]: - return "nullptr" - else: - if config.useMap and symbolMap.hasAddress(val): - return symbolMap.getName(val) - elif config.noPointer: - return "ptr" - else: - return hex(val) - elif t == 'Float': - return f"{(sval - typeBases['Float']) / 1024}" - elif t == 'Immediate': - return sval - else: - return f"{t}({sval - typeBases[t]})" - -# Print a string address as its value -def stringOperand(addr): - t = getType(sign(addr)) - if t == 'Address': - if config.showStrings: - return f'"{ramReader.readatS(addr)}"' - elif config.noPointer: - return "ptr" - else: - return hex(addr) - else: - return normalOperand(addr) - -# Print immediates in hex (for flags) -def hexOperand(val): - t = getType(sign(val)) - if t == 'Immediate': - return hex(val) - else: - return normalOperand(val) - -# Disassemble an operand list for a specific instruction -def parseOperands(opc, data): - if len(data) == 0: - 
class SymbolMap:
    """Accessor for the symbols of a dolphin format symbol map."""

    def __init__(self, path, cpp_macros):
        """Load the map.

        path: path of the symbol map, or None for an empty map.
        cpp_macros: True to print unknown addresses as unk_<hex> identifiers
                    instead of hex literals.
        """
        self.names = {}
        self.addresses = {}
        self.cpp_macros = cpp_macros
        if path is None:
            return

        with open(path) as mapfile:
            for line in mapfile:
                # addr size addr2 section(?) name
                splt = line.split()

                # Skip headers, blank lines and anything else that isn't a
                # full symbol entry starting with an 8xxxxxxx address
                if len(splt) < 5 or not splt[0].startswith('8'):
                    continue
                try:
                    addr = int(splt[0], 16)
                except ValueError:
                    continue

                name = splt[4]
                self.names[addr] = name
                self.addresses[name] = addr

    # Whether an address has a symbol
    def has_name(self, addr):
        return addr in self.names

    # Whether a symbol exists
    def has_address(self, name):
        return name in self.addresses

    # Symbol for an address, or a printable stand-in if there is none
    def get_name(self, addr):
        if addr in self.names:
            return self.names[addr]
        if self.cpp_macros:
            return f"unk_{addr:x}"
        return hex(addr)

    # Address of a symbol, raises KeyError if it doesn't exist
    def get_address(self, name):
        return self.addresses[name]
if_str_equal LW(0), "mac_02" +80d2f8e4: run_child_evt 0x80d2f650 +80d2f8ec: end_if +80d2f8f0: if_str_equal LW(0), "mac_05" +80d2f8fc: run_child_evt 0x80d2f718 +80d2f904: end_if +80d2f908: if_str_equal LW(0), "mac_12" +80d2f914: run_child_evt 0x80d2f788 +80d2f91c: end_if +80d2f920: if_str_equal LW(0), "mac_15" +80d2f92c: run_child_evt 0x80d2f858 +80d2f934: end_if +80d2f938: end_evt +80d2f93c: end_script""" + +# Double and middle indents +# No pointer +# Immediate operands +# No symbol map +assert test_disasm(0x80cf8f90, no_pointer=True, map_path=None) == """user_func ptr +switch GSW(0) + case_between 174, 178 + user_func ptr, 1, 1, ptr, 1 + case_etc + user_func ptr, 1, 1, ptr, 1 +end_switch +set LW(0), 0 +switch GSW(0) + case_equal 59 + set LW(0), ptr + case_equal 131 + set LW(0), ptr + case_equal 288 + set LW(0), ptr + case_large_equal 424 + if_equal GSWF(584), 0 + set LW(0), ptr + end_if +end_switch +if_not_equal LW(0), 0 + do 0 + user_func ptr, 2, LW(1) + if_not_flag LW(1), 0x1 + do_break + end_if + wait_frm 1 + while + user_func ptr, 0 + run_evt LW(0) +end_if +end_evt +end_script""" + +# Float operands +assert test_disasm(0x80cf8e28) == """mulf LW(0), 0.4443359375 +user_func evt_mapobj_rotate, 0x80caa458, 0, LW(0), 0 +mulf LW(0), -1.0 +user_func evt_mapobj_rotate, 0x80caa460, 0, LW(0), 0 +end_evt +end_script""" + +# Hex operands +assert test_disasm(0x80d2b490) == """do 0 + user_func 0x800d4460, 0, LW(10) + if_flag LW(10), 0x200 + do_break + end_if + user_func evt_key_get_buttontrg, 0, LW(10) + if_flag LW(10), 0x300 + do_break + end_if + wait_frm 1 +while +user_func 0x80c4af10, 2, LW(10) +if_flag LW(10), 0x4 + end_evt +end_if +user_func 0x80c4af10, 1, 2 +user_func 0x80c4afc4, LW(0), 7, LW(10) +if_not_equal LW(10), 0 + run_child_evt LW(10) +end_if +user_func 0x800d231c, 0, 65, 500 +user_func 0x800d3528, 0, 30, 500 +user_func 0x800d3528, 1, 30, 500 +user_func evt_snd_flag_on, 32 +user_func 0x800e720c, 2, 1 +user_func 0x800e7268, 1000, 300 +user_func 0x80c4afc4, 
LW(0), 3, LW(1), LW(2) +user_func evt_seq_mapchange, LW(1), LW(2) +end_evt +end_script""" + +# C++ macros +assert test_disasm(0x80d2f8c8, show_strings=True, cpp_macros=True) == """EVT_BEGIN() + USER_FUNC(evt_sub_get_mapname, 0, LW(0)) + IF_STR_EQUAL(LW(0), PTR("mac_02")) + RUN_CHILD_EVT(PTR(&unk_80d2f650)) + END_IF() + IF_STR_EQUAL(LW(0), PTR("mac_05")) + RUN_CHILD_EVT(PTR(&unk_80d2f718)) + END_IF() + IF_STR_EQUAL(LW(0), PTR("mac_12")) + RUN_CHILD_EVT(PTR(&unk_80d2f788)) + END_IF() + IF_STR_EQUAL(LW(0), PTR("mac_15")) + RUN_CHILD_EVT(PTR(&unk_80d2f858)) + END_IF() + RETURN() +EVT_END()""" + +# C++ macros with floats +assert test_disasm(0x80cf8e28, cpp_macros=True) == """EVT_BEGIN() + MULF(LW(0), FLOAT(0.4443359375)) + USER_FUNC(evt_mapobj_rotate, PTR(&unk_80caa458), 0, LW(0), 0) + MULF(LW(0), FLOAT(-1.0)) + USER_FUNC(evt_mapobj_rotate, PTR(&unk_80caa460), 0, LW(0), 0) + RETURN() +EVT_END()""" + +# TODO: add tests for +# TTYD +# nullptr +# All data types +# All opcodes? +