diff --git a/.gitignore b/.gitignore index 992ebb24..ffe0d7b4 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,6 @@ rz_hexagon.egg-info/ Hexagon.json .config .last_llvm_commit_info -venv/ -.venv -/rizin/ +.venv/ +.vscode +rizin/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..d289d8d0 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "rzil_compiler"] + path = rzil_compiler + url = git@github.com:rizinorg/rz-rzilcompiler.git diff --git a/.reuse/dep5 b/.reuse/dep5 index dafeadd6..de9e6a91 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -23,6 +23,10 @@ Files: .gitignore Copyright: 2021 RizinOrg License: LGPL-3.0-only +Files: .gitmodules +Copyright: 2022 Rot127 +License: LGPL-3.0-only + Files: .pylintrc Copyright: 2021 RizinOrg License: LGPL-3.0-only @@ -54,3 +58,7 @@ License: LGPL-3.0-only Files: import/* Copyright: 2022 Rot127 License: LGPL-3.0-only + +Files: handwritten/*.json +Copyright: 2023 Rot127 +License: LGPL-3.0-only diff --git a/Conf.py b/Conf.py new file mode 100644 index 00000000..cb129118 --- /dev/null +++ b/Conf.py @@ -0,0 +1,75 @@ +# SPDX-FileCopyrightText: 2022 Rot127 +# SPDX-License-Identifier: LGPL-3.0-only + +import subprocess + +from enum import StrEnum +from pathlib import Path + +from helperFunctions import log + + +class OutputFile(StrEnum): + """ + Enum of paths used by the components. + + is replaced with the path to the repositories root. + is replaced with the architecture name. 
+ """ + + OUT_BASE = "/rizin/" + LIBRZ_DIR = "/rizin/librz/" + IL_OPS_DIR = "/rizin/librz/arch/isa/hexagon/il_ops/" + + ANA_TESTS = "/rizin/test/db/analysis/hexagon" + ASM_TESTS = "/rizin/test/db/asm/hexagon" + RZIL_TESTS = "/rizin/test/db/rzil/hexagon" + ANALYSIS_HEXAGON_C = "/rizin/librz/arch/p/analysis/analysis_hexagon.c" + ASM_HEXAGON_C = "/rizin/librz/arch/p/asm/asm_hexagon.c" + CC_HEXAGON_32_SDB_TXT = "/rizin/librz/arch/types/cc-hexagon-32.sdb.txt" + HEXAGON_IL_C = "/rizin/librz/arch/isa/hexagon/hexagon_il.c" + HEXAGON_IL_GETTER_TABLE_H = "/rizin/librz/arch/isa/hexagon/hexagon_il_getter_table.h" + HEXAGON_IL_H = "/rizin/librz/arch/isa/hexagon/hexagon_il.h" + HEXAGON_ARCH_C = "/rizin/librz/arch/isa/hexagon/hexagon_arch.c" + HEXAGON_ARCH_H = "/rizin/librz/arch/isa/hexagon/hexagon_arch.h" + HEXAGON_C = "/rizin/librz/arch/isa/hexagon/hexagon.c" + HEXAGON_DISAS_C = "/rizin/librz/arch/isa/hexagon/hexagon_disas.c" + HEXAGON_H = "/rizin/librz/arch/isa/hexagon/hexagon.h" + HEXAGON_INSN_H = "/rizin/librz/arch/isa/hexagon/hexagon_insn.h" + HEXAGON_REG_TABLES_H = "/rizin/librz/arch/isa/hexagon/hexagon_reg_tables.h" + HEXAGON_DWARF_REG_TABLE_H = "/rizin/librz/arch/isa/hexagon/hexagon_dwarf_reg_num_table.inc" + + +class Conf: + """ + Holds all the configurable values like paths. 
+ """ + + @staticmethod + def replace_placeholders(path_str: str) -> str: + if "" in path_str: + root = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + check=True, + stdout=subprocess.PIPE, + ) + root_dir = Path(root.stdout.decode("utf8").strip("\n")) + if not root_dir.exists(): + raise NotADirectoryError(str(root_dir)) + + path_str = path_str.replace("", str(root_dir)) + return path_str + + @staticmethod + def get_path(file: OutputFile) -> Path: + return Path(Conf.replace_placeholders(file)) + + @staticmethod + def check_path(path: Path, is_file: bool = True) -> None: + """Checks a given path and creates the directory if it doesn't exist.""" + if not path.exists(): + target = path + if is_file: + target = path.parent + log(f"Create dir {str(target)}") + target.mkdir(parents=True, exist_ok=True) diff --git a/HardwareRegister.py b/HardwareRegister.py index 12d2acec..c90081c8 100644 --- a/HardwareRegister.py +++ b/HardwareRegister.py @@ -55,6 +55,10 @@ def set_well_defined_asm_names(self, llvm_asm: str, llvm_alt: list): match_alias = re.search(r"^[rcpgvqs]\d{1,2}(:\d{1,2})?$", ",".join(llvm_alt)) if (llvm_asm == "p3:0") or (llvm_asm in llvm_alt): match_asm = None + if (llvm_asm in llvm_alt) and len(llvm_alt) == 1: + # Alias of some regs equal asm name. + self.asm_name = llvm_asm + self.alias = [] if match_asm and match_alias: raise ImplementationException( "HW reg alias and asm names match same pattern: alias: {} asm: {}".format(",".join(llvm_alt), llvm_asm) diff --git a/Immediate.py b/Immediate.py index a40d1243..024f6f13 100644 --- a/Immediate.py +++ b/Immediate.py @@ -62,10 +62,15 @@ def __init__( self.encoding_width = 0 # Num. bits stored in encoding. self.total_width = 0 - self.parse_imm_type(llvm_type) - - def parse_imm_type(self, llvm_imm_type: str) -> None: - """Parse immediate types like: u4_2Imm. 
This method sets all kinds of flags, the scale and total width.""" + self.parse_imm_type(llvm_type, llvm_syntax == "II") + + def parse_imm_type(self, llvm_imm_type: str, is_second: bool) -> None: + """Parse immediate types like: u4_2Imm. This method sets + all kinds of flags, the scale, total width and ISA identifier. + Args: + llvm_imm_type: The llvm type string (e.g.: u4_2Imm). + is_second: Flag if this immediate is the second immediate in the instruction. + """ type_letter = re.search(r"^([a-z]+)\d{1,2}", llvm_imm_type) if not type_letter: raise ImplementationException("Unhandled immediate type: {}".format(llvm_imm_type)) @@ -80,6 +85,7 @@ def parse_imm_type(self, llvm_imm_type: str) -> None: elif type_letter == "a" or type_letter == "b": self.is_signed = True self.is_pc_relative = True + type_letter = "r" # In QEMUs shortcode all PC relative immediates are named with 'r' # Constant value -1 elif type_letter == "n": self.is_signed = True @@ -95,6 +101,7 @@ def parse_imm_type(self, llvm_imm_type: str) -> None: return else: raise ImplementationException("Unhandled immediate type: {}".format(llvm_imm_type)) + self.isa_id = type_letter.upper() if is_second else type_letter # Value before _ represents number of encoded bits. 
result = re.search(r"[a-z](\d+)\_", llvm_imm_type) @@ -147,7 +154,8 @@ def c_template(self, force_extendable=False) -> str: if self.total_width == 32: info.append("HEX_OP_TEMPLATE_FLAG_IMM_DOUBLE_HASH") info = " | ".join(info) - r = f".info = {info}, .masks = {{ {self.opcode_mask.c_template} }}" + r = f".info = {info}, .masks = {{ {self.opcode_mask.c_template} }}, " + r += f".isa_id = '{self.isa_id if self.isa_id != '' else 0}'" if self.scale > 0: r += f", .imm_scale = {self.scale}" return r diff --git a/InstructionTemplate.py b/InstructionTemplate.py index 614223b8..f3bc600c 100644 --- a/InstructionTemplate.py +++ b/InstructionTemplate.py @@ -8,6 +8,7 @@ import HexagonArchInfo import PluginInfo +from rzilcompiler.Compiler import RZILInstruction from Immediate import Immediate from ImplementationException import ImplementationException from InstructionEncoding import InstructionEncoding @@ -87,7 +88,7 @@ def __init__(self, llvm_instruction): self.llvm_new_operand_index: bool = None self.is_predicated: bool = False self.is_pred_new: bool = False - self.is_pred_false: bool = False # Duplex can have both, true and false predicates. + self.is_pred_false: bool = False self.is_pred_true: bool = False # Special @@ -97,6 +98,8 @@ def __init__(self, llvm_instruction): self.is_loop_begin: bool = None self.loop_member = None + self.il_ops: RZILInstruction = None + # Execution specific (Interesting for decompiler plugin) # The address mode of load/store instructions self.addr_mode = None @@ -216,6 +219,8 @@ def parse_instruction(self) -> None: # Indices of new values (stored in "opNewValue") are only for non predicates. is_new_value = self.new_operand_index == index and self.has_new_non_predicate operand = Register(op_name, op_type, is_new_value, index) + # Second letter in reg name is used in QEMU shortcode to identify the register. + operand.isa_id = op_name[1] # Whether the predicate registers holds a new value is denoted in "isPredicatedNew". 
if self.is_pred_new and operand.is_predicate: operand.is_new_value = True @@ -314,7 +319,7 @@ def get_template_in_c(self) -> str: flags.append("HEX_INSN_TEMPLATE_FLAG_LOOP_0") elif self.loop_member == LoopMembership.HEX_LOOP_1: flags.append("HEX_INSN_TEMPLATE_FLAG_LOOP_1") - if flags != []: + if flags: flags = " | ".join(flags) code += f".flags = {flags},\n" code += "}" diff --git a/LLVMImporter.py b/LLVMImporter.py index ad775f81..8eb0e399 100755 --- a/LLVMImporter.py +++ b/LLVMImporter.py @@ -1,16 +1,22 @@ #!/usr/bin/env python3 +import argparse # SPDX-FileCopyrightText: 2021 Rot127 # # SPDX-License-Identifier: LGPL-3.0-only -from itertools import chain import json import os import re import subprocess -import argparse +from pathlib import Path +from tqdm import tqdm + +from Conf import OutputFile, Conf +from rzilcompiler.Transformer.Hybrids.SubRoutine import SubRoutineInitType +from rzilcompiler.ArchEnum import ArchEnum +from rzilcompiler.Compiler import Compiler, RZILInstruction from HardwareRegister import HardwareRegister from ImplementationException import ImplementationException from Instruction import Instruction @@ -25,12 +31,14 @@ set_pos_after_license, get_license, get_generation_timestamp, - compare_src_to_old_src, + src_matches_old_src, include_file, + gen_c_doxygen, + get_delimiter_line, ) import PluginInfo import HexagonArchInfo -from InstructionTemplate import PARSE_BITS_MASK_CONST +from InstructionTemplate import PARSE_BITS_MASK_CONST, InstructionTemplate class LLVMImporter: @@ -43,9 +51,14 @@ class LLVMImporter: sub_instruction_names = list() sub_instructions = dict() hardware_regs = dict() + rzilcompiler = None + edited_files: [str] = list() - def __init__(self, build_json: bool, test_mode=False): + def __init__(self, build_json: bool, gen_rzil: bool, skip_pcpp: bool, rzil_compile: bool, test_mode=False): + self.gen_rzil = gen_rzil + self.rzil_compile = rzil_compile self.sub_namespaces = set() + self.skip_pcpp = skip_pcpp self.test_mode = 
test_mode if self.test_mode: self.hexagon_target_json_path = "../Hexagon.json" @@ -60,6 +73,9 @@ def __init__(self, build_json: bool, test_mode=False): exit() self.set_llvm_commit_info(use_prev=True) + if self.gen_rzil: + self.setup_rzil_compiler() + with open(self.hexagon_target_json_path) as file: self.hexArch = json.load(file) self.update_hex_arch() @@ -83,7 +99,6 @@ def __init__(self, build_json: bool, test_mode=False): self.check_insn_syntax_length() if not test_mode: self.generate_rizin_code() - self.generate_decompiler_code() self.add_license_info_header() self.apply_clang_format() log("Done") @@ -97,11 +112,12 @@ def get_import_config(self): if not os.path.exists(".config"): with open(cwd + "/.config", "w") as f: config = "# Configuration for th LLVMImporter.\n" - config += "LLVM_PROJECT_REPO_DIR = /path/to/llvm_project" + config += "LLVM_PROJECT_REPO_DIR = /path/to/llvm_project\n" + config += "CLANG_FORMAT_BIN = clang-format-18" f.write(config) log( - "This is your first time running the generator{}.".format(" TESTS" if self.test_mode else "") - + " Please set the path to the llvm_project repo in {}/.config.".format(cwd) + f"This is your first time running the generator{' TESTS' if self.test_mode else ''}." + + f" Please set the path to the llvm_project repo and clang-format binary in {cwd}/.config." 
) exit() with open(cwd + "/.config") as f: @@ -111,15 +127,18 @@ def get_import_config(self): continue ln = ln.split("=") if ln[0].strip() == "LLVM_PROJECT_REPO_DIR": - dr = ln[1].strip() - if not os.path.exists(dr): + conf_value = ln[1].strip() + if not os.path.exists(conf_value): log( - "The LLVM_PROJECT_REPO_DIR is set to an invalid directory: '{}'".format(dr), + f"The LLVM_PROJECT_REPO_DIR is set to an invalid directory: '{conf_value}'", LogLevel.ERROR, ) exit() - self.config["LLVM_PROJECT_REPO_DIR"] = dr - self.config["LLVM_PROJECT_HEXAGON_DIR"] = dr + "/llvm/lib/Target/Hexagon" + self.config["LLVM_PROJECT_REPO_DIR"] = conf_value + self.config["LLVM_PROJECT_HEXAGON_DIR"] = conf_value + "/llvm/lib/Target/Hexagon" + elif ln[0].strip() == "CLANG_FORMAT_BIN": + conf_value = ln[1].strip() + self.config["CLANG_FORMAT_BIN"] = conf_value else: log("Unknown configuration in config file: '{}'".format(ln[0]), LogLevel.WARNING) else: @@ -181,6 +200,15 @@ def generate_hexagon_json(self): cwd=self.config["LLVM_PROJECT_HEXAGON_DIR"], ) + def setup_rzil_compiler(self): + log("Init compiler") + self.rzilcompiler = Compiler(ArchEnum.HEXAGON) + if not self.skip_pcpp: + self.rzilcompiler.run_preprocessor() + + log("Load instruction behavior.") + self.rzilcompiler.preprocessor.load_insn_behavior() + def update_hex_arch(self): """Imports system instructions and registers described in the manual but not implemented by LLVM.""" reg_count = 0 @@ -192,19 +220,23 @@ def update_hex_arch(self): for filename in sorted(os.listdir(reg_dir)): if filename.split(".")[-1] != "json": continue + reg_class = "" + if len(filename.split("-")) == 2: + reg_class = filename.split("-")[0] with open(reg_dir + filename) as f: reg = json.load(f) reg_name = list(reg.keys())[0] - if reg_name != "SysRegs" or reg_name != "SysRegs64": - if reg_name in self.hexArch["!instanceof"]["DwarfRegNum"]: - raise ImplementationException( - "Register {} already present in the LLVM definitions." 
- " Please check whether LLVM implements System/Monitor" - " instructions and system registers etc.".format(reg_name) - ) - self.hexArch["!instanceof"]["DwarfRegNum"] += reg.keys() - reg_count += 1 + if reg_name in self.hexArch["!instanceof"]["DwarfRegNum"]: + raise ValueError( + f"Register {reg_name} already present in the LLVM definitions." + " Please check whether LLVM defines it." + ) + self.hexArch["!instanceof"]["DwarfRegNum"] += reg.keys() + reg_count += 1 self.hexArch.update(reg) + if reg_class: + arg = {"def": reg_name, "kind": "def", "printable": reg_name} + self.hexArch[reg_class]["MemberList"]["args"].append([arg, None]) instr_count = 0 insn_dir = "./import/instructions/" if not self.test_mode else "../import/instructions/" @@ -218,44 +250,130 @@ def update_hex_arch(self): for llvm_instr in self.hexArch["!instanceof"]["HInst"]: syntax_list[llvm_instr] = self.hexArch[llvm_instr]["AsmString"] if "UNDOCUMENTED" not in instn_name and insn[instn_name]["AsmString"] in syntax_list.values(): - log(f"Imported instruction was added to LLVM. Remove it if opcodes match. Instr.: '{instn_name}'") - continue + if self.obsolete_import_handler(filename, insn, instn_name, syntax_list): + continue self.hexArch.update(insn) self.hexArch["!instanceof"]["HInst"] += list(insn.keys()) instr_count += 1 log("Imported {} registers.".format(reg_count)) log("Imported {} instructions.".format(instr_count)) + def obsolete_import_handler(self, filename, insn, instn_name, syntax_list) -> bool: + """ + Handles the case of an imported instruction becoming obsolete because it was + added to LLVM. + :return: True if the encodings match. False otherwise. 
+ """ + name_idx = list(syntax_list.values()).index(insn[instn_name]["AsmString"]) + llvm_insn_name = self.hexArch["!instanceof"]["HInst"][name_idx] + imported_enc = insn[instn_name]["Inst"] + llvm_enc = self.hexArch[llvm_insn_name]["Inst"] + encodings_match = True + cleaned_llvm_enc = list() + for imp_bit, llvm_bit in zip(imported_enc, llvm_enc): + if isinstance(imp_bit, dict) and isinstance(llvm_bit, dict): + if imp_bit["var"] != llvm_bit["var"]: + encodings_match = False + elif imp_bit != llvm_bit: + encodings_match = False + + if isinstance(llvm_bit, dict): + del llvm_bit["kind"] + del llvm_bit["printable"] + cleaned_llvm_enc.append(llvm_bit) + if encodings_match: + log( + "Imported instruction was added to LLVM.\n" + f"\tInstr.: '{instn_name}' -> '{llvm_insn_name}'\n" + f"\tRemove: {filename}" + ) + return True + log( + "Imported instruction was added to LLVM. But the encodings mismatch!\n" + f"\tInstr.: '{instn_name}' -> '{llvm_insn_name}'\n" + f"\tImported enc: {imported_enc}\n" + f"\tLLVM enc: {llvm_enc}", + LogLevel.WARNING, + ) + return False + + def skip_insn(self, insn_name: str) -> bool: + # PS_ instructions are pseudo instructions, but not marked as such. + # They do not exist in QEMU. + if insn_name.lower().startswith("ps"): + return True + return False + def parse_instructions(self) -> None: - for i, i_name in enumerate(self.hexArch["!instanceof"]["HInst"]): - llvm_instruction = self.hexArch[i_name] - if llvm_instruction is None: - log( - "Could not find instruction with name: {} in json file.".format(i_name), - LogLevel.ERROR, - ) - continue - if llvm_instruction["isPseudo"]: - log( - "Pseudo instruction passed. 
Name: {}".format(i_name), - LogLevel.VERBOSE, - ) - continue - log("{} | Parse {}".format(i, i_name), LogLevel.VERBOSE) - self.llvm_instructions[i_name] = llvm_instruction - - if llvm_instruction["Type"]["def"] == "TypeSUBINSN": - self.sub_instruction_names.append(i_name) - self.sub_instructions[i_name] = SubInstruction(llvm_instruction) - ns = self.sub_instructions[i_name].namespace - if ns not in self.sub_namespaces: - self.sub_namespaces.add(ns) - else: - self.normal_instruction_names.append(i_name) - self.normal_instructions[i_name] = Instruction(llvm_instruction) + compiled_insn = 0 + hvx_compiled = 0 + standard_compiled = 0 + # Filter out pseudo instructions + no_pseudo = [ + i for i in self.hexArch["!instanceof"]["HInst"] if not self.hexArch[i]["isPseudo"] and not self.skip_insn(i) + ] + if self.gen_rzil and self.rzil_compile: + self.rzilcompiler.parse_shortcode() + + with tqdm( + desc="Parse instructions.", + postfix=f"Succ. compiled: {compiled_insn}/{len(no_pseudo)}", + total=len(no_pseudo), + ) as t: + for i, i_name in enumerate(no_pseudo): + llvm_instruction = self.hexArch[i_name] + if llvm_instruction is None: + log("Could not find instruction with name: {} in json file.".format(i_name), LogLevel.ERROR) + continue + log("{} | Parse {}".format(i, i_name), LogLevel.VERBOSE) + self.llvm_instructions[i_name] = llvm_instruction + + if llvm_instruction["Type"]["def"] == "TypeSUBINSN": + self.sub_instruction_names.append(i_name) + insn = SubInstruction(llvm_instruction) + self.sub_instructions[i_name] = insn + ns = self.sub_instructions[i_name].namespace + if ns not in self.sub_namespaces: + self.sub_namespaces.add(ns) + else: + self.normal_instruction_names.append(i_name) + insn = Instruction(llvm_instruction) + self.normal_instructions[i_name] = insn + if self.gen_rzil and self.rzil_compile: + log("{} | Compile {}".format(i, i_name), LogLevel.VERBOSE) + if self.set_il_op(insn): + if i_name[:2] == "V6": + hvx_compiled += 1 + else: + standard_compiled += 1 + 
compiled_insn += 1 + else: + insn.il_ops = RZILInstruction.get_unimplemented_rzil_instr(insn.name) + t.n = i + t.postfix = f"Succ. compiled: {compiled_insn}/{len(no_pseudo)}" + t.update() + if self.gen_rzil: + self.rzilcompiler.transformer.ext.report_missing_fcns() log("Parsed {} normal instructions.".format(len(self.normal_instructions))) log("Parsed {} sub-instructions.".format(len(self.sub_instructions))) + if self.gen_rzil and self.rzil_compile: + total = len(self.normal_instruction_names) + len(self.sub_instruction_names) + total_hvx = len([n for n in self.normal_instruction_names if n[:2] == "V6"]) + total_standard = total - total_hvx + log(f"{standard_compiled}/{total_standard} standard instructions compiled.") + log(f"{hvx_compiled}/{total_hvx} HVX instructions compiled.") + log(f"In total: {compiled_insn}/{total} instructions compiled.") + + def set_il_op(self, insn: InstructionTemplate) -> bool: + try: + insn.il_ops = self.rzilcompiler.compile_insn(insn.name) + return True + except Exception as e: + log(f"Failed to compile instruction {insn.name}\nException: {e}\n", LogLevel.DEBUG) + # Compiler failure for instruction or not implemented + insn.il_ops = RZILInstruction.get_unimplemented_rzil_instr(insn.name) + return False def parse_hardware_registers(self) -> None: cc = 0 @@ -357,9 +475,14 @@ def generate_rizin_code(self) -> None: self.build_hexagon_c() self.build_hexagon_h() self.build_dwarf_reg_num_table() + self.build_hexagon_reg_tables_h() self.build_asm_hexagon_c() self.build_hexagon_arch_c() self.build_hexagon_arch_h() + self.build_hexagon_il_h() + self.build_hexagon_il_getter_table_h() + self.build_hexagon_il_c() + self.build_hexagon_il_X_ops_c() self.copy_tests() self.build_analysis_hexagon_c() self.build_cc_hexagon_32_sdb_txt() @@ -383,15 +506,160 @@ def add_license_info_header(self) -> None: with open(p, "r+") as f: content = f.read() f.seek(0, 0) + + # If header message is there, skip it. 
+ match = re.search(get_delimiter_line(), content) + if match: + content = content[match.start() :] f.write(get_license() + "\n" + get_generation_timestamp(self.config) + "\n" + content) + if p not in self.edited_files: + log("Write {}".format(p), LogLevel.INFO) + + def build_hexagon_il_h(self, path: Path = Conf.get_path(OutputFile.HEXAGON_IL_H)) -> None: + if not self.gen_rzil: + self.unchanged_files.append(path) + return + code = get_generation_warning_c_code() + code += "\n" + code += get_include_guard("hexagon_il.h") + code += "\n" + + code += include_file("handwritten/hexagon_il_h/includes.h") + code += "\n" + + code += include_file("handwritten/hexagon_il_h/macros.h") + code += "\n" + + code += include_file("handwritten/hexagon_il_h/declarations.h") + + # Getter declarations + for insn in list(self.normal_instructions.values()) + list(self.sub_instructions.values()): + for fcn_decl in insn.il_ops["getter_rzil"]["fcn_decl"]: + code += f"{fcn_decl};\n" + + with open("handwritten/misc_il_insns.json") as f: + misc_insns = json.loads(f.read()) + + for name in misc_insns["qemu_defined"]: + rzil_insn = self.rzilcompiler.compile_insn(name) + for decl in rzil_insn["getter_rzil"]["fcn_decl"]: + code += f"{decl};\n" + + for routine_name, routine in self.rzilcompiler.sub_routines.items(): + sub_routine = self.rzilcompiler.get_sub_routine(routine_name) + code += f"{sub_routine.il_init(SubRoutineInitType.DECL)};\n" + + code += "\n#endif\n" + + self.write_src(code, path) + + def build_hexagon_il_c(self, path: Path = Conf.get_path(OutputFile.HEXAGON_IL_C)) -> None: + if not self.gen_rzil: + self.unchanged_files.append(path) + return + code = get_generation_warning_c_code() + code += "\n" + + code += include_file("handwritten/hexagon_il_c/includes.c") + code += "\n" + + code += include_file("handwritten/hexagon_il_c/functions.c") + code += "\n" + code += include_file("handwritten/hexagon_il_c/exclude.c") + + self.write_src(code, path) + + def get_il_op_c_defintion(self, 
syntax: str, rzil_insn: RZILInstruction) -> str: + code = "" + for rzil_code, fcn_decl, needs_hi, needs_pkt in zip( + rzil_insn["rzil"], rzil_insn["getter_rzil"]["fcn_decl"], rzil_insn["needs_hi"], rzil_insn["needs_pkt"] + ): + code += f"// {syntax}\n" + code += f"{fcn_decl} {{" + + if needs_hi: + code += "const HexInsn *hi = bundle->insn;" + if needs_pkt: + code += "HexPkt *pkt = bundle->pkt;" + + code += rzil_code + code += "}\n\n" + return code + + def build_hexagon_il_X_ops_c(self, path: Path = Conf.get_path(OutputFile.IL_OPS_DIR)) -> None: + """Generate the IL op getter for each instruction. + The file the getter is written to depend on the instruction class. + Args: + path: Path to directory where the src files will be written. + + Returns: None + """ + if not (self.gen_rzil and self.rzil_compile): + for subdir, _, files in os.walk(path): + for file in files: + self.unchanged_files.append(os.path.join(subdir, file)) + return + insns = dict() + # Bundle instructions by category + for i_name in sorted(self.normal_instruction_names + self.sub_instruction_names): + insn = ( + self.normal_instructions[i_name] + if i_name in self.normal_instruction_names + else self.sub_instructions[i_name] + ) + try: + # category: A2, SA1 etc. 
+ category = re.search(r"^([a-zA-Z\d]+)_", insn.name).group(1) + except Exception as e: + print(insn.name) + raise e + if category in insns: + insns[category].append(insn) + else: + insns[category] = [insn] - def build_hexagon_insn_enum_h(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon_insn.h") -> None: + for cp in insns.keys(): + code = get_generation_warning_c_code() + code += include_file("handwritten/hexagon_il_X_ops_c/includes.h") + "\n" + for insn in insns[cp]: + code += self.get_il_op_c_defintion(insn.syntax, insn.il_ops) + code += include_file("handwritten/hexagon_il_X_ops_c/excludes.h") + self.write_src(code, path.joinpath(f"hexagon_il_{cp}_ops.c")) + + self.gen_misc_instructions(path) + + def gen_misc_instructions(self, path: Path = Conf.get_path(OutputFile.IL_OPS_DIR)) -> None: + code = get_generation_warning_c_code() + code += include_file("handwritten/hexagon_il_X_ops_c/includes.h") + "\n" + + with open("handwritten/misc_il_insns.json") as f: + misc_insns = json.loads(f.read()) + + for name in misc_insns["qemu_defined"]: + rzil_insn = self.rzilcompiler.compile_insn(name) + if name in self.normal_instructions: + syntax = self.normal_instructions[name] + elif name in self.sub_instructions: + syntax = self.sub_instructions[name] + else: + syntax = "No syntax" + code += self.get_il_op_c_defintion(syntax, rzil_insn) + + for routine_name, routine in self.rzilcompiler.sub_routines.items(): + sub_routine = self.rzilcompiler.get_sub_routine(routine_name) + code += sub_routine.il_init(SubRoutineInitType.DEF) + "\n\n" + + code += include_file("handwritten/hexagon_il_X_ops_c/non_insn_ops.c") + code += include_file("handwritten/hexagon_il_X_ops_c/excludes.h") + self.write_src(code, path.joinpath("hexagon_il_non_insn_ops.c")) + + def build_hexagon_insn_enum_h(self, path: Path = Conf.get_path(OutputFile.HEXAGON_INSN_H)) -> None: code = get_generation_warning_c_code() code += "\n" code += get_include_guard("hexagon_insn.h") code += "\ntypedef enum {\n" enum 
= "" - for name in self.normal_instruction_names + self.sub_instruction_names: + for name in sorted(self.normal_instruction_names + self.sub_instruction_names): if "invalid_decode" in name: enum = (PluginInfo.INSTR_ENUM_PREFIX + name.upper() + " = 0,") + enum else: @@ -402,7 +670,7 @@ def build_hexagon_insn_enum_h(self, path: str = "./rizin/librz/arch/isa/hexagon/ self.write_src(code, path) - def build_hexagon_disas_c(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon_disas.c") -> None: + def build_hexagon_disas_c(self, path: Path = Conf.get_path(OutputFile.HEXAGON_DISAS_C)) -> None: code = get_generation_warning_c_code() code += include_file("handwritten/hexagon_disas_c/include.c") @@ -437,10 +705,66 @@ def build_hexagon_disas_c(self, path: str = "./rizin/librz/arch/isa/hexagon/hexa self.write_src(code, path) - def build_hexagon_h(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon.h") -> None: - indent = PluginInfo.LINE_INDENT - general_prefix = PluginInfo.GENERAL_ENUM_PREFIX + def build_hexagon_il_getter_table_h(self, path: Path = Conf.get_path(OutputFile.HEXAGON_IL_GETTER_TABLE_H)) -> None: + if not self.gen_rzil: + self.unchanged_files.append(path) + return + code = get_generation_warning_c_code() + code += "\n" + code += get_include_guard("hexagon_il_getter_table.h") + code += "\n" + code += include_file("handwritten/hexagon_il_getter_table_h/includes.h") + code += "\n" + + # Lookup table + code += "static HexILInsn hex_il_getter_lt[] = {\n" + table = "" + for name in sorted(self.normal_instruction_names + self.sub_instruction_names): + insn = self.normal_instructions[name] if name in self.normal_instructions else self.sub_instructions[name] + if "invalid_decode" in name.lower(): + # Invalid decode is always at the top. 
+ tmp = f"{{{{(HexILOpGetter) {insn.il_ops['getter_rzil']['name'][0]}, {insn.il_ops['meta'][0][0]}}},\n" + tmp += "{(HexILOpGetter) NULL, HEX_IL_INSN_ATTR_INVALID},\n" + tmp += "{(HexILOpGetter) NULL, HEX_IL_INSN_ATTR_INVALID}\n" + tmp += "}," + table = tmp + table + continue + members_to_set = PluginInfo.NUM_HEX_IL_INSN_MEMBERS + getter: str + meta: [str] + table += "{" + for getter, meta in zip(insn.il_ops["getter_rzil"]["name"], insn.il_ops["meta"]): + table += f"{{(HexILOpGetter) {getter}, {'|'.join(meta)}}},\n" + members_to_set -= 1 + if members_to_set < 1: + log("Can not set more than two IL operations. Please add more members to HexILInsn.", LogLevel.ERROR) + if members_to_set == 1: + table += "{(HexILOpGetter) NULL, HEX_IL_INSN_ATTR_INVALID}\n" + else: + table += "{(HexILOpGetter) NULL, HEX_IL_INSN_ATTR_INVALID},\n" + table += "{(HexILOpGetter) NULL, HEX_IL_INSN_ATTR_INVALID}\n" + + table += "}," + code += table + "};" + + code += "\n#endif" + self.write_src(code, path) + + def build_hexagon_reg_tables_h(self, path: Path = Conf.get_path(OutputFile.HEXAGON_REG_TABLES_H)) -> None: + code = get_generation_warning_c_code() + code += "\n" + code += get_include_guard("hexagon_reg_tables.h") + code += "\n" + code += include_file("handwritten/hexagon_reg_tables_h/includes.h") + code += "\n" + code += self.gen_alias_lt() + code += self.get_reg_name_tables() + + code += "\n#endif" + self.write_src(code, path) + + def build_hexagon_h(self, path: Path = Conf.get_path(OutputFile.HEXAGON_H)) -> None: code = get_generation_warning_c_code() code += "\n" code += get_include_guard("hexagon.h") @@ -449,6 +773,9 @@ def build_hexagon_h(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon.h" code += include_file("handwritten/hexagon_h/includes.h") code += "\n" + code += include_file("handwritten/hexagon_h/macros.h") + code += "\n" + code += f"#define {PluginInfo.GENERAL_ENUM_PREFIX}MAX_OPERANDS {PluginInfo.MAX_OPERANDS}\n" code += f"#define 
{PluginInfo.GENERAL_ENUM_PREFIX}PARSE_BITS_MASK 0x{PARSE_BITS_MASK_CONST:x}\n\n" code += include_file("handwritten/hexagon_h/typedefs.h") @@ -458,30 +785,8 @@ def build_hexagon_h(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon.h" code += ",\n".join([HardwareRegister.get_enum_item_of_class(reg_class) for reg_class in self.hardware_regs]) code += "} HexRegClass;\n\n" - reg_class: str - for reg_class in self.hardware_regs: - code += "typedef enum {\n" - - hw_reg: HardwareRegister - for hw_reg in sorted( - self.hardware_regs[reg_class].values(), - key=lambda x: x.hw_encoding, - ): - alias = ",".join(hw_reg.alias) - code += "{}{} = {},{}".format( - indent, - hw_reg.enum_name, - hw_reg.hw_encoding, - " // " + alias + "\n" if alias != "" else "\n", - ) - code += "}} {}{}; // {}\n\n".format( - general_prefix, - HardwareRegister.register_class_name_to_upper(reg_class), - reg_class, - ) - - code += include_file("handwritten/hexagon_h/macros.h") - code += "\n" + code += self.gen_reg_enums() + code += self.gen_alias_enum() if len(self.reg_resolve_decl) == 0: raise ImplementationException( @@ -497,44 +802,140 @@ def build_hexagon_h(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon.h" self.write_src(code, path) - def build_hexagon_c(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon.c") -> None: - general_prefix = PluginInfo.GENERAL_ENUM_PREFIX - code = get_generation_warning_c_code() - code += include_file("handwritten/hexagon_c/include.c") + def get_reg_name_tables(self) -> str: + """ + Generates the lookup tables of register names, alias and their corresponding .new names (_tmp). + Each hardware register has a specific number, with which it is identified in the opcode + (HardwareRegister.hw_encoding). 
+ The index of a hardware registers name, alias and .new names is calculated like following: - reg_class: str + reg_name_index = HardwareRegister.hw_encoding + alias_index = reg_name_index + 1 + reg_name_new_index = reg_name_index + 2 + alias_new_index = reg_name_index + 3 + + Note: The hw_encoding values does not necessarily increment by one. + Lines which have no index due to that are filled with NULL. + + Returns: The C code with lookup tables for each register class. + """ + code = "" for reg_class in self.hardware_regs: - func_name = HardwareRegister.get_func_name_of_class(reg_class, False) - function = "\nchar* {}(int opcode_reg, bool get_alias)".format(func_name) - self.reg_resolve_decl.append(function + ";") - code += "{} {{".format(function) + code += "\n\n" + gen_c_doxygen(f"Lookup table for register names and alias of class {reg_class}.") + table_name = PluginInfo.REGISTER_LOOKUP_TABLE_NAME_V69.format(reg_class.lower()) + code += f"HexRegNames {table_name}[] = {{\n" - parsing_code = HardwareRegister.get_parse_code_reg_bits(reg_class, "opcode_reg") - if parsing_code != "": - code += "{}".format(parsing_code) + index = 0 + hw_reg: HardwareRegister + for hw_reg in sorted( + self.hardware_regs[reg_class].values(), + key=lambda x: x.hw_encoding, + ): + while index < hw_reg.hw_encoding: + code += f"{{NULL, NULL, NULL, NULL}}, // -\n" + index += 1 + name = hw_reg.asm_name + alias = hw_reg.alias[0] if len(hw_reg.alias) > 0 else hw_reg.asm_name + code += f'{{"{name.upper()}", "{alias.upper()}", "{name.upper()}_tmp", "{alias}_tmp"}}, // {hw_reg.enum_name}\n' + index += 1 + code += "};\n" + return code + + def get_hw_alias(self) -> [dict]: + """ + Generates the list with alias of hardware registers and all the information about each alias. + Used to generate alias enums and lookup tables. 
+ """ + alias = list() + for reg_class in self.hardware_regs: + hw_reg: HardwareRegister + for hw_reg in sorted(self.hardware_regs[reg_class].values(), key=lambda x: x.hw_encoding): + if hw_reg.is_mod: + # Alias already set for c0, c1 + continue + if len(hw_reg.alias) == 0: + continue + for a in hw_reg.alias: + alias.append( + { + "alias_enum": f'{PluginInfo.REGISTER_ALIAS_ENUM_PREFIX}{re.sub(r":", "_", a).upper()}', + "reg_class": hw_reg.get_enum_item_of_class(reg_class), + "reg_enum": hw_reg.enum_name, + "real": hw_reg.asm_name, + } + ) + return alias + + def gen_alias_lt(self) -> str: + """ + Generates the lookup table for all know register alias. + Returns: C lookup table with register alias. + """ + code = gen_c_doxygen("Lookup table for register alias.\n") + code += f"HexRegAliasMapping {PluginInfo.ALIAS_REGISTER_LOOKUP_TABLE_v69}[] = {{\n" + code += "\n".join( + [f'{{{a["reg_class"]}, {a["reg_enum"]}}}, // {a["alias_enum"]}' for i, a in enumerate(self.get_hw_alias())] + ) + code += "\n};\n\n" + return code - code += "switch (opcode_reg) {" - code += 'default:return "";' + def gen_alias_enum(self) -> str: + """ + Generates the enum for all know register alias. + Returns: C enum with register alias. + """ + code = "typedef enum {\n" + code += "".join([a["alias_enum"] + f" = {i},\n" for i, a in enumerate(self.get_hw_alias())]) + code += "} HexRegAlias;\n\n" + return code + + def gen_reg_enums(self) -> str: + code = "" + reg_class: str + for reg_class in self.hardware_regs: + code += "typedef enum {\n" hw_reg: HardwareRegister - for hw_reg in self.hardware_regs[reg_class].values(): - alias = "".join(hw_reg.alias).upper() - alias_choice = 'get_alias ? 
"' + alias + '" : "' + hw_reg.asm_name.upper() + '"' - code += "case {}:\nreturn {};".format( + for hw_reg in sorted( + self.hardware_regs[reg_class].values(), + key=lambda x: x.hw_encoding, + ): + alias = ",".join(hw_reg.alias) + code += "{} = {},{}".format( hw_reg.enum_name, - alias_choice if alias != "" else '"' + hw_reg.asm_name.upper() + '"', + hw_reg.hw_encoding, + " // " + alias + "\n" if alias != "" else "\n", ) - code += "}}\n" + code += "}} {}{}; // {}\n\n".format( + PluginInfo.GENERAL_ENUM_PREFIX, + HardwareRegister.register_class_name_to_upper(reg_class), + reg_class, + ) + return code + + def build_hexagon_c(self, path: Path = Conf.get_path(OutputFile.HEXAGON_C)) -> None: + general_prefix = PluginInfo.GENERAL_ENUM_PREFIX + code = get_generation_warning_c_code() + code += include_file("handwritten/hexagon_c/include.c") + code += "\n" + + code += self.gen_resolve_reg_enum_id_fcn() + code += "\n" + code += self.gen_get_reg_name_fcns() + code += "\n" reg_in_cls_decl = ( - f"char *{general_prefix.lower()}" "get_reg_in_class(HexRegClass cls, int opcode_reg, bool get_alias)" + f"RZ_API const char *{general_prefix.lower()}" + "get_reg_in_class(HexRegClass cls, int reg_num, bool get_alias, bool get_new, bool reg_num_is_enum)" ) self.reg_resolve_decl.append(f"{reg_in_cls_decl};") code += f"{reg_in_cls_decl} {{\n" code += "switch (cls) {\n" for reg_class in self.hardware_regs: - code += f"case {HardwareRegister.get_enum_item_of_class(reg_class)}:\n" - code += f"return {HardwareRegister.get_func_name_of_class(reg_class, False)}(opcode_reg, get_alias);\n" + rc = HardwareRegister.get_func_name_of_class(reg_class, False) + ec = HardwareRegister.get_enum_item_of_class(reg_class) + code += f"case {ec}:\n" + code += f"return {rc}(reg_num, get_alias, get_new, reg_num_is_enum);\n" code += "default:\n" code += "return NULL;\n" code += "}\n" @@ -544,7 +945,7 @@ def build_hexagon_c(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon.c" self.write_src(code, path) - def 
build_dwarf_reg_num_table(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon_dwarf_reg_num_table.inc"): + def build_dwarf_reg_num_table(self, path: Path = Conf.get_path(OutputFile.HEXAGON_DWARF_REG_TABLE_H)): code = get_generation_warning_c_code() code += "\n" code += "static const char *map_dwarf_reg_to_hexagon_reg(ut32 reg_num) {" @@ -572,7 +973,55 @@ def build_dwarf_reg_num_table(self, path: str = "./rizin/librz/arch/isa/hexagon/ code += "}}" self.write_src(code, path) - def build_asm_hexagon_c(self, path: str = "./rizin/librz/arch/p/asm/asm_hexagon.c") -> None: + def gen_resolve_reg_enum_id_fcn(self, param_name: str = "reg_num") -> str: + var_name = param_name + decl = "RZ_API ut32 hex_resolve_reg_enum_id(HexRegClass class, ut32 reg_num)" + self.reg_resolve_decl.append(f"{decl};") + + code = f"{decl} {{\n" "\tswitch (class) {\n" "\tdefault:\n" f"\t\treturn {var_name};\n" + for reg_class in self.hardware_regs: + class_enum = HardwareRegister.get_enum_item_of_class(reg_class) + parsing_code = HardwareRegister.get_parse_code_reg_bits(reg_class, var_name) + if not parsing_code: + continue + code += f"\tcase {class_enum}:{{\n" f"{parsing_code}\n" f"\treturn {var_name};\n" "}" + code += "}\n" "rz_warn_if_reached();\n" "return UT32_MAX;\n" "}" + return code + + def gen_get_reg_name_fcns(self): + code = "" + reg_class: str + for reg_class in self.hardware_regs: + func_name = HardwareRegister.get_func_name_of_class(reg_class, False) + function = f"\nconst char* {func_name}(int reg_num, bool get_alias, bool get_new, bool reg_num_is_enum)" + self.reg_resolve_decl.append(function + ";") + code += f"{function} {{" + + parsing_code = HardwareRegister.get_parse_code_reg_bits(reg_class, "reg_num") + if parsing_code != "": + code += f"reg_num = hex_resolve_reg_enum_id({HardwareRegister.get_enum_item_of_class(reg_class)}, reg_num);\n" + + warn_ior = "%s: Index out of range during register name lookup: i = %d\\n" + table_name = 
PluginInfo.REGISTER_LOOKUP_TABLE_NAME_V69.format(reg_class.lower()) + code += ( + f"if (reg_num >= ARRAY_LEN({table_name}))" + f'{{RZ_LOG_INFO("{warn_ior}", "{func_name}", reg_num);' + f"return NULL;}}" + ) + code += f"const char *name;" + code += f"const HexRegNames rn = {table_name}[reg_num];" + code += "if (get_alias) {" + code += "name = get_new ? rn.alias_tmp : rn.alias;" + code += "} else {" + code += "name = get_new ? rn.name_tmp : rn.name;}" + + warn_invalid_reg = "%s: No register name present at index: %d\\n" + code += "if (!name) {" f'RZ_LOG_INFO("{warn_invalid_reg}", "{func_name}", reg_num);' "return NULL;}" + code += "return name;" + code += "}\n" + return code + + def build_asm_hexagon_c(self, path: Path = Conf.get_path(OutputFile.ASM_HEXAGON_C)) -> None: code = get_generation_warning_c_code() code += include_file("handwritten/asm_hexagon_c/include.c") @@ -580,7 +1029,7 @@ def build_asm_hexagon_c(self, path: str = "./rizin/librz/arch/p/asm/asm_hexagon. self.write_src(code, path) - def build_hexagon_arch_c(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon_arch.c"): + def build_hexagon_arch_c(self, path: Path = Conf.get_path(OutputFile.HEXAGON_ARCH_C)): code = get_generation_warning_c_code() code += include_file("handwritten/hexagon_arch_c/include.c") @@ -589,7 +1038,7 @@ def build_hexagon_arch_c(self, path: str = "./rizin/librz/arch/isa/hexagon/hexag self.write_src(code, path) - def build_hexagon_arch_h(self, path: str = "./rizin/librz/arch/isa/hexagon/hexagon_arch.h"): + def build_hexagon_arch_h(self, path: Path = Conf.get_path(OutputFile.HEXAGON_ARCH_H)): code = get_generation_warning_c_code() code += get_include_guard("hexagon_arch.h") @@ -603,30 +1052,33 @@ def build_hexagon_arch_h(self, path: str = "./rizin/librz/arch/isa/hexagon/hexag @staticmethod def copy_tests() -> None: with open("handwritten/analysis-tests/hexagon") as f: - with open("./rizin/test/db/analysis/hexagon", "w+") as g: + path = Conf.get_path(OutputFile.ANA_TESTS) + 
Conf.check_path(path.absolute()) + with open(path, "w+") as g: set_pos_after_license(g) g.writelines(f.readlines()) with open("handwritten/asm-tests/hexagon") as f: - with open("./rizin/test/db/asm/hexagon", "w+") as g: + path = Conf.get_path(OutputFile.ASM_TESTS) + Conf.check_path(path.absolute()) + with open(path, "w+") as g: + set_pos_after_license(g) + g.writelines(f.readlines()) + + with open("handwritten/rzil-tests/hexagon") as f: + path = Conf.get_path(OutputFile.RZIL_TESTS) + Conf.check_path(path.absolute()) + with open(path, "w+") as g: set_pos_after_license(g) g.writelines(f.readlines()) log("Copied test files to ./rizin/test/db/", LogLevel.DEBUG) - def build_analysis_hexagon_c(self, path: str = "./rizin/librz/arch/p/analysis/analysis_hexagon.c") -> None: + def build_analysis_hexagon_c(self, path: Path = Conf.get_path(OutputFile.ANALYSIS_HEXAGON_C)) -> None: """Generates and writes the register profile. Note that some registers share the same offsets. R0 and R1:0 are both based at offset 0. """ profile = self.get_alias_profile().splitlines(keepends=True) - tmp_regs = [] # Tmp register for RZIL reg_offset = 0 - offsets = {"IntRegs": 0} - offsets["CtrRegs"] = offsets["IntRegs"] + len(self.hardware_regs["IntRegs"]) * 32 - offsets["GuestRegs"] = offsets["CtrRegs"] + len(self.hardware_regs["CtrRegs"]) * 32 - offsets["HvxQR"] = offsets["GuestRegs"] + len(self.hardware_regs["GuestRegs"]) * 32 - offsets["HvxVR"] = offsets["HvxQR"] + len(self.hardware_regs["HvxQR"]) * 128 - offsets["SysRegs"] = offsets["HvxVR"] + len(self.hardware_regs["HvxVR"]) * 1024 - offsets["TmpRegs"] = offsets["SysRegs"] + len(self.hardware_regs["SysRegs"]) * 32 for hw_reg_class in self.hardware_regs: if hw_reg_class in [ @@ -636,41 +1088,24 @@ def build_analysis_hexagon_c(self, path: str = "./rizin/librz/arch/p/analysis/an "ModRegs", ]: continue # Those registers would only be duplicates. 
- if hw_reg_class in ["IntRegs", "DoubleRegs"]: - reg_offset = offsets["IntRegs"] - elif hw_reg_class in ["CtrRegs", "CtrRegs64"]: - reg_offset = offsets["CtrRegs"] - elif hw_reg_class == "PredRegs": - reg_offset = offsets["CtrRegs"] + (32 * 4) # PredRegs = C4 - elif hw_reg_class in ["GuestRegs", "GuestRegs64"]: - reg_offset = offsets["GuestRegs"] - elif hw_reg_class in ["HvxVR", "HvxWR", "HvxVQR"]: - reg_offset = offsets["HvxVR"] - elif hw_reg_class == "HvxQR": - reg_offset = offsets["HvxQR"] - elif hw_reg_class in ["SysRegs", "SysRegs64"]: - reg_offset = offsets["SysRegs"] - else: - raise ImplementationException( - "Register profile can't be completed. Base for type {} missing.".format(hw_reg_class) - ) hw_reg: HardwareRegister for hw_reg in { k: v for k, v in sorted(self.hardware_regs[hw_reg_class].items(), key=lambda item: item[1]) }.values(): profile.append(hw_reg.get_reg_profile(reg_offset, False) + "\n") - tmp_regs.append(hw_reg.get_reg_profile(reg_offset + offsets["TmpRegs"], True) + "\n") - reg_offset += hw_reg.size if not (hw_reg.llvm_reg_class == "PredRegs") else 8 + reg_offset += 8 if (hw_reg.llvm_reg_class == "PredRegs") else hw_reg.size + profile.append(hw_reg.get_reg_profile(reg_offset, True) + "\n") + reg_offset += 8 if (hw_reg.llvm_reg_class == "PredRegs") else hw_reg.size profile.append("\n") - profile = profile + tmp_regs - profile = profile[:-1] # Remove line breaks profile[-1] = profile[-1][:-1] + ";\n" # [:-1] to remove line break. 
code = get_generation_warning_c_code() code += include_file("handwritten/analysis_hexagon_c/include.c") + code += "\n" code += include_file("handwritten/analysis_hexagon_c/functions.c") + code += "\n" tmp = list() tmp.append("const char *p =") @@ -682,16 +1117,16 @@ def build_analysis_hexagon_c(self, path: str = "./rizin/librz/arch/p/analysis/an ) code += "\n" + "".join(tmp) + code += "\n" code += include_file("handwritten/analysis_hexagon_c/initialization.c") self.write_src(code, path) - # RIZIN SPECIFC def get_alias_profile(self) -> str: - """Returns the alias profile of register. A0 = R0, SP = R29 PC = pc etc.""" + """Returns the alias profile of register. A0 = R0, SP = R29 PC = C9 etc.""" indent = PluginInfo.LINE_INDENT - p = "\n" + '"=PC{}pc\\n"'.format(indent) + "\n" + p = "\n" + '"=PC{}C9\\n"'.format(indent) + "\n" p += '"=SP{}R29\\n"'.format(indent) + "\n" p += '"=BP{}R30\\n"'.format(indent) + "\n" p += '"=LR{}R31\\n"'.format(indent) + "\n" @@ -741,9 +1176,7 @@ def get_alias_profile(self) -> str: return p @staticmethod - def build_cc_hexagon_32_sdb_txt( - path: str = "rizin/librz/analysis/d/cc-hexagon-32.sdb.txt", - ) -> None: + def build_cc_hexagon_32_sdb_txt(path: Path = Conf.get_path(OutputFile.CC_HEXAGON_32_SDB_TXT)) -> None: """Builds the *incomplete* calling convention as sdb file. Hexagon can pass arguments and return values via different registers. E.g. either over R0 or R1:0. But the calling convention logic in rizin and the sdb is not sophisticated enough to model this. @@ -751,31 +1184,30 @@ def build_cc_hexagon_32_sdb_txt( """ cc_dict = dict() + Conf.check_path(path) with open(path, "w+") as f: for reg in HexagonArchInfo.CC_REGS["GPR_args"]: n = int(re.search(r"\d{1,2}", reg).group(0)) if reg[0] == "R": - cc_dict["cc.hexagon.arg{}".format(n)] = "r{}".format(n) + cc_dict[f"cc.hexagon.arg{n}"] = f"R{n}" elif reg[0] == "D": + # Rizin has currently no way to define a different CC for + # different sized parameters. 
continue else: - raise ImplementationException( - "Could not assign register {} to a specific argument" " value.".format(reg) - ) + raise ImplementationException(f"Could not assign register {reg} to a specific return value.") cc_dict["cc.hexagon.argn"] = "stack_rev" for reg in HexagonArchInfo.CC_REGS["GPR_ret"]: n = int(re.search(r"\d{1,2}", reg).group(0)) if reg[0] == "R": if HexagonArchInfo.CC_REGS["GPR_ret"].index(reg) == 0: - cc_dict["cc.hexagon.ret".format(n)] = "r{}".format(n) + cc_dict["cc.hexagon.ret"] = f"R{n}" else: continue elif reg[0] == "D": continue else: - raise ImplementationException( - "Could not assign register {} to a specific return" " value.".format(reg) - ) + raise ImplementationException(f"Could not assign register {reg} to a specific return value.") f.write("default.cc=hexagon\n\nhexagon=cc\ncc.hexagon.maxargs=6\n") for k, v in cc_dict.items(): @@ -786,31 +1218,26 @@ def build_cc_hexagon_32_sdb_txt( for reg in HexagonArchInfo.CC_REGS["HVX_args"]: n = int(re.search(r"\d{1,2}", reg).group(0)) if reg[0] == "V": - cc_dict["cc.hvx.arg{}".format(n)] = "v{}".format(n) + cc_dict[f"cc.hvx.arg{n}"] = f"V{n}" elif reg[0] == "W": continue else: - raise ImplementationException( - "Could not assign register {} to a specific argument" " value.".format(reg) - ) + raise ImplementationException(f"Could not assign register {reg} to a specific return value.") for reg in HexagonArchInfo.CC_REGS["HVX_ret"]: n = int(re.search(r"\d{1,2}", reg).group(0)) if reg[0] == "V": if HexagonArchInfo.CC_REGS["HVX_ret"].index(reg) == 0: - cc_dict["cc.hvx.ret".format(n)] = "v{}".format(n) + cc_dict["cc.hvx.ret"] = f"V{n}" else: continue elif reg[0] == "W": continue else: - raise ImplementationException( - "Could not assign register {} to a specific return" " value.".format(reg) - ) + raise ImplementationException(f"Could not assign register {reg} to a specific return value.") for k, v in cc_dict.items(): f.write(k + "=" + v + "\n") - @staticmethod - def apply_clang_format() -> 
None: + def apply_clang_format(self) -> None: log("Apply clang-format.") for subdir, dirs, files in os.walk("rizin/librz/"): for file in files: @@ -823,20 +1250,22 @@ def apply_clang_format() -> None: ".inc", ]: log("Format {}".format(p), LogLevel.VERBOSE) - os.system("clang-format-13 -style file -i " + p) + os.system(f"{self.config['CLANG_FORMAT_BIN']} -style file -i " + p) - def write_src(self, code: str, path: str) -> None: + def write_src(self, code: str, path: Path) -> None: """Compares the given src code to the src code in the file at path and writes it if it differs. It ignores the leading license header and timestamps in the existing src file. - Changes in formatting (anything which matches the regex '[[:blank:]]') + Changes in formatting (anything which matches the regex '[[:blank:]]') are ignored as well. """ - if compare_src_to_old_src(code, path): + if src_matches_old_src(code, path): self.unchanged_files.append(path) return - with open(path, "w+") as dest: - dest.writelines(code) + Conf.check_path(path.absolute()) + with open(path.absolute(), "w+") as dest: log("Write {}".format(path), LogLevel.INFO) + dest.writelines(code) + self.edited_files.append(path) if __name__ == "__main__": @@ -848,5 +1277,28 @@ def write_src(self, code: str, path: str) -> None: help="Run llvm-tblgen to build a new Hexagon.json file from the LLVM definitions.", dest="bjs", ) + parser.add_argument( + "--no-rzil", + action="store_false", + default=True, + help="Do not invoke the RZIL compiler at all.", + dest="rzil", + ) + parser.add_argument( + "--no-rzil-compile", + action="store_false", + default=True, + help="(For testing only) Do not invoke the RZIL compiler to generate the instruction behavior. " + 'No "il_ops" files will be generated. 
Other IL code will.', + dest="rzil_compile", + ) + parser.add_argument( + "--no-pcpp", + action="store_true", + default=False, + help="Do not invoke the preprocessor of the RZIL compiler.", + dest="skip_pcpp", + ) + args = parser.parse_args() - interface = LLVMImporter(args.bjs) + interface = LLVMImporter(args.bjs, args.rzil, args.skip_pcpp, args.rzil_compile) diff --git a/Operand.py b/Operand.py index eceb6827..00958768 100644 --- a/Operand.py +++ b/Operand.py @@ -101,6 +101,7 @@ class Operand: "is_in_out_operand", "type", "opcode_mask", + "isa_id", ] def __init__(self, llvm_syntax: str, llvm_type: str, syntax_index: int): @@ -110,6 +111,7 @@ def __init__(self, llvm_syntax: str, llvm_type: str, syntax_index: int): self.syntax_index = syntax_index self.explicit_syntax = normalize_llvm_syntax(self.llvm_syntax) self.opcode_mask: SparseMask = None + self.isa_id = "" # ISA identifier character of the QEMU shortcode. d for Rdd, S for II of type s32_0Imm etc. self.is_in_operand = False self.is_out_operand = False diff --git a/PluginInfo.py b/PluginInfo.py index ba2d532a..7e5f1fb2 100644 --- a/PluginInfo.py +++ b/PluginInfo.py @@ -7,8 +7,13 @@ REPO_URL = "https://github.com/rizinorg/rz-hexagon" GENERATION_WARNING_DELIMITER = "//" + "=" * 40 GENERAL_ENUM_PREFIX = "HEX_" +GENERAL_FCN_PREFIX = "hex_" INSTR_ENUM_PREFIX = GENERAL_ENUM_PREFIX + "INS_" REGISTER_ENUM_PREFIX = GENERAL_ENUM_PREFIX + "REG_" +REGISTER_ALIAS_ENUM_PREFIX = REGISTER_ENUM_PREFIX + "ALIAS_" +REGISTER_LOOKUP_TABLE_NAME_V69 = "hexagon_{}_lt_v69" +ALIAS_REGISTER_LOOKUP_TABLE_v69 = "hex_alias_reg_lt_v69" FRAMEWORK_NAME = "rizin" MAX_OPERANDS = 6 +NUM_HEX_IL_INSN_MEMBERS = 3 diff --git a/README.md b/README.md index cfc5fdcc..9ebbb231 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ This plugin is under continuous work. So checkout the Github issues for missing ### Requirements -- For formatting we need `clang-format-13`. 
If it is not available on your distribution, you can install it from https://apt.llvm.org/. +- For formatting we need `clang-format`. If it is not available on your distribution, you can install it from https://apt.llvm.org/. - Python requirements are in `requirements.txt` - As a developer you also need `black`, `flake8`, `reuse`. @@ -45,13 +45,36 @@ Please add this directory to your `PATH`. # Install +**Python 3.11** + +We require `Python 3.11`. Please follow the install-instructions from the [Python documentation](https://docs.python.org/3.11/). + +**Clone repository** + ```bash -git clone https://github.com/rizinorg/rz-hexagon.git +git clone --recurse-submodules https://github.com/rizinorg/rz-hexagon.git cd rz-hexagon/ -pip3 install -r requirements.txt +``` + +**Setup a virtual environment** + +```bash +python3 -m venv .venv +# Activate the virtual environment. +# This step might differ from shell to shell (the one below is for bash/zsh). +# Take a look at the Python docs if you are using another one. +# https://docs.python.org/3.11/library/venv.html?highlight=virtual%20environment +source .venv/bin/activate +``` + +**Install `rz-hexagon` as package** + +```bash +pip3 install -r requirements.txt -r rzil_compiler/requirements.txt # If you enjoy some colors pip3 install -r optional_requirements.txt # Install as develop package +pip3 install -e rzil_compiler/ pip3 install -e . 
``` diff --git a/Register.py b/Register.py index 9e989bc4..a304842d 100644 --- a/Register.py +++ b/Register.py @@ -167,7 +167,8 @@ def c_template(self, force_extendable=False) -> str: info = " | ".join(info) return ( f".info = {info}, .masks = {{ {self.opcode_mask.c_template} }}, " - + f".reg_cls = {Register.get_enum_item_of_class(self.llvm_type)}" + + f".reg_cls = {Register.get_enum_item_of_class(self.llvm_type)}, " + + f".isa_id = '{self.isa_id}'" ) @staticmethod diff --git a/Tests/testCodeGeneration.py b/Tests/testCodeGeneration.py index e7ea1029..30465a62 100644 --- a/Tests/testCodeGeneration.py +++ b/Tests/testCodeGeneration.py @@ -9,7 +9,7 @@ class TestInstruction(unittest.TestCase): def setUp(self): - self.interface = LLVMImporter(False, test_mode=True) + self.interface = LLVMImporter(False, test_mode=True, gen_rzil=False, skip_pcpp=True, rzil_compile=False) self.json = self.interface.hexArch def test_code_generation(self) -> None: diff --git a/Tests/testEncoding.py b/Tests/testEncoding.py index f7625135..968e22ff 100644 --- a/Tests/testEncoding.py +++ b/Tests/testEncoding.py @@ -12,7 +12,7 @@ class TestInstructionEncoding(unittest.TestCase): def setUp(self) -> None: - self.interface = LLVMImporter(False, test_mode=True) + self.interface = LLVMImporter(False, test_mode=True, gen_rzil=False, skip_pcpp=True, rzil_compile=False) self.json = self.interface.hexArch def test_manual_mask(self) -> None: diff --git a/Tests/testImmediate.py b/Tests/testImmediate.py index e03e67b9..7d76082c 100644 --- a/Tests/testImmediate.py +++ b/Tests/testImmediate.py @@ -12,7 +12,7 @@ class TestImmediate(unittest.TestCase): def setUp(self) -> None: - self.interface = LLVMImporter(False, test_mode=True) + self.interface = LLVMImporter(False, test_mode=True, gen_rzil=False, skip_pcpp=True, rzil_compile=False) self.json = self.interface.hexArch def test_immediate_initialization(self): diff --git a/Tests/testRegister.py b/Tests/testRegister.py index 4f9e7528..364e7a90 100644 --- 
a/Tests/testRegister.py +++ b/Tests/testRegister.py @@ -10,7 +10,7 @@ class TestRegister(unittest.TestCase): def setUp(self) -> None: - self.interface = LLVMImporter(False, test_mode=True) + self.interface = LLVMImporter(False, test_mode=True, gen_rzil=False, skip_pcpp=True, rzil_compile=False) self.json = self.interface.hexArch def test_parse_reg_type(self): diff --git a/handwritten/README.md b/handwritten/README.md new file mode 100644 index 00000000..05b37cc5 --- /dev/null +++ b/handwritten/README.md @@ -0,0 +1,8 @@ +Some instructions are not parsed by the disassembler generator, because they are irrelevant for it. +`endloop` instructions are one example. + +The names used in QEMU for them, can be added to `misc_il_insn.json`. +They will be written to `hexagon_il_non_insn_ops.c` and declared in `hexagon_il.h`. + +If you need to define an arbitrary sub-routine, +you can add it in `rzil_compiler/Resources/Hexagon/sub_routines.json`. \ No newline at end of file diff --git a/handwritten/analysis-tests/hexagon b/handwritten/analysis-tests/hexagon index 4dff5453..0d0ea55a 100644 --- a/handwritten/analysis-tests/hexagon +++ b/handwritten/analysis-tests/hexagon @@ -9,13 +9,13 @@ afx EOF EXPECT=< 0x00005128 ? jump 0x5128 -c 0x00005130 -> 0x00005134 ? if (P0) jump:nt 0x5154 -c 0x00005130 -> 0x00005154 ? if (P0) jump:nt 0x5154 +c 0x00005130 -> 0x00005134 [ if (P0) jump:nt 0x5154 +c 0x00005130 -> 0x00005154 [ if (P0) jump:nt 0x5154 c 0x00005134 -> 0x00005138 [ jump 0x5138 C 0x00005138 -> 0x000050e0 [ call sym.pHello C 0x0000513c -> 0x000050f8 [ call sym.pWorld c 0x00005140 -> 0x00005144 [ jump 0x5144 -c 0x00005150 -> 0x00005128 ? jump 0x5128 +c 0x00005150 -> 0x00005128 [ jump 0x5128 EOF RUN @@ -451,7 +451,7 @@ EOF EXPECT=<pcalign == 0) { - analysis->pcalign = 0x4; + analysis->pcalign = HEX_PC_ALIGNMENT; } + // Disassemble as many instructions as possible from the buffer. 
+ ut32 buf_offset = 0; + while (buf_offset + HEX_INSN_SIZE <= len && buf_offset <= HEX_INSN_SIZE * HEX_MAX_INSN_PER_PKT) { + const ut32 buf_ptr = rz_read_at_le32(buf, buf_offset); + if (buf_offset > 0 && (buf_ptr == HEX_INVALID_INSN_0 || buf_ptr == HEX_INVALID_INSN_F)) { + // Do not disassemble invalid instructions, if we already have a valid one. + break; + } + + HexReversedOpcode rev = { .action = HEXAGON_ANALYSIS, .ana_op = op, .asm_op = NULL }; + hexagon_reverse_opcode(NULL, &rev, buf + buf_offset, addr + buf_offset, false); + buf_offset += HEX_INSN_SIZE; + } + // Copy operation actually requested. HexReversedOpcode rev = { .action = HEXAGON_ANALYSIS, .ana_op = op, .asm_op = NULL }; + hexagon_reverse_opcode(NULL, &rev, buf, addr, true); + bool decoded_packet = len > HEX_INSN_SIZE; + if (mask & RZ_ANALYSIS_OP_MASK_IL) { + op->il_op = hex_get_il_op(addr, decoded_packet); + } - hexagon_reverse_opcode(NULL, &rev, buf, addr); + return HEX_INSN_SIZE; +} - return op->size; +static RzAnalysisILConfig *rz_hexagon_il_config(RzAnalysis *a) { + HexState *state = hexagon_state(false); + state->just_init = true; + return rz_analysis_il_config_new(32, a->big_endian, 32); } diff --git a/handwritten/analysis_hexagon_c/include.c b/handwritten/analysis_hexagon_c/include.c index 539b3e56..d3b7d485 100644 --- a/handwritten/analysis_hexagon_c/include.c +++ b/handwritten/analysis_hexagon_c/include.c @@ -9,3 +9,4 @@ #include #include #include +#include diff --git a/handwritten/analysis_hexagon_c/initialization.c b/handwritten/analysis_hexagon_c/initialization.c index 870508f9..c1ff815d 100644 --- a/handwritten/analysis_hexagon_c/initialization.c +++ b/handwritten/analysis_hexagon_c/initialization.c @@ -10,4 +10,5 @@ RzAnalysisPlugin rz_analysis_plugin_hexagon = { .op = hexagon_v6_op, .esil = false, .get_reg_profile = get_reg_profile, + .il_config = rz_hexagon_il_config, }; diff --git a/handwritten/asm_hexagon_c/initialization.c b/handwritten/asm_hexagon_c/initialization.c index 
72c5a246..8938280c 100644 --- a/handwritten/asm_hexagon_c/initialization.c +++ b/handwritten/asm_hexagon_c/initialization.c @@ -2,12 +2,7 @@ // SPDX-License-Identifier: LGPL-3.0-only static RZ_OWN RzPVector /**/ *get_token_patterns() { - static RzPVector *pvec = NULL; - if (pvec) { - return pvec; - } - - pvec = rz_pvector_new(rz_asm_token_pattern_free); + RzPVector *pvec = rz_pvector_new(rz_asm_token_pattern_free); RzAsmTokenPattern *pat = RZ_NEW0(RzAsmTokenPattern); pat->type = RZ_ASM_TOKEN_META; @@ -142,7 +137,6 @@ static bool hexagon_init(void **user) { HexState *state = hexagon_state(false); rz_return_val_if_fail(state, false); - *user = state; // user = RzAsm.plugin_data state->cfg = rz_config_new(state); rz_return_val_if_fail(state->cfg, false); @@ -153,8 +147,10 @@ static bool hexagon_init(void **user) { SETCB("plugins.hexagon.sdk", "false", &hex_cfg_set, "Print packet syntax in objdump style."); SETCB("plugins.hexagon.reg.alias", "true", &hex_cfg_set, "Print the alias of registers (Alias from C0 = SA0)."); - state->token_patterns = get_token_patterns(); - compile_token_patterns(state->token_patterns); + if (!state->token_patterns) { + state->token_patterns = get_token_patterns(); + } + rz_asm_compile_token_patterns(state->token_patterns); return true; } @@ -176,14 +172,27 @@ RZ_API RZ_BORROW RzConfig *hexagon_get_config() { */ static int disassemble(RzAsm *a, RzAsmOp *op, const ut8 *buf, int l) { rz_return_val_if_fail(a && op && buf, -1); - if (l < 4) { + if (l < HEX_INSN_SIZE) { return -1; } ut32 addr = (ut32)a->pc; + // Disassemble as many instructions as possible from the buffer. + ut32 buf_offset = 0; + while (buf_offset + HEX_INSN_SIZE <= l && buf_offset <= HEX_INSN_SIZE * HEX_MAX_INSN_PER_PKT) { + const ut32 buf_ptr = rz_read_at_le32(buf, buf_offset); + if (buf_offset > 0 && (buf_ptr == HEX_INVALID_INSN_0 || buf_ptr == HEX_INVALID_INSN_F)) { + // Do not disassemble invalid instructions, if we already have a valid one. 
+ break; + } + + HexReversedOpcode rev = { .action = HEXAGON_DISAS, .ana_op = NULL, .asm_op = op }; + hexagon_reverse_opcode(a, &rev, buf + buf_offset, addr + buf_offset, false); + buf_offset += HEX_INSN_SIZE; + } + // Copy operation actually requested. HexReversedOpcode rev = { .action = HEXAGON_DISAS, .ana_op = NULL, .asm_op = op }; - - hexagon_reverse_opcode(a, &rev, buf, addr); - return op->size; + hexagon_reverse_opcode(a, &rev, buf, addr, true); + return HEX_INSN_SIZE; } RzAsmPlugin rz_asm_plugin_hexagon = { diff --git a/handwritten/hexagon_arch_c/functions.c b/handwritten/hexagon_arch_c/functions.c index ebdf4c3c..0551bfbc 100644 --- a/handwritten/hexagon_arch_c/functions.c +++ b/handwritten/hexagon_arch_c/functions.c @@ -1,6 +1,10 @@ // SPDX-FileCopyrightText: 2021 Rot127 // SPDX-License-Identifier: LGPL-3.0-only +static inline bool is_invalid_insn_data(ut32 data) { + return data == HEX_INVALID_INSN_0 || data == HEX_INVALID_INSN_F; +} + static inline bool is_last_instr(const ut8 parse_bits) { // Duplex instr. (parse bits = 0) are always the last. return ((parse_bits == 0x3) || (parse_bits == 0x0)); @@ -62,7 +66,7 @@ static inline bool is_endloop01_pkt(const ut8 pb_hi_0, const ut8 pb_hi_1) { * \param addr The address of the instruction. * \return Pointer to instruction or NULL if none was found. 
*/ -static HexInsnContainer *hex_get_hic_at_addr(HexState *state, const ut32 addr) { +RZ_API HexInsnContainer *hex_get_hic_at_addr(HexState *state, const ut32 addr) { HexPkt *p; for (ut8 i = 0; i < HEXAGON_STATE_PKTS; ++i) { p = &state->pkts[i]; @@ -71,6 +75,7 @@ static HexInsnContainer *hex_get_hic_at_addr(HexState *state, const ut32 addr) { rz_list_foreach (p->bin, iter, hic) { if (addr == hic->addr) { p->last_access = rz_time_now(); + RZ_LOG_DEBUG("===== RET buffed_pkts[%d] hic @ 0x010%x ====> \n", i, addr); return hic; } } @@ -88,6 +93,78 @@ static inline bool hic_at_addr(RZ_NONNULL const HexInsnContainer *hic, const ut3 return (hic->addr == addr) || (hic->is_duplex && sub_insn_at_addr(hic, addr)); } +/** + * \brief Gives for an ISA register character the register name. + * E.g.: If the ISA instruction uses the variable "Rd", it passes 'd' as identifier to this function. + * The function returns a concrete register name like "R3", "R10" or any other name which is associated with the id. + * + * \param hi The hexagon instruction. + * \param isa_id The ISA register character. + * \param new_reg If true it will return the .new register name ("R3_tmp", "R10_tmp" etc.) + * \return const char * The concrete register name. Or NULL on error. + */ +RZ_API const HexOp *hex_isa_to_reg(const HexInsn *hi, const char isa_id, bool new_reg) { + rz_return_val_if_fail(hi && isa_id, NULL); + const HexOp *op = NULL; + for (ut32 i = 0; i < hi->op_count; ++i) { + if ((hi->ops[i].isa_id == isa_id) && (hi->ops[i].type == HEX_OP_TYPE_REG)) { + op = &hi->ops[i]; + break; + } + } + if (!op) { + RZ_LOG_WARN("Could not find equivalent register for ISA variable \'%c\'\n", isa_id); + return NULL; + } + return op; +} + +/** + * \brief Gives for an n-register the HexOp. + * + * \param bundle The packet and instruction bundle. + * \param isa_id The ISA register character this reg is known to the instruction. + * \return HexOp The HexOp. Or {0} on error. 
+ */ +RZ_API const HexOp hex_nreg_to_op(const HexInsnPktBundle *bundle, const char isa_id) { + rz_return_val_if_fail(bundle && isa_id, (HexOp){ 0 }); + const HexInsn *hi = bundle->insn; + const HexOp *op = NULL; + for (ut32 i = 0; i < hi->op_count; ++i) { + if ((hi->ops[i].isa_id == isa_id) && (hi->ops[i].type == HEX_OP_TYPE_REG)) { + op = &hi->ops[i]; + break; + } + } + if (!op) { + RZ_LOG_WARN("Could not find equivalent register for ISA variable \'%c\'\n", isa_id); + return (HexOp){ 0 }; + } + + HexOp nop = *op; + nop.op.reg = resolve_n_register(op->op.reg, hi->addr, bundle->pkt); + + return nop; +} + +/** + * \brief Gives for a ISA immediate character the immediate value stored in the instruction. + * + * \param hi The hexagon instruction. + * \param isa_id The character which identifies the immediate. + * \return ut64 The immediate value. + */ +RZ_API ut64 hex_isa_to_imm(const HexInsn *hi, const char isa_id) { + rz_return_val_if_fail(hi && isa_id, 0); + for (ut32 i = 0; i < hi->op_count; ++i) { + if (hi->ops[i].isa_id == isa_id && (hi->ops[i].type == HEX_OP_TYPE_IMM)) { + return hi->ops[i].op.imm; + } + } + RZ_LOG_WARN("No immediate operand for \'%c\' found.\n", isa_id); + return 0; +} + /** * \brief Returns the index of an addr in a given packet. * @@ -120,6 +197,7 @@ static void hex_clear_pkt(RZ_NONNULL HexPkt *p) { p->is_valid = false; p->last_access = 0; rz_list_purge(p->bin); + rz_pvector_clear(p->il_ops); } /** @@ -134,9 +212,11 @@ static HexPkt *hex_get_stale_pkt(HexState *state) { for (ut8 i = 0; i < HEXAGON_STATE_PKTS; ++i) { if (state->pkts[i].last_access < oldest) { + oldest = state->pkts[i].last_access; stale_state_pkt = &state->pkts[i]; } } + hex_clear_pkt(stale_state_pkt); return stale_state_pkt; } @@ -147,7 +227,7 @@ static HexPkt *hex_get_stale_pkt(HexState *state) { * \param addr The address of an instruction. * \return HexPkt* The packet to which this address belongs to or NULL if no packet was found. 
*/ -static HexPkt *hex_get_pkt(HexState *state, const ut32 addr) { +RZ_API HexPkt *hex_get_pkt(RZ_BORROW HexState *state, const ut32 addr) { HexPkt *p = NULL; HexInsnContainer *hic = NULL; RzListIter *iter = NULL; @@ -155,6 +235,7 @@ static HexPkt *hex_get_pkt(HexState *state, const ut32 addr) { p = &state->pkts[i]; rz_list_foreach (p->bin, iter, hic) { if (hic_at_addr(hic, addr)) { + p->last_access = rz_time_now(); return p; } } @@ -177,30 +258,23 @@ RZ_API void hex_insn_free(RZ_NULLABLE HexInsn *i) { * \param i The instruction container to be freed. */ RZ_API void hex_insn_container_free(RZ_NULLABLE HexInsnContainer *c) { - if (c && c->is_duplex) { + if (c) { + // bin is a uninion. Just free all of them. hex_insn_free(c->bin.sub[0]); hex_insn_free(c->bin.sub[1]); - } else if (c) { - hex_insn_free(c->bin.insn); } free(c); } /** - * \brief Copies one instruction container to another. + * \brief Moves one instruction container to another. * * \param dest The destination insruction container. * \param src The source instruction container. 
*/ -RZ_API void hex_copy_insn_container(RZ_OUT HexInsnContainer *dest, const HexInsnContainer *src) { +RZ_API void hex_move_insn_container(RZ_OUT HexInsnContainer *dest, const HexInsnContainer *src) { rz_return_if_fail(dest && src); - memcpy(dest, src, sizeof(HexInsnContainer)); - if (src->is_duplex) { - memcpy(dest->bin.sub[0], src->bin.sub[0], sizeof(HexInsn)); - memcpy(dest->bin.sub[1], src->bin.sub[1], sizeof(HexInsn)); - } else { - memcpy(dest->bin.insn, src->bin.insn, sizeof(HexInsn)); - } + memmove(dest, src, sizeof(HexInsnContainer)); } /** @@ -245,6 +319,9 @@ RZ_API HexState *hexagon_state(bool reset) { state = NULL; return NULL; } + if (state) { + return state; + } state = calloc(1, sizeof(HexState)); if (!state) { @@ -252,12 +329,14 @@ RZ_API HexState *hexagon_state(bool reset) { } for (int i = 0; i < HEXAGON_STATE_PKTS; ++i) { state->pkts[i].bin = rz_list_newf((RzListFree)hex_insn_container_free); + state->pkts[i].il_ops = rz_pvector_new(NULL); if (!state->pkts[i].bin) { RZ_LOG_FATAL("Could not initialize instruction list!"); } hex_clear_pkt(&(state->pkts[i])); } state->const_ext_l = rz_list_newf((RzListFree)hex_const_ext_free); + state->token_patterns = NULL; return state; } @@ -406,11 +485,11 @@ void hex_set_hic_text(RZ_INOUT HexInsnContainer *hic) { * \brief Sets the packet related information in an instruction. * * \param hi The instruction. - * \param p The packet the instruction belongs to. + * \param pkt The packet the instruction belongs to. * \param k The index of the instruction within the packet. 
*/ -static void hex_set_pkt_info(const RzAsm *rz_asm, RZ_INOUT HexInsnContainer *hic, const HexPkt *p, const ut8 k, const bool update_text) { - rz_return_if_fail(hic && p); +static void hex_set_pkt_info(const RzAsm *rz_asm, RZ_INOUT HexInsnContainer *hic, const HexPkt *pkt, const ut8 k, const bool update_text) { + rz_return_if_fail(hic && pkt); bool is_first = (k == 0); HexPktInfo *hi_pi = &hic->pkt_info; HexState *state = hexagon_state(false); @@ -421,7 +500,7 @@ static void hex_set_pkt_info(const RzAsm *rz_asm, RZ_INOUT HexInsnContainer *hic if (is_first && is_last_instr(hic->parse_bits)) { // Single instruction packet. hi_pi->first_insn = true; hi_pi->last_insn = true; - if (p->is_valid) { + if (pkt->is_valid) { strncpy(hi_pi->text_prefix, get_pkt_indicator(rz_asm->utf8, sdk_form, true, SINGLE_IN_PKT), 8); if (sdk_form) { strncpy(hi_pi->text_postfix, get_pkt_indicator(rz_asm->utf8, sdk_form, false, SINGLE_IN_PKT), 8); @@ -432,7 +511,7 @@ static void hex_set_pkt_info(const RzAsm *rz_asm, RZ_INOUT HexInsnContainer *hic } else if (is_first) { hi_pi->first_insn = true; hi_pi->last_insn = false; - if (p->is_valid) { + if (pkt->is_valid) { strncpy(hi_pi->text_prefix, get_pkt_indicator(rz_asm->utf8, sdk_form, true, FIRST_IN_PKT), 8); } else { strncpy(hi_pi->text_prefix, HEX_PKT_UNK, 8); @@ -440,13 +519,13 @@ static void hex_set_pkt_info(const RzAsm *rz_asm, RZ_INOUT HexInsnContainer *hic } else if (is_last_instr(hic->parse_bits)) { hi_pi->first_insn = false; hi_pi->last_insn = true; - if (p->is_valid) { + if (pkt->is_valid) { strncpy(hi_pi->text_prefix, get_pkt_indicator(rz_asm->utf8, sdk_form, true, LAST_IN_PKT), 8); if (sdk_form) { strncpy(hi_pi->text_postfix, get_pkt_indicator(rz_asm->utf8, sdk_form, false, LAST_IN_PKT), 8); } - switch (hex_get_loop_flag(p)) { + switch (hex_get_loop_flag(pkt)) { default: break; case HEX_LOOP_01: @@ -465,7 +544,7 @@ static void hex_set_pkt_info(const RzAsm *rz_asm, RZ_INOUT HexInsnContainer *hic } else { hi_pi->first_insn = false; 
hi_pi->last_insn = false; - if (p->is_valid) { + if (pkt->is_valid) { strncpy(hi_pi->text_prefix, get_pkt_indicator(rz_asm->utf8, sdk_form, true, MID_IN_PKT), 8); } else { strncpy(hi_pi->text_prefix, HEX_PKT_UNK, 8); @@ -506,13 +585,42 @@ RZ_API HexLoopAttr hex_get_loop_flag(const HexPkt *p) { } /** - * \brief Sets the packet after pkt to valid and updates its textual assembly. + * \brief Sets the given packet to valid and updates the packet information of + * each instruction in it. + * + * \param state The to operate on. + * \param pkt The packet to set to valid. + */ +static void make_packet_valid(RZ_BORROW HexState *state, RZ_BORROW HexPkt *pkt) { + rz_return_if_fail(state && pkt); + pkt->is_valid = true; + HexInsnContainer *hi = NULL; + RzListIter *it = NULL; + ut8 i = 0; + ut8 slot = 0; + rz_list_foreach (pkt->bin, it, hi) { + hex_set_pkt_info(&state->rz_asm, hi, pkt, i, true); + if (hi->is_duplex) { + hi->bin.sub[0]->slot = 0; + hi->bin.sub[1]->slot = 1; + slot = 2; + } else { + hi->bin.insn->slot = slot; + ++slot; + } + ++i; + } + pkt->last_access = rz_time_now(); +} + +/** + * \brief Sets the packet after \p pkt to valid and updates its textual assembly. * * \param state The state to operate on. * \param pkt The packet which predecessor will be updated. 
*/ static void make_next_packet_valid(HexState *state, const HexPkt *pkt) { - HexInsnContainer *tmp = rz_list_get_top(pkt->bin); + HexInsnContainer *tmp = rz_list_get_n(pkt->bin, 0); if (!tmp) { return; } @@ -525,15 +633,7 @@ static void make_next_packet_valid(HexState *state, const HexPkt *pkt) { if (p->is_valid) { break; } - p->is_valid = true; - HexInsnContainer *hi = NULL; - RzListIter *it = NULL; - ut8 k = 0; - rz_list_foreach (p->bin, it, hi) { - hex_set_pkt_info(&state->rz_asm, hi, p, k, true); - ++k; - } - p->last_access = rz_time_now(); + make_packet_valid(state, p); break; } } @@ -549,6 +649,7 @@ RZ_API HexInsn *hexagon_alloc_instr() { if (!hi) { RZ_LOG_FATAL("Could not allocate memory for new instruction.\n"); } + hi->fround_mode = RZ_FLOAT_RMODE_RNE; return hi; } @@ -570,31 +671,32 @@ RZ_API HexInsnContainer *hexagon_alloc_instr_container() { * * \param state The state to operate on. * \param new_hic The instruction container to copy. - * \param p The packet in which will hold the instruction container. + * \param pkt The packet in which will hold the instruction container. * \param k The index of the instruction container in the packet. * \return HexInsnContainer* Pointer to the copied instruction container on the heap. */ -static HexInsnContainer *hex_add_to_pkt(HexState *state, const HexInsnContainer *new_hic, RZ_INOUT HexPkt *p, const ut8 k) { +static HexInsnContainer *hex_add_to_pkt(HexState *state, const HexInsnContainer *new_hic, RZ_INOUT HexPkt *pkt, const ut8 k) { if (k > 3) { RZ_LOG_FATAL("Instruction could not be set! 
A packet can only hold four instructions but k=%d.", k); } HexInsnContainer *hic = hexagon_alloc_instr_container(); - hex_copy_insn_container(hic, new_hic); - rz_list_insert(p->bin, k, hic); + hex_move_insn_container(hic, new_hic); + rz_list_del_n(pkt->bin, k); + rz_list_insert(pkt->bin, k, hic); if (k == 0) { - p->pkt_addr = hic->addr; + pkt->pkt_addr = hic->addr; } - p->last_instr_present |= is_last_instr(hic->parse_bits); - ut32 p_l = rz_list_length(p->bin); - hex_set_pkt_info(&state->rz_asm, hic, p, k, false); + pkt->last_instr_present |= is_last_instr(hic->parse_bits); + ut32 p_l = rz_list_length(pkt->bin); + hex_set_pkt_info(&state->rz_asm, hic, pkt, k, false); if (k == 0 && p_l > 1) { // Update the instruction which was previously the first one. - hex_set_pkt_info(&state->rz_asm, rz_list_get_n(p->bin, 1), p, 1, true); + hex_set_pkt_info(&state->rz_asm, rz_list_get_n(pkt->bin, 1), pkt, 1, true); } - p->last_access = rz_time_now(); - if (p->last_instr_present) { - make_next_packet_valid(state, p); + pkt->last_access = rz_time_now(); + if (pkt->last_instr_present) { + make_next_packet_valid(state, pkt); } return hic; } @@ -604,26 +706,27 @@ static HexInsnContainer *hex_add_to_pkt(HexState *state, const HexInsnContainer * * \param state The state to operate on. * \param new_hic The instruction container to copy. - * \param p The old packet which attributes are copied to the new one. - * \param new_p The new packet will hold the instruction container. + * \param pkt The old packet which attributes are copied to the new one. + * \param new_pkt The new packet will hold the instruction container. * \return HexInsnContainer* Pointer to the copied instruction container on the heap. 
*/ -static HexInsnContainer *hex_to_new_pkt(HexState *state, const HexInsnContainer *new_hic, const HexPkt *p, RZ_INOUT HexPkt *new_p) { - hex_clear_pkt(new_p); +static HexInsnContainer *hex_to_new_pkt(HexState *state, const HexInsnContainer *new_hic, const HexPkt *pkt, RZ_INOUT HexPkt *new_pkt) { + hex_clear_pkt(new_pkt); HexInsnContainer *hic = hexagon_alloc_instr_container(); - hex_copy_insn_container(hic, new_hic); - rz_list_insert(new_p->bin, 0, hic); - - new_p->last_instr_present |= is_last_instr(hic->parse_bits); - new_p->hw_loop0_addr = p->hw_loop0_addr; - new_p->hw_loop1_addr = p->hw_loop1_addr; - new_p->is_valid = (p->is_valid || p->last_instr_present); - new_p->pkt_addr = hic->addr; - new_p->last_access = rz_time_now(); - hex_set_pkt_info(&state->rz_asm, hic, new_p, 0, false); - if (new_p->last_instr_present) { - make_next_packet_valid(state, new_p); + hex_move_insn_container(hic, new_hic); + rz_list_del_n(new_pkt->bin, 0); + rz_list_insert(new_pkt->bin, 0, hic); + + new_pkt->last_instr_present |= is_last_instr(hic->parse_bits); + new_pkt->hw_loop0_addr = pkt->hw_loop0_addr; + new_pkt->hw_loop1_addr = pkt->hw_loop1_addr; + new_pkt->is_valid = (pkt->is_valid || pkt->last_instr_present); + new_pkt->pkt_addr = hic->addr; + new_pkt->last_access = rz_time_now(); + hex_set_pkt_info(&state->rz_asm, hic, new_pkt, 0, false); + if (new_pkt->last_instr_present) { + make_next_packet_valid(state, new_pkt); } return hic; } @@ -636,24 +739,101 @@ static HexInsnContainer *hex_to_new_pkt(HexState *state, const HexInsnContainer * \return HexInsnContainer* Pointer to the copied instruction container on the heap. 
*/ static HexInsnContainer *hex_add_to_stale_pkt(HexState *state, const HexInsnContainer *new_hic) { - HexPkt *p = hex_get_stale_pkt(state); - hex_clear_pkt(p); + HexPkt *pkt = hex_get_stale_pkt(state); + hex_clear_pkt(pkt); HexInsnContainer *hic = hexagon_alloc_instr_container(); - hex_copy_insn_container(hic, new_hic); - rz_list_insert(p->bin, 0, hic); + hex_move_insn_container(hic, new_hic); + rz_list_insert(pkt->bin, 0, hic); - p->last_instr_present |= is_last_instr(hic->parse_bits); - p->pkt_addr = new_hic->addr; + pkt->last_instr_present |= is_last_instr(hic->parse_bits); + pkt->pkt_addr = new_hic->addr; // p->is_valid = true; // Setting it true also detects a lot of data as valid assembly. - p->last_access = rz_time_now(); - hex_set_pkt_info(&state->rz_asm, hic, p, 0, false); - if (p->last_instr_present) { - make_next_packet_valid(state, p); + pkt->last_access = rz_time_now(); + hex_set_pkt_info(&state->rz_asm, hic, pkt, 0, false); + if (pkt->last_instr_present) { + make_next_packet_valid(state, pkt); } return hic; } +#if RZ_BUILD_DEBUG +static char desc_letter_hic(const HexInsnContainer *hic) { + char desc = ' '; + if (!hic) { + desc = ' '; + } else if (hic->is_duplex) { + desc = hic->bin.sub[0]->identifier != HEX_INS_INVALID_DECODE ? 'v' : 'i'; + desc = hic->pkt_info.last_insn ? 'l' : desc; + } else { + desc = hic->bin.insn->identifier != HEX_INS_INVALID_DECODE ? 'v' : 'i'; + desc = hic->pkt_info.last_insn ? 
'l' : desc; + } + return desc; +} +#endif + +static void print_state_pkt(const HexState *state, st32 index, HexBufferAction action, const HexInsnContainer *new_hic) { +#if RZ_BUILD_DEBUG + ut32 oldest = 7; + ut32 newest = 0; + ut64 min_time = 0xffffffffffffffff; + ut64 max_time = 0; + for (int i = 0; i < HEXAGON_STATE_PKTS; ++i) { + const HexPkt *pkt = &state->pkts[i]; + if (pkt->last_access < min_time) { + min_time = pkt->last_access; + oldest = i; + } + if (pkt->last_access > max_time) { + max_time = pkt->last_access; + newest = i; + } + } + RZ_LOG_DEBUG("╭─────┬──────────────┬─────┬──────────────────┬───────────────╮\n"); + RZ_LOG_DEBUG("│ pkt │ packet │ │ │ [i]n[v]alid │\n"); + RZ_LOG_DEBUG("│ id │ address │ age │ last access │ [l]ast │\n"); + RZ_LOG_DEBUG("├─────┼──────────────┼─────┼──────────────────┼───┬───┬───┬───┤\n"); + RzStrBuf *pkt_line = rz_strbuf_new(""); + for (int i = 0; i < HEXAGON_STATE_PKTS; ++i) { + const HexPkt *pkt = &state->pkts[i]; + const char *time_ind = " "; + if (i == oldest) { + time_ind = "old"; + } else if (i == newest) { + time_ind = "new"; + } + rz_strbuf_appendf(pkt_line, "│ %d │ 0x%010x │ %s │ %016llu │ ", i, pkt->pkt_addr, time_ind, pkt->last_access); + HexInsnContainer *hic = NULL; + for (int j = 0; j < 4; ++j) { + hic = rz_list_get_n(pkt->bin, j); + const char desc = desc_letter_hic(hic); + rz_strbuf_appendf(pkt_line, "%c │ ", desc); + } + if ((index < 0 && i == oldest) || (index == i)) { + const char desc = desc_letter_hic(new_hic); + rz_strbuf_append(pkt_line, " < "); + if (action == HEX_BUF_ADD) { + rz_strbuf_appendf(pkt_line, "%s %c", "ADDED", desc); + } else if (action == HEX_BUF_STALE) { + rz_strbuf_appendf(pkt_line, "added %c %s", desc, "to STALE"); + } else if (action == HEX_BUF_NEW) { + rz_strbuf_appendf(pkt_line, "added %c %s", desc, "to NEW"); + } + } + rz_strbuf_append(pkt_line, "\n"); + RZ_LOG_DEBUG("%s", rz_strbuf_get(pkt_line)); + rz_strbuf_fini(pkt_line); + if (i < HEXAGON_STATE_PKTS - 1) { + 
RZ_LOG_DEBUG("├─────┼──────────────┼─────┼──────────────────┼───┼───┼───┼───┤\n"); + } else { + RZ_LOG_DEBUG("╰─────┴──────────────┴─────┴──────────────────┴───┴───┴───┴───╯\n"); + } + } + rz_strbuf_free(pkt_line); +#endif +} + /** * \brief Copies the given instruction container to a state packet it belongs to. * If the instruction container does not fit to any packet, it will be written to a stale one. @@ -679,17 +859,18 @@ static HexInsnContainer *hex_add_hic_to_state(HexState *state, const HexInsnCont return hex_add_to_stale_pkt(state, new_hic); } - for (ut8 i = 0; i < HEXAGON_STATE_PKTS; ++i, k = 0) { + ut32 i = 0; + for (; i < HEXAGON_STATE_PKTS; ++i, k = 0) { p = &(state->pkts[i]); HexInsnContainer *p_hic = NULL; // Instructions container already in the packet. RzListIter *iter = NULL; rz_list_foreach (p->bin, iter, p_hic) { if (new_hic->addr == (p_hic->addr - 4)) { - // Instruction preceeds one in the packet. + // Instruction precedes one in the packet. if (is_last_instr(new_hic->parse_bits) || is_pkt_full(p)) { - write_to_stale_pkt = true; - break; + // Continue searching. The instruction might belong to another packet. + continue; } else { insert_before_pkt_hi = true; add_to_pkt = true; @@ -710,20 +891,31 @@ static HexInsnContainer *hex_add_hic_to_state(HexState *state, const HexInsnCont break; } } + if (!add_to_pkt && !new_pkt && !write_to_stale_pkt) { + // No packet found this one belongs to. + // Add to a stale one. 
+ write_to_stale_pkt = true; + } // Add the instruction to packet p if (add_to_pkt) { if (insert_before_pkt_hi) { - return hex_add_to_pkt(state, new_hic, p, k); + HexInsnContainer *result_hic = hex_add_to_pkt(state, new_hic, p, k); + print_state_pkt(state, i, HEX_BUF_ADD, result_hic); + return result_hic; } - return hex_add_to_pkt(state, new_hic, p, k + 1); - + HexInsnContainer *result_hic = hex_add_to_pkt(state, new_hic, p, k + 1); + print_state_pkt(state, i, HEX_BUF_ADD, result_hic); + return result_hic; } else if (new_pkt) { ut8 ni = (get_state_pkt_index(state, p) + 1) % HEXAGON_STATE_PKTS; - return hex_to_new_pkt(state, new_hic, p, &state->pkts[ni]); - } else { - return hex_add_to_stale_pkt(state, new_hic); + HexInsnContainer *result_hic = hex_to_new_pkt(state, new_hic, p, &state->pkts[ni]); + print_state_pkt(state, ni, HEX_BUF_NEW, result_hic); + return result_hic; } + HexInsnContainer *result_hic = hex_add_to_stale_pkt(state, new_hic); + print_state_pkt(state, -1, HEX_BUF_STALE, result_hic); + return result_hic; } /** @@ -734,8 +926,11 @@ static HexInsnContainer *hex_add_hic_to_state(HexState *state, const HexInsnCont * \param addr The address of the instruction container. * \param parse_bits The parse bits of the instruction container. 
*/ -static void setup_new_hic(HexInsnContainer *hic, const HexReversedOpcode *rz_reverse, const ut32 addr, const ut8 parse_bits) { +static void setup_new_hic(HexInsnContainer *hic, const HexReversedOpcode *rz_reverse, const ut32 addr, const ut8 parse_bits, ut32 data) { + rz_return_if_fail(hic && rz_reverse); + bool invalid = is_invalid_insn_data(data); hic->identifier = HEX_INS_INVALID_DECODE; + hic->bytes = data; hic->addr = addr; hic->parse_bits = parse_bits; if (rz_reverse->asm_op) { @@ -755,7 +950,11 @@ static void setup_new_hic(HexInsnContainer *hic, const HexReversedOpcode *rz_rev hic->asm_op.size = 4; hic->ana_op.size = 4; - if (parse_bits == 0b00) { + + hic->bin.sub[0] = NULL; + hic->bin.sub[1] = NULL; + if (parse_bits == 0b00 && !invalid) { + hic->is_duplex = true; hic->bin.sub[0] = hexagon_alloc_instr(); hic->bin.sub[1] = hexagon_alloc_instr(); } else { @@ -820,14 +1019,42 @@ RZ_API void hex_extend_op(HexState *state, RZ_INOUT HexOp *op, const bool set_ne } } +static void copy_asm_ana_ops(const HexState *state, RZ_BORROW HexReversedOpcode *rz_reverse, RZ_BORROW HexInsnContainer *hic) { + rz_return_if_fail(state && rz_reverse && hic); + switch (rz_reverse->action) { + default: + memcpy(rz_reverse->asm_op, &hic->asm_op, sizeof(RzAsmOp)); + memcpy(rz_reverse->ana_op, &hic->ana_op, sizeof(RzAnalysisOp)); + rz_strbuf_set(&rz_reverse->asm_op->buf_asm, hic->text); + rz_reverse->asm_op->asm_toks = rz_asm_tokenize_asm_regex(&rz_reverse->asm_op->buf_asm, state->token_patterns); + if (rz_reverse->asm_op->asm_toks) { + rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type; + } + break; + case HEXAGON_DISAS: + memcpy(rz_reverse->asm_op, &hic->asm_op, sizeof(RzAsmOp)); + rz_strbuf_set(&rz_reverse->asm_op->buf_asm, hic->text); + rz_reverse->asm_op->asm_toks = rz_asm_tokenize_asm_regex(&rz_reverse->asm_op->buf_asm, state->token_patterns); + if (rz_reverse->asm_op->asm_toks) { + rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type; + } + break; + case 
HEXAGON_ANALYSIS: + memcpy(rz_reverse->ana_op, &hic->ana_op, sizeof(RzAnalysisOp)); + break; + } +} + /** - * \brief Reverses a given opcode and copies the result into one of the rizin structs in rz_reverse. + * \brief Reverses a given opcode and copies the result into one of the rizin structs in rz_reverse + * if \p copy_result is set. * * \param rz_reverse Rizin core structs which store asm and analysis information. * \param buf The buffer which stores the current opcode. * \param addr The address of the current opcode. + * \param copy_result If set, it copies the result. Otherwise it only buffers it in the internal state. */ -RZ_API void hexagon_reverse_opcode(const RzAsm *rz_asm, HexReversedOpcode *rz_reverse, const ut8 *buf, const ut64 addr) { +RZ_API void hexagon_reverse_opcode(const RzAsm *rz_asm, HexReversedOpcode *rz_reverse, const ut8 *buf, const ut64 addr, const bool copy_result) { HexState *state = hexagon_state(false); if (!state) { RZ_LOG_FATAL("HexState was NULL."); @@ -836,36 +1063,29 @@ RZ_API void hexagon_reverse_opcode(const RzAsm *rz_asm, HexReversedOpcode *rz_re memcpy(&state->rz_asm, rz_asm, sizeof(RzAsm)); } HexInsnContainer *hic = hex_get_hic_at_addr(state, addr); - if (hic) { - // Opcode was already reversed and is still in the state. Copy the result and return. 
- switch (rz_reverse->action) { - default: - memcpy(rz_reverse->asm_op, &(hic->asm_op), sizeof(RzAsmOp)); - memcpy(rz_reverse->ana_op, &(hic->ana_op), sizeof(RzAnalysisOp)); - rz_strbuf_set(&rz_reverse->asm_op->buf_asm, hic->text); - rz_reverse->asm_op->asm_toks = rz_asm_tokenize_asm_regex(&rz_reverse->asm_op->buf_asm, state->token_patterns); - if (rz_reverse->asm_op->asm_toks) { - rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type; - } - return; - case HEXAGON_DISAS: - memcpy(rz_reverse->asm_op, &(hic->asm_op), sizeof(RzAsmOp)); - rz_strbuf_set(&rz_reverse->asm_op->buf_asm, hic->text); - rz_reverse->asm_op->asm_toks = rz_asm_tokenize_asm_regex(&rz_reverse->asm_op->buf_asm, state->token_patterns); - if (rz_reverse->asm_op->asm_toks) { - rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type; - } - return; - case HEXAGON_ANALYSIS: - memcpy(rz_reverse->ana_op, &(hic->ana_op), sizeof(RzAnalysisOp)); - return; + if (hic && !is_invalid_insn_data(hic->bytes)) { + // Code was already reversed and is still in the state. Copy the result and return. + // + // We never return buffered instructions of 0x00000000 and 0xffffffff. + // Because Rizin's IO layer is not a transparent view into the binary. + // Sometimes it passes a buffer for address `a` of size `n`, which has only + // `m` bytes of actual binary data set (where `m < n`). + // Although, there are still valid instructions bytes at `a + m` in the + // actual binary. So the IO layer only passes a certain window of `n - m` valid bytes + // and sets the rest to `0x0` or `0xff`. + // So previously we might have disassembled and buffered those invalid bytes + // at `a + m`. Although in the actual binary there are valid + // instructions at this address. 
+ if (copy_result) { + copy_asm_ana_ops(state, rz_reverse, hic); } + return; } ut32 data = rz_read_le32(buf); ut8 parse_bits = (data & HEX_PARSE_BITS_MASK) >> 14; HexInsnContainer hic_new = { 0 }; - setup_new_hic(&hic_new, rz_reverse, addr, parse_bits); + setup_new_hic(&hic_new, rz_reverse, addr, parse_bits, data); // Add to state hic = hex_add_hic_to_state(state, &hic_new); if (!hic) { @@ -873,25 +1093,10 @@ RZ_API void hexagon_reverse_opcode(const RzAsm *rz_asm, HexReversedOpcode *rz_re } HexPkt *p = hex_get_pkt(state, hic->addr); - // Do disasassembly and analysis + // Do disassembly and analysis hexagon_disasm_instruction(state, data, hic, p); - switch (rz_reverse->action) { - default: - memcpy(rz_reverse->asm_op, &hic->asm_op, sizeof(RzAsmOp)); - memcpy(rz_reverse->ana_op, &hic->ana_op, sizeof(RzAnalysisOp)); - rz_strbuf_set(&rz_reverse->asm_op->buf_asm, hic->text); - rz_reverse->asm_op->asm_toks = rz_asm_tokenize_asm_regex(&rz_reverse->asm_op->buf_asm, state->token_patterns); - rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type; - break; - case HEXAGON_DISAS: - memcpy(rz_reverse->asm_op, &hic->asm_op, sizeof(RzAsmOp)); - rz_strbuf_set(&rz_reverse->asm_op->buf_asm, hic->text); - rz_reverse->asm_op->asm_toks = rz_asm_tokenize_asm_regex(&rz_reverse->asm_op->buf_asm, state->token_patterns); - rz_reverse->asm_op->asm_toks->op_type = hic->ana_op.type; - break; - case HEXAGON_ANALYSIS: - memcpy(rz_reverse->ana_op, &hic->ana_op, sizeof(RzAnalysisOp)); - break; + if (copy_result) { + copy_asm_ana_ops(state, rz_reverse, hic); } -} +} \ No newline at end of file diff --git a/handwritten/hexagon_arch_c/include.c b/handwritten/hexagon_arch_c/include.c index 9083b9bf..94513b36 100644 --- a/handwritten/hexagon_arch_c/include.c +++ b/handwritten/hexagon_arch_c/include.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/handwritten/hexagon_arch_h/declarations.h b/handwritten/hexagon_arch_h/declarations.h index 
a837c8bf..4d9aeb38 100644 --- a/handwritten/hexagon_arch_h/declarations.h +++ b/handwritten/hexagon_arch_h/declarations.h @@ -30,8 +30,13 @@ RZ_API void hex_insn_container_free(RZ_NULLABLE HexInsnContainer *c); RZ_API void hex_const_ext_free(RZ_NULLABLE HexConstExt *ce); RZ_API HexState *hexagon_state(bool reset); RZ_IPI void hexagon_state_fini(HexState *state); -RZ_API void hexagon_reverse_opcode(const RzAsm *rz_asm, HexReversedOpcode *rz_reverse, const ut8 *buf, const ut64 addr); +RZ_API void hexagon_reverse_opcode(const RzAsm *rz_asm, HexReversedOpcode *rz_reverse, const ut8 *buf, const ut64 addr, const bool copy_result); RZ_API ut8 hexagon_get_pkt_index_of_addr(const ut32 addr, const HexPkt *p); RZ_API HexLoopAttr hex_get_loop_flag(const HexPkt *p); +RZ_API const HexOp *hex_isa_to_reg(const HexInsn *hi, const char isa_id, bool new_reg); +RZ_API ut64 hex_isa_to_imm(const HexInsn *hi, const char isa_id); void hex_set_hic_text(RZ_INOUT HexInsnContainer *hic); -RZ_API void hex_copy_insn_container(RZ_OUT HexInsnContainer *dest, const HexInsnContainer *src); +RZ_API void hex_move_insn_container(RZ_OUT HexInsnContainer *dest, const HexInsnContainer *src); +RZ_API HexPkt *hex_get_pkt(RZ_BORROW HexState *state, const ut32 addr); +RZ_API HexInsnContainer *hex_get_hic_at_addr(HexState *state, const ut32 addr); +RZ_API const HexOp hex_nreg_to_op(const HexInsnPktBundle *bundle, const char isa_id); diff --git a/handwritten/hexagon_c/functions.c b/handwritten/hexagon_c/functions.c index ebe06e5a..5999c8bd 100644 --- a/handwritten/hexagon_c/functions.c +++ b/handwritten/hexagon_c/functions.c @@ -41,8 +41,8 @@ int resolve_n_register(const int reg_num, const ut32 addr, const HexPkt *p) { hic = rz_list_get_n(p->bin, prod_i); if (!hic || !hic->bin.insn || (hic->is_duplex && (!hic->bin.sub[0] || !hic->bin.sub[1]))) { - // This case happens if the current instruction (with the .new register) - // is yet the only one in the packet. 
+ // This case happens if the current instruction (with the .new register) + // is yet the only one in the packet. return UT32_MAX; } if (hic->identifier == HEX_INS_A4_EXT) { @@ -56,3 +56,61 @@ int resolve_n_register(const int reg_num, const ut32 addr, const HexPkt *p) { } return UT32_MAX; } + +/** + * \brief Returns a HexOp of the given register number and class. + * + * \param reg_num The register number as in the name. + * \param reg_class The HexRegClass this register belongs to. + * \param tmp_reg Flag if the register is a .new register. + * + * \return A setup HexOp. Currently the HexOp.attr field is *not* set! + */ +RZ_API const HexOp hex_explicit_to_op(ut32 reg_num, HexRegClass reg_class, bool tmp_reg) { + HexOp op = { 0 }; + op.type = HEX_OP_TYPE_REG; + op.class = reg_class; + op.op.reg = reg_num; + // TODO: Add attributes? + return op; +} + +/** + * \brief Returns a HexOp of the given register alias. + * + * \param alias The alias to get the HexOp for. + * \param tmp_reg Flag if the alias is referring to the .new register. + * + * \return A setup HexOp. Currently the HexOp.attr field is *not* set! + */ +RZ_API const HexOp hex_alias_to_op(HexRegAlias alias, bool tmp_reg) { + HexOp op = { 0 }; + if (alias >= ARRAY_LEN(hex_alias_reg_lt_v69)) { + rz_warn_if_reached(); + return op; + } + op.type = HEX_OP_TYPE_REG; + op.class = hex_alias_reg_lt_v69[alias].cls; + op.op.reg = hex_alias_reg_lt_v69[alias].reg_enum; + // TODO: Add attributes? + return op; +} + +/** + * \brief Returns the real register name for a register alias. + * + * \param alias The register alias. + * \param tmp_reg The register the tmp real register name. + * \return const char * The corresponding register name. Or NULL on error. 
+ */ +RZ_API const char *hex_alias_to_reg(HexRegAlias alias, bool tmp_reg) { + if (alias >= ARRAY_LEN(hex_alias_reg_lt_v69)) { + return NULL; + } + HexRegClass reg_class = hex_alias_reg_lt_v69[alias].cls; + int reg_enum = hex_alias_reg_lt_v69[alias].reg_enum; + if (alias == HEX_REG_ALIAS_PC) { + return "PC"; + } + return hex_get_reg_in_class(reg_class, reg_enum, false, tmp_reg, true); +} diff --git a/handwritten/hexagon_c/include.c b/handwritten/hexagon_c/include.c index be65f7fe..53fbec79 100644 --- a/handwritten/hexagon_c/include.c +++ b/handwritten/hexagon_c/include.c @@ -12,3 +12,4 @@ #include #include #include +#include diff --git a/handwritten/hexagon_disas_c/functions.c b/handwritten/hexagon_disas_c/functions.c index 22df0813..5290507a 100644 --- a/handwritten/hexagon_disas_c/functions.c +++ b/handwritten/hexagon_disas_c/functions.c @@ -12,7 +12,7 @@ static const HexInsnTemplate *get_sub_template_table(const ut8 duplex_iclass, bool high) { switch(duplex_iclass) { default: - RZ_LOG_WARN("IClasses > 0xe are reserved.\n"); + RZ_LOG_INFO("IClasses > 0xe are reserved.\n"); return NULL; case 0: return high ? 
templates_sub_L1 : templates_sub_L1; @@ -139,9 +139,11 @@ static void hex_disasm_with_templates(const HexInsnTemplate *tpl, HexState *stat hi->op_count++; hi->ops[i].attr = 0; + hi->ops[i].isa_id = op->isa_id; switch (type) { case HEX_OP_TEMPLATE_TYPE_IMM: { hi->ops[i].type = HEX_OP_TYPE_IMM; + hi->ops[i].class = op->info; ut32 bits_total; hi->ops[i].op.imm = hex_op_masks_extract(op->masks, hi_u32, &bits_total) << op->imm_scale; hi->ops[i].shift = op->imm_scale; @@ -184,6 +186,7 @@ static void hex_disasm_with_templates(const HexInsnTemplate *tpl, HexState *stat break; case HEX_OP_TEMPLATE_TYPE_REG: hi->ops[i].type = HEX_OP_TYPE_REG; + hi->ops[i].class = op->reg_cls; hi->ops[i].op.reg = hex_op_masks_extract(op->masks, hi_u32, NULL); if (op->info & HEX_OP_TEMPLATE_FLAG_REG_OUT) { hi->ops[i].attr |= HEX_OP_REG_OUT; @@ -199,7 +202,13 @@ static void hex_disasm_with_templates(const HexInsnTemplate *tpl, HexState *stat if (op->info & HEX_OP_TEMPLATE_FLAG_REG_N_REG) { regidx = resolve_n_register(hi->ops[i].op.reg, hic->addr, pkt); } - rz_strbuf_append(&sb, hex_get_reg_in_class(op->reg_cls, regidx, print_reg_alias)); + const char *reg_name = hex_get_reg_in_class(op->reg_cls, regidx, print_reg_alias, false, false); + if (!reg_name) { + rz_strbuf_append(&sb, ""); + hi->identifier = HEX_INS_INVALID_DECODE; + } else { + rz_strbuf_append(&sb, reg_name); + } break; default: rz_warn_if_reached(); @@ -212,6 +221,7 @@ static void hex_disasm_with_templates(const HexInsnTemplate *tpl, HexState *stat rz_strbuf_append_n(&sb, tpl->syntax + syntax_cur, syntax_len - syntax_cur); } strncpy(hi->text_infix, rz_strbuf_get(&sb), sizeof(hi->text_infix) - 1); + rz_strbuf_fini(&sb); // RzAnalysisOp contents hic->ana_op.addr = hic->addr; @@ -272,7 +282,7 @@ static void hex_set_invalid_duplex(const ut32 hi_u32, RZ_INOUT RZ_NONNULL HexIns HexInsn *hi_low = hic->bin.sub[1]; rz_return_if_fail(hi_high && hi_low); hic->identifier = HEX_INS_INVALID_DECODE; - hic->opcode = hi_u32; + hic->bytes = hi_u32; 
hi_high->opcode = (hi_u32 >> 16) & 0x1fff; hi_low->opcode = hi_u32 & 0x1fff; hi_high->identifier = HEX_INS_INVALID_DECODE; @@ -286,22 +296,27 @@ int hexagon_disasm_instruction(HexState *state, const ut32 hi_u32, RZ_INOUT HexI ut32 addr = hic->addr; if (hic->pkt_info.last_insn) { switch (hex_get_loop_flag(pkt)) { - default: break; + default: + pkt->hw_loop = HEX_NO_LOOP; + break; case HEX_LOOP_01: hic->ana_op.prefix = RZ_ANALYSIS_OP_PREFIX_HWLOOP_END; hic->ana_op.fail = pkt->hw_loop0_addr; hic->ana_op.jump = pkt->hw_loop1_addr; hic->ana_op.val = hic->ana_op.jump; + pkt->hw_loop = HEX_LOOP_01; break; case HEX_LOOP_0: hic->ana_op.prefix = RZ_ANALYSIS_OP_PREFIX_HWLOOP_END; hic->ana_op.jump = pkt->hw_loop0_addr; hic->ana_op.val = hic->ana_op.jump; + pkt->hw_loop = HEX_LOOP_0; break; case HEX_LOOP_1: hic->ana_op.prefix = RZ_ANALYSIS_OP_PREFIX_HWLOOP_END; hic->ana_op.jump = pkt->hw_loop1_addr; hic->ana_op.val = hic->ana_op.jump; + pkt->hw_loop = HEX_LOOP_1; break; } } @@ -316,7 +331,7 @@ int hexagon_disasm_instruction(HexState *state, const ut32 hi_u32, RZ_INOUT HexI ut32 iclass = (((hi_u32 >> 29) & 0xF) << 1) | ((hi_u32 >> 13) & 1); if (iclass == 0xf) { - RZ_LOG_WARN("Reserved duplex instruction class used at: 0x%" PFMT32x ".\n", addr); + RZ_LOG_INFO("Reserved duplex instruction class used at: 0x%" PFMT32x ".\n", addr); } const HexInsnTemplate *tmp_high = get_sub_template_table(iclass, true); @@ -342,9 +357,8 @@ int hexagon_disasm_instruction(HexState *state, const ut32 hi_u32, RZ_INOUT HexI hic->ana_op.eob = true; } if (hic->identifier == HEX_INS_INVALID_DECODE) { + hic->is_duplex = false; hic->ana_op.type = RZ_ANALYSIS_OP_TYPE_ILL; - HexInsn *hi = hexagon_alloc_instr(); - hic->bin.insn = hi; snprintf(hic->bin.insn->text_infix, sizeof(hic->bin.insn->text_infix), "invalid"); } hex_set_hic_text(hic); diff --git a/handwritten/hexagon_disas_c/types.c b/handwritten/hexagon_disas_c/types.c index 0e21772f..80ec4564 100644 --- a/handwritten/hexagon_disas_c/types.c +++ 
b/handwritten/hexagon_disas_c/types.c @@ -39,6 +39,7 @@ typedef struct { typedef struct { ut8 info; // HexOpTemplateType | HexOpTemplateFlag ut8 syntax; // offset into HexInsnTemplate.syntax where to insert this op + char isa_id; HexOpMask masks[HEX_OP_MASKS_MAX]; union { ut8 imm_scale; diff --git a/handwritten/hexagon_h/declarations.h b/handwritten/hexagon_h/declarations.h index 2fb5db6e..df80e5bf 100644 --- a/handwritten/hexagon_h/declarations.h +++ b/handwritten/hexagon_h/declarations.h @@ -5,3 +5,6 @@ RZ_API RZ_BORROW RzConfig *hexagon_get_config(); RZ_API void hex_extend_op(HexState *state, RZ_INOUT HexOp *op, const bool set_new_extender, const ut32 addr); int resolve_n_register(const int reg_num, const ut32 addr, const HexPkt *p); int hexagon_disasm_instruction(HexState *state, const ut32 hi_u32, RZ_INOUT HexInsnContainer *hi, HexPkt *pkt); +RZ_API const HexOp hex_alias_to_op(HexRegAlias alias, bool tmp_reg); +RZ_API const char *hex_alias_to_reg(HexRegAlias alias, bool tmp_reg); +RZ_API const HexOp hex_explicit_to_op(ut32 reg_num, HexRegClass reg_class, bool tmp_reg); diff --git a/handwritten/hexagon_h/macros.h b/handwritten/hexagon_h/macros.h index 67f77436..cd503ad3 100644 --- a/handwritten/hexagon_h/macros.h +++ b/handwritten/hexagon_h/macros.h @@ -1,8 +1,24 @@ // SPDX-FileCopyrightText: 2021 Rot127 // SPDX-License-Identifier: LGPL-3.0-only -#define BIT_MASK(len) (BIT(len)-1) -#define BF_MASK(start, len) (BIT_MASK(len)<<(start)) -#define BF_PREP(x, start, len) (((x)&BIT_MASK(len))<<(start)) -#define BF_GET(y, start, len) (((y)>>(start)) & BIT_MASK(len)) -#define BF_GETB(y, start, end) (BF_GET((y), (start), (end) - (start) + 1) +#define HEX_INSN_SIZE 4 +#define HEX_MAX_INSN_PER_PKT 4 + +#define HEX_PC_ALIGNMENT 0x4 + +#define HEX_PRED_WIDTH 8 +#define HEX_GPR_WIDTH 32 +#define HEX_GPR64_WIDTH 64 +#define HEX_CTR_WIDTH 32 +#define HEX_CTR64_WIDTH 64 + +#define HEX_INVALID_INSN_0 0x00000000 +#define HEX_INVALID_INSN_F 0xffffffff + +#define MAX_CONST_EXT
512 +#define HEXAGON_STATE_PKTS 8 +#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0])) + +#define ALIAS2OP(alias, is_new) hex_alias_to_op(alias, is_new) +#define EXPLICIT2OP(num, class, is_new) hex_explicit_to_op(num, class, is_new) +#define NREG2OP(bundle, isa_id) hex_nreg_to_op(bundle, isa_id) \ No newline at end of file diff --git a/handwritten/hexagon_h/typedefs.h b/handwritten/hexagon_h/typedefs.h index df233e46..3d0cfb9c 100644 --- a/handwritten/hexagon_h/typedefs.h +++ b/handwritten/hexagon_h/typedefs.h @@ -1,8 +1,17 @@ // SPDX-FileCopyrightText: 2021 Rot127 // SPDX-License-Identifier: LGPL-3.0-only -#define MAX_CONST_EXT 512 -#define HEXAGON_STATE_PKTS 8 +typedef struct { + const char *name; + const char *alias; + const char *name_tmp; + const char *alias_tmp; +} HexRegNames; + +typedef struct { + ut32 /* Reg class */ cls; + ut32 /* Reg Enum */ reg_enum; +} HexRegAliasMapping; typedef enum { HEX_OP_TYPE_IMM, @@ -13,10 +22,10 @@ typedef enum { * \brief Flags to mark which kind of predicates instructions use. */ typedef enum { - HEX_NOPRED, ///< no conditional execution - HEX_PRED_TRUE, ///< if (Pd) ... - HEX_PRED_FALSE, ///< if (!Pd) ... - HEX_PRED_NEW, ///< if (Pd.new) or if (!Pd.new) + HEX_NOPRED, ///< no conditional execution + HEX_PRED_TRUE, ///< if (Pd) ... + HEX_PRED_FALSE, ///< if (!Pd) ... + HEX_PRED_NEW, ///< if (Pd.new) or if (!Pd.new) } HexPred; typedef enum { @@ -44,15 +53,55 @@ typedef struct { } HexPktInfo; typedef struct { - ut8 type; + ut8 /* HexOpType */ type; ///< Operand type: Immediate or register + ut8 class; ///< Equivalent to: HexRegClass (for registers) OR HexOpTemplateFlag (for immediate values). + char isa_id; ///< The identifier character in the ISA of this instruction: 'd' for Rdd, I for Ii etc. 0x0 if not known. union { - ut8 reg; // + additional Hi or Lo selector // + additional shift // + additional :brev // - st64 imm; - } op; - HexOpAttr attr; - ut8 shift; + ut8 reg; ///< Register number. E.g. 3 for R3 etc. 
+ st64 imm; ///< Immediate value. + } op; ///< Actual value of the operand. + HexOpAttr attr; ///< Attributes of the operand. + ut8 shift; ///< Number of bits to shift the bits in the opcode to retrieve the operand value. } HexOp; +typedef RzILOpEffect *(*HexILOpGetter)(void /* HexInsnPktBundle */ *); + +typedef enum { + HEX_IL_INSN_ATTR_INVALID = 0, ///< Operation was not set or implemented. + HEX_IL_INSN_ATTR_NONE = 1 << 0, ///< Nothing special about this operation. + HEX_IL_INSN_ATTR_COND = 1 << 1, ///< Executes differently if a certain condition is met. + HEX_IL_INSN_ATTR_SUB = 1 << 2, ///< Operation is a sub-instruction. + HEX_IL_INSN_ATTR_BRANCH = 1 << 3, ///< Operation contains a branch. + HEX_IL_INSN_ATTR_MEM_READ = 1 << 4, ///< Operation reads from the memory. + HEX_IL_INSN_ATTR_MEM_WRITE = 1 << 5, ///< Operation writes to the memory. + HEX_IL_INSN_ATTR_NEW = 1 << 6, ///< Operation reads a .new value. + HEX_IL_INSN_ATTR_WPRED = 1 << 7, ///< Operation writes a predicate register. + HEX_IL_INSN_ATTR_WRITE_P0 = 1 << 8, ///< Writes predicate register P0 + HEX_IL_INSN_ATTR_WRITE_P1 = 1 << 9, ///< Writes predicate register P1 + HEX_IL_INSN_ATTR_WRITE_P2 = 1 << 10, ///< Writes predicate register P2 + HEX_IL_INSN_ATTR_WRITE_P3 = 1 << 11, ///< Writes predicate register P3 +} HexILInsnAttr; + +/** + * \brief Represents a single operation of an instruction. + */ +typedef struct { + HexILOpGetter get_il_op; ///< Pointer to the getter to retrieve the RzILOpEffects of this operation. + HexILInsnAttr attr; ///< Attributes to shuffle it to the correct position in the packets IL ops. + void /* HexInsn */ *hi; ///< The instruction this op belongs to. +} HexILOp; + +/** + * \brief Struct of instruction operations. Usually an instruction has only one operation + * but duplex and compound instructions can have more. + * The last op in this struct has all members set to NULL/0. 
+ */ +typedef struct { + HexILOp op0; + HexILOp op1; + HexILOp end; +} HexILInsn; + typedef struct { bool is_sub; ///< Flag for sub-instructions. ut8 op_count; ///< The number of operands this instruction has. @@ -62,6 +111,9 @@ typedef struct { HexInsnID identifier; ///< The instruction identifier char text_infix[128]; ///< Textual disassembly of the instruction. HexOp ops[HEX_MAX_OPERANDS]; ///< The operands of the instructions. + HexILInsn il_insn; ///< RZIL instruction. These are not meant for execution! Use the packet ops for that. + ut8 slot; ///< The slot the instruction occupies. + RzFloatRMode fround_mode; ///< The float rounding mode of the instruction. } HexInsn; /** @@ -70,65 +122,122 @@ typedef struct { */ typedef struct { ut8 parse_bits; ///< Parse bits of instruction. - bool is_duplex; ///< Does this container hold two sub-instructions? - ut32 identifier; ///< Equals instruction ID if is_duplex = false. Otherwise: (high.id << 16) | (low.id & 0xffff) - union { - HexInsn *sub[2]; ///< Pointer to sub-instructions if is_duplex = true. sub[0] = high, sub[1] = low - HexInsn *insn; ///< Pointer to instruction if is_duplex = false. - } bin; - ut32 addr; ///< Address of container. Equals address of instruction or of the high sub-instruction if this is a duplex. - ut32 opcode; ///< The instruction opcode. - HexPktInfo pkt_info; ///< Packet related information. First/last instr., prefix and postfix for text etc. - // Deprecated members will be removed on RzArch introduction. - RZ_DEPRECATE RzAsmOp asm_op; ///< Private copy of AsmOp. Currently only of interest because it holds the utf8 flag. + bool is_duplex; ///< Does this container hold two sub-instructions? + ut32 identifier; ///< Equals instruction ID if is_duplex = false. Otherwise: (high.id << 16) | (low.id & 0xffff) + union { + HexInsn *sub[2]; ///< Pointer to sub-instructions if is_duplex = true. sub[0] = high, sub[1] = low + HexInsn *insn; ///< Pointer to instruction if is_duplex = false. 
+ } bin; + ut32 addr; ///< Address of container. Equals address of instruction or of the high sub-instruction if this is a duplex. + ut32 bytes; ///< The instruction bytes. + HexPktInfo pkt_info; ///< Packet related information. First/last instr., prefix and postfix for text etc. + // Deprecated members will be removed on RzArch introduction. + RZ_DEPRECATE RzAsmOp asm_op; ///< Private copy of AsmOp. Currently only of interest because it holds the utf8 flag. RZ_DEPRECATE RzAnalysisOp ana_op; ///< Private copy of AnalysisOp. Analysis info is written into it. char text[296]; ///< Textual disassembly } HexInsnContainer; +#define HEX_LOG_SLOT_BIT_OFF 4 +#define HEX_LOG_SLOT_LOG_WIDTH 2 +#define HEX_LOG_SLOT_LOG_MASK 0b11 + +/** + * \brief Holds information about the execution of the packet. + */ +typedef struct { + RzBitVector *slot_cancelled; ///< Flags for cancelled slots. If bit at (1 << slot i) is set, slot i is cancelled. + RzBitVector *pred_read; ///< Predicate register (P0-P3) read, if flags set at (1 << reg_num) are set. + RzBitVector *pred_tmp_read; ///< Tmp predicate register (P0-P3) read, if flags set at (1 << reg_num) are set. + RzBitVector *pred_written; ///< Predicate register (P0-P3) written, if flags (3:0) are set at (1 << pred_num). + ///< The bits[11:4] are used to indicate the last slot which wrote to the predicate (2bit each). + ///< Details are necessary because, if instructions in different slots + ///< write to the same predicate, the result is ANDed. + RzBitVector *gpr_read; ///< GPR register (R0-R31) read, if flags set at (1 << reg_num) are set. + RzBitVector *gpr_tmp_read; ///< Tmp GPR register (R0-R31) read, if flags set at (1 << reg_num) are set. + RzBitVector *gpr_written; ///< GPR register (R0-R31) written, if flags set at (1 << reg_num) are set. + RzBitVector *ctr_read; ///< Control register (C0-C31) read, if flags set at (1 << reg_num) are set. 
+ RzBitVector *ctr_tmp_read; ///< Tmp control register (C0-C31) read, if flags set at (1 << reg_num) are set. + RzBitVector *ctr_written; ///< Control register (C0-C31) written, if flags set at (1 << reg_num) are set. +} HexILExecData; + /** * \brief Represents an Hexagon instruction packet. * We do not assign instructions to slots, but the order of instructions matters nonetheless. - * The layout of a packet is: + * The layout of a real packet is: * * low addr | Slot 3 * ---------+---------- * | Slot 2 * ---------+---------- - * | Slot 1 -> High Sub-Instruction is always in Slot 1 + * | Slot 1 -> High Sub-Instruction of Duplex is always in Slot 1 * ---------+---------- - * high addr| Slot 0 -> Low Sub-Instruction is always in Slot 0 + * high addr| Slot 0 -> Low Sub-Instruction of Duplex is always in Slot 0 * * Because of this order the textual disassembly of duplex instructions is: " ; ". * Also, the high sub-instruction is located at the _lower_ memory address (aligned to 4 bytes). * The low sub-instruction at . * * This said: The HexPkt.bin holds only instruction container, no instructions! - * The container holds the instructions or sub-instructions. + * The container holds a normal instruction or two sub-instructions. */ typedef struct { - RzList /**/ *bin; ///< Descending by address sorted list of instruction containers. bool last_instr_present; ///< Has an instruction the parsing bits 0b11 set (is last instruction). bool is_valid; ///< Is it a valid packet? Do we know which instruction is the first? + bool is_eob; ///< Is this packet the end of a code block? E.g. contains unconditional jmp. + HexLoopAttr hw_loop; ///< If the packet is the end of a hardware loop, this stores which loop ends here. ut32 hw_loop0_addr; ///< Start address of hardware loop 0 ut32 hw_loop1_addr; ///< Start address of hardware loop 1 - ut64 last_access; ///< Last time accessed in milliseconds ut32 pkt_addr; ///< Address of the packet. Equals the address of the first instruction.
- bool is_eob; ///< Is this packet the end of a code block? E.g. contains unconditional jmp. + ut64 last_access; ///< Last time accessed in milliseconds + RzList /**/ *bin; ///< Descending by address sorted list of instruction containers. + RzPVector /**/ *il_ops; ///< Pointer to RZIL ops of the packet. If empty the il ops were not shuffled into order yet. + HexILExecData il_op_stats; ///< Meta information about the IL operations executed (register read/written etc.) } HexPkt; +/** + * \brief This struct is given to the IL getter of each instruction. + * They use it for resolving register names, alias and the like. + */ +typedef struct { + const HexInsn *insn; + HexPkt *pkt; +} HexInsnPktBundle; + typedef struct { - ut32 addr; // Address of the instruction which gets the extender applied. - ut32 const_ext; // The constant extender value. + ut32 addr; ///< Address of the instruction which gets the extender applied. + ut32 const_ext; ///< The constant extender value. } HexConstExt; +/** + * \brief Flags for the debug printing about the state packet buffer. + */ +typedef enum { + HEX_BUF_ADD = 0, ///< Instruction is added to a specific packet i. + HEX_BUF_STALE = 1, ///< Instruction is written to a stale packet (overwrites old one). + HEX_BUF_NEW = 2, ///< Instruction is written to a new packet (overwrites old one). +} HexBufferAction; + /** * \brief Buffer packets for reversed instructions. - * */ typedef struct { - HexPkt pkts[HEXAGON_STATE_PKTS]; // buffered instructions - RzList /**/ *const_ext_l; // Constant extender values. + bool just_init; ///< Flag indicates if IL VM was just initialized. + HexPkt pkts[HEXAGON_STATE_PKTS]; // buffered instructions + RzList /**/ *const_ext_l; // Constant extender values. RzAsm rz_asm; // Copy of RzAsm struct. Holds certain flags of interesed for disassembly formatting. RzConfig *cfg; RzPVector /**/ *token_patterns; ///< PVector with token patterns. Priority ordered. 
HexState; + +/** + * \brief Register fields of different registers. + */ +typedef enum { + HEX_REG_FIELD_USR_LPCFG, ///< The LPCFG field of the USR register + HEX_REG_FIELD_USR_OVF, ///< The OVF field of the USR register +} HexRegField; + +typedef enum { + HEX_RF_WIDTH, + HEX_RF_OFFSET, +} HexRegFieldProperty; diff --git a/handwritten/hexagon_il_X_ops_c/excludes.h b/handwritten/hexagon_il_X_ops_c/excludes.h new file mode 100644 index 00000000..26f7ff01 --- /dev/null +++ b/handwritten/hexagon_il_X_ops_c/excludes.h @@ -0,0 +1,4 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +#include \ No newline at end of file diff --git a/handwritten/hexagon_il_X_ops_c/includes.h b/handwritten/hexagon_il_X_ops_c/includes.h new file mode 100644 index 00000000..4daa5185 --- /dev/null +++ b/handwritten/hexagon_il_X_ops_c/includes.h @@ -0,0 +1,7 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +#include +#include "../hexagon_il.h" +#include +#include diff --git a/handwritten/hexagon_il_X_ops_c/non_insn_ops.c b/handwritten/hexagon_il_X_ops_c/non_insn_ops.c new file mode 100644 index 00000000..eff739cf --- /dev/null +++ b/handwritten/hexagon_il_X_ops_c/non_insn_ops.c @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +/** + * \brief Returns the value of a register field property. + * + * \param property The property to get the value for. + * \param field The register field. + * \return RzILOpPure The value as U32, or NULL if the field or property is not handled.
+ */ +RZ_IPI RZ_OWN RzILOpPure *hex_get_rf_property_val(const HexRegFieldProperty property, const HexRegField field) { + RzILOpPure *r = NULL; + switch (field) { + default: + RZ_LOG_WARN("Register field not implemented.\n"); + break; + case HEX_REG_FIELD_USR_LPCFG: + if (property == HEX_RF_WIDTH) { + r = U32(2); + } else if (property == HEX_RF_OFFSET) { + r = U32(8); + } + break; + case HEX_REG_FIELD_USR_OVF: + if (property == HEX_RF_WIDTH) { + r = U32(1); + } else if (property == HEX_RF_OFFSET) { + r = U32(0); + } + break; + } + return r; +} + +/** + * \brief Returns an effect which stores the next PC in the local variable "ret_val". + * + * \param pkt The instruction packet. + * \return RzILOpEffect* Effect which sets "ret_val" to the address following the packet (as U64). + */ +RZ_IPI RZ_OWN RzILOpEffect *hex_get_npc(const HexPkt *pkt) { + rz_return_val_if_fail(pkt, NULL); + RzILOpPure *r; + r = U64(pkt->pkt_addr + (rz_list_length(pkt->bin) * HEX_INSN_SIZE)); + return SETL("ret_val", r); +} + +RZ_IPI RZ_OWN RzILOpEffect *hex_commit_packet(HexInsnPktBundle *bundle) { + HexILExecData *stats = &bundle->pkt->il_op_stats; + RzILOpEffect *commit_seq = EMPTY(); + for (ut8 i = 0; i <= HEX_REG_CTR_REGS_C31; ++i) { + if (!(rz_bv_get(stats->ctr_written, i))) { + continue; + } + const char *dest_reg = hex_get_reg_in_class(HEX_REG_CLASS_CTR_REGS, i, false, false, false); + const char *src_reg = hex_get_reg_in_class(HEX_REG_CLASS_CTR_REGS, i, false, true, false); + commit_seq = SEQ2(commit_seq, SETG(dest_reg, VARG(src_reg))); + } + + for (ut8 i = 0; i <= HEX_REG_INT_REGS_R31; ++i) { + if (!(rz_bv_get(stats->gpr_written, i))) { + continue; + } + const char *dest_reg = hex_get_reg_in_class(HEX_REG_CLASS_INT_REGS, i, false, false, false); + const char *src_reg = hex_get_reg_in_class(HEX_REG_CLASS_INT_REGS, i, false, true, false); + commit_seq = SEQ2(commit_seq, SETG(dest_reg, VARG(src_reg))); + } + + for (ut8 i = 0; i <= HEX_REG_PRED_REGS_P3; ++i) { + if (!(rz_bv_get(stats->pred_written, i))) { + continue; + } + const char *dest_reg =
hex_get_reg_in_class(HEX_REG_CLASS_PRED_REGS, i, false, false, false); + const char *src_reg = hex_get_reg_in_class(HEX_REG_CLASS_PRED_REGS, i, false, true, false); + commit_seq = SEQ2(commit_seq, SETG(dest_reg, VARG(src_reg))); + } + + hex_reset_il_pkt_stats(stats); + return commit_seq; +} + +RZ_IPI RZ_OWN RzILOpEffect *hex_il_op_jump_flag_init(HexInsnPktBundle *bundle) { + return SETL("jump_flag", IL_FALSE); +} + +RZ_IPI RZ_OWN RzILOpEffect *hex_il_op_next_pkt_jmp(HexInsnPktBundle *bundle) { + return BRANCH(VARL("jump_flag"), JMP(VARL("jump_target")), JMP(U32(bundle->pkt->pkt_addr + (HEX_INSN_SIZE * rz_list_length(bundle->pkt->bin))))); +} diff --git a/handwritten/hexagon_il_c/exclude.c b/handwritten/hexagon_il_c/exclude.c new file mode 100644 index 00000000..03294683 --- /dev/null +++ b/handwritten/hexagon_il_c/exclude.c @@ -0,0 +1,4 @@ +// SPDX-FileCopyrightText: 2021 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +#include diff --git a/handwritten/hexagon_il_c/functions.c b/handwritten/hexagon_il_c/functions.c new file mode 100644 index 00000000..c580f4cb --- /dev/null +++ b/handwritten/hexagon_il_c/functions.c @@ -0,0 +1,859 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +static HexILOp hex_jump_flag_init_op = { + .attr = HEX_IL_INSN_ATTR_NONE, + .get_il_op = (HexILOpGetter)hex_il_op_jump_flag_init, +}; + +static HexILOp hex_next_jump_to_next_pkt = { + .attr = HEX_IL_INSN_ATTR_BRANCH | HEX_IL_INSN_ATTR_COND, + .get_il_op = (HexILOpGetter)hex_il_op_next_pkt_jmp, +}; + +static HexILOp hex_pkt_commit = { + .attr = HEX_IL_INSN_ATTR_NONE, + .get_il_op = (HexILOpGetter)hex_commit_packet, +}; + +static HexILOp hex_endloop0_op = { + .attr = HEX_IL_INSN_ATTR_BRANCH | HEX_IL_INSN_ATTR_COND, + .get_il_op = (HexILOpGetter)hex_il_op_j2_endloop0, +}; + +static HexILOp hex_endloop1_op = { + .attr = HEX_IL_INSN_ATTR_BRANCH | HEX_IL_INSN_ATTR_COND, + .get_il_op = (HexILOpGetter)hex_il_op_j2_endloop1, +}; + +static HexILOp 
hex_endloop01_op = { + .attr = HEX_IL_INSN_ATTR_BRANCH | HEX_IL_INSN_ATTR_COND, + .get_il_op = (HexILOpGetter)hex_il_op_j2_endloop01, +}; + +/** + * \brief Sends the IL op at \p start to the position \p newloc. + * + * Note: This is a copy of the same function implemented by Qualcomm in QEMU. + * See: https://gitlab.com/qemu-project/qemu/-/blob/master/target/hexagon/decode.c :: decode_send_insn_to + * + * \param ops The IL ops list. + * \param start Index of the op to move. + * \param newloc Position the op shall be moved to. + */ +static void hex_send_insn_to_i(RzPVector /**/ *ops, ut8 start, ut8 newloc) { + rz_return_if_fail(ops); + + st32 direction; + st32 i; + if (start == newloc) { + return; + } + if (start < newloc) { + /* Move towards end */ + direction = 1; + } else { + /* move towards beginning */ + direction = -1; + } + for (i = start; i != newloc; i += direction) { + HexILOp *tmp_op = *rz_pvector_assign_at(ops, i, (HexILOp *)rz_pvector_at(ops, i + direction)); + rz_pvector_assign_at(ops, i + direction, tmp_op); + } +} + +/** + * \brief Shuffles the IL operations of the packet instructions into the correct execution order + * and stores the result in \p p->il_ops + * + * The shuffle algorithm implemented here is a copy of Qualcomm's implementation in QEMU: + * https://gitlab.com/qemu-project/qemu/-/blob/master/target/hexagon/decode.c :: decode_shuffle_for_execution + * + * Though some changes were made: + * * Endloops are not handled here (they are pushed to the ops list afterwards). + * * ".new cmp jump" instructions were already split by us at this stage. So we don't check for them. + * + * \param p A valid packet which holds all instructions and the IL ops. + * \return true Shuffle was successful. + * \return false Shuffle failed. + */ +RZ_IPI bool hex_shuffle_insns(RZ_INOUT HexPkt *p) { + rz_return_val_if_fail(p, false); + if (!p->is_valid) { + // Incomplete packets cannot be executed.
+ return false; + } + if (rz_pvector_empty(p->il_ops)) { + RZ_LOG_WARN("Valid packet without RZIL instructions encountered! pkt addr = 0x%" PFMT32x "\n", p->pkt_addr); + return false; + } + RzPVector *ops = p->il_ops; + + // Do the shuffle + bool changed = false; + int i; + bool flag; /* flag means we've seen a non-memory instruction */ + int n_mems; /* Number of memory instructions passed */ + int last_insn = rz_pvector_len(p->il_ops) - 1; + HexILOp *op; + + do { + changed = false; + /* + * Stores go last, must not reorder. + * Cannot shuffle stores past loads, either. + * Iterate backwards. If we see a non-memory instruction, + * then a store, shuffle the store to the front. Don't shuffle + * stores with regard to each other or a load. + */ + n_mems = 0; + flag = false; + for (flag = false, n_mems = 0, i = last_insn; i >= 0; i--) { + op = (HexILOp *)rz_pvector_at(ops, i); + if (!op) { + RZ_LOG_FATAL("NULL il op at index %" PFMT32d "\n", i); + } + if (flag && (op->attr & HEX_IL_INSN_ATTR_MEM_WRITE)) { + hex_send_insn_to_i(ops, i, last_insn - n_mems); + n_mems++; + changed = true; + } else if (op->attr & HEX_IL_INSN_ATTR_MEM_WRITE) { + n_mems++; + } else if (op->attr & HEX_IL_INSN_ATTR_MEM_READ) { + /* + * Don't set flag, since we don't want to shuffle a + * store past a load + */ + n_mems++; + } else if (op->attr & HEX_IL_INSN_ATTR_NEW) { + /* + * Don't set flag, since we don't want to shuffle past + * a .new value + */ + } else { + flag = true; + } + } + if (changed) { + continue; + } + + /* Comparisons go first, may be reordered with regard to each other */ + for (flag = false, i = 0; i < last_insn + 1; i++) { + op = (HexILOp *)rz_pvector_at(ops, i); + if ((op->attr & HEX_IL_INSN_ATTR_WPRED) && + !(op->attr & HEX_IL_INSN_ATTR_MEM_WRITE)) { + /* Predicate writer which is not a store: this should be a comparison (not a store conditional) */ + if (flag) { + hex_send_insn_to_i(ops, i, 0); + changed = true; + continue; + } + } else if (op->attr & HEX_IL_INSN_ATTR_WRITE_P3) /* && !is_endloop */ { + //
Endloops get pushed afterwards. + if (flag) { + hex_send_insn_to_i(ops, i, 0); + changed = true; + continue; + } + } else if (op->attr & HEX_IL_INSN_ATTR_WRITE_P0) /* && !is_new_cmp_jmp */ { + // We have already split .new cmp jumps at this point. So no need to check for it. + if (flag) { + hex_send_insn_to_i(ops, i, 0); + changed = true; + continue; + } + } else { + flag = true; + } + } + if (changed) { + continue; + } + } while (changed); + + /* + * If we have a .new register compare/branch, move that to the very + * very end, past stores + */ + for (i = 0; i < last_insn; i++) { + op = (HexILOp *)rz_pvector_at(ops, i); + if (op->attr & HEX_IL_INSN_ATTR_NEW) { + hex_send_insn_to_i(ops, i, last_insn); + break; + } + } + return true; +} + +static RzILOpEffect *hex_il_op_to_effect(const HexILOp *il_op, HexPkt *pkt) { + rz_return_val_if_fail(il_op && il_op->get_il_op, NULL); + HexInsnPktBundle bundle = { 0 }; + bundle.insn = (HexInsn *)il_op->hi; + bundle.pkt = pkt; + return il_op->get_il_op(&bundle); +} + +/** + * \brief Transforms the HexILOps of a packet into a single sequence. + * + * \param pkt The hexagon packet whose IL ops (pkt->il_ops) are transformed. + * \return RzILOpEffect* Sequence of operations to emulate the packet. NULL if the packet holds only a single IL op. + */ +static RZ_OWN RzILOpEffect *hex_pkt_to_il_seq(HexPkt *pkt) { + rz_return_val_if_fail(pkt && pkt->il_ops, NULL); + + if (rz_pvector_len(pkt->il_ops) == 1) { + rz_pvector_clear(pkt->il_ops); + // We need at least the instruction op and the packet commit. + // So if there aren't at least two ops something went wrong. + RZ_LOG_WARN("Invalid il ops sequence! 
There should be at least two il ops per packet.\n"); + return NULL; + } + RzILOpEffect *complete_seq = EMPTY(); + for (ut32 i = 0; i < rz_pvector_len(pkt->il_ops); ++i) { + complete_seq = SEQ2(complete_seq, hex_il_op_to_effect((HexILOp *)rz_pvector_at(pkt->il_ops, i), pkt)); + } + return complete_seq; +} + +static bool set_pkt_il_ops(RZ_INOUT HexPkt *p) { + rz_return_val_if_fail(p, false); + hex_reset_il_pkt_stats(&p->il_op_stats); + // This function is a lot of unnecessary overhead so: + // TODO The assignment of IL instructions to their actual instructions should be done in the instruction template. + // But with the current separation between Asm and Analysis plugins this is not possible. + // Because Asm is not allowed to depend on Analysis and the RZIL code. + // This should be fixed ASAP after RzArch has been introduced. + HexInsnContainer *pos; + RzListIter *it; + rz_list_foreach (p->bin, it, pos) { + HexILInsn *cur_il_insn; + if (pos->is_duplex) { + // High sub-instructions + pos->bin.sub[0]->il_insn = hex_il_getter_lt[pos->bin.sub[0]->identifier]; + cur_il_insn = &pos->bin.sub[0]->il_insn; + // high sub operation 0 + cur_il_insn->op0.hi = pos->bin.sub[0]; + if (cur_il_insn->op0.attr == HEX_IL_INSN_ATTR_INVALID) { + goto not_impl; + } + rz_pvector_push(p->il_ops, &cur_il_insn->op0); + + // high sub operation 1 + if (cur_il_insn->op1.attr != HEX_IL_INSN_ATTR_INVALID) { + cur_il_insn->op1.hi = pos->bin.sub[0]; + rz_pvector_push(p->il_ops, &cur_il_insn->op1); + } + + // Low sub-instructions + pos->bin.sub[1]->il_insn = hex_il_getter_lt[pos->bin.sub[1]->identifier]; + cur_il_insn = &pos->bin.sub[1]->il_insn; + // low sub operation 0 + cur_il_insn->op0.hi = pos->bin.sub[1]; + if (cur_il_insn->op0.attr == HEX_IL_INSN_ATTR_INVALID) { + goto not_impl; + } + rz_pvector_push(p->il_ops, &cur_il_insn->op0); + + // low sub operation 1 + if (cur_il_insn->op1.attr != HEX_IL_INSN_ATTR_INVALID) { + pos->bin.sub[1]->il_insn.op1.hi = pos->bin.sub[1]; + 
rz_pvector_push(p->il_ops, &cur_il_insn->op1); + } + } else { + pos->bin.insn->il_insn = hex_il_getter_lt[pos->bin.insn->identifier]; + cur_il_insn = &pos->bin.insn->il_insn; + // Insn operation 0 + cur_il_insn->op0.hi = pos->bin.insn; + if (cur_il_insn->op0.attr == HEX_IL_INSN_ATTR_INVALID) { + goto not_impl; + } + rz_pvector_push(p->il_ops, &cur_il_insn->op0); + // Insn operation 1 + if (cur_il_insn->op1.attr != HEX_IL_INSN_ATTR_INVALID) { + cur_il_insn->op1.hi = pos->bin.insn; + rz_pvector_push(p->il_ops, &cur_il_insn->op1); + } + } + } + return true; +not_impl: + RZ_LOG_INFO("Hexagon instruction %" PFMT32d " not implemented.\n", pos->bin.insn->identifier); + return false; +} + +static void check_for_jumps(const HexPkt *p, RZ_OUT bool *jump_flag) { + rz_return_if_fail(p && jump_flag); + void **it; + HexILOp *op; + rz_pvector_foreach (p->il_ops, it) { + op = *it; + if (op->attr & HEX_IL_INSN_ATTR_BRANCH) { + *jump_flag = true; + } + } +} + +/** + * \brief Checks if the packet at \p addr has all conditions fulfilled + * to be executed. + * + * \param pkt The packet to check. + * \param addr Address of the requested IL operation. + * + * \return true If the packet can be set up for emulation. + * \return false Otherwise. + */ +static inline bool pkt_at_addr_is_emu_ready(const HexPkt *pkt, const ut32 addr) { + if (rz_list_length(pkt->bin) == 1) { + const HexInsnContainer *hic = rz_list_get_n(pkt->bin, 0); + if (hic->identifier == HEX_INS_INVALID_DECODE) { + return false; + } + } + return addr == pkt->pkt_addr && pkt->is_valid && pkt->last_instr_present; +} + +/** + * \brief Returns the IL operation of the instruction at \p addr. This will always be EMPTY(). + * Except for last instructions in a packet. Those will always return the complete IL operation + * of the packet or NULL if one instruction was not implemented or an error occurred. + * + * \param addr Address of the requested IL operation. 
+ * \param get_pkt_op If true, it returns the IL operation of the whole packet at \p addr. + * It will return EMPTY() if there is no packet which starts at \p addr. + * If false, the behavior is as documented above. + * \return RzILOpEffect* Sequence of operations to emulate the packet. + */ +RZ_IPI RzILOpEffect *hex_get_il_op(const ut32 addr, const bool get_pkt_op) { + static bool might_has_jumped = false; + HexState *state = hexagon_state(false); + if (!state) { + RZ_LOG_WARN("Failed to get hexagon plugin state data!\n"); + return NULL; + } + HexPkt *p = hex_get_pkt(state, addr); + if (!p) { + RZ_LOG_WARN("Packet was NULL although it should have been disassembled at this point.\n"); + return NULL; + } + HexInsnContainer *hic = hex_get_hic_at_addr(state, addr); + if (!hic) { + return EMPTY(); + } + if (hic->identifier == HEX_INS_INVALID_DECODE) { + return NULL; + } + if (state->just_init || might_has_jumped) { + // Assume that the instruction at the address the VM was initialized is the first instruction. + // Also make it valid if a jump let to this packet. + p->is_valid = true; + hic->pkt_info.first_insn = true; + state->just_init = false; + might_has_jumped = false; + } + + if (!get_pkt_op && !hic->pkt_info.last_insn) { + // Only at the last instruction we execute all il ops of the packet. 
+ return EMPTY(); + } + + if (!(get_pkt_op && pkt_at_addr_is_emu_ready(p, addr)) || !pkt_at_addr_is_emu_ready(p, p->pkt_addr)) { + // Invalid packet, EMPTY() + return EMPTY(); + } + + if (!rz_pvector_empty(p->il_ops)) { + check_for_jumps(p, &might_has_jumped); + return hex_pkt_to_il_seq(p); + } + + rz_pvector_push(p->il_ops, &hex_jump_flag_init_op); + + if (!set_pkt_il_ops(p)) { + RZ_LOG_INFO("IL ops at 0x%" PFMT32x " contain not implemented instructions.\n", addr); + return NULL; + } + + if (!hex_shuffle_insns(p)) { + RZ_LOG_WARN("Instruction shuffle failed at 0x%" PFMT32x "\n", addr); + return NULL; + } + + if (hex_get_loop_flag(p) == HEX_LOOP_0) { + rz_pvector_push(p->il_ops, &hex_endloop0_op); + } else if (hex_get_loop_flag(p) == HEX_LOOP_1) { + rz_pvector_push(p->il_ops, &hex_endloop1_op); + } else if (hex_get_loop_flag(p) == HEX_LOOP_01) { + rz_pvector_push(p->il_ops, &hex_endloop01_op); + } + + rz_pvector_push(p->il_ops, &hex_pkt_commit); + // Add a jump to the next packet. This always has to come last. 
+ rz_pvector_push(p->il_ops, &hex_next_jump_to_next_pkt); + + + check_for_jumps(p, &might_has_jumped); + + return hex_pkt_to_il_seq(p); +} + +static void log_reg_read(RZ_BORROW HexPkt *pkt, ut8 reg_num, HexRegClass reg_class, bool tmp_reg) { + rz_return_if_fail(pkt); + if (reg_num > 63 || (reg_class == HEX_REG_CLASS_PRED_REGS && reg_num > 3)) { + rz_warn_if_reached(); + RZ_LOG_WARN("Register number %d should not be greater then 63 (gprs) or 3 (predicates).", reg_num); + } + switch (reg_class) { + default: + rz_warn_if_reached(); + RZ_LOG_WARN("Register reads of register class %d are not yet tracked!", reg_class); + break; + case HEX_REG_CLASS_DOUBLE_REGS: + case HEX_REG_CLASS_GENERAL_DOUBLE_LOW8_REGS: + if (tmp_reg) { + rz_bv_set(pkt->il_op_stats.gpr_tmp_read, (reg_num + 1), true); + } else { + rz_bv_set(pkt->il_op_stats.gpr_read, (reg_num + 1), true); + } + // fallthrough + case HEX_REG_CLASS_INT_REGS: + case HEX_REG_CLASS_INT_REGS_LOW8: + case HEX_REG_CLASS_GENERAL_SUB_REGS: + if (tmp_reg) { + rz_bv_set(pkt->il_op_stats.gpr_tmp_read, reg_num, true); + } else { + rz_bv_set(pkt->il_op_stats.gpr_read, reg_num, true); + } + break; + case HEX_REG_CLASS_CTR_REGS64: + if (tmp_reg) { + rz_bv_set(pkt->il_op_stats.ctr_tmp_read, (reg_num + 1), true); + } else { + rz_bv_set(pkt->il_op_stats.ctr_read, (reg_num + 1), true); + } + // fallthrough + case HEX_REG_CLASS_MOD_REGS: + case HEX_REG_CLASS_CTR_REGS: + if (tmp_reg) { + rz_bv_set(pkt->il_op_stats.ctr_tmp_read, reg_num, true); + } else { + rz_bv_set(pkt->il_op_stats.ctr_read, reg_num, true); + } + break; + case HEX_REG_CLASS_PRED_REGS: + if (tmp_reg) { + rz_bv_set(pkt->il_op_stats.pred_tmp_read, reg_num, true); + } else { + rz_bv_set(pkt->il_op_stats.pred_read, reg_num, true); + } + break; + } +} + +static inline void log_pred_write_slot(HexInsnPktBundle *bundle, ut32 pred_num) { + ut32 pos = (pred_num * HEX_LOG_SLOT_LOG_WIDTH); + rz_bv_set_range(bundle->pkt->il_op_stats.pred_written, HEX_LOG_SLOT_BIT_OFF + pos, 
HEX_LOG_SLOT_BIT_OFF + pos + 2, false); + rz_bv_set(bundle->pkt->il_op_stats.pred_written, bundle->insn->slot + HEX_LOG_SLOT_BIT_OFF + pos, true); +} + +static void log_reg_write(RZ_BORROW HexInsnPktBundle *bundle, ut8 reg_num, HexRegClass reg_class, bool read, bool tmp_reg) { + rz_return_if_fail(bundle); + HexPkt *pkt = bundle->pkt; + + if (reg_num > 63 || (reg_class == HEX_REG_CLASS_PRED_REGS && reg_num > 3)) { + rz_warn_if_reached(); + RZ_LOG_WARN("Register number %d should not be greater then 63 (gprs) or 3 (predicates).", reg_num); + } + switch (reg_class) { + default: + rz_warn_if_reached(); + RZ_LOG_WARN("Register writes of register class %d are not yet tracked!", reg_class); + break; + case HEX_REG_CLASS_DOUBLE_REGS: + case HEX_REG_CLASS_GENERAL_DOUBLE_LOW8_REGS: + rz_bv_set(pkt->il_op_stats.gpr_written, (reg_num + 1), true); + // fallthrough + case HEX_REG_CLASS_INT_REGS: + case HEX_REG_CLASS_INT_REGS_LOW8: + case HEX_REG_CLASS_GENERAL_SUB_REGS: + rz_bv_set(pkt->il_op_stats.gpr_written, reg_num, true); + break; + case HEX_REG_CLASS_CTR_REGS64: + if (hex_ctr_immut_masks[reg_num + 1] != HEX_IMMUTABLE_REG) { + rz_bv_set(pkt->il_op_stats.ctr_written, (reg_num + 1), true); + } + // fallthrough + case HEX_REG_CLASS_MOD_REGS: + case HEX_REG_CLASS_CTR_REGS: + if (hex_ctr_immut_masks[reg_num] != HEX_IMMUTABLE_REG) { + rz_bv_set(pkt->il_op_stats.ctr_written, reg_num, true); + } + break; + case HEX_REG_CLASS_PRED_REGS: + rz_bv_set(pkt->il_op_stats.pred_written, reg_num, true); + if (bundle->insn) { + log_pred_write_slot(bundle, reg_num); + } + break; + } +} + +static ut32 get_last_slot_w_to_p(const HexPkt *pkt, ut32 pred_num) { + rz_return_val_if_fail(pkt, false); + ut32 slots = (rz_bv_to_ut32(pkt->il_op_stats.pred_written) >> HEX_LOG_SLOT_BIT_OFF); + return (slots >> (pred_num * HEX_LOG_SLOT_LOG_WIDTH)) & HEX_LOG_SLOT_LOG_MASK; +} + +/** + * \brief Checks if another slot wrote to a given predicate reg before. + * + * \param bundle The bundle currently in use. 
/**
 * \brief Checks if another slot wrote to a given predicate reg before.
 *
 * \param bundle The bundle currently in use.
 * \param pred_num The number of the predicate register to check.
 *
 * \return true The predicate was written before by another slot.
 * \return false The predicate was not written by another slot.
 */
static bool other_slot_wrote_to_pred(const HexInsnPktBundle *bundle, ut32 pred_num) {
	rz_return_val_if_fail(bundle && bundle->pkt && (pred_num < 4), false);
	const HexPkt *pkt = bundle->pkt;
	// NOTE(review): log_reg_write() marks a written predicate at bit index `reg_num`,
	// but the checks below read bit index `1 << pred_num` (1, 2, 4, 8).
	// Confirm which of the two indices is intended.
	if (!bundle->insn) {
		// Non instruction ops
		return rz_bv_get(pkt->il_op_stats.pred_written, 1 << pred_num);
	}
	bool pw = rz_bv_get(pkt->il_op_stats.pred_written, 1 << pred_num);
	bool slot_w = get_last_slot_w_to_p(bundle->pkt, pred_num) != bundle->insn->slot;
	return pw && slot_w;
}

/**
 * \brief Combines a new value with the current register value, so the bits set in \p mask keep
 * the value they have in \p reg_val and all other bits are taken from \p val.
 *
 * \param reg_val The current value of the register.
 * \param val The value to write.
 * \param mask The mask of the bits which must not change.
 *
 * \return The pure with the combined value.
 */
static inline RzILOpPure *get_masked_reg_val(RzILOpPure *reg_val, RzILOpPure *val, ut32 mask) {
	RzILOpPure *masked_val = LOGAND(val, LOGNOT(U32(mask)));
	RzILOpPure *masked_reg = LOGAND(reg_val, U32(mask));
	return LOGOR(masked_reg, masked_val);
}

/**
 * \brief Writes the given value to the register specified in \p op and logs the write.
 * If the register is a double register, each of its sub-registers are written separately.
 * The double register itself will *not* be written.
 *
 * Control registers marked as HEX_IMMUTABLE_REG in hex_ctr_immut_masks are not written.
 * For partially immutable control registers only the mutable bits are changed.
 * A write to C4 (P3:0) additionally writes each of the four predicate registers.
 *
 * \param bundle The bundle with the currently executed packet and instruction.
 * \param op The HexOp of the register to write.
 * \param val The value to write.
 *
 * \return The effect which writes the register or NULL in case of failure.
 * EMPTY() is returned if nothing has to be written.
 */
RZ_IPI RZ_OWN RzILOpEffect *hex_write_reg(RZ_BORROW HexInsnPktBundle *bundle, const HexOp *op, RzILOpPure *val) {
	rz_return_val_if_fail(bundle && op && val, NULL);

	const char *high_name = NULL;
	const char *low_name = NULL;
	RzILOpPure *high_val = NULL;
	RzILOpPure *low_val = NULL;
	RzILOpEffect *p3_0_write_seq = NULL; // If C4 (P3:0) is written this is non-NULL.
	ut32 reg_num = hex_resolve_reg_enum_id(op->class, op->op.reg);
	ut32 dest_width = HEX_GPR_WIDTH;
	switch (op->class) {
	default:
		rz_warn_if_reached();
		RZ_LOG_WARN("Writing ops of class %d is not implemented yet.", op->class);
		return NULL;
	case HEX_REG_CLASS_DOUBLE_REGS:
	case HEX_REG_CLASS_GENERAL_DOUBLE_LOW8_REGS:
		// The high register of the pair gets the upper HEX_GPR_WIDTH bits of the value.
		high_name = hex_get_reg_in_class(HEX_REG_CLASS_INT_REGS, reg_num + 1, false, true, true);
		if (!high_name) {
			return NULL;
		}
		// val is duplicated here, because it is consumed by the low value below.
		high_val = SHIFTR0(DUP(val), U8(HEX_GPR_WIDTH));
		// fallthrough
	case HEX_REG_CLASS_INT_REGS:
	case HEX_REG_CLASS_INT_REGS_LOW8:
	case HEX_REG_CLASS_GENERAL_SUB_REGS:
		low_name = hex_get_reg_in_class(HEX_REG_CLASS_INT_REGS, reg_num, false, true, true);
		if (!low_name) {
			return NULL;
		}
		low_val = CAST(HEX_GPR_WIDTH, IL_FALSE, val);
		break;
	case HEX_REG_CLASS_CTR_REGS64:
		// The high register is only written if it is not completely immutable.
		if (hex_ctr_immut_masks[reg_num + 1] != HEX_IMMUTABLE_REG) {
			high_name = hex_get_reg_in_class(HEX_REG_CLASS_CTR_REGS, reg_num + 1, false, true, true);
			if (!high_name) {
				return NULL;
			}
			high_val = SHIFTR0(DUP(val), U8(HEX_GPR_WIDTH));
			if (hex_ctr_immut_masks[reg_num + 1] != 0) {
				// Some bits are immutable. Keep their current value.
				high_val = get_masked_reg_val(VARG(high_name), CAST(HEX_GPR_WIDTH, IL_FALSE, high_val), hex_ctr_immut_masks[reg_num + 1]);
			}
		}
		// fallthrough
	case HEX_REG_CLASS_MOD_REGS:
	case HEX_REG_CLASS_CTR_REGS:
		// NOTE(review): if this register is immutable, val is not consumed on this path - confirm ownership.
		if (hex_ctr_immut_masks[reg_num] != HEX_IMMUTABLE_REG) {
			low_name = hex_get_reg_in_class(HEX_REG_CLASS_CTR_REGS, reg_num, false, true, true);
			if (!low_name) {
				return NULL;
			}
			low_val = CAST(HEX_GPR_WIDTH, IL_FALSE, val);
			if (hex_ctr_immut_masks[reg_num] != 0) {
				// Some bits are immutable. Keep their current value.
				low_val = get_masked_reg_val(VARG(low_name), low_val, hex_ctr_immut_masks[reg_num]);
			}
			if (reg_num == 4) {
				// C4 is P3:0. Byte n of the value is written to the predicate register Pn.
				HexOp pred_op = { 0 };
				pred_op.class = HEX_REG_CLASS_PRED_REGS;
				pred_op.op.reg = 0;
				p3_0_write_seq = hex_write_reg(bundle, &pred_op, CAST(8, IL_FALSE, DUP(low_val)));
				pred_op.op.reg = 1;
				p3_0_write_seq = SEQ2(hex_write_reg(bundle, &pred_op, CAST(8, IL_FALSE, SHIFTR0(DUP(low_val), U8(8)))), p3_0_write_seq);
				pred_op.op.reg = 2;
				p3_0_write_seq = SEQ2(hex_write_reg(bundle, &pred_op, CAST(8, IL_FALSE, SHIFTR0(DUP(low_val), U8(16)))), p3_0_write_seq);
				pred_op.op.reg = 3;
				p3_0_write_seq = SEQ2(hex_write_reg(bundle, &pred_op, CAST(8, IL_FALSE, SHIFTR0(DUP(low_val), U8(24)))), p3_0_write_seq);
				break;
			}
		}
		break;
	case HEX_REG_CLASS_PRED_REGS:
		low_name = hex_get_reg_in_class(HEX_REG_CLASS_PRED_REGS, reg_num, false, true, true);
		if (!low_name) {
			return NULL;
		}
		if (other_slot_wrote_to_pred(bundle, reg_num)) {
			// If the register was written before by another slot, the values get ANDed.
			low_val = LOGAND(VARG(low_name), val);
		} else {
			low_val = val;
		}
		dest_width = HEX_PRED_WIDTH;
		break;
	}
	RzILOpEffect *write_high = high_val ? SETG(high_name, CAST(dest_width, IL_FALSE, high_val)) : NULL;
	RzILOpEffect *write_low = low_val ? SETG(low_name, CAST(dest_width, IL_FALSE, low_val)) : NULL;
	if (p3_0_write_seq) {
		// The predicate writes are executed after the write to C4 itself.
		write_low = SEQ2(write_low, p3_0_write_seq);
	}
	// The write is logged after the effects were built, so the checks above only see previous writes.
	log_reg_write(bundle, reg_num, op->class, false, true);

	if (write_high && write_low) {
		return SEQ2(write_low, write_high);
	} else if (write_low) {
		return write_low;
	} else if (write_high) {
		return write_high;
	}
	// Nothing to write (e.g. immutable control register).
	return EMPTY();
}

/**
 * \brief Checks if the values gathered for a register read cannot be used.
 *
 * \param low_val The value of the low register.
 * \param high_val The value of the high register. Only required for 64bit reads.
 * \param val_width The width of the read value in bits.
 *
 * \return true If the low value is missing, the width is 0 or not a multiple of 8,
 * or a 64bit read has no high value.
 * \return false Otherwise.
 */
static inline bool read_cond_faulty(RzILOpPure *low_val, RzILOpPure *high_val, ut32 val_width) {
	if (!low_val || val_width == 0 || (val_width % 8 != 0)) {
		return true;
	}
	if (val_width == HEX_GPR64_WIDTH && !high_val) {
		return true;
	}
	return false;
}
+ */ +static bool x_reg_rw_overlap(const HexPkt *pkt, const HexOp *op, ut32 reg_num) { + switch (op->class) { + default: + rz_warn_if_reached(); + RZ_LOG_WARN("Checking rw overlap of class %d not implemented yet.", op->class); + return false; + case HEX_REG_CLASS_INT_REGS: + case HEX_REG_CLASS_INT_REGS_LOW8: + case HEX_REG_CLASS_GENERAL_SUB_REGS: + case HEX_REG_CLASS_DOUBLE_REGS: + case HEX_REG_CLASS_GENERAL_DOUBLE_LOW8_REGS: + return (rz_bv_get(pkt->il_op_stats.gpr_written, reg_num)) && (rz_bv_get(pkt->il_op_stats.gpr_read, reg_num)) && op->isa_id == 'x'; + case HEX_REG_CLASS_MOD_REGS: + case HEX_REG_CLASS_CTR_REGS: + case HEX_REG_CLASS_CTR_REGS64: + return (rz_bv_get(pkt->il_op_stats.ctr_written, reg_num)) && (rz_bv_get(pkt->il_op_stats.ctr_read, reg_num)) && op->isa_id == 'x'; + case HEX_REG_CLASS_PRED_REGS: + return (rz_bv_get(pkt->il_op_stats.pred_written, reg_num)) && (rz_bv_get(pkt->il_op_stats.pred_read, reg_num)) && op->isa_id == 'x'; + } +} + +/** + * \brief Reads a value from the register specified in \p op and logs the read. + * If the register is a double register, each of its sub-registers are read separately. + * The double register itself will *not* be read. + * + * \param pkt The currently executed packet. + * \param op The HexOp of the register to read. + * \param tmp_reg If true, the .new register will be read. Otherwise simply . + * + * \return The pure which with the value read or NULL in case of failure. 
+ */ +RZ_IPI RZ_OWN RzILOpPure *hex_read_reg(RZ_BORROW HexPkt *pkt, const HexOp *op, bool tmp_reg) { + rz_return_val_if_fail(pkt && op, NULL); + + const char *high_name = NULL; + const char *low_name = NULL; + RzILOpPure *high_val = NULL; + RzILOpPure *low_val = NULL; + ut32 reg_num = hex_resolve_reg_enum_id(op->class, op->op.reg); + ut32 val_width = HEX_GPR_WIDTH; + switch (op->class) { + default: + rz_warn_if_reached(); + RZ_LOG_WARN("Writing ops of class %d is not implemented yet.", op->class); + return NULL; + case HEX_REG_CLASS_DOUBLE_REGS: + case HEX_REG_CLASS_GENERAL_DOUBLE_LOW8_REGS: + if (x_reg_rw_overlap(pkt, op, reg_num + 1)) { + // If read and writes overlap, return the new register for each read. + tmp_reg = true; + } + high_name = hex_get_reg_in_class(HEX_REG_CLASS_INT_REGS, reg_num + 1, false, tmp_reg, true); + if (!high_name) { + return NULL; + } + high_val = SHIFTL0(CAST(HEX_GPR64_WIDTH, IL_FALSE, VARG(high_name)), U8(HEX_GPR_WIDTH)); + val_width = HEX_GPR64_WIDTH; + // fallthrough + case HEX_REG_CLASS_INT_REGS: + case HEX_REG_CLASS_INT_REGS_LOW8: + case HEX_REG_CLASS_GENERAL_SUB_REGS: + if (x_reg_rw_overlap(pkt, op, reg_num)) { + // If read and writes overlap, return the new register for each read. + tmp_reg = true; + } + low_name = hex_get_reg_in_class(HEX_REG_CLASS_INT_REGS, reg_num, false, tmp_reg, true); + if (!low_name) { + return NULL; + } + low_val = VARG(low_name); + break; + case HEX_REG_CLASS_CTR_REGS64: + if (x_reg_rw_overlap(pkt, op, reg_num + 1)) { + // If read and writes overlap, return the new register for each read. + tmp_reg = true; + } + high_name = hex_get_reg_in_class(HEX_REG_CLASS_CTR_REGS, reg_num + 1, false, tmp_reg, true); + if (!high_name) { + return NULL; + } + if (reg_num + 1 == 9) { + // C9 = PC. 
Does not exist in VM as var + high_val = SHIFTL0(CAST(HEX_GPR64_WIDTH, IL_FALSE, U32(pkt->pkt_addr)), U8(HEX_GPR_WIDTH)); + } else { + high_val = SHIFTL0(CAST(HEX_GPR64_WIDTH, IL_FALSE, VARG(high_name)), U8(HEX_GPR_WIDTH)); + } + val_width = HEX_GPR64_WIDTH; + // fallthrough + case HEX_REG_CLASS_MOD_REGS: + case HEX_REG_CLASS_CTR_REGS: + if (x_reg_rw_overlap(pkt, op, reg_num)) { + // If read and writes overlap, return the new register for each read. + tmp_reg = true; + } + if (reg_num == 4) { + // C4 alias P3:0 register is the concatenation of all predicate registers. + HexOp pred_op = { 0 }; + pred_op.class = HEX_REG_CLASS_PRED_REGS; + pred_op.op.reg = 0; + low_val = hex_read_reg(pkt, &pred_op, tmp_reg); + pred_op.op.reg = 1; + low_val = APPEND(hex_read_reg(pkt, &pred_op, tmp_reg), low_val); + pred_op.op.reg = 2; + low_val = APPEND(hex_read_reg(pkt, &pred_op, tmp_reg), low_val); + pred_op.op.reg = 3; + low_val = APPEND(hex_read_reg(pkt, &pred_op, tmp_reg), low_val); + break; + } + low_name = hex_get_reg_in_class(HEX_REG_CLASS_CTR_REGS, reg_num, false, tmp_reg, true); + if (!low_name) { + return NULL; + } + if (reg_num == 9) { + low_val = U32(pkt->pkt_addr); + } else { + low_val = VARG(low_name); + } + break; + case HEX_REG_CLASS_PRED_REGS: + if (x_reg_rw_overlap(pkt, op, reg_num)) { + // If read and writes overlap, return the new register for each read. 
+ tmp_reg = true; + } + low_name = hex_get_reg_in_class(HEX_REG_CLASS_PRED_REGS, reg_num, false, tmp_reg, true); + if (!low_name) { + return NULL; + } + return VARG(low_name); + } + if (read_cond_faulty(low_val, high_val, val_width)) { + rz_warn_if_reached(); + return NULL; + } + log_reg_read(pkt, reg_num, op->class, tmp_reg); + + if (val_width == HEX_GPR64_WIDTH) { + return LOGOR(high_val, CAST(HEX_GPR64_WIDTH, IL_FALSE, low_val)); + } + return low_val; +} + +RZ_IPI RZ_OWN RzILOpEffect *hex_cancel_slot(RZ_BORROW HexPkt *pkt, ut8 slot) { + rz_return_val_if_fail(pkt, NULL); + if (slot > 3) { + rz_warn_if_reached(); + RZ_LOG_WARN("Slot %d does not exist!", slot); + } + rz_bv_set(pkt->il_op_stats.slot_cancelled, slot, true); + return EMPTY(); +} + +RzILOpPure *hex_get_corresponding_cs(RZ_BORROW HexPkt *pkt, const HexOp *Mu) { + rz_return_val_if_fail(Mu && Mu->class == HEX_REG_CLASS_MOD_REGS, NULL); + HexOp cs_reg = { 0 }; + if (Mu->op.reg == 0) { + // M0 (C6) return CS0 + cs_reg.class = HEX_REG_CLASS_CTR_REGS; + cs_reg.op.reg = 12; + return hex_read_reg(pkt, &cs_reg, true); + } else if (Mu->op.reg == 1) { + // M1 (C7) return CS1 + cs_reg.class = HEX_REG_CLASS_CTR_REGS; + cs_reg.op.reg = 13; + return hex_read_reg(pkt, &cs_reg, true); + } + rz_warn_if_reached(); + return NULL; +} + +RZ_IPI void hex_reset_il_pkt_stats(HexILExecData *stats) { + rz_bv_free(stats->slot_cancelled); + rz_bv_free(stats->ctr_written); + rz_bv_free(stats->gpr_written); + rz_bv_free(stats->pred_written); + rz_bv_free(stats->ctr_read); + rz_bv_free(stats->gpr_read); + rz_bv_free(stats->pred_read); + rz_bv_free(stats->ctr_tmp_read); + rz_bv_free(stats->gpr_tmp_read); + rz_bv_free(stats->pred_tmp_read); + stats->slot_cancelled = rz_bv_new(64); + stats->ctr_written = rz_bv_new(64); + stats->gpr_written = rz_bv_new(64); + stats->pred_written = rz_bv_new(32); + stats->ctr_read = rz_bv_new(64); + stats->gpr_read = rz_bv_new(64); + stats->pred_read = rz_bv_new(32); + stats->ctr_tmp_read = rz_bv_new(64); 
+ stats->gpr_tmp_read = rz_bv_new(64); + stats->pred_tmp_read = rz_bv_new(32); +} diff --git a/handwritten/hexagon_il_c/includes.c b/handwritten/hexagon_il_c/includes.c new file mode 100644 index 00000000..cf06f126 --- /dev/null +++ b/handwritten/hexagon_il_c/includes.c @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/handwritten/hexagon_il_getter_table_h/includes.h b/handwritten/hexagon_il_getter_table_h/includes.h new file mode 100644 index 00000000..bebdf5a1 --- /dev/null +++ b/handwritten/hexagon_il_getter_table_h/includes.h @@ -0,0 +1,7 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +#include +#include +#include +#include diff --git a/handwritten/hexagon_il_h/declarations.h b/handwritten/hexagon_il_h/declarations.h new file mode 100644 index 00000000..f4a03d6d --- /dev/null +++ b/handwritten/hexagon_il_h/declarations.h @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +/// Immutable bits of CTR registers as in QEMU. 
+static const ut64 hex_ctr_immut_masks[32] = { + [HEX_REG_CTR_REGS_C8] = 0xc13000c0, // USR + [HEX_REG_CTR_REGS_C9] = HEX_IMMUTABLE_REG, // PC + [HEX_REG_CTR_REGS_C11] = 0x3f, // GP + [HEX_REG_CTR_REGS_C14] = HEX_IMMUTABLE_REG, // UPCYCLELO + [HEX_REG_CTR_REGS_C15] = HEX_IMMUTABLE_REG, // UPCYCLEHI + [HEX_REG_CTR_REGS_C30] = HEX_IMMUTABLE_REG, // UTIMERLO + [HEX_REG_CTR_REGS_C31] = HEX_IMMUTABLE_REG, // UTIMERHI +}; + +RZ_IPI bool hex_shuffle_insns(RZ_INOUT HexPkt *p); +RZ_IPI RzILOpEffect *hex_get_il_op(const ut32 addr, const bool get_pkt_op); +RZ_IPI RZ_OWN RzILOpPure *hex_get_rf_property_val(const HexRegFieldProperty property, const HexRegField field); +RZ_IPI RZ_OWN RzILOpEffect *hex_get_npc(const HexPkt *pkt); +RZ_IPI RZ_OWN RzILOpEffect *hex_il_op_jump_flag_init(HexInsnPktBundle *bundle); +RZ_IPI RZ_OWN RzILOpEffect *hex_il_op_next_pkt_jmp(HexInsnPktBundle *bundle); +RZ_IPI RZ_OWN RzILOpEffect *hex_commit_packet(HexInsnPktBundle *bundle); +RZ_IPI RZ_OWN RzILOpEffect *hex_write_reg(RZ_BORROW HexInsnPktBundle *bundle, const HexOp *op, RzILOpPure *val); +RZ_IPI RZ_OWN RzILOpPure *hex_read_reg(RZ_BORROW HexPkt *pkt, const HexOp *op, bool tmp_reg); +RZ_IPI RZ_OWN RzILOpEffect *hex_cancel_slot(RZ_BORROW HexPkt *pkt, ut8 slot); +RZ_IPI void hex_reset_il_pkt_stats(HexILExecData *stats); +RzILOpPure *hex_get_corresponding_cs(RZ_BORROW HexPkt *pkt, const HexOp *Mu); diff --git a/handwritten/hexagon_il_h/includes.h b/handwritten/hexagon_il_h/includes.h new file mode 100644 index 00000000..8be4b32c --- /dev/null +++ b/handwritten/hexagon_il_h/includes.h @@ -0,0 +1,6 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +#include +#include +#include diff --git a/handwritten/hexagon_il_h/macros.h b/handwritten/hexagon_il_h/macros.h new file mode 100644 index 00000000..8faabfa4 --- /dev/null +++ b/handwritten/hexagon_il_h/macros.h @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +#define 
WRITE_REG(pkt, op, val) hex_write_reg(pkt, op, val) +#define READ_REG(pkt, op, tmp_reg) hex_read_reg(pkt, op, tmp_reg) +#define ISA2REG(hi, var, tmp_reg) hex_isa_to_reg(hi, var, tmp_reg) +#define ISA2IMM(hi, var) hex_isa_to_imm(hi, var) +#define HEX_REGFIELD(prop, reg) hex_get_rf_property_val(prop, reg) +#define HEX_EXTRACT64(val, start, len) hex_extract64(val, start, len) +#define HEX_SEXTRACT64(val, start, len) hex_sextract64(val, start, len) +#define HEX_DEPOSIT64(val, start, len, fieldval) hex_deposit64(val, start, len, fieldval) +#define HEX_GET_NPC(pkt) hex_get_npc(pkt) +#define HEX_WRITE_GLOBAL(name, val) hex_write_global(name, val) +#define INC(val, size) ADD(val, UN(size, 1)) +#define DEC(val, size) SUB(val, UN(size, 1)) +#define HEX_STORE_SLOT_CANCELLED(pkt, slot) hex_cancel_slot(pkt, slot) +#define HEX_FCIRC_ADD(bundle, RxV, offset, mu, CS) hex_fcircadd(bundle, RxV, offset, mu, CS) +#define HEX_GET_CORRESPONDING_CS(pkt, Mu) hex_get_corresponding_cs(pkt, Mu) +#define HEX_GET_INSN_RMODE(insn) (insn->fround_mode) +#define HEX_D_TO_SINT(mode, fval) F2SINT(64, mode, fval) +#define HEX_F_TO_SINT(mode, fval) F2SINT(32, mode, fval) +#define HEX_D_TO_INT(mode, fval) F2INT(64, mode, fval) +#define HEX_F_TO_INT(mode, fval) F2INT(32, mode, fval) +#define HEX_SINT_TO_D(mode, fval) SINT2F(64, mode, fval) +#define HEX_SINT_TO_F(mode, fval) SINT2F(32, mode, fval) +#define HEX_INT_TO_D(mode, fval) INT2F(64, mode, fval) +#define HEX_INT_TO_F(mode, fval) INT2F(32, mode, fval) + +#define HEX_IMMUTABLE_REG (~0) +#define HEX_NOT_MASKED 0 diff --git a/handwritten/hexagon_reg_tables_h/includes.h b/handwritten/hexagon_reg_tables_h/includes.h new file mode 100644 index 00000000..70c56604 --- /dev/null +++ b/handwritten/hexagon_reg_tables_h/includes.h @@ -0,0 +1,4 @@ +// SPDX-FileCopyrightText: 2022 Rot127 +// SPDX-License-Identifier: LGPL-3.0-only + +#include diff --git a/handwritten/misc_il_insns.json b/handwritten/misc_il_insns.json new file mode 100644 index 00000000..e043fb02 
def gen_c_doxygen(desc: str, ret: (str, str) = None, args: [dict] = None) -> str:
    """
    Generates a doxygen doc string and returns it.
    All description strings can contain new lines. Every line of a
    description is emitted as its own " * " prefixed comment line.

    Args:
        desc: The general description.
        args: Argument list description. Contains dicts {'name': str, 'desc': str}
        ret: Return value description with tuple('type', 'desc')

    Returns: The doxygen string.
    """

    def comment_lines(lines: list) -> str:
        return "".join(f" * {line}\n" for line in lines)

    first, *rest = desc.split("\n")
    dox = "/**\n"
    dox += f" * \\brief {first}\n"
    dox += comment_lines(rest)
    for a in args or []:
        first, *rest = a["desc"].split("\n")
        # The tag line needs its own line break, otherwise the next
        # comment line is glued onto it.
        dox += f' * \\param {a["name"]}: {first}\n'
        dox += comment_lines(rest)
    if ret:
        first, *rest = ret[1].split("\n")
        dox += f" * \\return {ret[0]} {first}\n"
        dox += comment_lines(rest)
    return dox + " */"
"!anonymous": false, + "!fields": [], + "!name": "C21", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [ + ], + "AsmName": "c21", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 88 + ], + "HWEncoding": [ + 1, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs-C22.json b/import/registers/CtrRegs-C22.json new file mode 100644 index 00000000..9f45e79c --- /dev/null +++ b/import/registers/CtrRegs-C22.json @@ -0,0 +1,29 @@ +{ + "C22": { + "!anonymous": false, + "!fields": [], + "!name": "C22", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c22", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 89 + ], + "HWEncoding": [0,1,1,0,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs-C23.json b/import/registers/CtrRegs-C23.json new file mode 100644 index 00000000..bde321f7 --- /dev/null +++ b/import/registers/CtrRegs-C23.json @@ -0,0 +1,29 @@ +{ + "C23": { + "!anonymous": false, + "!fields": [], + "!name": "C23", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c23", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 90 + ], + "HWEncoding": [1,1,1,0,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs-C24.json b/import/registers/CtrRegs-C24.json new file mode 100644 index 
00000000..eeaaaf55 --- /dev/null +++ b/import/registers/CtrRegs-C24.json @@ -0,0 +1,29 @@ +{ + "C24": { + "!anonymous": false, + "!fields": [], + "!name": "C24", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c24", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 91 + ], + "HWEncoding": [0,0,0,1,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs-C25.json b/import/registers/CtrRegs-C25.json new file mode 100644 index 00000000..a06cecf8 --- /dev/null +++ b/import/registers/CtrRegs-C25.json @@ -0,0 +1,29 @@ +{ + "C25": { + "!anonymous": false, + "!fields": [], + "!name": "C25", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c25", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 92 + ], + "HWEncoding": [1,0,0,1,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs-C26.json b/import/registers/CtrRegs-C26.json new file mode 100644 index 00000000..4a0494b6 --- /dev/null +++ b/import/registers/CtrRegs-C26.json @@ -0,0 +1,29 @@ +{ + "C26": { + "!anonymous": false, + "!fields": [], + "!name": "C26", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c26", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 93 + ], + "HWEncoding": [0,1,0,1,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs-C27.json 
b/import/registers/CtrRegs-C27.json new file mode 100644 index 00000000..8b3376cb --- /dev/null +++ b/import/registers/CtrRegs-C27.json @@ -0,0 +1,29 @@ +{ + "C27": { + "!anonymous": false, + "!fields": [], + "!name": "C27", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c27", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 94 + ], + "HWEncoding": [1,1,0,1,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs-C28.json b/import/registers/CtrRegs-C28.json new file mode 100644 index 00000000..35c4287f --- /dev/null +++ b/import/registers/CtrRegs-C28.json @@ -0,0 +1,29 @@ +{ + "C28": { + "!anonymous": false, + "!fields": [], + "!name": "C28", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c28", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 95 + ], + "HWEncoding": [0,0,1,1,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs-C29.json b/import/registers/CtrRegs-C29.json new file mode 100644 index 00000000..549e9e19 --- /dev/null +++ b/import/registers/CtrRegs-C29.json @@ -0,0 +1,44 @@ +{ + "C29": { + "!anonymous": false, + "!fields": [], + "!name": "C29", + "!superclasses": [ + "Register", + "HexagonReg", + "Rc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c29", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 0, + "DwarfNumbers": [ + 96 + ], + "HWEncoding": [ + 1, + 0, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [], + "SubRegs": [], + 
"isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs64-C21_20.json b/import/registers/CtrRegs64-C21_20.json new file mode 100644 index 00000000..c886ed1b --- /dev/null +++ b/import/registers/CtrRegs64-C21_20.json @@ -0,0 +1,52 @@ +{ + "C21_20": { + "!anonymous": false, + "!fields": [], + "!name": "C21_20", + "!superclasses": [ + "Register", + "RegisterWithSubRegs", + "HexagonDoubleReg", + "Rcc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c21:20", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 1, + "DwarfNumbers": [ + 0 + ], + "HWEncoding": [0,0,1,0,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [ + { + "def": "isub_lo", + "kind": "def", + "printable": "isub_lo" + }, + { + "def": "isub_hi", + "kind": "def", + "printable": "isub_hi" + } + ], + "SubRegs": [ + { + "def": "C20", + "kind": "def", + "printable": "C20" + }, + { + "def": "C21", + "kind": "def", + "printable": "C21" + } + ], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs64-C23_22.json b/import/registers/CtrRegs64-C23_22.json new file mode 100644 index 00000000..d8979e9f --- /dev/null +++ b/import/registers/CtrRegs64-C23_22.json @@ -0,0 +1,52 @@ +{ + "C23_22": { + "!anonymous": false, + "!fields": [], + "!name": "C23_22", + "!superclasses": [ + "Register", + "RegisterWithSubRegs", + "HexagonDoubleReg", + "Rcc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c23:22", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 1, + "DwarfNumbers": [ + 0 + ], + "HWEncoding": [0,1,1,0,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [ + { + "def": "isub_lo", + "kind": "def", + "printable": "isub_lo" + }, + { + "def": "isub_hi", + "kind": "def", + "printable": "isub_hi" + } + ], + "SubRegs": [ + { + "def": "C22", + "kind": "def", + "printable": "C22" + }, + { + "def": "C23", + "kind": "def", + 
"printable": "C23" + } + ], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs64-C25_24.json b/import/registers/CtrRegs64-C25_24.json new file mode 100644 index 00000000..da5812c6 --- /dev/null +++ b/import/registers/CtrRegs64-C25_24.json @@ -0,0 +1,52 @@ +{ + "C25_24": { + "!anonymous": false, + "!fields": [], + "!name": "C25_24", + "!superclasses": [ + "Register", + "RegisterWithSubRegs", + "HexagonDoubleReg", + "Rcc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c25:24", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 1, + "DwarfNumbers": [ + 0 + ], + "HWEncoding": [0,0,0,1,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [ + { + "def": "isub_lo", + "kind": "def", + "printable": "isub_lo" + }, + { + "def": "isub_hi", + "kind": "def", + "printable": "isub_hi" + } + ], + "SubRegs": [ + { + "def": "C24", + "kind": "def", + "printable": "C24" + }, + { + "def": "C25", + "kind": "def", + "printable": "C25" + } + ], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs64-C27_26.json b/import/registers/CtrRegs64-C27_26.json new file mode 100644 index 00000000..34913ecf --- /dev/null +++ b/import/registers/CtrRegs64-C27_26.json @@ -0,0 +1,52 @@ +{ + "C27_26": { + "!anonymous": false, + "!fields": [], + "!name": "C27_26", + "!superclasses": [ + "Register", + "RegisterWithSubRegs", + "HexagonDoubleReg", + "Rcc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c27:26", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 1, + "DwarfNumbers": [ + 0 + ], + "HWEncoding": [0,1,0,1,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [ + { + "def": "isub_lo", + "kind": "def", + "printable": "isub_lo" + }, + { + "def": "isub_hi", + "kind": "def", + "printable": "isub_hi" + } + ], + "SubRegs": [ + { + "def": "C26", + "kind": "def", + "printable": "C26" + }, + { + "def": 
"C27", + "kind": "def", + "printable": "C27" + } + ], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/CtrRegs64-C29_28.json b/import/registers/CtrRegs64-C29_28.json new file mode 100644 index 00000000..3445f91e --- /dev/null +++ b/import/registers/CtrRegs64-C29_28.json @@ -0,0 +1,52 @@ +{ + "C29_28": { + "!anonymous": false, + "!fields": [], + "!name": "C29_28", + "!superclasses": [ + "Register", + "RegisterWithSubRegs", + "HexagonDoubleReg", + "Rcc", + "DwarfRegNum" + ], + "Aliases": [], + "AltNames": [], + "AsmName": "c29:28", + "CostPerUse": [ + 0 + ], + "CoveredBySubRegs": 1, + "DwarfNumbers": [ + 0 + ], + "HWEncoding": [0,0,1,1,1,0,0,0,0,0,0,0,0,0], + "Namespace": "Hexagon", + "RegAltNameIndices": [], + "SubRegIndices": [ + { + "def": "isub_lo", + "kind": "def", + "printable": "isub_lo" + }, + { + "def": "isub_hi", + "kind": "def", + "printable": "isub_hi" + } + ], + "SubRegs": [ + { + "def": "C28", + "kind": "def", + "printable": "C28" + }, + { + "def": "C29", + "kind": "def", + "printable": "C29" + } + ], + "isArtificial": 0 + } +} \ No newline at end of file diff --git a/import/registers/README.md b/import/registers/README.md index 4e77d0af..d1052ab5 100644 --- a/import/registers/README.md +++ b/import/registers/README.md @@ -3,4 +3,7 @@ Add a register here for import: - Set data within the `UNDOCUMENTED_RegisterName.json` - Done. `LLVMImporter` will add the register. +If you need to add a register which already belongs to a certain register class, simply name the file `<RegisterClass>-<RegisterName>.json`. +So for the `C20` register it would be `CtrRegs-C20.json`. + Same logic applies for register classes (like `SysRegs64-template.json`). 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4f8b146a..88ef76e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ bitarray~=2.1.0 -setuptools~=45.2.0 \ No newline at end of file +setuptools~=68.1.2 \ No newline at end of file diff --git a/rzil_compiler b/rzil_compiler new file mode 160000 index 00000000..5693c49c --- /dev/null +++ b/rzil_compiler @@ -0,0 +1 @@ +Subproject commit 5693c49ccab23e927dba788dd3b0a77d02af3c65