Skip to content

Commit

Permalink
Merge branch 'more-aggressive-traversal' into 'main'
Browse files Browse the repository at this point in the history
More aggressive code traversal

See merge request rewriting/ddisasm!1216
  • Loading branch information
aeflores committed Sep 17, 2024
2 parents 17a5e4f + 05dbe2b commit e1f3ca6
Show file tree
Hide file tree
Showing 15 changed files with 266 additions and 54 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

* Fix a hang due to incorrect jump-table boundaries inferred from irrelevant register correlations to the index register
* Requires gtirb >=2.2.0
* Improved code inference in ARM binaries:
- Do not miss code after literal pools.
- Additional heuristic: Simple string literals in literal pools
- Additional heuristic: Function beginning pattern with push/adjust-sp as plausible instruction sequence
* Fix bug that led to string data blocks potentially overlapping code blocks.
* Fix bug that resulted in integral symbols on ISAs other than x64 (ARM and x86).
* Fix symbolization bug of ADR instructions in ARM32 that refer to code.

# 1.9.0

Expand Down
11 changes: 11 additions & 0 deletions examples/arm_asm_examples/ex_adr_to_code/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Builds the adr-to-code example with the ARM cross toolchain and runs it
# under qemu-arm. clean and check are commands, not files.
.PHONY: clean check
# Reference output: stdout of the example, captured into out.txt.
# This is the first regular target, hence the default goal.
out.txt: ex
@qemu-arm -L /usr/arm-linux-gnueabihf $^ > $@

# -nostartfiles: link without the standard startup files.
ex: ex_original.s
arm-linux-gnueabihf-gcc -nostartfiles -o $@ $^
# Removes the binary and the reference output.
clean:
rm -f ex out.txt
# Re-runs the example and compares its stdout against out.txt.
# out.txt is not a prerequisite here, so it must already exist.
check: ex
qemu-arm -L /usr/arm-linux-gnueabihf $^ > /tmp/res.txt
@ diff out.txt /tmp/res.txt && echo TEST OK
67 changes: 67 additions & 0 deletions examples/arm_asm_examples/ex_adr_to_code/ex_original.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# This example has adr instructions referencing code
# ADR won't include the least significant bit that determines the decode mode
# So thumb_to_thumb and arm_to_thumb transitions need to add 1 to the relative address
#
# Control flow: _start (ARM) -> arm_to_arm (ARM) -> arm_to_thumb (Thumb)
#   -> thumb_to_thumb (Thumb) -> thumb_to_arm (ARM) -> exit
# Each stage after _start passes its OKn string to puts before moving on.

.syntax unified
.section .text

.align 2
.arm
.global _start
_start:
# ARM -> ARM: the address produced by adr is used as is
adr r2, arm_to_arm
mov pc, r2
# Not reached by fall-through: the mov to pc above always branches
bl exit

.type arm_to_arm, %function
arm_to_arm:
ldr r0, =ok1_str
bl puts
# ARM -> Thumb
adr r2, arm_to_thumb
# adr won't capture the bit in the label so we need to add 1 to change mode
add r2, r2, #1
bx r2


.thumb
.type arm_to_thumb, %function
arm_to_thumb:
ldr r0, =ok2_str
bl puts
# Thumb -> Thumb
adr r2, thumb_to_thumb
# adr won't capture the bit in the pc to stay in thumb mode.
add r2, r2, #1
bx r2

.type thumb_to_thumb, %function
thumb_to_thumb:
ldr r0, =ok3_str
bl puts
# Thumb -> ARM: the target is ARM code, so no 1 is added before bx
adr r2, thumb_to_arm
bx r2
.arm
.type thumb_to_arm, %function
thumb_to_arm:
ldr r0, =ok4_str
bl puts
# Last stage: exit with status 0
mov r0, #0
bl exit

# main returns 0; nothing else in this file references it.
.global main
.type main, %function
.thumb
.align 2
main:
push { lr }
mov r0, #0
pop { pc }

# Messages printed by the four stages. .ascii does not append a terminator,
# so each string carries an explicit \0.
.section .rodata
ok1_str:
.ascii "OK1\n\0"
ok2_str:
.ascii "OK2\n\0"
ok3_str:
.ascii "OK3\n\0"
ok4_str:
.ascii "OK4\n\0"
15 changes: 2 additions & 13 deletions examples/arm_asm_examples/ex_ldr/ex_ldr.s
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,9 @@ main:
.long 0xe1a200d1

.call_thumb:
/*

blx thumbfunc
*/


.exit:
ldr r0, =ok_str
Expand All @@ -172,16 +172,6 @@ main:
mov r0, #0
pop { pc }

/*
TODO: Re-enable these Thumb tests.
Currently, ddisasm doesn't seem to handle this Arm/Thumb interwork well enough
to test this. Uncommenting this function and the call to it results in main
being identified as invalid entirely (even if this func is just replaced with
`bx lr`, so it's not because it contains invalid instructions.
See #275
.thumb
thumbfunc:
.BHI18:
Expand Down Expand Up @@ -222,7 +212,6 @@ thumbfunc:

.exit_thumb:
bx lr
*/

.section .rodata
ok_str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
.section .text
# Unreferenced Thumb code at the start of the section
.thumb
ldr r0, =ok_str
bl puts

mov r0, #0
mov pc, lr
pop { pc }


.align 2
.arm
Expand All @@ -20,3 +24,7 @@ main:
push { lr }
mov r0, #0
pop { pc }

.section .rodata
ok_str:
.ascii "OK\n\0"
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ if(DDISASM_ARM_32)
set(DATALOG_ARM32_SOURCES
datalog/arch/arm32/arch_arm.dl
datalog/arch/arm32/float_operations.dl
datalog/arch/arm32/interrupt_operations.dl
datalog/arch/arm32/jump_operations.dl
datalog/arch/arm32/registers.dl
datalog/arch/arm32/memory_access.dl
Expand Down
5 changes: 4 additions & 1 deletion src/datalog/arch/arm32/interrupt_operations.dl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
//
//===----------------------------------------------------------------------===//
/**
The set of interrupt operations in x64
The set of interrupt operations in arm32
*/

interrupt_operation("SWI").

// Breakpoint operation
interrupt_operation("BKPT").
55 changes: 40 additions & 15 deletions src/datalog/arch/arm32_code_inference.dl
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,10 @@ invalid(EA,"arm: invalid pc"):-
(
Operation = "MLAHS";
Operation = "MLASHS";
Operation = "LDRSHEQ";
Operation = "LDRSHTEQ";
Operation = "LDRSHTLO";
Operation = "LDRSHTVC";
Operation = "LDRBEQ";
Operation = "LDRSBEQ";
Operation = "LDRHHS";
Operation = "LDRHTHS";
Operation = "LDRSBHS";
Operation = "LDRSHHS";
Operation = "LDRSBTHS";
Operation = "LDRSHTHS";
contains("LDRB",Operation);
contains("LDRSB",Operation);
contains("LDRBVC",Operation);
contains("LDRBLO",Operation)
contains("LDRH",Operation);
contains("LDRSH",Operation)
),
instruction_get_dest_op(EA,_,Op),
op_regdirect_contains_reg(Op,PC),
Expand All @@ -189,7 +178,7 @@ invalid(EA,"arm: invalid arithmetic instruction with suffix s"):-
arch.pc_reg(PC).

invalid(EA, "arm: invalid stm/ldm"):-
instruction(EA,_,_,Operation,RegListOp,RegOp,0,_,_,_),
instruction(EA,Size,_,Operation,RegListOp,RegOp,0,_,_,_),
BaseOperation = substr(Operation,0,3),
(
BaseOperation = "STM";
Expand Down Expand Up @@ -433,6 +422,7 @@ indefinite_litpool_ref(RefAddr,Size):-
arch.pc_relative_addr(EA,_,RefAddr),
code_in_block_candidate(EA,_),
!composite_data_access(EA,_,_,_),
!ascii_string(RefAddr,_),
NextLimit = min Limit : {
data_block_limit(Limit),
Limit > RefAddr
Expand Down Expand Up @@ -707,6 +697,20 @@ contains_plausible_instr_seq(Block,"multiple loads from stack"):-
Count > 5.


/**
Function-prologue shape taken as evidence of real code. The push is the
first instruction of its block candidate and must fall through to a
stack-pointer adjustment with a negative immediate:

  PushBlock: push { ... }
  AdjustEA:  sub sp, N

Both the block that starts at the push and the block that holds the
adjustment are marked.
*/
contains_plausible_instr_seq(PushBlock,"push/adjust-sp"),
contains_plausible_instr_seq(AdjustBlock,"push/adjust-sp"):-
    arch.stack_pointer(StackReg),
    instruction_get_operation(PushBlock,"PUSH"),
    code_in_block_candidate_refined(PushBlock,PushBlock),
    must_fallthrough(PushBlock,AdjustEA),
    arch.reg_arithmetic_operation(AdjustEA,StackReg,StackReg,_,Delta),
    Delta < 0,
    code_in_block_candidate_refined(AdjustEA,AdjustBlock).

//////////////////////////////////////////////////////////////////////////
// block points

Expand Down Expand Up @@ -894,6 +898,19 @@ data_block_candidate(Block,Size):-
End = Block2 - 3
).

/**
PC-relative reference from candidate code to an ASCII string:

  RefEA:   adr r0, StrAddr
  ...
  StrAddr: .string "..."

The extent of the string becomes a data block candidate, backed by a
"possible string" heuristic. References that are part of a composite
data access are excluded.
*/
data_block_candidate(StrAddr,Len),
block_heuristic(StrAddr,"data",Len,0,"possible string"):-
    ascii_string(StrAddr,StrEnd),
    Len = StrEnd - StrAddr,
    arch.pc_relative_addr(RefEA,_,StrAddr),
    code_in_block_candidate(RefEA,_),
    !composite_data_access(RefEA,_,_,_).

/**
EA: ldr r0, LitPoolAddr
...
Expand Down Expand Up @@ -992,6 +1009,14 @@ negative_block_heuristic(Block,"code",Size,Inst,"arm: contains unplausible instr
block_candidate_boundaries(Block,"code",Start,End),
Size = End-Start.


// BKPT is an unlikely instruction to be found in a binary, so a code block
// candidate that contains one is penalized over its whole extent.
// The mnemonic is matched in the 4th attribute of `instruction`, which is
// where the other rules in this file read the operation name; the 3rd
// attribute does not hold it.
negative_block_heuristic(Block,"code",Size,Inst,"arm: contains unplausible instr"):-
    instruction(Inst,_,_,"BKPT",_,_,_,_,_,_),
    code_in_block_candidate_refined(Inst,Block),
    block_candidate_boundaries(Block,"code",Start,End),
    Size = End-Start.

// A code block that must fall through to a function start is highly likely false.
negative_block_heuristic(Block,"code",Size,BlockSucc,"arm: must-fallthrough to function symbol"):-
function_symbol(BlockSucc,_),
Expand Down
8 changes: 5 additions & 3 deletions src/datalog/arch/arm32_code_inference_weights.dl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ default_heuristic_weight("arm: jump table: no start",PROPORTIONAL_WEIGHT,2).
default_heuristic_weight("arm: jump table: no symbol",PROPORTIONAL_WEIGHT,15).
default_heuristic_weight("arm: jump table",PROPORTIONAL_WEIGHT,5).

default_heuristic_weight("possible string",PROPORTIONAL_WEIGHT,1).
default_heuristic_weight("possible string: string param for string library",PROPORTIONAL_WEIGHT,1).
default_heuristic_weight("possible string: string pred exists",PROPORTIONAL_WEIGHT,1).
default_heuristic_weight("possible string: string succ exists",PROPORTIONAL_WEIGHT,1).

// Simple weights
default_heuristic_weight("arm: $d symbol",SIMPLE_WEIGHT,10).
Expand Down Expand Up @@ -79,8 +83,6 @@ default_heuristic_weight("plausible_block: mov-lr-pc/jump",SIMPLE_WEIGHT,7).
default_heuristic_weight("plausible_block: movw/movt",SIMPLE_WEIGHT,6).
default_heuristic_weight("plausible_block: multiple loads from stack",SIMPLE_WEIGHT,2).
default_heuristic_weight("plausible_block: set-r0/return",SIMPLE_WEIGHT,1).
default_heuristic_weight("possible string: string param for string library",SIMPLE_WEIGHT,4).
default_heuristic_weight("possible string: string pred exists",SIMPLE_WEIGHT,5).
default_heuristic_weight("possible string: string succ exists",SIMPLE_WEIGHT,5).
default_heuristic_weight("plausible_block: push/adjust-sp",SIMPLE_WEIGHT,2).
default_heuristic_weight("possible string: symbol is target",SIMPLE_WEIGHT,-5).
default_heuristic_weight("possible string: symbol",SIMPLE_WEIGHT,8).
12 changes: 11 additions & 1 deletion src/datalog/arch/arm32_symbolization.dl
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,18 @@ symbolic_operand_candidate(EA,OpIndex,RefAddr,"data"):-
symbolic_operand_candidate(EA,OpIndex,Dest,Type):-
code(EA),
arch.pc_relative_addr(EA,_,Dest0),
// Dest0 is always even, we adjust it when pointing to thumb code
(
code(Dest), Dest = Dest0, Type = "code"
// Dest is ARM code
Dest = Dest0,
code(Dest),
Type = "code"
;
// Dest is Thumb code
Dest = Dest0 + 1,
code(Dest),
Type = "code"

;
Dest = Dest0 - (Dest0 band 1),
data_segment(Begin,End),
Expand Down
Loading

0 comments on commit e1f3ca6

Please sign in to comment.