Skip to content

Commit

Permalink
Merge branch 'junghee/symbolic-displacement' into 'main'
Browse files Browse the repository at this point in the history
Add handling of symbolic displacement in LEA

Closes #621, #620, and debloating/dykondo#152

See merge request rewriting/ddisasm!1230
  • Loading branch information
jdorn-gt committed Nov 21, 2024
2 parents 76ddcab + fca7c96 commit c1d75b8
Show file tree
Hide file tree
Showing 9 changed files with 297 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
* Fix bug that caused assembling error due to wrong `symbol_minus_symbol`
for lsda entries with references to the end of `.gcc_except_table`
* Generate alignments for function entry blocks depending on address
* Fixed bug that could result in missed symbolic expressions
(`symbol_minus_symbol`) in LEA

# 1.9.0

Expand Down
24 changes: 24 additions & 0 deletions examples/asm_examples/ex_aligned_data_in_code/ex_original.s
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ main:
# Load data into YMM register using vmovups: `data256u` does not need to be aligned.
vmovups data256u(%rip), %ymm1

# Integer arithmetic/logical instructions that require alignment
paddq data128.3(%rip), %xmm0
pand data128.4(%rip), %xmm0
psllq data128.5(%rip), %xmm0

# Floating-point instructions that require alignment
addps data128.6(%rip), %xmm0
andpd data128.7(%rip), %xmm0

call print_message2

xorq %rax, %rax
Expand All @@ -53,6 +62,21 @@ data128.1:
.align 16
data128.2:
.byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
.align 16
data128.3:
.byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
.align 16
data128.4:
.byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
.align 16
data128.5:
.byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
.align 16
data128.6:
.byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
.align 16
data128.7:
.byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
.align 32
data256:
.byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
Expand Down
10 changes: 10 additions & 0 deletions examples/asm_examples/ex_sym_minus_sym/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

all: ex_original.s
gcc ex_original.s -o ex
@./ex > out.txt
clean:
rm -f ex out.txt
rm -fr ex.unstripped ex.s *.old* dl_files *.gtirb
check:
./ex > /tmp/res.txt
@ diff out.txt /tmp/res.txt && echo TEST OK
121 changes: 121 additions & 0 deletions examples/asm_examples/ex_sym_minus_sym/ex_original.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// This example contains LEAs with symbolic expressions as displacement.
// See the labels `lea_sym_minus_sym1` and `lea_sym_minus_sym2`.

.text
.intel_syntax noprefix
.globl one # -- Begin function one
.p2align 4, 0x90
.type one,@function
one:
push rbx
mov ebx, edi
lea rdi, [rip + .L.str]
call puts@PLT
mov eax, ebx
pop rbx
ret

.globl two # -- Begin function two
.p2align 4, 0x90
.type two,@function
two:
push rbx
mov ebx, edi
lea rdi, [rip + .L.str.1]
call puts@PLT
mov eax, ebx
pop rbx
ret

.globl fun # -- Begin function fun
.p2align 4, 0x90
.type fun,@function
fun:
push rbp
push r14
push rbx
push r9
mov ebp, esi
mov ebx, edi
cmp ebx, ebp
jge .L3
lea r14, [rip + jump_table]
loop_header:
lea eax, [rbx - 1]
cmp eax, 1
ja .L0
jumping_block:
movsxd rax, dword ptr [r14 + 4*rax]
add rax, r14
jmp rax
jt_target_1:
mov edi, ebx
call one
jmp .L2
.L0:
mov edi, ebx
jmp .L2
lea_sym_minus_sym1:
lea r9, qword ptr [rax + target2 - lea_sym_minus_sym1]
cmp rbx, rdi
jb .L1
lea_sym_minus_sym2:
lea r9, qword ptr [r9 + target1 - target2]
.L1:
jmp r9
target1:
mov edi, ebx
call one
jmp .L2
target2:
mov edi, ebx
call two
.L2:
add ebx, 1
cmp ebp, ebx
jne loop_header
.L3:
pop r9
pop rbx
pop r14
pop rbp
ret

.globl main # -- Begin function main
.type main,@function
.p2align 4, 0x90
main:
push rax
lea rdi, [rip + .L.str.5]
call puts@PLT
mov edi, 1
mov esi, 6
call fun
xor eax, eax
pop rcx
ret

.section .rodata,"a",@progbits
.p2align 2

// here we have a table of relative offsets (symbol minus symbol)
jump_table:
.long jt_target_1-jump_table
.long lea_sym_minus_sym1-jump_table

.section .rodata.str1.1,"aMS",@progbits,1

.type .L.str,@object
.L.str:
.asciz "one"
.size .L.str, 4

.type .L.str.1,@object
.L.str.1:
.asciz "two"
.size .L.str.1, 4

.type .L.str.5,@object
.L.str.5:
.asciz "!!!Hello World!!!"
.size .L.str.5, 18
30 changes: 30 additions & 0 deletions src/datalog/arch/intel/arch_x86.dl
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,36 @@ operation_alignment_required("VMOVNTPD").
operation_alignment_required("VMOVNTDQ").
operation_alignment_required("VMOVNTDQA").

operation_alignment_required(Operation):-
instruction_get_operation(_, Operation),
(
// Floating-Point/Integer Packed Arithmetic
substr(Operation,0,4) = "ADDP";
substr(Operation,0,4) = "SUBP";
substr(Operation,0,4) = "MULP";
substr(Operation,0,4) = "DIVP";

// Integer Packed Arithmetic
substr(Operation,0,4) = "PADD";
substr(Operation,0,4) = "PSUB";
substr(Operation,0,4) = "PMUL";
substr(Operation,0,4) = "PDIV";

// Logical Packed Operations
substr(Operation,0,4) = "ANDP";
substr(Operation,0,3) = "ORP";
substr(Operation,0,4) = "XORP";

substr(Operation,0,4) = "PAND";
substr(Operation,0,3) = "POR";
substr(Operation,0,4) = "PXOR";

// Packed Shifts and Blends
substr(Operation,0,4) = "PSLL";
substr(Operation,0,4) = "PSRL";
substr(Operation,0,4) = "PSRA"
).

alignment_required(EA,AlignInBits):-
instruction_get_operation(EA,Operation),
operation_alignment_required(Operation),
Expand Down
49 changes: 49 additions & 0 deletions src/datalog/symbolization.dl
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,55 @@ symbol_minus_symbol_candidate(EA,Size,Symbol1,Symbol2,as(Scale,unsigned),Offset)
// If Symbol1 is zero, we have an absolute address.
Symbol1 != 0.

// Displacements in indirect operands with address-value register
// need to be symbolized.
//
// E.g., `__memcpy_ssse3` in libc.so:
// 01:.L_1:
// 02: leaq 40(%r9),%r9
// 03: cmpq %rcx,%rdx
// 04: movaps -12(%rsi),%xmm1
// 05: jb .L_aade7
// 06: leaq -7(%r9),%r9
// 07:.L_2:
// 08: leaq -64(%rdx),%rdx
// 09: notrack jmpq *%r9
// 10: ud2
// 11:.L_3:
// 12: prefetchnta -448(%rsi)
// 13:.L_4:
// 14: movaps -28(%rsi),%xmm2
//
// .L_1 is one of the targets of a jump-table jump (jmp %9).
// Before line 2, R9's value is the address of itself.
// After line2 (.L_1 + 40 = .L_4), R9's value is .L_4.
// Note that the LEA instructions can be encoded in either 4 bytes or
// 7 bytes depending on assemblers.
// Therefore, it is important to symbolize the displacements.
//
// Similarly, before line 6, R9's value is the address of .L_4.
// After line 6 (.L_4 - 7 = .L3), R9's value is .L_3.
//
// The displacements ini the indirect operands for both the LEAs
// at line 2 and 6 need to be symbolized as
// leaq (.L_4 - .L_1)(%r9), %r9
// and
// leaq (.L_3 - .L_4)(%r9), %r9
// respectively.
//
symbol_minus_symbol_candidate(EA+DispOffset,DispSize,Symbol1,Symbol2,1,0):-
binary_isa("X64"),
code(EA),
value_reg_address_before(EA,Reg,Address,_),
!arch.pc_reg(Reg),
!instruction_has_relocation(EA,_),
instruction_get_op(EA,Index,Op),
op_indirect_mapped(Op,"NONE",Reg,"NONE",1,Disp,_),
Symbol1 = as(Address,address),
Symbol2 = as(Address+Disp,address),
instruction_displacement_offset(EA,Index,DispOffset,DispSize).


////////////////////////////////////////////////////////////////////////

/**
Expand Down
24 changes: 24 additions & 0 deletions src/datalog/value_analysis.dl
Original file line number Diff line number Diff line change
Expand Up @@ -396,3 +396,27 @@ EARegDef: The address where Reg is defined as Value. Often EADef = EARegDef.
const_value_reg_used(UsedEA,EADef,EARegDef,Reg,Value):-
value_reg(EARegDef,Reg,EADef,"NONE",0,Value,_),
reg_def_use.def_used(EARegDef,Reg,UsedEA,_).

/**
`AddrValue` represents the value of register `Reg` *before* `EA`.
The value should be an address.
*/
.decl value_reg_address_before(EA:address,Reg:register,AddrValue:number,Steps:unsigned)

// At the jump-table target from indirect-jump via register,
// the register should have an address value.
value_reg_address_before(EA,Reg,Address,1):-
code(EA),
relative_jump_table_entry(_,TableStart,_,_,EA,_,_),
jump_table_start(EA_jump,_,TableStart,_,_),
reg_jump(EA_jump,Reg),
Address = as(EA,number).

// Propagate address value
value_reg_address_before(UsedEA,Reg,Address+Offset,Steps+1):-
step_limit(StepLimit),
value_reg_address_before(EADef,RegDef,Address,Steps),
Steps < StepLimit,
reg_def_use.def_used(EADef,Reg,UsedEA,_),
arch.reg_arithmetic_operation(EADef,Reg,RegDef,1,Offset),
track_register(Reg).
4 changes: 2 additions & 2 deletions tests/misc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,8 @@ def test_aligned_data_in_code(self):
)
]

# alignment=16: `data128.1`, `data128.2`
self.assertEqual(alignment_list.count(16), 2)
# alignment=16: `data128.1`~`data128.7`
self.assertEqual(alignment_list.count(16), 7)
# alignment=32: `data256`
self.assertEqual(alignment_list.count(32), 1)

Expand Down
35 changes: 35 additions & 0 deletions tests/symbolic_operand_heuristics_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,41 @@ def test_lea_results(self):
)
)

@unittest.skipUnless(
platform.system() == "Linux", "This test is linux only."
)
def test_lea_sym_minus_sym(self):
"""
Test cases where the displacement of indirect operand in LEA is the
distance between EAs.
Such displacements should be symbolized as symbol_minus_symbol.
"""
binary = Path("ex")
with cd(ex_asm_dir / "ex_sym_minus_sym"):
self.assertTrue(compile("gcc", "g++", "-O0", []))
ir_library = disassemble(binary).ir()
m = ir_library.modules[0]

# check that we symbolize the LEA instructions
symbolized = [
("lea_sym_minus_sym1", "target2", "lea_sym_minus_sym1"),
("lea_sym_minus_sym2", "target1", "target2"),
]
for name, sym1, sym2 in symbolized:
symbol = next(m.symbols_named(name))
block = symbol.referent
self.assertIsInstance(block, gtirb.CodeBlock)
_, _, sym_expr = next(
block.byte_interval.symbolic_expressions_at(
range(block.address, block.address + block.size)
)
)
self.assertIsInstance(sym_expr, gtirb.SymAddrAddr)
self.assertEqual(sym_expr.scale, 1)
self.assertEqual(sym_expr.offset, 0)
self.assertEqual(sym_expr.symbol1.name, sym1)
self.assertEqual(sym_expr.symbol2.name, sym2)


if __name__ == "__main__":
unittest.main()

0 comments on commit c1d75b8

Please sign in to comment.