From 2724dd857948ae1e4f1691b7abc38531589f1c13 Mon Sep 17 00:00:00 2001
From: Junghee Lim <junghee@grammatech.com>
Date: Mon, 18 Nov 2024 19:17:10 -0500
Subject: [PATCH 1/3] Add handling of symbolic displacement in LEA

---
 CHANGELOG.md                                  |   2 +
 .../ex_aligned_data_in_code/ex_original.s     |  24 +++
 .../asm_examples/ex_sym_minus_sym/Makefile    |  10 +
 .../ex_sym_minus_sym/ex_original.s            | 190 ++++++++++++++++++
 src/datalog/arch/intel/arch_x86.dl            |  30 +++
 src/datalog/symbolization.dl                  |  49 +++++
 src/datalog/value_analysis.dl                 |  24 +++
 tests/misc_test.py                            |   4 +-
 tests/symbolic_operand_heuristics_test.py     |  32 +++
 9 files changed, 363 insertions(+), 2 deletions(-)
 create mode 100644 examples/asm_examples/ex_sym_minus_sym/Makefile
 create mode 100644 examples/asm_examples/ex_sym_minus_sym/ex_original.s

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f8917e9e..213929f66 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,8 @@
 * Fix bug that caused assembling error due to wrong `symbol_minus_symbol`
   for lsda entries with references to the end of `.gcc_except_table`
 * Generate alignments for function entry blocks depending on address
+* Fix bug that could result in missed symbolic expressions
+  (`symbol_minus_symbol`) in LEA
 
 # 1.9.0
 
diff --git a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s
index dd242e556..5f3449a61 100644
--- a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s
+++ b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s
@@ -27,6 +27,15 @@ main:
     # Load data into YMM register using vmovups: `data256u` does not need to be aligned.
     vmovups data256u(%rip), %ymm1
 
+    # Integer arithmetic/logigal instructions that require alignment
+    paddq data128.3(%rip), %xmm0
+    pand data128.4(%rip), %xmm0
+    psllq data128.5(%rip), %xmm0
+
+    # Floating-point instructions that require alignment
+    addps data128.6(%rip), %xmm0
+    andpd data128.7(%rip), %xmm0
+
     call print_message2
 
     xorq %rax, %rax
@@ -53,6 +62,21 @@ data128.1:
 .align 16
 data128.2:
     .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+.align 16
+data128.3:
+    .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+.align 16
+data128.4:
+    .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+.align 16
+data128.5:
+    .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+.align 16
+data128.6:
+    .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+.align 16
+data128.7:
+    .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
 .align 32
 data256:
     .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
diff --git a/examples/asm_examples/ex_sym_minus_sym/Makefile b/examples/asm_examples/ex_sym_minus_sym/Makefile
new file mode 100644
index 000000000..cd3634c9c
--- /dev/null
+++ b/examples/asm_examples/ex_sym_minus_sym/Makefile
@@ -0,0 +1,10 @@
+
+all: ex_original.s
+	gcc ex_original.s  -o ex
+	@./ex > out.txt
+clean:
+	rm -f ex out.txt
+	rm -fr ex.unstripped ex.s *.old*  dl_files *.gtirb
+check:
+	./ex > /tmp/res.txt
+	@ diff out.txt /tmp/res.txt && echo TEST OK
diff --git a/examples/asm_examples/ex_sym_minus_sym/ex_original.s b/examples/asm_examples/ex_sym_minus_sym/ex_original.s
new file mode 100644
index 000000000..027b718cf
--- /dev/null
+++ b/examples/asm_examples/ex_sym_minus_sym/ex_original.s
@@ -0,0 +1,190 @@
+    // this example contains a switch table where the differences of two symbols
+    // are what is stored
+
+    .text
+    .intel_syntax noprefix
+    .file	"ex.c"
+    .globl	one                     # -- Begin function one
+    .p2align	4, 0x90
+    .type	one,@function
+one:                                    # @one
+    .cfi_startproc
+# %bb.0:
+    push	rbx
+    .cfi_def_cfa_offset 16
+    .cfi_offset rbx, -16
+    mov	ebx, edi
+    lea	rdi, [rip + .L.str]
+    call	puts@PLT
+    mov	eax, ebx
+    pop	rbx
+    ret
+.Lfunc_end0:
+    .size	one, .Lfunc_end0-one
+    .cfi_endproc
+                                        # -- End function
+    .globl	two                     # -- Begin function two
+    .p2align	4, 0x90
+    .type	two,@function
+two:                                    # @two
+    .cfi_startproc
+# %bb.0:
+    push	rbx
+    .cfi_def_cfa_offset 16
+    .cfi_offset rbx, -16
+    mov	ebx, edi
+    lea	rdi, [rip + .L.str.1]
+    call	puts@PLT
+    mov	eax, ebx
+    pop	rbx
+    ret
+.Lfunc_end1:
+    .size	two, .Lfunc_end1-two
+    .cfi_endproc
+                                        # -- End function
+    .globl	def                     # -- Begin function def
+    .p2align	4, 0x90
+    .type	def,@function
+def:                                    # @def
+    .cfi_startproc
+# %bb.0:
+    push	rbx
+    .cfi_def_cfa_offset 16
+    .cfi_offset rbx, -16
+    mov	ebx, edi
+    lea	rdi, [rip + .L.str.4]
+    call	puts@PLT
+    mov	eax, ebx
+    pop	rbx
+    ret
+.Lfunc_end4:
+    .size	def, .Lfunc_end4-def
+    .cfi_endproc
+                                        # -- End function
+    .globl	fun                     # -- Begin function fun
+    .p2align	4, 0x90
+    .type	fun,@function
+fun:                                    # @fun
+    .cfi_startproc
+# %bb.0:
+    push	rbp
+    .cfi_def_cfa_offset 16
+    push	r14
+    .cfi_def_cfa_offset 24
+    push	rbx
+    .cfi_def_cfa_offset 32
+    push	r9
+    .cfi_offset rbx, -32
+    .cfi_offset r14, -24
+    .cfi_offset rbp, -16
+    mov	ebp, esi
+    mov	ebx, edi
+    cmp	ebx, ebp
+    jge	LBB5_10
+# %bb.1:
+    lea	r14, [rip + .LJTI5_0]
+    .p2align	4, 0x90
+LBB5_2:                                # =>This Inner Loop Header: Depth=1
+    lea	eax, [rbx - 1]
+    cmp	eax, 1
+    ja	LBB5_8
+# %bb.3:                                #   in Loop: Header=BB5_2 Depth=1
+jumping_block:
+    movsxd	rax, dword ptr [r14 + 4*rax]
+    add	rax, r14
+    jmp	rax
+LBB5_4:                                #   in Loop: Header=BB5_2 Depth=1
+    mov	edi, ebx
+    call	one
+    jmp	LBB5_9
+    .p2align	4, 0x90
+LBB5_8:                                #   in Loop: Header=BB5_2 Depth=1
+    mov	edi, ebx
+    call	def
+    jmp	LBB5_9
+    .p2align	4, 0x90
+LBB5_5:                                #   in Loop: Header=BB5_2 Depth=1
+lea_sym_minus_sym1:
+    lea r9, qword ptr [rax + .L3 - LBB5_5]
+    cmp rbx, rdi
+    jb .L1
+lea_sym_minus_sym2:
+    lea r9, qword ptr [r9 + .L2 - .L3]
+.L1:
+    jmp r9
+.L2:
+    mov	edi, ebx
+    call	one
+    jmp .L4
+.L3:
+    mov	edi, ebx
+    call	two
+.L4:
+    jmp	LBB5_9
+    .p2align	4, 0x90
+LBB5_9:                                #   in Loop: Header=BB5_2 Depth=1
+    add	ebx, 1
+    cmp	ebp, ebx
+    jne	LBB5_2
+LBB5_10:
+    pop	r9
+    pop	rbx
+    pop	r14
+    pop	rbp
+    ret
+.Lfunc_end5:
+    .size	fun, .Lfunc_end5-fun
+    .cfi_endproc
+    .section	.rodata,"a",@progbits
+    .p2align	2
+
+// here we have a table of relative offsets (symbol minus symbol)
+.LJTI5_0:
+    .long	LBB5_4-.LJTI5_0
+    .long	LBB5_5-.LJTI5_0
+                                        # -- End function
+    .text
+    .globl	main                    # -- Begin function main
+    .p2align	4, 0x90
+    .type	main,@function
+main:                                   # @main
+    .cfi_startproc
+# %bb.0:
+    push	rax
+    .cfi_def_cfa_offset 16
+    lea	rdi, [rip + .L.str.5]
+    call	puts@PLT
+    mov	edi, 1
+    mov	esi, 6
+    call	fun
+    xor	eax, eax
+    pop	rcx
+    ret
+.Lfunc_end6:
+    .size	main, .Lfunc_end6-main
+    .cfi_endproc
+                                        # -- End function
+    .type	.L.str,@object          # @.str
+    .section	.rodata.str1.1,"aMS",@progbits,1
+.L.str:
+    .asciz	"one"
+    .size	.L.str, 4
+
+    .type	.L.str.1,@object        # @.str.1
+.L.str.1:
+    .asciz	"two"
+    .size	.L.str.1, 4
+
+    .type	.L.str.4,@object        # @.str.4
+.L.str.4:
+    .asciz	"last"
+    .size	.L.str.4, 5
+
+    .type	.L.str.5,@object        # @.str.5
+.L.str.5:
+    .asciz	"!!!Hello World!!!"
+    .size	.L.str.5, 18
+
+
+    .ident	"clang version 6.0.0 (tags/RELEASE_600/final)"
+    .section	".note.GNU-stack","",@progbits
diff --git a/src/datalog/arch/intel/arch_x86.dl b/src/datalog/arch/intel/arch_x86.dl
index c40820336..09cd462dc 100644
--- a/src/datalog/arch/intel/arch_x86.dl
+++ b/src/datalog/arch/intel/arch_x86.dl
@@ -295,6 +295,36 @@ operation_alignment_required("VMOVNTPD").
 operation_alignment_required("VMOVNTDQ").
 operation_alignment_required("VMOVNTDQA").
 
+operation_alignment_required(Operation):-
+    instruction_get_operation(_, Operation),  // only mnemonics used by some instruction
+    (  // any one of the following mnemonic-prefix matches (disjunction)
+        // Floating-Point Packed Arithmetic (prefix anchored at index 0)
+        substr(Operation,0,4) = "ADDP";
+        substr(Operation,0,4) = "SUBP";
+        substr(Operation,0,4) = "MULP";
+        substr(Operation,0,4) = "DIVP";
+
+        // Integer Packed Arithmetic
+        substr(Operation,0,4) = "PADD";
+        substr(Operation,0,4) = "PSUB";
+        substr(Operation,0,4) = "PMUL";
+        substr(Operation,0,4) = "PDIV";  // NOTE(review): no known mnemonic starts with PDIV - confirm
+
+        // Logical Packed Operations (floating-point forms, then integer forms)
+        substr(Operation,0,4) = "ANDP";
+        substr(Operation,0,3) = "ORP";
+        substr(Operation,0,4) = "XORP";
+
+        substr(Operation,0,4) = "PAND";
+        substr(Operation,0,3) = "POR";
+        substr(Operation,0,4) = "PXOR";
+
+        // Packed Shifts (no blend mnemonics are listed here)
+        substr(Operation,0,4) = "PSLL";
+        substr(Operation,0,4) = "PSRL";
+        substr(Operation,0,4) = "PSRA"
+    ).
+
 alignment_required(EA,AlignInBits):-
     instruction_get_operation(EA,Operation),
     operation_alignment_required(Operation),
diff --git a/src/datalog/symbolization.dl b/src/datalog/symbolization.dl
index 592e03d73..fd255eb50 100644
--- a/src/datalog/symbolization.dl
+++ b/src/datalog/symbolization.dl
@@ -388,6 +388,55 @@ symbol_minus_symbol_candidate(EA,Size,Symbol1,Symbol2,as(Scale,unsigned),Offset)
     // If Symbol1 is zero, we have an absolute address.
     Symbol1 != 0.
 
+// Displacements in indirect operands whose base register holds an
+// address value need to be symbolized.
+//
+// E.g., `__memcpy_ssse3` in libc.so:
+// 01:.L_1:
+// 02:    leaq 40(%r9),%r9
+// 03:    cmpq %rcx,%rdx
+// 04:    movaps -12(%rsi),%xmm1
+// 05:    jb .L_aade7
+// 06:    leaq -7(%r9),%r9
+// 07:.L_2:
+// 08:    leaq -64(%rdx),%rdx
+// 09:    notrack jmpq *%r9
+// 10:    ud2
+// 11:.L_3:
+// 12:    prefetchnta -448(%rsi)
+// 13:.L_4:
+// 14:    movaps -28(%rsi),%xmm2
+//
+// .L_1 is one of the targets of a jump-table jump (jmp *%r9).
+// Before line 2, R9's value is the address of .L_1 itself.
+// After line 2 (.L_1 + 40 = .L_4), R9's value is .L_4.
+// Note that the LEA instructions can be encoded in either 4 bytes or
+// 7 bytes depending on the assembler.
+// Therefore, it is important to symbolize the displacements.
+//
+// Similarly, before line 6, R9's value is the address of .L_4.
+// After line 6 (.L_4 - 7 = .L_3), R9's value is .L_3.
+//
+// The displacements in the indirect operands of both LEAs
+// at lines 2 and 6 need to be symbolized as
+// leaq (.L_4 - .L_1)(%r9), %r9
+// and
+// leaq (.L_3 - .L_4)(%r9), %r9
+// respectively.
+//
+symbol_minus_symbol_candidate(EA+DispOffset,DispSize,Symbol1,Symbol2,1,0):-
+    binary_isa("X64"),
+    code(EA),
+    value_reg_address_before(EA,Reg,Address,_),
+    !arch.pc_reg(Reg),  // skip operands based on the PC register
+    !instruction_has_relocation(EA,_),  // skip instructions that carry a relocation
+    instruction_get_op(EA,Index,Op),
+    op_indirect_mapped(Op,"NONE",Reg,"NONE",1,Disp,_),
+    Symbol1 = as(Address,address),  // address held in Reg before EA
+    Symbol2 = as(Address+Disp,address),  // address the operand evaluates to
+    instruction_displacement_offset(EA,Index,DispOffset,DispSize).
+
+
 ////////////////////////////////////////////////////////////////////////
 
 /**
diff --git a/src/datalog/value_analysis.dl b/src/datalog/value_analysis.dl
index f436eb88d..f3cedae50 100644
--- a/src/datalog/value_analysis.dl
+++ b/src/datalog/value_analysis.dl
@@ -396,3 +396,27 @@ EARegDef: The address where Reg is defined as Value. Often EADef = EARegDef.
 const_value_reg_used(UsedEA,EADef,EARegDef,Reg,Value):-
     value_reg(EARegDef,Reg,EADef,"NONE",0,Value,_),
     reg_def_use.def_used(EARegDef,Reg,UsedEA,_).
+
+/**
+`AddrValue` represents the value of register `Reg` *before* `EA`.
+The value should be an address. `Steps` is 1 at a seed and grows by one per propagation.
+*/
+.decl value_reg_address_before(EA:address,Reg:register,AddrValue:number,Steps:unsigned)
+
+// Seed: at a jump-table target reached by an indirect jump via register,
+// the jump register holds the address of that target.
+value_reg_address_before(EA,Reg,Address,1):-
+    code(EA),
+    relative_jump_table_entry(_,TableStart,_,_,EA,_,_),
+    jump_table_start(EA_jump,_,TableStart,_,_),
+    reg_jump(EA_jump,Reg),
+    Address = as(EA,number).
+
+// Propagate the address value through `Reg = RegDef + Offset`, bounded by step_limit
+value_reg_address_before(UsedEA,Reg,Address+Offset,Steps+1):-
+    step_limit(StepLimit),
+    value_reg_address_before(EADef,RegDef,Address,Steps),
+    Steps < StepLimit,
+    reg_def_use.def_used(EADef,Reg,UsedEA,_),
+    arch.reg_arithmetic_operation(EADef,Reg,RegDef,1,Offset),
+    track_register(Reg).
diff --git a/tests/misc_test.py b/tests/misc_test.py
index ed2803be8..cf6975263 100644
--- a/tests/misc_test.py
+++ b/tests/misc_test.py
@@ -473,8 +473,8 @@ def test_aligned_data_in_code(self):
                 )
             ]
 
-            # alignment=16: `data128.1`, `data128.2`
-            self.assertEqual(alignment_list.count(16), 2)
+            # alignment=16: `data128.1`~`data128.7`
+            self.assertEqual(alignment_list.count(16), 7)
             # alignment=32: `data256`
             self.assertEqual(alignment_list.count(32), 1)
 
diff --git a/tests/symbolic_operand_heuristics_test.py b/tests/symbolic_operand_heuristics_test.py
index 3e981ecfb..40040fb70 100644
--- a/tests/symbolic_operand_heuristics_test.py
+++ b/tests/symbolic_operand_heuristics_test.py
@@ -59,6 +59,38 @@ def test_lea_results(self):
                     )
                 )
 
+    @unittest.skipUnless(
+        platform.system() == "Linux", "This test is linux only."
+    )
+    def test_lea_sym_minus_sym(self):
+        """
+        Test cases where the displacement of indirect operand in LEA is the
+        distance between EAs.
+        Such displacements should be symbolized as symbol_minus_symbol.
+        """
+        binary = Path("ex")
+        with cd(ex_asm_dir / "ex_sym_minus_sym"):
+            self.assertTrue(compile("gcc", "g++", "-O0", []))
+            ir_library = disassemble(binary).ir()
+            m = ir_library.modules[0]
+
+            # check that we symbolize the LEA instructions
+            symbolized = [
+                "lea_sym_minus_sym1",
+                "lea_sym_minus_sym2",
+            ]
+            for name in symbolized:
+                symbol = next(m.symbols_named(name))
+                block = symbol.referent
+                self.assertIsInstance(block, gtirb.CodeBlock)
+                _, _, sym_expr = next(
+                    block.byte_interval.symbolic_expressions_at(
+                        range(block.address, block.address + block.size)
+                    )
+                )
+                self.assertIsInstance(sym_expr, gtirb.SymAddrAddr)
+                self.assertEqual(sym_expr.offset, 0)
+
 
 if __name__ == "__main__":
     unittest.main()

From 4a1677dce9863a9b086b3fce08b189159b8de2ca Mon Sep 17 00:00:00 2001
From: Junghee Lim <junghee@grammatech.com>
Date: Wed, 20 Nov 2024 15:35:21 -0500
Subject: [PATCH 2/3] Simplify the assembly example

Also, clean up the test code
---
 .../ex_aligned_data_in_code/ex_original.s     |   2 +-
 .../ex_sym_minus_sym/ex_original.s            | 149 +++++-------------
 tests/symbolic_operand_heuristics_test.py     |   9 +-
 3 files changed, 47 insertions(+), 113 deletions(-)

diff --git a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s
index 5f3449a61..330f70624 100644
--- a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s
+++ b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s
@@ -27,7 +27,7 @@ main:
     # Load data into YMM register using vmovups: `data256u` does not need to be aligned.
     vmovups data256u(%rip), %ymm1
 
-    # Integer arithmetic/logigal instructions that require alignment
+    # Integer arithmetic/logical instructions that require alignment
     paddq data128.3(%rip), %xmm0
     pand data128.4(%rip), %xmm0
     psllq data128.5(%rip), %xmm0
diff --git a/examples/asm_examples/ex_sym_minus_sym/ex_original.s b/examples/asm_examples/ex_sym_minus_sym/ex_original.s
index 027b718cf..58fc13e66 100644
--- a/examples/asm_examples/ex_sym_minus_sym/ex_original.s
+++ b/examples/asm_examples/ex_sym_minus_sym/ex_original.s
@@ -1,157 +1,91 @@
-    // this example contains a switch table where the differences of two symbols
-    // are what is stored
+    // This example contains LEAs with symbolic expressions as displacement.
+    // See the labels `lea_sym_minus_sym1` and `lea_sym_minus_sym2`.
 
     .text
     .intel_syntax noprefix
-    .file	"ex.c"
     .globl	one                     # -- Begin function one
-    .p2align	4, 0x90
+    .p2align   4, 0x90
     .type	one,@function
-one:                                    # @one
-    .cfi_startproc
-# %bb.0:
+one:
     push	rbx
-    .cfi_def_cfa_offset 16
-    .cfi_offset rbx, -16
     mov	ebx, edi
     lea	rdi, [rip + .L.str]
     call	puts@PLT
     mov	eax, ebx
     pop	rbx
     ret
-.Lfunc_end0:
-    .size	one, .Lfunc_end0-one
-    .cfi_endproc
-                                        # -- End function
+
     .globl	two                     # -- Begin function two
-    .p2align	4, 0x90
+    .p2align   4, 0x90
     .type	two,@function
-two:                                    # @two
-    .cfi_startproc
-# %bb.0:
+two:
     push	rbx
-    .cfi_def_cfa_offset 16
-    .cfi_offset rbx, -16
     mov	ebx, edi
     lea	rdi, [rip + .L.str.1]
     call	puts@PLT
     mov	eax, ebx
     pop	rbx
     ret
-.Lfunc_end1:
-    .size	two, .Lfunc_end1-two
-    .cfi_endproc
-                                        # -- End function
-    .globl	def                     # -- Begin function def
-    .p2align	4, 0x90
-    .type	def,@function
-def:                                    # @def
-    .cfi_startproc
-# %bb.0:
-    push	rbx
-    .cfi_def_cfa_offset 16
-    .cfi_offset rbx, -16
-    mov	ebx, edi
-    lea	rdi, [rip + .L.str.4]
-    call	puts@PLT
-    mov	eax, ebx
-    pop	rbx
-    ret
-.Lfunc_end4:
-    .size	def, .Lfunc_end4-def
-    .cfi_endproc
-                                        # -- End function
+
     .globl	fun                     # -- Begin function fun
     .p2align	4, 0x90
     .type	fun,@function
-fun:                                    # @fun
-    .cfi_startproc
-# %bb.0:
+fun:
     push	rbp
-    .cfi_def_cfa_offset 16
     push	r14
-    .cfi_def_cfa_offset 24
     push	rbx
-    .cfi_def_cfa_offset 32
     push	r9
-    .cfi_offset rbx, -32
-    .cfi_offset r14, -24
-    .cfi_offset rbp, -16
     mov	ebp, esi
     mov	ebx, edi
     cmp	ebx, ebp
-    jge	LBB5_10
-# %bb.1:
-    lea	r14, [rip + .LJTI5_0]
-    .p2align	4, 0x90
-LBB5_2:                                # =>This Inner Loop Header: Depth=1
+    jge	.L3
+    lea	r14, [rip + jump_table]
+loop_header:                        # =>This Inner Loop Header: Depth=1
     lea	eax, [rbx - 1]
     cmp	eax, 1
-    ja	LBB5_8
-# %bb.3:                                #   in Loop: Header=BB5_2 Depth=1
+    ja	.L0
 jumping_block:
     movsxd	rax, dword ptr [r14 + 4*rax]
     add	rax, r14
     jmp	rax
-LBB5_4:                                #   in Loop: Header=BB5_2 Depth=1
+jt_target_1:                        #   in Loop: Header=BB5_2 Depth=1
     mov	edi, ebx
     call	one
-    jmp	LBB5_9
-    .p2align	4, 0x90
-LBB5_8:                                #   in Loop: Header=BB5_2 Depth=1
+    jmp .L2
+.L0:                                #   in Loop: Header=BB5_2 Depth=1
     mov	edi, ebx
-    call	def
-    jmp	LBB5_9
-    .p2align	4, 0x90
-LBB5_5:                                #   in Loop: Header=BB5_2 Depth=1
+    jmp	.L2
 lea_sym_minus_sym1:
-    lea r9, qword ptr [rax + .L3 - LBB5_5]
+    lea r9, qword ptr [rax + target2 - lea_sym_minus_sym1]
     cmp rbx, rdi
     jb .L1
 lea_sym_minus_sym2:
-    lea r9, qword ptr [r9 + .L2 - .L3]
+    lea r9, qword ptr [r9 + target1 - target2]
 .L1:
     jmp r9
-.L2:
+target1:
     mov	edi, ebx
     call	one
-    jmp .L4
-.L3:
+    jmp .L2
+target2:
     mov	edi, ebx
     call	two
-.L4:
-    jmp	LBB5_9
-    .p2align	4, 0x90
-LBB5_9:                                #   in Loop: Header=BB5_2 Depth=1
+.L2:
     add	ebx, 1
     cmp	ebp, ebx
-    jne	LBB5_2
-LBB5_10:
+    jne	loop_header
+.L3:
     pop	r9
     pop	rbx
     pop	r14
     pop	rbp
     ret
-.Lfunc_end5:
-    .size	fun, .Lfunc_end5-fun
-    .cfi_endproc
-    .section	.rodata,"a",@progbits
-    .p2align	2
 
-// here we have a table of relative offsets (symbol minus symbol)
-.LJTI5_0:
-    .long	LBB5_4-.LJTI5_0
-    .long	LBB5_5-.LJTI5_0
-                                        # -- End function
-    .text
     .globl	main                    # -- Begin function main
-    .p2align	4, 0x90
     .type	main,@function
-main:                                   # @main
-    .cfi_startproc
-# %bb.0:
+    .p2align   4, 0x90
+main:
     push	rax
-    .cfi_def_cfa_offset 16
     lea	rdi, [rip + .L.str.5]
     call	puts@PLT
     mov	edi, 1
@@ -160,31 +94,28 @@ main:                                   # @main
     xor	eax, eax
     pop	rcx
     ret
-.Lfunc_end6:
-    .size	main, .Lfunc_end6-main
-    .cfi_endproc
-                                        # -- End function
-    .type	.L.str,@object          # @.str
+
+    .section	.rodata,"a",@progbits
+    .p2align	2
+
+// here we have a table of relative offsets (symbol minus symbol)
+jump_table:
+    .long	jt_target_1-jump_table
+    .long	lea_sym_minus_sym1-jump_table
+
     .section	.rodata.str1.1,"aMS",@progbits,1
+
+    .type	.L.str,@object
 .L.str:
     .asciz	"one"
     .size	.L.str, 4
 
-    .type	.L.str.1,@object        # @.str.1
+    .type	.L.str.1,@object
 .L.str.1:
     .asciz	"two"
     .size	.L.str.1, 4
 
-    .type	.L.str.4,@object        # @.str.4
-.L.str.4:
-    .asciz	"last"
-    .size	.L.str.4, 5
-
-    .type	.L.str.5,@object        # @.str.5
+    .type	.L.str.5,@object
 .L.str.5:
     .asciz	"!!!Hello World!!!"
     .size	.L.str.5, 18
-
-
-    .ident	"clang version 6.0.0 (tags/RELEASE_600/final)"
-    .section	".note.GNU-stack","",@progbits
diff --git a/tests/symbolic_operand_heuristics_test.py b/tests/symbolic_operand_heuristics_test.py
index 40040fb70..1823fb711 100644
--- a/tests/symbolic_operand_heuristics_test.py
+++ b/tests/symbolic_operand_heuristics_test.py
@@ -76,10 +76,10 @@ def test_lea_sym_minus_sym(self):
 
             # check that we symbolize the LEA instructions
             symbolized = [
-                "lea_sym_minus_sym1",
-                "lea_sym_minus_sym2",
+                ("lea_sym_minus_sym1", "target2", "lea_sym_minus_sym1"),
+                ("lea_sym_minus_sym2", "target1", "target2"),
             ]
-            for name in symbolized:
+            for name, sym1, sym2 in symbolized:
                 symbol = next(m.symbols_named(name))
                 block = symbol.referent
                 self.assertIsInstance(block, gtirb.CodeBlock)
@@ -89,7 +89,10 @@ def test_lea_sym_minus_sym(self):
                     )
                 )
                 self.assertIsInstance(sym_expr, gtirb.SymAddrAddr)
+                self.assertEqual(sym_expr.scale, 1)
                 self.assertEqual(sym_expr.offset, 0)
+                self.assertEqual(sym_expr.symbol1.name, sym1)
+                self.assertEqual(sym_expr.symbol2.name, sym2)
 
 
 if __name__ == "__main__":

From fca7c960ccb6ec529d00aea9a3a05777fe0fa9a2 Mon Sep 17 00:00:00 2001
From: Junghee Lim <junghee@grammatech.com>
Date: Wed, 20 Nov 2024 15:52:11 -0500
Subject: [PATCH 3/3] Remove confusing comments

---
 examples/asm_examples/ex_sym_minus_sym/ex_original.s | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/asm_examples/ex_sym_minus_sym/ex_original.s b/examples/asm_examples/ex_sym_minus_sym/ex_original.s
index 58fc13e66..9e7906dc7 100644
--- a/examples/asm_examples/ex_sym_minus_sym/ex_original.s
+++ b/examples/asm_examples/ex_sym_minus_sym/ex_original.s
@@ -40,7 +40,7 @@ fun:
     cmp	ebx, ebp
     jge	.L3
     lea	r14, [rip + jump_table]
-loop_header:                        # =>This Inner Loop Header: Depth=1
+loop_header:
     lea	eax, [rbx - 1]
     cmp	eax, 1
     ja	.L0
@@ -48,11 +48,11 @@ jumping_block:
     movsxd	rax, dword ptr [r14 + 4*rax]
     add	rax, r14
     jmp	rax
-jt_target_1:                        #   in Loop: Header=BB5_2 Depth=1
+jt_target_1:
     mov	edi, ebx
     call	one
     jmp .L2
-.L0:                                #   in Loop: Header=BB5_2 Depth=1
+.L0:
     mov	edi, ebx
     jmp	.L2
 lea_sym_minus_sym1: