From 23e3048b7a13c89f51330bdb77e5f091b7c24601 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Thu, 31 Oct 2024 11:35:20 -0400 Subject: [PATCH 1/7] Check on both symbols in symbol_minus_symbol for END symbols Previously, only `A` in `A - B` was checked for being an END symbol. However, it was observed that `B` can also be an END symbol. Now, both symbols in `symbol_minus_symbol` are checked. --- CHANGELOG.md | 2 ++ src/datalog/binary/elf/exceptions.dl | 18 +++++++++--------- src/datalog/symbolization.dl | 7 ++++++- tests/linux-elf-x64.yaml | 7 +++++++ 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 722691ad..f903b420 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ sections * Fix 16-Thumb STM instructions considered to be invalid if the same register is used in reglist and register operands with writeback enabled. +* Fix bug that caused assembling error due to wrong `symbol_minus_symbol` + for lsda entries with references to the end of `.gcc_except_table` # 1.9.0 diff --git a/src/datalog/binary/elf/exceptions.dl b/src/datalog/binary/elf/exceptions.dl index f1246046..749174af 100644 --- a/src/datalog/binary/elf/exceptions.dl +++ b/src/datalog/binary/elf/exceptions.dl @@ -176,20 +176,20 @@ labeled_ea(Personality):- Same as `symbol_minus_symbol`: this is to avoid cyclic aggregation by `boundary_sym_expr` using `symbol_minus_symbol`. */ -.decl lsda_symbol_minus_symbol(EA:address,Size:unsigned,Symbol1:address,Symbol2:address,Scale:unsigned,Offset:number) +.decl lsda_symbol_minus_symbol(EA:address,Size:unsigned,Symbol1:address,Symbol2:address,Scale:unsigned,Offset:number,Reason:symbol) symbol_minus_symbol(EA,Size,Symbol1,Symbol2,Scale,Offset):- - lsda_symbol_minus_symbol(EA,Size,Symbol1,Symbol2,Scale,Offset). + lsda_symbol_minus_symbol(EA,Size,Symbol1,Symbol2,Scale,Offset,_). // If the second address is the end of the section, create `boundary_sym_expr` // for the address. 
boundary_sym_expr(EA, Dest):- - lsda_symbol_minus_symbol(EA,_,_,Dest,_,_), + lsda_symbol_minus_symbol(EA,_,_,Dest,_,_,_), loaded_section(_,Dest,_). // type table pointer symbol_special_encoding(TypeTablePointerLocation,"uleb128"), -lsda_symbol_minus_symbol(TypeTablePointerLocation,Size,CallsiteTablePointerLoc-1,LsdaTypeTableAddress,1,0):- +lsda_symbol_minus_symbol(TypeTablePointerLocation,Size,CallsiteTablePointerLoc-1,LsdaTypeTableAddress,1,0,"type-table-pointer"):- lsda_pointer_locations(Lsda,TypeTablePointerLocation,CallsiteTablePointerLoc), lsda(Lsda,_,_,_,LsdaTypeTableAddress, _, _), LsdaTypeTableAddress != 0, @@ -198,7 +198,7 @@ lsda_symbol_minus_symbol(TypeTablePointerLocation,Size,CallsiteTablePointerLoc-1 //callsite table size symbol_special_encoding(CallsiteTablePointerLoc,"uleb128"), -lsda_symbol_minus_symbol(CallsiteTablePointerLoc,Size,CallSiteTable_address,CallSiteTable_address+CallSiteTableLength,1,0):- +lsda_symbol_minus_symbol(CallsiteTablePointerLoc,Size,CallSiteTable_address,CallSiteTable_address+CallSiteTableLength,1,0,"callsite-table-size"):- lsda_pointer_locations(Lsda,_,CallsiteTablePointerLoc), lsda(Lsda,CallSiteTable_address,_,CallSiteTableLength,_, _, _), Size = CallSiteTable_address - CallsiteTablePointerLoc, @@ -217,7 +217,7 @@ symbol_special_encoding(EA_landingPad,EncodingName):- //region beginning -lsda_symbol_minus_symbol(EA_start,Size,LandingPadBaseAddress,StartRegion,1,0):- +lsda_symbol_minus_symbol(EA_start,Size,LandingPadBaseAddress,StartRegion,1,0,"region-beginning"):- lsda_callsite(CallSiteTable_address, EA_start, StartRegion,EA_end,_,_,_,_), lsda(_,CallSiteTable_address,_,_,_,_,LandingPadBaseAddress), Size = EA_end-EA_start, @@ -225,7 +225,7 @@ lsda_symbol_minus_symbol(EA_start,Size,LandingPadBaseAddress,StartRegion,1,0):- //region end boundary_sym_expr(EA_end,EndRegion), -lsda_symbol_minus_symbol(EA_end,Size,StartRegion,EndRegion,1,0):- +lsda_symbol_minus_symbol(EA_end,Size,StartRegion,EndRegion,1,0,"region-end"):- 
lsda_callsite(_, _, StartRegion,EA_end,EndRegion,EA_landingPad,_,_), Size = EA_landingPad-EA_end, Size > 0, @@ -233,7 +233,7 @@ lsda_symbol_minus_symbol(EA_end,Size,StartRegion,EndRegion,1,0):- block_boundaries(_,_,EndRegion). //landing pad -lsda_symbol_minus_symbol(EA_landingPad,Size,LandingPadBaseAddress,LandingPad,1,0):- +lsda_symbol_minus_symbol(EA_landingPad,Size,LandingPadBaseAddress,LandingPad,1,0,"landing-pad"):- lsda_callsite(CallSiteTable_address, _, _,_,_,EA_landingPad,LandingPad,EA_endLandingPad), lsda(_,CallSiteTable_address,_,_,_,_,LandingPadBaseAddress), LandingPad != 0, @@ -251,7 +251,7 @@ symbolic_data(EAType,EncodingSize,Type):- EAType=LsdaTypeTableAddress-(EncodingSize*(Index+1)), Type != 0. -lsda_symbol_minus_symbol(EAType,EncodingSize,EAType,Type,1,0):- +lsda_symbol_minus_symbol(EAType,EncodingSize,EAType,Type,1,0,"type-table"):- lsda_type_entry(LsdaTypeTableAddress, Index,Type), lsda(_,_,_,_,LsdaTypeTableAddress, TypeTableEncoding, _), TypeTableEncoding band 0xF = Encoding, diff --git a/src/datalog/symbolization.dl b/src/datalog/symbolization.dl index 158aeb7d..fad3852e 100644 --- a/src/datalog/symbolization.dl +++ b/src/datalog/symbolization.dl @@ -954,7 +954,12 @@ symbolic_expr_symbol_minus_symbol(EA,Size,SymbolName1,SymbolName2,Scale,FinalOff NewSymbol2 = Symbol2, FinalOffset = Offset1 ), - best_symexpr_symbol(NewSymbol1,SymbolName1,"Beg"), + ( + boundary_sym_expr(EA,NewSymbol1), Symbol1Loc = "End" + ; + !boundary_sym_expr(EA,NewSymbol1), Symbol1Loc = "Beg" + ), + best_symexpr_symbol(NewSymbol1,SymbolName1,Symbol1Loc), ( boundary_sym_expr(EA,NewSymbol2), Symbol2Loc = "End" ; diff --git a/tests/linux-elf-x64.yaml b/tests/linux-elf-x64.yaml index bfb7a61e..3a598b13 100644 --- a/tests/linux-elf-x64.yaml +++ b/tests/linux-elf-x64.yaml @@ -512,6 +512,9 @@ tests: <<: *assembly binary: fun.so + - name: ex_exceptions5 + <<: *assembly + # ---------------------------------------------------------------------------- # Assembly examples. 
(stripped) # ---------------------------------------------------------------------------- @@ -597,6 +600,10 @@ tests: <<: *test-strip-default binary: fun.so + - name: ex_exceptions5 + <<: *assembly + <<: *test-strip-default + # ---------------------------------------------------------------------------- # Relocatable ELF objects (.o). # ---------------------------------------------------------------------------- From 5f2f2303e19ea1aa30b2a84ba29ffbf5000a0cb6 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Thu, 31 Oct 2024 11:44:48 -0400 Subject: [PATCH 2/7] Add a unit test --- examples/asm_examples/ex_exceptions5/Makefile | 25 +++++++ .../asm_examples/ex_exceptions5/ex_original.s | 66 +++++++++++++++++++ .../ex_exceptions5/linker-script.ld | 4 ++ 3 files changed, 95 insertions(+) create mode 100644 examples/asm_examples/ex_exceptions5/Makefile create mode 100644 examples/asm_examples/ex_exceptions5/ex_original.s create mode 100644 examples/asm_examples/ex_exceptions5/linker-script.ld diff --git a/examples/asm_examples/ex_exceptions5/Makefile b/examples/asm_examples/ex_exceptions5/Makefile new file mode 100644 index 00000000..4f0aaf5f --- /dev/null +++ b/examples/asm_examples/ex_exceptions5/Makefile @@ -0,0 +1,25 @@ +CXX="g++" +EXEC= + +all: ex_original.s + # NOTE: `-T linker-script.ld` + # This is to place .gcc_except_table *before* .note.gnu.build-id + # so that the reference label for the end of the section is aligned + # with the adjacent section. Otherwise, the next section can be some + # discarded one, which would prevent from demonstrating the issue. 
+ # + # With this example, if a boundary_sym_expr is not correctly created + # for symbol_minus_symbol (either the first or the second symbol, or + # both), the assembler will fail with + # + # "Error: invalid operands (.note.gnu.property and .gcc_except_table + # sections) for '-'" + # + $(CXX) -T linker-script.ld -o ex ex_original.s + @ $(EXEC) ./ex > out.txt +clean: + rm -f ex out.txt + rm -fr ex.unstripped *.old* dl_files *.gtirb +check: + @ $(EXEC) ./ex >/tmp/res.txt + @ diff out.txt /tmp/res.txt && echo TEST OK diff --git a/examples/asm_examples/ex_exceptions5/ex_original.s b/examples/asm_examples/ex_exceptions5/ex_original.s new file mode 100644 index 00000000..a2e311f6 --- /dev/null +++ b/examples/asm_examples/ex_exceptions5/ex_original.s @@ -0,0 +1,66 @@ +.section .text + +#----------------------------------- +.type foo, @function +foo: + +.cfi_startproc +.cfi_lsda 27, .L_call_site_start + endbr64 + pushq %rbp + movq %rsp,%rbp + nop + popq %rbp + retq +.cfi_endproc + +# Entry point +.globl main +.type main, @function +main: + call callThrower # Call a function that may "throw" an exception + mov $60, %rax # syscall: exit + xor %rdi, %rdi # status: 0 + syscall + +# Dummy function simulating an exception thrower +.type callThrower, @function +callThrower: + ret # Simply return (replace with an actual throw in C++) + +# Exception Handling Table +.section .gcc_except_table, "a", @progbits +.align 4 + +.L_entry_start: + .byte 0x1 # Entry indicating an exception + .byte 0x0 + .byte 0x0 + .byte 0x7d + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + +.L_call_site_start: + .byte 0xff # Start of a call site + .byte 0xff # Additional indicator byte + .byte 0x1 # Additional indicator byte + # + # With this example, if a boundary_sym_expr is not correctly created + # for symbol_minus_symbol (either the first or the second symbol, or + # both), the assembler will fail with an 
error, such as + # + # "Error: invalid operands (.note.gnu.property and .gcc_except_table + # sections) for '-'" + # + .uleb128 .L_end - .L_end # Zero-length entry at the very end of the table +.L_end: diff --git a/examples/asm_examples/ex_exceptions5/linker-script.ld b/examples/asm_examples/ex_exceptions5/linker-script.ld new file mode 100644 index 00000000..26110fdd --- /dev/null +++ b/examples/asm_examples/ex_exceptions5/linker-script.ld @@ -0,0 +1,4 @@ +SECTIONS { + .gcc_except_table : { *(.gcc_except_table) } +} +INSERT BEFORE .note.gnu.build-id; From da1da7c83a34bda4f41abc3dbf0dc17ad7c790f6 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Thu, 31 Oct 2024 11:51:45 -0400 Subject: [PATCH 3/7] Revise comments --- examples/asm_examples/ex_exceptions5/Makefile | 7 ------- examples/asm_examples/ex_exceptions5/ex_original.s | 13 +++++++------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/examples/asm_examples/ex_exceptions5/Makefile b/examples/asm_examples/ex_exceptions5/Makefile index 4f0aaf5f..fd6d7a87 100644 --- a/examples/asm_examples/ex_exceptions5/Makefile +++ b/examples/asm_examples/ex_exceptions5/Makefile @@ -8,13 +8,6 @@ all: ex_original.s # with the adjacent section. Otherwise, the next section can be some # discarded one, which would prevent from demonstrating the issue. 
# - # With this example, if a boundary_sym_expr is not correctly created - # for symbol_minus_symbol (either the first or the second symbol, or - # both), the assembler will fail with - # - # "Error: invalid operands (.note.gnu.property and .gcc_except_table - # sections) for '-'" - # $(CXX) -T linker-script.ld -o ex ex_original.s @ $(EXEC) ./ex > out.txt clean: diff --git a/examples/asm_examples/ex_exceptions5/ex_original.s b/examples/asm_examples/ex_exceptions5/ex_original.s index a2e311f6..0954c34f 100644 --- a/examples/asm_examples/ex_exceptions5/ex_original.s +++ b/examples/asm_examples/ex_exceptions5/ex_original.s @@ -55,12 +55,13 @@ callThrower: .byte 0xff # Additional indicator byte .byte 0x1 # Additional indicator byte # - # With this example, if a boundary_sym_expr is not correctly created - # for symbol_minus_symbol (either the first or the second symbol, or - # both), the assembler will fail with an error, such as - # - # "Error: invalid operands (.note.gnu.property and .gcc_except_table - # sections) for '-'" + # With this example, if a boundary_sym_expr is not correctly created + # for symbol_minus_symbol (either the first or the second symbol, or + # both) or an END symbol is not chosen up for such a symbol, + # the assembler will fail with an error, such as + # + # "Error: invalid operands (.note.gnu.property and .gcc_except_table + # sections) for '-'" # .uleb128 .L_end - .L_end # Zero-length entry at the very end of the table .L_end: From 416d499c2d40af7c51cbcc337cc0cc2118e526ea Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Thu, 31 Oct 2024 11:53:32 -0400 Subject: [PATCH 4/7] Fix a typo --- examples/asm_examples/ex_exceptions5/ex_original.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/asm_examples/ex_exceptions5/ex_original.s b/examples/asm_examples/ex_exceptions5/ex_original.s index 0954c34f..48f8262b 100644 --- a/examples/asm_examples/ex_exceptions5/ex_original.s +++ 
b/examples/asm_examples/ex_exceptions5/ex_original.s @@ -57,7 +57,7 @@ callThrower: # # With this example, if a boundary_sym_expr is not correctly created # for symbol_minus_symbol (either the first or the second symbol, or - # both) or an END symbol is not chosen up for such a symbol, + # both) or an END symbol is not chosen for such a symbol, # the assembler will fail with an error, such as # # "Error: invalid operands (.note.gnu.property and .gcc_except_table From 229a3ced089d124404b66afdb11ceece43b8322e Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Tue, 5 Nov 2024 10:01:33 -0500 Subject: [PATCH 5/7] Add handling of "SecEnd - SecBeg" --- src/datalog/symbolization.dl | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/datalog/symbolization.dl b/src/datalog/symbolization.dl index fad3852e..7957e48f 100644 --- a/src/datalog/symbolization.dl +++ b/src/datalog/symbolization.dl @@ -955,7 +955,26 @@ symbolic_expr_symbol_minus_symbol(EA,Size,SymbolName1,SymbolName2,Scale,FinalOff FinalOffset = Offset1 ), ( - boundary_sym_expr(EA,NewSymbol1), Symbol1Loc = "End" + boundary_sym_expr(EA,NewSymbol1), + // NewSymbol1 is a section beginning and + // NewSymbol2 is in the same section. + loaded_section(NewSymbol1,SectionEnd,_), + NewSymbol2 >= NewSymbol1, + NewSymbol2 <= SectionEnd, + Symbol1Loc = "Beg" + ; + boundary_sym_expr(EA,NewSymbol1), + // NewSymbol1 is the next section beginning and + // NewSymbol2 is in this section. 
+ loaded_section(NewSymbol1,_,_), + loaded_section(ThisSectionBeg,NewSymbol1,_), + NewSymbol2 >= ThisSectionBeg, + NewSymbol2 <= NewSymbol1, + Symbol1Loc = "End" + ; + boundary_sym_expr(EA,NewSymbol1), + !loaded_section(NewSymbol1,_,_), + Symbol1Loc = "Beg" ; !boundary_sym_expr(EA,NewSymbol1), Symbol1Loc = "Beg" ), From 3636f5ef37b0f8424cb53628434dc341c80ce1c3 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Tue, 5 Nov 2024 12:19:11 -0500 Subject: [PATCH 6/7] Favor `A_END - B` over `A - B` when A is the end of a section, and B is in the section --- src/datalog/symbolization.dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datalog/symbolization.dl b/src/datalog/symbolization.dl index 7957e48f..406ab35b 100644 --- a/src/datalog/symbolization.dl +++ b/src/datalog/symbolization.dl @@ -959,7 +959,7 @@ symbolic_expr_symbol_minus_symbol(EA,Size,SymbolName1,SymbolName2,Scale,FinalOff // NewSymbol1 is a section beginning and // NewSymbol2 is in the same section. loaded_section(NewSymbol1,SectionEnd,_), - NewSymbol2 >= NewSymbol1, + NewSymbol2 > NewSymbol1, NewSymbol2 <= SectionEnd, Symbol1Loc = "Beg" ; From 1327529535166b19271f17204adf1b8a589b6968 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Thu, 7 Nov 2024 13:30:29 -0500 Subject: [PATCH 7/7] Simplify the logic --- src/datalog/symbolization.dl | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/src/datalog/symbolization.dl b/src/datalog/symbolization.dl index 406ab35b..592e03d7 100644 --- a/src/datalog/symbolization.dl +++ b/src/datalog/symbolization.dl @@ -955,28 +955,8 @@ symbolic_expr_symbol_minus_symbol(EA,Size,SymbolName1,SymbolName2,Scale,FinalOff FinalOffset = Offset1 ), ( - boundary_sym_expr(EA,NewSymbol1), - // NewSymbol1 is a section beginning and - // NewSymbol2 is in the same section. 
- loaded_section(NewSymbol1,SectionEnd,_), - NewSymbol2 > NewSymbol1, - NewSymbol2 <= SectionEnd, - Symbol1Loc = "Beg" - ; - boundary_sym_expr(EA,NewSymbol1), - // NewSymbol1 is the next section beginning and - // NewSymbol2 is in this section. - loaded_section(NewSymbol1,_,_), - loaded_section(ThisSectionBeg,NewSymbol1,_), - NewSymbol2 >= ThisSectionBeg, - NewSymbol2 <= NewSymbol1, - Symbol1Loc = "End" - ; - boundary_sym_expr(EA,NewSymbol1), - !loaded_section(NewSymbol1,_,_), - Symbol1Loc = "Beg" - ; - !boundary_sym_expr(EA,NewSymbol1), Symbol1Loc = "Beg" + NewSymbol1 = NewSymbol2, Symbol1Loc = Symbol2Loc; + NewSymbol1 != NewSymbol2, Symbol1Loc = "Beg" ), best_symexpr_symbol(NewSymbol1,SymbolName1,Symbol1Loc), (