From 90e81fad7e4b5abd98a99efbb75b3e0fb56a96a2 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Thu, 25 Sep 2025 00:26:43 -0600 Subject: [PATCH] Improve ARM function size inference This allows 2-byte padding to be trimmed in ARM functions. Resolves #253 --- objdiff-core/src/arch/arm.rs | 14 +++-- objdiff-core/src/arch/mips.rs | 1 + objdiff-core/src/arch/ppc/mod.rs | 1 + objdiff-core/src/obj/mod.rs | 29 ++++++----- objdiff-core/tests/arch_arm.rs | 17 +++++++ objdiff-core/tests/data/arm/issue_253.o | Bin 0 -> 748 bytes .../arch_arm__trim_trailing_hword-2.snap | 7 +++ .../arch_arm__trim_trailing_hword.snap | 48 ++++++++++++++++++ 8 files changed, 98 insertions(+), 19 deletions(-) create mode 100644 objdiff-core/tests/data/arm/issue_253.o create mode 100644 objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword-2.snap create mode 100644 objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword.snap diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs index fc67d10..a6bc271 100644 --- a/objdiff-core/src/arch/arm.rs +++ b/objdiff-core/src/arch/arm.rs @@ -460,12 +460,16 @@ impl Arch for ArchArm { section: &Section, mut next_address: u64, ) -> Result { - // Trim any trailing 4-byte zeroes from the end (padding) - while next_address >= symbol.address + 4 - && let Some(data) = section.data_range(next_address - 4, 4) - && data == [0u8; 4] + // TODO: This should probably check the disasm mode and trim accordingly, + // but self.disasm_modes isn't populated until post_init, so it needs a refactor. + + // Trim any trailing 2-byte zeroes from the end (padding) + while next_address >= symbol.address + 2 + && let Some(data) = section.data_range(next_address - 2, 2) + && data == [0u8; 2] + && section.relocation_at(next_address - 2, 2).is_none() { - next_address -= 4; + next_address -= 2; } Ok(next_address.saturating_sub(symbol.address)) } diff --git a/objdiff-core/src/arch/mips.rs b/objdiff-core/src/arch/mips.rs index d38fc2d..04779ac 100644 --- a/objdiff-core/src/arch/mips.rs +++ b/objdiff-core/src/arch/mips.rs @@ -355,6 +355,7 @@ impl Arch for ArchMips { while new_address >= symbol.address + 4 && let Some(data) = section.data_range(new_address - 4, 4) && data == [0u8; 4] + && section.relocation_at(next_address - 4, 4).is_none() { new_address -= 4; } diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index 31ca501..24f01eb 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -457,6 +457,7 @@ impl Arch for ArchPpc { while next_address >= symbol.address + 4 && let Some(data) = section.data_range(next_address - 4, 4) && data == [0u8; 4] + && section.relocation_at(next_address - 4, 4).is_none() { next_address -= 4; } diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 96a6df4..8a58319 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -107,32 +107,33 @@ impl Section { // The alignment to use when "Combine data/text sections" is enabled. pub fn combined_alignment(&self) -> u64 { const MIN_ALIGNMENT: u64 = 4; - self.align.map(|align| align.get().max(MIN_ALIGNMENT)).unwrap_or(MIN_ALIGNMENT) + self.align.map_or(MIN_ALIGNMENT, |align| align.get().max(MIN_ALIGNMENT)) } - pub fn relocation_at<'obj>( - &'obj self, - obj: &'obj Object, - ins_ref: InstructionRef, - ) -> Option> { - match self.relocations.binary_search_by_key(&ins_ref.address, |r| r.address) { + pub fn relocation_at(&self, address: u64, size: u8) -> Option<&Relocation> { + match self.relocations.binary_search_by_key(&address, |r| r.address) { Ok(mut i) => { // Find the first relocation at the address while i .checked_sub(1) .and_then(|n| self.relocations.get(n)) - .is_some_and(|r| r.address == ins_ref.address) + .is_some_and(|r| r.address == address) { i -= 1; } self.relocations.get(i) } - Err(i) => self - .relocations - .get(i) - .filter(|r| r.address < ins_ref.address + ins_ref.size as u64), + Err(i) => self.relocations.get(i).filter(|r| r.address < address + size as u64), } - .and_then(|relocation| { + } + + pub fn resolve_relocation_at<'obj>( + &'obj self, + obj: &'obj Object, + address: u64, + size: u8, + ) -> Option> { + self.relocation_at(address, size).and_then(|relocation| { let symbol = obj.symbols.get(relocation.target_symbol)?; Some(ResolvedRelocation { relocation, symbol }) }) @@ -316,7 +317,7 @@ impl Object { let section = self.sections.get(section_index)?; let offset = ins_ref.address.checked_sub(section.address)?; let code = section.data.get(offset as usize..offset as usize + ins_ref.size as usize)?; - let relocation = section.relocation_at(self, ins_ref); + let relocation = section.resolve_relocation_at(self, ins_ref.address, ins_ref.size); Some(ResolvedInstructionRef { ins_ref, symbol_index, diff --git a/objdiff-core/tests/arch_arm.rs b/objdiff-core/tests/arch_arm.rs index 887a582..918d099 100644 --- a/objdiff-core/tests/arch_arm.rs +++ b/objdiff-core/tests/arch_arm.rs @@ -56,3 +56,20 @@ fn combine_text_sections() { let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); insta::assert_snapshot!(output); } + +#[test] +#[cfg(feature = "arm")] +fn trim_trailing_hword() { + let diff_config = diff::DiffObjConfig::default(); + let obj = obj::read::parse( + include_object!("data/arm/issue_253.o"), + &diff_config, + diff::DiffSide::Base, + ) + .unwrap(); + let symbol_idx = obj.symbols.iter().position(|s| s.name == "sub_8030F20").unwrap(); + let diff = diff::code::no_diff_code(&obj, symbol_idx, &diff_config).unwrap(); + insta::assert_debug_snapshot!(diff.instruction_rows); + let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); + insta::assert_snapshot!(output); +} diff --git a/objdiff-core/tests/data/arm/issue_253.o b/objdiff-core/tests/data/arm/issue_253.o new file mode 100644 index 0000000000000000000000000000000000000000..36083e291acc275a34cdfc1ec3b3153d8c87dc09 GIT binary patch literal 748 zcmaiy!AiqG5QhIvY)!#~3SRV}LcttLC|1;imqe2iunIAH5<-?1>_I8b7W7o`d3+dO z#k-(0Nj9#hPT0)%Z)RtAX0r^hBOwH|3Ap%Ez(*xoCuM*O2kY?VyPORHf1f7vNiL@l zJA^Gi8J(q9qZn<=5gjv)PTzR@-!6a+tNF%Y)58hAmjtDK{cKgRru8m^g+?E;G_*_F zF?65f8xt51w+-DOflX;o$?GDvnc|x?$sWSX^ft+Y`v^%m%mV)|EFrF4H64#abQQW| z6bqT1_XqvxnD+OIw`Y}0@@iiBCrxVds^E=tmBW)o0gt-rDUNBIfn0;(Argj9h7)@1Vl+t@E)rZ~Kw-Ck;OD1`R&n dCXEgay(b}hv@l?(P3PnP!i;;N4`wYo?iWAOKd}G+ literal 0 HcmV?d00001 diff --git a/objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword-2.snap b/objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword-2.snap new file mode 100644 index 0000000..ddcc5bd --- /dev/null +++ b/objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword-2.snap @@ -0,0 +1,7 @@ +--- +source: objdiff-core/tests/arch_arm.rs +expression: output +--- +[(Address(0), Dim, 5), (Spacing(4), Normal, 0), (Opcode("str", 64), Normal, 10), (Argument(Opaque("r1")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(36)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)] +[(Address(2), Dim, 5), (Spacing(4), Normal, 0), (Opcode("str", 64), Normal, 10), (Argument(Opaque("r2")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(40)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)] +[(Address(4), Dim, 5), (Spacing(4), Normal, 0), (Opcode("bx", 23), Normal, 10), (Argument(Opaque("lr")), Normal, 0), (Eol, Normal, 0)] diff --git a/objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword.snap b/objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword.snap new file mode 100644 index 0000000..f1ebca3 --- /dev/null +++ b/objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword.snap @@ -0,0 +1,48 @@ +--- +source: objdiff-core/tests/arch_arm.rs +expression: diff.instruction_rows +--- +[ + InstructionDiffRow { + ins_ref: Some( + InstructionRef { + address: 0, + size: 2, + opcode: 64, + branch_dest: None, + }, + ), + kind: None, + branch_from: None, + branch_to: None, + arg_diff: [], + }, + InstructionDiffRow { + ins_ref: Some( + InstructionRef { + address: 2, + size: 2, + opcode: 64, + branch_dest: None, + }, + ), + kind: None, + branch_from: None, + branch_to: None, + arg_diff: [], + }, + InstructionDiffRow { + ins_ref: Some( + InstructionRef { + address: 4, + size: 2, + opcode: 23, + branch_dest: None, + }, + ), + kind: None, + branch_from: None, + branch_to: None, + arg_diff: [], + }, +]