Improve ARM function size inference

This allows 2-byte padding to be trimmed in ARM functions.

Resolves #253
This commit is contained in:
Luke Street 2025-09-25 00:26:43 -06:00
parent 7a8efb4c88
commit 90e81fad7e
8 changed files with 98 additions and 19 deletions

View File

@ -460,12 +460,16 @@ impl Arch for ArchArm {
section: &Section, section: &Section,
mut next_address: u64, mut next_address: u64,
) -> Result<u64> { ) -> Result<u64> {
// Trim any trailing 4-byte zeroes from the end (padding) // TODO: This should probably check the disasm mode and trim accordingly,
while next_address >= symbol.address + 4 // but self.disasm_modes isn't populated until post_init, so it needs a refactor.
&& let Some(data) = section.data_range(next_address - 4, 4)
&& data == [0u8; 4] // Trim any trailing 2-byte zeroes from the end (padding)
while next_address >= symbol.address + 2
&& let Some(data) = section.data_range(next_address - 2, 2)
&& data == [0u8; 2]
&& section.relocation_at(next_address - 2, 2).is_none()
{ {
next_address -= 4; next_address -= 2;
} }
Ok(next_address.saturating_sub(symbol.address)) Ok(next_address.saturating_sub(symbol.address))
} }

View File

@ -355,6 +355,7 @@ impl Arch for ArchMips {
while new_address >= symbol.address + 4 while new_address >= symbol.address + 4
&& let Some(data) = section.data_range(new_address - 4, 4) && let Some(data) = section.data_range(new_address - 4, 4)
&& data == [0u8; 4] && data == [0u8; 4]
&& section.relocation_at(new_address - 4, 4).is_none()
{ {
new_address -= 4; new_address -= 4;
} }

View File

@ -457,6 +457,7 @@ impl Arch for ArchPpc {
while next_address >= symbol.address + 4 while next_address >= symbol.address + 4
&& let Some(data) = section.data_range(next_address - 4, 4) && let Some(data) = section.data_range(next_address - 4, 4)
&& data == [0u8; 4] && data == [0u8; 4]
&& section.relocation_at(next_address - 4, 4).is_none()
{ {
next_address -= 4; next_address -= 4;
} }

View File

@ -107,32 +107,33 @@ impl Section {
// The alignment to use when "Combine data/text sections" is enabled. // The alignment to use when "Combine data/text sections" is enabled.
pub fn combined_alignment(&self) -> u64 { pub fn combined_alignment(&self) -> u64 {
const MIN_ALIGNMENT: u64 = 4; const MIN_ALIGNMENT: u64 = 4;
self.align.map(|align| align.get().max(MIN_ALIGNMENT)).unwrap_or(MIN_ALIGNMENT) self.align.map_or(MIN_ALIGNMENT, |align| align.get().max(MIN_ALIGNMENT))
} }
pub fn relocation_at<'obj>( pub fn relocation_at(&self, address: u64, size: u8) -> Option<&Relocation> {
&'obj self, match self.relocations.binary_search_by_key(&address, |r| r.address) {
obj: &'obj Object,
ins_ref: InstructionRef,
) -> Option<ResolvedRelocation<'obj>> {
match self.relocations.binary_search_by_key(&ins_ref.address, |r| r.address) {
Ok(mut i) => { Ok(mut i) => {
// Find the first relocation at the address // Find the first relocation at the address
while i while i
.checked_sub(1) .checked_sub(1)
.and_then(|n| self.relocations.get(n)) .and_then(|n| self.relocations.get(n))
.is_some_and(|r| r.address == ins_ref.address) .is_some_and(|r| r.address == address)
{ {
i -= 1; i -= 1;
} }
self.relocations.get(i) self.relocations.get(i)
} }
Err(i) => self Err(i) => self.relocations.get(i).filter(|r| r.address < address + size as u64),
.relocations
.get(i)
.filter(|r| r.address < ins_ref.address + ins_ref.size as u64),
} }
.and_then(|relocation| { }
pub fn resolve_relocation_at<'obj>(
&'obj self,
obj: &'obj Object,
address: u64,
size: u8,
) -> Option<ResolvedRelocation<'obj>> {
self.relocation_at(address, size).and_then(|relocation| {
let symbol = obj.symbols.get(relocation.target_symbol)?; let symbol = obj.symbols.get(relocation.target_symbol)?;
Some(ResolvedRelocation { relocation, symbol }) Some(ResolvedRelocation { relocation, symbol })
}) })
@ -316,7 +317,7 @@ impl Object {
let section = self.sections.get(section_index)?; let section = self.sections.get(section_index)?;
let offset = ins_ref.address.checked_sub(section.address)?; let offset = ins_ref.address.checked_sub(section.address)?;
let code = section.data.get(offset as usize..offset as usize + ins_ref.size as usize)?; let code = section.data.get(offset as usize..offset as usize + ins_ref.size as usize)?;
let relocation = section.relocation_at(self, ins_ref); let relocation = section.resolve_relocation_at(self, ins_ref.address, ins_ref.size);
Some(ResolvedInstructionRef { Some(ResolvedInstructionRef {
ins_ref, ins_ref,
symbol_index, symbol_index,

View File

@ -56,3 +56,20 @@ fn combine_text_sections() {
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output); insta::assert_snapshot!(output);
} }
#[test]
#[cfg(feature = "arm")]
fn trim_trailing_hword() {
let diff_config = diff::DiffObjConfig::default();
let obj = obj::read::parse(
include_object!("data/arm/issue_253.o"),
&diff_config,
diff::DiffSide::Base,
)
.unwrap();
let symbol_idx = obj.symbols.iter().position(|s| s.name == "sub_8030F20").unwrap();
let diff = diff::code::no_diff_code(&obj, symbol_idx, &diff_config).unwrap();
insta::assert_debug_snapshot!(diff.instruction_rows);
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output);
}

Binary file not shown.

View File

@ -0,0 +1,7 @@
---
source: objdiff-core/tests/arch_arm.rs
expression: output
---
[(Address(0), Dim, 5), (Spacing(4), Normal, 0), (Opcode("str", 64), Normal, 10), (Argument(Opaque("r1")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(36)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
[(Address(2), Dim, 5), (Spacing(4), Normal, 0), (Opcode("str", 64), Normal, 10), (Argument(Opaque("r2")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(40)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
[(Address(4), Dim, 5), (Spacing(4), Normal, 0), (Opcode("bx", 23), Normal, 10), (Argument(Opaque("lr")), Normal, 0), (Eol, Normal, 0)]

View File

@ -0,0 +1,48 @@
---
source: objdiff-core/tests/arch_arm.rs
expression: diff.instruction_rows
---
[
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 0,
size: 2,
opcode: 64,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 2,
size: 2,
opcode: 64,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 4,
size: 2,
opcode: 23,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
]