Improve ARM function size inference

This allows 2-byte padding to be trimmed in ARM functions.

Resolves #253
This commit is contained in:
Luke Street 2025-09-25 00:26:43 -06:00
parent 7a8efb4c88
commit 90e81fad7e
8 changed files with 98 additions and 19 deletions

View File

@ -460,12 +460,16 @@ impl Arch for ArchArm {
section: &Section,
mut next_address: u64,
) -> Result<u64> {
// Trim any trailing 4-byte zeroes from the end (padding)
while next_address >= symbol.address + 4
&& let Some(data) = section.data_range(next_address - 4, 4)
&& data == [0u8; 4]
// TODO: This should probably check the disasm mode and trim accordingly,
// but self.disasm_modes isn't populated until post_init, so it needs a refactor.
// Trim any trailing 2-byte zeroes from the end (padding)
while next_address >= symbol.address + 2
&& let Some(data) = section.data_range(next_address - 2, 2)
&& data == [0u8; 2]
&& section.relocation_at(next_address - 2, 2).is_none()
{
next_address -= 4;
next_address -= 2;
}
Ok(next_address.saturating_sub(symbol.address))
}

View File

@ -355,6 +355,7 @@ impl Arch for ArchMips {
while new_address >= symbol.address + 4
&& let Some(data) = section.data_range(new_address - 4, 4)
&& data == [0u8; 4]
&& section.relocation_at(next_address - 4, 4).is_none()
{
new_address -= 4;
}

View File

@ -457,6 +457,7 @@ impl Arch for ArchPpc {
while next_address >= symbol.address + 4
&& let Some(data) = section.data_range(next_address - 4, 4)
&& data == [0u8; 4]
&& section.relocation_at(next_address - 4, 4).is_none()
{
next_address -= 4;
}

View File

@ -107,32 +107,33 @@ impl Section {
// The alignment to use when "Combine data/text sections" is enabled.
pub fn combined_alignment(&self) -> u64 {
const MIN_ALIGNMENT: u64 = 4;
self.align.map(|align| align.get().max(MIN_ALIGNMENT)).unwrap_or(MIN_ALIGNMENT)
self.align.map_or(MIN_ALIGNMENT, |align| align.get().max(MIN_ALIGNMENT))
}
pub fn relocation_at<'obj>(
&'obj self,
obj: &'obj Object,
ins_ref: InstructionRef,
) -> Option<ResolvedRelocation<'obj>> {
match self.relocations.binary_search_by_key(&ins_ref.address, |r| r.address) {
pub fn relocation_at(&self, address: u64, size: u8) -> Option<&Relocation> {
match self.relocations.binary_search_by_key(&address, |r| r.address) {
Ok(mut i) => {
// Find the first relocation at the address
while i
.checked_sub(1)
.and_then(|n| self.relocations.get(n))
.is_some_and(|r| r.address == ins_ref.address)
.is_some_and(|r| r.address == address)
{
i -= 1;
}
self.relocations.get(i)
}
Err(i) => self
.relocations
.get(i)
.filter(|r| r.address < ins_ref.address + ins_ref.size as u64),
Err(i) => self.relocations.get(i).filter(|r| r.address < address + size as u64),
}
.and_then(|relocation| {
}
pub fn resolve_relocation_at<'obj>(
&'obj self,
obj: &'obj Object,
address: u64,
size: u8,
) -> Option<ResolvedRelocation<'obj>> {
self.relocation_at(address, size).and_then(|relocation| {
let symbol = obj.symbols.get(relocation.target_symbol)?;
Some(ResolvedRelocation { relocation, symbol })
})
@ -316,7 +317,7 @@ impl Object {
let section = self.sections.get(section_index)?;
let offset = ins_ref.address.checked_sub(section.address)?;
let code = section.data.get(offset as usize..offset as usize + ins_ref.size as usize)?;
let relocation = section.relocation_at(self, ins_ref);
let relocation = section.resolve_relocation_at(self, ins_ref.address, ins_ref.size);
Some(ResolvedInstructionRef {
ins_ref,
symbol_index,

View File

@ -56,3 +56,20 @@ fn combine_text_sections() {
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output);
}
#[test]
#[cfg(feature = "arm")]
fn trim_trailing_hword() {
let diff_config = diff::DiffObjConfig::default();
let obj = obj::read::parse(
include_object!("data/arm/issue_253.o"),
&diff_config,
diff::DiffSide::Base,
)
.unwrap();
let symbol_idx = obj.symbols.iter().position(|s| s.name == "sub_8030F20").unwrap();
let diff = diff::code::no_diff_code(&obj, symbol_idx, &diff_config).unwrap();
insta::assert_debug_snapshot!(diff.instruction_rows);
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output);
}

Binary file not shown.

View File

@ -0,0 +1,7 @@
---
source: objdiff-core/tests/arch_arm.rs
expression: output
---
[(Address(0), Dim, 5), (Spacing(4), Normal, 0), (Opcode("str", 64), Normal, 10), (Argument(Opaque("r1")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(36)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
[(Address(2), Dim, 5), (Spacing(4), Normal, 0), (Opcode("str", 64), Normal, 10), (Argument(Opaque("r2")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(40)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
[(Address(4), Dim, 5), (Spacing(4), Normal, 0), (Opcode("bx", 23), Normal, 10), (Argument(Opaque("lr")), Normal, 0), (Eol, Normal, 0)]

View File

@ -0,0 +1,48 @@
---
source: objdiff-core/tests/arch_arm.rs
expression: diff.instruction_rows
---
[
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 0,
size: 2,
opcode: 64,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 2,
size: 2,
opcode: 64,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 4,
size: 2,
opcode: 23,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
]