Make function size inference logic arch-specific

For MIPS, account for delay slot nops. For x86,
check for trailing nops (0x90). For PPC, check
for 4-byte 0x00 padding.

Resolves #229
This commit is contained in:
Luke Street 2025-08-02 10:56:26 -06:00
parent 1205e8ceb4
commit 1e62d4664c
12 changed files with 131 additions and 146 deletions

8
Cargo.lock generated
View File

@ -3437,7 +3437,7 @@ dependencies = [
[[package]]
name = "objdiff-cli"
version = "3.0.0-beta.12"
version = "3.0.0-beta.13"
dependencies = [
"anyhow",
"argp",
@ -3460,7 +3460,7 @@ dependencies = [
[[package]]
name = "objdiff-core"
version = "3.0.0-beta.12"
version = "3.0.0-beta.13"
dependencies = [
"anyhow",
"arm-attr",
@ -3514,7 +3514,7 @@ dependencies = [
[[package]]
name = "objdiff-gui"
version = "3.0.0-beta.12"
version = "3.0.0-beta.13"
dependencies = [
"anyhow",
"cfg-if",
@ -3550,7 +3550,7 @@ dependencies = [
[[package]]
name = "objdiff-wasm"
version = "3.0.0-beta.12"
version = "3.0.0-beta.13"
dependencies = [
"log",
"objdiff-core",

View File

@ -14,7 +14,7 @@ strip = "debuginfo"
codegen-units = 1
[workspace.package]
version = "3.0.0-beta.12"
version = "3.0.0-beta.13"
authors = ["Luke Street <luke@street.dev>"]
edition = "2024"
license = "MIT OR Apache-2.0"

View File

@ -15,7 +15,7 @@ use crate::{
diff::{DiffObjConfig, MipsAbi, MipsInstrCategory, display::InstructionPart},
obj::{
InstructionArg, InstructionArgValue, InstructionRef, Relocation, RelocationFlags,
ResolvedInstructionRef, ResolvedRelocation, SymbolFlag, SymbolFlagSet,
ResolvedInstructionRef, ResolvedRelocation, Section, Symbol, SymbolFlag, SymbolFlagSet,
},
};
@ -140,6 +140,14 @@ impl ArchMips {
})
}
fn default_instruction_flags(&self) -> rabbitizer::InstructionFlags {
match self.isa_extension {
Some(extension) => rabbitizer::InstructionFlags::new_extension(extension),
None => rabbitizer::InstructionFlags::new(IsaVersion::MIPS_III),
}
.with_abi(self.abi)
}
fn instruction_flags(&self, diff_config: &DiffObjConfig) -> rabbitizer::InstructionFlags {
let isa_extension = match diff_config.mips_instr_category {
MipsInstrCategory::Auto => self.isa_extension,
@ -151,7 +159,7 @@ impl ArchMips {
};
match isa_extension {
Some(extension) => rabbitizer::InstructionFlags::new_extension(extension),
None => rabbitizer::InstructionFlags::new_isa(IsaVersion::MIPS_III, None),
None => rabbitizer::InstructionFlags::new(IsaVersion::MIPS_III),
}
.with_abi(match diff_config.mips_abi {
MipsAbi::Auto => self.abi,
@ -331,6 +339,36 @@ impl Arch for ArchMips {
}
flags
}
fn infer_function_size(
&self,
symbol: &Symbol,
section: &Section,
next_address: u64,
) -> Result<u64> {
// Trim any trailing 4-byte zeroes from the end (nops)
let mut new_address = next_address;
while new_address >= symbol.address + 4
&& let Some(data) = section.data_range(new_address - 4, 4)
&& data == [0u8; 4]
{
new_address -= 4;
}
// Check if the last instruction has a delay slot, if so, include the delay slot nop
if new_address + 4 <= next_address
&& new_address >= symbol.address + 4
&& let Some(data) = section.data_range(new_address - 4, 4)
&& let instruction = rabbitizer::Instruction::new(
self.endianness.read_u32_bytes(data.try_into().unwrap()),
Vram::new((new_address - 4) as u32),
self.default_instruction_flags(),
)
&& instruction.opcode().has_delay_slot()
{
new_address += 4;
}
Ok(new_address.saturating_sub(symbol.address))
}
}
fn push_args(

View File

@ -406,6 +406,15 @@ pub trait Arch: Send + Sync + Debug {
) -> Vec<ContextItem> {
Vec::new()
}
fn infer_function_size(
&self,
symbol: &Symbol,
_section: &Section,
next_address: u64,
) -> Result<u64> {
Ok(next_address.saturating_sub(symbol.address))
}
}
pub fn new_arch(object: &object::File) -> Result<Box<dyn Arch>> {

View File

@ -20,7 +20,7 @@ use crate::{
},
obj::{
FlowAnalysisResult, InstructionRef, Object, Relocation, RelocationFlags,
ResolvedInstructionRef, ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet,
ResolvedInstructionRef, ResolvedRelocation, Section, Symbol, SymbolFlag, SymbolFlagSet,
},
};
@ -455,6 +455,22 @@ impl Arch for ArchPpc {
}
out
}
fn infer_function_size(
&self,
symbol: &Symbol,
section: &Section,
mut next_address: u64,
) -> Result<u64> {
// Trim any trailing 4-byte zeroes from the end (padding)
while next_address >= symbol.address + 4
&& let Some(data) = section.data_range(next_address - 4, 4)
&& data == [0u8; 4]
{
next_address -= 4;
}
Ok(next_address.saturating_sub(symbol.address))
}
}
impl ArchPpc {

View File

@ -11,7 +11,7 @@ use object::{Endian as _, Object as _, ObjectSection as _, elf, pe};
use crate::{
arch::{Arch, RelocationOverride, RelocationOverrideTarget},
diff::{DiffObjConfig, X86Formatter, display::InstructionPart},
obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef},
obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, Section, Symbol},
};
#[derive(Debug)]
@ -303,6 +303,52 @@ impl Arch for ArchX86 {
fn data_reloc_size(&self, flags: RelocationFlags) -> usize {
self.reloc_size(flags).unwrap_or(1)
}
fn infer_function_size(
&self,
symbol: &Symbol,
section: &Section,
next_address: u64,
) -> Result<u64> {
let Ok(size) = (next_address - symbol.address).try_into() else {
return Ok(next_address.saturating_sub(symbol.address));
};
let Some(code) = section.data_range(symbol.address, size) else {
return Ok(0);
};
// Decode instructions to find the last non-NOP instruction
let mut decoder = self.decoder(code, symbol.address);
let mut instruction = Instruction::default();
let mut new_address = 0;
let mut reloc_iter = section.relocations.iter().peekable();
'outer: while decoder.can_decode() {
let address = decoder.ip();
while let Some(reloc) = reloc_iter.peek() {
match reloc.address.cmp(&address) {
Ordering::Less => {
reloc_iter.next();
}
Ordering::Equal => {
// If the instruction starts at a relocation, it's inline data
let reloc_size = self.reloc_size(reloc.flags).with_context(|| {
format!("Unsupported inline x86 relocation {:?}", reloc.flags)
})?;
if decoder.set_position(decoder.position() + reloc_size).is_ok() {
new_address = address + reloc_size as u64;
decoder.set_ip(new_address);
continue 'outer;
}
}
Ordering::Greater => break,
}
}
decoder.decode_out(&mut instruction);
if instruction.mnemonic() != iced_x86::Mnemonic::Nop {
new_address = instruction.next_ip();
}
}
Ok(new_address.saturating_sub(symbol.address))
}
}
struct InstructionFormatterOutput<'a> {

View File

@ -122,7 +122,7 @@ fn map_symbols(
}
// Infer symbol sizes for 0-size symbols
infer_symbol_sizes(&mut symbols, sections);
infer_symbol_sizes(arch, &mut symbols, sections)?;
Ok((symbols, symbol_indices))
}
@ -136,7 +136,7 @@ fn is_local_label(symbol: &Symbol) -> bool {
&& LABEL_PREFIXES.iter().any(|p| symbol.name.starts_with(p))
}
fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
fn infer_symbol_sizes(arch: &dyn Arch, symbols: &mut [Symbol], sections: &[Section]) -> Result<()> {
// Create a sorted list of symbol indices by section
let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len());
for (i, symbol) in symbols.iter().enumerate() {
@ -206,18 +206,13 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
iter_idx += 1;
};
let section = &sections[section_idx];
let mut next_address =
let next_address =
next_symbol.map(|s| s.address).unwrap_or_else(|| section.address + section.size);
if section.kind == SectionKind::Code {
// For functions, trim any trailing 4-byte zeroes from the end (padding, nops)
while next_address > symbol.address + 4
&& let Some(data) = section.data_range(next_address - 4, 4)
&& data == [0u8; 4]
{
next_address -= 4;
}
}
let new_size = next_address.saturating_sub(symbol.address);
let new_size = if section.kind == SectionKind::Code {
arch.infer_function_size(symbol, section, next_address)?
} else {
next_address.saturating_sub(symbol.address)
};
if new_size > 0 {
let symbol = &mut symbols[symbol_idx];
symbol.size = new_size;
@ -234,6 +229,7 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
}
}
}
Ok(())
}
fn map_sections(

View File

@ -507,116 +507,4 @@ expression: diff.instruction_rows
),
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 88,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 89,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 90,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 91,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 92,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 93,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 94,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 95,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
]

View File

@ -29,11 +29,3 @@ expression: output
[(Address(76), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(41), Normal, 0), (Basic(" ~>"), Rotating(6), 0), (Eol, Normal, 0)]
[(Address(80), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(47), Normal, 0), (Basic(" ~>"), Rotating(7), 0), (Eol, Normal, 0)]
[(Address(84), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(53), Normal, 0), (Basic(" ~>"), Rotating(8), 0), (Eol, Normal, 0)]
[(Address(88), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(89), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(90), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(91), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(92), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(93), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(94), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(95), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]

View File

@ -63,7 +63,7 @@ Object {
"int __cdecl test(int)",
),
address: 0,
size: 96,
size: 88,
kind: Function,
section: Some(
1,

View File

@ -1,12 +1,12 @@
{
"name": "objdiff-wasm",
"version": "3.0.0-beta.12",
"version": "3.0.0-beta.13",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "objdiff-wasm",
"version": "3.0.0-beta.12",
"version": "3.0.0-beta.13",
"license": "MIT OR Apache-2.0",
"devDependencies": {
"@biomejs/biome": "^1.9.3",

View File

@ -1,6 +1,6 @@
{
"name": "objdiff-wasm",
"version": "3.0.0-beta.12",
"version": "3.0.0-beta.13",
"description": "A local diffing tool for decompilation projects.",
"author": {
"name": "Luke Street",