Make function size inference logic arch-specific

For MIPS, account for delay slot nops. For x86,
check for trailing nops (0x90). For PPC, check
for 4-byte 0x00 padding.

Resolves #229
This commit is contained in:
Luke Street 2025-08-02 10:56:26 -06:00
parent 1205e8ceb4
commit 1e62d4664c
12 changed files with 131 additions and 146 deletions

8
Cargo.lock generated
View File

@ -3437,7 +3437,7 @@ dependencies = [
[[package]] [[package]]
name = "objdiff-cli" name = "objdiff-cli"
version = "3.0.0-beta.12" version = "3.0.0-beta.13"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"argp", "argp",
@ -3460,7 +3460,7 @@ dependencies = [
[[package]] [[package]]
name = "objdiff-core" name = "objdiff-core"
version = "3.0.0-beta.12" version = "3.0.0-beta.13"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arm-attr", "arm-attr",
@ -3514,7 +3514,7 @@ dependencies = [
[[package]] [[package]]
name = "objdiff-gui" name = "objdiff-gui"
version = "3.0.0-beta.12" version = "3.0.0-beta.13"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cfg-if", "cfg-if",
@ -3550,7 +3550,7 @@ dependencies = [
[[package]] [[package]]
name = "objdiff-wasm" name = "objdiff-wasm"
version = "3.0.0-beta.12" version = "3.0.0-beta.13"
dependencies = [ dependencies = [
"log", "log",
"objdiff-core", "objdiff-core",

View File

@ -14,7 +14,7 @@ strip = "debuginfo"
codegen-units = 1 codegen-units = 1
[workspace.package] [workspace.package]
version = "3.0.0-beta.12" version = "3.0.0-beta.13"
authors = ["Luke Street <luke@street.dev>"] authors = ["Luke Street <luke@street.dev>"]
edition = "2024" edition = "2024"
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"

View File

@ -15,7 +15,7 @@ use crate::{
diff::{DiffObjConfig, MipsAbi, MipsInstrCategory, display::InstructionPart}, diff::{DiffObjConfig, MipsAbi, MipsInstrCategory, display::InstructionPart},
obj::{ obj::{
InstructionArg, InstructionArgValue, InstructionRef, Relocation, RelocationFlags, InstructionArg, InstructionArgValue, InstructionRef, Relocation, RelocationFlags,
ResolvedInstructionRef, ResolvedRelocation, SymbolFlag, SymbolFlagSet, ResolvedInstructionRef, ResolvedRelocation, Section, Symbol, SymbolFlag, SymbolFlagSet,
}, },
}; };
@ -140,6 +140,14 @@ impl ArchMips {
}) })
} }
fn default_instruction_flags(&self) -> rabbitizer::InstructionFlags {
match self.isa_extension {
Some(extension) => rabbitizer::InstructionFlags::new_extension(extension),
None => rabbitizer::InstructionFlags::new(IsaVersion::MIPS_III),
}
.with_abi(self.abi)
}
fn instruction_flags(&self, diff_config: &DiffObjConfig) -> rabbitizer::InstructionFlags { fn instruction_flags(&self, diff_config: &DiffObjConfig) -> rabbitizer::InstructionFlags {
let isa_extension = match diff_config.mips_instr_category { let isa_extension = match diff_config.mips_instr_category {
MipsInstrCategory::Auto => self.isa_extension, MipsInstrCategory::Auto => self.isa_extension,
@ -151,7 +159,7 @@ impl ArchMips {
}; };
match isa_extension { match isa_extension {
Some(extension) => rabbitizer::InstructionFlags::new_extension(extension), Some(extension) => rabbitizer::InstructionFlags::new_extension(extension),
None => rabbitizer::InstructionFlags::new_isa(IsaVersion::MIPS_III, None), None => rabbitizer::InstructionFlags::new(IsaVersion::MIPS_III),
} }
.with_abi(match diff_config.mips_abi { .with_abi(match diff_config.mips_abi {
MipsAbi::Auto => self.abi, MipsAbi::Auto => self.abi,
@ -331,6 +339,36 @@ impl Arch for ArchMips {
} }
flags flags
} }
fn infer_function_size(
&self,
symbol: &Symbol,
section: &Section,
next_address: u64,
) -> Result<u64> {
// Trim any trailing 4-byte zeroes from the end (nops)
let mut new_address = next_address;
while new_address >= symbol.address + 4
&& let Some(data) = section.data_range(new_address - 4, 4)
&& data == [0u8; 4]
{
new_address -= 4;
}
// Check if the last instruction has a delay slot, if so, include the delay slot nop
if new_address + 4 <= next_address
&& new_address >= symbol.address + 4
&& let Some(data) = section.data_range(new_address - 4, 4)
&& let instruction = rabbitizer::Instruction::new(
self.endianness.read_u32_bytes(data.try_into().unwrap()),
Vram::new((new_address - 4) as u32),
self.default_instruction_flags(),
)
&& instruction.opcode().has_delay_slot()
{
new_address += 4;
}
Ok(new_address.saturating_sub(symbol.address))
}
} }
fn push_args( fn push_args(

View File

@ -406,6 +406,15 @@ pub trait Arch: Send + Sync + Debug {
) -> Vec<ContextItem> { ) -> Vec<ContextItem> {
Vec::new() Vec::new()
} }
fn infer_function_size(
&self,
symbol: &Symbol,
_section: &Section,
next_address: u64,
) -> Result<u64> {
Ok(next_address.saturating_sub(symbol.address))
}
} }
pub fn new_arch(object: &object::File) -> Result<Box<dyn Arch>> { pub fn new_arch(object: &object::File) -> Result<Box<dyn Arch>> {

View File

@ -20,7 +20,7 @@ use crate::{
}, },
obj::{ obj::{
FlowAnalysisResult, InstructionRef, Object, Relocation, RelocationFlags, FlowAnalysisResult, InstructionRef, Object, Relocation, RelocationFlags,
ResolvedInstructionRef, ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet, ResolvedInstructionRef, ResolvedRelocation, Section, Symbol, SymbolFlag, SymbolFlagSet,
}, },
}; };
@ -455,6 +455,22 @@ impl Arch for ArchPpc {
} }
out out
} }
fn infer_function_size(
&self,
symbol: &Symbol,
section: &Section,
mut next_address: u64,
) -> Result<u64> {
// Trim any trailing 4-byte zeroes from the end (padding)
while next_address >= symbol.address + 4
&& let Some(data) = section.data_range(next_address - 4, 4)
&& data == [0u8; 4]
{
next_address -= 4;
}
Ok(next_address.saturating_sub(symbol.address))
}
} }
impl ArchPpc { impl ArchPpc {

View File

@ -11,7 +11,7 @@ use object::{Endian as _, Object as _, ObjectSection as _, elf, pe};
use crate::{ use crate::{
arch::{Arch, RelocationOverride, RelocationOverrideTarget}, arch::{Arch, RelocationOverride, RelocationOverrideTarget},
diff::{DiffObjConfig, X86Formatter, display::InstructionPart}, diff::{DiffObjConfig, X86Formatter, display::InstructionPart},
obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef}, obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, Section, Symbol},
}; };
#[derive(Debug)] #[derive(Debug)]
@ -303,6 +303,52 @@ impl Arch for ArchX86 {
fn data_reloc_size(&self, flags: RelocationFlags) -> usize { fn data_reloc_size(&self, flags: RelocationFlags) -> usize {
self.reloc_size(flags).unwrap_or(1) self.reloc_size(flags).unwrap_or(1)
} }
fn infer_function_size(
&self,
symbol: &Symbol,
section: &Section,
next_address: u64,
) -> Result<u64> {
let Ok(size) = (next_address - symbol.address).try_into() else {
return Ok(next_address.saturating_sub(symbol.address));
};
let Some(code) = section.data_range(symbol.address, size) else {
return Ok(0);
};
// Decode instructions to find the last non-NOP instruction
let mut decoder = self.decoder(code, symbol.address);
let mut instruction = Instruction::default();
let mut new_address = 0;
let mut reloc_iter = section.relocations.iter().peekable();
'outer: while decoder.can_decode() {
let address = decoder.ip();
while let Some(reloc) = reloc_iter.peek() {
match reloc.address.cmp(&address) {
Ordering::Less => {
reloc_iter.next();
}
Ordering::Equal => {
// If the instruction starts at a relocation, it's inline data
let reloc_size = self.reloc_size(reloc.flags).with_context(|| {
format!("Unsupported inline x86 relocation {:?}", reloc.flags)
})?;
if decoder.set_position(decoder.position() + reloc_size).is_ok() {
new_address = address + reloc_size as u64;
decoder.set_ip(new_address);
continue 'outer;
}
}
Ordering::Greater => break,
}
}
decoder.decode_out(&mut instruction);
if instruction.mnemonic() != iced_x86::Mnemonic::Nop {
new_address = instruction.next_ip();
}
}
Ok(new_address.saturating_sub(symbol.address))
}
} }
struct InstructionFormatterOutput<'a> { struct InstructionFormatterOutput<'a> {

View File

@ -122,7 +122,7 @@ fn map_symbols(
} }
// Infer symbol sizes for 0-size symbols // Infer symbol sizes for 0-size symbols
infer_symbol_sizes(&mut symbols, sections); infer_symbol_sizes(arch, &mut symbols, sections)?;
Ok((symbols, symbol_indices)) Ok((symbols, symbol_indices))
} }
@ -136,7 +136,7 @@ fn is_local_label(symbol: &Symbol) -> bool {
&& LABEL_PREFIXES.iter().any(|p| symbol.name.starts_with(p)) && LABEL_PREFIXES.iter().any(|p| symbol.name.starts_with(p))
} }
fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) { fn infer_symbol_sizes(arch: &dyn Arch, symbols: &mut [Symbol], sections: &[Section]) -> Result<()> {
// Create a sorted list of symbol indices by section // Create a sorted list of symbol indices by section
let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len()); let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len());
for (i, symbol) in symbols.iter().enumerate() { for (i, symbol) in symbols.iter().enumerate() {
@ -206,18 +206,13 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
iter_idx += 1; iter_idx += 1;
}; };
let section = &sections[section_idx]; let section = &sections[section_idx];
let mut next_address = let next_address =
next_symbol.map(|s| s.address).unwrap_or_else(|| section.address + section.size); next_symbol.map(|s| s.address).unwrap_or_else(|| section.address + section.size);
if section.kind == SectionKind::Code { let new_size = if section.kind == SectionKind::Code {
// For functions, trim any trailing 4-byte zeroes from the end (padding, nops) arch.infer_function_size(symbol, section, next_address)?
while next_address > symbol.address + 4 } else {
&& let Some(data) = section.data_range(next_address - 4, 4) next_address.saturating_sub(symbol.address)
&& data == [0u8; 4] };
{
next_address -= 4;
}
}
let new_size = next_address.saturating_sub(symbol.address);
if new_size > 0 { if new_size > 0 {
let symbol = &mut symbols[symbol_idx]; let symbol = &mut symbols[symbol_idx];
symbol.size = new_size; symbol.size = new_size;
@ -234,6 +229,7 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
} }
} }
} }
Ok(())
} }
fn map_sections( fn map_sections(

View File

@ -507,116 +507,4 @@ expression: diff.instruction_rows
), ),
arg_diff: [], arg_diff: [],
}, },
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 88,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 89,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 90,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 91,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 92,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 93,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 94,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 95,
size: 1,
opcode: 465,
branch_dest: None,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
] ]

View File

@ -29,11 +29,3 @@ expression: output
[(Address(76), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(41), Normal, 0), (Basic(" ~>"), Rotating(6), 0), (Eol, Normal, 0)] [(Address(76), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(41), Normal, 0), (Basic(" ~>"), Rotating(6), 0), (Eol, Normal, 0)]
[(Address(80), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(47), Normal, 0), (Basic(" ~>"), Rotating(7), 0), (Eol, Normal, 0)] [(Address(80), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(47), Normal, 0), (Basic(" ~>"), Rotating(7), 0), (Eol, Normal, 0)]
[(Address(84), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(53), Normal, 0), (Basic(" ~>"), Rotating(8), 0), (Eol, Normal, 0)] [(Address(84), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".dword", 65534), Normal, 10), (BranchDest(53), Normal, 0), (Basic(" ~>"), Rotating(8), 0), (Eol, Normal, 0)]
[(Address(88), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(89), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(90), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(91), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(92), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(93), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(94), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]
[(Address(95), Normal, 5), (Spacing(4), Normal, 0), (Opcode("nop", 465), Normal, 10), (Eol, Normal, 0)]

View File

@ -63,7 +63,7 @@ Object {
"int __cdecl test(int)", "int __cdecl test(int)",
), ),
address: 0, address: 0,
size: 96, size: 88,
kind: Function, kind: Function,
section: Some( section: Some(
1, 1,

View File

@ -1,12 +1,12 @@
{ {
"name": "objdiff-wasm", "name": "objdiff-wasm",
"version": "3.0.0-beta.12", "version": "3.0.0-beta.13",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "objdiff-wasm", "name": "objdiff-wasm",
"version": "3.0.0-beta.12", "version": "3.0.0-beta.13",
"license": "MIT OR Apache-2.0", "license": "MIT OR Apache-2.0",
"devDependencies": { "devDependencies": {
"@biomejs/biome": "^1.9.3", "@biomejs/biome": "^1.9.3",

View File

@ -1,6 +1,6 @@
{ {
"name": "objdiff-wasm", "name": "objdiff-wasm",
"version": "3.0.0-beta.12", "version": "3.0.0-beta.13",
"description": "A local diffing tool for decompilation projects.", "description": "A local diffing tool for decompilation projects.",
"author": { "author": {
"name": "Luke Street", "name": "Luke Street",