Skip label symbols when inferring symbol sizes

COFF objects in particular don't contain the size of
symbols. We infer the size of these symbols by
extending them to the next symbol. If a tool emits
symbols for branch targets, this causes the inferred
size to be too small.

This checks if a symbol starts with a certain prefix
(right now, just .L or LAB_), and skips over it
during symbol size inference.

Resolves #174
This commit is contained in:
Luke Street 2025-05-13 22:36:02 -06:00
parent e865f3d598
commit 8e8ab6bef8
6 changed files with 204 additions and 22 deletions

View File

@ -225,7 +225,7 @@ impl Arch for ArchArm {
let mut address = start_addr; let mut address = start_addr;
while address < end_addr { while address < end_addr {
while let Some(next) = next_mapping.take_if(|x| address >= x.address) { while let Some(next) = next_mapping.filter(|x| address >= x.address) {
// Change mapping // Change mapping
mode = next.mapping; mode = next.mapping;
next_mapping = mappings_iter.next(); next_mapping = mappings_iter.next();

View File

@ -118,7 +118,7 @@ impl Section {
Err(i) => self Err(i) => self
.relocations .relocations
.get(i) .get(i)
.take_if(|r| r.address < ins_ref.address + ins_ref.size as u64), .filter(|r| r.address < ins_ref.address + ins_ref.size as u64),
} }
.and_then(|relocation| { .and_then(|relocation| {
let symbol = obj.symbols.get(relocation.target_symbol)?; let symbol = obj.symbols.get(relocation.target_symbol)?;

View File

@ -121,6 +121,15 @@ fn map_symbols(
Ok((symbols, symbol_indices)) Ok((symbols, symbol_indices))
} }
/// When inferring a symbol's size, we ignore symbols that start with specific prefixes. They are
/// usually emitted as branch targets and do not represent the start of a function or object.
fn is_local_label(symbol: &Symbol) -> bool {
const LABEL_PREFIXES: &[&str] = &[".L", "LAB_"];
symbol.size == 0
&& symbol.flags.contains(SymbolFlag::Local)
&& LABEL_PREFIXES.iter().any(|p| symbol.name.starts_with(p))
}
fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) { fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
// Create a sorted list of symbol indices by section // Create a sorted list of symbol indices by section
let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len()); let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len());
@ -167,27 +176,28 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
if last_end.0 == section_idx && last_end.1 > symbol.address { if last_end.0 == section_idx && last_end.1 > symbol.address {
continue; continue;
} }
let next_symbol = match symbol.kind { let next_symbol = loop {
// For function/object symbols, find the next function/object symbol (in other words: if iter_idx >= symbols_with_section.len() {
// skip over labels) break None;
SymbolKind::Function | SymbolKind::Object => loop { }
if iter_idx >= symbols_with_section.len() { let next_symbol = &symbols[symbols_with_section[iter_idx]];
break None; if next_symbol.section != Some(section_idx) {
break None;
}
if match symbol.kind {
SymbolKind::Function | SymbolKind::Object => {
// For function/object symbols, find the next function/object
matches!(next_symbol.kind, SymbolKind::Function | SymbolKind::Object)
} }
let next_symbol = &symbols[symbols_with_section[iter_idx]]; SymbolKind::Unknown | SymbolKind::Section => {
if next_symbol.section != Some(section_idx) { // For labels (or anything else), stop at any symbol
break None; true
} }
if let SymbolKind::Function | SymbolKind::Object = next_symbol.kind { } && !is_local_label(next_symbol)
break Some(next_symbol); {
} break Some(next_symbol);
iter_idx += 1; }
}, iter_idx += 1;
// For labels (or anything else), simply use the next symbol's address
SymbolKind::Unknown | SymbolKind::Section => symbols_with_section
.get(iter_idx)
.map(|&i| &symbols[i])
.take_if(|s| s.section == Some(section_idx)),
}; };
let next_address = next_symbol.map(|s| s.address).unwrap_or_else(|| { let next_address = next_symbol.map(|s| s.address).unwrap_or_else(|| {
let section = &sections[section_idx]; let section = &sections[section_idx];
@ -341,7 +351,7 @@ fn map_section_relocations(
let idx = if let Some(section_symbol) = obj_file let idx = if let Some(section_symbol) = obj_file
.symbol_by_index(idx) .symbol_by_index(idx)
.ok() .ok()
.take_if(|s| s.kind() == object::SymbolKind::Section) .filter(|s| s.kind() == object::SymbolKind::Section)
{ {
let section_index = let section_index =
section_symbol.section_index().context("Section symbol without section")?; section_symbol.section_index().context("Section symbol without section")?;

View File

@ -68,3 +68,12 @@ fn read_x86_jumptable() {
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output); insta::assert_snapshot!(output);
} }
// Inferred size of functions should ignore symbols with specific prefixes
#[test]
#[cfg(feature = "x86")]
fn read_x86_local_labels() {
let diff_config = diff::DiffObjConfig::default();
let obj = obj::read::parse(include_object!("data/x86/local_labels.obj"), &diff_config).unwrap();
insta::assert_debug_snapshot!(obj);
}

Binary file not shown.

View File

@ -0,0 +1,163 @@
---
source: objdiff-core/tests/arch_x86.rs
expression: obj
---
Object {
arch: ArchX86 {
arch: X86,
endianness: Little,
},
endianness: Little,
symbols: [
Symbol {
name: "42b830_convertToUppercaseShiftJIS.obj",
demangled_name: None,
address: 0,
size: 0,
kind: Unknown,
section: None,
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "[.text]",
demangled_name: None,
address: 0,
size: 0,
kind: Section,
section: Some(
0,
),
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "LAB_0042b850",
demangled_name: None,
address: 32,
size: 0,
kind: Object,
section: Some(
0,
),
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "LAB_0042b883",
demangled_name: None,
address: 83,
size: 0,
kind: Object,
section: Some(
0,
),
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "LAB_0042b87c",
demangled_name: None,
address: 76,
size: 0,
kind: Object,
section: Some(
0,
),
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "LAB_0042b884",
demangled_name: None,
address: 84,
size: 0,
kind: Object,
section: Some(
0,
),
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "LAB_0042b889",
demangled_name: None,
address: 89,
size: 0,
kind: Object,
section: Some(
0,
),
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "LAB_0042b845",
demangled_name: None,
address: 21,
size: 0,
kind: Object,
section: Some(
0,
),
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "LAB_0042b869",
demangled_name: None,
address: 57,
size: 0,
kind: Object,
section: Some(
0,
),
flags: FlagSet(Local),
align: None,
virtual_address: None,
},
Symbol {
name: "ConvertToUppercaseShiftJIS",
demangled_name: None,
address: 0,
size: 92,
kind: Function,
section: Some(
0,
),
flags: FlagSet(Global | SizeInferred),
align: None,
virtual_address: None,
},
],
sections: [
Section {
id: ".text-0",
name: ".text",
address: 0,
size: 92,
kind: Code,
data: SectionData(
92,
),
flags: FlagSet(),
align: Some(
16,
),
relocations: [],
line_info: {},
virtual_address: None,
},
],
split_meta: None,
path: None,
timestamp: None,
}