From 8e8ab6bef849471fa916e73b33753781e91dee98 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Tue, 13 May 2025 22:36:02 -0600 Subject: [PATCH] Skip label symbols when inferring symbol sizes COFF objects in particular don't contain the size of symbols. We infer the size of these symbols by extending them to the next symbol. If a tool emits symbols for branch targets, this causes the inferred size to be too small. This checks if a symbol starts with a certain prefix (right now, just .L or LAB_), and skips over it during symbol size inference. Resolves #174 --- objdiff-core/src/arch/arm.rs | 2 +- objdiff-core/src/obj/mod.rs | 2 +- objdiff-core/src/obj/read.rs | 50 +++--- objdiff-core/tests/arch_x86.rs | 9 + objdiff-core/tests/data/x86/local_labels.obj | Bin 0 -> 526 bytes .../arch_x86__read_x86_local_labels.snap | 163 ++++++++++++++++++ 6 files changed, 204 insertions(+), 22 deletions(-) create mode 100644 objdiff-core/tests/data/x86/local_labels.obj create mode 100644 objdiff-core/tests/snapshots/arch_x86__read_x86_local_labels.snap diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs index 6c1412f..45116cd 100644 --- a/objdiff-core/src/arch/arm.rs +++ b/objdiff-core/src/arch/arm.rs @@ -225,7 +225,7 @@ impl Arch for ArchArm { let mut address = start_addr; while address < end_addr { - while let Some(next) = next_mapping.take_if(|x| address >= x.address) { + while let Some(next) = next_mapping.filter(|x| address >= x.address) { // Change mapping mode = next.mapping; next_mapping = mappings_iter.next(); diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index f111f6e..d8a13f9 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -118,7 +118,7 @@ impl Section { Err(i) => self .relocations .get(i) - .take_if(|r| r.address < ins_ref.address + ins_ref.size as u64), + .filter(|r| r.address < ins_ref.address + ins_ref.size as u64), } .and_then(|relocation| { let symbol = obj.symbols.get(relocation.target_symbol)?; diff --git a/objdiff-core/src/obj/read.rs b/objdiff-core/src/obj/read.rs index a3f1fb6..b5cef3b 100644 --- a/objdiff-core/src/obj/read.rs +++ b/objdiff-core/src/obj/read.rs @@ -121,6 +121,15 @@ fn map_symbols( Ok((symbols, symbol_indices)) } +/// When inferring a symbol's size, we ignore symbols that start with specific prefixes. They are +/// usually emitted as branch targets and do not represent the start of a function or object. +fn is_local_label(symbol: &Symbol) -> bool { + const LABEL_PREFIXES: &[&str] = &[".L", "LAB_"]; + symbol.size == 0 + && symbol.flags.contains(SymbolFlag::Local) + && LABEL_PREFIXES.iter().any(|p| symbol.name.starts_with(p)) +} + fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) { // Create a sorted list of symbol indices by section let mut symbols_with_section = Vec::::with_capacity(symbols.len()); @@ -167,27 +176,28 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) { if last_end.0 == section_idx && last_end.1 > symbol.address { continue; } - let next_symbol = match symbol.kind { - // For function/object symbols, find the next function/object symbol (in other words: - // skip over labels) - SymbolKind::Function | SymbolKind::Object => loop { - if iter_idx >= symbols_with_section.len() { - break None; + let next_symbol = loop { + if iter_idx >= symbols_with_section.len() { + break None; + } + let next_symbol = &symbols[symbols_with_section[iter_idx]]; + if next_symbol.section != Some(section_idx) { + break None; + } + if match symbol.kind { + SymbolKind::Function | SymbolKind::Object => { + // For function/object symbols, find the next function/object + matches!(next_symbol.kind, SymbolKind::Function | SymbolKind::Object) } - let next_symbol = &symbols[symbols_with_section[iter_idx]]; - if next_symbol.section != Some(section_idx) { - break None; + SymbolKind::Unknown | SymbolKind::Section => { + // For labels (or anything else), stop at any symbol + true } - if let SymbolKind::Function | SymbolKind::Object = next_symbol.kind { - break Some(next_symbol); - } - iter_idx += 1; - }, - // For labels (or anything else), simply use the next symbol's address - SymbolKind::Unknown | SymbolKind::Section => symbols_with_section - .get(iter_idx) - .map(|&i| &symbols[i]) - .take_if(|s| s.section == Some(section_idx)), + } && !is_local_label(next_symbol) + { + break Some(next_symbol); + } + iter_idx += 1; }; let next_address = next_symbol.map(|s| s.address).unwrap_or_else(|| { let section = §ions[section_idx]; @@ -341,7 +351,7 @@ fn map_section_relocations( let idx = if let Some(section_symbol) = obj_file .symbol_by_index(idx) .ok() - .take_if(|s| s.kind() == object::SymbolKind::Section) + .filter(|s| s.kind() == object::SymbolKind::Section) { let section_index = section_symbol.section_index().context("Section symbol without section")?; diff --git a/objdiff-core/tests/arch_x86.rs b/objdiff-core/tests/arch_x86.rs index 652e80c..f13c032 100644 --- a/objdiff-core/tests/arch_x86.rs +++ b/objdiff-core/tests/arch_x86.rs @@ -68,3 +68,12 @@ fn read_x86_jumptable() { let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); insta::assert_snapshot!(output); } + +// Inferred size of functions should ignore symbols with specific prefixes +#[test] +#[cfg(feature = "x86")] +fn read_x86_local_labels() { + let diff_config = diff::DiffObjConfig::default(); + let obj = obj::read::parse(include_object!("data/x86/local_labels.obj"), &diff_config).unwrap(); + insta::assert_debug_snapshot!(obj); +} diff --git a/objdiff-core/tests/data/x86/local_labels.obj b/objdiff-core/tests/data/x86/local_labels.obj new file mode 100644 index 0000000000000000000000000000000000000000..a4dcbc079923b602e3683fdcbdd4797258995048 GIT binary patch literal 526 zcmeZaWMlw=89>YjW-&16m84dbK-nNF28eBd7^DUU92giB!os^tRCrpymH2nDwHzq% z+z&FUi_NB?h_(3`(`yq;+Y~MzhC0D4CgYP`Cjwu*31nbsKEm-@*!WU6Zv=-AL)Qi4 z18LnXfz1z?S`U;kH|Qz2HP|tf?um~(te2LVlM1!$-+u;%bY>HyBnxAM_~iV&vecrI zko?esg4Ckq#NyQ8jLftWFVA4T{G=?H^WZeHUqH@d1`^D0SpXPhC(004*GU8ev5 literal 0 HcmV?d00001 diff --git a/objdiff-core/tests/snapshots/arch_x86__read_x86_local_labels.snap b/objdiff-core/tests/snapshots/arch_x86__read_x86_local_labels.snap new file mode 100644 index 0000000..95929d7 --- /dev/null +++ b/objdiff-core/tests/snapshots/arch_x86__read_x86_local_labels.snap @@ -0,0 +1,163 @@ +--- +source: objdiff-core/tests/arch_x86.rs +expression: obj +--- +Object { + arch: ArchX86 { + arch: X86, + endianness: Little, + }, + endianness: Little, + symbols: [ + Symbol { + name: "42b830_convertToUppercaseShiftJIS.obj", + demangled_name: None, + address: 0, + size: 0, + kind: Unknown, + section: None, + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "[.text]", + demangled_name: None, + address: 0, + size: 0, + kind: Section, + section: Some( + 0, + ), + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "LAB_0042b850", + demangled_name: None, + address: 32, + size: 0, + kind: Object, + section: Some( + 0, + ), + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "LAB_0042b883", + demangled_name: None, + address: 83, + size: 0, + kind: Object, + section: Some( + 0, + ), + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "LAB_0042b87c", + demangled_name: None, + address: 76, + size: 0, + kind: Object, + section: Some( + 0, + ), + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "LAB_0042b884", + demangled_name: None, + address: 84, + size: 0, + kind: Object, + section: Some( + 0, + ), + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "LAB_0042b889", + demangled_name: None, + address: 89, + size: 0, + kind: Object, + section: Some( + 0, + ), + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "LAB_0042b845", + demangled_name: None, + address: 21, + size: 0, + kind: Object, + section: Some( + 0, + ), + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "LAB_0042b869", + demangled_name: None, + address: 57, + size: 0, + kind: Object, + section: Some( + 0, + ), + flags: FlagSet(Local), + align: None, + virtual_address: None, + }, + Symbol { + name: "ConvertToUppercaseShiftJIS", + demangled_name: None, + address: 0, + size: 92, + kind: Function, + section: Some( + 0, + ), + flags: FlagSet(Global | SizeInferred), + align: None, + virtual_address: None, + }, + ], + sections: [ + Section { + id: ".text-0", + name: ".text", + address: 0, + size: 92, + kind: Code, + data: SectionData( + 92, + ), + flags: FlagSet(), + align: Some( + 16, + ), + relocations: [], + line_info: {}, + virtual_address: None, + }, + ], + split_meta: None, + path: None, + timestamp: None, +}