Improve automatic symbol pairing for nameless literals (#247)

* Improve automatic symbol pairing for nameless literals

* Fix data reloc diffing when the reloc points to an in-function static symbol

* Only pair up literals that match perfectly

* Clippy

* Do two separate passes when pairing up literals

* Fix partially-matching literal pairups not working right

* Remove duplicate $ splitting code

* Implement $ splitting for section names too

* Minor cleanup
This commit is contained in:
LagoLunatic 2025-08-31 01:07:43 -04:00 committed by GitHub
parent 8d24ec6373
commit f2a591356e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 143 additions and 71 deletions

View File

@ -35,6 +35,20 @@ pub fn diff_bss_symbol(
)) ))
} }
pub fn symbol_name_matches(left_name: &str, right_name: &str) -> bool {
// Match Metrowerks symbol$1234 against symbol$2345
if let Some((prefix, suffix)) = left_name.split_once('$') {
if !suffix.chars().all(char::is_numeric) {
return false;
}
right_name
.split_once('$')
.is_some_and(|(p, s)| p == prefix && s.chars().all(char::is_numeric))
} else {
left_name == right_name
}
}
fn reloc_eq( fn reloc_eq(
left_obj: &Object, left_obj: &Object,
right_obj: &Object, right_obj: &Object,
@ -45,8 +59,8 @@ fn reloc_eq(
return false; return false;
} }
let symbol_name_addend_matches = let symbol_name_addend_matches = symbol_name_matches(&left.symbol.name, &right.symbol.name)
left.symbol.name == right.symbol.name && left.relocation.addend == right.relocation.addend; && left.relocation.addend == right.relocation.addend;
match (left.symbol.section, right.symbol.section) { match (left.symbol.section, right.symbol.section) {
(Some(sl), Some(sr)) => { (Some(sl), Some(sr)) => {
// Match if section and name+addend or address match // Match if section and name+addend or address match

View File

@ -13,7 +13,7 @@ use crate::{
code::{diff_code, no_diff_code}, code::{diff_code, no_diff_code},
data::{ data::{
diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol, diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol,
diff_generic_section, no_diff_bss_section, no_diff_data_section, diff_generic_section, no_diff_bss_section, no_diff_data_section, symbol_name_matches,
}, },
}, },
obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag}, obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag},
@ -575,6 +575,9 @@ fn matching_symbols(
&mut matches, &mut matches,
)?; )?;
} }
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
// those are correct first, while the second pass catches near matches.
for fuzzy_literals in [false, true] {
for (symbol_idx, symbol) in left.symbols.iter().enumerate() { for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) { if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue; continue;
@ -588,17 +591,22 @@ fn matching_symbols(
} }
let symbol_match = SymbolMatch { let symbol_match = SymbolMatch {
left: Some(symbol_idx), left: Some(symbol_idx),
right: find_symbol(right, left, symbol, Some(&right_used)), right: find_symbol(right, left, symbol_idx, Some(&right_used), fuzzy_literals),
prev: find_symbol(prev, left, symbol, None), prev: find_symbol(prev, left, symbol_idx, None, fuzzy_literals),
section_kind, section_kind,
}; };
matches.push(symbol_match); matches.push(symbol_match);
if let Some(right) = symbol_match.right { if let Some(right) = symbol_match.right {
left_used.insert(symbol_idx);
right_used.insert(right); right_used.insert(right);
} }
} }
} }
}
if let Some(right) = right { if let Some(right) = right {
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
// those are correct first, while the second pass catches near matches.
for fuzzy_literals in [false, true] {
for (symbol_idx, symbol) in right.symbols.iter().enumerate() { for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) { if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue; continue;
@ -610,12 +618,17 @@ fn matching_symbols(
if right_used.contains(&symbol_idx) { if right_used.contains(&symbol_idx) {
continue; continue;
} }
matches.push(SymbolMatch { let symbol_match = SymbolMatch {
left: None, left: None,
right: Some(symbol_idx), right: Some(symbol_idx),
prev: find_symbol(prev, right, symbol, None), prev: find_symbol(prev, right, symbol_idx, None, fuzzy_literals),
section_kind, section_kind,
}); };
matches.push(symbol_match);
if symbol_match.prev.is_some() {
right_used.insert(symbol_idx);
}
}
} }
} }
Ok(matches) Ok(matches)
@ -637,7 +650,10 @@ where
fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> { fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> {
if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) { if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) {
Some((section.name.as_str(), section.kind)) // Match x86 .rdata$r against .rdata$rs
let section_name =
section.name.split_once('$').map_or(section.name.as_str(), |(prefix, _)| prefix);
Some((section_name, section.kind))
} else if symbol.flags.contains(SymbolFlag::Common) { } else if symbol.flags.contains(SymbolFlag::Common) {
Some((".comm", SectionKind::Common)) Some((".comm", SectionKind::Common))
} else { } else {
@ -653,52 +669,94 @@ fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind {
} }
} }
/// Check if a symbol is a compiler-generated literal like @1234.
fn is_symbol_compiler_generated_literal(symbol: &Symbol) -> bool {
if !symbol.name.starts_with('@') {
return false;
}
if !symbol.name[1..].chars().all(char::is_numeric) {
// Exclude @stringBase0, @GUARD@, etc.
return false;
}
true
}
fn find_symbol( fn find_symbol(
obj: Option<&Object>, obj: Option<&Object>,
in_obj: &Object, in_obj: &Object,
in_symbol: &Symbol, in_symbol_idx: usize,
used: Option<&BTreeSet<usize>>, used: Option<&BTreeSet<usize>>,
fuzzy_literals: bool,
) -> Option<usize> { ) -> Option<usize> {
let in_symbol = &in_obj.symbols[in_symbol_idx];
let obj = obj?; let obj = obj?;
let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?; let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?;
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
// If they are in the same section and have the same value
if is_symbol_compiler_generated_literal(in_symbol)
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
{
let mut closest_match_symbol_idx = None;
let mut closest_match_percent = 0.0;
for (symbol_idx, symbol) in unmatched_symbols(obj, used) {
let Some(section_index) = symbol.section else {
continue;
};
if obj.sections[section_index].name != section_name {
continue;
}
if !is_symbol_compiler_generated_literal(symbol) {
continue;
}
match section_kind {
SectionKind::Data => {
// For data, pick the first symbol with exactly matching bytes and relocations.
// If no symbols match exactly, and `fuzzy_literals` is true, pick the closest
// plausible match instead.
if let Ok((left_diff, _right_diff)) =
diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx)
&& let Some(match_percent) = left_diff.match_percent
&& (match_percent == 100.0
|| (fuzzy_literals
&& match_percent >= 50.0
&& match_percent > closest_match_percent))
{
closest_match_symbol_idx = Some(symbol_idx);
closest_match_percent = match_percent;
if match_percent == 100.0 {
break;
}
}
}
SectionKind::Bss => {
// For BSS, pick the first symbol that has the exact matching size.
if in_symbol.size == symbol.size {
closest_match_symbol_idx = Some(symbol_idx);
break;
}
}
_ => unreachable!(),
}
}
return closest_match_symbol_idx;
}
// Try to find an exact name match // Try to find an exact name match
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| { if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind
}) { }) {
return Some(symbol_idx); return Some(symbol_idx);
} }
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
// If they are at the same address in the same section
if in_symbol.name.starts_with('@')
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
&& let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
let Some(section_index) = symbol.section else {
return false;
};
symbol.name.starts_with('@')
&& symbol.address == in_symbol.address
&& obj.sections[section_index].name == section_name
})
{
return Some(symbol_idx);
}
// Match Metrowerks symbol$1234 against symbol$2345
if let Some((prefix, suffix)) = in_symbol.name.split_once('$') {
if !suffix.chars().all(char::is_numeric) {
return None;
}
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| { if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
if let Some((p, s)) = symbol.name.split_once('$') { symbol_name_matches(&in_symbol.name, &symbol.name)
prefix == p
&& s.chars().all(char::is_numeric)
&& symbol_section_kind(obj, symbol) == section_kind && symbol_section_kind(obj, symbol) == section_kind
} else { && symbol_section(obj, symbol).is_some_and(|(name, _)| name == section_name)
false
}
}) { }) {
return Some(symbol_idx); return Some(symbol_idx);
} }
}
None None
} }