Improve automatic symbol pairing for nameless literals (#247)

* Improve automatic symbol pairing for nameless literals

* Fix data reloc diffing when the reloc points to an in-function static symbol

* Only pair up literals that match perfectly

* Clippy

* Do two separate passes when pairing up literals

* Fix partially-matching literal pairups not working right

* Remove duplicate $ splitting code

* Implement $ splitting for section names too

* Minor cleanup
This commit is contained in:
LagoLunatic 2025-08-31 01:07:43 -04:00 committed by GitHub
parent 8d24ec6373
commit f2a591356e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 143 additions and 71 deletions

View File

@ -35,6 +35,20 @@ pub fn diff_bss_symbol(
))
}
pub fn symbol_name_matches(left_name: &str, right_name: &str) -> bool {
// Match Metrowerks symbol$1234 against symbol$2345
if let Some((prefix, suffix)) = left_name.split_once('$') {
if !suffix.chars().all(char::is_numeric) {
return false;
}
right_name
.split_once('$')
.is_some_and(|(p, s)| p == prefix && s.chars().all(char::is_numeric))
} else {
left_name == right_name
}
}
fn reloc_eq(
left_obj: &Object,
right_obj: &Object,
@ -45,8 +59,8 @@ fn reloc_eq(
return false;
}
let symbol_name_addend_matches =
left.symbol.name == right.symbol.name && left.relocation.addend == right.relocation.addend;
let symbol_name_addend_matches = symbol_name_matches(&left.symbol.name, &right.symbol.name)
&& left.relocation.addend == right.relocation.addend;
match (left.symbol.section, right.symbol.section) {
(Some(sl), Some(sr)) => {
// Match if section and name+addend or address match

View File

@ -13,7 +13,7 @@ use crate::{
code::{diff_code, no_diff_code},
data::{
diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol,
diff_generic_section, no_diff_bss_section, no_diff_data_section,
diff_generic_section, no_diff_bss_section, no_diff_data_section, symbol_name_matches,
},
},
obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag},
@ -575,47 +575,60 @@ fn matching_symbols(
&mut matches,
)?;
}
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue;
}
let section_kind = symbol_section_kind(left, symbol);
if section_kind == SectionKind::Unknown {
continue;
}
if left_used.contains(&symbol_idx) {
continue;
}
let symbol_match = SymbolMatch {
left: Some(symbol_idx),
right: find_symbol(right, left, symbol, Some(&right_used)),
prev: find_symbol(prev, left, symbol, None),
section_kind,
};
matches.push(symbol_match);
if let Some(right) = symbol_match.right {
right_used.insert(right);
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
// those are correct first, while the second pass catches near matches.
for fuzzy_literals in [false, true] {
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue;
}
let section_kind = symbol_section_kind(left, symbol);
if section_kind == SectionKind::Unknown {
continue;
}
if left_used.contains(&symbol_idx) {
continue;
}
let symbol_match = SymbolMatch {
left: Some(symbol_idx),
right: find_symbol(right, left, symbol_idx, Some(&right_used), fuzzy_literals),
prev: find_symbol(prev, left, symbol_idx, None, fuzzy_literals),
section_kind,
};
matches.push(symbol_match);
if let Some(right) = symbol_match.right {
left_used.insert(symbol_idx);
right_used.insert(right);
}
}
}
}
if let Some(right) = right {
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue;
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
// those are correct first, while the second pass catches near matches.
for fuzzy_literals in [false, true] {
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue;
}
let section_kind = symbol_section_kind(right, symbol);
if section_kind == SectionKind::Unknown {
continue;
}
if right_used.contains(&symbol_idx) {
continue;
}
let symbol_match = SymbolMatch {
left: None,
right: Some(symbol_idx),
prev: find_symbol(prev, right, symbol_idx, None, fuzzy_literals),
section_kind,
};
matches.push(symbol_match);
if symbol_match.prev.is_some() {
right_used.insert(symbol_idx);
}
}
let section_kind = symbol_section_kind(right, symbol);
if section_kind == SectionKind::Unknown {
continue;
}
if right_used.contains(&symbol_idx) {
continue;
}
matches.push(SymbolMatch {
left: None,
right: Some(symbol_idx),
prev: find_symbol(prev, right, symbol, None),
section_kind,
});
}
}
Ok(matches)
@ -637,7 +650,10 @@ where
fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> {
if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) {
Some((section.name.as_str(), section.kind))
// Match x86 .rdata$r against .rdata$rs
let section_name =
section.name.split_once('$').map_or(section.name.as_str(), |(prefix, _)| prefix);
Some((section_name, section.kind))
} else if symbol.flags.contains(SymbolFlag::Common) {
Some((".comm", SectionKind::Common))
} else {
@ -653,52 +669,94 @@ fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind {
}
}
/// Check if a symbol is a compiler-generated literal like @1234.
fn is_symbol_compiler_generated_literal(symbol: &Symbol) -> bool {
if !symbol.name.starts_with('@') {
return false;
}
if !symbol.name[1..].chars().all(char::is_numeric) {
// Exclude @stringBase0, @GUARD@, etc.
return false;
}
true
}
fn find_symbol(
obj: Option<&Object>,
in_obj: &Object,
in_symbol: &Symbol,
in_symbol_idx: usize,
used: Option<&BTreeSet<usize>>,
fuzzy_literals: bool,
) -> Option<usize> {
let in_symbol = &in_obj.symbols[in_symbol_idx];
let obj = obj?;
let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?;
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
// If they are in the same section and have the same value
if is_symbol_compiler_generated_literal(in_symbol)
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
{
let mut closest_match_symbol_idx = None;
let mut closest_match_percent = 0.0;
for (symbol_idx, symbol) in unmatched_symbols(obj, used) {
let Some(section_index) = symbol.section else {
continue;
};
if obj.sections[section_index].name != section_name {
continue;
}
if !is_symbol_compiler_generated_literal(symbol) {
continue;
}
match section_kind {
SectionKind::Data => {
// For data, pick the first symbol with exactly matching bytes and relocations.
// If no symbols match exactly, and `fuzzy_literals` is true, pick the closest
// plausible match instead.
if let Ok((left_diff, _right_diff)) =
diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx)
&& let Some(match_percent) = left_diff.match_percent
&& (match_percent == 100.0
|| (fuzzy_literals
&& match_percent >= 50.0
&& match_percent > closest_match_percent))
{
closest_match_symbol_idx = Some(symbol_idx);
closest_match_percent = match_percent;
if match_percent == 100.0 {
break;
}
}
}
SectionKind::Bss => {
// For BSS, pick the first symbol that has the exact matching size.
if in_symbol.size == symbol.size {
closest_match_symbol_idx = Some(symbol_idx);
break;
}
}
_ => unreachable!(),
}
}
return closest_match_symbol_idx;
}
// Try to find an exact name match
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind
}) {
return Some(symbol_idx);
}
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
// If they are at the same address in the same section
if in_symbol.name.starts_with('@')
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
&& let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
let Some(section_index) = symbol.section else {
return false;
};
symbol.name.starts_with('@')
&& symbol.address == in_symbol.address
&& obj.sections[section_index].name == section_name
})
{
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
symbol_name_matches(&in_symbol.name, &symbol.name)
&& symbol_section_kind(obj, symbol) == section_kind
&& symbol_section(obj, symbol).is_some_and(|(name, _)| name == section_name)
}) {
return Some(symbol_idx);
}
// Match Metrowerks symbol$1234 against symbol$2345
if let Some((prefix, suffix)) = in_symbol.name.split_once('$') {
if !suffix.chars().all(char::is_numeric) {
return None;
}
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
if let Some((p, s)) = symbol.name.split_once('$') {
prefix == p
&& s.chars().all(char::is_numeric)
&& symbol_section_kind(obj, symbol) == section_kind
} else {
false
}
}) {
return Some(symbol_idx);
}
}
None
}