mirror of
https://github.com/encounter/objdiff.git
synced 2025-10-05 01:19:49 +00:00
Improve automatic symbol pairing for nameless literals (#247)
* Improve automatic symbol pairing for nameless literals * Fix data reloc diffing when the reloc points to an in-function static symbol * Only pair up literals that match perfectly * Clippy * Do two separate passes when pairing up literals * Fix partially-matching literal pairups not working right * Remove duplicate $ splitting code * Implement $ splitting for section names too * Minor cleanup
This commit is contained in:
parent
8d24ec6373
commit
f2a591356e
@ -35,6 +35,20 @@ pub fn diff_bss_symbol(
|
||||
))
|
||||
}
|
||||
|
||||
pub fn symbol_name_matches(left_name: &str, right_name: &str) -> bool {
|
||||
// Match Metrowerks symbol$1234 against symbol$2345
|
||||
if let Some((prefix, suffix)) = left_name.split_once('$') {
|
||||
if !suffix.chars().all(char::is_numeric) {
|
||||
return false;
|
||||
}
|
||||
right_name
|
||||
.split_once('$')
|
||||
.is_some_and(|(p, s)| p == prefix && s.chars().all(char::is_numeric))
|
||||
} else {
|
||||
left_name == right_name
|
||||
}
|
||||
}
|
||||
|
||||
fn reloc_eq(
|
||||
left_obj: &Object,
|
||||
right_obj: &Object,
|
||||
@ -45,8 +59,8 @@ fn reloc_eq(
|
||||
return false;
|
||||
}
|
||||
|
||||
let symbol_name_addend_matches =
|
||||
left.symbol.name == right.symbol.name && left.relocation.addend == right.relocation.addend;
|
||||
let symbol_name_addend_matches = symbol_name_matches(&left.symbol.name, &right.symbol.name)
|
||||
&& left.relocation.addend == right.relocation.addend;
|
||||
match (left.symbol.section, right.symbol.section) {
|
||||
(Some(sl), Some(sr)) => {
|
||||
// Match if section and name+addend or address match
|
||||
|
@ -13,7 +13,7 @@ use crate::{
|
||||
code::{diff_code, no_diff_code},
|
||||
data::{
|
||||
diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol,
|
||||
diff_generic_section, no_diff_bss_section, no_diff_data_section,
|
||||
diff_generic_section, no_diff_bss_section, no_diff_data_section, symbol_name_matches,
|
||||
},
|
||||
},
|
||||
obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag},
|
||||
@ -575,47 +575,60 @@ fn matching_symbols(
|
||||
&mut matches,
|
||||
)?;
|
||||
}
|
||||
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
|
||||
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
|
||||
continue;
|
||||
}
|
||||
let section_kind = symbol_section_kind(left, symbol);
|
||||
if section_kind == SectionKind::Unknown {
|
||||
continue;
|
||||
}
|
||||
if left_used.contains(&symbol_idx) {
|
||||
continue;
|
||||
}
|
||||
let symbol_match = SymbolMatch {
|
||||
left: Some(symbol_idx),
|
||||
right: find_symbol(right, left, symbol, Some(&right_used)),
|
||||
prev: find_symbol(prev, left, symbol, None),
|
||||
section_kind,
|
||||
};
|
||||
matches.push(symbol_match);
|
||||
if let Some(right) = symbol_match.right {
|
||||
right_used.insert(right);
|
||||
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
|
||||
// those are correct first, while the second pass catches near matches.
|
||||
for fuzzy_literals in [false, true] {
|
||||
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
|
||||
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
|
||||
continue;
|
||||
}
|
||||
let section_kind = symbol_section_kind(left, symbol);
|
||||
if section_kind == SectionKind::Unknown {
|
||||
continue;
|
||||
}
|
||||
if left_used.contains(&symbol_idx) {
|
||||
continue;
|
||||
}
|
||||
let symbol_match = SymbolMatch {
|
||||
left: Some(symbol_idx),
|
||||
right: find_symbol(right, left, symbol_idx, Some(&right_used), fuzzy_literals),
|
||||
prev: find_symbol(prev, left, symbol_idx, None, fuzzy_literals),
|
||||
section_kind,
|
||||
};
|
||||
matches.push(symbol_match);
|
||||
if let Some(right) = symbol_match.right {
|
||||
left_used.insert(symbol_idx);
|
||||
right_used.insert(right);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(right) = right {
|
||||
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
|
||||
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
|
||||
continue;
|
||||
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
|
||||
// those are correct first, while the second pass catches near matches.
|
||||
for fuzzy_literals in [false, true] {
|
||||
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
|
||||
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
|
||||
continue;
|
||||
}
|
||||
let section_kind = symbol_section_kind(right, symbol);
|
||||
if section_kind == SectionKind::Unknown {
|
||||
continue;
|
||||
}
|
||||
if right_used.contains(&symbol_idx) {
|
||||
continue;
|
||||
}
|
||||
let symbol_match = SymbolMatch {
|
||||
left: None,
|
||||
right: Some(symbol_idx),
|
||||
prev: find_symbol(prev, right, symbol_idx, None, fuzzy_literals),
|
||||
section_kind,
|
||||
};
|
||||
matches.push(symbol_match);
|
||||
if symbol_match.prev.is_some() {
|
||||
right_used.insert(symbol_idx);
|
||||
}
|
||||
}
|
||||
let section_kind = symbol_section_kind(right, symbol);
|
||||
if section_kind == SectionKind::Unknown {
|
||||
continue;
|
||||
}
|
||||
if right_used.contains(&symbol_idx) {
|
||||
continue;
|
||||
}
|
||||
matches.push(SymbolMatch {
|
||||
left: None,
|
||||
right: Some(symbol_idx),
|
||||
prev: find_symbol(prev, right, symbol, None),
|
||||
section_kind,
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(matches)
|
||||
@ -637,7 +650,10 @@ where
|
||||
|
||||
fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> {
|
||||
if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) {
|
||||
Some((section.name.as_str(), section.kind))
|
||||
// Match x86 .rdata$r against .rdata$rs
|
||||
let section_name =
|
||||
section.name.split_once('$').map_or(section.name.as_str(), |(prefix, _)| prefix);
|
||||
Some((section_name, section.kind))
|
||||
} else if symbol.flags.contains(SymbolFlag::Common) {
|
||||
Some((".comm", SectionKind::Common))
|
||||
} else {
|
||||
@ -653,52 +669,94 @@ fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind {
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a symbol is a compiler-generated literal like @1234.
|
||||
fn is_symbol_compiler_generated_literal(symbol: &Symbol) -> bool {
|
||||
if !symbol.name.starts_with('@') {
|
||||
return false;
|
||||
}
|
||||
if !symbol.name[1..].chars().all(char::is_numeric) {
|
||||
// Exclude @stringBase0, @GUARD@, etc.
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
fn find_symbol(
|
||||
obj: Option<&Object>,
|
||||
in_obj: &Object,
|
||||
in_symbol: &Symbol,
|
||||
in_symbol_idx: usize,
|
||||
used: Option<&BTreeSet<usize>>,
|
||||
fuzzy_literals: bool,
|
||||
) -> Option<usize> {
|
||||
let in_symbol = &in_obj.symbols[in_symbol_idx];
|
||||
let obj = obj?;
|
||||
let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?;
|
||||
|
||||
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
|
||||
// If they are in the same section and have the same value
|
||||
if is_symbol_compiler_generated_literal(in_symbol)
|
||||
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
|
||||
{
|
||||
let mut closest_match_symbol_idx = None;
|
||||
let mut closest_match_percent = 0.0;
|
||||
for (symbol_idx, symbol) in unmatched_symbols(obj, used) {
|
||||
let Some(section_index) = symbol.section else {
|
||||
continue;
|
||||
};
|
||||
if obj.sections[section_index].name != section_name {
|
||||
continue;
|
||||
}
|
||||
if !is_symbol_compiler_generated_literal(symbol) {
|
||||
continue;
|
||||
}
|
||||
match section_kind {
|
||||
SectionKind::Data => {
|
||||
// For data, pick the first symbol with exactly matching bytes and relocations.
|
||||
// If no symbols match exactly, and `fuzzy_literals` is true, pick the closest
|
||||
// plausible match instead.
|
||||
if let Ok((left_diff, _right_diff)) =
|
||||
diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx)
|
||||
&& let Some(match_percent) = left_diff.match_percent
|
||||
&& (match_percent == 100.0
|
||||
|| (fuzzy_literals
|
||||
&& match_percent >= 50.0
|
||||
&& match_percent > closest_match_percent))
|
||||
{
|
||||
closest_match_symbol_idx = Some(symbol_idx);
|
||||
closest_match_percent = match_percent;
|
||||
if match_percent == 100.0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
SectionKind::Bss => {
|
||||
// For BSS, pick the first symbol that has the exact matching size.
|
||||
if in_symbol.size == symbol.size {
|
||||
closest_match_symbol_idx = Some(symbol_idx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
return closest_match_symbol_idx;
|
||||
}
|
||||
|
||||
// Try to find an exact name match
|
||||
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
|
||||
symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind
|
||||
}) {
|
||||
return Some(symbol_idx);
|
||||
}
|
||||
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
|
||||
// If they are at the same address in the same section
|
||||
if in_symbol.name.starts_with('@')
|
||||
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
|
||||
&& let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
|
||||
let Some(section_index) = symbol.section else {
|
||||
return false;
|
||||
};
|
||||
symbol.name.starts_with('@')
|
||||
&& symbol.address == in_symbol.address
|
||||
&& obj.sections[section_index].name == section_name
|
||||
})
|
||||
{
|
||||
|
||||
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
|
||||
symbol_name_matches(&in_symbol.name, &symbol.name)
|
||||
&& symbol_section_kind(obj, symbol) == section_kind
|
||||
&& symbol_section(obj, symbol).is_some_and(|(name, _)| name == section_name)
|
||||
}) {
|
||||
return Some(symbol_idx);
|
||||
}
|
||||
// Match Metrowerks symbol$1234 against symbol$2345
|
||||
if let Some((prefix, suffix)) = in_symbol.name.split_once('$') {
|
||||
if !suffix.chars().all(char::is_numeric) {
|
||||
return None;
|
||||
}
|
||||
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
|
||||
if let Some((p, s)) = symbol.name.split_once('$') {
|
||||
prefix == p
|
||||
&& s.chars().all(char::is_numeric)
|
||||
&& symbol_section_kind(obj, symbol) == section_kind
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}) {
|
||||
return Some(symbol_idx);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user