mirror of
https://github.com/encounter/objdiff.git
synced 2025-10-05 09:29:51 +00:00
Improve automatic symbol pairing for nameless literals (#247)
* Improve automatic symbol pairing for nameless literals * Fix data reloc diffing when the reloc points to an in-function static symbol * Only pair up literals that match perfectly * Clippy * Do two separate passes when pairing up literals * Fix partially-matching literal pairups not working right * Remove duplicate $ splitting code * Implement $ splitting for section names too * Minor cleanup
This commit is contained in:
parent
8d24ec6373
commit
f2a591356e
@ -35,6 +35,20 @@ pub fn diff_bss_symbol(
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn symbol_name_matches(left_name: &str, right_name: &str) -> bool {
|
||||||
|
// Match Metrowerks symbol$1234 against symbol$2345
|
||||||
|
if let Some((prefix, suffix)) = left_name.split_once('$') {
|
||||||
|
if !suffix.chars().all(char::is_numeric) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
right_name
|
||||||
|
.split_once('$')
|
||||||
|
.is_some_and(|(p, s)| p == prefix && s.chars().all(char::is_numeric))
|
||||||
|
} else {
|
||||||
|
left_name == right_name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn reloc_eq(
|
fn reloc_eq(
|
||||||
left_obj: &Object,
|
left_obj: &Object,
|
||||||
right_obj: &Object,
|
right_obj: &Object,
|
||||||
@ -45,8 +59,8 @@ fn reloc_eq(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
let symbol_name_addend_matches =
|
let symbol_name_addend_matches = symbol_name_matches(&left.symbol.name, &right.symbol.name)
|
||||||
left.symbol.name == right.symbol.name && left.relocation.addend == right.relocation.addend;
|
&& left.relocation.addend == right.relocation.addend;
|
||||||
match (left.symbol.section, right.symbol.section) {
|
match (left.symbol.section, right.symbol.section) {
|
||||||
(Some(sl), Some(sr)) => {
|
(Some(sl), Some(sr)) => {
|
||||||
// Match if section and name+addend or address match
|
// Match if section and name+addend or address match
|
||||||
|
@ -13,7 +13,7 @@ use crate::{
|
|||||||
code::{diff_code, no_diff_code},
|
code::{diff_code, no_diff_code},
|
||||||
data::{
|
data::{
|
||||||
diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol,
|
diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol,
|
||||||
diff_generic_section, no_diff_bss_section, no_diff_data_section,
|
diff_generic_section, no_diff_bss_section, no_diff_data_section, symbol_name_matches,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag},
|
obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag},
|
||||||
@ -575,47 +575,60 @@ fn matching_symbols(
|
|||||||
&mut matches,
|
&mut matches,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
|
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
|
||||||
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
|
// those are correct first, while the second pass catches near matches.
|
||||||
continue;
|
for fuzzy_literals in [false, true] {
|
||||||
}
|
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
|
||||||
let section_kind = symbol_section_kind(left, symbol);
|
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
|
||||||
if section_kind == SectionKind::Unknown {
|
continue;
|
||||||
continue;
|
}
|
||||||
}
|
let section_kind = symbol_section_kind(left, symbol);
|
||||||
if left_used.contains(&symbol_idx) {
|
if section_kind == SectionKind::Unknown {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let symbol_match = SymbolMatch {
|
if left_used.contains(&symbol_idx) {
|
||||||
left: Some(symbol_idx),
|
continue;
|
||||||
right: find_symbol(right, left, symbol, Some(&right_used)),
|
}
|
||||||
prev: find_symbol(prev, left, symbol, None),
|
let symbol_match = SymbolMatch {
|
||||||
section_kind,
|
left: Some(symbol_idx),
|
||||||
};
|
right: find_symbol(right, left, symbol_idx, Some(&right_used), fuzzy_literals),
|
||||||
matches.push(symbol_match);
|
prev: find_symbol(prev, left, symbol_idx, None, fuzzy_literals),
|
||||||
if let Some(right) = symbol_match.right {
|
section_kind,
|
||||||
right_used.insert(right);
|
};
|
||||||
|
matches.push(symbol_match);
|
||||||
|
if let Some(right) = symbol_match.right {
|
||||||
|
left_used.insert(symbol_idx);
|
||||||
|
right_used.insert(right);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if let Some(right) = right {
|
if let Some(right) = right {
|
||||||
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
|
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
|
||||||
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
|
// those are correct first, while the second pass catches near matches.
|
||||||
continue;
|
for fuzzy_literals in [false, true] {
|
||||||
|
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
|
||||||
|
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let section_kind = symbol_section_kind(right, symbol);
|
||||||
|
if section_kind == SectionKind::Unknown {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if right_used.contains(&symbol_idx) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let symbol_match = SymbolMatch {
|
||||||
|
left: None,
|
||||||
|
right: Some(symbol_idx),
|
||||||
|
prev: find_symbol(prev, right, symbol_idx, None, fuzzy_literals),
|
||||||
|
section_kind,
|
||||||
|
};
|
||||||
|
matches.push(symbol_match);
|
||||||
|
if symbol_match.prev.is_some() {
|
||||||
|
right_used.insert(symbol_idx);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let section_kind = symbol_section_kind(right, symbol);
|
|
||||||
if section_kind == SectionKind::Unknown {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if right_used.contains(&symbol_idx) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
matches.push(SymbolMatch {
|
|
||||||
left: None,
|
|
||||||
right: Some(symbol_idx),
|
|
||||||
prev: find_symbol(prev, right, symbol, None),
|
|
||||||
section_kind,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(matches)
|
Ok(matches)
|
||||||
@ -637,7 +650,10 @@ where
|
|||||||
|
|
||||||
fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> {
|
fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> {
|
||||||
if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) {
|
if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) {
|
||||||
Some((section.name.as_str(), section.kind))
|
// Match x86 .rdata$r against .rdata$rs
|
||||||
|
let section_name =
|
||||||
|
section.name.split_once('$').map_or(section.name.as_str(), |(prefix, _)| prefix);
|
||||||
|
Some((section_name, section.kind))
|
||||||
} else if symbol.flags.contains(SymbolFlag::Common) {
|
} else if symbol.flags.contains(SymbolFlag::Common) {
|
||||||
Some((".comm", SectionKind::Common))
|
Some((".comm", SectionKind::Common))
|
||||||
} else {
|
} else {
|
||||||
@ -653,52 +669,94 @@ fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if a symbol is a compiler-generated literal like @1234.
|
||||||
|
fn is_symbol_compiler_generated_literal(symbol: &Symbol) -> bool {
|
||||||
|
if !symbol.name.starts_with('@') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if !symbol.name[1..].chars().all(char::is_numeric) {
|
||||||
|
// Exclude @stringBase0, @GUARD@, etc.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
fn find_symbol(
|
fn find_symbol(
|
||||||
obj: Option<&Object>,
|
obj: Option<&Object>,
|
||||||
in_obj: &Object,
|
in_obj: &Object,
|
||||||
in_symbol: &Symbol,
|
in_symbol_idx: usize,
|
||||||
used: Option<&BTreeSet<usize>>,
|
used: Option<&BTreeSet<usize>>,
|
||||||
|
fuzzy_literals: bool,
|
||||||
) -> Option<usize> {
|
) -> Option<usize> {
|
||||||
|
let in_symbol = &in_obj.symbols[in_symbol_idx];
|
||||||
let obj = obj?;
|
let obj = obj?;
|
||||||
let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?;
|
let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?;
|
||||||
|
|
||||||
|
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
|
||||||
|
// If they are in the same section and have the same value
|
||||||
|
if is_symbol_compiler_generated_literal(in_symbol)
|
||||||
|
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
|
||||||
|
{
|
||||||
|
let mut closest_match_symbol_idx = None;
|
||||||
|
let mut closest_match_percent = 0.0;
|
||||||
|
for (symbol_idx, symbol) in unmatched_symbols(obj, used) {
|
||||||
|
let Some(section_index) = symbol.section else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
if obj.sections[section_index].name != section_name {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if !is_symbol_compiler_generated_literal(symbol) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match section_kind {
|
||||||
|
SectionKind::Data => {
|
||||||
|
// For data, pick the first symbol with exactly matching bytes and relocations.
|
||||||
|
// If no symbols match exactly, and `fuzzy_literals` is true, pick the closest
|
||||||
|
// plausible match instead.
|
||||||
|
if let Ok((left_diff, _right_diff)) =
|
||||||
|
diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx)
|
||||||
|
&& let Some(match_percent) = left_diff.match_percent
|
||||||
|
&& (match_percent == 100.0
|
||||||
|
|| (fuzzy_literals
|
||||||
|
&& match_percent >= 50.0
|
||||||
|
&& match_percent > closest_match_percent))
|
||||||
|
{
|
||||||
|
closest_match_symbol_idx = Some(symbol_idx);
|
||||||
|
closest_match_percent = match_percent;
|
||||||
|
if match_percent == 100.0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SectionKind::Bss => {
|
||||||
|
// For BSS, pick the first symbol that has the exact matching size.
|
||||||
|
if in_symbol.size == symbol.size {
|
||||||
|
closest_match_symbol_idx = Some(symbol_idx);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return closest_match_symbol_idx;
|
||||||
|
}
|
||||||
|
|
||||||
// Try to find an exact name match
|
// Try to find an exact name match
|
||||||
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
|
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
|
||||||
symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind
|
symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind
|
||||||
}) {
|
}) {
|
||||||
return Some(symbol_idx);
|
return Some(symbol_idx);
|
||||||
}
|
}
|
||||||
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
|
|
||||||
// If they are at the same address in the same section
|
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
|
||||||
if in_symbol.name.starts_with('@')
|
symbol_name_matches(&in_symbol.name, &symbol.name)
|
||||||
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
|
&& symbol_section_kind(obj, symbol) == section_kind
|
||||||
&& let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
|
&& symbol_section(obj, symbol).is_some_and(|(name, _)| name == section_name)
|
||||||
let Some(section_index) = symbol.section else {
|
}) {
|
||||||
return false;
|
|
||||||
};
|
|
||||||
symbol.name.starts_with('@')
|
|
||||||
&& symbol.address == in_symbol.address
|
|
||||||
&& obj.sections[section_index].name == section_name
|
|
||||||
})
|
|
||||||
{
|
|
||||||
return Some(symbol_idx);
|
return Some(symbol_idx);
|
||||||
}
|
}
|
||||||
// Match Metrowerks symbol$1234 against symbol$2345
|
|
||||||
if let Some((prefix, suffix)) = in_symbol.name.split_once('$') {
|
|
||||||
if !suffix.chars().all(char::is_numeric) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
|
|
||||||
if let Some((p, s)) = symbol.name.split_once('$') {
|
|
||||||
prefix == p
|
|
||||||
&& s.chars().all(char::is_numeric)
|
|
||||||
&& symbol_section_kind(obj, symbol) == section_kind
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}) {
|
|
||||||
return Some(symbol_idx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user