mirror of
https://github.com/encounter/objdiff.git
synced 2025-06-07 15:13:47 +00:00
505 lines
19 KiB
Rust
505 lines
19 KiB
Rust
use alloc::{
|
|
collections::{BTreeMap, btree_map},
|
|
string::{String, ToString},
|
|
vec,
|
|
vec::Vec,
|
|
};
|
|
|
|
use anyhow::{Context, Result, anyhow, ensure};
|
|
|
|
use super::{
|
|
DiffObjConfig, FunctionRelocDiffs, InstructionArgDiffIndex, InstructionBranchFrom,
|
|
InstructionBranchTo, InstructionDiffKind, InstructionDiffRow, SymbolDiff,
|
|
display::display_ins_data_literals,
|
|
};
|
|
use crate::obj::{
|
|
InstructionArg, InstructionArgValue, InstructionRef, Object, ResolvedInstructionRef,
|
|
ResolvedRelocation, ScannedInstruction, SymbolFlag, SymbolKind,
|
|
};
|
|
|
|
pub fn no_diff_code(
|
|
obj: &Object,
|
|
symbol_idx: usize,
|
|
diff_config: &DiffObjConfig,
|
|
) -> Result<SymbolDiff> {
|
|
let symbol = &obj.symbols[symbol_idx];
|
|
let section_index = symbol.section.ok_or_else(|| anyhow!("Missing section for symbol"))?;
|
|
let section = &obj.sections[section_index];
|
|
let data = section.data_range(symbol.address, symbol.size as usize).ok_or_else(|| {
|
|
anyhow!(
|
|
"Symbol data out of bounds: {:#x}..{:#x}",
|
|
symbol.address,
|
|
symbol.address + symbol.size
|
|
)
|
|
})?;
|
|
let ops = obj.arch.scan_instructions(symbol.address, data, section_index, diff_config)?;
|
|
let mut instruction_rows = Vec::<InstructionDiffRow>::new();
|
|
for i in &ops {
|
|
instruction_rows
|
|
.push(InstructionDiffRow { ins_ref: Some(i.ins_ref), ..Default::default() });
|
|
}
|
|
resolve_branches(obj, section_index, &ops, &mut instruction_rows);
|
|
Ok(SymbolDiff { target_symbol: None, match_percent: None, diff_score: None, instruction_rows })
|
|
}
|
|
|
|
const PENALTY_IMM_DIFF: u64 = 1;
|
|
const PENALTY_REG_DIFF: u64 = 5;
|
|
const PENALTY_REPLACE: u64 = 60;
|
|
const PENALTY_INSERT_DELETE: u64 = 100;
|
|
|
|
pub fn diff_code(
|
|
left_obj: &Object,
|
|
right_obj: &Object,
|
|
left_symbol_idx: usize,
|
|
right_symbol_idx: usize,
|
|
diff_config: &DiffObjConfig,
|
|
) -> Result<(SymbolDiff, SymbolDiff)> {
|
|
let left_symbol = &left_obj.symbols[left_symbol_idx];
|
|
let right_symbol = &right_obj.symbols[right_symbol_idx];
|
|
let left_section = left_symbol
|
|
.section
|
|
.and_then(|i| left_obj.sections.get(i))
|
|
.ok_or_else(|| anyhow!("Missing section for symbol"))?;
|
|
let right_section = right_symbol
|
|
.section
|
|
.and_then(|i| right_obj.sections.get(i))
|
|
.ok_or_else(|| anyhow!("Missing section for symbol"))?;
|
|
let left_data = left_section
|
|
.data_range(left_symbol.address, left_symbol.size as usize)
|
|
.ok_or_else(|| {
|
|
anyhow!(
|
|
"Symbol data out of bounds: {:#x}..{:#x}",
|
|
left_symbol.address,
|
|
left_symbol.address + left_symbol.size
|
|
)
|
|
})?;
|
|
let right_data = right_section
|
|
.data_range(right_symbol.address, right_symbol.size as usize)
|
|
.ok_or_else(|| {
|
|
anyhow!(
|
|
"Symbol data out of bounds: {:#x}..{:#x}",
|
|
right_symbol.address,
|
|
right_symbol.address + right_symbol.size
|
|
)
|
|
})?;
|
|
|
|
let left_section_idx = left_symbol.section.unwrap();
|
|
let right_section_idx = right_symbol.section.unwrap();
|
|
let left_ops = left_obj.arch.scan_instructions(
|
|
left_symbol.address,
|
|
left_data,
|
|
left_section_idx,
|
|
diff_config,
|
|
)?;
|
|
let right_ops = right_obj.arch.scan_instructions(
|
|
right_symbol.address,
|
|
right_data,
|
|
right_section_idx,
|
|
diff_config,
|
|
)?;
|
|
let (mut left_rows, mut right_rows) = diff_instructions(&left_ops, &right_ops)?;
|
|
resolve_branches(left_obj, left_section_idx, &left_ops, &mut left_rows);
|
|
resolve_branches(right_obj, right_section_idx, &right_ops, &mut right_rows);
|
|
|
|
let mut diff_state = InstructionDiffState::default();
|
|
for (left_row, right_row) in left_rows.iter_mut().zip(right_rows.iter_mut()) {
|
|
let result = diff_instruction(
|
|
left_obj,
|
|
right_obj,
|
|
left_symbol_idx,
|
|
right_symbol_idx,
|
|
left_row.ins_ref,
|
|
right_row.ins_ref,
|
|
left_row,
|
|
right_row,
|
|
diff_config,
|
|
&mut diff_state,
|
|
)?;
|
|
left_row.kind = result.kind;
|
|
right_row.kind = result.kind;
|
|
left_row.arg_diff = result.left_args_diff;
|
|
right_row.arg_diff = result.right_args_diff;
|
|
}
|
|
|
|
let max_score = left_ops.len() as u64 * PENALTY_INSERT_DELETE;
|
|
let diff_score = diff_state.diff_score.min(max_score);
|
|
let match_percent = if max_score == 0 {
|
|
100.0
|
|
} else {
|
|
((1.0 - (diff_score as f64 / max_score as f64)) * 100.0) as f32
|
|
};
|
|
|
|
Ok((
|
|
SymbolDiff {
|
|
target_symbol: Some(right_symbol_idx),
|
|
match_percent: Some(match_percent),
|
|
diff_score: Some((diff_score, max_score)),
|
|
instruction_rows: left_rows,
|
|
},
|
|
SymbolDiff {
|
|
target_symbol: Some(left_symbol_idx),
|
|
match_percent: Some(match_percent),
|
|
diff_score: Some((diff_score, max_score)),
|
|
instruction_rows: right_rows,
|
|
},
|
|
))
|
|
}
|
|
|
|
fn diff_instructions(
|
|
left_insts: &[ScannedInstruction],
|
|
right_insts: &[ScannedInstruction],
|
|
) -> Result<(Vec<InstructionDiffRow>, Vec<InstructionDiffRow>)> {
|
|
let left_ops = left_insts.iter().map(|i| i.ins_ref.opcode).collect::<Vec<_>>();
|
|
let right_ops = right_insts.iter().map(|i| i.ins_ref.opcode).collect::<Vec<_>>();
|
|
let ops = similar::capture_diff_slices(similar::Algorithm::Patience, &left_ops, &right_ops);
|
|
if ops.is_empty() {
|
|
ensure!(left_insts.len() == right_insts.len());
|
|
let left_diff = left_insts
|
|
.iter()
|
|
.map(|i| InstructionDiffRow { ins_ref: Some(i.ins_ref), ..Default::default() })
|
|
.collect();
|
|
let right_diff = right_insts
|
|
.iter()
|
|
.map(|i| InstructionDiffRow { ins_ref: Some(i.ins_ref), ..Default::default() })
|
|
.collect();
|
|
return Ok((left_diff, right_diff));
|
|
}
|
|
|
|
let row_count = ops
|
|
.iter()
|
|
.map(|op| match *op {
|
|
similar::DiffOp::Equal { len, .. } => len,
|
|
similar::DiffOp::Delete { old_len, .. } => old_len,
|
|
similar::DiffOp::Insert { new_len, .. } => new_len,
|
|
similar::DiffOp::Replace { old_len, new_len, .. } => old_len.max(new_len),
|
|
})
|
|
.sum();
|
|
let mut left_diff = Vec::<InstructionDiffRow>::with_capacity(row_count);
|
|
let mut right_diff = Vec::<InstructionDiffRow>::with_capacity(row_count);
|
|
for op in ops {
|
|
let (_tag, left_range, right_range) = op.as_tag_tuple();
|
|
let len = left_range.len().max(right_range.len());
|
|
left_diff.extend(left_range.clone().map(|i| InstructionDiffRow {
|
|
ins_ref: Some(left_insts[i].ins_ref),
|
|
..Default::default()
|
|
}));
|
|
right_diff.extend(right_range.clone().map(|i| InstructionDiffRow {
|
|
ins_ref: Some(right_insts[i].ins_ref),
|
|
..Default::default()
|
|
}));
|
|
if left_range.len() < len {
|
|
left_diff.extend((left_range.len()..len).map(|_| InstructionDiffRow::default()));
|
|
}
|
|
if right_range.len() < len {
|
|
right_diff.extend((right_range.len()..len).map(|_| InstructionDiffRow::default()));
|
|
}
|
|
}
|
|
Ok((left_diff, right_diff))
|
|
}
|
|
|
|
fn arg_to_string(arg: &InstructionArg, reloc: Option<ResolvedRelocation>) -> String {
|
|
match arg {
|
|
InstructionArg::Value(arg) => arg.to_string(),
|
|
InstructionArg::Reloc => {
|
|
reloc.as_ref().map_or_else(|| "<unknown>".to_string(), |r| r.symbol.name.clone())
|
|
}
|
|
InstructionArg::BranchDest(arg) => arg.to_string(),
|
|
}
|
|
}
|
|
|
|
fn resolve_branches(
|
|
obj: &Object,
|
|
section_index: usize,
|
|
ops: &[ScannedInstruction],
|
|
rows: &mut [InstructionDiffRow],
|
|
) {
|
|
let section = &obj.sections[section_index];
|
|
let mut branch_idx = 0u32;
|
|
// Map addresses to indices
|
|
let mut addr_map = BTreeMap::<u64, u32>::new();
|
|
for (i, ins_diff) in rows.iter().enumerate() {
|
|
if let Some(ins) = ins_diff.ins_ref {
|
|
addr_map.insert(ins.address, i as u32);
|
|
}
|
|
}
|
|
// Generate branches
|
|
let mut branches = BTreeMap::<u32, InstructionBranchFrom>::new();
|
|
for ((i, ins_diff), ins) in
|
|
rows.iter_mut().enumerate().filter(|(_, row)| row.ins_ref.is_some()).zip(ops)
|
|
{
|
|
let branch_dest = if let Some(resolved) = section.relocation_at(obj, ins.ins_ref) {
|
|
if resolved.symbol.section == Some(section_index) {
|
|
// If the relocation target is in the same section, use it as the branch destination
|
|
resolved.symbol.address.checked_add_signed(resolved.relocation.addend)
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
ins.branch_dest
|
|
};
|
|
if let Some(ins_idx) = branch_dest.and_then(|a| addr_map.get(&a).copied()) {
|
|
match branches.entry(ins_idx) {
|
|
btree_map::Entry::Vacant(e) => {
|
|
ins_diff.branch_to = Some(InstructionBranchTo { ins_idx, branch_idx });
|
|
e.insert(InstructionBranchFrom { ins_idx: vec![i as u32], branch_idx });
|
|
branch_idx += 1;
|
|
}
|
|
btree_map::Entry::Occupied(e) => {
|
|
let branch = e.into_mut();
|
|
ins_diff.branch_to =
|
|
Some(InstructionBranchTo { ins_idx, branch_idx: branch.branch_idx });
|
|
branch.ins_idx.push(i as u32);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Store branch from
|
|
for (i, branch) in branches {
|
|
rows[i as usize].branch_from = Some(branch);
|
|
}
|
|
}
|
|
|
|
pub(crate) fn address_eq(left: ResolvedRelocation, right: ResolvedRelocation) -> bool {
|
|
if right.symbol.size == 0 && left.symbol.size != 0 {
|
|
// The base relocation is against a pool but the target relocation isn't.
|
|
// This can happen in rare cases where the compiler will generate a pool+addend relocation
|
|
// in the base's data, but the one detected in the target is direct with no addend.
|
|
// Just check that the final address is the same so these count as a match.
|
|
left.symbol.address as i64 + left.relocation.addend
|
|
== right.symbol.address as i64 + right.relocation.addend
|
|
} else {
|
|
// But otherwise, if the compiler isn't using a pool, we're more strict and check that the
|
|
// target symbol address and relocation addend both match exactly.
|
|
left.symbol.address == right.symbol.address
|
|
&& left.relocation.addend == right.relocation.addend
|
|
}
|
|
}
|
|
|
|
pub(crate) fn section_name_eq(
|
|
left_obj: &Object,
|
|
right_obj: &Object,
|
|
left_section_index: usize,
|
|
right_section_index: usize,
|
|
) -> bool {
|
|
left_obj.sections.get(left_section_index).is_some_and(|left_section| {
|
|
right_obj
|
|
.sections
|
|
.get(right_section_index)
|
|
.is_some_and(|right_section| left_section.name == right_section.name)
|
|
})
|
|
}
|
|
|
|
fn reloc_eq(
|
|
left_obj: &Object,
|
|
right_obj: &Object,
|
|
left_ins: ResolvedInstructionRef,
|
|
right_ins: ResolvedInstructionRef,
|
|
diff_config: &DiffObjConfig,
|
|
) -> bool {
|
|
let relax_reloc_diffs = diff_config.function_reloc_diffs == FunctionRelocDiffs::None;
|
|
let (left_reloc, right_reloc) = match (left_ins.relocation, right_ins.relocation) {
|
|
(Some(left_reloc), Some(right_reloc)) => (left_reloc, right_reloc),
|
|
// If relocations are relaxed, match if left is missing a reloc
|
|
(None, Some(_)) => return relax_reloc_diffs,
|
|
(None, None) => return true,
|
|
_ => return false,
|
|
};
|
|
if left_reloc.relocation.flags != right_reloc.relocation.flags {
|
|
return false;
|
|
}
|
|
if relax_reloc_diffs {
|
|
return true;
|
|
}
|
|
|
|
let symbol_name_addend_matches = left_reloc.symbol.name == right_reloc.symbol.name
|
|
&& left_reloc.relocation.addend == right_reloc.relocation.addend;
|
|
match (&left_reloc.symbol.section, &right_reloc.symbol.section) {
|
|
(Some(sl), Some(sr)) => {
|
|
// Match if section and name or address match
|
|
section_name_eq(left_obj, right_obj, *sl, *sr)
|
|
&& (diff_config.function_reloc_diffs == FunctionRelocDiffs::DataValue
|
|
|| symbol_name_addend_matches
|
|
|| address_eq(left_reloc, right_reloc))
|
|
&& (diff_config.function_reloc_diffs == FunctionRelocDiffs::NameAddress
|
|
|| left_reloc.symbol.kind != SymbolKind::Object
|
|
|| display_ins_data_literals(left_obj, left_ins)
|
|
== display_ins_data_literals(right_obj, right_ins))
|
|
}
|
|
(Some(_), None) => false,
|
|
(None, Some(_)) => {
|
|
// Match if possibly stripped weak symbol
|
|
symbol_name_addend_matches && right_reloc.symbol.flags.contains(SymbolFlag::Weak)
|
|
}
|
|
(None, None) => symbol_name_addend_matches,
|
|
}
|
|
}
|
|
|
|
fn arg_eq(
|
|
left_obj: &Object,
|
|
right_obj: &Object,
|
|
left_row: &InstructionDiffRow,
|
|
right_row: &InstructionDiffRow,
|
|
left_arg: &InstructionArg,
|
|
right_arg: &InstructionArg,
|
|
left_ins: ResolvedInstructionRef,
|
|
right_ins: ResolvedInstructionRef,
|
|
diff_config: &DiffObjConfig,
|
|
) -> bool {
|
|
match left_arg {
|
|
InstructionArg::Value(l) => match right_arg {
|
|
InstructionArg::Value(r) => l.loose_eq(r),
|
|
// If relocations are relaxed, match if left is a constant and right is a reloc
|
|
// Useful for instances where the target object is created without relocations
|
|
InstructionArg::Reloc => diff_config.function_reloc_diffs == FunctionRelocDiffs::None,
|
|
_ => false,
|
|
},
|
|
InstructionArg::Reloc => {
|
|
matches!(right_arg, InstructionArg::Reloc)
|
|
&& reloc_eq(left_obj, right_obj, left_ins, right_ins, diff_config)
|
|
}
|
|
InstructionArg::BranchDest(_) => match right_arg {
|
|
// Compare dest instruction idx after diffing
|
|
InstructionArg::BranchDest(_) => {
|
|
left_row.branch_to.as_ref().map(|b| b.ins_idx)
|
|
== right_row.branch_to.as_ref().map(|b| b.ins_idx)
|
|
}
|
|
// If relocations are relaxed, match if left is a constant and right is a reloc
|
|
// Useful for instances where the target object is created without relocations
|
|
InstructionArg::Reloc => diff_config.function_reloc_diffs == FunctionRelocDiffs::None,
|
|
_ => false,
|
|
},
|
|
}
|
|
}
|
|
|
|
#[derive(Default)]
|
|
struct InstructionDiffState {
|
|
diff_score: u64,
|
|
left_arg_idx: u32,
|
|
right_arg_idx: u32,
|
|
left_args_idx: BTreeMap<String, u32>,
|
|
right_args_idx: BTreeMap<String, u32>,
|
|
}
|
|
|
|
#[derive(Default)]
|
|
struct InstructionDiffResult {
|
|
kind: InstructionDiffKind,
|
|
left_args_diff: Vec<InstructionArgDiffIndex>,
|
|
right_args_diff: Vec<InstructionArgDiffIndex>,
|
|
}
|
|
|
|
impl InstructionDiffResult {
|
|
#[inline]
|
|
const fn new(kind: InstructionDiffKind) -> Self {
|
|
Self { kind, left_args_diff: Vec::new(), right_args_diff: Vec::new() }
|
|
}
|
|
}
|
|
|
|
fn diff_instruction(
|
|
left_obj: &Object,
|
|
right_obj: &Object,
|
|
left_symbol_idx: usize,
|
|
right_symbol_idx: usize,
|
|
l: Option<InstructionRef>,
|
|
r: Option<InstructionRef>,
|
|
left_row: &InstructionDiffRow,
|
|
right_row: &InstructionDiffRow,
|
|
diff_config: &DiffObjConfig,
|
|
state: &mut InstructionDiffState,
|
|
) -> Result<InstructionDiffResult> {
|
|
let (l, r) = match (l, r) {
|
|
(Some(l), Some(r)) => (l, r),
|
|
(Some(_), None) => {
|
|
state.diff_score += PENALTY_INSERT_DELETE;
|
|
return Ok(InstructionDiffResult::new(InstructionDiffKind::Delete));
|
|
}
|
|
(None, Some(_)) => {
|
|
state.diff_score += PENALTY_INSERT_DELETE;
|
|
return Ok(InstructionDiffResult::new(InstructionDiffKind::Insert));
|
|
}
|
|
(None, None) => return Ok(InstructionDiffResult::new(InstructionDiffKind::None)),
|
|
};
|
|
|
|
// If opcodes don't match, replace
|
|
if l.opcode != r.opcode {
|
|
state.diff_score += PENALTY_REPLACE;
|
|
return Ok(InstructionDiffResult::new(InstructionDiffKind::Replace));
|
|
}
|
|
|
|
let left_resolved = left_obj
|
|
.resolve_instruction_ref(left_symbol_idx, l)
|
|
.context("Failed to resolve left instruction")?;
|
|
let right_resolved = right_obj
|
|
.resolve_instruction_ref(right_symbol_idx, r)
|
|
.context("Failed to resolve right instruction")?;
|
|
|
|
if left_resolved.code != right_resolved.code
|
|
|| !reloc_eq(left_obj, right_obj, left_resolved, right_resolved, diff_config)
|
|
{
|
|
// If either the raw code bytes or relocations don't match, process instructions and compare args
|
|
let left_ins = left_obj.arch.process_instruction(left_resolved, diff_config)?;
|
|
let right_ins = right_obj.arch.process_instruction(right_resolved, diff_config)?;
|
|
if left_ins.args.len() != right_ins.args.len() {
|
|
state.diff_score += PENALTY_REPLACE;
|
|
return Ok(InstructionDiffResult::new(InstructionDiffKind::Replace));
|
|
}
|
|
let mut result = InstructionDiffResult::new(InstructionDiffKind::None);
|
|
if left_ins.mnemonic != right_ins.mnemonic {
|
|
state.diff_score += PENALTY_REG_DIFF;
|
|
result.kind = InstructionDiffKind::OpMismatch;
|
|
}
|
|
for (a, b) in left_ins.args.iter().zip(right_ins.args.iter()) {
|
|
if arg_eq(
|
|
left_obj,
|
|
right_obj,
|
|
left_row,
|
|
right_row,
|
|
a,
|
|
b,
|
|
left_resolved,
|
|
right_resolved,
|
|
diff_config,
|
|
) {
|
|
result.left_args_diff.push(InstructionArgDiffIndex::NONE);
|
|
result.right_args_diff.push(InstructionArgDiffIndex::NONE);
|
|
} else {
|
|
state.diff_score += if let InstructionArg::Value(
|
|
InstructionArgValue::Signed(_) | InstructionArgValue::Unsigned(_),
|
|
) = a
|
|
{
|
|
PENALTY_IMM_DIFF
|
|
} else {
|
|
PENALTY_REG_DIFF
|
|
};
|
|
if result.kind == InstructionDiffKind::None {
|
|
result.kind = InstructionDiffKind::ArgMismatch;
|
|
}
|
|
let a_str = arg_to_string(a, left_resolved.relocation);
|
|
let a_diff = match state.left_args_idx.entry(a_str) {
|
|
btree_map::Entry::Vacant(e) => {
|
|
let idx = state.left_arg_idx;
|
|
state.left_arg_idx = idx + 1;
|
|
e.insert(idx);
|
|
idx
|
|
}
|
|
btree_map::Entry::Occupied(e) => *e.get(),
|
|
};
|
|
let b_str = arg_to_string(b, right_resolved.relocation);
|
|
let b_diff = match state.right_args_idx.entry(b_str) {
|
|
btree_map::Entry::Vacant(e) => {
|
|
let idx = state.right_arg_idx;
|
|
state.right_arg_idx = idx + 1;
|
|
e.insert(idx);
|
|
idx
|
|
}
|
|
btree_map::Entry::Occupied(e) => *e.get(),
|
|
};
|
|
result.left_args_diff.push(InstructionArgDiffIndex::new(a_diff));
|
|
result.right_args_diff.push(InstructionArgDiffIndex::new(b_diff));
|
|
}
|
|
}
|
|
return Ok(result);
|
|
}
|
|
|
|
Ok(InstructionDiffResult::new(InstructionDiffKind::None))
|
|
}
|