Implement diffing individual data symbols (#244)

* Implement diffing individual data symbols

* Remove unused code for diffing sections

* Data diff view: Make rows show offset within the symbol, not within the section

* Remove SelectedSymbol enum as it only has a single variant now

* Create fake data section symbols to allow diffing entire sections again

* Fix text sections not having their size zeroed out

* Update test snapshots

* Clean up code for inferring section symbol size

* Fix bug where PPC pool references weren't ignoring section symbols

* Update comment

* Always add unique section symbols for data sections

* Update test snapshots

* Remove unnecessary clone in format! call

* Auto-start mapping for unpaired data symbols
This commit is contained in:
LagoLunatic
2025-09-02 21:37:17 -04:00
committed by GitHub
parent 6fb4bb8855
commit 23009bf9a3
20 changed files with 687 additions and 221 deletions

View File

@@ -41,7 +41,13 @@ pub fn no_diff_code(
instruction_rows.push(InstructionDiffRow { ins_ref: Some(*i), ..Default::default() });
}
resolve_branches(&ops, &mut instruction_rows);
Ok(SymbolDiff { target_symbol: None, match_percent: None, diff_score: None, instruction_rows })
Ok(SymbolDiff {
target_symbol: None,
match_percent: None,
diff_score: None,
instruction_rows,
..Default::default()
})
}
const PENALTY_IMM_DIFF: u64 = 1;
@@ -147,12 +153,14 @@ pub fn diff_code(
match_percent: Some(match_percent),
diff_score: Some((diff_score, max_score)),
instruction_rows: left_rows,
..Default::default()
},
SymbolDiff {
target_symbol: Some(left_symbol_idx),
match_percent: Some(match_percent),
diff_score: Some((diff_score, max_score)),
instruction_rows: right_rows,
..Default::default()
},
))
}

View File

@@ -24,13 +24,13 @@ pub fn diff_bss_symbol(
target_symbol: Some(right_symbol_ref),
match_percent: Some(percent),
diff_score: None,
instruction_rows: vec![],
..Default::default()
},
SymbolDiff {
target_symbol: Some(left_symbol_ref),
match_percent: Some(percent),
diff_score: None,
instruction_rows: vec![],
..Default::default()
},
))
}
@@ -84,7 +84,83 @@ pub fn resolve_relocation<'obj>(
ResolvedRelocation { relocation: reloc, symbol }
}
/// Compares relocations contained with a certain data range.
/// Compares the bytes within a certain data range.
fn diff_data_range(left_data: &[u8], right_data: &[u8]) -> (f32, Vec<DataDiff>, Vec<DataDiff>) {
let ops = capture_diff_slices(Algorithm::Patience, left_data, right_data);
let bytes_match_ratio = get_diff_ratio(&ops, left_data.len(), right_data.len());
let mut left_data_diff = Vec::<DataDiff>::new();
let mut right_data_diff = Vec::<DataDiff>::new();
for op in ops {
let (tag, left_range, right_range) = op.as_tag_tuple();
let left_len = left_range.len();
let right_len = right_range.len();
let mut len = left_len.max(right_len);
let kind = match tag {
similar::DiffTag::Equal => DataDiffKind::None,
similar::DiffTag::Delete => DataDiffKind::Delete,
similar::DiffTag::Insert => DataDiffKind::Insert,
similar::DiffTag::Replace => {
// Ensure replacements are equal length
len = left_len.min(right_len);
DataDiffKind::Replace
}
};
let left_data = &left_data[left_range];
let right_data = &right_data[right_range];
left_data_diff.push(DataDiff {
data: left_data[..len.min(left_data.len())].to_vec(),
kind,
len,
..Default::default()
});
right_data_diff.push(DataDiff {
data: right_data[..len.min(right_data.len())].to_vec(),
kind,
len,
..Default::default()
});
if kind == DataDiffKind::Replace {
match left_len.cmp(&right_len) {
Ordering::Less => {
let len = right_len - left_len;
left_data_diff.push(DataDiff {
data: vec![],
kind: DataDiffKind::Insert,
len,
..Default::default()
});
right_data_diff.push(DataDiff {
data: right_data[left_len..right_len].to_vec(),
kind: DataDiffKind::Insert,
len,
..Default::default()
});
}
Ordering::Greater => {
let len = left_len - right_len;
left_data_diff.push(DataDiff {
data: left_data[right_len..left_len].to_vec(),
kind: DataDiffKind::Delete,
len,
..Default::default()
});
right_data_diff.push(DataDiff {
data: vec![],
kind: DataDiffKind::Delete,
len,
..Default::default()
});
}
Ordering::Equal => {}
}
}
}
(bytes_match_ratio, left_data_diff, right_data_diff)
}
/// Compares relocations contained within a certain data range.
fn diff_data_relocs_for_range<'left, 'right>(
left_obj: &'left Object,
right_obj: &'right Object,
@@ -186,76 +262,10 @@ pub fn diff_data_section(
.min(right_section.size);
let left_data = &left_section.data[..left_max as usize];
let right_data = &right_section.data[..right_max as usize];
let ops = capture_diff_slices(Algorithm::Patience, left_data, right_data);
let match_percent = get_diff_ratio(&ops, left_data.len(), right_data.len()) * 100.0;
let mut left_data_diff = Vec::<DataDiff>::new();
let mut right_data_diff = Vec::<DataDiff>::new();
for op in ops {
let (tag, left_range, right_range) = op.as_tag_tuple();
let left_len = left_range.len();
let right_len = right_range.len();
let mut len = left_len.max(right_len);
let kind = match tag {
similar::DiffTag::Equal => DataDiffKind::None,
similar::DiffTag::Delete => DataDiffKind::Delete,
similar::DiffTag::Insert => DataDiffKind::Insert,
similar::DiffTag::Replace => {
// Ensure replacements are equal length
len = left_len.min(right_len);
DataDiffKind::Replace
}
};
let left_data = &left_section.data[left_range];
let right_data = &right_section.data[right_range];
left_data_diff.push(DataDiff {
data: left_data[..len.min(left_data.len())].to_vec(),
kind,
len,
..Default::default()
});
right_data_diff.push(DataDiff {
data: right_data[..len.min(right_data.len())].to_vec(),
kind,
len,
..Default::default()
});
if kind == DataDiffKind::Replace {
match left_len.cmp(&right_len) {
Ordering::Less => {
let len = right_len - left_len;
left_data_diff.push(DataDiff {
data: vec![],
kind: DataDiffKind::Insert,
len,
..Default::default()
});
right_data_diff.push(DataDiff {
data: right_data[left_len..right_len].to_vec(),
kind: DataDiffKind::Insert,
len,
..Default::default()
});
}
Ordering::Greater => {
let len = left_len - right_len;
left_data_diff.push(DataDiff {
data: left_data[right_len..left_len].to_vec(),
kind: DataDiffKind::Delete,
len,
..Default::default()
});
right_data_diff.push(DataDiff {
data: vec![],
kind: DataDiffKind::Delete,
len,
..Default::default()
});
}
Ordering::Equal => {}
}
}
}
let (bytes_match_ratio, left_data_diff, right_data_diff) =
diff_data_range(left_data, right_data);
let match_percent = bytes_match_ratio * 100.0;
let mut left_reloc_diffs = Vec::new();
let mut right_reloc_diffs = Vec::new();
@@ -314,6 +324,55 @@ pub fn diff_data_section(
Ok((left_section_diff, right_section_diff))
}
pub fn no_diff_data_symbol(obj: &Object, symbol_index: usize) -> Result<SymbolDiff> {
let symbol = &obj.symbols[symbol_index];
let section_idx = symbol.section.ok_or_else(|| anyhow!("Data symbol section not found"))?;
let section = &obj.sections[section_idx];
let start = symbol
.address
.checked_sub(section.address)
.ok_or_else(|| anyhow!("Symbol address out of section bounds"))?;
let end = start + symbol.size;
if end > section.size {
return Err(anyhow!(
"Symbol {} size out of section bounds ({} > {})",
symbol.name,
end,
section.size
));
}
let range = start as usize..end as usize;
let data = &section.data[range.clone()];
let len = symbol.size as usize;
let data_diff =
vec![DataDiff { data: data.to_vec(), kind: DataDiffKind::None, len, ..Default::default() }];
let mut reloc_diffs = Vec::new();
for reloc in section.relocations.iter() {
if !range.contains(&(reloc.address as usize)) {
continue;
}
let reloc_len = obj.arch.data_reloc_size(reloc.flags);
let range = reloc.address as usize..reloc.address as usize + reloc_len;
reloc_diffs.push(DataRelocationDiff {
reloc: reloc.clone(),
kind: DataDiffKind::None,
range,
});
}
Ok(SymbolDiff {
target_symbol: None,
match_percent: None,
diff_score: None,
data_diff,
data_reloc_diff: reloc_diffs,
..Default::default()
})
}
pub fn diff_data_symbol(
left_obj: &Object,
right_obj: &Object,
@@ -362,6 +421,9 @@ pub fn diff_data_symbol(
let left_data = &left_section.data[left_range.clone()];
let right_data = &right_section.data[right_range.clone()];
let (bytes_match_ratio, left_data_diff, right_data_diff) =
diff_data_range(left_data, right_data);
let reloc_diffs = diff_data_relocs_for_range(
left_obj,
right_obj,
@@ -371,10 +433,9 @@ pub fn diff_data_symbol(
right_range,
);
let ops = capture_diff_slices(Algorithm::Patience, left_data, right_data);
let bytes_match_ratio = get_diff_ratio(&ops, left_data.len(), right_data.len());
let mut match_ratio = bytes_match_ratio;
let mut left_reloc_diffs = Vec::new();
let mut right_reloc_diffs = Vec::new();
if !reloc_diffs.is_empty() {
let mut total_reloc_bytes = 0;
let mut matching_reloc_bytes = 0;
@@ -390,6 +451,27 @@ pub fn diff_data_symbol(
if diff_kind == DataDiffKind::None {
matching_reloc_bytes += reloc_diff_len;
}
if let Some(left_reloc) = left_reloc {
let len = left_obj.arch.data_reloc_size(left_reloc.relocation.flags);
let range = left_reloc.relocation.address as usize
..left_reloc.relocation.address as usize + len;
left_reloc_diffs.push(DataRelocationDiff {
reloc: left_reloc.relocation.clone(),
kind: diff_kind,
range,
});
}
if let Some(right_reloc) = right_reloc {
let len = right_obj.arch.data_reloc_size(right_reloc.relocation.flags);
let range = right_reloc.relocation.address as usize
..right_reloc.relocation.address as usize + len;
right_reloc_diffs.push(DataRelocationDiff {
reloc: right_reloc.relocation.clone(),
kind: diff_kind,
range,
});
}
}
if total_reloc_bytes > 0 {
let relocs_match_ratio = matching_reloc_bytes as f32 / total_reloc_bytes as f32;
@@ -411,13 +493,17 @@ pub fn diff_data_symbol(
target_symbol: Some(right_symbol_idx),
match_percent: Some(match_percent),
diff_score: None,
instruction_rows: vec![],
data_diff: left_data_diff,
data_reloc_diff: left_reloc_diffs,
..Default::default()
},
SymbolDiff {
target_symbol: Some(left_symbol_idx),
match_percent: Some(match_percent),
diff_score: None,
instruction_rows: vec![],
data_diff: right_data_diff,
data_reloc_diff: right_reloc_diffs,
..Default::default()
},
))
}

View File

@@ -13,7 +13,8 @@ use crate::{
code::{diff_code, no_diff_code},
data::{
diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol,
diff_generic_section, no_diff_bss_section, no_diff_data_section, symbol_name_matches,
diff_generic_section, no_diff_bss_section, no_diff_data_section, no_diff_data_symbol,
symbol_name_matches,
},
},
obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag},
@@ -44,6 +45,8 @@ pub struct SymbolDiff {
pub match_percent: Option<f32>,
pub diff_score: Option<(u64, u64)>,
pub instruction_rows: Vec<InstructionDiffRow>,
pub data_diff: Vec<DataDiff>,
pub data_reloc_diff: Vec<DataRelocationDiff>,
}
#[derive(Debug, Clone, Default)]
@@ -163,7 +166,7 @@ impl ObjectDiff {
target_symbol: None,
match_percent: None,
diff_score: None,
instruction_rows: vec![],
..Default::default()
});
}
for _ in obj.sections.iter() {
@@ -262,7 +265,11 @@ pub fn diff_objs(
left_out.symbols[left_symbol_ref] =
no_diff_code(left_obj, left_symbol_ref, diff_config)?;
}
SectionKind::Data | SectionKind::Bss | SectionKind::Common => {
SectionKind::Data => {
left_out.symbols[left_symbol_ref] =
no_diff_data_symbol(left_obj, left_symbol_ref)?;
}
SectionKind::Bss | SectionKind::Common => {
// Nothing needs to be done
}
SectionKind::Unknown => unreachable!(),
@@ -275,7 +282,11 @@ pub fn diff_objs(
right_out.symbols[right_symbol_ref] =
no_diff_code(right_obj, right_symbol_ref, diff_config)?;
}
SectionKind::Data | SectionKind::Bss | SectionKind::Common => {
SectionKind::Data => {
right_out.symbols[right_symbol_ref] =
no_diff_data_symbol(right_obj, right_symbol_ref)?;
}
SectionKind::Bss | SectionKind::Common => {
// Nothing needs to be done
}
SectionKind::Unknown => unreachable!(),