Refactor data diffing & expose WASM API (#256)

* Refactor data diffing & expose WASM API

* Update test snapshots
This commit is contained in:
2025-09-07 18:59:46 -06:00
committed by GitHub
parent f7cb494a62
commit fbdaa89cc0
14 changed files with 444 additions and 255 deletions

View File

@@ -5,7 +5,7 @@ use anyhow::{Result, anyhow};
use similar::{Algorithm, capture_diff_slices, get_diff_ratio};
use super::{
DataDiff, DataDiffKind, DataRelocationDiff, ObjectDiff, SectionDiff, SymbolDiff,
DataDiff, DataDiffKind, DataDiffRow, DataRelocationDiff, ObjectDiff, SectionDiff, SymbolDiff,
code::{address_eq, section_name_eq},
};
use crate::obj::{Object, Relocation, ResolvedRelocation, Symbol, SymbolFlag, SymbolKind};
@@ -111,14 +111,12 @@ fn diff_data_range(left_data: &[u8], right_data: &[u8]) -> (f32, Vec<DataDiff>,
left_data_diff.push(DataDiff {
data: left_data[..len.min(left_data.len())].to_vec(),
kind,
len,
..Default::default()
size: len,
});
right_data_diff.push(DataDiff {
data: right_data[..len.min(right_data.len())].to_vec(),
kind,
len,
..Default::default()
size: len,
});
if kind == DataDiffKind::Replace {
match left_len.cmp(&right_len) {
@@ -127,14 +125,12 @@ fn diff_data_range(left_data: &[u8], right_data: &[u8]) -> (f32, Vec<DataDiff>,
left_data_diff.push(DataDiff {
data: vec![],
kind: DataDiffKind::Insert,
len,
..Default::default()
size: len,
});
right_data_diff.push(DataDiff {
data: right_data[left_len..right_len].to_vec(),
kind: DataDiffKind::Insert,
len,
..Default::default()
size: len,
});
}
Ordering::Greater => {
@@ -142,14 +138,12 @@ fn diff_data_range(left_data: &[u8], right_data: &[u8]) -> (f32, Vec<DataDiff>,
left_data_diff.push(DataDiff {
data: left_data[right_len..left_len].to_vec(),
kind: DataDiffKind::Delete,
len,
..Default::default()
size: len,
});
right_data_diff.push(DataDiff {
data: vec![],
kind: DataDiffKind::Delete,
len,
..Default::default()
size: len,
});
}
Ordering::Equal => {}
@@ -219,16 +213,17 @@ fn diff_data_relocs_for_range<'left, 'right>(
pub fn no_diff_data_section(obj: &Object, section_idx: usize) -> Result<SectionDiff> {
let section = &obj.sections[section_idx];
let len = section.data.len();
let data = &section.data[0..len];
let data_diff =
vec![DataDiff { data: data.to_vec(), kind: DataDiffKind::None, len, ..Default::default() }];
let data_diff = vec![DataDiff {
data: section.data.0.clone(),
kind: DataDiffKind::None,
size: section.data.len(),
}];
let mut reloc_diffs = Vec::new();
for reloc in section.relocations.iter() {
let reloc_len = obj.arch.data_reloc_size(reloc.flags);
let range = reloc.address as usize..reloc.address as usize + reloc_len;
let range = reloc.address..reloc.address + reloc_len as u64;
reloc_diffs.push(DataRelocationDiff {
reloc: reloc.clone(),
kind: DataDiffKind::None,
@@ -279,8 +274,7 @@ pub fn diff_data_section(
) {
if let Some(left_reloc) = left_reloc {
let len = left_obj.arch.data_reloc_size(left_reloc.relocation.flags);
let range = left_reloc.relocation.address as usize
..left_reloc.relocation.address as usize + len;
let range = left_reloc.relocation.address..left_reloc.relocation.address + len as u64;
left_reloc_diffs.push(DataRelocationDiff {
reloc: left_reloc.relocation.clone(),
kind: diff_kind,
@@ -289,8 +283,7 @@ pub fn diff_data_section(
}
if let Some(right_reloc) = right_reloc {
let len = right_obj.arch.data_reloc_size(right_reloc.relocation.flags);
let range = right_reloc.relocation.address as usize
..right_reloc.relocation.address as usize + len;
let range = right_reloc.relocation.address..right_reloc.relocation.address + len as u64;
right_reloc_diffs.push(DataRelocationDiff {
reloc: right_reloc.relocation.clone(),
kind: diff_kind,
@@ -345,9 +338,11 @@ pub fn no_diff_data_symbol(obj: &Object, symbol_index: usize) -> Result<SymbolDi
let range = start as usize..end as usize;
let data = &section.data[range.clone()];
let len = symbol.size as usize;
let data_diff =
vec![DataDiff { data: data.to_vec(), kind: DataDiffKind::None, len, ..Default::default() }];
let data_diff = vec![DataDiff {
data: data.to_vec(),
kind: DataDiffKind::None,
size: symbol.size as usize,
}];
let mut reloc_diffs = Vec::new();
for reloc in section.relocations.iter() {
@@ -355,7 +350,7 @@ pub fn no_diff_data_symbol(obj: &Object, symbol_index: usize) -> Result<SymbolDi
continue;
}
let reloc_len = obj.arch.data_reloc_size(reloc.flags);
let range = reloc.address as usize..reloc.address as usize + reloc_len;
let range = reloc.address..reloc.address + reloc_len as u64;
reloc_diffs.push(DataRelocationDiff {
reloc: reloc.clone(),
kind: DataDiffKind::None,
@@ -363,12 +358,12 @@ pub fn no_diff_data_symbol(obj: &Object, symbol_index: usize) -> Result<SymbolDi
});
}
let data_rows = build_data_diff_rows(&data_diff, &reloc_diffs, symbol.address);
Ok(SymbolDiff {
target_symbol: None,
match_percent: None,
diff_score: None,
data_diff,
data_reloc_diff: reloc_diffs,
data_rows,
..Default::default()
})
}
@@ -454,8 +449,8 @@ pub fn diff_data_symbol(
if let Some(left_reloc) = left_reloc {
let len = left_obj.arch.data_reloc_size(left_reloc.relocation.flags);
let range = left_reloc.relocation.address as usize
..left_reloc.relocation.address as usize + len;
let range =
left_reloc.relocation.address..left_reloc.relocation.address + len as u64;
left_reloc_diffs.push(DataRelocationDiff {
reloc: left_reloc.relocation.clone(),
kind: diff_kind,
@@ -464,8 +459,8 @@ pub fn diff_data_symbol(
}
if let Some(right_reloc) = right_reloc {
let len = right_obj.arch.data_reloc_size(right_reloc.relocation.flags);
let range = right_reloc.relocation.address as usize
..right_reloc.relocation.address as usize + len;
let range =
right_reloc.relocation.address..right_reloc.relocation.address + len as u64;
right_reloc_diffs.push(DataRelocationDiff {
reloc: right_reloc.relocation.clone(),
kind: diff_kind,
@@ -486,23 +481,29 @@ pub fn diff_data_symbol(
}
}
left_reloc_diffs
.sort_by(|a, b| a.range.start.cmp(&b.range.start).then(a.range.end.cmp(&b.range.end)));
right_reloc_diffs
.sort_by(|a, b| a.range.start.cmp(&b.range.start).then(a.range.end.cmp(&b.range.end)));
let match_percent = match_ratio * 100.0;
let left_rows = build_data_diff_rows(&left_data_diff, &left_reloc_diffs, left_symbol.address);
let right_rows =
build_data_diff_rows(&right_data_diff, &right_reloc_diffs, right_symbol.address);
Ok((
SymbolDiff {
target_symbol: Some(right_symbol_idx),
match_percent: Some(match_percent),
diff_score: None,
data_diff: left_data_diff,
data_reloc_diff: left_reloc_diffs,
data_rows: left_rows,
..Default::default()
},
SymbolDiff {
target_symbol: Some(left_symbol_idx),
match_percent: Some(match_percent),
diff_score: None,
data_diff: right_data_diff,
data_reloc_diff: right_reloc_diffs,
data_rows: right_rows,
..Default::default()
},
))
@@ -593,3 +594,68 @@ fn symbols_matching_section(
&& !s.flags.contains(SymbolFlag::Ignored)
})
}
/// Number of data bytes covered by a single data diff row.
pub const BYTES_PER_ROW: usize = 16;
/// Builds the [`DataDiffRow`] for row `row_index` of a symbol's data diff.
///
/// The row covers byte offsets `row_index * BYTES_PER_ROW .. + BYTES_PER_ROW` within the
/// concatenation of `data_diffs` (each diff occupying `size` bytes), and its `address` is
/// `symbol_address` plus that starting offset.
///
/// * `data_diffs` - consecutive diff segments of the symbol; every segment overlapping the
///   row is sliced down to the overlapping part and pushed onto `segments`.
/// * `reloc_diffs` - relocation diffs; every entry whose `range` overlaps the row's address
///   range is cloned into `relocations`.
fn build_data_diff_row(
    data_diffs: &[DataDiff],
    reloc_diffs: &[DataRelocationDiff],
    symbol_address: u64,
    row_index: usize,
) -> DataDiffRow {
    let row_start = row_index * BYTES_PER_ROW;
    let row_end = row_start + BYTES_PER_ROW;
    let mut row_diff = DataDiffRow {
        address: symbol_address + row_start as u64,
        segments: Vec::new(),
        relocations: Vec::new(),
    };

    // Collect all segments that overlap with this row
    let mut current_offset = 0;
    for diff in data_diffs {
        let diff_end = current_offset + diff.size;
        if current_offset < row_end && diff_end > row_start {
            let start_in_diff = row_start.saturating_sub(current_offset);
            let end_in_diff = row_end.min(diff_end) - current_offset;
            if start_in_diff < end_in_diff {
                // `data` may hold fewer bytes than `size` (it is empty for the padding side
                // of an insert/delete). Clamp BOTH bounds to the available bytes so a short
                // buffer yields a short (possibly empty) slice instead of an out-of-range
                // panic when the row starts past the end of `data`.
                let data_end = end_in_diff.min(diff.data.len());
                let data_start = start_in_diff.min(data_end);
                row_diff.segments.push(DataDiff {
                    data: diff.data[data_start..data_end].to_vec(),
                    kind: diff.kind,
                    size: end_in_diff - start_in_diff,
                });
            }
        }
        current_offset = diff_end;
        // Segments are consecutive, so nothing past the row end can overlap it.
        if current_offset >= row_end {
            break;
        }
    }

    // Collect all relocations that overlap with this row
    let row_address = row_diff.address;
    let row_end_absolute = row_address + BYTES_PER_ROW as u64;
    row_diff.relocations = reloc_diffs
        .iter()
        .filter(|rd| rd.range.start < row_end_absolute && rd.range.end > row_address)
        .cloned()
        .collect();

    row_diff
}
/// Splits a symbol's data diff into display rows of `BYTES_PER_ROW` bytes each.
///
/// The number of rows is the summed `size` of all `segments`, divided by `BYTES_PER_ROW`
/// and rounded up. Each row is produced by `build_data_diff_row` with the same
/// `segments`, `relocations` and `symbol_address`.
fn build_data_diff_rows(
    segments: &[DataDiff],
    relocations: &[DataRelocationDiff],
    symbol_address: u64,
) -> Vec<DataDiffRow> {
    let mut byte_count: u64 = 0;
    for segment in segments {
        byte_count += segment.size as u64;
    }
    let row_count = byte_count.div_ceil(BYTES_PER_ROW as u64) as usize;

    let mut rows = Vec::with_capacity(row_count);
    for row_index in 0..row_count {
        rows.push(build_data_diff_row(segments, relocations, symbol_address, row_index));
    }
    rows
}

View File

@@ -12,7 +12,10 @@ use itertools::Itertools;
use regex::Regex;
use crate::{
diff::{DiffObjConfig, InstructionDiffKind, InstructionDiffRow, ObjectDiff, SymbolDiff},
diff::{
DataDiffKind, DataDiffRow, DiffObjConfig, InstructionDiffKind, InstructionDiffRow,
ObjectDiff, SymbolDiff, data::resolve_relocation,
},
obj::{
FlowAnalysisValue, InstructionArg, InstructionArgValue, Object, ParsedInstruction,
ResolvedInstructionRef, ResolvedRelocation, SectionFlag, SectionKind, Symbol, SymbolFlag,
@@ -494,6 +497,57 @@ pub fn relocation_context(
out
}
/// Builds the hover items for every relocation attached to a data diff row.
///
/// Each distinct relocation is resolved against `obj.symbols` and rendered through
/// `relocation_hover`, colored according to the relocation's diff kind. A
/// `HoverItem::Separator` is placed between the output of successive relocations.
pub fn data_row_hover(obj: &Object, diff_row: &DataDiffRow) -> Vec<HoverItem> {
    let mut items = Vec::new();
    let mut last_seen = None;
    let mut needs_separator = false;
    for entry in &diff_row.relocations {
        let current = &entry.reloc;
        // A single relocation can span several diffs when only some of its bytes changed
        // (e.g. first byte added, second unchanged), so it may be listed back-to-back.
        // Only the first of such consecutive duplicates is shown.
        if last_seen != Some(current) {
            last_seen = Some(current);
            if needs_separator {
                items.push(HoverItem::Separator);
            }
            needs_separator = true;

            let color = match entry.kind {
                DataDiffKind::None => HoverItemColor::Normal,
                DataDiffKind::Replace => HoverItemColor::Special,
                DataDiffKind::Delete => HoverItemColor::Delete,
                DataDiffKind::Insert => HoverItemColor::Insert,
            };
            let resolved = resolve_relocation(&obj.symbols, current);
            items.extend(relocation_hover(obj, resolved, Some(color)));
        }
    }
    items
}
/// Builds the context-menu items for every relocation attached to a data diff row.
///
/// Each distinct relocation is resolved against `obj.symbols` and its items from
/// `relocation_context` are appended in order.
pub fn data_row_context(obj: &Object, diff_row: &DataDiffRow) -> Vec<ContextItem> {
    let mut items = Vec::new();
    let mut last_seen = None;
    for entry in &diff_row.relocations {
        let current = &entry.reloc;
        // A single relocation can span several diffs when only some of its bytes changed
        // (e.g. first byte added, second unchanged), so it may be listed back-to-back.
        // Only the first of such consecutive duplicates is shown.
        if last_seen != Some(current) {
            last_seen = Some(current);
            let resolved = resolve_relocation(&obj.symbols, current);
            items.extend(relocation_context(obj, resolved, None));
        }
    }
    items
}
pub fn relocation_hover(
obj: &Object,
reloc: ResolvedRelocation,
@@ -677,6 +731,7 @@ pub struct SectionDisplay {
pub size: u64,
pub match_percent: Option<f32>,
pub symbols: Vec<SectionDisplaySymbol>,
pub kind: SectionKind,
}
pub fn display_sections(
@@ -755,6 +810,7 @@ pub fn display_sections(
size: section.size,
match_percent: section_diff.match_percent,
symbols,
kind: section.kind,
});
} else {
// Don't sort, preserve order of absolute symbols
@@ -764,6 +820,7 @@ pub fn display_sections(
size: 0,
match_percent: None,
symbols,
kind: SectionKind::Common,
});
}
}

View File

@@ -45,8 +45,7 @@ pub struct SymbolDiff {
pub match_percent: Option<f32>,
pub diff_score: Option<(u64, u64)>,
pub instruction_rows: Vec<InstructionDiffRow>,
pub data_diff: Vec<DataDiff>,
pub data_reloc_diff: Vec<DataRelocationDiff>,
pub data_rows: Vec<DataDiffRow>,
}
#[derive(Debug, Clone, Default)]
@@ -83,16 +82,15 @@ pub enum InstructionDiffKind {
#[derive(Debug, Clone, Default)]
pub struct DataDiff {
pub data: Vec<u8>,
pub size: usize,
pub kind: DataDiffKind,
pub len: usize,
pub symbol: String,
}
#[derive(Debug, Clone)]
pub struct DataRelocationDiff {
pub reloc: Relocation,
pub range: Range<u64>,
pub kind: DataDiffKind,
pub range: Range<usize>,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
@@ -104,6 +102,13 @@ pub enum DataDiffKind {
Insert,
}
/// One display row of a data diff: a fixed-width window over a symbol's diffed bytes,
/// together with the relocations that overlap that window.
#[derive(Debug, Clone, Default)]
pub struct DataDiffRow {
/// Address of the first byte of this row (the symbol address plus the row's byte offset).
pub address: u64,
/// The parts of the symbol's data diff segments that fall inside this row, in order.
pub segments: Vec<DataDiff>,
/// Relocation diffs whose address range overlaps this row.
pub relocations: Vec<DataRelocationDiff>,
}
/// Index of the argument diff for coloring.
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Default)]

View File

@@ -2645,8 +2645,7 @@ expression: "(target_symbol_diff, base_symbol_diff)"
arg_diff: [],
},
],
data_diff: [],
data_reloc_diff: [],
data_rows: [],
},
SymbolDiff {
target_symbol: Some(
@@ -5290,7 +5289,6 @@ expression: "(target_symbol_diff, base_symbol_diff)"
arg_diff: [],
},
],
data_diff: [],
data_reloc_diff: [],
data_rows: [],
},
)

View File

@@ -26,6 +26,7 @@ expression: sections_display
is_mapping_symbol: false,
},
],
kind: Common,
},
SectionDisplay {
id: ".ctors-0",
@@ -40,6 +41,7 @@ expression: sections_display
is_mapping_symbol: false,
},
],
kind: Data,
},
SectionDisplay {
id: ".text-0",
@@ -82,5 +84,6 @@ expression: sections_display
is_mapping_symbol: false,
},
],
kind: Code,
},
]

View File

@@ -14,6 +14,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-1",
@@ -26,6 +27,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-2",
@@ -38,6 +40,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-3",
@@ -50,6 +53,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-4",
@@ -62,6 +66,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-5",
@@ -74,6 +79,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-6",
@@ -86,6 +92,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-7",
@@ -98,6 +105,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-8",
@@ -110,6 +118,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-9",
@@ -122,6 +131,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-10",
@@ -134,6 +144,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-11",
@@ -146,6 +157,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-12",
@@ -158,6 +170,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-13",
@@ -170,6 +183,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-14",
@@ -182,6 +196,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-15",
@@ -194,6 +209,7 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
SectionDisplay {
id: ".text-16",
@@ -206,5 +222,6 @@ expression: section_display
is_mapping_symbol: false,
},
],
kind: Code,
},
]