From dc9eec66b0ffe06163d4d38b65e9de63a144e5ed Mon Sep 17 00:00:00 2001 From: Luke Street Date: Tue, 21 Nov 2023 11:48:18 -0500 Subject: [PATCH] Configurable diff algorithms & new default algorithm Uses the similar crate to support new diff algorithms: - Patience (new default) - Levenshtein (old default) - Myers - LCS (Longest Common Subsequence) Options in "Diff Options" -> "Algorithm..." --- Cargo.lock | 7 + Cargo.toml | 1 + src/app.rs | 18 +- src/diff.rs | 735 -------------------------------------- src/diff/code.rs | 478 +++++++++++++++++++++++++ src/diff/data.rs | 406 +++++++++++++++++++++ src/{ => diff}/editops.rs | 0 src/diff/mod.rs | 114 ++++++ src/jobs/objdiff.rs | 9 +- src/lib.rs | 1 - src/obj/elf.rs | 2 +- src/obj/mips.rs | 9 +- src/obj/ppc.rs | 9 +- src/views/config.rs | 70 +++- 14 files changed, 1112 insertions(+), 747 deletions(-) delete mode 100644 src/diff.rs create mode 100644 src/diff/code.rs create mode 100644 src/diff/data.rs rename src/{ => diff}/editops.rs (100%) create mode 100644 src/diff/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 26268a2..dfb27ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2558,6 +2558,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", + "similar", "tempfile", "thiserror", "time", @@ -3362,6 +3363,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "similar" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aeaf503862c419d66959f5d7ca015337d864e9c49485d771b732e2a20453597" + [[package]] name = "slab" version = "0.4.8" diff --git a/Cargo.toml b/Cargo.toml index c77910b..e328a58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ semver = "1.0.19" serde = { version = "1", features = ["derive"] } serde_json = "1.0.107" serde_yaml = "0.9.25" +similar = "2.3.0" tempfile = "3.8.0" thiserror = "1.0.49" time = { version = "0.3.29", features = ["formatting", "local-offset"] } diff --git a/src/app.rs b/src/app.rs index aa1a4f6..b06f3bc 100644 --- a/src/app.rs +++ b/src/app.rs @@ -18,13 +18,16 @@ use time::UtcOffset; use crate::{ app_config::{deserialize_config, AppConfigVersion}, config::{build_globset, load_project_config, ProjectObject, ProjectObjectNode}, + diff::DiffAlg, jobs::{ objdiff::{start_build, ObjDiffConfig}, Job, JobQueue, JobResult, JobStatus, }, views::{ appearance::{appearance_window, Appearance}, - config::{config_ui, project_window, ConfigViewState, DEFAULT_WATCH_PATTERNS}, + config::{ + config_ui, diff_options_window, project_window, ConfigViewState, DEFAULT_WATCH_PATTERNS, + }, data_diff::data_diff_ui, demangle::{demangle_window, DemangleViewState}, function_diff::function_diff_ui, @@ -42,6 +45,7 @@ pub struct ViewState { pub show_appearance_config: bool, pub show_demangle: bool, pub show_project_config: bool, + pub show_diff_options: bool, } /// The configuration for a single object file. @@ -98,6 +102,10 @@ pub struct AppConfig { pub watch_patterns: Vec, #[serde(default)] pub recent_projects: Vec, + #[serde(default)] + pub code_alg: DiffAlg, + #[serde(default)] + pub data_alg: DiffAlg, #[serde(skip)] pub objects: Vec, @@ -133,6 +141,8 @@ impl Default for AppConfig { auto_update_check: true, watch_patterns: DEFAULT_WATCH_PATTERNS.iter().map(|s| Glob::new(s).unwrap()).collect(), recent_projects: vec![], + code_alg: Default::default(), + data_alg: Default::default(), objects: vec![], object_nodes: vec![], watcher_change: false, @@ -398,6 +408,7 @@ impl eframe::App for App { diff_state, config_state, show_project_config, + show_diff_options, } = view_state; egui::TopBottomPanel::top("top_panel").show(ctx, |ui| { @@ -443,6 +454,10 @@ impl eframe::App for App { } }); ui.menu_button("Diff Options", |ui| { + if ui.button("Algorithm…").clicked() { + *show_diff_options = !*show_diff_options; + ui.close_menu(); + } let mut config = config.write().unwrap(); let response = ui .checkbox(&mut config.rebuild_on_changes, "Rebuild on changes") @@ -493,6 +508,7 @@ impl eframe::App for App { project_window(ctx, config, show_project_config, config_state, appearance); appearance_window(ctx, show_appearance_config, appearance); demangle_window(ctx, show_demangle, demangle_state, appearance); + diff_options_window(ctx, config, show_diff_options, appearance); self.post_update(); diff --git a/src/diff.rs b/src/diff.rs deleted file mode 100644 index 718b989..0000000 --- a/src/diff.rs +++ /dev/null @@ -1,735 +0,0 @@ -use std::{collections::BTreeMap, mem::take}; - -use anyhow::Result; - -use crate::{ - editops::{editops_find, LevEditType}, - obj::{ - mips, ppc, ObjArchitecture, ObjDataDiff, ObjDataDiffKind, ObjInfo, ObjInsArg, - ObjInsArgDiff, ObjInsBranchFrom, ObjInsBranchTo, ObjInsDiff, ObjInsDiffKind, ObjReloc, - ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlags, - }, -}; - -fn no_diff_code( - arch: ObjArchitecture, - data: &[u8], - symbol: &mut ObjSymbol, - relocs: &[ObjReloc], - line_info: &Option>, -) -> Result<()> { - let code = - &data[symbol.section_address as usize..(symbol.section_address + symbol.size) as usize]; - let (_, ins) = match arch { - ObjArchitecture::PowerPc => ppc::process_code(code, symbol.address, relocs, line_info)?, - ObjArchitecture::Mips => mips::process_code( - code, - symbol.address, - symbol.address + symbol.size, - relocs, - line_info, - )?, - }; - - let mut diff = Vec::::new(); - for i in ins { - diff.push(ObjInsDiff { ins: Some(i), kind: ObjInsDiffKind::None, ..Default::default() }); - } - resolve_branches(&mut diff); - symbol.instructions = diff; - Ok(()) -} - -#[allow(clippy::too_many_arguments)] -pub fn diff_code( - arch: ObjArchitecture, - left_data: &[u8], - right_data: &[u8], - left_symbol: &mut ObjSymbol, - right_symbol: &mut ObjSymbol, - left_relocs: &[ObjReloc], - right_relocs: &[ObjReloc], - left_line_info: &Option>, - right_line_info: &Option>, -) -> Result<()> { - let left_code = &left_data[left_symbol.section_address as usize - ..(left_symbol.section_address + left_symbol.size) as usize]; - let right_code = &right_data[right_symbol.section_address as usize - ..(right_symbol.section_address + right_symbol.size) as usize]; - let ((left_ops, left_insts), (right_ops, right_insts)) = match arch { - ObjArchitecture::PowerPc => ( - ppc::process_code(left_code, left_symbol.address, left_relocs, left_line_info)?, - ppc::process_code(right_code, right_symbol.address, right_relocs, right_line_info)?, - ), - ObjArchitecture::Mips => ( - mips::process_code( - left_code, - left_symbol.address, - left_symbol.address + left_symbol.size, - left_relocs, - left_line_info, - )?, - mips::process_code( - right_code, - right_symbol.address, - left_symbol.address + left_symbol.size, - right_relocs, - right_line_info, - )?, - ), - }; - - let mut left_diff = Vec::::new(); - let mut right_diff = Vec::::new(); - let edit_ops = editops_find(&left_ops, &right_ops); - - { - let mut op_iter = edit_ops.iter(); - let mut left_iter = left_insts.iter(); - let mut right_iter = right_insts.iter(); - let mut cur_op = op_iter.next(); - let mut cur_left = left_iter.next(); - let mut cur_right = right_iter.next(); - while let Some(op) = cur_op { - let left_addr = op.first_start as u32 * 4; - let right_addr = op.second_start as u32 * 4; - while let (Some(left), Some(right)) = (cur_left, cur_right) { - if (left.address - left_symbol.address as u32) < left_addr { - left_diff.push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() }); - right_diff - .push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() }); - } else { - break; - } - cur_left = left_iter.next(); - cur_right = right_iter.next(); - } - if let (Some(left), Some(right)) = (cur_left, cur_right) { - if (left.address - left_symbol.address as u32) != left_addr { - return Err(anyhow::Error::msg("Instruction address mismatch (left)")); - } - if (right.address - right_symbol.address as u32) != right_addr { - return Err(anyhow::Error::msg("Instruction address mismatch (right)")); - } - match op.op_type { - LevEditType::Replace => { - left_diff - .push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() }); - right_diff - .push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() }); - cur_left = left_iter.next(); - cur_right = right_iter.next(); - } - LevEditType::Insert => { - left_diff.push(ObjInsDiff::default()); - right_diff - .push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() }); - cur_right = right_iter.next(); - } - LevEditType::Delete => { - left_diff - .push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() }); - right_diff.push(ObjInsDiff::default()); - cur_left = left_iter.next(); - } - } - } else { - break; - } - cur_op = op_iter.next(); - } - // Finalize - while cur_left.is_some() || cur_right.is_some() { - left_diff.push(ObjInsDiff { ins: cur_left.cloned(), ..ObjInsDiff::default() }); - right_diff.push(ObjInsDiff { ins: cur_right.cloned(), ..ObjInsDiff::default() }); - cur_left = left_iter.next(); - cur_right = right_iter.next(); - } - } - - resolve_branches(&mut left_diff); - resolve_branches(&mut right_diff); - - let mut diff_state = InsDiffState::default(); - for (left, right) in left_diff.iter_mut().zip(right_diff.iter_mut()) { - let result = compare_ins(left, right, &mut diff_state)?; - left.kind = result.kind; - right.kind = result.kind; - left.arg_diff = result.left_args_diff; - right.arg_diff = result.right_args_diff; - } - - let total = left_insts.len(); - let percent = if diff_state.diff_count >= total { - 0.0 - } else { - ((total - diff_state.diff_count) as f32 / total as f32) * 100.0 - }; - left_symbol.match_percent = Some(percent); - right_symbol.match_percent = Some(percent); - - left_symbol.instructions = left_diff; - right_symbol.instructions = right_diff; - - Ok(()) -} - -fn resolve_branches(vec: &mut [ObjInsDiff]) { - let mut branch_idx = 0usize; - // Map addresses to indices - let mut addr_map = BTreeMap::::new(); - for (i, ins_diff) in vec.iter().enumerate() { - if let Some(ins) = &ins_diff.ins { - addr_map.insert(ins.address, i); - } - } - // Generate branches - let mut branches = BTreeMap::::new(); - for (i, ins_diff) in vec.iter_mut().enumerate() { - if let Some(ins) = &ins_diff.ins { - // if ins.ins.is_blr() || ins.reloc.is_some() { - // continue; - // } - if let Some(ins_idx) = ins - .args - .iter() - .find_map(|a| if let ObjInsArg::BranchOffset(offs) = a { Some(offs) } else { None }) - .and_then(|offs| addr_map.get(&((ins.address as i32 + offs) as u32))) - { - if let Some(branch) = branches.get_mut(ins_idx) { - ins_diff.branch_to = - Some(ObjInsBranchTo { ins_idx: *ins_idx, branch_idx: branch.branch_idx }); - branch.ins_idx.push(i); - } else { - ins_diff.branch_to = Some(ObjInsBranchTo { ins_idx: *ins_idx, branch_idx }); - branches.insert(*ins_idx, ObjInsBranchFrom { ins_idx: vec![i], branch_idx }); - branch_idx += 1; - } - } - } - } - // Store branch from - for (i, branch) in branches { - vec[i].branch_from = Some(branch); - } -} - -fn address_eq(left: &ObjSymbol, right: &ObjSymbol) -> bool { - left.address as i64 + left.addend == right.address as i64 + right.addend -} - -fn reloc_eq(left_reloc: Option<&ObjReloc>, right_reloc: Option<&ObjReloc>) -> bool { - let (Some(left), Some(right)) = (left_reloc, right_reloc) else { - return false; - }; - if left.kind != right.kind { - return false; - } - - let name_matches = left.target.name == right.target.name; - match (&left.target_section, &right.target_section) { - (Some(sl), Some(sr)) => { - // Match if section and name or address match - sl == sr && (name_matches || address_eq(&left.target, &right.target)) - } - (Some(_), None) => false, - (None, Some(_)) => { - // Match if possibly stripped weak symbol - name_matches && right.target.flags.0.contains(ObjSymbolFlags::Weak) - } - (None, None) => name_matches, - } -} - -fn arg_eq( - left: &ObjInsArg, - right: &ObjInsArg, - left_diff: &ObjInsDiff, - right_diff: &ObjInsDiff, -) -> bool { - return match left { - ObjInsArg::PpcArg(l) => match right { - ObjInsArg::PpcArg(r) => format!("{l}") == format!("{r}"), - _ => false, - }, - ObjInsArg::Reloc => { - matches!(right, ObjInsArg::Reloc) - && reloc_eq( - left_diff.ins.as_ref().and_then(|i| i.reloc.as_ref()), - right_diff.ins.as_ref().and_then(|i| i.reloc.as_ref()), - ) - } - ObjInsArg::RelocWithBase => { - matches!(right, ObjInsArg::RelocWithBase) - && reloc_eq( - left_diff.ins.as_ref().and_then(|i| i.reloc.as_ref()), - right_diff.ins.as_ref().and_then(|i| i.reloc.as_ref()), - ) - } - ObjInsArg::MipsArg(ls) | ObjInsArg::MipsArgWithBase(ls) => { - matches!(right, ObjInsArg::MipsArg(rs) | ObjInsArg::MipsArgWithBase(rs) if ls == rs) - } - ObjInsArg::BranchOffset(_) => { - // Compare dest instruction idx after diffing - left_diff.branch_to.as_ref().map(|b| b.ins_idx) - == right_diff.branch_to.as_ref().map(|b| b.ins_idx) - } - }; -} - -#[derive(Default)] -struct InsDiffState { - diff_count: usize, - left_arg_idx: usize, - right_arg_idx: usize, - left_args_idx: BTreeMap, - right_args_idx: BTreeMap, -} -#[derive(Default)] -struct InsDiffResult { - kind: ObjInsDiffKind, - left_args_diff: Vec>, - right_args_diff: Vec>, -} - -fn compare_ins( - left: &ObjInsDiff, - right: &ObjInsDiff, - state: &mut InsDiffState, -) -> Result { - let mut result = InsDiffResult::default(); - if let (Some(left_ins), Some(right_ins)) = (&left.ins, &right.ins) { - if left_ins.args.len() != right_ins.args.len() || left_ins.op != right_ins.op { - // Totally different op - result.kind = ObjInsDiffKind::Replace; - state.diff_count += 1; - return Ok(result); - } - if left_ins.mnemonic != right_ins.mnemonic { - // Same op but different mnemonic, still cmp args - result.kind = ObjInsDiffKind::OpMismatch; - state.diff_count += 1; - } - for (a, b) in left_ins.args.iter().zip(&right_ins.args) { - if arg_eq(a, b, left, right) { - result.left_args_diff.push(None); - result.right_args_diff.push(None); - } else { - if result.kind == ObjInsDiffKind::None { - result.kind = ObjInsDiffKind::ArgMismatch; - state.diff_count += 1; - } - let a_str = match a { - ObjInsArg::PpcArg(arg) => format!("{arg}"), - ObjInsArg::Reloc | ObjInsArg::RelocWithBase => String::new(), - ObjInsArg::MipsArg(str) | ObjInsArg::MipsArgWithBase(str) => str.clone(), - ObjInsArg::BranchOffset(arg) => format!("{arg}"), - }; - let a_diff = if let Some(idx) = state.left_args_idx.get(&a_str) { - ObjInsArgDiff { idx: *idx } - } else { - let idx = state.left_arg_idx; - state.left_args_idx.insert(a_str, idx); - state.left_arg_idx += 1; - ObjInsArgDiff { idx } - }; - let b_str = match b { - ObjInsArg::PpcArg(arg) => format!("{arg}"), - ObjInsArg::Reloc | ObjInsArg::RelocWithBase => String::new(), - ObjInsArg::MipsArg(str) | ObjInsArg::MipsArgWithBase(str) => str.clone(), - ObjInsArg::BranchOffset(arg) => format!("{arg}"), - }; - let b_diff = if let Some(idx) = state.right_args_idx.get(&b_str) { - ObjInsArgDiff { idx: *idx } - } else { - let idx = state.right_arg_idx; - state.right_args_idx.insert(b_str, idx); - state.right_arg_idx += 1; - ObjInsArgDiff { idx } - }; - result.left_args_diff.push(Some(a_diff)); - result.right_args_diff.push(Some(b_diff)); - } - } - } else if left.ins.is_some() { - result.kind = ObjInsDiffKind::Delete; - state.diff_count += 1; - } else { - result.kind = ObjInsDiffKind::Insert; - state.diff_count += 1; - } - Ok(result) -} - -fn find_section_and_symbol(obj: &ObjInfo, name: &str) -> Option<(usize, usize)> { - for (section_idx, section) in obj.sections.iter().enumerate() { - let symbol_idx = match section.symbols.iter().position(|symbol| symbol.name == name) { - Some(symbol_idx) => symbol_idx, - None => continue, - }; - return Some((section_idx, symbol_idx)); - } - None -} - -pub fn diff_objs(mut left: Option<&mut ObjInfo>, mut right: Option<&mut ObjInfo>) -> Result<()> { - if let Some(left) = left.as_mut() { - for left_section in &mut left.sections { - if left_section.kind == ObjSectionKind::Code { - for left_symbol in &mut left_section.symbols { - if let Some((right, (right_section_idx, right_symbol_idx))) = - right.as_mut().and_then(|obj| { - find_section_and_symbol(obj, &left_symbol.name).map(|s| (obj, s)) - }) - { - let right_section = &mut right.sections[right_section_idx]; - let right_symbol = &mut right_section.symbols[right_symbol_idx]; - left_symbol.diff_symbol = Some(right_symbol.name.clone()); - right_symbol.diff_symbol = Some(left_symbol.name.clone()); - diff_code( - left.architecture, - &left_section.data, - &right_section.data, - left_symbol, - right_symbol, - &left_section.relocations, - &right_section.relocations, - &left.line_info, - &right.line_info, - )?; - } else { - no_diff_code( - left.architecture, - &left_section.data, - left_symbol, - &left_section.relocations, - &left.line_info, - )?; - } - } - } else if let Some(right_section) = right - .as_mut() - .and_then(|obj| obj.sections.iter_mut().find(|s| s.name == left_section.name)) - { - if left_section.kind == ObjSectionKind::Data { - diff_data(left_section, right_section); - // diff_data_symbols(left_section, right_section)?; - } else if left_section.kind == ObjSectionKind::Bss { - diff_bss_symbols(&mut left_section.symbols, &mut right_section.symbols)?; - } - } else if left_section.kind == ObjSectionKind::Data { - no_diff_data(left_section); - } - } - } - if let Some(right) = right.as_mut() { - for right_section in right.sections.iter_mut() { - if right_section.kind == ObjSectionKind::Code { - for right_symbol in &mut right_section.symbols { - if right_symbol.instructions.is_empty() { - no_diff_code( - right.architecture, - &right_section.data, - right_symbol, - &right_section.relocations, - &right.line_info, - )?; - } - } - } else if right_section.kind == ObjSectionKind::Data - && right_section.data_diff.is_empty() - { - no_diff_data(right_section); - } - } - } - if let (Some(left), Some(right)) = (left, right) { - diff_bss_symbols(&mut left.common, &mut right.common)?; - } - Ok(()) -} - -fn diff_bss_symbols(left_symbols: &mut [ObjSymbol], right_symbols: &mut [ObjSymbol]) -> Result<()> { - for left_symbol in left_symbols { - if let Some(right_symbol) = right_symbols.iter_mut().find(|s| s.name == left_symbol.name) { - left_symbol.diff_symbol = Some(right_symbol.name.clone()); - right_symbol.diff_symbol = Some(left_symbol.name.clone()); - let percent = if left_symbol.size == right_symbol.size { 100.0 } else { 50.0 }; - left_symbol.match_percent = Some(percent); - right_symbol.match_percent = Some(percent); - } - } - Ok(()) -} - -// WIP diff-by-symbol -#[allow(dead_code)] -fn diff_data_symbols(left: &mut ObjSection, right: &mut ObjSection) -> Result<()> { - let mut left_ops = Vec::::with_capacity(left.symbols.len()); - let mut right_ops = Vec::::with_capacity(right.symbols.len()); - for left_symbol in &left.symbols { - let data = &left.data - [left_symbol.address as usize..(left_symbol.address + left_symbol.size) as usize]; - let hash = twox_hash::xxh3::hash64(data); - left_ops.push(hash as u32); - } - for symbol in &right.symbols { - let data = &right.data[symbol.address as usize..(symbol.address + symbol.size) as usize]; - let hash = twox_hash::xxh3::hash64(data); - right_ops.push(hash as u32); - } - - let edit_ops = editops_find(&left_ops, &right_ops); - if edit_ops.is_empty() && !left.data.is_empty() { - let mut left_iter = left.symbols.iter_mut(); - let mut right_iter = right.symbols.iter_mut(); - loop { - let (left_symbol, right_symbol) = match (left_iter.next(), right_iter.next()) { - (Some(l), Some(r)) => (l, r), - (None, None) => break, - _ => return Err(anyhow::Error::msg("L/R mismatch in diff_data_symbols")), - }; - let left_data = &left.data - [left_symbol.address as usize..(left_symbol.address + left_symbol.size) as usize]; - let right_data = &right.data[right_symbol.address as usize - ..(right_symbol.address + right_symbol.size) as usize]; - - left.data_diff.push(ObjDataDiff { - data: left_data.to_vec(), - kind: ObjDataDiffKind::None, - len: left_symbol.size as usize, - symbol: left_symbol.name.clone(), - }); - right.data_diff.push(ObjDataDiff { - data: right_data.to_vec(), - kind: ObjDataDiffKind::None, - len: right_symbol.size as usize, - symbol: right_symbol.name.clone(), - }); - left_symbol.diff_symbol = Some(right_symbol.name.clone()); - left_symbol.match_percent = Some(100.0); - right_symbol.diff_symbol = Some(left_symbol.name.clone()); - right_symbol.match_percent = Some(100.0); - } - return Ok(()); - } - Ok(()) -} - -fn diff_data(left: &mut ObjSection, right: &mut ObjSection) { - let edit_ops = editops_find(&left.data, &right.data); - if edit_ops.is_empty() && !left.data.is_empty() { - left.data_diff = vec![ObjDataDiff { - data: left.data.clone(), - kind: ObjDataDiffKind::None, - len: left.data.len(), - symbol: String::new(), - }]; - right.data_diff = vec![ObjDataDiff { - data: right.data.clone(), - kind: ObjDataDiffKind::None, - len: right.data.len(), - symbol: String::new(), - }]; - return; - } - - let mut left_diff = Vec::::new(); - let mut right_diff = Vec::::new(); - let mut left_cur = 0usize; - let mut right_cur = 0usize; - let mut cur_op = LevEditType::Replace; - let mut cur_left_data = Vec::::new(); - let mut cur_right_data = Vec::::new(); - for op in edit_ops { - if cur_op != op.op_type || left_cur < op.first_start || right_cur < op.second_start { - match cur_op { - LevEditType::Replace => { - let left_data = take(&mut cur_left_data); - let right_data = take(&mut cur_right_data); - let left_data_len = left_data.len(); - let right_data_len = right_data.len(); - left_diff.push(ObjDataDiff { - data: left_data, - kind: ObjDataDiffKind::Replace, - len: left_data_len, - symbol: String::new(), - }); - right_diff.push(ObjDataDiff { - data: right_data, - kind: ObjDataDiffKind::Replace, - len: right_data_len, - symbol: String::new(), - }); - } - LevEditType::Insert => { - let right_data = take(&mut cur_right_data); - let right_data_len = right_data.len(); - left_diff.push(ObjDataDiff { - data: vec![], - kind: ObjDataDiffKind::Insert, - len: right_data_len, - symbol: String::new(), - }); - right_diff.push(ObjDataDiff { - data: right_data, - kind: ObjDataDiffKind::Insert, - len: right_data_len, - symbol: String::new(), - }); - } - LevEditType::Delete => { - let left_data = take(&mut cur_left_data); - let left_data_len = left_data.len(); - left_diff.push(ObjDataDiff { - data: left_data, - kind: ObjDataDiffKind::Delete, - len: left_data_len, - symbol: String::new(), - }); - right_diff.push(ObjDataDiff { - data: vec![], - kind: ObjDataDiffKind::Delete, - len: left_data_len, - symbol: String::new(), - }); - } - } - } - if left_cur < op.first_start { - left_diff.push(ObjDataDiff { - data: left.data[left_cur..op.first_start].to_vec(), - kind: ObjDataDiffKind::None, - len: op.first_start - left_cur, - symbol: String::new(), - }); - left_cur = op.first_start; - } - if right_cur < op.second_start { - right_diff.push(ObjDataDiff { - data: right.data[right_cur..op.second_start].to_vec(), - kind: ObjDataDiffKind::None, - len: op.second_start - right_cur, - symbol: String::new(), - }); - right_cur = op.second_start; - } - match op.op_type { - LevEditType::Replace => { - cur_left_data.push(left.data[left_cur]); - cur_right_data.push(right.data[right_cur]); - left_cur += 1; - right_cur += 1; - } - LevEditType::Insert => { - cur_right_data.push(right.data[right_cur]); - right_cur += 1; - } - LevEditType::Delete => { - cur_left_data.push(left.data[left_cur]); - left_cur += 1; - } - } - cur_op = op.op_type; - } - // if left_cur < left.data.len() { - // let len = left.data.len() - left_cur; - // left_diff.push(ObjDataDiff { - // data: left.data[left_cur..].to_vec(), - // kind: ObjDataDiffKind::Delete, - // len, - // }); - // right_diff.push(ObjDataDiff { data: vec![], kind: ObjDataDiffKind::Delete, len }); - // } else if right_cur < right.data.len() { - // let len = right.data.len() - right_cur; - // left_diff.push(ObjDataDiff { data: vec![], kind: ObjDataDiffKind::Insert, len }); - // right_diff.push(ObjDataDiff { - // data: right.data[right_cur..].to_vec(), - // kind: ObjDataDiffKind::Insert, - // len, - // }); - // } - - // TODO: merge with above - match cur_op { - LevEditType::Replace => { - let left_data = take(&mut cur_left_data); - let right_data = take(&mut cur_right_data); - let left_data_len = left_data.len(); - let right_data_len = right_data.len(); - left_diff.push(ObjDataDiff { - data: left_data, - kind: ObjDataDiffKind::Replace, - len: left_data_len, - symbol: String::new(), - }); - right_diff.push(ObjDataDiff { - data: right_data, - kind: ObjDataDiffKind::Replace, - len: right_data_len, - symbol: String::new(), - }); - } - LevEditType::Insert => { - let right_data = take(&mut cur_right_data); - let right_data_len = right_data.len(); - left_diff.push(ObjDataDiff { - data: vec![], - kind: ObjDataDiffKind::Insert, - len: right_data_len, - symbol: String::new(), - }); - right_diff.push(ObjDataDiff { - data: right_data, - kind: ObjDataDiffKind::Insert, - len: right_data_len, - symbol: String::new(), - }); - } - LevEditType::Delete => { - let left_data = take(&mut cur_left_data); - let left_data_len = left_data.len(); - left_diff.push(ObjDataDiff { - data: left_data, - kind: ObjDataDiffKind::Delete, - len: left_data_len, - symbol: String::new(), - }); - right_diff.push(ObjDataDiff { - data: vec![], - kind: ObjDataDiffKind::Delete, - len: left_data_len, - symbol: String::new(), - }); - } - } - - if left_cur < left.data.len() { - left_diff.push(ObjDataDiff { - data: left.data[left_cur..].to_vec(), - kind: ObjDataDiffKind::None, - len: left.data.len() - left_cur, - symbol: String::new(), - }); - } - if right_cur < right.data.len() { - right_diff.push(ObjDataDiff { - data: right.data[right_cur..].to_vec(), - kind: ObjDataDiffKind::None, - len: right.data.len() - right_cur, - symbol: String::new(), - }); - } - - left.data_diff = left_diff; - right.data_diff = right_diff; -} - -fn no_diff_data(section: &mut ObjSection) { - section.data_diff = vec![ObjDataDiff { - data: section.data.clone(), - kind: ObjDataDiffKind::None, - len: section.data.len(), - symbol: String::new(), - }]; -} diff --git a/src/diff/code.rs b/src/diff/code.rs new file mode 100644 index 0000000..fdccc18 --- /dev/null +++ b/src/diff/code.rs @@ -0,0 +1,478 @@ +use std::{ + cmp::max, + collections::BTreeMap, + time::{Duration, Instant}, +}; + +use anyhow::Result; +use similar::{capture_diff_slices_deadline, Algorithm}; + +use crate::{ + diff::{ + editops::{editops_find, LevEditType}, + DiffAlg, ProcessCodeResult, + }, + obj::{ + mips, ppc, ObjArchitecture, ObjInfo, ObjInsArg, ObjInsArgDiff, ObjInsBranchFrom, + ObjInsBranchTo, ObjInsDiff, ObjInsDiffKind, ObjReloc, ObjSymbol, ObjSymbolFlags, + }, +}; + +pub fn no_diff_code( + arch: ObjArchitecture, + data: &[u8], + symbol: &mut ObjSymbol, + relocs: &[ObjReloc], + line_info: &Option>, +) -> Result<()> { + let code = + &data[symbol.section_address as usize..(symbol.section_address + symbol.size) as usize]; + let out = match arch { + ObjArchitecture::PowerPc => ppc::process_code(code, symbol.address, relocs, line_info)?, + ObjArchitecture::Mips => mips::process_code( + code, + symbol.address, + symbol.address + symbol.size, + relocs, + line_info, + )?, + }; + + let mut diff = Vec::::new(); + for i in out.insts { + diff.push(ObjInsDiff { ins: Some(i), kind: ObjInsDiffKind::None, ..Default::default() }); + } + resolve_branches(&mut diff); + symbol.instructions = diff; + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +pub fn diff_code( + alg: DiffAlg, + arch: ObjArchitecture, + left_data: &[u8], + right_data: &[u8], + left_symbol: &mut ObjSymbol, + right_symbol: &mut ObjSymbol, + left_relocs: &[ObjReloc], + right_relocs: &[ObjReloc], + left_line_info: &Option>, + right_line_info: &Option>, +) -> Result<()> { + let left_code = &left_data[left_symbol.section_address as usize + ..(left_symbol.section_address + left_symbol.size) as usize]; + let right_code = &right_data[right_symbol.section_address as usize + ..(right_symbol.section_address + right_symbol.size) as usize]; + let (left_out, right_out) = match arch { + ObjArchitecture::PowerPc => ( + ppc::process_code(left_code, left_symbol.address, left_relocs, left_line_info)?, + ppc::process_code(right_code, right_symbol.address, right_relocs, right_line_info)?, + ), + ObjArchitecture::Mips => ( + mips::process_code( + left_code, + left_symbol.address, + left_symbol.address + left_symbol.size, + left_relocs, + left_line_info, + )?, + mips::process_code( + right_code, + right_symbol.address, + left_symbol.address + left_symbol.size, + right_relocs, + right_line_info, + )?, + ), + }; + + let mut left_diff = Vec::::new(); + let mut right_diff = Vec::::new(); + match alg { + DiffAlg::Levenshtein => { + diff_instructions_lev( + &mut left_diff, + &mut right_diff, + left_symbol, + right_symbol, + &left_out, + &right_out, + )?; + } + DiffAlg::Lcs => { + diff_instructions_similar( + Algorithm::Lcs, + &mut left_diff, + &mut right_diff, + &left_out, + &right_out, + )?; + } + DiffAlg::Myers => { + diff_instructions_similar( + Algorithm::Myers, + &mut left_diff, + &mut right_diff, + &left_out, + &right_out, + )?; + } + DiffAlg::Patience => { + diff_instructions_similar( + Algorithm::Patience, + &mut left_diff, + &mut right_diff, + &left_out, + &right_out, + )?; + } + } + + resolve_branches(&mut left_diff); + resolve_branches(&mut right_diff); + + let mut diff_state = InsDiffState::default(); + for (left, right) in left_diff.iter_mut().zip(right_diff.iter_mut()) { + let result = compare_ins(left, right, &mut diff_state)?; + left.kind = result.kind; + right.kind = result.kind; + left.arg_diff = result.left_args_diff; + right.arg_diff = result.right_args_diff; + } + + let total = left_out.insts.len(); + let percent = if diff_state.diff_count >= total { + 0.0 + } else { + ((total - diff_state.diff_count) as f32 / total as f32) * 100.0 + }; + left_symbol.match_percent = Some(percent); + right_symbol.match_percent = Some(percent); + + left_symbol.instructions = left_diff; + right_symbol.instructions = right_diff; + + Ok(()) +} + +fn diff_instructions_similar( + alg: Algorithm, + left_diff: &mut Vec, + right_diff: &mut Vec, + left_code: &ProcessCodeResult, + right_code: &ProcessCodeResult, +) -> Result<()> { + let deadline = Instant::now() + Duration::from_secs(5); + let ops = capture_diff_slices_deadline(alg, &left_code.ops, &right_code.ops, Some(deadline)); + if ops.is_empty() { + left_diff.extend( + left_code + .insts + .iter() + .map(|i| ObjInsDiff { ins: Some(i.clone()), ..Default::default() }), + ); + right_diff.extend( + right_code + .insts + .iter() + .map(|i| ObjInsDiff { ins: Some(i.clone()), ..Default::default() }), + ); + return Ok(()); + } + + for op in ops { + let (_tag, left_range, right_range) = op.as_tag_tuple(); + let len = max(left_range.len(), right_range.len()); + left_diff.extend( + left_code.insts[left_range.clone()] + .iter() + .map(|i| ObjInsDiff { ins: Some(i.clone()), ..Default::default() }), + ); + right_diff.extend( + right_code.insts[right_range.clone()] + .iter() + .map(|i| ObjInsDiff { ins: Some(i.clone()), ..Default::default() }), + ); + if left_range.len() < len { + left_diff.extend((left_range.len()..len).map(|_| ObjInsDiff::default())); + } + if right_range.len() < len { + right_diff.extend((right_range.len()..len).map(|_| ObjInsDiff::default())); + } + } + + Ok(()) +} + +fn diff_instructions_lev( + left_diff: &mut Vec, + right_diff: &mut Vec, + left_symbol: &ObjSymbol, + right_symbol: &ObjSymbol, + left_code: &ProcessCodeResult, + right_code: &ProcessCodeResult, +) -> Result<()> { + let edit_ops = editops_find(&left_code.ops, &right_code.ops); + + let mut op_iter = edit_ops.iter(); + let mut left_iter = left_code.insts.iter(); + let mut right_iter = right_code.insts.iter(); + let mut cur_op = op_iter.next(); + let mut cur_left = left_iter.next(); + let mut cur_right = right_iter.next(); + while let Some(op) = cur_op { + let left_addr = op.first_start as u32 * 4; + let right_addr = op.second_start as u32 * 4; + while let (Some(left), Some(right)) = (cur_left, cur_right) { + if (left.address - left_symbol.address as u32) < left_addr { + left_diff.push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() }); + right_diff.push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() }); + } else { + break; + } + cur_left = left_iter.next(); + cur_right = right_iter.next(); + } + if let (Some(left), Some(right)) = (cur_left, cur_right) { + if (left.address - left_symbol.address as u32) != left_addr { + return Err(anyhow::Error::msg("Instruction address mismatch (left)")); + } + if (right.address - right_symbol.address as u32) != right_addr { + return Err(anyhow::Error::msg("Instruction address mismatch (right)")); + } + match op.op_type { + LevEditType::Replace => { + left_diff.push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() }); + right_diff + .push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() }); + cur_left = left_iter.next(); + cur_right = right_iter.next(); + } + LevEditType::Insert => { + left_diff.push(ObjInsDiff::default()); + right_diff + .push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() }); + cur_right = right_iter.next(); + } + LevEditType::Delete => { + left_diff.push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() }); + right_diff.push(ObjInsDiff::default()); + cur_left = left_iter.next(); + } + } + } else { + break; + } + cur_op = op_iter.next(); + } + + // Finalize + while cur_left.is_some() || cur_right.is_some() { + left_diff.push(ObjInsDiff { ins: cur_left.cloned(), ..ObjInsDiff::default() }); + right_diff.push(ObjInsDiff { ins: cur_right.cloned(), ..ObjInsDiff::default() }); + cur_left = left_iter.next(); + cur_right = right_iter.next(); + } + + Ok(()) +} + +fn resolve_branches(vec: &mut [ObjInsDiff]) { + let mut branch_idx = 0usize; + // Map addresses to indices + let mut addr_map = BTreeMap::::new(); + for (i, ins_diff) in vec.iter().enumerate() { + if let Some(ins) = &ins_diff.ins { + addr_map.insert(ins.address, i); + } + } + // Generate branches + let mut branches = BTreeMap::::new(); + for (i, ins_diff) in vec.iter_mut().enumerate() { + if let Some(ins) = &ins_diff.ins { + // if ins.ins.is_blr() || ins.reloc.is_some() { + // continue; + // } + if let Some(ins_idx) = ins + .args + .iter() + .find_map(|a| if let ObjInsArg::BranchOffset(offs) = a { Some(offs) } else { None }) + .and_then(|offs| addr_map.get(&((ins.address as i32 + offs) as u32))) + { + if let Some(branch) = branches.get_mut(ins_idx) { + ins_diff.branch_to = + Some(ObjInsBranchTo { ins_idx: *ins_idx, branch_idx: branch.branch_idx }); + branch.ins_idx.push(i); + } else { + ins_diff.branch_to = Some(ObjInsBranchTo { ins_idx: *ins_idx, branch_idx }); + branches.insert(*ins_idx, ObjInsBranchFrom { ins_idx: vec![i], branch_idx }); + branch_idx += 1; + } + } + } + } + // Store branch from + for (i, branch) in branches { + vec[i].branch_from = Some(branch); + } +} + +fn address_eq(left: &ObjSymbol, right: &ObjSymbol) -> bool { + left.address as i64 + left.addend == right.address as i64 + right.addend +} + +fn reloc_eq(left_reloc: Option<&ObjReloc>, right_reloc: Option<&ObjReloc>) -> bool { + let (Some(left), Some(right)) = (left_reloc, right_reloc) else { + return false; + }; + if left.kind != right.kind { + return false; + } + + let name_matches = left.target.name == right.target.name; + match (&left.target_section, &right.target_section) { + (Some(sl), Some(sr)) => { + // Match if section and name or address match + sl == sr && (name_matches || address_eq(&left.target, &right.target)) + } + (Some(_), None) => false, + (None, Some(_)) => { + // Match if possibly stripped weak symbol + name_matches && right.target.flags.0.contains(ObjSymbolFlags::Weak) + } + (None, None) => name_matches, + } +} + +fn arg_eq( + left: &ObjInsArg, + right: &ObjInsArg, + left_diff: &ObjInsDiff, + right_diff: &ObjInsDiff, +) -> bool { + return match left { + ObjInsArg::PpcArg(l) => match right { + ObjInsArg::PpcArg(r) => format!("{l}") == format!("{r}"), + _ => false, + }, + ObjInsArg::Reloc => { + matches!(right, ObjInsArg::Reloc) + && reloc_eq( + left_diff.ins.as_ref().and_then(|i| i.reloc.as_ref()), + right_diff.ins.as_ref().and_then(|i| i.reloc.as_ref()), + ) + } + ObjInsArg::RelocWithBase => { + matches!(right, ObjInsArg::RelocWithBase) + && reloc_eq( + left_diff.ins.as_ref().and_then(|i| i.reloc.as_ref()), + right_diff.ins.as_ref().and_then(|i| i.reloc.as_ref()), + ) + } + ObjInsArg::MipsArg(ls) | ObjInsArg::MipsArgWithBase(ls) => { + matches!(right, ObjInsArg::MipsArg(rs) | ObjInsArg::MipsArgWithBase(rs) if ls == rs) + } + ObjInsArg::BranchOffset(_) => { + // Compare dest instruction idx after diffing + left_diff.branch_to.as_ref().map(|b| b.ins_idx) + == right_diff.branch_to.as_ref().map(|b| b.ins_idx) + } + }; +} + +#[derive(Default)] +struct InsDiffState { + diff_count: usize, + left_arg_idx: usize, + right_arg_idx: usize, + left_args_idx: BTreeMap, + right_args_idx: BTreeMap, +} + +#[derive(Default)] +struct InsDiffResult { + kind: ObjInsDiffKind, + left_args_diff: Vec>, + right_args_diff: Vec>, +} + +fn compare_ins( + left: &ObjInsDiff, + right: &ObjInsDiff, + state: &mut InsDiffState, +) -> Result { + let mut result = InsDiffResult::default(); + if let (Some(left_ins), Some(right_ins)) = (&left.ins, &right.ins) { + if left_ins.args.len() != right_ins.args.len() || left_ins.op != right_ins.op { + // Totally different op + result.kind = ObjInsDiffKind::Replace; + state.diff_count += 1; + return Ok(result); + } + if left_ins.mnemonic != right_ins.mnemonic { + // Same op but different mnemonic, still cmp args + result.kind = ObjInsDiffKind::OpMismatch; + state.diff_count += 1; + } + for (a, b) in left_ins.args.iter().zip(&right_ins.args) { + if arg_eq(a, b, left, right) { + result.left_args_diff.push(None); + result.right_args_diff.push(None); + } else { + if result.kind == ObjInsDiffKind::None { + result.kind = ObjInsDiffKind::ArgMismatch; + state.diff_count += 1; + } + let a_str = match a { + ObjInsArg::PpcArg(arg) => format!("{arg}"), + ObjInsArg::Reloc | ObjInsArg::RelocWithBase => String::new(), + ObjInsArg::MipsArg(str) | ObjInsArg::MipsArgWithBase(str) => str.clone(), + ObjInsArg::BranchOffset(arg) => format!("{arg}"), + }; + let a_diff = if let Some(idx) = state.left_args_idx.get(&a_str) { + ObjInsArgDiff { idx: *idx } + } else { + let idx = state.left_arg_idx; + state.left_args_idx.insert(a_str, idx); + state.left_arg_idx += 1; + ObjInsArgDiff { idx } + }; + let b_str = match b { + ObjInsArg::PpcArg(arg) => format!("{arg}"), + ObjInsArg::Reloc | ObjInsArg::RelocWithBase => String::new(), + ObjInsArg::MipsArg(str) | ObjInsArg::MipsArgWithBase(str) => str.clone(), + ObjInsArg::BranchOffset(arg) => format!("{arg}"), + }; + let b_diff = if let Some(idx) = state.right_args_idx.get(&b_str) { + ObjInsArgDiff { idx: *idx } + } else { + let idx = state.right_arg_idx; + state.right_args_idx.insert(b_str, idx); + state.right_arg_idx += 1; + ObjInsArgDiff { idx } + }; + result.left_args_diff.push(Some(a_diff)); + result.right_args_diff.push(Some(b_diff)); + } + } + } else if left.ins.is_some() { + result.kind = ObjInsDiffKind::Delete; + state.diff_count += 1; + } else { + result.kind = ObjInsDiffKind::Insert; + state.diff_count += 1; + } + Ok(result) +} + +pub fn find_section_and_symbol(obj: &ObjInfo, name: &str) -> Option<(usize, usize)> { + for (section_idx, section) in obj.sections.iter().enumerate() { + let symbol_idx = match section.symbols.iter().position(|symbol| symbol.name == name) { + Some(symbol_idx) => symbol_idx, + None => continue, + }; + return Some((section_idx, symbol_idx)); + } + None +} diff --git a/src/diff/data.rs b/src/diff/data.rs new file mode 100644 index 0000000..ea33dd4 --- /dev/null +++ b/src/diff/data.rs @@ -0,0 +1,406 @@ +use std::{ + cmp::{max, min, Ordering}, + mem::take, + time::{Duration, Instant}, +}; + +use anyhow::{bail, Result}; +use similar::{capture_diff_slices_deadline, Algorithm}; + +use crate::{ + diff::{ + editops::{editops_find, LevEditType}, + DiffAlg, + }, + obj::{ObjDataDiff, ObjDataDiffKind, ObjSection, ObjSymbol}, +}; + +pub fn diff_data(alg: DiffAlg, left: &mut ObjSection, right: &mut ObjSection) -> Result<()> { + match alg { + DiffAlg::Levenshtein => diff_data_lev(left, right), + DiffAlg::Lcs => diff_data_similar(Algorithm::Lcs, left, right), + DiffAlg::Myers => diff_data_similar(Algorithm::Myers, left, right), + DiffAlg::Patience => diff_data_similar(Algorithm::Patience, left, right), + } +} + +pub fn diff_bss_symbols( + left_symbols: &mut [ObjSymbol], + right_symbols: &mut [ObjSymbol], +) -> Result<()> { + for left_symbol in left_symbols { + if let Some(right_symbol) = right_symbols.iter_mut().find(|s| s.name == left_symbol.name) { + left_symbol.diff_symbol = Some(right_symbol.name.clone()); + right_symbol.diff_symbol = Some(left_symbol.name.clone()); + let percent = if left_symbol.size == right_symbol.size { 100.0 } else { 50.0 }; + left_symbol.match_percent = Some(percent); + right_symbol.match_percent = Some(percent); + } + } + Ok(()) +} + +// WIP diff-by-symbol +#[allow(dead_code)] +pub fn diff_data_symbols(left: &mut ObjSection, right: &mut ObjSection) -> Result<()> { + let mut left_ops = Vec::::with_capacity(left.symbols.len()); + let mut right_ops = Vec::::with_capacity(right.symbols.len()); + for left_symbol in &left.symbols { + let data = &left.data + [left_symbol.address as usize..(left_symbol.address + left_symbol.size) as usize]; + let hash = twox_hash::xxh3::hash64(data); + left_ops.push(hash as u32); + } + for symbol in &right.symbols { + let data = &right.data[symbol.address as usize..(symbol.address + symbol.size) as usize]; + let hash = twox_hash::xxh3::hash64(data); + right_ops.push(hash as u32); + } + + let edit_ops = editops_find(&left_ops, &right_ops); + if edit_ops.is_empty() && !left.data.is_empty() { + let mut left_iter = left.symbols.iter_mut(); + let mut right_iter = right.symbols.iter_mut(); + loop { + let (left_symbol, right_symbol) = match (left_iter.next(), right_iter.next()) { + (Some(l), Some(r)) => (l, r), + (None, None) => break, + _ => return Err(anyhow::Error::msg("L/R mismatch in diff_data_symbols")), + }; + let left_data = &left.data + [left_symbol.address as usize..(left_symbol.address + left_symbol.size) as usize]; + let right_data = &right.data[right_symbol.address as usize + ..(right_symbol.address + right_symbol.size) as usize]; + + left.data_diff.push(ObjDataDiff { + data: left_data.to_vec(), + kind: ObjDataDiffKind::None, + len: left_symbol.size as usize, + symbol: left_symbol.name.clone(), + }); + right.data_diff.push(ObjDataDiff { + data: right_data.to_vec(), + kind: ObjDataDiffKind::None, + len: right_symbol.size as usize, + symbol: right_symbol.name.clone(), + }); + left_symbol.diff_symbol = Some(right_symbol.name.clone()); + left_symbol.match_percent = Some(100.0); + right_symbol.diff_symbol = Some(left_symbol.name.clone()); + right_symbol.match_percent = Some(100.0); + } + return Ok(()); + } + Ok(()) +} + +pub fn diff_data_similar( + alg: Algorithm, + left: &mut ObjSection, + right: &mut ObjSection, +) -> Result<()> { + let deadline = Instant::now() + Duration::from_secs(5); + let ops = capture_diff_slices_deadline(alg, &left.data, &right.data, Some(deadline)); + + let mut left_diff = Vec::::new(); + let mut right_diff = Vec::::new(); + for op in ops { + let (tag, left_range, right_range) = op.as_tag_tuple(); + let left_len = left_range.len(); + let right_len = right_range.len(); + let mut len = max(left_len, right_len); + let kind = match tag { + similar::DiffTag::Equal => ObjDataDiffKind::None, + similar::DiffTag::Delete => ObjDataDiffKind::Delete, + similar::DiffTag::Insert => ObjDataDiffKind::Insert, + similar::DiffTag::Replace => { + // Ensure replacements are equal length + len = min(left_len, right_len); + ObjDataDiffKind::Replace + } + }; + let left_data = &left.data[left_range]; + let right_data = &right.data[right_range]; + left_diff.push(ObjDataDiff { + data: left_data[..min(len, left_data.len())].to_vec(), + kind, + len, + ..Default::default() + }); + right_diff.push(ObjDataDiff { + data: right_data[..min(len, right_data.len())].to_vec(), + kind, + len, + ..Default::default() + }); + if kind == ObjDataDiffKind::Replace { + match left_len.cmp(&right_len) { + Ordering::Less => { + let len = right_len - left_len; + left_diff.push(ObjDataDiff { + data: vec![], + kind: ObjDataDiffKind::Insert, + len, + ..Default::default() + }); + right_diff.push(ObjDataDiff { + data: right_data[left_len..right_len].to_vec(), + kind: ObjDataDiffKind::Insert, + len, + ..Default::default() + }); + } + Ordering::Greater => { + let len = left_len - right_len; + left_diff.push(ObjDataDiff { + data: left_data[right_len..left_len].to_vec(), + kind: ObjDataDiffKind::Delete, + len, + ..Default::default() + }); + right_diff.push(ObjDataDiff { + data: vec![], + kind: ObjDataDiffKind::Delete, + len, + ..Default::default() + }); + } + Ordering::Equal => {} + } + } + } + + left.data_diff = left_diff; + right.data_diff = right_diff; + Ok(()) +} + +pub fn diff_data_lev(left: &mut ObjSection, right: &mut ObjSection) -> Result<()> { + let matrix_size = (left.data.len() as u64).saturating_mul(right.data.len() as u64); + if matrix_size > 1_000_000_000 { + bail!( + "Data section {} too large for Levenshtein diff ({} * {} = {})", + left.name, + left.data.len(), + right.data.len(), + matrix_size + ); + } + + let edit_ops = editops_find(&left.data, &right.data); + if edit_ops.is_empty() && !left.data.is_empty() { + left.data_diff = vec![ObjDataDiff { + data: left.data.clone(), + kind: ObjDataDiffKind::None, + len: left.data.len(), + symbol: String::new(), + }]; + right.data_diff = vec![ObjDataDiff { + data: right.data.clone(), + kind: ObjDataDiffKind::None, + len: right.data.len(), + symbol: String::new(), + }]; + return Ok(()); + } + + let mut left_diff = Vec::::new(); + let mut right_diff = Vec::::new(); + let mut left_cur = 0usize; + let mut right_cur = 0usize; + let mut cur_op = LevEditType::Replace; + let mut cur_left_data = Vec::::new(); + let mut cur_right_data = Vec::::new(); + for op in edit_ops { + if cur_op != op.op_type || left_cur < op.first_start || right_cur < op.second_start { + match cur_op { + LevEditType::Replace => { + let left_data = take(&mut cur_left_data); + let right_data = take(&mut cur_right_data); + let left_data_len = left_data.len(); + let right_data_len = right_data.len(); + left_diff.push(ObjDataDiff { + data: left_data, + kind: ObjDataDiffKind::Replace, + len: left_data_len, + symbol: String::new(), + }); + right_diff.push(ObjDataDiff { + data: right_data, + kind: ObjDataDiffKind::Replace, + len: right_data_len, + symbol: String::new(), + }); + } + LevEditType::Insert => { + let right_data = take(&mut cur_right_data); + let right_data_len = right_data.len(); + left_diff.push(ObjDataDiff { + data: vec![], + kind: ObjDataDiffKind::Insert, + len: right_data_len, + symbol: String::new(), + }); + right_diff.push(ObjDataDiff { + data: right_data, + kind: ObjDataDiffKind::Insert, + len: right_data_len, + symbol: String::new(), + }); + } + LevEditType::Delete => { + let left_data = take(&mut cur_left_data); + let left_data_len = left_data.len(); + left_diff.push(ObjDataDiff { + data: left_data, + kind: ObjDataDiffKind::Delete, + len: left_data_len, + symbol: String::new(), + }); + right_diff.push(ObjDataDiff { + data: vec![], + kind: ObjDataDiffKind::Delete, + len: left_data_len, + symbol: String::new(), + }); + } + } + } + if left_cur < op.first_start { + left_diff.push(ObjDataDiff { + data: left.data[left_cur..op.first_start].to_vec(), + kind: ObjDataDiffKind::None, + len: op.first_start - left_cur, + symbol: String::new(), + }); + left_cur = op.first_start; + } + if right_cur < op.second_start { + right_diff.push(ObjDataDiff { + data: right.data[right_cur..op.second_start].to_vec(), + kind: ObjDataDiffKind::None, + len: op.second_start - right_cur, + symbol: String::new(), + }); + right_cur = op.second_start; + } + match op.op_type { + LevEditType::Replace => { + cur_left_data.push(left.data[left_cur]); + cur_right_data.push(right.data[right_cur]); + left_cur += 1; + right_cur += 1; + } + LevEditType::Insert => { + cur_right_data.push(right.data[right_cur]); + right_cur += 1; + } + LevEditType::Delete => { + cur_left_data.push(left.data[left_cur]); + left_cur += 1; + } + } + cur_op = op.op_type; + } + // if left_cur < left.data.len() { + // let len = left.data.len() - left_cur; + // left_diff.push(ObjDataDiff { + // data: left.data[left_cur..].to_vec(), + // kind: ObjDataDiffKind::Delete, + // len, + // }); + // right_diff.push(ObjDataDiff { data: vec![], kind: ObjDataDiffKind::Delete, len }); + // } else if right_cur < right.data.len() { + // let len = right.data.len() - right_cur; + // left_diff.push(ObjDataDiff { data: vec![], kind: ObjDataDiffKind::Insert, len }); + // right_diff.push(ObjDataDiff { + // data: right.data[right_cur..].to_vec(), + // kind: ObjDataDiffKind::Insert, + // len, + // }); + // } + + // TODO: merge with above + match cur_op { + LevEditType::Replace => { + let left_data = take(&mut cur_left_data); + let right_data = take(&mut cur_right_data); + let left_data_len = left_data.len(); + let right_data_len = right_data.len(); + left_diff.push(ObjDataDiff { + data: left_data, + kind: ObjDataDiffKind::Replace, + len: left_data_len, + symbol: String::new(), + }); + right_diff.push(ObjDataDiff { + data: right_data, + kind: ObjDataDiffKind::Replace, + len: right_data_len, + symbol: String::new(), + }); + } + LevEditType::Insert => { + let right_data = take(&mut cur_right_data); + let right_data_len = right_data.len(); + left_diff.push(ObjDataDiff { + data: vec![], + kind: ObjDataDiffKind::Insert, + len: right_data_len, + symbol: String::new(), + }); + right_diff.push(ObjDataDiff { + data: right_data, + kind: ObjDataDiffKind::Insert, + len: right_data_len, + symbol: String::new(), + }); + } + LevEditType::Delete => { + let left_data = take(&mut cur_left_data); + let left_data_len = left_data.len(); + left_diff.push(ObjDataDiff { + data: left_data, + kind: ObjDataDiffKind::Delete, + len: left_data_len, + symbol: String::new(), + }); + right_diff.push(ObjDataDiff { + data: vec![], + kind: ObjDataDiffKind::Delete, + len: left_data_len, + symbol: String::new(), + }); + } + } + + if left_cur < left.data.len() { + left_diff.push(ObjDataDiff { + data: left.data[left_cur..].to_vec(), + kind: ObjDataDiffKind::None, + len: left.data.len() - left_cur, + symbol: String::new(), + }); + } + if right_cur < right.data.len() { + right_diff.push(ObjDataDiff { + data: right.data[right_cur..].to_vec(), + kind: ObjDataDiffKind::None, + len: right.data.len() - right_cur, + symbol: String::new(), + }); + } + + left.data_diff = left_diff; + right.data_diff = right_diff; + return Ok(()); +} + +pub fn no_diff_data(section: &mut ObjSection) { + section.data_diff = vec![ObjDataDiff { + data: section.data.clone(), + kind: ObjDataDiffKind::None, + len: section.data.len(), + symbol: String::new(), + }]; +} diff --git a/src/editops.rs b/src/diff/editops.rs similarity index 100% rename from src/editops.rs rename to src/diff/editops.rs diff --git a/src/diff/mod.rs b/src/diff/mod.rs new file mode 100644 index 0000000..6b16ad1 --- /dev/null +++ b/src/diff/mod.rs @@ -0,0 +1,114 @@ +pub mod code; +pub mod data; +pub mod editops; + +use anyhow::Result; +use serde::{Deserialize, Serialize}; + +use crate::{ + diff::{ + code::{diff_code, find_section_and_symbol, no_diff_code}, + data::{diff_bss_symbols, diff_data, no_diff_data}, + }, + obj::{ObjInfo, ObjIns, ObjSectionKind}, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +pub enum DiffAlg { + #[default] + Patience, + Levenshtein, + Myers, + Lcs, +} + +pub struct DiffObjConfig { + pub code_alg: DiffAlg, + pub data_alg: DiffAlg, +} + +pub struct ProcessCodeResult { + pub ops: Vec, + pub insts: Vec, +} + +pub fn diff_objs( + config: &DiffObjConfig, + mut left: Option<&mut ObjInfo>, + mut right: Option<&mut ObjInfo>, +) -> Result<()> { + if let Some(left) = left.as_mut() { + for left_section in &mut left.sections { + if left_section.kind == ObjSectionKind::Code { + for left_symbol in &mut left_section.symbols { + if let Some((right, (right_section_idx, right_symbol_idx))) = + right.as_mut().and_then(|obj| { + find_section_and_symbol(obj, &left_symbol.name).map(|s| (obj, s)) + }) + { + let right_section = &mut right.sections[right_section_idx]; + let right_symbol = &mut right_section.symbols[right_symbol_idx]; + left_symbol.diff_symbol = Some(right_symbol.name.clone()); + right_symbol.diff_symbol = Some(left_symbol.name.clone()); + diff_code( + config.code_alg, + left.architecture, + &left_section.data, + &right_section.data, + left_symbol, + right_symbol, + &left_section.relocations, + &right_section.relocations, + &left.line_info, + &right.line_info, + )?; + } else { + no_diff_code( + left.architecture, + &left_section.data, + left_symbol, + &left_section.relocations, + &left.line_info, + )?; + } + } + } else if let Some(right_section) = right + .as_mut() + .and_then(|obj| obj.sections.iter_mut().find(|s| s.name == left_section.name)) + { + if left_section.kind == ObjSectionKind::Data { + diff_data(config.data_alg, left_section, right_section)?; + } else if left_section.kind == ObjSectionKind::Bss { + diff_bss_symbols(&mut left_section.symbols, &mut right_section.symbols)?; + } + } else if left_section.kind == ObjSectionKind::Data { + no_diff_data(left_section); + } + } + } + if let Some(right) = right.as_mut() { + for right_section in right.sections.iter_mut() { + if right_section.kind == ObjSectionKind::Code { + for right_symbol in &mut right_section.symbols { + if right_symbol.instructions.is_empty() { + no_diff_code( + right.architecture, + &right_section.data, + right_symbol, + &right_section.relocations, + &right.line_info, + )?; + } + } + } else if right_section.kind == ObjSectionKind::Data + && right_section.data_diff.is_empty() + { + no_diff_data(right_section); + } + } + } + if let (Some(left), Some(right)) = (left, right) { + diff_bss_symbols(&mut left.common, &mut right.common)?; + } + Ok(()) +} diff --git a/src/jobs/objdiff.rs b/src/jobs/objdiff.rs index 39c4313..9133dad 100644 --- a/src/jobs/objdiff.rs +++ b/src/jobs/objdiff.rs @@ -10,7 +10,7 @@ use time::OffsetDateTime; use crate::{ app::{AppConfig, ObjectConfig}, - diff::diff_objs, + diff::{diff_objs, DiffAlg, DiffObjConfig}, jobs::{start_job, update_status, Job, JobResult, JobState, JobStatusRef}, obj::{elf, ObjInfo}, }; @@ -27,6 +27,8 @@ pub struct ObjDiffConfig { pub project_dir: Option, pub selected_obj: Option, pub selected_wsl_distro: Option, + pub code_alg: DiffAlg, + pub data_alg: DiffAlg, } impl ObjDiffConfig { @@ -38,6 +40,8 @@ impl ObjDiffConfig { project_dir: config.project_dir.clone(), selected_obj: config.selected_obj.clone(), selected_wsl_distro: config.selected_wsl_distro.clone(), + code_alg: config.code_alg, + data_alg: config.data_alg, } } } @@ -200,7 +204,8 @@ fn run_build( }; update_status(status, "Performing diff".to_string(), 4, total, &cancel)?; - diff_objs(first_obj.as_mut(), second_obj.as_mut())?; + let diff_config = DiffObjConfig { code_alg: config.code_alg, data_alg: config.data_alg }; + diff_objs(&diff_config, first_obj.as_mut(), second_obj.as_mut())?; update_status(status, "Complete".to_string(), total, total, &cancel)?; Ok(Box::new(ObjDiffResult { first_status, second_status, first_obj, second_obj, time })) diff --git a/src/lib.rs b/src/lib.rs index d2266d7..b332c3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,6 @@ mod app; mod app_config; mod config; mod diff; -mod editops; mod jobs; mod obj; mod update; diff --git a/src/obj/elf.rs b/src/obj/elf.rs index caedeea..6bd27f2 100644 --- a/src/obj/elf.rs +++ b/src/obj/elf.rs @@ -298,7 +298,7 @@ fn line_info(obj_file: &File<'_>) -> Result>> { let address_delta = reader.read_u32::()?; map.insert(base_address + address_delta, line_number); } - log::debug!("Line info: {map:#X?}"); + // log::debug!("Line info: {map:#X?}"); return Ok(Some(map)); } Ok(None) diff --git a/src/obj/mips.rs b/src/obj/mips.rs index d9b1b0c..3d6a7aa 100644 --- a/src/obj/mips.rs +++ b/src/obj/mips.rs @@ -3,7 +3,10 @@ use std::collections::BTreeMap; use anyhow::Result; use rabbitizer::{config, Abi, InstrCategory, Instruction, OperandType}; -use crate::obj::{ObjIns, ObjInsArg, ObjReloc}; +use crate::{ + diff::ProcessCodeResult, + obj::{ObjIns, ObjInsArg, ObjReloc}, +}; fn configure_rabbitizer() { unsafe { @@ -17,7 +20,7 @@ pub fn process_code( end_address: u64, relocs: &[ObjReloc], line_info: &Option>, -) -> Result<(Vec, Vec)> { +) -> Result { configure_rabbitizer(); let ins_count = data.len() / 4; @@ -95,5 +98,5 @@ pub fn process_code( }); cur_addr += 4; } - Ok((ops, insts)) + Ok(ProcessCodeResult { ops, insts }) } diff --git a/src/obj/ppc.rs b/src/obj/ppc.rs index 78dddf9..8856d74 100644 --- a/src/obj/ppc.rs +++ b/src/obj/ppc.rs @@ -3,7 +3,10 @@ use std::collections::BTreeMap; use anyhow::Result; use ppc750cl::{disasm_iter, Argument, SimplifiedIns}; -use crate::obj::{ObjIns, ObjInsArg, ObjReloc, ObjRelocKind}; +use crate::{ + diff::ProcessCodeResult, + obj::{ObjIns, ObjInsArg, ObjReloc, ObjRelocKind}, +}; // Relative relocation, can be Simm or BranchOffset fn is_relative_arg(arg: &ObjInsArg) -> bool { @@ -22,7 +25,7 @@ pub fn process_code( address: u64, relocs: &[ObjReloc], line_info: &Option>, -) -> Result<(Vec, Vec)> { +) -> Result { let ins_count = data.len() / 4; let mut ops = Vec::::with_capacity(ins_count); let mut insts = Vec::::with_capacity(ins_count); @@ -92,5 +95,5 @@ pub fn process_code( orig: Some(format!("{}", SimplifiedIns::basic_form(ins))), }); } - Ok((ops, insts)) + Ok(ProcessCodeResult { ops, insts }) } diff --git a/src/views/config.rs b/src/views/config.rs index 3b6cbe9..00d169c 100644 --- a/src/views/config.rs +++ b/src/views/config.rs @@ -11,7 +11,7 @@ use anyhow::{Context, Result}; use const_format::formatcp; use egui::{ output::OpenUrl, text::LayoutJob, CollapsingHeader, FontFamily, FontId, RichText, - SelectableLabel, TextFormat, Widget, + SelectableLabel, TextFormat, Widget, WidgetText, }; use globset::Glob; use self_update::cargo_crate_version; @@ -19,6 +19,7 @@ use self_update::cargo_crate_version; use crate::{ app::{AppConfig, AppConfigRef, ObjectConfig}, config::{ProjectObject, ProjectObjectNode}, + diff::DiffAlg, jobs::{ check_update::{start_check_update, CheckUpdateResult}, update::start_update, @@ -751,3 +752,70 @@ fn split_obj_config_ui( } }); } + +pub fn diff_options_window( + ctx: &egui::Context, + config: &AppConfigRef, + show: &mut bool, + appearance: &Appearance, +) { + let mut config_guard = config.write().unwrap(); + egui::Window::new("Diff Options").open(show).show(ctx, |ui| { + diff_options_ui(ui, &mut config_guard, appearance); + }); +} + +fn diff_options_ui(ui: &mut egui::Ui, config: &mut AppConfig, appearance: &Appearance) { + let mut job = LayoutJob::default(); + job.append( + "Current default: ", + 0.0, + TextFormat::simple(appearance.ui_font.clone(), appearance.text_color), + ); + job.append( + diff_alg_to_string(DiffAlg::default()), + 0.0, + TextFormat::simple(appearance.ui_font.clone(), appearance.emphasized_text_color), + ); + ui.label(job); + let mut job = LayoutJob::default(); + job.append( + "Previous default: ", + 0.0, + TextFormat::simple(appearance.ui_font.clone(), appearance.text_color), + ); + job.append( + "Levenshtein", + 0.0, + TextFormat::simple(appearance.ui_font.clone(), appearance.emphasized_text_color), + ); + ui.label(job); + ui.label("Please provide feedback!"); + if diff_alg_ui(ui, "Code diff algorithm", &mut config.code_alg) { + config.queue_reload = true; + } + if diff_alg_ui(ui, "Data diff algorithm", &mut config.data_alg) { + config.queue_reload = true; + } +} + +fn diff_alg_ui(ui: &mut egui::Ui, label: impl Into, alg: &mut DiffAlg) -> bool { + let response = egui::ComboBox::from_label(label) + .selected_text(diff_alg_to_string(*alg)) + .show_ui(ui, |ui| { + ui.selectable_value(alg, DiffAlg::Patience, "Patience").changed() + | ui.selectable_value(alg, DiffAlg::Levenshtein, "Levenshtein").changed() + | ui.selectable_value(alg, DiffAlg::Myers, "Myers").changed() + | ui.selectable_value(alg, DiffAlg::Lcs, "LCS").changed() + }); + response.inner.unwrap_or(false) +} + +const fn diff_alg_to_string(alg: DiffAlg) -> &'static str { + match alg { + DiffAlg::Patience => "Patience", + DiffAlg::Levenshtein => "Levenshtein", + DiffAlg::Lcs => "LCS", + DiffAlg::Myers => "Myers", + } +}