Initial commit

This commit is contained in:
Luke Street 2022-09-08 17:19:20 -04:00
commit cb3c6062c7
22 changed files with 5061 additions and 0 deletions

61
.github/workflows/build.yaml vendored Normal file
View File

@ -0,0 +1,61 @@
name: Build
on: [ push, pull_request ]
jobs:
check:
name: Check
runs-on: ubuntu-latest
strategy:
matrix:
toolchain: [ stable, 1.61.0, nightly ]
fail-fast: false
env:
RUSTFLAGS: -D warnings
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.toolchain }}
override: true
components: rustfmt, clippy
- uses: EmbarkStudios/cargo-deny-action@v1
- uses: actions-rs/cargo@v1
with:
command: check
args: --all-features
- uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-features
build:
name: Build
strategy:
matrix:
platform: [ ubuntu-latest, macos-latest, windows-latest ]
toolchain: [ stable, 1.61.0, nightly ]
fail-fast: false
runs-on: ${{ matrix.platform }}
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.toolchain }}
override: true
- uses: actions-rs/cargo@v1
with:
command: test
args: --release --all-features
- uses: actions-rs/cargo@v1
with:
command: build
args: --release --all-features
- uses: actions/upload-artifact@v2
with:
name: ${{ matrix.platform }}-${{ matrix.toolchain }}
path: |
target/release/objdiff
target/release/objdiff.exe

24
.gitignore vendored Normal file
View File

@ -0,0 +1,24 @@
# Rust
target/
**/*.rs.bk
generated/
# cargo-mobile
.cargo/
/gen
# macOS
.DS_Store
# JetBrains
.idea
# Generated SPIR-V
*.spv
# project
textures/
android.keystore
*.frag
*.vert
*.metal

2445
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

36
Cargo.toml Normal file
View File

@ -0,0 +1,36 @@
[package]
name = "objdiff"
version = "0.1.0"
edition = "2021"
rust-version = "1.61"
authors = ["Luke Street <luke@street.dev>"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/encounter/objdiff"
readme = "README.md"
description = """
A tool for decompilation projects.
"""
[dependencies]
egui = "0.19.0"
eframe = { version = "0.19.0", features = ["persistence"] } # , "wgpu"
serde = { version = "1", features = ["derive"] }
anyhow = "1.0.63"
thiserror = "1.0.33"
flagset = "0.4.3"
object = "0.29.0"
notify = "5.0.0"
cwdemangle = "0.1.1"
log = "0.4.17"
rfd = { version = "0.10.0" } # , default-features = false, features = ['xdg-portal']
egui_extras = "0.19.0"
ppc750cl = { git = "https://github.com/terorie/ppc750cl" }
# native:
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
tracing-subscriber = "0.3"
# web:
[target.'cfg(target_arch = "wasm32")'.dependencies]
console_error_panic_hook = "0.1.6"
tracing-wasm = "0.2"

28
README.md Normal file
View File

@ -0,0 +1,28 @@
# objdiff [![Build Status]][actions]
[Build Status]: https://github.com/encounter/objdiff/actions/workflows/build.yaml/badge.svg
[actions]: https://github.com/encounter/objdiff/actions
A tool for decompilation projects.
Currently supports:
- PowerPC 750CL (GameCube & Wii)
**WARNING:** Very early & unstable.
![Screenshot](assets/screen-diff.png)
### License
Licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
additional terms or conditions.

BIN
assets/screen-diff.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

210
deny.toml Normal file
View File

@ -0,0 +1,210 @@
# This template contains all of the possible sections and their default values
# Note that all fields that take a lint level have these possible values:
# * deny - An error will be produced and the check will fail
# * warn - A warning will be produced, but the check will not fail
# * allow - No warning or error will be produced, though in some cases a note
# will be
# The values provided in this template are the default values that will be used
# when any section or field is not specified in your own configuration
# If 1 or more target triples (and optionally, target_features) are specified,
# only the specified targets will be checked when running `cargo deny check`.
# This means, if a particular package is only ever used as a target specific
# dependency, such as, for example, the `nix` crate only being used via the
# `target_family = "unix"` configuration, that only having windows targets in
# this list would mean the nix crate, as well as any of its exclusive
# dependencies not shared by any other crates, would be ignored, as the target
# list here is effectively saying which targets you are building for.
targets = [
# The triple can be any string, but only the target triples built in to
# rustc (as of 1.40) can be checked against actual config expressions
#{ triple = "x86_64-unknown-linux-musl" },
# You can also specify which target_features you promise are enabled for a
# particular target. target_features are currently not validated against
# the actual valid features supported by the target architecture.
#{ triple = "wasm32-unknown-unknown", features = ["atomics"] },
]
# This section is considered when running `cargo deny check advisories`
# More documentation for the advisories section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
[advisories]
# The path where the advisory database is cloned/fetched into
db-path = "~/.cargo/advisory-db"
# The url(s) of the advisory databases to use
db-urls = ["https://github.com/rustsec/advisory-db"]
# The lint level for security vulnerabilities
vulnerability = "deny"
# The lint level for unmaintained crates
unmaintained = "warn"
# The lint level for crates that have been yanked from their source registry
yanked = "warn"
# The lint level for crates with security notices. Note that as of
# 2019-12-17 there are no security notice advisories in
# https://github.com/rustsec/advisory-db
notice = "warn"
# A list of advisory IDs to ignore. Note that ignored advisories will still
# output a note when they are encountered.
ignore = [
#"RUSTSEC-0000-0000",
]
# Threshold for security vulnerabilities, any vulnerability with a CVSS score
# lower than the range specified will be ignored. Note that ignored advisories
# will still output a note when they are encountered.
# * None - CVSS Score 0.0
# * Low - CVSS Score 0.1 - 3.9
# * Medium - CVSS Score 4.0 - 6.9
# * High - CVSS Score 7.0 - 8.9
# * Critical - CVSS Score 9.0 - 10.0
#severity-threshold =
# This section is considered when running `cargo deny check licenses`
# More documentation for the licenses section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
[licenses]
# The lint level for crates which do not have a detectable license
unlicensed = "deny"
# List of explicitly allowed licenses
# See https://spdx.org/licenses/ for list of possible licenses
# [possible values: any SPDX 3.11 short identifier (+ optional exception)].
allow = [
"MIT",
"Apache-2.0",
"ISC",
"BSD-2-Clause",
"BSD-3-Clause",
"BSL-1.0",
"CC0-1.0",
"MPL-2.0",
"Unicode-DFS-2016",
"Zlib",
]
# List of explicitly disallowed licenses
# See https://spdx.org/licenses/ for list of possible licenses
# [possible values: any SPDX 3.11 short identifier (+ optional exception)].
deny = [
#"Nokia",
]
# Lint level for licenses considered copyleft
copyleft = "warn"
# Blanket approval or denial for OSI-approved or FSF Free/Libre licenses
# * both - The license will be approved if it is both OSI-approved *AND* FSF
# * either - The license will be approved if it is either OSI-approved *OR* FSF
# * osi-only - The license will be approved if is OSI-approved *AND NOT* FSF
# * fsf-only - The license will be approved if is FSF *AND NOT* OSI-approved
# * neither - This predicate is ignored and the default lint level is used
allow-osi-fsf-free = "neither"
# Lint level used when no other predicates are matched
# 1. License isn't in the allow or deny lists
# 2. License isn't copyleft
# 3. License isn't OSI/FSF, or allow-osi-fsf-free = "neither"
default = "deny"
# The confidence threshold for detecting a license from license text.
# The higher the value, the more closely the license text must be to the
# canonical license text of a valid SPDX license file.
# [possible values: any between 0.0 and 1.0].
confidence-threshold = 0.8
# Allow 1 or more licenses on a per-crate basis, so that particular licenses
# aren't accepted for every possible crate as with the normal allow list
exceptions = [
# Each entry is the crate and version constraint, and its specific allow
# list
#{ allow = ["Zlib"], name = "adler32", version = "*" },
]
# Some crates don't have (easily) machine readable licensing information,
# adding a clarification entry for it allows you to manually specify the
# licensing information
#[[licenses.clarify]]
# The name of the crate the clarification applies to
#name = "ring"
# The optional version constraint for the crate
#version = "*"
# The SPDX expression for the license requirements of the crate
#expression = "MIT AND ISC AND OpenSSL"
# One or more files in the crate's source used as the "source of truth" for
# the license expression. If the contents match, the clarification will be used
# when running the license check, otherwise the clarification will be ignored
# and the crate will be checked normally, which may produce warnings or errors
# depending on the rest of your configuration
#license-files = [
# Each entry is a crate relative path, and the (opaque) hash of its contents
#{ path = "LICENSE", hash = 0xbd0eed23 }
#]
[licenses.private]
# If true, ignores workspace crates that aren't published, or are only
# published to private registries
ignore = false
# One or more private registries that you might publish crates to, if a crate
# is only published to private registries, and ignore is true, the crate will
# not have its license(s) checked
registries = [
#"https://sekretz.com/registry"
]
# This section is considered when running `cargo deny check bans`.
# More documentation about the 'bans' section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
[bans]
# Lint level for when multiple versions of the same crate are detected
multiple-versions = "warn"
# Lint level for when a crate version requirement is `*`
wildcards = "allow"
# The graph highlighting used when creating dotgraphs for crates
# with multiple versions
# * lowest-version - The path to the lowest versioned duplicate is highlighted
# * simplest-path - The path to the version with the fewest edges is highlighted
# * all - Both lowest-version and simplest-path are used
highlight = "all"
# List of crates that are allowed. Use with care!
allow = [
#{ name = "ansi_term", version = "=0.11.0" },
]
# List of crates to deny
deny = [
# Each entry the name of a crate and a version range. If version is
# not specified, all versions will be matched.
#{ name = "ansi_term", version = "=0.11.0" },
#
# Wrapper crates can optionally be specified to allow the crate when it
# is a direct dependency of the otherwise banned crate
#{ name = "ansi_term", version = "=0.11.0", wrappers = [] },
]
# Certain crates/versions that will be skipped when doing duplicate detection.
skip = [
#{ name = "ansi_term", version = "=0.11.0" },
]
# Similarly to `skip` allows you to skip certain crates during duplicate
# detection. Unlike skip, it also includes the entire tree of transitive
# dependencies starting at the specified crate, up to a certain depth, which is
# by default infinite
skip-tree = [
#{ name = "ansi_term", version = "=0.11.0", depth = 20 },
]
# This section is considered when running `cargo deny check sources`.
# More documentation about the 'sources' section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html
[sources]
# Lint level for what to happen when a crate from a crate registry that is not
# in the allow list is encountered
unknown-registry = "warn"
# Lint level for what to happen when a crate from a git repository that is not
# in the allow list is encountered
unknown-git = "warn"
# List of URLs for allowed crate registries. Defaults to the crates.io index
# if not specified. If it is specified but empty, no registries are allowed.
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
# List of URLs for allowed Git repositories
allow-git = []
[sources.allow-org]
# 1 or more github.com organizations to allow git sources for
github = ["encounter", "terorie"]
# 1 or more gitlab.com organizations to allow git sources for
#gitlab = [""]
# 1 or more bitbucket.org organizations to allow git sources for
#bitbucket = [""]

8
rustfmt.toml Normal file
View File

@ -0,0 +1,8 @@
fn_single_line = true
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
overflow_delimited_expr = true
reorder_impl_items = true
use_field_init_shorthand = true
use_small_heuristics = "Max"
where_single_line = true

266
src/app.rs Normal file
View File

@ -0,0 +1,266 @@
use std::{
default::Default,
ffi::OsStr,
path::{Path, PathBuf},
sync::{
atomic::{AtomicBool, Ordering},
Arc, RwLock,
},
time::Duration,
};
use eframe::Frame;
use notify::{RecursiveMode, Watcher};
use crate::{
jobs::{
build::{queue_build, BuildResult},
Job, JobResult, JobState,
},
views::{
config::config_ui, function_diff::function_diff_ui, jobs::jobs_ui,
symbol_diff::symbol_diff_ui,
},
};
/// Top-level screen selected for the main window.
#[derive(Eq, PartialEq)]
pub enum View {
    /// Symbol list view (rendered through `symbol_diff_ui`); this is the default.
    SymbolDiff,
    /// Per-function view (rendered through `function_diff_ui`).
    FunctionDiff,
}

// `Default` is written by hand instead of `#[derive(Default)]` + `#[default]`:
// the `#[default]` variant attribute was only stabilized in Rust 1.62, while this
// crate declares `rust-version = "1.61"` and CI builds with the 1.61.0 toolchain.
impl Default for View {
    fn default() -> Self { View::SymbolDiff }
}
/// UI state shared by the views.
///
/// Only `reverse_fn_order` takes part in (de)serialization; every field marked
/// `#[serde(skip)]` is runtime-only and starts from its `Default` value.
#[derive(Default, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct ViewState {
/// Jobs tracked by the app; finished ones are joined in `post_rendering`.
#[serde(skip)]
pub jobs: Vec<JobState>,
/// Result of the most recently joined build job, if any.
#[serde(skip)]
pub build: Option<Box<BuildResult>>,
/// NOTE(review): presumably the name of the symbol currently highlighted in the UI - confirm in the views.
#[serde(skip)]
pub highlighted_symbol: Option<String>,
/// NOTE(review): presumably the name of the symbol chosen for the function diff - confirm in the views.
#[serde(skip)]
pub selected_symbol: Option<String>,
/// Which top-level view `update` renders.
#[serde(skip)]
pub current_view: View,
// Config
/// Persisted display option. NOTE(review): presumably reverses the function listing order - confirm in the views.
pub reverse_fn_order: bool,
}
/// Project configuration, persisted under its own storage key and shared with
/// jobs behind an `Arc<RwLock<_>>`.
#[derive(Default, Clone, serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct AppConfig {
/// Project root; when set, it is watched recursively for source file modifications.
pub project_dir: Option<PathBuf>,
/// NOTE(review): presumably the build output directory for the reference (asm) objects - confirm in the build job.
pub build_asm_dir: Option<PathBuf>,
/// NOTE(review): presumably the build output directory for the objects compiled from source - confirm in the build job.
pub build_src_dir: Option<PathBuf>,
/// Object passed to `queue_build` whenever a rebuild is triggered.
pub build_obj: Option<String>,
/// Set when `project_dir` has changed so the file watcher gets recreated; never persisted.
#[serde(skip)]
pub project_dir_change: bool,
}
/// Application root object driven by eframe.
///
/// We derive Deserialize/Serialize so we can persist app state on shutdown.
/// Only `view_state` is serialized with the app itself; the configuration is
/// stored separately and the remaining fields are runtime-only.
#[derive(serde::Deserialize, serde::Serialize)]
#[serde(default)]
pub struct App {
// State handed to the view functions.
view_state: ViewState,
// Shared configuration; cloned into build jobs when they are queued.
#[serde(skip)]
config: Arc<RwLock<AppConfig>>,
// Set by the file watcher (and on project change) to request a rebuild.
#[serde(skip)]
modified: Arc<AtomicBool>,
// Watcher for the current project directory, if one could be created.
#[serde(skip)]
watcher: Option<notify::RecommendedWatcher>,
}
impl Default for App {
fn default() -> Self {
Self {
view_state: ViewState::default(),
config: Arc::new(Default::default()),
modified: Arc::new(Default::default()),
watcher: None,
}
}
}
/// Storage key under which the `AppConfig` is saved and loaded, separately from `eframe::APP_KEY`.
const CONFIG_KEY: &str = "app_config";
impl App {
    /// Called once before the first frame.
    ///
    /// Restores the previously saved app state and configuration from
    /// `cc.storage` when storage is available; otherwise returns the default app.
    pub fn new(cc: &eframe::CreationContext<'_>) -> Self {
        // The look and feel of egui can be customized here using
        // `cc.egui_ctx.set_visuals` and `cc.egui_ctx.set_fonts`.

        // Nothing to restore without storage.
        let storage = match cc.storage {
            Some(storage) => storage,
            None => return Self::default(),
        };

        // Note that the `persistence` feature must be enabled for this to work.
        let mut app: App = eframe::get_value(storage, eframe::APP_KEY).unwrap_or_default();
        let mut config: AppConfig = eframe::get_value(storage, CONFIG_KEY).unwrap_or_default();

        // A restored project directory is flagged as changed so the file
        // watcher gets created for it after the first frame.
        config.project_dir_change |= config.project_dir.is_some();

        app.config = Arc::new(RwLock::new(config));
        app
    }
}
impl eframe::App for App {
/// Called each time the UI needs repainting, which may be many times per second.
/// Put your widgets into a `SidePanel`, `TopPanel`, `CentralPanel`, `Window` or `Area`.
fn update(&mut self, ctx: &egui::Context, frame: &mut Frame) {
let Self { config, view_state, .. } = self;
egui::TopBottomPanel::top("top_panel").show(ctx, |ui| {
egui::menu::bar(ui, |ui| {
ui.menu_button("File", |ui| {
if ui.button("Quit").clicked() {
frame.close();
}
});
});
});
if view_state.current_view == View::FunctionDiff
&& matches!(&view_state.build, Some(b) if b.first_status.success && b.second_status.success)
{
egui::SidePanel::left("side_panel").show(ctx, |ui| {
if ui.button("Back").clicked() {
view_state.current_view = View::SymbolDiff;
}
ui.separator();
jobs_ui(ui, view_state);
});
egui::CentralPanel::default().show(ctx, |ui| {
function_diff_ui(ui, view_state);
});
} else {
egui::SidePanel::left("side_panel").show(ctx, |ui| {
ui.heading("Config");
config_ui(ui, config, view_state);
jobs_ui(ui, view_state);
});
egui::CentralPanel::default().show(ctx, |ui| {
symbol_diff_ui(ui, view_state);
});
}
if view_state.jobs.iter().any(|job| {
if let Some(handle) = &job.handle {
return !handle.is_finished();
}
false
}) {
ctx.request_repaint();
} else {
ctx.request_repaint();
ctx.request_repaint_after(Duration::from_millis(100));
}
}
/// Called by the frame work to save state before shutdown.
fn save(&mut self, storage: &mut dyn eframe::Storage) {
if let Ok(config) = self.config.read() {
eframe::set_value(storage, CONFIG_KEY, &*config);
}
eframe::set_value(storage, eframe::APP_KEY, self);
}
fn post_rendering(&mut self, _window_size_px: [u32; 2], _frame: &Frame) {
for job in &mut self.view_state.jobs {
if let Some(handle) = &job.handle {
if !handle.is_finished() {
continue;
}
match job.handle.take().unwrap().join() {
Ok(result) => {
log::info!("Job {} finished", job.id);
match result {
JobResult::None => {
if let Some(err) = &job.status.read().unwrap().error {
log::error!("{:?}", err);
}
}
JobResult::Build(state) => {
self.view_state.build = Some(state);
}
}
}
Err(e) => {
log::error!("Failed to join job handle: {:?}", e);
}
}
}
}
if self.view_state.jobs.iter().any(|v| v.should_remove) {
let mut i = 0;
while i < self.view_state.jobs.len() {
let job = &self.view_state.jobs[i];
if job.should_remove && job.handle.is_none() {
self.view_state.jobs.remove(i);
} else {
i += 1;
}
}
}
if let Ok(mut config) = self.config.write() {
if config.project_dir_change {
drop(self.watcher.take());
if let Some(project_dir) = &config.project_dir {
match create_watcher(self.modified.clone(), project_dir) {
Ok(watcher) => self.watcher = Some(watcher),
Err(e) => eprintln!("Failed to create watcher: {}", e),
}
config.project_dir_change = false;
self.modified.store(true, Ordering::Relaxed);
}
}
if let Some(build_obj) = &config.build_obj {
if self.modified.load(Ordering::Relaxed) {
if !self
.view_state
.jobs
.iter()
.any(|j| j.job_type == Job::Build && j.handle.is_some())
{
self.view_state
.jobs
.push(queue_build(build_obj.clone(), self.config.clone()));
}
self.modified.store(false, Ordering::Relaxed);
}
}
}
}
}
/// Starts a recursive file watcher on `project_dir`.
///
/// Whenever a modification event touches a path with a `c`, `cp`, `cpp`, `h`
/// or `hpp` extension, `modified` is set to `true`. Watch errors are printed
/// to stdout. Returns the watcher, or the error from creating or starting it.
fn create_watcher(
    modified: Arc<AtomicBool>,
    project_dir: &Path,
) -> notify::Result<notify::RecommendedWatcher> {
    const WATCH_EXTENSIONS: [&str; 5] = ["c", "cp", "cpp", "h", "hpp"];

    let on_event = move |res: notify::Result<notify::Event>| {
        let event = match res {
            Ok(event) => event,
            Err(e) => {
                println!("watch error: {:?}", e);
                return;
            }
        };
        if !matches!(event.kind, notify::EventKind::Modify(..)) {
            return;
        }
        let source_touched = event.paths.iter().any(|path| match path.extension() {
            Some(ext) => WATCH_EXTENSIONS.iter().any(|&wanted| ext == OsStr::new(wanted)),
            None => false,
        });
        if source_touched {
            modified.store(true, Ordering::Relaxed);
        }
    };

    let mut watcher = notify::recommended_watcher(on_event)?;
    watcher.watch(project_dir, RecursiveMode::Recursive)?;
    Ok(watcher)
}

396
src/diff.rs Normal file
View File

@ -0,0 +1,396 @@
use std::collections::BTreeMap;
use anyhow::Result;
use ppc750cl::{disasm_iter, Argument};
use crate::{
editops::{editops_find, LevEditType},
obj::{
ObjInfo, ObjIns, ObjInsArg, ObjInsArgDiff, ObjInsBranchFrom, ObjInsBranchTo, ObjInsDiff,
ObjInsDiffKind, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol,
ObjSymbolFlags,
},
};
/// Returns `true` for arguments a relative relocation can replace:
/// `Simm` or `BranchDest`.
fn is_relative_arg(arg: &ObjInsArg) -> bool {
    matches!(arg, ObjInsArg::Arg(Argument::Simm(_) | Argument::BranchDest(_)))
}
/// Returns `true` for arguments a relative or absolute relocation can replace:
/// `Uimm`, `Simm` or `Offset`.
fn is_rel_abs_arg(arg: &ObjInsArg) -> bool {
    matches!(
        arg,
        ObjInsArg::Arg(Argument::Uimm(_) | Argument::Simm(_) | Argument::Offset(_))
    )
}
/// Returns `true` when `arg` is a plain `Offset` argument.
fn is_offset_arg(arg: &ObjInsArg) -> bool {
    match arg {
        ObjInsArg::Arg(Argument::Offset(_)) => true,
        _ => false,
    }
}
fn process_code(data: &[u8], address: u64, relocs: &[ObjReloc]) -> Result<(Vec<u8>, Vec<ObjIns>)> {
let ins_count = data.len() / 4;
let mut ops = Vec::<u8>::with_capacity(ins_count);
let mut insts = Vec::<ObjIns>::with_capacity(ins_count);
for mut ins in disasm_iter(data, address as u32) {
let reloc = relocs.iter().find(|r| (r.address as u32 & !3) == ins.addr);
if let Some(reloc) = reloc {
// Zero out relocations
ins.code = match reloc.kind {
ObjRelocKind::PpcEmbSda21 => ins.code & !0x1FFFFF,
ObjRelocKind::PpcRel24 => ins.code & !0x3FFFFFC,
ObjRelocKind::PpcRel14 => ins.code & !0xFFFC,
ObjRelocKind::PpcAddr16Hi
| ObjRelocKind::PpcAddr16Ha
| ObjRelocKind::PpcAddr16Lo => ins.code & !0xFFFF,
_ => ins.code,
};
}
let simplified = ins.simplified();
let mut args: Vec<ObjInsArg> =
simplified.args.iter().map(|a| ObjInsArg::Arg(a.clone())).collect();
if let Some(reloc) = reloc {
match reloc.kind {
ObjRelocKind::PpcEmbSda21 => {
args = vec![args[0].clone(), ObjInsArg::Reloc];
}
ObjRelocKind::PpcRel24 | ObjRelocKind::PpcRel14 => {
let arg = args
.iter_mut()
.rfind(|a| is_relative_arg(a))
.ok_or_else(|| anyhow::Error::msg("Failed to locate rel arg for reloc"))?;
*arg = ObjInsArg::Reloc;
}
ObjRelocKind::PpcAddr16Hi
| ObjRelocKind::PpcAddr16Ha
| ObjRelocKind::PpcAddr16Lo => {
let arg = args.iter_mut().rfind(|a| is_rel_abs_arg(a)).ok_or_else(|| {
anyhow::Error::msg("Failed to locate rel/abs arg for reloc")
})?;
*arg =
if is_offset_arg(arg) { ObjInsArg::RelocOffset } else { ObjInsArg::Reloc };
}
_ => {}
}
}
ops.push(simplified.ins.op as u8);
let suffix = simplified.ins.suffix();
insts.push(ObjIns {
ins: simplified.ins,
mnemonic: format!("{}{}", simplified.mnemonic, suffix),
args,
reloc: reloc.cloned(),
});
}
Ok((ops, insts))
}
/// Diffs the code of `left_symbol` against `right_symbol`.
///
/// Both symbols are disassembled from their section data, aligned with an
/// edit-distance pass over the opcodes, and compared row by row. The aligned
/// rows are stored in each symbol's `instructions`, and the same
/// `match_percent` is stored on both symbols.
///
/// `match_percent` is the share of left-hand instructions not counted as
/// differing. It is clamped to 0 when there are more differing rows than
/// left-hand instructions. With no left-hand instructions it is 100 if nothing
/// differs and 0 otherwise.
///
/// # Errors
/// Fails if disassembly cannot resolve a relocation argument, or if the edit
/// operations do not line up with the instruction addresses.
///
/// # Panics
/// Panics if a symbol's address range lies outside its section data.
pub fn diff_code(
    left_data: &[u8],
    right_data: &[u8],
    left_symbol: &mut ObjSymbol,
    right_symbol: &mut ObjSymbol,
    left_relocs: &[ObjReloc],
    right_relocs: &[ObjReloc],
) -> Result<()> {
    let left_code =
        &left_data[left_symbol.address as usize..(left_symbol.address + left_symbol.size) as usize];
    let (left_ops, left_insts) = process_code(left_code, left_symbol.address, left_relocs)?;
    let right_code = &right_data
        [right_symbol.address as usize..(right_symbol.address + right_symbol.size) as usize];
    let (right_ops, right_insts) = process_code(right_code, right_symbol.address, right_relocs)?;

    let mut left_diff = Vec::<ObjInsDiff>::new();
    let mut right_diff = Vec::<ObjInsDiff>::new();
    let edit_ops = editops_find(&left_ops, &right_ops);

    {
        let mut op_iter = edit_ops.iter();
        let mut left_iter = left_insts.iter();
        let mut right_iter = right_insts.iter();
        let mut cur_op = op_iter.next();
        let mut cur_left = left_iter.next();
        let mut cur_right = right_iter.next();
        while let Some(op) = cur_op {
            // Edit positions are instruction indices; instructions are 4 bytes wide.
            let left_addr = op.first_start as u32 * 4;
            let right_addr = op.second_start as u32 * 4;
            // Pair up the unchanged instructions that precede this edit.
            while let (Some(left), Some(right)) = (cur_left, cur_right) {
                if (left.ins.addr - left_symbol.address as u32) < left_addr {
                    left_diff.push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() });
                    right_diff
                        .push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() });
                } else {
                    break;
                }
                cur_left = left_iter.next();
                cur_right = right_iter.next();
            }
            if let (Some(left), Some(right)) = (cur_left, cur_right) {
                if (left.ins.addr - left_symbol.address as u32) != left_addr {
                    return Err(anyhow::Error::msg("Instruction address mismatch (left)"));
                }
                if (right.ins.addr - right_symbol.address as u32) != right_addr {
                    return Err(anyhow::Error::msg("Instruction address mismatch (right)"));
                }
                match op.op_type {
                    LevEditType::Replace => {
                        left_diff
                            .push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() });
                        right_diff
                            .push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() });
                        cur_left = left_iter.next();
                        cur_right = right_iter.next();
                    }
                    LevEditType::Insert => {
                        // Only the right side has this instruction; pad the left with an empty row.
                        left_diff.push(ObjInsDiff::default());
                        right_diff
                            .push(ObjInsDiff { ins: Some(right.clone()), ..ObjInsDiff::default() });
                        cur_right = right_iter.next();
                    }
                    LevEditType::Delete => {
                        // Only the left side has this instruction; pad the right with an empty row.
                        left_diff
                            .push(ObjInsDiff { ins: Some(left.clone()), ..ObjInsDiff::default() });
                        right_diff.push(ObjInsDiff::default());
                        cur_left = left_iter.next();
                    }
                    LevEditType::Keep => unreachable!(),
                }
            } else {
                break;
            }
            cur_op = op_iter.next();
        }

        // Finalize
        while cur_left.is_some() || cur_right.is_some() {
            left_diff.push(ObjInsDiff { ins: cur_left.cloned(), ..ObjInsDiff::default() });
            right_diff.push(ObjInsDiff { ins: cur_right.cloned(), ..ObjInsDiff::default() });
            cur_left = left_iter.next();
            cur_right = right_iter.next();
        }
    }

    resolve_branches(&mut left_diff);
    resolve_branches(&mut right_diff);

    let mut diff_state = InsDiffState::default();
    for (left, right) in left_diff.iter_mut().zip(right_diff.iter_mut()) {
        let result = compare_ins(left, right, &mut diff_state)?;
        left.kind = result.kind;
        right.kind = result.kind;
        left.arg_diff = result.left_args_diff;
        right.arg_diff = result.right_args_diff;
    }

    let total = left_insts.len();
    // Inserted rows can push `diff_count` above the number of left-hand
    // instructions, so subtract with saturation instead of underflowing.
    let matching = total.saturating_sub(diff_state.diff_count);
    let percent = if total == 0 {
        // Avoid 0/0 = NaN for an empty left-hand symbol.
        if diff_state.diff_count == 0 {
            100.0
        } else {
            0.0
        }
    } else {
        (matching as f32 / total as f32) * 100.0
    };
    left_symbol.match_percent = percent;
    right_symbol.match_percent = percent;

    left_symbol.instructions = left_diff;
    right_symbol.instructions = right_diff;

    Ok(())
}
/// Links branch instructions in `vec` to their destination rows.
///
/// Each branch whose destination address belongs to a row of `vec` gets a
/// `branch_to` entry, and each destination row gets a `branch_from` listing all
/// rows that jump to it. Branches sharing a destination share one
/// `branch_idx`. `blr` instructions and instructions with a relocation are
/// skipped.
fn resolve_branches(vec: &mut [ObjInsDiff]) {
    // Address -> row index, for every row that holds an instruction.
    let addr_map: BTreeMap<u32, usize> = vec
        .iter()
        .enumerate()
        .filter_map(|(row, diff)| diff.ins.as_ref().map(|ins| (ins.ins.addr, row)))
        .collect();

    // Destination row -> incoming branches.
    let mut next_branch_idx = 0usize;
    let mut incoming = BTreeMap::<usize, ObjInsBranchFrom>::new();
    for (row, diff) in vec.iter_mut().enumerate() {
        let dest_row = match &diff.ins {
            Some(ins) if !ins.ins.is_blr() && ins.reloc.is_none() => {
                ins.ins.branch_dest().and_then(|dest| addr_map.get(&dest).copied())
            }
            _ => None,
        };
        let dest_row = match dest_row {
            Some(dest_row) => dest_row,
            None => continue,
        };

        let from = incoming.entry(dest_row).or_insert_with(|| {
            // First branch to this destination: allocate a fresh branch index.
            let branch_idx = next_branch_idx;
            next_branch_idx += 1;
            ObjInsBranchFrom { ins_idx: Vec::new(), branch_idx }
        });
        from.ins_idx.push(row);
        diff.branch_to = Some(ObjInsBranchTo { ins_idx: dest_row, branch_idx: from.branch_idx });
    }

    // Attach the collected sources to their destination rows.
    for (dest_row, from) in incoming {
        vec[dest_row].branch_from = Some(from);
    }
}
/// Decides whether two relocations refer to the same target.
///
/// Both must be present and of the same kind. Beyond that:
/// * both have a target section: the sections must be equal and either the
///   target names or the target addresses must match;
/// * only the left has a target section: never equal;
/// * only the right has a target section: the names must match and the right
///   target must be flagged weak;
/// * neither has a target section: the names must match.
fn reloc_eq(left_reloc: Option<&ObjReloc>, right_reloc: Option<&ObjReloc>) -> bool {
    let (left, right) = match (left_reloc, right_reloc) {
        (Some(left), Some(right)) => (left, right),
        _ => return false,
    };
    if left.kind != right.kind {
        return false;
    }

    let same_name = left.target.name == right.target.name;
    match (&left.target_section, &right.target_section) {
        // Match if section and name or address match
        (Some(left_section), Some(right_section)) => {
            left_section == right_section
                && (same_name || left.target.address == right.target.address)
        }
        (Some(_), None) => false,
        // Match if possibly stripped weak symbol
        (None, Some(_)) => same_name && right.target.flags.0.contains(ObjSymbolFlags::Weak),
        (None, None) => same_name,
    }
}
/// Compares one argument of the left instruction with the corresponding
/// argument of the right instruction.
///
/// * Two plain arguments are equal when their formatted text is equal. The
///   exception is when the right one is a branch destination: then the
///   resolved destination rows (`branch_to`) of both instructions are compared.
/// * Two relocation placeholders of the same variant are equal when the
///   instructions' relocations are equal according to `reloc_eq`.
/// * Any other combination is unequal.
fn arg_eq(
    left: &ObjInsArg,
    right: &ObjInsArg,
    left_diff: &ObjInsDiff,
    right_diff: &ObjInsDiff,
) -> bool {
    match (left, right) {
        (ObjInsArg::Arg(l), ObjInsArg::Arg(r)) => {
            if matches!(r, Argument::BranchDest(_)) {
                // Compare dest instruction idx after diffing
                let left_dest = left_diff.branch_to.as_ref().map(|b| b.ins_idx);
                let right_dest = right_diff.branch_to.as_ref().map(|b| b.ins_idx);
                left_dest == right_dest
            } else {
                l.to_string() == r.to_string()
            }
        }
        (ObjInsArg::Reloc, ObjInsArg::Reloc)
        | (ObjInsArg::RelocOffset, ObjInsArg::RelocOffset) => {
            let left_reloc = left_diff.ins.as_ref().and_then(|i| i.reloc.as_ref());
            let right_reloc = right_diff.ins.as_ref().and_then(|i| i.reloc.as_ref());
            reloc_eq(left_reloc, right_reloc)
        }
        _ => false,
    }
}
/// Running state carried across all `compare_ins` calls for one symbol pair.
#[derive(Default)]
struct InsDiffState {
// Number of instruction rows counted as differing so far.
diff_count: usize,
// Next unused highlight index for mismatching left-hand arguments.
left_arg_idx: usize,
// Next unused highlight index for mismatching right-hand arguments.
right_arg_idx: usize,
// Formatted left-hand argument text -> highlight index already assigned to it.
left_args_idx: BTreeMap<String, usize>,
// Formatted right-hand argument text -> highlight index already assigned to it.
right_args_idx: BTreeMap<String, usize>,
}
/// Outcome of comparing one aligned pair of instruction rows.
#[derive(Default)]
struct InsDiffResult {
// Kind of difference found; stays at the default when nothing differs.
kind: ObjInsDiffKind,
// One entry per compared left-hand argument; `None` where the arguments matched.
left_args_diff: Vec<Option<ObjInsArgDiff>>,
// One entry per compared right-hand argument; `None` where the arguments matched.
right_args_diff: Vec<Option<ObjInsArgDiff>>,
}
fn compare_ins(
left: &ObjInsDiff,
right: &ObjInsDiff,
state: &mut InsDiffState,
) -> Result<InsDiffResult> {
let mut result = InsDiffResult::default();
if let (Some(left_ins), Some(right_ins)) = (&left.ins, &right.ins) {
if left_ins.args.len() != right_ins.args.len() || left_ins.ins.op != right_ins.ins.op {
// Totally different op
result.kind = ObjInsDiffKind::Replace;
state.diff_count += 1;
return Ok(result);
}
if left_ins.mnemonic != right_ins.mnemonic {
// Same op but different mnemonic, still cmp args
result.kind = ObjInsDiffKind::OpMismatch;
state.diff_count += 1;
}
for (a, b) in left_ins.args.iter().zip(&right_ins.args) {
if arg_eq(a, b, left, right) {
result.left_args_diff.push(None);
result.right_args_diff.push(None);
} else {
if result.kind == ObjInsDiffKind::None {
result.kind = ObjInsDiffKind::ArgMismatch;
state.diff_count += 1;
}
let a_str = match a {
ObjInsArg::Arg(arg) => format!("{}", arg),
ObjInsArg::Reloc | ObjInsArg::RelocOffset => String::new(),
};
let a_diff = if let Some(idx) = state.left_args_idx.get(&a_str) {
ObjInsArgDiff { idx: *idx }
} else {
let idx = state.left_arg_idx;
state.left_args_idx.insert(a_str, idx);
state.left_arg_idx += 1;
ObjInsArgDiff { idx }
};
let b_str = match b {
ObjInsArg::Arg(arg) => format!("{}", arg),
ObjInsArg::Reloc | ObjInsArg::RelocOffset => String::new(),
};
let b_diff = if let Some(idx) = state.right_args_idx.get(&b_str) {
ObjInsArgDiff { idx: *idx }
} else {
let idx = state.right_arg_idx;
state.right_args_idx.insert(b_str, idx);
state.right_arg_idx += 1;
ObjInsArgDiff { idx }
};
result.left_args_diff.push(Some(a_diff));
result.right_args_diff.push(Some(b_diff));
}
}
} else if left.ins.is_some() {
result.kind = ObjInsDiffKind::Delete;
state.diff_count += 1;
} else {
result.kind = ObjInsDiffKind::Insert;
state.diff_count += 1;
}
Ok(result)
}
/// Returns a mutable reference to the first section of `obj` named `name`, if any.
fn find_section<'a>(obj: &'a mut ObjInfo, name: &str) -> Option<&'a mut ObjSection> {
    for section in obj.sections.iter_mut() {
        if section.name == name {
            return Some(section);
        }
    }
    None
}
/// Returns a mutable reference to the first symbol in `symbols` named `name`, if any.
fn find_symbol<'a>(symbols: &'a mut [ObjSymbol], name: &str) -> Option<&'a mut ObjSymbol> {
    for symbol in symbols.iter_mut() {
        if symbol.name == name {
            return Some(symbol);
        }
    }
    None
}
/// Pairs up the symbols of two objects and diffs them.
///
/// Sections are matched by name, and symbols within matching sections are
/// matched by name. Each matched pair records the other's name in
/// `diff_symbol`; pairs in code sections are additionally diffed with
/// `diff_code`.
///
/// # Errors
/// Propagates the first error returned by `diff_code`.
pub fn diff_objs(left: &mut ObjInfo, right: &mut ObjInfo) -> Result<()> {
    for left_section in left.sections.iter_mut() {
        let right_section = match find_section(right, &left_section.name) {
            Some(section) => section,
            None => continue,
        };
        let is_code = left_section.kind == ObjSectionKind::Code;

        for left_symbol in left_section.symbols.iter_mut() {
            let right_symbol = match find_symbol(&mut right_section.symbols, &left_symbol.name) {
                Some(symbol) => symbol,
                None => continue,
            };
            left_symbol.diff_symbol = Some(right_symbol.name.clone());
            right_symbol.diff_symbol = Some(left_symbol.name.clone());

            if is_code {
                diff_code(
                    &left_section.data,
                    &right_section.data,
                    left_symbol,
                    right_symbol,
                    &left_section.relocations,
                    &right_section.relocations,
                )?;
            }
        }
    }
    Ok(())
}

253
src/editops.rs Normal file
View File

@ -0,0 +1,253 @@
/// Adapted from https://crates.io/crates/rapidfuzz
// Copyright 2020 maxbachmann
//
// Permission is hereby granted, free of charge, to any
// person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the
// Software without restriction, including without
// limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice
// shall be included in all copies or substantial portions
// of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
/// Kind of a single step in an edit script that turns one byte string into another.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum LevEditType {
/// Both strings hold the same element at this position; nothing changes.
/// The traceback in this file skips these steps instead of storing them.
Keep,
/// The element of the first string is substituted by the element of the second string.
Replace,
/// An element of the second string has no counterpart in the first string.
Insert,
/// An element of the first string has no counterpart in the second string.
Delete,
}
/// One edit operation together with the positions it applies to in both strings.
#[derive(Debug, PartialEq, Eq)]
pub struct LevEditOp {
/// Editing operation type.
pub op_type: LevEditType,
/// Source position: index into the first string.
pub first_start: usize,
/// Destination position: index into the second string.
pub second_start: usize,
}
/// A block described by a start position in each of two strings and a length.
///
/// NOTE(review): not used by any code visible in this file; presumably describes a run of
/// `len` equal elements shared by both strings — confirm against the callers.
#[derive(Debug, PartialEq, Eq)]
pub struct LevMatchingBlock {
/// Start index of the block in the first string.
pub first_start: usize,
/// Start index of the block in the second string.
pub second_start: usize,
/// Number of elements in the block.
pub len: usize,
}
pub fn editops_find(query: &[u8], choice: &[u8]) -> Vec<LevEditOp> {
let string_affix = Affix::find(query, choice);
let first_string_len = string_affix.first_string_len;
let second_string_len = string_affix.second_string_len;
let prefix_len = string_affix.prefix_len;
let first_string = &query[prefix_len..prefix_len + first_string_len];
let second_string = &choice[prefix_len..prefix_len + second_string_len];
let matrix_columns = first_string_len + 1;
let matrix_rows = second_string_len + 1;
// TODO maybe use an actual matrix for readability
let mut cache_matrix: Vec<usize> = vec![0; matrix_rows * matrix_columns];
for (i, elem) in cache_matrix.iter_mut().enumerate().take(matrix_rows) {
*elem = i;
}
for i in 1..matrix_columns {
cache_matrix[matrix_rows * i] = i;
}
for (i, char1) in first_string.iter().enumerate() {
let mut prev = i * matrix_rows;
let current = prev + matrix_rows;
let mut x = i + 1;
for (p, char2p) in second_string.iter().enumerate() {
let mut c3 = cache_matrix[prev] + (char1 != char2p) as usize;
prev += 1;
x += 1;
if x >= c3 {
x = c3;
}
c3 = cache_matrix[prev] + 1;
if x > c3 {
x = c3;
}
cache_matrix[current + 1 + p] = x;
}
}
editops_from_cost_matrix(
first_string,
second_string,
matrix_columns,
matrix_rows,
prefix_len,
cache_matrix,
)
}
/// Walks a filled Levenshtein cost matrix backwards and returns the edit operations it encodes.
///
/// * `string1` / `string2` - the two byte strings the matrix was computed for.
/// * `len1` / `len2` - matrix dimensions, i.e. `string1.len() + 1` and `string2.len() + 1`.
///   The matrix is stored row-major with rows of `len2` cells.
/// * `prefix_len` - offset added to every reported position.
/// * `cache_matrix` - the cost matrix; its last cell holds the total edit distance.
///
/// The returned operations are ordered from the start of the strings to their end.
/// `Keep` steps are followed during the walk but not stored.
///
/// # Panics
/// Panics if no neighbouring cell explains the value of the current cell, which means the
/// matrix does not belong to the given strings.
fn editops_from_cost_matrix(
    string1: &[u8],
    string2: &[u8],
    len1: usize,
    len2: usize,
    prefix_len: usize,
    cache_matrix: Vec<usize>,
) -> Vec<LevEditOp> {
    // Direction of the previous step: -1 after an insert, 1 after a delete, 0 otherwise.
    let mut dir = 0;
    let mut i = len1 - 1;
    let mut j = len2 - 1;
    let mut p = len1 * len2 - 1;
    // The bottom-right cell is the edit distance, i.e. the number of stored operations.
    let mut ops: Vec<LevEditOp> = Vec::with_capacity(cache_matrix[p]);
    while i > 0 || j > 0 {
        let current_value = cache_matrix[p];
        // Prefer continuing in the direction of the previous step, then the diagonal.
        let op_type = if dir == -1 && j > 0 && current_value == cache_matrix[p - 1] + 1 {
            LevEditType::Insert
        } else if dir == 1 && i > 0 && current_value == cache_matrix[p - len2] + 1 {
            LevEditType::Delete
        } else if i > 0
            && j > 0
            && current_value == cache_matrix[p - len2 - 1]
            && string1[i - 1] == string2[j - 1]
        {
            LevEditType::Keep
        } else if i > 0 && j > 0 && current_value == cache_matrix[p - len2 - 1] + 1 {
            LevEditType::Replace
        }
        // We can't turn directly from -1 to 1; in that case it would be better to go
        // diagonally, so straight steps from a standstill are only tried last (dir == 0).
        else if dir == 0 && j > 0 && current_value == cache_matrix[p - 1] + 1 {
            LevEditType::Insert
        } else if dir == 0 && i > 0 && current_value == cache_matrix[p - len2] + 1 {
            LevEditType::Delete
        } else {
            panic!("something went terribly wrong");
        };
        match op_type {
            LevEditType::Insert => {
                j -= 1;
                p -= 1;
                dir = -1;
            }
            LevEditType::Delete => {
                i -= 1;
                p -= len2;
                dir = 1;
            }
            LevEditType::Replace => {
                i -= 1;
                j -= 1;
                p -= len2 + 1;
                dir = 0;
            }
            LevEditType::Keep => {
                i -= 1;
                j -= 1;
                p -= len2 + 1;
                dir = 0;
                // Keep steps do not have to be stored.
                continue;
            }
        };
        // Collected back-to-front; appending and reversing once afterwards avoids the
        // quadratic cost of inserting every operation at the front of the vector.
        ops.push(LevEditOp {
            op_type,
            first_start: i + prefix_len,
            second_start: j + prefix_len,
        });
    }
    ops.reverse();
    ops
}
/// Describes the part of two byte strings that remains after their common prefix and common
/// suffix have been stripped.
pub struct Affix {
    /// Number of leading bytes shared by both strings.
    pub prefix_len: usize,
    /// Length of the differing middle part of the first string (starts at `prefix_len`).
    pub first_string_len: usize,
    /// Length of the differing middle part of the second string (starts at `prefix_len`).
    pub second_string_len: usize,
}

impl Affix {
    /// Measures the common prefix and suffix of `first_string` and `second_string`.
    ///
    /// Stripping the shared ends keeps the quadratic Levenshtein computation limited to the
    /// part of the inputs that actually differs. The suffix is measured on what is left after
    /// the prefix, so the two never overlap.
    pub fn find(first_string: &[u8], second_string: &[u8]) -> Affix {
        let prefix_len =
            first_string.iter().zip(second_string).take_while(|(lhs, rhs)| lhs == rhs).count();
        let first_rest = &first_string[prefix_len..];
        let second_rest = &second_string[prefix_len..];
        let suffix_len = first_rest
            .iter()
            .rev()
            .zip(second_rest.iter().rev())
            .take_while(|(lhs, rhs)| lhs == rhs)
            .count();
        Affix {
            prefix_len,
            first_string_len: first_rest.len() - suffix_len,
            second_string_len: second_rest.len() - suffix_len,
        }
    }
}

242
src/elf.rs Normal file
View File

@ -0,0 +1,242 @@
use std::{fs, path::Path};
use anyhow::{Context, Result};
use cwdemangle::demangle;
use flagset::Flags;
use object::{
Object, ObjectSection, ObjectSymbol, RelocationKind, RelocationTarget, SectionKind, SymbolKind,
SymbolSection,
};
use crate::obj::{
ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet,
ObjSymbolFlags,
};
/// Maps a section kind reported by the `object` crate onto this crate's section category.
///
/// Text becomes code, writable and read-only data both become data, and uninitialized data
/// becomes BSS.
///
/// # Panics
/// Panics on any other section kind.
fn to_obj_section_kind(kind: SectionKind) -> ObjSectionKind {
    match kind {
        SectionKind::UninitializedData => ObjSectionKind::Bss,
        SectionKind::ReadOnlyData | SectionKind::Data => ObjSectionKind::Data,
        SectionKind::Text => ObjSectionKind::Code,
        unhandled => panic!("Unhandled section kind {:?}", unhandled),
    }
}
/// Converts a symbol read by the `object` crate into this crate's [`ObjSymbol`].
///
/// An empty symbol name is reported on stdout and replaced by `"?"`. The binding flags
/// (global, local, common, weak) are copied over, the name is run through the demangler, and
/// a size of zero is recorded as "size not known". Diff-related fields start out empty.
///
/// # Errors
/// Fails when the symbol name cannot be read.
fn to_obj_symbol(symbol: &object::Symbol<'_, '_>) -> Result<ObjSymbol> {
    let raw_name = symbol.name().context("Failed to process symbol name")?;
    let name = if raw_name.is_empty() {
        println!("Found empty sym: {:?}", symbol);
        "?"
    } else {
        raw_name
    };

    let mut flags = ObjSymbolFlagSet(ObjSymbolFlags::none());
    for (is_set, flag) in [
        (symbol.is_global(), ObjSymbolFlags::Global),
        (symbol.is_local(), ObjSymbolFlags::Local),
        (symbol.is_common(), ObjSymbolFlags::Common),
        (symbol.is_weak(), ObjSymbolFlags::Weak),
    ] {
        if is_set {
            flags = ObjSymbolFlagSet(flags.0 | flag);
        }
    }

    let size = symbol.size();
    Ok(ObjSymbol {
        name: name.to_string(),
        demangled_name: demangle(name),
        address: symbol.address(),
        size,
        size_known: size != 0,
        flags,
        diff_symbol: None,
        instructions: vec![],
        match_percent: 0.0,
    })
}
// Numeric relocation type codes for PowerPC ELF objects (`R_PPC_*`).
// NOTE(review): the code that consumes these lies outside this view; presumably they are
// matched against the raw ELF relocation type when relocations are parsed — confirm.
const R_PPC_ADDR16_LO: u32 = 4;
const R_PPC_ADDR16_HI: u32 = 5;
const R_PPC_ADDR16_HA: u32 = 6;
const R_PPC_REL24: u32 = 10;
const R_PPC_REL14: u32 = 11;
const R_PPC_EMB_SDA21: u32 = 109;
/// Collects the sections of `obj_file` that are relevant for diffing.
///
/// Empty sections and sections that are neither text, data, read-only data nor uninitialized
/// data are skipped. Every remaining section is copied (name, kind, address, size, raw bytes
/// and section index) into an [`ObjSection`] with empty symbol and relocation lists. The
/// result is sorted by section name.
///
/// # Errors
/// Fails when a section's name or data cannot be read.
fn filter_sections(obj_file: &object::File<'_>) -> Result<Vec<ObjSection>> {
    let mut sections: Vec<ObjSection> = Vec::new();
    for section in obj_file.sections() {
        if section.size() == 0 {
            continue;
        }
        let kind = section.kind();
        let is_supported = matches!(
            kind,
            SectionKind::Text
                | SectionKind::Data
                | SectionKind::ReadOnlyData
                | SectionKind::UninitializedData
        );
        if !is_supported {
            continue;
        }
        let name = section.name().context("Failed to process section name")?;
        let data = section.data().context("Failed to read section data")?;
        sections.push(ObjSection {
            name: name.to_string(),
            kind: to_obj_section_kind(kind),
            address: section.address(),
            size: section.size(),
            data: data.to_vec(),
            index: section.index().0,
            symbols: Vec::new(),
            relocations: Vec::new(),
        });
    }
    sections.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
    Ok(sections)
}
fn symbols_by_section(obj_file: &object::File<'_>, section: &ObjSection) -> Result<Vec<ObjSymbol>> {
let mut result = Vec::<ObjSymbol>::new();
for symbol in obj_file.symbols() {
if symbol.kind() == SymbolKind::Section {
continue;
}
if let Some(index) = symbol.section().index() {
if index.0 == section.index {
if symbol.is_local() && section.kind == ObjSectionKind::Code {
// TODO strip local syms in diff?
let name = symbol.name().context("Failed to process symbol name")?;
if name.starts_with("lbl_") {
continue;
}
}
result.push(to_obj_symbol(&