mirror of
https://github.com/encounter/objdiff.git
synced 2025-12-18 17:35:24 +00:00
492 lines
16 KiB
Rust
492 lines
16 KiB
Rust
use alloc::{
|
|
borrow::Cow,
|
|
boxed::Box,
|
|
format,
|
|
string::{String, ToString},
|
|
vec::Vec,
|
|
};
|
|
use core::{
|
|
any::Any,
|
|
ffi::CStr,
|
|
fmt::{self, Debug},
|
|
};
|
|
|
|
use anyhow::{Result, bail};
|
|
use encoding_rs::SHIFT_JIS;
|
|
use object::Endian as _;
|
|
|
|
use crate::{
|
|
diff::{
|
|
DiffObjConfig, DiffSide,
|
|
display::{ContextItem, HoverItem, InstructionPart},
|
|
},
|
|
obj::{
|
|
FlowAnalysisResult, InstructionArg, InstructionRef, Object, ParsedInstruction, Relocation,
|
|
RelocationFlags, ResolvedInstructionRef, ResolvedSymbol, Section, Symbol, SymbolFlagSet,
|
|
SymbolKind,
|
|
},
|
|
util::ReallySigned,
|
|
};
|
|
|
|
#[cfg(feature = "arm")]
|
|
pub mod arm;
|
|
#[cfg(feature = "arm64")]
|
|
pub mod arm64;
|
|
#[cfg(feature = "mips")]
|
|
pub mod mips;
|
|
#[cfg(feature = "ppc")]
|
|
pub mod ppc;
|
|
#[cfg(feature = "superh")]
|
|
pub mod superh;
|
|
#[cfg(feature = "x86")]
|
|
pub mod x86;
|
|
|
|
pub const OPCODE_INVALID: u16 = u16::MAX;
|
|
pub const OPCODE_DATA: u16 = u16::MAX - 1;
|
|
|
|
/// Represents the type of data associated with an instruction
|
|
#[derive(PartialEq)]
|
|
pub enum DataType {
|
|
Int8,
|
|
Int16,
|
|
Int32,
|
|
Int64,
|
|
Float,
|
|
Double,
|
|
Bytes,
|
|
String,
|
|
}
|
|
|
|
impl fmt::Display for DataType {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
f.write_str(match self {
|
|
DataType::Int8 => "Int8",
|
|
DataType::Int16 => "Int16",
|
|
DataType::Int32 => "Int32",
|
|
DataType::Int64 => "Int64",
|
|
DataType::Float => "Float",
|
|
DataType::Double => "Double",
|
|
DataType::Bytes => "Bytes",
|
|
DataType::String => "String",
|
|
})
|
|
}
|
|
}
|
|
|
|
impl DataType {
|
|
pub fn display_labels(&self, endian: object::Endianness, bytes: &[u8]) -> Vec<String> {
|
|
let mut strs = Vec::new();
|
|
for (literal, label_override) in self.display_literals(endian, bytes) {
|
|
let label = label_override.unwrap_or_else(|| self.to_string());
|
|
strs.push(format!("{label}: {literal}"))
|
|
}
|
|
strs
|
|
}
|
|
|
|
pub fn display_literals(
|
|
&self,
|
|
endian: object::Endianness,
|
|
bytes: &[u8],
|
|
) -> Vec<(String, Option<String>)> {
|
|
let mut strs = Vec::new();
|
|
if self.required_len().is_some_and(|l| bytes.len() < l) {
|
|
log::warn!(
|
|
"Failed to display a symbol value for a symbol whose size is too small for instruction referencing it."
|
|
);
|
|
return strs;
|
|
}
|
|
let mut bytes = bytes;
|
|
if self.required_len().is_some_and(|l| bytes.len() > l) {
|
|
// If the symbol's size is larger a single instance of this data type, we take just the
|
|
// bytes necessary for one of them in order to display the first element of the array.
|
|
bytes = &bytes[0..self.required_len().unwrap()];
|
|
// TODO: Attempt to interpret large symbols as arrays of a smaller type and show all
|
|
// elements of the array instead. https://github.com/encounter/objdiff/issues/124
|
|
// However, note that the stride of an array can not always be determined just by the
|
|
// data type guessed by the single instruction accessing it. There can also be arrays of
|
|
// structs that contain multiple elements of different types, so if other elements after
|
|
// the first one were to be displayed in this manner, they may be inaccurate.
|
|
}
|
|
|
|
match self {
|
|
DataType::Int8 => {
|
|
let i = i8::from_ne_bytes(bytes.try_into().unwrap());
|
|
strs.push((format!("{i:#x}"), None));
|
|
|
|
if i < 0 {
|
|
strs.push((format!("{:#x}", ReallySigned(i)), None));
|
|
}
|
|
}
|
|
DataType::Int16 => {
|
|
let i = endian.read_i16_bytes(bytes.try_into().unwrap());
|
|
strs.push((format!("{i:#x}"), None));
|
|
|
|
if i < 0 {
|
|
strs.push((format!("{:#x}", ReallySigned(i)), None));
|
|
}
|
|
}
|
|
DataType::Int32 => {
|
|
let i = endian.read_i32_bytes(bytes.try_into().unwrap());
|
|
strs.push((format!("{i:#x}"), None));
|
|
|
|
if i < 0 {
|
|
strs.push((format!("{:#x}", ReallySigned(i)), None));
|
|
}
|
|
}
|
|
DataType::Int64 => {
|
|
let i = endian.read_i64_bytes(bytes.try_into().unwrap());
|
|
strs.push((format!("{i:#x}"), None));
|
|
|
|
if i < 0 {
|
|
strs.push((format!("{:#x}", ReallySigned(i)), None));
|
|
}
|
|
}
|
|
DataType::Float => {
|
|
let bytes: [u8; 4] = bytes.try_into().unwrap();
|
|
strs.push((
|
|
format!("{:?}f", match endian {
|
|
object::Endianness::Little => f32::from_le_bytes(bytes),
|
|
object::Endianness::Big => f32::from_be_bytes(bytes),
|
|
}),
|
|
None,
|
|
));
|
|
}
|
|
DataType::Double => {
|
|
let bytes: [u8; 8] = bytes.try_into().unwrap();
|
|
strs.push((
|
|
format!("{:?}", match endian {
|
|
object::Endianness::Little => f64::from_le_bytes(bytes),
|
|
object::Endianness::Big => f64::from_be_bytes(bytes),
|
|
}),
|
|
None,
|
|
));
|
|
}
|
|
DataType::Bytes => {
|
|
strs.push((format!("{bytes:#?}"), None));
|
|
}
|
|
DataType::String => {
|
|
if let Ok(cstr) = CStr::from_bytes_until_nul(bytes) {
|
|
strs.push((format!("{cstr:?}"), None));
|
|
}
|
|
if let Some(nul_idx) = bytes.iter().position(|&c| c == b'\0') {
|
|
let (cow, _, had_errors) = SHIFT_JIS.decode(&bytes[..nul_idx]);
|
|
if !had_errors {
|
|
let str = format!("{cow:?}");
|
|
// Only add the Shift JIS string if it's different from the ASCII string.
|
|
if !strs.iter().any(|x| x.0 == str) {
|
|
strs.push((str, Some("Shift JIS".into())));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
strs
|
|
}
|
|
|
|
fn required_len(&self) -> Option<usize> {
|
|
match self {
|
|
DataType::Int8 => Some(1),
|
|
DataType::Int16 => Some(2),
|
|
DataType::Int32 => Some(4),
|
|
DataType::Int64 => Some(8),
|
|
DataType::Float => Some(4),
|
|
DataType::Double => Some(8),
|
|
DataType::Bytes => None,
|
|
DataType::String => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl dyn Arch {
|
|
/// Generate a list of instructions references (offset, size, opcode) from the given code.
|
|
///
|
|
/// See [`scan_instructions_internal`] for more details.
|
|
pub fn scan_instructions(
|
|
&self,
|
|
resolved: ResolvedSymbol,
|
|
diff_config: &DiffObjConfig,
|
|
) -> Result<Vec<InstructionRef>> {
|
|
let mut result = self.scan_instructions_internal(
|
|
resolved.symbol.address,
|
|
resolved.data,
|
|
resolved.section_index,
|
|
&resolved.section.relocations,
|
|
diff_config,
|
|
)?;
|
|
|
|
let function_start = resolved.symbol.address;
|
|
let function_end = function_start + resolved.symbol.size;
|
|
|
|
// Remove any branch destinations that are outside the function range
|
|
for ins in result.iter_mut() {
|
|
if let Some(branch_dest) = ins.branch_dest
|
|
&& (branch_dest < function_start || branch_dest >= function_end)
|
|
{
|
|
ins.branch_dest = None;
|
|
}
|
|
}
|
|
|
|
// Resolve relocation targets within the same function to branch destinations
|
|
let mut ins_iter = result.iter_mut().peekable();
|
|
'outer: for reloc in resolved
|
|
.section
|
|
.relocations
|
|
.iter()
|
|
.skip_while(|r| r.address < function_start)
|
|
.take_while(|r| r.address < function_end)
|
|
{
|
|
let ins = loop {
|
|
let Some(ins) = ins_iter.peek_mut() else {
|
|
break 'outer;
|
|
};
|
|
if reloc.address < ins.address {
|
|
continue 'outer;
|
|
}
|
|
let ins = ins_iter.next().unwrap();
|
|
if reloc.address >= ins.address && reloc.address < ins.address + ins.size as u64 {
|
|
break ins;
|
|
}
|
|
};
|
|
// Clear existing branch destination for instructions with relocations
|
|
ins.branch_dest = None;
|
|
let Some(target) = resolved.obj.symbols.get(reloc.target_symbol) else {
|
|
continue;
|
|
};
|
|
if target.section != Some(resolved.section_index) {
|
|
continue;
|
|
}
|
|
let Some(target_address) = target.address.checked_add_signed(reloc.addend) else {
|
|
continue;
|
|
};
|
|
// If the target address is within the function range, set it as a branch destination
|
|
if target_address >= function_start && target_address < function_end {
|
|
ins.branch_dest = Some(target_address);
|
|
}
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
/// Parse an instruction to gather its mnemonic and arguments for more detailed comparison.
|
|
///
|
|
/// This is called only when we need to compare the arguments of an instruction.
|
|
pub fn process_instruction(
|
|
&self,
|
|
resolved: ResolvedInstructionRef,
|
|
diff_config: &DiffObjConfig,
|
|
) -> Result<ParsedInstruction> {
|
|
let mut mnemonic = None;
|
|
let mut args = Vec::with_capacity(8);
|
|
let mut relocation_emitted = false;
|
|
self.display_instruction(resolved, diff_config, &mut |part| {
|
|
match part {
|
|
InstructionPart::Opcode(m, _) => mnemonic = Some(Cow::Owned(m.into_owned())),
|
|
InstructionPart::Arg(arg) => {
|
|
if arg == InstructionArg::Reloc {
|
|
relocation_emitted = true;
|
|
// If the relocation was resolved to a branch destination, emit that instead.
|
|
if let Some(dest) = resolved.ins_ref.branch_dest {
|
|
args.push(InstructionArg::BranchDest(dest));
|
|
return Ok(());
|
|
}
|
|
}
|
|
args.push(arg.into_static());
|
|
}
|
|
_ => {}
|
|
}
|
|
Ok(())
|
|
})?;
|
|
// If the instruction has a relocation, but we didn't format it in the display, add it to
|
|
// the end of the arguments list.
|
|
if resolved.relocation.is_some() && !relocation_emitted {
|
|
args.push(InstructionArg::Reloc);
|
|
}
|
|
Ok(ParsedInstruction {
|
|
ins_ref: resolved.ins_ref,
|
|
mnemonic: mnemonic.unwrap_or_default(),
|
|
args,
|
|
})
|
|
}
|
|
}
|
|
|
|
pub trait Arch: Any + Debug + Send + Sync {
|
|
/// Finishes arch-specific initialization that must be done after sections have been combined.
|
|
fn post_init(&mut self, _sections: &[Section], _symbols: &[Symbol]) {}
|
|
|
|
/// Generate a list of instructions references (offset, size, opcode) from the given code.
|
|
///
|
|
/// The opcode IDs are used to generate the initial diff. Implementations should do as little
|
|
/// parsing as possible here: just enough to identify the base instruction opcode, size, and
|
|
/// possible branch destination (for visual representation). As needed, instructions are parsed
|
|
/// via `process_instruction` to compare their arguments.
|
|
fn scan_instructions_internal(
|
|
&self,
|
|
address: u64,
|
|
code: &[u8],
|
|
section_index: usize,
|
|
relocations: &[Relocation],
|
|
diff_config: &DiffObjConfig,
|
|
) -> Result<Vec<InstructionRef>>;
|
|
|
|
/// Format an instruction for display.
|
|
///
|
|
/// Implementations should call the callback for each part of the instruction: usually the
|
|
/// mnemonic and arguments, plus any separators and visual formatting.
|
|
fn display_instruction(
|
|
&self,
|
|
resolved: ResolvedInstructionRef,
|
|
diff_config: &DiffObjConfig,
|
|
cb: &mut dyn FnMut(InstructionPart) -> Result<()>,
|
|
) -> Result<()>;
|
|
|
|
/// Generate a list of fake relocations from the given code that represent pooled data accesses.
|
|
fn generate_pooled_relocations(
|
|
&self,
|
|
_address: u64,
|
|
_code: &[u8],
|
|
_relocations: &[Relocation],
|
|
_symbols: &[Symbol],
|
|
) -> Vec<Relocation> {
|
|
Vec::new()
|
|
}
|
|
|
|
// Perform detailed data flow analysis
|
|
fn data_flow_analysis(
|
|
&self,
|
|
_obj: &Object,
|
|
_symbol: &Symbol,
|
|
_code: &[u8],
|
|
_relocations: &[Relocation],
|
|
) -> Option<Box<dyn FlowAnalysisResult>> {
|
|
None
|
|
}
|
|
|
|
fn relocation_override(
|
|
&self,
|
|
_file: &object::File<'_>,
|
|
_section: &object::Section,
|
|
_address: u64,
|
|
_relocation: &object::Relocation,
|
|
) -> Result<Option<RelocationOverride>> {
|
|
Ok(None)
|
|
}
|
|
|
|
fn reloc_name(&self, _flags: RelocationFlags) -> Option<&'static str> { None }
|
|
|
|
fn data_reloc_size(&self, flags: RelocationFlags) -> usize;
|
|
|
|
fn symbol_address(&self, address: u64, _kind: SymbolKind) -> u64 { address }
|
|
|
|
fn extra_symbol_flags(&self, _symbol: &object::Symbol) -> SymbolFlagSet {
|
|
SymbolFlagSet::default()
|
|
}
|
|
|
|
fn guess_data_type(
|
|
&self,
|
|
_resolved: ResolvedInstructionRef,
|
|
_bytes: &[u8],
|
|
) -> Option<DataType> {
|
|
None
|
|
}
|
|
|
|
fn symbol_hover(&self, _obj: &Object, _symbol_index: usize) -> Vec<HoverItem> { Vec::new() }
|
|
|
|
fn symbol_context(&self, _obj: &Object, _symbol_index: usize) -> Vec<ContextItem> { Vec::new() }
|
|
|
|
fn instruction_hover(
|
|
&self,
|
|
_obj: &Object,
|
|
_resolved: ResolvedInstructionRef,
|
|
) -> Vec<HoverItem> {
|
|
Vec::new()
|
|
}
|
|
|
|
fn instruction_context(
|
|
&self,
|
|
_obj: &Object,
|
|
_resolved: ResolvedInstructionRef,
|
|
) -> Vec<ContextItem> {
|
|
Vec::new()
|
|
}
|
|
|
|
fn infer_function_size(
|
|
&self,
|
|
symbol: &Symbol,
|
|
_section: &Section,
|
|
next_address: u64,
|
|
) -> Result<u64> {
|
|
Ok(next_address.saturating_sub(symbol.address))
|
|
}
|
|
}
|
|
|
|
pub fn new_arch(object: &object::File, diff_side: DiffSide) -> Result<Box<dyn Arch>> {
|
|
use object::Object as _;
|
|
// Avoid unused warnings on non-mips builds
|
|
let _ = diff_side;
|
|
|
|
Ok(match object.architecture() {
|
|
#[cfg(feature = "ppc")]
|
|
object::Architecture::PowerPc | object::Architecture::PowerPc64 => {
|
|
Box::new(ppc::ArchPpc::new(object)?)
|
|
}
|
|
#[cfg(feature = "mips")]
|
|
object::Architecture::Mips => Box::new(mips::ArchMips::new(object, diff_side)?),
|
|
#[cfg(feature = "x86")]
|
|
object::Architecture::I386 | object::Architecture::X86_64 => {
|
|
Box::new(x86::ArchX86::new(object)?)
|
|
}
|
|
#[cfg(feature = "arm")]
|
|
object::Architecture::Arm => Box::new(arm::ArchArm::new(object)?),
|
|
#[cfg(feature = "arm64")]
|
|
object::Architecture::Aarch64 => Box::new(arm64::ArchArm64::new(object)?),
|
|
#[cfg(feature = "superh")]
|
|
object::Architecture::SuperH => Box::new(superh::ArchSuperH::new(object)?),
|
|
arch => bail!("Unsupported architecture: {arch:?}"),
|
|
})
|
|
}
|
|
|
|
#[derive(Debug, Default)]
|
|
pub struct ArchDummy {}
|
|
|
|
impl ArchDummy {
|
|
pub fn new() -> Box<Self> { Box::new(Self {}) }
|
|
}
|
|
|
|
impl Arch for ArchDummy {
|
|
fn scan_instructions_internal(
|
|
&self,
|
|
_address: u64,
|
|
_code: &[u8],
|
|
_section_index: usize,
|
|
_relocations: &[Relocation],
|
|
_diff_config: &DiffObjConfig,
|
|
) -> Result<Vec<InstructionRef>> {
|
|
Ok(Vec::new())
|
|
}
|
|
|
|
fn display_instruction(
|
|
&self,
|
|
_resolved: ResolvedInstructionRef,
|
|
_diff_config: &DiffObjConfig,
|
|
_cb: &mut dyn FnMut(InstructionPart) -> Result<()>,
|
|
) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
fn data_reloc_size(&self, _flags: RelocationFlags) -> usize { 0 }
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
pub enum RelocationOverrideTarget {
|
|
Keep,
|
|
Skip,
|
|
Symbol(object::SymbolIndex),
|
|
Section(object::SectionIndex),
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
pub struct RelocationOverride {
|
|
pub target: RelocationOverrideTarget,
|
|
pub addend: i64,
|
|
}
|