diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 78819f0..7111341 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -1,11 +1,13 @@ -use std::{borrow::Cow, collections::BTreeMap}; +use std::{borrow::Cow, collections::BTreeMap, ffi::CStr}; use anyhow::{bail, Result}; +use byteorder::ByteOrder; use object::{Architecture, File, Object, ObjectSymbol, Relocation, RelocationFlags, Symbol}; use crate::{ diff::DiffObjConfig, obj::{ObjIns, ObjReloc, ObjSection}, + util::ReallySigned, }; #[cfg(feature = "arm")] @@ -17,6 +19,97 @@ pub mod ppc; #[cfg(feature = "x86")] pub mod x86; +/// Represents the type of data associated with an instruction +pub enum DataType { + Int8, + Int16, + Int32, + Int64, + Int128, + Float, + Double, + Bytes, + String, +} + +impl DataType { + pub fn display_bytes(&self, bytes: &[u8]) -> Option { + if self.required_len().is_some_and(|l| bytes.len() < l) { + return None; + } + + match self { + DataType::Int8 => { + let i = i8::from_ne_bytes(bytes.try_into().unwrap()); + if i < 0 { + format!("Int8: {:#x} ({:#x})", i, ReallySigned(i)) + } else { + format!("Int8: {:#x}", i) + } + } + DataType::Int16 => { + let i = Endian::read_i16(bytes); + if i < 0 { + format!("Int16: {:#x} ({:#x})", i, ReallySigned(i)) + } else { + format!("Int16: {:#x}", i) + } + } + DataType::Int32 => { + let i = Endian::read_i32(bytes); + if i < 0 { + format!("Int32: {:#x} ({:#x})", i, ReallySigned(i)) + } else { + format!("Int32: {:#x}", i) + } + } + DataType::Int64 => { + let i = Endian::read_i64(bytes); + if i < 0 { + format!("Int64: {:#x} ({:#x})", i, ReallySigned(i)) + } else { + format!("Int64: {:#x}", i) + } + } + DataType::Int128 => { + let i = Endian::read_i128(bytes); + if i < 0 { + format!("Int128: {:#x} ({:#x})", i, ReallySigned(i)) + } else { + format!("Int128: {:#x}", i) + } + } + DataType::Float => { + format!("Float: {}", Endian::read_f32(bytes)) + } + DataType::Double => { + format!("Double: {}", Endian::read_f64(bytes)) + } + DataType::Bytes => { + format!("Bytes: {:#?}", bytes) + } + DataType::String => { + format!("String: {:?}", CStr::from_bytes_until_nul(bytes).ok()?) + } + } + .into() + } + + fn required_len(&self) -> Option { + match self { + DataType::Int8 => Some(1), + DataType::Int16 => Some(2), + DataType::Int32 => Some(4), + DataType::Int64 => Some(8), + DataType::Int128 => Some(16), + DataType::Float => Some(4), + DataType::Double => Some(8), + DataType::Bytes => None, + DataType::String => None, + } + } +} + pub trait ObjArch: Send + Sync { fn process_code( &self, @@ -42,6 +135,12 @@ pub trait ObjArch: Send + Sync { fn symbol_address(&self, symbol: &Symbol) -> u64 { symbol.address() } + fn guess_data_type(&self, _instruction: &ObjIns) -> Option { None } + + fn display_data_type(&self, _ty: DataType, bytes: &[u8]) -> Option { + Some(format!("Bytes: {:#x?}", bytes)) + } + // Downcast methods #[cfg(feature = "ppc")] fn ppc(&self) -> Option<&ppc::ObjArchPpc> { None } diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index a9fffa5..67f426a 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -1,15 +1,16 @@ use std::{borrow::Cow, collections::BTreeMap}; use anyhow::{bail, ensure, Result}; +use byteorder::BigEndian; use cwextab::{decode_extab, ExceptionTableData}; use object::{ elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget, Symbol, SymbolKind, }; -use ppc750cl::{Argument, InsIter, GPR}; +use ppc750cl::{Argument, InsIter, Opcode, GPR}; use crate::{ - arch::{ObjArch, ProcessCodeResult}, + arch::{DataType, ObjArch, ProcessCodeResult}, diff::DiffObjConfig, obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjSection, ObjSymbol}, }; @@ -186,6 +187,36 @@ impl ObjArch for ObjArchPpc { } } + fn guess_data_type(&self, instruction: &ObjIns) -> Option { + // Always shows the first string of the table. Not ideal, but it's really hard to find + // the actual string being referenced. + if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) { + return Some(DataType::String); + } + + // SAFETY: ppc750cl::Opcode is repr(u8) and op is originally obtained on PPC from casting + // an Opcode to a u8 so we know it's a valid value for Opcode. + match unsafe { std::mem::transmute::(instruction.op as u8) } { + Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), + Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), + Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), + Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), + Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), + Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), + + Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), + Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), + Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), + Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), + Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), + _ => None, + } + } + + fn display_data_type(&self, ty: DataType, bytes: &[u8]) -> Option { + ty.display_bytes::(bytes) + } + fn ppc(&self) -> Option<&ObjArchPpc> { Some(self) } } diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 8825992..6e96d85 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -126,6 +126,7 @@ pub struct ObjSymbol { pub virtual_address: Option, /// Original index in object symbol table pub original_index: Option, + pub bytes: Vec, } pub struct ObjInfo { diff --git a/objdiff-core/src/obj/read.rs b/objdiff-core/src/obj/read.rs index 04665c1..ad9804b 100644 --- a/objdiff-core/src/obj/read.rs +++ b/objdiff-core/src/obj/read.rs @@ -78,6 +78,16 @@ fn to_obj_symbol( let virtual_address = split_meta .and_then(|m| m.virtual_addresses.as_ref()) .and_then(|v| v.get(symbol.index().0).cloned()); + + let bytes = symbol + .section_index() + .and_then(|idx| obj_file.section_by_index(idx).ok()) + .and_then(|section| section.data().ok()) + .and_then(|data| { + data.get(section_address as usize..(section_address + symbol.size()) as usize) + }) + .unwrap_or(&[]); + Ok(ObjSymbol { name: name.to_string(), demangled_name, @@ -89,6 +99,7 @@ fn to_obj_symbol( addend, virtual_address, original_index: Some(symbol.index().0), + bytes: bytes.to_vec(), }) } @@ -179,6 +190,7 @@ fn symbols_by_section( addend: 0, virtual_address: None, original_index: None, + bytes: Vec::new(), }); } Ok(result) @@ -239,6 +251,7 @@ fn find_section_symbol( addend: offset_addr as i64, virtual_address: None, original_index: None, + bytes: Vec::new(), }) } @@ -521,6 +534,7 @@ fn update_combined_symbol(symbol: ObjSymbol, address_change: i64) -> Result