use alloc::{ borrow::Cow, boxed::Box, format, string::{String, ToString}, vec::Vec, }; use core::{ any::Any, ffi::CStr, fmt::{self, Debug}, }; use anyhow::{Result, bail}; use encoding_rs::SHIFT_JIS; use object::Endian as _; use crate::{ diff::{ DiffObjConfig, DiffSide, display::{ContextItem, HoverItem, InstructionPart}, }, obj::{ FlowAnalysisResult, InstructionArg, InstructionRef, Object, ParsedInstruction, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedSymbol, Section, Symbol, SymbolFlagSet, SymbolKind, }, util::ReallySigned, }; #[cfg(feature = "arm")] pub mod arm; #[cfg(feature = "arm64")] pub mod arm64; #[cfg(feature = "mips")] pub mod mips; #[cfg(feature = "ppc")] pub mod ppc; #[cfg(feature = "superh")] pub mod superh; #[cfg(feature = "x86")] pub mod x86; pub const OPCODE_INVALID: u16 = u16::MAX; pub const OPCODE_DATA: u16 = u16::MAX - 1; /// Represents the type of data associated with an instruction #[derive(PartialEq)] pub enum DataType { Int8, Int16, Int32, Int64, Float, Double, Bytes, String, } impl fmt::Display for DataType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(match self { DataType::Int8 => "Int8", DataType::Int16 => "Int16", DataType::Int32 => "Int32", DataType::Int64 => "Int64", DataType::Float => "Float", DataType::Double => "Double", DataType::Bytes => "Bytes", DataType::String => "String", }) } } impl DataType { pub fn display_labels(&self, endian: object::Endianness, bytes: &[u8]) -> Vec { let mut strs = Vec::new(); for (literal, label_override) in self.display_literals(endian, bytes) { let label = label_override.unwrap_or_else(|| self.to_string()); strs.push(format!("{label}: {literal}")) } strs } pub fn display_literals( &self, endian: object::Endianness, bytes: &[u8], ) -> Vec<(String, Option)> { let mut strs = Vec::new(); if self.required_len().is_some_and(|l| bytes.len() < l) { log::warn!( "Failed to display a symbol value for a symbol whose size is too small for instruction referencing it." ); return strs; } let mut bytes = bytes; if self.required_len().is_some_and(|l| bytes.len() > l) { // If the symbol's size is larger a single instance of this data type, we take just the // bytes necessary for one of them in order to display the first element of the array. bytes = &bytes[0..self.required_len().unwrap()]; // TODO: Attempt to interpret large symbols as arrays of a smaller type and show all // elements of the array instead. https://github.com/encounter/objdiff/issues/124 // However, note that the stride of an array can not always be determined just by the // data type guessed by the single instruction accessing it. There can also be arrays of // structs that contain multiple elements of different types, so if other elements after // the first one were to be displayed in this manner, they may be inaccurate. } match self { DataType::Int8 => { let i = i8::from_ne_bytes(bytes.try_into().unwrap()); strs.push((format!("{i:#x}"), None)); if i < 0 { strs.push((format!("{:#x}", ReallySigned(i)), None)); } } DataType::Int16 => { let i = endian.read_i16_bytes(bytes.try_into().unwrap()); strs.push((format!("{i:#x}"), None)); if i < 0 { strs.push((format!("{:#x}", ReallySigned(i)), None)); } } DataType::Int32 => { let i = endian.read_i32_bytes(bytes.try_into().unwrap()); strs.push((format!("{i:#x}"), None)); if i < 0 { strs.push((format!("{:#x}", ReallySigned(i)), None)); } } DataType::Int64 => { let i = endian.read_i64_bytes(bytes.try_into().unwrap()); strs.push((format!("{i:#x}"), None)); if i < 0 { strs.push((format!("{:#x}", ReallySigned(i)), None)); } } DataType::Float => { let bytes: [u8; 4] = bytes.try_into().unwrap(); strs.push(( format!("{:?}f", match endian { object::Endianness::Little => f32::from_le_bytes(bytes), object::Endianness::Big => f32::from_be_bytes(bytes), }), None, )); } DataType::Double => { let bytes: [u8; 8] = bytes.try_into().unwrap(); strs.push(( format!("{:?}", match endian { object::Endianness::Little => f64::from_le_bytes(bytes), object::Endianness::Big => f64::from_be_bytes(bytes), }), None, )); } DataType::Bytes => { strs.push((format!("{bytes:#?}"), None)); } DataType::String => { if let Ok(cstr) = CStr::from_bytes_until_nul(bytes) { strs.push((format!("{cstr:?}"), None)); } if let Some(nul_idx) = bytes.iter().position(|&c| c == b'\0') { let (cow, _, had_errors) = SHIFT_JIS.decode(&bytes[..nul_idx]); if !had_errors { let str = format!("{cow:?}"); // Only add the Shift JIS string if it's different from the ASCII string. if !strs.iter().any(|x| x.0 == str) { strs.push((str, Some("Shift JIS".into()))); } } } } } strs } fn required_len(&self) -> Option { match self { DataType::Int8 => Some(1), DataType::Int16 => Some(2), DataType::Int32 => Some(4), DataType::Int64 => Some(8), DataType::Float => Some(4), DataType::Double => Some(8), DataType::Bytes => None, DataType::String => None, } } } impl dyn Arch { /// Generate a list of instructions references (offset, size, opcode) from the given code. /// /// See [`scan_instructions_internal`] for more details. pub fn scan_instructions( &self, resolved: ResolvedSymbol, diff_config: &DiffObjConfig, ) -> Result> { let mut result = self.scan_instructions_internal( resolved.symbol.address, resolved.data, resolved.section_index, &resolved.section.relocations, diff_config, )?; let function_start = resolved.symbol.address; let function_end = function_start + resolved.symbol.size; // Remove any branch destinations that are outside the function range for ins in result.iter_mut() { if let Some(branch_dest) = ins.branch_dest && (branch_dest < function_start || branch_dest >= function_end) { ins.branch_dest = None; } } // Resolve relocation targets within the same function to branch destinations let mut ins_iter = result.iter_mut().peekable(); 'outer: for reloc in resolved .section .relocations .iter() .skip_while(|r| r.address < function_start) .take_while(|r| r.address < function_end) { let ins = loop { let Some(ins) = ins_iter.peek_mut() else { break 'outer; }; if reloc.address < ins.address { continue 'outer; } let ins = ins_iter.next().unwrap(); if reloc.address >= ins.address && reloc.address < ins.address + ins.size as u64 { break ins; } }; // Clear existing branch destination for instructions with relocations ins.branch_dest = None; let Some(target) = resolved.obj.symbols.get(reloc.target_symbol) else { continue; }; if target.section != Some(resolved.section_index) { continue; } let Some(target_address) = target.address.checked_add_signed(reloc.addend) else { continue; }; // If the target address is within the function range, set it as a branch destination if target_address >= function_start && target_address < function_end { ins.branch_dest = Some(target_address); } } Ok(result) } /// Parse an instruction to gather its mnemonic and arguments for more detailed comparison. /// /// This is called only when we need to compare the arguments of an instruction. pub fn process_instruction( &self, resolved: ResolvedInstructionRef, diff_config: &DiffObjConfig, ) -> Result { let mut mnemonic = None; let mut args = Vec::with_capacity(8); let mut relocation_emitted = false; self.display_instruction(resolved, diff_config, &mut |part| { match part { InstructionPart::Opcode(m, _) => mnemonic = Some(Cow::Owned(m.into_owned())), InstructionPart::Arg(arg) => { if arg == InstructionArg::Reloc { relocation_emitted = true; // If the relocation was resolved to a branch destination, emit that instead. if let Some(dest) = resolved.ins_ref.branch_dest { args.push(InstructionArg::BranchDest(dest)); return Ok(()); } } args.push(arg.into_static()); } _ => {} } Ok(()) })?; // If the instruction has a relocation, but we didn't format it in the display, add it to // the end of the arguments list. if resolved.relocation.is_some() && !relocation_emitted { args.push(InstructionArg::Reloc); } Ok(ParsedInstruction { ins_ref: resolved.ins_ref, mnemonic: mnemonic.unwrap_or_default(), args, }) } } pub trait Arch: Any + Debug + Send + Sync { /// Finishes arch-specific initialization that must be done after sections have been combined. fn post_init(&mut self, _sections: &[Section], _symbols: &[Symbol]) {} /// Generate a list of instructions references (offset, size, opcode) from the given code. /// /// The opcode IDs are used to generate the initial diff. Implementations should do as little /// parsing as possible here: just enough to identify the base instruction opcode, size, and /// possible branch destination (for visual representation). As needed, instructions are parsed /// via `process_instruction` to compare their arguments. fn scan_instructions_internal( &self, address: u64, code: &[u8], section_index: usize, relocations: &[Relocation], diff_config: &DiffObjConfig, ) -> Result>; /// Format an instruction for display. /// /// Implementations should call the callback for each part of the instruction: usually the /// mnemonic and arguments, plus any separators and visual formatting. fn display_instruction( &self, resolved: ResolvedInstructionRef, diff_config: &DiffObjConfig, cb: &mut dyn FnMut(InstructionPart) -> Result<()>, ) -> Result<()>; /// Generate a list of fake relocations from the given code that represent pooled data accesses. fn generate_pooled_relocations( &self, _address: u64, _code: &[u8], _relocations: &[Relocation], _symbols: &[Symbol], ) -> Vec { Vec::new() } // Perform detailed data flow analysis fn data_flow_analysis( &self, _obj: &Object, _symbol: &Symbol, _code: &[u8], _relocations: &[Relocation], ) -> Option> { None } fn relocation_override( &self, _file: &object::File<'_>, _section: &object::Section, _address: u64, _relocation: &object::Relocation, ) -> Result> { Ok(None) } fn reloc_name(&self, _flags: RelocationFlags) -> Option<&'static str> { None } fn data_reloc_size(&self, flags: RelocationFlags) -> usize; fn symbol_address(&self, address: u64, _kind: SymbolKind) -> u64 { address } fn extra_symbol_flags(&self, _symbol: &object::Symbol) -> SymbolFlagSet { SymbolFlagSet::default() } fn guess_data_type( &self, _resolved: ResolvedInstructionRef, _bytes: &[u8], ) -> Option { None } fn symbol_hover(&self, _obj: &Object, _symbol_index: usize) -> Vec { Vec::new() } fn symbol_context(&self, _obj: &Object, _symbol_index: usize) -> Vec { Vec::new() } fn instruction_hover( &self, _obj: &Object, _resolved: ResolvedInstructionRef, ) -> Vec { Vec::new() } fn instruction_context( &self, _obj: &Object, _resolved: ResolvedInstructionRef, ) -> Vec { Vec::new() } fn infer_function_size( &self, symbol: &Symbol, _section: &Section, next_address: u64, ) -> Result { Ok(next_address.saturating_sub(symbol.address)) } } pub fn new_arch(object: &object::File, diff_side: DiffSide) -> Result> { use object::Object as _; // Avoid unused warnings on non-mips builds let _ = diff_side; Ok(match object.architecture() { #[cfg(feature = "ppc")] object::Architecture::PowerPc | object::Architecture::PowerPc64 => { Box::new(ppc::ArchPpc::new(object)?) } #[cfg(feature = "mips")] object::Architecture::Mips => Box::new(mips::ArchMips::new(object, diff_side)?), #[cfg(feature = "x86")] object::Architecture::I386 | object::Architecture::X86_64 => { Box::new(x86::ArchX86::new(object)?) } #[cfg(feature = "arm")] object::Architecture::Arm => Box::new(arm::ArchArm::new(object)?), #[cfg(feature = "arm64")] object::Architecture::Aarch64 => Box::new(arm64::ArchArm64::new(object)?), #[cfg(feature = "superh")] object::Architecture::SuperH => Box::new(superh::ArchSuperH::new(object)?), arch => bail!("Unsupported architecture: {arch:?}"), }) } #[derive(Debug, Default)] pub struct ArchDummy {} impl ArchDummy { pub fn new() -> Box { Box::new(Self {}) } } impl Arch for ArchDummy { fn scan_instructions_internal( &self, _address: u64, _code: &[u8], _section_index: usize, _relocations: &[Relocation], _diff_config: &DiffObjConfig, ) -> Result> { Ok(Vec::new()) } fn display_instruction( &self, _resolved: ResolvedInstructionRef, _diff_config: &DiffObjConfig, _cb: &mut dyn FnMut(InstructionPart) -> Result<()>, ) -> Result<()> { Ok(()) } fn data_reloc_size(&self, _flags: RelocationFlags) -> usize { 0 } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum RelocationOverrideTarget { Keep, Skip, Symbol(object::SymbolIndex), Section(object::SectionIndex), } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct RelocationOverride { pub target: RelocationOverrideTarget, pub addend: i64, }