diff --git a/.gitignore b/.gitignore index c46c7cc..e4a9258 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,4 @@ android.keystore *.frag *.vert *.metal -.vscode/launch.json +.vscode/ diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs index b36b5b7..2126608 100644 --- a/objdiff-core/src/arch/arm.rs +++ b/objdiff-core/src/arch/arm.rs @@ -113,6 +113,7 @@ impl ObjArch for ObjArchArm { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + _sections: &[ObjSection], ) -> Result { let start_addr = address as u32; let end_addr = start_addr + code.len() as u32; @@ -216,6 +217,7 @@ impl ObjArch for ObjArchArm { mnemonic: Cow::Borrowed(parsed_ins.mnemonic), args, reloc, + fake_pool_reloc: None, branch_dest, line, formatted: parsed_ins.display(display_options).to_string(), diff --git a/objdiff-core/src/arch/arm64.rs b/objdiff-core/src/arch/arm64.rs index 7e396fa..b803252 100644 --- a/objdiff-core/src/arch/arm64.rs +++ b/objdiff-core/src/arch/arm64.rs @@ -29,6 +29,7 @@ impl ObjArch for ObjArchArm64 { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + _sections: &[ObjSection], ) -> Result { let start_address = address; let end_address = address + code.len() as u64; @@ -59,6 +60,7 @@ impl ObjArch for ObjArchArm64 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: None, + fake_pool_reloc: None, branch_dest: None, line: None, formatted: "".to_string(), @@ -121,6 +123,7 @@ impl ObjArch for ObjArchArm64 { mnemonic: Cow::Borrowed(mnemonic), args, reloc, + fake_pool_reloc: None, branch_dest, line, formatted: ins.to_string(), diff --git a/objdiff-core/src/arch/mips.rs b/objdiff-core/src/arch/mips.rs index 4bb3be0..754d119 100644 --- a/objdiff-core/src/arch/mips.rs +++ b/objdiff-core/src/arch/mips.rs @@ -87,6 +87,7 @@ impl ObjArch for ObjArchMips { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + _sections: &[ObjSection], ) -> Result { let _guard = RABBITIZER_MUTEX.lock().map_err(|e| anyhow!("Failed to lock mutex: {e}"))?; configure_rabbitizer(match config.mips_abi { @@ -205,6 +206,7 @@ impl ObjArch for ObjArchMips { mnemonic: Cow::Borrowed(mnemonic), args, reloc: reloc.cloned(), + fake_pool_reloc: None, branch_dest, line, formatted, diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 9799e14..5fc2d01 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -36,13 +36,22 @@ pub enum DataType { impl DataType { pub fn display_bytes(&self, bytes: &[u8]) -> Option { - // TODO: Attempt to interpret large symbols as arrays of a smaller type, - // fallback to intrepreting it as bytes. - // https://github.com/encounter/objdiff/issues/124 - if self.required_len().is_some_and(|l| bytes.len() != l) { - log::warn!("Failed to display a symbol value for a symbol whose size doesn't match the instruction referencing it."); + if self.required_len().is_some_and(|l| bytes.len() < l) { + log::warn!("Failed to display a symbol value for a symbol whose size is too small for instruction referencing it."); return None; } + let mut bytes = bytes; + if self.required_len().is_some_and(|l| bytes.len() > l) { + // If the symbol's size is larger a single instance of this data type, we take just the + // bytes necessary for one of them in order to display the first element of the array. + bytes = &bytes[0..self.required_len().unwrap()]; + // TODO: Attempt to interpret large symbols as arrays of a smaller type and show all + // elements of the array instead. https://github.com/encounter/objdiff/issues/124 + // However, note that the stride of an array can not always be determined just by the + // data type guessed by the single instruction accessing it. There can also be arrays of + // structs that contain multiple elements of different types, so if other elements after + // the first one were to be displayed in this manner, they may be inaccurate. + } match self { DataType::Int8 => { @@ -86,10 +95,10 @@ impl DataType { } } DataType::Float => { - format!("Float: {}", Endian::read_f32(bytes)) + format!("Float: {:?}f", Endian::read_f32(bytes)) } DataType::Double => { - format!("Double: {}", Endian::read_f64(bytes)) + format!("Double: {:?}", Endian::read_f64(bytes)) } DataType::Bytes => { format!("Bytes: {:#?}", bytes) @@ -117,6 +126,7 @@ impl DataType { } pub trait ObjArch: Send + Sync { + #[expect(clippy::too_many_arguments)] fn process_code( &self, address: u64, @@ -125,6 +135,7 @@ pub trait ObjArch: Send + Sync { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + sections: &[ObjSection], ) -> Result; fn implcit_addend( diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index 6c487fc..be84122 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -1,4 +1,7 @@ -use std::{borrow::Cow, collections::BTreeMap}; +use std::{ + borrow::Cow, + collections::{BTreeMap, HashMap}, +}; use anyhow::{bail, ensure, Result}; use byteorder::BigEndian; @@ -7,7 +10,7 @@ use object::{ elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget, Symbol, SymbolKind, }; -use ppc750cl::{Argument, InsIter, Opcode, GPR}; +use ppc750cl::{Argument, InsIter, Opcode, ParsedIns, GPR}; use crate::{ arch::{DataType, ObjArch, ProcessCodeResult}, @@ -45,10 +48,13 @@ impl ObjArch for ObjArchPpc { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + sections: &[ObjSection], ) -> Result { let ins_count = code.len() / 4; let mut ops = Vec::::with_capacity(ins_count); let mut insts = Vec::::with_capacity(ins_count); + let fake_pool_reloc_for_addr = + generate_fake_pool_reloc_for_addr_mapping(address, code, relocations, sections); for (cur_addr, mut ins) in InsIter::new(code, address as u32) { let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); if let Some(reloc) = reloc { @@ -146,6 +152,7 @@ impl ObjArch for ObjArchPpc { mnemonic: Cow::Borrowed(simplified.mnemonic), args, reloc: reloc.cloned(), + fake_pool_reloc: fake_pool_reloc_for_addr.get(&cur_addr).cloned(), op: ins.op as u16, branch_dest, line, @@ -173,6 +180,7 @@ impl ObjArch for ObjArchPpc { fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> { match flags { RelocationFlags::Elf { r_type } => match r_type { + elf::R_PPC_NONE => Cow::Borrowed("R_PPC_NONE"), // We use this for fake pool relocs elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"), elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"), elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"), @@ -188,27 +196,16 @@ impl ObjArch for ObjArchPpc { } fn guess_data_type(&self, instruction: &ObjIns) -> Option { - // Always shows the first string of the table. Not ideal, but it's really hard to find - // the actual string being referenced. - if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) { + if instruction + .reloc + .as_ref() + .or(instruction.fake_pool_reloc.as_ref()) + .is_some_and(|r| r.target.name.starts_with("@stringBase")) + { return Some(DataType::String); } - match Opcode::from(instruction.op as u8) { - Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), - Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), - Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), - Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), - Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), - Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), - - Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), - Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), - Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), - Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), - Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), - _ => None, - } + guess_data_type_from_load_store_inst_op(Opcode::from(instruction.op as u8)) } fn display_data_type(&self, ty: DataType, bytes: &[u8]) -> Option { @@ -381,3 +378,177 @@ fn make_symbol_ref(symbol: &Symbol) -> Result { let demangled_name = cwdemangle::demangle(&name, &cwdemangle::DemangleOptions::default()); Ok(ExtabSymbolRef { original_index: symbol.index().0, name, demangled_name }) } + +fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option { + match inst_op { + Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), + Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), + Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), + Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), + Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), + Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), + + Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), + Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), + Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), + Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), + Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), + _ => None, + } +} + +// Given an instruction, determine if it could accessing data at the address in a register. +// If so, return the offset added to the register's address, the register containing that address, +// and (optionally) which destination register the address is being copied into. +fn get_offset_and_addr_gpr_for_possible_pool_reference( + opcode: Opcode, + simplified: &ParsedIns, +) -> Option<(i16, GPR, Option)> { + let args = &simplified.args; + if guess_data_type_from_load_store_inst_op(opcode).is_some() { + match (args[1], args[2]) { + (Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => { + // e.g. lwz. Immediate offset. + Some((offset.0, addr_src_gpr, None)) + } + (Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => { + // e.g. lwzx. The offset is in a register and was likely calculated from an index. + // Treat the offset as being 0 in this case to show the first element of the array. + // It may be possible to show all elements by figuring out the stride of the array + // from the calculations performed on the index before it's put into offset_gpr, but + // this would be much more complicated, so it's not currently done. + Some((0, addr_src_gpr, None)) + } + _ => None, + } + } else { + // If it's not a load/store instruction, there's two more possibilities we need to handle. + // 1. It could be a reference to @stringBase. + // 2. It could be moving the relocation address plus an offset into a different register to + // load from later. + // If either of these match, we also want to return the destination register that the + // address is being copied into so that we can detect any future references to that new + // register as well. + match (opcode, args[0], args[1], args[2]) { + ( + Opcode::Addi, + Argument::GPR(addr_dst_gpr), + Argument::GPR(addr_src_gpr), + Argument::Simm(simm), + ) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))), + ( + Opcode::Or, + Argument::GPR(addr_dst_gpr), + Argument::GPR(addr_src_gpr), + Argument::None, + ) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.` + _ => None, + } + } +} + +// We create a fake relocation for an instruction, vaguely simulating what the actual relocation +// might have looked like if it wasn't pooled. This is so minimal changes are needed to display +// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that +// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense. +// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an +// addend to indicate that. +fn make_fake_pool_reloc( + offset: i16, + cur_addr: u32, + pool_reloc: &ObjReloc, + sections: &[ObjSection], +) -> Option { + let offset_from_pool = pool_reloc.addend + offset as i64; + let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?; + let orig_section_index = pool_reloc.target.orig_section_index?; + let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; + let target_symbol = section + .symbols + .iter() + .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&target_address))?; + let addend = (target_address - target_symbol.address) as i64; + Some(ObjReloc { + flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE }, + address: cur_addr as u64, + target: target_symbol.clone(), + addend, + }) +} + +// Searches through all instructions in a function, determining which registers have the addresses +// of pooled data relocations in them, finding which instructions load data from those addresses, +// and constructing a mapping of the address of that instruction to a "fake pool relocation" that +// simulates what that instruction's relocation would look like if data hadn't been pooled. +// Limitations: This method currently only goes through the instructions in a function in linear +// order, from start to finish. It does *not* follow any branches. This means that it could have +// false positives or false negatives in determining which relocation is currently loaded in which +// register at any given point in the function, as control flow is not respected. +// There are currently no known examples of this method producing inaccurate results in reality, but +// if examples are found, it may be possible to update this method to also follow all branches so +// that it produces more accurate results. +fn generate_fake_pool_reloc_for_addr_mapping( + address: u64, + code: &[u8], + relocations: &[ObjReloc], + sections: &[ObjSection], +) -> HashMap { + let mut active_pool_relocs = HashMap::new(); + let mut pool_reloc_for_addr = HashMap::new(); + for (cur_addr, ins) in InsIter::new(code, address as u32) { + let simplified = ins.simplified(); + let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); + + if let Some(reloc) = reloc { + // This instruction has a real relocation, so it may be a pool load we want to keep + // track of. + let args = &simplified.args; + match (ins.op, args[0], args[1], args[2]) { + ( + Opcode::Addi, + Argument::GPR(addr_dst_gpr), + Argument::GPR(_addr_src_gpr), + Argument::Simm(_simm), + ) => { + active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi` + } + ( + Opcode::Ori, + Argument::GPR(addr_dst_gpr), + Argument::GPR(_addr_src_gpr), + Argument::Uimm(_uimm), + ) => { + active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori` + } + _ => {} + } + } else if let Some((offset, addr_src_gpr, addr_dst_gpr)) = + get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified) + { + // This instruction doesn't have a real relocation, so it may be a reference to one of + // the already-loaded pools. + if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) { + if let Some(fake_pool_reloc) = + make_fake_pool_reloc(offset, cur_addr, pool_reloc, sections) + { + pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc); + } + if let Some(addr_dst_gpr) = addr_dst_gpr { + // If the address of the pool relocation got copied into another register, we + // need to keep track of it in that register too as future instructions may + // reference the symbol indirectly via this new register, instead of the + // register the symbol's address was originally loaded into. + // For example, the start of the function might `lis` + `addi` the start of the + // ...data pool into r25, and then later the start of a loop will `addi` r25 + // with the offset within the .data section of an array variable into r21. + // Then the body of the loop will `lwzx` one of the array elements from r21. + let mut new_reloc = pool_reloc.clone(); + new_reloc.addend += offset as i64; + active_pool_relocs.insert(addr_dst_gpr.0, new_reloc); + } + } + } + } + + pool_reloc_for_addr +} diff --git a/objdiff-core/src/arch/x86.rs b/objdiff-core/src/arch/x86.rs index 82399fb..a92e09f 100644 --- a/objdiff-core/src/arch/x86.rs +++ b/objdiff-core/src/arch/x86.rs @@ -34,6 +34,7 @@ impl ObjArch for ObjArchX86 { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + _sections: &[ObjSection], ) -> Result { let mut result = ProcessCodeResult { ops: Vec::new(), insts: Vec::new() }; let mut decoder = Decoder::with_ip(self.bits, code, address, DecoderOptions::NONE); @@ -54,6 +55,7 @@ impl ObjArch for ObjArchX86 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: None, + fake_pool_reloc: None, branch_dest: None, line: None, formatted: String::new(), @@ -79,6 +81,7 @@ impl ObjArch for ObjArchX86 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: reloc.cloned(), + fake_pool_reloc: None, branch_dest: None, line, formatted: String::new(), diff --git a/objdiff-core/src/diff/code.rs b/objdiff-core/src/diff/code.rs index 3d3273a..9e2fb88 100644 --- a/objdiff-core/src/diff/code.rs +++ b/objdiff-core/src/diff/code.rs @@ -28,6 +28,7 @@ pub fn process_code_symbol( §ion.relocations, §ion.line_info, config, + &obj.sections, ) } diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 1ef6d0f..800259d 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -106,6 +106,7 @@ pub struct ObjIns { pub mnemonic: Cow<'static, str>, pub args: Vec, pub reloc: Option, + pub fake_pool_reloc: Option, pub branch_dest: Option, /// Line number pub line: Option, diff --git a/objdiff-gui/Cargo.toml b/objdiff-gui/Cargo.toml index aac8873..555872f 100644 --- a/objdiff-gui/Cargo.toml +++ b/objdiff-gui/Cargo.toml @@ -95,7 +95,7 @@ exec = "0.3" # native: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] -tracing-subscriber = "0.3" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } # web: [target.'cfg(target_arch = "wasm32")'.dependencies] diff --git a/objdiff-gui/src/views/function_diff.rs b/objdiff-gui/src/views/function_diff.rs index dde736e..d555065 100644 --- a/objdiff-gui/src/views/function_diff.rs +++ b/objdiff-gui/src/views/function_diff.rs @@ -1,4 +1,4 @@ -use std::default::Default; +use std::{cmp::Ordering, default::Default}; use egui::{text::LayoutJob, Id, Label, Response, RichText, Sense, Widget}; use egui_extras::TableRow; @@ -118,9 +118,17 @@ fn ins_hover_ui( } } - if let Some(reloc) = &ins.reloc { + if let Some(reloc) = ins.reloc.as_ref().or(ins.fake_pool_reloc.as_ref()) { ui.label(format!("Relocation type: {}", obj.arch.display_reloc(reloc.flags))); - ui.colored_label(appearance.highlight_color, format!("Name: {}", reloc.target.name)); + let addend_str = match reloc.addend.cmp(&0i64) { + Ordering::Greater => format!("+{:x}", reloc.addend), + Ordering::Less => format!("-{:x}", -reloc.addend), + _ => "".to_string(), + }; + ui.colored_label( + appearance.highlight_color, + format!("Name: {}{}", reloc.target.name, addend_str), + ); if let Some(orig_section_index) = reloc.target.orig_section_index { if let Some(section) = obj.sections.iter().find(|s| s.orig_index == orig_section_index) @@ -132,18 +140,18 @@ fn ins_hover_ui( } ui.colored_label( appearance.highlight_color, - format!("Address: {:x}", reloc.target.address), + format!("Address: {:x}{}", reloc.target.address, addend_str), ); ui.colored_label( appearance.highlight_color, format!("Size: {:x}", reloc.target.size), ); - if let Some(s) = obj - .arch - .guess_data_type(ins) - .and_then(|ty| obj.arch.display_data_type(ty, &reloc.target.bytes)) - { - ui.colored_label(appearance.highlight_color, s); + if reloc.addend >= 0 && reloc.target.bytes.len() > reloc.addend as usize { + if let Some(s) = obj.arch.guess_data_type(ins).and_then(|ty| { + obj.arch.display_data_type(ty, &reloc.target.bytes[reloc.addend as usize..]) + }) { + ui.colored_label(appearance.highlight_color, s); + } } } else { ui.colored_label(appearance.highlight_color, "Extern".to_string());