This commit is contained in:
LagoLunatic 2024-12-01 22:23:37 -07:00 committed by GitHub
commit 01d1eb5311
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 241 additions and 39 deletions

2
.gitignore vendored
View File

@ -18,4 +18,4 @@ android.keystore
*.frag
*.vert
*.metal
.vscode/launch.json
.vscode/

View File

@ -113,6 +113,7 @@ impl ObjArch for ObjArchArm {
relocations: &[ObjReloc],
line_info: &BTreeMap<u64, u32>,
config: &DiffObjConfig,
_sections: &[ObjSection],
) -> Result<ProcessCodeResult> {
let start_addr = address as u32;
let end_addr = start_addr + code.len() as u32;
@ -216,6 +217,7 @@ impl ObjArch for ObjArchArm {
mnemonic: Cow::Borrowed(parsed_ins.mnemonic),
args,
reloc,
fake_pool_reloc: None,
branch_dest,
line,
formatted: parsed_ins.display(display_options).to_string(),

View File

@ -29,6 +29,7 @@ impl ObjArch for ObjArchArm64 {
relocations: &[ObjReloc],
line_info: &BTreeMap<u64, u32>,
config: &DiffObjConfig,
_sections: &[ObjSection],
) -> Result<ProcessCodeResult> {
let start_address = address;
let end_address = address + code.len() as u64;
@ -59,6 +60,7 @@ impl ObjArch for ObjArchArm64 {
mnemonic: Cow::Borrowed("<invalid>"),
args: vec![],
reloc: None,
fake_pool_reloc: None,
branch_dest: None,
line: None,
formatted: "".to_string(),
@ -121,6 +123,7 @@ impl ObjArch for ObjArchArm64 {
mnemonic: Cow::Borrowed(mnemonic),
args,
reloc,
fake_pool_reloc: None,
branch_dest,
line,
formatted: ins.to_string(),

View File

@ -87,6 +87,7 @@ impl ObjArch for ObjArchMips {
relocations: &[ObjReloc],
line_info: &BTreeMap<u64, u32>,
config: &DiffObjConfig,
_sections: &[ObjSection],
) -> Result<ProcessCodeResult> {
let _guard = RABBITIZER_MUTEX.lock().map_err(|e| anyhow!("Failed to lock mutex: {e}"))?;
configure_rabbitizer(match config.mips_abi {
@ -205,6 +206,7 @@ impl ObjArch for ObjArchMips {
mnemonic: Cow::Borrowed(mnemonic),
args,
reloc: reloc.cloned(),
fake_pool_reloc: None,
branch_dest,
line,
formatted,

View File

@ -36,13 +36,22 @@ pub enum DataType {
impl DataType {
pub fn display_bytes<Endian: ByteOrder>(&self, bytes: &[u8]) -> Option<String> {
// TODO: Attempt to interpret large symbols as arrays of a smaller type,
// fallback to intrepreting it as bytes.
// https://github.com/encounter/objdiff/issues/124
if self.required_len().is_some_and(|l| bytes.len() != l) {
log::warn!("Failed to display a symbol value for a symbol whose size doesn't match the instruction referencing it.");
if self.required_len().is_some_and(|l| bytes.len() < l) {
log::warn!("Failed to display a symbol value for a symbol whose size is too small for instruction referencing it.");
return None;
}
let mut bytes = bytes;
if self.required_len().is_some_and(|l| bytes.len() > l) {
// If the symbol's size is larger a single instance of this data type, we take just the
// bytes necessary for one of them in order to display the first element of the array.
bytes = &bytes[0..self.required_len().unwrap()];
// TODO: Attempt to interpret large symbols as arrays of a smaller type and show all
// elements of the array instead. https://github.com/encounter/objdiff/issues/124
// However, note that the stride of an array can not always be determined just by the
// data type guessed by the single instruction accessing it. There can also be arrays of
// structs that contain multiple elements of different types, so if other elements after
// the first one were to be displayed in this manner, they may be inaccurate.
}
match self {
DataType::Int8 => {
@ -86,10 +95,10 @@ impl DataType {
}
}
DataType::Float => {
format!("Float: {}", Endian::read_f32(bytes))
format!("Float: {:?}f", Endian::read_f32(bytes))
}
DataType::Double => {
format!("Double: {}", Endian::read_f64(bytes))
format!("Double: {:?}", Endian::read_f64(bytes))
}
DataType::Bytes => {
format!("Bytes: {:#?}", bytes)
@ -117,6 +126,7 @@ impl DataType {
}
pub trait ObjArch: Send + Sync {
#[expect(clippy::too_many_arguments)]
fn process_code(
&self,
address: u64,
@ -125,6 +135,7 @@ pub trait ObjArch: Send + Sync {
relocations: &[ObjReloc],
line_info: &BTreeMap<u64, u32>,
config: &DiffObjConfig,
sections: &[ObjSection],
) -> Result<ProcessCodeResult>;
fn implcit_addend(

View File

@ -1,4 +1,7 @@
use std::{borrow::Cow, collections::BTreeMap};
use std::{
borrow::Cow,
collections::{BTreeMap, HashMap},
};
use anyhow::{bail, ensure, Result};
use byteorder::BigEndian;
@ -7,7 +10,7 @@ use object::{
elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget,
Symbol, SymbolKind,
};
use ppc750cl::{Argument, InsIter, Opcode, GPR};
use ppc750cl::{Argument, InsIter, Opcode, ParsedIns, GPR};
use crate::{
arch::{DataType, ObjArch, ProcessCodeResult},
@ -27,6 +30,180 @@ fn is_rel_abs_arg(arg: &Argument) -> bool {
fn is_offset_arg(arg: &Argument) -> bool { matches!(arg, Argument::Offset(_)) }
fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option<DataType> {
match inst_op {
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),
Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
_ => None,
}
}
// Given an instruction, determine if it could accessing data at the address in a register.
// If so, return the offset added to the register's address, the register containing that address,
// and (optionally) which destination register the address is being copied into.
fn get_offset_and_addr_gpr_for_possible_pool_reference(
opcode: Opcode,
simplified: &ParsedIns,
) -> Option<(i16, GPR, Option<GPR>)> {
let args = &simplified.args;
if guess_data_type_from_load_store_inst_op(opcode).is_some() {
match (args[1], args[2]) {
(Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => {
// e.g. lwz. Immediate offset.
Some((offset.0, addr_src_gpr, None))
}
(Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => {
// e.g. lwzx. The offset is in a register and was likely calculated from an index.
// Treat the offset as being 0 in this case to show the first element of the array.
// It may be possible to show all elements by figuring out the stride of the array
// from the calculations performed on the index before it's put into offset_gpr, but
// this would be much more complicated, so it's not currently done.
Some((0, addr_src_gpr, None))
}
_ => None,
}
} else {
// If it's not a load/store instruction, there's two more possibilities we need to handle.
// 1. It could be a reference to @stringBase.
// 2. It could be moving the relocation address plus an offset into a different register to
// load from later.
// If either of these match, we also want to return the destination register that the
// address is being copied into so that we can detect any future references to that new
// register as well.
match (opcode, args[0], args[1], args[2]) {
(
Opcode::Addi,
Argument::GPR(addr_dst_gpr),
Argument::GPR(addr_src_gpr),
Argument::Simm(simm),
) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))),
(
Opcode::Or,
Argument::GPR(addr_dst_gpr),
Argument::GPR(addr_src_gpr),
Argument::None,
) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.`
_ => None,
}
}
}
// We create a fake relocation for an instruction, vaguely simulating what the actual relocation
// might have looked like if it wasn't pooled. This is so minimal changes are needed to display
// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that
// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense.
// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an
// addend to indicate that.
fn make_fake_pool_reloc(
offset: i16,
cur_addr: u32,
pool_reloc: &ObjReloc,
sections: &[ObjSection],
) -> Option<ObjReloc> {
let offset_from_pool = pool_reloc.addend + offset as i64;
let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?;
let orig_section_index = pool_reloc.target.orig_section_index?;
let section = sections.iter().find(|s| s.orig_index == orig_section_index)?;
let target_symbol = section
.symbols
.iter()
.find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&target_address))?;
let addend = (target_address - target_symbol.address) as i64;
Some(ObjReloc {
flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE },
address: cur_addr as u64,
target: target_symbol.clone(),
addend,
})
}
// Searches through all instructions in a function, determining which registers have the addresses
// of pooled data relocations in them, finding which instructions load data from those addresses,
// and constructing a mapping of the address of that instruction to a "fake pool relocation" that
// simulates what that instruction's relocation would look like if data hadn't been pooled.
// Limitations: This method currently only goes through the instructions in a function in linear
// order, from start to finish. It does *not* follow any branches. This means that it could have
// false positives or false negatives in determining which relocation is currently loaded in which
// register at any given point in the function, as control flow is not respected.
// There are currently no known examples of this method producing inaccurate results in reality, but
// if examples are found, it may be possible to update this method to also follow all branches so
// that it produces more accurate results.
fn generate_fake_pool_reloc_for_addr_mapping(
address: u64,
code: &[u8],
relocations: &[ObjReloc],
sections: &[ObjSection],
) -> HashMap<u32, ObjReloc> {
let mut active_pool_relocs = HashMap::new();
let mut pool_reloc_for_addr = HashMap::new();
for (cur_addr, ins) in InsIter::new(code, address as u32) {
let simplified = ins.simplified();
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
if let Some(reloc) = reloc {
// This instruction has a real relocation, so it may be a pool load we want to keep
// track of.
let args = &simplified.args;
match (ins.op, args[0], args[1], args[2]) {
(
Opcode::Addi,
Argument::GPR(addr_dst_gpr),
Argument::GPR(_addr_src_gpr),
Argument::Simm(_simm),
) => {
active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi`
}
(
Opcode::Ori,
Argument::GPR(addr_dst_gpr),
Argument::GPR(_addr_src_gpr),
Argument::Uimm(_uimm),
) => {
active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori`
}
_ => {}
}
} else if let Some((offset, addr_src_gpr, addr_dst_gpr)) =
get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified)
{
// This instruction doesn't have a real relocation, so it may be a reference to one of
// the already-loaded pools.
if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) {
if let Some(fake_pool_reloc) =
make_fake_pool_reloc(offset, cur_addr, pool_reloc, sections)
{
pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc);
}
if let Some(addr_dst_gpr) = addr_dst_gpr {
// If the address of the pool relocation got copied into another register, we
// need to keep track of it in that register too as future instructions may
// reference the symbol indirectly via this new register, instead of the
// register the symbol's address was originally loaded into.
// For example, the start of the function might `lis` + `addi` the start of the
// ...data pool into r25, and then later the start of a loop will `addi` r25
// with the offset within the .data section of an array variable into r21.
// Then the body of the loop will `lwzx` one of the array elements from r21.
let mut new_reloc = pool_reloc.clone();
new_reloc.addend += offset as i64;
active_pool_relocs.insert(addr_dst_gpr.0, new_reloc);
}
}
}
}
pool_reloc_for_addr
}
pub struct ObjArchPpc {
/// Exception info
pub extab: Option<BTreeMap<usize, ExceptionInfo>>,
@ -45,10 +222,13 @@ impl ObjArch for ObjArchPpc {
relocations: &[ObjReloc],
line_info: &BTreeMap<u64, u32>,
config: &DiffObjConfig,
sections: &[ObjSection],
) -> Result<ProcessCodeResult> {
let ins_count = code.len() / 4;
let mut ops = Vec::<u16>::with_capacity(ins_count);
let mut insts = Vec::<ObjIns>::with_capacity(ins_count);
let fake_pool_reloc_for_addr =
generate_fake_pool_reloc_for_addr_mapping(address, code, relocations, sections);
for (cur_addr, mut ins) in InsIter::new(code, address as u32) {
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
if let Some(reloc) = reloc {
@ -146,6 +326,7 @@ impl ObjArch for ObjArchPpc {
mnemonic: Cow::Borrowed(simplified.mnemonic),
args,
reloc: reloc.cloned(),
fake_pool_reloc: fake_pool_reloc_for_addr.get(&cur_addr).cloned(),
op: ins.op as u16,
branch_dest,
line,
@ -173,6 +354,7 @@ impl ObjArch for ObjArchPpc {
fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> {
match flags {
RelocationFlags::Elf { r_type } => match r_type {
elf::R_PPC_NONE => Cow::Borrowed("R_PPC_NONE"), // We use this for fake pool relocs
elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"),
elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"),
elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"),
@ -188,27 +370,16 @@ impl ObjArch for ObjArchPpc {
}
fn guess_data_type(&self, instruction: &ObjIns) -> Option<super::DataType> {
// Always shows the first string of the table. Not ideal, but it's really hard to find
// the actual string being referenced.
if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) {
if instruction
.reloc
.as_ref()
.or(instruction.fake_pool_reloc.as_ref())
.is_some_and(|r| r.target.name.starts_with("@stringBase"))
{
return Some(DataType::String);
}
match Opcode::from(instruction.op as u8) {
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),
Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
_ => None,
}
guess_data_type_from_load_store_inst_op(Opcode::from(instruction.op as u8))
}
fn display_data_type(&self, ty: DataType, bytes: &[u8]) -> Option<String> {

View File

@ -34,6 +34,7 @@ impl ObjArch for ObjArchX86 {
relocations: &[ObjReloc],
line_info: &BTreeMap<u64, u32>,
config: &DiffObjConfig,
_sections: &[ObjSection],
) -> Result<ProcessCodeResult> {
let mut result = ProcessCodeResult { ops: Vec::new(), insts: Vec::new() };
let mut decoder = Decoder::with_ip(self.bits, code, address, DecoderOptions::NONE);
@ -54,6 +55,7 @@ impl ObjArch for ObjArchX86 {
mnemonic: Cow::Borrowed("<invalid>"),
args: vec![],
reloc: None,
fake_pool_reloc: None,
branch_dest: None,
line: None,
formatted: String::new(),
@ -79,6 +81,7 @@ impl ObjArch for ObjArchX86 {
mnemonic: Cow::Borrowed("<invalid>"),
args: vec![],
reloc: reloc.cloned(),
fake_pool_reloc: None,
branch_dest: None,
line,
formatted: String::new(),

View File

@ -28,6 +28,7 @@ pub fn process_code_symbol(
&section.relocations,
&section.line_info,
config,
&obj.sections,
)
}

View File

@ -106,6 +106,7 @@ pub struct ObjIns {
pub mnemonic: Cow<'static, str>,
pub args: Vec<ObjInsArg>,
pub reloc: Option<ObjReloc>,
pub fake_pool_reloc: Option<ObjReloc>,
pub branch_dest: Option<u64>,
/// Line number
pub line: Option<u32>,

View File

@ -95,7 +95,7 @@ exec = "0.3"
# native:
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
tracing-subscriber = "0.3"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# web:
[target.'cfg(target_arch = "wasm32")'.dependencies]

View File

@ -1,4 +1,4 @@
use std::default::Default;
use std::{cmp::Ordering, default::Default};
use egui::{text::LayoutJob, Id, Label, Response, RichText, Sense, Widget};
use egui_extras::TableRow;
@ -118,9 +118,17 @@ fn ins_hover_ui(
}
}
if let Some(reloc) = &ins.reloc {
if let Some(reloc) = ins.reloc.as_ref().or(ins.fake_pool_reloc.as_ref()) {
ui.label(format!("Relocation type: {}", obj.arch.display_reloc(reloc.flags)));
ui.colored_label(appearance.highlight_color, format!("Name: {}", reloc.target.name));
let addend_str = match reloc.addend.cmp(&0i64) {
Ordering::Greater => format!("+{:x}", reloc.addend),
Ordering::Less => format!("-{:x}", -reloc.addend),
_ => "".to_string(),
};
ui.colored_label(
appearance.highlight_color,
format!("Name: {}{}", reloc.target.name, addend_str),
);
if let Some(orig_section_index) = reloc.target.orig_section_index {
if let Some(section) =
obj.sections.iter().find(|s| s.orig_index == orig_section_index)
@ -132,19 +140,19 @@ fn ins_hover_ui(
}
ui.colored_label(
appearance.highlight_color,
format!("Address: {:x}", reloc.target.address),
format!("Address: {:x}{}", reloc.target.address, addend_str),
);
ui.colored_label(
appearance.highlight_color,
format!("Size: {:x}", reloc.target.size),
);
if let Some(s) = obj
.arch
.guess_data_type(ins)
.and_then(|ty| obj.arch.display_data_type(ty, &reloc.target.bytes))
{
if reloc.addend >= 0 && reloc.target.bytes.len() > reloc.addend as usize {
if let Some(s) = obj.arch.guess_data_type(ins).and_then(|ty| {
obj.arch.display_data_type(ty, &reloc.target.bytes[reloc.addend as usize..])
}) {
ui.colored_label(appearance.highlight_color, s);
}
}
} else {
ui.colored_label(appearance.highlight_color, "Extern".to_string());
}