From b9919600809ecb16e7cdace5c04d5ebd995c9193 Mon Sep 17 00:00:00 2001 From: Aetias <144526980+AetiasHax@users.noreply.github.com> Date: Tue, 4 Jun 2024 03:08:49 +0200 Subject: [PATCH] ARMv5TE (DS) support (#68) * Initial ARM support * Disassemble const pool reloc * Disasm ARM/Thumb/data based on mapping symbols * Fallback to mapping symbol `$a` * Support multiple DWARF sequences * Update line info * Rework DWARF line info parsing - Properly handles multiple sections in DWARF 1 - line_info moved into ObjSection - DWARF 2 parser no longer errors with no .text section - Both parsers properly skip empty sections * Simplify line_info (no Option) * Get line info from section; output formatted ins string * Unwrap code section in `arm.rs` * Handle reloc `R_ARM_SBREL32` * Update ARM disassembler * Update README.md * Format * Revert "Update README.md" This reverts commit 8bbfcc6f453c13340764bbae2946025bc41659ae. * Update README.md --------- Co-authored-by: Luke Street --- Cargo.lock | 7 + README.md | 1 + objdiff-core/Cargo.toml | 6 +- objdiff-core/src/arch/arm.rs | 297 +++++++++++++++++++++++++++++++++++ objdiff-core/src/arch/mod.rs | 4 + 5 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 objdiff-core/src/arch/arm.rs diff --git a/Cargo.lock b/Cargo.lock index 8bff785..7cc3919 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3124,6 +3124,7 @@ dependencies = [ "serde_yaml", "similar", "strum", + "unarm", ] [[package]] @@ -4695,6 +4696,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "unarm" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c13fc9a9c95348bf7565e5c30688fc288239962958cac0ccdc7cd009141d850" + [[package]] name = "unicase" version = "2.7.0" diff --git a/README.md b/README.md index a946780..8afc5ad 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Supports: - PowerPC 750CL (GameCube, Wii) - MIPS (N64, PS1, PS2, PSP) - x86 (COFF only at the moment) +- ARM (DS) See [Usage](#usage) for more information. diff --git a/objdiff-core/Cargo.toml b/objdiff-core/Cargo.toml index 31b04e1..f60d499 100644 --- a/objdiff-core/Cargo.toml +++ b/objdiff-core/Cargo.toml @@ -12,13 +12,14 @@ A local diffing tool for decompilation projects. """ [features] -all = ["config", "dwarf", "mips", "ppc", "x86"] +all = ["config", "dwarf", "mips", "ppc", "x86", "arm"] any-arch = [] # Implicit, used to check if any arch is enabled config = ["globset", "semver", "serde_json", "serde_yaml"] dwarf = ["gimli"] mips = ["any-arch", "rabbitizer"] ppc = ["any-arch", "cwdemangle", "ppc750cl"] x86 = ["any-arch", "cpp_demangle", "iced-x86", "msvc-demangler"] +arm = ["any-arch", "cpp_demangle", "unarm"] [dependencies] anyhow = "1.0.82" @@ -53,3 +54,6 @@ rabbitizer = { version = "1.11.0", optional = true } cpp_demangle = { version = "0.4.3", optional = true } iced-x86 = { version = "1.21.0", default-features = false, features = ["std", "decoder", "intel", "gas", "masm", "nasm", "exhaustive_enums"], optional = true } msvc-demangler = { version = "0.10.0", optional = true } + +# arm +unarm = { version = "1.0.0", optional = true } diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs new file mode 100644 index 0000000..406313e --- /dev/null +++ b/objdiff-core/src/arch/arm.rs @@ -0,0 +1,297 @@ +use std::{borrow::Cow, collections::HashMap}; + +use anyhow::{anyhow, bail, Result}; +use object::{ + elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, SectionIndex, + SectionKind, Symbol, +}; +use unarm::{ + args::{Argument, OffsetImm, OffsetReg, Register}, + parse::{ArmVersion, ParseMode, Parser}, + ParsedIns, +}; + +use crate::{ + arch::{ObjArch, ProcessCodeResult}, + diff::DiffObjConfig, + obj::{ObjInfo, ObjIns, ObjInsArg, ObjInsArgValue, ObjSection, SymbolRef}, +}; + +pub struct ObjArchArm { + /// Maps section index, to list of disasm modes (arm, thumb or data) sorted by address + disasm_modes: HashMap>, +} + +impl ObjArchArm { + pub fn new(file: &File) -> Result { + match file { + File::Elf32(_) => { + let disasm_modes: HashMap<_, _> = file + .sections() + .filter(|s| s.kind() == SectionKind::Text) + .map(|s| { + let index = s.index(); + let mut mapping_symbols: Vec<_> = file + .symbols() + .filter(|s| s.section_index().map(|i| i == index).unwrap_or(false)) + .filter_map(|s| DisasmMode::from_symbol(&s)) + .collect(); + mapping_symbols.sort_unstable_by_key(|x| x.address); + (s.index(), mapping_symbols) + }) + .collect(); + Ok(Self { disasm_modes }) + } + _ => bail!("Unsupported file format {:?}", file.format()), + } + } +} + +impl ObjArch for ObjArchArm { + fn process_code( + &self, + obj: &ObjInfo, + symbol_ref: SymbolRef, + config: &DiffObjConfig, + ) -> Result { + let (section, symbol) = obj.section_symbol(symbol_ref); + let section = section.ok_or_else(|| anyhow!("Code symbol section not found"))?; + let code = §ion.data + [symbol.section_address as usize..(symbol.section_address + symbol.size) as usize]; + + let start_addr = symbol.address as u32; + let end_addr = start_addr + symbol.size as u32; + + // Mapping symbols decide what kind of data comes after it. $a for ARM code, $t for Thumb code and $d for data. + let fallback_mappings = + [DisasmMode { address: symbol.address as u32, mapping: ParseMode::Arm }]; + let mapping_symbols = self + .disasm_modes + .get(&SectionIndex(section.orig_index)) + .map(|x| x.as_slice()) + .unwrap_or(&fallback_mappings); + let first_mapping_idx = + match mapping_symbols.binary_search_by_key(&(symbol.address as u32), |x| x.address) { + Ok(idx) => idx, + Err(idx) => idx - 1, + }; + let first_mapping = mapping_symbols[first_mapping_idx].mapping; + + let mut mappings_iter = + mapping_symbols.iter().skip(first_mapping_idx + 1).take_while(|x| x.address < end_addr); + let mut next_mapping = mappings_iter.next(); + + let ins_count = code.len() / first_mapping.instruction_size(); + let mut ops = Vec::::with_capacity(ins_count); + let mut insts = Vec::::with_capacity(ins_count); + let mut parser = Parser::new(ArmVersion::V5Te, first_mapping, start_addr, code); + + while let Some((address, op, ins)) = parser.next() { + if let Some(next) = next_mapping { + let next_address = parser.address; + if next_address >= next.address { + // Change mapping + parser.mode = next.mapping; + next_mapping = mappings_iter.next(); + } + } + + let line = section.line_info.range(..=address as u64).last().map(|(_, &b)| b); + + let reloc = + section.relocations.iter().find(|r| (r.address as u32 & !1) == address).cloned(); + + let mut reloc_arg = None; + if let Some(reloc) = &reloc { + match reloc.flags { + RelocationFlags::Elf { r_type: elf::R_ARM_THM_XPC22 } + | RelocationFlags::Elf { r_type: elf::R_ARM_PC24 } => { + reloc_arg = + ins.args.iter().rposition(|a| matches!(a, Argument::BranchDest(_))); + } + _ => (), + } + }; + + let (args, branch_dest) = if reloc.is_some() && parser.mode == ParseMode::Data { + (vec![ObjInsArg::Reloc], None) + } else { + push_args(&ins, config, reloc_arg, address)? + }; + + ops.push(op.id()); + insts.push(ObjIns { + address: address as u64, + size: (parser.address - address) as u8, + op: op.id(), + mnemonic: ins.mnemonic.to_string(), + args, + reloc, + branch_dest, + line, + formatted: ins.to_string(), + orig: None, + }); + } + + Ok(ProcessCodeResult { ops, insts }) + } + + fn implcit_addend( + &self, + _section: &ObjSection, + address: u64, + reloc: &Relocation, + ) -> anyhow::Result { + bail!("Unsupported ARM implicit relocation {:#x}{:?}", address, reloc.flags()) + } + + fn demangle(&self, name: &str) -> Option { + cpp_demangle::Symbol::new(name) + .ok() + .and_then(|s| s.demangle(&cpp_demangle::DemangleOptions::default()).ok()) + } + + fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> { + Cow::Owned(format!("<{flags:?}>")) + } +} + +#[derive(Clone, Copy, Debug)] +struct DisasmMode { + address: u32, + mapping: ParseMode, +} + +impl DisasmMode { + fn from_symbol<'a>(sym: &Symbol<'a, '_, &'a [u8]>) -> Option { + if let Ok(name) = sym.name() { + ParseMode::from_mapping_symbol(name) + .map(|mapping| DisasmMode { address: sym.address() as u32, mapping }) + } else { + None + } + } +} + +fn push_args( + parsed_ins: &ParsedIns, + config: &DiffObjConfig, + reloc_arg: Option, + cur_addr: u32, +) -> Result<(Vec, Option)> { + let mut args = vec![]; + let mut branch_dest = None; + let mut writeback = false; + let mut deref = false; + for (i, arg) in parsed_ins.args_iter().enumerate() { + // Emit punctuation before separator + if deref { + match arg { + Argument::OffsetImm(OffsetImm { post_indexed: true, value: _ }) + | Argument::OffsetReg(OffsetReg { add: _, post_indexed: true, reg: _ }) + | Argument::CoOption(_) => { + deref = false; + args.push(ObjInsArg::PlainText("]".into())); + if writeback { + writeback = false; + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque("!".into()))); + } + } + _ => {} + } + } + + if i > 0 { + args.push(ObjInsArg::PlainText(config.separator().into())); + } + + if reloc_arg == Some(i) { + args.push(ObjInsArg::Reloc); + } else { + match arg { + Argument::Reg(reg) => { + if reg.deref { + deref = true; + args.push(ObjInsArg::PlainText("[".into())); + } + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(reg.reg.to_string().into()))); + if reg.writeback { + if reg.deref { + writeback = true; + } else { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque("!".into()))); + } + } + } + Argument::RegList(reg_list) => { + args.push(ObjInsArg::PlainText("{".into())); + let mut first = true; + for i in 0..16 { + if (reg_list.regs & (1 << i)) != 0 { + if !first { + args.push(ObjInsArg::PlainText(config.separator().into())); + } + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( + Register::parse(i).to_string().into(), + ))); + first = false; + } + } + args.push(ObjInsArg::PlainText("}".into())); + if reg_list.user_mode { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque("^".to_string().into()))); + } + } + Argument::UImm(value) | Argument::CoOpcode(value) => { + args.push(ObjInsArg::PlainText("#".into())); + args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(*value as u64))); + } + Argument::SImm(value) + | Argument::OffsetImm(OffsetImm { post_indexed: _, value }) => { + args.push(ObjInsArg::PlainText("#".into())); + args.push(ObjInsArg::Arg(ObjInsArgValue::Signed(*value as i64))); + } + Argument::BranchDest(value) => { + let dest = cur_addr.wrapping_add_signed(*value) as u64; + args.push(ObjInsArg::BranchDest(dest)); + branch_dest = Some(dest); + } + Argument::CoOption(value) => { + args.push(ObjInsArg::PlainText("{".into())); + args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(*value as u64))); + args.push(ObjInsArg::PlainText("}".into())); + } + Argument::CoprocNum(value) => { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(format!("p{}", value).into()))); + } + Argument::ShiftImm(shift) => { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(shift.op.to_string().into()))); + args.push(ObjInsArg::PlainText(" #".into())); + args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(shift.imm as u64))); + } + Argument::ShiftReg(shift) => { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(shift.op.to_string().into()))); + args.push(ObjInsArg::PlainText(" ".into())); + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(shift.reg.to_string().into()))); + } + Argument::OffsetReg(offset) => { + if !offset.add { + args.push(ObjInsArg::PlainText("-".into())); + } + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( + offset.reg.to_string().into(), + ))); + } + _ => args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(arg.to_string().into()))), + } + } + } + if deref { + args.push(ObjInsArg::PlainText("]".into())); + if writeback { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque("!".into()))); + } + } + Ok((args, branch_dest)) +} diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index ce4b545..3dfe1d5 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -8,6 +8,8 @@ use crate::{ obj::{ObjIns, ObjReloc, ObjSection}, }; +#[cfg(feature = "arm")] +mod arm; #[cfg(feature = "mips")] pub mod mips; #[cfg(feature = "ppc")] @@ -46,6 +48,8 @@ pub fn new_arch(object: &object::File) -> Result> { Architecture::Mips => Box::new(mips::ObjArchMips::new(object)?), #[cfg(feature = "x86")] Architecture::I386 | Architecture::X86_64 => Box::new(x86::ObjArchX86::new(object)?), + #[cfg(feature = "arm")] + Architecture::Arm => Box::new(arm::ObjArchArm::new(object)?), arch => bail!("Unsupported architecture: {arch:?}"), }) }