diff --git a/Cargo.lock b/Cargo.lock
index af0b7bd..b537346 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -217,9 +217,9 @@ dependencies = [
 
 [[package]]
 name = "armv5te"
-version = "0.1.0"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7603974675228909093b94530f81c2ebaeafb7dc05968e48f422e453970dc80"
+checksum = "fd80f9acaa34edbec1011a43c96f1dba19c2e1afd6f144f2156c2ae8c2035f04"
 
 [[package]]
 name = "arrayref"
diff --git a/objdiff-core/Cargo.toml b/objdiff-core/Cargo.toml
index b490a4b..cbaddf7 100644
--- a/objdiff-core/Cargo.toml
+++ b/objdiff-core/Cargo.toml
@@ -55,4 +55,4 @@ iced-x86 = { version = "1.21.0", default-features = false, features = ["std", "d
 msvc-demangler = { version = "0.10.0", optional = true }
 
 # arm
-armv5te = { version = "0.1.0", optional = true }
+armv5te = { version = "0.3.0", optional = true }
diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs
index 11b24dd..7460032 100644
--- a/objdiff-core/src/arch/arm.rs
+++ b/objdiff-core/src/arch/arm.rs
@@ -1,8 +1,11 @@
-use std::borrow::Cow;
+use std::{borrow::Cow, collections::HashMap};
 
-use anyhow::{bail, Result};
-use armv5te::arm;
-use object::{elf, File, Relocation, RelocationFlags};
+use anyhow::{bail, Context, Result};
+use armv5te::{arm, thumb};
+use object::{
+    elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, SectionIndex,
+    SectionKind, Symbol,
+};
 
 use crate::{
     arch::{ObjArch, ProcessCodeResult},
@@ -10,11 +13,35 @@ use crate::{
     obj::{ObjInfo, ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjSection, SymbolRef},
 };
 
-pub struct ObjArchArm {}
+pub struct ObjArchArm {
+    /// Maps section index to list of disasm modes (arm, thumb or data), sorted by address
+    disasm_modes: HashMap<SectionIndex, Vec<DisasmMode>>,
+}
 
 impl ObjArchArm {
-    pub fn new(_file: &File) -> Result<Self> {
-        Ok(Self {})
+    /// Collects the mapping symbols (`$a`, `$t`, `$d`) of each text section, sorted by address.
+    /// Only `File::Elf32` input is supported; any other format yields an error.
+    pub fn new(file: &File) -> Result<Self> {
+        match file {
+            File::Elf32(_) => {
+                let disasm_modes: HashMap<_, _> = file
+                    .sections()
+                    .filter(|s| s.kind() == SectionKind::Text)
+                    .map(|s| {
+                        let index = s.index();
+                        let mut mapping_symbols: Vec<_> = file
+                            .symbols()
+                            .filter(|s| s.section_index().map(|i| i == index).unwrap_or(false))
+                            .filter_map(|s| DisasmMode::from_symbol(&s))
+                            .collect();
+                        mapping_symbols.sort_unstable_by_key(|x| x.address);
+                        (s.index(), mapping_symbols)
+                    })
+                    .collect();
+                Ok(Self { disasm_modes })
+            }
+            _ => bail!("Unsupported file format {:?}", file.format()),
+        }
     }
 }
 
@@ -29,58 +56,185 @@ impl ObjArch for ObjArchArm {
         let mut code = &section.data
             [symbol.section_address as usize..(symbol.section_address + symbol.size) as usize];
 
-        let ins_count = code.len() / 4;
+        let start_addr = symbol.address as u32;
+        let end_addr = start_addr + symbol.size as u32;
+
+        // Mapping symbols decide what kind of data comes after it. $a for ARM code, $t for Thumb code and $d for data.
+        let mapping_symbols = self
+            .disasm_modes
+            .get(&SectionIndex(section.orig_index))
+            .with_context(|| format!("No mappings symbols in the section of '{}'", symbol.name))?;
+        // Pick the last mapping symbol at or before the function start. `checked_sub` turns a
+        // function that precedes every mapping symbol into an error instead of an underflow.
+        let first_mapping = self
+            .disasm_modes
+            .get(&SectionIndex(section.orig_index))
+            .and_then(|s| match s.binary_search_by_key(&(symbol.address as u32), |x| x.address) {
+                Ok(idx) => Some(idx),
+                Err(idx) => idx.checked_sub(1),
+            })
+            .with_context(|| format!("No mapping symbol found before or at '{}'", symbol.name))?;
+        let mut mapping = mapping_symbols[first_mapping].mapping;
+
+        let mut mappings_iter =
+            mapping_symbols.iter().skip(first_mapping + 1).take_while(|x| x.address < end_addr);
+        let mut next_mapping = mappings_iter.next();
+
+        let ins_count = code.len() / mapping.ins_size();
         let mut ops = Vec::<u16>::with_capacity(ins_count);
         let mut insts = Vec::<ObjIns>::with_capacity(ins_count);
-        let mut cur_addr = symbol.address as u32;
+        let mut cur_addr = start_addr;
 
-        while code.len() >= 4 {
-            let bytes = [code[0], code[1], code[2], code[3]];
-            code = &code[4..];
-            let ins_code = u32::from_le_bytes(bytes);
+        while cur_addr < end_addr {
+            if let Some(next) = next_mapping {
+                if cur_addr >= next.address {
+                    // Change mapping
+                    mapping = next.mapping;
+                    next_mapping = mappings_iter.next();
+                }
+            }
+            if code.len() < mapping.ins_size() {
+                break;
+            }
 
             let line = obj
                 .line_info
                 .as_ref()
                 .and_then(|map| map.range(..=cur_addr as u64).last().map(|(_, &b)| b));
 
-            let reloc = section.relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
-            let ins_code = mask_reloc_from_code(ins_code, reloc)?;
-            let is_data = is_data(reloc)?;
+            let ins = match mapping {
+                MappingSymbol::Arm => {
+                    let bytes = [code[0], code[1], code[2], code[3]];
+                    code = &code[4..];
+                    let ins_code = u32::from_le_bytes(bytes);
 
-            let (op, mnemonic, args, branch_dest) = if is_data {
-                (u16::MAX, ".word", vec![ObjInsArg::Reloc], None)
-            } else {
-                let ins = arm::Ins::new(ins_code);
-                let parsed_ins = arm::ParsedIns::parse(ins);
+                    let reloc =
+                        section.relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
+                    let ins_code = mask_reloc_from_code(ins_code, reloc)?;
 
-                let mut reloc_arg = None;
-                if let Some(reloc) = reloc {
-                    if let RelocationFlags::Elf { r_type: elf::R_ARM_PC24 } = reloc.flags {
-                        reloc_arg = parsed_ins
-                            .args
-                            .iter()
-                            .rposition(|a| matches!(a, arm::Argument::BranchDest(_)));
+                    let ins = arm::Ins::new(ins_code);
+                    let parsed_ins = arm::ParsedIns::parse(ins);
+
+                    let mut reloc_arg = None;
+                    if let Some(reloc) = reloc {
+                        if let RelocationFlags::Elf { r_type: elf::R_ARM_PC24 } = reloc.flags {
+                            reloc_arg = parsed_ins
+                                .args
+                                .iter()
+                                .rposition(|a| matches!(a, arm::Argument::BranchDest(_)));
+                        }
+                    }
+
+                    let (args, branch_dest) =
+                        push_arm_args(&parsed_ins, config, reloc_arg, cur_addr)?;
+                    let op = ins.op as u16;
+                    let mnemonic = parsed_ins.mnemonic;
+
+                    ObjIns {
+                        address: cur_addr as u64,
+                        size: mapping.ins_size() as u8,
+                        op,
+                        mnemonic: mnemonic.to_string(),
+                        args,
+                        reloc: reloc.cloned(),
+                        branch_dest,
+                        line,
+                        orig: Some(parsed_ins.to_string()),
                     }
                 }
+                MappingSymbol::Thumb => {
+                    let bytes = [code[0], code[1]];
+                    code = &code[2..];
+                    let ins_code = u16::from_le_bytes(bytes) as u32;
 
-                let (args, branch_dest) = push_args(&parsed_ins, config, reloc_arg, cur_addr)?;
-                (ins.op as u16, parsed_ins.mnemonic, args, branch_dest)
+                    let reloc =
+                        section.relocations.iter().find(|r| (r.address as u32 & !1) == cur_addr);
+                    let ins_code = mask_reloc_from_code(ins_code, reloc)?;
+
+                    let ins = thumb::Ins::new(ins_code);
+
+                    let mut parsed_ins = thumb::ParsedIns::parse(ins);
+                    let mut size = 2;
+                    let address = cur_addr as u64;
+                    // The instruction continues in the next halfword; only merge it when that
+                    // halfword is present, so truncated code cannot index out of bounds.
+                    if ins.is_half_bl() && code.len() >= 2 {
+                        cur_addr += 2;
+                        let bytes = [code[0], code[1]];
+                        code = &code[2..];
+                        let second_code = u16::from_le_bytes(bytes) as u32;
+                        let reloc = section
+                            .relocations
+                            .iter()
+                            .find(|r| (r.address as u32 & !1) == cur_addr);
+                        let second_code = mask_reloc_from_code(second_code, reloc)?;
+
+                        let second_ins = thumb::Ins::new(second_code);
+                        let second_ins = thumb::ParsedIns::parse(second_ins);
+                        parsed_ins = parsed_ins.combine_bl(&second_ins);
+                        size = 4;
+                    }
+
+                    let mut reloc_arg = None;
+                    if let Some(reloc) = reloc {
+                        if let RelocationFlags::Elf { r_type: elf::R_ARM_THM_XPC22 } = reloc.flags {
+                            reloc_arg = parsed_ins
+                                .args
+                                .iter()
+                                .rposition(|a| matches!(a, thumb::Argument::BranchDest(_)));
+                        }
+                    }
+
+                    let (args, branch_dest) =
+                        push_thumb_args(&parsed_ins, config, reloc_arg, cur_addr)?;
+                    let op = ins.op as u16;
+                    let mnemonic = parsed_ins.mnemonic;
+
+                    ObjIns {
+                        address,
+                        size,
+                        op,
+                        mnemonic: mnemonic.to_string(),
+                        args,
+                        reloc: reloc.cloned(),
+                        branch_dest,
+                        line,
+                        orig: Some(parsed_ins.to_string()),
+                    }
+                }
+                MappingSymbol::Data => {
+                    let bytes = [code[0], code[1], code[2], code[3]];
+                    code = &code[4..];
+                    let data = u32::from_le_bytes(bytes);
+
+                    let reloc =
+                        section.relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
+                    let data = mask_reloc_from_code(data, reloc)?;
+
+                    let mut args = vec![];
+                    if reloc.is_some() {
+                        args.push(ObjInsArg::Reloc);
+                    } else {
+                        args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(data as u64)));
+                    }
+
+                    ObjIns {
+                        address: cur_addr as u64,
+                        size: mapping.ins_size() as u8,
+                        op: u16::MAX,
+                        mnemonic: ".word".to_string(),
+                        args,
+                        reloc: reloc.cloned(),
+                        branch_dest: None,
+                        line,
+                        orig: None,
+                    }
+                }
             };
 
-            ops.push(op);
-            insts.push(ObjIns {
-                address: cur_addr as u64,
-                size: 4,
-                op,
-                mnemonic: mnemonic.to_string(),
-                args,
-                reloc: reloc.cloned(),
-                branch_dest,
-                line,
-                orig: None,
-            });
-            cur_addr += 4;
+            ops.push(ins.op);
+            insts.push(ins);
+            cur_addr += mapping.ins_size() as u32;
         }
 
         Ok(ProcessCodeResult { ops, insts })
@@ -106,18 +260,51 @@ impl ObjArch for ObjArchArm {
     }
 }
 
-fn is_data(reloc: Option<&ObjReloc>) -> Result<bool> {
-    if let Some(reloc) = reloc {
-        match reloc.flags {
-            RelocationFlags::Elf { r_type } => match r_type {
-                elf::R_ARM_PC24 | elf::R_ARM_THM_PC22 | elf::R_ARM_THM_XPC22 => Ok(false),
-                elf::R_ARM_ABS32 => Ok(true),
-                _ => bail!("Unhandled ELF relocation type {:?}", r_type),
-            },
-            _ => bail!("Unhandled relocation flags {:?}", reloc.flags),
+/// Address at which a mapping symbol switches the disassembly mode.
+#[derive(Clone, Copy, Debug)]
+struct DisasmMode {
+    address: u32,
+    mapping: MappingSymbol,
+}
+
+impl DisasmMode {
+    /// Builds a `DisasmMode` from `sym` when its name is a mapping symbol name.
+    fn from_symbol<'a>(sym: &Symbol<'a, '_, &'a [u8]>) -> Option<Self> {
+        if let Ok(name) = sym.name() {
+            MappingSymbol::from_symbol_name(name)
+                .map(|mapping| DisasmMode { address: sym.address() as u32, mapping })
+        } else {
+            None
+        }
+    }
+}
+
+/// Kind of content that follows a mapping symbol.
+#[derive(Clone, Copy, Debug)]
+enum MappingSymbol {
+    Arm,
+    Thumb,
+    Data,
+}
+
+impl MappingSymbol {
+    /// Base size in bytes of one instruction (or data word) in this mode.
+    fn ins_size(self) -> usize {
+        match self {
+            MappingSymbol::Arm => 4,
+            MappingSymbol::Thumb => 2,
+            MappingSymbol::Data => 4,
+        }
+    }
+
+    /// Parses `$a`, `$t` or `$d`; every other name yields `None`.
+    fn from_symbol_name(sym: &str) -> Option<Self> {
+        match sym {
+            "$a" => Some(Self::Arm),
+            "$t" => Some(Self::Thumb),
+            "$d" => Some(Self::Data),
+            _ => None,
         }
-    } else {
-        Ok(false)
     }
 }
 
@@ -126,8 +313,9 @@ fn mask_reloc_from_code(code: u32, reloc: Option<&ObjReloc>) -> Result<u32> {
         match reloc.flags {
             RelocationFlags::Elf { r_type } => match r_type {
                 elf::R_ARM_PC24 => Ok(code & !0xffffff),
-                elf::R_ARM_ABS32 => Ok(code),
+                elf::R_ARM_ABS32 => Ok(0),
                 elf::R_ARM_THM_PC22 => Ok(code & !0x7ff),
+                elf::R_ARM_XPC25 => Ok(code & !0xffffff),
                 elf::R_ARM_THM_XPC22 => Ok(code & !0x7ff),
                 _ => bail!("Unhandled ELF relocation type {:?}", r_type),
             },
@@ -138,7 +326,7 @@ fn mask_reloc_from_code(code: u32, reloc: Option<&ObjReloc>) -> Result<u32> {
     }
 }
 
-fn push_args(
+fn push_arm_args(
     parsed_ins: &arm::ParsedIns,
     config: &DiffObjConfig,
     reloc_arg: Option<usize>,
@@ -190,10 +378,10 @@ fn push_args(
                     args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(reg.to_string().into())));
                 }
                 arm::Argument::RegList(reg_list) => {
-                    push_reg_list(reg_list, &mut args, config);
+                    push_reg_list(*reg_list, &mut args, config);
                 }
                 arm::Argument::RegListC(reg_list) => {
-                    push_reg_list(reg_list, &mut args, config);
+                    push_reg_list(*reg_list, &mut args, config);
                     args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque("^".to_string().into())));
                 }
                 arm::Argument::UImm(value) | arm::Argument::CoOpcode(value) => {
@@ -232,7 +420,69 @@ fn push_args(
     Ok((args, branch_dest))
 }
 
-fn push_reg_list(reg_list: &u32, args: &mut Vec<ObjInsArg>, config: &DiffObjConfig) {
+/// Converts the arguments of a parsed Thumb instruction into `ObjInsArg`s and returns them
+/// together with the branch destination, if one of the arguments is a branch target.
+fn push_thumb_args(
+    parsed_ins: &thumb::ParsedIns,
+    config: &DiffObjConfig,
+    reloc_arg: Option<usize>,
+    cur_addr: u32,
+) -> Result<(Vec<ObjInsArg>, Option<u64>)> {
+    let mut args = vec![];
+    let mut branch_dest = None;
+    let mut deref = false;
+    for (i, arg) in parsed_ins.args_iter().enumerate() {
+        if i > 0 {
+            args.push(ObjInsArg::PlainText(config.separator().into()));
+        }
+
+        if reloc_arg == Some(i) {
+            args.push(ObjInsArg::Reloc);
+        } else {
+            match arg {
+                thumb::Argument::RegWb(reg) => {
+                    args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(reg.to_string().into())));
+                    args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque("!".into())));
+                }
+                thumb::Argument::RegDeref(reg) => {
+                    deref = true;
+                    args.push(ObjInsArg::PlainText("[".into()));
+                    args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(reg.to_string().into())));
+                }
+                thumb::Argument::RegList(reg_list) => {
+                    push_reg_list(*reg_list, &mut args, config);
+                }
+                thumb::Argument::RegListPc(reg_list) => {
+                    push_reg_list(
+                        reg_list | ((1 << thumb::Reg::Pc as u8) as u32),
+                        &mut args,
+                        config,
+                    );
+                }
+                thumb::Argument::UImm(value) => {
+                    args.push(ObjInsArg::PlainText("#".into()));
+                    args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(*value as u64)));
+                }
+                thumb::Argument::SImm((value, _)) | thumb::Argument::Offset((value, _)) => {
+                    args.push(ObjInsArg::PlainText("#".into()));
+                    args.push(ObjInsArg::Arg(ObjInsArgValue::Signed(*value as i64)));
+                }
+                thumb::Argument::BranchDest((value, _)) => {
+                    let dest = cur_addr.wrapping_add_signed(*value) as u64;
+                    args.push(ObjInsArg::BranchDest(dest));
+                    branch_dest = Some(dest);
+                }
+                _ => args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(arg.to_string().into()))),
+            }
+        }
+    }
+    if deref {
+        args.push(ObjInsArg::PlainText("]".into()));
+    }
+    Ok((args, branch_dest))
+}
+
+fn push_reg_list(reg_list: u32, args: &mut Vec<ObjInsArg>, config: &DiffObjConfig) {
     args.push(ObjInsArg::PlainText("{".into()));
     let mut first = true;
     for i in 0..16 {