ARMv4T (GBA) and ARMv6K (3DS) support (#75)

* Initial ARM support

* Disassemble const pool reloc

* Disasm ARM/Thumb/data based on mapping symbols

* Fallback to mapping symbol `$a`

* Support multiple DWARF sequences

* Update line info

* Rework DWARF line info parsing

- Properly handles multiple sections
  in DWARF 1
- line_info moved into ObjSection
- DWARF 2 parser no longer errors with
  no .text section
- Both parsers properly skip empty
  sections

* Simplify line_info (no Option)

* Get line info from section; output formatted ins string

* Unwrap code section in `arm.rs`

* Handle reloc `R_ARM_SBREL32`

* Update ARM disassembler

* Update README.md

* Format

* Revert "Update README.md"

This reverts commit 8bbfcc6f45.

* Update README.md

* Detect ARM version; support ARMv4T and v6K

* Combobox to force ARM version

* Clear LSB in ARM symbol addresses

* Support big-endian ARM ELF files

* Bump `unarm`, `arm-attr`

* Handle ARM implicit addends

* Update README.md

* Explicitly handle all ARM argument types

* Format

* Display more ARM relocs

* Mask LSB on ARM code symbols only

* Read ARM implicit addends

* Format

---------

Co-authored-by: Luke Street <luke.street@encounterpc.com>
This commit is contained in:
Aetias 2024-06-21 02:36:25 +02:00 committed by GitHub
parent 1fd901a863
commit 97981160f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 219 additions and 41 deletions

24
Cargo.lock generated
View File

@ -150,6 +150,15 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "arm-attr"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d0cabd3a7d2dfa96ab3faa7b532a83c5e090061bf6d83197ca2bc91f5afac6c"
dependencies = [
"thiserror",
]
[[package]]
name = "arrayvec"
version = "0.7.4"
@ -968,7 +977,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e3d747f100290a1ca24b752186f61f6637e1deffe3bf6320de6fcb29510a307"
dependencies = [
"bitflags 2.5.0",
"libloading 0.7.4",
"libloading 0.8.3",
"winapi",
]
@ -2802,6 +2811,7 @@ name = "objdiff-core"
version = "2.0.0-alpha.4"
dependencies = [
"anyhow",
"arm-attr",
"byteorder",
"cpp_demangle",
"cwdemangle",
@ -4029,18 +4039,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.59"
version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa"
checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.59"
version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66"
checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [
"proc-macro2",
"quote",
@ -4320,9 +4330,9 @@ dependencies = [
[[package]]
name = "unarm"
version = "1.0.0"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c13fc9a9c95348bf7565e5c30688fc288239962958cac0ccdc7cd009141d850"
checksum = "c6bff109f0171a299559d82a7236e056093fc0dcd2a7da86aa745f82281e2d31"
[[package]]
name = "unicase"

View File

@ -17,7 +17,7 @@ Supports:
- PowerPC 750CL (GameCube, Wii)
- MIPS (N64, PS1, PS2, PSP)
- x86 (COFF only at the moment)
- ARMv5 (DS)
- ARM (GBA, DS, 3DS)
See [Usage](#usage) for more information.

View File

@ -816,6 +816,7 @@ impl FunctionDiffUi {
x86_formatter: Default::default(), // TODO
mips_abi: Default::default(), // TODO
mips_instr_category: Default::default(), // TODO
arm_arch_version: Default::default(), // TODO
};
let target = self
.target_path

View File

@ -19,7 +19,7 @@ dwarf = ["gimli"]
mips = ["any-arch", "rabbitizer"]
ppc = ["any-arch", "cwdemangle", "ppc750cl"]
x86 = ["any-arch", "cpp_demangle", "iced-x86", "msvc-demangler"]
arm = ["any-arch", "cpp_demangle", "unarm"]
arm = ["any-arch", "cpp_demangle", "unarm", "arm-attr"]
[dependencies]
anyhow = "1.0.82"
@ -56,4 +56,5 @@ iced-x86 = { version = "1.21.0", default-features = false, features = ["std", "d
msvc-demangler = { version = "0.10.0", optional = true }
# arm
unarm = { version = "1.0.0", optional = true }
unarm = { version = "1.3.0", optional = true }
arm-attr = { version = "0.1.1", optional = true }

View File

@ -4,9 +4,11 @@ use std::{
};
use anyhow::{bail, Result};
use arm_attr::{enums::CpuArch, tag::Tag, BuildAttrs};
use object::{
elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, SectionIndex,
SectionKind, Symbol,
elf::{self, SHT_ARM_ATTRIBUTES},
Endian, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, SectionIndex,
SectionKind, Symbol, SymbolKind,
};
use unarm::{
args::{Argument, OffsetImm, OffsetReg, Register},
@ -16,41 +18,93 @@ use unarm::{
use crate::{
arch::{ObjArch, ProcessCodeResult},
diff::DiffObjConfig,
diff::{ArmArchVersion, DiffObjConfig},
obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjSection},
};
pub struct ObjArchArm {
/// Maps section index, to list of disasm modes (arm, thumb or data) sorted by address
disasm_modes: HashMap<SectionIndex, Vec<DisasmMode>>,
detected_version: Option<ArmVersion>,
endianness: object::Endianness,
}
impl ObjArchArm {
pub fn new(file: &File) -> Result<Self> {
let endianness = file.endianness();
match file {
File::Elf32(_) => {
let disasm_modes: HashMap<_, _> = file
.sections()
.filter(|s| s.kind() == SectionKind::Text)
.map(|s| {
let index = s.index();
let mut mapping_symbols: Vec<_> = file
.symbols()
.filter(|s| s.section_index().map(|i| i == index).unwrap_or(false))
.filter_map(|s| DisasmMode::from_symbol(&s))
.collect();
mapping_symbols.sort_unstable_by_key(|x| x.address);
(s.index(), mapping_symbols)
})
.collect();
Ok(Self { disasm_modes })
let disasm_modes = Self::elf_get_mapping_symbols(file);
let detected_version = Self::elf_detect_arm_version(file)?;
Ok(Self { disasm_modes, detected_version, endianness })
}
_ => bail!("Unsupported file format {:?}", file.format()),
}
}
fn elf_detect_arm_version(file: &File) -> Result<Option<ArmVersion>> {
// Check ARM attributes
if let Some(arm_attrs) = file.sections().find(|s| {
s.kind() == SectionKind::Elf(SHT_ARM_ATTRIBUTES) && s.name() == Ok(".ARM.attributes")
}) {
let attr_data = arm_attrs.uncompressed_data()?;
let build_attrs = BuildAttrs::new(&attr_data, match file.endianness() {
object::Endianness::Little => arm_attr::Endian::Little,
object::Endianness::Big => arm_attr::Endian::Big,
})?;
for subsection in build_attrs.subsections() {
let subsection = subsection?;
if !subsection.is_aeabi() {
continue;
}
// Only checking first CpuArch tag. Others may exist, but that's very unlikely.
let cpu_arch = subsection.into_public_tag_iter()?.find_map(|(_, tag)| {
if let Tag::CpuArch(cpu_arch) = tag {
Some(cpu_arch)
} else {
None
}
});
match cpu_arch {
Some(CpuArch::V4T) => return Ok(Some(ArmVersion::V4T)),
Some(CpuArch::V5TE) => return Ok(Some(ArmVersion::V5Te)),
Some(CpuArch::V6K) => return Ok(Some(ArmVersion::V6K)),
Some(arch) => bail!("ARM arch {} not supported", arch),
None => {}
};
}
}
Ok(None)
}
fn elf_get_mapping_symbols(file: &File) -> HashMap<SectionIndex, Vec<DisasmMode>> {
file.sections()
.filter(|s| s.kind() == SectionKind::Text)
.map(|s| {
let index = s.index();
let mut mapping_symbols: Vec<_> = file
.symbols()
.filter(|s| s.section_index().map(|i| i == index).unwrap_or(false))
.filter_map(|s| DisasmMode::from_symbol(&s))
.collect();
mapping_symbols.sort_unstable_by_key(|x| x.address);
(s.index(), mapping_symbols)
})
.collect()
}
}
impl ObjArch for ObjArchArm {
fn symbol_address(&self, symbol: &Symbol) -> u64 {
let address = symbol.address();
if symbol.kind() == SymbolKind::Text {
address & !1
} else {
address
}
}
fn process_code(
&self,
address: u64,
@ -81,10 +135,21 @@ impl ObjArch for ObjArchArm {
mapping_symbols.iter().skip(first_mapping_idx + 1).take_while(|x| x.address < end_addr);
let mut next_mapping = mappings_iter.next();
let ins_count = code.len() / first_mapping.instruction_size();
let ins_count = code.len() / first_mapping.instruction_size(start_addr);
let mut ops = Vec::<u16>::with_capacity(ins_count);
let mut insts = Vec::<ObjIns>::with_capacity(ins_count);
let mut parser = Parser::new(ArmVersion::V5Te, first_mapping, start_addr, code);
let version = match config.arm_arch_version {
ArmArchVersion::Auto => self.detected_version.unwrap_or(ArmVersion::V5Te),
ArmArchVersion::V4T => ArmVersion::V4T,
ArmArchVersion::V5TE => ArmVersion::V5Te,
ArmArchVersion::V6K => ArmVersion::V6K,
};
let endian = match self.endianness {
object::Endianness::Little => unarm::Endian::Little,
object::Endianness::Big => unarm::Endian::Big,
};
let mut parser = Parser::new(version, first_mapping, start_addr, endian, code);
while let Some((address, op, ins)) = parser.next() {
if let Some(next) = next_mapping {
@ -95,7 +160,6 @@ impl ObjArch for ObjArchArm {
next_mapping = mappings_iter.next();
}
}
let line = line_info.range(..=address as u64).last().map(|(_, &b)| b);
let reloc = relocations.iter().find(|r| (r.address as u32 & !1) == address).cloned();
@ -103,11 +167,19 @@ impl ObjArch for ObjArchArm {
let mut reloc_arg = None;
if let Some(reloc) = &reloc {
match reloc.flags {
// Calls
RelocationFlags::Elf { r_type: elf::R_ARM_THM_XPC22 }
| RelocationFlags::Elf { r_type: elf::R_ARM_PC24 } => {
| RelocationFlags::Elf { r_type: elf::R_ARM_THM_PC22 }
| RelocationFlags::Elf { r_type: elf::R_ARM_PC24 }
| RelocationFlags::Elf { r_type: elf::R_ARM_XPC25 }
| RelocationFlags::Elf { r_type: elf::R_ARM_CALL } => {
reloc_arg =
ins.args.iter().rposition(|a| matches!(a, Argument::BranchDest(_)));
}
// Data
RelocationFlags::Elf { r_type: elf::R_ARM_ABS32 } => {
reloc_arg = ins.args.iter().rposition(|a| matches!(a, Argument::UImm(_)));
}
_ => (),
}
};
@ -138,11 +210,42 @@ impl ObjArch for ObjArchArm {
fn implcit_addend(
&self,
_section: &ObjSection,
section: &ObjSection,
address: u64,
reloc: &Relocation,
) -> anyhow::Result<i64> {
bail!("Unsupported ARM implicit relocation {:#x}{:?}", address, reloc.flags())
let address = address as usize;
Ok(match reloc.flags() {
// ARM calls
RelocationFlags::Elf { r_type: elf::R_ARM_PC24 }
| RelocationFlags::Elf { r_type: elf::R_ARM_XPC25 }
| RelocationFlags::Elf { r_type: elf::R_ARM_CALL } => {
let data = section.data[address..address + 4].try_into()?;
let addend = self.endianness.read_i32_bytes(data);
let imm24 = addend & 0xffffff;
(imm24 << 2) << 8 >> 8
}
// Thumb calls
RelocationFlags::Elf { r_type: elf::R_ARM_THM_PC22 }
| RelocationFlags::Elf { r_type: elf::R_ARM_THM_XPC22 } => {
let data = section.data[address..address + 2].try_into()?;
let high = self.endianness.read_i16_bytes(data) as i32;
let data = section.data[address + 2..address + 4].try_into()?;
let low = self.endianness.read_i16_bytes(data) as i32;
let imm22 = ((high & 0x7ff) << 11) | (low & 0x7ff);
(imm22 << 1) << 9 >> 9
}
// Data
RelocationFlags::Elf { r_type: elf::R_ARM_ABS32 } => {
let data = section.data[address..address + 4].try_into()?;
self.endianness.read_i32_bytes(data)
}
flags => bail!("Unsupported ARM implicit relocation {flags:?}"),
} as i64)
}
fn demangle(&self, name: &str) -> Option<String> {
@ -209,6 +312,7 @@ fn push_args(
args.push(ObjInsArg::Reloc);
} else {
match arg {
Argument::None => {}
Argument::Reg(reg) => {
if reg.deref {
deref = true;
@ -242,7 +346,7 @@ fn push_args(
args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque("^".to_string().into())));
}
}
Argument::UImm(value) | Argument::CoOpcode(value) => {
Argument::UImm(value) | Argument::CoOpcode(value) | Argument::SatImm(value) => {
args.push(ObjInsArg::PlainText("#".into()));
args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(*value as u64)));
}
@ -282,7 +386,21 @@ fn push_args(
offset.reg.to_string().into(),
)));
}
_ => args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(arg.to_string().into()))),
Argument::CpsrMode(mode) => {
args.push(ObjInsArg::PlainText("#".into()));
args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(mode.mode as u64)));
if mode.writeback {
args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque("!".into())));
}
}
Argument::CoReg(_)
| Argument::StatusReg(_)
| Argument::StatusMask(_)
| Argument::Shift(_)
| Argument::CpsrFlags(_)
| Argument::Endian(_) => {
args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(arg.to_string().into())))
}
}
}
}

View File

@ -1,7 +1,7 @@
use std::{borrow::Cow, collections::BTreeMap};
use anyhow::{bail, Result};
use object::{Architecture, Object, Relocation, RelocationFlags};
use object::{Architecture, Object, ObjectSymbol, Relocation, RelocationFlags, Symbol};
use crate::{
diff::DiffObjConfig,
@ -34,6 +34,8 @@ pub trait ObjArch: Send + Sync {
fn demangle(&self, _name: &str) -> Option<String> { None }
fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str>;
fn symbol_address(&self, symbol: &Symbol) -> u64 { symbol.address() }
}
pub struct ProcessCodeResult {

View File

@ -93,6 +93,30 @@ pub enum MipsInstrCategory {
R5900,
}
#[derive(
Debug,
Copy,
Clone,
Default,
Eq,
PartialEq,
serde::Deserialize,
serde::Serialize,
strum::VariantArray,
strum::EnumMessage,
)]
pub enum ArmArchVersion {
#[default]
#[strum(message = "Auto (default)")]
Auto,
#[strum(message = "ARMv4T (GBA)")]
V4T,
#[strum(message = "ARMv5TE (DS)")]
V5TE,
#[strum(message = "ARMv6K (3DS)")]
V6K,
}
#[inline]
const fn default_true() -> bool { true }
@ -108,6 +132,8 @@ pub struct DiffObjConfig {
// MIPS
pub mips_abi: MipsAbi,
pub mips_instr_category: MipsInstrCategory,
// ARM
pub arm_arch_version: ArmArchVersion,
}
impl Default for DiffObjConfig {
@ -119,6 +145,7 @@ impl Default for DiffObjConfig {
x86_formatter: Default::default(),
mips_abi: Default::default(),
mips_instr_category: Default::default(),
arm_arch_version: Default::default(),
}
}
}

View File

@ -55,12 +55,13 @@ fn to_obj_symbol(
if obj_file.format() == BinaryFormat::Elf && symbol.scope() == SymbolScope::Linkage {
flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Hidden);
}
let address = arch.symbol_address(symbol);
let section_address = if let Some(section) =
symbol.section_index().and_then(|idx| obj_file.section_by_index(idx).ok())
{
symbol.address() - section.address()
address - section.address()
} else {
symbol.address()
address
};
let demangled_name = arch.demangle(name);
// Find the virtual address for the symbol if available
@ -70,7 +71,7 @@ fn to_obj_symbol(
Ok(ObjSymbol {
name: name.to_string(),
demangled_name,
address: symbol.address(),
address,
section_address,
size: symbol.size(),
size_known: symbol.size() != 0,

View File

@ -16,7 +16,7 @@ use egui::{
use globset::Glob;
use objdiff_core::{
config::{ProjectObject, DEFAULT_WATCH_PATTERNS},
diff::{MipsAbi, MipsInstrCategory, X86Formatter},
diff::{ArmArchVersion, MipsAbi, MipsInstrCategory, X86Formatter},
};
use self_update::cargo_crate_version;
use strum::{EnumMessage, VariantArray};
@ -907,4 +907,22 @@ fn arch_config_ui(ui: &mut egui::Ui, config: &mut AppConfig, _appearance: &Appea
}
}
});
ui.separator();
ui.heading("ARM");
egui::ComboBox::new("arm_arch_version", "Architecture Version")
.selected_text(config.diff_obj_config.arm_arch_version.get_message().unwrap())
.show_ui(ui, |ui| {
for &version in ArmArchVersion::VARIANTS {
if ui
.selectable_label(
config.diff_obj_config.arm_arch_version == version,
version.get_message().unwrap(),
)
.clicked()
{
config.diff_obj_config.arm_arch_version = version;
config.queue_reload = true;
}
}
});
}