Version 0.2.0

- Add `elf disasm` (disassemble an ELF)
- Add `elf fixup` (for GNU assembler)
- Add `map order` (link order deduction)
- Add `map slices` (ppcdis slices.yml, WIP)
- Add `map symbols` (ppcdis symbols.yml, WIP)
- Big speed improvement for map processing
- Minor `elf2dol` cleanup
This commit is contained in:
Luke Street 2022-12-10 01:28:23 -05:00
parent f6dbe94bac
commit 141339fcb0
18 changed files with 2548 additions and 334 deletions

View File

@ -95,7 +95,9 @@ jobs:
uses: actions/checkout@v3
- name: Install dependencies
if: matrix.packages != ''
run: sudo apt-get -y install ${{ matrix.packages }}
run: |
sudo apt-get -y update
sudo apt-get -y install ${{ matrix.packages }}
- name: Setup Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:

130
Cargo.lock generated
View File

@ -2,6 +2,17 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "0.7.20"
@ -57,12 +68,27 @@ dependencies = [
"winapi",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base16ct"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]]
name = "bitflags"
version = "1.3.2"
@ -102,6 +128,15 @@ dependencies = [
"libc",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
@ -123,23 +158,28 @@ dependencies = [
[[package]]
name = "decomp-toolkit"
version = "0.1.0"
version = "0.2.0"
dependencies = [
"anyhow",
"argh",
"base16ct",
"cwdemangle",
"dol",
"filetime",
"flagset",
"hex",
"indexmap",
"lazy_static",
"log",
"memchr",
"memmap2",
"multimap",
"object",
"ppc750cl",
"pretty_env_logger",
"regex",
"sha-1",
"smallvec",
"topological-sort",
"vergen",
]
@ -154,6 +194,16 @@ dependencies = [
"crypto-common",
]
[[package]]
name = "dol"
version = "0.1.0"
source = "git+https://github.com/encounter/ppc750cl?rev=aa631a33de7882c679afca89350898b87cb3ba3f#aa631a33de7882c679afca89350898b87cb3ba3f"
dependencies = [
"bincode",
"serde",
"thiserror",
]
[[package]]
name = "enum-iterator"
version = "1.1.3"
@ -199,6 +249,12 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "flagset"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda653ca797810c02f7ca4b804b40b8b95ae046eb989d356bce17919a8c25499"
[[package]]
name = "form_urlencoded"
version = "1.1.0"
@ -243,6 +299,21 @@ dependencies = [
"url",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
dependencies = [
"ahash",
]
[[package]]
name = "heck"
version = "0.4.0"
@ -283,6 +354,16 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "indexmap"
version = "1.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
]
[[package]]
name = "itoa"
version = "1.0.4"
@ -367,15 +448,33 @@ dependencies = [
"serde",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "object"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "239da7f290cfa979f43f85a8efeee9a8a76d0827c356d37f9d3d7254d6b537fb"
dependencies = [
"crc32fast",
"hashbrown 0.13.1",
"indexmap",
"memchr",
]
[[package]]
name = "once_cell"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
[[package]]
name = "percent-encoding"
version = "2.2.0"
@ -388,6 +487,15 @@ version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
[[package]]
name = "ppc750cl"
version = "0.2.0"
source = "git+https://github.com/encounter/ppc750cl?rev=aa631a33de7882c679afca89350898b87cb3ba3f#aa631a33de7882c679afca89350898b87cb3ba3f"
dependencies = [
"num-traits",
"serde",
]
[[package]]
name = "pretty_env_logger"
version = "0.4.0"
@ -483,6 +591,20 @@ name = "serde"
version = "1.0.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "sha-1"
@ -495,6 +617,12 @@ dependencies = [
"digest",
]
[[package]]
name = "smallvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
[[package]]
name = "syn"
version = "1.0.103"

View File

@ -3,7 +3,7 @@ name = "decomp-toolkit"
description = "GameCube/Wii decompilation project tools."
authors = ["Luke Street <luke@street.dev>"]
license = "MIT OR Apache-2.0"
version = "0.1.1"
version = "0.2.0"
edition = "2021"
publish = false
build = "build.rs"
@ -15,22 +15,32 @@ categories = ["command-line-utilities"]
name = "dtk"
path = "src/main.rs"
[profile.release]
lto = "thin"
panic = "abort"
strip = "debuginfo"
[dependencies]
anyhow = "1.0.64"
argh = "0.1.8"
base16ct = "0.1.1"
cwdemangle = "0.1.3"
dol = { git = "https://github.com/encounter/ppc750cl", rev = "aa631a33de7882c679afca89350898b87cb3ba3f" }
filetime = "0.2.18"
flagset = "0.4.3"
hex = "0.4.3"
indexmap = "1.9.2"
lazy_static = "1.4.0"
log = "0.4.17"
memchr = "2.5.0"
memmap2 = "0.5.7"
multimap = "0.8.3"
object = { version = "0.30.0", features = ["read_core", "std", "elf"], default-features = false }
object = { version = "0.30.0", features = ["read_core", "std", "elf", "write_std"], default-features = false }
ppc750cl = { git = "https://github.com/encounter/ppc750cl", rev = "aa631a33de7882c679afca89350898b87cb3ba3f" }
pretty_env_logger = "0.4.0"
regex = "1.6.0"
sha-1 = "0.10.0"
smallvec = "1.10.0"
topological-sort = "0.2.2"
[build-dependencies]

198
assets/macros.inc Normal file
View File

@ -0,0 +1,198 @@
# General Purpose Registers (GPRs)
.set r0, 0
.set r1, 1
.set r2, 2
.set r3, 3
.set r4, 4
.set r5, 5
.set r6, 6
.set r7, 7
.set r8, 8
.set r9, 9
.set r10, 10
.set r11, 11
.set r12, 12
.set r13, 13
.set r14, 14
.set r15, 15
.set r16, 16
.set r17, 17
.set r18, 18
.set r19, 19
.set r20, 20
.set r21, 21
.set r22, 22
.set r23, 23
.set r24, 24
.set r25, 25
.set r26, 26
.set r27, 27
.set r28, 28
.set r29, 29
.set r30, 30
.set r31, 31
# Floating Point Registers (FPRs)
.set f0, 0
.set f1, 1
.set f2, 2
.set f3, 3
.set f4, 4
.set f5, 5
.set f6, 6
.set f7, 7
.set f8, 8
.set f9, 9
.set f10, 10
.set f11, 11
.set f12, 12
.set f13, 13
.set f14, 14
.set f15, 15
.set f16, 16
.set f17, 17
.set f18, 18
.set f19, 19
.set f20, 20
.set f21, 21
.set f22, 22
.set f23, 23
.set f24, 24
.set f25, 25
.set f26, 26
.set f27, 27
.set f28, 28
.set f29, 29
.set f30, 30
.set f31, 31
# Graphics Quantization Registers (GQRs)
.set qr0, 0
.set qr1, 1
.set qr2, 2
.set qr3, 3
.set qr4, 4
.set qr5, 5
.set qr6, 6
.set qr7, 7
# Special Purpose Registers (SPRs)
.set XER, 1
.set LR, 8
.set CTR, 9
.set DSISR, 18
.set DAR, 19
.set DEC, 22
.set SDR1, 25
.set SRR0, 26
.set SRR1, 27
.set SPRG0, 272
.set SPRG1, 273
.set SPRG2, 274
.set SPRG3, 275
.set EAR, 282
.set PVR, 287
.set IBAT0U, 528
.set IBAT0L, 529
.set IBAT1U, 530
.set IBAT1L, 531
.set IBAT2U, 532
.set IBAT2L, 533
.set IBAT3U, 534
.set IBAT3L, 535
.set DBAT0U, 536
.set DBAT0L, 537
.set DBAT1U, 538
.set DBAT1L, 539
.set DBAT2U, 540
.set DBAT2L, 541
.set DBAT3U, 542
.set DBAT3L, 543
.set GQR0, 912
.set GQR1, 913
.set GQR2, 914
.set GQR3, 915
.set GQR4, 916
.set GQR5, 917
.set GQR6, 918
.set GQR7, 919
.set HID2, 920
.set WPAR, 921
.set DMA_U, 922
.set DMA_L, 923
.set UMMCR0, 936
.set UPMC1, 937
.set UPMC2, 938
.set USIA, 939
.set UMMCR1, 940
.set UPMC3, 941
.set UPMC4, 942
.set USDA, 943
.set MMCR0, 952
.set PMC1, 953
.set PMC2, 954
.set SIA, 955
.set MMCR1, 956
.set PMC3, 957
.set PMC4, 958
.set SDA, 959
.set HID0, 1008
.set HID1, 1009
.set IABR, 1010
.set DABR, 1013
.set L2CR, 1017
.set ICTC, 1019
.set THRM1, 1020
.set THRM2, 1021
.set THRM3, 1022
# Defines a sized symbol with function type.
# Arguments:
#   name       - symbol name; it is always emitted inside double quotes
#   visibility - suffix of the directive applied to the symbol (the macro
#                emits a dot followed by this value); defaults to global
# Usage:
#   .fn my_function, local
#   /* asm here */
#   .endfn my_function
.macro .fn name, visibility=global
.\visibility "\name"
.type "\name", @function
"\name":
.endm
# Closes a symbol opened with .fn: sets the symbol's size to the distance
# between its label and the current location.
.macro .endfn name
.size "\name", . - "\name"
.endm
# Defines a sized symbol with object type.
# Arguments:
#   name       - symbol name; it is always emitted inside double quotes
#   visibility - suffix of the directive applied to the symbol (the macro
#                emits a dot followed by this value); defaults to global
# Usage:
#   .obj my_object, local
#   /* data here */
#   .endobj my_object
.macro .obj name, visibility=global
.\visibility "\name"
.type "\name", @object
"\name":
.endm
# Closes a symbol opened with .obj: sets the symbol's size to the distance
# between its label and the current location.
.macro .endobj name
.size "\name", . - "\name"
.endm
# Defines a sized symbol without a type (no .type directive is emitted).
# Arguments:
#   name       - symbol name; it is always emitted inside double quotes
#   visibility - suffix of the directive applied to the symbol (the macro
#                emits a dot followed by this value); defaults to global
# Usage:
#   .sym my_sym, local
#   /* anything here */
#   .endsym my_sym
.macro .sym name, visibility=global
.\visibility "\name"
"\name":
.endm
# Closes a symbol opened with .sym: sets the symbol's size to the distance
# between its label and the current location.
.macro .endsym name
.size "\name", . - "\name"
.endm
# Generates a relative relocation against a symbol.
# Emits one 4-byte value written as the symbol plus the offset of the label
# from that symbol, so the expression is stated in terms of the named symbol.
# Arguments:
#   name  - symbol the expression is based on (emitted inside double quotes)
#   label - label whose offset from the symbol is added (also quoted)
# Usage:
#   .rel my_function, .L_label
.macro .rel name, label
.4byte "\name" + ("\label" - "\name")
.endm

View File

@ -43,8 +43,7 @@ where T: FromArgs
};
Err(argh::EarlyExit {
output: format!(
"{} -V, --version print version information and exit",
help
"{help} -V, --version print version information and exit"
),
status: Ok(()),
})

View File

@ -18,7 +18,7 @@ pub fn run(args: Args) -> Result<()> {
let options = DemangleOptions { omit_empty_parameters: !args.keep_void };
match demangle(args.symbol.as_str(), &options) {
Some(symbol) => {
println!("{}", symbol);
println!("{symbol}");
Ok(())
}
None => Err(Error::msg("Failed to demangle symbol")),

275
src/cmd/elf.rs Normal file
View File

@ -0,0 +1,275 @@
use std::{
collections::{btree_map::Entry, BTreeMap},
fs,
fs::File,
io::{BufWriter, Write},
};
use anyhow::{Context, Error, Result};
use argh::FromArgs;
use object::{
write::{SectionId, SymbolId},
Object, ObjectSection, ObjectSymbol, RelocationKind, RelocationTarget, SectionFlags,
SectionIndex, SectionKind, SymbolFlags, SymbolKind, SymbolSection,
};
use crate::util::{asm::write_asm, elf::process_elf};
// Arguments of the top-level `elf` command. The command does nothing by itself;
// `run` dispatches on the nested subcommand stored in `command`.
// NOTE: the `///` line below is left untouched because argh's derive uses doc
// comments as help text.
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing ELF files.
#[argh(subcommand, name = "elf")]
pub struct Args {
#[argh(subcommand)]
// The selected `elf` subcommand together with its own arguments.
command: SubCommand,
}
// Subcommands available under `elf`. Each variant wraps the argument struct of
// one subcommand and is matched in `run`.
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand)]
enum SubCommand {
// `elf disasm`: handled by `disasm`.
Disasm(DisasmArgs),
// `elf fixup`: handled by `fixup`.
Fixup(FixupArgs),
}
// Arguments of `elf disasm`. Both values are positional.
// NOTE: the `///` lines are left untouched because argh's derive uses doc
// comments as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Disassembles an ELF file.
#[argh(subcommand, name = "disasm")]
pub struct DisasmArgs {
#[argh(positional)]
/// input file
// Path handed to `process_elf`.
elf_file: String,
#[argh(positional)]
/// output directory
// Directory handed to `write_asm`.
out_dir: String,
}
// Arguments of `elf fixup`. Both values are positional.
// NOTE: the `///` lines are left untouched because argh's derive uses doc
// comments as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Fixes issues with GNU assembler built object files.
#[argh(subcommand, name = "fixup")]
pub struct FixupArgs {
#[argh(positional)]
/// input file
// Object file that `fixup` reads fully into memory and parses.
in_file: String,
#[argh(positional)]
/// output file
// Path `fixup` creates and writes the rebuilt object to.
out_file: String,
}
pub fn run(args: Args) -> Result<()> {
match args.command {
SubCommand::Disasm(c_args) => disasm(c_args),
SubCommand::Fixup(c_args) => fixup(c_args),
}
}
fn disasm(args: DisasmArgs) -> Result<()> {
let obj = process_elf(&args.elf_file)?;
write_asm(&args.out_dir, &obj)?;
for unit in obj.link_order {
let name = format!("$(OBJ_DIR)/asm/{}", file_name_from_unit(&unit));
println!(" {name: <70}\\");
}
Ok(())
}
/// Derives a relative object file name (`.o`) from a translation unit name.
///
/// Steps, in order:
/// 1. drop a leading `C:` drive prefix, if any;
/// 2. drop one trailing source extension (`.c`, `.cp`, `.cpp` or `.s`), if any;
/// 3. turn backslashes into forward slashes;
/// 4. drop a single leading `/`;
/// 5. append `.o`.
fn file_name_from_unit(str: &str) -> String {
    const SOURCE_EXTENSIONS: [&str; 4] = [".c", ".cp", ".cpp", ".s"];

    let without_drive = match str.strip_prefix("C:") {
        Some(rest) => rest,
        None => str,
    };
    let stem = SOURCE_EXTENSIONS
        .iter()
        .find_map(|ext| without_drive.strip_suffix(ext))
        .unwrap_or(without_drive);
    let normalized = stem.replace('\\', "/");
    let relative = match normalized.strip_prefix('/') {
        Some(rest) => rest,
        None => normalized.as_str(),
    };
    format!("{}.o", relative)
}
fn fixup(args: FixupArgs) -> Result<()> {
let in_buf = fs::read(&args.in_file).context("Failed to open input file")?;
let in_file = object::read::File::parse(&*in_buf).context("Failed to parse input ELF")?;
let mut out_file =
object::write::Object::new(in_file.format(), in_file.architecture(), in_file.endianness());
// Write file symbol(s) first
for symbol in in_file.symbols() {
if symbol.kind() != SymbolKind::File {
continue;
}
out_file.add_symbol(to_write_symbol(&symbol, &[])?);
}
// Write section symbols & sections
let mut section_ids: Vec<Option<SectionId>> = vec![];
for section in in_file.sections() {
// Skip empty sections or metadata sections
if section.size() == 0 || section.kind() == SectionKind::Metadata {
section_ids.push(None);
continue;
}
let section_id =
out_file.add_section(vec![], section.name_bytes()?.to_vec(), section.kind());
section_ids.push(Some(section_id));
let out_section = out_file.section_mut(section_id);
if section.kind() == SectionKind::UninitializedData {
out_section.append_bss(section.size(), section.align());
} else {
out_section.set_data(section.uncompressed_data()?.into_owned(), section.align());
}
if has_section_flags(section.flags(), object::elf::SHF_ALLOC)? {
// Generate section symbol
out_file.section_symbol(section_id);
}
}
// Write symbols
let mut symbol_ids: Vec<Option<SymbolId>> = vec![];
let mut addr_to_sym: BTreeMap<SectionId, BTreeMap<u32, SymbolId>> = BTreeMap::new();
for symbol in in_file.symbols() {
// Skip section and file symbols, we wrote them above
if matches!(symbol.kind(), SymbolKind::Section | SymbolKind::File | SymbolKind::Null) {
symbol_ids.push(None);
continue;
}
let out_symbol = to_write_symbol(&symbol, &section_ids)?;
let section_id = out_symbol.section.id();
let symbol_id = out_file.add_symbol(out_symbol);
symbol_ids.push(Some(symbol_id));
if symbol.size() != 0 {
if let Some(section_id) = section_id {
let map = match addr_to_sym.entry(section_id) {
Entry::Vacant(e) => e.insert(BTreeMap::new()),
Entry::Occupied(e) => e.into_mut(),
};
map.insert(symbol.address() as u32, symbol_id);
}
}
}
// Write relocations
for section in in_file.sections() {
let section_id = match section_ids[section.index().0] {
Some(id) => id,
None => continue,
};
for (addr, reloc) in section.relocations() {
let mut symbol = match reloc.target() {
RelocationTarget::Symbol(idx) => match symbol_ids[idx.0] {
Some(id) => id,
None => {
let in_symbol = in_file.symbol_by_index(idx)?;
match in_symbol.kind() {
SymbolKind::Section => {
let section_idx = match in_symbol.section_index() {
Some(id) => id,
None => {
return Err(Error::msg("Missing section for relocation"))
}
};
let section_id = match section_ids[section_idx.0] {
Some(id) => id,
None => {
return Err(Error::msg("Missing section for relocation"))
}
};
out_file.section_symbol(section_id)
}
_ => return Err(Error::msg("Missing symbol for relocation")),
}
}
},
RelocationTarget::Section(idx) => {
let section_id = match section_ids[idx.0] {
Some(id) => id,
None => return Err(Error::msg("Missing section for relocation")),
};
out_file.section_symbol(section_id)
}
RelocationTarget::Absolute => todo!("Absolute relocation target"),
_ => return Err(Error::msg("Invalid relocation target")),
};
let mut addend = reloc.addend();
// Attempt to replace section symbols with direct symbol references
let target_sym = out_file.symbol(symbol);
if target_sym.kind == SymbolKind::Section {
if let Some(new_symbol_id) = target_sym
.section
.id()
.and_then(|id| addr_to_sym.get(&id))
.and_then(|map| map.get(&(addend as u32)))
{
symbol = *new_symbol_id;
addend = 0;
}
}
let kind = match reloc.kind() {
// This is a hack to avoid replacement with a section symbol
// See [`object::write::elf::object::elf_fixup_relocation`]
RelocationKind::Absolute => RelocationKind::Elf(object::elf::R_PPC_ADDR32),
other => other,
};
out_file.add_relocation(section_id, object::write::Relocation {
offset: addr,
size: reloc.size(),
kind,
encoding: reloc.encoding(),
symbol,
addend,
})?;
}
}
let mut out =
BufWriter::new(File::create(&args.out_file).context("Failed to create out file")?);
out_file.write_stream(&mut out).map_err(|e| Error::msg(format!("{e:?}")))?;
out.flush()?;
Ok(())
}
fn to_write_symbol_section(
section: SymbolSection,
section_ids: &[Option<SectionId>],
) -> Result<object::write::SymbolSection> {
Ok(match section {
SymbolSection::None => object::write::SymbolSection::None,
SymbolSection::Absolute => object::write::SymbolSection::Absolute,
SymbolSection::Common => object::write::SymbolSection::Common,
SymbolSection::Section(idx) => match section_ids.get(idx.0).and_then(|opt| *opt) {
Some(section_id) => object::write::SymbolSection::Section(section_id),
None => return Err(Error::msg("Missing symbol section")),
},
_ => object::write::SymbolSection::Undefined,
})
}
fn to_write_symbol_flags(flags: SymbolFlags<SectionIndex>) -> Result<SymbolFlags<SectionId>> {
Ok(match flags {
SymbolFlags::Elf { st_info, st_other } => SymbolFlags::Elf { st_info, st_other },
SymbolFlags::None => SymbolFlags::None,
_ => return Err(Error::msg("Unexpected symbol flags")),
})
}
/// Builds a write-API symbol that mirrors `symbol` from the input object.
///
/// Name, value, size, kind, scope and weakness are copied directly; the section
/// reference and flags go through their respective conversion helpers, and any
/// failure there (or while reading the name) is propagated.
fn to_write_symbol(
    symbol: &object::read::Symbol,
    section_ids: &[Option<SectionId>],
) -> Result<object::write::Symbol> {
    // The fallible pieces are computed in the same order as the struct fields
    // they fill: name, then section, then flags.
    let name = symbol.name_bytes()?.to_vec();
    let value = symbol.address();
    let size = symbol.size();
    let kind = symbol.kind();
    let scope = symbol.scope();
    let weak = symbol.is_weak();
    let section = to_write_symbol_section(symbol.section(), section_ids)?;
    let flags = to_write_symbol_flags(symbol.flags())?;
    Ok(object::write::Symbol { name, value, size, kind, scope, weak, section, flags })
}
fn has_section_flags(flags: SectionFlags, flag: u32) -> Result<bool> {
match flags {
SectionFlags::Elf { sh_flags } => Ok(sh_flags & flag as u64 == flag as u64),
_ => Err(Error::msg("Unexpected section flags")),
}
}

View File

@ -29,8 +29,10 @@ pub struct DolSection {
#[derive(Debug, Clone, Default)]
pub struct DolHeader {
pub text_sections: Vec<DolSection>,
pub data_sections: Vec<DolSection>,
pub text_section_count: usize,
pub data_section_count: usize,
pub text_sections: [DolSection; MAX_TEXT_SECTIONS],
pub data_sections: [DolSection; MAX_DATA_SECTIONS],
pub bss_address: u32,
pub bss_size: u32,
pub entry_point: u32,
@ -38,32 +40,32 @@ pub struct DolHeader {
const MAX_TEXT_SECTIONS: usize = 7;
const MAX_DATA_SECTIONS: usize = 11;
const ZERO_BUF: [u8; 32] = [0u8; 32];
pub fn run(args: Args) -> Result<()> {
let elf_file = File::open(&args.elf_file)
.with_context(|| format!("Failed to open ELF file '{}'", args.elf_file))?;
let map = unsafe { MmapOptions::new().map(&elf_file) }
.with_context(|| format!("Failed to mmap binary: '{}'", args.elf_file))?;
.with_context(|| format!("Failed to mmap ELF file: '{}'", args.elf_file))?;
let obj_file = object::read::File::parse(&*map)?;
match obj_file.architecture() {
Architecture::PowerPc => {}
arch => return Err(Error::msg(format!("Unexpected architecture: {:?}", arch))),
arch => return Err(Error::msg(format!("Unexpected architecture: {arch:?}"))),
};
if obj_file.is_little_endian() {
return Err(Error::msg("Expected big endian"));
}
match obj_file.kind() {
ObjectKind::Executable => {}
kind => return Err(Error::msg(format!("Unexpected ELF type: {:?}", kind))),
kind => return Err(Error::msg(format!("Unexpected ELF type: {kind:?}"))),
}
let mut header = DolHeader { entry_point: obj_file.entry() as u32, ..Default::default() };
let mut offset = 0x100u32;
let mut out = BufWriter::new(
File::create(&args.dol_file)
.with_context(|| format!("Failed to create DOL file '{}'", args.dol_file))?,
);
let mut header = DolHeader { entry_point: obj_file.entry() as u32, ..Default::default() };
let mut offset = 0x100u32;
out.seek(SeekFrom::Start(offset as u64))?;
// Text sections
for section in obj_file.sections() {
@ -72,9 +74,14 @@ pub fn run(args: Args) -> Result<()> {
}
let address = section.address() as u32;
let size = align32(section.size() as u32);
header.text_sections.push(DolSection { offset, address, size });
out.seek(SeekFrom::Start(offset as u64))?;
write_aligned(&mut out, section.data()?)?;
*header.text_sections.get_mut(header.text_section_count).ok_or_else(|| {
Error::msg(format!(
"Too many text sections (while processing '{}')",
section.name().unwrap_or("[error]")
))
})? = DolSection { offset, address, size };
header.text_section_count += 1;
write_aligned(&mut out, section.data()?, size)?;
offset += size;
}
@ -85,9 +92,14 @@ pub fn run(args: Args) -> Result<()> {
}
let address = section.address() as u32;
let size = align32(section.size() as u32);
header.data_sections.push(DolSection { offset, address, size });
out.seek(SeekFrom::Start(offset as u64))?;
write_aligned(&mut out, section.data()?)?;
*header.data_sections.get_mut(header.data_section_count).ok_or_else(|| {
Error::msg(format!(
"Too many data sections (while processing '{}')",
section.name().unwrap_or("[error]")
))
})? = DolSection { offset, address, size };
header.data_section_count += 1;
write_aligned(&mut out, section.data()?, size)?;
offset += size;
}
@ -104,68 +116,50 @@ pub fn run(args: Args) -> Result<()> {
header.bss_size = (address + size) - header.bss_address;
}
if header.text_sections.len() > MAX_TEXT_SECTIONS {
return Err(Error::msg(format!(
"Too many text sections: {} / {}",
header.text_sections.len(),
MAX_TEXT_SECTIONS
)));
}
if header.data_sections.len() > MAX_DATA_SECTIONS {
return Err(Error::msg(format!(
"Too many data sections: {} / {}",
header.data_sections.len(),
MAX_DATA_SECTIONS
)));
}
// Offsets
out.rewind()?;
for section in &header.text_sections {
out.write_all(&section.offset.to_be_bytes())?;
}
out.seek(SeekFrom::Start(0x1c))?;
for section in &header.data_sections {
out.write_all(&section.offset.to_be_bytes())?;
}
// Addresses
out.seek(SeekFrom::Start(0x48))?;
for section in &header.text_sections {
out.write_all(&section.address.to_be_bytes())?;
}
out.seek(SeekFrom::Start(0x64))?;
for section in &header.data_sections {
out.write_all(&section.address.to_be_bytes())?;
}
// Sizes
out.seek(SeekFrom::Start(0x90))?;
for section in &header.text_sections {
out.write_all(&section.size.to_be_bytes())?;
}
out.seek(SeekFrom::Start(0xac))?;
for section in &header.data_sections {
out.write_all(&section.size.to_be_bytes())?;
}
// BSS + entry
out.seek(SeekFrom::Start(0xd8))?;
out.write_all(&header.bss_address.to_be_bytes())?;
out.write_all(&header.bss_size.to_be_bytes())?;
out.write_all(&header.entry_point.to_be_bytes())?;
// Done!
out.flush()?;
Ok(())
}
#[inline]
fn align32(x: u32) -> u32 { (x + 31) & !31 }
const fn align32(x: u32) -> u32 { (x + 31) & !31 }
const ZERO_BUF: [u8; 32] = [0u8; 32];
#[inline]
fn write_aligned<T: Write>(out: &mut T, bytes: &[u8]) -> std::io::Result<()> {
let len = bytes.len() as u32;
let padding = align32(len) - len;
fn write_aligned<T: Write>(out: &mut T, bytes: &[u8], aligned_size: u32) -> std::io::Result<()> {
out.write_all(bytes)?;
let padding = aligned_size - bytes.len() as u32;
if padding > 0 {
out.write_all(&ZERO_BUF[0..padding as usize])?;
}

View File

@ -1,9 +1,9 @@
use std::{fs::File, io::BufReader};
use std::{fs::File, io::BufReader, ops::Range};
use anyhow::{Context, Error, Result};
use argh::FromArgs;
use crate::util::map::{process_map, SymbolEntry, SymbolRef};
use crate::util::map::{process_map, resolve_link_order, SymbolEntry, SymbolRef};
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing CodeWarrior maps.
@ -18,6 +18,9 @@ pub struct Args {
enum SubCommand {
Entries(EntriesArgs),
Symbol(SymbolArgs),
Order(OrderArgs),
Slices(SlicesArgs),
Symbols(SymbolsArgs),
}
#[derive(FromArgs, PartialEq, Eq, Debug)]
@ -44,10 +47,40 @@ pub struct SymbolArgs {
symbol: String,
}
// Arguments of `map order`.
// NOTE: the `///` lines are left untouched because argh's derive uses doc
// comments as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Attempts to resolve global link order.
#[argh(subcommand, name = "order")]
pub struct OrderArgs {
#[argh(positional)]
/// path to input map
// File that `order` opens and feeds to `process_map`.
map_file: String,
}
// Arguments of `map slices`.
// NOTE: the `///` lines are left untouched because argh's derive uses doc
// comments as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Emits a slices.yml for ppcdis. (WIP)
#[argh(subcommand, name = "slices")]
pub struct SlicesArgs {
#[argh(positional)]
/// path to input map
// File that `slices` opens and feeds to `process_map`.
map_file: String,
}
// Arguments of `map symbols`.
// NOTE: the `///` lines are left untouched because argh's derive uses doc
// comments as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Emits a symbols.yml for ppcdis. (WIP)
#[argh(subcommand, name = "symbols")]
pub struct SymbolsArgs {
#[argh(positional)]
/// path to input map
// File that `symbols` opens and feeds to `process_map`.
map_file: String,
}
/// Entry point of the `map` command: hands the parsed arguments of the
/// selected subcommand to its handler and returns that handler's result.
pub fn run(args: Args) -> Result<()> {
    let selected = args.command;
    match selected {
        SubCommand::Entries(sub_args) => entries(sub_args),
        SubCommand::Symbol(sub_args) => symbol(sub_args),
        SubCommand::Order(sub_args) => order(sub_args),
        SubCommand::Slices(sub_args) => slices(sub_args),
        SubCommand::Symbols(sub_args) => symbols(sub_args),
    }
}
@ -138,3 +171,64 @@ fn symbol(args: SymbolArgs) -> Result<()> {
}
Ok(())
}
/// Handles `map order`: processes the map file, resolves the link order from
/// its unit order and prints one unit per line.
fn order(args: OrderArgs) -> Result<()> {
    let map_file = File::open(&args.map_file)
        .with_context(|| format!("Failed to open file '{}'", args.map_file))?;
    let entries = process_map(BufReader::new(map_file))?;
    let resolved = resolve_link_order(&entries.unit_order)?;
    for unit in resolved {
        println!("{unit}");
    }
    Ok(())
}
fn slices(args: SlicesArgs) -> Result<()> {
let reader = BufReader::new(
File::open(&args.map_file)
.with_context(|| format!("Failed to open file '{}'", args.map_file))?,
);
let entries = process_map(reader)?;
let order = resolve_link_order(&entries.unit_order)?;
for unit in order {
let unit_path = if let Some((lib, name)) = unit.split_once(' ') {
format!("{}/{}", lib.strip_suffix(".a").unwrap_or(lib), name)
} else if let Some(strip) = unit.strip_suffix(".o") {
format!("{strip}.c")
} else {
unit.clone()
};
println!("{unit_path}:");
let mut ranges = Vec::<(String, Range<u32>)>::new();
match entries.unit_section_ranges.get(&unit) {
Some(sections) => {
for (name, range) in sections {
ranges.push((name.clone(), range.clone()));
}
}
None => return Err(Error::msg(format!("Failed to locate sections for unit '{unit}'"))),
}
ranges.sort_by(|(_, a), (_, b)| a.start.cmp(&b.start));
for (name, range) in ranges {
println!("\t{}: [{:#010x}, {:#010x}]", name, range.start, range.end);
}
}
Ok(())
}
/// Handles `map symbols`: prints `address: name` for every entry of the map's
/// address-to-symbol table, skipping symbols whose name starts with `@`. (WIP)
fn symbols(args: SymbolsArgs) -> Result<()> {
    let map_file = File::open(&args.map_file)
        .with_context(|| format!("Failed to open file '{}'", args.map_file))?;
    let entries = process_map(BufReader::new(map_file))?;
    for (address, symbol) in entries.address_to_symbol {
        if !symbol.name.starts_with('@') {
            println!("{:#010x}: {}", address, symbol.name);
        }
    }
    Ok(())
}

View File

@ -22,10 +22,10 @@ pub fn run(args: Args) -> Result<()> {
let build_string = std::fs::read_to_string(&args.build_info)
.with_context(|| format!("Failed to read build info string from '{}'", args.build_info))?;
let build_string_trim = build_string.trim_end();
if build_string_trim.as_bytes().len() > BUILD_STRING_MAX {
let build_string_bytes = build_string_trim.as_bytes();
if build_string_bytes.len() > BUILD_STRING_MAX {
return Err(Error::msg(format!(
"Build string '{}' is greater than maximum size of {}",
build_string_trim, BUILD_STRING_MAX
"Build string '{build_string_trim}' is greater than maximum size of {BUILD_STRING_MAX}"
)));
}
@ -40,8 +40,8 @@ pub fn run(args: Args) -> Result<()> {
Some(idx) => idx + BUILD_STRING_TAG.as_bytes().len(),
None => return Err(Error::msg("Failed to find build string tag in binary")),
};
let end = start + build_string_trim.as_bytes().len();
map[start..end].copy_from_slice(build_string_trim.as_bytes());
let end = start + build_string_bytes.len();
map[start..end].copy_from_slice(build_string_bytes);
map[end] = 0;
Ok(())
}

View File

@ -1,4 +1,5 @@
pub(crate) mod demangle;
pub(crate) mod elf;
pub(crate) mod elf2dol;
pub(crate) mod map;
pub(crate) mod metroidbuildinfo;

View File

@ -42,34 +42,34 @@ fn check(args: Args, file: File) -> Result<()> {
for line in reader.lines() {
let line = match line {
Ok(line) => line,
Err(e) => return Err(Error::msg(format!("File read failed: {}", e))),
Err(e) => return Err(Error::msg(format!("File read failed: {e}"))),
};
let (hash, file_name) =
line.split_once(' ').ok_or_else(|| Error::msg(format!("Invalid line: {}", line)))?;
line.split_once(' ').ok_or_else(|| Error::msg(format!("Invalid line: {line}")))?;
let file_name = match file_name.chars().next() {
Some(' ') | Some('*') => &file_name[1..],
_ => return Err(Error::msg(format!("Invalid line: {}", line))),
_ => return Err(Error::msg(format!("Invalid line: {line}"))),
};
let mut hash_bytes = [0u8; 20];
hex::decode_to_slice(hash, &mut hash_bytes)
.with_context(|| format!("Invalid line: {}", line))?;
.with_context(|| format!("Invalid line: {line}"))?;
let file = File::open(file_name)
.with_context(|| format!("Failed to open file '{}'", file_name))?;
let file =
File::open(file_name).with_context(|| format!("Failed to open file '{file_name}'"))?;
let found_hash = file_sha1(file)?;
if hash_bytes == found_hash.as_ref() {
println!("{}: OK", file_name);
println!("{file_name}: OK");
} else {
println!("{}: FAILED", file_name);
println!("{file_name}: FAILED");
mismatches += 1;
}
}
if mismatches != 0 {
eprintln!("WARNING: {} computed checksum did NOT match", mismatches);
eprintln!("WARNING: {mismatches} computed checksum did NOT match");
std::process::exit(1);
}
if let Some(out_path) = args.output {
touch(&out_path).with_context(|| format!("Failed to touch output file '{}'", out_path))?;
touch(&out_path).with_context(|| format!("Failed to touch output file '{out_path}'"))?;
}
Ok(())
}
@ -78,7 +78,7 @@ fn hash(args: Args, file: File) -> Result<()> {
let hash = file_sha1(file)?;
let mut hash_buf = [0u8; 40];
let hash_str = base16ct::lower::encode_str(&hash, &mut hash_buf)
.map_err(|e| Error::msg(format!("Failed to encode hash: {}", e)))?;
.map_err(|e| Error::msg(format!("Failed to encode hash: {e}")))?;
println!("{} {}", hash_str, args.file);
Ok(())
}

View File

@ -17,6 +17,7 @@ struct TopLevel {
#[argh(subcommand)]
enum SubCommand {
Demangle(cmd::demangle::Args),
Elf(cmd::elf::Args),
Elf2Dol(cmd::elf2dol::Args),
Map(cmd::map::Args),
MetroidBuildInfo(cmd::metroidbuildinfo::Args),
@ -24,16 +25,19 @@ enum SubCommand {
}
fn main() {
pretty_env_logger::init();
let args: TopLevel = argh_version::from_env();
let result = match args.command {
SubCommand::Demangle(c_args) => cmd::demangle::run(c_args),
SubCommand::Elf(c_args) => cmd::elf::run(c_args),
SubCommand::Elf2Dol(c_args) => cmd::elf2dol::run(c_args),
SubCommand::Map(c_args) => cmd::map::run(c_args),
SubCommand::MetroidBuildInfo(c_args) => cmd::metroidbuildinfo::run(c_args),
SubCommand::Shasum(c_args) => cmd::shasum::run(c_args),
};
if let Err(e) = result {
eprintln!("{:?}", e);
eprintln!("{e:?}");
std::process::exit(1);
}
}

820
src/util/asm.rs Normal file
View File

@ -0,0 +1,820 @@
use std::{
cmp::{min, Ordering},
collections::{btree_map, hash_map::Entry, BTreeMap, HashMap},
fmt::Display,
fs,
fs::{DirBuilder, File},
io::{BufWriter, Write},
path::Path,
};
use anyhow::{Error, Result};
use ppc750cl::{disasm_iter, Argument, Ins, Opcode};
use crate::util::obj::{
ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlags,
ObjSymbolKind,
};
/// Role a symbol plays at one address in the address-to-symbol table that
/// `write_asm` builds.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum SymbolEntryKind {
/// Recorded at the start address of a symbol whose size is greater than zero.
Start,
/// Recorded at `address + size` of a symbol whose size is greater than zero.
End,
/// Recorded at the address of a symbol whose size is not greater than zero.
Label,
}
/// One entry of the address-to-symbol table built by `write_asm`: which symbol
/// is involved at an address, and in what role.
#[derive(Debug, Copy, Clone)]
struct SymbolEntry {
/// Position of the symbol in the flat `symbols` list collected by `write_asm`.
index: usize,
/// Whether the address is the symbol's start, its end, or a plain label.
kind: SymbolEntryKind,
}
pub fn write_asm<P: AsRef<Path> + Display>(path: P, obj: &ObjInfo) -> Result<()> {
let mut file_map = HashMap::<String, BufWriter<File>>::new();
let asm_dir = path.as_ref().join("asm");
let include_dir = path.as_ref().join("include");
DirBuilder::new().recursive(true).create(&include_dir)?;
fs::write(&include_dir.join("macros.inc"), include_bytes!("../../assets/macros.inc"))?;
for unit in &obj.link_order {
let w = match file_map.entry(unit.clone()) {
Entry::Occupied(_) => {
return Err(Error::msg(format!("Duplicate file {unit}")));
}
Entry::Vacant(e) => {
let file_path = asm_dir.join(file_name_from_unit(unit));
if let Some(parent) = file_path.parent() {
DirBuilder::new().recursive(true).create(parent)?;
}
e.insert(BufWriter::new(File::create(file_path)?))
}
};
writeln!(w, ".include \"macros.inc\"")?;
writeln!(w, ".file \"{}\"", unit.replace('\\', "\\\\"))?;
}
let mut symbols = Vec::<ObjSymbol>::new();
let mut addr_sym = BTreeMap::<u32, Vec<SymbolEntry>>::new();
for section in &obj.sections {
for symbol in &section.symbols {
let symbol_index = symbols.len();
symbols.push(symbol.clone());
let symbol_start = symbol.address as u32;
let symbol_end = (symbol.address + symbol.size) as u32;
if symbol.size > 0 {
match addr_sym.entry(symbol_start) {
btree_map::Entry::Occupied(mut e) => {
e.get_mut().push(SymbolEntry {
index: symbol_index,
kind: SymbolEntryKind::Start,
});
}
btree_map::Entry::Vacant(e) => {
e.insert(vec![SymbolEntry {
index: symbol_index,
kind: SymbolEntryKind::Start,
}]);
}
}
match addr_sym.entry(symbol_end) {
btree_map::Entry::Occupied(mut e) => {
// Always push first
e.get_mut().insert(0, SymbolEntry {
index: symbol_index,
kind: SymbolEntryKind::End,
});
}
btree_map::Entry::Vacant(e) => {
e.insert(vec![SymbolEntry {
index: symbol_index,
kind: SymbolEntryKind::End,
}]);
}
}
} else {
match addr_sym.entry(symbol_start) {
btree_map::Entry::Occupied(mut e) => {
e.get_mut().push(SymbolEntry {
index: symbol_index,
kind: SymbolEntryKind::Label,
});
}
btree_map::Entry::Vacant(e) => {
e.insert(vec![SymbolEntry {
index: symbol_index,
kind: SymbolEntryKind::Label,
}]);
}
}
}
}
// Generate labels for .text relocations
for reloc in &section.relocations {
let target_section = match &reloc.target_section {
Some(v) => v,
None => continue,
};
let section = match obj.sections.iter().find(|s| &s.name == target_section) {
Some(v) => v,
None => continue,
};
match section.kind {
ObjSectionKind::Code => {}
_ => continue,
}
if reloc.target.addend == 0 {
continue;
}
let address = (reloc.target.address as i64 + reloc.target.addend) as u64;
let vec = match addr_sym.entry(address as u32) {
btree_map::Entry::Occupied(e) => e.into_mut(),
btree_map::Entry::Vacant(e) => e.insert(vec![]),
};
if !vec
.iter()
.any(|e| e.kind == SymbolEntryKind::Label || e.kind == SymbolEntryKind::Start)
{
let symbol_index = symbols.len();
symbols.push(ObjSymbol {
name: format!(".L_{address:8X}"),
demangled_name: None,
address,
section_address: address - section.address,
size: 0,
size_known: true,
flags: Default::default(),
addend: 0,
kind: ObjSymbolKind::Unknown,
});
vec.push(SymbolEntry { index: symbol_index, kind: SymbolEntryKind::Label });
}
}
// Generate local jump labels
for ins in disasm_iter(&section.data, section.address as u32) {
if let Some(address) = ins.branch_dest() {
let section =
match obj.sections.iter().find(|s| {
s.address <= address as u64 && (s.address + s.size) > address as u64
}) {
Some(s) => s,
None => continue,
};
let vec = match addr_sym.entry(address) {
btree_map::Entry::Occupied(e) => e.into_mut(),
btree_map::Entry::Vacant(e) => e.insert(vec![]),
};
if !vec
.iter()
.any(|e| e.kind == SymbolEntryKind::Label || e.kind == SymbolEntryKind::Start)
{
let symbol_index = symbols.len();
symbols.push(ObjSymbol {
name: format!(".L_{address:8X}"),
demangled_name: None,
address: address as u64,
section_address: address as u64 - section.address,
size: 0,
size_known: true,
flags: Default::default(),
addend: 0,
kind: ObjSymbolKind::Unknown,
});
vec.push(SymbolEntry { index: symbol_index, kind: SymbolEntryKind::Label });
}
}
}
}
for section in &obj.sections {
log::info!(
"Writing section {} ({:#10X} - {:#10X})",
section.name,
section.address,
section.address + section.size
);
let mut current_address = section.address as u32;
let section_end = (section.address + section.size) as u32;
let mut file_iter = obj.splits.range(current_address..).peekable();
let mut relocations = BTreeMap::<u32, ObjReloc>::new();
for reloc in &section.relocations {
let address = reloc.address as u32;
match relocations.entry(address) {
btree_map::Entry::Vacant(e) => {
e.insert(reloc.clone());
}
btree_map::Entry::Occupied(_) => {
return Err(Error::msg(format!("Duplicate relocation @ {address:#10X}")));
}
}
}
let mut subsection = 0;
let mut current_unit = String::new();
loop {
if current_address >= section_end {
break;
}
let (file_addr, unit) = match file_iter.next() {
Some((addr, unit)) => (*addr, unit),
None => return Err(Error::msg("No file found")),
};
if file_addr > current_address {
return Err(Error::msg(format!(
"Gap in files: {} @ {:#10X}, {} @ {:#10X}",
section.name, section.address, unit, file_addr
)));
}
let mut file_end = section_end;
if let Some((next_addr, _)) = file_iter.peek() {
file_end = min(**next_addr, section_end);
}
if unit == &current_unit {
subsection += 1;
} else {
current_unit = unit.clone();
subsection = 0;
}
let w = write_section_header(
&mut file_map,
unit,
section,
subsection,
current_address,
file_end,
)?;
match section.kind {
ObjSectionKind::Code | ObjSectionKind::Data => {
write_data(
w,
&symbols,
&addr_sym,
&relocations,
section,
current_address,
file_end,
)?;
}
ObjSectionKind::Bss => {
write_bss(w, &symbols, &addr_sym, current_address, file_end)?;
}
}
current_address = file_end;
}
}
for (_, mut w) in file_map {
w.flush()?;
}
Ok(())
}
/// Disassembles `data` (located at `address`) and writes one line per instruction.
///
/// Each instruction is written with the relocation recorded at its address, if any.
/// Branches without a usable relocation are pointed at a label or symbol start known
/// for their destination.
fn write_code_chunk(
    w: &mut BufWriter<File>,
    symbols: &[ObjSymbol],
    sym_map: &BTreeMap<u32, Vec<SymbolEntry>>,
    relocations: &BTreeMap<u32, ObjReloc>,
    section: &ObjSection,
    address: u32,
    data: &[u8],
) -> Result<()> {
    for ins in disasm_iter(data, address) {
        // HACK: GCC-built objects generate section-relative jump relocations,
        // which aren't always possible to express in GNU assembler accurately,
        // specifically when dealing with multiple sections with the same name.
        // Use a (hacky) heuristic to drop them so a local label jump is generated below.
        let explicit = relocations.get(&ins.addr).filter(|rel| {
            rel.target.addend == 0
                || !matches!(rel.kind, ObjRelocKind::PpcRel14 | ObjRelocKind::PpcRel24)
        });

        // Without a relocation, a branch is "relocated" to a known label at its
        // destination, preferring plain labels over symbol starts.
        let synthesized = match explicit {
            Some(_) => None,
            None => ins
                .branch_dest()
                .and_then(|dest| sym_map.get(&dest))
                .and_then(|entries| {
                    entries
                        .iter()
                        .find(|e| e.kind == SymbolEntryKind::Label)
                        .or_else(|| entries.iter().find(|e| e.kind == SymbolEntryKind::Start))
                })
                .map(|entry| ObjReloc {
                    kind: ObjRelocKind::Absolute,
                    address: ins.addr as u64,
                    target: symbols[entry.index].clone(),
                    target_section: None,
                }),
        };

        let file_offset = section.file_offset + (ins.addr as u64 - section.address);
        write_ins(w, ins, explicit.or(synthesized.as_ref()), file_offset)?;
    }
    Ok(())
}
/// Writes one instruction line: an address/offset/bytes comment followed by the
/// disassembly, or a raw `.4byte` when the instruction cannot be emitted as a mnemonic.
///
/// When `reloc` is present it replaces immediate, offset and branch-destination operands.
fn write_ins(
    w: &mut BufWriter<File>,
    ins: Ins,
    reloc: Option<&ObjReloc>,
    file_offset: u64,
) -> Result<()> {
    let [b0, b1, b2, b3] = ins.code.to_be_bytes();
    write!(
        w,
        "/* {:08X} {:08X} {:02X} {:02X} {:02X} {:02X} */\t",
        ins.addr, file_offset, b0, b1, b2, b3
    )?;

    if ins.op == Opcode::Illegal {
        write!(w, ".4byte {:#010X} /* invalid */", ins.code)?;
        writeln!(w)?;
        return Ok(());
    }

    let emit_raw = is_illegal_instruction(ins.code);
    let sins = ins.simplified();
    if emit_raw {
        write!(w, ".4byte {:#010X} /* illegal: {} */", sins.ins.code, sins)?;
        writeln!(w)?;
        return Ok(());
    }

    write!(w, "{}{}", sins.mnemonic, sins.ins.suffix())?;
    // True while the operand following an offset (the one inside the parentheses)
    // is still to be written.
    let mut inside_offset = false;
    for (idx, arg) in sins.args.iter().enumerate() {
        if !inside_offset {
            write!(w, "{}", if idx == 0 { " " } else { ", " })?;
        }

        let is_offset = matches!(arg, Argument::Offset(_));
        let relocatable = is_offset
            || matches!(arg, Argument::Uimm(_) | Argument::Simm(_) | Argument::BranchDest(_));
        match reloc {
            Some(rel) if relocatable => write_reloc(w, rel)?,
            _ => write!(w, "{arg}")?,
        }

        if is_offset {
            // The next operand is written inside the parentheses without a separator.
            write!(w, "(")?;
            inside_offset = true;
        } else if inside_offset {
            write!(w, ")")?;
            inside_offset = false;
        }
    }
    writeln!(w)?;
    Ok(())
}
/// Writes the relocation target (symbol plus addend) followed by the assembler
/// suffix that corresponds to the relocation kind, if it has one.
fn write_reloc(w: &mut BufWriter<File>, reloc: &ObjReloc) -> Result<()> {
    write_symbol(w, &reloc.target)?;
    let suffix = match reloc.kind {
        // These kinds are written as the bare symbol expression.
        ObjRelocKind::Absolute | ObjRelocKind::PpcRel24 | ObjRelocKind::PpcRel14 => "",
        ObjRelocKind::PpcAddr16Hi => "@h",
        ObjRelocKind::PpcAddr16Ha => "@ha",
        ObjRelocKind::PpcAddr16Lo => "@l",
        ObjRelocKind::PpcEmbSda21 => "@sda21",
    };
    if !suffix.is_empty() {
        write!(w, "{suffix}")?;
    }
    Ok(())
}
/// Writes the directive or label line for one symbol entry.
///
/// Symbols named by `is_skip_symbol` are not written at all. Symbols named by
/// `is_linker_symbol` are wrapped in a block comment spanning from their start entry
/// to their end entry.
///
/// # Panics
///
/// Panics if the referenced symbol carries a non-zero addend.
fn write_symbol_entry(
    w: &mut BufWriter<File>,
    symbols: &[ObjSymbol],
    entry: &SymbolEntry,
) -> Result<()> {
    let symbol = &symbols[entry.index];
    assert_eq!(symbol.addend, 0);

    // Skip writing certain symbols
    if is_skip_symbol(&symbol.name) {
        return Ok(());
    }

    // Comment out linker-generated symbols
    let opens_comment = entry.kind == SymbolEntryKind::Start && is_linker_symbol(&symbol.name);
    if opens_comment {
        writeln!(w, "\n/* Linker generated")?;
    }

    let directive = match symbol.kind {
        ObjSymbolKind::Function => "fn",
        ObjSymbolKind::Object => "obj",
        ObjSymbolKind::Unknown => "sym",
    };
    let flags = &symbol.flags.0;
    let visibility = if flags.contains(ObjSymbolFlags::Weak) {
        "weak"
    } else if flags.contains(ObjSymbolFlags::Global) {
        "global"
    } else {
        "local"
    };

    match entry.kind {
        SymbolEntryKind::Label if symbol.name.starts_with(".L") => {
            // Local labels are emitted as plain `name:` lines.
            write_symbol_name(w, &symbol.name)?;
            writeln!(w, ":")?;
        }
        SymbolEntryKind::Label => {
            write!(w, ".sym ")?;
            write_symbol_name(w, &symbol.name)?;
            writeln!(w, ", {visibility}")?;
        }
        SymbolEntryKind::Start => {
            // The comment opener already started with a blank line.
            if !opens_comment {
                writeln!(w)?;
            }
            if let Some(name) = &symbol.demangled_name {
                writeln!(w, "# {name}")?;
            }
            write!(w, ".{directive} ")?;
            write_symbol_name(w, &symbol.name)?;
            writeln!(w, ", {visibility}")?;
        }
        SymbolEntryKind::End => {
            write!(w, ".end{directive} ")?;
            write_symbol_name(w, &symbol.name)?;
            writeln!(w)?;
            // Close the comment opened at the start of a linker-generated symbol.
            if is_linker_symbol(&symbol.name) {
                writeln!(w, "*/")?;
            }
        }
    }
    Ok(())
}
/// Writes the address range `start..end` of a code or data section.
///
/// The range is walked from symbol entry to symbol entry and relocation to relocation.
/// Bytes belonging to function symbols are written as code; everything else is written
/// as raw data, with relocated words written as symbol references.
fn write_data(
    w: &mut BufWriter<File>,
    symbols: &[ObjSymbol],
    sym_map: &BTreeMap<u32, Vec<SymbolEntry>>,
    relocations: &BTreeMap<u32, ObjReloc>,
    section: &ObjSection,
    start: u32,
    end: u32,
) -> Result<()> {
    let mut pending_syms = sym_map.range(start..end);
    let mut pending_relocs = relocations.range(start..end);
    let mut next_sym = pending_syms.next();
    let mut next_reloc = pending_relocs.next();

    let section_base = section.address as u32;
    let mut cursor = start;
    let mut active_kind = ObjSymbolKind::Unknown;
    let mut at_start = true;
    while cursor != end {
        if let Some((_, entries)) = next_sym.filter(|(addr, _)| **addr == cursor) {
            for entry in entries {
                // An end marker at the very first address belongs to a symbol
                // that finished before this range.
                if at_start && entry.kind == SymbolEntryKind::End {
                    continue;
                }
                write_symbol_entry(w, symbols, entry)?;
            }
            active_kind = find_symbol_kind(active_kind, symbols, entries)?;
            next_sym = pending_syms.next();
        }
        at_start = false;

        // Fall back to the section kind while no typed symbol has been seen.
        let effective_kind = match active_kind {
            ObjSymbolKind::Unknown => match section.kind {
                ObjSectionKind::Code => ObjSymbolKind::Function,
                ObjSectionKind::Data | ObjSectionKind::Bss => ObjSymbolKind::Object,
            },
            known => known,
        };

        if let Some((_, r)) = next_reloc.filter(|(addr, _)| **addr == cursor) {
            next_reloc = pending_relocs.next();
            match effective_kind {
                ObjSymbolKind::Object => {
                    cursor = write_data_reloc(w, symbols, sym_map, r)?;
                    continue;
                }
                ObjSymbolKind::Function => {
                    // handled in write_code_chunk
                }
                ObjSymbolKind::Unknown => unreachable!(),
            }
        }

        // Write up to the next symbol entry or relocation, whichever comes first.
        let until = match (next_sym, next_reloc) {
            (Some((sym_addr, _)), Some((reloc_addr, _))) => min(*reloc_addr, *sym_addr),
            (Some((addr, _)), None) | (None, Some((addr, _))) => *addr,
            (None, None) => end,
        };
        let data =
            &section.data[(cursor - section_base) as usize..(until - section_base) as usize];
        if effective_kind == ObjSymbolKind::Function {
            if cursor & 3 != 0 || data.len() & 3 != 0 {
                return Err(Error::msg(format!(
                    "Unaligned code write @ {:#010X} size {:#X}",
                    cursor,
                    data.len()
                )));
            }
            write_code_chunk(w, symbols, sym_map, relocations, section, cursor, data)?;
        } else {
            write_data_chunk(w, data)?;
        }
        cursor = until;
    }

    // Write end of symbols
    let closing = sym_map
        .get(&end)
        .into_iter()
        .flatten()
        .filter(|entry| entry.kind == SymbolEntryKind::End);
    for entry in closing {
        write_symbol_entry(w, symbols, entry)?;
    }
    Ok(())
}
/// Determines the symbol kind in effect after the given entries.
///
/// Only start entries whose symbol has a known kind are considered. Returns `current`
/// when there are none, and an error when two of them disagree.
fn find_symbol_kind(
    current: ObjSymbolKind,
    symbols: &[ObjSymbol],
    entries: &Vec<SymbolEntry>,
) -> Result<ObjSymbolKind> {
    let typed_starts = entries
        .iter()
        .filter(|entry| entry.kind == SymbolEntryKind::Start)
        .map(|entry| symbols[entry.index].kind)
        .filter(|kind| *kind != ObjSymbolKind::Unknown);

    let mut kind = current;
    let mut found = false;
    for new_kind in typed_starts {
        if found && new_kind != kind {
            return Err(Error::msg(format!(
                "Conflicting symbol kinds found: {kind:?} and {new_kind:?}"
            )));
        }
        kind = new_kind;
        found = true;
    }
    Ok(kind)
}
/// Writes raw bytes as big-endian data directives, four bytes per line.
///
/// A trailing group of fewer than four bytes is written as `.byte`/`.2byte`.
fn write_data_chunk(w: &mut BufWriter<File>, data: &[u8]) -> Result<()> {
    for chunk in data.chunks(4) {
        match *chunk {
            [a, b, c, d] => {
                let word = u32::from_be_bytes([a, b, c, d]);
                writeln!(w, "\t.4byte {word:#010X}")?;
            }
            [a, b, c] => {
                writeln!(w, "\t.byte {:#04X}, {:#04X}, {:#04X}", a, b, c)?;
            }
            [a, b] => {
                writeln!(w, "\t.2byte {:#06X}", u16::from_be_bytes([a, b]))?;
            }
            [a] => {
                writeln!(w, "\t.byte {:#04X}", a)?;
            }
            // `chunks(4)` only yields slices of one to four bytes.
            _ => unreachable!(),
        }
    }
    Ok(())
}
fn write_data_reloc(
w: &mut BufWriter<File>,
symbols: &[ObjSymbol],
sym_map: &BTreeMap<u32, Vec<SymbolEntry>>,
reloc: &ObjReloc,
) -> Result<u32> {
Ok(match reloc.kind {
ObjRelocKind::Absolute => {
// Attempt to use .rel macro for relative relocations
if reloc.target.addend != 0 {
let target_addr = (reloc.target.address as i64 + reloc.target.addend) as u32;
if let Some(entry) = sym_map
.get(&target_addr)
.and_then(|entries| entries.iter().find(|e| e.kind == SymbolEntryKind::Label))
{
let symbol = &symbols[entry.index];
write!(w, "\t.rel ")?;
write_symbol_name(w, &reloc.target.name)?;
write!(w, ", ")?;
write_symbol_name(w, &symbol.name)?;
writeln!(w)?;
return Ok((reloc.address + 4) as u32);
}
}
write!(w, "\t.4byte ")?;
write_symbol(w, &reloc.target)?;
writeln!(w)?;
(reloc.address + 4) as u32
}
_ => todo!(),
})
}
/// Writes the address range `start..end` of a BSS section: symbol directives
/// separated by `.skip` directives covering the space between them.
fn write_bss(
    w: &mut BufWriter<File>,
    symbols: &[ObjSymbol],
    sym_map: &BTreeMap<u32, Vec<SymbolEntry>>,
    start: u32,
    end: u32,
) -> Result<()> {
    let mut pending = sym_map.range(start..end);
    let mut upcoming = pending.next();
    let mut cursor = start;
    let mut at_start = true;
    while cursor != end {
        if let Some((_, entries)) = upcoming.filter(|(addr, _)| **addr == cursor) {
            for entry in entries {
                // An end marker at the very first address belongs to a symbol
                // that finished before this range.
                if at_start && entry.kind == SymbolEntryKind::End {
                    continue;
                }
                write_symbol_entry(w, symbols, entry)?;
            }
            upcoming = pending.next();
        }
        at_start = false;

        // Reserve everything up to the next symbol entry (or the end of the range).
        let until = match upcoming {
            Some((addr, _)) => *addr,
            None => end,
        };
        let gap = until - cursor;
        if gap > 0 {
            writeln!(w, "\t.skip {gap:#X}")?;
        }
        cursor = until;
    }

    // Write end of symbols
    let closing = sym_map
        .get(&end)
        .into_iter()
        .flatten()
        .filter(|entry| entry.kind == SymbolEntryKind::End);
    for entry in closing {
        write_symbol_entry(w, symbols, entry)?;
    }
    Ok(())
}
/// Derives the relative output file name (`*.s`) for a translation unit name.
///
/// A leading `C:` and one trailing source extension (`.c`, `.cp`, `.cpp`, `.s`) are
/// removed, backslashes become forward slashes, a single leading `/` is dropped and
/// `.s` is appended.
fn file_name_from_unit(str: &str) -> String {
    // Checked in this order; the first matching suffix is removed.
    const SOURCE_EXTENSIONS: [&str; 4] = [".c", ".cp", ".cpp", ".s"];

    let without_drive = str.strip_prefix("C:").unwrap_or(str);
    let stem = SOURCE_EXTENSIONS
        .iter()
        .find_map(|ext| without_drive.strip_suffix(*ext))
        .unwrap_or(without_drive);
    let normalized = stem.replace('\\', "/");
    let relative = normalized.strip_prefix('/').unwrap_or(&normalized);
    format!("{relative}.s")
}
fn write_section_header<'a>(
file_map: &'a mut HashMap<String, BufWriter<File>>,
unit: &String,
section: &ObjSection,
subsection: usize,
start: u32,
end: u32,
) -> Result<&'a mut BufWriter<File>> {
let w = file_map
.get_mut(unit)
.ok_or_else(|| Error::msg(format!("Failed to locate file for {unit}")))?;
writeln!(w, "\n# {start:#10X} - {end:#10X}")?;
let alignment = match section.name.as_str() {
".text" if subsection == 0 => {
write!(w, "{}", section.name)?;
4
}
".data" | ".bss" | ".rodata" if subsection == 0 => {
write!(w, "{}", section.name)?;
8
}
".text" | ".init" => {
write!(w, ".section {}", section.name)?;
write!(w, ", \"ax\"")?;
4
}
".data" | ".sdata" => {
write!(w, ".section {}", section.name)?;
write!(w, ", \"wa\"")?;
8
}
".rodata" | ".sdata2" => {
write!(w, ".section {}", section.name)?;
write!(w, ", \"a\"")?;
8
}
".bss" | ".sbss" => {
write!(w, ".section {}", section.name)?;
write!(w, ", \"wa\", @nobits")?;
8
}
".sbss2" => {
write!(w, ".section {}", section.name)?;
write!(w, ", \"a\", @nobits")?;
8
}
".ctors" | ".dtors" | "extab" | "extabindex" => {
write!(w, ".section {}", section.name)?;
write!(w, ", \"a\"")?;
4
}
name => todo!("Section {}", name),
};
if subsection != 0 {
write!(w, ", unique, {subsection}")?;
}
writeln!(w)?;
if alignment != 0 {
writeln!(w, ".balign {alignment}")?;
}
Ok(w)
}
/// Writes a symbol expression: the (possibly quoted) name followed by a signed
/// hexadecimal addend when the addend is non-zero.
fn write_symbol(w: &mut BufWriter<File>, sym: &ObjSymbol) -> std::io::Result<()> {
    write_symbol_name(w, &sym.name)?;
    let addend = sym.addend;
    if addend > 0 {
        write!(w, "+{:#X}", addend)
    } else if addend < 0 {
        write!(w, "-{:#X}", -addend)
    } else {
        Ok(())
    }
}
/// Writes a symbol name, wrapped in double quotes when it contains `@` or `<`.
fn write_symbol_name(w: &mut BufWriter<File>, name: &str) -> std::io::Result<()> {
    // TODO more?
    let needs_quotes = name.chars().any(|c| matches!(c, '@' | '<'));
    if needs_quotes {
        write!(w, "\"{name}\"")
    } else {
        write!(w, "{name}")
    }
}
/// Returns `true` for symbols that must not be written at all.
#[inline]
fn is_skip_symbol(name: &str) -> bool {
    // Avoid generating these, they span across files
    name == "_ctors" || name == "_dtors"
}
/// Returns `true` for the symbol names that are written inside a
/// "Linker generated" block comment.
#[inline]
fn is_linker_symbol(name: &str) -> bool {
    const LINKER_SYMBOLS: [&str; 3] = ["_eti_init_info", "_rom_copy_info", "_bss_init_info"];
    LINKER_SYMBOLS.contains(&name)
}
/// Returns `true` for the specific instruction encodings that are written as raw
/// `.4byte` words instead of as mnemonics.
#[inline]
fn is_illegal_instruction(code: u32) -> bool {
    const BC_24_LT_0: u32 = 0x43000000; // bc 24, lt, 0x0
    const LMW_R0_0_R3: u32 = 0xB8030000; // lmw r0, 0(r3)
    code == BC_24_LT_0 || code == LMW_R0_0_R3
}

390
src/util/elf.rs Normal file
View File

@ -0,0 +1,390 @@
use std::{collections::BTreeMap, fmt::Display, fs::File, path::Path};
use anyhow::{Context, Error, Result};
use cwdemangle::demangle;
use flagset::Flags;
use indexmap::IndexMap;
use memmap2::MmapOptions;
use object::{
elf::{
R_PPC_ADDR16_HA, R_PPC_ADDR16_HI, R_PPC_ADDR16_LO, R_PPC_EMB_SDA21, R_PPC_REL14,
R_PPC_REL24,
},
Architecture, Object, ObjectKind, ObjectSection, ObjectSymbol, Relocation, RelocationKind,
RelocationTarget, Section, SectionKind, Symbol, SymbolKind, SymbolSection,
};
use crate::util::obj::{
ObjArchitecture, ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol,
ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind,
};
pub fn process_elf<P: AsRef<Path> + Display>(path: P) -> Result<ObjInfo> {
let elf_file =
File::open(&path).with_context(|| format!("Failed to open ELF file '{path}'"))?;
let map = unsafe { MmapOptions::new().map(&elf_file) }
.with_context(|| format!("Failed to mmap ELF file: '{path}'"))?;
let obj_file = object::read::File::parse(&*map)?;
let architecture = match obj_file.architecture() {
Architecture::PowerPc => ObjArchitecture::PowerPc,
arch => return Err(Error::msg(format!("Unexpected architecture: {arch:?}"))),
};
if obj_file.is_little_endian() {
return Err(Error::msg("Expected big endian"));
}
match obj_file.kind() {
ObjectKind::Executable => {}
kind => return Err(Error::msg(format!("Unexpected ELF type: {kind:?}"))),
}
let mut stack_address: Option<u32> = None;
let mut stack_end: Option<u32> = None;
let mut db_stack_addr: Option<u32> = None;
let mut arena_lo: Option<u32> = None;
let mut arena_hi: Option<u32> = None;
let mut common: Vec<ObjSymbol> = vec![];
let mut current_file: Option<String> = None;
let mut section_starts = IndexMap::<String, Vec<(u64, String)>>::new();
for symbol in obj_file.symbols() {
// Locate linker-generated symbols
let symbol_name = symbol.name()?;
match symbol_name {
"_stack_addr" => {
stack_address = Some(symbol.address() as u32);
continue;
}
"_stack_end" => {
stack_end = Some(symbol.address() as u32);
continue;
}
"_db_stack_addr" => {
db_stack_addr = Some(symbol.address() as u32);
continue;
}
"__ArenaLo" => {
arena_lo = Some(symbol.address() as u32);
continue;
}
"__ArenaHi" => {
arena_hi = Some(symbol.address() as u32);
continue;
}
_ => {}
}
match symbol.kind() {
// Detect file boundaries
SymbolKind::File => {
let file_name = symbol_name.to_string();
match section_starts.entry(file_name.clone()) {
indexmap::map::Entry::Occupied(_) => {
return Err(Error::msg(format!("Duplicate file name: {file_name}")));
}
indexmap::map::Entry::Vacant(e) => e.insert(Default::default()),
};
current_file = Some(file_name);
continue;
}
// Detect sections within a file
SymbolKind::Section => {
if let Some(file_name) = &current_file {
let sections = match section_starts.get_mut(file_name) {
Some(entries) => entries,
None => return Err(Error::msg("Failed to create entry")),
};
let section_index = symbol
.section_index()
.ok_or_else(|| Error::msg("Section symbol without section"))?;
let section = obj_file.section_by_index(section_index)?;
let section_name = section.name()?.to_string();
sections.push((symbol.address(), section_name));
};
continue;
}
// Sometimes, the section symbol address is 0,
// so attempt to detect it from first symbol within section
SymbolKind::Data | SymbolKind::Text => {
if let Some(file_name) = &current_file {
let section_map = match section_starts.get_mut(file_name) {
Some(entries) => entries,
None => return Err(Error::msg("Failed to create entry")),
};
let section_index = symbol
.section_index()
.ok_or_else(|| Error::msg("Section symbol without section"))?;
let section = obj_file.section_by_index(section_index)?;
let section_name = section.name()?;
if let Some((addr, _)) =
section_map.iter_mut().find(|(_, name)| name == section_name)
{
if *addr == 0 {
*addr = symbol.address();
}
};
};
continue;
}
_ => match symbol.section() {
// Linker generated symbols indicate the end
SymbolSection::Absolute => {
current_file = None;
}
SymbolSection::Section(_) | SymbolSection::Undefined => {}
_ => todo!("Symbol section type {:?}", symbol),
},
}
// Keep track of common symbols
if !symbol.is_common() {
continue;
}
common.push(to_obj_symbol(&obj_file, &symbol, 0)?);
}
// Link order is trivially deduced
let mut link_order = Vec::<String>::new();
for file_name in section_starts.keys() {
link_order.push(file_name.clone());
}
// Create a map of address -> file splits
let mut splits = BTreeMap::<u32, String>::new();
for (file_name, sections) in section_starts {
for (address, _) in sections {
splits.insert(address as u32, file_name.clone());
}
}
let mut sections: Vec<ObjSection> = vec![];
for section in obj_file.sections() {
let section_index = section.index();
let section_kind = match section.kind() {
SectionKind::Text => ObjSectionKind::Code,
SectionKind::Data => ObjSectionKind::Data,
SectionKind::ReadOnlyData => ObjSectionKind::Data,
SectionKind::UninitializedData => ObjSectionKind::Bss,
_ => continue,
};
let name = section.name()?;
log::info!("Processing section {}", name);
let data = section.uncompressed_data()?.to_vec();
// Generate symbols
let mut symbols: Vec<ObjSymbol> = vec![];
for symbol in obj_file.symbols() {
if !matches!(symbol.section_index(), Some(idx) if idx == section_index) {
continue;
}
if symbol.address() == 0 || symbol.name()?.is_empty() {
continue;
}
symbols.push(to_obj_symbol(&obj_file, &symbol, 0)?);
}
// Generate relocations
let mut relocations: Vec<ObjReloc> = vec![];
for (address, reloc) in section.relocations() {
relocations.push(to_obj_reloc(&obj_file, &section, &data, address, reloc)?);
}
let file_offset = section.file_range().map(|(v, _)| v).unwrap_or_default();
sections.push(ObjSection {
name: name.to_string(),
kind: section_kind,
address: section.address(),
size: section.size(),
data,
index: sections.len(),
symbols,
relocations,
file_offset,
});
}
Ok(ObjInfo {
architecture,
path: path.as_ref().to_path_buf(),
sections,
common,
entry: obj_file.entry() as u32,
stack_address,
stack_end,
db_stack_addr,
arena_lo,
arena_hi,
splits,
link_order,
})
}
fn to_obj_symbol(
obj_file: &object::File<'_>,
symbol: &Symbol<'_, '_>,
addend: i64,
) -> Result<ObjSymbol> {
let section = match symbol.section_index() {
Some(idx) => Some(obj_file.section_by_index(idx)?),
None => None,
};
let name = match symbol.kind() {
SymbolKind::Section => {
if let Some(section) = &section {
section.name()?
} else {
return Err(Error::msg("Section symbol without section"));
}
}
_ => symbol.name()?,
};
if name.is_empty() {
return Err(Error::msg("Empty symbol name"));
}
let mut flags = ObjSymbolFlagSet(ObjSymbolFlags::none());
if symbol.is_global() {
flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Global);
}
if symbol.is_local() {
flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Local);
}
if symbol.is_common() {
flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Common);
}
if symbol.is_weak() {
flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Weak);
}
let section_address = if let Some(section) = &section {
symbol.address() - section.address()
} else {
symbol.address()
};
Ok(ObjSymbol {
name: name.to_string(),
demangled_name: demangle(name, &Default::default()),
address: symbol.address(),
section_address,
size: symbol.size(),
size_known: symbol.size() != 0,
flags,
addend,
kind: match symbol.kind() {
SymbolKind::Text => ObjSymbolKind::Function,
SymbolKind::Data => ObjSymbolKind::Object,
_ => ObjSymbolKind::Unknown,
},
})
}
fn to_obj_reloc(
obj_file: &object::File<'_>,
_section: &Section<'_, '_>,
section_data: &[u8],
address: u64,
reloc: Relocation,
) -> Result<ObjReloc> {
let reloc_kind = match reloc.kind() {
RelocationKind::Absolute => ObjRelocKind::Absolute,
RelocationKind::Elf(kind) => match kind {
R_PPC_ADDR16_LO => ObjRelocKind::PpcAddr16Lo,
R_PPC_ADDR16_HI => ObjRelocKind::PpcAddr16Hi,
R_PPC_ADDR16_HA => ObjRelocKind::PpcAddr16Ha,
R_PPC_REL24 => ObjRelocKind::PpcRel24,
R_PPC_REL14 => ObjRelocKind::PpcRel14,
R_PPC_EMB_SDA21 => ObjRelocKind::PpcEmbSda21,
_ => return Err(Error::msg(format!("Unhandled PPC relocation type: {kind}"))),
},
_ => return Err(Error::msg(format!("Unhandled relocation type: {:?}", reloc.kind()))),
};
let symbol = match reloc.target() {
RelocationTarget::Symbol(idx) => {
obj_file.symbol_by_index(idx).context("Failed to locate relocation target symbol")?
}
_ => {
return Err(Error::msg(format!("Unhandled relocation target: {:?}", reloc.target())));
}
};
let target_section = match symbol.section() {
SymbolSection::Common => Some(".comm".to_string()),
SymbolSection::Section(idx) => {
obj_file.section_by_index(idx).and_then(|s| s.name().map(|s| s.to_string())).ok()
}
_ => None,
};
let target = match symbol.kind() {
SymbolKind::Text | SymbolKind::Data | SymbolKind::Unknown => {
to_obj_symbol(obj_file, &symbol, reloc.addend())
}
SymbolKind::Section => {
let addend = if reloc.has_implicit_addend() {
let addend = u32::from_be_bytes(
section_data[address as usize..address as usize + 4].try_into()?,
) as i64;
match reloc_kind {
ObjRelocKind::Absolute => addend,
_ => todo!(),
}
} else {
let addend = reloc.addend();
if addend < 0 {
return Err(Error::msg(format!("Negative addend in section reloc: {addend}")));
}
addend
};
// find_section_symbol(&obj_file, &symbol, addend as u64)
to_obj_symbol(obj_file, &symbol, addend)
}
_ => Err(Error::msg(format!("Unhandled relocation symbol type {:?}", symbol.kind()))),
}?;
let address = address & !3; // FIXME round down for instruction
let reloc_data = ObjReloc { kind: reloc_kind, address, target, target_section };
Ok(reloc_data)
}
// TODO needed?
#[allow(dead_code)]
fn find_section_symbol(
obj_file: &object::File<'_>,
target: &Symbol<'_, '_>,
addend: u64,
) -> Result<ObjSymbol> {
let section_index =
target.section_index().ok_or_else(|| Error::msg("Unknown section index"))?;
let section = obj_file.section_by_index(section_index)?;
let target_address = target.address() + addend;
let mut closest_symbol: Option<Symbol<'_, '_>> = None;
for symbol in obj_file.symbols() {
if !matches!(symbol.section_index(), Some(idx) if idx == section_index) {
continue;
}
if symbol.kind() == SymbolKind::Section || symbol.address() != target_address {
if symbol.address() < target_address
&& symbol.size() != 0
&& (closest_symbol.is_none()
|| matches!(&closest_symbol, Some(s) if s.address() <= symbol.address()))
{
closest_symbol = Some(symbol);
}
continue;
}
return to_obj_symbol(obj_file, &symbol, 0);
}
if let Some(symbol) = closest_symbol {
let addend = target_address - symbol.address();
Ok(to_obj_symbol(obj_file, &symbol, addend as i64)?)
} else {
let addend = target_address - section.address();
Ok(ObjSymbol {
name: section.name()?.to_string(),
demangled_name: None,
address: section.address(),
section_address: 0,
size: section.size(),
size_known: true,
flags: Default::default(),
addend: addend as i64,
kind: ObjSymbolKind::Unknown,
})
}
}

View File

@ -1,14 +1,15 @@
use std::{
collections::{btree_map::Entry, BTreeMap, HashMap},
collections::{btree_map, hash_map, BTreeMap, HashMap},
hash::Hash,
io::BufRead,
ops::Range,
};
use anyhow::{Error, Result};
use anyhow::{Context, Error, Result};
use cwdemangle::{demangle, DemangleOptions};
use lazy_static::lazy_static;
use multimap::MultiMap;
use regex::Regex;
use regex::{Captures, Regex};
use topological_sort::TopologicalSort;
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
@ -60,14 +61,11 @@ fn resolve_section_order(
let mut ordering = SectionOrder::default();
let mut last_unit = String::new();
let mut unit_override = String::new();
let mut last_section = String::new();
let mut section_unit_idx = 0usize;
for symbol_ref in address_to_symbol.values() {
if let Some(symbol) = symbol_entries.get_mut(symbol_ref) {
if last_unit != symbol.unit {
unit_override.clear();
if last_section != symbol.section {
ordering.unit_order.push((symbol.section.clone(), vec![]));
section_unit_idx = ordering.unit_order.len() - 1;
@ -84,18 +82,10 @@ fn resolve_section_order(
ordering.unit_order.push((".comm".to_string(), vec![symbol.unit.clone()]));
section_unit_idx = ordering.unit_order.len() - 1;
} else {
// Since the map doesn't contain file paths, it's likely that
// a TU name conflict is simply a separate file.
// TODO need to resolve and split unit in other sections as well
unit_override =
format!("{}_{}_{:X}", symbol.unit, symbol.section, symbol.address);
log::warn!(
"TU order conflict: {} exists multiple times in {}. Renaming to {}.",
symbol.unit,
symbol.section,
unit_override,
);
unit_order.1.push(unit_override.clone());
return Err(Error::msg(format!(
"TU order conflict: {} exists multiple times in {}.",
symbol.unit, symbol.section,
)));
}
} else {
unit_order.1.push(symbol.unit.clone());
@ -125,13 +115,9 @@ fn resolve_section_order(
symbol.kind = SymbolKind::Object;
}
}
// If we're renaming this TU, replace it in the symbol.
if !unit_override.is_empty() {
symbol.unit = unit_override.clone();
}
ordering.symbol_order.push(symbol_ref.clone());
} else {
return Err(Error::msg(format!("Symbol has address but no entry: {:?}", symbol_ref)));
return Err(Error::msg(format!("Symbol has address but no entry: {symbol_ref:?}")));
}
}
@ -179,7 +165,7 @@ pub fn resolve_link_order(section_unit_order: &[(String, Vec<String>)]) -> Resul
for (_, order) in section_unit_order {
for unit in order {
if !global_unit_order.contains(unit) {
return Err(Error::msg(format!("Failed to find an order for {}", unit)));
return Err(Error::msg(format!("Failed to find an order for {unit}")));
}
}
}
@ -187,7 +173,7 @@ pub fn resolve_link_order(section_unit_order: &[(String, Vec<String>)]) -> Resul
}
lazy_static! {
static ref LINK_MAP_START: Regex = Regex::new("^Link map of (.*)$").unwrap();
static ref LINK_MAP_START: Regex = Regex::new("^Link map of (?P<entry>.*)$").unwrap();
static ref LINK_MAP_ENTRY: Regex = Regex::new(
"^\\s*(?P<depth>\\d+)] (?P<sym>.*) \\((?P<type>.*),(?P<vis>.*)\\) found in (?P<tu>.*)$",
)
@ -207,58 +193,184 @@ lazy_static! {
.unwrap();
static ref MEMORY_MAP_HEADER: Regex = Regex::new("^\\s*Memory map:\\s*$").unwrap();
static ref EXTERN_SYMBOL: Regex = Regex::new("^\\s*>>> SYMBOL NOT FOUND: (.*)$").unwrap();
static ref LINKER_SYMBOLS_HEADER: Regex = Regex::new("^\\s*Linker generated symbols:\\s*$").unwrap();
}
#[derive(Default)]
pub struct MapEntries {
pub entry_point: String,
pub symbols: HashMap<SymbolRef, SymbolEntry>,
pub unit_entries: MultiMap<String, SymbolRef>,
pub entry_references: MultiMap<SymbolRef, SymbolRef>,
pub entry_referenced_from: MultiMap<SymbolRef, SymbolRef>,
pub address_to_symbol: BTreeMap<u32, SymbolRef>,
pub unit_section_ranges: HashMap<String, Range<u32>>,
pub unit_section_ranges: HashMap<String, HashMap<String, Range<u32>>>,
pub symbol_order: Vec<SymbolRef>,
pub unit_order: Vec<(String, Vec<String>)>,
}
pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
let mut entries = MapEntries::default();
/// Parser state that is kept while the link map part of the map file is read
/// (carried by `ProcessMapState::LinkMap`).
#[derive(Default)]
struct LinkMapState {
/// Name of the most recently inserted symbol. It is reused as the name for
/// `>>>` (duplicate) entries and cleared when an entry whose symbol starts
/// with `.` is encountered.
last_name: String,
/// Stack of the symbols at each nesting depth seen so far; the entry below
/// the top is treated as the symbol that references the current one.
symbol_stack: Vec<SymbolRef>,
}
let mut symbol_stack = Vec::<SymbolRef>::new();
let mut current_section = String::new();
let mut last_name = String::new();
let mut last_unit = String::new();
let mut has_link_map = false;
let mut relative_offset = 0u32;
let mut last_section_end = 0u32;
for result in reader.lines() {
match result {
Ok(line) => {
/// Parser state that is kept while one section layout table is read
/// (carried by `ProcessMapState::SectionLayout`).
#[derive(Default)]
struct SectionLayoutState {
/// Name of the section whose layout is being read.
current_section: String,
/// Translation units encountered in this section so far, in order of
/// appearance (original, un-renamed names).
section_units: Vec<String>,
/// Replacement name for the current unit when its name was already seen
/// earlier in this section; `None` when no rename is active.
unit_override: Option<String>,
/// Value added to every address parsed in this section. It is set to
/// `last_section_end` when the section's first entry is at address 0 and to
/// 0 otherwise.
relative_offset: u32,
/// Address at which the current unit started.
last_unit_start: u32,
/// End address (`address + size`) of the last symbol processed; carried
/// over from the previous section layout when a new one starts.
last_section_end: u32,
/// Copy of `StateMachine::has_link_map` taken when the state was created;
/// consulted when a symbol is not already present in the symbol table.
has_link_map: bool,
}
/// The part of the map file that is currently being read.
enum ProcessMapState {
/// Initial state: no recognised header has been read yet.
None,
/// Reading link map entries (entered on a "Link map of ..." line).
LinkMap(LinkMapState),
/// Reading the layout table of one section.
SectionLayout(SectionLayoutState),
/// Reading the memory map; its lines are currently not interpreted.
MemoryMap,
/// Reading the linker generated symbols; its lines are currently not
/// interpreted.
LinkerGeneratedSymbols,
}
/// Line-driven parser for a map file: holds the active state together with the
/// results collected so far.
struct StateMachine {
/// Which part of the map file is currently being read.
state: ProcessMapState,
/// Results accumulated from the lines processed so far.
entries: MapEntries,
/// Set to `true` by `end_state` once a link map state has ended; copied into
/// every `SectionLayoutState` that is created afterwards.
has_link_map: bool,
}
impl StateMachine {
fn process_line(&mut self, line: String) -> Result<()> {
if line.trim().is_empty() {
return Ok(());
}
match &mut self.state {
ProcessMapState::None => {
if let Some(captures) = LINK_MAP_START.captures(&line) {
log::debug!("Entry point: {}", &captures[1]);
has_link_map = true;
} else if let Some(captures) = LINK_MAP_ENTRY.captures(&line) {
self.entries.entry_point = captures["entry"].to_string();
self.switch_state(ProcessMapState::LinkMap(Default::default()))?;
} else if let Some(captures) = SECTION_LAYOUT_START.captures(&line) {
self.switch_state(ProcessMapState::SectionLayout(SectionLayoutState {
current_section: captures["section"].to_string(),
has_link_map: self.has_link_map,
..Default::default()
}))?;
} else if MEMORY_MAP_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::MemoryMap)?;
} else if LINKER_SYMBOLS_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::LinkerGeneratedSymbols)?;
} else {
return Err(Error::msg(format!(
"Unexpected line while processing map: '{line}'"
)));
}
}
ProcessMapState::LinkMap(ref mut state) => {
if let Some(captures) = LINK_MAP_ENTRY.captures(&line) {
StateMachine::process_link_map_entry(captures, state, &mut self.entries)?;
} else if let Some(captures) = LINK_MAP_ENTRY_GENERATED.captures(&line) {
StateMachine::process_link_map_generated(captures, state, &mut self.entries)?;
} else if LINK_MAP_ENTRY_DUPLICATE.is_match(&line) || EXTERN_SYMBOL.is_match(&line)
{
// Ignore
} else if let Some(captures) = SECTION_LAYOUT_START.captures(&line) {
self.switch_state(ProcessMapState::SectionLayout(SectionLayoutState {
current_section: captures["section"].to_string(),
has_link_map: self.has_link_map,
..Default::default()
}))?;
} else if MEMORY_MAP_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::MemoryMap)?;
} else if LINKER_SYMBOLS_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::LinkerGeneratedSymbols)?;
} else {
return Err(Error::msg(format!(
"Unexpected line while processing map: '{line}'"
)));
}
}
ProcessMapState::SectionLayout(ref mut state) => {
if let Some(captures) = SECTION_LAYOUT_SYMBOL.captures(&line) {
StateMachine::section_layout_entry(captures, state, &mut self.entries)?;
} else if let Some(captures) = SECTION_LAYOUT_START.captures(&line) {
let last_section_end = state.last_section_end;
self.switch_state(ProcessMapState::SectionLayout(SectionLayoutState {
current_section: captures["section"].to_string(),
has_link_map: self.has_link_map,
last_section_end,
..Default::default()
}))?;
} else if SECTION_LAYOUT_HEADER.is_match(&line) {
// Ignore
} else if MEMORY_MAP_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::MemoryMap)?;
} else if LINKER_SYMBOLS_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::LinkerGeneratedSymbols)?;
} else {
return Err(Error::msg(format!(
"Unexpected line while processing map: '{line}'"
)));
}
}
ProcessMapState::MemoryMap => {
// TODO
if LINKER_SYMBOLS_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::LinkerGeneratedSymbols)?;
}
}
ProcessMapState::LinkerGeneratedSymbols => {
// TODO
}
}
Ok(())
}
/// Finishes the current state via `end_state` and then makes `new_state` the
/// active state.
///
/// If `end_state` fails, the error is returned and the current state is left
/// in place.
fn switch_state(&mut self, new_state: ProcessMapState) -> Result<()> {
// The outgoing state must be finalized before it is overwritten.
self.end_state()?;
self.state = new_state;
Ok(())
}
fn end_state(&mut self) -> Result<()> {
match self.state {
ProcessMapState::LinkMap { .. } => {
self.has_link_map = true;
}
ProcessMapState::SectionLayout(ref mut state) => {
StateMachine::end_section_layout(state, &mut self.entries)?;
}
_ => {}
}
Ok(())
}
fn process_link_map_entry(
captures: Captures,
state: &mut LinkMapState,
entries: &mut MapEntries,
) -> Result<()> {
if captures["sym"].starts_with('.') {
last_name.clear();
continue;
state.last_name.clear();
return Ok(());
}
let is_duplicate = &captures["sym"] == ">>>";
let unit = captures["tu"].trim().to_string();
let name = if is_duplicate {
if last_name.is_empty() {
if state.last_name.is_empty() {
return Err(Error::msg("Last name empty?"));
}
last_name.clone()
state.last_name.clone()
} else {
captures["sym"].to_string()
};
let symbol_ref = SymbolRef { name: name.clone(), unit: unit.clone() };
let depth: usize = captures["depth"].parse()?;
if depth > symbol_stack.len() {
symbol_stack.push(symbol_ref.clone());
} else if depth <= symbol_stack.len() {
symbol_stack.truncate(depth - 1);
symbol_stack.push(symbol_ref.clone());
if depth > state.symbol_stack.len() {
state.symbol_stack.push(symbol_ref.clone());
} else if depth <= state.symbol_stack.len() {
state.symbol_stack.truncate(depth - 1);
state.symbol_stack.push(symbol_ref.clone());
}
// println!("Entry: {} ({})", name, tu);
let kind = match &captures["type"] {
@ -267,10 +379,7 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
"section" => SymbolKind::Section,
"notype" => SymbolKind::NoType,
_ => {
return Err(Error::msg(format!(
"Unknown symbol type: {}",
&captures["type"],
)));
return Err(Error::msg(format!("Unknown symbol type: {}", &captures["type"],)));
}
};
let visibility = match &captures["vis"] {
@ -278,14 +387,13 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
"local" => SymbolVisibility::Local,
"weak" => SymbolVisibility::Weak,
_ => {
return Err(Error::msg(format!(
"Unknown symbol visibility: {}",
&captures["vis"],
)));
return Err(Error::msg(
format!("Unknown symbol visibility: {}", &captures["vis"],),
));
}
};
if !is_duplicate && symbol_stack.len() > 1 {
let from = &symbol_stack[symbol_stack.len() - 2];
if !is_duplicate && state.symbol_stack.len() > 1 {
let from = &state.symbol_stack[state.symbol_stack.len() - 2];
entries.entry_referenced_from.insert(symbol_ref.clone(), from.clone());
entries.entry_references.insert(from.clone(), symbol_ref.clone());
}
@ -311,8 +419,7 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
should_insert = false;
}
if should_insert {
let demangled =
demangle(&name, &DemangleOptions { omit_empty_parameters: true });
let demangled = demangle(&name, &DemangleOptions { omit_empty_parameters: true });
entries.symbols.insert(symbol_ref.clone(), SymbolEntry {
name: name.clone(),
demangled,
@ -323,15 +430,20 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
size: 0,
section: String::new(),
});
last_name = name.clone();
entries.unit_entries.insert(unit, symbol_ref.clone());
state.last_name = name;
entries.unit_entries.insert(unit, symbol_ref);
}
} else if let Some(captures) = LINK_MAP_ENTRY_GENERATED.captures(&line) {
Ok(())
}
fn process_link_map_generated(
captures: Captures,
_state: &mut LinkMapState,
entries: &mut MapEntries,
) -> Result<()> {
let name = captures["sym"].to_string();
let demangled =
demangle(&name, &DemangleOptions { omit_empty_parameters: true });
let symbol_ref =
SymbolRef { name: name.clone(), unit: "[generated]".to_string() };
let demangled = demangle(&name, &DemangleOptions { omit_empty_parameters: true });
let symbol_ref = SymbolRef { name: name.clone(), unit: "[generated]".to_string() };
entries.symbols.insert(symbol_ref, SymbolEntry {
name,
demangled,
@ -342,63 +454,109 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
size: 0,
section: String::new(),
});
} else if line.trim().is_empty()
|| LINK_MAP_ENTRY_DUPLICATE.is_match(&line)
|| SECTION_LAYOUT_HEADER.is_match(&line)
|| EXTERN_SYMBOL.is_match(&line)
{
// Ignore
} else if let Some(captures) = SECTION_LAYOUT_START.captures(&line) {
current_section = captures["section"].trim().to_string();
last_unit.clear();
log::debug!("Processing section layout for {}", current_section);
} else if let Some(captures) = SECTION_LAYOUT_SYMBOL.captures(&line) {
Ok(())
}
/// Records the address range of the final unit of the section that is being
/// closed.
///
/// Nothing is recorded when no unit was seen in the section. Otherwise the
/// range `last_unit_start..last_section_end` is stored in
/// `entries.unit_section_ranges` under the unit's (possibly renamed) name and
/// the current section name.
///
/// # Errors
/// Fails when that unit already has a range for the section.
fn end_section_layout(state: &mut SectionLayoutState, entries: &mut MapEntries) -> Result<()> {
    // A pending rename takes precedence over the name stored in `section_units`.
    let unit = match (state.section_units.last(), state.unit_override.as_ref()) {
        (None, _) => return Ok(()),
        (Some(_), Some(renamed)) => renamed,
        (Some(last), None) => last,
    };
    let section = &state.current_section;
    nested_try_insert(
        &mut entries.unit_section_ranges,
        unit.clone(),
        section.clone(),
        state.last_unit_start..state.last_section_end,
    )
    .with_context(|| format!("TU '{}' already exists in section '{}'", unit, section))
}
fn section_layout_entry(
captures: Captures,
state: &mut SectionLayoutState,
entries: &mut MapEntries,
) -> Result<()> {
if captures["rom_addr"].trim() == "UNUSED" {
continue;
return Ok(());
}
let sym_name = captures["sym"].trim();
let tu = captures["tu"].trim();
let mut tu = captures["tu"].trim().to_string();
let mut address = u32::from_str_radix(captures["addr"].trim(), 16)?;
let mut size = u32::from_str_radix(captures["size"].trim(), 16)?;
// For RELs, each section starts at address 0. For our purposes
// we'll create "fake" addresses by simply starting at the end of the
// previous section.
if last_unit.is_empty() {
if state.section_units.is_empty() {
if address == 0 {
relative_offset = last_section_end;
state.relative_offset = state.last_section_end;
} else {
relative_offset = 0;
state.relative_offset = 0;
}
}
address += relative_offset;
address += state.relative_offset;
let original_tu = tu.clone();
if state.section_units.last() != Some(&tu) || sym_name == state.current_section {
// Set last section size
if let Some(last_unit) = state.section_units.last() {
let last_unit = state.unit_override.as_ref().unwrap_or(last_unit);
nested_try_insert(
&mut entries.unit_section_ranges,
last_unit.clone(),
state.current_section.clone(),
state.last_unit_start..address,
)
.with_context(|| {
format!(
"TU '{}' already exists in section '{}'",
last_unit, state.current_section
)
})?;
}
state.last_unit_start = address;
// Since the map doesn't contain file paths, it's likely that
// a duplicate TU inside of a section is simply a separate file.
// We can rename it and remap symbols to the new TU name.
// TODO: Find symbols in other sections and rename?
if state.section_units.contains(&tu) {
let new_unit = format!("{}_{}_{:08x}", tu, state.current_section, address);
log::warn!(
"TU order conflict: {} exists multiple times in {}. Renaming to {}.",
tu,
state.current_section,
new_unit,
);
state.unit_override = Some(new_unit);
} else {
state.unit_override = None;
}
}
if let Some(unit) = &state.unit_override {
tu = unit.clone();
}
// Section symbol (i.e. ".data") indicates section size for a TU
if sym_name == current_section {
// ...but we can't rely on it because of UNUSED symbols
if sym_name == state.current_section {
// Skip empty sections
if size == 0 {
continue;
if size != 0 {
state.section_units.push(original_tu);
}
let end = address + size;
entries.unit_section_ranges.insert(tu.to_string(), address..end);
last_unit = tu.to_string();
last_section_end = end;
continue;
return Ok(());
}
// Otherwise, for ASM-generated objects, the first section symbol in a TU
// has the full size of the section.
if tu != last_unit {
if state.section_units.last() != Some(&original_tu) {
if size == 0 {
return Err(Error::msg(format!(
"No section size for {} in {}",
sym_name, tu
)));
return Err(Error::msg(format!("No section size for {sym_name} in {tu}")));
}
let end = address + size;
entries.unit_section_ranges.insert(tu.to_string(), address..end);
last_unit = tu.to_string();
last_section_end = end;
state.section_units.push(original_tu);
// Clear it, so that we guess the "real" symbol size later.
size = 0;
@ -406,19 +564,38 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
// Ignore ...data.0 and similar
if sym_name.starts_with("...") {
continue;
return Ok(());
}
let symbol_ref = SymbolRef { name: sym_name.to_string(), unit: tu.to_string() };
if let Some(symbol) = entries.symbols.get_mut(&symbol_ref) {
// Increment section end
state.last_section_end = address + size;
let symbol_ref = SymbolRef { name: sym_name.to_string(), unit: tu.clone() };
match entries.symbols.entry(symbol_ref.clone()) {
hash_map::Entry::Occupied(entry) => {
// let symbol = if tu != original_tu {
// let old_entry = entry.remove();
// match entries.symbols.entry(SymbolRef {
// name: sym_name.to_string(),
// unit: tu.clone(),
// }) {
// Entry::Occupied(entry) => entry.into_mut(),
// Entry::Vacant(entry) => entry.insert(old_entry),
// }
// } else {
// entry.into_mut()
// };
let symbol = entry.into_mut();
symbol.address = address;
symbol.size = size;
symbol.section = current_section.clone();
symbol.section = state.current_section.clone();
// Move symbol to renamed TU if necessary
// symbol.unit = tu.clone();
match entries.address_to_symbol.entry(address) {
Entry::Vacant(entry) => {
entry.insert(symbol_ref.clone());
btree_map::Entry::Vacant(entry) => {
entry.insert(symbol_ref);
}
Entry::Occupied(entry) => {
btree_map::Entry::Occupied(entry) => {
log::warn!(
"Symbol overridden @ {:X} from {} to {} in {}",
symbol.address,
@ -428,8 +605,9 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
);
}
}
} else {
let visibility = if has_link_map {
}
hash_map::Entry::Vacant(entry) => {
let visibility = if state.has_link_map {
log::warn!(
"Symbol not in link map: {} ({}). Type and visibility unknown.",
sym_name,
@ -439,21 +617,21 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
} else {
SymbolVisibility::Global
};
entries.symbols.insert(symbol_ref.clone(), SymbolEntry {
entry.insert(SymbolEntry {
name: sym_name.to_string(),
demangled: None,
kind: SymbolKind::NoType,
visibility,
unit: tu.to_string(),
unit: tu.clone(),
address,
size,
section: current_section.clone(),
section: state.current_section.clone(),
});
match entries.address_to_symbol.entry(address) {
Entry::Vacant(entry) => {
entry.insert(symbol_ref.clone());
btree_map::Entry::Vacant(entry) => {
entry.insert(symbol_ref);
}
Entry::Occupied(entry) => {
btree_map::Entry::Occupied(entry) => {
log::warn!(
"Symbol overridden @ {:X} from {} to {} in {}",
address,
@ -464,22 +642,50 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
}
}
}
} else if MEMORY_MAP_HEADER.is_match(&line) {
// log::debug!("Done");
break;
} else {
todo!("{}", line);
}
}
Err(e) => {
return Err(Error::from(e));
}
Ok(())
}
}
pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
let mut state = StateMachine {
state: ProcessMapState::None,
entries: Default::default(),
has_link_map: false,
};
for result in reader.lines() {
match result {
Ok(line) => state.process_line(line)?,
Err(e) => return Err(Error::from(e)),
}
}
state.end_state()?;
let mut entries = state.entries;
let section_order = resolve_section_order(&entries.address_to_symbol, &mut entries.symbols)?;
entries.symbol_order = section_order.symbol_order;
entries.unit_order = section_order.unit_order;
Ok(entries)
}
#[inline]
fn nested_try_insert<T1, T2, T3>(
map: &mut HashMap<T1, HashMap<T2, T3>>,
v1: T1,
v2: T2,
v3: T3,
) -> Result<()>
where
T1: Hash + Eq,
T2: Hash + Eq,
{
let map = match map.entry(v1) {
hash_map::Entry::Occupied(entry) => entry.into_mut(),
hash_map::Entry::Vacant(entry) => entry.insert(Default::default()),
};
match map.entry(v2) {
hash_map::Entry::Occupied(_) => return Err(Error::msg("Entry already exists")),
hash_map::Entry::Vacant(entry) => entry.insert(v3),
};
Ok(())
}

View File

@ -1 +1,4 @@
pub(crate) mod asm;
pub(crate) mod elf;
pub(crate) mod map;
pub(crate) mod obj;

90
src/util/obj.rs Normal file
View File

@ -0,0 +1,90 @@
use std::{collections::BTreeMap, path::PathBuf};
use flagset::{flags, FlagSet};
// Flags that can be attached to an `ObjSymbol` (through `ObjSymbolFlagSet`),
// declared with flagset's `flags!` macro using `u8` as the underlying type.
// NOTE(review): the variants presumably mirror the binding / common-symbol
// attributes of the object format — confirm against the code that sets them.
flags! {
pub enum ObjSymbolFlags: u8 {
Global,
Local,
Weak,
Common,
}
}
/// Newtype around a [`FlagSet`] of [`ObjSymbolFlags`].
///
/// The wrapped set is only accessible from within the crate (`pub(crate)`).
#[derive(Debug, Copy, Clone, Default)]
pub struct ObjSymbolFlagSet(pub(crate) FlagSet<ObjSymbolFlags>);
/// Classification of an [`ObjSection`].
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum ObjSectionKind {
/// Section classified as code.
Code,
/// Section classified as data.
Data,
/// Section classified as BSS.
// NOTE(review): presumably zero-initialized data without file contents —
// confirm against the code that builds sections.
Bss,
}
/// A single section of an object, together with its contents, symbols and
/// relocations.
#[derive(Debug, Clone)]
pub struct ObjSection {
/// Section name.
pub name: String,
/// Classification of the section.
pub kind: ObjSectionKind,
/// Address of the section.
// NOTE(review): presumably the virtual (load) address — confirm.
pub address: u64,
/// Size of the section.
// NOTE(review): presumably in bytes — confirm.
pub size: u64,
/// Raw bytes held for the section.
pub data: Vec<u8>,
/// Index of the section.
// NOTE(review): presumably the index within the source object file — confirm.
pub index: usize,
/// Symbols associated with this section.
pub symbols: Vec<ObjSymbol>,
/// Relocations associated with this section.
pub relocations: Vec<ObjReloc>,
/// File offset of the section.
// NOTE(review): presumably the offset of `data` within the source file — confirm.
pub file_offset: u64,
}
/// Kind of entity an [`ObjSymbol`] refers to.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum ObjSymbolKind {
/// The kind is not known.
Unknown,
/// The symbol is a function.
Function,
/// The symbol is a data object.
Object,
}
/// A symbol of an object; also used as the target of an [`ObjReloc`].
#[derive(Debug, Clone)]
pub struct ObjSymbol {
/// Symbol name as stored (not demangled).
pub name: String,
/// Demangled form of `name`, when one is available.
pub demangled_name: Option<String>,
/// Address of the symbol.
pub address: u64,
/// Section-related address of the symbol.
// NOTE(review): presumably the address relative to the start of the
// containing section — confirm against the code that fills it in.
pub section_address: u64,
/// Size of the symbol.
pub size: u64,
/// Whether `size` is known.
// NOTE(review): presumably `false` means `size` is a guess or placeholder — confirm.
pub size_known: bool,
/// Flags set on the symbol.
pub flags: ObjSymbolFlagSet,
/// Addend associated with the symbol.
// NOTE(review): presumably only meaningful when the symbol is a relocation
// target (`ObjReloc::target`) — confirm.
pub addend: i64,
/// Kind of entity the symbol refers to.
pub kind: ObjSymbolKind,
}
/// Target architecture of an object; PowerPC is the only variant.
#[derive(Debug, Copy, Clone)]
pub enum ObjArchitecture {
/// PowerPC.
PowerPc,
}
/// Top-level description of an object: its architecture, path, sections,
/// common symbols and entry point, plus optional linker-generated addresses and
/// extracted split / link-order information.
#[derive(Debug, Clone)]
pub struct ObjInfo {
/// Target architecture.
pub architecture: ObjArchitecture,
/// Path associated with the object.
// NOTE(review): presumably the file the object was read from — confirm.
pub path: PathBuf,
/// Sections of the object.
pub sections: Vec<ObjSection>,
/// Common symbols.
// NOTE(review): presumably symbols not assigned to any section — confirm.
pub common: Vec<ObjSymbol>,
/// Entry point.
pub entry: u32,
// Linker generated
// NOTE(review): the fields below presumably hold the values of
// linker-generated symbols when those are present — confirm.
pub stack_address: Option<u32>,
pub stack_end: Option<u32>,
pub db_stack_addr: Option<u32>,
pub arena_lo: Option<u32>,
pub arena_hi: Option<u32>,
// Extracted
// NOTE(review): `splits` presumably maps a start address to a unit name —
// confirm against the code that populates it.
pub splits: BTreeMap<u32, String>,
/// Link order, as a list of names.
pub link_order: Vec<String>,
}
/// Kind of an [`ObjReloc`].
// NOTE(review): the `Ppc*` variant names presumably correspond to the PowerPC
// ELF relocation types of the same name (ADDR16_HI, ADDR16_HA, ADDR16_LO,
// REL24, REL14, EMB_SDA21) — confirm against the code that reads relocations.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum ObjRelocKind {
Absolute,
PpcAddr16Hi,
PpcAddr16Ha,
PpcAddr16Lo,
PpcRel24,
PpcRel14,
PpcEmbSda21,
}
/// A relocation: its kind, its address and the symbol it targets.
#[derive(Debug, Clone)]
pub struct ObjReloc {
/// Kind of relocation.
pub kind: ObjRelocKind,
/// Address of the relocation.
// NOTE(review): presumably the address of the relocated location — confirm
// whether it is absolute or section-relative.
pub address: u64,
/// Symbol the relocation refers to (stored by value).
pub target: ObjSymbol,
/// Name of the section containing the target, when there is one.
// NOTE(review): presumably `None` for targets outside any section — confirm.
pub target_section: Option<String>,
}