From ec4caf5000dc951084564b12c3b7fbf7d2409426 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Tue, 7 Nov 2023 23:21:59 -0500 Subject: [PATCH] Smarter configuration updates - Avoid overwriting `symbols.txt` or `splits.txt` if the file was modified since it was read or if the file's contents didn't change. - Remove `binrw` and `byteorder` dependencies, moving to `FromReader`/`ToWriter` traits. - Migrate generic bounds to `where` clauses. - Remove unused `build.rs` logic. --- Cargo.lock | 76 +------ Cargo.toml | 18 +- build.rs | 470 +---------------------------------------- src/argp_version.rs | 5 +- src/cmd/dol.rs | 229 +++++++++++--------- src/cmd/dwarf.rs | 7 +- src/cmd/elf.rs | 13 +- src/cmd/elf2dol.rs | 3 +- src/cmd/map.rs | 233 ++++++++------------ src/cmd/shasum.rs | 12 +- src/main.rs | 4 +- src/obj/symbols.rs | 5 +- src/util/asm.rs | 74 ++++--- src/util/comment.rs | 322 ++++++++++++++++------------ src/util/config.rs | 106 ++++++++-- src/util/dep.rs | 6 +- src/util/dol.rs | 5 +- src/util/dwarf.rs | 64 +++--- src/util/elf.rs | 37 ++-- src/util/file.rs | 156 +++++++++----- src/util/map.rs | 42 +++- src/util/mod.rs | 1 + src/util/rarc.rs | 275 ++++++++++++++++++------ src/util/reader.rs | 6 + src/util/rel.rs | 311 ++++++++++++++++++++++----- src/util/rso.rs | 461 +++++++++++++++++++++++++++++++++------- src/util/signatures.rs | 6 +- src/util/take_seek.rs | 128 +++++++++++ src/util/yaz0.rs | 76 +++---- 29 files changed, 1817 insertions(+), 1334 deletions(-) create mode 100644 src/util/take_seek.rs diff --git a/Cargo.lock b/Cargo.lock index 60beb53..67c04f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -112,12 +112,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "array-init" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" - [[package]] name = "atty" version = "0.2.14" @@ -171,30 +165,6 @@ dependencies = [ "serde", ] -[[package]] -name = "binrw" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8318fda24dc135cdd838f57a2b5ccb6e8f04ff6b6c65528c4bd9b5fcdc5cf6" -dependencies = [ - "array-init", - "binrw_derive", - "bytemuck", -] - -[[package]] -name = "binrw_derive" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db0832bed83248115532dfb25af54fae1c83d67a2e4e3e2f591c13062e372e7e" -dependencies = [ - "either", - "owo-colors", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "bitflags" version = "1.3.2" @@ -216,12 +186,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "bytemuck" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" - [[package]] name = "byteorder" version = "1.5.0" @@ -330,15 +294,13 @@ dependencies = [ [[package]] name = "decomp-toolkit" -version = "0.5.7" +version = "0.5.8" dependencies = [ "anyhow", "ar", "argp", "base16ct", "base64", - "binrw", - "byteorder", "cwdemangle", "dol", "enable-ansi-support", @@ -364,7 +326,6 @@ dependencies = [ "ppc750cl", "rayon", "regex", - "rmp-serde", "rustc-hash", "serde", "serde_json", @@ -376,6 +337,7 @@ dependencies = [ "tracing", "tracing-attributes", "tracing-subscriber", + "xxhash-rust", ] [[package]] @@ -745,12 +707,6 @@ dependencies = [ "supports-color 1.3.1", ] -[[package]] -name = "paste" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" - [[package]] name = "path-slash" version = "0.2.1" @@ -896,28 +852,6 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" -[[package]] -name = "rmp" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44519172358fd6d58656c86ab8e7fbc9e1490c3e8f14d35ed78ca0dd07403c9f" -dependencies = [ - "byteorder", - "num-traits", - "paste", -] - -[[package]] -name = "rmp-serde" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffea85eea980d8a74453e5d02a8d93028f3c34725de143085a844ebe953258a" -dependencies = [ - "byteorder", - "rmp", - "serde", -] - [[package]] name = "rustc-demangle" version = "0.1.23" @@ -1389,3 +1323,9 @@ name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "xxhash-rust" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9828b178da53440fa9c766a3d2f73f7cf5d0ac1fe3980c1e5018d899fd19e07b" diff --git a/Cargo.toml b/Cargo.toml index b305205..b1e9405 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,10 +3,9 @@ name = "decomp-toolkit" description = "Yet another GameCube/Wii decompilation toolkit." authors = ["Luke Street "] license = "MIT OR Apache-2.0" -version = "0.5.7" +version = "0.5.8" edition = "2021" publish = false -build = "build.rs" repository = "https://github.com/encounter/decomp-toolkit" readme = "README.md" categories = ["command-line-utilities"] @@ -27,8 +26,6 @@ ar = { git = "https://github.com/bjorn3/rust-ar.git", branch = "write_symbol_tab argp = "0.3.0" base16ct = "0.2.0" base64 = "0.21.4" -binrw = "0.12.0" -byteorder = "1.5.0" cwdemangle = "0.1.6" dol = { git = "https://github.com/encounter/ppc750cl", rev = "4a2bbbc6f84dcb76255ab6f3595a8d4a0ce96618" } enable-ansi-support = "0.2.1" @@ -50,7 +47,7 @@ object = { version = "0.32.1", features = ["read_core", "std", "elf", "write_std once_cell = "1.18.0" owo-colors = { version = "3.5.0", features = ["supports-colors"] } path-slash = "0.2.1" -petgraph = "0.6.4" +petgraph = { version = "0.6.4", default-features = false } ppc750cl = { git = "https://github.com/encounter/ppc750cl", rev = "4a2bbbc6f84dcb76255ab6f3595a8d4a0ce96618" } rayon = "1.8.0" regex = "1.9.6" @@ -65,13 +62,4 @@ supports-color = "2.1.0" tracing = "0.1.37" tracing-attributes = "0.1.26" tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } - -[build-dependencies] -anyhow = { version = "1.0.75", features = ["backtrace"] } -base64 = "0.21.4" -flagset = { version = "0.4.4", features = ["serde"] } -serde = "1.0.188" -serde_repr = "0.1.16" -serde_yaml = "0.9.25" -rmp-serde = "1.1.2" -flate2 = "1.0.27" +xxhash-rust = { version = "0.8.7", features = ["xxh3"] } diff --git a/build.rs b/build.rs index 8e223b0..a488c56 100644 --- a/build.rs +++ b/build.rs @@ -1,467 +1,9 @@ -use std::{collections::HashMap, env, fs, path::PathBuf}; - -use anyhow::{anyhow, bail, Context, Result}; -use base64::{engine::general_purpose::STANDARD, Engine}; -use flagset::{flags, FlagSet}; -use flate2::{write::GzEncoder, Compression}; -use serde::{Deserialize, Serialize}; -use serde_repr::{Deserialize_repr, Serialize_repr}; - -flags! { - #[repr(u8)] - #[derive(Deserialize_repr, Serialize_repr)] - pub enum ObjSymbolFlags: u8 { - Global, - Local, - Weak, - Common, - Hidden, - } -} -#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Serialize, Deserialize)] -pub struct ObjSymbolFlagSet(pub FlagSet); - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub enum ObjSymbolKind { - Unknown, - Function, - Object, - Section, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub enum ObjRelocKind { - Absolute, - PpcAddr16Hi, - PpcAddr16Ha, - PpcAddr16Lo, - PpcRel24, - PpcRel14, - PpcEmbSda21, -} - -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct OutSymbol { - pub kind: ObjSymbolKind, - pub name: String, - pub size: u32, - pub flags: ObjSymbolFlagSet, - pub section: Option, -} - -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct OutReloc { - pub offset: u32, - pub kind: ObjRelocKind, - pub symbol: usize, - pub addend: i32, -} - -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct FunctionSignature { - pub symbol: usize, - pub hash: String, - pub signature: String, - pub symbols: Vec, - pub relocations: Vec, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Deserialize_repr, Serialize_repr)] -#[repr(u8)] -pub enum SigSymbolKind { - Unknown = 0, - Function = 1, - Object = 2, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Deserialize_repr, Serialize_repr)] -#[repr(u8)] -pub enum SigSection { - Init = 0, - Extab = 1, - Extabindex = 2, - Text = 3, - Ctors = 4, - Dtors = 5, - Rodata = 6, - Data = 7, - Bss = 8, - Sdata = 9, - Sbss = 10, - Sdata2 = 11, - Sbss2 = 12, - Dbgtext = 13, - Unknown = 255, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -#[repr(u8)] -pub enum SigSymbolFlag { - Global = 1 << 0, - Local = 1 << 1, - Weak = 1 << 2, - Common = 1 << 3, - Hidden = 1 << 4, -} - -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct SigSymbol { - pub kind: SigSymbolKind, - pub name: String, - pub size: u32, - pub flags: u8, // SigSymbolFlag - pub section: SigSection, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Deserialize_repr, Serialize_repr)] -#[repr(u8)] -pub enum SigRelocKind { - Absolute = 0, - PpcAddr16Hi = 1, - PpcAddr16Ha = 2, - PpcAddr16Lo = 3, - PpcRel24 = 4, - PpcRel14 = 5, - PpcEmbSda21 = 6, -} - -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct SigReloc { - pub offset: u32, - pub symbol: usize, - pub kind: SigRelocKind, - pub addend: i32, -} - -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct Sig { - pub symbol: usize, - pub data: Vec, - pub relocations: Vec, - pub search: bool, -} - -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct Output { - pub symbols: Vec, - pub signatures: HashMap, -} - -pub fn parse_yml(sig_str: &str) -> Result> { - Ok(serde_yaml::from_str(sig_str)?) -} - -const SIGNATURES: &[(&str, bool)] = &[ - ("__start", false), - ("__init_registers", false), - ("__init_hardware", false), - ("__init_data", false), - ("__set_debug_bba", false), - ("__OSPSInit", false), - ("__OSFPRInit", false), - ("__OSCacheInit", false), - ("DMAErrorHandler", false), - ("DBInit", false), - ("OSInit", false), - ("__OSThreadInit", false), - ("__OSInitIPCBuffer", false), - ("EXIInit", false), - ("EXIGetID", false), - ("exit", false), - ("_ExitProcess", false), - ("__fini_cpp", false), - ("__fini_cpp_exceptions", false), - ("__destroy_global_chain", false), - ("__init_cpp", false), - ("__init_cpp_exceptions", false), - ("InitMetroTRK", false), - ("InitMetroTRKCommTable", false), - ("OSExceptionInit", false), - ("OSDefaultExceptionHandler", false), - ("__OSUnhandledException", false), - ("OSDisableScheduler", false), - ("__OSReschedule", false), - ("__OSInitSystemCall", false), - ("OSInitAlarm", false), - ("__OSInitAlarm", false), - // TODO aliases - // ("__OSEVStart", false), - // ("__OSDBINTSTART", false), - // ("__OSDBJUMPSTART", false), - ("SIInit", false), - ("SIGetType", false), - ("SISetSamplingRate", false), - ("SISetXY", false), - ("VIGetTvFormat", false), - ("DVDInit", false), - ("DVDSetAutoFatalMessaging", false), - ("OSSetArenaLo", false), - ("OSSetArenaHi", false), - ("OSSetMEM1ArenaLo", false), - ("OSSetMEM1ArenaHi", false), - ("OSSetMEM2ArenaLo", false), - ("OSSetMEM2ArenaHi", false), - ("__OSInitAudioSystem", false), - ("__OSInitMemoryProtection", false), - // ("BATConfig", false), TODO - ("ReportOSInfo", false), - ("__check_pad3", false), - ("OSResetSystem", false), - ("OSReturnToMenu", false), - ("__OSReturnToMenu", false), - ("__OSShutdownDevices", false), - ("__OSInitSram", false), - ("__OSSyncSram", false), - ("__OSGetExceptionHandler", false), - ("OSRegisterResetFunction", false), - ("OSRegisterShutdownFunction", false), - ("DecrementerExceptionHandler", false), - ("DecrementerExceptionCallback", false), - ("__OSInterruptInit", false), - ("__OSContextInit", false), - ("OSSwitchFPUContext", false), - ("OSReport", false), - ("TRK_main", false), - ("TRKNubWelcome", false), - ("TRKInitializeNub", false), - ("TRKInitializeIntDrivenUART", false), - ("TRKEXICallBack", false), - ("TRKLoadContext", false), - ("TRKInterruptHandler", false), - ("TRKExceptionHandler", false), - ("TRKSaveExtended1Block", false), - ("TRKNubMainLoop", false), - ("TRKTargetContinue", false), - ("TRKSwapAndGo", false), - ("TRKRestoreExtended1Block", false), - ("TRKInterruptHandlerEnableInterrupts", false), - ("memset", false), - ("__msl_runtime_constraint_violation_s", false), - ("ClearArena", false), - ("IPCCltInit", false), - ("__OSInitSTM", false), - ("IOS_Open", false), - ("__ios_Ipc2", false), - ("IPCiProfQueueReq", false), - ("SCInit", false), - ("SCReloadConfFileAsync", false), - ("NANDPrivateOpenAsync", false), - ("nandIsInitialized", false), - ("nandOpen", false), - ("nandGenerateAbsPath", false), - ("nandGetHeadToken", false), - ("ISFS_OpenAsync", false), - ("nandConvertErrorCode", false), - ("NANDLoggingAddMessageAsync", false), - ("__NANDPrintErrorMessage", false), - ("__OSInitNet", false), - ("__DVDCheckDevice", false), - ("__OSInitPlayTime", false), - ("__OSStartPlayRecord", false), - ("NANDInit", false), - ("ISFS_OpenLib", false), - ("ESP_GetTitleId", false), - ("NANDSetAutoErrorMessaging", false), - ("__DVDFSInit", false), - ("__DVDClearWaitingQueue", false), - ("__DVDInitWA", false), - ("__DVDLowSetWAType", false), - ("__fstLoad", false), - ("DVDReset", false), - ("DVDLowReset", false), - ("DVDReadDiskID", false), - ("stateReady", false), - ("DVDLowWaitCoverClose", false), - ("__DVDStoreErrorCode", false), - ("DVDLowStopMotor", false), - ("DVDGetDriveStatus", false), - ("printf", false), - ("sprintf", false), - ("vprintf", false), - ("vsprintf", false), - ("vsnprintf", false), - ("__pformatter", false), - ("longlong2str", false), - ("__mod2u", false), - ("__FileWrite", false), - ("fwrite", false), - ("__fwrite", false), - ("__stdio_atexit", false), - ("__StringWrite", false), - ("RSOStaticLocateObject", true), -]; - -fn main() -> Result<()> { - let output = std::process::Command::new("git").args(["rev-parse", "HEAD"]).output()?; - let rev = String::from_utf8(output.stdout)?; +fn main() { + let output = std::process::Command::new("git") + .args(["rev-parse", "HEAD"]) + .output() + .expect("Failed to execute git"); + let rev = String::from_utf8(output.stdout).expect("Failed to parse git output"); println!("cargo:rustc-env=GIT_COMMIT_SHA={rev}"); println!("cargo:rustc-rerun-if-changed=.git/HEAD"); - - let mut symbols = Vec::::new(); - let mut out = HashMap::::new(); - let in_dir = PathBuf::from("assets/signatures"); - for &(name, search) in SIGNATURES { - let path = in_dir.join(format!("{name}.yml")); - println!("cargo:rustc-rerun-if-changed={}", path.display()); - let str = fs::read_to_string(&path) - .with_context(|| format!("Failed to open '{}'", path.display()))?; - apply_sig(&str, &mut symbols, &mut out, search)?; - } - let mut encoder = GzEncoder::new(Vec::new(), Compression::best()); - rmp_serde::encode::write(&mut encoder, &Output { symbols, signatures: out })?; - let compressed = encoder.finish()?; - let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - fs::write(out_dir.join("signatures.bin"), compressed)?; - - Ok(()) -} - -fn apply_sig( - sig_str: &str, - symbols: &mut Vec, - out: &mut HashMap, - search: bool, -) -> Result<()> { - let data = parse_yml(sig_str)?; - for in_sig in data { - let in_sym = &in_sig.symbols[in_sig.symbol]; - let mut out_sig = Sig { - symbol: add_symbol(symbols, in_sym)?, - data: STANDARD.decode(&in_sig.signature)?, - relocations: vec![], - search, - }; - for in_reloc in &in_sig.relocations { - out_sig.relocations.push(SigReloc { - offset: in_reloc.offset, - symbol: add_symbol(symbols, &in_sig.symbols[in_reloc.symbol])?, - kind: to_sig_reloc_kind(in_reloc.kind)?, - addend: in_reloc.addend, - }); - } - out.insert(in_sym.name.clone(), out_sig); - } - Ok(()) -} - -fn to_sym_section(s: Option<&str>) -> Result { - match s { - None => Ok(SigSection::Unknown), - Some(".init") => Ok(SigSection::Init), - Some("extab") => Ok(SigSection::Extab), - Some("extabindex") => Ok(SigSection::Extabindex), - Some(".text") => Ok(SigSection::Text), - Some(".ctors") => Ok(SigSection::Ctors), - Some(".dtors") => Ok(SigSection::Dtors), - Some(".rodata") => Ok(SigSection::Rodata), - Some(".data") => Ok(SigSection::Data), - Some(".bss") => Ok(SigSection::Bss), - Some(".sdata") => Ok(SigSection::Sdata), - Some(".sbss") => Ok(SigSection::Sbss), - Some(".sdata2") => Ok(SigSection::Sdata2), - Some(".sbss2") => Ok(SigSection::Sbss2), - Some(".dbgtext") => Ok(SigSection::Dbgtext), - Some(section) => Err(anyhow!("Unknown section {}", section)), - } -} - -fn to_sig_symbol_kind(kind: ObjSymbolKind) -> Result { - match kind { - ObjSymbolKind::Unknown => Ok(SigSymbolKind::Unknown), - ObjSymbolKind::Function => Ok(SigSymbolKind::Function), - ObjSymbolKind::Object => Ok(SigSymbolKind::Object), - ObjSymbolKind::Section => Err(anyhow!("Section symbols unsupported")), - } -} - -fn to_sig_symbol_flags(flags: ObjSymbolFlagSet) -> Result { - let mut out = 0; - for flag in flags.0 { - match flag { - ObjSymbolFlags::Global => { - out |= SigSymbolFlag::Global as u8; - } - ObjSymbolFlags::Local => { - out |= SigSymbolFlag::Local as u8; - } - ObjSymbolFlags::Weak => { - out |= SigSymbolFlag::Weak as u8; - } - ObjSymbolFlags::Common => { - out |= SigSymbolFlag::Common as u8; - } - ObjSymbolFlags::Hidden => { - out |= SigSymbolFlag::Hidden as u8; - } - } - } - Ok(out) -} - -fn to_sig_reloc_kind(kind: ObjRelocKind) -> Result { - match kind { - ObjRelocKind::Absolute => Ok(SigRelocKind::Absolute), - ObjRelocKind::PpcAddr16Hi => Ok(SigRelocKind::PpcAddr16Hi), - ObjRelocKind::PpcAddr16Ha => Ok(SigRelocKind::PpcAddr16Ha), - ObjRelocKind::PpcAddr16Lo => Ok(SigRelocKind::PpcAddr16Lo), - ObjRelocKind::PpcRel24 => Ok(SigRelocKind::PpcRel24), - ObjRelocKind::PpcRel14 => Ok(SigRelocKind::PpcRel14), - ObjRelocKind::PpcEmbSda21 => Ok(SigRelocKind::PpcEmbSda21), - } -} - -fn add_symbol(symbols: &mut Vec, in_sym: &OutSymbol) -> Result { - let sig_section = to_sym_section(in_sym.section.as_deref())?; - let sig_symbol_kind = to_sig_symbol_kind(in_sym.kind)?; - let sig_symbol_flags = to_sig_symbol_flags(in_sym.flags)?; - if let Some((idx, existing)) = symbols.iter_mut().enumerate().find(|(_, sym)| { - sym.kind == sig_symbol_kind && sym.size == in_sym.size && sym.name == in_sym.name - }) { - if existing.kind != sig_symbol_kind { - bail!( - "Mismatched types for {}: {:?} != {:?}", - in_sym.name, - sig_symbol_kind, - existing.kind - ); - } - if existing.section != sig_section { - if existing.section == SigSection::Unknown || sig_section == SigSection::Unknown { - existing.section = SigSection::Unknown; - } else { - eprintln!( - "Mismatched sections for {}: {:?} != {:?}", - in_sym.name, sig_section, existing.section - ); - existing.section = SigSection::Unknown; - } - } - if existing.size != in_sym.size { - bail!("Mismatched size for {}: {} != {}", in_sym.name, in_sym.size, existing.size); - } - if existing.flags != sig_symbol_flags { - if (existing.flags & (SigSymbolFlag::Weak as u8) != 0 - && sig_symbol_flags & (SigSymbolFlag::Weak as u8) == 0) - || (sig_symbol_flags & (SigSymbolFlag::Weak as u8) != 0 - && existing.flags & (SigSymbolFlag::Weak as u8) == 0) - { - existing.flags |= SigSymbolFlag::Weak as u8; - } else { - eprintln!( - "Mismatched flags for {}: {} != {}", - in_sym.name, sig_symbol_flags, existing.flags - ); - } - } - return Ok(idx); - } - let idx = symbols.len(); - symbols.push(SigSymbol { - kind: sig_symbol_kind, - name: in_sym.name.clone(), - size: in_sym.size, - flags: sig_symbol_flags, - section: sig_section, - }); - Ok(idx) } diff --git a/src/argp_version.rs b/src/argp_version.rs index b67e830..eccce05 100644 --- a/src/argp_version.rs +++ b/src/argp_version.rs @@ -6,8 +6,11 @@ use std::ffi::OsStr; use argp::{parser::ParseGlobalOptions, EarlyExit, FromArgs, TopLevelCommand}; -struct ArgsOrVersion(T); +struct ArgsOrVersion(T) +where T: FromArgs; + impl TopLevelCommand for ArgsOrVersion where T: FromArgs {} + impl FromArgs for ArgsOrVersion where T: FromArgs { diff --git a/src/cmd/dol.rs b/src/cmd/dol.rs index dd46542..5bbc908 100644 --- a/src/cmd/dol.rs +++ b/src/cmd/dol.rs @@ -44,7 +44,7 @@ use crate::{ dep::DepFile, dol::process_dol, elf::{process_elf, write_elf}, - file::{buf_reader, buf_writer, map_file, touch, verify_hash, FileIterator}, + file::{buf_reader, buf_writer, map_file, touch, verify_hash, FileIterator, FileReadInfo}, lcf::{asm_path_for_unit, generate_ldscript, obj_path_for_unit}, map::apply_map_file, rel::{process_rel, process_rel_header, update_rel_section_alignment}, @@ -145,7 +145,10 @@ fn bool_true() -> bool { true } fn is_true(b: &bool) -> bool { *b } #[inline] -fn is_default(t: &T) -> bool { t == &T::default() } +fn is_default(t: &T) -> bool +where T: Default + PartialEq { + t == &T::default() +} mod path_slash_serde { use std::path::PathBuf; @@ -453,7 +456,14 @@ pub fn info(args: InfoArgs) -> Result<()> { Ok(()) } -type ModuleMap<'a> = BTreeMap; +struct ModuleInfo<'a> { + obj: ObjInfo, + config: &'a ModuleConfig, + symbols_cache: Option, + splits_cache: Option, +} + +type ModuleMap<'a> = BTreeMap>; fn update_symbols(obj: &mut ObjInfo, modules: &ModuleMap<'_>, create_symbols: bool) -> Result<()> { log::debug!("Updating symbols for module {}", obj.module_id); @@ -463,8 +473,8 @@ fn update_symbols(obj: &mut ObjInfo, modules: &ModuleMap<'_>, create_symbols: bo .unresolved_relocations .iter() .map(|r| (obj.module_id, r)) - .chain(modules.iter().flat_map(|(_, (_, obj))| { - obj.unresolved_relocations.iter().map(|r| (obj.module_id, r)) + .chain(modules.iter().flat_map(|(_, info)| { + info.obj.unresolved_relocations.iter().map(|r| (info.obj.module_id, r)) })) .filter(|(_, r)| r.module_id == obj.module_id) { @@ -565,7 +575,7 @@ fn create_relocations(obj: &mut ObjInfo, modules: &ModuleMap<'_>, dol_obj: &ObjI &modules .get(&rel_reloc.module_id) .ok_or_else(|| anyhow!("Failed to locate module {}", rel_reloc.module_id))? - .1 + .obj }; let (target_section_index, _target_section) = if rel_reloc.module_id == 0 { @@ -646,7 +656,7 @@ fn resolve_external_relocations( .ok_or_else(|| { anyhow!("Failed to locate module {}", reloc.module.unwrap()) })? - .1 + .obj }; let target_symbol = &target_obj.symbols[reloc.target_symbol]; @@ -670,7 +680,12 @@ fn resolve_external_relocations( Ok(()) } -type AnalyzeResult = (ObjInfo, Vec); +struct AnalyzeResult { + obj: ObjInfo, + dep: Vec, + symbols_cache: Option, + splits_cache: Option, +} fn load_analyze_dol(config: &ProjectConfig) -> Result { log::debug!("Loading {}", config.base.object.display()); @@ -692,15 +707,19 @@ fn load_analyze_dol(config: &ProjectConfig) -> Result { dep.push(map_path.clone()); } - if let Some(splits_path) = &config.base.splits { - apply_splits_file(splits_path, &mut obj)?; + let splits_cache = if let Some(splits_path) = &config.base.splits { dep.push(splits_path.clone()); - } + apply_splits_file(splits_path, &mut obj)? + } else { + None + }; - if let Some(symbols_path) = &config.base.symbols { - apply_symbols_file(symbols_path, &mut obj)?; + let symbols_cache = if let Some(symbols_path) = &config.base.symbols { dep.push(symbols_path.clone()); - } + apply_symbols_file(symbols_path, &mut obj)? + } else { + None + }; if !config.symbols_known { // TODO move before symbols? @@ -732,72 +751,73 @@ fn load_analyze_dol(config: &ProjectConfig) -> Result { // Create _ctors and _dtors symbols if missing update_ctors_dtors(&mut obj)?; - Ok((obj, dep)) + Ok(AnalyzeResult { obj, dep, symbols_cache, splits_cache }) } fn split_write_obj( - obj: &mut ObjInfo, + module: &mut ModuleInfo, config: &ProjectConfig, - module_config: &ModuleConfig, out_dir: &Path, no_update: bool, ) -> Result { debug!("Performing relocation analysis"); - let mut tracker = Tracker::new(obj); - tracker.process(obj)?; + let mut tracker = Tracker::new(&module.obj); + tracker.process(&module.obj)?; debug!("Applying relocations"); - tracker.apply(obj, false)?; + tracker.apply(&mut module.obj, false)?; if !config.symbols_known && config.detect_objects { debug!("Detecting object boundaries"); - detect_objects(obj)?; + detect_objects(&mut module.obj)?; } if config.detect_strings { debug!("Detecting strings"); - detect_strings(obj)?; + detect_strings(&mut module.obj)?; } debug!("Adjusting splits"); + let module_id = module.obj.module_id; update_splits( - obj, - if obj.module_id == 0 { config.common_start } else { None }, + &mut module.obj, + if module_id == 0 { config.common_start } else { None }, config.fill_gaps, )?; if !no_update { debug!("Writing configuration"); - if let Some(symbols_path) = &module_config.symbols { - write_symbols_file(symbols_path, obj)?; + if let Some(symbols_path) = &module.config.symbols { + write_symbols_file(symbols_path, &module.obj, module.symbols_cache)?; } - if let Some(splits_path) = &module_config.splits { - write_splits_file(splits_path, obj, false)?; + if let Some(splits_path) = &module.config.splits { + write_splits_file(splits_path, &module.obj, false, module.splits_cache)?; } } - debug!("Splitting {} objects", obj.link_order.len()); - let split_objs = split_obj(obj)?; + debug!("Splitting {} objects", module.obj.link_order.len()); + let split_objs = split_obj(&module.obj)?; debug!("Writing object files"); let obj_dir = out_dir.join("obj"); - let entry = if obj.kind == ObjKind::Executable { - obj.entry.and_then(|e| { - let (section_index, _) = obj.sections.at_address(e as u32).ok()?; - let symbols = obj.symbols.at_section_address(section_index, e as u32).collect_vec(); + let entry = if module.obj.kind == ObjKind::Executable { + module.obj.entry.and_then(|e| { + let (section_index, _) = module.obj.sections.at_address(e as u32).ok()?; + let symbols = + module.obj.symbols.at_section_address(section_index, e as u32).collect_vec(); best_match_for_reloc(symbols, ObjRelocKind::PpcRel24).map(|(_, s)| s.name.clone()) }) } else { - obj.symbols.by_name("_prolog")?.map(|(_, s)| s.name.clone()) + module.obj.symbols.by_name("_prolog")?.map(|(_, s)| s.name.clone()) }; let mut out_config = OutputModule { - name: module_config.name().to_string(), - module_id: obj.module_id, + name: module.config.name().to_string(), + module_id, ldscript: out_dir.join("ldscript.lcf"), units: Vec::with_capacity(split_objs.len()), entry, }; - for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { + for (unit, split_obj) in module.obj.link_order.iter().zip(&split_objs) { let out_obj = write_elf(split_obj)?; let out_path = obj_dir.join(obj_path_for_unit(&unit.name)); out_config.units.push(OutputUnit { @@ -815,7 +835,7 @@ fn split_write_obj( } // Generate ldscript.lcf - let ldscript_template = if let Some(template) = &module_config.ldscript_template { + let ldscript_template = if let Some(template) = &module.config.ldscript_template { Some(fs::read_to_string(template).with_context(|| { format!("Failed to read linker script template '{}'", template.display()) })?) @@ -824,13 +844,13 @@ fn split_write_obj( }; fs::write( &out_config.ldscript, - generate_ldscript(obj, ldscript_template.as_deref(), &module_config.force_active)?, + generate_ldscript(&module.obj, ldscript_template.as_deref(), &module.config.force_active)?, )?; if config.write_asm { debug!("Writing disassembly"); let asm_dir = out_dir.join("asm"); - for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { + for (unit, split_obj) in module.obj.link_order.iter().zip(&split_objs) { let out_path = asm_dir.join(asm_path_for_unit(&unit.name)); let mut w = buf_writer(&out_path)?; @@ -861,15 +881,19 @@ fn load_analyze_rel(config: &ProjectConfig, module_config: &ModuleConfig) -> Res dep.push(map_path.clone()); } - if let Some(splits_path) = &module_config.splits { - apply_splits_file(splits_path, &mut module_obj)?; + let splits_cache = if let Some(splits_path) = &module_config.splits { dep.push(splits_path.clone()); - } + apply_splits_file(splits_path, &mut module_obj)? + } else { + None + }; - if let Some(symbols_path) = &module_config.symbols { - apply_symbols_file(symbols_path, &mut module_obj)?; + let symbols_cache = if let Some(symbols_path) = &module_config.symbols { dep.push(symbols_path.clone()); - } + apply_symbols_file(symbols_path, &mut module_obj)? + } else { + None + }; if !config.symbols_known { debug!("Analyzing module {}", module_obj.module_id); @@ -890,7 +914,7 @@ fn load_analyze_rel(config: &ProjectConfig, module_config: &ModuleConfig) -> Res // Determine REL section alignment update_rel_section_alignment(&mut module_obj, &header)?; - Ok((module_obj, dep)) + Ok(AnalyzeResult { obj: module_obj, dep, symbols_cache, splits_cache }) } fn split(args: SplitArgs) -> Result<()> { @@ -955,17 +979,30 @@ fn split(args: SplitArgs) -> Result<()> { }); }); let duration = start.elapsed(); - let (mut obj, dep_v) = dol_result.unwrap()?; - let mut function_count = obj.symbols.by_kind(ObjSymbolKind::Function).count(); - dep.extend(dep_v); + let mut dol = { + let result = dol_result.unwrap()?; + dep.extend(result.dep); + ModuleInfo { + obj: result.obj, + config: &config.base, + symbols_cache: result.symbols_cache, + splits_cache: result.splits_cache, + } + }; + let mut function_count = dol.obj.symbols.by_kind(ObjSymbolKind::Function).count(); - let mut modules = BTreeMap::::new(); - for (idx, (module_obj, dep_v)) in modules_result.unwrap()?.into_iter().enumerate() { - function_count += module_obj.symbols.by_kind(ObjSymbolKind::Function).count(); - dep.extend(dep_v); - match modules.entry(module_obj.module_id) { - Entry::Vacant(e) => e.insert((&config.modules[idx], module_obj)), - Entry::Occupied(_) => bail!("Duplicate module ID {}", obj.module_id), + let mut modules = BTreeMap::>::new(); + for (idx, result) in modules_result.unwrap()?.into_iter().enumerate() { + function_count += result.obj.symbols.by_kind(ObjSymbolKind::Function).count(); + dep.extend(result.dep); + match modules.entry(result.obj.module_id) { + Entry::Vacant(e) => e.insert(ModuleInfo { + obj: result.obj, + config: &config.modules[idx], + symbols_cache: result.symbols_cache, + splits_cache: result.splits_cache, + }), + Entry::Occupied(_) => bail!("Duplicate module ID {}", result.obj.module_id), }; } info!( @@ -979,26 +1016,26 @@ fn split(args: SplitArgs) -> Result<()> { let module_ids = modules.keys().cloned().collect_vec(); // Create any missing symbols (referenced from other modules) and set FORCEACTIVE - update_symbols(&mut obj, &modules, !config.symbols_known)?; + update_symbols(&mut dol.obj, &modules, !config.symbols_known)?; for &module_id in &module_ids { - let (module_config, mut module_obj) = modules.remove(&module_id).unwrap(); - update_symbols(&mut module_obj, &modules, !config.symbols_known)?; - modules.insert(module_id, (module_config, module_obj)); + let mut module = modules.remove(&module_id).unwrap(); + update_symbols(&mut module.obj, &modules, !config.symbols_known)?; + modules.insert(module_id, module); } // Create relocations to symbols in other modules for &module_id in &module_ids { - let (module_config, mut module_obj) = modules.remove(&module_id).unwrap(); - create_relocations(&mut module_obj, &modules, &obj)?; - modules.insert(module_id, (module_config, module_obj)); + let mut module = modules.remove(&module_id).unwrap(); + create_relocations(&mut module.obj, &modules, &dol.obj)?; + modules.insert(module_id, module); } // Replace external relocations with internal ones, creating extern symbols - resolve_external_relocations(&mut obj, &modules, None)?; + resolve_external_relocations(&mut dol.obj, &modules, None)?; for &module_id in &module_ids { - let (module_config, mut module_obj) = modules.remove(&module_id).unwrap(); - resolve_external_relocations(&mut module_obj, &modules, Some(&obj))?; - modules.insert(module_id, (module_config, module_obj)); + let mut module = modules.remove(&module_id).unwrap(); + resolve_external_relocations(&mut module.obj, &modules, Some(&dol.obj))?; + modules.insert(module_id, module); } } @@ -1017,16 +1054,17 @@ fn split(args: SplitArgs) -> Result<()> { // DOL s.spawn(|_| { let _span = - info_span!("module", name = %config.base.name(), id = obj.module_id).entered(); + info_span!("module", name = %config.base.name(), id = dol.obj.module_id).entered(); dol_result = Some( - split_write_obj(&mut obj, &config, &config.base, &args.out_dir, args.no_update) - .with_context(|| { + split_write_obj(&mut dol, &config, &args.out_dir, args.no_update).with_context( + || { format!( "While processing object '{}' (module ID {})", config.base.file_name(), - obj.module_id + dol.obj.module_id ) - }), + }, + ), ); }); // Modules @@ -1034,25 +1072,20 @@ fn split(args: SplitArgs) -> Result<()> { modules_result = Some( modules .par_iter_mut() - .map(|(&module_id, (module_config, module_obj))| { + .map(|(&module_id, module)| { let _span = - info_span!("module", name = %module_config.name(), id = module_id) + info_span!("module", name = %module.config.name(), id = module_id) .entered(); - let out_dir = args.out_dir.join(module_config.name().as_ref()); - split_write_obj( - module_obj, - &config, - module_config, - &out_dir, - args.no_update, + let out_dir = args.out_dir.join(module.config.name().as_ref()); + split_write_obj(module, &config, &out_dir, args.no_update).with_context( + || { + format!( + "While processing object '{}' (module ID {})", + module.config.file_name(), + module_id + ) + }, ) - .with_context(|| { - format!( - "While processing object '{}' (module ID {})", - module_config.file_name(), - module_id - ) - }) }) .collect(), ); @@ -1101,7 +1134,8 @@ fn split(args: SplitArgs) -> Result<()> { } #[allow(dead_code)] -fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) -> Result<()> { +fn validate

(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) -> Result<()> +where P: AsRef { let real_obj = process_elf(elf_file)?; for (section_index, real_section) in real_obj.sections.iter() { let obj_section = match obj.sections.get(section_index) { @@ -1410,13 +1444,12 @@ fn apply(args: ApplyArgs) -> Result<()> { process_dol(file.as_slice(), config.base.name().as_ref())? }; - if let Some(symbols_path) = &config.base.symbols { - if !apply_symbols_file(symbols_path, &mut obj)? { - bail!("Symbols file '{}' does not exist", symbols_path.display()); - } - } else { + let Some(symbols_path) = &config.base.symbols else { bail!("No symbols file specified in config"); - } + }; + let Some(symbols_cache) = apply_symbols_file(symbols_path, &mut obj)? else { + bail!("Symbols file '{}' does not exist", symbols_path.display()); + }; log::info!("Loading {}", args.elf_file.display()); let linked_obj = process_elf(&args.elf_file)?; @@ -1550,7 +1583,7 @@ fn apply(args: ApplyArgs) -> Result<()> { } } - write_symbols_file(config.base.symbols.as_ref().unwrap(), &obj)?; + write_symbols_file(config.base.symbols.as_ref().unwrap(), &obj, Some(symbols_cache))?; Ok(()) } diff --git a/src/cmd/dwarf.rs b/src/cmd/dwarf.rs index 2bb19e5..e932bdd 100644 --- a/src/cmd/dwarf.rs +++ b/src/cmd/dwarf.rs @@ -101,11 +101,14 @@ fn dump(args: DumpArgs) -> Result<()> { Ok(()) } -fn dump_debug_section( +fn dump_debug_section( w: &mut W, obj_file: &object::File<'_>, debug_section: Section, -) -> Result<()> { +) -> Result<()> +where + W: Write + ?Sized, +{ let mut data = debug_section.uncompressed_data()?.into_owned(); // Apply relocations to data diff --git a/src/cmd/elf.rs b/src/cmd/elf.rs index 687ad73..73420ea 100644 --- a/src/cmd/elf.rs +++ b/src/cmd/elf.rs @@ -19,10 +19,11 @@ use crate::{ obj::ObjKind, util::{ asm::write_asm, - comment::{read_comment_sym, MWComment}, + comment::{CommentSym, MWComment}, config::{write_splits_file, write_symbols_file}, elf::{process_elf, write_elf}, file::{buf_writer, process_rsp}, + reader::{Endian, FromReader}, signatures::{compare_signature, generate_signature, FunctionSignature}, split::split_obj, IntoCow, ToCow, @@ -136,8 +137,8 @@ fn config(args: ConfigArgs) -> Result<()> { let obj = process_elf(&args.in_file)?; DirBuilder::new().recursive(true).create(&args.out_dir)?; - write_symbols_file(args.out_dir.join("symbols.txt"), &obj)?; - write_splits_file(args.out_dir.join("splits.txt"), &obj, false)?; + write_symbols_file(args.out_dir.join("symbols.txt"), &obj, None)?; + write_splits_file(args.out_dir.join("splits.txt"), &obj, false, None)?; Ok(()) } @@ -545,8 +546,8 @@ fn info(args: InfoArgs) -> Result<()> { let data = comment_section.uncompressed_data()?; if !data.is_empty() { let mut reader = Cursor::new(&*data); - let header = - MWComment::parse_header(&mut reader).context("While reading .comment section")?; + let header = MWComment::from_reader(&mut reader, Endian::Big) + .context("While reading .comment section")?; println!("\nMetrowerks metadata (.comment):"); println!("\tVersion: {}", header.version); println!( @@ -577,7 +578,7 @@ fn info(args: InfoArgs) -> Result<()> { println!("\tUnsafe global reg vars: {}", header.unsafe_global_reg_vars); println!("\n{: >10} | {: <6} | {: <6} | {: <10}", "Align", "Vis", "Active", "Symbol"); for symbol in in_file.symbols() { - let comment_sym = read_comment_sym(&mut reader)?; + let comment_sym = CommentSym::from_reader(&mut reader, Endian::Big)?; if symbol.is_definition() { println!( "{: >10} | {: <#6X} | {: <#6X} | {: <10}", diff --git a/src/cmd/elf2dol.rs b/src/cmd/elf2dol.rs index 78b100f..0bc4e1d 100644 --- a/src/cmd/elf2dol.rs +++ b/src/cmd/elf2dol.rs @@ -150,7 +150,8 @@ const fn align32(x: u32) -> u32 { (x + 31) & !31 } const ZERO_BUF: [u8; 32] = [0u8; 32]; #[inline] -fn write_aligned(out: &mut T, bytes: &[u8], aligned_size: u32) -> std::io::Result<()> { +fn write_aligned(out: &mut T, bytes: &[u8], aligned_size: u32) -> std::io::Result<()> +where T: Write + ?Sized { out.write_all(bytes)?; let padding = aligned_size - bytes.len() as u32; if padding > 0 { diff --git a/src/cmd/map.rs b/src/cmd/map.rs index d0b8d37..57fe327 100644 --- a/src/cmd/map.rs +++ b/src/cmd/map.rs @@ -1,7 +1,6 @@ -#![allow(clippy::needless_borrow)] use std::path::PathBuf; -use anyhow::{bail, Result}; +use anyhow::{bail, ensure, Result}; use argp::FromArgs; use cwdemangle::{demangle, DemangleOptions}; @@ -23,9 +22,6 @@ pub struct Args { enum SubCommand { Entries(EntriesArgs), Symbol(SymbolArgs), - Order(OrderArgs), - Slices(SlicesArgs), - Symbols(SymbolsArgs), } #[derive(FromArgs, PartialEq, Eq, Debug)] @@ -52,40 +48,10 @@ pub struct SymbolArgs { symbol: String, } -#[derive(FromArgs, PartialEq, Eq, Debug)] -/// Attempts to resolve global link order. -#[argp(subcommand, name = "order")] -pub struct OrderArgs { - #[argp(positional)] - /// path to input map - map_file: PathBuf, -} - -#[derive(FromArgs, PartialEq, Eq, Debug)] -/// Emits a slices.yml for ppcdis. (WIP) -#[argp(subcommand, name = "slices")] -pub struct SlicesArgs { - #[argp(positional)] - /// path to input map - map_file: PathBuf, -} - -#[derive(FromArgs, PartialEq, Eq, Debug)] -/// Emits a symbols.yml for ppcdis. (WIP) -#[argp(subcommand, name = "symbols")] -pub struct SymbolsArgs { - #[argp(positional)] - /// path to input map - map_file: PathBuf, -} - pub fn run(args: Args) -> Result<()> { match args.command { SubCommand::Entries(c_args) => entries(c_args), SubCommand::Symbol(c_args) => symbol(c_args), - SubCommand::Order(c_args) => order(c_args), - SubCommand::Slices(c_args) => slices(c_args), - SubCommand::Symbols(c_args) => symbols(c_args), } } @@ -94,12 +60,25 @@ fn entries(args: EntriesArgs) -> Result<()> { let entries = process_map(&mut file.as_reader())?; match entries.unit_entries.get_vec(&args.unit) { Some(vec) => { + println!("Entries for {}:", args.unit); for symbol_ref in vec { if symbol_ref.name.starts_with('@') { continue; } - let demangled = demangle(&symbol_ref.name, &DemangleOptions::default()); - println!("{}", demangled.as_deref().unwrap_or(&symbol_ref.name)); + if let Some((section, entry)) = entries.get_section_symbol(symbol_ref) { + println!( + ">>> {} ({:?},{:?}) @ {}:{:#010X} [{}]", + entry.demangled.as_ref().unwrap_or(&entry.name), + entry.kind, + entry.visibility, + section, + entry.address, + entry.unit.as_deref().unwrap_or("(generated)"), + ); + } else { + let demangled = demangle(&symbol_ref.name, &DemangleOptions::default()); + println!(">>> {}", demangled.as_deref().unwrap_or(&symbol_ref.name)); + } } } None => bail!("Failed to find entries for TU '{}' in map", args.unit), @@ -109,121 +88,75 @@ fn entries(args: EntriesArgs) -> Result<()> { fn symbol(args: SymbolArgs) -> Result<()> { let file = map_file(&args.map_file)?; + log::info!("Processing map..."); let entries = process_map(&mut file.as_reader())?; - let opt_ref: Option<(SymbolRef, SymbolEntry)> = None; + log::info!("Done!"); + let mut opt_ref: Option<(String, SymbolEntry)> = None; - _ = entries; - _ = opt_ref; - // TODO + for (section, symbol_map) in &entries.section_symbols { + for symbol_entry in symbol_map.values().flatten() { + if symbol_entry.name == args.symbol { + ensure!(opt_ref.is_none(), "Found multiple symbols with name '{}'", args.symbol); + opt_ref = Some((section.clone(), symbol_entry.clone())); + } + } + } + let Some((section, symbol)) = opt_ref else { + bail!("Failed to find symbol '{}' in map", args.symbol); + }; - // for (symbol_ref, entry) in &entries.symbols { - // if symbol_ref.name == args.symbol { - // ensure!(opt_ref.is_none(), "Symbol '{}' found in multiple TUs", args.symbol); - // opt_ref = Some((symbol_ref.clone(), entry.clone())); - // } - // } - // match opt_ref { - // Some((symbol_ref, symbol)) => { - // println!("Located symbol {}", symbol.demangled.as_ref().unwrap_or(&symbol.name)); - // if let Some(vec) = entries.entry_references.get_vec(&symbol_ref) { - // println!("\nReferences:"); - // for x in vec { - // if let Some(reference) = entries.symbols.get(x) { - // println!( - // ">>> {} ({:?},{:?}) [{}]", - // reference.demangled.as_ref().unwrap_or(&reference.name), - // reference.kind, - // reference.visibility, - // reference.unit.as_deref().unwrap_or("[generated]") - // ); - // } else { - // println!(">>> {} (NOT FOUND)", x.name); - // } - // } - // } - // if let Some(vec) = entries.entry_referenced_from.get_vec(&symbol_ref) { - // println!("\nReferenced from:"); - // for x in vec { - // if let Some(reference) = entries.symbols.get(x) { - // println!( - // ">>> {} ({:?}, {:?}) [{}]", - // reference.demangled.as_ref().unwrap_or(&reference.name), - // reference.kind, - // reference.visibility, - // reference.unit.as_deref().unwrap_or("[generated]") - // ); - // } else { - // println!(">>> {} (NOT FOUND)", x.name); - // } - // } - // } - // println!("\n"); - // } - // None => bail!("Failed to find symbol '{}' in map", args.symbol), - // } - Ok(()) -} - -fn order(args: OrderArgs) -> Result<()> { - let file = map_file(&args.map_file)?; - let entries = process_map(&mut file.as_reader())?; - - _ = entries; - // TODO - - // let order = resolve_link_order(&entries.unit_order)?; - // for unit in order { - // println!("{unit}"); - // } - Ok(()) -} - -fn slices(args: SlicesArgs) -> Result<()> { - let file = map_file(&args.map_file)?; - let entries = process_map(&mut file.as_reader())?; - - _ = entries; - // TODO - - // let order = resolve_link_order(&entries.unit_order)?; - // for unit in order { - // let unit_path = if let Some((lib, name)) = unit.split_once(' ') { - // format!("{}/{}", lib.strip_suffix(".a").unwrap_or(lib), name) - // } else if let Some(strip) = unit.strip_suffix(".o") { - // format!("{strip}.c") - // } else { - // unit.clone() - // }; - // println!("{unit_path}:"); - // let mut ranges = Vec::<(String, Range)>::new(); - // match entries.unit_section_ranges.get(&unit) { - // Some(sections) => { - // for (name, range) in sections { - // ranges.push((name.clone(), range.clone())); - // } - // } - // None => bail!("Failed to locate sections for unit '{unit}'"), - // } - // ranges.sort_by(|(_, a), (_, b)| a.start.cmp(&b.start)); - // for (name, range) in ranges { - // println!("\t{}: [{:#010x}, {:#010x}]", name, range.start, range.end); - // } - // } - Ok(()) -} - -fn symbols(args: SymbolsArgs) -> Result<()> { - let file = map_file(&args.map_file)?; - let entries = process_map(&mut file.as_reader())?; - - _ = entries; - // TODO - - // for (address, symbol) in entries.address_to_symbol { - // if symbol.name.starts_with('@') { - // continue; - // } - // println!("{:#010x}: {}", address, symbol.name); - // } + println!( + "Located symbol {} ({:?},{:?}) @ {}:{:#010X} [{}]", + symbol.demangled.as_ref().unwrap_or(&symbol.name), + symbol.kind, + symbol.visibility, + section, + symbol.address, + symbol.unit.as_deref().unwrap_or("(generated)"), + ); + let symbol_ref = SymbolRef { name: symbol.name.clone(), unit: symbol.unit.clone() }; + if let Some(vec) = entries.entry_references.get_vec(&symbol_ref) { + println!("\nKnown references:"); + for x in vec { + if let Some((section, entry)) = entries.get_section_symbol(x) { + println!( + ">>> {} ({:?},{:?}) @ {}:{:#010X} [{}]", + entry.demangled.as_ref().unwrap_or(&entry.name), + entry.kind, + entry.visibility, + section, + entry.address, + entry.unit.as_deref().unwrap_or("(generated)"), + ); + } else { + println!(">>> {} (NOT FOUND)", x.name); + } + } + } + if let Some(vec) = entries.entry_referenced_from.get_vec(&symbol_ref) { + println!("\nKnown referenced from:"); + for x in vec { + if let Some((section, entry)) = entries.get_section_symbol(x) { + println!( + ">>> {} ({:?}, {:?}) @ {}:{:#010X} [{}]", + entry.demangled.as_ref().unwrap_or(&entry.name), + entry.kind, + entry.visibility, + section, + entry.address, + entry.unit.as_deref().unwrap_or("(generated)"), + ); + } else { + println!(">>> {} (NOT FOUND)", x.name); + } + } + } + if let Some(vec) = entries.unit_references.get_vec(&symbol_ref) { + println!("\nGenerated in TUs:"); + for x in vec { + println!(">>> {}", x); + } + } + println!("\n"); Ok(()) } diff --git a/src/cmd/shasum.rs b/src/cmd/shasum.rs index 6ae68ed..05608bb 100644 --- a/src/cmd/shasum.rs +++ b/src/cmd/shasum.rs @@ -47,7 +47,8 @@ pub fn run(args: Args) -> Result<()> { Ok(()) } -fn check(args: &Args, reader: &mut R) -> Result<()> { +fn check(args: &Args, reader: &mut R) -> Result<()> +where R: BufRead + ?Sized { let mut matches = 0usize; let mut mismatches = 0usize; for line in reader.lines() { @@ -97,7 +98,8 @@ fn check(args: &Args, reader: &mut R) -> Result<()> { Ok(()) } -fn hash(reader: &mut R, path: &Path) -> Result<()> { +fn hash(reader: &mut R, path: &Path) -> Result<()> +where R: Read + ?Sized { let hash = file_sha1(reader)?; let mut hash_buf = [0u8; 40]; let hash_str = base16ct::lower::encode_str(&hash, &mut hash_buf) @@ -106,7 +108,8 @@ fn hash(reader: &mut R, path: &Path) -> Result<()> { Ok(()) } -pub fn file_sha1(reader: &mut R) -> Result> { +pub fn file_sha1(reader: &mut R) -> Result> +where R: Read + ?Sized { let mut buf = [0u8; DEFAULT_BUF_SIZE]; let mut hasher = Sha1::new(); Ok(loop { @@ -118,7 +121,8 @@ pub fn file_sha1(reader: &mut R) -> Result> }) } -pub fn file_sha1_string(reader: &mut R) -> Result { +pub fn file_sha1_string(reader: &mut R) -> Result +where R: Read + ?Sized { let hash = file_sha1(reader)?; let mut hash_buf = [0u8; 40]; let hash_str = base16ct::lower::encode_str(&hash, &mut hash_buf) diff --git a/src/main.rs b/src/main.rs index 9e392b0..a1aa8d8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -87,7 +87,7 @@ enum SubCommand { Dwarf(cmd::dwarf::Args), Elf(cmd::elf::Args), Elf2Dol(cmd::elf2dol::Args), - // Map(cmd::map::Args), + Map(cmd::map::Args), MetroidBuildInfo(cmd::metroidbuildinfo::Args), Nlzss(cmd::nlzss::Args), Rarc(cmd::rarc::Args), @@ -159,7 +159,7 @@ fn main() { SubCommand::Dwarf(c_args) => cmd::dwarf::run(c_args), SubCommand::Elf(c_args) => cmd::elf::run(c_args), SubCommand::Elf2Dol(c_args) => cmd::elf2dol::run(c_args), - // SubCommand::Map(c_args) => cmd::map::run(c_args), + SubCommand::Map(c_args) => cmd::map::run(c_args), SubCommand::MetroidBuildInfo(c_args) => cmd::metroidbuildinfo::run(c_args), SubCommand::Nlzss(c_args) => cmd::nlzss::run(c_args), SubCommand::Rarc(c_args) => cmd::rarc::run(c_args), diff --git a/src/obj/symbols.rs b/src/obj/symbols.rs index 114bc98..74220be 100644 --- a/src/obj/symbols.rs +++ b/src/obj/symbols.rs @@ -125,7 +125,10 @@ impl ObjSymbolFlagSet { #[allow(clippy::derived_hash_with_manual_eq)] impl Hash for ObjSymbolFlagSet { - fn hash(&self, state: &mut H) { self.0.bits().hash(state) } + fn hash(&self, state: &mut H) + where H: Hasher { + self.0.bits().hash(state) + } } #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Default, Serialize, Deserialize)] diff --git a/src/util/asm.rs b/src/util/asm.rs index 9778b90..05b3497 100644 --- a/src/util/asm.rs +++ b/src/util/asm.rs @@ -29,7 +29,8 @@ struct SymbolEntry { kind: SymbolEntryKind, } -pub fn write_asm(w: &mut W, obj: &ObjInfo) -> Result<()> { +pub fn write_asm(w: &mut W, obj: &ObjInfo) -> Result<()> +where W: Write + ?Sized { writeln!(w, ".include \"macros.inc\"")?; if !obj.name.is_empty() { let name = obj @@ -230,7 +231,7 @@ pub fn write_asm(w: &mut W, obj: &ObjInfo) -> Result<()> { Ok(()) } -fn write_code_chunk( +fn write_code_chunk( w: &mut W, symbols: &[ObjSymbol], _entries: &BTreeMap>, @@ -238,7 +239,10 @@ fn write_code_chunk( section: &ObjSection, address: u32, data: &[u8], -) -> Result<()> { +) -> Result<()> +where + W: Write + ?Sized, +{ for ins in disasm_iter(data, address) { let reloc = relocations.get(&ins.addr); let file_offset = section.file_offset + (ins.addr as u64 - section.address); @@ -247,14 +251,17 @@ fn write_code_chunk( Ok(()) } -fn write_ins( +fn write_ins( w: &mut W, symbols: &[ObjSymbol], ins: Ins, reloc: Option<&ObjReloc>, file_offset: u64, section_address: u64, -) -> Result<()> { +) -> Result<()> +where + W: Write + ?Sized, +{ write!( w, "/* {:08X} {:08X} {:02X} {:02X} {:02X} {:02X} */\t", @@ -316,7 +323,8 @@ fn write_ins( Ok(()) } -fn write_reloc(w: &mut W, symbols: &[ObjSymbol], reloc: &ObjReloc) -> Result<()> { +fn write_reloc(w: &mut W, symbols: &[ObjSymbol], reloc: &ObjReloc) -> Result<()> +where W: Write + ?Sized { write_reloc_symbol(w, symbols, reloc)?; match reloc.kind { ObjRelocKind::Absolute | ObjRelocKind::PpcRel24 | ObjRelocKind::PpcRel14 => { @@ -338,11 +346,8 @@ fn write_reloc(w: &mut W, symbols: &[ObjSymbol], reloc: &ObjReloc) -> Ok(()) } -fn write_symbol_entry( - w: &mut W, - symbols: &[ObjSymbol], - entry: &SymbolEntry, -) -> Result<()> { +fn write_symbol_entry(w: &mut W, symbols: &[ObjSymbol], entry: &SymbolEntry) -> Result<()> +where W: Write + ?Sized { let symbol = &symbols[entry.index]; // Skip writing certain symbols @@ -405,7 +410,7 @@ fn write_symbol_entry( } #[allow(clippy::too_many_arguments)] -fn write_data( +fn write_data( w: &mut W, symbols: &[ObjSymbol], entries: &BTreeMap>, @@ -414,7 +419,10 @@ fn write_data( start: u32, end: u32, section_entries: &[BTreeMap>], -) -> Result<()> { +) -> Result<()> +where + W: Write + ?Sized, +{ let mut entry_iter = entries.range(start..end); let mut reloc_iter = relocations.range(start..end); @@ -577,7 +585,8 @@ fn find_data_kind( Ok(kind) } -fn write_string(w: &mut W, data: &[u8]) -> Result<()> { +fn write_string(w: &mut W, data: &[u8]) -> Result<()> +where W: Write + ?Sized { let terminated = matches!(data.last(), Some(&b) if b == 0); if terminated { write!(w, "\t.string \"")?; @@ -601,7 +610,8 @@ fn write_string(w: &mut W, data: &[u8]) -> Result<()> { Ok(()) } -fn write_string16(w: &mut W, data: &[u16]) -> Result<()> { +fn write_string16(w: &mut W, data: &[u16]) -> Result<()> +where W: Write + ?Sized { if matches!(data.last(), Some(&b) if b == 0) { write!(w, "\t.string16 \"")?; } else { @@ -630,7 +640,8 @@ fn write_string16(w: &mut W, data: &[u16]) -> Result<()> { Ok(()) } -fn write_data_chunk(w: &mut W, data: &[u8], data_kind: ObjDataKind) -> Result<()> { +fn write_data_chunk(w: &mut W, data: &[u8], data_kind: ObjDataKind) -> Result<()> +where W: Write + ?Sized { let remain = data; match data_kind { ObjDataKind::String => { @@ -718,14 +729,17 @@ fn write_data_chunk(w: &mut W, data: &[u8], data_kind: ObjDataKind) -> Ok(()) } -fn write_data_reloc( +fn write_data_reloc( w: &mut W, symbols: &[ObjSymbol], _entries: &BTreeMap>, reloc_address: u32, reloc: &ObjReloc, section_entries: &[BTreeMap>], -) -> Result { +) -> Result +where + W: Write + ?Sized, +{ match reloc.kind { ObjRelocKind::Absolute => { // Attempt to use .rel macro for relative relocations @@ -759,13 +773,16 @@ fn write_data_reloc( } } -fn write_bss( +fn write_bss( w: &mut W, symbols: &[ObjSymbol], entries: &BTreeMap>, start: u32, end: u32, -) -> Result<()> { +) -> Result<()> +where + W: Write + ?Sized, +{ let mut entry_iter = entries.range(start..end); let mut current_address = start; @@ -798,13 +815,16 @@ fn write_bss( Ok(()) } -fn write_section_header( +fn write_section_header( w: &mut W, section: &ObjSection, subsection: usize, start: u32, end: u32, -) -> Result<()> { +) -> Result<()> +where + W: Write + ?Sized, +{ writeln!( w, "\n# {:#010X} - {:#010X}", @@ -865,11 +885,14 @@ fn write_section_header( Ok(()) } -fn write_reloc_symbol( +fn write_reloc_symbol( w: &mut W, symbols: &[ObjSymbol], reloc: &ObjReloc, -) -> std::io::Result<()> { +) -> std::io::Result<()> +where + W: Write + ?Sized, +{ write_symbol_name(w, &symbols[reloc.target_symbol].name)?; match reloc.addend.cmp(&0i64) { Ordering::Greater => write!(w, "+{:#X}", reloc.addend), @@ -878,7 +901,8 @@ fn write_reloc_symbol( } } -fn write_symbol_name(w: &mut W, name: &str) -> std::io::Result<()> { +fn write_symbol_name(w: &mut W, name: &str) -> std::io::Result<()> +where W: Write + ?Sized { if name.contains('@') || name.contains('<') || name.contains('\\') diff --git a/src/util/comment.rs b/src/util/comment.rs index 13044d5..adab1c2 100644 --- a/src/util/comment.rs +++ b/src/util/comment.rs @@ -1,13 +1,15 @@ use std::{ - io::{Read, Seek, SeekFrom, Write}, - ops::Deref, + io, + io::{Read, Seek, Write}, }; -use anyhow::{bail, ensure, Context, Result}; -use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use anyhow::{bail, Result}; use num_enum::{IntoPrimitive, TryFromPrimitive}; -use crate::obj::{ObjSymbol, ObjSymbolKind}; +use crate::{ + obj::{ObjSymbol, ObjSymbolKind}, + util::reader::{skip_bytes, struct_size, Endian, FromReader, ToWriter}, +}; #[derive(Debug, Copy, Clone, IntoPrimitive, TryFromPrimitive)] #[repr(u8)] @@ -29,6 +31,131 @@ pub struct MWComment { pub unsafe_global_reg_vars: bool, } +const MAGIC: &[u8] = "CodeWarrior".as_bytes(); +const HEADER_SIZE: u8 = 0x2C; +const PADDING: &[u8] = &[0u8; 0x16]; + +impl FromReader for MWComment { + type Args = (); + + const STATIC_SIZE: usize = HEADER_SIZE as usize; + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + let mut header = MWComment { + version: 0, + compiler_version: [0; 4], + pool_data: false, + float: MWFloatKind::None, + processor: 0, + incompatible_return_small_structs: false, + incompatible_sfpe_double_params: false, + unsafe_global_reg_vars: false, + }; + // 0x0 - 0xA + let magic = <[u8; MAGIC.len()]>::from_reader(reader, e)?; + if magic != MAGIC { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Invalid .comment section magic: {:?}", magic), + )); + } + // 0xB + header.version = u8::from_reader(reader, e)?; + if !matches!(header.version, 8 | 10 | 11 | 13 | 14 | 15) { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Unknown .comment section version: {}", header.version), + )); + } + // 0xC - 0xF + reader.read_exact(&mut header.compiler_version)?; + // 0x10 + header.pool_data = match u8::from_reader(reader, e)? { + 0 => false, + 1 => true, + value => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Invalid value for pool_data: {}", value), + )) + } + }; + // 0x11 + header.float = MWFloatKind::try_from(u8::from_reader(reader, e)?) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid value for float"))?; + // 0x12 - 0x13 + header.processor = u16::from_reader(reader, e)?; + // 0x14 + match u8::from_reader(reader, e)? { + HEADER_SIZE => {} + v => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Expected header size {:#X}, got {:#X}", HEADER_SIZE, v), + )) + } + } + // 0x15 + let flags = u8::from_reader(reader, e)?; + if flags & !7 != 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Unexpected flag value {:#X}", flags), + )); + } + if flags & 1 == 1 { + header.incompatible_return_small_structs = true; + } + if flags & 2 == 2 { + header.incompatible_sfpe_double_params = true; + } + if flags & 4 == 4 { + header.unsafe_global_reg_vars = true; + } + // 0x16 - 0x2C + skip_bytes::<0x16, _>(reader)?; + Ok(header) + } +} + +impl ToWriter for MWComment { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + // 0x0 - 0xA + MAGIC.to_writer(writer, e)?; + // 0xB + self.version.to_writer(writer, e)?; + // 0xC - 0xF + self.compiler_version.to_writer(writer, e)?; + // 0x10 + (if self.pool_data { 1u8 } else { 0u8 }).to_writer(writer, e)?; + // 0x11 + u8::from(self.float).to_writer(writer, e)?; + // 0x12 - 0x13 + self.processor.to_writer(writer, e)?; + // 0x14 + HEADER_SIZE.to_writer(writer, e)?; + // 0x15 + let mut flags = 0u8; + if self.incompatible_return_small_structs { + flags |= 1; + } + if self.incompatible_sfpe_double_params { + flags |= 2; + } + if self.unsafe_global_reg_vars { + flags |= 4; + } + flags.to_writer(writer, e)?; + // 0x16 - 0x2C + PADDING.to_writer(writer, e)?; + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + impl MWComment { pub fn new(version: u8) -> Result { // Metrowerks C/C++ Compiler for Embedded PowerPC. @@ -60,107 +187,6 @@ impl MWComment { } } -const MAGIC: &[u8] = "CodeWarrior".as_bytes(); -const PADDING: &[u8] = &[0u8; 0x16]; - -impl MWComment { - pub fn parse_header(reader: &mut R) -> Result { - let mut header = MWComment { - version: 0, - compiler_version: [0; 4], - pool_data: false, - float: MWFloatKind::None, - processor: 0, - incompatible_return_small_structs: false, - incompatible_sfpe_double_params: false, - unsafe_global_reg_vars: false, - }; - // 0x0 - 0xA - let mut magic = vec![0u8; MAGIC.len()]; - reader.read_exact(&mut magic).context("While reading magic")?; - if magic.deref() != MAGIC { - bail!("Invalid .comment section magic: {:?}", magic); - } - // 0xB - header.version = reader.read_u8()?; - ensure!( - matches!(header.version, 8 | 10 | 11 | 13 | 14 | 15), - "Unknown .comment section version: {}", - header.version - ); - // 0xC - 0xF - reader - .read_exact(&mut header.compiler_version) - .context("While reading compiler version")?; - // 0x10 - header.pool_data = match reader.read_u8()? { - 0 => false, - 1 => true, - value => bail!("Invalid value for pool_data: {}", value), - }; - // 0x11 - header.float = - MWFloatKind::try_from(reader.read_u8()?).context("Invalid value for float")?; - // 0x12 - 0x13 - header.processor = reader.read_u16::()?; - // 0x14 - match reader.read_u8()? as char { - // This is 0x2C, which could also be the size of the header? Unclear - ',' => {} - c => bail!("Expected ',' after processor, got '{}'", c), - } - // 0x15 - let flags = reader.read_u8()?; - if flags & !7 != 0 { - bail!("Unexpected flag value {:#X}", flags); - } - if flags & 1 == 1 { - header.incompatible_return_small_structs = true; - } - if flags & 2 == 2 { - header.incompatible_sfpe_double_params = true; - } - if flags & 4 == 4 { - header.unsafe_global_reg_vars = true; - } - // 0x16 - 0x2C - reader.seek(SeekFrom::Current(0x16))?; - Ok(header) - } - - pub fn write_header(&self, w: &mut W) -> Result<()> { - // 0x0 - 0xA - w.write_all(MAGIC)?; - // 0xB - w.write_u8(self.version)?; - // 0xC - 0xF - w.write_all(&self.compiler_version)?; - // 0x10 - w.write_u8(if self.pool_data { 1 } else { 0 })?; - // 0x11 - w.write_u8(self.float.into())?; - // 0x12 - 0x13 - w.write_u16::(self.processor)?; - // 0x14 - w.write_u8(0x2C)?; - // 0x15 - let mut flags = 0u8; - if self.incompatible_return_small_structs { - flags |= 1; - } - if self.incompatible_sfpe_double_params { - flags |= 2; - } - if self.unsafe_global_reg_vars { - flags |= 4; - } - w.write_u8(flags)?; - // 0x16 - 0x2C - w.write_all(PADDING)?; - Ok(()) - } -} - #[derive(Debug, Copy, Clone)] pub struct CommentSym { pub align: u32, @@ -168,6 +194,65 @@ pub struct CommentSym { pub active_flags: u8, } +impl FromReader for CommentSym { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // align + u8::STATIC_SIZE, // vis_flags + u8::STATIC_SIZE, // active_flags + 2, // padding + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + let mut out = CommentSym { align: 0, vis_flags: 0, active_flags: 0 }; + out.align = u32::from_reader(reader, e)?; + out.vis_flags = u8::from_reader(reader, e)?; + if !matches!(out.vis_flags, 0 | 0xD | 0xE) { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Unknown vis_flags: {:#X}", out.vis_flags), + )); + } + out.active_flags = u8::from_reader(reader, e)?; + if !matches!(out.active_flags, 0 | 0x8 | 0x10 | 0x20) { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Unknown active_flags: {:#X}", out.active_flags), + )); + } + let value = u8::from_reader(reader, e)?; + if value != 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Unexpected value after active_flags (1): {:#X}", value), + )); + } + let value = u8::from_reader(reader, e)?; + if value != 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Unexpected value after active_flags (2): {:#X}", value), + )); + } + Ok(out) + } +} + +impl ToWriter for CommentSym { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.align.to_writer(writer, e)?; + self.vis_flags.to_writer(writer, e)?; + self.active_flags.to_writer(writer, e)?; + [0u8; 2].to_writer(writer, e)?; + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + impl CommentSym { pub fn from(symbol: &ObjSymbol, force_active: bool) -> Self { let align = match symbol.align { @@ -205,28 +290,3 @@ impl CommentSym { Self { align, vis_flags, active_flags } } } - -pub fn write_comment_sym(w: &mut W, symbol: CommentSym) -> Result<()> { - w.write_u32::(symbol.align)?; - w.write_u8(symbol.vis_flags)?; - w.write_u8(symbol.active_flags)?; - w.write_u8(0)?; - w.write_u8(0)?; - Ok(()) -} - -pub fn read_comment_sym(r: &mut R) -> Result { - let mut out = CommentSym { align: 0, vis_flags: 0, active_flags: 0 }; - out.align = r.read_u32::()?; - out.vis_flags = r.read_u8()?; - ensure!(matches!(out.vis_flags, 0 | 0xD | 0xE), "Unknown vis_flags {}", out.vis_flags); - out.active_flags = r.read_u8()?; - ensure!( - matches!(out.active_flags, 0 | 0x8 | 0x10 | 0x20), - "Unknown active_flags {}", - out.active_flags - ); - ensure!(r.read_u8()? == 0, "Unexpected value after active_flags (1)"); - ensure!(r.read_u8()? == 0, "Unexpected value after active_flags (2)"); - Ok(out) -} diff --git a/src/util/config.rs b/src/util/config.rs index 15fb3cc..1255184 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -1,4 +1,5 @@ use std::{ + fs, io::{BufRead, Write}, num::ParseIntError, path::Path, @@ -7,8 +8,11 @@ use std::{ use anyhow::{anyhow, bail, ensure, Context, Result}; use cwdemangle::{demangle, DemangleOptions}; +use filetime::FileTime; use once_cell::sync::Lazy; use regex::{Captures, Regex}; +use tracing::{debug, info, warn}; +use xxhash_rust::xxh3::xxh3_64; use crate::{ analysis::cfa::SectionAddress, @@ -17,7 +21,7 @@ use crate::{ ObjSymbolFlags, ObjSymbolKind, ObjUnit, }, util::{ - file::{buf_writer, map_file}, + file::{buf_writer, map_file, FileReadInfo}, split::default_section_align, }, }; @@ -30,9 +34,11 @@ fn parse_hex(s: &str) -> Result { } } -pub fn apply_symbols_file>(path: P, obj: &mut ObjInfo) -> Result { +pub fn apply_symbols_file

(path: P, obj: &mut ObjInfo) -> Result> +where P: AsRef { Ok(if path.as_ref().is_file() { let file = map_file(path)?; + let cached = FileReadInfo::new(&file)?; for result in file.as_reader().lines() { let line = match result { Ok(line) => line, @@ -42,9 +48,9 @@ pub fn apply_symbols_file>(path: P, obj: &mut ObjInfo) -> Result< obj.add_symbol(symbol, true)?; } } - true + Some(cached) } else { - false + None }) } @@ -168,15 +174,58 @@ pub fn is_auto_symbol(symbol: &ObjSymbol) -> bool { || symbol.name.starts_with("jumptable_") } -#[inline] -pub fn write_symbols_file>(path: P, obj: &ObjInfo) -> Result<()> { - let mut w = buf_writer(path)?; - write_symbols(&mut w, obj)?; - w.flush()?; +fn write_if_unchanged(path: P, cb: Cb, cached_file: Option) -> Result<()> +where + P: AsRef, + Cb: FnOnce(&mut dyn Write) -> Result<()>, +{ + if let Some(cached_file) = cached_file { + // Check file mtime + let path = path.as_ref(); + let new_mtime = fs::metadata(path).ok().map(|m| FileTime::from_last_modification_time(&m)); + if let Some(new_mtime) = new_mtime { + if new_mtime != cached_file.mtime { + // File changed, don't write + warn!(path = %path.display(), "File changed since read, not updating"); + return Ok(()); + } + } + + // Write to buffer and compare with hash + let mut buf = Vec::new(); + cb(&mut buf)?; + if xxh3_64(&buf) == cached_file.hash { + // No changes + debug!(path = %path.display(), "File unchanged"); + return Ok(()); + } + + // Write to file + info!("Writing updated {}", path.display()); + fs::write(path, &buf)?; + } else { + // Write directly + let mut w = buf_writer(path)?; + cb(&mut w)?; + w.flush()?; + } Ok(()) } -pub fn write_symbols(w: &mut W, obj: &ObjInfo) -> Result<()> { +#[inline] +pub fn write_symbols_file

( + path: P, + obj: &ObjInfo, + cached_file: Option, +) -> Result<()> +where + P: AsRef, +{ + write_if_unchanged(path, |w| write_symbols(w, obj), cached_file) +} + +pub fn write_symbols(w: &mut W, obj: &ObjInfo) -> Result<()> +where W: Write + ?Sized { for (_, symbol) in obj.symbols.iter_ordered() { if symbol.kind == ObjSymbolKind::Section || is_skip_symbol(symbol) { continue; @@ -186,7 +235,8 @@ pub fn write_symbols(w: &mut W, obj: &ObjInfo) -> Result<()> { Ok(()) } -fn write_symbol(w: &mut W, obj: &ObjInfo, symbol: &ObjSymbol) -> Result<()> { +fn write_symbol(w: &mut W, obj: &ObjInfo, symbol: &ObjSymbol) -> Result<()> +where W: Write + ?Sized { write!(w, "{} = ", symbol.name)?; let section = symbol.section.and_then(|idx| obj.sections.get(idx)); if let Some(section) = section { @@ -330,14 +380,20 @@ fn section_kind_to_str(kind: ObjSectionKind) -> &'static str { } #[inline] -pub fn write_splits_file>(path: P, obj: &ObjInfo, all: bool) -> Result<()> { - let mut w = buf_writer(path)?; - write_splits(&mut w, obj, all)?; - w.flush()?; - Ok(()) +pub fn write_splits_file

( + path: P, + obj: &ObjInfo, + all: bool, + cached_file: Option, +) -> Result<()> +where + P: AsRef, +{ + write_if_unchanged(path, |w| write_splits(w, obj, all), cached_file) } -pub fn write_splits(w: &mut W, obj: &ObjInfo, all: bool) -> Result<()> { +pub fn write_splits(w: &mut W, obj: &ObjInfo, all: bool) -> Result<()> +where W: Write + ?Sized { writeln!(w, "Sections:")?; for (_, section) in obj.sections.iter() { write!(w, "\t{:<11} type:{}", section.name, section_kind_to_str(section.kind))?; @@ -530,17 +586,20 @@ enum SplitState { Unit(String), } -pub fn apply_splits_file>(path: P, obj: &mut ObjInfo) -> Result { +pub fn apply_splits_file

(path: P, obj: &mut ObjInfo) -> Result> +where P: AsRef { Ok(if path.as_ref().is_file() { let file = map_file(path)?; - apply_splits(file.as_reader(), obj)?; - true + let cached = FileReadInfo::new(&file)?; + apply_splits(&mut file.as_reader(), obj)?; + Some(cached) } else { - false + None }) } -pub fn apply_splits(r: R, obj: &mut ObjInfo) -> Result<()> { +pub fn apply_splits(r: &mut R, obj: &mut ObjInfo) -> Result<()> +where R: BufRead + ?Sized { let mut state = SplitState::None; for result in r.lines() { let line = match result { @@ -637,7 +696,8 @@ pub fn apply_splits(r: R, obj: &mut ObjInfo) -> Result<()> { Ok(()) } -pub fn read_splits_sections>(path: P) -> Result>> { +pub fn read_splits_sections

(path: P) -> Result>> +where P: AsRef { if !path.as_ref().is_file() { return Ok(None); } diff --git a/src/util/dep.rs b/src/util/dep.rs index 8d9c880..244b10c 100644 --- a/src/util/dep.rs +++ b/src/util/dep.rs @@ -16,7 +16,8 @@ pub struct DepFile { impl DepFile { pub fn new(name: PathBuf) -> Self { Self { name, dependencies: vec![] } } - pub fn push>(&mut self, dependency: P) { + pub fn push

(&mut self, dependency: P) + where P: AsRef { let path = split_path(dependency.as_ref()) .map(|(p, _)| p) .unwrap_or_else(|_| dependency.as_ref().to_path_buf()); @@ -29,7 +30,8 @@ impl DepFile { })); } - pub fn write(&self, w: &mut W) -> std::io::Result<()> { + pub fn write(&self, w: &mut W) -> std::io::Result<()> + where W: Write + ?Sized { write!(w, "{}:", self.name.to_slash_lossy())?; for dep in self.dependencies.iter().unique() { write!(w, " \\\n {}", dep.to_slash_lossy().replace(' ', "\\ "))?; diff --git a/src/util/dol.rs b/src/util/dol.rs index d6b4a24..ba671bf 100644 --- a/src/util/dol.rs +++ b/src/util/dol.rs @@ -8,12 +8,13 @@ use anyhow::{anyhow, bail, ensure, Result}; use crate::{ analysis::cfa::{locate_sda_bases, SectionAddress}, + array_ref, obj::{ ObjArchitecture, ObjInfo, ObjKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, }, util::{ - alf::{AlfFile, AlfSymbol}, + alf::{AlfFile, AlfSymbol, ALF_MAGIC}, reader::{skip_bytes, Endian, FromReader}, }, }; @@ -179,7 +180,7 @@ fn read_u32(buf: &[u8], dol: &dyn DolLike, addr: u32) -> Result { pub fn process_dol(buf: &[u8], name: &str) -> Result { let mut reader = Cursor::new(buf); - let dol: Box = if buf.len() > 4 && &buf[0..4] == b"RBOF" { + let dol: Box = if buf.len() > 4 && *array_ref!(buf, 0, 4) == ALF_MAGIC { Box::new(AlfFile::from_reader(&mut reader, Endian::Little)?) } else { Box::new(DolFile::from_reader(&mut reader, Endian::Big)?) diff --git a/src/util/dwarf.rs b/src/util/dwarf.rs index 5e2c662..bfe346e 100644 --- a/src/util/dwarf.rs +++ b/src/util/dwarf.rs @@ -7,10 +7,12 @@ use std::{ }; use anyhow::{anyhow, bail, ensure, Context, Result}; -use byteorder::{BigEndian, ReadBytesExt}; use num_enum::{IntoPrimitive, TryFromPrimitive}; -use crate::array_ref; +use crate::{ + array_ref, + util::reader::{Endian, FromReader}, +}; #[derive(Debug, Eq, PartialEq, Copy, Clone, IntoPrimitive, TryFromPrimitive)] #[repr(u16)] @@ -362,7 +364,8 @@ impl Tag { } } -pub fn read_debug_section(reader: &mut R) -> Result { +pub fn read_debug_section(reader: &mut R) -> Result +where R: BufRead + Seek + ?Sized { let len = { let old_pos = reader.stream_position()?; let len = reader.seek(SeekFrom::End(0))?; @@ -383,7 +386,8 @@ pub fn read_debug_section(reader: &mut R) -> Result { } #[allow(unused)] -pub fn read_aranges_section(reader: &mut R) -> Result<()> { +pub fn read_aranges_section(reader: &mut R) -> Result<()> +where R: BufRead + Seek + ?Sized { let len = { let old_pos = reader.stream_position()?; let len = reader.seek(SeekFrom::End(0))?; @@ -398,22 +402,23 @@ pub fn read_aranges_section(reader: &mut R) -> Result<()> { break; } - let size = reader.read_u32::()?; - let version = reader.read_u8()?; + let size = u32::from_reader(reader, Endian::Big)?; + let version = u8::from_reader(reader, Endian::Big)?; ensure!(version == 1, "Expected version 1, got {version}"); - let _debug_offs = reader.read_u32::()?; - let _debug_size = reader.read_u32::()?; + let _debug_offs = u32::from_reader(reader, Endian::Big)?; + let _debug_size = u32::from_reader(reader, Endian::Big)?; while reader.stream_position()? < position + size as u64 { - let _address = reader.read_u32::()?; - let _length = reader.read_u32::()?; + let _address = u32::from_reader(reader, Endian::Big)?; + let _length = u32::from_reader(reader, Endian::Big)?; } } Ok(()) } -fn read_tag(reader: &mut R) -> Result { +fn read_tag(reader: &mut R) -> Result +where R: BufRead + Seek + ?Sized { let position = reader.stream_position()?; - let size = reader.read_u32::()?; + let size = u32::from_reader(reader, Endian::Big)?; if size < 8 { // Null entry if size > 4 { @@ -422,8 +427,8 @@ fn read_tag(reader: &mut R) -> Result { return Ok(Tag { key: position as u32, kind: TagKind::Padding, attributes: vec![] }); } - let tag = - TagKind::try_from(reader.read_u16::()?).context("Unknown DWARF tag type")?; + let tag = TagKind::try_from(u16::from_reader(reader, Endian::Big)?) + .context("Unknown DWARF tag type")?; let mut attributes = Vec::new(); if tag == TagKind::Padding { reader.seek(SeekFrom::Start(position + size as u64))?; // Skip padding @@ -437,40 +442,43 @@ fn read_tag(reader: &mut R) -> Result { } // TODO Shift-JIS? -fn read_string(reader: &mut R) -> Result { +fn read_string(reader: &mut R) -> Result +where R: BufRead + ?Sized { let mut str = String::new(); + let mut buf = [0u8; 1]; loop { - let byte = reader.read_u8()?; - if byte == 0 { + reader.read_exact(&mut buf)?; + if buf[0] == 0 { break; } - str.push(byte as char); + str.push(buf[0] as char); } Ok(str) } -fn read_attribute(reader: &mut R) -> Result { - let attr_type = reader.read_u16::()?; +fn read_attribute(reader: &mut R) -> Result +where R: BufRead + Seek + ?Sized { + let attr_type = u16::from_reader(reader, Endian::Big)?; let attr = AttributeKind::try_from(attr_type).context("Unknown DWARF attribute type")?; let form = FormKind::try_from(attr_type & FORM_MASK).context("Unknown DWARF form type")?; let value = match form { - FormKind::Addr => AttributeValue::Address(reader.read_u32::()?), - FormKind::Ref => AttributeValue::Reference(reader.read_u32::()?), + FormKind::Addr => AttributeValue::Address(u32::from_reader(reader, Endian::Big)?), + FormKind::Ref => AttributeValue::Reference(u32::from_reader(reader, Endian::Big)?), FormKind::Block2 => { - let size = reader.read_u16::()?; + let size = u16::from_reader(reader, Endian::Big)?; let mut data = vec![0u8; size as usize]; reader.read_exact(&mut data)?; AttributeValue::Block(data) } FormKind::Block4 => { - let size = reader.read_u32::()?; + let size = u32::from_reader(reader, Endian::Big)?; let mut data = vec![0u8; size as usize]; reader.read_exact(&mut data)?; AttributeValue::Block(data) } - FormKind::Data2 => AttributeValue::Data2(reader.read_u16::()?), - FormKind::Data4 => AttributeValue::Data4(reader.read_u32::()?), - FormKind::Data8 => AttributeValue::Data8(reader.read_u64::()?), + FormKind::Data2 => AttributeValue::Data2(u16::from_reader(reader, Endian::Big)?), + FormKind::Data4 => AttributeValue::Data4(u32::from_reader(reader, Endian::Big)?), + FormKind::Data8 => AttributeValue::Data8(u64::from_reader(reader, Endian::Big)?), FormKind::String => AttributeValue::String(read_string(reader)?), }; Ok(Attribute { kind: attr, value }) @@ -1033,7 +1041,7 @@ pub fn ud_type(tags: &TagMap, tag: &Tag) -> Result { let mut members = Vec::new(); let mut cursor = Cursor::new(data); while cursor.position() < data.len() as u64 { - let value = cursor.read_u32::()?; + let value = u32::from_reader(&mut cursor, Endian::Big)?; let name = read_string(&mut cursor)?; members.push(EnumerationMember { name, value }); } diff --git a/src/util/elf.rs b/src/util/elf.rs index 1110316..01730ed 100644 --- a/src/util/elf.rs +++ b/src/util/elf.rs @@ -27,8 +27,9 @@ use crate::{ ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjUnit, }, util::{ - comment::{read_comment_sym, write_comment_sym, CommentSym, MWComment}, + comment::{CommentSym, MWComment}, file::map_file, + reader::{Endian, FromReader, ToWriter}, }, }; @@ -41,7 +42,8 @@ enum BoundaryState { FilesEnded, } -pub fn process_elf>(path: P) -> Result { +pub fn process_elf

(path: P) -> Result +where P: AsRef { let file = map_file(path)?; let obj_file = object::read::File::parse(file.as_slice())?; let architecture = match obj_file.architecture() { @@ -106,12 +108,12 @@ pub fn process_elf>(path: P) -> Result { None } else { let mut reader = Cursor::new(&*data); - let header = - MWComment::parse_header(&mut reader).context("While reading .comment section")?; + let header = MWComment::from_reader(&mut reader, Endian::Big) + .context("While reading .comment section")?; log::debug!("Loaded .comment section header {:?}", header); let mut comment_syms = Vec::with_capacity(obj_file.symbols().count()); for symbol in obj_file.symbols() { - let comment_sym = read_comment_sym(&mut reader)?; + let comment_sym = CommentSym::from_reader(&mut reader, Endian::Big)?; log::debug!("Symbol {:?} -> Comment {:?}", symbol, comment_sym); comment_syms.push(comment_sym); } @@ -406,13 +408,10 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { name, rela_name: None, }); - mw_comment.write_header(&mut comment_data)?; + mw_comment.to_writer_static(&mut comment_data, Endian::Big)?; // Null symbol - write_comment_sym(&mut comment_data, CommentSym { - align: 0, - vis_flags: 0, - active_flags: 0, - })?; + CommentSym { align: 0, vis_flags: 0, active_flags: 0 } + .to_writer_static(&mut comment_data, Endian::Big)?; Some(comment_data) } else { None @@ -451,11 +450,8 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { }, }); if let Some(comment_data) = &mut comment_data { - write_comment_sym(comment_data, CommentSym { - align: 1, - vis_flags: 0, - active_flags: 0, - })?; + CommentSym { align: 1, vis_flags: 0, active_flags: 0 } + .to_writer_static(comment_data, Endian::Big)?; } section_symbol_offset += 1; } @@ -477,11 +473,8 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { num_local = writer.symbol_count(); out_symbols.push(OutSymbol { index, sym }); if let Some(comment_data) = &mut comment_data { - write_comment_sym(comment_data, CommentSym { - align: section.align as u32, - vis_flags: 0, - active_flags: 0, - })?; + CommentSym { align: section.align as u32, vis_flags: 0, active_flags: 0 } + .to_writer_static(comment_data, Endian::Big)?; } } } @@ -547,7 +540,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { out_symbols.push(OutSymbol { index, sym }); symbol_map[symbol_index] = Some(index.0); if let Some(comment_data) = &mut comment_data { - write_comment_sym(comment_data, CommentSym::from(symbol, true))?; + CommentSym::from(symbol, true).to_writer_static(comment_data, Endian::Big)?; } } diff --git a/src/util/file.rs b/src/util/file.rs index fa5eed0..95882d9 100644 --- a/src/util/file.rs +++ b/src/util/file.rs @@ -7,17 +7,27 @@ use std::{ }; use anyhow::{anyhow, Context, Result}; -use binrw::io::{TakeSeek, TakeSeekExt}; -use byteorder::ReadBytesExt; use filetime::{set_file_mtime, FileTime}; use memmap2::{Mmap, MmapOptions}; use path_slash::PathBufExt; use sha1::{Digest, Sha1}; +use xxhash_rust::xxh3::xxh3_64; -use crate::util::{rarc, rarc::Node, yaz0, IntoCow, ToCow}; +use crate::{ + array_ref, + util::{ + rarc, + rarc::{Node, RARC_MAGIC}, + take_seek::{TakeSeek, TakeSeekExt}, + yaz0, + yaz0::YAZ0_MAGIC, + IntoCow, ToCow, + }, +}; pub struct MappedFile { mmap: Mmap, + mtime: FileTime, offset: u64, len: u64, } @@ -36,7 +46,8 @@ impl MappedFile { pub fn into_inner(self) -> Mmap { self.mmap } } -pub fn split_path>(path: P) -> Result<(PathBuf, Option)> { +pub fn split_path

(path: P) -> Result<(PathBuf, Option)> +where P: AsRef { let mut base_path = PathBuf::new(); let mut sub_path: Option = None; for component in path.as_ref().components() { @@ -58,22 +69,30 @@ pub fn split_path>(path: P) -> Result<(PathBuf, Option)> } /// Opens a memory mapped file, and decompresses it if needed. -pub fn map_file>(path: P) -> Result { +pub fn map_file

(path: P) -> Result +where P: AsRef { let (base_path, sub_path) = split_path(path.as_ref())?; let file = File::open(&base_path) .with_context(|| format!("Failed to open file '{}'", base_path.display()))?; + let mtime = FileTime::from_last_modification_time(&file.metadata()?); let mmap = unsafe { MmapOptions::new().map(&file) } .with_context(|| format!("Failed to mmap file: '{}'", base_path.display()))?; let (offset, len) = if let Some(sub_path) = sub_path { let mut reader = Cursor::new(&*mmap); if sub_path.as_os_str() == OsStr::new("nlzss") { - return Ok(FileEntry::Buffer(nintendo_lz::decompress(&mut reader).map_err(|e| { - anyhow!("Failed to decompress '{}' with NLZSS: {}", path.as_ref().display(), e) - })?)); + return Ok(FileEntry::Buffer( + nintendo_lz::decompress(&mut reader).map_err(|e| { + anyhow!("Failed to decompress '{}' with NLZSS: {}", path.as_ref().display(), e) + })?, + mtime, + )); } else if sub_path.as_os_str() == OsStr::new("yaz0") { - return Ok(FileEntry::Buffer(yaz0::decompress_file(&mut reader).with_context( - || format!("Failed to decompress '{}' with Yaz0", path.as_ref().display()), - )?)); + return Ok(FileEntry::Buffer( + yaz0::decompress_file(&mut reader).with_context(|| { + format!("Failed to decompress '{}' with Yaz0", path.as_ref().display()) + })?, + mtime, + )); } let rarc = rarc::RarcReader::new(&mut reader) @@ -84,13 +103,16 @@ pub fn map_file>(path: P) -> Result { } else { (0, mmap.len() as u64) }; - let map = MappedFile { mmap, offset, len }; + let map = MappedFile { mmap, mtime, offset, len }; let buf = map.as_slice(); // Auto-detect compression if there's a magic number. - if buf.len() > 4 && buf[0..4] == *b"Yaz0" { - return Ok(FileEntry::Buffer(yaz0::decompress_file(&mut map.as_reader()).with_context( - || format!("Failed to decompress '{}' with Yaz0", path.as_ref().display()), - )?)); + if buf.len() > 4 && buf[0..4] == YAZ0_MAGIC { + return Ok(FileEntry::Buffer( + yaz0::decompress_file(&mut map.as_reader()).with_context(|| { + format!("Failed to decompress '{}' with Yaz0", path.as_ref().display()) + })?, + mtime, + )); } Ok(FileEntry::MappedFile(map)) } @@ -98,7 +120,8 @@ pub fn map_file>(path: P) -> Result { pub type OpenedFile = TakeSeek; /// Opens a file (not memory mapped). No decompression is performed. -pub fn open_file>(path: P) -> Result { +pub fn open_file

(path: P) -> Result +where P: AsRef { let (base_path, sub_path) = split_path(path)?; let mut file = File::open(&base_path) .with_context(|| format!("Failed to open file '{}'", base_path.display()))?; @@ -118,17 +141,18 @@ pub fn open_file>(path: P) -> Result { pub trait Reader: BufRead + Seek {} impl Reader for Cursor<&[u8]> {} -// impl Reader for &mut OpenedFile {} /// Creates a buffered reader around a file (not memory mapped). -pub fn buf_reader>(path: P) -> Result> { +pub fn buf_reader

(path: P) -> Result> +where P: AsRef { let file = File::open(&path) .with_context(|| format!("Failed to open file '{}'", path.as_ref().display()))?; Ok(BufReader::new(file)) } /// Creates a buffered writer around a file (not memory mapped). -pub fn buf_writer>(path: P) -> Result> { +pub fn buf_writer

(path: P) -> Result> +where P: AsRef { if let Some(parent) = path.as_ref().parent() { DirBuilder::new().recursive(true).create(parent)?; } @@ -138,7 +162,8 @@ pub fn buf_writer>(path: P) -> Result> { } /// Reads a string with known size at the specified offset. -pub fn read_string(reader: &mut R, off: u64, size: usize) -> Result { +pub fn read_string(reader: &mut R, off: u64, size: usize) -> Result +where R: Read + Seek + ?Sized { let mut data = vec![0u8; size]; let pos = reader.stream_position()?; reader.seek(SeekFrom::Start(off))?; @@ -148,16 +173,18 @@ pub fn read_string(reader: &mut R, off: u64, size: usize) -> Res } /// Reads a zero-terminated string at the specified offset. -pub fn read_c_string(reader: &mut R, off: u64) -> Result { +pub fn read_c_string(reader: &mut R, off: u64) -> Result +where R: Read + Seek + ?Sized { let pos = reader.stream_position()?; reader.seek(SeekFrom::Start(off))?; let mut s = String::new(); + let mut buf = [0u8; 1]; loop { - let b = reader.read_u8()?; - if b == 0 { + reader.read_exact(&mut buf)?; + if buf[0] == 0 { break; } - s.push(b as char); + s.push(buf[0] as char); } reader.seek(SeekFrom::Start(pos))?; Ok(s) @@ -190,15 +217,15 @@ pub fn process_rsp(files: &[PathBuf]) -> Result> { /// Iterator over files in a RARC archive. struct RarcIterator { - file: Mmap, + file: MappedFile, base_path: PathBuf, paths: Vec<(PathBuf, u64, u32)>, index: usize, } impl RarcIterator { - pub fn new(file: Mmap, base_path: &Path) -> Result { - let reader = rarc::RarcReader::new(&mut Cursor::new(&*file))?; + pub fn new(file: MappedFile, base_path: &Path) -> Result { + let reader = rarc::RarcReader::new(&mut file.as_reader())?; let paths = Self::collect_paths(&reader, base_path); Ok(Self { file, base_path: base_path.to_owned(), paths, index: 0 }) } @@ -237,7 +264,7 @@ impl Iterator for RarcIterator { let (path, off, size) = self.paths[self.index].clone(); self.index += 1; - let slice = &self.file[off as usize..off as usize + size as usize]; + let slice = &self.file.as_slice()[off as usize..off as usize + size as usize]; match decompress_if_needed(slice) { Ok(buf) => Some(Ok((path, buf.into_owned()))), Err(e) => Some(Err(e)), @@ -248,38 +275,61 @@ impl Iterator for RarcIterator { /// A file entry, either a memory mapped file or an owned buffer. pub enum FileEntry { MappedFile(MappedFile), - Buffer(Vec), + Buffer(Vec, FileTime), } impl FileEntry { /// Creates a reader for the file. - pub fn as_reader(&self) -> Box { + pub fn as_reader(&self) -> Cursor<&[u8]> { match self { - Self::MappedFile(file) => Box::new(file.as_reader()), - Self::Buffer(slice) => Box::new(Cursor::new(slice.as_slice())), + Self::MappedFile(file) => file.as_reader(), + Self::Buffer(slice, _) => Cursor::new(slice.as_slice()), } } pub fn as_slice(&self) -> &[u8] { match self { Self::MappedFile(file) => file.as_slice(), - Self::Buffer(slice) => slice.as_slice(), + Self::Buffer(slice, _) => slice.as_slice(), } } pub fn len(&self) -> u64 { match self { Self::MappedFile(file) => file.len(), - Self::Buffer(slice) => slice.len() as u64, + Self::Buffer(slice, _) => slice.len() as u64, } } pub fn is_empty(&self) -> bool { match self { Self::MappedFile(file) => file.is_empty(), - Self::Buffer(slice) => slice.is_empty(), + Self::Buffer(slice, _) => slice.is_empty(), } } + + pub fn mtime(&self) -> FileTime { + match self { + Self::MappedFile(file) => file.mtime, + Self::Buffer(_, mtime) => *mtime, + } + } +} + +/// Information about a file when it was read. +/// Used to determine if a file has changed since it was read (mtime) +/// and if it needs to be written (hash). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct FileReadInfo { + pub mtime: FileTime, + pub hash: u64, +} + +impl FileReadInfo { + pub fn new(entry: &FileEntry) -> Result { + let hash = xxh3_64(entry.as_slice()); + Ok(Self { mtime: entry.mtime(), hash }) + } } /// Iterate over file paths, expanding response files (@) and glob patterns (*). @@ -303,7 +353,7 @@ impl FileIterator { let mut path_str = rarc.base_path.as_os_str().to_os_string(); path_str.push(OsStr::new(":")); path_str.push(path.as_os_str()); - return Some(Ok((path, FileEntry::Buffer(buf)))); + return Some(Ok((path, FileEntry::Buffer(buf, rarc.file.mtime)))); } Some(Err(err)) => return Some(Err(err)), None => self.rarc = None, @@ -321,7 +371,7 @@ impl FileIterator { self.index += 1; match map_file(&path) { Ok(FileEntry::MappedFile(map)) => self.handle_file(map, path), - Ok(FileEntry::Buffer(_)) => todo!(), + Ok(FileEntry::Buffer(_, _)) => todo!(), Err(err) => Some(Err(err)), } } @@ -336,26 +386,30 @@ impl FileIterator { return Some(Ok((path, FileEntry::MappedFile(file)))); } - match &buf[0..4] { - b"Yaz0" => self.handle_yaz0(file.as_reader(), path), - b"RARC" => self.handle_rarc(file.into_inner(), path), + match *array_ref!(buf, 0, 4) { + YAZ0_MAGIC => self.handle_yaz0(file, path), + RARC_MAGIC => self.handle_rarc(file, path), _ => Some(Ok((path, FileEntry::MappedFile(file)))), } } fn handle_yaz0( &mut self, - mut reader: Cursor<&[u8]>, + file: MappedFile, path: PathBuf, ) -> Option> { - Some(match yaz0::decompress_file(&mut reader) { - Ok(buf) => Ok((path, FileEntry::Buffer(buf))), + Some(match yaz0::decompress_file(&mut file.as_reader()) { + Ok(buf) => Ok((path, FileEntry::Buffer(buf, file.mtime))), Err(e) => Err(e), }) } - fn handle_rarc(&mut self, map: Mmap, path: PathBuf) -> Option> { - self.rarc = match RarcIterator::new(map, &path) { + fn handle_rarc( + &mut self, + file: MappedFile, + path: PathBuf, + ) -> Option> { + self.rarc = match RarcIterator::new(file, &path) { Ok(iter) => Some(iter), Err(e) => return Some(Err(e)), }; @@ -369,7 +423,8 @@ impl Iterator for FileIterator { fn next(&mut self) -> Option { self.next_rarc().or_else(|| self.next_path()) } } -pub fn touch>(path: P) -> std::io::Result<()> { +pub fn touch

(path: P) -> std::io::Result<()> +where P: AsRef { if path.as_ref().exists() { set_file_mtime(path, FileTime::now()) } else { @@ -381,14 +436,15 @@ pub fn touch>(path: P) -> std::io::Result<()> { } pub fn decompress_if_needed(buf: &[u8]) -> Result> { - Ok(if buf.len() > 4 && buf[0..4] == *b"Yaz0" { + Ok(if buf.len() > 4 && buf[0..4] == YAZ0_MAGIC { yaz0::decompress_file(&mut Cursor::new(buf))?.into_cow() } else { buf.to_cow() }) } -pub fn decompress_reader(reader: &mut R) -> Result> { +pub fn decompress_reader(reader: &mut R) -> Result> +where R: Read + Seek + ?Sized { let mut magic = [0u8; 4]; if reader.read_exact(&mut magic).is_err() { reader.seek(SeekFrom::Start(0))?; @@ -396,7 +452,7 @@ pub fn decompress_reader(reader: &mut R) -> Result> { reader.read_to_end(&mut buf)?; return Ok(buf); } - Ok(if magic == *b"Yaz0" { + Ok(if magic == YAZ0_MAGIC { reader.seek(SeekFrom::Start(0))?; yaz0::decompress_file(reader)? } else { diff --git a/src/util/map.rs b/src/util/map.rs index a590ed2..92ab679 100644 --- a/src/util/map.rs +++ b/src/util/map.rs @@ -8,7 +8,7 @@ use std::{ path::Path, }; -use anyhow::{anyhow, bail, ensure, Error, Result}; +use anyhow::{anyhow, bail, Error, Result}; use cwdemangle::{demangle, DemangleOptions}; use flagset::FlagSet; use multimap::MultiMap; @@ -119,15 +119,27 @@ pub struct MapInfo { pub unit_entries: MultiMap, pub entry_references: MultiMap, pub entry_referenced_from: MultiMap, + pub unit_references: MultiMap, pub sections: Vec, pub link_map_symbols: HashMap, pub section_symbols: HashMap>>, pub section_units: HashMap>, } +impl MapInfo { + // TODO rework to make this lookup easier + pub fn get_section_symbol(&self, symbol: &SymbolRef) -> Option<(String, &SymbolEntry)> { + self.section_symbols.iter().find_map(|(section, m)| { + m.values() + .find_map(|v| v.iter().find(|e| e.name == symbol.name && e.unit == symbol.unit)) + .map(|e| (section.clone(), e)) + }) + } +} + #[derive(Default)] struct LinkMapState { - last_symbol_name: String, + last_symbol: Option, symbol_stack: Vec, } @@ -245,7 +257,7 @@ impl StateMachine { fn end_state(&mut self, old_state: ProcessMapState) -> Result<()> { match old_state { ProcessMapState::LinkMap(state) => { - self.has_link_map = !state.last_symbol_name.is_empty(); + self.has_link_map = state.last_symbol.is_some(); } ProcessMapState::SectionLayout(state) => { StateMachine::end_section_layout(state, &mut self.result)?; @@ -293,8 +305,10 @@ impl StateMachine { let is_duplicate = &captures["sym"] == ">>>"; let unit = captures["tu"].trim().to_string(); let name = if is_duplicate { - ensure!(!state.last_symbol_name.is_empty(), "Last name empty?"); - state.last_symbol_name.clone() + let Some(last_symbol) = &state.last_symbol else { + bail!("Last symbol empty?"); + }; + last_symbol.name.clone() } else { captures["sym"].to_string() }; @@ -325,6 +339,14 @@ impl StateMachine { result.entry_referenced_from.insert(symbol_ref.clone(), from.clone()); result.entry_references.insert(from.clone(), symbol_ref.clone()); } + result.unit_references.insert( + if is_duplicate { + state.last_symbol.as_ref().unwrap().clone() + } else { + symbol_ref.clone() + }, + unit.clone(), + ); let mut should_insert = true; if let Some(symbol) = result.link_map_symbols.get(&symbol_ref) { if symbol.kind != kind { @@ -358,7 +380,9 @@ impl StateMachine { size: 0, align: None, }); - state.last_symbol_name = name; + if !is_duplicate { + state.last_symbol = Some(symbol_ref.clone()); + } result.unit_entries.insert(unit, symbol_ref); } Ok(()) @@ -564,7 +588,8 @@ impl StateMachine { } } -pub fn process_map(reader: &mut R) -> Result { +pub fn process_map(reader: &mut R) -> Result +where R: BufRead + ?Sized { let mut sm = StateMachine { state: ProcessMapState::None, result: Default::default(), @@ -582,7 +607,8 @@ pub fn process_map(reader: &mut R) -> Result { Ok(sm.result) } -pub fn apply_map_file>(path: P, obj: &mut ObjInfo) -> Result<()> { +pub fn apply_map_file

(path: P, obj: &mut ObjInfo) -> Result<()> +where P: AsRef { let file = map_file(&path)?; let info = process_map(&mut file.as_reader())?; apply_map(&info, obj) diff --git a/src/util/mod.rs b/src/util/mod.rs index f2800aa..1617fad 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -18,6 +18,7 @@ pub mod rel; pub mod rso; pub mod signatures; pub mod split; +pub mod take_seek; pub mod yaz0; #[inline] diff --git a/src/util/rarc.rs b/src/util/rarc.rs index 6099839..6922d78 100644 --- a/src/util/rarc.rs +++ b/src/util/rarc.rs @@ -1,17 +1,21 @@ // Source: https://github.com/Julgodis/picori/blob/650da9f4fe6050b39b80d5360416591c748058d5/src/rarc.rs // License: MIT -// Modified to use `std::io::Cursor<&[u8]>` and `byteorder` +// Modified to use `std::io::Cursor<&[u8]>` and project's FromReader trait use std::{ collections::HashMap, fmt::Display, + hash::{Hash, Hasher}, + io, io::{Read, Seek, SeekFrom}, path::{Component, Path, PathBuf}, }; use anyhow::{anyhow, bail, ensure, Result}; -use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; -use crate::util::file::read_c_string; +use crate::util::{ + file::read_c_string, + reader::{struct_size, Endian, FromReader}, +}; #[derive(Debug, Clone)] pub struct NamedHash { @@ -25,8 +29,11 @@ impl Display for NamedHash { } } -impl std::hash::Hash for NamedHash { - fn hash(&self, state: &mut H) { self.hash.hash(state); } +impl Hash for NamedHash { + fn hash(&self, state: &mut H) + where H: Hasher { + self.hash.hash(state); + } } impl PartialEq for NamedHash { @@ -73,105 +80,238 @@ pub struct RarcReader { root_node: NamedHash, } +pub const RARC_MAGIC: [u8; 4] = *b"RARC"; + +struct RarcHeader { + magic: [u8; 4], + _file_length: u32, + header_length: u32, + file_offset: u32, + _file_length_2: u32, + _unk0: u32, + _unk1: u32, + _unk2: u32, + node_count: u32, + node_offset: u32, + directory_count: u32, + directory_offset: u32, + string_table_length: u32, + string_table_offset: u32, + _file_count: u16, + _unk3: u16, + _unk4: u32, +} + +impl FromReader for RarcHeader { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + 4, // magic + u32::STATIC_SIZE, // file_length + u32::STATIC_SIZE, // header_length + u32::STATIC_SIZE, // file_offset + u32::STATIC_SIZE, // file_length + u32::STATIC_SIZE, // unk0 + u32::STATIC_SIZE, // unk1 + u32::STATIC_SIZE, // unk2 + u32::STATIC_SIZE, // node_count + u32::STATIC_SIZE, // node_offset + u32::STATIC_SIZE, // directory_count + u32::STATIC_SIZE, // directory_offset + u32::STATIC_SIZE, // string_table_length + u32::STATIC_SIZE, // string_table_offset + u16::STATIC_SIZE, // file_count + u16::STATIC_SIZE, // unk3 + u32::STATIC_SIZE, // unk4 + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + let header = Self { + magic: <[u8; 4]>::from_reader(reader, e)?, + _file_length: u32::from_reader(reader, e)?, + header_length: u32::from_reader(reader, e)?, + file_offset: u32::from_reader(reader, e)?, + _file_length_2: u32::from_reader(reader, e)?, + _unk0: u32::from_reader(reader, e)?, + _unk1: u32::from_reader(reader, e)?, + _unk2: u32::from_reader(reader, e)?, + node_count: u32::from_reader(reader, e)?, + node_offset: u32::from_reader(reader, e)?, + directory_count: u32::from_reader(reader, e)?, + directory_offset: u32::from_reader(reader, e)?, + string_table_length: u32::from_reader(reader, e)?, + string_table_offset: u32::from_reader(reader, e)?, + _file_count: u16::from_reader(reader, e)?, + _unk3: u16::from_reader(reader, e)?, + _unk4: u32::from_reader(reader, e)?, + }; + if header.magic != RARC_MAGIC { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid RARC magic: {:?}", header.magic), + )); + } + if header.node_count >= 0x10000 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid node count: {}", header.node_count), + )); + } + if header.directory_count >= 0x10000 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid directory count: {}", header.directory_count), + )); + } + Ok(header) + } +} + +struct RarcFileNode { + index: u16, + name_hash: u16, + _unk0: u16, // 0x200 for folders, 0x1100 for files + name_offset: u16, + data_offset: u32, + data_length: u32, + _unk1: u32, +} + +impl FromReader for RarcFileNode { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u16::STATIC_SIZE, // index + u16::STATIC_SIZE, // name_hash + u16::STATIC_SIZE, // unk0 + u16::STATIC_SIZE, // name_offset + u32::STATIC_SIZE, // data_offset + u32::STATIC_SIZE, // data_length + u32::STATIC_SIZE, // unk1 + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + Ok(Self { + index: u16::from_reader(reader, e)?, + name_hash: u16::from_reader(reader, e)?, + _unk0: u16::from_reader(reader, e)?, + name_offset: u16::from_reader(reader, e)?, + data_offset: u32::from_reader(reader, e)?, + data_length: u32::from_reader(reader, e)?, + _unk1: u32::from_reader(reader, e)?, + }) + } +} + +struct RarcDirectoryNode { + _identifier: u32, + name_offset: u32, + name_hash: u16, + count: u16, + index: u32, +} + +impl FromReader for RarcDirectoryNode { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // identifier + u32::STATIC_SIZE, // name_offset + u16::STATIC_SIZE, // name_hash + u16::STATIC_SIZE, // count + u32::STATIC_SIZE, // index + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + Ok(Self { + _identifier: u32::from_reader(reader, e)?, + name_offset: u32::from_reader(reader, e)?, + name_hash: u16::from_reader(reader, e)?, + count: u16::from_reader(reader, e)?, + index: u32::from_reader(reader, e)?, + }) + } +} + impl RarcReader { /// Creates a new RARC reader. - pub fn new(reader: &mut R) -> Result { + pub fn new(reader: &mut R) -> Result + where R: Read + Seek + ?Sized { let base = reader.stream_position()?; + let header = RarcHeader::from_reader(reader, Endian::Big)?; - let magic = reader.read_u32::()?; - let _file_length = reader.read_u32::()?; - let header_length = reader.read_u32::()?; - let file_offset = reader.read_u32::()?; - let _file_length = reader.read_u32::()?; - let _ = reader.read_u32::()?; - let _ = reader.read_u32::()?; - let _ = reader.read_u32::()?; - let node_count = reader.read_u32::()?; - let node_offset = reader.read_u32::()?; - let directory_count = reader.read_u32::()?; - let directory_offset = reader.read_u32::()?; - let string_table_length = reader.read_u32::()?; - let string_table_offset = reader.read_u32::()?; - let _file_count = reader.read_u16::()?; - let _ = reader.read_u16::()?; - let _ = reader.read_u32::()?; - - ensure!(magic == 0x43524152, "invalid RARC magic"); - ensure!(node_count < 0x10000, "invalid node count"); - ensure!(directory_count < 0x10000, "invalid directory count"); - - let base = base + header_length as u64; - let directory_base = base + directory_offset as u64; - let data_base = base + file_offset as u64; - let mut directories = Vec::with_capacity(directory_count as usize); - for i in 0..directory_count { + let base = base + header.header_length as u64; + let directory_base = base + header.directory_offset as u64; + let data_base = base + header.file_offset as u64; + let mut directories = Vec::with_capacity(header.directory_count as usize); + for i in 0..header.directory_count { reader.seek(SeekFrom::Start(directory_base + 20 * i as u64))?; - let index = reader.read_u16::()?; - let name_hash = reader.read_u16::()?; - let _ = reader.read_u16::()?; // 0x200 for folders, 0x1100 for files - let name_offset = reader.read_u16::()?; - let data_offset = reader.read_u32::()?; - let data_length = reader.read_u32::()?; - let _ = reader.read_u32::()?; + let node = RarcFileNode::from_reader(reader, Endian::Big)?; let name = { - let offset = string_table_offset as u64; - let offset = offset + name_offset as u64; - ensure!((name_offset as u32) < string_table_length, "invalid string table offset"); + let offset = header.string_table_offset as u64; + let offset = offset + node.name_offset as u64; + ensure!( + (node.name_offset as u32) < header.string_table_length, + "invalid string table offset" + ); read_c_string(reader, base + offset) }?; - if index == 0xFFFF { + if node.index == 0xFFFF { if name == "." { directories.push(RarcDirectory::CurrentFolder); } else if name == ".." { directories.push(RarcDirectory::ParentFolder); } else { - directories - .push(RarcDirectory::Folder { name: NamedHash { name, hash: name_hash } }); + directories.push(RarcDirectory::Folder { + name: NamedHash { name, hash: node.name_hash }, + }); } } else { directories.push(RarcDirectory::File { - name: NamedHash { name, hash: name_hash }, - offset: data_base + data_offset as u64, - size: data_length, + name: NamedHash { name, hash: node.name_hash }, + offset: data_base + node.data_offset as u64, + size: node.data_length, }); } } - let node_base = base + node_offset as u64; + let node_base = base + header.node_offset as u64; let mut root_node: Option = None; - let mut nodes = HashMap::with_capacity(node_count as usize); - for i in 0..node_count { + let mut nodes = HashMap::with_capacity(header.node_count as usize); + for i in 0..header.node_count { reader.seek(SeekFrom::Start(node_base + 16 * i as u64))?; - let _identifier = reader.read_u32::()?; - let name_offset = reader.read_u32::()?; - let name_hash = reader.read_u16::()?; - let count = reader.read_u16::()? as u32; - let index = reader.read_u32::()?; + let node = RarcDirectoryNode::from_reader(reader, Endian::Big)?; - ensure!(index < directory_count, "first directory index out of bounds"); + ensure!(node.index < header.directory_count, "first directory index out of bounds"); - let last_index = index.checked_add(count); + let last_index = node.index.checked_add(node.count as u32); ensure!( - last_index.is_some() && last_index.unwrap() <= directory_count, + last_index.is_some() && last_index.unwrap() <= header.directory_count, "last directory index out of bounds" ); let name = { - let offset = string_table_offset as u64; - let offset = offset + name_offset as u64; - ensure!(name_offset < string_table_length, "invalid string table offset"); + let offset = header.string_table_offset as u64; + let offset = offset + node.name_offset as u64; + ensure!( + node.name_offset < header.string_table_length, + "invalid string table offset" + ); read_c_string(reader, base + offset) }?; // FIXME: this assumes that the root node is the first node in the list if root_node.is_none() { - root_node = Some(NamedHash { name: name.clone(), hash: name_hash }); + root_node = Some(NamedHash { name: name.clone(), hash: node.name_hash }); } - let name = NamedHash { name, hash: name_hash }; - nodes.insert(name.clone(), RarcNode { index, count }); + let name = NamedHash { name, hash: node.name_hash }; + nodes.insert(name.clone(), RarcNode { index: node.index, count: node.count as u32 }); } if let Some(root_node) = root_node { @@ -188,7 +328,8 @@ impl RarcReader { } /// Find a file in the RARC file. - pub fn find_file>(&self, path: P) -> Result> { + pub fn find_file

(&self, path: P) -> Result> + where P: AsRef { let mut cmp_path = PathBuf::new(); for component in path.as_ref().components() { match component { diff --git a/src/util/reader.rs b/src/util/reader.rs index 5d5f0ab..f0ccfdd 100644 --- a/src/util/reader.rs +++ b/src/util/reader.rs @@ -172,6 +172,12 @@ pub trait ToWriter: Sized { fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> where W: Write + ?Sized; + #[inline] + fn to_writer_static(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + writer.write_all(&self.to_bytes(e)?) + } + fn to_bytes(&self, e: Endian) -> io::Result> { let mut buf = vec![0u8; self.write_size()]; self.to_writer(&mut buf.as_mut_slice(), e)?; diff --git a/src/util/rel.rs b/src/util/rel.rs index 65b1319..7cc83e0 100644 --- a/src/util/rel.rs +++ b/src/util/rel.rs @@ -1,10 +1,10 @@ use std::{ cmp::Ordering, + io, io::{Read, Seek, SeekFrom, Write}, }; use anyhow::{anyhow, bail, ensure, Context, Result}; -use binrw::{binrw, io::NoSeek, BinRead, BinWrite}; use itertools::Itertools; use object::{elf, Object, ObjectSection, ObjectSymbol}; use tracing::warn; @@ -15,7 +15,12 @@ use crate::{ ObjArchitecture, ObjInfo, ObjKind, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, }, - util::{align_up, split::default_section_align, IntoCow}, + util::{ + align_up, + reader::{struct_size, Endian, FromReader, ToWriter, DYNAMIC_SIZE}, + split::default_section_align, + IntoCow, + }, }; /// Do not relocate anything, but accumulate the offset field for the next relocation offset calculation. @@ -30,12 +35,7 @@ pub const R_DOLPHIN_END: u32 = 203; #[allow(unused)] pub const R_DOLPHIN_MRKREF: u32 = 204; -#[binrw] #[derive(Clone, Debug)] -#[br(assert(next == 0))] -#[br(assert(prev == 0))] -#[br(assert(bss_section == 0))] -#[brw(assert(matches!(version, 1..=3), "Unsupported REL version {version}"))] pub struct RelHeader { /// Arbitrary identification number. /// Must be unique amongst all RELs used by a game. @@ -43,12 +43,10 @@ pub struct RelHeader { pub module_id: u32, /// Pointer to next module. /// Filled at runtime. - #[bw(calc = 0)] - pub next: u32, + // pub next: u32, /// Pointer to previous module. /// Filled at runtime. - #[bw(calc = 0)] - pub prev: u32, + // pub prev: u32, /// Number of sections in the file. pub num_sections: u32, /// Offset to the start of the section table. @@ -75,8 +73,7 @@ pub struct RelHeader { pub unresolved_section: u8, /// Index into section table which bss is relative to. /// Filled at runtime. - #[bw(calc = 0)] - pub bss_section: u8, + // pub bss_section: u8, /// Offset into the section containing `_prolog`. pub prolog_offset: u32, /// Offset into the section containing `_epilog`. @@ -85,36 +82,218 @@ pub struct RelHeader { pub unresolved_offset: u32, /// (Version >= 2 only) /// Alignment constraint on all sections. - #[br(if(version >= 2))] - #[bw(if(*version >= 2))] pub align: Option, /// (Version >= 2 only) /// Alignment constraint on the `.bss` section. - #[br(if(version >= 2))] - #[bw(if(*version >= 2))] pub bss_align: Option, /// (Version >= 3 only) /// If REL is linked with `OSLinkFixed` (instead of `OSLink`), the /// space after this offset can be used for other purposes, like BSS. - #[br(if(version >= 3))] - #[bw(if(*version >= 3))] pub fix_size: Option, } -#[binrw] +impl FromReader for RelHeader { + type Args = (); + + // Differs by version + const STATIC_SIZE: usize = DYNAMIC_SIZE; + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + let module_id = u32::from_reader(reader, e)?; + let next = u32::from_reader(reader, e)?; + if next != 0 { + return Err(io::Error::new(io::ErrorKind::InvalidData, "Expected next == 0")); + } + let prev = u32::from_reader(reader, e)?; + if prev != 0 { + return Err(io::Error::new(io::ErrorKind::InvalidData, "Expected prev == 0")); + } + let num_sections = u32::from_reader(reader, e)?; + let section_info_offset = u32::from_reader(reader, e)?; + let name_offset = u32::from_reader(reader, e)?; + let name_size = u32::from_reader(reader, e)?; + let version = u32::from_reader(reader, e)?; + if version > 3 { + return Err(io::Error::new(io::ErrorKind::InvalidData, "Unsupported REL version")); + } + let bss_size = u32::from_reader(reader, e)?; + let rel_offset = u32::from_reader(reader, e)?; + let imp_offset = u32::from_reader(reader, e)?; + let imp_size = u32::from_reader(reader, e)?; + let prolog_section = u8::from_reader(reader, e)?; + let epilog_section = u8::from_reader(reader, e)?; + let unresolved_section = u8::from_reader(reader, e)?; + let bss_section = u8::from_reader(reader, e)?; + if bss_section != 0 { + return Err(io::Error::new(io::ErrorKind::InvalidData, "Expected bss_section == 0")); + } + let prolog_offset = u32::from_reader(reader, e)?; + let epilog_offset = u32::from_reader(reader, e)?; + let unresolved_offset = u32::from_reader(reader, e)?; + let align = if version >= 2 { Some(u32::from_reader(reader, e)?) } else { None }; + let bss_align = if version >= 2 { Some(u32::from_reader(reader, e)?) } else { None }; + let fix_size = if version >= 3 { Some(u32::from_reader(reader, e)?) } else { None }; + Ok(Self { + module_id, + num_sections, + section_info_offset, + name_offset, + name_size, + version, + bss_size, + rel_offset, + imp_offset, + imp_size, + prolog_section, + epilog_section, + unresolved_section, + prolog_offset, + epilog_offset, + unresolved_offset, + align, + bss_align, + fix_size, + }) + } +} + +impl ToWriter for RelHeader { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.module_id.to_writer(writer, e)?; + 0u32.to_writer(writer, e)?; // next + 0u32.to_writer(writer, e)?; // prev + self.num_sections.to_writer(writer, e)?; + self.section_info_offset.to_writer(writer, e)?; + self.name_offset.to_writer(writer, e)?; + self.name_size.to_writer(writer, e)?; + self.version.to_writer(writer, e)?; + self.bss_size.to_writer(writer, e)?; + self.rel_offset.to_writer(writer, e)?; + self.imp_offset.to_writer(writer, e)?; + self.imp_size.to_writer(writer, e)?; + self.prolog_section.to_writer(writer, e)?; + self.epilog_section.to_writer(writer, e)?; + self.unresolved_section.to_writer(writer, e)?; + 0u8.to_writer(writer, e)?; // bss_section + self.prolog_offset.to_writer(writer, e)?; + self.epilog_offset.to_writer(writer, e)?; + self.unresolved_offset.to_writer(writer, e)?; + if let Some(align) = self.align { + align.to_writer(writer, e)?; + } + if let Some(bss_align) = self.bss_align { + bss_align.to_writer(writer, e)?; + } + if let Some(fix_size) = self.fix_size { + fix_size.to_writer(writer, e)?; + } + Ok(()) + } + + fn write_size(&self) -> usize { + const V1_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // module_id + u32::STATIC_SIZE, // next + u32::STATIC_SIZE, // prev + u32::STATIC_SIZE, // num_sections + u32::STATIC_SIZE, // section_info_offset + u32::STATIC_SIZE, // name_offset + u32::STATIC_SIZE, // name_size + u32::STATIC_SIZE, // version + u32::STATIC_SIZE, // bss_size + u32::STATIC_SIZE, // rel_offset + u32::STATIC_SIZE, // imp_offset + u32::STATIC_SIZE, // imp_size + u8::STATIC_SIZE, // prolog_section + u8::STATIC_SIZE, // epilog_section + u8::STATIC_SIZE, // unresolved_section + u8::STATIC_SIZE, // bss_section + u32::STATIC_SIZE, // prolog_offset + u32::STATIC_SIZE, // epilog_offset + u32::STATIC_SIZE, // unresolved_offset + ]); + const V2_SIZE: usize = V1_SIZE + + struct_size([ + u32::STATIC_SIZE, // align + u32::STATIC_SIZE, // bss_align + ]); + const V3_SIZE: usize = V2_SIZE + u32::STATIC_SIZE; // fix_size + match self.version { + 1 => V1_SIZE, + 2 => V2_SIZE, + 3 => V3_SIZE, + _ => panic!("Unsupported REL version {}", self.version), + } + } +} + #[derive(Copy, Clone, Debug)] struct RelImport { module_id: u32, offset: u32, } -#[binrw] +impl FromReader for RelImport { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // module_id + u32::STATIC_SIZE, // offset + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + Ok(Self { module_id: u32::from_reader(reader, e)?, offset: u32::from_reader(reader, e)? }) + } +} + +impl ToWriter for RelImport { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.module_id.to_writer(writer, e)?; + self.offset.to_writer(writer, e)?; + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + #[derive(Copy, Clone, Debug)] pub struct RelSectionHeader { offset_and_flags: u32, size: u32, } +impl FromReader for RelSectionHeader { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // offset_and_flags + u32::STATIC_SIZE, // size + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + Ok(Self { + offset_and_flags: u32::from_reader(reader, e)?, + size: u32::from_reader(reader, e)?, + }) + } +} + +impl ToWriter for RelSectionHeader { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.offset_and_flags.to_writer(writer, e)?; + self.size.to_writer(writer, e)?; + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + impl RelSectionHeader { fn new(offset: u32, size: u32, exec: bool) -> Self { Self { offset_and_flags: offset | (exec as u32), size } @@ -127,7 +306,6 @@ impl RelSectionHeader { pub fn exec(&self) -> bool { self.offset_and_flags & 1 != 0 } } -#[binrw] #[derive(Copy, Clone, Debug)] struct RelRelocRaw { offset: u16, @@ -136,25 +314,64 @@ struct RelRelocRaw { addend: u32, } -pub fn process_rel_header(reader: &mut R) -> Result { - RelHeader::read_be(reader).context("Failed to read REL header") +impl FromReader for RelRelocRaw { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u16::STATIC_SIZE, // offset + u8::STATIC_SIZE, // kind + u8::STATIC_SIZE, // section + u32::STATIC_SIZE, // addend + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + Ok(Self { + offset: u16::from_reader(reader, e)?, + kind: u8::from_reader(reader, e)?, + section: u8::from_reader(reader, e)?, + addend: u32::from_reader(reader, e)?, + }) + } } -pub fn process_rel_sections( +impl ToWriter for RelRelocRaw { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.offset.to_writer(writer, e)?; + self.kind.to_writer(writer, e)?; + self.section.to_writer(writer, e)?; + self.addend.to_writer(writer, e)?; + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + +pub fn process_rel_header(reader: &mut R) -> Result +where R: Read + Seek + ?Sized { + RelHeader::from_reader(reader, Endian::Big).context("Failed to read REL header") +} + +pub fn process_rel_sections( reader: &mut R, header: &RelHeader, -) -> Result> { +) -> Result> +where + R: Read + Seek + ?Sized, +{ let mut sections = Vec::with_capacity(header.num_sections as usize); reader.seek(SeekFrom::Start(header.section_info_offset as u64))?; for idx in 0..header.num_sections { - let section = RelSectionHeader::read_be(reader) + let section = RelSectionHeader::from_reader(reader, Endian::Big) .with_context(|| format!("Failed to read REL section header {}", idx))?; sections.push(section); } Ok(sections) } -pub fn process_rel(reader: &mut R, name: &str) -> Result<(RelHeader, ObjInfo)> { +pub fn process_rel(reader: &mut R, name: &str) -> Result<(RelHeader, ObjInfo)> +where R: Read + Seek + ?Sized { let header = process_rel_header(reader)?; let mut sections = Vec::with_capacity(header.num_sections as usize); let mut text_section = None; @@ -250,7 +467,7 @@ pub fn process_rel(reader: &mut R, name: &str) -> Result<(RelHea let imp_end = (header.imp_offset + header.imp_size) as u64; reader.seek(SeekFrom::Start(header.imp_offset as u64))?; while reader.stream_position()? < imp_end { - let import = RelImport::read_be(reader)?; + let import = RelImport::from_reader(reader, Endian::Big)?; if imp_idx == 0 { ensure!( @@ -278,7 +495,7 @@ pub fn process_rel(reader: &mut R, name: &str) -> Result<(RelHea let mut address = 0u32; let mut section = u8::MAX; loop { - let reloc = RelRelocRaw::read_be(reader)?; + let reloc = RelRelocRaw::from_reader(reader, Endian::Big)?; let kind = match reloc.kind as u32 { elf::R_PPC_NONE => continue, elf::R_PPC_ADDR32 | elf::R_PPC_UADDR32 => ObjRelocKind::Absolute, @@ -437,12 +654,15 @@ pub fn should_write_section(section: &object::Section) -> bool { section.kind() != object::SectionKind::UninitializedData } -pub fn write_rel( +pub fn write_rel( w: &mut W, info: &RelWriteInfo, file: &object::File, mut relocations: Vec, -) -> Result<()> { +) -> Result<()> +where + W: Write + Seek + ?Sized, +{ relocations.sort_by(|a, b| { if a.module_id == 0 { if b.module_id == 0 { @@ -537,12 +757,7 @@ pub fn write_rel( let mut header = RelHeader { module_id: info.module_id, num_sections, - section_info_offset: match info.version { - 1 => 0x40, - 2 => 0x48, - 3 => 0x4C, - _ => bail!("Unsupported REL version {}", info.version), - }, + section_info_offset: 0, // Calculated below name_offset: info.name_offset.unwrap_or(0), name_size: info.name_size.unwrap_or(0), version: info.version, @@ -560,8 +775,9 @@ pub fn write_rel( bss_align: if info.version >= 2 { Some(bss_align) } else { None }, fix_size: None, }; - let mut offset = header.section_info_offset; - offset += num_sections * 8; + let mut offset = header.write_size() as u32; + header.section_info_offset = offset; + offset += num_sections * RelSectionHeader::STATIC_SIZE as u32; let section_data_offset = offset; for (idx, section) in file.sections().filter(is_permitted_section).enumerate() { if !should_write_section(§ion) { @@ -573,7 +789,7 @@ pub fn write_rel( } header.imp_offset = offset; let imp_count = relocations.iter().map(|r| r.module_id).dedup().count(); - header.imp_size = imp_count as u32 * 8; + header.imp_size = imp_count as u32 * RelImport::STATIC_SIZE as u32; offset += header.imp_size; header.rel_offset = offset; @@ -673,15 +889,14 @@ pub fn write_rel( } } - let mut w = NoSeek::new(w); - header.write_be(&mut w)?; + header.to_writer(w, Endian::Big)?; ensure!(w.stream_position()? as u32 == header.section_info_offset); let mut current_data_offset = section_data_offset; let mut permitted_section_idx = 0; for section_index in 0..num_sections { let Ok(section) = file.section_by_index(object::SectionIndex(section_index as usize)) else { - RelSectionHeader::new(0, 0, false).write_be(&mut w)?; + RelSectionHeader::new(0, 0, false).to_writer(w, Endian::Big)?; continue; }; if is_permitted_section(§ion) { @@ -697,10 +912,10 @@ pub fn write_rel( section.size() as u32, section.kind() == object::SectionKind::Text, ) - .write_be(&mut w)?; + .to_writer(w, Endian::Big)?; permitted_section_idx += 1; } else { - RelSectionHeader::new(0, 0, false).write_be(&mut w)?; + RelSectionHeader::new(0, 0, false).to_writer(w, Endian::Big)?; } } ensure!(w.stream_position()? as u32 == section_data_offset); @@ -730,11 +945,11 @@ pub fn write_rel( } ensure!(w.stream_position()? as u32 == header.imp_offset); for entry in imp_entries { - entry.write_be(&mut w)?; + entry.to_writer(w, Endian::Big)?; } ensure!(w.stream_position()? as u32 == header.rel_offset); for reloc in raw_relocations { - reloc.write_be(&mut w)?; + reloc.to_writer(w, Endian::Big)?; } ensure!(w.stream_position()? as u32 == offset); Ok(()) diff --git a/src/util/rso.rs b/src/util/rso.rs index 5fcaf30..e55c79b 100644 --- a/src/util/rso.rs +++ b/src/util/rso.rs @@ -1,7 +1,9 @@ -use std::io::{Read, Seek, SeekFrom}; +use std::{ + io, + io::{Read, Seek, SeekFrom, Write}, +}; use anyhow::{anyhow, ensure, Result}; -use byteorder::{BigEndian, ReadBytesExt}; use cwdemangle::{demangle, DemangleOptions}; use crate::{ @@ -9,7 +11,10 @@ use crate::{ ObjArchitecture, ObjInfo, ObjKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, }, - util::file::{read_c_string, read_string}, + util::{ + file::{read_c_string, read_string}, + reader::{struct_size, Endian, FromReader, ToWriter, DYNAMIC_SIZE}, + }, }; /// For RSO references to the DOL, the sections are hardcoded. @@ -34,46 +39,348 @@ pub const DOL_SECTION_ETI: u32 = 241; /// ABS symbol section index. pub const DOL_SECTION_ABS: u32 = 65521; -pub fn process_rso(reader: &mut R) -> Result { - ensure!(reader.read_u32::()? == 0, "Expected 'next' to be 0"); - ensure!(reader.read_u32::()? == 0, "Expected 'prev' to be 0"); - let num_sections = reader.read_u32::()?; - let section_info_offset = reader.read_u32::()?; - let name_offset = reader.read_u32::()?; - let name_size = reader.read_u32::()?; - let version = reader.read_u32::()?; - ensure!(version == 1, "Unsupported RSO version {}", version); - let bss_size = reader.read_u32::()?; - let prolog_section = reader.read_u8()?; - let epilog_section = reader.read_u8()?; - let unresolved_section = reader.read_u8()?; - ensure!(reader.read_u8()? == 0, "Expected 'bssSection' to be 0"); - let prolog_offset = reader.read_u32::()?; - let epilog_offset = reader.read_u32::()?; - let unresolved_offset = reader.read_u32::()?; - let _internal_rel_offset = reader.read_u32::()?; - let _internal_rel_size = reader.read_u32::()?; - let external_rel_offset = reader.read_u32::()?; - let external_rel_size = reader.read_u32::()?; - let export_table_offset = reader.read_u32::()?; - let export_table_size = reader.read_u32::()?; - let export_table_name_offset = reader.read_u32::()?; - let import_table_offset = reader.read_u32::()?; - let import_table_size = reader.read_u32::()?; - let import_table_name_offset = reader.read_u32::()?; +pub struct RsoHeader { + // Pointer to the next module, forming a linked list. Always 0, filled in at runtime. + // pub next: u32, + // Pointer to the previous module, forming a linked list. Always 0, filled in at runtime. + // pub prev: u32, + /// Number of sections contained in the file. + pub num_sections: u32, + /// Offset to the section info table. Always 0x58. + pub section_info_offset: u32, + /// Offset to the module name. Can be 0, in which case this module doesn't contain a name string. + pub name_offset: u32, + /// Size of the module name string. + pub name_size: u32, + /// Module version number. Always 1. + pub version: u32, + /// Size of the BSS section, which is allocated at runtime (not included in the file). + pub bss_size: u32, + /// Section index of the prolog function, which is called when the module is linked. + /// 0 if this module doesn't contain a prolog function. + pub prolog_section: u8, + /// Section index of the epilog function, which is called when the module is unlinked. + /// 0 if this module doesn't contain an epilog function. + pub epilog_section: u8, + /// Section index of the unresolved function, which is called if the module attempts to call + /// an unlinked function. 0 if this module doesn't contain an unresolved function. + pub unresolved_section: u8, + // Section index of the BSS section. Always 0, filled in at runtime. + // pub bss_section: u8, + /// Section-relative offset of the prolog function. + /// 0 if this module doesn't contain a prolog function. + pub prolog_offset: u32, + /// Section-relative offset of the epilog function. + /// 0 if this module doesn't contain an epilog function. + pub epilog_offset: u32, + /// Section-relative offset of the unresolved function. + /// 0 if this module doesn't contain an unresolved function. + pub unresolved_offset: u32, + /// Absolute offset of the relocation table for internal relocations + /// (relocations for symbols within this module). + pub internal_rel_offset: u32, + /// Size of the relocation table for internal relocations. + pub internal_rel_size: u32, + /// Absolute offset of the relocation table for external relocations + /// (relocations for symbols within other modules). + pub external_rel_offset: u32, + /// Size of the relocation table for external relocations. + pub external_rel_size: u32, + /// Absolute offset of the symbol table for exports (symbols within this module). + pub export_table_offset: u32, + /// Size of the symbol table for exports. + pub export_table_size: u32, + /// Absolute offset of the string table containing export symbol names. + pub export_table_name_offset: u32, + /// Absolute offset of the symbol table for imports + /// (symbols within other modules, referenced by this one). + pub import_table_offset: u32, + /// Size of the symbol table for imports. + pub import_table_size: u32, + /// Absolute offset of the string table containing import symbol names. + pub import_table_name_offset: u32, +} - let mut sections = Vec::with_capacity(num_sections as usize); - reader.seek(SeekFrom::Start(section_info_offset as u64))?; +impl FromReader for RsoHeader { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // next + u32::STATIC_SIZE, // prev + u32::STATIC_SIZE, // num_sections + u32::STATIC_SIZE, // section_info_offset + u32::STATIC_SIZE, // name_offset + u32::STATIC_SIZE, // name_size + u32::STATIC_SIZE, // version + u32::STATIC_SIZE, // bss_size + u8::STATIC_SIZE, // prolog_section + u8::STATIC_SIZE, // epilog_section + u8::STATIC_SIZE, // unresolved_section + u8::STATIC_SIZE, // bss_section + u32::STATIC_SIZE, // prolog_offset + u32::STATIC_SIZE, // epilog_offset + u32::STATIC_SIZE, // unresolved_offset + u32::STATIC_SIZE, // internal_rel_offset + u32::STATIC_SIZE, // internal_rel_size + u32::STATIC_SIZE, // external_rel_offset + u32::STATIC_SIZE, // external_rel_size + u32::STATIC_SIZE, // export_table_offset + u32::STATIC_SIZE, // export_table_size + u32::STATIC_SIZE, // export_table_name_offset + u32::STATIC_SIZE, // import_table_offset + u32::STATIC_SIZE, // import_table_size + u32::STATIC_SIZE, // import_table_name_offset + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + let next = u32::from_reader(reader, e)?; + if next != 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Expected 'next' to be 0, got {:#X}", next), + )); + } + let prev = u32::from_reader(reader, e)?; + if prev != 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Expected 'prev' to be 0, got {:#X}", prev), + )); + } + let num_sections = u32::from_reader(reader, e)?; + let section_info_offset = u32::from_reader(reader, e)?; + let name_offset = u32::from_reader(reader, e)?; + let name_size = u32::from_reader(reader, e)?; + let version = u32::from_reader(reader, e)?; + let bss_size = u32::from_reader(reader, e)?; + let prolog_section = u8::from_reader(reader, e)?; + let epilog_section = u8::from_reader(reader, e)?; + let unresolved_section = u8::from_reader(reader, e)?; + let bss_section = u8::from_reader(reader, e)?; + if bss_section != 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Expected 'bssSection' to be 0, got {:#X}", bss_section), + )); + } + let prolog_offset = u32::from_reader(reader, e)?; + let epilog_offset = u32::from_reader(reader, e)?; + let unresolved_offset = u32::from_reader(reader, e)?; + let internal_rel_offset = u32::from_reader(reader, e)?; + let internal_rel_size = u32::from_reader(reader, e)?; + let external_rel_offset = u32::from_reader(reader, e)?; + let external_rel_size = u32::from_reader(reader, e)?; + let export_table_offset = u32::from_reader(reader, e)?; + let export_table_size = u32::from_reader(reader, e)?; + let export_table_name_offset = u32::from_reader(reader, e)?; + let import_table_offset = u32::from_reader(reader, e)?; + let import_table_size = u32::from_reader(reader, e)?; + let import_table_name_offset = u32::from_reader(reader, e)?; + + Ok(Self { + num_sections, + section_info_offset, + name_offset, + name_size, + version, + bss_size, + prolog_section, + epilog_section, + unresolved_section, + prolog_offset, + epilog_offset, + unresolved_offset, + internal_rel_offset, + internal_rel_size, + external_rel_offset, + external_rel_size, + export_table_offset, + export_table_size, + export_table_name_offset, + import_table_offset, + import_table_size, + import_table_name_offset, + }) + } +} + +#[derive(Copy, Clone, Debug)] +pub struct RsoSectionHeader { + /// Absolute offset of the section. + /// The lowest bit is set if the section is executable. + offset_and_flags: u32, + /// Size of the section. + size: u32, +} + +impl FromReader for RsoSectionHeader { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // offset + u32::STATIC_SIZE, // size + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + Ok(Self { + offset_and_flags: u32::from_reader(reader, e)?, + size: u32::from_reader(reader, e)?, + }) + } +} + +impl ToWriter for RsoSectionHeader { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.offset_and_flags.to_writer(writer, e)?; + self.size.to_writer(writer, e)?; + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + +impl RsoSectionHeader { + #[allow(dead_code)] + fn new(offset: u32, size: u32, exec: bool) -> Self { + Self { offset_and_flags: offset | (exec as u32), size } + } + + pub fn offset(&self) -> u32 { self.offset_and_flags & !1 } + + pub fn size(&self) -> u32 { self.size } + + pub fn exec(&self) -> bool { self.offset_and_flags & 1 != 0 } +} + +struct RsoRelocation { + /// Absolute offset of this relocation (relative to the start of the RSO file). + offset: u32, + /// For internal relocations, this is the section index of the symbol being patched to. + /// For external relocations, this is the index of the symbol within the import symbol table. + /// The lowest 8 bits are the relocation type. + id_and_type: u32, + /// For internal relocations, this is the section-relative offset of the target symbol. + /// For external relocations, this is unused and always 0 (the offset is calculated using the + /// import symbol table). + target_offset: u32, +} + +impl FromReader for RsoRelocation { + type Args = (); + + const STATIC_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // offset + u32::STATIC_SIZE, // id_and_type + u32::STATIC_SIZE, // sym_offset + ]); + + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + Ok(Self { + offset: u32::from_reader(reader, e)?, + id_and_type: u32::from_reader(reader, e)?, + target_offset: u32::from_reader(reader, e)?, + }) + } +} + +impl ToWriter for RsoRelocation { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.offset.to_writer(writer, e)?; + self.id_and_type.to_writer(writer, e)?; + self.target_offset.to_writer(writer, e)?; + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + +impl RsoRelocation { + #[allow(dead_code)] + pub fn new(offset: u32, id: u32, rel_type: u8, sym_offset: u32) -> Self { + Self { offset, id_and_type: (id << 8) | rel_type as u32, target_offset: sym_offset } + } + + pub fn offset(&self) -> u32 { self.offset } + + pub fn id(&self) -> u32 { (self.id_and_type & 0xFFFFFF00) >> 8 } + + pub fn rel_type(&self) -> u8 { (self.id_and_type & 0xFF) as u8 } + + pub fn sym_offset(&self) -> u32 { self.target_offset } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum RsoSymbolKind { + Import, + Export, +} + +struct RsoSymbol { + /// Relative offset into the name table pointed to in the header, + /// which points to the name of this symbol. + name_offset: u32, + /// The section-relative offset to the symbol. This is always 0 for imports. + offset: u32, + /// For exports, index of the section that contains this symbol. + /// For imports, appears to be an offset? + section_index: u32, + /// A hash of the symbol name. Only present for exports. + hash: Option, +} + +impl FromReader for RsoSymbol { + type Args = RsoSymbolKind; + + const STATIC_SIZE: usize = DYNAMIC_SIZE; + + fn from_reader_args(reader: &mut R, e: Endian, args: Self::Args) -> io::Result + where R: Read + Seek + ?Sized { + Ok(Self { + name_offset: u32::from_reader(reader, e)?, + offset: u32::from_reader(reader, e)?, + section_index: u32::from_reader(reader, e)?, + hash: if args == RsoSymbolKind::Export { + Some(u32::from_reader(reader, e)?) + } else { + None + }, + }) + } +} + +impl ToWriter for RsoSymbol { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.name_offset.to_writer(writer, e)?; + self.offset.to_writer(writer, e)?; + self.section_index.to_writer(writer, e)?; + if let Some(hash) = self.hash { + hash.to_writer(writer, e)?; + } + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + +pub fn process_rso(reader: &mut R) -> Result +where R: Read + Seek + ?Sized { + let header = RsoHeader::from_reader(reader, Endian::Big)?; + let mut sections = Vec::with_capacity(header.num_sections as usize); + reader.seek(SeekFrom::Start(header.section_info_offset as u64))?; let mut total_bss_size = 0; - for idx in 0..num_sections { - let offset = reader.read_u32::()?; - let size = reader.read_u32::()?; - log::debug!("Section {}: offset {:#X}, size {:#X}", idx, offset, size); + for idx in 0..header.num_sections { + let section = RsoSectionHeader::from_reader(reader, Endian::Big)?; + let offset = section.offset(); + let size = section.size(); if size == 0 { continue; } - let exec = (offset & 1) == 1; - let offset = offset & !3; let data = if offset == 0 { vec![] @@ -92,7 +399,7 @@ pub fn process_rso(reader: &mut R) -> Result { name: format!(".section{}", idx), kind: if offset == 0 { ObjSectionKind::Bss - } else if exec { + } else if section.exec() { ObjSectionKind::Code } else { ObjSectionKind::Data @@ -113,10 +420,10 @@ pub fn process_rso(reader: &mut R) -> Result { } } ensure!( - total_bss_size == bss_size, + total_bss_size == header.bss_size, "Mismatched BSS size: {:#X} != {:#X}", total_bss_size, - bss_size + header.bss_size ); let mut symbols = Vec::new(); @@ -139,34 +446,31 @@ pub fn process_rso(reader: &mut R) -> Result { } Ok(()) }; - add_symbol(prolog_section, prolog_offset, "_prolog")?; - add_symbol(epilog_section, epilog_offset, "_epilog")?; - add_symbol(unresolved_section, unresolved_offset, "_unresolved")?; + add_symbol(header.prolog_section, header.prolog_offset, "_prolog")?; + add_symbol(header.epilog_section, header.epilog_offset, "_epilog")?; + add_symbol(header.unresolved_section, header.unresolved_offset, "_unresolved")?; - reader.seek(SeekFrom::Start(external_rel_offset as u64))?; - while reader.stream_position()? < (external_rel_offset + external_rel_size) as u64 { - let offset = reader.read_u32::()?; - let id_and_type = reader.read_u32::()?; - let id = (id_and_type & 0xFFFFFF00) >> 8; - let rel_type = id_and_type & 0xFF; - let sym_offset = reader.read_u32::()?; + reader.seek(SeekFrom::Start(header.external_rel_offset as u64))?; + while reader.stream_position()? < (header.external_rel_offset + header.external_rel_size) as u64 + { + let reloc = RsoRelocation::from_reader(reader, Endian::Big)?; log::debug!( "Reloc offset: {:#X}, id: {}, type: {}, sym offset: {:#X}", - offset, - id, - rel_type, - sym_offset + reloc.offset(), + reloc.id(), + reloc.rel_type(), + reloc.sym_offset() ); } - reader.seek(SeekFrom::Start(export_table_offset as u64))?; - while reader.stream_position()? < (export_table_offset + export_table_size) as u64 { - let name_off = reader.read_u32::()?; - let name = read_c_string(reader, (export_table_name_offset + name_off) as u64)?; - let sym_off = reader.read_u32::()?; - let section_idx = reader.read_u32::()?; - let hash_n = reader.read_u32::()?; + reader.seek(SeekFrom::Start(header.export_table_offset as u64))?; + while reader.stream_position()? < (header.export_table_offset + header.export_table_size) as u64 + { + let symbol = RsoSymbol::from_reader_args(reader, Endian::Big, RsoSymbolKind::Export)?; + let name = + read_c_string(reader, (header.export_table_name_offset + symbol.name_offset) as u64)?; let calc = symbol_hash(&name); + let hash_n = symbol.hash.unwrap_or_default(); ensure!( hash_n == calc, "Mismatched calculated hash for symbol {}: {:#X} != {:#X}", @@ -178,37 +482,42 @@ pub fn process_rso(reader: &mut R) -> Result { let section = sections .iter() .enumerate() - .find(|&(_, section)| section.elf_index == section_idx as usize) + .find(|&(_, section)| section.elf_index == symbol.section_index as usize) .map(|(idx, _)| idx) // HACK: selfiles won't have any sections - .unwrap_or(section_idx as usize); + .unwrap_or(symbol.section_index as usize); log::debug!( "Export: {}, sym off: {:#X}, section: {}, ELF hash: {:#X}", demangled_name.as_deref().unwrap_or(&name), - sym_off, - section_idx, + symbol.offset, + symbol.section_index, hash_n ); symbols.push(ObjSymbol { name, demangled_name, - address: sym_off as u64, + address: symbol.offset as u64, section: Some(section), ..Default::default() }); } - reader.seek(SeekFrom::Start(import_table_offset as u64))?; - while reader.stream_position()? < (import_table_offset + import_table_size) as u64 { - let name_off = reader.read_u32::()?; - let name = read_c_string(reader, (import_table_name_offset + name_off) as u64)?; - let sym_off = reader.read_u32::()?; - let section_idx = reader.read_u32::()?; - log::debug!("Import: {}, sym off: {}, section: {}", name, sym_off, section_idx); + reader.seek(SeekFrom::Start(header.import_table_offset as u64))?; + while reader.stream_position()? < (header.import_table_offset + header.import_table_size) as u64 + { + let symbol = RsoSymbol::from_reader_args(reader, Endian::Big, RsoSymbolKind::Import)?; + let name = + read_c_string(reader, (header.import_table_name_offset + symbol.name_offset) as u64)?; + log::debug!( + "Import: {}, sym off: {}, section: {}", + name, + symbol.offset, + symbol.section_index + ); } - let name = match name_offset { + let name = match header.name_offset { 0 => String::new(), - _ => read_string(reader, name_offset as u64, name_size as usize)?, + _ => read_string(reader, header.name_offset as u64, header.name_size as usize)?, }; let obj = ObjInfo::new(ObjKind::Relocatable, ObjArchitecture::PowerPc, name, symbols, sections); diff --git a/src/util/signatures.rs b/src/util/signatures.rs index 1e64eee..043fd08 100644 --- a/src/util/signatures.rs +++ b/src/util/signatures.rs @@ -246,10 +246,8 @@ pub fn compare_signature(existing: &mut FunctionSignature, new: &FunctionSignatu Ok(()) } -pub fn generate_signature>( - path: P, - symbol_name: &str, -) -> Result> { +pub fn generate_signature

(path: P, symbol_name: &str) -> Result> +where P: AsRef { let mut out_symbols: Vec = Vec::new(); let mut out_relocs: Vec = Vec::new(); let mut symbol_map: BTreeMap = BTreeMap::new(); diff --git a/src/util/take_seek.rs b/src/util/take_seek.rs new file mode 100644 index 0000000..e7bf3d5 --- /dev/null +++ b/src/util/take_seek.rs @@ -0,0 +1,128 @@ +// MIT License +// +// Copyright (c) jam1garner and other contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// Source: https://github.com/jam1garner/binrw/blob/919c395316b9e971bdccb5547272098f20702c5b/binrw/src/io/take_seek.rs + +//! Types for seekable reader adapters which limit the number of bytes read from +//! the underlying reader. + +use std::io::{Read, Result, Seek, SeekFrom}; + +/// Read adapter which limits the bytes read from an underlying reader, with +/// seek support. +/// +/// This struct is generally created by importing the [`TakeSeekExt`] extension +/// and calling [`take_seek`] on a reader. +/// +/// [`take_seek`]: TakeSeekExt::take_seek +#[derive(Debug)] +pub struct TakeSeek { + inner: T, + pos: u64, + end: u64, +} + +impl TakeSeek { + /// Gets a reference to the underlying reader. + pub fn get_ref(&self) -> &T { &self.inner } + + /// Gets a mutable reference to the underlying reader. + /// + /// Care should be taken to avoid modifying the internal I/O state of the + /// underlying reader as doing so may corrupt the internal limit of this + /// `TakeSeek`. + pub fn get_mut(&mut self) -> &mut T { &mut self.inner } + + /// Consumes this wrapper, returning the wrapped value. + pub fn into_inner(self) -> T { self.inner } + + /// Returns the number of bytes that can be read before this instance will + /// return EOF. + /// + /// # Note + /// + /// This instance may reach EOF after reading fewer bytes than indicated by + /// this method if the underlying [`Read`] instance reaches EOF. + pub fn limit(&self) -> u64 { self.end.saturating_sub(self.pos) } +} + +impl TakeSeek { + /// Sets the number of bytes that can be read before this instance will + /// return EOF. This is the same as constructing a new `TakeSeek` instance, + /// so the amount of bytes read and the previous limit value don’t matter + /// when calling this method. + /// + /// # Panics + /// + /// Panics if the inner stream returns an error from `stream_position`. + pub fn set_limit(&mut self, limit: u64) { + let pos = self.inner.stream_position().expect("cannot get position for `set_limit`"); + self.pos = pos; + self.end = pos + limit; + } +} + +impl Read for TakeSeek { + fn read(&mut self, buf: &mut [u8]) -> Result { + let limit = self.limit(); + + // Don't call into inner reader at all at EOF because it may still block + if limit == 0 { + return Ok(0); + } + + // Lint: It is impossible for this cast to truncate because the value + // being cast is the minimum of two values, and one of the value types + // is already `usize`. + #[allow(clippy::cast_possible_truncation)] + let max = (buf.len() as u64).min(limit) as usize; + let n = self.inner.read(&mut buf[0..max])?; + self.pos += n as u64; + Ok(n) + } +} + +impl Seek for TakeSeek { + fn seek(&mut self, pos: SeekFrom) -> Result { + self.pos = self.inner.seek(pos)?; + Ok(self.pos) + } + + fn stream_position(&mut self) -> Result { Ok(self.pos) } +} + +/// An extension trait that implements `take_seek()` for compatible streams. +pub trait TakeSeekExt { + /// Creates an adapter which will read at most `limit` bytes from the + /// wrapped stream. + fn take_seek(self, limit: u64) -> TakeSeek + where Self: Sized; +} + +impl TakeSeekExt for T { + fn take_seek(mut self, limit: u64) -> TakeSeek + where Self: Sized { + let pos = self.stream_position().expect("cannot get position for `take_seek`"); + + TakeSeek { inner: self, pos, end: pos + limit } + } +} diff --git a/src/util/yaz0.rs b/src/util/yaz0.rs index 2b60c8c..d392188 100644 --- a/src/util/yaz0.rs +++ b/src/util/yaz0.rs @@ -1,52 +1,57 @@ // Source: https://github.com/Julgodis/picori/blob/650da9f4fe6050b39b80d5360416591c748058d5/src/yaz0.rs // License: MIT -// Modified to use `std::io::Read`/`Seek` and `byteorder` +// Modified to use `std::io::Read`/`Seek` and project's FromReader trait. use std::io::{Read, Seek}; use anyhow::{ensure, Result}; -use byteorder::{BigEndian, ReadBytesExt}; + +use crate::util::reader::{skip_bytes, struct_size, Endian, FromReader}; + +pub const YAZ0_MAGIC: [u8; 4] = *b"Yaz0"; /// Yaz0 header. pub struct Header { - /// Yaz0 magic (0x59617A30). - pub magic: u32, /// Size of decompressed data. pub decompressed_size: u32, - _reserved0: u32, - _reserved1: u32, } -impl Header { - /// Reads a Yaz0 header from a reader. - pub fn from_binary(input: &mut D) -> Result

{ - Ok(Header { - magic: input.read_u32::()?, - decompressed_size: input.read_u32::()?, - _reserved0: input.read_u32::()?, - _reserved1: input.read_u32::()?, - }) - } +impl FromReader for Header { + type Args = (); - /// Checks if the header is valid. - pub fn is_valid(&self) -> bool { self.magic == 0x59617A30 } + const STATIC_SIZE: usize = struct_size([ + u32::STATIC_SIZE, // magic + u32::STATIC_SIZE, // decompressed_size + u32::STATIC_SIZE, // reserved0 + u32::STATIC_SIZE, // reserved1 + ]); - pub fn decompressed_size(input: &mut impl Read) -> Result { - let header = Header::from_binary(input)?; - ensure!(header.is_valid(), "Invalid Yaz0 magic"); - Ok(header.decompressed_size as usize) + fn from_reader_args(reader: &mut R, e: Endian, _args: Self::Args) -> std::io::Result + where R: Read + Seek + ?Sized { + let magic = <[u8; 4]>::from_reader(reader, e)?; + if magic != YAZ0_MAGIC { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Invalid Yaz0 magic: {:?}", magic), + )); + } + let decompressed_size = u32::from_reader(reader, e)?; + skip_bytes::<8, _>(reader)?; + Ok(Self { decompressed_size }) } } /// Decompresses the data into a new allocated [`Vec`]. Assumes a Yaz0 header followed by /// compressed data. -pub fn decompress_file(input: &mut D) -> Result> { - let decompressed_size = Header::decompressed_size(input)?; - decompress(input, decompressed_size) +pub fn decompress_file(input: &mut R) -> Result> +where R: Read + Seek + ?Sized { + let header = Header::from_reader(input, Endian::Big)?; + decompress(input, header.decompressed_size as usize) } /// Decompresses the data into a new allocated [`Vec`]. `decompressed_size` can be determined /// by looking at the Yaz0 header [`Header`]. -pub fn decompress(input: &mut D, decompressed_size: usize) -> Result> { +pub fn decompress(input: &mut R, decompressed_size: usize) -> Result> +where R: Read + Seek + ?Sized { let mut output = vec![0; decompressed_size]; decompress_into(input, output.as_mut_slice())?; Ok(output) @@ -54,7 +59,8 @@ pub fn decompress(input: &mut D, decompressed_size: usize) -> Re /// Decompresses the data into the given buffer. The buffer must be large /// enough to hold the decompressed data. -pub fn decompress_into(input: &mut D, destination: &mut [u8]) -> Result<()> { +pub fn decompress_into(input: &mut R, destination: &mut [u8]) -> Result<()> +where R: Read + Seek + ?Sized { let decompressed_size = destination.len(); let mut dest = 0; let mut code = 0; @@ -62,22 +68,20 @@ pub fn decompress_into(input: &mut D, destination: &mut [u8]) -> while dest < decompressed_size { if code_bits == 0 { - code = input.read_u8()? as u32; + code = u8::from_reader(input, Endian::Big)? as u32; code_bits = 8; } if code & 0x80 != 0 { - let byte = input.read_u8()?; - destination[dest] = byte; + destination[dest] = u8::from_reader(input, Endian::Big)?; dest += 1; } else { - let byte0 = input.read_u8()?; - let byte1 = input.read_u8()?; - let a = (byte0 & 0xf) as usize; - let b = (byte0 >> 4) as usize; - let offset = (a << 8) | (byte1 as usize); + let bytes = <[u8; 2]>::from_reader(input, Endian::Big)?; + let a = (bytes[0] & 0xf) as usize; + let b = (bytes[0] >> 4) as usize; + let offset = (a << 8) | (bytes[1] as usize); let length = match b { - 0 => (input.read_u8()? as usize) + 0x12, + 0 => (u8::from_reader(input, Endian::Big)? as usize) + 0x12, length => length + 2, };