From f9f7fb2e1e64ad5f5d6cbcf3228ed461cff1e573 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Sun, 3 Sep 2023 10:42:52 -0400 Subject: [PATCH] Working `rel make` & more - Added `elf info` - Improved `rel info` - Colored output for `shasum` - Fix section `rename` in RELs - Added padding symbols to avoid linker issues - Automatically set symbols to "active" in .comment output --- Cargo.lock | 128 +++++++- Cargo.toml | 7 +- README.md | 10 +- src/analysis/cfa.rs | 9 +- src/analysis/pass.rs | 12 +- src/analysis/signatures.rs | 5 +- src/analysis/slices.rs | 4 +- src/analysis/tracker.rs | 5 +- src/cmd/dol.rs | 97 +++--- src/cmd/elf.rs | 132 ++++++++- src/cmd/rel.rs | 247 +++++++++++++++- src/cmd/shasum.rs | 10 +- src/main.rs | 32 +- src/obj/mod.rs | 14 +- src/obj/splits.rs | 4 + src/obj/symbols.rs | 8 +- src/util/asm.rs | 19 +- src/util/comment.rs | 10 +- src/util/config.rs | 62 ++-- src/util/dol.rs | 13 +- src/util/elf.rs | 44 ++- src/util/file.rs | 48 ++- src/util/lcf.rs | 6 +- src/util/map.rs | 3 +- src/util/mod.rs | 28 ++ src/util/rel.rs | 588 +++++++++++++++++++++++++++++++------ src/util/split.rs | 156 +++++++++- 27 files changed, 1443 insertions(+), 258 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34d296c..3c1abe0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,6 +100,23 @@ dependencies = [ "syn 1.0.107", ] +[[package]] +name = "array-init" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -142,6 +159,30 @@ dependencies = [ "serde", ] +[[package]] +name = "binrw" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab81d22cbd2d745852348b2138f3db2103afa8ce043117a374581926a523e267" +dependencies = [ + "array-init", + "binrw_derive", + "bytemuck", +] + +[[package]] +name = "binrw_derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6b019a3efebe7f453612083202887b6f1ace59e20d010672e336eea4ed5be97" +dependencies = [ + "either", + "owo-colors", + "proc-macro2", + "quote", + "syn 1.0.107", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -157,6 +198,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bytemuck" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" + [[package]] name = "byteorder" version = "1.4.3" @@ -260,13 +307,14 @@ dependencies = [ [[package]] name = "decomp-toolkit" -version = "0.3.7" +version = "0.4.0" dependencies = [ "anyhow", "ar", "argp", "base16ct", "base64", + "binrw", "byteorder", "cwdemangle", "dol", @@ -285,12 +333,14 @@ dependencies = [ "num_enum", "object 0.31.1", "once_cell", + "owo-colors", "path-slash", "petgraph", "ppc750cl", "rayon", "regex", "rmp-serde", + "rustc-hash", "serde", "serde_json", "serde_repr", @@ -423,6 +473,15 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.2" @@ -455,6 +514,12 @@ dependencies = [ "hashbrown 0.14.0", ] +[[package]] +name = "is_ci" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616cde7c720bb2bb5824a224687d8f77bfd38922027f01d825cd7453be5099fb" + [[package]] name = "itertools" version = "0.11.0" @@ -488,6 +553,15 @@ version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "memchr" version = "2.5.0" @@ -564,7 +638,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.2", "libc", ] @@ -622,6 +696,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +dependencies = [ + "supports-color", +] + [[package]] name = "paste" version = "1.0.11" @@ -739,8 +822,17 @@ checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.3.0", + "regex-syntax 0.7.3", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -751,9 +843,15 @@ checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.3", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.7.3" @@ -788,6 +886,12 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "ryu" version = "1.0.12" @@ -881,6 +985,16 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +[[package]] +name = "supports-color" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba6faf2ca7ee42fdd458f4347ae0a9bd6bcc445ad7cb57ad82b383f18870d6f" +dependencies = [ + "atty", + "is_ci", +] + [[package]] name = "syn" version = "1.0.107" @@ 
-992,10 +1106,14 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" dependencies = [ + "matchers", "nu-ansi-term", + "once_cell", + "regex", "sharded-slab", "smallvec", "thread_local", + "tracing", "tracing-core", "tracing-log", ] diff --git a/Cargo.toml b/Cargo.toml index 5a93143..8c1a82b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "decomp-toolkit" description = "Yet another GameCube/Wii decompilation toolkit." authors = ["Luke Street "] license = "MIT OR Apache-2.0" -version = "0.3.7" +version = "0.4.0" edition = "2021" publish = false build = "build.rs" @@ -26,6 +26,7 @@ ar = { git = "https://github.com/bjorn3/rust-ar.git", branch = "write_symbol_tab argp = "0.3.0" base16ct = "0.2.0" base64 = "0.21.2" +binrw = "0.11.2" byteorder = "1.4.3" cwdemangle = "0.1.5" dol = { git = "https://github.com/encounter/ppc750cl", rev = "5f6e991bf495388c4104f188d2e90c79da9f78de" } @@ -44,11 +45,13 @@ multimap = "0.9.0" num_enum = "0.6.1" object = { version = "0.31.1", features = ["read_core", "std", "elf", "write_std"], default-features = false } once_cell = "1.18.0" +owo-colors = { version = "3.5.0", features = ["supports-colors"] } path-slash = "0.2.1" petgraph = "0.6.3" ppc750cl = { git = "https://github.com/encounter/ppc750cl", rev = "5f6e991bf495388c4104f188d2e90c79da9f78de" } rayon = "1.7.0" regex = "1.9.0" +rustc-hash = "1.1.0" serde = "1.0.166" serde_json = "1.0.104" serde_repr = "0.1.14" @@ -57,7 +60,7 @@ sha-1 = "0.10.1" smallvec = "1.11.0" tracing = "0.1.37" tracing-attributes = "0.1.26" -tracing-subscriber = "0.3.17" +tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } [build-dependencies] anyhow = { version = "1.0.71", features = ["backtrace"] } diff --git a/README.md b/README.md index f440b51..c15a1d2 100644 --- a/README.md +++ b/README.md @@ -105,8 +105,8 @@ it operates as a one-shot assembly generator, it still suffers from many of the ### ppcdis [ppcdis](https://github.com/SeekyCt/ppcdis) is one of the tools that inspired decomp-toolkit. It has more accurate -analysis than doldisasm.py, and has similar goals to decomp-toolkit. It also has some features that decomp-toolkit does -not yet, like support for REL files. +analysis than doldisasm.py, and has similar goals to decomp-toolkit. It's been used successfully in several +decompilation projects. However, decomp-toolkit has a few advantages: @@ -226,7 +226,7 @@ Generates `ldscript.lcf` for `mwldeppc.exe`. **Future work** -- Support REL and RSO files +- Support RSO files - Add more signatures - Rework CodeWarrior map parsing @@ -335,7 +335,7 @@ $ dtk map symbol Game.MAP 'Function__5ClassFv' ### rel info -Prints basic information about a REL file. +Prints information about a REL file. ```shell $ dtk rel info input.rel @@ -355,7 +355,7 @@ $ dtk rel info main.dol rels/*.rel -o merged.elf > [!WARNING] > This command is not yet functional. -Prints basic information about an RSO file. +Prints information about an RSO file. 
```shell $ dtk rso info input.rso diff --git a/src/analysis/cfa.rs b/src/analysis/cfa.rs index 0a9e9de..8a15afe 100644 --- a/src/analysis/cfa.rs +++ b/src/analysis/cfa.rs @@ -166,13 +166,8 @@ impl AnalyzerState { pub fn detect_functions(&mut self, obj: &ObjInfo) -> Result<()> { // Apply known functions from extab for (&addr, &size) in &obj.known_functions { - let (section_index, _) = obj - .sections - .at_address(addr) - .context(format!("Function {:#010X} outside of any section", addr))?; - let addr_ref = SectionAddress::new(section_index, addr); - self.function_entries.insert(addr_ref); - self.function_bounds.insert(addr_ref, Some(addr_ref + size)); + self.function_entries.insert(addr); + self.function_bounds.insert(addr, Some(addr + size)); } // Apply known functions from symbols for (_, symbol) in obj.symbols.by_kind(ObjSymbolKind::Function) { diff --git a/src/analysis/pass.rs b/src/analysis/pass.rs index e3c93ec..bb8e09d 100644 --- a/src/analysis/pass.rs +++ b/src/analysis/pass.rs @@ -201,7 +201,11 @@ impl AnalysisPass for FindRelCtorsDtors { return Ok(()); } - log::debug!("Found .ctors and .dtors: {:?}", possible_sections); + log::debug!( + "Found .ctors and .dtors: {}, {}", + possible_sections[0].0, + possible_sections[1].0 + ); let ctors_section_index = possible_sections[0].0; state.known_sections.insert(ctors_section_index, ".ctors".to_string()); state.known_symbols.insert(SectionAddress::new(ctors_section_index, 0), ObjSymbol { @@ -311,7 +315,11 @@ impl AnalysisPass for FindRelRodataData { return Ok(()); } - log::debug!("Found .rodata and .data: {:?}", possible_sections); + log::debug!( + "Found .rodata and .data: {}, {}", + possible_sections[0].0, + possible_sections[1].0 + ); let rodata_section_index = possible_sections[0].0; state.known_sections.insert(rodata_section_index, ".rodata".to_string()); diff --git a/src/analysis/signatures.rs b/src/analysis/signatures.rs index b6acd4b..d0b06fc 100644 --- a/src/analysis/signatures.rs +++ b/src/analysis/signatures.rs @@ -264,6 +264,8 @@ fn apply_ctors_signatures(obj: &mut ObjInfo) -> Result<()> { align: None, common: false, autogenerated: true, + skip: false, + rename: None, })?; } Ok(()) @@ -362,6 +364,8 @@ fn apply_dtors_signatures(obj: &mut ObjInfo) -> Result<()> { align: None, common: false, autogenerated: true, + skip: false, + rename: None, })?; } } @@ -439,6 +443,5 @@ pub fn apply_signatures_post(obj: &mut ObjInfo) -> Result<()> { apply_signature(obj, symbol_addr, &signature)?; } } - log::debug!("Done!"); Ok(()) } diff --git a/src/analysis/slices.rs b/src/analysis/slices.rs index 532093a..8fea366 100644 --- a/src/analysis/slices.rs +++ b/src/analysis/slices.rs @@ -74,7 +74,7 @@ fn check_sequence( impl FunctionSlices { pub fn end(&self) -> Option { - self.blocks.last_key_value().map(|(_, &end)| end).flatten() + self.blocks.last_key_value().and_then(|(_, &end)| end) } pub fn start(&self) -> Option { @@ -404,7 +404,7 @@ impl FunctionSlices { // Likely __noreturn } (None, Some(e)) => { - log::info!("{:#010X?}", self); + log::warn!("{:#010X?}", self); bail!("Unpaired epilogue {:#010X}", e); } } diff --git a/src/analysis/tracker.rs b/src/analysis/tracker.rs index 013ed49..bd36094 100644 --- a/src/analysis/tracker.rs +++ b/src/analysis/tracker.rs @@ -403,8 +403,9 @@ impl Tracker { from: SectionAddress, addr: u32, ) -> Option { - if let Some((&start, &end)) = obj.blocked_ranges.range(..=from.address).next_back() { - if from.address >= start && from.address < end { + if let Some((&start, &end)) = 
obj.blocked_ranges.range(..=from).next_back() { + if from.section == start.section && from.address >= start.address && from.address < end + { return None; } } diff --git a/src/cmd/dol.rs b/src/cmd/dol.rs index aef7be5..36e19da 100644 --- a/src/cmd/dol.rs +++ b/src/cmd/dol.rs @@ -12,7 +12,6 @@ use std::{ use anyhow::{anyhow, bail, Context, Result}; use argp::FromArgs; use itertools::Itertools; -use memmap2::Mmap; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use tracing::{debug, info, info_span}; @@ -28,7 +27,6 @@ use crate::{ signatures::{apply_signatures, apply_signatures_post}, tracker::Tracker, }, - cmd::shasum::file_sha1, obj::{ best_match_for_reloc, ObjDataKind, ObjInfo, ObjReloc, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, SymbolIndex, @@ -43,13 +41,13 @@ use crate::{ dep::DepFile, dol::process_dol, elf::{process_elf, write_elf}, - file::{buf_writer, map_file, map_reader, touch, Reader}, + file::{buf_writer, decompress_if_needed, map_file, touch, verify_hash, Reader}, lcf::{asm_path_for_unit, generate_ldscript, obj_path_for_unit}, map::apply_map_file, rel::process_rel, rso::{process_rso, DOL_SECTION_ABS, DOL_SECTION_NAMES}, split::{is_linker_generated_object, split_obj, update_splits}, - yaz0, + IntoCow, ToCow, }, }; @@ -225,7 +223,7 @@ impl ModuleConfig { pub fn file_prefix(&self) -> Cow<'_, str> { match self.file_name() { Cow::Borrowed(s) => { - Cow::Borrowed(s.split_once('.').map(|(prefix, _)| prefix).unwrap_or(&s)) + Cow::Borrowed(s.split_once('.').map(|(prefix, _)| prefix).unwrap_or(s)) } Cow::Owned(s) => { Cow::Owned(s.split_once('.').map(|(prefix, _)| prefix).unwrap_or(&s).to_string()) @@ -379,40 +377,32 @@ fn info(args: InfoArgs) -> Result<()> { ); } println!("\nDiscovered symbols:"); - println!("\t{: >23} | {: <10} | {: <10}", "Name", "Address", "Size"); + println!("\t{: >10} | {: <10} | {: <10} | {: <10}", "Section", "Address", "Size", "Name"); for (_, symbol) in obj.symbols.iter_ordered().chain(obj.symbols.iter_abs()) { - if symbol.name.starts_with('@') || is_auto_symbol(&symbol.name) { + if symbol.name.starts_with('@') || is_auto_symbol(symbol) { continue; } - if symbol.size_known { - println!("\t{: >23} | {:#010X} | {: <#10X}", symbol.name, symbol.address, symbol.size); + let section_str = if let Some(section) = symbol.section { + obj.sections[section].name.as_str() } else { - let size_str = if symbol.section.is_none() { "ABS" } else { "?" 
}; - println!("\t{: >23} | {:#010X} | {: <10}", symbol.name, symbol.address, size_str); - } + "ABS" + }; + let size_str = if symbol.size_known { + format!("{:#X}", symbol.size).into_cow() + } else if symbol.section.is_none() { + "ABS".to_cow() + } else { + "?".to_cow() + }; + println!( + "\t{: >10} | {: <#10X} | {: <10} | {: <10}", + section_str, symbol.address, size_str, symbol.name + ); } println!("\n{} discovered functions from exception table", obj.known_functions.len()); Ok(()) } -fn verify_hash>(path: P, hash_str: &str) -> Result<()> { - let mut hash_bytes = [0u8; 20]; - hex::decode_to_slice(hash_str, &mut hash_bytes) - .with_context(|| format!("Invalid SHA-1 '{hash_str}'"))?; - let file = File::open(path.as_ref()) - .with_context(|| format!("Failed to open file '{}'", path.as_ref().display()))?; - let found_hash = file_sha1(file)?; - if found_hash.as_ref() == hash_bytes { - Ok(()) - } else { - Err(anyhow!( - "Hash mismatch: expected {}, but was {}", - hex::encode(hash_bytes), - hex::encode(found_hash) - )) - } -} - type ModuleMap<'a> = BTreeMap; fn update_symbols(obj: &mut ObjInfo, modules: &ModuleMap<'_>) -> Result<()> { @@ -632,15 +622,9 @@ fn resolve_external_relocations( Ok(()) } -fn decompress_if_needed(map: &Mmap) -> Result> { - Ok(if map.len() > 4 && map[0..4] == *b"Yaz0" { - Cow::Owned(yaz0::decompress_file(&mut map_reader(map))?) - } else { - Cow::Borrowed(map) - }) -} +type AnalyzeResult = (ObjInfo, Vec); -fn load_analyze_dol(config: &ProjectConfig) -> Result<(ObjInfo, Vec)> { +fn load_analyze_dol(config: &ProjectConfig) -> Result { // log::info!("Loading {}", config.object.display()); if let Some(hash_str) = &config.base.hash { verify_hash(&config.base.object, hash_str)?; @@ -697,7 +681,7 @@ fn split_write_obj( obj: &mut ObjInfo, config: &ProjectConfig, module_config: &ModuleConfig, - out_dir: &PathBuf, + out_dir: &Path, no_update: bool, ) -> Result { debug!("Performing relocation analysis"); @@ -723,15 +707,15 @@ fn split_write_obj( if !no_update { debug!("Writing configuration"); if let Some(symbols_path) = &module_config.symbols { - write_symbols_file(symbols_path, &obj)?; + write_symbols_file(symbols_path, obj)?; } if let Some(splits_path) = &module_config.splits { - write_splits_file(splits_path, &obj, false)?; + write_splits_file(splits_path, obj, false)?; } } debug!("Splitting {} objects", obj.link_order.len()); - let split_objs = split_obj(&obj)?; + let split_objs = split_obj(obj)?; debug!("Writing object files"); let obj_dir = out_dir.join("obj"); @@ -757,7 +741,7 @@ fn split_write_obj( } // Generate ldscript.lcf - fs::write(&out_config.ldscript, generate_ldscript(&obj, config.auto_force_files)?)?; + fs::write(&out_config.ldscript, generate_ldscript(obj, config.auto_force_files)?)?; debug!("Writing disassembly"); let asm_dir = out_dir.join("asm"); @@ -772,17 +756,18 @@ fn split_write_obj( Ok(out_config) } -fn load_analyze_rel( - config: &ProjectConfig, - module_config: &ModuleConfig, -) -> Result<(ObjInfo, Vec)> { +fn load_analyze_rel(config: &ProjectConfig, module_config: &ModuleConfig) -> Result { debug!("Loading {}", module_config.object.display()); if let Some(hash_str) = &module_config.hash { verify_hash(&module_config.object, hash_str)?; } let map = map_file(&module_config.object)?; let buf = decompress_if_needed(&map)?; - let mut module_obj = process_rel(Reader::new(&buf))?; + let (_, mut module_obj) = process_rel(&mut Reader::new(buf.as_ref()))?; + + if let Some(comment_version) = config.mw_comment_version { + module_obj.mw_comment = 
Some(MWComment::new(comment_version)?); + } let mut dep = vec![module_config.object.clone()]; if let Some(map_path) = &module_config.map { @@ -833,8 +818,8 @@ fn split(args: SplitArgs) -> Result<()> { module_count, rayon::current_num_threads() ); - let mut dol_result: Option)>> = None; - let mut modules_result: Option)>>> = None; + let mut dol_result: Option> = None; + let mut modules_result: Option>> = None; let start = Instant::now(); rayon::scope(|s| { // DOL @@ -999,7 +984,7 @@ fn split(args: SplitArgs) -> Result<()> { // } let duration = command_start.elapsed(); - info!("Total duration: {}.{:03}s", duration.as_secs(), duration.subsec_millis()); + info!("Total time: {}.{:03}s", duration.as_secs(), duration.subsec_millis()); Ok(()) } @@ -1167,7 +1152,11 @@ fn diff(args: DiffArgs) -> Result<()> { log::info!("Loading {}", args.map_file.display()); apply_map_file(&args.map_file, &mut linked_obj)?; - for orig_sym in obj.symbols.iter().filter(|s| s.kind != ObjSymbolKind::Section) { + for orig_sym in obj + .symbols + .iter() + .filter(|s| !matches!(s.kind, ObjSymbolKind::Unknown | ObjSymbolKind::Section)) + { let Some(orig_section_index) = orig_sym.section else { continue }; let orig_section = &obj.sections[orig_section_index]; let (linked_section_index, linked_section) = @@ -1244,7 +1233,9 @@ fn diff(args: DiffArgs) -> Result<()> { } // Data diff - for orig_sym in obj.symbols.iter().filter(|s| s.kind != ObjSymbolKind::Section) { + for orig_sym in obj.symbols.iter().filter(|s| { + s.size > 0 && !matches!(s.kind, ObjSymbolKind::Unknown | ObjSymbolKind::Section) + }) { let Some(orig_section_index) = orig_sym.section else { continue }; let orig_section = &obj.sections[orig_section_index]; let (linked_section_index, linked_section) = diff --git a/src/cmd/elf.rs b/src/cmd/elf.rs index 60d622b..c1098bb 100644 --- a/src/cmd/elf.rs +++ b/src/cmd/elf.rs @@ -19,11 +19,13 @@ use crate::{ obj::ObjKind, util::{ asm::write_asm, + comment::{read_comment_sym, MWComment}, config::{write_splits_file, write_symbols_file}, elf::{process_elf, write_elf}, - file::{buf_writer, process_rsp}, + file::{buf_writer, process_rsp, Reader}, signatures::{compare_signature, generate_signature, FunctionSignature}, split::split_obj, + IntoCow, ToCow, }, }; @@ -43,6 +45,7 @@ enum SubCommand { Fixup(FixupArgs), Signatures(SignaturesArgs), Split(SplitArgs), + Info(InfoArgs), } #[derive(FromArgs, PartialEq, Eq, Debug)] @@ -108,6 +111,15 @@ pub struct SignaturesArgs { out_file: PathBuf, } +#[derive(FromArgs, PartialEq, Eq, Debug)] +/// Prints information about an ELF file. 
+#[argp(subcommand, name = "info")] +pub struct InfoArgs { + #[argp(positional)] + /// input file + input: PathBuf, +} + pub fn run(args: Args) -> Result<()> { match args.command { SubCommand::Config(c_args) => config(c_args), @@ -115,6 +127,7 @@ pub fn run(args: Args) -> Result<()> { SubCommand::Fixup(c_args) => fixup(c_args), SubCommand::Split(c_args) => split(c_args), SubCommand::Signatures(c_args) => signatures(c_args), + SubCommand::Info(c_args) => info(c_args), } } @@ -467,3 +480,120 @@ fn signatures(args: SignaturesArgs) -> Result<()> { out.flush()?; Ok(()) } + +fn info(args: InfoArgs) -> Result<()> { + let in_buf = fs::read(&args.input) + .with_context(|| format!("Failed to open input file: '{}'", args.input.display()))?; + let in_file = object::read::File::parse(&*in_buf).context("Failed to parse input ELF")?; + + println!("ELF type: {:?}", in_file.kind()); + println!("Section count: {}", in_file.sections().count()); + println!("Symbol count: {}", in_file.symbols().count()); + println!( + "Relocation count: {}", + in_file.sections().map(|s| s.relocations().count()).sum::() + ); + + println!("\nSections:"); + println!( + "{: >15} | {: <10} | {: <10} | {: <10} | {: <10}", + "Name", "Type", "Size", "File Off", "Index" + ); + for section in in_file.sections().skip(1) { + let kind_str = match section.kind() { + SectionKind::Text => "code".to_cow(), + SectionKind::Data => "data".to_cow(), + SectionKind::ReadOnlyData => "rodata".to_cow(), + SectionKind::UninitializedData => "bss".to_cow(), + SectionKind::Metadata => continue, // "metadata".to_cow() + SectionKind::Other => "other".to_cow(), + _ => format!("unknown: {:?}", section.kind()).into_cow(), + }; + println!( + "{: >15} | {: <10} | {: <#10X} | {: <#10X} | {: <10}", + section.name()?, + kind_str, + section.size(), + section.file_range().unwrap_or_default().0, + section.index().0 + ); + } + + println!("\nSymbols:"); + println!("{: >15} | {: <10} | {: <10} | {: <10}", "Section", "Address", "Size", "Name"); + for symbol in in_file.symbols().filter(|s| s.is_definition()) { + let section_str = if let Some(section) = symbol.section_index() { + in_file.section_by_index(section)?.name()?.to_string().into_cow() + } else { + "ABS".to_cow() + }; + let size_str = if symbol.section_index().is_none() { + "ABS".to_cow() + } else { + format!("{:#X}", symbol.size()).into_cow() + }; + println!( + "{: >15} | {: <#10X} | {: <10} | {: <10}", + section_str, + symbol.address(), + size_str, + symbol.name()? 
+ ); + } + + if let Some(comment_section) = in_file.section_by_name(".comment") { + let data = comment_section.uncompressed_data()?; + if !data.is_empty() { + let mut reader = Reader::new(&*data); + let header = + MWComment::parse_header(&mut reader).context("While reading .comment section")?; + println!("\nMetrowerks metadata (.comment):"); + println!("\tVersion: {}", header.version); + println!( + "\tCompiler version: {}.{}.{}.{}", + header.compiler_version[0], + header.compiler_version[1], + header.compiler_version[2], + header.compiler_version[3] + ); + println!("\tPool data: {}", header.pool_data); + println!("\tFloat: {:?}", header.float); + println!( + "\tProcessor: {}", + if header.processor == 0x16 { + "Gekko".to_cow() + } else { + format!("{:#X}", header.processor).into_cow() + } + ); + println!( + "\tIncompatible return small structs: {}", + header.incompatible_return_small_structs + ); + println!( + "\tIncompatible sfpe double params: {}", + header.incompatible_sfpe_double_params + ); + println!("\tUnsafe global reg vars: {}", header.unsafe_global_reg_vars); + println!("\n{: >10} | {: <6} | {: <6} | {: <10}", "Align", "Vis", "Active", "Symbol"); + for symbol in in_file.symbols() { + let comment_sym = read_comment_sym(&mut reader)?; + if symbol.is_definition() { + println!( + "{: >10} | {: <#6X} | {: <#6X} | {: <10}", + comment_sym.align, + comment_sym.vis_flags, + comment_sym.active_flags, + symbol.name()? + ); + } + } + ensure!( + data.len() - reader.position() as usize == 0, + ".comment section data not fully read" + ); + } + } + + Ok(()) +} diff --git a/src/cmd/rel.rs b/src/cmd/rel.rs index 8eb26ba..f0b97fa 100644 --- a/src/cmd/rel.rs +++ b/src/cmd/rel.rs @@ -1,27 +1,45 @@ use std::{ collections::{btree_map, BTreeMap}, + ffi::OsStr, fs, + io::Write, path::PathBuf, + time::Instant, }; -use anyhow::{bail, ensure, Context, Result}; +use anyhow::{anyhow, bail, ensure, Context, Result}; use argp::FromArgs; +use object::{ + Architecture, Endianness, Object, ObjectSection, ObjectSymbol, RelocationTarget, SymbolIndex, +}; +use rayon::prelude::*; +use rustc_hash::FxHashMap; +use tracing::{info, info_span}; use crate::{ analysis::{ cfa::{AnalyzerState, SectionAddress}, - pass::{AnalysisPass, FindSaveRestSleds, FindTRKInterruptVectorTable}, + pass::{ + AnalysisPass, FindRelCtorsDtors, FindRelRodataData, FindSaveRestSleds, + FindTRKInterruptVectorTable, + }, signatures::{apply_signatures, apply_signatures_post}, tracker::Tracker, }, array_ref_mut, - obj::{ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSymbol}, + cmd::dol::ProjectConfig, + obj::{ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol}, util::{ + config::is_auto_symbol, dol::process_dol, - elf::write_elf, - file::{map_file, map_reader, FileIterator}, + elf::{to_obj_reloc_kind, write_elf}, + file::{ + buf_reader, buf_writer, decompress_if_needed, map_file, process_rsp, verify_hash, + FileIterator, Reader, + }, nested::NestedMap, - rel::process_rel, + rel::{process_rel, process_rel_header, write_rel, RelHeader, RelReloc, RelWriteInfo}, + IntoCow, ToCow, }, }; @@ -37,6 +55,7 @@ pub struct Args { #[argp(subcommand)] enum SubCommand { Info(InfoArgs), + Make(MakeArgs), Merge(MergeArgs), } @@ -64,17 +83,227 @@ pub struct MergeArgs { out_file: PathBuf, } +#[derive(FromArgs, PartialEq, Eq, Debug)] +/// Creates RELs from an ELF + PLF(s). 
+#[argp(subcommand, name = "make")] +pub struct MakeArgs { + #[argp(positional)] + /// input file(s) + files: Vec, + #[argp(option, short = 'c')] + /// (optional) project configuration file + config: Option, +} + pub fn run(args: Args) -> Result<()> { match args.command { SubCommand::Info(c_args) => info(c_args), SubCommand::Merge(c_args) => merge(c_args), + SubCommand::Make(c_args) => make(c_args), } } +fn load_obj(buf: &[u8]) -> Result { + let obj = object::read::File::parse(buf)?; + match obj.architecture() { + Architecture::PowerPc => {} + arch => bail!("Unexpected architecture: {arch:?}"), + }; + ensure!(obj.endianness() == Endianness::Big, "Expected big endian"); + Ok(obj) +} + +fn make(args: MakeArgs) -> Result<()> { + let total = Instant::now(); + + // Load existing REL headers (if specified) + let mut existing_headers = BTreeMap::::new(); + if let Some(config_path) = &args.config { + let config: ProjectConfig = serde_yaml::from_reader(&mut buf_reader(config_path)?)?; + for module_config in &config.modules { + if let Some(hash_str) = &module_config.hash { + verify_hash(&module_config.object, hash_str)?; + } + let map = map_file(&module_config.object)?; + let buf = decompress_if_needed(&map)?; + let header = process_rel_header(&mut Reader::new(buf.as_ref()))?; + existing_headers.insert(header.module_id, header); + } + } + + let files = process_rsp(&args.files)?; + info!("Loading {} modules", files.len()); + + // Load all modules + let handles = files.iter().map(map_file).collect::>>()?; + let modules = handles + .par_iter() + .zip(&files) + .map(|(map, path)| { + load_obj(map).with_context(|| format!("Failed to load '{}'", path.display())) + }) + .collect::>>()?; + + // Create symbol map + let start = Instant::now(); + let mut symbol_map = FxHashMap::<&[u8], (usize, SymbolIndex)>::default(); + for (module_id, module) in modules.iter().enumerate() { + for symbol in module.symbols() { + if symbol.is_definition() && symbol.scope() == object::SymbolScope::Dynamic { + symbol_map.entry(symbol.name_bytes()?).or_insert((module_id, symbol.index())); + } + } + } + + // Resolve relocations + let mut resolved = 0usize; + let mut relocations = Vec::>::with_capacity(modules.len() - 1); + relocations.resize_with(modules.len() - 1, Vec::new); + for ((module_id, module), relocations) in + modules.iter().enumerate().skip(1).zip(&mut relocations) + { + for section in module.sections() { + for (address, reloc) in section.relocations() { + let reloc_target = match reloc.target() { + RelocationTarget::Symbol(idx) => { + module.symbol_by_index(idx).with_context(|| { + format!("Relocation against invalid symbol index {}", idx.0) + })? + } + reloc_target => bail!("Unsupported relocation target: {reloc_target:?}"), + }; + let (target_module_id, target_symbol) = if reloc_target.is_undefined() { + resolved += 1; + symbol_map + .get(reloc_target.name_bytes()?) + .map(|&(module_id, symbol_idx)| { + (module_id, modules[module_id].symbol_by_index(symbol_idx).unwrap()) + }) + .ok_or_else(|| { + anyhow!( + "Failed to find symbol {} in any module", + reloc_target.name().unwrap_or("[invalid]") + ) + })? 
+ } else { + (module_id, reloc_target) + }; + relocations.push(RelReloc { + kind: to_obj_reloc_kind(reloc.kind())?, + section: section.index().0 as u8, + address: address as u32, + module_id: target_module_id as u32, + target_section: target_symbol.section_index().unwrap().0 as u8, + addend: target_symbol.address() as u32, + }); + } + } + } + + let duration = start.elapsed(); + info!( + "Symbol resolution completed in {}.{:03}s (resolved {} symbols)", + duration.as_secs(), + duration.subsec_millis(), + resolved + ); + + // Write RELs + let start = Instant::now(); + for (((module_id, module), path), relocations) in + modules.iter().enumerate().zip(&files).skip(1).zip(relocations) + { + let name = + path.file_stem().unwrap_or(OsStr::new("[unknown]")).to_str().unwrap_or("[invalid]"); + let _span = info_span!("module", name = %name).entered(); + let mut info = RelWriteInfo { + module_id: module_id as u32, + version: 3, + name_offset: None, + name_size: None, + align: None, + bss_align: None, + section_count: None, + }; + if let Some(existing_module) = existing_headers.get(&(module_id as u32)) { + info.version = existing_module.version; + info.name_offset = Some(existing_module.name_offset); + info.name_size = Some(existing_module.name_size); + info.align = existing_module.align; + info.bss_align = existing_module.bss_align; + info.section_count = Some(existing_module.num_sections as usize); + } + let rel_path = path.with_extension("rel"); + let mut w = buf_writer(&rel_path)?; + write_rel(&mut w, &info, module, relocations) + .with_context(|| format!("Failed to write '{}'", rel_path.display()))?; + w.flush()?; + } + let duration = start.elapsed(); + info!("RELs written in {}.{:03}s", duration.as_secs(), duration.subsec_millis()); + + let duration = total.elapsed(); + info!("Total time: {}.{:03}s", duration.as_secs(), duration.subsec_millis()); + Ok(()) +} + fn info(args: InfoArgs) -> Result<()> { let map = map_file(args.rel_file)?; - let rel = process_rel(map_reader(&map))?; - println!("Read REL module ID {}", rel.module_id); + let buf = decompress_if_needed(&map)?; + let (header, mut module_obj) = process_rel(&mut Reader::new(buf.as_ref()))?; + + let mut state = AnalyzerState::default(); + state.detect_functions(&module_obj)?; + FindRelCtorsDtors::execute(&mut state, &module_obj)?; + FindRelRodataData::execute(&mut state, &module_obj)?; + state.apply(&mut module_obj)?; + + apply_signatures(&mut module_obj)?; + apply_signatures_post(&mut module_obj)?; + + println!("REL module ID: {}", header.module_id); + println!("REL version: {}", header.version); + println!("Original section count: {}", header.num_sections); + println!("\nSections:"); + println!( + "{: >10} | {: <10} | {: <10} | {: <10} | {: <10}", + "Name", "Type", "Size", "File Off", "Index" + ); + for (_, section) in module_obj.sections.iter() { + let kind_str = match section.kind { + ObjSectionKind::Code => "code", + ObjSectionKind::Data => "data", + ObjSectionKind::ReadOnlyData => "rodata", + ObjSectionKind::Bss => "bss", + }; + println!( + "{: >10} | {: <10} | {: <#10X} | {: <#10X} | {: <10}", + section.name, kind_str, section.size, section.file_offset, section.elf_index + ); + } + println!("\nDiscovered symbols:"); + println!("{: >10} | {: <10} | {: <10} | {: <10}", "Section", "Address", "Size", "Name"); + for (_, symbol) in module_obj.symbols.iter_ordered() { + if symbol.name.starts_with('@') || is_auto_symbol(symbol) { + continue; + } + let section_str = if let Some(section) = symbol.section { + 
module_obj.sections[section].name.as_str() + } else { + "ABS" + }; + let size_str = if symbol.size_known { + format!("{:#X}", symbol.size).into_cow() + } else if symbol.section.is_none() { + "ABS".to_cow() + } else { + "?".to_cow() + }; + println!( + "{: >10} | {: <#10X} | {: <10} | {: <10}", + section_str, symbol.address, size_str, symbol.name + ); + } Ok(()) } @@ -94,7 +323,7 @@ fn merge(args: MergeArgs) -> Result<()> { for result in FileIterator::new(&args.rel_files)? { let (path, entry) = result?; log::info!("Loading {}", path.display()); - let obj = process_rel(entry.as_reader())?; + let (_, obj) = process_rel(&mut entry.as_reader())?; match module_map.entry(obj.module_id) { btree_map::Entry::Vacant(e) => e.insert(obj), btree_map::Entry::Occupied(_) => bail!("Duplicate module ID {}", obj.module_id), diff --git a/src/cmd/shasum.rs b/src/cmd/shasum.rs index af44bc5..5c7ddf3 100644 --- a/src/cmd/shasum.rs +++ b/src/cmd/shasum.rs @@ -6,6 +6,7 @@ use std::{ use anyhow::{anyhow, bail, Context, Result}; use argp::FromArgs; +use owo_colors::OwoColorize; use sha1::{Digest, Sha1}; use crate::util::file::{process_rsp, touch}; @@ -66,14 +67,17 @@ fn check(file: File) -> Result<()> { File::open(file_name).with_context(|| format!("Failed to open file '{file_name}'"))?; let found_hash = file_sha1(file)?; if hash_bytes == found_hash.as_ref() { - println!("{file_name}: OK"); + println!("{}: {}", file_name, "OK".green()); } else { - println!("{file_name}: FAILED"); + println!("{}: {}", file_name, "FAILED".red()); mismatches += 1; } } if mismatches != 0 { - eprintln!("WARNING: {mismatches} computed checksum did NOT match"); + eprintln!( + "{}", + format!("WARNING: {mismatches} computed checksum(s) did NOT match").yellow() + ); std::process::exit(1); } Ok(()) diff --git a/src/main.rs b/src/main.rs index e11d186..b932e47 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,8 @@ use std::{ffi::OsStr, path::PathBuf, str::FromStr}; use argp::{FromArgValue, FromArgs}; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::EnvFilter; pub mod analysis; pub mod argp_version; @@ -60,10 +62,10 @@ struct TopLevel { #[argp(option, short = 'C')] /// Change working directory. chdir: Option, - #[argp(option, short = 'L', default = "LogLevel::Info")] + #[argp(option, short = 'L')] /// Minimum logging level. (Default: info) /// Possible values: error, warn, info, debug, trace - log_level: LogLevel, + log_level: Option, /// Print version information and exit. 
#[argp(switch, short = 'V')] version: bool, @@ -86,11 +88,29 @@ enum SubCommand { } fn main() { - let format = tracing_subscriber::fmt::format().with_target(false).without_time(); - tracing_subscriber::fmt().event_format(format).init(); - // TODO reimplement log level selection - let args: TopLevel = argp_version::from_env(); + let format = tracing_subscriber::fmt::format().with_target(false).without_time(); + let builder = tracing_subscriber::fmt().event_format(format); + if let Some(level) = args.log_level { + builder + .with_max_level(match level { + LogLevel::Error => LevelFilter::ERROR, + LogLevel::Warn => LevelFilter::WARN, + LogLevel::Info => LevelFilter::INFO, + LogLevel::Debug => LevelFilter::DEBUG, + LogLevel::Trace => LevelFilter::TRACE, + }) + .init(); + } else { + builder + .with_env_filter( + EnvFilter::builder() + .with_default_directive(LevelFilter::INFO.into()) + .from_env_lossy(), + ) + .init(); + } + let mut result = Ok(()); if let Some(dir) = &args.chdir { result = std::env::set_current_dir(dir).map_err(|e| { diff --git a/src/obj/mod.rs b/src/obj/mod.rs index 5bdacb9..4cac4fe 100644 --- a/src/obj/mod.rs +++ b/src/obj/mod.rs @@ -18,7 +18,10 @@ pub use symbols::{ ObjSymbolScope, ObjSymbols, SymbolIndex, }; -use crate::util::{comment::MWComment, rel::RelReloc}; +use crate::{ + analysis::cfa::SectionAddress, + util::{comment::MWComment, rel::RelReloc}, +}; #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub enum ObjKind { @@ -63,12 +66,11 @@ pub struct ObjInfo { pub arena_hi: Option, // Extracted - pub named_sections: BTreeMap, pub link_order: Vec, - pub blocked_ranges: BTreeMap, // start -> end + pub blocked_ranges: BTreeMap, // start -> end // From extab - pub known_functions: BTreeMap, + pub known_functions: BTreeMap, // REL /// Module ID (0 for main) @@ -99,8 +101,6 @@ impl ObjInfo { db_stack_addr: None, arena_lo: None, arena_hi: None, - // splits: Default::default(), - named_sections: Default::default(), link_order: vec![], blocked_ranges: Default::default(), known_functions: Default::default(), @@ -276,6 +276,8 @@ impl ObjInfo { align: new_align, common: split.common, autogenerated: new_autogenerated, + skip: false, // ? + rename: None, // ? })?; return Ok(()); } diff --git a/src/obj/splits.rs b/src/obj/splits.rs index 88e61d9..7c0bb1b 100644 --- a/src/obj/splits.rs +++ b/src/obj/splits.rs @@ -15,6 +15,10 @@ pub struct ObjSplit { pub common: bool, /// Generated, replaceable by user. pub autogenerated: bool, + /// Skip when emitting the split object. + pub skip: bool, + /// Override the section name in the split object. (e.g. `.ctors$10`) + pub rename: Option, } /// Splits within a section. diff --git a/src/obj/symbols.rs b/src/obj/symbols.rs index fdc24cb..8dfbcd4 100644 --- a/src/obj/symbols.rs +++ b/src/obj/symbols.rs @@ -37,6 +37,8 @@ flags! 
{ ForceActive, /// Symbol isn't referenced by any relocations RelocationIgnore, + /// Symbol won't be written to symbols file + NoWrite, } } @@ -78,6 +80,9 @@ impl ObjSymbolFlagSet { #[inline] pub fn is_relocation_ignore(&self) -> bool { self.0.contains(ObjSymbolFlags::RelocationIgnore) } + #[inline] + pub fn is_no_write(&self) -> bool { self.0.contains(ObjSymbolFlags::NoWrite) } + #[inline] pub fn set_scope(&mut self, scope: ObjSymbolScope) { match scope { @@ -196,7 +201,7 @@ impl ObjSymbols { self.at_section_address(section_index, in_symbol.address as u32).find(|(_, symbol)| { symbol.kind == in_symbol.kind || // Replace auto symbols with real symbols - (symbol.kind == ObjSymbolKind::Unknown && is_auto_symbol(&symbol.name)) + (symbol.kind == ObjSymbolKind::Unknown && is_auto_symbol(symbol)) }) } else if self.obj_kind == ObjKind::Executable { // TODO hmmm @@ -205,6 +210,7 @@ impl ObjSymbols { bail!("ABS symbol in relocatable object: {:?}", in_symbol); }; let target_symbol_idx = if let Some((symbol_idx, existing)) = opt { + let replace = replace || (is_auto_symbol(existing) && !is_auto_symbol(&in_symbol)); let size = if existing.size_known && in_symbol.size_known && existing.size != in_symbol.size { // TODO fix and promote back to warning diff --git a/src/util/asm.rs b/src/util/asm.rs index 9f80610..9778b90 100644 --- a/src/util/asm.rs +++ b/src/util/asm.rs @@ -4,7 +4,7 @@ use std::{ io::Write, }; -use anyhow::{anyhow, bail, ensure, Result}; +use anyhow::{anyhow, bail, ensure, Context, Result}; use itertools::Itertools; use ppc750cl::{disasm_iter, Argument, Ins, Opcode}; @@ -438,7 +438,8 @@ fn write_data( write_symbol_entry(w, symbols, entry)?; } current_symbol_kind = find_symbol_kind(current_symbol_kind, symbols, vec)?; - current_data_kind = find_data_kind(current_data_kind, symbols, vec)?; + current_data_kind = find_data_kind(current_data_kind, symbols, vec) + .with_context(|| format!("At address {:#010X}", sym_addr))?; entry = entry_iter.next(); } else if current_address > sym_addr { let dbg_symbols = vec.iter().map(|e| &symbols[e.index]).collect_vec(); @@ -550,10 +551,16 @@ fn find_data_kind( SymbolEntryKind::Start => { let new_kind = symbols[entry.index].data_kind; if !matches!(new_kind, ObjDataKind::Unknown) { - ensure!( - !found || new_kind == kind, - "Conflicting data kinds found: {kind:?} and {new_kind:?}" - ); + if found && new_kind != kind { + for entry in entries { + log::error!("Symbol {:?}", symbols[entry.index]); + } + bail!( + "Conflicting data kinds found: {kind:?} and {new_kind:?}", + kind = kind, + new_kind = new_kind + ); + } found = true; kind = new_kind; } diff --git a/src/util/comment.rs b/src/util/comment.rs index 6449951..25b6ccf 100644 --- a/src/util/comment.rs +++ b/src/util/comment.rs @@ -169,7 +169,7 @@ pub struct CommentSym { } impl CommentSym { - pub fn from(symbol: &ObjSymbol) -> Self { + pub fn from(symbol: &ObjSymbol, force_active: bool) -> Self { let align = match symbol.align { Some(align) => align, None => { @@ -196,8 +196,12 @@ impl CommentSym { vis_flags |= 0xD; } let mut active_flags = 0; - if symbol.flags.is_force_active() { - active_flags |= 0x8; // TODO what is 0x10? 
+ if symbol.flags.is_force_active() + || (force_active + && matches!(symbol.kind, ObjSymbolKind::Function | ObjSymbolKind::Object) + && symbol.flags.is_global()) + { + active_flags |= 0x8; } Self { align, vis_flags, active_flags } } diff --git a/src/util/config.rs b/src/util/config.rs index e6e1fc0..0a23e6d 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -11,6 +11,7 @@ use once_cell::sync::Lazy; use regex::{Captures, Regex}; use crate::{ + analysis::cfa::SectionAddress, obj::{ ObjDataKind, ObjInfo, ObjKind, ObjSectionKind, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjUnit, @@ -118,7 +119,13 @@ pub fn parse_symbol_line(line: &str, obj: &mut ObjInfo) -> Result bail!("Unknown symbol attribute '{attr}'"), } @@ -133,7 +140,9 @@ pub fn parse_symbol_line(line: &str, obj: &mut ObjInfo) -> Result bool { - let _ = symbol; + if symbol.flags.is_no_write() { + return true; + } // symbol.name.starts_with("lbl_") // || symbol.name.starts_with("func_") // || symbol.name.starts_with("switch_") @@ -142,7 +151,9 @@ pub fn is_skip_symbol(symbol: &ObjSymbol) -> bool { false } -pub fn is_auto_symbol(name: &str) -> bool { name.starts_with("lbl_") || name.starts_with("fn_") } +pub fn is_auto_symbol(symbol: &ObjSymbol) -> bool { + symbol.name.starts_with("lbl_") || symbol.name.starts_with("fn_") +} #[inline] pub fn write_symbols_file>(path: P, obj: &ObjInfo) -> Result<()> { @@ -188,8 +199,10 @@ fn write_symbol(w: &mut W, obj: &ObjInfo, symbol: &ObjSymbol) -> Resul // if symbol.flags.is_force_active() { // write!(w, " force_active")?; // } - if obj.blocked_ranges.contains_key(&(symbol.address as u32)) { - write!(w, " noreloc")?; + if let Some(section) = symbol.section { + if obj.blocked_ranges.contains_key(&SectionAddress::new(section, symbol.address as u32)) { + write!(w, " noreloc")?; + } } writeln!(w)?; Ok(()) @@ -335,10 +348,11 @@ pub fn write_splits(w: &mut W, obj: &ObjInfo, all: bool) -> Result<()> if split.common { write!(w, " common")?; } - if let Some(name) = obj.named_sections.get(&addr) { - if name != §ion.name { - write!(w, " rename:{}", name)?; - } + if let Some(name) = &split.rename { + write!(w, " rename:{}", name)?; + } + if split.skip { + write!(w, " skip")?; } writeln!(w)?; } @@ -354,6 +368,7 @@ struct SplitSection { /// Whether this is a part of common BSS. 
common: bool, rename: Option, + skip: bool, } struct SplitUnit { @@ -443,6 +458,8 @@ fn parse_section_line(captures: Captures, state: &SplitState) -> Result Result section.start = parse_hex(value)?, - "end" => section.end = parse_hex(value)?, + "start" => start = Some(parse_hex(value)?), + "end" => end = Some(parse_hex(value)?), "align" => section.align = Some(u32::from_str(value)?), "rename" => section.rename = Some(value.to_string()), _ => bail!("Unknown split attribute '{attr}'"), @@ -469,11 +487,14 @@ fn parse_section_line(captures: Captures, state: &SplitState) -> Result section.skip = true, _ => bail!("Unknown split attribute '{attr}'"), } } } - if section.start > 0 && section.end > 0 { + if let (Some(start), Some(end)) = (start, end) { + section.start = start; + section.end = end; Ok(SplitLine::UnitSection(section)) } else { Err(anyhow!("Section '{}' missing start or end address", section.name)) @@ -531,7 +552,7 @@ pub fn apply_splits(r: R, obj: &mut ObjInfo) -> Result<()> { obj.sections.count() ); }; - if let Err(_) = obj_section.rename(name.clone()) { + if obj_section.rename(name.clone()).is_err() { // Manual section obj_section.kind = kind.ok_or_else(|| anyhow!("Section '{}' missing type", name))?; @@ -545,7 +566,15 @@ pub fn apply_splits(r: R, obj: &mut ObjInfo) -> Result<()> { } ( SplitState::Unit(unit), - SplitLine::UnitSection(SplitSection { name, start, end, align, common, rename }), + SplitLine::UnitSection(SplitSection { + name, + start, + end, + align, + common, + rename, + skip, + }), ) => { let (section_index, _) = match obj.sections.by_name(&name)? { Some(v) => Ok(v), @@ -573,10 +602,9 @@ pub fn apply_splits(r: R, obj: &mut ObjInfo) -> Result<()> { align, common, autogenerated: false, + skip, + rename, }); - if let Some(name) = rename { - obj.named_sections.insert(start, name); - } } _ => {} } diff --git a/src/util/dol.rs b/src/util/dol.rs index 24441e5..d4399ec 100644 --- a/src/util/dol.rs +++ b/src/util/dol.rs @@ -4,7 +4,7 @@ use anyhow::{anyhow, bail, ensure, Result}; use dol::{Dol, DolSection, DolSectionType}; use crate::{ - analysis::cfa::locate_sda_bases, + analysis::cfa::{locate_sda_bases, SectionAddress}, obj::{ ObjArchitecture, ObjInfo, ObjKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, @@ -406,8 +406,15 @@ pub fn process_dol>(path: P) -> Result { for entry in &eti_entries { // Add functions from extabindex entries as known function bounds - if let Some(old_value) = obj.known_functions.insert(entry.function, entry.function_size) - { + let (section_index, _) = obj.sections.at_address(entry.function).map_err(|_| { + anyhow!( + "Failed to locate section for function {:#010X} (referenced from extabindex entry {:#010X})", + entry.function, + entry.address, + ) + })?; + let addr = SectionAddress::new(section_index, entry.function); + if let Some(old_value) = obj.known_functions.insert(addr, entry.function_size) { if old_value != entry.function_size { log::warn!( "Conflicting sizes for {:#010X}: {:#X} != {:#X}", diff --git a/src/util/elf.rs b/src/util/elf.rs index 0db4ec7..8761bea 100644 --- a/src/util/elf.rs +++ b/src/util/elf.rs @@ -304,6 +304,8 @@ pub fn process_elf>(path: P) -> Result { align: None, common: false, // TODO autogenerated: false, + skip: false, + rename: None, }); } } @@ -483,13 +485,19 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { } } - // Add symbols - for (symbol, symbol_map) in obj.symbols.iter().zip(&mut symbol_map) { + // Add symbols, starting with local symbols + for (symbol_index, symbol) in 
obj + .symbols + .iter() + .enumerate() + .filter(|&(_, s)| s.flags.is_local()) + .chain(obj.symbols.iter().enumerate().filter(|&(_, s)| !s.flags.is_local())) + { if obj.kind == ObjKind::Relocatable && symbol.kind == ObjSymbolKind::Section { // We wrote section symbols above, so skip them here let section_index = symbol.section.ok_or_else(|| anyhow!("section symbol without section index"))?; - *symbol_map = Some(section_symbol_offset + section_index as u32); + symbol_map[symbol_index] = Some(section_symbol_offset + section_index as u32); continue; } @@ -536,9 +544,9 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { num_local = writer.symbol_count(); } out_symbols.push(OutSymbol { index, sym }); - *symbol_map = Some(index.0); + symbol_map[symbol_index] = Some(index.0); if let Some(comment_data) = &mut comment_data { - write_comment_sym(comment_data, CommentSym::from(symbol))?; + write_comment_sym(comment_data, CommentSym::from(symbol, true))?; } } @@ -807,14 +815,8 @@ fn to_obj_symbol( }) } -fn to_obj_reloc( - obj_file: &object::File<'_>, - symbol_indexes: &[Option], - section_data: &[u8], - address: u64, - reloc: Relocation, -) -> Result> { - let reloc_kind = match reloc.kind() { +pub fn to_obj_reloc_kind(kind: RelocationKind) -> Result { + Ok(match kind { RelocationKind::Absolute => ObjRelocKind::Absolute, RelocationKind::Elf(kind) => match kind { elf::R_PPC_ADDR16_LO => ObjRelocKind::PpcAddr16Lo, @@ -823,10 +825,20 @@ fn to_obj_reloc( elf::R_PPC_REL24 => ObjRelocKind::PpcRel24, elf::R_PPC_REL14 => ObjRelocKind::PpcRel14, elf::R_PPC_EMB_SDA21 => ObjRelocKind::PpcEmbSda21, - _ => bail!("Unhandled PPC relocation type: {kind}"), + _ => bail!("Unhandled ELF relocation type: {kind}"), }, - _ => bail!("Unhandled relocation type: {:?}", reloc.kind()), - }; + _ => bail!("Unhandled relocation type: {:?}", kind), + }) +} + +fn to_obj_reloc( + obj_file: &object::File<'_>, + symbol_indexes: &[Option], + section_data: &[u8], + address: u64, + reloc: Relocation, +) -> Result> { + let reloc_kind = to_obj_reloc_kind(reloc.kind())?; let symbol = match reloc.target() { RelocationTarget::Symbol(idx) => { obj_file.symbol_by_index(idx).context("Failed to locate relocation target symbol")? diff --git a/src/util/file.rs b/src/util/file.rs index 63a0acf..e4c20fb 100644 --- a/src/util/file.rs +++ b/src/util/file.rs @@ -1,4 +1,5 @@ use std::{ + borrow::Cow, fs::{DirBuilder, File, OpenOptions}, io::{BufRead, BufReader, BufWriter, Cursor, Read}, path::{Path, PathBuf}, @@ -10,7 +11,10 @@ use filetime::{set_file_mtime, FileTime}; use memmap2::{Mmap, MmapOptions}; use path_slash::PathBufExt; -use crate::util::{rarc, rarc::Node, yaz0}; +use crate::{ + cmd::shasum::file_sha1, + util::{rarc, rarc::Node, yaz0, IntoCow, ToCow}, +}; /// Opens a memory mapped file. pub fn map_file>(path: P) -> Result { @@ -25,7 +29,7 @@ pub type Reader<'a> = Cursor<&'a [u8]>; /// Creates a reader for the memory mapped file. #[inline] -pub fn map_reader(mmap: &Mmap) -> Reader { Cursor::new(&**mmap) } +pub fn map_reader(mmap: &Mmap) -> Reader { Reader::new(&**mmap) } /// Creates a buffered reader around a file (not memory mapped). 
pub fn buf_reader>(path: P) -> Result> { @@ -130,14 +134,6 @@ impl RarcIterator { } paths } - - fn decompress_if_needed(buf: &[u8]) -> Result> { - if buf.len() > 4 && buf[0..4] == *b"Yaz0" { - yaz0::decompress_file(&mut Cursor::new(buf)) - } else { - Ok(buf.to_vec()) - } - } } impl Iterator for RarcIterator { @@ -152,8 +148,8 @@ impl Iterator for RarcIterator { self.index += 1; let slice = &self.file[off as usize..off as usize + size as usize]; - match Self::decompress_if_needed(slice) { - Ok(buf) => Some(Ok((path, buf))), + match decompress_if_needed(slice) { + Ok(buf) => Some(Ok((path, buf.into_owned()))), Err(e) => Some(Err(e)), } } @@ -170,7 +166,7 @@ impl FileEntry { pub fn as_reader(&self) -> Reader { match self { Self::Map(map) => map_reader(map), - Self::Buffer(slice) => Cursor::new(slice), + Self::Buffer(slice) => Reader::new(slice), } } } @@ -257,3 +253,29 @@ pub fn touch>(path: P) -> std::io::Result<()> { } } } + +pub fn decompress_if_needed(buf: &[u8]) -> Result> { + Ok(if buf.len() > 4 && buf[0..4] == *b"Yaz0" { + yaz0::decompress_file(&mut Reader::new(buf))?.into_cow() + } else { + buf.to_cow() + }) +} + +pub fn verify_hash>(path: P, hash_str: &str) -> Result<()> { + let mut hash_bytes = [0u8; 20]; + hex::decode_to_slice(hash_str, &mut hash_bytes) + .with_context(|| format!("Invalid SHA-1 '{hash_str}'"))?; + let file = File::open(path.as_ref()) + .with_context(|| format!("Failed to open file '{}'", path.as_ref().display()))?; + let found_hash = file_sha1(file)?; + if found_hash.as_ref() == hash_bytes { + Ok(()) + } else { + Err(anyhow!( + "Hash mismatch: expected {}, but was {}", + hex::encode(hash_bytes), + hex::encode(found_hash) + )) + } +} diff --git a/src/util/lcf.rs b/src/util/lcf.rs index cb672a1..8c22ff1 100644 --- a/src/util/lcf.rs +++ b/src/util/lcf.rs @@ -56,7 +56,8 @@ pub fn generate_ldscript(obj: &ObjInfo, auto_force_files: bool) -> Result Resul let mut force_active = vec![]; for symbol in obj.symbols.iter() { - if symbol.flags.is_force_active() && symbol.flags.is_global() { + if symbol.flags.is_force_active() && symbol.flags.is_global() && !symbol.flags.is_no_write() + { force_active.push(symbol.name.clone()); } } diff --git a/src/util/map.rs b/src/util/map.rs index ea7c57b..4d80db9 100644 --- a/src/util/map.rs +++ b/src/util/map.rs @@ -565,7 +565,6 @@ pub fn apply_map_file>(path: P, obj: &mut ObjInfo) -> Result<()> pub fn apply_map(result: &MapInfo, obj: &mut ObjInfo) -> Result<()> { for (section_index, section) in obj.sections.iter_mut() { - log::info!("Section {}: {} ({:?})", section_index, section.name, result.sections); let opt = if obj.kind == ObjKind::Executable { result.sections.iter().find(|s| s.address == section.address as u32) } else { @@ -628,6 +627,8 @@ pub fn apply_map(result: &MapInfo, obj: &mut ObjInfo) -> Result<()> { align: None, common: false, autogenerated: false, + skip: false, + rename: None, }); } } diff --git a/src/util/mod.rs b/src/util/mod.rs index 5dd1897..0aac14d 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,3 +1,5 @@ +use std::{borrow::Cow, ops::Deref}; + pub mod asm; pub mod comment; pub mod config; @@ -39,3 +41,29 @@ macro_rules! 
array_ref_mut { to_array_mut(&mut $slice[$offset..$offset + $size]) }}; } + +pub trait IntoCow<'a, B> +where B: ToOwned + ?Sized +{ + fn into_cow(self) -> Cow<'a, B>; +} + +pub trait ToCow<'a, B> +where B: ToOwned + ?Sized +{ + fn to_cow(&'a self) -> Cow<'a, B>; +} + +impl<'a, O> IntoCow<'a, ::Target> for O +where + O: Deref + Clone + 'a, + ::Target: ToOwned, +{ + fn into_cow(self) -> Cow<'a, ::Target> { Cow::Owned(self) } +} + +impl<'a, B> ToCow<'a, B> for B +where B: ToOwned + ?Sized +{ + fn to_cow(&'a self) -> Cow<'a, B> { Cow::Borrowed(self) } +} diff --git a/src/util/rel.rs b/src/util/rel.rs index da9148a..ab1a512 100644 --- a/src/util/rel.rs +++ b/src/util/rel.rs @@ -1,15 +1,21 @@ -use std::io::Read; +use std::{ + cmp::Ordering, + io::{Read, Seek, Write}, +}; -use anyhow::{anyhow, bail, ensure, Result}; -use byteorder::{BigEndian, ReadBytesExt}; -use object::elf; +use anyhow::{anyhow, bail, ensure, Context, Result}; +use binrw::{binrw, io::NoSeek, BinRead, BinWrite}; +use itertools::Itertools; +use object::{elf, Object, ObjectSection, ObjectSymbol}; +use tracing::warn; use crate::{ + array_ref_mut, obj::{ ObjArchitecture, ObjInfo, ObjKind, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, }, - util::file::Reader, + util::{file::Reader, IntoCow}, }; /// Do not relocate anything, but accumulate the offset field for the next relocation offset calculation. @@ -24,48 +30,130 @@ pub const R_DOLPHIN_END: u32 = 203; #[allow(unused)] pub const R_DOLPHIN_MRKREF: u32 = 204; -pub fn process_rel(mut reader: Reader) -> Result { - let module_id = reader.read_u32::()?; - ensure!(reader.read_u32::()? == 0, "Expected 'next' to be 0"); - ensure!(reader.read_u32::()? == 0, "Expected 'prev' to be 0"); - let num_sections = reader.read_u32::()?; - let section_info_offset = reader.read_u32::()?; - let _name_offset = reader.read_u32::()?; - let _name_size = reader.read_u32::()?; - let version = reader.read_u32::()?; - ensure!(matches!(version, 1..=3), "Unsupported REL version {}", version); - let bss_size = reader.read_u32::()?; - let rel_offset = reader.read_u32::()?; - let imp_offset = reader.read_u32::()?; - let imp_size = reader.read_u32::()?; - let prolog_section = reader.read_u8()?; - let epilog_section = reader.read_u8()?; - let unresolved_section = reader.read_u8()?; - ensure!(reader.read_u8()? == 0, "Expected 'bssSection' to be 0"); - let prolog_offset = reader.read_u32::()?; - let epilog_offset = reader.read_u32::()?; - let unresolved_offset = reader.read_u32::()?; - let (align, bss_align) = if version >= 2 { - let align = reader.read_u32::()?; - let bss_align = reader.read_u32::()?; - (Some(align), Some(bss_align)) - } else { - (None, None) - }; - let fix_size = if version >= 3 { Some(reader.read_u32::()?) } else { None }; +#[binrw] +#[derive(Clone, Debug)] +#[br(assert(next == 0))] +#[br(assert(prev == 0))] +#[br(assert(bss_section == 0))] +#[brw(assert(matches!(version, 1..=3), "Unsupported REL version {version}"))] +pub struct RelHeader { + /// Arbitrary identification number. + /// Must be unique amongst all RELs used by a game. + /// 0 is reserved for the DOL. + pub module_id: u32, + /// Pointer to next module. + /// Filled at runtime. + #[bw(calc = 0)] + pub next: u32, + /// Pointer to previous module. + /// Filled at runtime. + #[bw(calc = 0)] + pub prev: u32, + /// Number of sections in the file. + pub num_sections: u32, + /// Offset to the start of the section table. 
+ pub section_info_offset: u32, + /// Offset in the external module name string table file. + pub name_offset: u32, + /// Size of the module name in bytes. + pub name_size: u32, + /// Version number of the REL file format. + pub version: u32, + /// Size of the `.bss` section. + pub bss_size: u32, + /// Offset to the start of the relocation table. + pub rel_offset: u32, + /// Offset to the start of the import table. + pub imp_offset: u32, + /// Size of the import table. + pub imp_size: u32, + /// Section containing the `_prolog` function. + pub prolog_section: u8, + /// Section containing the `_epilog` function. + pub epilog_section: u8, + /// Section containing the `_unresolved` function. + pub unresolved_section: u8, + /// Index into section table which bss is relative to. + /// Filled at runtime. + #[bw(calc = 0)] + pub bss_section: u8, + /// Offset into the section containing `_prolog`. + pub prolog_offset: u32, + /// Offset into the section containing `_epilog`. + pub epilog_offset: u32, + /// Offset into the section containing `_unresolved`. + pub unresolved_offset: u32, + /// (Version >= 2 only) + /// Alignment constraint on all sections. + #[br(if(version >= 2))] + #[bw(if(*version >= 2))] + pub align: Option, + /// (Version >= 2 only) + /// Alignment constraint on the `.bss` section. + #[br(if(version >= 2))] + #[bw(if(*version >= 2))] + pub bss_align: Option, + /// (Version >= 3 only) + /// If REL is linked with `OSLinkFixed` (instead of `OSLink`), the + /// space after this offset can be used for other purposes, like BSS. + #[br(if(version >= 3))] + #[bw(if(*version >= 3))] + pub fix_size: Option, +} - let mut sections = Vec::with_capacity(num_sections as usize); - reader.set_position(section_info_offset as u64); +#[binrw] +#[derive(Copy, Clone, Debug)] +struct RelImport { + module_id: u32, + offset: u32, +} + +#[binrw] +#[derive(Copy, Clone, Debug)] +struct RelSectionHeader { + offset_and_flags: u32, + size: u32, +} + +impl RelSectionHeader { + fn new(offset: u32, size: u32, exec: bool) -> Self { + Self { offset_and_flags: offset | (exec as u32), size } + } + + fn offset(&self) -> u32 { self.offset_and_flags & !1 } + + fn size(&self) -> u32 { self.size } + + fn exec(&self) -> bool { self.offset_and_flags & 1 != 0 } +} + +#[binrw] +#[derive(Copy, Clone, Debug)] +struct RelRelocRaw { + offset: u16, + kind: u8, + section: u8, + addend: u32, +} + +pub fn process_rel_header(reader: &mut Reader) -> Result { + RelHeader::read_be(reader).context("Failed to read REL header") +} + +pub fn process_rel(reader: &mut Reader) -> Result<(RelHeader, ObjInfo)> { + let header = process_rel_header(reader)?; + let mut sections = Vec::with_capacity(header.num_sections as usize); + reader.set_position(header.section_info_offset as u64); let mut found_text = false; let mut total_bss_size = 0; - for idx in 0..num_sections { - let offset = reader.read_u32::()?; - let size = reader.read_u32::()?; + for idx in 0..header.num_sections { + let section = RelSectionHeader::read_be(reader) + .with_context(|| format!("Failed to read REL section header {}", idx))?; + let offset = section.offset(); + let size = section.size(); if size == 0 { continue; } - let exec = (offset & 1) == 1; - let offset = offset & !3; let data = if offset == 0 { vec![] @@ -73,18 +161,18 @@ pub fn process_rel(mut reader: Reader) -> Result { let position = reader.position(); reader.set_position(offset as u64); let mut data = vec![0u8; size as usize]; - reader.read_exact(&mut data)?; + reader.read_exact(&mut data).with_context(|| { + 
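// ---- Editor's sketch (not part of the patch) ----
// A hedged, minimal example of the binrw pattern RelHeader relies on above: a field gated
// with `#[br(if(...))]` on an earlier field's value is parsed only when the condition holds
// and is otherwise left as None. Struct and field names here are invented.
use binrw::BinRead;
use std::io::Cursor;

#[derive(BinRead)]
struct MiniHeader {
    version: u32,
    bss_size: u32,
    // Only present in version >= 2 files; left as None for version 1.
    #[br(if(version >= 2))]
    align: Option<u32>,
}

fn example() -> binrw::BinResult<()> {
    // 8 bytes: version = 1, bss_size = 0x20, no trailing align field.
    let bytes = [0u8, 0, 0, 1, 0, 0, 0, 0x20];
    let header = MiniHeader::read_be(&mut Cursor::new(&bytes))?;
    assert_eq!((header.version, header.bss_size), (1, 0x20));
    assert!(header.align.is_none());
    Ok(())
}
// ---- end editor's sketch ----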
format!("Failed to read REL section {} data with size {:#X}", idx, size) + })?; reader.set_position(position); data }; - // println!("Section {} offset {:#X} size {:#X}", idx, offset, size); - let (name, kind, section_known) = if offset == 0 { ensure!(total_bss_size == 0, "Multiple BSS sections in REL"); total_bss_size = size; (".bss".to_string(), ObjSectionKind::Bss, true) - } else if exec { + } else if section.exec() { ensure!(!found_text, "Multiple text sections in REL"); found_text = true; (".text".to_string(), ObjSectionKind::Code, true) @@ -98,8 +186,8 @@ pub fn process_rel(mut reader: Reader) -> Result { size: size as u64, data, align: match offset { - 0 => bss_align, - _ => align, + 0 => header.bss_align, + _ => header.align, } .unwrap_or_default() as u64, elf_index: idx as usize, @@ -111,10 +199,10 @@ pub fn process_rel(mut reader: Reader) -> Result { }); } ensure!( - total_bss_size == bss_size, + total_bss_size == header.bss_size, "Mismatched BSS size: {:#X} != {:#X}", total_bss_size, - bss_size + header.bss_size ); let mut symbols = Vec::new(); @@ -141,49 +229,45 @@ pub fn process_rel(mut reader: Reader) -> Result { } Ok(()) }; - add_symbol(prolog_section, prolog_offset, "_prolog")?; - add_symbol(epilog_section, epilog_offset, "_epilog")?; - add_symbol(unresolved_section, unresolved_offset, "_unresolved")?; + add_symbol(header.prolog_section, header.prolog_offset, "_prolog")?; + add_symbol(header.epilog_section, header.epilog_offset, "_epilog")?; + add_symbol(header.unresolved_section, header.unresolved_offset, "_unresolved")?; let mut unresolved_relocations = Vec::new(); let mut imp_idx = 0; - let imp_end = (imp_offset + imp_size) as u64; - reader.set_position(imp_offset as u64); + let imp_end = (header.imp_offset + header.imp_size) as u64; + reader.set_position(header.imp_offset as u64); while reader.position() < imp_end { - let reloc_module_id = reader.read_u32::()?; - let reloc_offset = reader.read_u32::()?; + let import = RelImport::read_be(reader)?; if imp_idx == 0 { ensure!( - reloc_offset == rel_offset, + import.offset == header.rel_offset, "imp index 0 offset mismatch: {:#X} != {:#X}", - reloc_offset, - rel_offset + import.offset, + header.rel_offset ); } imp_idx += 1; - if reloc_module_id == module_id { - if let Some(fix_size) = fix_size { + if import.module_id == header.module_id { + if let Some(fix_size) = header.fix_size { ensure!( - fix_size == reloc_offset, + fix_size == import.offset, "fix_size mismatch: {:#X} != {:#X}", fix_size, - reloc_offset + import.offset ); } } let position = reader.position(); - reader.set_position(reloc_offset as u64); + reader.set_position(import.offset as u64); let mut address = 0u32; let mut section = u8::MAX; loop { - let offset = reader.read_u16::()?; - let type_id = reader.read_u8()? 
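// ---- Editor's sketch (not part of the patch) ----
// The section-table encoding handled above, as standalone helpers: bit 0 of the offset word
// is the "executable" flag, a zero offset (with a nonzero size) marks .bss, and the real file
// offset is the word with the flag bit masked off.
fn section_offset(offset_and_flags: u32) -> u32 { offset_and_flags & !1 }
fn section_is_exec(offset_and_flags: u32) -> bool { offset_and_flags & 1 != 0 }
fn section_is_bss(offset_and_flags: u32, size: u32) -> bool {
    section_offset(offset_and_flags) == 0 && size > 0
}

fn example() {
    let word = 0x0000_1001; // file offset 0x1000 with the exec bit set
    assert_eq!(section_offset(word), 0x1000);
    assert!(section_is_exec(word));
    assert!(!section_is_bss(word, 0x100));
}
// ---- end editor's sketch ----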
as u32; - let target_section = reader.read_u8()?; - let addend = reader.read_u32::()?; - let kind = match type_id { + let reloc = RelRelocRaw::read_be(reader)?; + let kind = match reloc.kind as u32 { elf::R_PPC_NONE => continue, elf::R_PPC_ADDR32 | elf::R_PPC_UADDR32 => ObjRelocKind::Absolute, // elf::R_PPC_ADDR24 => ObjRelocKind::PpcAddr24, @@ -199,36 +283,33 @@ pub fn process_rel(mut reader: Reader) -> Result { // elf::R_PPC_REL14_BRTAKEN => ObjRelocKind::PpcRel14BrTaken, // elf::R_PPC_REL14_BRNTAKEN => ObjRelocKind::PpcRel14BrnTaken, R_DOLPHIN_NOP => { - address += offset as u32; + address += reloc.offset as u32; continue; } R_DOLPHIN_SECTION => { address = 0; - section = target_section; + section = reloc.section; continue; } R_DOLPHIN_END => break, // R_DOLPHIN_MRKREF => ? reloc_type => bail!("Unhandled REL relocation type {reloc_type}"), }; - address += offset as u32; - unresolved_relocations.push(RelReloc { + address += reloc.offset as u32; + let reloc = RelReloc { kind, section, address: address & !3, - module_id: reloc_module_id, - target_section, - addend, - }); + module_id: import.module_id, + target_section: reloc.section, + addend: reloc.addend, + }; + unresolved_relocations.push(reloc); } reader.set_position(position); } - // let name = match name_offset { - // 0 => String::new(), - // _ => read_string(&mut reader, name_offset as u64, name_size as usize).unwrap_or_default(), - // }; - log::debug!("Read REL ID {module_id}"); + log::debug!("Read REL ID {}", header.module_id); let mut obj = ObjInfo::new( ObjKind::Relocatable, ObjArchitecture::PowerPc, @@ -236,9 +317,9 @@ pub fn process_rel(mut reader: Reader) -> Result { symbols, sections, ); - obj.module_id = module_id; + obj.module_id = header.module_id; obj.unresolved_relocations = unresolved_relocations; - Ok(obj) + Ok((header, obj)) } /// REL relocation. @@ -258,3 +339,342 @@ pub struct RelReloc { /// If target module ID is 0 (DOL), this is an absolute address. pub addend: u32, } + +#[inline] +fn reloc_can_be_applied(_module_id: u32, rel_reloc: &RelReloc) -> bool { + matches!(rel_reloc.kind, ObjRelocKind::PpcRel24 | ObjRelocKind::PpcRel14) +} + +#[inline] +fn skip_reloc(module_id: u32, rel_reloc: &RelReloc) -> bool { + rel_reloc.module_id == module_id + && rel_reloc.section == rel_reloc.target_section + && matches!(rel_reloc.kind, ObjRelocKind::PpcRel24 | ObjRelocKind::PpcRel14) +} + +fn apply_relocation( + data: &mut [u8], + module_id: u32, + rel_reloc: &RelReloc, + unresolved: u32, +) -> Result<()> { + let diff = if rel_reloc.module_id == module_id && rel_reloc.section == rel_reloc.target_section + { + rel_reloc.addend as i32 - rel_reloc.address as i32 + } else { + unresolved as i32 - rel_reloc.address as i32 + }; + let ins_ref = array_ref_mut!(data, rel_reloc.address as usize, 4); + let mut ins = u32::from_be_bytes(*ins_ref); + match rel_reloc.kind { + ObjRelocKind::PpcRel24 => { + ensure!((-0x2000000..0x2000000).contains(&diff), "R_PPC_REL24 relocation out of range"); + ins = (ins & !0x3fffffc) | (diff as u32 & 0x3fffffc); + } + ObjRelocKind::PpcRel14 => { + ensure!((-0x2000..0x2000).contains(&diff), "R_PPC_REL14 relocation out of range"); + ins = (ins & !0xfffc) | (diff as u32 & 0xfffc); + } + kind => bail!("Unsupported relocation kind {:?}", kind), + } + *ins_ref = ins.to_be_bytes(); + Ok(()) +} + +#[derive(Clone, Debug)] +pub struct RelWriteInfo { + /// REL module ID. + pub module_id: u32, + /// REL version. + pub version: u32, + /// Override `name_offset` in the REL header. 
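// ---- Editor's sketch (not part of the patch) ----
// Illustrates the R_PPC_REL24 patching done by `apply_relocation` above: the word-aligned
// branch displacement occupies the 0x3fffffc bit field of the instruction, so the computed
// difference is range-checked and spliced into that field, preserving the opcode and AA/LK bits.
fn patch_rel24(ins: u32, diff: i32) -> Option<u32> {
    if !(-0x200_0000..0x200_0000).contains(&diff) {
        return None; // displacement does not fit in the 24-bit field
    }
    Some((ins & !0x3ff_fffc) | (diff as u32 & 0x3ff_fffc))
}

fn example() {
    let bl = 0x4800_0001u32; // `bl` with zero displacement, link bit set
    assert_eq!(patch_rel24(bl, 0x100), Some(0x4800_0101)); // displacement patched, LK kept
    assert_eq!(patch_rel24(bl, 0x0200_0000), None); // out of range
}
// ---- end editor's sketch ----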
+ /// Useful for matching RELs without the original string table. + pub name_offset: Option, + /// Override `name_size` in the REL header. + /// Useful for matching RELs without the original string table. + pub name_size: Option, + /// Override `align` in the REL header. + pub align: Option, + /// Override `bss_align` in the REL header. + pub bss_align: Option, + /// Override the number of sections in the file. + /// Useful for matching RELs that included debug sections. + pub section_count: Option, +} + +const PERMITTED_SECTIONS: [&str; 7] = + [".init", ".text", ".ctors", ".dtors", ".rodata", ".data", ".bss"]; + +pub fn should_write_section(section: &object::Section) -> bool { + matches!(section.name(), Ok(name) if PERMITTED_SECTIONS.contains(&name)) + && section.kind() != object::SectionKind::UninitializedData +} + +pub fn write_rel( + w: &mut W, + info: &RelWriteInfo, + file: &object::File, + mut relocations: Vec, +) -> Result<()> { + relocations.sort_by(|a, b| { + if a.module_id == 0 { + if b.module_id == 0 { + Ordering::Equal + } else { + Ordering::Greater + } + } else if a.module_id == info.module_id { + if b.module_id == 0 { + Ordering::Less + } else if b.module_id == info.module_id { + Ordering::Equal + } else { + Ordering::Greater + } + } else if b.module_id == 0 || b.module_id == info.module_id { + Ordering::Less + } else { + a.module_id.cmp(&b.module_id) + } + .then(a.section.cmp(&b.section)) + .then(a.address.cmp(&b.address)) + }); + + let mut apply_relocations = vec![]; + relocations.retain(|r| { + if !should_write_section( + &file.section_by_index(object::SectionIndex(r.section as usize)).unwrap(), + ) { + return false; + } + if reloc_can_be_applied(info.module_id, r) { + apply_relocations.push(r.clone()); + !skip_reloc(info.module_id, r) + } else { + true + } + }); + + let mut align = + file.sections().filter(should_write_section).map(|s| s.align() as u32).max().unwrap_or(0); + let bss = file.sections().find(|s| s.name() == Ok(".bss")); + let mut bss_align = bss.as_ref().map(|s| s.align() as u32).unwrap_or(1); + let mut num_sections = file.sections().count() as u32; + + // Apply overrides + if let Some(section_count) = info.section_count { + if section_count != num_sections as usize { + warn!(from = num_sections, to = section_count, "Overriding section count"); + } + num_sections = section_count as u32; + } + if info.version >= 2 { + if let Some(align_override) = info.align { + if align_override != align { + warn!(from = align, to = align_override, "Overriding alignment"); + } + align = align_override; + } + if let Some(bss_align_override) = info.bss_align { + if bss_align_override != bss_align { + warn!(from = bss_align, to = bss_align_override, "Overriding BSS alignment"); + } + bss_align = bss_align_override; + } + } + + let mut header = RelHeader { + module_id: info.module_id, + num_sections, + section_info_offset: match info.version { + 1 => 0x40, + 2 => 0x48, + 3 => 0x4C, + _ => bail!("Unsupported REL version {}", info.version), + }, + name_offset: info.name_offset.unwrap_or(0), + name_size: info.name_size.unwrap_or(0), + version: info.version, + bss_size: bss.as_ref().map(|s| s.size() as u32).unwrap_or(0), + rel_offset: 0, + imp_offset: 0, + imp_size: 0, + prolog_section: 0, + epilog_section: 0, + unresolved_section: 0, + prolog_offset: 0, + epilog_offset: 0, + unresolved_offset: 0, + align: if info.version >= 2 { Some(align) } else { None }, + bss_align: if info.version >= 2 { Some(bss_align) } else { None }, + fix_size: None, + }; + let mut offset = 
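// ---- Editor's sketch (not part of the patch) ----
// Where the 0x40 / 0x48 / 0x4C values chosen for `section_info_offset` above come from: the
// v1 header is 0x40 bytes, v2 appends the two u32 alignment fields, and v3 additionally
// appends the u32 `fix_size` field, so the section table starts right after the header.
fn rel_header_size(version: u32) -> Option<u32> {
    match version {
        1 => Some(0x40),
        2 => Some(0x40 + 8), // + align + bss_align
        3 => Some(0x40 + 8 + 4), // + fix_size
        _ => None, // unsupported REL version
    }
}

fn example() {
    assert_eq!(rel_header_size(1), Some(0x40));
    assert_eq!(rel_header_size(3), Some(0x4C));
}
// ---- end editor's sketch ----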
header.section_info_offset; + offset += num_sections * 8; + let section_data_offset = offset; + for section in file.sections().filter(should_write_section) { + let align = section.align() as u32 - 1; + offset = (offset + align) & !align; + offset += section.size() as u32; + } + header.imp_offset = offset; + let imp_count = relocations.iter().map(|r| r.module_id).dedup().count(); + header.imp_size = imp_count as u32 * 8; + offset += header.imp_size; + header.rel_offset = offset; + + let mut imp_entries = Vec::::with_capacity(imp_count); + let mut raw_relocations = vec![]; + { + let mut address = 0u32; + let mut section = u8::MAX; + let mut last_module_id = u32::MAX; + for reloc in &relocations { + if reloc.module_id != last_module_id { + if last_module_id != u32::MAX { + raw_relocations.push(RelRelocRaw { + offset: 0, + kind: R_DOLPHIN_END as u8, + section: 0, + addend: 0, + }); + offset += 8; + } + imp_entries.push(RelImport { module_id: reloc.module_id, offset }); + section = u8::MAX; + last_module_id = reloc.module_id; + } + if info.version >= 3 + && header.fix_size.is_none() + && (reloc.module_id == 0 || reloc.module_id == info.module_id) + { + header.fix_size = Some(offset); + } + if reloc.section != section { + raw_relocations.push(RelRelocRaw { + offset: 0, + kind: R_DOLPHIN_SECTION as u8, + section: reloc.section, + addend: 0, + }); + offset += 8; + address = 0; + section = reloc.section; + } + let mut reloc_offset = reloc.address - address; + while reloc_offset > 0xffff { + raw_relocations.push(RelRelocRaw { + offset: 0xffff, + kind: R_DOLPHIN_NOP as u8, + section: 0, + addend: 0, + }); + offset += 8; + reloc_offset -= 0xffff; + } + raw_relocations.push(RelRelocRaw { + offset: reloc_offset as u16, + kind: match reloc.kind { + ObjRelocKind::Absolute => elf::R_PPC_ADDR32, + ObjRelocKind::PpcAddr16Lo => elf::R_PPC_ADDR16_LO, + ObjRelocKind::PpcAddr16Hi => elf::R_PPC_ADDR16_HI, + ObjRelocKind::PpcAddr16Ha => elf::R_PPC_ADDR16_HA, + ObjRelocKind::PpcRel24 => elf::R_PPC_REL24, + ObjRelocKind::PpcRel14 => elf::R_PPC_REL14, + _ => bail!("Unsupported relocation kind {:?}", reloc.kind), + } as u8, + section: reloc.target_section, + addend: reloc.addend, + }); + address = reloc.address; + offset += 8; + } + } + raw_relocations.push(RelRelocRaw { + offset: 0, + kind: R_DOLPHIN_END as u8, + section: 0, + addend: 0, + }); + offset += 8; + + for symbol in file.symbols().filter(|s| s.is_definition()) { + let Some(symbol_section) = symbol.section_index() else { + continue; + }; + match symbol.name() { + Ok("_prolog") => { + header.prolog_section = symbol_section.0 as u8; + header.prolog_offset = symbol.address() as u32; + } + Ok("_epilog") => { + header.epilog_section = symbol_section.0 as u8; + header.epilog_offset = symbol.address() as u32; + } + Ok("_unresolved") => { + header.unresolved_section = symbol_section.0 as u8; + header.unresolved_offset = symbol.address() as u32; + } + _ => {} + } + } + + let mut w = NoSeek::new(w); + header.write_be(&mut w)?; + ensure!(w.stream_position()? 
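// ---- Editor's sketch (not part of the patch) ----
// The offset encoding used while emitting the relocation stream above: each raw entry stores
// a 16-bit delta from the previous relocation address, so gaps larger than 0xffff are bridged
// with R_DOLPHIN_NOP entries that only advance the running address.
fn encode_gap(mut gap: u32) -> (u32, u16) {
    let mut nops = 0u32;
    while gap > 0xffff {
        nops += 1; // one R_DOLPHIN_NOP entry advancing by 0xffff
        gap -= 0xffff;
    }
    (nops, gap as u16) // the remaining delta goes on the real relocation entry
}

fn example() {
    assert_eq!(encode_gap(0x100), (0, 0x100));
    assert_eq!(encode_gap(0x2_0005), (2, 0x7)); // two NOPs, then a delta of 7
}
// ---- end editor's sketch ----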
as u32 == header.section_info_offset); + let mut current_data_offset = section_data_offset; + for section_index in 0..num_sections { + let Ok(section) = file.section_by_index(object::SectionIndex(section_index as usize)) + else { + RelSectionHeader::new(0, 0, false).write_be(&mut w)?; + continue; + }; + if matches!(section.name(), Ok(name) if PERMITTED_SECTIONS.contains(&name)) { + let mut offset = 0; + if section.kind() != object::SectionKind::UninitializedData { + let align = section.align() as u32 - 1; + current_data_offset = (current_data_offset + align) & !align; + offset = current_data_offset; + current_data_offset += section.size() as u32; + } + RelSectionHeader::new( + offset, + section.size() as u32, + section.kind() == object::SectionKind::Text, + ) + .write_be(&mut w)?; + } else { + RelSectionHeader::new(0, 0, false).write_be(&mut w)?; + } + } + ensure!(w.stream_position()? as u32 == section_data_offset); + for section in file.sections().filter(should_write_section) { + fn calculate_padding(position: u64, align: u64) -> u64 { + let align = align - 1; + ((position + align) & !align) - position + } + let position = w.stream_position()?; + w.write_all(&vec![0; calculate_padding(position, section.align()) as usize])?; + + let section_index = section.index().0 as u8; + let mut section_data = section.uncompressed_data()?; + if apply_relocations.iter().any(|r| r.section == section_index) { + let mut data = section_data.into_owned(); + for reloc in apply_relocations.iter().filter(|r| r.section == section_index) { + apply_relocation(&mut data, info.module_id, reloc, header.unresolved_offset)?; + } + section_data = data.into_cow(); + } + w.write_all(§ion_data)?; + } + ensure!(w.stream_position()? as u32 == header.imp_offset); + for entry in imp_entries { + entry.write_be(&mut w)?; + } + ensure!(w.stream_position()? as u32 == header.rel_offset); + for reloc in raw_relocations { + reloc.write_be(&mut w)?; + } + ensure!(w.stream_position()? 
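// ---- Editor's sketch (not part of the patch) ----
// The `calculate_padding` helper used above when laying out section data, shown standalone:
// it returns how many zero bytes are needed to advance `position` to the next multiple of a
// power-of-two `align`.
fn calculate_padding(position: u64, align: u64) -> u64 {
    let mask = align - 1;
    ((position + mask) & !mask) - position
}

fn example() {
    assert_eq!(calculate_padding(0x103, 4), 1);
    assert_eq!(calculate_padding(0x104, 4), 0);
    assert_eq!(calculate_padding(0x105, 0x20), 0x1B);
}
// ---- end editor's sketch ----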
as u32 == offset); + Ok(()) +} diff --git a/src/util/split.rs b/src/util/split.rs index 7d20864..7271362 100644 --- a/src/util/split.rs +++ b/src/util/split.rs @@ -1,5 +1,5 @@ use std::{ - cmp::min, + cmp::{min, Ordering}, collections::{BTreeMap, HashMap, HashSet}, }; @@ -85,6 +85,8 @@ fn split_ctors_dtors(obj: &mut ObjInfo, start: SectionAddress, end: SectionAddre align: None, common: false, autogenerated: true, + skip: false, + rename: None, }); } if function_split.is_none() { @@ -95,6 +97,8 @@ fn split_ctors_dtors(obj: &mut ObjInfo, start: SectionAddress, end: SectionAddre align: None, common: false, autogenerated: true, + skip: false, + rename: None, }); } } @@ -263,6 +267,8 @@ fn split_extabindex(obj: &mut ObjInfo, start: SectionAddress) -> Result<()> { align: None, common: false, autogenerated: true, + skip: false, + rename: None, }); } if extab_split.is_none() { @@ -274,6 +280,8 @@ fn split_extabindex(obj: &mut ObjInfo, start: SectionAddress) -> Result<()> { align: None, common: false, autogenerated: true, + skip: false, + rename: None, }); } if function_split.is_none() { @@ -285,6 +293,8 @@ fn split_extabindex(obj: &mut ObjInfo, start: SectionAddress) -> Result<()> { align: None, common: false, autogenerated: true, + skip: false, + rename: None, }); } } @@ -374,6 +384,8 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { align: None, common: false, autogenerated: true, + skip: false, + rename: None, }); current_address = new_split_end; continue; @@ -485,6 +497,131 @@ fn validate_splits(obj: &ObjInfo) -> Result<()> { Ok(()) } +/// Add padding symbols to fill in gaps between splits and symbols. +fn add_padding_symbols(obj: &mut ObjInfo) -> Result<()> { + for (section_index, section, addr, _split) in obj.sections.all_splits() { + if section.name == ".ctors" || section.name == ".dtors" { + continue; + } + + if obj + .symbols + .kind_at_section_address(section_index, addr, match section.kind { + ObjSectionKind::Code => ObjSymbolKind::Function, + ObjSectionKind::Data => ObjSymbolKind::Object, + ObjSectionKind::ReadOnlyData => ObjSymbolKind::Object, + ObjSectionKind::Bss => ObjSymbolKind::Object, + })? + .is_none() + { + let next_symbol_address = obj + .symbols + .for_section_range(section_index, addr + 1..) 
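// ---- Editor's sketch (not part of the patch's real data model) ----
// The first pass of `add_padding_symbols` above in miniature: when a split begins at an
// address with no symbol of the section's kind, a `pad_*` symbol is synthesized that spans up
// to the next symbol with a known size (or the end of the section).
fn pad_symbol_name(section_index: usize, addr: u32, section_name: &str) -> String {
    format!("pad_{:02}_{:08X}_{}", section_index, addr, section_name.trim_start_matches('.'))
}

fn pad_symbol_size(addr: u32, next_symbol_addr: Option<u32>, section_end: u32) -> u32 {
    next_symbol_addr.unwrap_or(section_end) - addr
}

fn example() {
    assert_eq!(pad_symbol_name(2, 0x8000_1230, ".data"), "pad_02_80001230_data");
    assert_eq!(pad_symbol_size(0x8000_1230, Some(0x8000_1260), 0x8000_2000), 0x30);
}
// ---- end editor's sketch ----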
+ .find(|&(_, s)| s.size_known && s.size > 0) + .map(|(_, s)| s.address) + .unwrap_or(section.address + section.size); + let symbol_name = format!( + "pad_{:02}_{:08X}_{}", + section_index, + addr, + section.name.trim_start_matches('.') + ); + log::debug!("Adding padding symbol {} at {:#010X}", symbol_name, addr); + obj.symbols.add_direct(ObjSymbol { + name: symbol_name, + demangled_name: None, + address: addr as u64, + section: Some(section_index), + size: next_symbol_address - addr as u64, + size_known: true, + flags: ObjSymbolFlagSet( + ObjSymbolFlags::Local | ObjSymbolFlags::ForceActive | ObjSymbolFlags::NoWrite, + ), + kind: match section.kind { + ObjSectionKind::Code => ObjSymbolKind::Function, + ObjSectionKind::Data | ObjSectionKind::ReadOnlyData | ObjSectionKind::Bss => { + ObjSymbolKind::Object + } + }, + align: None, + data_kind: Default::default(), + })?; + } + } + + // Add padding symbols for gaps between symbols + for (section_index, section) in obj.sections.iter() { + if section.name == ".ctors" || section.name == ".dtors" { + continue; + } + + let mut to_add = vec![]; + let mut iter = obj + .symbols + .for_section(section_index) + .filter(|(_, s)| s.size_known && s.size > 0) + .peekable(); + while let (Some((_, symbol)), Some(&(_, next_symbol))) = (iter.next(), iter.peek()) { + let aligned_end = + align_up((symbol.address + symbol.size) as u32, next_symbol.align.unwrap_or(1)); + match aligned_end.cmp(&(next_symbol.address as u32)) { + Ordering::Less => { + let symbol_name = format!( + "gap_{:02}_{:08X}_{}", + section_index, + aligned_end, + section.name.trim_start_matches('.') + ); + log::debug!("Adding gap symbol {} at {:#010X}", symbol_name, aligned_end); + to_add.push(ObjSymbol { + name: symbol_name, + demangled_name: None, + address: aligned_end as u64, + section: Some(section_index), + size: next_symbol.address - aligned_end as u64, + size_known: true, + flags: ObjSymbolFlagSet( + ObjSymbolFlags::Global + | ObjSymbolFlags::ForceActive + | ObjSymbolFlags::NoWrite, + ), + kind: match section.kind { + ObjSectionKind::Code => ObjSymbolKind::Function, + ObjSectionKind::Data + | ObjSectionKind::ReadOnlyData + | ObjSectionKind::Bss => ObjSymbolKind::Object, + }, + align: None, + data_kind: Default::default(), + }); + } + Ordering::Equal => {} + Ordering::Greater => { + bail!( + "Symbol {} ({:#010X}..{:#010X}) overlaps with symbol {} ({:#010X}..{:#010X}, align {})", + symbol.name, + symbol.address, + symbol.address + symbol.size, + next_symbol.name, + next_symbol.address, + next_symbol.address + next_symbol.size, + next_symbol.align.unwrap_or(1) + ); + } + } + } + drop(iter); + + for symbol in to_add { + obj.symbols.add_direct(symbol)?; + } + } + Ok(()) +} + +#[inline] +const fn align_up(value: u32, align: u32) -> u32 { (value + (align - 1)) & !(align - 1) } + /// Perform any necessary adjustments to allow relinking. 
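// ---- Editor's sketch (not part of the patch) ----
// How the gap check above decides whether a filler symbol is needed: the previous symbol's
// end is rounded up to the next symbol's alignment, and any space left before the next symbol
// becomes a `gap_*` symbol (an overlap instead of a gap is reported as an error).
const fn align_up(value: u32, align: u32) -> u32 { (value + (align - 1)) & !(align - 1) }

fn gap_after(sym_end: u32, next_addr: u32, next_align: u32) -> Option<(u32, u32)> {
    let aligned_end = align_up(sym_end, next_align);
    if aligned_end < next_addr {
        Some((aligned_end, next_addr - aligned_end)) // (gap start, gap size)
    } else {
        None
    }
}

fn example() {
    // A symbol ending at 0x1234 followed by an 8-aligned symbol at 0x1240 leaves an 8-byte gap.
    assert_eq!(gap_after(0x1234, 0x1240, 8), Some((0x1238, 8)));
}
// ---- end editor's sketch ----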
/// This includes: /// - Ensuring .ctors & .dtors entries are split with their associated function @@ -526,6 +663,9 @@ pub fn update_splits(obj: &mut ObjInfo, common_start: Option) -> Result<()> // Ensure splits don't overlap symbols or each other validate_splits(obj)?; + // Add symbols to beginning of any split that doesn't start with a symbol + add_padding_symbols(obj)?; + // Resolve link order obj.link_order = resolve_link_order(obj)?; @@ -677,6 +817,12 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { file_end = min(next_addr, section_end); } + // Skip over this data + if split.skip { + current_address = file_end; + continue; + } + let file = name_to_obj .get(&split.unit) .and_then(|&idx| objects.get_mut(idx)) @@ -792,13 +938,8 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { ..(file_end.address as u64 - section.address) as usize] .to_vec(), }; - let name = if let Some(name) = obj.named_sections.get(¤t_address.address) { - name.clone() - } else { - section.name.clone() - }; file.sections.push(ObjSection { - name, + name: split.rename.as_ref().unwrap_or(§ion.name).clone(), kind: section.kind, address: 0, size: file_end.address as u64 - current_address.address as u64, @@ -1032,7 +1173,6 @@ pub fn end_for_section(obj: &ObjInfo, section_index: usize) -> Result
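// ---- Editor's sketch (invented types, not the patch's real ObjSplit) ----
// Behavior of the new `skip` and `rename` split options wired up above: `skip` drops the
// covered range from the output object entirely, while `rename` overrides the section name
// recorded for the emitted split.
struct SplitOpts {
    skip: bool,
    rename: Option<String>,
}

fn emitted_section_name(opts: &SplitOpts, original: &str) -> Option<String> {
    if opts.skip {
        return None; // the range is skipped; nothing is emitted for it
    }
    Some(opts.rename.clone().unwrap_or_else(|| original.to_string()))
}

fn example() {
    let opts = SplitOpts { skip: false, rename: Some(".data".to_string()) };
    assert_eq!(emitted_section_name(&opts, ".sdata"), Some(".data".to_string()));
}
// ---- end editor's sketch ----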