From 9cafb77d3fa6a918b894c69d9c56684514475da1 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Sun, 1 Jun 2025 20:22:08 -0600 Subject: [PATCH] Add `dtk extab clean` & config.yml `clean_extab` It was discovered that certain extab actions contain uninitialized data from the compiler. This provides a way to zero out uninitialized data in DOL or object files. Usage: `dtk extab clean input.dol output.dol` A `clean_extab` setting was added to config.yml, so projects can link the cleaned objects and target the cleaned DOL hash. --- Cargo.lock | 72 +++++++++++++++++++------------ Cargo.toml | 4 +- src/cmd/dol.rs | 46 ++++++++++---------- src/cmd/elf2dol.rs | 86 +++---------------------------------- src/cmd/extab.rs | 69 ++++++++++++++++++++++++++++++ src/cmd/mod.rs | 1 + src/main.rs | 2 + src/util/dol.rs | 104 +++++++++++++++++++++++++++++++++++++++++++-- src/util/elf.rs | 21 ++++++--- src/util/extab.rs | 51 ++++++++++++++++++++++ src/util/mod.rs | 1 + src/util/reader.rs | 12 ++++++ 12 files changed, 331 insertions(+), 138 deletions(-) create mode 100644 src/cmd/extab.rs create mode 100644 src/util/extab.rs diff --git a/Cargo.lock b/Cargo.lock index bb2e99e..27939e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -339,16 +339,16 @@ checksum = "c2e06f9bce634a3c898eb1e5cb949ff63133cbb218af93cc9b38b31d6f3ea285" [[package]] name = "cwextab" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "003567b96ff9d8ac3275831650385891bca370092937be625157778b1e58f755" +checksum = "701f6867c92e1b64ddcc4b416194be3121b8f7ba5352a70ed5fd3295a7d8e0e1" dependencies = [ - "thiserror", + "thiserror 2.0.12", ] [[package]] name = "decomp-toolkit" -version = "1.5.2" +version = "1.6.0" dependencies = [ "aes", "anyhow", @@ -873,7 +873,7 @@ dependencies = [ "miniz_oxide", "rayon", "sha1", - "thiserror", + "thiserror 1.0.64", "zerocopy", "zstd", ] @@ -946,7 +946,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.79", + 
"syn 2.0.101", ] [[package]] @@ -1110,7 +1110,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" dependencies = [ "proc-macro2", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1124,9 +1124,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.88" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -1158,7 +1158,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.79", + "syn 2.0.101", "tempfile", ] @@ -1172,7 +1172,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1374,7 +1374,7 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1385,7 +1385,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1408,7 +1408,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1527,7 +1527,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1549,7 +1549,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1584,9 +1584,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.79" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = 
"8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -1609,7 +1609,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "thiserror", + "thiserror 1.0.64", "walkdir", ] @@ -1632,7 +1632,16 @@ version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.64", +] + +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl 2.0.12", ] [[package]] @@ -1643,7 +1652,18 @@ checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", ] [[package]] @@ -1692,7 +1712,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1755,7 +1775,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1853,7 +1873,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", "wasm-bindgen-shared", ] @@ -1875,7 +1895,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2088,7 +2108,7 @@ checksum = "3ca22c4ad176b37bd81a565f66635bde3d654fe6832730c3e52e1018ae1655ee" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3ef2afa..8ef7bbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "decomp-toolkit" description = "Yet another GameCube/Wii decompilation toolkit." authors = ["Luke Street "] license = "MIT OR Apache-2.0" -version = "1.5.2" +version = "1.6.0" edition = "2021" publish = false repository = "https://github.com/encounter/decomp-toolkit" @@ -37,7 +37,7 @@ typed-path = "0.9" cbc = "0.1" crossterm = "0.28" cwdemangle = "1.0" -cwextab = "1.0" +cwextab = "1.1" dyn-clone = "1.0" enable-ansi-support = "0.2" filetime = "0.2" diff --git a/src/cmd/dol.rs b/src/cmd/dol.rs index eccd7ec..f8c6357 100644 --- a/src/cmd/dol.rs +++ b/src/cmd/dol.rs @@ -47,6 +47,7 @@ use crate::{ diff::{calc_diff_ranges, print_diff, process_code}, dol::process_dol, elf::{process_elf, write_elf}, + extab::clean_extab, file::{ buf_copy_with_hash, buf_writer, check_hash_str, touch, verify_hash, FileIterator, FileReadInfo, @@ -293,6 +294,9 @@ pub struct ModuleConfig { pub block_relocations: Vec, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub add_relocations: Vec, + /// Process exception tables and zero out uninitialized data. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub clean_extab: Option, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] @@ -818,17 +822,29 @@ struct AnalyzeResult { splits_cache: Option, } -fn load_analyze_dol(config: &ProjectConfig, object_base: &ObjectBase) -> Result { - let object_path = object_base.join(&config.base.object); +fn load_dol_module( + config: &ModuleConfig, + object_base: &ObjectBase, +) -> Result<(ObjInfo, Utf8NativePathBuf)> { + let object_path = object_base.join(&config.object); log::debug!("Loading {}", object_path); let mut obj = { - let mut file = object_base.open(&config.base.object)?; + let mut file = object_base.open(&config.object)?; let data = file.map()?; - if let Some(hash_str) = &config.base.hash { + if let Some(hash_str) = &config.hash { verify_hash(data, hash_str)?; } - process_dol(data, config.base.name())? + process_dol(data, config.name())? }; + if config.clean_extab.unwrap_or(false) { + log::debug!("Cleaning extab for {}", config.name()); + clean_extab(&mut obj)?; + } + Ok((obj, object_path)) +} + +fn load_analyze_dol(config: &ProjectConfig, object_base: &ObjectBase) -> Result { + let (mut obj, object_path) = load_dol_module(&config.base, object_base)?; let mut dep = vec![object_path]; if let Some(comment_version) = config.mw_comment_version { @@ -1658,15 +1674,7 @@ fn diff(args: DiffArgs) -> Result<()> { let config: ProjectConfig = serde_yaml::from_reader(config_file.as_mut())?; let object_base = find_object_base(&config)?; - log::info!("Loading {}", object_base.join(&config.base.object)); - let mut obj = { - let mut file = object_base.open(&config.base.object)?; - let data = file.map()?; - if let Some(hash_str) = &config.base.hash { - verify_hash(data, hash_str)?; - } - process_dol(data, config.base.name())? 
- }; + let (mut obj, _object_path) = load_dol_module(&config.base, &object_base)?; if let Some(symbols_path) = &config.base.symbols { apply_symbols_file(&symbols_path.with_encoding(), &mut obj)?; @@ -1882,15 +1890,7 @@ fn apply(args: ApplyArgs) -> Result<()> { let config: ProjectConfig = serde_yaml::from_reader(config_file.as_mut())?; let object_base = find_object_base(&config)?; - log::info!("Loading {}", object_base.join(&config.base.object)); - let mut obj = { - let mut file = object_base.open(&config.base.object)?; - let data = file.map()?; - if let Some(hash_str) = &config.base.hash { - verify_hash(data, hash_str)?; - } - process_dol(data, config.base.name())? - }; + let (mut obj, _object_path) = load_dol_module(&config.base, &object_base)?; let Some(symbols_path) = &config.base.symbols else { bail!("No symbols file specified in config"); diff --git a/src/cmd/elf2dol.rs b/src/cmd/elf2dol.rs index 8fc7cb7..e293414 100644 --- a/src/cmd/elf2dol.rs +++ b/src/cmd/elf2dol.rs @@ -6,8 +6,12 @@ use object::{Architecture, Endianness, Object, ObjectKind, ObjectSection, Sectio use typed_path::Utf8NativePathBuf; use crate::{ - obj::ObjSectionKind, - util::{alf::ALF_MAGIC, dol::process_dol, file::buf_writer, path::native_path}, + util::{ + alf::ALF_MAGIC, + dol::{process_dol, write_dol}, + file::buf_writer, + path::native_path, + }, vfs::open_file, }; @@ -161,84 +165,8 @@ pub fn run(args: Args) -> Result<()> { fn convert_alf(args: Args, data: &[u8]) -> Result<()> { let obj = process_dol(data, "")?; - - let mut header = DolHeader { entry_point: obj.entry.unwrap() as u32, ..Default::default() }; - let mut offset = 0x100u32; let mut out = buf_writer(&args.dol_file)?; - out.seek(SeekFrom::Start(offset as u64))?; - - // Text sections - for (_, section) in obj.sections.iter().filter(|(_, s)| s.kind == ObjSectionKind::Code) { - log::debug!("Processing text section '{}'", section.name); - let address = section.address as u32; - let size = align32(section.size as u32); - 
*header.text_sections.get_mut(header.text_section_count).ok_or_else(|| { - anyhow!("Too many text sections (while processing '{}')", section.name) - })? = DolSection { offset, address, size }; - header.text_section_count += 1; - write_aligned(&mut out, §ion.data, size)?; - offset += size; - } - - // Data sections - for (_, section) in obj - .sections - .iter() - .filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)) - { - log::debug!("Processing data section '{}'", section.name); - let address = section.address as u32; - let size = align32(section.size as u32); - *header.data_sections.get_mut(header.data_section_count).ok_or_else(|| { - anyhow!("Too many data sections (while processing '{}')", section.name) - })? = DolSection { offset, address, size }; - header.data_section_count += 1; - write_aligned(&mut out, §ion.data, size)?; - offset += size; - } - - // BSS sections - for (_, section) in obj.sections.iter().filter(|(_, s)| s.kind == ObjSectionKind::Bss) { - let address = section.address as u32; - let size = section.size as u32; - if header.bss_address == 0 { - header.bss_address = address; - } - header.bss_size = (address + size) - header.bss_address; - } - - // Offsets - out.rewind()?; - for section in &header.text_sections { - out.write_all(§ion.offset.to_be_bytes())?; - } - for section in &header.data_sections { - out.write_all(§ion.offset.to_be_bytes())?; - } - - // Addresses - for section in &header.text_sections { - out.write_all(§ion.address.to_be_bytes())?; - } - for section in &header.data_sections { - out.write_all(§ion.address.to_be_bytes())?; - } - - // Sizes - for section in &header.text_sections { - out.write_all(§ion.size.to_be_bytes())?; - } - for section in &header.data_sections { - out.write_all(§ion.size.to_be_bytes())?; - } - - // BSS + entry - out.write_all(&header.bss_address.to_be_bytes())?; - out.write_all(&header.bss_size.to_be_bytes())?; - out.write_all(&header.entry_point.to_be_bytes())?; - - // Done! 
- out.flush()?; + write_dol(&obj, &mut out)?; Ok(()) } diff --git a/src/cmd/extab.rs b/src/cmd/extab.rs new file mode 100644 index 0000000..07c0a36 --- /dev/null +++ b/src/cmd/extab.rs @@ -0,0 +1,69 @@ +use std::io::Write; + +use anyhow::{Context, Result}; +use argp::FromArgs; +use typed_path::Utf8NativePathBuf; + +use crate::{ + util, + util::{ + dol::{process_dol, write_dol}, + elf::{is_elf_file, process_elf, write_elf}, + file::buf_writer, + path::native_path, + }, + vfs::open_file, +}; + +#[derive(FromArgs, PartialEq, Debug)] +/// Commands for processing extab (exception table) data. +#[argp(subcommand, name = "extab")] +pub struct Args { + #[argp(subcommand)] + command: SubCommand, +} + +#[derive(FromArgs, PartialEq, Debug)] +#[argp(subcommand)] +enum SubCommand { + Clean(CleanArgs), +} + +#[derive(FromArgs, PartialEq, Eq, Debug)] +/// Rewrites extab data in a DOL or ELF file, zeroing out any uninitialized padding bytes. +#[argp(subcommand, name = "clean")] +pub struct CleanArgs { + #[argp(positional, from_str_fn(native_path))] + /// path to input file + input: Utf8NativePathBuf, + #[argp(positional, from_str_fn(native_path))] + /// path to output file + output: Utf8NativePathBuf, +} + +pub fn run(args: Args) -> Result<()> { + match args.command { + SubCommand::Clean(clean_args) => clean_extab(clean_args), + } +} + +fn clean_extab(args: CleanArgs) -> Result<()> { + let is_elf = is_elf_file(&args.input)?; + let mut obj = if is_elf { + process_elf(&args.input)? + } else { + let mut file = open_file(&args.input, true)?; + let name = args.input.file_stem().unwrap_or_default(); + process_dol(file.map()?, name)? 
+ }; + let num_cleaned = util::extab::clean_extab(&mut obj)?; + tracing::debug!("Cleaned {num_cleaned} extab symbols"); + let mut out = buf_writer(&args.output)?; + if is_elf { + let data = write_elf(&obj, false)?; + out.write_all(&data).context("Failed to write ELF")?; + } else { + write_dol(&obj, &mut out).context("Failed to write DOL")?; + } + Ok(()) +} diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index e9a3e72..2162501 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -6,6 +6,7 @@ pub mod dol; pub mod dwarf; pub mod elf; pub mod elf2dol; +pub mod extab; pub mod map; pub mod nlzss; pub mod rarc; diff --git a/src/main.rs b/src/main.rs index 67a64b8..308c6bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -96,6 +96,7 @@ enum SubCommand { Dwarf(cmd::dwarf::Args), Elf(cmd::elf::Args), Elf2Dol(cmd::elf2dol::Args), + Extab(cmd::extab::Args), Map(cmd::map::Args), Nlzss(cmd::nlzss::Args), Rarc(cmd::rarc::Args), @@ -172,6 +173,7 @@ fn main() { SubCommand::Dwarf(c_args) => cmd::dwarf::run(c_args), SubCommand::Elf(c_args) => cmd::elf::run(c_args), SubCommand::Elf2Dol(c_args) => cmd::elf2dol::run(c_args), + SubCommand::Extab(c_args) => cmd::extab::run(c_args), SubCommand::Map(c_args) => cmd::map::run(c_args), SubCommand::Nlzss(c_args) => cmd::nlzss::run(c_args), SubCommand::Rarc(c_args) => cmd::rarc::run(c_args), diff --git a/src/util/dol.rs b/src/util/dol.rs index 7fd9d86..99b58a1 100644 --- a/src/util/dol.rs +++ b/src/util/dol.rs @@ -1,7 +1,7 @@ use std::{ collections::BTreeMap, io, - io::{Cursor, Read, Seek}, + io::{Cursor, Read, Seek, SeekFrom, Write}, }; use anyhow::{anyhow, bail, ensure, Result}; @@ -16,7 +16,7 @@ use crate::{ util::{ alf::{AlfFile, AlfSymbol, ALF_MAGIC}, align_up, - reader::{skip_bytes, Endian, FromReader}, + reader::{skip_bytes, Endian, FromReader, ToWriter}, }, }; @@ -131,7 +131,7 @@ impl FromReader for DolFile { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct DolHeader { pub text_offs: [u32; MAX_TEXT_SECTIONS], pub 
data_offs: [u32; MAX_DATA_SECTIONS], @@ -167,6 +167,28 @@ impl FromReader for DolHeader { } } +impl ToWriter for DolHeader { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.text_offs.to_writer(writer, e)?; + self.data_offs.to_writer(writer, e)?; + self.text_addrs.to_writer(writer, e)?; + self.data_addrs.to_writer(writer, e)?; + self.text_sizes.to_writer(writer, e)?; + self.data_sizes.to_writer(writer, e)?; + self.bss_addr.to_writer(writer, e)?; + self.bss_size.to_writer(writer, e)?; + self.entry_point.to_writer(writer, e)?; + // padding + for _ in 0..0x1C { + writer.write_all(&[0])?; + } + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + impl DolLike for DolFile { fn sections(&self) -> &[DolSection] { &self.sections } @@ -847,3 +869,79 @@ fn validate_eti_init_info( } Ok(false) } + +pub fn write_dol(obj: &ObjInfo, out: &mut W) -> Result<()> +where W: Write + Seek + ?Sized { + let mut header = DolHeader { entry_point: obj.entry.unwrap() as u32, ..Default::default() }; + let mut offset = 0x100u32; + out.seek(SeekFrom::Start(offset as u64))?; + + // Text sections + for (num_sections, (_, section)) in + obj.sections.iter().filter(|(_, s)| s.kind == ObjSectionKind::Code).enumerate() + { + log::debug!("Processing text section '{}'", section.name); + let size = align32(section.size as u32); + if num_sections >= MAX_TEXT_SECTIONS { + bail!("Too many text sections (while processing '{}')", section.name); + } + header.text_offs[num_sections] = offset; + header.text_addrs[num_sections] = section.address as u32; + header.text_sizes[num_sections] = size; + write_aligned(out, §ion.data, size)?; + offset += size; + } + + // Data sections + for (num_sections, (_, section)) in obj + .sections + .iter() + .filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)) + .enumerate() + { + log::debug!("Processing data section '{}'", section.name); + let size = align32(section.size as 
u32); + if num_sections >= MAX_DATA_SECTIONS { + bail!("Too many data sections (while processing '{}')", section.name); + } + header.data_offs[num_sections] = offset; + header.data_addrs[num_sections] = section.address as u32; + header.data_sizes[num_sections] = size; + write_aligned(out, §ion.data, size)?; + offset += size; + } + + // BSS sections + for (_, section) in obj.sections.iter().filter(|(_, s)| s.kind == ObjSectionKind::Bss) { + let address = section.address as u32; + let size = section.size as u32; + if header.bss_addr == 0 { + header.bss_addr = address; + } + header.bss_size = (address + size) - header.bss_addr; + } + + // Header + out.rewind()?; + header.to_writer(out, Endian::Big)?; + + // Done! + out.flush()?; + Ok(()) +} + +#[inline] +const fn align32(x: u32) -> u32 { (x + 31) & !31 } + +const ZERO_BUF: [u8; 32] = [0u8; 32]; + +#[inline] +fn write_aligned(out: &mut T, bytes: &[u8], aligned_size: u32) -> std::io::Result<()> +where T: Write + ?Sized { + out.write_all(bytes)?; + let padding = aligned_size - bytes.len() as u32; + if padding > 0 { + out.write_all(&ZERO_BUF[0..padding as usize])?; + } + Ok(()) +} diff --git a/src/util/elf.rs b/src/util/elf.rs index 846d48e..7872b75 100644 --- a/src/util/elf.rs +++ b/src/util/elf.rs @@ -20,7 +20,7 @@ use object::{ Architecture, Endianness, File, Object, ObjectKind, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget, SectionKind, Symbol, SymbolKind, SymbolScope, SymbolSection, }; -use typed_path::Utf8NativePath; +use typed_path::{Utf8NativePath, Utf8NativePathBuf}; use crate::{ array_ref, @@ -275,8 +275,8 @@ pub fn process_elf(path: &Utf8NativePath) -> Result { continue; } symbol_indexes.push(Some(symbols.len() as ObjSymbolIndex)); - let align = mw_comment.as_ref().map(|(_, vec)| vec[symbol.index().0].align); - symbols.push(to_obj_symbol(&obj_file, &symbol, §ion_indexes, align)?); + let comment_sym = mw_comment.as_ref().map(|(_, vec)| &vec[symbol.index().0 - 1]); + 
symbols.push(to_obj_symbol(&obj_file, &symbol, §ion_indexes, comment_sym)?); } let mut link_order = Vec::::new(); @@ -374,6 +374,7 @@ fn load_comment(obj_file: &File) -> Result)>> let mut reader = Cursor::new(&*data); let header = MWComment::from_reader(&mut reader, Endian::Big)?; log::debug!("Loaded .comment section header {:?}", header); + CommentSym::from_reader(&mut reader, Endian::Big)?; // Null symbol let mut comment_syms = Vec::with_capacity(obj_file.symbols().count()); for symbol in obj_file.symbols() { let comment_sym = CommentSym::from_reader(&mut reader, Endian::Big)?; @@ -861,7 +862,7 @@ fn to_obj_symbol( obj_file: &object::File<'_>, symbol: &Symbol<'_, '_>, section_indexes: &[Option], - align: Option, + comment_sym: Option<&CommentSym>, ) -> Result { let section = match symbol.section_index() { Some(idx) => Some(obj_file.section_by_index(idx)?), @@ -891,6 +892,9 @@ fn to_obj_symbol( if symbol.scope() == SymbolScope::Linkage { flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Hidden); } + if comment_sym.is_some_and(|c| c.active_flags & 0x8 != 0) { + flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Exported); + } let section_idx = section.as_ref().and_then(|section| section_indexes[section.index().0]); Ok(ObjSymbol { name: name.to_string(), @@ -907,7 +911,7 @@ fn to_obj_symbol( SymbolKind::Section => ObjSymbolKind::Section, _ => bail!("Unsupported symbol kind: {:?}", symbol), }, - align, + align: comment_sym.map(|c| c.align), ..Default::default() }) } @@ -1005,3 +1009,10 @@ fn write_relocatable_section_data(w: &mut Writer, section: &ObjSection) -> Resul w.write(§ion.data[current_address..]); Ok(()) } + +pub fn is_elf_file(path: &Utf8NativePathBuf) -> Result { + let mut file = open_file(path, true)?; + let mut magic = [0; 4]; + file.read_exact(&mut magic)?; + Ok(magic == elf::ELFMAG) +} diff --git a/src/util/extab.rs b/src/util/extab.rs new file mode 100644 index 0000000..0c6dfff --- /dev/null +++ b/src/util/extab.rs @@ -0,0 +1,51 @@ +use 
anyhow::{Context, Result}; +use itertools::Itertools; + +use crate::obj::ObjInfo; + +pub fn clean_extab(obj: &mut ObjInfo) -> Result { + let (extab_section_index, extab_section) = obj + .sections + .iter_mut() + .find(|(_, s)| s.name == "extab") + .ok_or_else(|| anyhow::anyhow!("No extab section found"))?; + let mut num_cleaned = 0; + for (_symbol_index, symbol) in obj + .symbols + .for_section(extab_section_index) + .filter(|(_, s)| s.size > 0) + .sorted_by_key(|(_, s)| s.address) + { + let data = extab_section.symbol_data(symbol)?; + let decoded = cwextab::decode_extab(data).with_context(|| { + format!( + "Failed to decode {} (extab {:#010X}..{:#010X})", + symbol.name, + symbol.address, + symbol.address + symbol.size + ) + })?; + let mut updated = false; + for action in &decoded.exception_actions { + let section_offset = + (symbol.address - extab_section.address) as usize + action.action_offset as usize; + let clean_data = action.get_exaction_bytes(true); + let orig_data = + &mut extab_section.data[section_offset..section_offset + clean_data.len()]; + if orig_data != clean_data { + updated = true; + orig_data.copy_from_slice(&clean_data); + } + } + if updated { + tracing::debug!( + "Removed uninitialized bytes in {} (extab {:#010X}..{:#010X})", + symbol.name, + symbol.address, + symbol.address + symbol.size + ); + num_cleaned += 1; + } + } + Ok(num_cleaned) +} diff --git a/src/util/mod.rs b/src/util/mod.rs index a4bf1c4..828604d 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -10,6 +10,7 @@ pub mod diff; pub mod dol; pub mod dwarf; pub mod elf; +pub mod extab; pub mod file; pub mod lcf; pub mod map; diff --git a/src/util/reader.rs b/src/util/reader.rs index db8f264..ca3a61b 100644 --- a/src/util/reader.rs +++ b/src/util/reader.rs @@ -263,6 +263,18 @@ impl ToWriter for Vec { fn write_size(&self) -> usize { self.len() } } +impl ToWriter for [u32; N] { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + for &value 
in self { + value.to_writer(writer, e)?; + } + Ok(()) + } + + fn write_size(&self) -> usize { N * u32::STATIC_SIZE } +} + pub fn write_vec(writer: &mut W, vec: &[T], e: Endian) -> io::Result<()> where T: ToWriter,