From 9cafb77d3fa6a918b894c69d9c56684514475da1 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Sun, 1 Jun 2025 20:22:08 -0600 Subject: [PATCH] Add `dtk extab clean` & config.yml `clean_extab` It was discovered that certain extab actions contain uninitialized data from the compiler. This provides a way to zero out uninitialized data in DOL or object files. Usage: `dtk extab clean input.dol output.dol` A `clean_extab` setting was added to config.yml, so projects can link the cleaned objects and target the cleaned DOL hash. --- Cargo.lock | 72 +++++++++++++++++++------------ Cargo.toml | 4 +- src/cmd/dol.rs | 46 ++++++++++---------- src/cmd/elf2dol.rs | 86 +++---------------------------------- src/cmd/extab.rs | 69 ++++++++++++++++++++++++++++++ src/cmd/mod.rs | 1 + src/main.rs | 2 + src/util/dol.rs | 104 +++++++++++++++++++++++++++++++++++++++++++-- src/util/elf.rs | 21 ++++++--- src/util/extab.rs | 51 ++++++++++++++++++++++ src/util/mod.rs | 1 + src/util/reader.rs | 12 ++++++ 12 files changed, 331 insertions(+), 138 deletions(-) create mode 100644 src/cmd/extab.rs create mode 100644 src/util/extab.rs diff --git a/Cargo.lock b/Cargo.lock index bb2e99e..27939e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -339,16 +339,16 @@ checksum = "c2e06f9bce634a3c898eb1e5cb949ff63133cbb218af93cc9b38b31d6f3ea285" [[package]] name = "cwextab" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "003567b96ff9d8ac3275831650385891bca370092937be625157778b1e58f755" +checksum = "701f6867c92e1b64ddcc4b416194be3121b8f7ba5352a70ed5fd3295a7d8e0e1" dependencies = [ - "thiserror", + "thiserror 2.0.12", ] [[package]] name = "decomp-toolkit" -version = "1.5.2" +version = "1.6.0" dependencies = [ "aes", "anyhow", @@ -873,7 +873,7 @@ dependencies = [ "miniz_oxide", "rayon", "sha1", - "thiserror", + "thiserror 1.0.64", "zerocopy", "zstd", ] @@ -946,7 +946,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.79", + 
"syn 2.0.101", ] [[package]] @@ -1110,7 +1110,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" dependencies = [ "proc-macro2", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1124,9 +1124,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.88" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -1158,7 +1158,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.79", + "syn 2.0.101", "tempfile", ] @@ -1172,7 +1172,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1374,7 +1374,7 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1385,7 +1385,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1408,7 +1408,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1527,7 +1527,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1549,7 +1549,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1584,9 +1584,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.79" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = 
"8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -1609,7 +1609,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "thiserror", + "thiserror 1.0.64", "walkdir", ] @@ -1632,7 +1632,16 @@ version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.64", +] + +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl 2.0.12", ] [[package]] @@ -1643,7 +1652,18 @@ checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", ] [[package]] @@ -1692,7 +1712,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1755,7 +1775,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] @@ -1853,7 +1873,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", "wasm-bindgen-shared", ] @@ -1875,7 +1895,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2088,7 +2108,7 @@ checksum = "3ca22c4ad176b37bd81a565f66635bde3d654fe6832730c3e52e1018ae1655ee" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.101", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3ef2afa..8ef7bbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ name = "decomp-toolkit" description = "Yet another GameCube/Wii decompilation toolkit." authors = ["Luke Street "] license = "MIT OR Apache-2.0" -version = "1.5.2" +version = "1.6.0" edition = "2021" publish = false repository = "https://github.com/encounter/decomp-toolkit" @@ -37,7 +37,7 @@ typed-path = "0.9" cbc = "0.1" crossterm = "0.28" cwdemangle = "1.0" -cwextab = "1.0" +cwextab = "1.1" dyn-clone = "1.0" enable-ansi-support = "0.2" filetime = "0.2" diff --git a/src/cmd/dol.rs b/src/cmd/dol.rs index eccd7ec..f8c6357 100644 --- a/src/cmd/dol.rs +++ b/src/cmd/dol.rs @@ -47,6 +47,7 @@ use crate::{ diff::{calc_diff_ranges, print_diff, process_code}, dol::process_dol, elf::{process_elf, write_elf}, + extab::clean_extab, file::{ buf_copy_with_hash, buf_writer, check_hash_str, touch, verify_hash, FileIterator, FileReadInfo, @@ -293,6 +294,9 @@ pub struct ModuleConfig { pub block_relocations: Vec, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub add_relocations: Vec, + /// Process exception tables and zero out uninitialized data. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub clean_extab: Option, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] @@ -818,17 +822,29 @@ struct AnalyzeResult { splits_cache: Option, } -fn load_analyze_dol(config: &ProjectConfig, object_base: &ObjectBase) -> Result { - let object_path = object_base.join(&config.base.object); +fn load_dol_module( + config: &ModuleConfig, + object_base: &ObjectBase, +) -> Result<(ObjInfo, Utf8NativePathBuf)> { + let object_path = object_base.join(&config.object); log::debug!("Loading {}", object_path); let mut obj = { - let mut file = object_base.open(&config.base.object)?; + let mut file = object_base.open(&config.object)?; let data = file.map()?; - if let Some(hash_str) = &config.base.hash { + if let Some(hash_str) = &config.hash { verify_hash(data, hash_str)?; } - process_dol(data, config.base.name())? + process_dol(data, config.name())? }; + if config.clean_extab.unwrap_or(false) { + log::debug!("Cleaning extab for {}", config.name()); + clean_extab(&mut obj)?; + } + Ok((obj, object_path)) +} + +fn load_analyze_dol(config: &ProjectConfig, object_base: &ObjectBase) -> Result { + let (mut obj, object_path) = load_dol_module(&config.base, object_base)?; let mut dep = vec![object_path]; if let Some(comment_version) = config.mw_comment_version { @@ -1658,15 +1674,7 @@ fn diff(args: DiffArgs) -> Result<()> { let config: ProjectConfig = serde_yaml::from_reader(config_file.as_mut())?; let object_base = find_object_base(&config)?; - log::info!("Loading {}", object_base.join(&config.base.object)); - let mut obj = { - let mut file = object_base.open(&config.base.object)?; - let data = file.map()?; - if let Some(hash_str) = &config.base.hash { - verify_hash(data, hash_str)?; - } - process_dol(data, config.base.name())? 
- }; + let (mut obj, _object_path) = load_dol_module(&config.base, &object_base)?; if let Some(symbols_path) = &config.base.symbols { apply_symbols_file(&symbols_path.with_encoding(), &mut obj)?; @@ -1882,15 +1890,7 @@ fn apply(args: ApplyArgs) -> Result<()> { let config: ProjectConfig = serde_yaml::from_reader(config_file.as_mut())?; let object_base = find_object_base(&config)?; - log::info!("Loading {}", object_base.join(&config.base.object)); - let mut obj = { - let mut file = object_base.open(&config.base.object)?; - let data = file.map()?; - if let Some(hash_str) = &config.base.hash { - verify_hash(data, hash_str)?; - } - process_dol(data, config.base.name())? - }; + let (mut obj, _object_path) = load_dol_module(&config.base, &object_base)?; let Some(symbols_path) = &config.base.symbols else { bail!("No symbols file specified in config"); diff --git a/src/cmd/elf2dol.rs b/src/cmd/elf2dol.rs index 8fc7cb7..e293414 100644 --- a/src/cmd/elf2dol.rs +++ b/src/cmd/elf2dol.rs @@ -6,8 +6,12 @@ use object::{Architecture, Endianness, Object, ObjectKind, ObjectSection, Sectio use typed_path::Utf8NativePathBuf; use crate::{ - obj::ObjSectionKind, - util::{alf::ALF_MAGIC, dol::process_dol, file::buf_writer, path::native_path}, + util::{ + alf::ALF_MAGIC, + dol::{process_dol, write_dol}, + file::buf_writer, + path::native_path, + }, vfs::open_file, }; @@ -161,84 +165,8 @@ pub fn run(args: Args) -> Result<()> { fn convert_alf(args: Args, data: &[u8]) -> Result<()> { let obj = process_dol(data, "")?; - - let mut header = DolHeader { entry_point: obj.entry.unwrap() as u32, ..Default::default() }; - let mut offset = 0x100u32; let mut out = buf_writer(&args.dol_file)?; - out.seek(SeekFrom::Start(offset as u64))?; - - // Text sections - for (_, section) in obj.sections.iter().filter(|(_, s)| s.kind == ObjSectionKind::Code) { - log::debug!("Processing text section '{}'", section.name); - let address = section.address as u32; - let size = align32(section.size as u32); - 
*header.text_sections.get_mut(header.text_section_count).ok_or_else(|| { - anyhow!("Too many text sections (while processing '{}')", section.name) - })? = DolSection { offset, address, size }; - header.text_section_count += 1; - write_aligned(&mut out, §ion.data, size)?; - offset += size; - } - - // Data sections - for (_, section) in obj - .sections - .iter() - .filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)) - { - log::debug!("Processing data section '{}'", section.name); - let address = section.address as u32; - let size = align32(section.size as u32); - *header.data_sections.get_mut(header.data_section_count).ok_or_else(|| { - anyhow!("Too many data sections (while processing '{}')", section.name) - })? = DolSection { offset, address, size }; - header.data_section_count += 1; - write_aligned(&mut out, §ion.data, size)?; - offset += size; - } - - // BSS sections - for (_, section) in obj.sections.iter().filter(|(_, s)| s.kind == ObjSectionKind::Bss) { - let address = section.address as u32; - let size = section.size as u32; - if header.bss_address == 0 { - header.bss_address = address; - } - header.bss_size = (address + size) - header.bss_address; - } - - // Offsets - out.rewind()?; - for section in &header.text_sections { - out.write_all(§ion.offset.to_be_bytes())?; - } - for section in &header.data_sections { - out.write_all(§ion.offset.to_be_bytes())?; - } - - // Addresses - for section in &header.text_sections { - out.write_all(§ion.address.to_be_bytes())?; - } - for section in &header.data_sections { - out.write_all(§ion.address.to_be_bytes())?; - } - - // Sizes - for section in &header.text_sections { - out.write_all(§ion.size.to_be_bytes())?; - } - for section in &header.data_sections { - out.write_all(§ion.size.to_be_bytes())?; - } - - // BSS + entry - out.write_all(&header.bss_address.to_be_bytes())?; - out.write_all(&header.bss_size.to_be_bytes())?; - out.write_all(&header.entry_point.to_be_bytes())?; - - // Done! 
- out.flush()?; + write_dol(&obj, &mut out)?; Ok(()) } diff --git a/src/cmd/extab.rs b/src/cmd/extab.rs new file mode 100644 index 0000000..07c0a36 --- /dev/null +++ b/src/cmd/extab.rs @@ -0,0 +1,69 @@ +use std::io::Write; + +use anyhow::{Context, Result}; +use argp::FromArgs; +use typed_path::Utf8NativePathBuf; + +use crate::{ + util, + util::{ + dol::{process_dol, write_dol}, + elf::{is_elf_file, process_elf, write_elf}, + file::buf_writer, + path::native_path, + }, + vfs::open_file, +}; + +#[derive(FromArgs, PartialEq, Debug)] +/// Commands for processing extab (exception table) data. +#[argp(subcommand, name = "extab")] +pub struct Args { + #[argp(subcommand)] + command: SubCommand, +} + +#[derive(FromArgs, PartialEq, Debug)] +#[argp(subcommand)] +enum SubCommand { + Clean(CleanArgs), +} + +#[derive(FromArgs, PartialEq, Eq, Debug)] +/// Rewrites extab data in a DOL or ELF file, zeroing out any uninitialized padding bytes. +#[argp(subcommand, name = "clean")] +pub struct CleanArgs { + #[argp(positional, from_str_fn(native_path))] + /// path to input file + input: Utf8NativePathBuf, + #[argp(positional, from_str_fn(native_path))] + /// path to output file + output: Utf8NativePathBuf, +} + +pub fn run(args: Args) -> Result<()> { + match args.command { + SubCommand::Clean(clean_args) => clean_extab(clean_args), + } +} + +fn clean_extab(args: CleanArgs) -> Result<()> { + let is_elf = is_elf_file(&args.input)?; + let mut obj = if is_elf { + process_elf(&args.input)? + } else { + let mut file = open_file(&args.input, true)?; + let name = args.input.file_stem().unwrap_or_default(); + process_dol(file.map()?, name)? 
+ }; + let num_cleaned = util::extab::clean_extab(&mut obj)?; + tracing::debug!("Cleaned {num_cleaned} extab symbols"); + let mut out = buf_writer(&args.output)?; + if is_elf { + let data = write_elf(&obj, false)?; + out.write_all(&data).context("Failed to write ELF")?; + } else { + write_dol(&obj, &mut out).context("Failed to write DOL")?; + } + Ok(()) +} diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index e9a3e72..2162501 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -6,6 +6,7 @@ pub mod dol; pub mod dwarf; pub mod elf; pub mod elf2dol; +pub mod extab; pub mod map; pub mod nlzss; pub mod rarc; diff --git a/src/main.rs b/src/main.rs index 67a64b8..308c6bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -96,6 +96,7 @@ enum SubCommand { Dwarf(cmd::dwarf::Args), Elf(cmd::elf::Args), Elf2Dol(cmd::elf2dol::Args), + Extab(cmd::extab::Args), Map(cmd::map::Args), Nlzss(cmd::nlzss::Args), Rarc(cmd::rarc::Args), @@ -172,6 +173,7 @@ fn main() { SubCommand::Dwarf(c_args) => cmd::dwarf::run(c_args), SubCommand::Elf(c_args) => cmd::elf::run(c_args), SubCommand::Elf2Dol(c_args) => cmd::elf2dol::run(c_args), + SubCommand::Extab(c_args) => cmd::extab::run(c_args), SubCommand::Map(c_args) => cmd::map::run(c_args), SubCommand::Nlzss(c_args) => cmd::nlzss::run(c_args), SubCommand::Rarc(c_args) => cmd::rarc::run(c_args), diff --git a/src/util/dol.rs b/src/util/dol.rs index 7fd9d86..99b58a1 100644 --- a/src/util/dol.rs +++ b/src/util/dol.rs @@ -1,7 +1,7 @@ use std::{ collections::BTreeMap, io, - io::{Cursor, Read, Seek}, + io::{Cursor, Read, Seek, SeekFrom, Write}, }; use anyhow::{anyhow, bail, ensure, Result}; @@ -16,7 +16,7 @@ use crate::{ util::{ alf::{AlfFile, AlfSymbol, ALF_MAGIC}, align_up, - reader::{skip_bytes, Endian, FromReader}, + reader::{skip_bytes, Endian, FromReader, ToWriter}, }, }; @@ -131,7 +131,7 @@ impl FromReader for DolFile { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct DolHeader { pub text_offs: [u32; MAX_TEXT_SECTIONS], pub 
data_offs: [u32; MAX_DATA_SECTIONS], @@ -167,6 +167,28 @@ impl FromReader for DolHeader { } } +impl ToWriter for DolHeader { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + self.text_offs.to_writer(writer, e)?; + self.data_offs.to_writer(writer, e)?; + self.text_addrs.to_writer(writer, e)?; + self.data_addrs.to_writer(writer, e)?; + self.text_sizes.to_writer(writer, e)?; + self.data_sizes.to_writer(writer, e)?; + self.bss_addr.to_writer(writer, e)?; + self.bss_size.to_writer(writer, e)?; + self.entry_point.to_writer(writer, e)?; + // padding + for _ in 0..0x1C { + writer.write_all(&[0])?; + } + Ok(()) + } + + fn write_size(&self) -> usize { Self::STATIC_SIZE } +} + impl DolLike for DolFile { fn sections(&self) -> &[DolSection] { &self.sections } @@ -847,3 +869,79 @@ fn validate_eti_init_info( } Ok(false) } + +pub fn write_dol(obj: &ObjInfo, out: &mut W) -> Result<()> +where W: Write + Seek + ?Sized { + let mut header = DolHeader { entry_point: obj.entry.unwrap() as u32, ..Default::default() }; + let mut offset = 0x100u32; + out.seek(SeekFrom::Start(offset as u64))?; + + // Text sections + for (num_sections, (_, section)) in + obj.sections.iter().filter(|(_, s)| s.kind == ObjSectionKind::Code).enumerate() + { + log::debug!("Processing text section '{}'", section.name); + let size = align32(section.size as u32); + if num_sections >= MAX_TEXT_SECTIONS { + bail!("Too many text sections (while processing '{}')", section.name); + } + header.text_offs[num_sections] = offset; + header.text_addrs[num_sections] = section.address as u32; + header.text_sizes[num_sections] = size; + write_aligned(out, §ion.data, size)?; + offset += size; + } + + // Data sections + for (num_sections, (_, section)) in obj + .sections + .iter() + .filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)) + .enumerate() + { + log::debug!("Processing data section '{}'", section.name); + let size = align32(section.size as 
u32); + if num_sections >= MAX_DATA_SECTIONS { + bail!("Too many data sections (while processing '{}')", section.name); + } + header.data_offs[num_sections] = offset; + header.data_addrs[num_sections] = section.address as u32; + header.data_sizes[num_sections] = size; + write_aligned(out, §ion.data, size)?; + offset += size; + } + + // BSS sections + for (_, section) in obj.sections.iter().filter(|(_, s)| s.kind == ObjSectionKind::Bss) { + let address = section.address as u32; + let size = section.size as u32; + if header.bss_addr == 0 { + header.bss_addr = address; + } + header.bss_size = (address + size) - header.bss_addr; + } + + // Header + out.rewind()?; + header.to_writer(out, Endian::Big)?; + + // Done! + out.flush()?; + Ok(()) +} + +#[inline] +const fn align32(x: u32) -> u32 { (x + 31) & !31 } + +const ZERO_BUF: [u8; 32] = [0u8; 32]; + +#[inline] +fn write_aligned(out: &mut T, bytes: &[u8], aligned_size: u32) -> std::io::Result<()> +where T: Write + ?Sized { + out.write_all(bytes)?; + let padding = aligned_size - bytes.len() as u32; + if padding > 0 { + out.write_all(&ZERO_BUF[0..padding as usize])?; + } + Ok(()) +} diff --git a/src/util/elf.rs b/src/util/elf.rs index 846d48e..7872b75 100644 --- a/src/util/elf.rs +++ b/src/util/elf.rs @@ -20,7 +20,7 @@ use object::{ Architecture, Endianness, File, Object, ObjectKind, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget, SectionKind, Symbol, SymbolKind, SymbolScope, SymbolSection, }; -use typed_path::Utf8NativePath; +use typed_path::{Utf8NativePath, Utf8NativePathBuf}; use crate::{ array_ref, @@ -275,8 +275,8 @@ pub fn process_elf(path: &Utf8NativePath) -> Result { continue; } symbol_indexes.push(Some(symbols.len() as ObjSymbolIndex)); - let align = mw_comment.as_ref().map(|(_, vec)| vec[symbol.index().0].align); - symbols.push(to_obj_symbol(&obj_file, &symbol, §ion_indexes, align)?); + let comment_sym = mw_comment.as_ref().map(|(_, vec)| &vec[symbol.index().0 - 1]); + 
symbols.push(to_obj_symbol(&obj_file, &symbol, §ion_indexes, comment_sym)?); } let mut link_order = Vec::::new(); @@ -374,6 +374,7 @@ fn load_comment(obj_file: &File) -> Result)>> let mut reader = Cursor::new(&*data); let header = MWComment::from_reader(&mut reader, Endian::Big)?; log::debug!("Loaded .comment section header {:?}", header); + CommentSym::from_reader(&mut reader, Endian::Big)?; // Null symbol let mut comment_syms = Vec::with_capacity(obj_file.symbols().count()); for symbol in obj_file.symbols() { let comment_sym = CommentSym::from_reader(&mut reader, Endian::Big)?; @@ -861,7 +862,7 @@ fn to_obj_symbol( obj_file: &object::File<'_>, symbol: &Symbol<'_, '_>, section_indexes: &[Option], - align: Option, + comment_sym: Option<&CommentSym>, ) -> Result { let section = match symbol.section_index() { Some(idx) => Some(obj_file.section_by_index(idx)?), @@ -891,6 +892,9 @@ fn to_obj_symbol( if symbol.scope() == SymbolScope::Linkage { flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Hidden); } + if comment_sym.is_some_and(|c| c.active_flags & 0x8 != 0) { + flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Exported); + } let section_idx = section.as_ref().and_then(|section| section_indexes[section.index().0]); Ok(ObjSymbol { name: name.to_string(), @@ -907,7 +911,7 @@ fn to_obj_symbol( SymbolKind::Section => ObjSymbolKind::Section, _ => bail!("Unsupported symbol kind: {:?}", symbol), }, - align, + align: comment_sym.map(|c| c.align), ..Default::default() }) } @@ -1005,3 +1009,10 @@ fn write_relocatable_section_data(w: &mut Writer, section: &ObjSection) -> Resul w.write(§ion.data[current_address..]); Ok(()) } + +pub fn is_elf_file(path: &Utf8NativePathBuf) -> Result { + let mut file = open_file(path, true)?; + let mut magic = [0; 4]; + file.read_exact(&mut magic)?; + Ok(magic == elf::ELFMAG) +} diff --git a/src/util/extab.rs b/src/util/extab.rs new file mode 100644 index 0000000..0c6dfff --- /dev/null +++ b/src/util/extab.rs @@ -0,0 +1,51 @@ +use 
anyhow::{Context, Result}; +use itertools::Itertools; + +use crate::obj::ObjInfo; + +pub fn clean_extab(obj: &mut ObjInfo) -> Result { + let (extab_section_index, extab_section) = obj + .sections + .iter_mut() + .find(|(_, s)| s.name == "extab") + .ok_or_else(|| anyhow::anyhow!("No extab section found"))?; + let mut num_cleaned = 0; + for (_symbol_index, symbol) in obj + .symbols + .for_section(extab_section_index) + .filter(|(_, s)| s.size > 0) + .sorted_by_key(|(_, s)| s.address) + { + let data = extab_section.symbol_data(symbol)?; + let decoded = cwextab::decode_extab(data).with_context(|| { + format!( + "Failed to decode {} (extab {:#010X}..{:#010X})", + symbol.name, + symbol.address, + symbol.address + symbol.size + ) + })?; + let mut updated = false; + for action in &decoded.exception_actions { + let section_offset = + (symbol.address - extab_section.address) as usize + action.action_offset as usize; + let clean_data = action.get_exaction_bytes(true); + let orig_data = + &mut extab_section.data[section_offset..section_offset + clean_data.len()]; + if orig_data != clean_data { + updated = true; + orig_data.copy_from_slice(&clean_data); + } + } + if updated { + tracing::debug!( + "Removed uninitialized bytes in {} (extab {:#010X}..{:#010X})", + symbol.name, + symbol.address, + symbol.address + symbol.size + ); + num_cleaned += 1; + } + } + Ok(num_cleaned) +} diff --git a/src/util/mod.rs b/src/util/mod.rs index a4bf1c4..828604d 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -10,6 +10,7 @@ pub mod diff; pub mod dol; pub mod dwarf; pub mod elf; +pub mod extab; pub mod file; pub mod lcf; pub mod map; diff --git a/src/util/reader.rs b/src/util/reader.rs index db8f264..ca3a61b 100644 --- a/src/util/reader.rs +++ b/src/util/reader.rs @@ -263,6 +263,18 @@ impl ToWriter for Vec { fn write_size(&self) -> usize { self.len() } } +impl ToWriter for [u32; N] { + fn to_writer(&self, writer: &mut W, e: Endian) -> io::Result<()> + where W: Write + ?Sized { + for &value 
in self { + value.to_writer(writer, e)?; + } + Ok(()) + } + + fn write_size(&self) -> usize { N * u32::STATIC_SIZE } +} + pub fn write_vec(writer: &mut W, vec: &[T], e: Endian) -> io::Result<()> where T: ToWriter,