From 46cf0be183413ee4b534621790b90e279d21ffdc Mon Sep 17 00:00:00 2001 From: Luke Street Date: Mon, 3 Jun 2024 20:34:35 -0600 Subject: [PATCH] Add U8 (newer .arc) support Supports the U8 .arc format, just like the older RARC format. `u8 list`, `u8 extract` and support for U8 archive paths in config.yml --- Cargo.lock | 9 ++- Cargo.toml | 1 + README.md | 22 +++++- src/cmd/mod.rs | 1 + src/cmd/rarc.rs | 18 ++++- src/cmd/u8_arc.rs | 121 +++++++++++++++++++++++++++++ src/main.rs | 2 + src/util/file.rs | 37 ++++++--- src/util/mod.rs | 9 +++ src/util/u8_arc.rs | 188 +++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 391 insertions(+), 17 deletions(-) create mode 100644 src/cmd/u8_arc.rs create mode 100644 src/util/u8_arc.rs diff --git a/Cargo.lock b/Cargo.lock index 8b15402..fdf91c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -415,6 +415,7 @@ dependencies = [ "tracing-attributes", "tracing-subscriber", "xxhash-rust", + "zerocopy", ] [[package]] @@ -1897,9 +1898,9 @@ checksum = "927da81e25be1e1a2901d59b81b37dd2efd1fc9c9345a55007f09bf5a2d3ee03" [[package]] name = "zerocopy" -version = "0.7.32" +version = "0.7.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" dependencies = [ "byteorder", "zerocopy-derive", @@ -1907,9 +1908,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.7.32" +version = "0.7.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index d68acdc..1f2bdbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,3 +72,4 @@ tracing = "0.1.40" tracing-attributes = "0.1.27" tracing-subscriber = { version = "0.3.18", 
features = ["env-filter"] } xxhash-rust = { version = "0.8.10", features = ["xxh3"] } +zerocopy = { version = "0.7.34", features = ["derive"] } diff --git a/README.md b/README.md index 1f28f20..d3d9708 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,8 @@ project structure and build system that uses decomp-toolkit under the hood. - [nlzss decompress](#nlzss-decompress) - [rarc list](#rarc-list) - [rarc extract](#rarc-extract) + - [u8 list](#u8-list) + - [u8 extract](#u8-extract) - [yay0 decompress](#yay0-decompress) - [yay0 compress](#yay0-compress) - [yaz0 decompress](#yaz0-decompress) @@ -390,7 +392,7 @@ $ dtk nlzss decompress rels/*.lz -o rels ### rarc list -Lists the contents of an RARC archive. +Lists the contents of an RARC (older .arc) archive. ```shell $ dtk rarc list input.arc @@ -398,12 +400,28 @@ $ dtk rarc list input.arc ### rarc extract -Extracts the contents of an RARC archive. +Extracts the contents of an RARC (older .arc) archive. ```shell $ dtk rarc extract input.arc -o output_dir ``` +### u8 list + +Lists the contents of a U8 (newer .arc) archive. + +```shell +$ dtk u8 list input.arc +``` + +### u8 extract + +Extracts the contents of a U8 (newer .arc) archive. + +```shell +$ dtk u8 extract input.arc -o output_dir +``` + ### yay0 decompress Decompresses Yay0-compressed files. 
diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index bb077b0..46e1ab3 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -13,5 +13,6 @@ pub mod rarc; pub mod rel; pub mod rso; pub mod shasum; +pub mod u8_arc; pub mod yay0; pub mod yaz0; diff --git a/src/cmd/rarc.rs b/src/cmd/rarc.rs index d5a7824..3208dd5 100644 --- a/src/cmd/rarc.rs +++ b/src/cmd/rarc.rs @@ -42,6 +42,9 @@ pub struct ExtractArgs { #[argp(option, short = 'o')] /// output directory output: Option, + #[argp(switch, short = 'q')] + /// quiet output + quiet: bool, } pub fn run(args: Args) -> Result<()> { @@ -95,8 +98,19 @@ fn extract(args: ExtractArgs) -> Result<()> { &file.as_slice()[offset as usize..offset as usize + size as usize], )?; let file_path = current_path.join(&name.name); - let output_path = - args.output.as_ref().map(|p| p.join(&file_path)).unwrap_or_else(|| file_path); + let output_path = args + .output + .as_ref() + .map(|p| p.join(&file_path)) + .unwrap_or_else(|| file_path.clone()); + if !args.quiet { + println!( + "Extracting {} to {} ({} bytes)", + file_path.display(), + output_path.display(), + size + ); + } if let Some(parent) = output_path.parent() { DirBuilder::new().recursive(true).create(parent)?; } diff --git a/src/cmd/u8_arc.rs b/src/cmd/u8_arc.rs new file mode 100644 index 0000000..ecadb02 --- /dev/null +++ b/src/cmd/u8_arc.rs @@ -0,0 +1,121 @@ +use std::{borrow::Cow, fs, fs::DirBuilder, path::PathBuf}; + +use anyhow::{anyhow, Context, Result}; +use argp::FromArgs; +use itertools::Itertools; + +use crate::util::{ + file::{decompress_if_needed, map_file}, + u8_arc::{U8Node, U8View}, +}; + +#[derive(FromArgs, PartialEq, Debug)] +/// Commands for processing U8 (arc) files. +#[argp(subcommand, name = "u8")] +pub struct Args { + #[argp(subcommand)] + command: SubCommand, +} + +#[derive(FromArgs, PartialEq, Debug)] +#[argp(subcommand)] +enum SubCommand { + List(ListArgs), + Extract(ExtractArgs), +} + +#[derive(FromArgs, PartialEq, Eq, Debug)] +/// Views U8 (arc) file information. 
+#[argp(subcommand, name = "list")] +pub struct ListArgs { + #[argp(positional)] + /// U8 (arc) file + file: PathBuf, +} + +#[derive(FromArgs, PartialEq, Eq, Debug)] +/// Extracts U8 (arc) file contents. +#[argp(subcommand, name = "extract")] +pub struct ExtractArgs { + #[argp(positional)] + /// U8 (arc) file + file: PathBuf, + #[argp(option, short = 'o')] + /// output directory + output: Option, + #[argp(switch, short = 'q')] + /// quiet output + quiet: bool, +} + +pub fn run(args: Args) -> Result<()> { + match args.command { + SubCommand::List(c_args) => list(c_args), + SubCommand::Extract(c_args) => extract(c_args), + } +} + +fn list(args: ListArgs) -> Result<()> { + let file = map_file(&args.file)?; + let view = U8View::new(file.as_slice()) + .map_err(|e| anyhow!("Failed to open U8 file '{}': {}", args.file.display(), e))?; + visit_files(&view, |_, node, path| { + println!("{}: {} bytes, offset {:#X}", path, node.length(), node.offset()); + Ok(()) + }) +} + +fn extract(args: ExtractArgs) -> Result<()> { + let file = map_file(&args.file)?; + let view = U8View::new(file.as_slice()) + .map_err(|e| anyhow!("Failed to open U8 file '{}': {}", args.file.display(), e))?; + visit_files(&view, |_, node, path| { + let offset = node.offset(); + let size = node.length(); + let file_data = decompress_if_needed( + &file.as_slice()[offset as usize..offset as usize + size as usize], + )?; + let output_path = args + .output + .as_ref() + .map(|p| p.join(&path)) + .unwrap_or_else(|| PathBuf::from(path.clone())); + if !args.quiet { + println!("Extracting {} to {} ({} bytes)", path, output_path.display(), size); + } + if let Some(parent) = output_path.parent() { + DirBuilder::new().recursive(true).create(parent)?; + } + fs::write(&output_path, file_data) + .with_context(|| format!("Failed to write file '{}'", output_path.display()))?; + Ok(()) + }) +} + +fn visit_files( + view: &U8View, + mut visitor: impl FnMut(usize, &U8Node, String) -> Result<()>, +) -> Result<()> { + let mut 
path_segments = Vec::<(Cow, usize)>::new(); + for (idx, node, name) in view.iter() { + // Remove ended path segments + let mut new_size = 0; + for (_, end) in path_segments.iter() { + if *end == idx { + break; + } + new_size += 1; + } + path_segments.truncate(new_size); + + // Add the new path segment + let end = if node.is_dir() { node.length() as usize } else { idx + 1 }; + path_segments.push((name.map_err(|e| anyhow!("{}", e))?, end)); + + let path = path_segments.iter().map(|(name, _)| name.as_ref()).join("/"); + if !node.is_dir() { + visitor(idx, node, path)?; + } + } + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index 88a21fb..861a6d2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -95,6 +95,7 @@ enum SubCommand { Rel(cmd::rel::Args), Rso(cmd::rso::Args), Shasum(cmd::shasum::Args), + U8(cmd::u8_arc::Args), Yay0(cmd::yay0::Args), Yaz0(cmd::yaz0::Args), } @@ -169,6 +170,7 @@ fn main() { SubCommand::Rel(c_args) => cmd::rel::run(c_args), SubCommand::Rso(c_args) => cmd::rso::run(c_args), SubCommand::Shasum(c_args) => cmd::shasum::run(c_args), + SubCommand::U8(c_args) => cmd::u8_arc::run(c_args), SubCommand::Yay0(c_args) => cmd::yay0::run(c_args), SubCommand::Yaz0(c_args) => cmd::yaz0::run(c_args), }); diff --git a/src/util/file.rs b/src/util/file.rs index 3b17f54..03b09ea 100644 --- a/src/util/file.rs +++ b/src/util/file.rs @@ -5,10 +5,11 @@ use std::{ path::{Component, Path, PathBuf}, }; -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use filetime::{set_file_mtime, FileTime}; use memmap2::{Mmap, MmapOptions}; use path_slash::PathBufExt; +use rarc::RarcReader; use sha1::{Digest, Sha1}; use xxhash_rust::xxh3::xxh3_64; @@ -19,6 +20,7 @@ use crate::{ rarc, rarc::{Node, RARC_MAGIC}, take_seek::{TakeSeek, TakeSeekExt}, + u8_arc::{U8View, U8_MAGIC}, Bytes, }, }; @@ -105,11 +107,28 @@ where P: AsRef { )); } - let rarc = rarc::RarcReader::new(&mut Cursor::new(mmap.as_ref())) - .with_context(|| format!("Failed to open '{}' as RARC 
archive", base_path.display()))?; - rarc.find_file(&sub_path)?.map(|(o, s)| (o, s as u64)).ok_or_else(|| { - anyhow!("File '{}' not found in '{}'", sub_path.display(), base_path.display()) - })? + let buf = mmap.as_ref(); + match *array_ref!(buf, 0, 4) { + RARC_MAGIC => { + let rarc = RarcReader::new(&mut Cursor::new(mmap.as_ref())).with_context(|| { + format!("Failed to open '{}' as RARC archive", base_path.display()) + })?; + let (offset, size) = rarc.find_file(&sub_path)?.ok_or_else(|| { + anyhow!("File '{}' not found in '{}'", sub_path.display(), base_path.display()) + })?; + (offset, size as u64) + } + U8_MAGIC => { + let arc = U8View::new(buf).map_err(|e| { + anyhow!("Failed to open '{}' as U8 archive: {}", base_path.display(), e) + })?; + let (_, node) = arc.find(sub_path.to_slash_lossy().as_ref()).ok_or_else(|| { + anyhow!("File '{}' not found in '{}'", sub_path.display(), base_path.display()) + })?; + (node.offset() as u64, node.length() as u64) + } + _ => bail!("Couldn't detect archive type for '{}'", path.as_ref().display()), + } } else { (0, mmap.len() as u64) }; @@ -162,7 +181,7 @@ where P: AsRef { let mut file = File::open(&base_path) .with_context(|| format!("Failed to open file '{}'", base_path.display()))?; let (offset, size) = if let Some(sub_path) = sub_path { - let rarc = rarc::RarcReader::new(&mut BufReader::new(&file)) + let rarc = RarcReader::new(&mut BufReader::new(&file)) .with_context(|| format!("Failed to read RARC '{}'", base_path.display()))?; rarc.find_file(&sub_path)?.map(|(o, s)| (o, s as u64)).ok_or_else(|| { anyhow!("File '{}' not found in '{}'", sub_path.display(), base_path.display()) @@ -261,12 +280,12 @@ struct RarcIterator { impl RarcIterator { pub fn new(file: MappedFile, base_path: &Path) -> Result { - let reader = rarc::RarcReader::new(&mut file.as_reader())?; + let reader = RarcReader::new(&mut file.as_reader())?; let paths = Self::collect_paths(&reader, base_path); Ok(Self { file, base_path: base_path.to_owned(), paths, 
index: 0 }) } - fn collect_paths(reader: &rarc::RarcReader, base_path: &Path) -> Vec<(PathBuf, u64, u32)> { + fn collect_paths(reader: &RarcReader, base_path: &Path) -> Vec<(PathBuf, u64, u32)> { let mut current_path = PathBuf::new(); let mut paths = vec![]; for node in reader.nodes() { diff --git a/src/util/mod.rs b/src/util/mod.rs index facb8dc..db6872a 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -22,6 +22,7 @@ pub mod rso; pub mod signatures; pub mod split; pub mod take_seek; +pub mod u8_arc; #[inline] pub const fn align_up(value: u32, align: u32) -> u32 { (value + (align - 1)) & !(align - 1) } @@ -50,6 +51,14 @@ macro_rules! array_ref_mut { }}; } +/// Compile-time assertion. +#[macro_export] +macro_rules! static_assert { + ($condition:expr) => { + const _: () = core::assert!($condition); + }; +} + pub trait IntoCow<'a, B> where B: ToOwned + ?Sized { diff --git a/src/util/u8_arc.rs b/src/util/u8_arc.rs new file mode 100644 index 0000000..c805502 --- /dev/null +++ b/src/util/u8_arc.rs @@ -0,0 +1,188 @@ +use std::{borrow::Cow, ffi::CStr, mem::size_of}; + +use anyhow::Result; +use zerocopy::{big_endian::U32, AsBytes, FromBytes, FromZeroes}; + +use crate::static_assert; + +pub const U8_MAGIC: [u8; 4] = [0x55, 0xAA, 0x38, 0x2D]; + +/// U8 archive header. +#[derive(Clone, Debug, PartialEq, FromBytes, FromZeroes, AsBytes)] +#[repr(C, align(4))] +pub struct U8Header { + magic: [u8; 4], + node_table_offset: U32, + node_table_size: U32, + data_offset: U32, + _pad: [u8; 16], +} + +static_assert!(size_of::() == 32); + +/// File system node kind. +#[derive(Clone, Debug, PartialEq)] +pub enum U8NodeKind { + /// Node is a file. + File, + /// Node is a directory. + Directory, + /// Invalid node kind. (Should not normally occur) + Invalid, +} + +/// An individual file system node. 
+#[derive(Clone, Debug, PartialEq, FromBytes, FromZeroes, AsBytes)] +#[repr(C, align(4))] +pub struct U8Node { + kind: u8, + // u24 big-endian + name_offset: [u8; 3], + offset: U32, + length: U32, +} + +static_assert!(size_of::() == 12); + +impl U8Node { + /// File system node kind. + pub fn kind(&self) -> U8NodeKind { + match self.kind { + 0 => U8NodeKind::File, + 1 => U8NodeKind::Directory, + _ => U8NodeKind::Invalid, + } + } + + /// Whether the node is a file. + pub fn is_file(&self) -> bool { self.kind == 0 } + + /// Whether the node is a directory. + pub fn is_dir(&self) -> bool { self.kind == 1 } + + /// Offset in the string table to the filename. + pub fn name_offset(&self) -> u32 { + u32::from_be_bytes([0, self.name_offset[0], self.name_offset[1], self.name_offset[2]]) + } + + /// For files, this is the data offset of the file data (relative to header.data_offset). + /// + /// For directories, this is the parent node index in the node table. + pub fn offset(&self) -> u32 { self.offset.get() } + + /// For files, this is the byte size of the file. + /// + /// For directories, this is the child end index in the node table. + /// + /// Number of child files and directories recursively is `length - offset`. + pub fn length(&self) -> u32 { self.length.get() } +} + +/// A view into a U8 archive. +pub struct U8View<'a> { + /// The U8 archive header. + pub header: &'a U8Header, + /// The nodes in the U8 archive. + pub nodes: &'a [U8Node], + /// The string table containing all file and directory names. + pub string_table: &'a [u8], +} + +impl<'a> U8View<'a> { + /// Create a new U8 view from a buffer. 
+ pub fn new(buf: &'a [u8]) -> Result { + let Some(header) = U8Header::ref_from_prefix(buf) else { + return Err("Buffer not large enough for U8 header"); + }; + if header.magic != U8_MAGIC { + return Err("U8 magic mismatch"); + } + let node_table_offset = header.node_table_offset.get() as usize; + let nodes_buf = buf + .get(node_table_offset..node_table_offset + header.node_table_size.get() as usize) + .ok_or("U8 node table out of bounds")?; + let root_node = U8Node::ref_from_prefix(nodes_buf).ok_or("U8 root node not aligned")?; + if root_node.kind() != U8NodeKind::Directory { + return Err("U8 root node is not a directory"); + } + if root_node.offset() != 0 { + return Err("U8 root node offset is not zero"); + } + let node_count = root_node.length() as usize; + if node_count * size_of::() > header.node_table_size.get() as usize { + return Err("U8 node table size mismatch"); + } + let (nodes_buf, string_table) = nodes_buf.split_at(node_count * size_of::()); + let nodes = U8Node::slice_from(nodes_buf).ok_or("U8 node table not aligned")?; + Ok(Self { header, nodes, string_table }) + } + + /// Iterate over the nodes in the U8 archive. + pub fn iter(&self) -> U8Iter { U8Iter { inner: self, idx: 1 } } + + /// Get the name of a node. + pub fn get_name(&self, node: &U8Node) -> Result, String> { + let name_buf = self.string_table.get(node.name_offset() as usize..).ok_or_else(|| { + format!( + "U8: name offset {} out of bounds (string table size: {})", + node.name_offset(), + self.string_table.len() + ) + })?; + let c_string = CStr::from_bytes_until_nul(name_buf).map_err(|_| { + format!("U8: name at offset {} not null-terminated", node.name_offset()) + })?; + Ok(c_string.to_string_lossy()) + } + + /// Finds a particular file or directory by path. 
+ pub fn find(&self, path: &str) -> Option<(usize, &U8Node)> { + let mut split = path.trim_matches('/').split('/'); + let mut current = split.next()?; + let mut idx = 1; + let mut stop_at = None; + while let Some(node) = self.nodes.get(idx) { + if self.get_name(node).as_ref().map_or(false, |name| name.eq_ignore_ascii_case(current)) + { + if let Some(next) = split.next() { + current = next; + } else { + return Some((idx, node)); + } + // Descend into directory + idx += 1; + stop_at = Some(node.length() as usize + idx); + } else if node.is_dir() { + // Skip directory + idx = node.length() as usize; + } else { + // Skip file + idx += 1; + } + if let Some(stop) = stop_at { + if idx >= stop { + break; + } + } + } + None + } +} + +/// Iterator over the nodes in a U8 archive. +pub struct U8Iter<'a> { + inner: &'a U8View<'a>, + idx: usize, +} + +impl<'a> Iterator for U8Iter<'a> { + type Item = (usize, &'a U8Node, Result, String>); + + fn next(&mut self) -> Option { + let idx = self.idx; + let node = self.inner.nodes.get(idx)?; + let name = self.inner.get_name(node); + self.idx += 1; + Some((idx, node, name)) + } +}