Add U8 (newer .arc) support

Supports the U8 .arc format, just
like the older RARC format.

`u8 list`, `u8 extract` and support
for U8 archive paths in config.yml
This commit is contained in:
Luke Street 2024-06-03 20:34:35 -06:00
parent 255123796e
commit 46cf0be183
10 changed files with 391 additions and 17 deletions

9
Cargo.lock generated
View File

@ -415,6 +415,7 @@ dependencies = [
"tracing-attributes",
"tracing-subscriber",
"xxhash-rust",
"zerocopy",
]
[[package]]
@ -1897,9 +1898,9 @@ checksum = "927da81e25be1e1a2901d59b81b37dd2efd1fc9c9345a55007f09bf5a2d3ee03"
[[package]]
name = "zerocopy"
version = "0.7.32"
version = "0.7.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087"
dependencies = [
"byteorder",
"zerocopy-derive",
@ -1907,9 +1908,9 @@ dependencies = [
[[package]]
name = "zerocopy-derive"
version = "0.7.32"
version = "0.7.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
dependencies = [
"proc-macro2",
"quote",

View File

@ -72,3 +72,4 @@ tracing = "0.1.40"
tracing-attributes = "0.1.27"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
xxhash-rust = { version = "0.8.10", features = ["xxh3"] }
zerocopy = { version = "0.7.34", features = ["derive"] }

View File

@ -43,6 +43,8 @@ project structure and build system that uses decomp-toolkit under the hood.
- [nlzss decompress](#nlzss-decompress)
- [rarc list](#rarc-list)
- [rarc extract](#rarc-extract)
- [u8 list](#u8-list)
- [u8 extract](#u8-extract)
- [yay0 decompress](#yay0-decompress)
- [yay0 compress](#yay0-compress)
- [yaz0 decompress](#yaz0-decompress)
@ -390,7 +392,7 @@ $ dtk nlzss decompress rels/*.lz -o rels
### rarc list
Lists the contents of an RARC archive.
Lists the contents of an RARC (older .arc) archive.
```shell
$ dtk rarc list input.arc
@ -398,12 +400,28 @@ $ dtk rarc list input.arc
### rarc extract
Extracts the contents of an RARC archive.
Extracts the contents of an RARC (older .arc) archive.
```shell
$ dtk rarc extract input.arc -o output_dir
```
### u8 list
Lists the contents of a U8 (newer .arc) archive.
```shell
$ dtk u8 list input.arc
```
### u8 extract
Extracts the contents of a U8 (newer .arc) archive.
```shell
$ dtk u8 extract input.arc -o output_dir
```
### yay0 decompress
Decompresses Yay0-compressed files.

View File

@ -13,5 +13,6 @@ pub mod rarc;
pub mod rel;
pub mod rso;
pub mod shasum;
pub mod u8_arc;
pub mod yay0;
pub mod yaz0;

View File

@ -42,6 +42,9 @@ pub struct ExtractArgs {
#[argp(option, short = 'o')]
/// output directory
output: Option<PathBuf>,
#[argp(switch, short = 'q')]
/// quiet output
quiet: bool,
}
pub fn run(args: Args) -> Result<()> {
@ -95,8 +98,19 @@ fn extract(args: ExtractArgs) -> Result<()> {
&file.as_slice()[offset as usize..offset as usize + size as usize],
)?;
let file_path = current_path.join(&name.name);
let output_path =
args.output.as_ref().map(|p| p.join(&file_path)).unwrap_or_else(|| file_path);
let output_path = args
.output
.as_ref()
.map(|p| p.join(&file_path))
.unwrap_or_else(|| file_path.clone());
if !args.quiet {
println!(
"Extracting {} to {} ({} bytes)",
file_path.display(),
output_path.display(),
size
);
}
if let Some(parent) = output_path.parent() {
DirBuilder::new().recursive(true).create(parent)?;
}

121
src/cmd/u8_arc.rs Normal file
View File

@ -0,0 +1,121 @@
use std::{borrow::Cow, fs, fs::DirBuilder, path::PathBuf};
use anyhow::{anyhow, Context, Result};
use argp::FromArgs;
use itertools::Itertools;
use crate::util::{
file::{decompress_if_needed, map_file},
u8_arc::{U8Node, U8View},
};
// Top-level arguments for the `u8` command; `run` dispatches on `command`.
// NOTE: the `///` line below doubles as the help text argp shows for this
// command, so it is user-facing output rather than a plain comment.
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing U8 (arc) files.
#[argp(subcommand, name = "u8")]
pub struct Args {
// The selected `u8` subcommand (`list` or `extract`).
#[argp(subcommand)]
command: SubCommand,
}
// The operations available under `u8`; each variant carries that
// subcommand's parsed arguments and is handled by the same-named function.
#[derive(FromArgs, PartialEq, Debug)]
#[argp(subcommand)]
enum SubCommand {
// `u8 list`: print every file in the archive.
List(ListArgs),
// `u8 extract`: write every file in the archive to disk.
Extract(ExtractArgs),
}
// Arguments for `u8 list`.
// NOTE: the `///` lines are argp help text (user-facing); edit with care.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Views U8 (arc) file information.
#[argp(subcommand, name = "list")]
pub struct ListArgs {
#[argp(positional)]
/// U8 (arc) file
file: PathBuf,
}
// Arguments for `u8 extract`.
// NOTE: the `///` lines are argp help text (user-facing); edit with care.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Extracts U8 (arc) file contents.
#[argp(subcommand, name = "extract")]
pub struct ExtractArgs {
#[argp(positional)]
/// U8 (arc) file
file: PathBuf,
// When absent, files are written relative to the current directory
// (the archive-relative path is used as-is).
#[argp(option, short = 'o')]
/// output directory
output: Option<PathBuf>,
// Suppresses the per-file "Extracting ..." line.
#[argp(switch, short = 'q')]
/// quiet output
quiet: bool,
}
pub fn run(args: Args) -> Result<()> {
match args.command {
SubCommand::List(c_args) => list(c_args),
SubCommand::Extract(c_args) => extract(c_args),
}
}
/// Prints one line per file stored in the archive: its full path, its byte
/// length and its offset (in hexadecimal).
///
/// Fails if the file cannot be mapped, is not a valid U8 archive, or contains
/// a node whose name cannot be read.
fn list(args: ListArgs) -> Result<()> {
    let mapped = map_file(&args.file)?;
    let archive = match U8View::new(mapped.as_slice()) {
        Ok(archive) => archive,
        Err(e) => {
            return Err(anyhow!("Failed to open U8 file '{}': {}", args.file.display(), e));
        }
    };
    visit_files(&archive, |_idx, entry, full_path| {
        println!("{}: {} bytes, offset {:#X}", full_path, entry.length(), entry.offset());
        Ok(())
    })
}
/// Extracts every file in the archive to `args.output` (or to the current
/// directory when no output directory was given), decompressing each file's
/// contents via `decompress_if_needed` and creating parent directories as
/// required.
///
/// Unless `args.quiet` is set, one "Extracting ..." line is printed per file.
///
/// # Errors
///
/// Fails if the archive cannot be mapped or parsed, if a node's name cannot be
/// read, if a file's data range lies outside the archive, if a stored path
/// would escape the output directory, or if writing to disk fails.
fn extract(args: ExtractArgs) -> Result<()> {
    let file = map_file(&args.file)?;
    let view = U8View::new(file.as_slice())
        .map_err(|e| anyhow!("Failed to open U8 file '{}': {}", args.file.display(), e))?;
    visit_files(&view, |_, node, path| {
        // Names come straight from the archive and are untrusted: refuse
        // anything that is not a plain relative path (`..`, a root, or a drive
        // prefix), since joining it would write outside the output directory.
        let relative_path = PathBuf::from(&path);
        let is_plain_relative = relative_path
            .components()
            .all(|component| matches!(component, std::path::Component::Normal(_)));
        if !is_plain_relative {
            return Err(anyhow!(
                "Refusing to extract '{}': path escapes the output directory",
                path
            ));
        }

        // The offset and length are also untrusted; a checked range lookup
        // turns a malformed archive into an error instead of a panic.
        let offset = node.offset() as usize;
        let size = node.length() as usize;
        let raw_data = offset
            .checked_add(size)
            .and_then(|end| file.as_slice().get(offset..end))
            .ok_or_else(|| {
                anyhow!(
                    "File '{}' is out of bounds (offset {:#X}, size {:#X}, archive size {:#X})",
                    path,
                    offset,
                    size,
                    file.as_slice().len()
                )
            })?;
        let file_data = decompress_if_needed(raw_data)?;

        let output_path = match args.output.as_ref() {
            Some(output_dir) => output_dir.join(&relative_path),
            None => relative_path,
        };
        if !args.quiet {
            println!("Extracting {} to {} ({} bytes)", path, output_path.display(), size);
        }
        if let Some(parent) = output_path.parent() {
            DirBuilder::new().recursive(true).create(parent)?;
        }
        fs::write(&output_path, file_data)
            .with_context(|| format!("Failed to write file '{}'", output_path.display()))?;
        Ok(())
    })
}
/// Walks the archive's node table in order and invokes `visitor` once per
/// file node with `(node index, node, full path)`, where the path is the
/// `/`-joined chain of enclosing directory names followed by the file name.
///
/// Directory nodes are never passed to `visitor`; they only contribute path
/// segments. The walk stops at the first error, whether it comes from
/// reading a node name or from `visitor` itself.
fn visit_files(
    view: &U8View,
    mut visitor: impl FnMut(usize, &U8Node, String) -> Result<()>,
) -> Result<()> {
    // Stack of (segment name, node index at which that segment stops applying).
    let mut stack = Vec::<(Cow<str>, usize)>::new();
    for (idx, node, name) in view.iter() {
        // Drop the first segment that ends at this index, plus everything
        // nested beneath it.
        if let Some(keep) = stack.iter().position(|(_, segment_end)| *segment_end == idx) {
            stack.truncate(keep);
        }

        // A directory stays on the stack until its end index; a file only
        // lasts for its own node.
        let segment_end = match node.is_dir() {
            true => node.length() as usize,
            false => idx + 1,
        };
        let segment_name = name.map_err(|e| anyhow!("{}", e))?;
        stack.push((segment_name, segment_end));

        let path = stack.iter().map(|(segment, _)| segment.as_ref()).join("/");
        if node.is_dir() {
            continue;
        }
        visitor(idx, node, path)?;
    }
    Ok(())
}

View File

@ -95,6 +95,7 @@ enum SubCommand {
Rel(cmd::rel::Args),
Rso(cmd::rso::Args),
Shasum(cmd::shasum::Args),
U8(cmd::u8_arc::Args),
Yay0(cmd::yay0::Args),
Yaz0(cmd::yaz0::Args),
}
@ -169,6 +170,7 @@ fn main() {
SubCommand::Rel(c_args) => cmd::rel::run(c_args),
SubCommand::Rso(c_args) => cmd::rso::run(c_args),
SubCommand::Shasum(c_args) => cmd::shasum::run(c_args),
SubCommand::U8(c_args) => cmd::u8_arc::run(c_args),
SubCommand::Yay0(c_args) => cmd::yay0::run(c_args),
SubCommand::Yaz0(c_args) => cmd::yaz0::run(c_args),
});

View File

@ -5,10 +5,11 @@ use std::{
path::{Component, Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, bail, Context, Result};
use filetime::{set_file_mtime, FileTime};
use memmap2::{Mmap, MmapOptions};
use path_slash::PathBufExt;
use rarc::RarcReader;
use sha1::{Digest, Sha1};
use xxhash_rust::xxh3::xxh3_64;
@ -19,6 +20,7 @@ use crate::{
rarc,
rarc::{Node, RARC_MAGIC},
take_seek::{TakeSeek, TakeSeekExt},
u8_arc::{U8View, U8_MAGIC},
Bytes,
},
};
@ -105,11 +107,28 @@ where P: AsRef<Path> {
));
}
let rarc = rarc::RarcReader::new(&mut Cursor::new(mmap.as_ref()))
.with_context(|| format!("Failed to open '{}' as RARC archive", base_path.display()))?;
rarc.find_file(&sub_path)?.map(|(o, s)| (o, s as u64)).ok_or_else(|| {
let buf = mmap.as_ref();
match *array_ref!(buf, 0, 4) {
RARC_MAGIC => {
let rarc = RarcReader::new(&mut Cursor::new(mmap.as_ref())).with_context(|| {
format!("Failed to open '{}' as RARC archive", base_path.display())
})?;
let (offset, size) = rarc.find_file(&sub_path)?.ok_or_else(|| {
anyhow!("File '{}' not found in '{}'", sub_path.display(), base_path.display())
})?
})?;
(offset, size as u64)
}
U8_MAGIC => {
let arc = U8View::new(buf).map_err(|e| {
anyhow!("Failed to open '{}' as U8 archive: {}", base_path.display(), e)
})?;
let (_, node) = arc.find(sub_path.to_slash_lossy().as_ref()).ok_or_else(|| {
anyhow!("File '{}' not found in '{}'", sub_path.display(), base_path.display())
})?;
(node.offset() as u64, node.length() as u64)
}
_ => bail!("Couldn't detect archive type for '{}'", path.as_ref().display()),
}
} else {
(0, mmap.len() as u64)
};
@ -162,7 +181,7 @@ where P: AsRef<Path> {
let mut file = File::open(&base_path)
.with_context(|| format!("Failed to open file '{}'", base_path.display()))?;
let (offset, size) = if let Some(sub_path) = sub_path {
let rarc = rarc::RarcReader::new(&mut BufReader::new(&file))
let rarc = RarcReader::new(&mut BufReader::new(&file))
.with_context(|| format!("Failed to read RARC '{}'", base_path.display()))?;
rarc.find_file(&sub_path)?.map(|(o, s)| (o, s as u64)).ok_or_else(|| {
anyhow!("File '{}' not found in '{}'", sub_path.display(), base_path.display())
@ -261,12 +280,12 @@ struct RarcIterator {
impl RarcIterator {
pub fn new(file: MappedFile, base_path: &Path) -> Result<Self> {
let reader = rarc::RarcReader::new(&mut file.as_reader())?;
let reader = RarcReader::new(&mut file.as_reader())?;
let paths = Self::collect_paths(&reader, base_path);
Ok(Self { file, base_path: base_path.to_owned(), paths, index: 0 })
}
fn collect_paths(reader: &rarc::RarcReader, base_path: &Path) -> Vec<(PathBuf, u64, u32)> {
fn collect_paths(reader: &RarcReader, base_path: &Path) -> Vec<(PathBuf, u64, u32)> {
let mut current_path = PathBuf::new();
let mut paths = vec![];
for node in reader.nodes() {

View File

@ -22,6 +22,7 @@ pub mod rso;
pub mod signatures;
pub mod split;
pub mod take_seek;
pub mod u8_arc;
/// Rounds `value` up to the next multiple of `align`.
///
/// The result is computed with a bit mask, so it is only a multiple of
/// `align` when `align` is a non-zero power of two.
#[inline]
pub const fn align_up(value: u32, align: u32) -> u32 {
    let mask = align - 1;
    (value + mask) & !mask
}
@ -50,6 +51,14 @@ macro_rules! array_ref_mut {
}};
}
/// Compile-time assertion.
///
/// Expands to an anonymous `const` item whose initializer is
/// `core::assert!($condition)`, so the condition is evaluated during
/// compilation and a `false` result is a build error rather than a runtime
/// panic. The condition must therefore be usable in a `const` context.
#[macro_export]
macro_rules! static_assert {
($condition:expr) => {
const _: () = core::assert!($condition);
};
}
pub trait IntoCow<'a, B>
where B: ToOwned + ?Sized
{

188
src/util/u8_arc.rs Normal file
View File

@ -0,0 +1,188 @@
use std::{borrow::Cow, ffi::CStr, mem::size_of};
use anyhow::Result;
use zerocopy::{big_endian::U32, AsBytes, FromBytes, FromZeroes};
use crate::static_assert;
/// The four magic bytes (`55 AA 38 2D`) that a U8 archive header must start
/// with; `U8View::new` rejects any buffer whose header magic differs.
pub const U8_MAGIC: [u8; 4] = [0x55, 0xAA, 0x38, 0x2D];
/// U8 archive header.
///
/// Read in place from the start of the archive buffer; the multi-byte fields
/// use the big-endian `U32` wrapper. The layout is fixed at 32 bytes (checked
/// by the assertion below).
#[derive(Clone, Debug, PartialEq, FromBytes, FromZeroes, AsBytes)]
#[repr(C, align(4))]
pub struct U8Header {
/// Must equal [`U8_MAGIC`].
magic: [u8; 4],
/// Byte offset, from the start of the buffer, of the node table.
node_table_offset: U32,
/// Byte length of the region starting at `node_table_offset`; it holds the
/// node table followed by the string table.
node_table_size: U32,
/// Not read by the code in this module.
/// NOTE(review): presumably the offset at which file data begins; confirm
/// against the format specification.
data_offset: U32,
/// Padding that brings the header to 32 bytes.
_pad: [u8; 16],
}
static_assert!(size_of::<U8Header>() == 32);
/// File system node kind.
///
/// This is the decoded form of a node's raw kind byte. It is a plain
/// fieldless enum, so it is `Copy` and `Eq` in addition to `Clone` and
/// `PartialEq`, and can be passed by value and compared freely.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum U8NodeKind {
    /// Node is a file.
    File,
    /// Node is a directory.
    Directory,
    /// Invalid node kind. (Should not normally occur)
    Invalid,
}
/// An individual file system node.
///
/// Read in place from the node table; the layout is fixed at 12 bytes
/// (checked by the assertion below). Use the accessor methods rather than the
/// raw fields: `offset` and `length` mean different things for files and
/// directories.
#[derive(Clone, Debug, PartialEq, FromBytes, FromZeroes, AsBytes)]
#[repr(C, align(4))]
pub struct U8Node {
/// Raw kind byte: `0` = file, `1` = directory (see `U8Node::kind`).
kind: u8,
// u24 big-endian
/// Offset of this node's name within the string table.
name_offset: [u8; 3],
/// Files: offset of the file's data. Directories: parent node index.
offset: U32,
/// Files: size in bytes. Directories: end index of the directory's children.
length: U32,
}
static_assert!(size_of::<U8Node>() == 12);
impl U8Node {
    /// Decodes the raw kind byte: `0` is a file, `1` is a directory, and any
    /// other value is reported as [`U8NodeKind::Invalid`].
    pub fn kind(&self) -> U8NodeKind {
        if self.is_file() {
            U8NodeKind::File
        } else if self.is_dir() {
            U8NodeKind::Directory
        } else {
            U8NodeKind::Invalid
        }
    }

    /// Returns `true` when this node describes a file.
    pub fn is_file(&self) -> bool { matches!(self.kind, 0) }

    /// Returns `true` when this node describes a directory.
    pub fn is_dir(&self) -> bool { matches!(self.kind, 1) }

    /// Position of this node's name inside the string table, decoded from the
    /// 24-bit big-endian field.
    pub fn name_offset(&self) -> u32 {
        let [high, mid, low] = self.name_offset;
        (u32::from(high) << 16) | (u32::from(mid) << 8) | u32::from(low)
    }

    /// Files: the offset of the file's data. Directories: the index of the
    /// parent node in the node table.
    ///
    /// NOTE(review): this was previously described as relative to
    /// `header.data_offset`, but the callers in this codebase index the
    /// archive buffer with it directly (i.e. from the start of the archive).
    /// Confirm against the format specification.
    pub fn offset(&self) -> u32 { self.offset.get() }

    /// Files: the size of the file's data in bytes. Directories: the index in
    /// the node table at which the directory's children end.
    pub fn length(&self) -> u32 { self.length.get() }
}
/// A view into a U8 archive.
///
/// Borrows everything from the buffer passed to `U8View::new`; nothing is
/// copied, so the view cannot outlive that buffer.
pub struct U8View<'a> {
/// The U8 archive header.
pub header: &'a U8Header,
/// The nodes in the U8 archive.
///
/// Index 0 is the root directory; its `length` gives the total node count.
pub nodes: &'a [U8Node],
/// The string table containing all file and directory names.
///
/// This is the part of the node table region that follows the nodes; names
/// are NUL-terminated and located via `U8Node::name_offset`.
pub string_table: &'a [u8],
}
impl<'a> U8View<'a> {
    /// Create a new U8 view from a buffer.
    ///
    /// The header, node table and string table are borrowed from `buf`
    /// without copying.
    ///
    /// # Errors
    ///
    /// Returns a static message when the buffer cannot hold (or is not
    /// suitably aligned for) the header, the magic does not match
    /// [`U8_MAGIC`], the node table lies outside the buffer, the root node is
    /// not a directory with offset zero, or the node count claimed by the
    /// root node does not fit in the node table region.
    pub fn new(buf: &'a [u8]) -> Result<Self, &'static str> {
        let Some(header) = U8Header::ref_from_prefix(buf) else {
            return Err("Buffer not large enough for U8 header");
        };
        if header.magic != U8_MAGIC {
            return Err("U8 magic mismatch");
        }
        let node_table_offset = header.node_table_offset.get() as usize;
        let node_table_size = header.node_table_size.get() as usize;
        // Both values come from the file; use checked arithmetic so a
        // hostile header cannot overflow `usize` (possible on 32-bit targets).
        let nodes_buf = node_table_offset
            .checked_add(node_table_size)
            .and_then(|node_table_end| buf.get(node_table_offset..node_table_end))
            .ok_or("U8 node table out of bounds")?;
        let root_node = U8Node::ref_from_prefix(nodes_buf).ok_or("U8 root node not aligned")?;
        if root_node.kind() != U8NodeKind::Directory {
            return Err("U8 root node is not a directory");
        }
        if root_node.offset() != 0 {
            return Err("U8 root node offset is not zero");
        }
        // The root directory's end index is the total number of nodes.
        let node_count = root_node.length() as usize;
        let nodes_size = node_count
            .checked_mul(size_of::<U8Node>())
            .filter(|&size| size <= nodes_buf.len())
            .ok_or("U8 node table size mismatch")?;
        // Whatever follows the nodes inside the region is the string table.
        let (nodes_buf, string_table) = nodes_buf.split_at(nodes_size);
        let nodes = U8Node::slice_from(nodes_buf).ok_or("U8 node table not aligned")?;
        Ok(Self { header, nodes, string_table })
    }

    /// Iterate over the nodes in the U8 archive.
    ///
    /// Iteration starts at index 1, i.e. the root directory is skipped.
    pub fn iter(&self) -> U8Iter { U8Iter { inner: self, idx: 1 } }

    /// Get the name of a node.
    ///
    /// # Errors
    ///
    /// Returns a message when the node's name offset lies outside the string
    /// table or the name is not NUL-terminated. Invalid UTF-8 is not an
    /// error; it is replaced lossily.
    pub fn get_name(&self, node: &U8Node) -> Result<Cow<str>, String> {
        let name_buf = self.string_table.get(node.name_offset() as usize..).ok_or_else(|| {
            format!(
                "U8: name offset {} out of bounds (string table size: {})",
                node.name_offset(),
                self.string_table.len()
            )
        })?;
        let c_string = CStr::from_bytes_until_nul(name_buf).map_err(|_| {
            format!("U8: name at offset {} not null-terminated", node.name_offset())
        })?;
        Ok(c_string.to_string_lossy())
    }

    /// Finds a particular file or directory by path.
    ///
    /// `path` is `/`-separated; leading and trailing slashes are ignored and
    /// names are compared ASCII case-insensitively. Returns the node's index
    /// in the node table together with the node, or `None` when no node
    /// matches.
    pub fn find(&self, path: &str) -> Option<(usize, &U8Node)> {
        let mut segments = path.trim_matches('/').split('/').peekable();
        let mut current = segments.next()?;
        let mut idx = 1;
        // Exclusive end of the directory currently being searched. It starts
        // as the whole table and only ever shrinks while descending.
        let mut end = self.nodes.len();
        while idx < end {
            let node = self.nodes.get(idx)?;
            let name_matches =
                self.get_name(node).map_or(false, |name| name.eq_ignore_ascii_case(current));
            if name_matches && segments.peek().is_none() {
                return Some((idx, node));
            }
            if name_matches && node.is_dir() {
                // Descend: the children occupy `idx + 1 .. length`. A
                // directory's length is already an absolute end index, so it
                // is used as-is (clamped to the enclosing directory's end).
                current = segments.next()?;
                end = end.min(node.length() as usize);
                idx += 1;
            } else if node.is_dir() {
                // Skip the whole subtree. Always move forward, so a malformed
                // end index cannot cause an endless loop.
                idx = (node.length() as usize).max(idx + 1);
            } else {
                // A non-matching file, or a file matched while more path
                // segments remain (a file has no children): move on.
                idx += 1;
            }
        }
        None
    }
}
/// Iterator over the nodes in a U8 archive.
///
/// Created by `U8View::iter`, which starts it at index 1.
pub struct U8Iter<'a> {
/// The archive view being iterated.
inner: &'a U8View<'a>,
/// Index of the next node to yield.
idx: usize,
}
impl<'a> Iterator for U8Iter<'a> {
    /// `(node index, node, node name)`; the name is an `Err` when it cannot
    /// be read from the string table.
    type Item = (usize, &'a U8Node, Result<Cow<'a, str>, String>);

    /// Yields the node at the current index and advances by one, ending once
    /// the index runs past the node table.
    fn next(&mut self) -> Option<Self::Item> {
        let view = self.inner;
        let position = self.idx;
        let node = view.nodes.get(position)?;
        self.idx = position + 1;
        Some((position, node, view.get_name(node)))
    }
}