From 4611a4b5019fb5511c42c3077abe092ab435fd77 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Fri, 4 Oct 2024 21:02:04 -0600 Subject: [PATCH] Vendor nintendo-lz crate to fix issues & avoid old deps --- Cargo.lock | 77 +----------- Cargo.toml | 2 +- deny.toml | 182 ++++++++++++++++----------- src/cmd/nlzss.rs | 4 +- src/util/mod.rs | 1 + src/util/nlzss.rs | 305 ++++++++++++++++++++++++++++++++++++++++++++++ src/vfs/mod.rs | 6 +- 7 files changed, 422 insertions(+), 155 deletions(-) create mode 100644 src/util/nlzss.rs diff --git a/Cargo.lock b/Cargo.lock index 476ec30..e069adc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,15 +43,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - [[package]] name = "anyhow" version = "1.0.89" @@ -87,17 +78,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.4.0" @@ -260,21 +240,6 @@ dependencies = [ "inout", ] -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim", - "textwrap", - "unicode-width", - "vec_map", -] - [[package]] name = "console" version = "0.15.8" @@ -390,6 +355,7 @@ dependencies = [ "argp", "base16ct", "base64", + "byteorder", "crossterm", "cwdemangle", "cwextab", @@ -408,7 +374,6 @@ dependencies = [ "memmap2", "mimalloc", "multimap", - "nintendo-lz", "nodtool", "num_enum", "objdiff-core", @@ -617,15 +582,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "hermit-abi" version = "0.3.9" @@ -894,16 +850,6 @@ dependencies = [ "serde", ] -[[package]] -name = "nintendo-lz" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "768b95cd65a1a8b82d6c7e90a69d080b20996a876cd62987ab5bcb350c5ae646" -dependencies = [ - "byteorder", - "clap", -] - [[package]] name = "nod" version = "1.4.3" @@ -1590,12 +1536,6 @@ dependencies = [ "syn 2.0.79", ] -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - [[package]] name = "strum" version = "0.26.3" @@ -1692,15 +1632,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - [[package]] name = "thiserror" version = "1.0.64" @@ -1872,12 +1803,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index bacba3d..afaf5f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ ar = { git = "https://github.com/bjorn3/rust-ar.git", branch = "write_symbol_tab argp = "0.3" base16ct = "0.2" base64 = "0.22" +byteorder = "1.5" crossterm = "0.28" cwdemangle = "1.0" cwextab = "1.0" @@ -47,7 +48,6 @@ log = "0.4" memchr = "2.7" memmap2 = "0.9" multimap = "0.10" -nintendo-lz = "0.1" nodtool = "1.4" #nodtool = { path = "../nod-rs/nodtool" } num_enum = "0.7" diff --git a/deny.toml b/deny.toml index 2382675..76f591b 100644 --- a/deny.toml +++ b/deny.toml @@ -9,6 +9,11 @@ # The values provided in this template are the default values that will be used # when any section or field is not specified in your own configuration +# Root options + +# The graph table configures how the dependency graph is constructed and thus +# which crates the checks are performed against +[graph] # If 1 or more target triples (and optionally, target_features) are specified, # only the specified targets will be checked when running `cargo deny check`. # This means, if a particular package is only ever used as a target specific @@ -20,53 +25,67 @@ targets = [ # The triple can be any string, but only the target triples built in to # rustc (as of 1.40) can be checked against actual config expressions - #{ triple = "x86_64-unknown-linux-musl" }, + #"x86_64-unknown-linux-musl", # You can also specify which target_features you promise are enabled for a # particular target. target_features are currently not validated against # the actual valid features supported by the target architecture. #{ triple = "wasm32-unknown-unknown", features = ["atomics"] }, ] +# When creating the dependency graph used as the source of truth when checks are +# executed, this field can be used to prune crates from the graph, removing them +# from the view of cargo-deny. This is an extremely heavy hammer, as if a crate +# is pruned from the graph, all of its dependencies will also be pruned unless +# they are connected to another crate in the graph that hasn't been pruned, +# so it should be used with care. The identifiers are [Package ID Specifications] +# (https://doc.rust-lang.org/cargo/reference/pkgid-spec.html) +#exclude = [] +# If true, metadata will be collected with `--all-features`. Note that this can't +# be toggled off if true, if you want to conditionally enable `--all-features` it +# is recommended to pass `--all-features` on the cmd line instead +all-features = false +# If true, metadata will be collected with `--no-default-features`. The same +# caveat with `all-features` applies +no-default-features = false +# If set, these feature will be enabled when collecting metadata. If `--features` +# is specified on the cmd line they will take precedence over this option. +#features = [] + +# The output table provides options for how/if diagnostics are outputted +[output] +# When outputting inclusion graphs in diagnostics that include features, this +# option can be used to specify the depth at which feature edges will be added. +# This option is included since the graphs can be quite large and the addition +# of features from the crate(s) to all of the graph roots can be far too verbose. +# This option can be overridden via `--feature-depth` on the cmd line +feature-depth = 1 # This section is considered when running `cargo deny check advisories` # More documentation for the advisories section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html [advisories] -# The path where the advisory database is cloned/fetched into -db-path = "~/.cargo/advisory-db" +# The path where the advisory databases are cloned/fetched into +#db-path = "$CARGO_HOME/advisory-dbs" # The url(s) of the advisory databases to use -db-urls = ["https://github.com/rustsec/advisory-db"] -# The lint level for security vulnerabilities -vulnerability = "deny" -# The lint level for unmaintained crates -unmaintained = "warn" -# The lint level for crates that have been yanked from their source registry -yanked = "warn" -# The lint level for crates with security notices. Note that as of -# 2019-12-17 there are no security notice advisories in -# https://github.com/rustsec/advisory-db -notice = "warn" +#db-urls = ["https://github.com/rustsec/advisory-db"] # A list of advisory IDs to ignore. Note that ignored advisories will still # output a note when they are encountered. ignore = [ #"RUSTSEC-0000-0000", + #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, + #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish + #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, ] -# Threshold for security vulnerabilities, any vulnerability with a CVSS score -# lower than the range specified will be ignored. Note that ignored advisories -# will still output a note when they are encountered. -# * None - CVSS Score 0.0 -# * Low - CVSS Score 0.1 - 3.9 -# * Medium - CVSS Score 4.0 - 6.9 -# * High - CVSS Score 7.0 - 8.9 -# * Critical - CVSS Score 9.0 - 10.0 -#severity-threshold = +# If this is true, then cargo deny will use the git executable to fetch advisory database. +# If this is false, then it uses a built-in git library. +# Setting this to true can be helpful if you have special authentication requirements that cargo-deny does not support. +# See Git Authentication for more information about setting up git authentication. +#git-fetch-with-cli = true # This section is considered when running `cargo deny check licenses` # More documentation for the licenses section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html [licenses] -# The lint level for crates which do not have a detectable license -unlicensed = "deny" -# List of explictly allowed licenses +# List of explicitly allowed licenses # See https://spdx.org/licenses/ for list of possible licenses # [possible values: any SPDX 3.11 short identifier (+ optional exception)]. allow = [ @@ -77,28 +96,10 @@ allow = [ "BSL-1.0", "ISC", "MIT", + "MPL-2.0", "Unicode-DFS-2016", + "Zlib", ] -# List of explictly disallowed licenses -# See https://spdx.org/licenses/ for list of possible licenses -# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. -deny = [ - #"Nokia", -] -# Lint level for licenses considered copyleft -copyleft = "warn" -# Blanket approval or denial for OSI-approved or FSF Free/Libre licenses -# * both - The license will be approved if it is both OSI-approved *AND* FSF -# * either - The license will be approved if it is either OSI-approved *OR* FSF -# * osi-only - The license will be approved if is OSI-approved *AND NOT* FSF -# * fsf-only - The license will be approved if is FSF *AND NOT* OSI-approved -# * neither - This predicate is ignored and the default lint level is used -allow-osi-fsf-free = "neither" -# Lint level used when no other predicates are matched -# 1. License isn't in the allow or deny lists -# 2. License isn't copyleft -# 3. License isn't OSI/FSF, or allow-osi-fsf-free = "neither" -default = "deny" # The confidence threshold for detecting a license from license text. # The higher the value, the more closely the license text must be to the # canonical license text of a valid SPDX license file. @@ -109,32 +110,32 @@ confidence-threshold = 0.8 exceptions = [ # Each entry is the crate and version constraint, and its specific allow # list - #{ allow = ["Zlib"], name = "adler32", version = "*" }, + #{ allow = ["Zlib"], crate = "adler32" }, ] # Some crates don't have (easily) machine readable licensing information, # adding a clarification entry for it allows you to manually specify the # licensing information -[[licenses.clarify]] -# The name of the crate the clarification applies to -name = "encoding_rs" -# The optional version constraint for the crate -#version = "*" +#[[licenses.clarify]] +# The package spec the clarification applies to +#crate = "ring" # The SPDX expression for the license requirements of the crate -expression = "(Apache-2.0 OR MIT) AND BSD-3-Clause" +#expression = "MIT AND ISC AND OpenSSL" # One or more files in the crate's source used as the "source of truth" for # the license expression. If the contents match, the clarification will be used # when running the license check, otherwise the clarification will be ignored # and the crate will be checked normally, which may produce warnings or errors # depending on the rest of your configuration -license-files = [ - # Each entry is a crate relative path, and the (opaque) hash of its contents - { path = "COPYRIGHT", hash = 0x39f8ad31 } -] +#license-files = [ +# Each entry is a crate relative path, and the (opaque) hash of its contents +#{ path = "LICENSE", hash = 0xbd0eed23 } +#] [licenses.private] # If true, ignores workspace crates that aren't published, or are only -# published to private registries +# published to private registries. +# To see how to mark a crate as unpublished (to the official registry), +# visit https://doc.rust-lang.org/cargo/reference/manifest.html#the-publish-field. ignore = false # One or more private registries that you might publish crates to, if a crate # is only published to private registries, and ignore is true, the crate will @@ -148,7 +149,7 @@ registries = [ # https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html [bans] # Lint level for when multiple versions of the same crate are detected -multiple-versions = "warn" +multiple-versions = "allow" # Lint level for when a crate version requirement is `*` wildcards = "allow" # The graph highlighting used when creating dotgraphs for crates @@ -157,30 +158,63 @@ wildcards = "allow" # * simplest-path - The path to the version with the fewest edges is highlighted # * all - Both lowest-version and simplest-path are used highlight = "all" +# The default lint level for `default` features for crates that are members of +# the workspace that is being checked. This can be overridden by allowing/denying +# `default` on a crate-by-crate basis if desired. +workspace-default-features = "allow" +# The default lint level for `default` features for external crates that are not +# members of the workspace. This can be overridden by allowing/denying `default` +# on a crate-by-crate basis if desired. +external-default-features = "allow" # List of crates that are allowed. Use with care! allow = [ - #{ name = "ansi_term", version = "=0.11.0" }, + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" }, ] # List of crates to deny deny = [ - # Each entry the name of a crate and a version range. If version is - # not specified, all versions will be matched. - #{ name = "ansi_term", version = "=0.11.0" }, - # + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" }, # Wrapper crates can optionally be specified to allow the crate when it # is a direct dependency of the otherwise banned crate - #{ name = "ansi_term", version = "=0.11.0", wrappers = [] }, + #{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] }, ] + +# List of features to allow/deny +# Each entry the name of a crate and a version range. If version is +# not specified, all versions will be matched. +#[[bans.features]] +#crate = "reqwest" +# Features to not allow +#deny = ["json"] +# Features to allow +#allow = [ +# "rustls", +# "__rustls", +# "__tls", +# "hyper-rustls", +# "rustls", +# "rustls-pemfile", +# "rustls-tls-webpki-roots", +# "tokio-rustls", +# "webpki-roots", +#] +# If true, the allowed features must exactly match the enabled feature set. If +# this is set there is no point setting `deny` +#exact = true + # Certain crates/versions that will be skipped when doing duplicate detection. skip = [ - #{ name = "ansi_term", version = "=0.11.0" }, + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason why it can't be updated/removed" }, ] # Similarly to `skip` allows you to skip certain crates during duplicate # detection. Unlike skip, it also includes the entire tree of transitive # dependencies starting at the specified crate, up to a certain depth, which is -# by default infinite +# by default infinite. skip-tree = [ - #{ name = "ansi_term", version = "=0.11.0", depth = 20 }, + #"ansi_term@0.11.0", # will be skipped along with _all_ of its direct and transitive dependencies + #{ crate = "ansi_term@0.11.0", depth = 20 }, ] # This section is considered when running `cargo deny check sources`. @@ -200,9 +234,9 @@ allow-registry = ["https://github.com/rust-lang/crates.io-index"] allow-git = [] [sources.allow-org] -# 1 or more github.com organizations to allow git sources for -#github = [""] -# 1 or more gitlab.com organizations to allow git sources for -#gitlab = [""] -# 1 or more bitbucket.org organizations to allow git sources for -#bitbucket = [""] +# github.com organizations to allow git sources for +github = ["bjorn3"] +# gitlab.com organizations to allow git sources for +gitlab = [] +# bitbucket.org organizations to allow git sources for +bitbucket = [] diff --git a/src/cmd/nlzss.rs b/src/cmd/nlzss.rs index 3340396..d002a26 100644 --- a/src/cmd/nlzss.rs +++ b/src/cmd/nlzss.rs @@ -4,7 +4,7 @@ use anyhow::{anyhow, Context, Result}; use argp::FromArgs; use crate::{ - util::{file::process_rsp, IntoCow, ToCow}, + util::{file::process_rsp, nlzss, IntoCow, ToCow}, vfs::open_file, }; @@ -46,7 +46,7 @@ fn decompress(args: DecompressArgs) -> Result<()> { let single_file = files.len() == 1; for path in files { let mut file = open_file(&path, false)?; - let data = nintendo_lz::decompress(&mut file) + let data = nlzss::decompress(file.as_mut()) .map_err(|e| anyhow!("Failed to decompress '{}' with NLZSS: {}", path.display(), e))?; let out_path = if let Some(output) = &args.output { if single_file { diff --git a/src/util/mod.rs b/src/util/mod.rs index db6872a..1536c62 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -15,6 +15,7 @@ pub mod lcf; pub mod map; pub mod ncompress; pub mod nested; +pub mod nlzss; pub mod rarc; pub mod reader; pub mod rel; diff --git a/src/util/nlzss.rs b/src/util/nlzss.rs new file mode 100644 index 0000000..9b14e95 --- /dev/null +++ b/src/util/nlzss.rs @@ -0,0 +1,305 @@ +// BSD 2-Clause License +// +// Copyright (c) 2018, Charlotte D +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Source: https://gitlab.com/DarkKirb/nintendo-lz +// Modified to compile with latest edition, use anyhow::Error, and fix various issues. + +use std::io::{Cursor, Read, Write}; + +use anyhow::{bail, ensure, Result}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; + +/// Decompresses an LZ10/LZ11 compressed file. It returns an error when: +/// +/// - The file is not a valid LZ10/LZ11 file +/// - The file is truncated (More data was expected than present) +/// +/// # Example +/// +/// ```rust,ignore +/// let mut f = File::open("Archive.bin.cmp"); +/// let mut decompressed = nintendo_lz::decompress(&mut f).unwrap(); +/// ``` +pub fn decompress(inp: &mut R) -> Result> +where R: Read + ?Sized { + let mut length = inp.read_u32::()? as usize; + let ver = match length & 0xFF { + 0x10 => 0, + 0x11 => 1, + _ => bail!("Invalid magic number"), + }; + length >>= 8; + if length == 0 && ver == 1 { + length = inp.read_u32::()? as usize; + } + let mut out = Vec::::with_capacity(length); + while out.len() < length { + let byte = inp.read_u8()?; + for bit_no in (0..8).rev() { + if out.len() >= length { + break; + } + if ((byte >> bit_no) & 1) == 0 { + let data = inp.read_u8()?; + out.push(data); + } else { + let lenmsb = inp.read_u8()? as usize; + let lsb = inp.read_u8()? as usize; + let mut length: usize = lenmsb >> 4; + let mut disp: usize = ((lenmsb & 15) << 8) + lsb; + if ver == 0 { + length += 3; + } else if length > 1 { + length += 1; + } else if length == 0 { + length = (lenmsb & 15) << 4; + length += lsb >> 4; + length += 0x11; + let msb = inp.read_u8()? as usize; + disp = ((lsb & 15) << 8) + msb; + } else { + length = (lenmsb & 15) << 12; + length += lsb << 4; + let byte1 = inp.read_u8()? as usize; + let byte2 = inp.read_u8()? as usize; + length += byte1 >> 4; + length += 0x111; + disp = ((byte1 & 15) << 8) + byte2; + } + let start: usize = out.len() - disp - 1; + + for i in 0..length { + let val = out[start + i]; + out.push(val); + } + } + } + } + Ok(out) +} + +/// This function is a convenience wrapper around `decompress` for decompressing slices, arrays or +/// vectors. +pub fn decompress_arr(input: &[u8]) -> Result> { + let mut reader = Cursor::new(input); + decompress(&mut reader) +} + +/// This enum contains the possible compression levels for LZ compression. +pub enum CompressionLevel { + /// LZ10 compression. Maximum repeat size: 18 bytes + LZ10, + /// LZ11 compression. Maximum repeat size: 65809 bytes + /// + /// Argument: Maximum repeat size (0..65810), lower means worse compression but higher speed. + /// for values < 3 compression is disabled + LZ11(u32), +} + +fn find_longest_match(data: &[u8], off: usize, max: usize) -> Option<(usize, usize)> { + if off < 4 || data.len() - off < 4 { + return None; + } + let mut longest_pos: usize = 0; + let mut longest_len: usize = 0; + let mut start = 0; + if off > 0x1000 { + start = off - 0x1000; + } + for pos in search(&data[start..off + 2], &data[off..off + 3]) { + let mut length = 0; + for (i, p) in (off..data.len()).enumerate() { + if length == max { + return Some((start + pos, length)); + } + if data[p] != data[start + pos + i] { + break; + } + length += 1; + } + if length > longest_len { + longest_pos = pos; + longest_len = length; + } + } + if longest_len < 3 { + return None; + } + Some((start + longest_pos, longest_len)) +} + +/// Compresses data to LZ10/LZ11. It returns an error when: +/// +/// - The input is too large for the selected LZ version (LZ10 supports at most 16MiB) +/// - The maximum repeat length is out of range (for LZ11, has to be in the range (0..65810) +/// - Writing to the output file failed +/// +/// # Example +/// +/// ```rust,ignore +/// let mut f = File::create("Archive.bin.cmp"); +/// let data = b"This is an example text. This is an example text"; +/// nintendo_lz::compress(&data, &mut f, nintendo_lz::CompressionLevel::LZ11(65809)).unwrap(); +/// ``` +pub fn compress(inp: &[u8], out: &mut W, level: CompressionLevel) -> Result<()> +where W: Write + ?Sized { + let ver = match level { + CompressionLevel::LZ10 => 0, + CompressionLevel::LZ11(_) => 1, + }; + if ver == 0 && inp.len() > 16777216 { + bail!("Input data too large for LZ10"); + } + if ver == 1 && inp.len() as u64 > 0xFFFFFFFF { + bail!("Input data too large for LZ11"); + } + let repeat_size = match level { + CompressionLevel::LZ10 => 18, + CompressionLevel::LZ11(max) => max, + }; + ensure!(repeat_size < 65810, "Maximum repeat size out of range. (0..65810)"); + + let size: usize = inp.len(); + + if size < 16777216 && (size != 0 || ver == 0) { + let header = 0x10 + ver + ((size as u32) << 8); + out.write_u32::(header)?; + } else { + out.write_u32::(0x11)?; + out.write_u32::(size as u32)?; + } + + let mut off: usize = 0; + let mut byte: u8 = 0; + let mut index = 7; + let mut cmpbuf: Vec = Vec::new(); + + while off < size { + match find_longest_match(inp, off, repeat_size as usize) { + None => { + index -= 1; + cmpbuf.push(inp[off]); + off += 1; + } + Some((pos, len)) => { + let lz_off: usize = off - pos - 1; + byte |= 1 << index; + index -= 1; + if ver == 0 { + let l = len - 3; + let cmp: [u8; 2] = [((lz_off >> 8) as u8) + ((l << 4) as u8), lz_off as u8]; + cmpbuf.extend_from_slice(&cmp); + } else if len < 0x11 { + let l = len - 1; + let cmp: [u8; 2] = [((lz_off >> 8) as u8) + ((l << 4) as u8), lz_off as u8]; + cmpbuf.extend_from_slice(&cmp); + } else if len < 0x111 { + let l = len - 0x11; + let cmp: [u8; 3] = + [(l >> 4) as u8, ((lz_off >> 8) as u8) + ((l << 4) as u8), lz_off as u8]; + cmpbuf.extend_from_slice(&cmp); + } else { + let l = len - 0x111; + let cmp: [u8; 4] = [ + (l >> 12) as u8 + 0x10, + (l >> 4) as u8, + ((lz_off >> 8) as u8) + ((l << 4) as u8), + lz_off as u8, + ]; + cmpbuf.extend_from_slice(&cmp); + } + off += len; + } + }; + if index < 0 { + out.write_u8(byte)?; + out.write_all(&cmpbuf)?; + byte = 0; + index = 7; + cmpbuf.clear(); + } + } + if !cmpbuf.is_empty() { + out.write_u8(byte)?; + out.write_all(&cmpbuf)?; + } + out.write_u8(0xFF)?; + + Ok(()) +} + +/// This function is a convenience wrapper around `compress` for compressing to a Vec. +/// Additionally, it uses LZ11 as compression algorithm by default. +pub fn compress_arr(input: &[u8]) -> Result> { + let mut out: Vec = Vec::new(); + { + let mut writer = Cursor::new(&mut out); + compress(input, &mut writer, CompressionLevel::LZ11(65809))?; + } + Ok(out) +} + +fn get_needle_table(needle: &[u8]) -> [usize; 256] { + let mut needle_table = [needle.len(); 256]; + for (i, c) in needle.iter().enumerate() { + needle_table[*c as usize] = needle.len() - i; + } + needle_table +} + +pub fn search_one(haystack: &[u8], needle: &[u8], needle_table: &[usize; 256]) -> Option { + let mut cur = 0; + while haystack.len() - cur >= needle.len() { + let mut output = None; + for i in (0..needle.len()).rev() { + if haystack[cur + i] == needle[i] { + output = Some(cur); + break; + } + } + if output.is_some() { + return output; + } + cur += needle_table[haystack[cur + needle.len() - 1] as usize]; + } + None +} + +fn search(haystack: &[u8], needle: &[u8]) -> Vec { + let needle_table = get_needle_table(needle); + let mut cur = 0usize; + let mut positions = Vec::new(); + while cur + needle.len() < haystack.len() { + let found_pos = search_one(&haystack[cur..], needle, &needle_table); + if let Some(pos) = found_pos { + positions.push(pos); + cur += pos + needle.len() + 1; + } else { + return positions; + } + } + positions +} diff --git a/src/vfs/mod.rs b/src/vfs/mod.rs index 3f8c282..e771141 100644 --- a/src/vfs/mod.rs +++ b/src/vfs/mod.rs @@ -25,6 +25,7 @@ use u8_arc::U8Fs; use crate::util::{ ncompress::{YAY0_MAGIC, YAZ0_MAGIC}, + nlzss, rarc::RARC_MAGIC, u8_arc::U8_MAGIC, }; @@ -339,20 +340,21 @@ pub fn decompress_file( kind: CompressionKind, ) -> io::Result> { let metadata = file.metadata()?; - let data = file.map()?; match kind { CompressionKind::Yay0 => { + let data = file.map()?; let result = orthrus_ncompress::yay0::Yay0::decompress_from(data) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; Ok(Box::new(StaticFile::new(Arc::from(result), metadata.mtime))) } CompressionKind::Yaz0 => { + let data = file.map()?; let result = orthrus_ncompress::yaz0::Yaz0::decompress_from(data) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; Ok(Box::new(StaticFile::new(Arc::from(result), metadata.mtime))) } CompressionKind::Nlzss => { - let result = nintendo_lz::decompress_arr(data) + let result = nlzss::decompress(file) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; Ok(Box::new(StaticFile::new(Arc::from(result.as_slice()), metadata.mtime))) }