diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 79b8f42..c48bc0d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -8,31 +8,21 @@ jobs: strategy: matrix: platform: [ ubuntu-latest, macos-latest, windows-latest ] - toolchain: [ stable, 1.71.0, nightly ] + toolchain: [ stable, 1.81.0, nightly ] features: - - compress,alloc - - compress,decompress - - compress,decompress,std + - compress,alloc + - compress,decompress + - compress,decompress,std fail-fast: false runs-on: ${{ matrix.platform }} steps: - - name: Install LLVM and Clang # required for bindgen to work, see https://github.com/rust-lang/rust-bindgen/issues/1797 - uses: KyleMayes/install-llvm-action@v2.0.8 - if: matrix.platform == 'windows-latest' - with: - version: 21.1.3 - directory: ${{ runner.temp }}/llvm - - name: Set LIBCLANG_PATH - run: echo "LIBCLANG_PATH=$((gcm clang).source -replace "clang.exe")" >> $env:GITHUB_ENV - if: matrix.platform == 'windows-latest' - - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v2 with: submodules: recursive - - uses: actions-rs/toolchain@v1 + - name: Setup Rust toolchain + uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.toolchain }} - override: true - - uses: actions-rs/cargo@v1 - with: - command: test - args: --release --no-default-features --features ${{ matrix.features }} + - name: Cargo test + run: cargo test --release --no-default-features --features ${{ matrix.features }} diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 3c97ead..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "lzokay"] - path = lzokay - url = https://github.com/jackoalan/lzokay.git diff --git a/Cargo.toml b/Cargo.toml index 136b37f..80671ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lzokay" -version = "1.0.2" -edition = "2018" +version = "2.0.0" +edition = "2021" license = "MIT" repository = 
"https://github.com/encounter/lzokay-rs" documentation = "https://docs.rs/lzokay" @@ -11,7 +11,7 @@ A minimal, MIT-licensed implementation of the LZO compression format. """ keywords = ["lzo", "compression", "no_std"] categories = ["compression", "no-std", "api-bindings"] -rust-version = "1.71.0" +rust-version = "1.81.0" [features] alloc = [] @@ -19,7 +19,3 @@ std = ["alloc"] decompress = [] compress = [] default = ["compress", "decompress", "std"] - -[build-dependencies] -bindgen = "0.72.1" -cc = "1.2.41" diff --git a/README.md b/README.md index b0ab8e7..4078f1d 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ [crates.io]: https://crates.io/crates/lzokay [Api Rustdoc]: https://img.shields.io/badge/api-rustdoc-blue.svg [rustdoc]: https://docs.rs/lzokay -[Rust Version]: https://img.shields.io/badge/rust-1.70+-blue.svg?maxAge=3600 +[Rust Version]: https://img.shields.io/badge/rust-1.81+-blue.svg?maxAge=3600 -Rust wrapper for [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed implementation of the +Pure-Rust port of [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed implementation of the [LZO compression format](http://www.oberhumer.com/opensource/lzo/). See the original [README](https://github.com/jackoalan/lzokay/blob/master/README.md) for more information. @@ -21,29 +21,29 @@ See the original [README](https://github.com/jackoalan/lzokay/blob/master/README ### Usage -See the [compress](https://docs.rs/lzokay/latest/lzokay/compress) -or [decompress](https://docs.rs/lzokay/latest/lzokay/decompress) +See the [compress](https://docs.rs/lzokay/latest/lzokay/compress) +or [decompress](https://docs.rs/lzokay/latest/lzokay/decompress) documentation for reference. 
In `Cargo.toml`: ```toml [dependencies] -lzokay = "1.0.1" +lzokay = "2.0.0" ``` Or, to only enable certain features: ```toml [dependencies.lzokay] -version = "1.0.1" +version = "2.0.0" default-features = false features = ["decompress", "compress"] ``` - `decompress`: Enables decompression functions. - `compress`: Enables compression functions. -- `alloc`: Enables optional compression functions that perform heap allocation. +- `alloc`: Enables optional compression functions that perform heap allocation. Without `std`, this uses `extern crate alloc`. - `std`: Enables use of `std`. Implies `alloc`. diff --git a/build.rs b/build.rs deleted file mode 100644 index e17ac2b..0000000 --- a/build.rs +++ /dev/null @@ -1,39 +0,0 @@ -use std::{env, path::PathBuf}; - -fn main() { - println!("cargo:rerun-if-changed=wrapper.hpp"); - println!("cargo:rerun-if-changed=lzokay/lzokay.cpp"); - println!("cargo:rerun-if-changed=lzokay/lzokay.hpp"); - cc::Build::new() - .cpp(true) - .file("lzokay/lzokay.cpp") - .flag_if_supported("-std=c++14") // GCC/Clang - .flag_if_supported("/std:c++14") // MSVC - .compile("lzokay"); - #[allow(unused_mut)] - let mut bindings = bindgen::Builder::default() - .header("wrapper.hpp") - .clang_arg("-Ilzokay") - .allowlist_function("lzokay::.*") - .size_t_is_usize(true) - .ctypes_prefix("types") - .derive_debug(false) - .clang_arg("-std=c++14") - .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())); - #[cfg(not(feature = "std"))] - { - bindings = bindings.layout_tests(false); - } - if matches!(env::var("CARGO_CFG_TARGET_OS"), Result::Ok(v) if v == "android") { - if let Result::Ok(cc) = env::var("TARGET_CXX") { - let mut sysroot = PathBuf::from(cc).with_file_name("../sysroot"); - sysroot = sysroot.canonicalize().unwrap_or_else(|err| { - panic!("Failed to locate {}: {}", sysroot.to_string_lossy(), err) - }); - bindings = bindings.clang_arg(format!("--sysroot={}", sysroot.to_string_lossy())); - } - } - let result = bindings.generate().expect("Unable to 
generate bindings"); - let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); - result.write_to_file(out_path.join("bindings.rs")).expect("Couldn't write bindings!"); -} diff --git a/lzokay b/lzokay deleted file mode 160000 index db2df1f..0000000 --- a/lzokay +++ /dev/null @@ -1 +0,0 @@ -Subproject commit db2df1fcbebc2ed06c10f727f72567d40f06a2be diff --git a/src/compress.rs b/src/compress.rs index cc32677..1e85a41 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -2,7 +2,7 @@ //! //! Available with feature `compress`. //! -//! [`compress`] and [`compress_with_dict`] available with features `std` and/or `alloc`. +//! [`compress`] and [`compress_with_dict`] are available when the `alloc` feature is enabled. //! //! # Examples //! @@ -42,7 +42,7 @@ //! // Allocate dst on stack, with worst-case compression size //! let mut dst = [0u8; compress_worst_size(input.len())]; //! // Allocate dictionary storage on stack -//! let mut storage = [0u8; dict_storage_size()]; +//! let mut storage = DictStorage::new(); //! // Create dictionary from storage //! let mut dict = dict_from_storage(&mut storage); //! let size = compress_no_alloc(&input, &mut dst, &mut dict)?; @@ -50,120 +50,638 @@ //! # Ok::<(), lzokay::Error>(()) //! 
``` -#[cfg(all(not(feature = "std"), feature = "alloc"))] +#[cfg(all(feature = "alloc", not(feature = "std")))] extern crate alloc; -#[cfg(all(not(feature = "std"), feature = "alloc"))] -use alloc::{boxed::Box, vec::Vec}; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::{boxed::Box, vec, vec::Vec}; +use core::{cmp, mem::size_of}; +#[cfg(all(feature = "alloc", feature = "std"))] +use std::{boxed::Box, vec, vec::Vec}; + +use crate::Error; + #[cfg(feature = "alloc")] -use core::ptr::null_mut; -use core::{marker::PhantomData, mem::size_of}; - -use crate::{bindings, lzokay_result, Error}; - -type DictStorage = bindings::lzokay_DictBase_storage_type; - -/// Dictionary type -pub struct Dict<'a> { - base: bindings::lzokay_DictBase, - #[cfg(feature = "alloc")] - storage: Option>, - phantom: PhantomData<&'a DictStorage>, +/// Compress `src` into a freshly allocated `Vec` using a temporary dictionary. +pub fn compress(src: &[u8]) -> Result, Error> { + let mut dict = new_dict(); + compress_with_dict(src, &mut dict) } -/// Creates a new heap-allocated dictionary. #[cfg(feature = "alloc")] -pub fn new_dict() -> Dict<'static> { - let mut dict = Dict { - base: bindings::lzokay_DictBase { _storage: null_mut() }, - storage: Option::Some(Box::new([0u8; dict_storage_size()])), - phantom: PhantomData, - }; - dict.base._storage = dict.storage.as_mut().unwrap().as_mut_ptr() as *mut DictStorage; - dict +/// Compress `src`, reusing the provided dictionary across calls. +pub fn compress_with_dict(src: &[u8], dict: &mut Dict) -> Result, Error> { + let capacity = compress_worst_size(src.len()); + let mut buf = vec![0u8; capacity]; + let size = compress_no_alloc(src, &mut buf, dict)?; + buf.truncate(size); + Ok(buf) } -/// Dictionary storage size, for manual or stack allocation. -pub const fn dict_storage_size() -> usize { size_of::() } - -/// Creates a dictionary from the supplied storage. 
-/// -/// Storage **must** be at least [`dict_storage_size()`] bytes, -/// otherwise this function will panic. -pub fn dict_from_storage(storage: &mut [u8]) -> Dict<'_> { - if storage.len() < dict_storage_size() { - panic!( - "Dictionary storage is not large enough: {}, expected {}", - storage.len(), - dict_storage_size() - ); - } - Dict { - base: bindings::lzokay_DictBase { _storage: storage.as_mut_ptr() as *mut DictStorage }, - #[cfg(feature = "alloc")] - storage: Option::None, - phantom: PhantomData, - } -} - -/// Worst-case compression size. +/// Worst-case compressed size according to the LZO format guarantees. pub const fn compress_worst_size(s: usize) -> usize { s + s / 16 + 64 + 3 } -/// Compress the supplied buffer into a heap-allocated vector. -/// -/// Creates a new dictionary for each invocation. -#[cfg(feature = "alloc")] -pub fn compress(src: &[u8]) -> Result, Error> { compress_with_dict(src, &mut new_dict()) } - -/// Compress the supplied buffer into a heap-allocated vector, -/// with the supplied pre-allocated dictionary. -#[cfg(feature = "alloc")] -pub fn compress_with_dict(src: &[u8], dict: &mut Dict) -> Result, Error> { - let mut out_size = 0usize; - let capacity = compress_worst_size(src.len()); - let mut dst = Vec::with_capacity(capacity); - let result = unsafe { - let result = bindings::lzokay_compress( - src.as_ptr(), - src.len(), - dst.as_mut_ptr(), - capacity, - &mut out_size, - &mut dict.base, - ); - if result == bindings::lzokay_EResult_Success { - dst.set_len(out_size as usize); - } - result - }; - lzokay_result(dst, result) +/// Compress without heap allocations, writing the output into `dst`. +pub fn compress_no_alloc(src: &[u8], dst: &mut [u8], dict: &mut Dict) -> Result { + let storage = dict.storage_mut(); + compress_impl(src, dst, storage) } -/// Compress the supplied buffer. -/// -/// For sizing `dst`, use [`compress_worst_size`]. 
-pub fn compress_no_alloc(src: &[u8], dst: &mut [u8], dict: &mut Dict) -> Result { - let mut out_size = 0usize; - let result = unsafe { - bindings::lzokay_compress( - src.as_ptr(), - src.len(), - dst.as_mut_ptr(), - dst.len(), - &mut out_size, - &mut dict.base, - ) - }; - lzokay_result(out_size as usize, result) +const HASH_SIZE: usize = 0x4000; +const MAX_DIST: usize = 0xBFFF; +const MAX_MATCH_LEN: usize = 0x800; +const BUF_SIZE: usize = MAX_DIST + MAX_MATCH_LEN; +const MAX_MATCH_TABLE: usize = 34; +const BUF_GUARD: usize = BUF_SIZE + MAX_MATCH_LEN; + +const M1_MAX_OFFSET: u32 = 0x0400; +const M2_MAX_OFFSET: u32 = 0x0800; +const M3_MAX_OFFSET: u32 = 0x4000; +const M4_BASE_OFFSET: u32 = 0x4000; + +const M2_MIN_LEN: u32 = 3; +const M2_MAX_LEN: u32 = 8; +const M3_MAX_LEN: u32 = 33; +const M4_MAX_LEN: u32 = 9; + +const M3_MARKER: u8 = 0x20; +const M4_MARKER: u8 = 0x10; + +/// Hash chains tracking recent 3-byte sequences, keeping per-key chains and +/// remembering the best match length at each node. 
+#[derive(Clone)] +struct Match3 { + head: [u16; HASH_SIZE], + chain_sz: [u16; HASH_SIZE], + chain: [u16; BUF_SIZE], + best_len: [u16; BUF_SIZE], +} + +impl Match3 { + const fn new() -> Self { + Self { + head: [0; HASH_SIZE], + chain_sz: [0; HASH_SIZE], + chain: [0; BUF_SIZE], + best_len: [0; BUF_SIZE], + } + } + + #[inline] + fn make_key(bytes: &[u8]) -> usize { + let a = bytes[0] as u32; + let b = bytes[1] as u32; + let c = bytes[2] as u32; + let mix = (((a << 5) ^ b).wrapping_shl(5)) ^ c; + let prod = 0x9f5f_u32.wrapping_mul(mix); + ((prod >> 5) & 0x3fff) as usize + } + + #[inline] + fn get_head(&self, key: usize) -> u16 { + if self.chain_sz[key] == 0 { + u16::MAX + } else { + self.head[key] + } + } + + fn init(&mut self) { self.chain_sz.fill(0); } + + fn remove(&mut self, pos: usize, buffer: &[u8; BUF_GUARD]) { + let key = Self::make_key(&buffer[pos..]); + self.chain_sz[key] = self.chain_sz[key].saturating_sub(1); + } + + /// Insert the current position into the hash chains and return the head + /// position alongside the bounded chain length to inspect. + fn advance(&mut self, state: &State, buffer: &[u8; BUF_GUARD]) -> (u16, u32) { + let key = Self::make_key(&buffer[state.wind_b as usize..]); + let head = self.get_head(key); + self.chain[state.wind_b as usize] = head; + let mut count = self.chain_sz[key] as u32; + self.chain_sz[key] = self.chain_sz[key].wrapping_add(1); + if count > MAX_MATCH_LEN as u32 { + count = MAX_MATCH_LEN as u32; + } + self.head[key] = state.wind_b as u16; + (head, count) + } + + /// Fast path for known matches: advance the hash chains without searching. 
+ fn skip_advance(&mut self, state: &State, buffer: &[u8; BUF_GUARD]) { + let key = Self::make_key(&buffer[state.wind_b as usize..]); + self.chain[state.wind_b as usize] = self.get_head(key); + self.head[key] = state.wind_b as u16; + self.best_len[state.wind_b as usize] = (MAX_MATCH_LEN + 1) as u16; + self.chain_sz[key] = self.chain_sz[key].wrapping_add(1); + } +} + +/// Direct lookup table for 2-byte prefixes used to seed matches quickly. +#[derive(Clone)] +struct Match2 { + head: [u16; 1 << 16], +} + +impl Match2 { + const fn new() -> Self { Self { head: [u16::MAX; 1 << 16] } } + + #[inline] + fn make_key(bytes: &[u8]) -> usize { (bytes[0] as usize) ^ ((bytes[1] as usize) << 8) } + + fn init(&mut self) { self.head.fill(u16::MAX); } + + fn add(&mut self, pos: u16, buffer: &[u8; BUF_GUARD]) { + let key = Self::make_key(&buffer[pos as usize..]); + self.head[key] = pos; + } + + fn remove(&mut self, pos: usize, buffer: &[u8; BUF_GUARD]) { + let key = Self::make_key(&buffer[pos..]); + if self.head[key] as usize == pos { + self.head[key] = u16::MAX; + } + } + + /// Try to find a 2-byte prefix match at the current window position. + fn search( + &self, + state: &State, + lb_pos: &mut u32, + lb_len: &mut u32, + best_pos: &mut [u32; MAX_MATCH_TABLE], + buffer: &[u8; BUF_GUARD], + ) -> bool { + let key = Self::make_key(&buffer[state.wind_b as usize..]); + let pos = self.head[key]; + if pos == u16::MAX { + return false; + } + if best_pos[2] == 0 { + best_pos[2] = pos as u32 + 1; + } + if *lb_len < 2 { + *lb_len = 2; + *lb_pos = pos as u32; + } + true + } +} + +/// Concrete storage backing a dictionary instance. Buffers and match tables are +/// stored side by side so the encoder can share logic across heap and stack +/// configurations. 
+#[derive(Clone)] +pub struct DictStorage { + match3: Match3, + match2: Match2, + buffer: [u8; BUF_GUARD], +} + +impl DictStorage { + pub const fn new() -> Self { + Self { match3: Match3::new(), match2: Match2::new(), buffer: [0; BUF_GUARD] } + } + + /// Initialize dictionary tables and preload the first window from `state.src`. + fn init(&mut self, state: &mut State<'_>) { + self.match3.init(); + self.match2.init(); + + state.cycle1_countdown = MAX_DIST as u32; + state.inp = 0; + state.wind_sz = cmp::min(state.src.len(), MAX_MATCH_LEN) as u32; + state.wind_b = 0; + state.wind_e = state.wind_sz; + if state.wind_sz > 0 { + let len = state.wind_sz as usize; + self.buffer[..len].copy_from_slice(&state.src[..len]); + } + state.inp += state.wind_sz as usize; + + if state.wind_sz < 3 { + let start = state.wind_b as usize + state.wind_sz as usize; + let end = start + (3 - state.wind_sz as usize); + self.buffer[start..end].fill(0); + } + } + + /// Remove stale entries before the sliding window overwrites them. + fn reset_next_input_entry(&mut self, state: &mut State) { + if state.cycle1_countdown == 0 { + let pos = state.wind_e as usize; + self.match3.remove(pos, &self.buffer); + self.match2.remove(pos, &self.buffer); + } else { + state.cycle1_countdown -= 1; + } + } + + /// Advance the dictionary by one position, returning the best match offset + /// and length. When `skip` is true the already-emitted match bytes are + /// fast-forwarded first so the dictionary stays aligned with the encoded + /// output. + fn advance( + &mut self, + state: &mut State, + prev_len: u32, + best_off: &mut [u32; MAX_MATCH_TABLE], + skip: bool, + ) -> (u32, u32) { + if skip { + // Skip phase: advance through already-encoded match bytes while + // keeping the dictionary in sync with the emitted output. 
+ for _ in 0..prev_len.saturating_sub(1) { + self.reset_next_input_entry(state); + self.match3.skip_advance(state, &self.buffer); + self.match2.add(state.wind_b as u16, &self.buffer); + state.get_byte(&mut self.buffer); + } + } + + let mut lb_len = 1u32; + let mut lb_off = 0u32; + let mut lb_pos = 0u32; + let mut best_pos = [0u32; MAX_MATCH_TABLE]; + + let (match_head, mut match_count) = self.match3.advance(state, &self.buffer); + if match_head == u16::MAX { + match_count = 0; + } + + let mut should_terminate = false; + let best_len = lb_len; + + if lb_len >= state.wind_sz { + // Window exhausted: no further matches possible once we reach EOF. + if state.wind_sz == 0 { + should_terminate = true; + } + self.match3.best_len[state.wind_b as usize] = (MAX_MATCH_LEN + 1) as u16; + } else { + if self.match2.search(state, &mut lb_pos, &mut lb_len, &mut best_pos, &self.buffer) + && state.wind_sz >= 3 + { + let mut match_pos = match_head as usize; + for _ in 0..match_count { + if match_pos >= BUF_SIZE { + break; + } + let ref_pos = state.wind_b as usize; + let window = state.wind_sz as usize; + let mut matched = 0usize; + while matched < window + && self.buffer[ref_pos + matched] == self.buffer[match_pos + matched] + { + matched += 1; + } + if matched >= 2 { + if matched < MAX_MATCH_TABLE && best_pos[matched] == 0 { + // Remember first occurrence for potential match length tweaks. 
+ best_pos[matched] = match_pos as u32 + 1; + } + let matched_u32 = matched as u32; + if matched_u32 > lb_len { + lb_len = matched_u32; + lb_pos = match_pos as u32; + if lb_len == state.wind_sz + || lb_len > self.match3.best_len[match_pos] as u32 + { + break; + } + } + } + match_pos = self.match3.chain[match_pos] as usize; + } + } + if lb_len > best_len { + lb_off = state.pos2off(lb_pos); + } + self.match3.best_len[state.wind_b as usize] = lb_len as u16; + for i in 2..MAX_MATCH_TABLE { + best_off[i] = if best_pos[i] != 0 { state.pos2off(best_pos[i] - 1) } else { 0 }; + } + } + + self.reset_next_input_entry(state); + self.match2.add(state.wind_b as u16, &self.buffer); + state.get_byte(&mut self.buffer); + + if should_terminate { + state.buf_sz = 0; + lb_len = 0; + } else { + // Buffer size counts the current byte plus the lookahead window. + state.buf_sz = state.wind_sz + 1; + } + state.bufp = state.inp - state.buf_sz as usize; + + (lb_off, lb_len) + } +} + +/// Sliding window state tracked while searching for matches. +struct State<'a> { + src: &'a [u8], + inp: usize, + wind_sz: u32, + wind_b: u32, + wind_e: u32, + cycle1_countdown: u32, + bufp: usize, + buf_sz: u32, +} + +impl<'a> State<'a> { + /// Create a new window over `src`. + fn new(src: &'a [u8]) -> Self { + Self { + src, + inp: 0, + wind_sz: 0, + wind_b: 0, + wind_e: 0, + cycle1_countdown: 0, + bufp: 0, + buf_sz: 0, + } + } + + /// Advance the window by one byte, copying from `src` and maintaining the + /// duplicated tail used for wrap-around reads. 
+ fn get_byte(&mut self, buffer: &mut [u8; BUF_GUARD]) { + if self.inp >= self.src.len() { + if self.wind_sz > 0 { + self.wind_sz -= 1; + } + let idx = self.wind_e as usize; + buffer[idx] = 0; + if idx < MAX_MATCH_LEN { + buffer[BUF_SIZE + idx] = 0; + } + } else { + let value = self.src[self.inp]; + let idx = self.wind_e as usize; + buffer[idx] = value; + if idx < MAX_MATCH_LEN { + buffer[BUF_SIZE + idx] = value; + } + self.inp += 1; + } + self.wind_e = (self.wind_e + 1) % BUF_SIZE as u32; + self.wind_b = (self.wind_b + 1) % BUF_SIZE as u32; + } + + /// Convert a buffer index into a backwards distance within the window. + #[inline] + fn pos2off(&self, pos: u32) -> u32 { + if self.wind_b > pos { + self.wind_b - pos + } else { + BUF_SIZE as u32 - (pos - self.wind_b) + } + } +} + +/// Internal representation for dictionaries, either borrowed or owned. +enum DictInner<'a> { + Borrowed(&'a mut DictStorage), + #[cfg(feature = "alloc")] + Owned(Box), +} + +/// Compression dictionary used to retain the sliding window between calls. +pub struct Dict<'a> { + inner: DictInner<'a>, +} + +impl<'a> Dict<'a> { + /// Return the mutable storage backing this dictionary, regardless of + /// whether it is owned or borrowed. + fn storage_mut(&mut self) -> &mut DictStorage { + match &mut self.inner { + DictInner::Borrowed(storage) => storage, + #[cfg(feature = "alloc")] + DictInner::Owned(storage) => storage.as_mut(), + } + } +} + +#[cfg(feature = "alloc")] +/// Create a heap-allocated dictionary with the canonical storage layout. +pub fn new_dict() -> Dict<'static> { + Dict { inner: DictInner::Owned(Box::new(DictStorage::new())) } +} + +/// Total number of bytes required to back a dictionary. +pub const fn dict_storage_size() -> usize { size_of::() } + +/// Wrap user-provided storage (e.g. stack-allocated) inside a dictionary. 
+pub fn dict_from_storage(storage: &mut DictStorage) -> Dict<'_> { + Dict { inner: DictInner::Borrowed(storage) } +} + +/// Emit the repeated zero-byte encoding used for long literal/match lengths. +fn write_zero_byte_length( + dst: &mut [u8], + out_pos: &mut usize, + mut len: usize, +) -> Result<(), Error> { + while len > 255 { + write_dst(dst, out_pos, &[0])?; + len -= 255; + } + write_dst(dst, out_pos, &[len as u8])?; + Ok(()) +} + +/// Emit a literal run following the LZO opcode rules. +fn encode_literal_run( + dst: &mut [u8], + out_pos: &mut usize, + src: &[u8], + lit_ptr: usize, + lit_len: usize, +) -> Result<(), Error> { + if *out_pos == 0 && lit_len <= 238 { + write_dst(dst, out_pos, &[17 + lit_len as u8])?; + } else if lit_len <= 3 { + let idx = out_pos.checked_sub(2).ok_or(Error::OutputOverrun)?; + *dst_byte_mut(dst, idx)? |= lit_len as u8; + } else if lit_len <= 18 { + write_dst(dst, out_pos, &[(lit_len - 3) as u8])?; + } else { + write_dst(dst, out_pos, &[0])?; + write_zero_byte_length(dst, out_pos, lit_len - 18)?; + } + let src_chunk = src.get(lit_ptr..lit_ptr + lit_len).ok_or(Error::InputOverrun)?; + write_dst(dst, out_pos, src_chunk)?; + Ok(()) +} + +/// Emit a back-reference according to the LZOKAY/LZO opcode encoding. 
+fn encode_lookback_match( + dst: &mut [u8], + out_pos: &mut usize, + lb_len: u32, + mut lb_off: u32, + last_lit_len: u32, +) -> Result<(), Error> { + if lb_len == 2 { + lb_off -= 1; + write_dst(dst, out_pos, &[((lb_off & 0x3) << 2) as u8, (lb_off >> 2) as u8])?; + } else if lb_len <= M2_MAX_LEN && lb_off <= M2_MAX_OFFSET { + lb_off -= 1; + write_dst(dst, out_pos, &[ + (((lb_len - 1) << 5) | ((lb_off & 0x7) << 2)) as u8, + (lb_off >> 3) as u8, + ])?; + } else if lb_len == M2_MIN_LEN && lb_off <= M1_MAX_OFFSET + M2_MAX_OFFSET && last_lit_len >= 4 { + lb_off -= 1 + M2_MAX_OFFSET; + write_dst(dst, out_pos, &[((lb_off & 0x3) << 2) as u8, (lb_off >> 2) as u8])?; + } else if lb_off <= M3_MAX_OFFSET { + lb_off -= 1; + if lb_len <= M3_MAX_LEN { + write_dst(dst, out_pos, &[M3_MARKER | (lb_len as u8 - 2)])?; + } else { + let extra = (lb_len - M3_MAX_LEN) as usize; + write_dst(dst, out_pos, &[M3_MARKER])?; + write_zero_byte_length(dst, out_pos, extra)?; + } + write_dst(dst, out_pos, &[(lb_off << 2) as u8, (lb_off >> 6) as u8])?; + } else { + lb_off -= M4_BASE_OFFSET; + if lb_len <= M4_MAX_LEN { + write_dst(dst, out_pos, &[M4_MARKER + | (((lb_off & 0x4000) >> 11) as u8) + | (lb_len as u8 - 2)])?; + } else { + let extra = (lb_len - M4_MAX_LEN) as usize; + write_dst(dst, out_pos, &[M4_MARKER | (((lb_off & 0x4000) >> 11) as u8)])?; + write_zero_byte_length(dst, out_pos, extra)?; + } + write_dst(dst, out_pos, &[(lb_off << 2) as u8, (lb_off >> 6) as u8])?; + } + Ok(()) +} + +/// Apply the heuristics that prefer cheaper opcodes when a shorter match can be +/// emitted at a closer distance. +fn find_better_match(best_off: &[u32; MAX_MATCH_TABLE], lb_len: &mut u32, lb_off: &mut u32) { + let len = *lb_len; + let off = *lb_off; + if len <= M2_MIN_LEN || off <= M2_MAX_OFFSET { + return; + } + // Prefer re-encoding long matches as cheaper opcodes whenever the distance + // permits switching to a shorter back-reference class. 
+ if off > M2_MAX_OFFSET + && len >= M2_MIN_LEN + 1 + && len <= M2_MAX_LEN + 1 + && best_off[len as usize - 1] != 0 + && best_off[len as usize - 1] <= M2_MAX_OFFSET + { + *lb_len = len - 1; + *lb_off = best_off[len as usize - 1]; + } else if off > M3_MAX_OFFSET + && len >= M4_MAX_LEN + 1 + && len <= M2_MAX_LEN + 2 + && best_off[len as usize - 2] != 0 + && best_off[len as usize - 2] <= M2_MAX_OFFSET + { + *lb_len = len - 2; + *lb_off = best_off[len as usize - 2]; + } else if off > M3_MAX_OFFSET + && len >= M4_MAX_LEN + 1 + && len <= M3_MAX_LEN + 1 + && best_off[len as usize - 1] != 0 + && best_off[len as usize - 1] <= M3_MAX_OFFSET + { + *lb_len = len - 1; + *lb_off = best_off[len as usize - 1]; + } +} + +/// Core compression routine shared by the heap-allocating and stack variants. +/// Maintains the window management and opcode selection heuristics required by +/// the LZO format while using safe Rust semantics. +fn compress_impl(src: &[u8], dst: &mut [u8], storage: &mut DictStorage) -> Result { + let mut state = State::new(src); + storage.init(&mut state); + + let mut out_pos = 0usize; + let mut lit_len = 0u32; + let mut best_off = [0u32; MAX_MATCH_TABLE]; + + let mut lit_ptr = state.inp; + let (mut lb_off, mut lb_len) = storage.advance(&mut state, 0, &mut best_off, false); + + while state.buf_sz > 0 { + if lit_len == 0 { + // Capture the starting point for the next literal run. + lit_ptr = state.bufp; + } + + if lb_len < 2 + || (lb_len == 2 && (lb_off > M1_MAX_OFFSET || lit_len == 0 || lit_len >= 4)) + || (lb_len == 2 && out_pos == 0) + || (out_pos == 0 && lit_len == 0) + { + lb_len = 0; + } else if lb_len == M2_MIN_LEN && lb_off > M1_MAX_OFFSET + M2_MAX_OFFSET && lit_len >= 4 { + lb_len = 0; + } + + if lb_len == 0 { + lit_len += 1; + // No match chosen: step forward by one literal byte.
+ let (next_off, next_len) = storage.advance(&mut state, 0, &mut best_off, false); + lb_off = next_off; + lb_len = next_len; + continue; + } + + find_better_match(&best_off, &mut lb_len, &mut lb_off); + encode_literal_run(dst, &mut out_pos, src, lit_ptr, lit_len as usize)?; + encode_lookback_match(dst, &mut out_pos, lb_len, lb_off, lit_len)?; + let prev_len = lb_len; + lit_len = 0; + // Advance over the matched bytes, updating the search structures. + let (next_off, next_len) = storage.advance(&mut state, prev_len, &mut best_off, true); + lb_off = next_off; + lb_len = next_len; + } + + // Flush any trailing literal bytes. + encode_literal_run(dst, &mut out_pos, src, lit_ptr, lit_len as usize)?; + + // Emit terminating M4 instruction (distance 0x4000, length 3). + write_dst(dst, &mut out_pos, &[M4_MARKER | 1, 0, 0])?; + + Ok(out_pos) +} + +#[inline(always)] +fn dst_byte_mut<'a>(dst: &'a mut [u8], idx: usize) -> Result<&'a mut u8, Error> { + dst.get_mut(idx).ok_or(Error::OutputOverrun) +} + +#[inline(always)] +fn write_dst(dst: &mut [u8], out_pos: &mut usize, slice: &[u8]) -> Result<(), Error> { + let pos = *out_pos; + let end = pos.checked_add(slice.len()).ok_or(Error::OutputOverrun)?; + let dst_chunk = dst.get_mut(pos..end).ok_or(Error::OutputOverrun)?; + dst_chunk.copy_from_slice(slice); + *out_pos = end; + Ok(()) } #[cfg(test)] mod tests { #[cfg(feature = "alloc")] - use crate::compress::{compress, compress_with_dict, new_dict}; - use crate::compress::{ - compress_no_alloc, compress_worst_size, dict_from_storage, dict_storage_size, - }; + use super::{compress, compress_with_dict, new_dict}; + use super::{compress_no_alloc, compress_worst_size, dict_from_storage, DictStorage}; const INPUT_1: &[u8] = include_bytes!("test1.txt"); const EXPECTED_1: &[u8] = include_bytes!("test1.bin"); @@ -183,7 +701,6 @@ mod tests { let mut dict = new_dict(); let dst = compress_with_dict(INPUT_1, &mut dict).expect("Failed to compress (1)"); assert_eq!(dst, EXPECTED_1); - // Compress 
a second time to test dictionary reuse let dst = compress_with_dict(INPUT_2, &mut dict).expect("Failed to compress (2)"); assert_eq!(dst, EXPECTED_2); } @@ -191,12 +708,11 @@ mod tests { #[test] fn test_compress_no_alloc() { let mut dst = [0u8; compress_worst_size(INPUT_1.len())]; - let mut storage = [0u8; dict_storage_size()]; + let mut storage = DictStorage::new(); let mut dict = dict_from_storage(&mut storage); let out_size = compress_no_alloc(INPUT_1, &mut dst, &mut dict).expect("Failed to compress (1)"); assert_eq!(&dst[0..out_size], EXPECTED_1); - // Compress a second time to test dictionary reuse let out_size = compress_no_alloc(INPUT_2, &mut dst, &mut dict).expect("Failed to compress (2)"); assert_eq!(&dst[0..out_size], EXPECTED_2); diff --git a/src/decompress.rs b/src/decompress.rs index cf1df19..12eda40 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -16,23 +16,238 @@ //! # Ok::<(), lzokay::Error>(()) //! ``` -use crate::{bindings, lzokay_result, Error}; +use crate::Error; + +/// Maximum repeat count representable via zero marker bytes when extending +/// literal or match lengths. +const MAX255_COUNT: usize = usize::MAX / 255 - 2; +/// Opcode marker for mid-range matches (labelled "M3" in the LZO reference). +const M3_MARKER: u8 = 0x20; +/// Opcode marker for far matches ("M4") and the terminator instruction. +const M4_MARKER: u8 = 0x10; /// Decompress `src` into `dst`. /// -/// `dst` must be large enough to hold the entire decompressed output. +/// `dst` must be large enough to hold the entire decompressed output. The +/// function follows the documented LZO opcode semantics and state transitions. 
pub fn decompress(src: &[u8], dst: &mut [u8]) -> Result { - let mut out_size = 0usize; - let result = unsafe { - bindings::lzokay_decompress( - src.as_ptr(), - src.len(), - dst.as_mut_ptr(), - dst.len(), - &mut out_size, - ) - }; - lzokay_result(out_size as usize, result) + if src.len() < 3 { + return Err(Error::InputOverrun); + } + + let mut inp = 0usize; + let mut outp = 0usize; + let mut state = 0usize; + let mut nstate: usize; + let mut lblen: usize; + let mut lbcur: usize; + + let first = src[0]; + // The first byte primes a literal run only when it is >= 18; smaller values + // are ordinary opcodes and must be left unconsumed for the main loop. + if first >= 22 { + inp = 1; let len = (first as usize) - 17; + copy_slice(src, &mut inp, dst, &mut outp, len)?; + state = 4; + } else if first >= 18 { + inp = 1; nstate = (first as usize) - 17; + state = nstate; + copy_slice(src, &mut inp, dst, &mut outp, nstate)?; + } + + loop { + let inst = input_byte(src, &mut inp)?; + if inst & 0xC0 != 0 { + // [M2] + // 1 L L D D D S S (128..255) + // Copy 5-8 bytes from block within 2kB distance + // state = S + // length = 5 + L + // 0 1 L D D D S S (64..127) + // Copy 3-4 bytes from block within 2kB distance + // length = 3 + L + // Always followed by one byte: distance = (next << 3) + D + 1 + let next = input_byte(src, &mut inp)?; + let distance = ((next as usize) << 3) + (((inst as usize) >> 2) & 0x7) + 1; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + lblen = ((inst as usize) >> 5) + 1; + nstate = (inst as usize) & 0x3; + } else if inst & M3_MARKER != 0 { + // [M3] + // 0 0 1 L L L L L (32..63) + // Copy from <= 16kB distance + // length = 2 + (L ?: 31 + zero-runs + tail) + // Followed by LE16: distance = (value >> 2) + 1, state = value & 3 + lblen = ((inst as usize) & 0x1F) + 2; + if lblen == 2 { + let offset = consume_zero_byte_length(src, &mut inp)?; + let tail = input_byte(src, &mut inp)?; + lblen += offset * 255 + 31 + tail as
usize; + } + let raw = read_le16(src, &mut inp)?; + let distance = ((raw as usize) >> 2) + 1; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + nstate = (raw as usize) & 0x3; + } else if inst & M4_MARKER != 0 { + // [M4] + // 0 0 0 1 H L L L (16..31) + // Copy from 16..48kB distance + // length = 2 + (L ?: 7 + zero-runs + tail) + // Followed by LE16: distance = 16384 + (H << 14) + value, state = value & 3 + // Terminating opcode when distance == 16384. + lblen = ((inst as usize) & 0x7) + 2; + if lblen == 2 { + let offset = consume_zero_byte_length(src, &mut inp)?; + let tail = input_byte(src, &mut inp)?; + lblen += offset * 255 + 7 + tail as usize; + } + let raw = read_le16(src, &mut inp)?; + let base_dist = ((inst as usize & 0x8) << 11) + ((raw as usize) >> 2); + if base_dist == 0 { + // Stream finished + break; + } + let distance = base_dist + 16384; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + nstate = (raw as usize) & 0x3; + } else { + if state == 0 { + // [Literal] + // 0 0 0 0 L L L L (0..15) + // Copy long literal string: length = 3 + extended length bytes. + let mut len = inst as usize + 3; + if len == 3 { + let offset = consume_zero_byte_length(src, &mut inp)?; + let tail = input_byte(src, &mut inp)?; + len += offset * 255 + 15 + tail as usize; + } + copy_slice(src, &mut inp, dst, &mut outp, len)?; + state = 4; + continue; + } else if state != 4 { + // [M1, short] + // state = 1..3 + // 0 0 0 0 D D S S (0..15) + // Copy 2 bytes within 1kB distance, state = S afterwards. + let tail = input_byte(src, &mut inp)?; + let distance = ((inst as usize) >> 2) + ((tail as usize) << 2) + 1; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + lblen = 2; + nstate = (inst as usize) & 0x3; + } else { + // [M1, long] + // state == 4 + // 0 0 0 0 D D S S (0..15) + // Copy 3 bytes within 2..3kB distance, state = S afterwards. 
+ let tail = input_byte(src, &mut inp)?; + let distance = ((inst as usize) >> 2) + ((tail as usize) << 2) + 2049; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + lblen = 3; + nstate = (inst as usize) & 0x3; + } + } + + // Copy the lookback run (source and destination may overlap). + if lblen > 0 { + let out_end = outp.checked_add(lblen).ok_or(Error::OutputOverrun)?; + let lb_end = lbcur.checked_add(lblen).ok_or(Error::OutputOverrun)?; + if out_end > dst.len() || lb_end > dst.len() { + return Err(Error::OutputOverrun); + } + for i in 0..lblen { + dst[outp + i] = dst[lbcur + i]; + } + outp = out_end; + } + + // Copy the following literal run dictated by `nstate`. + copy_slice(src, &mut inp, dst, &mut outp, nstate)?; + + state = nstate; + } + + // The stream must end with the terminating M4 instruction (length == 3). + if lblen != 3 { + return Err(Error::Error); + } + + if inp == src.len() { + Ok(outp) + } else if inp < src.len() { + Err(Error::InputNotConsumed) + } else { + Err(Error::InputOverrun) + } +} + +/// Read a single byte from `src`. +#[inline(always)] +fn input_byte(src: &[u8], idx: &mut usize) -> Result<u8, Error> { + let n = src.get(*idx).copied().ok_or(Error::InputOverrun)?; + *idx += 1; + Ok(n) +} + +/// Read a slice of length `len` starting at `start` from `src`. +#[inline(always)] +fn input_slice<'a>(src: &'a [u8], start: &mut usize, len: usize) -> Result<&'a [u8], Error> { + let end = start.checked_add(len).ok_or(Error::InputOverrun)?; + let slice = src.get(*start..end).ok_or(Error::InputOverrun)?; + *start = end; + Ok(slice) +} + +/// Read a little-endian `u16` starting at `pos`. +#[inline(always)] +fn read_le16(bytes: &[u8], pos: &mut usize) -> Result<u16, Error> { + let slice = input_slice(bytes, pos, 2)?; + Ok(u16::from_le_bytes(slice.try_into().unwrap())) +} + +/// Get a mutable slice of length `len` starting at `start` from `dst`. 
+#[inline(always)] +fn output_slice<'a>( + dst: &'a mut [u8], + start: &mut usize, + len: usize, +) -> Result<&'a mut [u8], Error> { + let end = start.checked_add(len).ok_or(Error::OutputOverrun)?; + let slice = dst.get_mut(*start..end).ok_or(Error::OutputOverrun)?; + *start = end; + Ok(slice) +} + +/// Copy a slice from `src` to `dst`. +#[inline(always)] +fn copy_slice( + src: &[u8], + src_start: &mut usize, + dst: &mut [u8], + dst_start: &mut usize, + len: usize, +) -> Result<(), Error> { + if len == 0 { + return Ok(()); + } + let src_slice = input_slice(src, src_start, len)?; + let dst_slice = output_slice(dst, dst_start, len)?; + dst_slice.copy_from_slice(src_slice); + Ok(()) +} + +/// Consume a run of zero marker bytes used for long length encodings. +#[inline(always)] +fn consume_zero_byte_length(src: &[u8], inp: &mut usize) -> Result<usize, Error> { + let start = *inp; + while src.get(*inp).copied() == Some(0) { + *inp += 1; + } + let offset = *inp - start; + if offset > MAX255_COUNT { + Err(Error::Error) + } else { + Ok(offset) + } } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index b28849d..ad00300 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ #![cfg_attr(not(feature = "std"), no_std)] //! # LZ👌-rs //! -//! Rust wrapper for [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed +//! Pure-Rust port of [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed //! implementation of the [LZO compression format](http://www.oberhumer.com/opensource/lzo/). //! //! See the original [README](https://github.com/jackoalan/lzokay/blob/master/README.md) for more information. @@ -20,14 +20,14 @@ //! //! ```toml //! [dependencies] -//! lzokay = "1.0.1" +//! lzokay = "2.0.0" //! ``` //! //! Or, to only enable certain features: //! //! ```toml //! [dependencies.lzokay] -//! version = "1.0.1" +//! version = "2.0.0" //! default-features = false //! features = ["decompress", "compress"] //! 
``` @@ -49,36 +49,8 @@ pub mod compress; #[cfg(feature = "decompress")] pub mod decompress; -mod bindings { - #![allow(unknown_lints)] - #![allow(non_upper_case_globals)] - #![allow(non_camel_case_types)] - #![allow(non_snake_case)] - #![allow(deref_nullptr)] - #![allow(dead_code)] - #[cfg(not(feature = "std"))] - mod types { - pub type c_uchar = u8; - pub type c_ushort = u16; - pub type c_uint = u32; - pub type c_int = i32; - pub type c_ulong = usize; - pub type c_ulonglong = usize; - } - #[cfg(feature = "std")] - mod types { - pub type c_uchar = ::std::os::raw::c_uchar; - pub type c_ushort = ::std::os::raw::c_ushort; - pub type c_uint = ::std::os::raw::c_uint; - pub type c_int = ::std::os::raw::c_int; - pub type c_ulong = usize; - pub type c_ulonglong = usize; - } - include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -} - /// Error result codes -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Error { /// Likely indicates bad compressed LZO input. 
LookbehindOverrun, @@ -92,20 +64,20 @@ pub enum Error { InputNotConsumed, } -fn lzokay_result<T>(result: T, error: bindings::lzokay_EResult) -> Result<T, Error> { - if error == bindings::lzokay_EResult_Success { - Result::Ok(result) - } else { - Result::Err(match error { - bindings::lzokay_EResult_LookbehindOverrun => Error::LookbehindOverrun, - bindings::lzokay_EResult_OutputOverrun => Error::OutputOverrun, - bindings::lzokay_EResult_InputOverrun => Error::InputOverrun, - bindings::lzokay_EResult_InputNotConsumed => Error::InputNotConsumed, - _ => Error::Error, - }) +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self { + Error::LookbehindOverrun => write!(f, "lookbehind overrun"), + Error::OutputOverrun => write!(f, "output overrun"), + Error::InputOverrun => write!(f, "input overrun"), + Error::Error => write!(f, "unknown error"), + Error::InputNotConsumed => write!(f, "input not consumed"), + } } } +impl core::error::Error for Error {} + #[cfg(test)] #[cfg(all(feature = "compress", feature = "decompress", feature = "alloc"))] mod tests { @@ -117,13 +89,22 @@ mod tests { use super::{compress::compress, decompress::decompress}; - const INPUT: &[u8] = include_bytes!("test1.txt"); + const INPUT1: &[u8] = include_bytes!("test1.txt"); + const INPUT2: &[u8] = include_bytes!("test2.txt"); #[test] - fn test_round_trip() { - let compressed = compress(INPUT).expect("Failed to compress"); - let mut dst = vec![0u8; INPUT.len()]; + fn test_round_trip1() { + let compressed = compress(INPUT1).expect("Failed to compress"); + let mut dst = vec![0u8; INPUT1.len()]; decompress(&compressed, &mut dst).expect("Failed to decompress"); - assert_eq!(INPUT, dst.as_slice()); + assert_eq!(INPUT1, dst.as_slice()); + } + + #[test] + fn test_round_trip2() { + let compressed = compress(INPUT2).expect("Failed to compress"); + let mut dst = vec![0u8; INPUT2.len()]; + decompress(&compressed, &mut dst).expect("Failed to decompress"); + 
assert_eq!(INPUT2, dst.as_slice()); } } diff --git a/wrapper.hpp b/wrapper.hpp deleted file mode 100644 index b09f349..0000000 --- a/wrapper.hpp +++ /dev/null @@ -1 +0,0 @@ -#include <lzokay/lzokay.hpp>