mirror of
https://github.com/encounter/lzokay-rs.git
synced 2025-10-17 14:45:10 +00:00
Version v2.0.0: Native Rust port
This commit is contained in:
parent
e81a6d439b
commit
9dcfecc8b2
30
.github/workflows/build.yaml
vendored
30
.github/workflows/build.yaml
vendored
@ -8,31 +8,21 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
platform: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
toolchain: [ stable, 1.71.0, nightly ]
|
||||
toolchain: [ stable, 1.81.0, nightly ]
|
||||
features:
|
||||
- compress,alloc
|
||||
- compress,decompress
|
||||
- compress,decompress,std
|
||||
- compress,alloc
|
||||
- compress,decompress
|
||||
- compress,decompress,std
|
||||
fail-fast: false
|
||||
runs-on: ${{ matrix.platform }}
|
||||
steps:
|
||||
- name: Install LLVM and Clang # required for bindgen to work, see https://github.com/rust-lang/rust-bindgen/issues/1797
|
||||
uses: KyleMayes/install-llvm-action@v2.0.8
|
||||
if: matrix.platform == 'windows-latest'
|
||||
with:
|
||||
version: 21.1.3
|
||||
directory: ${{ runner.temp }}/llvm
|
||||
- name: Set LIBCLANG_PATH
|
||||
run: echo "LIBCLANG_PATH=$((gcm clang).source -replace "clang.exe")" >> $env:GITHUB_ENV
|
||||
if: matrix.platform == 'windows-latest'
|
||||
- uses: actions/checkout@v2
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- name: Setup Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ matrix.toolchain }}
|
||||
override: true
|
||||
- uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --release --no-default-features --features ${{ matrix.features }}
|
||||
- name: Cargo test
|
||||
run: cargo test --release --no-default-features --features ${{ matrix.features }}
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,3 +0,0 @@
|
||||
[submodule "lzokay"]
|
||||
path = lzokay
|
||||
url = https://github.com/jackoalan/lzokay.git
|
10
Cargo.toml
10
Cargo.toml
@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lzokay"
|
||||
version = "1.0.2"
|
||||
edition = "2018"
|
||||
version = "2.0.0"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
repository = "https://github.com/encounter/lzokay-rs"
|
||||
documentation = "https://docs.rs/lzokay"
|
||||
@ -11,7 +11,7 @@ A minimal, MIT-licensed implementation of the LZO compression format.
|
||||
"""
|
||||
keywords = ["lzo", "compression", "no_std"]
|
||||
categories = ["compression", "no-std", "api-bindings"]
|
||||
rust-version = "1.71.0"
|
||||
rust-version = "1.81.0"
|
||||
|
||||
[features]
|
||||
alloc = []
|
||||
@ -19,7 +19,3 @@ std = ["alloc"]
|
||||
decompress = []
|
||||
compress = []
|
||||
default = ["compress", "decompress", "std"]
|
||||
|
||||
[build-dependencies]
|
||||
bindgen = "0.72.1"
|
||||
cc = "1.2.41"
|
||||
|
14
README.md
14
README.md
@ -6,9 +6,9 @@
|
||||
[crates.io]: https://crates.io/crates/lzokay
|
||||
[Api Rustdoc]: https://img.shields.io/badge/api-rustdoc-blue.svg
|
||||
[rustdoc]: https://docs.rs/lzokay
|
||||
[Rust Version]: https://img.shields.io/badge/rust-1.70+-blue.svg?maxAge=3600
|
||||
[Rust Version]: https://img.shields.io/badge/rust-1.81+-blue.svg?maxAge=3600
|
||||
|
||||
Rust wrapper for [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed implementation of the
|
||||
Pure-Rust port of [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed implementation of the
|
||||
[LZO compression format](http://www.oberhumer.com/opensource/lzo/).
|
||||
|
||||
See the original [README](https://github.com/jackoalan/lzokay/blob/master/README.md) for more information.
|
||||
@ -21,29 +21,29 @@ See the original [README](https://github.com/jackoalan/lzokay/blob/master/README
|
||||
|
||||
### Usage
|
||||
|
||||
See the [compress](https://docs.rs/lzokay/latest/lzokay/compress)
|
||||
or [decompress](https://docs.rs/lzokay/latest/lzokay/decompress)
|
||||
See the [compress](https://docs.rs/lzokay/latest/lzokay/compress)
|
||||
or [decompress](https://docs.rs/lzokay/latest/lzokay/decompress)
|
||||
documentation for reference.
|
||||
|
||||
In `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
lzokay = "1.0.1"
|
||||
lzokay = "2.0.0"
|
||||
```
|
||||
|
||||
Or, to only enable certain features:
|
||||
|
||||
```toml
|
||||
[dependencies.lzokay]
|
||||
version = "1.0.1"
|
||||
version = "2.0.0"
|
||||
default-features = false
|
||||
features = ["decompress", "compress"]
|
||||
```
|
||||
|
||||
- `decompress`: Enables decompression functions.
|
||||
- `compress`: Enables compression functions.
|
||||
- `alloc`: Enables optional compression functions that perform heap allocation.
|
||||
- `alloc`: Enables optional compression functions that perform heap allocation.
|
||||
Without `std`, this uses `extern crate alloc`.
|
||||
- `std`: Enables use of `std`. Implies `alloc`.
|
||||
|
||||
|
39
build.rs
39
build.rs
@ -1,39 +0,0 @@
|
||||
use std::{env, path::PathBuf};
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=wrapper.hpp");
|
||||
println!("cargo:rerun-if-changed=lzokay/lzokay.cpp");
|
||||
println!("cargo:rerun-if-changed=lzokay/lzokay.hpp");
|
||||
cc::Build::new()
|
||||
.cpp(true)
|
||||
.file("lzokay/lzokay.cpp")
|
||||
.flag_if_supported("-std=c++14") // GCC/Clang
|
||||
.flag_if_supported("/std:c++14") // MSVC
|
||||
.compile("lzokay");
|
||||
#[allow(unused_mut)]
|
||||
let mut bindings = bindgen::Builder::default()
|
||||
.header("wrapper.hpp")
|
||||
.clang_arg("-Ilzokay")
|
||||
.allowlist_function("lzokay::.*")
|
||||
.size_t_is_usize(true)
|
||||
.ctypes_prefix("types")
|
||||
.derive_debug(false)
|
||||
.clang_arg("-std=c++14")
|
||||
.parse_callbacks(Box::new(bindgen::CargoCallbacks::new()));
|
||||
#[cfg(not(feature = "std"))]
|
||||
{
|
||||
bindings = bindings.layout_tests(false);
|
||||
}
|
||||
if matches!(env::var("CARGO_CFG_TARGET_OS"), Result::Ok(v) if v == "android") {
|
||||
if let Result::Ok(cc) = env::var("TARGET_CXX") {
|
||||
let mut sysroot = PathBuf::from(cc).with_file_name("../sysroot");
|
||||
sysroot = sysroot.canonicalize().unwrap_or_else(|err| {
|
||||
panic!("Failed to locate {}: {}", sysroot.to_string_lossy(), err)
|
||||
});
|
||||
bindings = bindings.clang_arg(format!("--sysroot={}", sysroot.to_string_lossy()));
|
||||
}
|
||||
}
|
||||
let result = bindings.generate().expect("Unable to generate bindings");
|
||||
let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
|
||||
result.write_to_file(out_path.join("bindings.rs")).expect("Couldn't write bindings!");
|
||||
}
|
1
lzokay
1
lzokay
@ -1 +0,0 @@
|
||||
Subproject commit db2df1fcbebc2ed06c10f727f72567d40f06a2be
|
720
src/compress.rs
720
src/compress.rs
@ -2,7 +2,7 @@
|
||||
//!
|
||||
//! Available with feature `compress`.
|
||||
//!
|
||||
//! [`compress`] and [`compress_with_dict`] available with features `std` and/or `alloc`.
|
||||
//! [`compress`] and [`compress_with_dict`] are available when the `alloc` feature is enabled.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
@ -42,7 +42,7 @@
|
||||
//! // Allocate dst on stack, with worst-case compression size
|
||||
//! let mut dst = [0u8; compress_worst_size(input.len())];
|
||||
//! // Allocate dictionary storage on stack
|
||||
//! let mut storage = [0u8; dict_storage_size()];
|
||||
//! let mut storage = DictStorage::new();
|
||||
//! // Create dictionary from storage
|
||||
//! let mut dict = dict_from_storage(&mut storage);
|
||||
//! let size = compress_no_alloc(&input, &mut dst, &mut dict)?;
|
||||
@ -50,120 +50,638 @@
|
||||
//! # Ok::<(), lzokay::Error>(())
|
||||
//! ```
|
||||
|
||||
#[cfg(all(not(feature = "std"), feature = "alloc"))]
|
||||
#[cfg(all(feature = "alloc", not(feature = "std")))]
|
||||
extern crate alloc;
|
||||
|
||||
#[cfg(all(not(feature = "std"), feature = "alloc"))]
|
||||
use alloc::{boxed::Box, vec::Vec};
|
||||
#[cfg(all(feature = "alloc", not(feature = "std")))]
|
||||
use alloc::{boxed::Box, vec, vec::Vec};
|
||||
use core::{cmp, mem::size_of};
|
||||
#[cfg(all(feature = "alloc", feature = "std"))]
|
||||
use std::{boxed::Box, vec, vec::Vec};
|
||||
|
||||
use crate::Error;
|
||||
|
||||
#[cfg(feature = "alloc")]
|
||||
use core::ptr::null_mut;
|
||||
use core::{marker::PhantomData, mem::size_of};
|
||||
|
||||
use crate::{bindings, lzokay_result, Error};
|
||||
|
||||
type DictStorage = bindings::lzokay_DictBase_storage_type;
|
||||
|
||||
/// Dictionary type
|
||||
pub struct Dict<'a> {
|
||||
base: bindings::lzokay_DictBase,
|
||||
#[cfg(feature = "alloc")]
|
||||
storage: Option<Box<[u8; dict_storage_size()]>>,
|
||||
phantom: PhantomData<&'a DictStorage>,
|
||||
/// Compress `src` into a freshly allocated `Vec<u8>` using a temporary dictionary.
|
||||
pub fn compress(src: &[u8]) -> Result<Vec<u8>, Error> {
|
||||
let mut dict = new_dict();
|
||||
compress_with_dict(src, &mut dict)
|
||||
}
|
||||
|
||||
/// Creates a new heap-allocated dictionary.
|
||||
#[cfg(feature = "alloc")]
|
||||
pub fn new_dict() -> Dict<'static> {
|
||||
let mut dict = Dict {
|
||||
base: bindings::lzokay_DictBase { _storage: null_mut() },
|
||||
storage: Option::Some(Box::new([0u8; dict_storage_size()])),
|
||||
phantom: PhantomData,
|
||||
};
|
||||
dict.base._storage = dict.storage.as_mut().unwrap().as_mut_ptr() as *mut DictStorage;
|
||||
dict
|
||||
/// Compress `src`, reusing the provided dictionary across calls.
|
||||
pub fn compress_with_dict(src: &[u8], dict: &mut Dict) -> Result<Vec<u8>, Error> {
|
||||
let capacity = compress_worst_size(src.len());
|
||||
let mut buf = vec![0u8; capacity];
|
||||
let size = compress_no_alloc(src, &mut buf, dict)?;
|
||||
buf.truncate(size);
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Dictionary storage size, for manual or stack allocation.
|
||||
pub const fn dict_storage_size() -> usize { size_of::<DictStorage>() }
|
||||
|
||||
/// Creates a dictionary from the supplied storage.
|
||||
///
|
||||
/// Storage **must** be at least [`dict_storage_size()`] bytes,
|
||||
/// otherwise this function will panic.
|
||||
pub fn dict_from_storage(storage: &mut [u8]) -> Dict<'_> {
|
||||
if storage.len() < dict_storage_size() {
|
||||
panic!(
|
||||
"Dictionary storage is not large enough: {}, expected {}",
|
||||
storage.len(),
|
||||
dict_storage_size()
|
||||
);
|
||||
}
|
||||
Dict {
|
||||
base: bindings::lzokay_DictBase { _storage: storage.as_mut_ptr() as *mut DictStorage },
|
||||
#[cfg(feature = "alloc")]
|
||||
storage: Option::None,
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Worst-case compression size.
|
||||
/// Worst-case compressed size according to the LZO format guarantees.
|
||||
pub const fn compress_worst_size(s: usize) -> usize { s + s / 16 + 64 + 3 }
|
||||
|
||||
/// Compress the supplied buffer into a heap-allocated vector.
|
||||
///
|
||||
/// Creates a new dictionary for each invocation.
|
||||
#[cfg(feature = "alloc")]
|
||||
pub fn compress(src: &[u8]) -> Result<Vec<u8>, Error> { compress_with_dict(src, &mut new_dict()) }
|
||||
|
||||
/// Compress the supplied buffer into a heap-allocated vector,
|
||||
/// with the supplied pre-allocated dictionary.
|
||||
#[cfg(feature = "alloc")]
|
||||
pub fn compress_with_dict(src: &[u8], dict: &mut Dict) -> Result<Vec<u8>, Error> {
|
||||
let mut out_size = 0usize;
|
||||
let capacity = compress_worst_size(src.len());
|
||||
let mut dst = Vec::with_capacity(capacity);
|
||||
let result = unsafe {
|
||||
let result = bindings::lzokay_compress(
|
||||
src.as_ptr(),
|
||||
src.len(),
|
||||
dst.as_mut_ptr(),
|
||||
capacity,
|
||||
&mut out_size,
|
||||
&mut dict.base,
|
||||
);
|
||||
if result == bindings::lzokay_EResult_Success {
|
||||
dst.set_len(out_size as usize);
|
||||
}
|
||||
result
|
||||
};
|
||||
lzokay_result(dst, result)
|
||||
/// Compress without heap allocations, writing the output into `dst`.
|
||||
pub fn compress_no_alloc(src: &[u8], dst: &mut [u8], dict: &mut Dict) -> Result<usize, Error> {
|
||||
let storage = dict.storage_mut();
|
||||
compress_impl(src, dst, storage)
|
||||
}
|
||||
|
||||
/// Compress the supplied buffer.
|
||||
///
|
||||
/// For sizing `dst`, use [`compress_worst_size`].
|
||||
pub fn compress_no_alloc(src: &[u8], dst: &mut [u8], dict: &mut Dict) -> Result<usize, Error> {
|
||||
let mut out_size = 0usize;
|
||||
let result = unsafe {
|
||||
bindings::lzokay_compress(
|
||||
src.as_ptr(),
|
||||
src.len(),
|
||||
dst.as_mut_ptr(),
|
||||
dst.len(),
|
||||
&mut out_size,
|
||||
&mut dict.base,
|
||||
)
|
||||
};
|
||||
lzokay_result(out_size as usize, result)
|
||||
const HASH_SIZE: usize = 0x4000;
|
||||
const MAX_DIST: usize = 0xBFFF;
|
||||
const MAX_MATCH_LEN: usize = 0x800;
|
||||
const BUF_SIZE: usize = MAX_DIST + MAX_MATCH_LEN;
|
||||
const MAX_MATCH_TABLE: usize = 34;
|
||||
const BUF_GUARD: usize = BUF_SIZE + MAX_MATCH_LEN;
|
||||
|
||||
const M1_MAX_OFFSET: u32 = 0x0400;
|
||||
const M2_MAX_OFFSET: u32 = 0x0800;
|
||||
const M3_MAX_OFFSET: u32 = 0x4000;
|
||||
const M4_BASE_OFFSET: u32 = 0x4000;
|
||||
|
||||
const M2_MIN_LEN: u32 = 3;
|
||||
const M2_MAX_LEN: u32 = 8;
|
||||
const M3_MAX_LEN: u32 = 33;
|
||||
const M4_MAX_LEN: u32 = 9;
|
||||
|
||||
const M3_MARKER: u8 = 0x20;
|
||||
const M4_MARKER: u8 = 0x10;
|
||||
|
||||
/// Hash chains tracking recent 3-byte sequences, keeping per-key chains and
|
||||
/// remembering the best match length at each node.
|
||||
#[derive(Clone)]
|
||||
struct Match3 {
|
||||
head: [u16; HASH_SIZE],
|
||||
chain_sz: [u16; HASH_SIZE],
|
||||
chain: [u16; BUF_SIZE],
|
||||
best_len: [u16; BUF_SIZE],
|
||||
}
|
||||
|
||||
impl Match3 {
|
||||
const fn new() -> Self {
|
||||
Self {
|
||||
head: [0; HASH_SIZE],
|
||||
chain_sz: [0; HASH_SIZE],
|
||||
chain: [0; BUF_SIZE],
|
||||
best_len: [0; BUF_SIZE],
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn make_key(bytes: &[u8]) -> usize {
|
||||
let a = bytes[0] as u32;
|
||||
let b = bytes[1] as u32;
|
||||
let c = bytes[2] as u32;
|
||||
let mix = (((a << 5) ^ b).wrapping_shl(5)) ^ c;
|
||||
let prod = 0x9f5f_u32.wrapping_mul(mix);
|
||||
((prod >> 5) & 0x3fff) as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_head(&self, key: usize) -> u16 {
|
||||
if self.chain_sz[key] == 0 {
|
||||
u16::MAX
|
||||
} else {
|
||||
self.head[key]
|
||||
}
|
||||
}
|
||||
|
||||
fn init(&mut self) { self.chain_sz.fill(0); }
|
||||
|
||||
fn remove(&mut self, pos: usize, buffer: &[u8; BUF_GUARD]) {
|
||||
let key = Self::make_key(&buffer[pos..]);
|
||||
self.chain_sz[key] = self.chain_sz[key].saturating_sub(1);
|
||||
}
|
||||
|
||||
/// Insert the current position into the hash chains and return the head
|
||||
/// position alongside the bounded chain length to inspect.
|
||||
fn advance(&mut self, state: &State, buffer: &[u8; BUF_GUARD]) -> (u16, u32) {
|
||||
let key = Self::make_key(&buffer[state.wind_b as usize..]);
|
||||
let head = self.get_head(key);
|
||||
self.chain[state.wind_b as usize] = head;
|
||||
let mut count = self.chain_sz[key] as u32;
|
||||
self.chain_sz[key] = self.chain_sz[key].wrapping_add(1);
|
||||
if count > MAX_MATCH_LEN as u32 {
|
||||
count = MAX_MATCH_LEN as u32;
|
||||
}
|
||||
self.head[key] = state.wind_b as u16;
|
||||
(head, count)
|
||||
}
|
||||
|
||||
/// Fast path for known matches: advance the hash chains without searching.
|
||||
fn skip_advance(&mut self, state: &State, buffer: &[u8; BUF_GUARD]) {
|
||||
let key = Self::make_key(&buffer[state.wind_b as usize..]);
|
||||
self.chain[state.wind_b as usize] = self.get_head(key);
|
||||
self.head[key] = state.wind_b as u16;
|
||||
self.best_len[state.wind_b as usize] = (MAX_MATCH_LEN + 1) as u16;
|
||||
self.chain_sz[key] = self.chain_sz[key].wrapping_add(1);
|
||||
}
|
||||
}
|
||||
|
||||
/// Direct lookup table for 2-byte prefixes used to seed matches quickly.
|
||||
#[derive(Clone)]
|
||||
struct Match2 {
|
||||
head: [u16; 1 << 16],
|
||||
}
|
||||
|
||||
impl Match2 {
|
||||
const fn new() -> Self { Self { head: [u16::MAX; 1 << 16] } }
|
||||
|
||||
#[inline]
|
||||
fn make_key(bytes: &[u8]) -> usize { (bytes[0] as usize) ^ ((bytes[1] as usize) << 8) }
|
||||
|
||||
fn init(&mut self) { self.head.fill(u16::MAX); }
|
||||
|
||||
fn add(&mut self, pos: u16, buffer: &[u8; BUF_GUARD]) {
|
||||
let key = Self::make_key(&buffer[pos as usize..]);
|
||||
self.head[key] = pos;
|
||||
}
|
||||
|
||||
fn remove(&mut self, pos: usize, buffer: &[u8; BUF_GUARD]) {
|
||||
let key = Self::make_key(&buffer[pos..]);
|
||||
if self.head[key] as usize == pos {
|
||||
self.head[key] = u16::MAX;
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to find a 2-byte prefix match at the current window position.
|
||||
fn search(
|
||||
&self,
|
||||
state: &State,
|
||||
lb_pos: &mut u32,
|
||||
lb_len: &mut u32,
|
||||
best_pos: &mut [u32; MAX_MATCH_TABLE],
|
||||
buffer: &[u8; BUF_GUARD],
|
||||
) -> bool {
|
||||
let key = Self::make_key(&buffer[state.wind_b as usize..]);
|
||||
let pos = self.head[key];
|
||||
if pos == u16::MAX {
|
||||
return false;
|
||||
}
|
||||
if best_pos[2] == 0 {
|
||||
best_pos[2] = pos as u32 + 1;
|
||||
}
|
||||
if *lb_len < 2 {
|
||||
*lb_len = 2;
|
||||
*lb_pos = pos as u32;
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Concrete storage backing a dictionary instance. Buffers and match tables are
|
||||
/// stored side by side so the encoder can share logic across heap and stack
|
||||
/// configurations.
|
||||
#[derive(Clone)]
|
||||
pub struct DictStorage {
|
||||
match3: Match3,
|
||||
match2: Match2,
|
||||
buffer: [u8; BUF_GUARD],
|
||||
}
|
||||
|
||||
impl DictStorage {
|
||||
pub const fn new() -> Self {
|
||||
Self { match3: Match3::new(), match2: Match2::new(), buffer: [0; BUF_GUARD] }
|
||||
}
|
||||
|
||||
/// Initialize dictionary tables and preload the first window from `state.src`.
|
||||
fn init(&mut self, state: &mut State<'_>) {
|
||||
self.match3.init();
|
||||
self.match2.init();
|
||||
|
||||
state.cycle1_countdown = MAX_DIST as u32;
|
||||
state.inp = 0;
|
||||
state.wind_sz = cmp::min(state.src.len(), MAX_MATCH_LEN) as u32;
|
||||
state.wind_b = 0;
|
||||
state.wind_e = state.wind_sz;
|
||||
if state.wind_sz > 0 {
|
||||
let len = state.wind_sz as usize;
|
||||
self.buffer[..len].copy_from_slice(&state.src[..len]);
|
||||
}
|
||||
state.inp += state.wind_sz as usize;
|
||||
|
||||
if state.wind_sz < 3 {
|
||||
let start = state.wind_b as usize + state.wind_sz as usize;
|
||||
let end = start + (3 - state.wind_sz as usize);
|
||||
self.buffer[start..end].fill(0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove stale entries before the sliding window overwrites them.
|
||||
fn reset_next_input_entry(&mut self, state: &mut State) {
|
||||
if state.cycle1_countdown == 0 {
|
||||
let pos = state.wind_e as usize;
|
||||
self.match3.remove(pos, &self.buffer);
|
||||
self.match2.remove(pos, &self.buffer);
|
||||
} else {
|
||||
state.cycle1_countdown -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance the dictionary by one position, returning the best match offset
|
||||
/// and length. When `skip` is true the already-emitted match bytes are
|
||||
/// fast-forwarded first so the dictionary stays aligned with the encoded
|
||||
/// output.
|
||||
fn advance(
|
||||
&mut self,
|
||||
state: &mut State,
|
||||
prev_len: u32,
|
||||
best_off: &mut [u32; MAX_MATCH_TABLE],
|
||||
skip: bool,
|
||||
) -> (u32, u32) {
|
||||
if skip {
|
||||
// Skip phase: advance through already-encoded match bytes while
|
||||
// keeping the dictionary in sync with the emitted output.
|
||||
for _ in 0..prev_len.saturating_sub(1) {
|
||||
self.reset_next_input_entry(state);
|
||||
self.match3.skip_advance(state, &self.buffer);
|
||||
self.match2.add(state.wind_b as u16, &self.buffer);
|
||||
state.get_byte(&mut self.buffer);
|
||||
}
|
||||
}
|
||||
|
||||
let mut lb_len = 1u32;
|
||||
let mut lb_off = 0u32;
|
||||
let mut lb_pos = 0u32;
|
||||
let mut best_pos = [0u32; MAX_MATCH_TABLE];
|
||||
|
||||
let (match_head, mut match_count) = self.match3.advance(state, &self.buffer);
|
||||
if match_head == u16::MAX {
|
||||
match_count = 0;
|
||||
}
|
||||
|
||||
let mut should_terminate = false;
|
||||
let best_len = lb_len;
|
||||
|
||||
if lb_len >= state.wind_sz {
|
||||
// Window exhausted: no further matches possible once we reach EOF.
|
||||
if state.wind_sz == 0 {
|
||||
should_terminate = true;
|
||||
}
|
||||
self.match3.best_len[state.wind_b as usize] = (MAX_MATCH_LEN + 1) as u16;
|
||||
} else {
|
||||
if self.match2.search(state, &mut lb_pos, &mut lb_len, &mut best_pos, &self.buffer)
|
||||
&& state.wind_sz >= 3
|
||||
{
|
||||
let mut match_pos = match_head as usize;
|
||||
for _ in 0..match_count {
|
||||
if match_pos >= BUF_SIZE {
|
||||
break;
|
||||
}
|
||||
let ref_pos = state.wind_b as usize;
|
||||
let window = state.wind_sz as usize;
|
||||
let mut matched = 0usize;
|
||||
while matched < window
|
||||
&& self.buffer[ref_pos + matched] == self.buffer[match_pos + matched]
|
||||
{
|
||||
matched += 1;
|
||||
}
|
||||
if matched >= 2 {
|
||||
if matched < MAX_MATCH_TABLE && best_pos[matched] == 0 {
|
||||
// Remember first occurrence for potential match length tweaks.
|
||||
best_pos[matched] = match_pos as u32 + 1;
|
||||
}
|
||||
let matched_u32 = matched as u32;
|
||||
if matched_u32 > lb_len {
|
||||
lb_len = matched_u32;
|
||||
lb_pos = match_pos as u32;
|
||||
if lb_len == state.wind_sz
|
||||
|| lb_len > self.match3.best_len[match_pos] as u32
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
match_pos = self.match3.chain[match_pos] as usize;
|
||||
}
|
||||
}
|
||||
if lb_len > best_len {
|
||||
lb_off = state.pos2off(lb_pos);
|
||||
}
|
||||
self.match3.best_len[state.wind_b as usize] = lb_len as u16;
|
||||
for i in 2..MAX_MATCH_TABLE {
|
||||
best_off[i] = if best_pos[i] != 0 { state.pos2off(best_pos[i] - 1) } else { 0 };
|
||||
}
|
||||
}
|
||||
|
||||
self.reset_next_input_entry(state);
|
||||
self.match2.add(state.wind_b as u16, &self.buffer);
|
||||
state.get_byte(&mut self.buffer);
|
||||
|
||||
if should_terminate {
|
||||
state.buf_sz = 0;
|
||||
lb_len = 0;
|
||||
} else {
|
||||
// Buffer size counts the current byte plus the lookahead window.
|
||||
state.buf_sz = state.wind_sz + 1;
|
||||
}
|
||||
state.bufp = state.inp - state.buf_sz as usize;
|
||||
|
||||
(lb_off, lb_len)
|
||||
}
|
||||
}
|
||||
|
||||
/// Sliding window state tracked while searching for matches.
|
||||
struct State<'a> {
|
||||
src: &'a [u8],
|
||||
inp: usize,
|
||||
wind_sz: u32,
|
||||
wind_b: u32,
|
||||
wind_e: u32,
|
||||
cycle1_countdown: u32,
|
||||
bufp: usize,
|
||||
buf_sz: u32,
|
||||
}
|
||||
|
||||
impl<'a> State<'a> {
|
||||
/// Create a new window over `src`.
|
||||
fn new(src: &'a [u8]) -> Self {
|
||||
Self {
|
||||
src,
|
||||
inp: 0,
|
||||
wind_sz: 0,
|
||||
wind_b: 0,
|
||||
wind_e: 0,
|
||||
cycle1_countdown: 0,
|
||||
bufp: 0,
|
||||
buf_sz: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance the window by one byte, copying from `src` and maintaining the
|
||||
/// duplicated tail used for wrap-around reads.
|
||||
fn get_byte(&mut self, buffer: &mut [u8; BUF_GUARD]) {
|
||||
if self.inp >= self.src.len() {
|
||||
if self.wind_sz > 0 {
|
||||
self.wind_sz -= 1;
|
||||
}
|
||||
let idx = self.wind_e as usize;
|
||||
buffer[idx] = 0;
|
||||
if idx < MAX_MATCH_LEN {
|
||||
buffer[BUF_SIZE + idx] = 0;
|
||||
}
|
||||
} else {
|
||||
let value = self.src[self.inp];
|
||||
let idx = self.wind_e as usize;
|
||||
buffer[idx] = value;
|
||||
if idx < MAX_MATCH_LEN {
|
||||
buffer[BUF_SIZE + idx] = value;
|
||||
}
|
||||
self.inp += 1;
|
||||
}
|
||||
self.wind_e = (self.wind_e + 1) % BUF_SIZE as u32;
|
||||
self.wind_b = (self.wind_b + 1) % BUF_SIZE as u32;
|
||||
}
|
||||
|
||||
/// Convert a buffer index into a backwards distance within the window.
|
||||
#[inline]
|
||||
fn pos2off(&self, pos: u32) -> u32 {
|
||||
if self.wind_b > pos {
|
||||
self.wind_b - pos
|
||||
} else {
|
||||
BUF_SIZE as u32 - (pos - self.wind_b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Internal representation for dictionaries, either borrowed or owned.
|
||||
enum DictInner<'a> {
|
||||
Borrowed(&'a mut DictStorage),
|
||||
#[cfg(feature = "alloc")]
|
||||
Owned(Box<DictStorage>),
|
||||
}
|
||||
|
||||
/// Compression dictionary used to retain the sliding window between calls.
|
||||
pub struct Dict<'a> {
|
||||
inner: DictInner<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Dict<'a> {
|
||||
/// Return the mutable storage backing this dictionary, regardless of
|
||||
/// whether it is owned or borrowed.
|
||||
fn storage_mut(&mut self) -> &mut DictStorage {
|
||||
match &mut self.inner {
|
||||
DictInner::Borrowed(storage) => storage,
|
||||
#[cfg(feature = "alloc")]
|
||||
DictInner::Owned(storage) => storage.as_mut(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "alloc")]
|
||||
/// Create a heap-allocated dictionary with the canonical storage layout.
|
||||
pub fn new_dict() -> Dict<'static> {
|
||||
Dict { inner: DictInner::Owned(Box::new(DictStorage::new())) }
|
||||
}
|
||||
|
||||
/// Total number of bytes required to back a dictionary.
|
||||
pub const fn dict_storage_size() -> usize { size_of::<DictStorage>() }
|
||||
|
||||
/// Wrap user-provided storage (e.g. stack-allocated) inside a dictionary.
|
||||
pub fn dict_from_storage(storage: &mut DictStorage) -> Dict<'_> {
|
||||
Dict { inner: DictInner::Borrowed(storage) }
|
||||
}
|
||||
|
||||
/// Emit the repeated zero-byte encoding used for long literal/match lengths.
|
||||
fn write_zero_byte_length(
|
||||
dst: &mut [u8],
|
||||
out_pos: &mut usize,
|
||||
mut len: usize,
|
||||
) -> Result<(), Error> {
|
||||
while len > 255 {
|
||||
write_dst(dst, out_pos, &[0])?;
|
||||
len -= 255;
|
||||
}
|
||||
write_dst(dst, out_pos, &[len as u8])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Emit a literal run following the LZO opcode rules.
|
||||
fn encode_literal_run(
|
||||
dst: &mut [u8],
|
||||
out_pos: &mut usize,
|
||||
src: &[u8],
|
||||
lit_ptr: usize,
|
||||
lit_len: usize,
|
||||
) -> Result<(), Error> {
|
||||
if *out_pos == 0 && lit_len <= 238 {
|
||||
write_dst(dst, out_pos, &[17 + lit_len as u8])?;
|
||||
} else if lit_len <= 3 {
|
||||
let idx = out_pos.checked_sub(2).ok_or(Error::OutputOverrun)?;
|
||||
*dst_byte_mut(dst, idx)? |= lit_len as u8;
|
||||
} else if lit_len <= 18 {
|
||||
write_dst(dst, out_pos, &[(lit_len - 3) as u8])?;
|
||||
} else {
|
||||
write_dst(dst, out_pos, &[0])?;
|
||||
write_zero_byte_length(dst, out_pos, lit_len - 18)?;
|
||||
}
|
||||
let src_chunk = src.get(lit_ptr..lit_ptr + lit_len).ok_or(Error::InputOverrun)?;
|
||||
write_dst(dst, out_pos, src_chunk)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Emit a back-reference according to the LZOKAY/LZO opcode encoding.
|
||||
fn encode_lookback_match(
|
||||
dst: &mut [u8],
|
||||
out_pos: &mut usize,
|
||||
lb_len: u32,
|
||||
mut lb_off: u32,
|
||||
last_lit_len: u32,
|
||||
) -> Result<(), Error> {
|
||||
if lb_len == 2 {
|
||||
lb_off -= 1;
|
||||
write_dst(dst, out_pos, &[((lb_off & 0x3) << 2) as u8, (lb_off >> 2) as u8])?;
|
||||
} else if lb_len <= M2_MAX_LEN && lb_off <= M2_MAX_OFFSET {
|
||||
lb_off -= 1;
|
||||
write_dst(dst, out_pos, &[
|
||||
(((lb_len - 1) << 5) | ((lb_off & 0x7) << 2)) as u8,
|
||||
(lb_off >> 3) as u8,
|
||||
])?;
|
||||
} else if lb_len == M2_MIN_LEN && lb_off <= M1_MAX_OFFSET + M2_MAX_OFFSET && last_lit_len >= 4 {
|
||||
lb_off -= 1 + M2_MAX_OFFSET;
|
||||
write_dst(dst, out_pos, &[((lb_off & 0x3) << 2) as u8, (lb_off >> 2) as u8])?;
|
||||
} else if lb_off <= M3_MAX_OFFSET {
|
||||
lb_off -= 1;
|
||||
if lb_len <= M3_MAX_LEN {
|
||||
write_dst(dst, out_pos, &[M3_MARKER | (lb_len as u8 - 2)])?;
|
||||
} else {
|
||||
let extra = (lb_len - M3_MAX_LEN) as usize;
|
||||
write_dst(dst, out_pos, &[M3_MARKER])?;
|
||||
write_zero_byte_length(dst, out_pos, extra)?;
|
||||
}
|
||||
write_dst(dst, out_pos, &[(lb_off << 2) as u8, (lb_off >> 6) as u8])?;
|
||||
} else {
|
||||
lb_off -= M4_BASE_OFFSET;
|
||||
if lb_len <= M4_MAX_LEN {
|
||||
write_dst(dst, out_pos, &[M4_MARKER
|
||||
| (((lb_off & 0x4000) >> 11) as u8)
|
||||
| (lb_len as u8 - 2)])?;
|
||||
} else {
|
||||
let extra = (lb_len - M4_MAX_LEN) as usize;
|
||||
write_dst(dst, out_pos, &[M4_MARKER | (((lb_off & 0x4000) >> 11) as u8)])?;
|
||||
write_zero_byte_length(dst, out_pos, extra)?;
|
||||
}
|
||||
write_dst(dst, out_pos, &[(lb_off << 2) as u8, (lb_off >> 6) as u8])?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Apply the heuristics that prefer cheaper opcodes when a shorter match can be
|
||||
/// emitted at a closer distance.
|
||||
fn find_better_match(best_off: &[u32; MAX_MATCH_TABLE], lb_len: &mut u32, lb_off: &mut u32) {
|
||||
let len = *lb_len;
|
||||
let off = *lb_off;
|
||||
if len <= M2_MIN_LEN || off <= M2_MAX_OFFSET {
|
||||
return;
|
||||
}
|
||||
// Prefer re-encoding long matches as cheaper opcodes whenever the distance
|
||||
// permits switching to a shorter back-reference class.
|
||||
if off > M2_MAX_OFFSET
|
||||
&& len >= M2_MIN_LEN + 1
|
||||
&& len <= M2_MAX_LEN + 1
|
||||
&& best_off[len as usize - 1] != 0
|
||||
&& best_off[len as usize - 1] <= M2_MAX_OFFSET
|
||||
{
|
||||
*lb_len = len - 1;
|
||||
*lb_off = best_off[len as usize - 1];
|
||||
} else if off > M3_MAX_OFFSET
|
||||
&& len >= M4_MAX_LEN + 1
|
||||
&& len <= M2_MAX_LEN + 2
|
||||
&& best_off[len as usize - 2] != 0
|
||||
&& best_off[len as usize] <= M2_MAX_OFFSET
|
||||
{
|
||||
*lb_len = len - 2;
|
||||
*lb_off = best_off[len as usize - 2];
|
||||
} else if off > M3_MAX_OFFSET
|
||||
&& len >= M4_MAX_LEN + 1
|
||||
&& len <= M3_MAX_LEN + 1
|
||||
&& best_off[len as usize - 1] != 0
|
||||
&& best_off[len as usize - 2] <= M3_MAX_OFFSET
|
||||
{
|
||||
*lb_len = len - 1;
|
||||
*lb_off = best_off[len as usize - 1];
|
||||
}
|
||||
}
|
||||
|
||||
/// Core compression routine shared by the heap-allocating and stack variants.
|
||||
/// Maintains the window management and opcode selection heuristics required by
/// the LZO format while using safe Rust semantics.
///
/// Consumes all of `src`, writing compressed opcodes into `dst` via the
/// `encode_*` helpers, and returns the number of bytes written on success.
/// `storage` supplies the match-finder dictionary and is (re)initialized here.
fn compress_impl(src: &[u8], dst: &mut [u8], storage: &mut DictStorage) -> Result<usize, Error> {
    let mut state = State::new(src);
    storage.init(&mut state);

    let mut out_pos = 0usize; // write cursor into `dst`
    let mut lit_len = 0u32; // pending literal bytes not yet emitted
    let mut best_off = [0u32; MAX_MATCH_TABLE]; // candidate offsets recorded by the match finder

    let mut lit_ptr = state.inp;
    // Prime the first candidate match before entering the main loop.
    let (mut lb_off, mut lb_len) = storage.advance(&mut state, 0, &mut best_off, false);

    while state.buf_sz > 0 {
        if lit_len == 0 {
            // Capture the starting point for the next literal run.
            lit_ptr = state.bufp;
        }

        // Match rejection heuristics: drop candidates that would encode no
        // better than plain literals. A 2-byte match appears to be profitable
        // only via the short M1 opcode, which needs a small offset and a
        // preceding literal run of 1..=3 bytes — TODO confirm the exact cost
        // model against the reference lzokay implementation.
        if lb_len < 2
            || (lb_len == 2 && (lb_off > M1_MAX_OFFSET || lit_len == 0 || lit_len >= 4))
            || (lb_len == 2 && out_pos == 0)
            || (out_pos == 0 && lit_len == 0)
        {
            lb_len = 0;
        } else if lb_len == M2_MIN_LEN && lb_off > M1_MAX_OFFSET + M2_MAX_OFFSET && lit_len >= 4 {
            // Minimum-length match at long range while a 4+ byte literal run
            // is pending: also rejected.
            lb_len = 0;
        }

        if lb_len == 0 {
            lit_len += 1;
            // No match chosen: step forward by one literal byte.
            let (next_off, next_len) = storage.advance(&mut state, 0, &mut best_off, false);
            lb_off = next_off;
            lb_len = next_len;
            continue;
        }

        // A match was accepted: let the match finder refine it against the
        // recorded candidate offsets, then flush pending literals followed by
        // the match opcode.
        find_better_match(&best_off, &mut lb_len, &mut lb_off);
        encode_literal_run(dst, &mut out_pos, src, lit_ptr, lit_len as usize)?;
        encode_lookback_match(dst, &mut out_pos, lb_len, lb_off, lit_len)?;
        let prev_len = lb_len;
        lit_len = 0;
        // Advance over the matched bytes, updating the search structures.
        let (next_off, next_len) = storage.advance(&mut state, prev_len, &mut best_off, true);
        lb_off = next_off;
        lb_len = next_len;
    }

    // Flush any trailing literal bytes.
    encode_literal_run(dst, &mut out_pos, src, lit_ptr, lit_len as usize)?;

    // Emit terminating M4 instruction (distance 0x4000, length 3).
    write_dst(dst, &mut out_pos, &[M4_MARKER | 1, 0, 0])?;

    Ok(out_pos)
}
|
||||
|
||||
#[inline(always)]
|
||||
fn dst_byte_mut<'a>(dst: &'a mut [u8], idx: usize) -> Result<&'a mut u8, Error> {
|
||||
dst.get_mut(idx).ok_or(Error::OutputOverrun)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn write_dst(dst: &mut [u8], out_pos: &mut usize, slice: &[u8]) -> Result<(), Error> {
|
||||
let pos = *out_pos;
|
||||
let end = pos.checked_add(slice.len()).ok_or(Error::OutputOverrun)?;
|
||||
let dst_chunk = dst.get_mut(pos..end).ok_or(Error::OutputOverrun)?;
|
||||
dst_chunk.copy_from_slice(slice);
|
||||
*out_pos = end;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[cfg(feature = "alloc")]
|
||||
use crate::compress::{compress, compress_with_dict, new_dict};
|
||||
use crate::compress::{
|
||||
compress_no_alloc, compress_worst_size, dict_from_storage, dict_storage_size,
|
||||
};
|
||||
use super::{compress, compress_with_dict, new_dict};
|
||||
use super::{compress_no_alloc, compress_worst_size, dict_from_storage, DictStorage};
|
||||
|
||||
const INPUT_1: &[u8] = include_bytes!("test1.txt");
|
||||
const EXPECTED_1: &[u8] = include_bytes!("test1.bin");
|
||||
@ -183,7 +701,6 @@ mod tests {
|
||||
let mut dict = new_dict();
|
||||
let dst = compress_with_dict(INPUT_1, &mut dict).expect("Failed to compress (1)");
|
||||
assert_eq!(dst, EXPECTED_1);
|
||||
// Compress a second time to test dictionary reuse
|
||||
let dst = compress_with_dict(INPUT_2, &mut dict).expect("Failed to compress (2)");
|
||||
assert_eq!(dst, EXPECTED_2);
|
||||
}
|
||||
@ -191,12 +708,11 @@ mod tests {
|
||||
#[test]
|
||||
fn test_compress_no_alloc() {
|
||||
let mut dst = [0u8; compress_worst_size(INPUT_1.len())];
|
||||
let mut storage = [0u8; dict_storage_size()];
|
||||
let mut storage = DictStorage::new();
|
||||
let mut dict = dict_from_storage(&mut storage);
|
||||
let out_size =
|
||||
compress_no_alloc(INPUT_1, &mut dst, &mut dict).expect("Failed to compress (1)");
|
||||
assert_eq!(&dst[0..out_size], EXPECTED_1);
|
||||
// Compress a second time to test dictionary reuse
|
||||
let out_size =
|
||||
compress_no_alloc(INPUT_2, &mut dst, &mut dict).expect("Failed to compress (2)");
|
||||
assert_eq!(&dst[0..out_size], EXPECTED_2);
|
||||
|
@ -16,23 +16,238 @@
|
||||
//! # Ok::<(), lzokay::Error>(())
|
||||
//! ```
|
||||
|
||||
use crate::{bindings, lzokay_result, Error};
|
||||
use crate::Error;
|
||||
|
||||
/// Maximum repeat count representable via zero marker bytes when extending
/// literal or match lengths. Bounds the zero-run so that multiplying it by 255
/// cannot overflow `usize`.
const MAX255_COUNT: usize = usize::MAX / 255 - 2;
/// Opcode marker for mid-range matches (labelled "M3" in the LZO reference).
const M3_MARKER: u8 = 0x20;
/// Opcode marker for far matches ("M4") and the terminator instruction.
const M4_MARKER: u8 = 0x10;
|
||||
|
||||
/// Decompress `src` into `dst`.
|
||||
///
|
||||
/// `dst` must be large enough to hold the entire decompressed output.
|
||||
/// `dst` must be large enough to hold the entire decompressed output. The
|
||||
/// function follows the documented LZO opcode semantics and state transitions.
|
||||
pub fn decompress(src: &[u8], dst: &mut [u8]) -> Result<usize, Error> {
|
||||
let mut out_size = 0usize;
|
||||
let result = unsafe {
|
||||
bindings::lzokay_decompress(
|
||||
src.as_ptr(),
|
||||
src.len(),
|
||||
dst.as_mut_ptr(),
|
||||
dst.len(),
|
||||
&mut out_size,
|
||||
)
|
||||
};
|
||||
lzokay_result(out_size as usize, result)
|
||||
if src.len() < 3 {
|
||||
return Err(Error::InputOverrun);
|
||||
}
|
||||
|
||||
let mut inp = 0usize;
|
||||
let mut outp = 0usize;
|
||||
let mut state = 0usize;
|
||||
let mut nstate: usize;
|
||||
let mut lblen: usize;
|
||||
let mut lbcur: usize;
|
||||
|
||||
let first = input_byte(src, &mut inp)?;
|
||||
// The LZO bitstream reserves the first byte for literal priming. Codes >= 22
|
||||
// copy a literal block immediately; 18..21 seed the literal countdown (`state`).
|
||||
if first >= 22 {
|
||||
let len = (first as usize) - 17;
|
||||
copy_slice(src, &mut inp, dst, &mut outp, len)?;
|
||||
state = 4;
|
||||
} else if first >= 18 {
|
||||
nstate = (first as usize) - 17;
|
||||
state = nstate;
|
||||
copy_slice(src, &mut inp, dst, &mut outp, nstate)?;
|
||||
}
|
||||
|
||||
loop {
|
||||
let inst = input_byte(src, &mut inp)?;
|
||||
if inst & 0xC0 != 0 {
|
||||
// [M2]
|
||||
// 1 L L D D D S S (128..255)
|
||||
// Copy 5-8 bytes from block within 2kB distance
|
||||
// state = S
|
||||
// length = 5 + L
|
||||
// 0 1 L D D D S S (64..127)
|
||||
// Copy 3-4 bytes from block within 2kB distance
|
||||
// length = 3 + L
|
||||
// Always followed by one byte: distance = (next << 3) + D + 1
|
||||
let next = input_byte(src, &mut inp)?;
|
||||
let distance = ((next as usize) << 3) + (((inst as usize) >> 2) & 0x7) + 1;
|
||||
lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?;
|
||||
lblen = ((inst as usize) >> 5) + 1;
|
||||
nstate = (inst as usize) & 0x3;
|
||||
} else if inst & M3_MARKER != 0 {
|
||||
// [M3]
|
||||
// 0 0 1 L L L L L (32..63)
|
||||
// Copy from <= 16kB distance
|
||||
// length = 2 + (L ?: 31 + zero-runs + tail)
|
||||
// Followed by LE16: distance = (value >> 2) + 1, state = value & 3
|
||||
lblen = ((inst as usize) & 0x1F) + 2;
|
||||
if lblen == 2 {
|
||||
let offset = consume_zero_byte_length(src, &mut inp)?;
|
||||
let tail = input_byte(src, &mut inp)?;
|
||||
lblen += offset * 255 + 31 + tail as usize;
|
||||
}
|
||||
let raw = read_le16(src, &mut inp)?;
|
||||
let distance = ((raw as usize) >> 2) + 1;
|
||||
lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?;
|
||||
nstate = (raw as usize) & 0x3;
|
||||
} else if inst & M4_MARKER != 0 {
|
||||
// [M4]
|
||||
// 0 0 0 1 H L L L (16..31)
|
||||
// Copy from 16..48kB distance
|
||||
// length = 2 + (L ?: 7 + zero-runs + tail)
|
||||
// Followed by LE16: distance = 16384 + (H << 14) + value, state = value & 3
|
||||
// Terminating opcode when distance == 16384.
|
||||
lblen = ((inst as usize) & 0x7) + 2;
|
||||
if lblen == 2 {
|
||||
let offset = consume_zero_byte_length(src, &mut inp)?;
|
||||
let tail = input_byte(src, &mut inp)?;
|
||||
lblen += offset * 255 + 7 + tail as usize;
|
||||
}
|
||||
let raw = read_le16(src, &mut inp)?;
|
||||
let base_dist = ((inst as usize & 0x8) << 11) + ((raw as usize) >> 2);
|
||||
if base_dist == 0 {
|
||||
// Stream finished
|
||||
break;
|
||||
}
|
||||
let distance = base_dist + 16384;
|
||||
lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?;
|
||||
nstate = (raw as usize) & 0x3;
|
||||
} else {
|
||||
if state == 0 {
|
||||
// [Literal]
|
||||
// 0 0 0 0 L L L L (0..15)
|
||||
// Copy long literal string: length = 3 + extended length bytes.
|
||||
let mut len = inst as usize + 3;
|
||||
if len == 3 {
|
||||
let offset = consume_zero_byte_length(src, &mut inp)?;
|
||||
let tail = input_byte(src, &mut inp)?;
|
||||
len += offset * 255 + 15 + tail as usize;
|
||||
}
|
||||
copy_slice(src, &mut inp, dst, &mut outp, len)?;
|
||||
state = 4;
|
||||
continue;
|
||||
} else if state != 4 {
|
||||
// [M1, short]
|
||||
// state = 1..3
|
||||
// 0 0 0 0 D D S S (0..15)
|
||||
// Copy 2 bytes within 1kB distance, state = S afterwards.
|
||||
let tail = input_byte(src, &mut inp)?;
|
||||
let distance = ((inst as usize) >> 2) + ((tail as usize) << 2) + 1;
|
||||
lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?;
|
||||
lblen = 2;
|
||||
nstate = (inst as usize) & 0x3;
|
||||
} else {
|
||||
// [M1, long]
|
||||
// state == 4
|
||||
// 0 0 0 0 D D S S (0..15)
|
||||
// Copy 3 bytes within 2..3kB distance, state = S afterwards.
|
||||
let tail = input_byte(src, &mut inp)?;
|
||||
let distance = ((inst as usize) >> 2) + ((tail as usize) << 2) + 2049;
|
||||
lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?;
|
||||
lblen = 3;
|
||||
nstate = (inst as usize) & 0x3;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the lookback run (source and destination may overlap).
|
||||
if lblen > 0 {
|
||||
let out_end = outp.checked_add(lblen).ok_or(Error::OutputOverrun)?;
|
||||
let lb_end = lbcur.checked_add(lblen).ok_or(Error::OutputOverrun)?;
|
||||
if out_end > dst.len() || lb_end > dst.len() {
|
||||
return Err(Error::OutputOverrun);
|
||||
}
|
||||
for i in 0..lblen {
|
||||
dst[outp + i] = dst[lbcur + i];
|
||||
}
|
||||
outp = out_end;
|
||||
}
|
||||
|
||||
// Copy the following literal run dictated by `nstate`.
|
||||
copy_slice(src, &mut inp, dst, &mut outp, nstate)?;
|
||||
|
||||
state = nstate;
|
||||
}
|
||||
|
||||
// The stream must end with the terminating M4 instruction (length == 3).
|
||||
if lblen != 3 {
|
||||
return Err(Error::Error);
|
||||
}
|
||||
|
||||
if inp == src.len() {
|
||||
Ok(outp)
|
||||
} else if inp < src.len() {
|
||||
Err(Error::InputNotConsumed)
|
||||
} else {
|
||||
Err(Error::InputOverrun)
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a single byte from `src`.
|
||||
#[inline(always)]
|
||||
fn input_byte(src: &[u8], idx: &mut usize) -> Result<u8, Error> {
|
||||
let n = src.get(*idx).copied().ok_or(Error::InputOverrun)?;
|
||||
*idx += 1;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
/// Read a slice of length `len` starting at `start` from `src`.
|
||||
#[inline(always)]
|
||||
fn input_slice<'a>(src: &'a [u8], start: &mut usize, len: usize) -> Result<&'a [u8], Error> {
|
||||
let end = start.checked_add(len).ok_or(Error::InputOverrun)?;
|
||||
let slice = src.get(*start..end).ok_or(Error::InputOverrun)?;
|
||||
*start = end;
|
||||
Ok(slice)
|
||||
}
|
||||
|
||||
/// Read a little-endian `u16` starting at `pos`.
|
||||
#[inline(always)]
|
||||
fn read_le16(bytes: &[u8], pos: &mut usize) -> Result<u16, Error> {
|
||||
let slice = input_slice(bytes, pos, 2)?;
|
||||
Ok(u16::from_le_bytes(slice.try_into().unwrap()))
|
||||
}
|
||||
|
||||
/// Get a mutable slice of length `len` starting at `start` from `dst`.
|
||||
#[inline(always)]
|
||||
fn output_slice<'a>(
|
||||
dst: &'a mut [u8],
|
||||
start: &mut usize,
|
||||
len: usize,
|
||||
) -> Result<&'a mut [u8], Error> {
|
||||
let end = start.checked_add(len).ok_or(Error::OutputOverrun)?;
|
||||
let slice = dst.get_mut(*start..end).ok_or(Error::OutputOverrun)?;
|
||||
*start = end;
|
||||
Ok(slice)
|
||||
}
|
||||
|
||||
/// Copy a slice from `src` to `dst`.
|
||||
#[inline(always)]
|
||||
fn copy_slice(
|
||||
src: &[u8],
|
||||
src_start: &mut usize,
|
||||
dst: &mut [u8],
|
||||
dst_start: &mut usize,
|
||||
len: usize,
|
||||
) -> Result<(), Error> {
|
||||
if len == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
let src_slice = input_slice(src, src_start, len)?;
|
||||
let dst_slice = output_slice(dst, dst_start, len)?;
|
||||
dst_slice.copy_from_slice(src_slice);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Consume a run of zero marker bytes used for long length encodings.
|
||||
#[inline(always)]
|
||||
fn consume_zero_byte_length(src: &[u8], inp: &mut usize) -> Result<usize, Error> {
|
||||
let start = *inp;
|
||||
while src.get(*inp).copied() == Some(0) {
|
||||
*inp += 1;
|
||||
}
|
||||
let offset = *inp - start;
|
||||
if offset > MAX255_COUNT {
|
||||
Err(Error::Error)
|
||||
} else {
|
||||
Ok(offset)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
77
src/lib.rs
77
src/lib.rs
@ -1,7 +1,7 @@
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
//! # LZ👌-rs
|
||||
//!
|
||||
//! Rust wrapper for [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed
|
||||
//! Pure-Rust port of [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed
|
||||
//! implementation of the [LZO compression format](http://www.oberhumer.com/opensource/lzo/).
|
||||
//!
|
||||
//! See the original [README](https://github.com/jackoalan/lzokay/blob/master/README.md) for more information.
|
||||
@ -20,14 +20,14 @@
|
||||
//!
|
||||
//! ```toml
|
||||
//! [dependencies]
|
||||
//! lzokay = "1.0.1"
|
||||
//! lzokay = "2.0.0"
|
||||
//! ```
|
||||
//!
|
||||
//! Or, to only enable certain features:
|
||||
//!
|
||||
//! ```toml
|
||||
//! [dependencies.lzokay]
|
||||
//! version = "1.0.1"
|
||||
//! version = "2.0.0"
|
||||
//! default-features = false
|
||||
//! features = ["decompress", "compress"]
|
||||
//! ```
|
||||
@ -49,36 +49,8 @@ pub mod compress;
|
||||
#[cfg(feature = "decompress")]
|
||||
pub mod decompress;
|
||||
|
||||
mod bindings {
|
||||
#![allow(unknown_lints)]
|
||||
#![allow(non_upper_case_globals)]
|
||||
#![allow(non_camel_case_types)]
|
||||
#![allow(non_snake_case)]
|
||||
#![allow(deref_nullptr)]
|
||||
#![allow(dead_code)]
|
||||
#[cfg(not(feature = "std"))]
|
||||
mod types {
|
||||
pub type c_uchar = u8;
|
||||
pub type c_ushort = u16;
|
||||
pub type c_uint = u32;
|
||||
pub type c_int = i32;
|
||||
pub type c_ulong = usize;
|
||||
pub type c_ulonglong = usize;
|
||||
}
|
||||
#[cfg(feature = "std")]
|
||||
mod types {
|
||||
pub type c_uchar = ::std::os::raw::c_uchar;
|
||||
pub type c_ushort = ::std::os::raw::c_ushort;
|
||||
pub type c_uint = ::std::os::raw::c_uint;
|
||||
pub type c_int = ::std::os::raw::c_int;
|
||||
pub type c_ulong = usize;
|
||||
pub type c_ulonglong = usize;
|
||||
}
|
||||
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||
}
|
||||
|
||||
/// Error result codes
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum Error {
|
||||
/// Likely indicates bad compressed LZO input.
|
||||
LookbehindOverrun,
|
||||
@ -92,20 +64,20 @@ pub enum Error {
|
||||
InputNotConsumed,
|
||||
}
|
||||
|
||||
fn lzokay_result<T>(result: T, error: bindings::lzokay_EResult) -> Result<T, Error> {
|
||||
if error == bindings::lzokay_EResult_Success {
|
||||
Result::Ok(result)
|
||||
} else {
|
||||
Result::Err(match error {
|
||||
bindings::lzokay_EResult_LookbehindOverrun => Error::LookbehindOverrun,
|
||||
bindings::lzokay_EResult_OutputOverrun => Error::OutputOverrun,
|
||||
bindings::lzokay_EResult_InputOverrun => Error::InputOverrun,
|
||||
bindings::lzokay_EResult_InputNotConsumed => Error::InputNotConsumed,
|
||||
_ => Error::Error,
|
||||
})
|
||||
impl core::fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
match self {
|
||||
Error::LookbehindOverrun => write!(f, "lookbehind overrun"),
|
||||
Error::OutputOverrun => write!(f, "output overrun"),
|
||||
Error::InputOverrun => write!(f, "input overrun"),
|
||||
Error::Error => write!(f, "unknown error"),
|
||||
Error::InputNotConsumed => write!(f, "input not consumed"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `core::error::Error` was stabilized in Rust 1.81, consistent with the 1.81.0
// toolchain pinned in CI; using the `core` trait keeps `Error` usable as an
// error source in `no_std` builds as well as with `std`.
impl core::error::Error for Error {}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(all(feature = "compress", feature = "decompress", feature = "alloc"))]
|
||||
mod tests {
|
||||
@ -117,13 +89,22 @@ mod tests {
|
||||
|
||||
use super::{compress::compress, decompress::decompress};
|
||||
|
||||
const INPUT: &[u8] = include_bytes!("test1.txt");
|
||||
const INPUT1: &[u8] = include_bytes!("test1.txt");
|
||||
const INPUT2: &[u8] = include_bytes!("test2.txt");
|
||||
|
||||
#[test]
|
||||
fn test_round_trip() {
|
||||
let compressed = compress(INPUT).expect("Failed to compress");
|
||||
let mut dst = vec![0u8; INPUT.len()];
|
||||
fn test_round_trip1() {
|
||||
let compressed = compress(INPUT1).expect("Failed to compress");
|
||||
let mut dst = vec![0u8; INPUT1.len()];
|
||||
decompress(&compressed, &mut dst).expect("Failed to decompress");
|
||||
assert_eq!(INPUT, dst.as_slice());
|
||||
assert_eq!(INPUT1, dst.as_slice());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_round_trip2() {
|
||||
let compressed = compress(INPUT2).expect("Failed to compress");
|
||||
let mut dst = vec![0u8; INPUT2.len()];
|
||||
decompress(&compressed, &mut dst).expect("Failed to decompress");
|
||||
assert_eq!(INPUT2, dst.as_slice());
|
||||
}
|
||||
}
|
||||
|
@ -1 +0,0 @@
|
||||
#include <lzokay.hpp>
|
Loading…
x
Reference in New Issue
Block a user