diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 79b8f42..c48bc0d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -8,31 +8,21 @@ jobs: strategy: matrix: platform: [ ubuntu-latest, macos-latest, windows-latest ] - toolchain: [ stable, 1.71.0, nightly ] + toolchain: [ stable, 1.81.0, nightly ] features: - - compress,alloc - - compress,decompress - - compress,decompress,std + - compress,alloc + - compress,decompress + - compress,decompress,std fail-fast: false runs-on: ${{ matrix.platform }} steps: - - name: Install LLVM and Clang # required for bindgen to work, see https://github.com/rust-lang/rust-bindgen/issues/1797 - uses: KyleMayes/install-llvm-action@v2.0.8 - if: matrix.platform == 'windows-latest' - with: - version: 21.1.3 - directory: ${{ runner.temp }}/llvm - - name: Set LIBCLANG_PATH - run: echo "LIBCLANG_PATH=$((gcm clang).source -replace "clang.exe")" >> $env:GITHUB_ENV - if: matrix.platform == 'windows-latest' - - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v2 with: submodules: recursive - - uses: actions-rs/toolchain@v1 + - name: Setup Rust toolchain + uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.toolchain }} - override: true - - uses: actions-rs/cargo@v1 - with: - command: test - args: --release --no-default-features --features ${{ matrix.features }} + - name: Cargo test + run: cargo test --release --no-default-features --features ${{ matrix.features }} diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 3c97ead..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "lzokay"] - path = lzokay - url = https://github.com/jackoalan/lzokay.git diff --git a/Cargo.toml b/Cargo.toml index 136b37f..80671ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lzokay" -version = "1.0.2" -edition = "2018" +version = "2.0.0" +edition = "2021" license = "MIT" repository = 
"https://github.com/encounter/lzokay-rs" documentation = "https://docs.rs/lzokay" @@ -11,7 +11,7 @@ A minimal, MIT-licensed implementation of the LZO compression format. """ keywords = ["lzo", "compression", "no_std"] categories = ["compression", "no-std", "api-bindings"] -rust-version = "1.71.0" +rust-version = "1.81.0" [features] alloc = [] @@ -19,7 +19,3 @@ std = ["alloc"] decompress = [] compress = [] default = ["compress", "decompress", "std"] - -[build-dependencies] -bindgen = "0.72.1" -cc = "1.2.41" diff --git a/README.md b/README.md index b0ab8e7..4078f1d 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ [crates.io]: https://crates.io/crates/lzokay [Api Rustdoc]: https://img.shields.io/badge/api-rustdoc-blue.svg [rustdoc]: https://docs.rs/lzokay -[Rust Version]: https://img.shields.io/badge/rust-1.70+-blue.svg?maxAge=3600 +[Rust Version]: https://img.shields.io/badge/rust-1.81+-blue.svg?maxAge=3600 -Rust wrapper for [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed implementation of the +Pure-Rust port of [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed implementation of the [LZO compression format](http://www.oberhumer.com/opensource/lzo/). See the original [README](https://github.com/jackoalan/lzokay/blob/master/README.md) for more information. @@ -21,29 +21,29 @@ See the original [README](https://github.com/jackoalan/lzokay/blob/master/README ### Usage -See the [compress](https://docs.rs/lzokay/latest/lzokay/compress) -or [decompress](https://docs.rs/lzokay/latest/lzokay/decompress) +See the [compress](https://docs.rs/lzokay/latest/lzokay/compress) +or [decompress](https://docs.rs/lzokay/latest/lzokay/decompress) documentation for reference. 
In `Cargo.toml`: ```toml [dependencies] -lzokay = "1.0.1" +lzokay = "2.0.0" ``` Or, to only enable certain features: ```toml [dependencies.lzokay] -version = "1.0.1" +version = "2.0.0" default-features = false features = ["decompress", "compress"] ``` - `decompress`: Enables decompression functions. - `compress`: Enables compression functions. -- `alloc`: Enables optional compression functions that perform heap allocation. +- `alloc`: Enables optional compression functions that perform heap allocation. Without `std`, this uses `extern crate alloc`. - `std`: Enables use of `std`. Implies `alloc`. diff --git a/build.rs b/build.rs deleted file mode 100644 index e17ac2b..0000000 --- a/build.rs +++ /dev/null @@ -1,39 +0,0 @@ -use std::{env, path::PathBuf}; - -fn main() { - println!("cargo:rerun-if-changed=wrapper.hpp"); - println!("cargo:rerun-if-changed=lzokay/lzokay.cpp"); - println!("cargo:rerun-if-changed=lzokay/lzokay.hpp"); - cc::Build::new() - .cpp(true) - .file("lzokay/lzokay.cpp") - .flag_if_supported("-std=c++14") // GCC/Clang - .flag_if_supported("/std:c++14") // MSVC - .compile("lzokay"); - #[allow(unused_mut)] - let mut bindings = bindgen::Builder::default() - .header("wrapper.hpp") - .clang_arg("-Ilzokay") - .allowlist_function("lzokay::.*") - .size_t_is_usize(true) - .ctypes_prefix("types") - .derive_debug(false) - .clang_arg("-std=c++14") - .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())); - #[cfg(not(feature = "std"))] - { - bindings = bindings.layout_tests(false); - } - if matches!(env::var("CARGO_CFG_TARGET_OS"), Result::Ok(v) if v == "android") { - if let Result::Ok(cc) = env::var("TARGET_CXX") { - let mut sysroot = PathBuf::from(cc).with_file_name("../sysroot"); - sysroot = sysroot.canonicalize().unwrap_or_else(|err| { - panic!("Failed to locate {}: {}", sysroot.to_string_lossy(), err) - }); - bindings = bindings.clang_arg(format!("--sysroot={}", sysroot.to_string_lossy())); - } - } - let result = bindings.generate().expect("Unable to 
generate bindings"); - let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); - result.write_to_file(out_path.join("bindings.rs")).expect("Couldn't write bindings!"); -} diff --git a/lzokay b/lzokay deleted file mode 160000 index db2df1f..0000000 --- a/lzokay +++ /dev/null @@ -1 +0,0 @@ -Subproject commit db2df1fcbebc2ed06c10f727f72567d40f06a2be diff --git a/src/compress.rs b/src/compress.rs index cc32677..1e85a41 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -2,7 +2,7 @@ //! //! Available with feature `compress`. //! -//! [`compress`] and [`compress_with_dict`] available with features `std` and/or `alloc`. +//! [`compress`] and [`compress_with_dict`] are available when the `alloc` feature is enabled. //! //! # Examples //! @@ -42,7 +42,7 @@ //! // Allocate dst on stack, with worst-case compression size //! let mut dst = [0u8; compress_worst_size(input.len())]; //! // Allocate dictionary storage on stack -//! let mut storage = [0u8; dict_storage_size()]; +//! let mut storage = DictStorage::new(); //! // Create dictionary from storage //! let mut dict = dict_from_storage(&mut storage); //! let size = compress_no_alloc(&input, &mut dst, &mut dict)?; @@ -50,120 +50,638 @@ //! # Ok::<(), lzokay::Error>(()) //! 
``` -#[cfg(all(not(feature = "std"), feature = "alloc"))] +#[cfg(all(feature = "alloc", not(feature = "std")))] extern crate alloc; -#[cfg(all(not(feature = "std"), feature = "alloc"))] -use alloc::{boxed::Box, vec::Vec}; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::{boxed::Box, vec, vec::Vec}; +use core::{cmp, mem::size_of}; +#[cfg(all(feature = "alloc", feature = "std"))] +use std::{boxed::Box, vec, vec::Vec}; + +use crate::Error; + #[cfg(feature = "alloc")] -use core::ptr::null_mut; -use core::{marker::PhantomData, mem::size_of}; - -use crate::{bindings, lzokay_result, Error}; - -type DictStorage = bindings::lzokay_DictBase_storage_type; - -/// Dictionary type -pub struct Dict<'a> { - base: bindings::lzokay_DictBase, - #[cfg(feature = "alloc")] - storage: Option>, - phantom: PhantomData<&'a DictStorage>, +/// Compress `src` into a freshly allocated `Vec` using a temporary dictionary. +pub fn compress(src: &[u8]) -> Result, Error> { + let mut dict = new_dict(); + compress_with_dict(src, &mut dict) } -/// Creates a new heap-allocated dictionary. #[cfg(feature = "alloc")] -pub fn new_dict() -> Dict<'static> { - let mut dict = Dict { - base: bindings::lzokay_DictBase { _storage: null_mut() }, - storage: Option::Some(Box::new([0u8; dict_storage_size()])), - phantom: PhantomData, - }; - dict.base._storage = dict.storage.as_mut().unwrap().as_mut_ptr() as *mut DictStorage; - dict +/// Compress `src`, reusing the provided dictionary across calls. +pub fn compress_with_dict(src: &[u8], dict: &mut Dict) -> Result, Error> { + let capacity = compress_worst_size(src.len()); + let mut buf = vec![0u8; capacity]; + let size = compress_no_alloc(src, &mut buf, dict)?; + buf.truncate(size); + Ok(buf) } -/// Dictionary storage size, for manual or stack allocation. -pub const fn dict_storage_size() -> usize { size_of::() } - -/// Creates a dictionary from the supplied storage. 
-/// -/// Storage **must** be at least [`dict_storage_size()`] bytes, -/// otherwise this function will panic. -pub fn dict_from_storage(storage: &mut [u8]) -> Dict<'_> { - if storage.len() < dict_storage_size() { - panic!( - "Dictionary storage is not large enough: {}, expected {}", - storage.len(), - dict_storage_size() - ); - } - Dict { - base: bindings::lzokay_DictBase { _storage: storage.as_mut_ptr() as *mut DictStorage }, - #[cfg(feature = "alloc")] - storage: Option::None, - phantom: PhantomData, - } -} - -/// Worst-case compression size. +/// Worst-case compressed size according to the LZO format guarantees. pub const fn compress_worst_size(s: usize) -> usize { s + s / 16 + 64 + 3 } -/// Compress the supplied buffer into a heap-allocated vector. -/// -/// Creates a new dictionary for each invocation. -#[cfg(feature = "alloc")] -pub fn compress(src: &[u8]) -> Result, Error> { compress_with_dict(src, &mut new_dict()) } - -/// Compress the supplied buffer into a heap-allocated vector, -/// with the supplied pre-allocated dictionary. -#[cfg(feature = "alloc")] -pub fn compress_with_dict(src: &[u8], dict: &mut Dict) -> Result, Error> { - let mut out_size = 0usize; - let capacity = compress_worst_size(src.len()); - let mut dst = Vec::with_capacity(capacity); - let result = unsafe { - let result = bindings::lzokay_compress( - src.as_ptr(), - src.len(), - dst.as_mut_ptr(), - capacity, - &mut out_size, - &mut dict.base, - ); - if result == bindings::lzokay_EResult_Success { - dst.set_len(out_size as usize); - } - result - }; - lzokay_result(dst, result) +/// Compress without heap allocations, writing the output into `dst`. +pub fn compress_no_alloc(src: &[u8], dst: &mut [u8], dict: &mut Dict) -> Result { + let storage = dict.storage_mut(); + compress_impl(src, dst, storage) } -/// Compress the supplied buffer. -/// -/// For sizing `dst`, use [`compress_worst_size`]. 
-pub fn compress_no_alloc(src: &[u8], dst: &mut [u8], dict: &mut Dict) -> Result { - let mut out_size = 0usize; - let result = unsafe { - bindings::lzokay_compress( - src.as_ptr(), - src.len(), - dst.as_mut_ptr(), - dst.len(), - &mut out_size, - &mut dict.base, - ) - }; - lzokay_result(out_size as usize, result) +const HASH_SIZE: usize = 0x4000; +const MAX_DIST: usize = 0xBFFF; +const MAX_MATCH_LEN: usize = 0x800; +const BUF_SIZE: usize = MAX_DIST + MAX_MATCH_LEN; +const MAX_MATCH_TABLE: usize = 34; +const BUF_GUARD: usize = BUF_SIZE + MAX_MATCH_LEN; + +const M1_MAX_OFFSET: u32 = 0x0400; +const M2_MAX_OFFSET: u32 = 0x0800; +const M3_MAX_OFFSET: u32 = 0x4000; +const M4_BASE_OFFSET: u32 = 0x4000; + +const M2_MIN_LEN: u32 = 3; +const M2_MAX_LEN: u32 = 8; +const M3_MAX_LEN: u32 = 33; +const M4_MAX_LEN: u32 = 9; + +const M3_MARKER: u8 = 0x20; +const M4_MARKER: u8 = 0x10; + +/// Hash chains tracking recent 3-byte sequences, keeping per-key chains and +/// remembering the best match length at each node. 
+#[derive(Clone)] +struct Match3 { + head: [u16; HASH_SIZE], + chain_sz: [u16; HASH_SIZE], + chain: [u16; BUF_SIZE], + best_len: [u16; BUF_SIZE], +} + +impl Match3 { + const fn new() -> Self { + Self { + head: [0; HASH_SIZE], + chain_sz: [0; HASH_SIZE], + chain: [0; BUF_SIZE], + best_len: [0; BUF_SIZE], + } + } + + #[inline] + fn make_key(bytes: &[u8]) -> usize { + let a = bytes[0] as u32; + let b = bytes[1] as u32; + let c = bytes[2] as u32; + let mix = (((a << 5) ^ b).wrapping_shl(5)) ^ c; + let prod = 0x9f5f_u32.wrapping_mul(mix); + ((prod >> 5) & 0x3fff) as usize + } + + #[inline] + fn get_head(&self, key: usize) -> u16 { + if self.chain_sz[key] == 0 { + u16::MAX + } else { + self.head[key] + } + } + + fn init(&mut self) { self.chain_sz.fill(0); } + + fn remove(&mut self, pos: usize, buffer: &[u8; BUF_GUARD]) { + let key = Self::make_key(&buffer[pos..]); + self.chain_sz[key] = self.chain_sz[key].saturating_sub(1); + } + + /// Insert the current position into the hash chains and return the head + /// position alongside the bounded chain length to inspect. + fn advance(&mut self, state: &State, buffer: &[u8; BUF_GUARD]) -> (u16, u32) { + let key = Self::make_key(&buffer[state.wind_b as usize..]); + let head = self.get_head(key); + self.chain[state.wind_b as usize] = head; + let mut count = self.chain_sz[key] as u32; + self.chain_sz[key] = self.chain_sz[key].wrapping_add(1); + if count > MAX_MATCH_LEN as u32 { + count = MAX_MATCH_LEN as u32; + } + self.head[key] = state.wind_b as u16; + (head, count) + } + + /// Fast path for known matches: advance the hash chains without searching. 
+ fn skip_advance(&mut self, state: &State, buffer: &[u8; BUF_GUARD]) { + let key = Self::make_key(&buffer[state.wind_b as usize..]); + self.chain[state.wind_b as usize] = self.get_head(key); + self.head[key] = state.wind_b as u16; + self.best_len[state.wind_b as usize] = (MAX_MATCH_LEN + 1) as u16; + self.chain_sz[key] = self.chain_sz[key].wrapping_add(1); + } +} + +/// Direct lookup table for 2-byte prefixes used to seed matches quickly. +#[derive(Clone)] +struct Match2 { + head: [u16; 1 << 16], +} + +impl Match2 { + const fn new() -> Self { Self { head: [u16::MAX; 1 << 16] } } + + #[inline] + fn make_key(bytes: &[u8]) -> usize { (bytes[0] as usize) ^ ((bytes[1] as usize) << 8) } + + fn init(&mut self) { self.head.fill(u16::MAX); } + + fn add(&mut self, pos: u16, buffer: &[u8; BUF_GUARD]) { + let key = Self::make_key(&buffer[pos as usize..]); + self.head[key] = pos; + } + + fn remove(&mut self, pos: usize, buffer: &[u8; BUF_GUARD]) { + let key = Self::make_key(&buffer[pos..]); + if self.head[key] as usize == pos { + self.head[key] = u16::MAX; + } + } + + /// Try to find a 2-byte prefix match at the current window position. + fn search( + &self, + state: &State, + lb_pos: &mut u32, + lb_len: &mut u32, + best_pos: &mut [u32; MAX_MATCH_TABLE], + buffer: &[u8; BUF_GUARD], + ) -> bool { + let key = Self::make_key(&buffer[state.wind_b as usize..]); + let pos = self.head[key]; + if pos == u16::MAX { + return false; + } + if best_pos[2] == 0 { + best_pos[2] = pos as u32 + 1; + } + if *lb_len < 2 { + *lb_len = 2; + *lb_pos = pos as u32; + } + true + } +} + +/// Concrete storage backing a dictionary instance. Buffers and match tables are +/// stored side by side so the encoder can share logic across heap and stack +/// configurations. 
+#[derive(Clone)] +pub struct DictStorage { + match3: Match3, + match2: Match2, + buffer: [u8; BUF_GUARD], +} + +impl DictStorage { + pub const fn new() -> Self { + Self { match3: Match3::new(), match2: Match2::new(), buffer: [0; BUF_GUARD] } + } + + /// Initialize dictionary tables and preload the first window from `state.src`. + fn init(&mut self, state: &mut State<'_>) { + self.match3.init(); + self.match2.init(); + + state.cycle1_countdown = MAX_DIST as u32; + state.inp = 0; + state.wind_sz = cmp::min(state.src.len(), MAX_MATCH_LEN) as u32; + state.wind_b = 0; + state.wind_e = state.wind_sz; + if state.wind_sz > 0 { + let len = state.wind_sz as usize; + self.buffer[..len].copy_from_slice(&state.src[..len]); + } + state.inp += state.wind_sz as usize; + + if state.wind_sz < 3 { + let start = state.wind_b as usize + state.wind_sz as usize; + let end = start + (3 - state.wind_sz as usize); + self.buffer[start..end].fill(0); + } + } + + /// Remove stale entries before the sliding window overwrites them. + fn reset_next_input_entry(&mut self, state: &mut State) { + if state.cycle1_countdown == 0 { + let pos = state.wind_e as usize; + self.match3.remove(pos, &self.buffer); + self.match2.remove(pos, &self.buffer); + } else { + state.cycle1_countdown -= 1; + } + } + + /// Advance the dictionary by one position, returning the best match offset + /// and length. When `skip` is true the already-emitted match bytes are + /// fast-forwarded first so the dictionary stays aligned with the encoded + /// output. + fn advance( + &mut self, + state: &mut State, + prev_len: u32, + best_off: &mut [u32; MAX_MATCH_TABLE], + skip: bool, + ) -> (u32, u32) { + if skip { + // Skip phase: advance through already-encoded match bytes while + // keeping the dictionary in sync with the emitted output. 
+ for _ in 0..prev_len.saturating_sub(1) { + self.reset_next_input_entry(state); + self.match3.skip_advance(state, &self.buffer); + self.match2.add(state.wind_b as u16, &self.buffer); + state.get_byte(&mut self.buffer); + } + } + + let mut lb_len = 1u32; + let mut lb_off = 0u32; + let mut lb_pos = 0u32; + let mut best_pos = [0u32; MAX_MATCH_TABLE]; + + let (match_head, mut match_count) = self.match3.advance(state, &self.buffer); + if match_head == u16::MAX { + match_count = 0; + } + + let mut should_terminate = false; + let best_len = lb_len; + + if lb_len >= state.wind_sz { + // Window exhausted: no further matches possible once we reach EOF. + if state.wind_sz == 0 { + should_terminate = true; + } + self.match3.best_len[state.wind_b as usize] = (MAX_MATCH_LEN + 1) as u16; + } else { + if self.match2.search(state, &mut lb_pos, &mut lb_len, &mut best_pos, &self.buffer) + && state.wind_sz >= 3 + { + let mut match_pos = match_head as usize; + for _ in 0..match_count { + if match_pos >= BUF_SIZE { + break; + } + let ref_pos = state.wind_b as usize; + let window = state.wind_sz as usize; + let mut matched = 0usize; + while matched < window + && self.buffer[ref_pos + matched] == self.buffer[match_pos + matched] + { + matched += 1; + } + if matched >= 2 { + if matched < MAX_MATCH_TABLE && best_pos[matched] == 0 { + // Remember first occurrence for potential match length tweaks. 
+ best_pos[matched] = match_pos as u32 + 1; + } + let matched_u32 = matched as u32; + if matched_u32 > lb_len { + lb_len = matched_u32; + lb_pos = match_pos as u32; + if lb_len == state.wind_sz + || lb_len > self.match3.best_len[match_pos] as u32 + { + break; + } + } + } + match_pos = self.match3.chain[match_pos] as usize; + } + } + if lb_len > best_len { + lb_off = state.pos2off(lb_pos); + } + self.match3.best_len[state.wind_b as usize] = lb_len as u16; + for i in 2..MAX_MATCH_TABLE { + best_off[i] = if best_pos[i] != 0 { state.pos2off(best_pos[i] - 1) } else { 0 }; + } + } + + self.reset_next_input_entry(state); + self.match2.add(state.wind_b as u16, &self.buffer); + state.get_byte(&mut self.buffer); + + if should_terminate { + state.buf_sz = 0; + lb_len = 0; + } else { + // Buffer size counts the current byte plus the lookahead window. + state.buf_sz = state.wind_sz + 1; + } + state.bufp = state.inp - state.buf_sz as usize; + + (lb_off, lb_len) + } +} + +/// Sliding window state tracked while searching for matches. +struct State<'a> { + src: &'a [u8], + inp: usize, + wind_sz: u32, + wind_b: u32, + wind_e: u32, + cycle1_countdown: u32, + bufp: usize, + buf_sz: u32, +} + +impl<'a> State<'a> { + /// Create a new window over `src`. + fn new(src: &'a [u8]) -> Self { + Self { + src, + inp: 0, + wind_sz: 0, + wind_b: 0, + wind_e: 0, + cycle1_countdown: 0, + bufp: 0, + buf_sz: 0, + } + } + + /// Advance the window by one byte, copying from `src` and maintaining the + /// duplicated tail used for wrap-around reads. 
+ fn get_byte(&mut self, buffer: &mut [u8; BUF_GUARD]) { + if self.inp >= self.src.len() { + if self.wind_sz > 0 { + self.wind_sz -= 1; + } + let idx = self.wind_e as usize; + buffer[idx] = 0; + if idx < MAX_MATCH_LEN { + buffer[BUF_SIZE + idx] = 0; + } + } else { + let value = self.src[self.inp]; + let idx = self.wind_e as usize; + buffer[idx] = value; + if idx < MAX_MATCH_LEN { + buffer[BUF_SIZE + idx] = value; + } + self.inp += 1; + } + self.wind_e = (self.wind_e + 1) % BUF_SIZE as u32; + self.wind_b = (self.wind_b + 1) % BUF_SIZE as u32; + } + + /// Convert a buffer index into a backwards distance within the window. + #[inline] + fn pos2off(&self, pos: u32) -> u32 { + if self.wind_b > pos { + self.wind_b - pos + } else { + BUF_SIZE as u32 - (pos - self.wind_b) + } + } +} + +/// Internal representation for dictionaries, either borrowed or owned. +enum DictInner<'a> { + Borrowed(&'a mut DictStorage), + #[cfg(feature = "alloc")] + Owned(Box), +} + +/// Compression dictionary used to retain the sliding window between calls. +pub struct Dict<'a> { + inner: DictInner<'a>, +} + +impl<'a> Dict<'a> { + /// Return the mutable storage backing this dictionary, regardless of + /// whether it is owned or borrowed. + fn storage_mut(&mut self) -> &mut DictStorage { + match &mut self.inner { + DictInner::Borrowed(storage) => storage, + #[cfg(feature = "alloc")] + DictInner::Owned(storage) => storage.as_mut(), + } + } +} + +#[cfg(feature = "alloc")] +/// Create a heap-allocated dictionary with the canonical storage layout. +pub fn new_dict() -> Dict<'static> { + Dict { inner: DictInner::Owned(Box::new(DictStorage::new())) } +} + +/// Total number of bytes required to back a dictionary. +pub const fn dict_storage_size() -> usize { size_of::() } + +/// Wrap user-provided storage (e.g. stack-allocated) inside a dictionary. 
+pub fn dict_from_storage(storage: &mut DictStorage) -> Dict<'_> { + Dict { inner: DictInner::Borrowed(storage) } +} + +/// Emit the repeated zero-byte encoding used for long literal/match lengths. +fn write_zero_byte_length( + dst: &mut [u8], + out_pos: &mut usize, + mut len: usize, +) -> Result<(), Error> { + while len > 255 { + write_dst(dst, out_pos, &[0])?; + len -= 255; + } + write_dst(dst, out_pos, &[len as u8])?; + Ok(()) +} + +/// Emit a literal run following the LZO opcode rules. +fn encode_literal_run( + dst: &mut [u8], + out_pos: &mut usize, + src: &[u8], + lit_ptr: usize, + lit_len: usize, +) -> Result<(), Error> { + if *out_pos == 0 && lit_len <= 238 { + write_dst(dst, out_pos, &[17 + lit_len as u8])?; + } else if lit_len <= 3 { + let idx = out_pos.checked_sub(2).ok_or(Error::OutputOverrun)?; + *dst_byte_mut(dst, idx)? |= lit_len as u8; + } else if lit_len <= 18 { + write_dst(dst, out_pos, &[(lit_len - 3) as u8])?; + } else { + write_dst(dst, out_pos, &[0])?; + write_zero_byte_length(dst, out_pos, lit_len - 18)?; + } + let src_chunk = src.get(lit_ptr..lit_ptr + lit_len).ok_or(Error::InputOverrun)?; + write_dst(dst, out_pos, src_chunk)?; + Ok(()) +} + +/// Emit a back-reference according to the LZOKAY/LZO opcode encoding. 
+fn encode_lookback_match( + dst: &mut [u8], + out_pos: &mut usize, + lb_len: u32, + mut lb_off: u32, + last_lit_len: u32, +) -> Result<(), Error> { + if lb_len == 2 { + lb_off -= 1; + write_dst(dst, out_pos, &[((lb_off & 0x3) << 2) as u8, (lb_off >> 2) as u8])?; + } else if lb_len <= M2_MAX_LEN && lb_off <= M2_MAX_OFFSET { + lb_off -= 1; + write_dst(dst, out_pos, &[ + (((lb_len - 1) << 5) | ((lb_off & 0x7) << 2)) as u8, + (lb_off >> 3) as u8, + ])?; + } else if lb_len == M2_MIN_LEN && lb_off <= M1_MAX_OFFSET + M2_MAX_OFFSET && last_lit_len >= 4 { + lb_off -= 1 + M2_MAX_OFFSET; + write_dst(dst, out_pos, &[((lb_off & 0x3) << 2) as u8, (lb_off >> 2) as u8])?; + } else if lb_off <= M3_MAX_OFFSET { + lb_off -= 1; + if lb_len <= M3_MAX_LEN { + write_dst(dst, out_pos, &[M3_MARKER | (lb_len as u8 - 2)])?; + } else { + let extra = (lb_len - M3_MAX_LEN) as usize; + write_dst(dst, out_pos, &[M3_MARKER])?; + write_zero_byte_length(dst, out_pos, extra)?; + } + write_dst(dst, out_pos, &[(lb_off << 2) as u8, (lb_off >> 6) as u8])?; + } else { + lb_off -= M4_BASE_OFFSET; + if lb_len <= M4_MAX_LEN { + write_dst(dst, out_pos, &[M4_MARKER + | (((lb_off & 0x4000) >> 11) as u8) + | (lb_len as u8 - 2)])?; + } else { + let extra = (lb_len - M4_MAX_LEN) as usize; + write_dst(dst, out_pos, &[M4_MARKER | (((lb_off & 0x4000) >> 11) as u8)])?; + write_zero_byte_length(dst, out_pos, extra)?; + } + write_dst(dst, out_pos, &[(lb_off << 2) as u8, (lb_off >> 6) as u8])?; + } + Ok(()) +} + +/// Apply the heuristics that prefer cheaper opcodes when a shorter match can be +/// emitted at a closer distance. +fn find_better_match(best_off: &[u32; MAX_MATCH_TABLE], lb_len: &mut u32, lb_off: &mut u32) { + let len = *lb_len; + let off = *lb_off; + if len <= M2_MIN_LEN || off <= M2_MAX_OFFSET { + return; + } + // Prefer re-encoding long matches as cheaper opcodes whenever the distance + // permits switching to a shorter back-reference class. 
+ if off > M2_MAX_OFFSET + && len >= M2_MIN_LEN + 1 + && len <= M2_MAX_LEN + 1 + && best_off[len as usize - 1] != 0 + && best_off[len as usize - 1] <= M2_MAX_OFFSET + { + *lb_len = len - 1; + *lb_off = best_off[len as usize - 1]; + } else if off > M3_MAX_OFFSET + && len >= M4_MAX_LEN + 1 + && len <= M2_MAX_LEN + 2 + && best_off[len as usize - 2] != 0 + && best_off[len as usize - 2] <= M2_MAX_OFFSET + { + *lb_len = len - 2; + *lb_off = best_off[len as usize - 2]; + } else if off > M3_MAX_OFFSET + && len >= M4_MAX_LEN + 1 + && len <= M3_MAX_LEN + 1 + && best_off[len as usize - 1] != 0 + && best_off[len as usize - 1] <= M3_MAX_OFFSET + { + *lb_len = len - 1; + *lb_off = best_off[len as usize - 1]; + } +} + +/// Core compression routine shared by the heap-allocating and stack variants. +/// Maintains the window management and opcode selection heuristics required by +/// the LZO format while using safe Rust semantics. +fn compress_impl(src: &[u8], dst: &mut [u8], storage: &mut DictStorage) -> Result { + let mut state = State::new(src); + storage.init(&mut state); + + let mut out_pos = 0usize; + let mut lit_len = 0u32; + let mut best_off = [0u32; MAX_MATCH_TABLE]; + + let mut lit_ptr = state.inp; + let (mut lb_off, mut lb_len) = storage.advance(&mut state, 0, &mut best_off, false); + + while state.buf_sz > 0 { + if lit_len == 0 { + // Capture the starting point for the next literal run. + lit_ptr = state.bufp; + } + + if lb_len < 2 + || (lb_len == 2 && (lb_off > M1_MAX_OFFSET || lit_len == 0 || lit_len >= 4)) + || (lb_len == 2 && out_pos == 0) + || (out_pos == 0 && lit_len == 0) + { + lb_len = 0; + } else if lb_len == M2_MIN_LEN && lb_off > M1_MAX_OFFSET + M2_MAX_OFFSET && lit_len >= 4 { + lb_len = 0; + } + + if lb_len == 0 { + lit_len += 1; + // No match chosen: step forward by one literal byte.
+ let (next_off, next_len) = storage.advance(&mut state, 0, &mut best_off, false); + lb_off = next_off; + lb_len = next_len; + continue; + } + + find_better_match(&best_off, &mut lb_len, &mut lb_off); + encode_literal_run(dst, &mut out_pos, src, lit_ptr, lit_len as usize)?; + encode_lookback_match(dst, &mut out_pos, lb_len, lb_off, lit_len)?; + let prev_len = lb_len; + lit_len = 0; + // Advance over the matched bytes, updating the search structures. + let (next_off, next_len) = storage.advance(&mut state, prev_len, &mut best_off, true); + lb_off = next_off; + lb_len = next_len; + } + + // Flush any trailing literal bytes. + encode_literal_run(dst, &mut out_pos, src, lit_ptr, lit_len as usize)?; + + // Emit terminating M4 instruction (distance 0x4000, length 3). + write_dst(dst, &mut out_pos, &[M4_MARKER | 1, 0, 0])?; + + Ok(out_pos) +} + +#[inline(always)] +fn dst_byte_mut<'a>(dst: &'a mut [u8], idx: usize) -> Result<&'a mut u8, Error> { + dst.get_mut(idx).ok_or(Error::OutputOverrun) +} + +#[inline(always)] +fn write_dst(dst: &mut [u8], out_pos: &mut usize, slice: &[u8]) -> Result<(), Error> { + let pos = *out_pos; + let end = pos.checked_add(slice.len()).ok_or(Error::OutputOverrun)?; + let dst_chunk = dst.get_mut(pos..end).ok_or(Error::OutputOverrun)?; + dst_chunk.copy_from_slice(slice); + *out_pos = end; + Ok(()) } #[cfg(test)] mod tests { #[cfg(feature = "alloc")] - use crate::compress::{compress, compress_with_dict, new_dict}; - use crate::compress::{ - compress_no_alloc, compress_worst_size, dict_from_storage, dict_storage_size, - }; + use super::{compress, compress_with_dict, new_dict}; + use super::{compress_no_alloc, compress_worst_size, dict_from_storage, DictStorage}; const INPUT_1: &[u8] = include_bytes!("test1.txt"); const EXPECTED_1: &[u8] = include_bytes!("test1.bin"); @@ -183,7 +701,6 @@ mod tests { let mut dict = new_dict(); let dst = compress_with_dict(INPUT_1, &mut dict).expect("Failed to compress (1)"); assert_eq!(dst, EXPECTED_1); - // Compress 
a second time to test dictionary reuse let dst = compress_with_dict(INPUT_2, &mut dict).expect("Failed to compress (2)"); assert_eq!(dst, EXPECTED_2); } @@ -191,12 +708,11 @@ mod tests { #[test] fn test_compress_no_alloc() { let mut dst = [0u8; compress_worst_size(INPUT_1.len())]; - let mut storage = [0u8; dict_storage_size()]; + let mut storage = DictStorage::new(); let mut dict = dict_from_storage(&mut storage); let out_size = compress_no_alloc(INPUT_1, &mut dst, &mut dict).expect("Failed to compress (1)"); assert_eq!(&dst[0..out_size], EXPECTED_1); - // Compress a second time to test dictionary reuse let out_size = compress_no_alloc(INPUT_2, &mut dst, &mut dict).expect("Failed to compress (2)"); assert_eq!(&dst[0..out_size], EXPECTED_2); diff --git a/src/decompress.rs b/src/decompress.rs index cf1df19..12eda40 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -16,23 +16,238 @@ //! # Ok::<(), lzokay::Error>(()) //! ``` -use crate::{bindings, lzokay_result, Error}; +use crate::Error; + +/// Maximum repeat count representable via zero marker bytes when extending +/// literal or match lengths. +const MAX255_COUNT: usize = usize::MAX / 255 - 2; +/// Opcode marker for mid-range matches (labelled "M3" in the LZO reference). +const M3_MARKER: u8 = 0x20; +/// Opcode marker for far matches ("M4") and the terminator instruction. +const M4_MARKER: u8 = 0x10; /// Decompress `src` into `dst`. /// -/// `dst` must be large enough to hold the entire decompressed output. +/// `dst` must be large enough to hold the entire decompressed output. The +/// function follows the documented LZO opcode semantics and state transitions. 
pub fn decompress(src: &[u8], dst: &mut [u8]) -> Result { - let mut out_size = 0usize; - let result = unsafe { - bindings::lzokay_decompress( - src.as_ptr(), - src.len(), - dst.as_mut_ptr(), - dst.len(), - &mut out_size, - ) - }; - lzokay_result(out_size as usize, result) + if src.len() < 3 { + return Err(Error::InputOverrun); + } + + let mut inp = 0usize; + let mut outp = 0usize; + let mut state = 0usize; + let mut nstate: usize; + let mut lblen: usize; + let mut lbcur: usize; + + let first = src[0]; + // The first byte primes a literal run only when it is >= 18; smaller values + // are ordinary opcodes and must be left unconsumed for the main loop. + if first >= 22 { + inp = 1; let len = (first as usize) - 17; + copy_slice(src, &mut inp, dst, &mut outp, len)?; + state = 4; + } else if first >= 18 { + inp = 1; nstate = (first as usize) - 17; + state = nstate; + copy_slice(src, &mut inp, dst, &mut outp, nstate)?; + } + + loop { + let inst = input_byte(src, &mut inp)?; + if inst & 0xC0 != 0 { + // [M2] + // 1 L L D D D S S (128..255) + // Copy 5-8 bytes from block within 2kB distance + // state = S + // length = 5 + L + // 0 1 L D D D S S (64..127) + // Copy 3-4 bytes from block within 2kB distance + // length = 3 + L + // Always followed by one byte: distance = (next << 3) + D + 1 + let next = input_byte(src, &mut inp)?; + let distance = ((next as usize) << 3) + (((inst as usize) >> 2) & 0x7) + 1; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + lblen = ((inst as usize) >> 5) + 1; + nstate = (inst as usize) & 0x3; + } else if inst & M3_MARKER != 0 { + // [M3] + // 0 0 1 L L L L L (32..63) + // Copy from <= 16kB distance + // length = 2 + (L ?: 31 + zero-runs + tail) + // Followed by LE16: distance = (value >> 2) + 1, state = value & 3 + lblen = ((inst as usize) & 0x1F) + 2; + if lblen == 2 { + let offset = consume_zero_byte_length(src, &mut inp)?; + let tail = input_byte(src, &mut inp)?; + lblen += offset * 255 + 31 + tail as
usize; + } + let raw = read_le16(src, &mut inp)?; + let distance = ((raw as usize) >> 2) + 1; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + nstate = (raw as usize) & 0x3; + } else if inst & M4_MARKER != 0 { + // [M4] + // 0 0 0 1 H L L L (16..31) + // Copy from 16..48kB distance + // length = 2 + (L ?: 7 + zero-runs + tail) + // Followed by LE16: distance = 16384 + (H << 14) + value, state = value & 3 + // Terminating opcode when distance == 16384. + lblen = ((inst as usize) & 0x7) + 2; + if lblen == 2 { + let offset = consume_zero_byte_length(src, &mut inp)?; + let tail = input_byte(src, &mut inp)?; + lblen += offset * 255 + 7 + tail as usize; + } + let raw = read_le16(src, &mut inp)?; + let base_dist = ((inst as usize & 0x8) << 11) + ((raw as usize) >> 2); + if base_dist == 0 { + // Stream finished + break; + } + let distance = base_dist + 16384; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + nstate = (raw as usize) & 0x3; + } else { + if state == 0 { + // [Literal] + // 0 0 0 0 L L L L (0..15) + // Copy long literal string: length = 3 + extended length bytes. + let mut len = inst as usize + 3; + if len == 3 { + let offset = consume_zero_byte_length(src, &mut inp)?; + let tail = input_byte(src, &mut inp)?; + len += offset * 255 + 15 + tail as usize; + } + copy_slice(src, &mut inp, dst, &mut outp, len)?; + state = 4; + continue; + } else if state != 4 { + // [M1, short] + // state = 1..3 + // 0 0 0 0 D D S S (0..15) + // Copy 2 bytes within 1kB distance, state = S afterwards. + let tail = input_byte(src, &mut inp)?; + let distance = ((inst as usize) >> 2) + ((tail as usize) << 2) + 1; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + lblen = 2; + nstate = (inst as usize) & 0x3; + } else { + // [M1, long] + // state == 4 + // 0 0 0 0 D D S S (0..15) + // Copy 3 bytes within 2..3kB distance, state = S afterwards. 
+ let tail = input_byte(src, &mut inp)?; + let distance = ((inst as usize) >> 2) + ((tail as usize) << 2) + 2049; + lbcur = outp.checked_sub(distance).ok_or(Error::LookbehindOverrun)?; + lblen = 3; + nstate = (inst as usize) & 0x3; + } + } + + // Copy the lookback run (source and destination may overlap). + if lblen > 0 { + let out_end = outp.checked_add(lblen).ok_or(Error::OutputOverrun)?; + let lb_end = lbcur.checked_add(lblen).ok_or(Error::OutputOverrun)?; + if out_end > dst.len() || lb_end > dst.len() { + return Err(Error::OutputOverrun); + } + for i in 0..lblen { + dst[outp + i] = dst[lbcur + i]; + } + outp = out_end; + } + + // Copy the following literal run dictated by `nstate`. + copy_slice(src, &mut inp, dst, &mut outp, nstate)?; + + state = nstate; + } + + // The stream must end with the terminating M4 instruction (length == 3). + if lblen != 3 { + return Err(Error::Error); + } + + if inp == src.len() { + Ok(outp) + } else if inp < src.len() { + Err(Error::InputNotConsumed) + } else { + Err(Error::InputOverrun) + } +} + +/// Read a single byte from `src`. +#[inline(always)] +fn input_byte(src: &[u8], idx: &mut usize) -> Result<u8, Error> { + let n = src.get(*idx).copied().ok_or(Error::InputOverrun)?; + *idx += 1; + Ok(n) +} + +/// Read a slice of length `len` starting at `start` from `src`. +#[inline(always)] +fn input_slice<'a>(src: &'a [u8], start: &mut usize, len: usize) -> Result<&'a [u8], Error> { + let end = start.checked_add(len).ok_or(Error::InputOverrun)?; + let slice = src.get(*start..end).ok_or(Error::InputOverrun)?; + *start = end; + Ok(slice) +} + +/// Read a little-endian `u16` starting at `pos`. +#[inline(always)] +fn read_le16(bytes: &[u8], pos: &mut usize) -> Result<u16, Error> { + let slice = input_slice(bytes, pos, 2)?; + Ok(u16::from_le_bytes(slice.try_into().unwrap())) +} + +/// Get a mutable slice of length `len` starting at `start` from `dst`. 
+#[inline(always)] +fn output_slice<'a>( + dst: &'a mut [u8], + start: &mut usize, + len: usize, +) -> Result<&'a mut [u8], Error> { + let end = start.checked_add(len).ok_or(Error::OutputOverrun)?; + let slice = dst.get_mut(*start..end).ok_or(Error::OutputOverrun)?; + *start = end; + Ok(slice) +} + +/// Copy a slice from `src` to `dst`. +#[inline(always)] +fn copy_slice( + src: &[u8], + src_start: &mut usize, + dst: &mut [u8], + dst_start: &mut usize, + len: usize, +) -> Result<(), Error> { + if len == 0 { + return Ok(()); + } + let src_slice = input_slice(src, src_start, len)?; + let dst_slice = output_slice(dst, dst_start, len)?; + dst_slice.copy_from_slice(src_slice); + Ok(()) +} + +/// Consume a run of zero marker bytes used for long length encodings. +#[inline(always)] +fn consume_zero_byte_length(src: &[u8], inp: &mut usize) -> Result<usize, Error> { + let start = *inp; + while src.get(*inp).copied() == Some(0) { + *inp += 1; + } + let offset = *inp - start; + if offset > MAX255_COUNT { + Err(Error::Error) + } else { + Ok(offset) + } } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index b28849d..ad00300 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ #![cfg_attr(not(feature = "std"), no_std)] //! # LZ👌-rs //! -//! Rust wrapper for [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed +//! Pure-Rust port of [LZ👌](https://github.com/jackoalan/lzokay), a minimal, MIT-licensed //! implementation of the [LZO compression format](http://www.oberhumer.com/opensource/lzo/). //! //! See the original [README](https://github.com/jackoalan/lzokay/blob/master/README.md) for more information. @@ -20,14 +20,14 @@ //! //! ```toml //! [dependencies] -//! lzokay = "1.0.1" +//! lzokay = "2.0.0" //! ``` //! //! Or, to only enable certain features: //! //! ```toml //! [dependencies.lzokay] -//! version = "1.0.1" +//! version = "2.0.0" //! default-features = false //! features = ["decompress", "compress"] //! 
``` @@ -49,36 +49,8 @@ pub mod compress; #[cfg(feature = "decompress")] pub mod decompress; -mod bindings { - #![allow(unknown_lints)] - #![allow(non_upper_case_globals)] - #![allow(non_camel_case_types)] - #![allow(non_snake_case)] - #![allow(deref_nullptr)] - #![allow(dead_code)] - #[cfg(not(feature = "std"))] - mod types { - pub type c_uchar = u8; - pub type c_ushort = u16; - pub type c_uint = u32; - pub type c_int = i32; - pub type c_ulong = usize; - pub type c_ulonglong = usize; - } - #[cfg(feature = "std")] - mod types { - pub type c_uchar = ::std::os::raw::c_uchar; - pub type c_ushort = ::std::os::raw::c_ushort; - pub type c_uint = ::std::os::raw::c_uint; - pub type c_int = ::std::os::raw::c_int; - pub type c_ulong = usize; - pub type c_ulonglong = usize; - } - include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -} - /// Error result codes -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Error { /// Likely indicates bad compressed LZO input. 
LookbehindOverrun, @@ -92,20 +64,20 @@ pub enum Error { InputNotConsumed, } -fn lzokay_result<T>(result: T, error: bindings::lzokay_EResult) -> Result<T, Error> { - if error == bindings::lzokay_EResult_Success { - Result::Ok(result) - } else { - Result::Err(match error { - bindings::lzokay_EResult_LookbehindOverrun => Error::LookbehindOverrun, - bindings::lzokay_EResult_OutputOverrun => Error::OutputOverrun, - bindings::lzokay_EResult_InputOverrun => Error::InputOverrun, - bindings::lzokay_EResult_InputNotConsumed => Error::InputNotConsumed, - _ => Error::Error, - }) +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self { + Error::LookbehindOverrun => write!(f, "lookbehind overrun"), + Error::OutputOverrun => write!(f, "output overrun"), + Error::InputOverrun => write!(f, "input overrun"), + Error::Error => write!(f, "unknown error"), + Error::InputNotConsumed => write!(f, "input not consumed"), + } } } +impl core::error::Error for Error {} + #[cfg(test)] #[cfg(all(feature = "compress", feature = "decompress", feature = "alloc"))] mod tests { @@ -117,13 +89,22 @@ mod tests { use super::{compress::compress, decompress::decompress}; - const INPUT: &[u8] = include_bytes!("test1.txt"); + const INPUT1: &[u8] = include_bytes!("test1.txt"); + const INPUT2: &[u8] = include_bytes!("test2.txt"); #[test] - fn test_round_trip() { - let compressed = compress(INPUT).expect("Failed to compress"); - let mut dst = vec![0u8; INPUT.len()]; + fn test_round_trip1() { + let compressed = compress(INPUT1).expect("Failed to compress"); + let mut dst = vec![0u8; INPUT1.len()]; decompress(&compressed, &mut dst).expect("Failed to decompress"); - assert_eq!(INPUT, dst.as_slice()); + assert_eq!(INPUT1, dst.as_slice()); + } + + #[test] + fn test_round_trip2() { + let compressed = compress(INPUT2).expect("Failed to compress"); + let mut dst = vec![0u8; INPUT2.len()]; + decompress(&compressed, &mut dst).expect("Failed to decompress"); + 
assert_eq!(INPUT2, dst.as_slice()); } } diff --git a/wrapper.hpp b/wrapper.hpp deleted file mode 100644 index b09f349..0000000 --- a/wrapper.hpp +++ /dev/null @@ -1 +0,0 @@ -#include <lzokay/lzokay.hpp>