Merge pull request #48 from lioncash/lz

LZBase/LZLookupTable: Minor cleanup
This commit is contained in:
Phillip Stephens 2019-08-16 18:18:14 -07:00 committed by GitHub
commit 1fc1c9eab3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 113 additions and 123 deletions

View File

@ -1,35 +1,29 @@
#pragma once #pragma once
#include <string>
#include "LZ77/LZLookupTable.hpp" #include "LZ77/LZLookupTable.hpp"
class LZBase { class LZBase {
public: public:
explicit LZBase(atInt32 minimumOffset = 1, atInt32 slidingWindow = 4096, atInt32 minimumMatch = 3, explicit LZBase(atInt32 minimumOffset = 1, atInt32 slidingWindow = 4096, atInt32 minimumMatch = 3,
atInt32 blockSize = 8); atInt32 blockSize = 8);
virtual ~LZBase() {} virtual ~LZBase();
virtual atUint32 compress(const atUint8* src, atUint8** dest, atUint32 srcLength) = 0; virtual atUint32 compress(const atUint8* src, atUint8** dest, atUint32 srcLength) = 0;
virtual atUint32 decompress(const atUint8* src, atUint8** dest, atUint32 srcLength) = 0; virtual atUint32 decompress(const atUint8* src, atUint8** dest, atUint32 srcLength) = 0;
void setSlidingWindow(atInt32 SlidingWindow); void setSlidingWindow(atInt32 SlidingWindow);
atInt32 slidingWindow(); atInt32 slidingWindow() const;
void setReadAheadBuffer(atInt32 ReadAheadBuffer); void setReadAheadBuffer(atInt32 ReadAheadBuffer);
atInt32 readAheadBuffer(); atInt32 readAheadBuffer() const;
void setMinMatch(atInt32 minimumMatch); void setMinMatch(atInt32 minimumMatch);
atInt32 minMatch(); atInt32 minMatch() const;
void setBlockSize(atInt32 BlockSize); void setBlockSize(atInt32 BlockSize);
atInt32 blockSize(); atInt32 blockSize() const;
void setMinimumOffset(atUint32 minimumOffset); void setMinimumOffset(atUint32 minimumOffset);
atUint32 minimumOffset(); atUint32 minimumOffset() const;
private:
atInt32 subMatch(const atUint8* str1, const uint8_t* str2, const atInt32 len);
LZLengthOffset windowSearch(atUint8* beginSearchPtr, atUint8* searchPosPtr, atUint8* endLABufferPtr,
atUint8* startLBPtr);
protected: protected:
LZLengthOffset search(atUint8* posPtr, atUint8* dataBegin, atUint8* dataEnd); LZLengthOffset search(const atUint8* posPtr, const atUint8* dataBegin, const atUint8* dataEnd) const;
atInt32 m_slidingWindow; atInt32 m_slidingWindow;
atInt32 m_readAheadBuffer; atInt32 m_readAheadBuffer;

View File

@ -1,15 +1,15 @@
#pragma once #pragma once
#include <map>
#include <deque>
#include <vector>
#include <cstdint> #include <cstdint>
#include <map>
#include <vector>
#include <athena/Types.hpp> #include <athena/Types.hpp>
struct LZLengthOffset { struct LZLengthOffset {
atUint32 length; // The number of bytes compressed atUint32 length; // The number of bytes compressed
atUint16 offset; // How far back in sliding window where bytes that match the lookAheadBuffer is located atUint16 offset; // How far back in sliding window where bytes that match the lookAheadBuffer is located
bool compare_equal(const LZLengthOffset& lo_pair) { return length == lo_pair.length && offset == lo_pair.offset; } bool operator==(const LZLengthOffset& lo_pair) const { return length == lo_pair.length && offset == lo_pair.offset; }
bool operator!=(const LZLengthOffset& lo_pair) const { return !operator==(lo_pair); }
}; };
class LZLookupTable { class LZLookupTable {
@ -17,14 +17,14 @@ public:
LZLookupTable(); LZLookupTable();
LZLookupTable(atInt32 minimumMatch, atInt32 slidingWindow = 4096, atInt32 lookAheadWindow = 18); LZLookupTable(atInt32 minimumMatch, atInt32 slidingWindow = 4096, atInt32 lookAheadWindow = 18);
~LZLookupTable(); ~LZLookupTable();
LZLengthOffset search(atUint8* curPos, const atUint8* dataBegin, const atUint8* dataEnd); LZLengthOffset search(const atUint8* curPos, const atUint8* dataBegin, const atUint8* dataEnd);
void setLookAheadWindow(atInt32 lookAheadWindow); void setLookAheadWindow(atInt32 lookAheadWindow);
private: private:
typedef std::multimap<std::vector<uint8_t>, int32_t> LookupTable; using LookupTable = std::multimap<std::vector<uint8_t>, int32_t>;
LookupTable table; LookupTable table;
atInt32 m_minimumMatch; atInt32 m_minimumMatch = 3;
atInt32 m_slidingWindow; atInt32 m_slidingWindow = 4096;
atInt32 m_lookAheadWindow; atInt32 m_lookAheadWindow = 18;
std::vector<uint8_t> m_buffer; std::vector<uint8_t> m_buffer;
}; };

View File

@ -1,75 +1,10 @@
#include "LZ77/LZLookupTable.hpp" #include "LZ77/LZLookupTable.hpp"
#include "LZ77/LZBase.hpp" #include "LZ77/LZBase.hpp"
LZBase::LZBase(atInt32 minimumOffset, atInt32 slidingWindow, atInt32 minimumMatch, atInt32 blockSize) namespace {
: m_slidingWindow(slidingWindow)
, m_readAheadBuffer(minimumMatch)
, m_minMatch(minimumMatch)
, m_blockSize(blockSize)
, m_minOffset(minimumOffset) {}
void LZBase::setSlidingWindow(atInt32 slidingWindow) { m_slidingWindow = slidingWindow; }
atInt32 LZBase::slidingWindow() { return m_slidingWindow; }
void LZBase::setReadAheadBuffer(atInt32 readAheadBuffer) { m_readAheadBuffer = readAheadBuffer; }
atInt32 LZBase::readAheadBuffer() { return m_readAheadBuffer; }
void LZBase::setMinMatch(atInt32 minimumMatch) { m_minMatch = minimumMatch; }
atInt32 LZBase::minMatch() { return m_minMatch; }
void LZBase::setBlockSize(atInt32 blockSize) { m_blockSize = blockSize; }
atInt32 LZBase::blockSize() { return m_blockSize; }
void LZBase::setMinimumOffset(atUint32 minimumOffset) { m_minOffset = minimumOffset; }
atUint32 LZBase::minimumOffset() { return m_minOffset; }
/*
DerricMc:
This search function is my own work and is no way affilated with any one else
I use the my own window_search function to drastically speed up the search function
Normally a search for one byte is matched, then two, then three, all the way up
to the size of the LookAheadBuffer. So I decided to skip the incremental search
and search for the entire LookAheadBuffer and if I don't find the bytes are equal I return
the next best match(which means if I look for 18 bytes and they are not found 18 bytess did not match,
and 17 bytes did match then 17 bytes match is return).
*/
LZLengthOffset LZBase::search(atUint8* posPtr, atUint8* dataBegin, atUint8* dataEnd) {
LZLengthOffset results = {0, 0};
// Returns negative 1 for Search failures since the current position is passed the size to be compressed
if (posPtr >= dataEnd) {
results.length = -1;
return results;
}
atUint8* searchWindow;
// LookAheadBuffer is ReadAheadBuffer Size if there are more bytes than ReadAheadBufferSize waiting
// to be compressed else the number of remaining bytes is the LookAheadBuffer
int lookAheadBuffer_len = ((int)(dataEnd - posPtr) < m_readAheadBuffer) ? (int)(dataEnd - posPtr) : m_readAheadBuffer;
int slidingBuffer = (int)(posPtr - dataBegin) - m_slidingWindow;
if (slidingBuffer > 0)
searchWindow = dataBegin + slidingBuffer;
else
searchWindow = dataBegin;
atUint8* endPos = posPtr + lookAheadBuffer_len;
if (!((posPtr - dataBegin < 1) || (dataEnd - posPtr < m_minMatch)))
results = windowSearch(searchWindow, posPtr, endPos, posPtr - m_minOffset);
return results;
}
// Returns the full length of string2 if they are equal else // Returns the full length of string2 if they are equal else
// Return the number of characters that were equal before they weren't equal // Return the number of characters that were equal before they weren't equal
int LZBase::subMatch(const uint8_t* str1, const uint8_t* str2, const int len) { int subMatch(const uint8_t* str1, const uint8_t* str2, const int len) {
for (int i = 0; i < len; ++i) for (int i = 0; i < len; ++i)
if (str1[i] != str2[i]) if (str1[i] != str2[i])
return i; return i;
@ -77,24 +12,22 @@ int LZBase::subMatch(const uint8_t* str1, const uint8_t* str2, const int len) {
return len; return len;
} }
/* // Normally a search for one byte is matched, then two, then three, all the way up
Normally a search for one byte is matched, then two, then three, all the way up // to the size of the LookAheadBuffer. So I decided to skip the incremental search
to the size of the LookAheadBuffer. So I decided to skip the incremental search // and search for the entire LookAheadBuffer and if the function doesn't find the bytes are
and search for the entire LookAheadBuffer and if the function doesn't find the bytes are // equal the function return the next best match(which means if the function look for 18 bytes and they are not found,
equal the function return the next best match(which means if the function look for 18 bytes and they are not found, // return the number of bytes that did match before it failed to match. The submatch is function returns the number of
return the number of bytes that did match before it failed to match. The submatch is function returns the number of // bytes that were equal, which can result up to the bytes total length if both byte strings are equal.
bytes that were equal, which can result up to the bytes total length if both byte strings are equal. //
//
// ...[][][][][][][][][][][][]|[][][][][][][][][][][][][][]
...[][][][][][][][][][][][]|[][][][][][][][][][][][][][] // |
| // Search Window Current Pos LookAheadBuffer
Search Window Current Pos LookAheadBuffer // Up to 4096 bytes Up to 18 bytes
Up to 4096 bytes Up to 18 bytes // Sliding Window
Sliding Window // Up to 4114 bytes
Up to 4114 bytes LZLengthOffset windowSearch(const atUint8* beginSearchPtr, const atUint8* searchPosPtr, const atUint8* endLABufferPtr,
*/ const atUint8* startLBPtr) {
LZLengthOffset LZBase::windowSearch(atUint8* beginSearchPtr, atUint8* searchPosPtr, atUint8* endLABufferPtr,
atUint8* startLBPtr) {
atInt32 size = (atUint32)(endLABufferPtr - beginSearchPtr); // Size of the entire sliding window atInt32 size = (atUint32)(endLABufferPtr - beginSearchPtr); // Size of the entire sliding window
atInt32 n = (atUint32)(endLABufferPtr - searchPosPtr); atInt32 n = (atUint32)(endLABufferPtr - searchPosPtr);
LZLengthOffset result = {0, 0}; LZLengthOffset result = {0, 0};
@ -103,10 +36,10 @@ LZLengthOffset LZBase::windowSearch(atUint8* beginSearchPtr, atUint8* searchPosP
if (n > size) // If the string that is being looked for is bigger than the string that is being searched if (n > size) // If the string that is being looked for is bigger than the string that is being searched
return result; return result;
/*This makes sure that search for the searchPosPtr can be searched if an invalid position is given // This makes sure that search for the searchPosPtr can be searched if an invalid position is given
An invalid position occurs if the amount of characters to search in_beginSearchPtr is less than the size // An invalid position occurs if the amount of characters to search in_beginSearchPtr is less than the size
of searchPosPtr. In other words there has to be at least n characters left in the string // of searchPosPtr. In other words there has to be at least n characters left in the string
to have a chance to find n characters*/ // to have a chance to find n characters
do { do {
temp = subMatch(startLBPtr, searchPosPtr, n); temp = subMatch(startLBPtr, searchPosPtr, n);
@ -125,3 +58,74 @@ LZLengthOffset LZBase::windowSearch(atUint8* beginSearchPtr, atUint8* searchPosP
return result; return result;
} }
} // Anonymous namespace
// Stores the supplied tuning parameters in the corresponding members.
// No validation or clamping of the arguments is performed here.
LZBase::LZBase(atInt32 minimumOffset, atInt32 slidingWindow, atInt32 minimumMatch, atInt32 blockSize)
: m_slidingWindow(slidingWindow)
// NOTE(review): the read-ahead buffer is seeded from minimumMatch rather than from a
// dedicated argument -- confirm this is intended. setReadAheadBuffer() can override it.
, m_readAheadBuffer(minimumMatch)
, m_minMatch(minimumMatch)
, m_blockSize(blockSize)
, m_minOffset(minimumOffset) {}
// The destructor is declared virtual in the class and defaulted here, out of line.
LZBase::~LZBase() = default;
// --- Sliding window -------------------------------------------------------
// Plain setter/getter pair for m_slidingWindow; the value is stored unchecked.
void LZBase::setSlidingWindow(atInt32 slidingWindow) {
  m_slidingWindow = slidingWindow;
}
atInt32 LZBase::slidingWindow() const {
  return m_slidingWindow;
}

// --- Read-ahead buffer ----------------------------------------------------
// Plain setter/getter pair for m_readAheadBuffer; the value is stored unchecked.
void LZBase::setReadAheadBuffer(atInt32 readAheadBuffer) {
  m_readAheadBuffer = readAheadBuffer;
}
atInt32 LZBase::readAheadBuffer() const {
  return m_readAheadBuffer;
}

// --- Minimum match --------------------------------------------------------
// Plain setter/getter pair for m_minMatch; the value is stored unchecked.
void LZBase::setMinMatch(atInt32 minimumMatch) {
  m_minMatch = minimumMatch;
}
atInt32 LZBase::minMatch() const {
  return m_minMatch;
}

// --- Block size -----------------------------------------------------------
// Plain setter/getter pair for m_blockSize; the value is stored unchecked.
void LZBase::setBlockSize(atInt32 blockSize) {
  m_blockSize = blockSize;
}
atInt32 LZBase::blockSize() const {
  return m_blockSize;
}

// --- Minimum offset -------------------------------------------------------
// Plain setter/getter pair for m_minOffset; the value is stored unchecked.
void LZBase::setMinimumOffset(atUint32 minimumOffset) {
  m_minOffset = minimumOffset;
}
atUint32 LZBase::minimumOffset() const {
  return m_minOffset;
}
/*
DerricMc:
This search function is my own work and is in no way affiliated with anyone else.
I use my own window_search function to drastically speed up the search.
Normally a search for one byte is matched, then two, then three, all the way up
to the size of the LookAheadBuffer. So I decided to skip the incremental search
and search for the entire LookAheadBuffer, and if the bytes are not all equal I return
the next best match (which means that if I look for 18 bytes and all 18 do not match,
but 17 bytes do match, then the 17-byte match is returned).
*/
// Looks for a match for the bytes at posPtr inside the preceding sliding window.
//
// posPtr    - current position in the input
// dataBegin - first byte of the input
// dataEnd   - one past the last byte of the input
//
// Returns {0, 0} when no search is attempted (posPtr is at or before dataBegin, or fewer
// than m_minMatch bytes remain). When posPtr is at or past dataEnd, `length` is
// assigned -1 as a failure marker. Otherwise returns whatever windowSearch() reports.
LZLengthOffset LZBase::search(const atUint8* posPtr, const atUint8* dataBegin, const atUint8* dataEnd) const {
  // Current position is past the data to be compressed: signal failure via length = -1.
  if (posPtr >= dataEnd) {
    LZLengthOffset failure = {0, 0};
    failure.length = -1;
    return failure;
  }

  const LZLengthOffset noMatch = {0, 0};

  // Nothing precedes the current position, so there is no window to search.
  if (posPtr - dataBegin < 1) {
    return noMatch;
  }

  // Too few bytes remain to reach the minimum match length.
  if (dataEnd - posPtr < m_minMatch) {
    return noMatch;
  }

  // The look-ahead length is the read-ahead size, capped at the number of bytes remaining.
  const int bytesRemaining = static_cast<int>(dataEnd - posPtr);
  const int lookAheadLength = (bytesRemaining < m_readAheadBuffer) ? bytesRemaining : m_readAheadBuffer;

  // The window begins m_slidingWindow bytes behind posPtr, but never before dataBegin.
  const int windowOverhang = static_cast<int>(posPtr - dataBegin) - m_slidingWindow;
  const atUint8* const windowBegin = (windowOverhang > 0) ? dataBegin + windowOverhang : dataBegin;

  const atUint8* const lookAheadEnd = posPtr + lookAheadLength;

  // NOTE(review): posPtr - m_minOffset is not checked against dataBegin here -- confirm
  // that windowSearch() copes when m_minOffset exceeds the bytes available before posPtr.
  return windowSearch(windowBegin, posPtr, lookAheadEnd, posPtr - m_minOffset);
}

View File

@ -1,12 +1,7 @@
#include "LZ77/LZLookupTable.hpp" #include "LZ77/LZLookupTable.hpp"
#include <algorithm> #include <algorithm>
LZLookupTable::LZLookupTable() { LZLookupTable::LZLookupTable() : m_buffer(m_minimumMatch) {}
m_minimumMatch = 3;
m_slidingWindow = 4096;
m_lookAheadWindow = 18;
m_buffer.resize(m_minimumMatch);
}
LZLookupTable::LZLookupTable(atInt32 minimumMatch, atInt32 slidingWindow, atInt32 lookAheadWindow) { LZLookupTable::LZLookupTable(atInt32 minimumMatch, atInt32 slidingWindow, atInt32 lookAheadWindow) {
if (minimumMatch > 0) if (minimumMatch > 0)
@ -19,15 +14,12 @@ LZLookupTable::LZLookupTable(atInt32 minimumMatch, atInt32 slidingWindow, atInt3
else else
m_slidingWindow = 4096; m_slidingWindow = 4096;
if (lookAheadWindow > 0) setLookAheadWindow(lookAheadWindow);
m_lookAheadWindow = lookAheadWindow;
else
m_lookAheadWindow = 18;
m_buffer.reserve(m_minimumMatch); m_buffer.reserve(m_minimumMatch);
} }
LZLookupTable::~LZLookupTable() {} LZLookupTable::~LZLookupTable() = default;
void LZLookupTable::setLookAheadWindow(atInt32 lookAheadWindow) { void LZLookupTable::setLookAheadWindow(atInt32 lookAheadWindow) {
if (lookAheadWindow > 0) if (lookAheadWindow > 0)
@ -36,7 +28,7 @@ void LZLookupTable::setLookAheadWindow(atInt32 lookAheadWindow) {
m_lookAheadWindow = 18; m_lookAheadWindow = 18;
} }
LZLengthOffset LZLookupTable::search(atUint8* curPos, const atUint8* dataBegin, const atUint8* dataEnd) { LZLengthOffset LZLookupTable::search(const atUint8* curPos, const atUint8* dataBegin, const atUint8* dataEnd) {
LZLengthOffset loPair = {0, 0}; LZLengthOffset loPair = {0, 0};
// Returns negative 1 for search failures since the current position is passed the size to be compressed // Returns negative 1 for search failures since the current position is passed the size to be compressed