* Add LZ77 Compression

This commit is contained in:
Phillip Stephens 2014-09-09 09:36:29 -07:00
parent 5a621215de
commit e8fad79134
11 changed files with 770 additions and 15 deletions

View File

@ -45,7 +45,11 @@ SOURCES += \
$$PWD/src/sha1.cpp \
$$PWD/src/aes.c \
$$PWD/src/lzo.c \
$$PWD/src/Athena/PHYSFSFileReader.cpp
$$PWD/src/Athena/PHYSFSFileReader.cpp \
$$PWD/src/LZ77/LZLookupTable.cpp \
$$PWD/src/LZ77/LZType10.cpp \
$$PWD/src/LZ77/LZType11.cpp \
$$PWD/src/LZ77/LZBase.cpp
win32:SOURCES += $$PWD/src/win32_largefilewrapper.c
HEADERS += \
@ -103,7 +107,11 @@ HEADERS += \
$$PWD/include/Athena/ZQuestFile.hpp \
$$PWD/include/Athena/ZQuestFileReader.hpp \
$$PWD/include/Athena/ZQuestFileWriter.hpp \
Athena/include/Athena/PHYSFSFileReader.hpp
$$PWD/include/Athena/PHYSFSFileReader.hpp \
$$PWD/include/LZ77/LZBase.hpp \
$$PWD/include/LZ77/LZLookupTable.hpp \
$$PWD/include/LZ77/LZType10.hpp \
$$PWD/include/LZ77/LZType11.hpp
win32:HEADERS += \
$$PWD/include/win32_largefilewrapper.h

View File

@ -24,17 +24,19 @@ namespace io
{
namespace Compression
{
// Zlib compression
atInt32 decompressZlib(const atUint8* src, atUint32 srcLen, atUint8* dst, atUint32 dstLen);
atInt32 compressZlib(const atUint8* src, atUint32 srcLen, atUint8* dst, atUint32 dstLen);
// Zlib compression
atInt32 decompressZlib(const atUint8* src, atUint32 srcLen, atUint8*& dst, atUint32 dstLen);
atInt32 compressZlib(const atUint8* src, atUint32 srcLen, atUint8*& dst, atUint32 dstLen);
// lzo compression
atInt32 decompressLZO(const atUint8* source, atInt32 sourceSize, atUint8* dest, atInt32& dstSize);
// lzo compression
atInt32 decompressLZO(const atUint8* source, atInt32 sourceSize, atUint8*& dst, atInt32& dstSize);
// Yaz0 encoding
atUint32 yaz0Decode(const atUint8* src, atUint8* dst, atUint32 uncompressedSize);
atUint32 yaz0Encode(const atUint8* src, atUint32 srcSize, atUint8* data);
// Yaz0 encoding
atUint32 yaz0Decode(const atUint8* src, atUint8*& dst, atUint32 uncompressedSize);
atUint32 yaz0Encode(const atUint8* src, atUint32 srcSize, atUint8* data);
atUint32 decompressLZ77(const atUint8* src, atUint32 srcLen, atUint8*& dst);
atUint32 compressLZ77(const atUint8* src, atUint32 srcLen, atUint8*& dst, bool extended = false);
}
}
}

44
include/LZ77/LZBase.hpp Normal file
View File

@ -0,0 +1,44 @@
#ifndef LZBASE_HPP
#define LZBASE_HPP
#include <string>
#include "LZ77/LZLookupTable.hpp"
// Shared state and brute-force match search for the LZ77 codecs.
// Concrete codecs (LZType10, LZType11) implement compress() and decompress().
class LZBase
{
public:
// Stores the tuning parameters. The read-ahead buffer starts out equal to
// minimumMatch; the lookup table member is default-constructed.
explicit LZBase(atInt32 minimumOffset=1,atInt32 slidingWindow=4096, atInt32 minimumMatch=3, atInt32 blockSize=8);
virtual ~LZBase() {}
// Codec entry points: read srcLength bytes from src, hand the result back
// through dest and return a byte count.
virtual atUint32 compress(const atUint8* src, atUint8*& dest, atUint32 srcLength)=0;
virtual atUint32 decompress(const atUint8* src, atUint8*& dest, atUint32 srcLength)=0;
// Plain accessors for the tuning parameters below.
void setSlidingWindow(atInt32 SlidingWindow);
atInt32 slidingWindow();
void setReadAheadBuffer(atInt32 ReadAheadBuffer);
atInt32 readAheadBuffer();
void setMinMatch(atInt32 minimumMatch);
atInt32 minMatch();
void setBlockSize(atInt32 BlockSize);
atInt32 blockSize();
void setMinimumOffset(atUint32 minimumOffset);
atUint32 minimumOffset();
private:
// Number of leading bytes (at most len) on which str1 and str2 agree.
atInt32 subMatch(const atUint8* str1,const uint8_t* str2,const atInt32 len);
// Walks candidate match starts backwards from startLBPtr down to beginSearchPtr
// and returns the longest match for the bytes in [searchPosPtr, endLABufferPtr).
LZLengthOffset windowSearch(atUint8* beginSearchPtr, atUint8* searchPosPtr, atUint8* endLABufferPtr, atUint8* startLBPtr);
protected:
// Brute-force search for the longest match for the data at posPtr.
// Sets length to -1 (stored in an unsigned field) when posPtr is at or past dataEnd.
LZLengthOffset search(atUint8* posPtr, atUint8* dataBegin, atUint8* dataEnd);
atInt32 m_slidingWindow;//How many bytes before the current position may be searched
atInt32 m_readAheadBuffer;//Maximum number of bytes a single match may cover
atInt32 m_minMatch;//Minimum number of bytes that have to match before a back-reference is emitted
atInt32 m_blockSize;//Number of items (literals or back-references) per flag byte
atUint32 m_minOffset;//Smallest distance back at which search() starts looking for a match
LZLookupTable m_lookupTable;//Table-based match finder used by the codecs' compress()
};
#endif // LZBASE_HPP

View File

@ -0,0 +1,39 @@
#ifndef LZLOOKUPTABLE_HPP
#define LZLOOKUPTABLE_HPP
#include <map>
#include <deque>
#include <vector>
#include <cstdint>
#include <Athena/Types.hpp>
// Result of a match search: how many bytes matched and how far back the match starts.
// Kept as a plain aggregate because callers initialise it with `{0,0}`.
struct LZLengthOffset
{
    // Number of matched bytes. The search functions store -1 here (which wraps,
    // because the field is unsigned) to signal "position is past the end of the data".
    atUint32 length;
    // Distance back from the current position to the start of the match.
    atUint16 offset;

    // True when both the length and the offset are equal.
    // Declared const so it can be called on const objects and temporaries.
    bool compare_equal(const LZLengthOffset& lo_pair) const
    {
        return length == lo_pair.length && offset == lo_pair.offset;
    }
};
// Match finder that indexes byte sequences of m_minimumMatch bytes by the
// offset at which they occur, so candidate matches can be looked up directly.
class LZLookupTable
{
public:
// Defaults: minimum match 3, sliding window 4096, look-ahead window 18.
LZLookupTable();
// Non-positive arguments fall back to the defaults listed above.
LZLookupTable(atInt32 minimumMatch, atInt32 slidingWindow=4096, atInt32 lookAheadWindow=18);
~LZLookupTable();
// Returns the longest match for the data at curPos and records the positions
// it covers in the table. Sets length to -1 (stored in an unsigned field)
// when curPos is at or past dataEnd.
LZLengthOffset search(atUint8* curPos, const atUint8* dataBegin, const atUint8* dataEnd);
// Sets the maximum match length; non-positive values fall back to 18.
void setLookAheadWindow(atInt32 lookAheadWindow);
private:
// Key: a byte sequence; value: the offset from dataBegin where it was seen.
typedef std::multimap<std::vector<uint8_t>, int32_t> LookupTable;
LookupTable table;
atInt32 m_minimumMatch;//Length of the byte sequences used as table keys
atInt32 m_slidingWindow;//Number of table entries kept before the oldest are erased
atInt32 m_lookAheadWindow;//Maximum length of a reported match
std::vector<uint8_t> m_buffer;//Scratch key reused for lookups and insertions
};
#endif // LZLOOKUPTABLE_HPP

13
include/LZ77/LZType10.hpp Normal file
View File

@ -0,0 +1,13 @@
#ifndef LZ77TYPE10_HPP
#define LZ77TYPE10_HPP
#include "LZBase.hpp"
// LZ77 codec for streams whose first byte is 0x10.
class LZType10 : public LZBase {
public:
    // Forwards the tuning parameters to LZBase.
    explicit LZType10(atInt32 minimumOffset=1, atInt32 SlidingWindow=4096, atInt32 MinimumMatch=3, atInt32 BlockSize=8);

    // `override` lets the compiler verify these really replace the LZBase virtuals.
    // Compresses srcLength bytes of src; the result is handed back through dest
    // and its length is returned.
    atUint32 compress(const atUint8* src, atUint8*& dest, atUint32 srcLength) override;
    // Decompresses srcLen bytes of src; the result is handed back through dst
    // and its length is returned (0 on failure).
    atUint32 decompress(const atUint8* src, atUint8*& dst, atUint32 srcLen) override;
};
#endif // LZ77TYPE10_HPP

15
include/LZ77/LZType11.hpp Normal file
View File

@ -0,0 +1,15 @@
#ifndef LZTYPE11_HPP
#define LZTYPE11_HPP
#include "LZBase.hpp"
// LZ77 codec for streams whose first byte is 0x11 (variable-width length fields).
class LZType11 : public LZBase {
public:
    // Forwards the tuning parameters to LZBase.
    explicit LZType11(atInt32 MinimumOffset=1, atInt32 SlidingWindow=4096, atInt32 MinimumMatch=3, atInt32 BlockSize=8);

    // `override` lets the compiler verify these really replace the LZBase virtuals.
    // Compresses srcLength bytes of src; the result is handed back through dest
    // and its length is returned.
    atUint32 compress(const atUint8 *src, atUint8*& dest, atUint32 srcLength) override;
    // Decompresses srcLength bytes of src; the result is handed back through dest
    // and its length is returned (0 on failure).
    atUint32 decompress(const atUint8 *src, atUint8*& dest, atUint32 srcLength) override;
};
#endif // LZTYPE11_HPP

View File

@ -18,6 +18,8 @@
#include "lzo.h"
#include <iostream>
#include <zlib.h>
#include "LZ77/LZType10.hpp"
#include "LZ77/LZType11.hpp"
namespace Athena
{
@ -26,7 +28,7 @@ namespace io
namespace Compression
{
atInt32 decompressZlib(const atUint8* src, atUint32 srcLen, atUint8* dst, atUint32 dstLen)
atInt32 decompressZlib(const atUint8* src, atUint32 srcLen, atUint8*& dst, atUint32 dstLen)
{
z_stream strm = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
strm.total_in = strm.avail_in = srcLen;
@ -64,7 +66,7 @@ atInt32 decompressZlib(const atUint8* src, atUint32 srcLen, atUint8* dst, atUint
return ret;
}
atInt32 compressZlib(const atUint8 *src, atUint32 srcLen, atUint8 *dst, atUint32 dstLen)
atInt32 compressZlib(const atUint8 *src, atUint32 srcLen, atUint8*& dst, atUint32 dstLen)
{
z_stream strm = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
strm.total_in = strm.avail_in = srcLen;
@ -103,10 +105,10 @@ atInt32 compressZlib(const atUint8 *src, atUint32 srcLen, atUint8 *dst, atUint32
return ret;
}
atInt32 decompressLZO(const atUint8* source, atInt32 sourceSize, atUint8* dest, atInt32& dstSize)
atInt32 decompressLZO(const atUint8* source, atInt32 sourceSize, atUint8*& dst, atInt32& dstSize)
{
int size = dstSize;
int result = lzo1x_decode(dest, &size, source, &sourceSize);
int result = lzo1x_decode(dst, &size, source, &sourceSize);
dstSize = size;
return result;
}
@ -114,7 +116,7 @@ atInt32 decompressLZO(const atUint8* source, atInt32 sourceSize, atUint8* dest,
//src points to the yaz0 source data (to the "real" source data, not at the header!)
//dst points to a buffer uncompressedSize bytes large (you get uncompressedSize from
//the second 4 bytes in the Yaz0 header).
atUint32 yaz0Decode(const atUint8* src, atUint8* dst, atUint32 uncompressedSize)
atUint32 yaz0Decode(const atUint8* src, atUint8*& dst, atUint32 uncompressedSize)
{
atUint32 srcPlace = 0, dstPlace = 0; //current read/write positions
@ -324,6 +326,34 @@ atUint32 simpleEnc(const atUint8* src, atInt32 size, atInt32 pos, atUint32 *pMat
return numBytes;
}
// Decompresses an LZ77 stream, picking the codec from the first byte:
// 0x11 selects LZType11, anything else is handed to LZType10.
//
// src    - compressed data
// srcLen - number of bytes available at src
// dst    - receives the buffer produced by the codec
// Returns the codec's result (the decompressed length, 0 on failure).
// Returns 0 without touching dst when src is null or srcLen is 0.
atUint32 decompressLZ77(const atUint8* src, atUint32 srcLen, atUint8*& dst)
{
    // The type byte must be readable before it can be inspected.
    if (src == nullptr || srcLen == 0)
        return 0;

    // The codecs live on the stack so nothing leaks if decompress() throws.
    if (src[0] == 0x11)
    {
        LZType11 codec;
        return codec.decompress(src, dst, srcLen);
    }

    LZType10 codec;
    return codec.decompress(src, dst, srcLen);
}
// Compresses srcLen bytes of src with one of the LZ77 codecs.
//
// extended - true selects LZType11 with its default parameters,
//            false selects LZType10 constructed with a minimum offset of 2.
// dst      - receives the buffer produced by the codec
// Returns the value returned by the codec's compress().
atUint32 compressLZ77(const atUint8* src, atUint32 srcLen, atUint8*& dst, bool extended)
{
    // The codecs live on the stack so nothing leaks if compress() throws.
    if (extended)
    {
        LZType11 codec;
        return codec.compress(src, dst, srcLen);
    }

    LZType10 codec(2);
    return codec.compress(src, dst, srcLen);
}
} // Compression

144
src/LZ77/LZBase.cpp Normal file
View File

@ -0,0 +1,144 @@
#include "LZ77/LZLookupTable.hpp"
#include "LZ77/LZBase.hpp"
// Records the tuning parameters. The read-ahead buffer starts out equal to the
// minimum match length; derived codecs widen it in their own constructors.
LZBase::LZBase(atInt32 minimumOffset,
               atInt32 slidingWindow,
               atInt32 minimumMatch,
               atInt32 blockSize)
    : m_slidingWindow(slidingWindow)
    , m_readAheadBuffer(minimumMatch)
    , m_minMatch(minimumMatch)
    , m_blockSize(blockSize)
    , m_minOffset(minimumOffset)
{
    // m_lookupTable is default-constructed.
}
// Sliding window: how many bytes before the current position may be searched.
void LZBase::setSlidingWindow(atInt32 slidingWindow) { m_slidingWindow = slidingWindow; }

atInt32 LZBase::slidingWindow()
{
    return m_slidingWindow;
}

// Read-ahead buffer: the longest match a single search may report.
void LZBase::setReadAheadBuffer(atInt32 readAheadBuffer) { m_readAheadBuffer = readAheadBuffer; }

atInt32 LZBase::readAheadBuffer()
{
    return m_readAheadBuffer;
}

// Minimum match: shortest run worth encoding as a back-reference.
void LZBase::setMinMatch(atInt32 minimumMatch)
{
    m_minMatch = minimumMatch;
}

atInt32 LZBase::minMatch()
{
    return m_minMatch;
}

// Block size: number of items governed by one flag byte.
void LZBase::setBlockSize(atInt32 blockSize) { m_blockSize = blockSize; }

atInt32 LZBase::blockSize()
{
    return m_blockSize;
}

// Minimum offset: smallest distance back at which search() starts looking.
void LZBase::setMinimumOffset(atUint32 minimumOffset)
{
    m_minOffset = minimumOffset;
}

atUint32 LZBase::minimumOffset() { return m_minOffset; }
/*
DerricMc:
This search function is my own work and is in no way affiliated with anyone else.
It uses my own windowSearch function to drastically speed up the search.
Normally a search for one byte is matched, then two, then three, all the way up
to the size of the look-ahead buffer. This function skips the incremental search
and looks for the entire look-ahead buffer at once; if the whole buffer is not
found it returns the next best match (for example, when 18 bytes are searched
for and only 17 of them match, the 17-byte match is returned).

Parameters:
  posPtr    - current position in the data being compressed
  dataBegin - first byte of the data
  dataEnd   - one past the last byte of the data
Returns the best match found, or {0,0} when no search is possible.
The length is set to -1 (stored in an unsigned field) when posPtr is at or past
dataEnd.
*/
LZLengthOffset LZBase::search(atUint8* posPtr, atUint8* dataBegin, atUint8* dataEnd)
{
    LZLengthOffset results = {0, 0};

    // Signal a search failure: the current position is past the data to compress.
    if (posPtr >= dataEnd)
    {
        results.length = static_cast<atUint32>(-1);
        return results;
    }

    const atInt32 consumed  = static_cast<atInt32>(posPtr - dataBegin);
    const atInt32 remaining = static_cast<atInt32>(dataEnd - posPtr);

    // Nothing to search without history or with too few bytes left to reach the
    // minimum match.
    if (consumed < 1 || remaining < m_minMatch)
        return results;

    // The first candidate is posPtr - m_minOffset; with less history than that
    // it would lie before dataBegin and the comparison would read out of bounds.
    if (static_cast<atUint32>(consumed) < m_minOffset)
        return results;

    // The look-ahead buffer is the read-ahead size, capped by the bytes remaining.
    const atInt32 lookAheadLen = (remaining < m_readAheadBuffer) ? remaining : m_readAheadBuffer;

    // The search window reaches back at most m_slidingWindow bytes.
    atUint8* searchWindow = (consumed > m_slidingWindow) ? posPtr - m_slidingWindow : dataBegin;

    return windowSearch(searchWindow, posPtr, posPtr + lookAheadLen, posPtr - m_minOffset);
}
// Counts how many leading bytes of str1 and str2 agree, looking at no more
// than len bytes. Returns len itself when no difference is found.
int LZBase::subMatch(const uint8_t* str1,const uint8_t* str2,const int len)
{
    int idx = 0;
    while (idx < len)
    {
        if (str1[idx] != str2[idx])
            return idx;
        ++idx;
    }
    return len;
}
/*
Normally a search for one byte is matched, then two, then three, all the way up
to the size of the look-ahead buffer. This function skips the incremental search:
it compares the entire look-ahead buffer against each candidate and, when the
whole buffer does not match, keeps the number of bytes that did match before the
first difference. subMatch returns that count, which equals the full length when
both byte strings are identical.

    ...[][][][][][][][][][][][]|[][][][][][][][][][][][][][]
                               |
          Search Window   Current Pos   LookAheadBuffer
         Up to 4096 bytes               Up to 18 bytes

                      Sliding Window
                     Up to 4114 bytes

beginSearchPtr - oldest position a match may start at
searchPosPtr   - current position (start of the look-ahead buffer)
endLABufferPtr - one past the end of the look-ahead buffer
startLBPtr     - first (closest) candidate; candidates are tried walking backwards
*/
LZLengthOffset LZBase::windowSearch(atUint8* beginSearchPtr, atUint8* searchPosPtr, atUint8* endLABufferPtr, atUint8* startLBPtr)
{
    const atInt32 windowLen = endLABufferPtr - beginSearchPtr; // size of the entire sliding window
    const atInt32 needleLen = endLABufferPtr - searchPosPtr;   // size of the look-ahead buffer
    LZLengthOffset best = {0, 0};

    // The bytes being looked for cannot be longer than the range being searched.
    if (needleLen > windowLen)
        return best;

    for (atUint8* candidate = startLBPtr; ; --candidate)
    {
        const atInt32 matched = subMatch(candidate, searchPosPtr, needleLen);

        // Only a strictly longer match replaces the current best, so the
        // closest candidate wins a tie.
        if (best.length < (atUint32)matched)
        {
            best.length = matched;
            best.offset = (atInt32)(searchPosPtr - candidate);
        }

        // The look-ahead buffer is the maximum size of a match: stop on a full hit.
        if (best.length == (atUint32)needleLen)
            return best;

        // The candidate at beginSearchPtr is the last one examined.
        if (!(candidate > beginSearchPtr))
            break;
    }

    return best;
}

112
src/LZ77/LZLookupTable.cpp Normal file
View File

@ -0,0 +1,112 @@
#include "LZ77/LZLookupTable.hpp"
// Default configuration: 3-byte keys, a 4096-entry sliding window and an
// 18-byte look-ahead window.
LZLookupTable::LZLookupTable()
    : m_minimumMatch(3),
      m_slidingWindow(4096),
      m_lookAheadWindow(18)
{
    // The scratch key always holds exactly m_minimumMatch bytes.
    m_buffer.resize(m_minimumMatch);
}
// Configures the table explicitly. Any non-positive argument falls back to its
// default: minimum match 3, sliding window 4096, look-ahead window 18.
LZLookupTable::LZLookupTable(atInt32 minimumMatch, atInt32 slidingWindow, atInt32 lookAheadWindow)
{
    m_minimumMatch    = (minimumMatch > 0)    ? minimumMatch    : 3;
    m_slidingWindow   = (slidingWindow > 0)   ? slidingWindow   : 4096;
    m_lookAheadWindow = (lookAheadWindow > 0) ? lookAheadWindow : 18;

    // search() copies m_minimumMatch bytes into m_buffer starting at begin(),
    // so the vector must actually contain that many elements. reserve() only
    // allocates capacity and leaves size() at 0, which made those writes
    // undefined behaviour; resize() matches the default constructor.
    m_buffer.resize(m_minimumMatch);
}
// Nothing to release by hand: the containers clean up after themselves.
LZLookupTable::~LZLookupTable() = default;
// Sets the maximum match length reported by search().
// Non-positive values fall back to the default of 18.
void LZLookupTable::setLookAheadWindow(atInt32 lookAheadWindow)
{
    m_lookAheadWindow = (lookAheadWindow > 0) ? lookAheadWindow : 18;
}
// Finds the longest earlier occurrence of the data at curPos and records the
// positions covered by the result in the table.
//
// curPos    - current position in the data being compressed
// dataBegin - first byte of the data
// dataEnd   - one past the last byte of the data
//
// Returns {length, offset} of the best match, or {0,0} when there is none.
// The length is set to -1 (stored in an unsigned field) when curPos is at or
// past dataEnd.
//
// The method has three phases: find the best match among table entries that
// share the first m_minimumMatch bytes, insert the positions covered by the
// result, then erase entries that have fallen out of the sliding window.
LZLengthOffset LZLookupTable::search(atUint8* curPos, const atUint8* dataBegin, const atUint8* dataEnd)
{
    LZLengthOffset loPair = {0, 0};

    // Signal a search failure: the current position is past the data to compress.
    if (curPos >= dataEnd)
    {
        loPair.length = static_cast<atUint32>(-1);
        return loPair;
    }

    // With fewer than m_minimumMatch bytes left no key can be formed without
    // reading past dataEnd, and no match is possible. Nothing is inserted, so
    // nothing needs to be erased either.
    const int32_t remaining = static_cast<int32_t>(dataEnd - curPos);
    if (remaining < m_minimumMatch)
        return loPair;

    // assign() sizes the key itself, so it does not depend on how m_buffer was
    // constructed.
    m_buffer.assign(curPos, curPos + m_minimumMatch);
    const int32_t currentOffset = static_cast<int32_t>(curPos - dataBegin);

    // Find code
    if (currentOffset > 0)
    {
        const int32_t lookAheadBufferLength = (remaining < m_lookAheadWindow) ? remaining : m_lookAheadWindow;
        auto elements = table.equal_range(m_buffer);

        // Visit equal keys newest-first. C++11 guarantees that elements with the
        // same key keep their insertion order. The iterator is decremented
        // inside the loop so it never steps before elements.first (which may be
        // table.begin()).
        auto iter = elements.second;
        while (iter != elements.first)
        {
            --iter;

            // The key already proves the first m_minimumMatch bytes are equal.
            int32_t matchLength = m_minimumMatch;
            for (; matchLength < lookAheadBufferLength; ++matchLength)
            {
                if (*(dataBegin + iter->second + matchLength) != *(curPos + matchLength))
                    break;
            }

            // Keep the longest match found so far.
            // When lengths are the same the offset closer to the look-ahead buffer wins.
            if (loPair.length < (atUint32)matchLength)
            {
                loPair.length = matchLength;
                loPair.offset = currentOffset - iter->second;
            }

            // Found the longest possible match, so stop looking.
            if (loPair.length == (atUint32)m_lookAheadWindow)
                break;
        }
    }
    // end find code

    // Insert code: index the current position and every position the match covers.
    table.insert(std::make_pair(m_buffer, currentOffset));
    for (atUint32 i = 1; i < loPair.length; i++)
    {
        if (dataEnd - (curPos + i) < m_minimumMatch)
            break;
        m_buffer.assign(curPos + i, curPos + i + m_minimumMatch);
        table.insert(std::make_pair(m_buffer, static_cast<int32_t>(currentOffset + i)));
    }
    // end insert code

    // Delete code: drop the oldest entries once the table outgrows the window.
    const int32_t slidingWindowOffset = (currentOffset > m_slidingWindow) ? currentOffset - m_slidingWindow : 0; // absolute offset
    const int32_t excess = static_cast<int32_t>(table.size()) - m_slidingWindow;
    for (int32_t i = 0; i < excess; ++i)
    {
        const atUint8* oldest = dataBegin + slidingWindowOffset + i;
        m_buffer.assign(oldest, oldest + m_minimumMatch);
        auto stale = table.equal_range(m_buffer);
        for (auto iter = stale.first; iter != stale.second; ++iter)
        {
            if (slidingWindowOffset + i == iter->second)
            {
                // There should be no other entry with the same offset.
                table.erase(iter);
                break;
            }
        }
    }
    // end delete code

    return loPair;
}

135
src/LZ77/LZType10.cpp Normal file
View File

@ -0,0 +1,135 @@
#include "LZ77/LZLookupTable.hpp"
#include "LZ77/LZType10.hpp"
#include <Athena/BinaryWriter.hpp>
#include <memory.h>
// Builds a type 0x10 codec.
// The read-ahead buffer is normalized to the range (minimum match) through
// (minimum match + 15) so that an encoded match length fits in 4 bits.
LZType10::LZType10(atInt32 MinimumOffset, atInt32 SlidingWindow, atInt32 MinimumMatch, atInt32 BlockSize)
    : LZBase(MinimumOffset,SlidingWindow,MinimumMatch,BlockSize)
{
    m_readAheadBuffer = m_minMatch + 0xF;

    // compress() takes its matches from the lookup table, so the table must use
    // the same length limit (LZType11 does the same). With the default minimum
    // match of 3 this is 18, the table's own default.
    // NOTE(review): the table's key length stays at its default of 3 whatever
    // MinimumMatch is - there is no setter for it.
    m_lookupTable.setLookAheadWindow(m_readAheadBuffer);
}
// Compresses srcLength bytes of src into a type 0x10 stream.
//
// Layout written: a 4-byte header ((srcLength << 8) | 0x10, little endian),
// then blocks of one flag byte followed by up to m_blockSize items. Bit (7 - i)
// of the flag byte is 1 when item i is a 2-byte length/offset pair and 0 when it
// is a literal byte. The stream is zero-padded to a multiple of 4 bytes.
//
// dstBuf receives outbuf.data(); the return value is outbuf.length().
// NOTE(review): outbuf is a local object - confirm that the buffer returned by
// data() stays valid after it is destroyed, otherwise dstBuf dangles.
// NOTE(review): the flag byte has 8 bits, so m_blockSize is presumably expected
// to be in 1..8 - verify against callers.
atUint32 LZType10::compress(const atUint8* src, atUint8*& dstBuf, atUint32 srcLength)
{
    atUint32 encodeSize = (srcLength << 8) | (0x10);
    encodeSize = Athena::utility::LittleUint32(encodeSize); // file size is always written little endian

    Athena::io::BinaryWriter outbuf("tmp");
    outbuf.writeUint32(encodeSize);

    atUint8* ptrStart = (atUint8*)src;
    atUint8* ptrEnd   = (atUint8*)(src + srcLength);

    // Every item takes at most two bytes, so a block needs at most
    // m_blockSize * 2 bytes. A vector releases the scratch space even if the
    // writer throws.
    std::vector<atUint8> compressedBytes(m_blockSize * 2);

    while (ptrStart < ptrEnd)
    {
        atUint8 blockLen = 0; // flag byte, e.g. 01001000: second and fifth items are compressed
        atUint8* ptrBytes = compressedBytes.data();

        for (atInt32 i = 0; i < m_blockSize; i++)
        {
            // Input exhausted in the middle of a block. Without this check the
            // search's "past the end" result (length -1 in an unsigned field)
            // passes the minimum-match test below and bogus items are emitted.
            if (ptrStart >= ptrEnd)
                break;

            LZLengthOffset searchResult = m_lookupTable.search(ptrStart, src, ptrEnd);

            // Only matches of at least the minimum match length are encoded.
            if (searchResult.length >= (atUint32)m_minMatch)
            {
                // Length in the top 4 bits, offset - 1 in the low 12 bits,
                // stored big endian.
                atUint16 lenOff = (((searchResult.length - m_minMatch) & 0xF) << 12) | ((searchResult.offset - 1) & 0xFFF);
                Athena::utility::BigUint16(lenOff);
                memcpy(ptrBytes, &lenOff, sizeof(atUint16));
                ptrBytes += sizeof(atUint16);
                ptrStart += searchResult.length;
                blockLen |= (1 << (7 - i));
            }
            else
                *ptrBytes++ = *ptrStart++;
        }

        outbuf.writeByte(blockLen);
        outbuf.writeUBytes(compressedBytes.data(), (atUint64)(ptrBytes - compressedBytes.data()));
    }

    // Add zeros until the stream is a multiple of 4
    while ((outbuf.position() % 4) != 0)
        outbuf.writeByte(0);

    dstBuf = outbuf.data();
    outbuf.save();
    return outbuf.length();
}
// Decompresses a type 0x10 stream.
//
// src       - compressed data, starting with the 4-byte header
// dst       - receives a new[]-allocated buffer holding the decompressed data
// srcLength - number of bytes available at src
//
// Returns the decompressed size taken from the header.
// Returns 0 without touching dst when src is null, shorter than the header, or
// does not start with 0x10. Returns 0 and sets dst to nullptr when the stream is
// corrupt (a length/offset pair is cut off or points before the output start).
// When the input ends before the output is full, the declared size is still
// returned and the rest of the buffer is left unwritten.
atUint32 LZType10::decompress(const atUint8* src, atUint8*& dst, atUint32 srcLength)
{
    if (src == nullptr || srcLength < 4 || src[0] != 0x10)
        return 0;

    // memcpy instead of a pointer cast: src need not be aligned for a 32-bit read.
    atUint32 uncompressedSize = 0;
    memcpy(&uncompressedSize, src, sizeof(atUint32));
    Athena::utility::LittleUint32(uncompressedSize); // the size is stored little endian
    uncompressedSize = uncompressedSize >> 8;        // the first byte is the encode flag

    atUint8* uncompressedData = new atUint8[uncompressedSize];
    atUint8* outputPtr        = uncompressedData;
    atUint8* outputEndPtr     = uncompressedData + uncompressedSize;
    const atUint8* inputPtr    = src + 4;
    const atUint8* inputEndPtr = src + srcLength;

    // Shared cleanup for corrupt input.
    auto fail = [&]() -> atUint32
    {
        delete[] uncompressedData;
        dst = nullptr;
        return 0;
    };

    while (inputPtr < inputEndPtr && outputPtr < outputEndPtr)
    {
        // One bit per item, most significant first: 1 = length/offset pair,
        // 0 = literal byte. E.g. 10010000 marks the first and fourth items.
        const atUint8 isCompressed = *inputPtr++;

        for (atUint32 i = 0; i < (atUint32)m_blockSize; i++)
        {
            // Checked before every item, including the first one after the
            // flag byte, so neither buffer is overrun.
            if (!(inputPtr < inputEndPtr && outputPtr < outputEndPtr))
                break;

            if ((isCompressed >> (7 - i)) & 0x1)
            {
                // A pair needs two input bytes.
                if (static_cast<atUint32>(inputEndPtr - inputPtr) < sizeof(atUint16))
                    return fail();

                atUint16 lenOff;
                memcpy(&lenOff, inputPtr, sizeof(atUint16));
                Athena::utility::BigUint16(lenOff);
                inputPtr += sizeof(atUint16);

                LZLengthOffset decoding;
                decoding.length = (lenOff >> 12) + m_minMatch;
                decoding.offset = static_cast<atUint16>((lenOff & 0xFFF) + 1);

                // An offset reaching before the start of the output means the
                // data is not a valid stream. Distances are compared so that no
                // out-of-range pointer is ever formed.
                if (decoding.offset > static_cast<atUint32>(outputPtr - uncompressedData))
                    return fail();

                // Never copy beyond the declared output size.
                const atUint32 room = static_cast<atUint32>(outputEndPtr - outputPtr);
                if (decoding.length > room)
                    decoding.length = room;

                // Byte-by-byte on purpose: source and destination may overlap.
                for (atUint32 j = 0; j < decoding.length; ++j)
                    outputPtr[j] = (outputPtr - decoding.offset)[j];

                outputPtr += decoding.length;
            }
            else
                *outputPtr++ = *inputPtr++;
        }
    }

    dst = uncompressedData;
    return uncompressedSize;
}

213
src/LZ77/LZType11.cpp Normal file
View File

@ -0,0 +1,213 @@
#include "LZ77/LZLookupTable.hpp"
#include "LZ77/LZType11.hpp"
#include <Athena/BinaryWriter.hpp>
#include <memory.h>
// Builds a type 0x11 codec. The longest encodable match is the minimum match
// plus the ranges of the 4-, 8- and 16-bit length fields; both the read-ahead
// buffer and the lookup table's look-ahead window are set to that length.
LZType11::LZType11(atInt32 minimumOffset, atInt32 slidingWindow, atInt32 minimumMatch, atInt32 blockSize)
    : LZBase(minimumOffset,slidingWindow,minimumMatch,blockSize)
{
    const atInt32 longestMatch = (0xF + 0xFF + 0xFFFF + m_minMatch);
    m_readAheadBuffer = longestMatch;
    m_lookupTable.setLookAheadWindow(longestMatch);
}
// Compresses srcLength bytes of src into a type 0x11 stream.
//
// Header: (srcLength << 8) | 0x11 as a little-endian 32-bit value. When the
// length does not fit in 24 bits, 0x11 alone is written followed by the full
// 32-bit length.
// Body: blocks of one flag byte followed by up to m_blockSize items. Bit (7 - i)
// of the flag byte is 1 when item i is a length/offset token and 0 when it is a
// literal byte. The stream is zero-padded to a multiple of 4 bytes.
//
// Token sizes (the offset always uses the low 12 bits):
//   2 bytes - lengths up to 16, stored as length - 1 in the top 4 bits
//             (so the top nibble is 2..15 with the default minimum match of 3)
//   3 bytes - lengths 17..272, top nibble 0000, then 8 bits of length - 17
//   4 bytes - lengths 273..65808, top nibble 0001, then 16 bits of length - 273
//
// dst receives outbuff.data(); the return value is outbuff.length().
// NOTE(review): outbuff is a local object - confirm that the buffer returned by
// data() stays valid after it is destroyed, otherwise dst dangles.
atUint32 LZType11::compress(const atUint8* src, atUint8*& dst, atUint32 srcLength)
{
    Athena::io::BinaryWriter outbuff("tmp");

    if (srcLength > 0xFFFFFF)
    {
        // Length needs more than 24 bits (16 MiB or more).
        atUint32 encodeFlag = 0x11;
        // Convert a copy: srcLength itself is still needed below to locate the
        // end of the input and must not be byte-swapped.
        atUint32 encodedLength = srcLength;
        Athena::utility::LittleUint32(encodeFlag);
        Athena::utility::LittleUint32(encodedLength); // size data is little endian
        outbuff.writeUint32(encodeFlag);
        outbuff.writeUint32(encodedLength);
    }
    else
    {
        atUint32 encodeSize = (srcLength << 8) | (0x11);
        Athena::utility::LittleUint32(encodeSize);
        outbuff.writeUint32(encodeSize);
    }

    atUint8* ptrStart = (atUint8*)src;
    atUint8* ptrEnd   = (atUint8*)(src + srcLength);

    // A token can take up to four bytes, so a block needs up to m_blockSize * 4
    // bytes of scratch space (m_blockSize * 2 overflowed as soon as 3- or 4-byte
    // tokens were produced). A vector also releases the space if the writer throws.
    std::vector<atUint8> compressedBytes(m_blockSize * 4);

    const atUint8  maxTwoByteMatch   = 0xF + 1;
    const atUint8  minThreeByteMatch = maxTwoByteMatch + 1;        // maximum two-byte match + 1
    const atUint16 maxThreeByteMatch = 0xFF + minThreeByteMatch;
    const atUint16 minFourByteMatch  = maxThreeByteMatch + 1;      // maximum three-byte match + 1
    const atInt32  maxFourByteMatch  = 0xFFFF + minFourByteMatch;

    while (ptrStart < ptrEnd)
    {
        atUint8 blockSize = 0; // flag byte, e.g. 01001000: second and fifth items are compressed
        atUint8* ptrBytes = compressedBytes.data();

        for (atInt32 i = 0; i < m_blockSize; i++)
        {
            // Input exhausted in the middle of a block. Without this check the
            // search's "past the end" result (length -1 in an unsigned field)
            // passes the minimum-match test below.
            if (ptrStart >= ptrEnd)
                break;

            LZLengthOffset searchResult = m_lookupTable.search(ptrStart, src, ptrEnd);

            // Only matches of at least the minimum match length are encoded.
            if (searchResult.length >= (atUint32)m_minMatch)
            {
                // Tokens are stored big endian.
                if (searchResult.length <= maxTwoByteMatch)
                {
                    atUint16 lenOff = ((((searchResult.length - 1) & 0xF) << 12) | // Bits 15-12
                                       ((searchResult.offset - 1) & 0xFFF));        // Bits 11-0
                    Athena::utility::BigUint16(lenOff);
                    memcpy(ptrBytes, &lenOff, 2);
                    ptrBytes += 2;
                }
                else if (searchResult.length <= maxThreeByteMatch)
                {
                    atUint32 lenOff = ((((searchResult.length - minThreeByteMatch) & 0xFF) << 12) | // Bits 19-12
                                       ((searchResult.offset - 1) & 0xFFF));                         // Bits 11-0
                    Athena::utility::BigUint32(lenOff);
                    // Skip the leading byte so only the lower 24 bits of the value are written.
                    memcpy(ptrBytes, (atUint8*)&lenOff + 1, 3);
                    ptrBytes += 3;
                }
                else if (searchResult.length <= (atUint32)maxFourByteMatch)
                {
                    atUint32 lenOff = ((1 << 28) |                                                    // Bits 31-28: four-byte flag
                                       (((searchResult.length - minFourByteMatch) & 0xFFFF) << 12) | // Bits 27-12
                                       ((searchResult.offset - 1) & 0xFFF));                         // Bits 11-0
                    Athena::utility::BigUint32(lenOff);
                    memcpy(ptrBytes, &lenOff, 4);
                    ptrBytes += 4;
                }

                ptrStart += searchResult.length;
                blockSize |= (1 << (7 - i));
            }
            else
                *ptrBytes++ = *ptrStart++;
        }

        outbuff.writeByte(blockSize);
        outbuff.writeUBytes(compressedBytes.data(), (atUint64)(ptrBytes - compressedBytes.data()));
    }

    // Add zeros until the stream is a multiple of 4
    while ((outbuff.position() % 4) != 0)
        outbuff.writeByte(0);

    dst = outbuff.data();
    return outbuff.length();
}
// Decompresses a type 0x11 stream.
//
// src       - compressed data, starting with the header
// dst       - receives a new[]-allocated buffer holding the decompressed data
// srcLength - number of bytes available at src
//
// Header: a little-endian 32-bit value whose low byte is 0x11 and whose upper
// 24 bits are the decompressed size. When those 24 bits are zero, the real size
// follows as a separate little-endian 32-bit value.
//
// Returns the decompressed size.
// Returns 0 without touching dst when src is null, shorter than the header, or
// does not start with 0x11. Returns 0 and sets dst to nullptr when the stream is
// corrupt (a token is cut off or points before the output start).
// When the input ends before the output is full, the declared size is still
// returned and the rest of the buffer is left unwritten.
atUint32 LZType11::decompress(const atUint8* src, atUint8*& dst, atUint32 srcLength)
{
    if (src == nullptr || srcLength < 4 || src[0] != 0x11)
        return 0;

    // memcpy instead of a pointer cast: src need not be aligned for a 32-bit read.
    atUint32 uncompressedLen = 0;
    memcpy(&uncompressedLen, src, sizeof(atUint32));
    Athena::utility::LittleUint32(uncompressedLen); // the size is stored little endian
    uncompressedLen = uncompressedLen >> 8;         // the first byte is the encode flag

    atUint32 currentOffset = 4;

    // A zero 24-bit size means the true size did not fit (16 MiB or more) and
    // is stored in the next 4 bytes. A stream too short to hold that field is
    // treated as genuinely empty.
    if (uncompressedLen == 0 && srcLength >= 8)
    {
        atUint32 filesize = 0;
        memcpy(&filesize, src + 4, sizeof(atUint32));
        // The extended size was previously read and then dropped, so large
        // streams always decoded to zero bytes.
        uncompressedLen = Athena::utility::LittleUint32(filesize);
        currentOffset += 4;
    }

    atUint8* uncompressedData = new atUint8[uncompressedLen];
    atUint8* outputPtr        = uncompressedData;
    atUint8* outputEndPtr     = uncompressedData + uncompressedLen;
    const atUint8* inputPtr    = src + currentOffset;
    const atUint8* inputEndPtr = src + srcLength;

    const atUint8  maxTwoByteMatch   = 0xF + 1;
    const atUint8  threeByteDenorm   = maxTwoByteMatch + 1; // added to the length of a 3-byte token
    const atUint16 maxThreeByteMatch = 0xFF + threeByteDenorm;
    const atUint16 fourByteDenorm    = maxThreeByteMatch + 1; // added to the length of a 4-byte token

    // Shared cleanup for corrupt input.
    auto fail = [&]() -> atUint32
    {
        delete[] uncompressedData;
        dst = nullptr;
        return 0;
    };

    LZLengthOffset decoding;

    while (inputPtr < inputEndPtr && outputPtr < outputEndPtr)
    {
        // One bit per item, most significant first: 1 = length/offset token,
        // 0 = literal byte. E.g. 10010000 marks the first and fourth items.
        const atUint8 isCompressed = *inputPtr++;

        for (atInt32 i = 0; i < m_blockSize; i++)
        {
            // Checked before every item, including the first one after the
            // flag byte, so neither buffer is overrun.
            if (!(inputPtr < inputEndPtr && outputPtr < outputEndPtr))
                break;

            if ((isCompressed >> (7 - i)) & 0x1)
            {
                const atUint8  metaDataSize = *inputPtr >> 4; // the top 4 bits select the token size
                const atUint32 available    = static_cast<atUint32>(inputEndPtr - inputPtr);

                if (metaDataSize == 0)
                {
                    // Three bytes of length/offset metadata
                    if (available < 3)
                        return fail();
                    atUint32 lenOff = 0;
                    memcpy((atUint8*)&lenOff + 1, inputPtr, 3);
                    inputPtr += 3;
                    Athena::utility::BigUint32(lenOff);
                    decoding.length = (lenOff >> 12) + threeByteDenorm;
                    decoding.offset = (lenOff & 0xFFF) + 1;
                }
                else if (metaDataSize == 1)
                {
                    // Four bytes of length/offset metadata
                    if (available < 4)
                        return fail();
                    atUint32 lenOff = 0;
                    memcpy(&lenOff, inputPtr, 4);
                    inputPtr += 4;
                    Athena::utility::BigUint32(lenOff);
                    decoding.length = ((lenOff >> 12) & 0xFFFF) + fourByteDenorm; // masks off the four-byte flag
                    decoding.offset = (lenOff & 0xFFF) + 1;
                }
                else
                {
                    // Top nibble 2..15: two bytes of length/offset metadata
                    if (available < 2)
                        return fail();
                    atUint16 lenOff = 0;
                    memcpy(&lenOff, inputPtr, 2);
                    inputPtr += 2;
                    Athena::utility::BigUint16(lenOff);
                    decoding.length = (lenOff >> 12) + 1;
                    decoding.offset = (lenOff & 0xFFF) + 1;
                }

                // An offset reaching before the start of the output means the
                // data is not a valid stream. Distances are compared so that no
                // out-of-range pointer is ever formed.
                if (decoding.offset > static_cast<atUint32>(outputPtr - uncompressedData))
                    return fail();

                // Never copy beyond the declared output size.
                const atUint32 room = static_cast<atUint32>(outputEndPtr - outputPtr);
                if (decoding.length > room)
                    decoding.length = room;

                // Byte-by-byte on purpose: source and destination may overlap.
                for (atUint32 j = 0; j < decoding.length; ++j)
                    outputPtr[j] = (outputPtr - decoding.offset)[j];

                outputPtr += decoding.length;
            }
            else
                *outputPtr++ = *inputPtr++;
        }
    }

    dst = uncompressedData;
    return uncompressedLen;
}