Blame | Last modification | View Log | RSS feed
/* LzFind.c -- Match finder for LZ algorithms2008-10-04 : Igor Pavlov : Public domain */#include <string.h>#include "LzFind.h"#include "LzHash.h"#define kEmptyHashValue 0#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */#define kNormalizeMask (~(kNormalizeStepMin - 1))#define kMaxHistorySize ((UInt32)3 << 30)#define kStartMaxLen 3static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc){if (!p->directInput){alloc->Free(alloc, p->bufferBase);p->bufferBase = 0;}}/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc){UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;if (p->directInput){p->blockSize = blockSize;return 1;}if (p->bufferBase == 0 || p->blockSize != blockSize){LzInWindow_Free(p, alloc);p->blockSize = blockSize;p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);}return (p->bufferBase != 0);}Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue){p->posLimit -= subValue;p->pos -= subValue;p->streamPos -= subValue;}static void MatchFinder_ReadBlock(CMatchFinder *p){if (p->streamEndWasReached || p->result != SZ_OK)return;for (;;){Byte *dest = p->buffer + (p->streamPos - p->pos);size_t size = (p->bufferBase + p->blockSize - dest);if (size == 0)return;p->result = p->stream->Read(p->stream, dest, &size);if (p->result != SZ_OK)return;if (size == 0){p->streamEndWasReached = 1;return;}p->streamPos += (UInt32)size;if (p->streamPos - p->pos > p->keepSizeAfter)return;}}void MatchFinder_MoveBlock(CMatchFinder *p){memmove(p->bufferBase,p->buffer - p->keepSizeBefore,(size_t)(p->streamPos - p->pos + p->keepSizeBefore));p->buffer = p->bufferBase + p->keepSizeBefore;}int MatchFinder_NeedMove(CMatchFinder *p){/* if (p->streamEndWasReached) return 0; */return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);}void MatchFinder_ReadIfRequired(CMatchFinder *p){if (p->streamEndWasReached)return;if (p->keepSizeAfter >= p->streamPos - p->pos)MatchFinder_ReadBlock(p);}static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p){if (MatchFinder_NeedMove(p))MatchFinder_MoveBlock(p);MatchFinder_ReadBlock(p);}static void MatchFinder_SetDefaultSettings(CMatchFinder *p){p->cutValue = 32;p->btMode = 1;p->numHashBytes = 4;/* p->skipModeBits = 0; */p->directInput = 0;p->bigHash = 0;}#define kCrcPoly 0xEDB88320void MatchFinder_Construct(CMatchFinder *p){UInt32 i;p->bufferBase = 0;p->directInput = 0;p->hash = 0;MatchFinder_SetDefaultSettings(p);for (i = 0; i < 256; i++){UInt32 r = i;int j;for (j = 0; j < 8; j++)r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));p->crc[i] = r;}}static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc){alloc->Free(alloc, p->hash);p->hash = 0;}void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc){MatchFinder_FreeThisClassMemory(p, alloc);LzInWindow_Free(p, alloc);}static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc){size_t sizeInBytes = (size_t)num * sizeof(CLzRef);if (sizeInBytes / sizeof(CLzRef) != num)return 0;return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);}int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,ISzAlloc *alloc){UInt32 sizeReserv;if (historySize > kMaxHistorySize){MatchFinder_Free(p, alloc);return 0;}sizeReserv = historySize >> 1;if (historySize > ((UInt32)2 << 30))sizeReserv = historySize >> 2;sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);p->keepSizeBefore = historySize + keepAddBufferBefore + 1;p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;/* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */if (LzInWindow_Create(p, sizeReserv, alloc)){UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1;UInt32 hs;p->matchMaxLen = matchMaxLen;{p->fixedHashSize = 0;if (p->numHashBytes == 2)hs = (1 << 16) - 1;else{hs = historySize - 1;hs |= (hs >> 1);hs |= (hs >> 2);hs |= (hs >> 4);hs |= (hs >> 8);hs >>= 1;/* hs >>= p->skipModeBits; */hs |= 0xFFFF; /* don't change it! It's required for Deflate */if (hs > (1 << 24)){if (p->numHashBytes == 3)hs = (1 << 24) - 1;elsehs >>= 1;}}p->hashMask = hs;hs++;if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;hs += p->fixedHashSize;}{UInt32 prevSize = p->hashSizeSum + p->numSons;UInt32 newSize;p->historySize = historySize;p->hashSizeSum = hs;p->cyclicBufferSize = newCyclicBufferSize;p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);newSize = p->hashSizeSum + p->numSons;if (p->hash != 0 && prevSize == newSize)return 1;MatchFinder_FreeThisClassMemory(p, alloc);p->hash = AllocRefs(newSize, alloc);if (p->hash != 0){p->son = p->hash + p->hashSizeSum;return 1;}}}MatchFinder_Free(p, alloc);return 0;}static void MatchFinder_SetLimits(CMatchFinder *p){UInt32 limit = kMaxValForNormalize - p->pos;UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;if (limit2 < limit)limit = limit2;limit2 = p->streamPos - p->pos;if (limit2 <= p->keepSizeAfter){if (limit2 > 0)limit2 = 1;}elselimit2 -= p->keepSizeAfter;if (limit2 < limit)limit = limit2;{UInt32 lenLimit = p->streamPos - p->pos;if (lenLimit > p->matchMaxLen)lenLimit = p->matchMaxLen;p->lenLimit = lenLimit;}p->posLimit = p->pos + limit;}void MatchFinder_Init(CMatchFinder *p){UInt32 i;for (i = 0; i < p->hashSizeSum; i++)p->hash[i] = kEmptyHashValue;p->cyclicBufferPos = 0;p->buffer = p->bufferBase;p->pos = p->streamPos = p->cyclicBufferSize;p->result = SZ_OK;p->streamEndWasReached = 0;MatchFinder_ReadBlock(p);MatchFinder_SetLimits(p);}static UInt32 MatchFinder_GetSubValue(CMatchFinder *p){return (p->pos - p->historySize - 1) & kNormalizeMask;}void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems){UInt32 i;for (i = 0; i < numItems; i++){UInt32 value = items[i];if (value <= subValue)value = kEmptyHashValue;elsevalue -= subValue;items[i] = value;}}static void MatchFinder_Normalize(CMatchFinder *p){UInt32 subValue = MatchFinder_GetSubValue(p);MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);MatchFinder_ReduceOffsets(p, subValue);}static void MatchFinder_CheckLimits(CMatchFinder *p){if (p->pos == kMaxValForNormalize)MatchFinder_Normalize(p);if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)MatchFinder_CheckAndMoveAndRead(p);if (p->cyclicBufferPos == p->cyclicBufferSize)p->cyclicBufferPos = 0;MatchFinder_SetLimits(p);}static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,UInt32 *distances, UInt32 maxLen){son[_cyclicBufferPos] = curMatch;for (;;){UInt32 delta = pos - curMatch;if (cutValue-- == 0 || delta >= _cyclicBufferSize)return distances;{const Byte *pb = cur - delta;curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];if (pb[maxLen] == cur[maxLen] && *pb == *cur){UInt32 len = 0;while (++len != lenLimit)if (pb[len] != cur[len])break;if (maxLen < len){*distances++ = maxLen = len;*distances++ = delta - 1;if (len == lenLimit)return distances;}}}}}UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,UInt32 *distances, UInt32 maxLen){CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;CLzRef *ptr1 = son + (_cyclicBufferPos << 1);UInt32 len0 = 0, len1 = 0;for (;;){UInt32 delta = pos - curMatch;if (cutValue-- == 0 || delta >= _cyclicBufferSize){*ptr0 = *ptr1 = kEmptyHashValue;return distances;}{CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);const Byte *pb = cur - delta;UInt32 len = (len0 < len1 ? len0 : len1);if (pb[len] == cur[len]){if (++len != lenLimit && pb[len] == cur[len])while (++len != lenLimit)if (pb[len] != cur[len])break;if (maxLen < len){*distances++ = maxLen = len;*distances++ = delta - 1;if (len == lenLimit){*ptr1 = pair[0];*ptr0 = pair[1];return distances;}}}if (pb[len] < cur[len]){*ptr1 = curMatch;ptr1 = pair + 1;curMatch = *ptr1;len1 = len;}else{*ptr0 = curMatch;ptr0 = pair;curMatch = *ptr0;len0 = len;}}}}static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue){CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;CLzRef *ptr1 = son + (_cyclicBufferPos << 1);UInt32 len0 = 0, len1 = 0;for (;;){UInt32 delta = pos - curMatch;if (cutValue-- == 0 || delta >= _cyclicBufferSize){*ptr0 = *ptr1 = kEmptyHashValue;return;}{CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);const Byte *pb = cur - delta;UInt32 len = (len0 < len1 ? len0 : len1);if (pb[len] == cur[len]){while (++len != lenLimit)if (pb[len] != cur[len])break;{if (len == lenLimit){*ptr1 = pair[0];*ptr0 = pair[1];return;}}}if (pb[len] < cur[len]){*ptr1 = curMatch;ptr1 = pair + 1;curMatch = *ptr1;len1 = len;}else{*ptr0 = curMatch;ptr0 = pair;curMatch = *ptr0;len0 = len;}}}}#define MOVE_POS \++p->cyclicBufferPos; \p->buffer++; \if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);#define MOVE_POS_RET MOVE_POS return offset;static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }#define GET_MATCHES_HEADER2(minLen, ret_op) \UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \cur = p->buffer;#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue#define GET_MATCHES_FOOTER(offset, maxLen) \offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \distances + offset, maxLen) - distances); MOVE_POS_RET;#define SKIP_FOOTER \SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances){UInt32 offset;GET_MATCHES_HEADER(2)HASH2_CALC;curMatch = p->hash[hashValue];p->hash[hashValue] = p->pos;offset = 0;GET_MATCHES_FOOTER(offset, 1)}UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances){UInt32 offset;GET_MATCHES_HEADER(3)HASH_ZIP_CALC;curMatch = p->hash[hashValue];p->hash[hashValue] = p->pos;offset = 0;GET_MATCHES_FOOTER(offset, 2)}static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances){UInt32 hash2Value, delta2, maxLen, offset;GET_MATCHES_HEADER(3)HASH3_CALC;delta2 = p->pos - p->hash[hash2Value];curMatch = p->hash[kFix3HashSize + hashValue];p->hash[hash2Value] =p->hash[kFix3HashSize + hashValue] = p->pos;maxLen = 2;offset = 0;if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur){for (; maxLen != lenLimit; maxLen++)if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])break;distances[0] = maxLen;distances[1] = delta2 - 1;offset = 2;if (maxLen == lenLimit){SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));MOVE_POS_RET;}}GET_MATCHES_FOOTER(offset, maxLen)}static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances){UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;GET_MATCHES_HEADER(4)HASH4_CALC;delta2 = p->pos - p->hash[ hash2Value];delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];curMatch = p->hash[kFix4HashSize + hashValue];p->hash[ hash2Value] =p->hash[kFix3HashSize + hash3Value] =p->hash[kFix4HashSize + hashValue] = p->pos;maxLen = 1;offset = 0;if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur){distances[0] = maxLen = 2;distances[1] = delta2 - 1;offset = 2;}if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur){maxLen = 3;distances[offset + 1] = delta3 - 1;offset += 2;delta2 = delta3;}if (offset != 0){for (; maxLen != lenLimit; maxLen++)if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])break;distances[offset - 2] = maxLen;if (maxLen == lenLimit){SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));MOVE_POS_RET;}}if (maxLen < 3)maxLen = 3;GET_MATCHES_FOOTER(offset, maxLen)}static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances){UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;GET_MATCHES_HEADER(4)HASH4_CALC;delta2 = p->pos - p->hash[ hash2Value];delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];curMatch = p->hash[kFix4HashSize + hashValue];p->hash[ hash2Value] =p->hash[kFix3HashSize + hash3Value] =p->hash[kFix4HashSize + hashValue] = p->pos;maxLen = 1;offset = 0;if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur){distances[0] = maxLen = 2;distances[1] = delta2 - 1;offset = 2;}if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur){maxLen = 3;distances[offset + 1] = delta3 - 1;offset += 2;delta2 = delta3;}if (offset != 0){for (; maxLen != lenLimit; maxLen++)if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])break;distances[offset - 2] = maxLen;if (maxLen == lenLimit){p->son[p->cyclicBufferPos] = curMatch;MOVE_POS_RET;}}if (maxLen < 3)maxLen = 3;offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),distances + offset, maxLen) - (distances));MOVE_POS_RET}UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances){UInt32 offset;GET_MATCHES_HEADER(3)HASH_ZIP_CALC;curMatch = p->hash[hashValue];p->hash[hashValue] = p->pos;offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),distances, 2) - (distances));MOVE_POS_RET}static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num){do{SKIP_HEADER(2)HASH2_CALC;curMatch = p->hash[hashValue];p->hash[hashValue] = p->pos;SKIP_FOOTER}while (--num != 0);}void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num){do{SKIP_HEADER(3)HASH_ZIP_CALC;curMatch = p->hash[hashValue];p->hash[hashValue] = p->pos;SKIP_FOOTER}while (--num != 0);}static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num){do{UInt32 hash2Value;SKIP_HEADER(3)HASH3_CALC;curMatch = p->hash[kFix3HashSize + hashValue];p->hash[hash2Value] =p->hash[kFix3HashSize + hashValue] = p->pos;SKIP_FOOTER}while (--num != 0);}static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num){do{UInt32 hash2Value, hash3Value;SKIP_HEADER(4)HASH4_CALC;curMatch = p->hash[kFix4HashSize + hashValue];p->hash[ hash2Value] =p->hash[kFix3HashSize + hash3Value] = p->pos;p->hash[kFix4HashSize + hashValue] = p->pos;SKIP_FOOTER}while (--num != 0);}static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num){do{UInt32 hash2Value, hash3Value;SKIP_HEADER(4)HASH4_CALC;curMatch = p->hash[kFix4HashSize + hashValue];p->hash[ hash2Value] =p->hash[kFix3HashSize + hash3Value] =p->hash[kFix4HashSize + hashValue] = p->pos;p->son[p->cyclicBufferPos] = curMatch;MOVE_POS}while (--num != 0);}void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num){do{SKIP_HEADER(3)HASH_ZIP_CALC;curMatch = p->hash[hashValue];p->hash[hashValue] = p->pos;p->son[p->cyclicBufferPos] = curMatch;MOVE_POS}while (--num != 0);}void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable){vTable->Init = (Mf_Init_Func)MatchFinder_Init;vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;if (!p->btMode){vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;}else if (p->numHashBytes == 2){vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;}else if (p->numHashBytes == 3){vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;}else{vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;}}