1
2
3#include "../../../../Common/Defs.h"
4#include "../../../../Common/CRC.h"
5#include "../../../../Common/Alloc.h"
6
7#include "BinTree.h"
8
9
10
11
12
13namespace BT_NAMESPACE {
14
15#ifdef HASH_ARRAY_2
16 static const UInt32 kHash2Size = 1 << 10;
17 #define kNumHashDirectBytes 0
18 #ifdef HASH_ARRAY_3
19 static const UInt32 kNumHashBytes = 4;
20 static const UInt32 kHash3Size = 1 << 16;
21 #else
22 static const UInt32 kNumHashBytes = 3;
23 #endif
24 static const UInt32 kHashSize = 0;
25 static const UInt32 kMinMatchCheck = kNumHashBytes;
26 static const UInt32 kStartMaxLen = 1;
27#else
28 #ifdef HASH_ZIP
29 #define kNumHashDirectBytes 0
30 static const UInt32 kNumHashBytes = 3;
31 static const UInt32 kHashSize = 1 << 16;
32 static const UInt32 kMinMatchCheck = kNumHashBytes;
33 static const UInt32 kStartMaxLen = 1;
34 #else
35 #define kNumHashDirectBytes 2
36 static const UInt32 kNumHashBytes = 2;
37 static const UInt32 kHashSize = 1 << (8 * kNumHashBytes);
38 static const UInt32 kMinMatchCheck = kNumHashBytes + 1;
39 static const UInt32 kStartMaxLen = 1;
40 #endif
41#endif
42
43#ifdef HASH_ARRAY_2
44#ifdef HASH_ARRAY_3
45static const UInt32 kHash3Offset = kHash2Size;
46#endif
47#endif
48
49static const UInt32 kFixHashSize = 0
50 #ifdef HASH_ARRAY_2
51 + kHash2Size
52 #ifdef HASH_ARRAY_3
53 + kHash3Size
54 #endif
55 #endif
56 ;
57
58CMatchFinder::CMatchFinder():
59 _hash(0)
60{
61}
62
63void CMatchFinder::FreeThisClassMemory()
64{
65 BigFree(_hash);
66 _hash = 0;
67}
68
69void CMatchFinder::FreeMemory()
70{
71 FreeThisClassMemory();
72 CLZInWindow::Free();
73}
74
75CMatchFinder::~CMatchFinder()
76{
77 FreeMemory();
78}
79
80STDMETHODIMP CMatchFinder::Create(UInt32 historySize, UInt32 keepAddBufferBefore,
81 UInt32 matchMaxLen, UInt32 keepAddBufferAfter)
82{
83 if (historySize > kMaxValForNormalize - 256)
84 {
85 FreeMemory();
86 return E_INVALIDARG;
87 }
88 _cutValue =
89 #ifdef _HASH_CHAIN
90 8 + (matchMaxLen >> 2);
91 #else
92 16 + (matchMaxLen >> 1);
93 #endif
94 UInt32 sizeReserv = (historySize + keepAddBufferBefore +
95 matchMaxLen + keepAddBufferAfter) / 2 + 256;
96 if (CLZInWindow::Create(historySize + keepAddBufferBefore,
97 matchMaxLen + keepAddBufferAfter, sizeReserv))
98 {
99 _matchMaxLen = matchMaxLen;
100 UInt32 newCyclicBufferSize = historySize + 1;
101 if (_hash != 0 && newCyclicBufferSize == _cyclicBufferSize)
102 return S_OK;
103 FreeThisClassMemory();
104 _cyclicBufferSize = newCyclicBufferSize;
105
106 UInt32 hs = kHashSize;
107
108 #ifdef HASH_ARRAY_2
109 hs = historySize - 1;
110 hs |= (hs >> 1);
111 hs |= (hs >> 2);
112 hs |= (hs >> 4);
113 hs |= (hs >> 8);
114 hs >>= 1;
115 hs |= 0xFFFF;
116 if (hs > (1 << 24))
117 {
118 #ifdef HASH_ARRAY_3
119 hs >>= 1;
120 #else
121 hs = (1 << 24) - 1;
122 #endif
123 }
124 _hashMask = hs;
125 hs++;
126 #endif
127 _hashSizeSum = hs + kFixHashSize;
128 UInt32 numItems = _hashSizeSum + _cyclicBufferSize
129 #ifndef _HASH_CHAIN
130 * 2
131 #endif
132 ;
133 size_t sizeInBytes = (size_t)numItems * sizeof(CIndex);
134 if (sizeInBytes / sizeof(CIndex) != numItems)
135 return E_OUTOFMEMORY;
136 _hash = (CIndex *)BigAlloc(sizeInBytes);
137 _son = _hash + _hashSizeSum;
138 if (_hash != 0)
139 return S_OK;
140 }
141 FreeMemory();
142 return E_OUTOFMEMORY;
143}
144
145static const UInt32 kEmptyHashValue = 0;
146
147STDMETHODIMP CMatchFinder::SetStream(ISequentialInStream *stream)
148{
149 CLZInWindow::SetStream(stream);
150 return S_OK;
151}
152
153STDMETHODIMP CMatchFinder::Init()
154{
155 RINOK(CLZInWindow::Init());
156 for(UInt32 i = 0; i < _hashSizeSum; i++)
157 _hash[i] = kEmptyHashValue;
158 _cyclicBufferPos = 0;
159 ReduceOffsets(-1);
160 return S_OK;
161}
162
163STDMETHODIMP_(void) CMatchFinder::ReleaseStream()
164{
165
166}
167
168#ifdef HASH_ARRAY_2
169#ifdef HASH_ARRAY_3
170
171#define HASH_CALC { \
172 UInt32 temp = CCRC::Table[cur[0]] ^ cur[1]; \
173 hash2Value = temp & (kHash2Size - 1); \
174 hash3Value = (temp ^ (UInt32(cur[2]) << 8)) & (kHash3Size - 1); \
175 hashValue = (temp ^ (UInt32(cur[2]) << 8) ^ (CCRC::Table[cur[3]] << 5)) & _hashMask; }
176
177#else
178#define HASH_CALC { \
179 UInt32 temp = CCRC::Table[cur[0]] ^ cur[1]; \
180 hash2Value = temp & (kHash2Size - 1); \
181 hashValue = (temp ^ (UInt32(cur[2]) << 8)) & _hashMask; }
182#endif
183#else
184#ifdef HASH_ZIP
185inline UInt32 Hash(const Byte *pointer)
186{
187 return ((UInt32(pointer[0]) << 8) ^ CCRC::Table[pointer[1]] ^ pointer[2]) & (kHashSize - 1);
188}
189#else
190inline UInt32 Hash(const Byte *pointer)
191{
192 return pointer[0] ^ (UInt32(pointer[1]) << 8);
193}
194#endif
195#endif
196
197STDMETHODIMP CMatchFinder::GetMatches(UInt32 *distances)
198{
199 UInt32 lenLimit;
200 if (_pos + _matchMaxLen <= _streamPos)
201 lenLimit = _matchMaxLen;
202 else
203 {
204 lenLimit = _streamPos - _pos;
205 if(lenLimit < kMinMatchCheck)
206 {
207 distances[0] = 0;
208 return MovePos();
209 }
210 }
211
212 int offset = 1;
213
214 UInt32 matchMinPos = (_pos > _cyclicBufferSize) ? (_pos - _cyclicBufferSize) : 0;
215 const Byte *cur = _buffer + _pos;
216
217 UInt32 maxLen = kStartMaxLen;
218
219 #ifdef HASH_ARRAY_2
220 UInt32 hash2Value;
221 #ifdef HASH_ARRAY_3
222 UInt32 hash3Value;
223 #endif
224 UInt32 hashValue;
225 HASH_CALC;
226 #else
227 UInt32 hashValue = Hash(cur);
228 #endif
229
230 UInt32 curMatch = _hash[kFixHashSize + hashValue];
231 #ifdef HASH_ARRAY_2
232 UInt32 curMatch2 = _hash[hash2Value];
233 #ifdef HASH_ARRAY_3
234 UInt32 curMatch3 = _hash[kHash3Offset + hash3Value];
235 #endif
236 _hash[hash2Value] = _pos;
237 if(curMatch2 > matchMinPos)
238 if (_buffer[curMatch2] == cur[0])
239 {
240 distances[offset++] = maxLen = 2;
241 distances[offset++] = _pos - curMatch2 - 1;
242 }
243
244 #ifdef HASH_ARRAY_3
245 _hash[kHash3Offset + hash3Value] = _pos;
246 if(curMatch3 > matchMinPos)
247 if (_buffer[curMatch3] == cur[0])
248 {
249 if (curMatch3 == curMatch2)
250 offset -= 2;
251 distances[offset++] = maxLen = 3;
252 distances[offset++] = _pos - curMatch3 - 1;
253 curMatch2 = curMatch3;
254 }
255 #endif
256 if (offset != 1 && curMatch2 == curMatch)
257 {
258 offset -= 2;
259 maxLen = kStartMaxLen;
260 }
261 #endif
262
263 _hash[kFixHashSize + hashValue] = _pos;
264
265 CIndex *son = _son;
266
267 #ifdef _HASH_CHAIN
268 son[_cyclicBufferPos] = curMatch;
269 #else
270 CIndex *ptr0 = son + (_cyclicBufferPos << 1) + 1;
271 CIndex *ptr1 = son + (_cyclicBufferPos << 1);
272
273 UInt32 len0, len1;
274 len0 = len1 = kNumHashDirectBytes;
275 #endif
276
277 #if kNumHashDirectBytes != 0
278 if(curMatch > matchMinPos)
279 {
280 if (_buffer[curMatch + kNumHashDirectBytes] != cur[kNumHashDirectBytes])
281 {
282 distances[offset++] = maxLen = kNumHashDirectBytes;
283 distances[offset++] = _pos - curMatch - 1;
284 }
285 }
286 #endif
287 UInt32 count = _cutValue;
288 while(true)
289 {
290 if(curMatch <= matchMinPos || count-- == 0)
291 {
292 #ifndef _HASH_CHAIN
293 *ptr0 = *ptr1 = kEmptyHashValue;
294 #endif
295 break;
296 }
297 UInt32 delta = _pos - curMatch;
298 UInt32 cyclicPos = (delta <= _cyclicBufferPos) ?
299 (_cyclicBufferPos - delta):
300 (_cyclicBufferPos - delta + _cyclicBufferSize);
301 CIndex *pair = son +
302 #ifdef _HASH_CHAIN
303 cyclicPos;
304 #else
305 (cyclicPos << 1);
306 #endif
307
308
309
310 const Byte *pb = _buffer + curMatch;
311 UInt32 len =
312 #ifdef _HASH_CHAIN
313 kNumHashDirectBytes;
314 if (pb[maxLen] == cur[maxLen])
315 #else
316 MyMin(len0, len1);
317 #endif
318 if (pb[len] == cur[len])
319 {
320 while(++len != lenLimit)
321 if (pb[len] != cur[len])
322 break;
323 if (maxLen < len)
324 {
325 distances[offset++] = maxLen = len;
326 distances[offset++] = delta - 1;
327 if (len == lenLimit)
328 {
329 #ifndef _HASH_CHAIN
330 *ptr1 = pair[0];
331 *ptr0 = pair[1];
332 #endif
333 break;
334 }
335 }
336 }
337 #ifdef _HASH_CHAIN
338 curMatch = *pair;
339 #else
340 if (pb[len] < cur[len])
341 {
342 *ptr1 = curMatch;
343 ptr1 = pair + 1;
344 curMatch = *ptr1;
345 len1 = len;
346 }
347 else
348 {
349 *ptr0 = curMatch;
350 ptr0 = pair;
351 curMatch = *ptr0;
352 len0 = len;
353 }
354 #endif
355 }
356 distances[0] = offset - 1;
357 if (++_cyclicBufferPos == _cyclicBufferSize)
358 _cyclicBufferPos = 0;
359 RINOK(CLZInWindow::MovePos());
360 if (_pos == kMaxValForNormalize)
361 Normalize();
362 return S_OK;
363}
364
365STDMETHODIMP CMatchFinder::Skip(UInt32 num)
366{
367 do
368 {
369 #ifdef _HASH_CHAIN
370 if (_streamPos - _pos < kNumHashBytes)
371 {
372 RINOK(MovePos());
373 continue;
374 }
375 #else
376 UInt32 lenLimit;
377 if (_pos + _matchMaxLen <= _streamPos)
378 lenLimit = _matchMaxLen;
379 else
380 {
381 lenLimit = _streamPos - _pos;
382 if(lenLimit < kMinMatchCheck)
383 {
384 RINOK(MovePos());
385 continue;
386 }
387 }
388 UInt32 matchMinPos = (_pos > _cyclicBufferSize) ? (_pos - _cyclicBufferSize) : 0;
389 #endif
390 const Byte *cur = _buffer + _pos;
391
392 #ifdef HASH_ARRAY_2
393 UInt32 hash2Value;
394 #ifdef HASH_ARRAY_3
395 UInt32 hash3Value;
396 UInt32 hashValue;
397 HASH_CALC;
398 _hash[kHash3Offset + hash3Value] = _pos;
399 #else
400 UInt32 hashValue;
401 HASH_CALC;
402 #endif
403 _hash[hash2Value] = _pos;
404 #else
405 UInt32 hashValue = Hash(cur);
406 #endif
407
408 UInt32 curMatch = _hash[kFixHashSize + hashValue];
409 _hash[kFixHashSize + hashValue] = _pos;
410
411 #ifdef _HASH_CHAIN
412 _son[_cyclicBufferPos] = curMatch;
413 #else
414 CIndex *son = _son;
415 CIndex *ptr0 = son + (_cyclicBufferPos << 1) + 1;
416 CIndex *ptr1 = son + (_cyclicBufferPos << 1);
417
418 UInt32 len0, len1;
419 len0 = len1 = kNumHashDirectBytes;
420 UInt32 count = _cutValue;
421 while(true)
422 {
423 if(curMatch <= matchMinPos || count-- == 0)
424 {
425 *ptr0 = *ptr1 = kEmptyHashValue;
426 break;
427 }
428
429 UInt32 delta = _pos - curMatch;
430 UInt32 cyclicPos = (delta <= _cyclicBufferPos) ?
431 (_cyclicBufferPos - delta):
432 (_cyclicBufferPos - delta + _cyclicBufferSize);
433 CIndex *pair = son + (cyclicPos << 1);
434
435
436
437 const Byte *pb = _buffer + curMatch;
438 UInt32 len = MyMin(len0, len1);
439
440 if (pb[len] == cur[len])
441 {
442 while(++len != lenLimit)
443 if (pb[len] != cur[len])
444 break;
445 if (len == lenLimit)
446 {
447 *ptr1 = pair[0];
448 *ptr0 = pair[1];
449 break;
450 }
451 }
452 if (pb[len] < cur[len])
453 {
454 *ptr1 = curMatch;
455 ptr1 = pair + 1;
456 curMatch = *ptr1;
457 len1 = len;
458 }
459 else
460 {
461 *ptr0 = curMatch;
462 ptr0 = pair;
463 curMatch = *ptr0;
464 len0 = len;
465 }
466 }
467 #endif
468 if (++_cyclicBufferPos == _cyclicBufferSize)
469 _cyclicBufferPos = 0;
470 RINOK(CLZInWindow::MovePos());
471 if (_pos == kMaxValForNormalize)
472 Normalize();
473 }
474 while(--num != 0);
475 return S_OK;
476}
477
478void CMatchFinder::Normalize()
479{
480 UInt32 subValue = _pos - _cyclicBufferSize;
481 CIndex *items = _hash;
482 UInt32 numItems = (_hashSizeSum + _cyclicBufferSize
483 #ifndef _HASH_CHAIN
484 * 2
485 #endif
486 );
487 for (UInt32 i = 0; i < numItems; i++)
488 {
489 UInt32 value = items[i];
490 if (value <= subValue)
491 value = kEmptyHashValue;
492 else
493 value -= subValue;
494 items[i] = value;
495 }
496 ReduceOffsets(subValue);
497}
498
499HRESULT CMatchFinder::MovePos()
500{
501 if (++_cyclicBufferPos == _cyclicBufferSize)
502 _cyclicBufferPos = 0;
503 RINOK(CLZInWindow::MovePos());
504 if (_pos == kMaxValForNormalize)
505 Normalize();
506 return S_OK;
507}
508
509STDMETHODIMP_(Byte) CMatchFinder::GetIndexByte(Int32 index)
510 { return CLZInWindow::GetIndexByte(index); }
511
512STDMETHODIMP_(UInt32) CMatchFinder::GetMatchLen(Int32 index,
513 UInt32 back, UInt32 limit)
514 { return CLZInWindow::GetMatchLen(index, back, limit); }
515
516STDMETHODIMP_(UInt32) CMatchFinder::GetNumAvailableBytes()
517 { return CLZInWindow::GetNumAvailableBytes(); }
518
519STDMETHODIMP_(const Byte *) CMatchFinder::GetPointerToCurrentPos()
520 { return CLZInWindow::GetPointerToCurrentPos(); }
521
522STDMETHODIMP_(Int32) CMatchFinder::NeedChangeBufferPos(UInt32 numCheckBytes)
523 { return CLZInWindow::NeedMove(numCheckBytes) ? 1: 0; }
524
525STDMETHODIMP_(void) CMatchFinder::ChangeBufferPos()
526 { CLZInWindow::MoveBlock();}
527
528#undef HASH_CALC
529#undef kNumHashDirectBytes
530
531}
532