xref: /aosp_15_r20/external/zstd/lib/compress/zstd_double_fast.c (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1*01826a49SYabin Cui /*
2*01826a49SYabin Cui  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*01826a49SYabin Cui  * All rights reserved.
4*01826a49SYabin Cui  *
5*01826a49SYabin Cui  * This source code is licensed under both the BSD-style license (found in the
6*01826a49SYabin Cui  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*01826a49SYabin Cui  * in the COPYING file in the root directory of this source tree).
8*01826a49SYabin Cui  * You may select, at your option, one of the above-listed licenses.
9*01826a49SYabin Cui  */
10*01826a49SYabin Cui 
11*01826a49SYabin Cui #include "zstd_compress_internal.h"
12*01826a49SYabin Cui #include "zstd_double_fast.h"
13*01826a49SYabin Cui 
14*01826a49SYabin Cui #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
15*01826a49SYabin Cui 
16*01826a49SYabin Cui static
17*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t * ms,void const * end,ZSTD_dictTableLoadMethod_e dtlm)18*01826a49SYabin Cui void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
19*01826a49SYabin Cui                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
20*01826a49SYabin Cui {
21*01826a49SYabin Cui     const ZSTD_compressionParameters* const cParams = &ms->cParams;
22*01826a49SYabin Cui     U32* const hashLarge = ms->hashTable;
23*01826a49SYabin Cui     U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
24*01826a49SYabin Cui     U32  const mls = cParams->minMatch;
25*01826a49SYabin Cui     U32* const hashSmall = ms->chainTable;
26*01826a49SYabin Cui     U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
27*01826a49SYabin Cui     const BYTE* const base = ms->window.base;
28*01826a49SYabin Cui     const BYTE* ip = base + ms->nextToUpdate;
29*01826a49SYabin Cui     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
30*01826a49SYabin Cui     const U32 fastHashFillStep = 3;
31*01826a49SYabin Cui 
32*01826a49SYabin Cui     /* Always insert every fastHashFillStep position into the hash tables.
33*01826a49SYabin Cui      * Insert the other positions into the large hash table if their entry
34*01826a49SYabin Cui      * is empty.
35*01826a49SYabin Cui      */
36*01826a49SYabin Cui     for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
37*01826a49SYabin Cui         U32 const curr = (U32)(ip - base);
38*01826a49SYabin Cui         U32 i;
39*01826a49SYabin Cui         for (i = 0; i < fastHashFillStep; ++i) {
40*01826a49SYabin Cui             size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
41*01826a49SYabin Cui             size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
42*01826a49SYabin Cui             if (i == 0) {
43*01826a49SYabin Cui                 ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
44*01826a49SYabin Cui             }
45*01826a49SYabin Cui             if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
46*01826a49SYabin Cui                 ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
47*01826a49SYabin Cui             }
48*01826a49SYabin Cui             /* Only load extra positions for ZSTD_dtlm_full */
49*01826a49SYabin Cui             if (dtlm == ZSTD_dtlm_fast)
50*01826a49SYabin Cui                 break;
51*01826a49SYabin Cui     }   }
52*01826a49SYabin Cui }
53*01826a49SYabin Cui 
54*01826a49SYabin Cui static
55*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t * ms,void const * end,ZSTD_dictTableLoadMethod_e dtlm)56*01826a49SYabin Cui void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
57*01826a49SYabin Cui                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
58*01826a49SYabin Cui {
59*01826a49SYabin Cui     const ZSTD_compressionParameters* const cParams = &ms->cParams;
60*01826a49SYabin Cui     U32* const hashLarge = ms->hashTable;
61*01826a49SYabin Cui     U32  const hBitsL = cParams->hashLog;
62*01826a49SYabin Cui     U32  const mls = cParams->minMatch;
63*01826a49SYabin Cui     U32* const hashSmall = ms->chainTable;
64*01826a49SYabin Cui     U32  const hBitsS = cParams->chainLog;
65*01826a49SYabin Cui     const BYTE* const base = ms->window.base;
66*01826a49SYabin Cui     const BYTE* ip = base + ms->nextToUpdate;
67*01826a49SYabin Cui     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
68*01826a49SYabin Cui     const U32 fastHashFillStep = 3;
69*01826a49SYabin Cui 
70*01826a49SYabin Cui     /* Always insert every fastHashFillStep position into the hash tables.
71*01826a49SYabin Cui      * Insert the other positions into the large hash table if their entry
72*01826a49SYabin Cui      * is empty.
73*01826a49SYabin Cui      */
74*01826a49SYabin Cui     for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
75*01826a49SYabin Cui         U32 const curr = (U32)(ip - base);
76*01826a49SYabin Cui         U32 i;
77*01826a49SYabin Cui         for (i = 0; i < fastHashFillStep; ++i) {
78*01826a49SYabin Cui             size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
79*01826a49SYabin Cui             size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
80*01826a49SYabin Cui             if (i == 0)
81*01826a49SYabin Cui                 hashSmall[smHash] = curr + i;
82*01826a49SYabin Cui             if (i == 0 || hashLarge[lgHash] == 0)
83*01826a49SYabin Cui                 hashLarge[lgHash] = curr + i;
84*01826a49SYabin Cui             /* Only load extra positions for ZSTD_dtlm_full */
85*01826a49SYabin Cui             if (dtlm == ZSTD_dtlm_fast)
86*01826a49SYabin Cui                 break;
87*01826a49SYabin Cui         }   }
88*01826a49SYabin Cui }
89*01826a49SYabin Cui 
ZSTD_fillDoubleHashTable(ZSTD_matchState_t * ms,const void * const end,ZSTD_dictTableLoadMethod_e dtlm,ZSTD_tableFillPurpose_e tfp)90*01826a49SYabin Cui void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
91*01826a49SYabin Cui                         const void* const end,
92*01826a49SYabin Cui                         ZSTD_dictTableLoadMethod_e dtlm,
93*01826a49SYabin Cui                         ZSTD_tableFillPurpose_e tfp)
94*01826a49SYabin Cui {
95*01826a49SYabin Cui     if (tfp == ZSTD_tfp_forCDict) {
96*01826a49SYabin Cui         ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
97*01826a49SYabin Cui     } else {
98*01826a49SYabin Cui         ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
99*01826a49SYabin Cui     }
100*01826a49SYabin Cui }
101*01826a49SYabin Cui 
102*01826a49SYabin Cui 
103*01826a49SYabin Cui FORCE_INLINE_TEMPLATE
104*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_compressBlock_doubleFast_noDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls)105*01826a49SYabin Cui size_t ZSTD_compressBlock_doubleFast_noDict_generic(
106*01826a49SYabin Cui         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
107*01826a49SYabin Cui         void const* src, size_t srcSize, U32 const mls /* template */)
108*01826a49SYabin Cui {
109*01826a49SYabin Cui     ZSTD_compressionParameters const* cParams = &ms->cParams;
110*01826a49SYabin Cui     U32* const hashLong = ms->hashTable;
111*01826a49SYabin Cui     const U32 hBitsL = cParams->hashLog;
112*01826a49SYabin Cui     U32* const hashSmall = ms->chainTable;
113*01826a49SYabin Cui     const U32 hBitsS = cParams->chainLog;
114*01826a49SYabin Cui     const BYTE* const base = ms->window.base;
115*01826a49SYabin Cui     const BYTE* const istart = (const BYTE*)src;
116*01826a49SYabin Cui     const BYTE* anchor = istart;
117*01826a49SYabin Cui     const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
118*01826a49SYabin Cui     /* presumes that, if there is a dictionary, it must be using Attach mode */
119*01826a49SYabin Cui     const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
120*01826a49SYabin Cui     const BYTE* const prefixLowest = base + prefixLowestIndex;
121*01826a49SYabin Cui     const BYTE* const iend = istart + srcSize;
122*01826a49SYabin Cui     const BYTE* const ilimit = iend - HASH_READ_SIZE;
123*01826a49SYabin Cui     U32 offset_1=rep[0], offset_2=rep[1];
124*01826a49SYabin Cui     U32 offsetSaved1 = 0, offsetSaved2 = 0;
125*01826a49SYabin Cui 
126*01826a49SYabin Cui     size_t mLength;
127*01826a49SYabin Cui     U32 offset;
128*01826a49SYabin Cui     U32 curr;
129*01826a49SYabin Cui 
130*01826a49SYabin Cui     /* how many positions to search before increasing step size */
131*01826a49SYabin Cui     const size_t kStepIncr = 1 << kSearchStrength;
132*01826a49SYabin Cui     /* the position at which to increment the step size if no match is found */
133*01826a49SYabin Cui     const BYTE* nextStep;
134*01826a49SYabin Cui     size_t step; /* the current step size */
135*01826a49SYabin Cui 
136*01826a49SYabin Cui     size_t hl0; /* the long hash at ip */
137*01826a49SYabin Cui     size_t hl1; /* the long hash at ip1 */
138*01826a49SYabin Cui 
139*01826a49SYabin Cui     U32 idxl0; /* the long match index for ip */
140*01826a49SYabin Cui     U32 idxl1; /* the long match index for ip1 */
141*01826a49SYabin Cui 
142*01826a49SYabin Cui     const BYTE* matchl0; /* the long match for ip */
143*01826a49SYabin Cui     const BYTE* matchs0; /* the short match for ip */
144*01826a49SYabin Cui     const BYTE* matchl1; /* the long match for ip1 */
145*01826a49SYabin Cui 
146*01826a49SYabin Cui     const BYTE* ip = istart; /* the current position */
147*01826a49SYabin Cui     const BYTE* ip1; /* the next position */
148*01826a49SYabin Cui 
149*01826a49SYabin Cui     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
150*01826a49SYabin Cui 
151*01826a49SYabin Cui     /* init */
152*01826a49SYabin Cui     ip += ((ip - prefixLowest) == 0);
153*01826a49SYabin Cui     {
154*01826a49SYabin Cui         U32 const current = (U32)(ip - base);
155*01826a49SYabin Cui         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
156*01826a49SYabin Cui         U32 const maxRep = current - windowLow;
157*01826a49SYabin Cui         if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
158*01826a49SYabin Cui         if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
159*01826a49SYabin Cui     }
160*01826a49SYabin Cui 
161*01826a49SYabin Cui     /* Outer Loop: one iteration per match found and stored */
162*01826a49SYabin Cui     while (1) {
163*01826a49SYabin Cui         step = 1;
164*01826a49SYabin Cui         nextStep = ip + kStepIncr;
165*01826a49SYabin Cui         ip1 = ip + step;
166*01826a49SYabin Cui 
167*01826a49SYabin Cui         if (ip1 > ilimit) {
168*01826a49SYabin Cui             goto _cleanup;
169*01826a49SYabin Cui         }
170*01826a49SYabin Cui 
171*01826a49SYabin Cui         hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
172*01826a49SYabin Cui         idxl0 = hashLong[hl0];
173*01826a49SYabin Cui         matchl0 = base + idxl0;
174*01826a49SYabin Cui 
175*01826a49SYabin Cui         /* Inner Loop: one iteration per search / position */
176*01826a49SYabin Cui         do {
177*01826a49SYabin Cui             const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
178*01826a49SYabin Cui             const U32 idxs0 = hashSmall[hs0];
179*01826a49SYabin Cui             curr = (U32)(ip-base);
180*01826a49SYabin Cui             matchs0 = base + idxs0;
181*01826a49SYabin Cui 
182*01826a49SYabin Cui             hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */
183*01826a49SYabin Cui 
184*01826a49SYabin Cui             /* check noDict repcode */
185*01826a49SYabin Cui             if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
186*01826a49SYabin Cui                 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
187*01826a49SYabin Cui                 ip++;
188*01826a49SYabin Cui                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
189*01826a49SYabin Cui                 goto _match_stored;
190*01826a49SYabin Cui             }
191*01826a49SYabin Cui 
192*01826a49SYabin Cui             hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
193*01826a49SYabin Cui 
194*01826a49SYabin Cui             if (idxl0 > prefixLowestIndex) {
195*01826a49SYabin Cui                 /* check prefix long match */
196*01826a49SYabin Cui                 if (MEM_read64(matchl0) == MEM_read64(ip)) {
197*01826a49SYabin Cui                     mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
198*01826a49SYabin Cui                     offset = (U32)(ip-matchl0);
199*01826a49SYabin Cui                     while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
200*01826a49SYabin Cui                     goto _match_found;
201*01826a49SYabin Cui                 }
202*01826a49SYabin Cui             }
203*01826a49SYabin Cui 
204*01826a49SYabin Cui             idxl1 = hashLong[hl1];
205*01826a49SYabin Cui             matchl1 = base + idxl1;
206*01826a49SYabin Cui 
207*01826a49SYabin Cui             if (idxs0 > prefixLowestIndex) {
208*01826a49SYabin Cui                 /* check prefix short match */
209*01826a49SYabin Cui                 if (MEM_read32(matchs0) == MEM_read32(ip)) {
210*01826a49SYabin Cui                     goto _search_next_long;
211*01826a49SYabin Cui                 }
212*01826a49SYabin Cui             }
213*01826a49SYabin Cui 
214*01826a49SYabin Cui             if (ip1 >= nextStep) {
215*01826a49SYabin Cui                 PREFETCH_L1(ip1 + 64);
216*01826a49SYabin Cui                 PREFETCH_L1(ip1 + 128);
217*01826a49SYabin Cui                 step++;
218*01826a49SYabin Cui                 nextStep += kStepIncr;
219*01826a49SYabin Cui             }
220*01826a49SYabin Cui             ip = ip1;
221*01826a49SYabin Cui             ip1 += step;
222*01826a49SYabin Cui 
223*01826a49SYabin Cui             hl0 = hl1;
224*01826a49SYabin Cui             idxl0 = idxl1;
225*01826a49SYabin Cui             matchl0 = matchl1;
226*01826a49SYabin Cui     #if defined(__aarch64__)
227*01826a49SYabin Cui             PREFETCH_L1(ip+256);
228*01826a49SYabin Cui     #endif
229*01826a49SYabin Cui         } while (ip1 <= ilimit);
230*01826a49SYabin Cui 
231*01826a49SYabin Cui _cleanup:
232*01826a49SYabin Cui         /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
233*01826a49SYabin Cui          * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
234*01826a49SYabin Cui         offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
235*01826a49SYabin Cui 
236*01826a49SYabin Cui         /* save reps for next block */
237*01826a49SYabin Cui         rep[0] = offset_1 ? offset_1 : offsetSaved1;
238*01826a49SYabin Cui         rep[1] = offset_2 ? offset_2 : offsetSaved2;
239*01826a49SYabin Cui 
240*01826a49SYabin Cui         /* Return the last literals size */
241*01826a49SYabin Cui         return (size_t)(iend - anchor);
242*01826a49SYabin Cui 
243*01826a49SYabin Cui _search_next_long:
244*01826a49SYabin Cui 
245*01826a49SYabin Cui         /* check prefix long +1 match */
246*01826a49SYabin Cui         if (idxl1 > prefixLowestIndex) {
247*01826a49SYabin Cui             if (MEM_read64(matchl1) == MEM_read64(ip1)) {
248*01826a49SYabin Cui                 ip = ip1;
249*01826a49SYabin Cui                 mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
250*01826a49SYabin Cui                 offset = (U32)(ip-matchl1);
251*01826a49SYabin Cui                 while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
252*01826a49SYabin Cui                 goto _match_found;
253*01826a49SYabin Cui             }
254*01826a49SYabin Cui         }
255*01826a49SYabin Cui 
256*01826a49SYabin Cui         /* if no long +1 match, explore the short match we found */
257*01826a49SYabin Cui         mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
258*01826a49SYabin Cui         offset = (U32)(ip - matchs0);
259*01826a49SYabin Cui         while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
260*01826a49SYabin Cui 
261*01826a49SYabin Cui         /* fall-through */
262*01826a49SYabin Cui 
263*01826a49SYabin Cui _match_found: /* requires ip, offset, mLength */
264*01826a49SYabin Cui         offset_2 = offset_1;
265*01826a49SYabin Cui         offset_1 = offset;
266*01826a49SYabin Cui 
267*01826a49SYabin Cui         if (step < 4) {
268*01826a49SYabin Cui             /* It is unsafe to write this value back to the hashtable when ip1 is
269*01826a49SYabin Cui              * greater than or equal to the new ip we will have after we're done
270*01826a49SYabin Cui              * processing this match. Rather than perform that test directly
271*01826a49SYabin Cui              * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
272*01826a49SYabin Cui              * more predictable test. The minmatch even if we take a short match is
273*01826a49SYabin Cui              * 4 bytes, so as long as step, the distance between ip and ip1
274*01826a49SYabin Cui              * (initially) is less than 4, we know ip1 < new ip. */
275*01826a49SYabin Cui             hashLong[hl1] = (U32)(ip1 - base);
276*01826a49SYabin Cui         }
277*01826a49SYabin Cui 
278*01826a49SYabin Cui         ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
279*01826a49SYabin Cui 
280*01826a49SYabin Cui _match_stored:
281*01826a49SYabin Cui         /* match found */
282*01826a49SYabin Cui         ip += mLength;
283*01826a49SYabin Cui         anchor = ip;
284*01826a49SYabin Cui 
285*01826a49SYabin Cui         if (ip <= ilimit) {
286*01826a49SYabin Cui             /* Complementary insertion */
287*01826a49SYabin Cui             /* done after iLimit test, as candidates could be > iend-8 */
288*01826a49SYabin Cui             {   U32 const indexToInsert = curr+2;
289*01826a49SYabin Cui                 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
290*01826a49SYabin Cui                 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
291*01826a49SYabin Cui                 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
292*01826a49SYabin Cui                 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
293*01826a49SYabin Cui             }
294*01826a49SYabin Cui 
295*01826a49SYabin Cui             /* check immediate repcode */
296*01826a49SYabin Cui             while ( (ip <= ilimit)
297*01826a49SYabin Cui                  && ( (offset_2>0)
298*01826a49SYabin Cui                     & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
299*01826a49SYabin Cui                 /* store sequence */
300*01826a49SYabin Cui                 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
301*01826a49SYabin Cui                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
302*01826a49SYabin Cui                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
303*01826a49SYabin Cui                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
304*01826a49SYabin Cui                 ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
305*01826a49SYabin Cui                 ip += rLength;
306*01826a49SYabin Cui                 anchor = ip;
307*01826a49SYabin Cui                 continue;   /* faster when present ... (?) */
308*01826a49SYabin Cui             }
309*01826a49SYabin Cui         }
310*01826a49SYabin Cui     }
311*01826a49SYabin Cui }
312*01826a49SYabin Cui 
313*01826a49SYabin Cui 
314*01826a49SYabin Cui FORCE_INLINE_TEMPLATE
315*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_compressBlock_doubleFast_dictMatchState_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls)316*01826a49SYabin Cui size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
317*01826a49SYabin Cui         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
318*01826a49SYabin Cui         void const* src, size_t srcSize,
319*01826a49SYabin Cui         U32 const mls /* template */)
320*01826a49SYabin Cui {
321*01826a49SYabin Cui     ZSTD_compressionParameters const* cParams = &ms->cParams;
322*01826a49SYabin Cui     U32* const hashLong = ms->hashTable;
323*01826a49SYabin Cui     const U32 hBitsL = cParams->hashLog;
324*01826a49SYabin Cui     U32* const hashSmall = ms->chainTable;
325*01826a49SYabin Cui     const U32 hBitsS = cParams->chainLog;
326*01826a49SYabin Cui     const BYTE* const base = ms->window.base;
327*01826a49SYabin Cui     const BYTE* const istart = (const BYTE*)src;
328*01826a49SYabin Cui     const BYTE* ip = istart;
329*01826a49SYabin Cui     const BYTE* anchor = istart;
330*01826a49SYabin Cui     const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
331*01826a49SYabin Cui     /* presumes that, if there is a dictionary, it must be using Attach mode */
332*01826a49SYabin Cui     const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
333*01826a49SYabin Cui     const BYTE* const prefixLowest = base + prefixLowestIndex;
334*01826a49SYabin Cui     const BYTE* const iend = istart + srcSize;
335*01826a49SYabin Cui     const BYTE* const ilimit = iend - HASH_READ_SIZE;
336*01826a49SYabin Cui     U32 offset_1=rep[0], offset_2=rep[1];
337*01826a49SYabin Cui 
338*01826a49SYabin Cui     const ZSTD_matchState_t* const dms = ms->dictMatchState;
339*01826a49SYabin Cui     const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
340*01826a49SYabin Cui     const U32* const dictHashLong  = dms->hashTable;
341*01826a49SYabin Cui     const U32* const dictHashSmall = dms->chainTable;
342*01826a49SYabin Cui     const U32 dictStartIndex       = dms->window.dictLimit;
343*01826a49SYabin Cui     const BYTE* const dictBase     = dms->window.base;
344*01826a49SYabin Cui     const BYTE* const dictStart    = dictBase + dictStartIndex;
345*01826a49SYabin Cui     const BYTE* const dictEnd      = dms->window.nextSrc;
346*01826a49SYabin Cui     const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
347*01826a49SYabin Cui     const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
348*01826a49SYabin Cui     const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
349*01826a49SYabin Cui     const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
350*01826a49SYabin Cui 
351*01826a49SYabin Cui     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
352*01826a49SYabin Cui 
353*01826a49SYabin Cui     /* if a dictionary is attached, it must be within window range */
354*01826a49SYabin Cui     assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
355*01826a49SYabin Cui 
356*01826a49SYabin Cui     if (ms->prefetchCDictTables) {
357*01826a49SYabin Cui         size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
358*01826a49SYabin Cui         size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
359*01826a49SYabin Cui         PREFETCH_AREA(dictHashLong, hashTableBytes);
360*01826a49SYabin Cui         PREFETCH_AREA(dictHashSmall, chainTableBytes);
361*01826a49SYabin Cui     }
362*01826a49SYabin Cui 
363*01826a49SYabin Cui     /* init */
364*01826a49SYabin Cui     ip += (dictAndPrefixLength == 0);
365*01826a49SYabin Cui 
366*01826a49SYabin Cui     /* dictMatchState repCode checks don't currently handle repCode == 0
367*01826a49SYabin Cui      * disabling. */
368*01826a49SYabin Cui     assert(offset_1 <= dictAndPrefixLength);
369*01826a49SYabin Cui     assert(offset_2 <= dictAndPrefixLength);
370*01826a49SYabin Cui 
371*01826a49SYabin Cui     /* Main Search Loop */
372*01826a49SYabin Cui     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
373*01826a49SYabin Cui         size_t mLength;
374*01826a49SYabin Cui         U32 offset;
375*01826a49SYabin Cui         size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
376*01826a49SYabin Cui         size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
377*01826a49SYabin Cui         size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
378*01826a49SYabin Cui         size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
379*01826a49SYabin Cui         U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
380*01826a49SYabin Cui         U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
381*01826a49SYabin Cui         int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
382*01826a49SYabin Cui         int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
383*01826a49SYabin Cui         U32 const curr = (U32)(ip-base);
384*01826a49SYabin Cui         U32 const matchIndexL = hashLong[h2];
385*01826a49SYabin Cui         U32 matchIndexS = hashSmall[h];
386*01826a49SYabin Cui         const BYTE* matchLong = base + matchIndexL;
387*01826a49SYabin Cui         const BYTE* match = base + matchIndexS;
388*01826a49SYabin Cui         const U32 repIndex = curr + 1 - offset_1;
389*01826a49SYabin Cui         const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
390*01826a49SYabin Cui                                dictBase + (repIndex - dictIndexDelta) :
391*01826a49SYabin Cui                                base + repIndex;
392*01826a49SYabin Cui         hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
393*01826a49SYabin Cui 
394*01826a49SYabin Cui         /* check repcode */
395*01826a49SYabin Cui         if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
396*01826a49SYabin Cui             && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
397*01826a49SYabin Cui             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
398*01826a49SYabin Cui             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
399*01826a49SYabin Cui             ip++;
400*01826a49SYabin Cui             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
401*01826a49SYabin Cui             goto _match_stored;
402*01826a49SYabin Cui         }
403*01826a49SYabin Cui 
404*01826a49SYabin Cui         if (matchIndexL > prefixLowestIndex) {
405*01826a49SYabin Cui             /* check prefix long match */
406*01826a49SYabin Cui             if (MEM_read64(matchLong) == MEM_read64(ip)) {
407*01826a49SYabin Cui                 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
408*01826a49SYabin Cui                 offset = (U32)(ip-matchLong);
409*01826a49SYabin Cui                 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
410*01826a49SYabin Cui                 goto _match_found;
411*01826a49SYabin Cui             }
412*01826a49SYabin Cui         } else if (dictTagsMatchL) {
413*01826a49SYabin Cui             /* check dictMatchState long match */
414*01826a49SYabin Cui             U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
415*01826a49SYabin Cui             const BYTE* dictMatchL = dictBase + dictMatchIndexL;
416*01826a49SYabin Cui             assert(dictMatchL < dictEnd);
417*01826a49SYabin Cui 
418*01826a49SYabin Cui             if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
419*01826a49SYabin Cui                 mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
420*01826a49SYabin Cui                 offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
421*01826a49SYabin Cui                 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
422*01826a49SYabin Cui                 goto _match_found;
423*01826a49SYabin Cui         }   }
424*01826a49SYabin Cui 
425*01826a49SYabin Cui         if (matchIndexS > prefixLowestIndex) {
426*01826a49SYabin Cui             /* check prefix short match */
427*01826a49SYabin Cui             if (MEM_read32(match) == MEM_read32(ip)) {
428*01826a49SYabin Cui                 goto _search_next_long;
429*01826a49SYabin Cui             }
430*01826a49SYabin Cui         } else if (dictTagsMatchS) {
431*01826a49SYabin Cui             /* check dictMatchState short match */
432*01826a49SYabin Cui             U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
433*01826a49SYabin Cui             match = dictBase + dictMatchIndexS;
434*01826a49SYabin Cui             matchIndexS = dictMatchIndexS + dictIndexDelta;
435*01826a49SYabin Cui 
436*01826a49SYabin Cui             if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
437*01826a49SYabin Cui                 goto _search_next_long;
438*01826a49SYabin Cui         }   }
439*01826a49SYabin Cui 
440*01826a49SYabin Cui         ip += ((ip-anchor) >> kSearchStrength) + 1;
441*01826a49SYabin Cui #if defined(__aarch64__)
442*01826a49SYabin Cui         PREFETCH_L1(ip+256);
443*01826a49SYabin Cui #endif
444*01826a49SYabin Cui         continue;
445*01826a49SYabin Cui 
446*01826a49SYabin Cui _search_next_long:
447*01826a49SYabin Cui         {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
448*01826a49SYabin Cui             size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
449*01826a49SYabin Cui             U32 const matchIndexL3 = hashLong[hl3];
450*01826a49SYabin Cui             U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
451*01826a49SYabin Cui             int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
452*01826a49SYabin Cui             const BYTE* matchL3 = base + matchIndexL3;
453*01826a49SYabin Cui             hashLong[hl3] = curr + 1;
454*01826a49SYabin Cui 
455*01826a49SYabin Cui             /* check prefix long +1 match */
456*01826a49SYabin Cui             if (matchIndexL3 > prefixLowestIndex) {
457*01826a49SYabin Cui                 if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
458*01826a49SYabin Cui                     mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
459*01826a49SYabin Cui                     ip++;
460*01826a49SYabin Cui                     offset = (U32)(ip-matchL3);
461*01826a49SYabin Cui                     while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
462*01826a49SYabin Cui                     goto _match_found;
463*01826a49SYabin Cui                 }
464*01826a49SYabin Cui             } else if (dictTagsMatchL3) {
465*01826a49SYabin Cui                 /* check dict long +1 match */
466*01826a49SYabin Cui                 U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
467*01826a49SYabin Cui                 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
468*01826a49SYabin Cui                 assert(dictMatchL3 < dictEnd);
469*01826a49SYabin Cui                 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
470*01826a49SYabin Cui                     mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
471*01826a49SYabin Cui                     ip++;
472*01826a49SYabin Cui                     offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
473*01826a49SYabin Cui                     while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
474*01826a49SYabin Cui                     goto _match_found;
475*01826a49SYabin Cui         }   }   }
476*01826a49SYabin Cui 
477*01826a49SYabin Cui         /* if no long +1 match, explore the short match we found */
478*01826a49SYabin Cui         if (matchIndexS < prefixLowestIndex) {
479*01826a49SYabin Cui             mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
480*01826a49SYabin Cui             offset = (U32)(curr - matchIndexS);
481*01826a49SYabin Cui             while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
482*01826a49SYabin Cui         } else {
483*01826a49SYabin Cui             mLength = ZSTD_count(ip+4, match+4, iend) + 4;
484*01826a49SYabin Cui             offset = (U32)(ip - match);
485*01826a49SYabin Cui             while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
486*01826a49SYabin Cui         }
487*01826a49SYabin Cui 
488*01826a49SYabin Cui _match_found:
489*01826a49SYabin Cui         offset_2 = offset_1;
490*01826a49SYabin Cui         offset_1 = offset;
491*01826a49SYabin Cui 
492*01826a49SYabin Cui         ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
493*01826a49SYabin Cui 
494*01826a49SYabin Cui _match_stored:
495*01826a49SYabin Cui         /* match found */
496*01826a49SYabin Cui         ip += mLength;
497*01826a49SYabin Cui         anchor = ip;
498*01826a49SYabin Cui 
499*01826a49SYabin Cui         if (ip <= ilimit) {
500*01826a49SYabin Cui             /* Complementary insertion */
501*01826a49SYabin Cui             /* done after iLimit test, as candidates could be > iend-8 */
502*01826a49SYabin Cui             {   U32 const indexToInsert = curr+2;
503*01826a49SYabin Cui                 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
504*01826a49SYabin Cui                 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
505*01826a49SYabin Cui                 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
506*01826a49SYabin Cui                 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
507*01826a49SYabin Cui             }
508*01826a49SYabin Cui 
509*01826a49SYabin Cui             /* check immediate repcode */
510*01826a49SYabin Cui             while (ip <= ilimit) {
511*01826a49SYabin Cui                 U32 const current2 = (U32)(ip-base);
512*01826a49SYabin Cui                 U32 const repIndex2 = current2 - offset_2;
513*01826a49SYabin Cui                 const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
514*01826a49SYabin Cui                         dictBase + repIndex2 - dictIndexDelta :
515*01826a49SYabin Cui                         base + repIndex2;
516*01826a49SYabin Cui                 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
517*01826a49SYabin Cui                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
518*01826a49SYabin Cui                     const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
519*01826a49SYabin Cui                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
520*01826a49SYabin Cui                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
521*01826a49SYabin Cui                     ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
522*01826a49SYabin Cui                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
523*01826a49SYabin Cui                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
524*01826a49SYabin Cui                     ip += repLength2;
525*01826a49SYabin Cui                     anchor = ip;
526*01826a49SYabin Cui                     continue;
527*01826a49SYabin Cui                 }
528*01826a49SYabin Cui                 break;
529*01826a49SYabin Cui             }
530*01826a49SYabin Cui         }
531*01826a49SYabin Cui     }   /* while (ip < ilimit) */
532*01826a49SYabin Cui 
533*01826a49SYabin Cui     /* save reps for next block */
534*01826a49SYabin Cui     rep[0] = offset_1;
535*01826a49SYabin Cui     rep[1] = offset_2;
536*01826a49SYabin Cui 
537*01826a49SYabin Cui     /* Return the last literals size */
538*01826a49SYabin Cui     return (size_t)(iend - anchor);
539*01826a49SYabin Cui }
540*01826a49SYabin Cui 
541*01826a49SYabin Cui #define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
542*01826a49SYabin Cui     static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
543*01826a49SYabin Cui             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
544*01826a49SYabin Cui             void const* src, size_t srcSize)                                                             \
545*01826a49SYabin Cui     {                                                                                                    \
546*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
547*01826a49SYabin Cui     }
548*01826a49SYabin Cui 
549*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(noDict, 4)
550*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(noDict, 5)
551*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(noDict, 6)
552*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(noDict, 7)
553*01826a49SYabin Cui 
554*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(dictMatchState, 4)
555*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(dictMatchState, 5)
556*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(dictMatchState, 6)
557*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(dictMatchState, 7)
558*01826a49SYabin Cui 
559*01826a49SYabin Cui 
ZSTD_compressBlock_doubleFast(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)560*01826a49SYabin Cui size_t ZSTD_compressBlock_doubleFast(
561*01826a49SYabin Cui         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
562*01826a49SYabin Cui         void const* src, size_t srcSize)
563*01826a49SYabin Cui {
564*01826a49SYabin Cui     const U32 mls = ms->cParams.minMatch;
565*01826a49SYabin Cui     switch(mls)
566*01826a49SYabin Cui     {
567*01826a49SYabin Cui     default: /* includes case 3 */
568*01826a49SYabin Cui     case 4 :
569*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
570*01826a49SYabin Cui     case 5 :
571*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
572*01826a49SYabin Cui     case 6 :
573*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
574*01826a49SYabin Cui     case 7 :
575*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
576*01826a49SYabin Cui     }
577*01826a49SYabin Cui }
578*01826a49SYabin Cui 
579*01826a49SYabin Cui 
ZSTD_compressBlock_doubleFast_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)580*01826a49SYabin Cui size_t ZSTD_compressBlock_doubleFast_dictMatchState(
581*01826a49SYabin Cui         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
582*01826a49SYabin Cui         void const* src, size_t srcSize)
583*01826a49SYabin Cui {
584*01826a49SYabin Cui     const U32 mls = ms->cParams.minMatch;
585*01826a49SYabin Cui     switch(mls)
586*01826a49SYabin Cui     {
587*01826a49SYabin Cui     default: /* includes case 3 */
588*01826a49SYabin Cui     case 4 :
589*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
590*01826a49SYabin Cui     case 5 :
591*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
592*01826a49SYabin Cui     case 6 :
593*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
594*01826a49SYabin Cui     case 7 :
595*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
596*01826a49SYabin Cui     }
597*01826a49SYabin Cui }
598*01826a49SYabin Cui 
599*01826a49SYabin Cui 
600*01826a49SYabin Cui static
601*01826a49SYabin Cui ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls)602*01826a49SYabin Cui size_t ZSTD_compressBlock_doubleFast_extDict_generic(
603*01826a49SYabin Cui         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
604*01826a49SYabin Cui         void const* src, size_t srcSize,
605*01826a49SYabin Cui         U32 const mls /* template */)
606*01826a49SYabin Cui {
607*01826a49SYabin Cui     ZSTD_compressionParameters const* cParams = &ms->cParams;
608*01826a49SYabin Cui     U32* const hashLong = ms->hashTable;
609*01826a49SYabin Cui     U32  const hBitsL = cParams->hashLog;
610*01826a49SYabin Cui     U32* const hashSmall = ms->chainTable;
611*01826a49SYabin Cui     U32  const hBitsS = cParams->chainLog;
612*01826a49SYabin Cui     const BYTE* const istart = (const BYTE*)src;
613*01826a49SYabin Cui     const BYTE* ip = istart;
614*01826a49SYabin Cui     const BYTE* anchor = istart;
615*01826a49SYabin Cui     const BYTE* const iend = istart + srcSize;
616*01826a49SYabin Cui     const BYTE* const ilimit = iend - 8;
617*01826a49SYabin Cui     const BYTE* const base = ms->window.base;
618*01826a49SYabin Cui     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
619*01826a49SYabin Cui     const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
620*01826a49SYabin Cui     const U32   dictStartIndex = lowLimit;
621*01826a49SYabin Cui     const U32   dictLimit = ms->window.dictLimit;
622*01826a49SYabin Cui     const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
623*01826a49SYabin Cui     const BYTE* const prefixStart = base + prefixStartIndex;
624*01826a49SYabin Cui     const BYTE* const dictBase = ms->window.dictBase;
625*01826a49SYabin Cui     const BYTE* const dictStart = dictBase + dictStartIndex;
626*01826a49SYabin Cui     const BYTE* const dictEnd = dictBase + prefixStartIndex;
627*01826a49SYabin Cui     U32 offset_1=rep[0], offset_2=rep[1];
628*01826a49SYabin Cui 
629*01826a49SYabin Cui     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
630*01826a49SYabin Cui 
631*01826a49SYabin Cui     /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
632*01826a49SYabin Cui     if (prefixStartIndex == dictStartIndex)
633*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
634*01826a49SYabin Cui 
635*01826a49SYabin Cui     /* Search Loop */
636*01826a49SYabin Cui     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
637*01826a49SYabin Cui         const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
638*01826a49SYabin Cui         const U32 matchIndex = hashSmall[hSmall];
639*01826a49SYabin Cui         const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
640*01826a49SYabin Cui         const BYTE* match = matchBase + matchIndex;
641*01826a49SYabin Cui 
642*01826a49SYabin Cui         const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
643*01826a49SYabin Cui         const U32 matchLongIndex = hashLong[hLong];
644*01826a49SYabin Cui         const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
645*01826a49SYabin Cui         const BYTE* matchLong = matchLongBase + matchLongIndex;
646*01826a49SYabin Cui 
647*01826a49SYabin Cui         const U32 curr = (U32)(ip-base);
648*01826a49SYabin Cui         const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
649*01826a49SYabin Cui         const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
650*01826a49SYabin Cui         const BYTE* const repMatch = repBase + repIndex;
651*01826a49SYabin Cui         size_t mLength;
652*01826a49SYabin Cui         hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
653*01826a49SYabin Cui 
654*01826a49SYabin Cui         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
655*01826a49SYabin Cui             & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
656*01826a49SYabin Cui           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
657*01826a49SYabin Cui             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
658*01826a49SYabin Cui             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
659*01826a49SYabin Cui             ip++;
660*01826a49SYabin Cui             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
661*01826a49SYabin Cui         } else {
662*01826a49SYabin Cui             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
663*01826a49SYabin Cui                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
664*01826a49SYabin Cui                 const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
665*01826a49SYabin Cui                 U32 offset;
666*01826a49SYabin Cui                 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
667*01826a49SYabin Cui                 offset = curr - matchLongIndex;
668*01826a49SYabin Cui                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
669*01826a49SYabin Cui                 offset_2 = offset_1;
670*01826a49SYabin Cui                 offset_1 = offset;
671*01826a49SYabin Cui                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
672*01826a49SYabin Cui 
673*01826a49SYabin Cui             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
674*01826a49SYabin Cui                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
675*01826a49SYabin Cui                 U32 const matchIndex3 = hashLong[h3];
676*01826a49SYabin Cui                 const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
677*01826a49SYabin Cui                 const BYTE* match3 = match3Base + matchIndex3;
678*01826a49SYabin Cui                 U32 offset;
679*01826a49SYabin Cui                 hashLong[h3] = curr + 1;
680*01826a49SYabin Cui                 if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
681*01826a49SYabin Cui                     const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
682*01826a49SYabin Cui                     const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
683*01826a49SYabin Cui                     mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
684*01826a49SYabin Cui                     ip++;
685*01826a49SYabin Cui                     offset = curr+1 - matchIndex3;
686*01826a49SYabin Cui                     while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
687*01826a49SYabin Cui                 } else {
688*01826a49SYabin Cui                     const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
689*01826a49SYabin Cui                     const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
690*01826a49SYabin Cui                     mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
691*01826a49SYabin Cui                     offset = curr - matchIndex;
692*01826a49SYabin Cui                     while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
693*01826a49SYabin Cui                 }
694*01826a49SYabin Cui                 offset_2 = offset_1;
695*01826a49SYabin Cui                 offset_1 = offset;
696*01826a49SYabin Cui                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
697*01826a49SYabin Cui 
698*01826a49SYabin Cui             } else {
699*01826a49SYabin Cui                 ip += ((ip-anchor) >> kSearchStrength) + 1;
700*01826a49SYabin Cui                 continue;
701*01826a49SYabin Cui         }   }
702*01826a49SYabin Cui 
703*01826a49SYabin Cui         /* move to next sequence start */
704*01826a49SYabin Cui         ip += mLength;
705*01826a49SYabin Cui         anchor = ip;
706*01826a49SYabin Cui 
707*01826a49SYabin Cui         if (ip <= ilimit) {
708*01826a49SYabin Cui             /* Complementary insertion */
709*01826a49SYabin Cui             /* done after iLimit test, as candidates could be > iend-8 */
710*01826a49SYabin Cui             {   U32 const indexToInsert = curr+2;
711*01826a49SYabin Cui                 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
712*01826a49SYabin Cui                 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
713*01826a49SYabin Cui                 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
714*01826a49SYabin Cui                 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
715*01826a49SYabin Cui             }
716*01826a49SYabin Cui 
717*01826a49SYabin Cui             /* check immediate repcode */
718*01826a49SYabin Cui             while (ip <= ilimit) {
719*01826a49SYabin Cui                 U32 const current2 = (U32)(ip-base);
720*01826a49SYabin Cui                 U32 const repIndex2 = current2 - offset_2;
721*01826a49SYabin Cui                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
722*01826a49SYabin Cui                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
723*01826a49SYabin Cui                     & (offset_2 <= current2 - dictStartIndex))
724*01826a49SYabin Cui                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
725*01826a49SYabin Cui                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
726*01826a49SYabin Cui                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
727*01826a49SYabin Cui                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
728*01826a49SYabin Cui                     ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
729*01826a49SYabin Cui                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
730*01826a49SYabin Cui                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
731*01826a49SYabin Cui                     ip += repLength2;
732*01826a49SYabin Cui                     anchor = ip;
733*01826a49SYabin Cui                     continue;
734*01826a49SYabin Cui                 }
735*01826a49SYabin Cui                 break;
736*01826a49SYabin Cui     }   }   }
737*01826a49SYabin Cui 
738*01826a49SYabin Cui     /* save reps for next block */
739*01826a49SYabin Cui     rep[0] = offset_1;
740*01826a49SYabin Cui     rep[1] = offset_2;
741*01826a49SYabin Cui 
742*01826a49SYabin Cui     /* Return the last literals size */
743*01826a49SYabin Cui     return (size_t)(iend - anchor);
744*01826a49SYabin Cui }
745*01826a49SYabin Cui 
746*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(extDict, 4)
747*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(extDict, 5)
748*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(extDict, 6)
749*01826a49SYabin Cui ZSTD_GEN_DFAST_FN(extDict, 7)
750*01826a49SYabin Cui 
ZSTD_compressBlock_doubleFast_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)751*01826a49SYabin Cui size_t ZSTD_compressBlock_doubleFast_extDict(
752*01826a49SYabin Cui         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
753*01826a49SYabin Cui         void const* src, size_t srcSize)
754*01826a49SYabin Cui {
755*01826a49SYabin Cui     U32 const mls = ms->cParams.minMatch;
756*01826a49SYabin Cui     switch(mls)
757*01826a49SYabin Cui     {
758*01826a49SYabin Cui     default: /* includes case 3 */
759*01826a49SYabin Cui     case 4 :
760*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
761*01826a49SYabin Cui     case 5 :
762*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
763*01826a49SYabin Cui     case 6 :
764*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
765*01826a49SYabin Cui     case 7 :
766*01826a49SYabin Cui         return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
767*01826a49SYabin Cui     }
768*01826a49SYabin Cui }
769*01826a49SYabin Cui 
770*01826a49SYabin Cui #endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
771