author | Rutger Broekhoff | 2023-12-29 21:31:53 +0100 |
---|---|---|
committer | Rutger Broekhoff | 2023-12-29 21:31:53 +0100 |
commit | 404aeae4545d2426c089a5f8d5e82dae56f5212b (patch) | |
tree | 2d84e00af272b39fc04f3795ae06bc48970e57b5 /vendor/github.com/klauspost/compress/s2/encode_better.go | |
parent | 209d8b0187ed025dec9ac149ebcced3462877bff (diff) | |
download | gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.tar.gz gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.zip |
Make Nix builds work
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encode_better.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/s2/encode_better.go | 1106 |
1 file changed, 1106 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encode_better.go b/vendor/github.com/klauspost/compress/s2/encode_better.go
new file mode 100644
index 0000000..544cb1e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/s2/encode_better.go
@@ -0,0 +1,1106 @@ | |||
1 | // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||
2 | // Copyright (c) 2019 Klaus Post. All rights reserved. | ||
3 | // Use of this source code is governed by a BSD-style | ||
4 | // license that can be found in the LICENSE file. | ||
5 | |||
6 | package s2 | ||
7 | |||
8 | import ( | ||
9 | "bytes" | ||
10 | "fmt" | ||
11 | "math/bits" | ||
12 | ) | ||
13 | |||
14 | // hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. | ||
15 | // Preferably h should be a constant and should always be <32. | ||
16 | func hash4(u uint64, h uint8) uint32 { | ||
17 | const prime4bytes = 2654435761 | ||
18 | return (uint32(u) * prime4bytes) >> ((32 - h) & 31) | ||
19 | } | ||
20 | |||
21 | // hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits. | ||
22 | // Preferably h should be a constant and should always be <64. | ||
23 | func hash5(u uint64, h uint8) uint32 { | ||
24 | const prime5bytes = 889523592379 | ||
25 | return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63)) | ||
26 | } | ||
27 | |||
28 | // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. | ||
29 | // Preferably h should be a constant and should always be <64. | ||
30 | func hash7(u uint64, h uint8) uint32 { | ||
31 | const prime7bytes = 58295818150454627 | ||
32 | return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) | ||
33 | } | ||
34 | |||
35 | // hash8 returns the hash of u to fit in a hash table with h bits. | ||
36 | // Preferably h should be a constant and should always be <64. | ||
37 | func hash8(u uint64, h uint8) uint32 { | ||
38 | const prime8bytes = 0xcf1bbcdcb7a56463 | ||
39 | return uint32((u * prime8bytes) >> ((64 - h) & 63)) | ||
40 | } | ||
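The four hash functions above are instances of multiply-shift hashing: the sampled bytes are multiplied by a large odd constant so their bits diffuse into the top of the product, and the final right shift keeps exactly h of those top bits as the table index. A minimal self-contained sketch of the scheme, mirroring hash7 and the 17-bit long table used below (load64 is replaced here by an explicit little-endian load):

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// hash7 mirrors the function above: the low 7 bytes of u are shifted to
// the top of the word (dropping byte 8), multiplied by a large odd prime,
// and the top h bits of the product become the table index.
func hash7(u uint64, h uint8) uint32 {
	const prime7bytes = 58295818150454627
	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}

func main() {
	const lTableBits = 17 // matches the long table below
	src := []byte("some input, at least eight bytes")
	u := binary.LittleEndian.Uint64(src) // like load64(src, 0)
	idx := hash7(u, lTableBits)
	fmt.Printf("index %d fits a %d-entry table: %v\n",
		idx, 1<<lTableBits, idx < 1<<lTableBits)
}
```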
41 | |||
42 | // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It | ||
43 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
44 | // been written. | ||
45 | // | ||
46 | // It also assumes that: | ||
47 | // | ||
48 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
49 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
50 | func encodeBlockBetterGo(dst, src []byte) (d int) { | ||
51 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
52 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
53 | // looking for copies. | ||
54 | sLimit := len(src) - inputMargin | ||
55 | if len(src) < minNonLiteralBlockSize { | ||
56 | return 0 | ||
57 | } | ||
58 | |||
59 | // Initialize the hash tables. | ||
60 | const ( | ||
61 | // Long hash matches. | ||
62 | lTableBits = 17 | ||
63 | maxLTableSize = 1 << lTableBits | ||
64 | |||
65 | // Short hash matches. | ||
66 | sTableBits = 14 | ||
67 | maxSTableSize = 1 << sTableBits | ||
68 | ) | ||
69 | |||
70 | var lTable [maxLTableSize]uint32 | ||
71 | var sTable [maxSTableSize]uint32 | ||
72 | |||
73 | // Bail if we can't compress to at least this. | ||
74 | dstLimit := len(src) - len(src)>>5 - 6 | ||
75 | |||
76 | // nextEmit is where in src the next emitLiteral should start from. | ||
77 | nextEmit := 0 | ||
78 | |||
79 | // The encoded form must start with a literal, as there are no previous | ||
80 | // bytes to copy, so we start looking for hash matches at s == 1. | ||
81 | s := 1 | ||
82 | cv := load64(src, s) | ||
83 | |||
84 | // We initialize repeat to 0, so we never match on the first attempt. | ||
85 | repeat := 0 | ||
86 | |||
87 | for { | ||
88 | candidateL := 0 | ||
89 | nextS := 0 | ||
90 | for { | ||
91 | // Next src position to check | ||
92 | nextS = s + (s-nextEmit)>>7 + 1 | ||
93 | if nextS > sLimit { | ||
94 | goto emitRemainder | ||
95 | } | ||
96 | hashL := hash7(cv, lTableBits) | ||
97 | hashS := hash4(cv, sTableBits) | ||
98 | candidateL = int(lTable[hashL]) | ||
99 | candidateS := int(sTable[hashS]) | ||
100 | lTable[hashL] = uint32(s) | ||
101 | sTable[hashS] = uint32(s) | ||
102 | |||
103 | valLong := load64(src, candidateL) | ||
104 | valShort := load64(src, candidateS) | ||
105 | |||
106 | // If long matches at least 8 bytes, use that. | ||
107 | if cv == valLong { | ||
108 | break | ||
109 | } | ||
110 | if cv == valShort { | ||
111 | candidateL = candidateS | ||
112 | break | ||
113 | } | ||
114 | |||
115 | // Check repeat at offset checkRep. | ||
116 | const checkRep = 1 | ||
117 | // Minimum length of a repeat. Tested with various values. | ||
118 | // While 4-5 offers improvements in some cases, 6 reduces | ||
119 | // regressions significantly. | ||
120 | const wantRepeatBytes = 6 | ||
121 | const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep) | ||
122 | if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask { | ||
123 | base := s + checkRep | ||
124 | // Extend back | ||
125 | for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { | ||
126 | i-- | ||
127 | base-- | ||
128 | } | ||
129 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
130 | |||
131 | // Extend forward | ||
132 | candidate := s - repeat + wantRepeatBytes + checkRep | ||
133 | s += wantRepeatBytes + checkRep | ||
134 | for s < len(src) { | ||
135 | if len(src)-s < 8 { | ||
136 | if src[s] == src[candidate] { | ||
137 | s++ | ||
138 | candidate++ | ||
139 | continue | ||
140 | } | ||
141 | break | ||
142 | } | ||
143 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
144 | s += bits.TrailingZeros64(diff) >> 3 | ||
145 | break | ||
146 | } | ||
147 | s += 8 | ||
148 | candidate += 8 | ||
149 | } | ||
150 | // Same as `d += emitCopy(dst[d:], repeat, s-base)`, but skips storing the offset. | ||
151 | d += emitRepeat(dst[d:], repeat, s-base) | ||
152 | nextEmit = s | ||
153 | if s >= sLimit { | ||
154 | goto emitRemainder | ||
155 | } | ||
156 | // Index in-between | ||
157 | index0 := base + 1 | ||
158 | index1 := s - 2 | ||
159 | |||
160 | for index0 < index1 { | ||
161 | cv0 := load64(src, index0) | ||
162 | cv1 := load64(src, index1) | ||
163 | lTable[hash7(cv0, lTableBits)] = uint32(index0) | ||
164 | sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) | ||
165 | |||
166 | lTable[hash7(cv1, lTableBits)] = uint32(index1) | ||
167 | sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) | ||
168 | index0 += 2 | ||
169 | index1 -= 2 | ||
170 | } | ||
171 | |||
172 | cv = load64(src, s) | ||
173 | continue | ||
174 | } | ||
175 | |||
176 | // The long table hashes 7 bytes, so this match likely extends to 7; take that. | ||
177 | if uint32(cv) == uint32(valLong) { | ||
178 | break | ||
179 | } | ||
180 | |||
181 | // Check our short candidate | ||
182 | if uint32(cv) == uint32(valShort) { | ||
183 | // Try a long candidate at s+1 | ||
184 | hashL = hash7(cv>>8, lTableBits) | ||
185 | candidateL = int(lTable[hashL]) | ||
186 | lTable[hashL] = uint32(s + 1) | ||
187 | if uint32(cv>>8) == load32(src, candidateL) { | ||
188 | s++ | ||
189 | break | ||
190 | } | ||
191 | // Use our short candidate. | ||
192 | candidateL = candidateS | ||
193 | break | ||
194 | } | ||
195 | |||
196 | cv = load64(src, nextS) | ||
197 | s = nextS | ||
198 | } | ||
199 | |||
200 | // Extend backwards | ||
201 | for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { | ||
202 | candidateL-- | ||
203 | s-- | ||
204 | } | ||
205 | |||
206 | // Bail if we exceed the maximum size. | ||
207 | if d+(s-nextEmit) > dstLimit { | ||
208 | return 0 | ||
209 | } | ||
210 | |||
211 | base := s | ||
212 | offset := base - candidateL | ||
213 | |||
214 | // Extend the 4-byte match as long as possible. | ||
215 | s += 4 | ||
216 | candidateL += 4 | ||
217 | for s < len(src) { | ||
218 | if len(src)-s < 8 { | ||
219 | if src[s] == src[candidateL] { | ||
220 | s++ | ||
221 | candidateL++ | ||
222 | continue | ||
223 | } | ||
224 | break | ||
225 | } | ||
226 | if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { | ||
227 | s += bits.TrailingZeros64(diff) >> 3 | ||
228 | break | ||
229 | } | ||
230 | s += 8 | ||
231 | candidateL += 8 | ||
232 | } | ||
233 | |||
234 | if offset > 65535 && s-base <= 5 && repeat != offset { | ||
235 | // Bail if the match is equal to or worse than the encoding. | ||
236 | s = nextS + 1 | ||
237 | if s >= sLimit { | ||
238 | goto emitRemainder | ||
239 | } | ||
240 | cv = load64(src, s) | ||
241 | continue | ||
242 | } | ||
243 | |||
244 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
245 | if repeat == offset { | ||
246 | d += emitRepeat(dst[d:], offset, s-base) | ||
247 | } else { | ||
248 | d += emitCopy(dst[d:], offset, s-base) | ||
249 | repeat = offset | ||
250 | } | ||
251 | |||
252 | nextEmit = s | ||
253 | if s >= sLimit { | ||
254 | goto emitRemainder | ||
255 | } | ||
256 | |||
257 | if d > dstLimit { | ||
258 | // Bail if we don't have space for more. | ||
259 | return 0 | ||
260 | } | ||
261 | |||
262 | // Index short & long | ||
263 | index0 := base + 1 | ||
264 | index1 := s - 2 | ||
265 | |||
266 | cv0 := load64(src, index0) | ||
267 | cv1 := load64(src, index1) | ||
268 | lTable[hash7(cv0, lTableBits)] = uint32(index0) | ||
269 | sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) | ||
270 | |||
271 | // Updating lTable could be postponed, but the difference is very minor. | ||
272 | lTable[hash7(cv1, lTableBits)] = uint32(index1) | ||
273 | sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) | ||
274 | index0 += 1 | ||
275 | index1 -= 1 | ||
276 | cv = load64(src, s) | ||
277 | |||
278 | // Index large values sparsely in between. | ||
279 | // We do two starting from different offsets for speed. | ||
280 | index2 := (index0 + index1 + 1) >> 1 | ||
281 | for index2 < index1 { | ||
282 | lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) | ||
283 | lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2) | ||
284 | index0 += 2 | ||
285 | index2 += 2 | ||
286 | } | ||
287 | } | ||
288 | |||
289 | emitRemainder: | ||
290 | if nextEmit < len(src) { | ||
291 | // Bail if we exceed the maximum size. | ||
292 | if d+len(src)-nextEmit > dstLimit { | ||
293 | return 0 | ||
294 | } | ||
295 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
296 | } | ||
297 | return d | ||
298 | } | ||
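The inner extension loops in encodeBlockBetterGo compare the current position against the candidate eight bytes at a time: when the XOR of two load64 words is nonzero, the trailing-zero bit count divided by eight gives the number of leading bytes that still matched (loads are little-endian). A standalone sketch of just that idiom, using a hypothetical matchLen helper:

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i : i+8])
}

// matchLen counts matching bytes between b[i:] and b[j:] (i < j),
// eight at a time, the same way the extension loops above do.
func matchLen(b []byte, i, j int) int {
	n := 0
	for j+n+8 <= len(b) {
		if diff := load64(b, i+n) ^ load64(b, j+n); diff != 0 {
			// Trailing zero bits / 8 = number of equal leading bytes.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}
	// Byte-at-a-time tail, like the len(src)-s < 8 path above.
	for j+n < len(b) && b[i+n] == b[j+n] {
		n++
	}
	return n
}

func main() {
	data := []byte("abcdefgh12345678abcdefgh1234XXXX")
	fmt.Println(matchLen(data, 0, 16)) // 12: "abcdefgh1234" matches
}
```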
299 | |||
300 | // encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It | ||
301 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
302 | // been written. | ||
303 | // | ||
304 | // It also assumes that: | ||
305 | // | ||
306 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
307 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
308 | func encodeBlockBetterSnappyGo(dst, src []byte) (d int) { | ||
309 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
310 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
311 | // looking for copies. | ||
312 | sLimit := len(src) - inputMargin | ||
313 | if len(src) < minNonLiteralBlockSize { | ||
314 | return 0 | ||
315 | } | ||
316 | |||
317 | // Initialize the hash tables. | ||
318 | const ( | ||
319 | // Long hash matches. | ||
320 | lTableBits = 16 | ||
321 | maxLTableSize = 1 << lTableBits | ||
322 | |||
323 | // Short hash matches. | ||
324 | sTableBits = 14 | ||
325 | maxSTableSize = 1 << sTableBits | ||
326 | ) | ||
327 | |||
328 | var lTable [maxLTableSize]uint32 | ||
329 | var sTable [maxSTableSize]uint32 | ||
330 | |||
331 | // Bail if we can't compress to at least this. | ||
332 | dstLimit := len(src) - len(src)>>5 - 6 | ||
333 | |||
334 | // nextEmit is where in src the next emitLiteral should start from. | ||
335 | nextEmit := 0 | ||
336 | |||
337 | // The encoded form must start with a literal, as there are no previous | ||
338 | // bytes to copy, so we start looking for hash matches at s == 1. | ||
339 | s := 1 | ||
340 | cv := load64(src, s) | ||
341 | |||
342 | // We initialize repeat to 0, so we never match on the first attempt. | ||
343 | repeat := 0 | ||
344 | const maxSkip = 100 | ||
345 | |||
346 | for { | ||
347 | candidateL := 0 | ||
348 | nextS := 0 | ||
349 | for { | ||
350 | // Next src position to check | ||
351 | nextS = (s-nextEmit)>>7 + 1 | ||
352 | if nextS > maxSkip { | ||
353 | nextS = s + maxSkip | ||
354 | } else { | ||
355 | nextS += s | ||
356 | } | ||
357 | |||
358 | if nextS > sLimit { | ||
359 | goto emitRemainder | ||
360 | } | ||
361 | hashL := hash7(cv, lTableBits) | ||
362 | hashS := hash4(cv, sTableBits) | ||
363 | candidateL = int(lTable[hashL]) | ||
364 | candidateS := int(sTable[hashS]) | ||
365 | lTable[hashL] = uint32(s) | ||
366 | sTable[hashS] = uint32(s) | ||
367 | |||
368 | if uint32(cv) == load32(src, candidateL) { | ||
369 | break | ||
370 | } | ||
371 | |||
372 | // Check our short candidate | ||
373 | if uint32(cv) == load32(src, candidateS) { | ||
374 | // Try a long candidate at s+1 | ||
375 | hashL = hash7(cv>>8, lTableBits) | ||
376 | candidateL = int(lTable[hashL]) | ||
377 | lTable[hashL] = uint32(s + 1) | ||
378 | if uint32(cv>>8) == load32(src, candidateL) { | ||
379 | s++ | ||
380 | break | ||
381 | } | ||
382 | // Use our short candidate. | ||
383 | candidateL = candidateS | ||
384 | break | ||
385 | } | ||
386 | |||
387 | cv = load64(src, nextS) | ||
388 | s = nextS | ||
389 | } | ||
390 | |||
391 | // Extend backwards | ||
392 | for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { | ||
393 | candidateL-- | ||
394 | s-- | ||
395 | } | ||
396 | |||
397 | // Bail if we exceed the maximum size. | ||
398 | if d+(s-nextEmit) > dstLimit { | ||
399 | return 0 | ||
400 | } | ||
401 | |||
402 | base := s | ||
403 | offset := base - candidateL | ||
404 | |||
405 | // Extend the 4-byte match as long as possible. | ||
406 | s += 4 | ||
407 | candidateL += 4 | ||
408 | for s < len(src) { | ||
409 | if len(src)-s < 8 { | ||
410 | if src[s] == src[candidateL] { | ||
411 | s++ | ||
412 | candidateL++ | ||
413 | continue | ||
414 | } | ||
415 | break | ||
416 | } | ||
417 | if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { | ||
418 | s += bits.TrailingZeros64(diff) >> 3 | ||
419 | break | ||
420 | } | ||
421 | s += 8 | ||
422 | candidateL += 8 | ||
423 | } | ||
424 | |||
425 | if offset > 65535 && s-base <= 5 && repeat != offset { | ||
426 | // Bail if the match is equal to or worse than the encoding. | ||
427 | s = nextS + 1 | ||
428 | if s >= sLimit { | ||
429 | goto emitRemainder | ||
430 | } | ||
431 | cv = load64(src, s) | ||
432 | continue | ||
433 | } | ||
434 | |||
435 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
436 | d += emitCopyNoRepeat(dst[d:], offset, s-base) | ||
437 | repeat = offset | ||
438 | |||
439 | nextEmit = s | ||
440 | if s >= sLimit { | ||
441 | goto emitRemainder | ||
442 | } | ||
443 | |||
444 | if d > dstLimit { | ||
445 | // Bail if we don't have space for more. | ||
446 | return 0 | ||
447 | } | ||
448 | |||
449 | // Index short & long | ||
450 | index0 := base + 1 | ||
451 | index1 := s - 2 | ||
452 | |||
453 | cv0 := load64(src, index0) | ||
454 | cv1 := load64(src, index1) | ||
455 | lTable[hash7(cv0, lTableBits)] = uint32(index0) | ||
456 | sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) | ||
457 | |||
458 | lTable[hash7(cv1, lTableBits)] = uint32(index1) | ||
459 | sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) | ||
460 | index0 += 1 | ||
461 | index1 -= 1 | ||
462 | cv = load64(src, s) | ||
463 | |||
464 | // Index large values sparsely in between. | ||
465 | // We do two starting from different offsets for speed. | ||
466 | index2 := (index0 + index1 + 1) >> 1 | ||
467 | for index2 < index1 { | ||
468 | lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) | ||
469 | lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2) | ||
470 | index0 += 2 | ||
471 | index2 += 2 | ||
472 | } | ||
473 | } | ||
474 | |||
475 | emitRemainder: | ||
476 | if nextEmit < len(src) { | ||
477 | // Bail if we exceed the maximum size. | ||
478 | if d+len(src)-nextEmit > dstLimit { | ||
479 | return 0 | ||
480 | } | ||
481 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
482 | } | ||
483 | return d | ||
484 | } | ||
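Both Go fallbacks above are internal; callers normally reach them through the package's exported helpers (the Snappy-compatible variant sits behind EncodeSnappyBetter). A plausible round trip through that public API, sketched rather than taken from this commit:

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress/s2"
)

func main() {
	src := bytes.Repeat([]byte("compressible data "), 100)

	// EncodeBetter spends more CPU than Encode for denser output;
	// passing nil as dst lets it allocate a large-enough buffer.
	enc := s2.EncodeBetter(nil, src)

	dec, err := s2.Decode(nil, enc)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%d -> %d bytes, round-trips: %v\n",
		len(src), len(enc), bytes.Equal(dec, src))
}
```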
485 | |||
486 | // encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It | ||
487 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
488 | // been written. | ||
489 | // | ||
490 | // It also assumes that: | ||
491 | // | ||
492 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
493 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
494 | func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) { | ||
495 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
496 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
497 | // looking for copies. | ||
498 | // Initialize the hash tables. | ||
499 | const ( | ||
500 | // Long hash matches. | ||
501 | lTableBits = 17 | ||
502 | maxLTableSize = 1 << lTableBits | ||
503 | |||
504 | // Short hash matches. | ||
505 | sTableBits = 14 | ||
506 | maxSTableSize = 1 << sTableBits | ||
507 | |||
508 | maxAhead = 8 // maximum bytes ahead without checking sLimit | ||
509 | |||
510 | debug = false | ||
511 | ) | ||
512 | |||
513 | sLimit := len(src) - inputMargin | ||
514 | if sLimit > MaxDictSrcOffset-maxAhead { | ||
515 | sLimit = MaxDictSrcOffset - maxAhead | ||
516 | } | ||
517 | if len(src) < minNonLiteralBlockSize { | ||
518 | return 0 | ||
519 | } | ||
520 | |||
521 | dict.initBetter() | ||
522 | |||
523 | var lTable [maxLTableSize]uint32 | ||
524 | var sTable [maxSTableSize]uint32 | ||
525 | |||
526 | // Bail if we can't compress to at least this. | ||
527 | dstLimit := len(src) - len(src)>>5 - 6 | ||
528 | |||
529 | // nextEmit is where in src the next emitLiteral should start from. | ||
530 | nextEmit := 0 | ||
531 | |||
532 | // The encoded form can begin with a dictionary match, as the dict | ||
533 | // supplies previous bytes to copy, so we start looking at s == 0. | ||
534 | s := 0 | ||
535 | cv := load64(src, s) | ||
536 | |||
537 | // Initialize repeat from the dictionary's stored repeat offset. | ||
538 | repeat := len(dict.dict) - dict.repeat | ||
539 | |||
540 | // While in dict | ||
541 | searchDict: | ||
542 | for { | ||
543 | candidateL := 0 | ||
544 | nextS := 0 | ||
545 | for { | ||
546 | // Next src position to check | ||
547 | nextS = s + (s-nextEmit)>>7 + 1 | ||
548 | if nextS > sLimit { | ||
549 | break searchDict | ||
550 | } | ||
551 | hashL := hash7(cv, lTableBits) | ||
552 | hashS := hash4(cv, sTableBits) | ||
553 | candidateL = int(lTable[hashL]) | ||
554 | candidateS := int(sTable[hashS]) | ||
555 | dictL := int(dict.betterTableLong[hashL]) | ||
556 | dictS := int(dict.betterTableShort[hashS]) | ||
557 | lTable[hashL] = uint32(s) | ||
558 | sTable[hashS] = uint32(s) | ||
559 | |||
560 | valLong := load64(src, candidateL) | ||
561 | valShort := load64(src, candidateS) | ||
562 | |||
563 | // If long matches at least 8 bytes, use that. | ||
564 | if s != 0 { | ||
565 | if cv == valLong { | ||
566 | goto emitMatch | ||
567 | } | ||
568 | if cv == valShort { | ||
569 | candidateL = candidateS | ||
570 | goto emitMatch | ||
571 | } | ||
572 | } | ||
573 | |||
574 | // Check dict repeat. | ||
575 | if repeat >= s+4 { | ||
576 | candidate := len(dict.dict) - repeat + s | ||
577 | if candidate > 0 && uint32(cv) == load32(dict.dict, candidate) { | ||
578 | // Extend back | ||
579 | base := s | ||
580 | for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; { | ||
581 | i-- | ||
582 | base-- | ||
583 | } | ||
584 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
585 | if debug && nextEmit != base { | ||
586 | fmt.Println("emitted ", base-nextEmit, "literals") | ||
587 | } | ||
588 | s += 4 | ||
589 | candidate += 4 | ||
590 | for candidate < len(dict.dict)-8 && s <= len(src)-8 { | ||
591 | if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 { | ||
592 | s += bits.TrailingZeros64(diff) >> 3 | ||
593 | break | ||
594 | } | ||
595 | s += 8 | ||
596 | candidate += 8 | ||
597 | } | ||
598 | d += emitRepeat(dst[d:], repeat, s-base) | ||
599 | if debug { | ||
600 | fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s) | ||
601 | } | ||
602 | nextEmit = s | ||
603 | if s >= sLimit { | ||
604 | break searchDict | ||
605 | } | ||
606 | // Index in-between | ||
607 | index0 := base + 1 | ||
608 | index1 := s - 2 | ||
609 | |||
610 | cv = load64(src, s) | ||
611 | for index0 < index1 { | ||
612 | cv0 := load64(src, index0) | ||
613 | cv1 := load64(src, index1) | ||
614 | lTable[hash7(cv0, lTableBits)] = uint32(index0) | ||
615 | sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) | ||
616 | |||
617 | lTable[hash7(cv1, lTableBits)] = uint32(index1) | ||
618 | sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) | ||
619 | index0 += 2 | ||
620 | index1 -= 2 | ||
621 | } | ||
622 | continue | ||
623 | } | ||
624 | } | ||
625 | // Don't try to find a match at s == 0. | ||
626 | if s == 0 { | ||
627 | cv = load64(src, nextS) | ||
628 | s = nextS | ||
629 | continue | ||
630 | } | ||
631 | |||
632 | // The long table hashes 7 bytes, so this match likely extends to 7; take that. | ||
633 | if uint32(cv) == uint32(valLong) { | ||
634 | goto emitMatch | ||
635 | } | ||
636 | |||
637 | // Check the long dict candidate. | ||
638 | if uint32(cv) == load32(dict.dict, dictL) { | ||
639 | candidateL = dictL | ||
640 | goto emitDict | ||
641 | } | ||
642 | |||
643 | // Check our short candidate | ||
644 | if uint32(cv) == uint32(valShort) { | ||
645 | // Try a long candidate at s+1 | ||
646 | hashL = hash7(cv>>8, lTableBits) | ||
647 | candidateL = int(lTable[hashL]) | ||
648 | lTable[hashL] = uint32(s + 1) | ||
649 | if uint32(cv>>8) == load32(src, candidateL) { | ||
650 | s++ | ||
651 | goto emitMatch | ||
652 | } | ||
653 | // Use our short candidate. | ||
654 | candidateL = candidateS | ||
655 | goto emitMatch | ||
656 | } | ||
657 | if uint32(cv) == load32(dict.dict, dictS) { | ||
658 | // Try a long candidate at s+1 | ||
659 | hashL = hash7(cv>>8, lTableBits) | ||
660 | candidateL = int(lTable[hashL]) | ||
661 | lTable[hashL] = uint32(s + 1) | ||
662 | if uint32(cv>>8) == load32(src, candidateL) { | ||
663 | s++ | ||
664 | goto emitMatch | ||
665 | } | ||
666 | candidateL = dictS | ||
667 | goto emitDict | ||
668 | } | ||
669 | cv = load64(src, nextS) | ||
670 | s = nextS | ||
671 | } | ||
672 | emitDict: | ||
673 | { | ||
674 | if debug { | ||
675 | if load32(dict.dict, candidateL) != load32(src, s) { | ||
676 | panic("dict emit mismatch") | ||
677 | } | ||
678 | } | ||
679 | // Extend backwards. | ||
680 | // The top bytes will be rechecked to get the full match. | ||
681 | for candidateL > 0 && s > nextEmit && dict.dict[candidateL-1] == src[s-1] { | ||
682 | candidateL-- | ||
683 | s-- | ||
684 | } | ||
685 | |||
686 | // Bail if we exceed the maximum size. | ||
687 | if d+(s-nextEmit) > dstLimit { | ||
688 | return 0 | ||
689 | } | ||
690 | |||
691 | // A 4-byte match has been found. We'll later see if more than 4 bytes | ||
692 | // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit | ||
693 | // them as literal bytes. | ||
694 | |||
695 | d += emitLiteral(dst[d:], src[nextEmit:s]) | ||
696 | if debug && nextEmit != s { | ||
697 | fmt.Println("emitted ", s-nextEmit, "literals") | ||
698 | } | ||
699 | { | ||
700 | // Invariant: we have a 4-byte match at s, and no need to emit any | ||
701 | // literal bytes prior to s. | ||
702 | base := s | ||
703 | offset := s + (len(dict.dict)) - candidateL | ||
704 | |||
705 | // Extend the 4-byte match as long as possible. | ||
706 | s += 4 | ||
707 | candidateL += 4 | ||
708 | for s <= len(src)-8 && len(dict.dict)-candidateL >= 8 { | ||
709 | if diff := load64(src, s) ^ load64(dict.dict, candidateL); diff != 0 { | ||
710 | s += bits.TrailingZeros64(diff) >> 3 | ||
711 | break | ||
712 | } | ||
713 | s += 8 | ||
714 | candidateL += 8 | ||
715 | } | ||
716 | |||
717 | if repeat == offset { | ||
718 | if debug { | ||
719 | fmt.Println("emitted dict repeat, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL) | ||
720 | } | ||
721 | d += emitRepeat(dst[d:], offset, s-base) | ||
722 | } else { | ||
723 | if debug { | ||
724 | fmt.Println("emitted dict copy, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL) | ||
725 | } | ||
726 | // Matches longer than 64 are split. | ||
727 | if s <= sLimit || s-base < 8 { | ||
728 | d += emitCopy(dst[d:], offset, s-base) | ||
729 | } else { | ||
730 | // Split to ensure we don't start a copy within next block. | ||
731 | d += emitCopy(dst[d:], offset, 4) | ||
732 | d += emitRepeat(dst[d:], offset, s-base-4) | ||
733 | } | ||
734 | repeat = offset | ||
735 | } | ||
736 | if false { | ||
737 | // Validate match. | ||
738 | if s <= candidateL { | ||
739 | panic("s <= candidate") | ||
740 | } | ||
741 | a := src[base:s] | ||
742 | b := dict.dict[base-repeat : base-repeat+(s-base)] | ||
743 | if !bytes.Equal(a, b) { | ||
744 | panic("mismatch") | ||
745 | } | ||
746 | } | ||
747 | |||
748 | nextEmit = s | ||
749 | if s >= sLimit { | ||
750 | break searchDict | ||
751 | } | ||
752 | |||
753 | if d > dstLimit { | ||
754 | // Bail if we don't have space for more. | ||
755 | return 0 | ||
756 | } | ||
757 | |||
758 | // Index short & long | ||
759 | index0 := base + 1 | ||
760 | index1 := s - 2 | ||
761 | |||
762 | cv0 := load64(src, index0) | ||
763 | cv1 := load64(src, index1) | ||
764 | lTable[hash7(cv0, lTableBits)] = uint32(index0) | ||
765 | sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) | ||
766 | |||
767 | lTable[hash7(cv1, lTableBits)] = uint32(index1) | ||
768 | sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) | ||
769 | index0 += 1 | ||
770 | index1 -= 1 | ||
771 | cv = load64(src, s) | ||
772 | |||
773 | // index every second long in between. | ||
774 | for index0 < index1 { | ||
775 | lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) | ||
776 | lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1) | ||
777 | index0 += 2 | ||
778 | index1 -= 2 | ||
779 | } | ||
780 | } | ||
781 | continue | ||
782 | } | ||
783 | emitMatch: | ||
784 | |||
785 | // Extend backwards | ||
786 | for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { | ||
787 | candidateL-- | ||
788 | s-- | ||
789 | } | ||
790 | |||
791 | // Bail if we exceed the maximum size. | ||
792 | if d+(s-nextEmit) > dstLimit { | ||
793 | return 0 | ||
794 | } | ||
795 | |||
796 | base := s | ||
797 | offset := base - candidateL | ||
798 | |||
799 | // Extend the 4-byte match as long as possible. | ||
800 | s += 4 | ||
801 | candidateL += 4 | ||
802 | for s < len(src) { | ||
803 | if len(src)-s < 8 { | ||
804 | if src[s] == src[candidateL] { | ||
805 | s++ | ||
806 | candidateL++ | ||
807 | continue | ||
808 | } | ||
809 | break | ||
810 | } | ||
811 | if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { | ||
812 | s += bits.TrailingZeros64(diff) >> 3 | ||
813 | break | ||
814 | } | ||
815 | s += 8 | ||
816 | candidateL += 8 | ||
817 | } | ||
818 | |||
819 | if offset > 65535 && s-base <= 5 && repeat != offset { | ||
820 | // Bail if the match is equal to or worse than the encoding. | ||
821 | s = nextS + 1 | ||
822 | if s >= sLimit { | ||
823 | goto emitRemainder | ||
824 | } | ||
825 | cv = load64(src, s) | ||
826 | continue | ||
827 | } | ||
828 | |||
829 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
830 | if debug && nextEmit != s { | ||
831 | fmt.Println("emitted ", s-nextEmit, "literals") | ||
832 | } | ||
833 | if repeat == offset { | ||
834 | if debug { | ||
835 | fmt.Println("emitted match repeat, length", s-base, "offset:", offset, "s:", s) | ||
836 | } | ||
837 | d += emitRepeat(dst[d:], offset, s-base) | ||
838 | } else { | ||
839 | if debug { | ||
840 | fmt.Println("emitted match copy, length", s-base, "offset:", offset, "s:", s) | ||
841 | } | ||
842 | d += emitCopy(dst[d:], offset, s-base) | ||
843 | repeat = offset | ||
844 | } | ||
845 | |||
846 | nextEmit = s | ||
847 | if s >= sLimit { | ||
848 | goto emitRemainder | ||
849 | } | ||
850 | |||
851 | if d > dstLimit { | ||
852 | // Bail if we don't have space for more. | ||
853 | return 0 | ||
854 | } | ||
855 | |||
856 | // Index short & long | ||
857 | index0 := base + 1 | ||
858 | index1 := s - 2 | ||
859 | |||
860 | cv0 := load64(src, index0) | ||
861 | cv1 := load64(src, index1) | ||
862 | lTable[hash7(cv0, lTableBits)] = uint32(index0) | ||
863 | sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) | ||
864 | |||
865 | lTable[hash7(cv1, lTableBits)] = uint32(index1) | ||
866 | sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) | ||
867 | index0 += 1 | ||
868 | index1 -= 1 | ||
869 | cv = load64(src, s) | ||
870 | |||
871 | // Index large values sparsely in between. | ||
872 | // We do two starting from different offsets for speed. | ||
873 | index2 := (index0 + index1 + 1) >> 1 | ||
874 | for index2 < index1 { | ||
875 | lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) | ||
876 | lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2) | ||
877 | index0 += 2 | ||
878 | index2 += 2 | ||
879 | } | ||
880 | } | ||
881 | |||
882 | // Search without dict: | ||
883 | if repeat > s { | ||
884 | repeat = 0 | ||
885 | } | ||
886 | |||
887 | // No more dict | ||
888 | sLimit = len(src) - inputMargin | ||
889 | if s >= sLimit { | ||
890 | goto emitRemainder | ||
891 | } | ||
892 | cv = load64(src, s) | ||
893 | if debug { | ||
894 | fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s) | ||
895 | } | ||
896 | for { | ||
897 | candidateL := 0 | ||
898 | nextS := 0 | ||
899 | for { | ||
900 | // Next src position to check | ||
901 | nextS = s + (s-nextEmit)>>7 + 1 | ||
902 | if nextS > sLimit { | ||
903 | goto emitRemainder | ||
904 | } | ||
905 | hashL := hash7(cv, lTableBits) | ||
906 | hashS := hash4(cv, sTableBits) | ||
907 | candidateL = int(lTable[hashL]) | ||
908 | candidateS := int(sTable[hashS]) | ||
909 | lTable[hashL] = uint32(s) | ||
910 | sTable[hashS] = uint32(s) | ||
911 | |||
912 | valLong := load64(src, candidateL) | ||
913 | valShort := load64(src, candidateS) | ||
914 | |||
915 | // If long matches at least 8 bytes, use that. | ||
916 | if cv == valLong { | ||
917 | break | ||
918 | } | ||
919 | if cv == valShort { | ||
920 | candidateL = candidateS | ||
921 | break | ||
922 | } | ||
923 | |||
924 | // Check repeat at offset checkRep. | ||
925 | const checkRep = 1 | ||
926 | // Minimum length of a repeat. Tested with various values. | ||
927 | // While 4-5 offers improvements in some cases, 6 reduces | ||
928 | // regressions significantly. | ||
929 | const wantRepeatBytes = 6 | ||
930 | const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep) | ||
931 | if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask { | ||
932 | base := s + checkRep | ||
933 | // Extend back | ||
934 | for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { | ||
935 | i-- | ||
936 | base-- | ||
937 | } | ||
938 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
939 | |||
940 | // Extend forward | ||
941 | candidate := s - repeat + wantRepeatBytes + checkRep | ||
942 | s += wantRepeatBytes + checkRep | ||
943 | for s < len(src) { | ||
944 | if len(src)-s < 8 { | ||
945 | if src[s] == src[candidate] { | ||
946 | s++ | ||
947 | candidate++ | ||
948 | continue | ||
949 | } | ||
950 | break | ||
951 | } | ||
952 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
953 | s += bits.TrailingZeros64(diff) >> 3 | ||
954 | break | ||
955 | } | ||
956 | s += 8 | ||
957 | candidate += 8 | ||
958 | } | ||
959 | // Same as `d += emitCopy(dst[d:], repeat, s-base)`, but skips storing the offset. | ||
960 | d += emitRepeat(dst[d:], repeat, s-base) | ||
961 | nextEmit = s | ||
962 | if s >= sLimit { | ||
963 | goto emitRemainder | ||
964 | } | ||
965 | // Index in-between | ||
966 | index0 := base + 1 | ||
967 | index1 := s - 2 | ||
968 | |||
969 | for index0 < index1 { | ||
970 | cv0 := load64(src, index0) | ||
971 | cv1 := load64(src, index1) | ||
972 | lTable[hash7(cv0, lTableBits)] = uint32(index0) | ||
973 | sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) | ||
974 | |||
975 | lTable[hash7(cv1, lTableBits)] = uint32(index1) | ||
976 | sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) | ||
977 | index0 += 2 | ||
978 | index1 -= 2 | ||
979 | } | ||
980 | |||
981 | cv = load64(src, s) | ||
982 | continue | ||
983 | } | ||
984 | |||
985 | // The long table hashes 7 bytes, so this match likely extends to 7; take that. | ||
986 | if uint32(cv) == uint32(valLong) { | ||
987 | break | ||
988 | } | ||
989 | |||
990 | // Check our short candidate | ||
991 | if uint32(cv) == uint32(valShort) { | ||
992 | // Try a long candidate at s+1 | ||
993 | hashL = hash7(cv>>8, lTableBits) | ||
994 | candidateL = int(lTable[hashL]) | ||
995 | lTable[hashL] = uint32(s + 1) | ||
996 | if uint32(cv>>8) == load32(src, candidateL) { | ||
997 | s++ | ||
998 | break | ||
999 | } | ||
1000 | // Use our short candidate. | ||
1001 | candidateL = candidateS | ||
1002 | break | ||
1003 | } | ||
1004 | |||
1005 | cv = load64(src, nextS) | ||
1006 | s = nextS | ||
1007 | } | ||
1008 | |||
1009 | // Extend backwards | ||
1010 | for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { | ||
1011 | candidateL-- | ||
1012 | s-- | ||
1013 | } | ||
1014 | |||
1015 | // Bail if we exceed the maximum size. | ||
1016 | if d+(s-nextEmit) > dstLimit { | ||
1017 | return 0 | ||
1018 | } | ||
1019 | |||
1020 | base := s | ||
1021 | offset := base - candidateL | ||
1022 | |||
1023 | // Extend the 4-byte match as long as possible. | ||
1024 | s += 4 | ||
1025 | candidateL += 4 | ||
1026 | for s < len(src) { | ||
1027 | if len(src)-s < 8 { | ||
1028 | if src[s] == src[candidateL] { | ||
1029 | s++ | ||
1030 | candidateL++ | ||
1031 | continue | ||
1032 | } | ||
1033 | break | ||
1034 | } | ||
1035 | if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { | ||
1036 | s += bits.TrailingZeros64(diff) >> 3 | ||
1037 | break | ||
1038 | } | ||
1039 | s += 8 | ||
1040 | candidateL += 8 | ||
1041 | } | ||
1042 | |||
1043 | if offset > 65535 && s-base <= 5 && repeat != offset { | ||
1044 | // Bail if the match is equal to or worse than the encoding. | ||
1045 | s = nextS + 1 | ||
1046 | if s >= sLimit { | ||
1047 | goto emitRemainder | ||
1048 | } | ||
1049 | cv = load64(src, s) | ||
1050 | continue | ||
1051 | } | ||
1052 | |||
1053 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
1054 | if repeat == offset { | ||
1055 | d += emitRepeat(dst[d:], offset, s-base) | ||
1056 | } else { | ||
1057 | d += emitCopy(dst[d:], offset, s-base) | ||
1058 | repeat = offset | ||
1059 | } | ||
1060 | |||
1061 | nextEmit = s | ||
1062 | if s >= sLimit { | ||
1063 | goto emitRemainder | ||
1064 | } | ||
1065 | |||
1066 | if d > dstLimit { | ||
1067 | // Bail if we don't have space for more. | ||
1068 | return 0 | ||
1069 | } | ||
1070 | |||
1071 | // Index short & long | ||
1072 | index0 := base + 1 | ||
1073 | index1 := s - 2 | ||
1074 | |||
1075 | cv0 := load64(src, index0) | ||
1076 | cv1 := load64(src, index1) | ||
1077 | lTable[hash7(cv0, lTableBits)] = uint32(index0) | ||
1078 | sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1) | ||
1079 | |||
1080 | lTable[hash7(cv1, lTableBits)] = uint32(index1) | ||
1081 | sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1) | ||
1082 | index0 += 1 | ||
1083 | index1 -= 1 | ||
1084 | cv = load64(src, s) | ||
1085 | |||
1086 | // Index large values sparsely in between. | ||
1087 | // We do two starting from different offsets for speed. | ||
1088 | index2 := (index0 + index1 + 1) >> 1 | ||
1089 | for index2 < index1 { | ||
1090 | lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0) | ||
1091 | lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2) | ||
1092 | index0 += 2 | ||
1093 | index2 += 2 | ||
1094 | } | ||
1095 | } | ||
1096 | |||
1097 | emitRemainder: | ||
1098 | if nextEmit < len(src) { | ||
1099 | // Bail if we exceed the maximum size. | ||
1100 | if d+len(src)-nextEmit > dstLimit { | ||
1101 | return 0 | ||
1102 | } | ||
1103 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
1104 | } | ||
1105 | return d | ||
1106 | } | ||
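encodeBlockBetterDict is driven by the package's dictionary support. A sketch of how that is typically used; MakeDict and the Dict.EncodeBetter/Dict.Decode methods are recalled from the s2 dictionary API and should be treated as assumptions here, not as part of this commit:

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress/s2"
)

func main() {
	// Build a dictionary from bytes expected to recur across inputs.
	// Second argument (preferred starting match) left nil; assumed API.
	dict := s2.MakeDict(bytes.Repeat([]byte("common header "), 64), nil)
	if dict == nil {
		panic("dictionary was rejected")
	}

	src := []byte("common header common header payload")
	enc := dict.EncodeBetter(nil, src) // better mode, hits the dict path

	dec, err := dict.Decode(nil, enc)
	if err != nil {
		panic(err)
	}
	fmt.Println("round-trips:", bytes.Equal(dec, src))
}
```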