Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encode_better.go')
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encode_better.go  1106
1 file changed, 1106 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encode_better.go b/vendor/github.com/klauspost/compress/s2/encode_better.go
new file mode 100644
index 0000000..544cb1e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/s2/encode_better.go
@@ -0,0 +1,1106 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"bytes"
	"fmt"
	"math/bits"
)

// hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4(u uint64, h uint8) uint32 {
	const prime4bytes = 2654435761
	return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
}

// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash5(u uint64, h uint8) uint32 {
	const prime5bytes = 889523592379
	return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
}

// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
	const prime7bytes = 58295818150454627
	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}

// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
	const prime8bytes = 0xcf1bbcdcb7a56463
	return uint32((u * prime8bytes) >> ((64 - h) & 63))
}
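
// Editorial note: the four functions above are multiplicative hashes.
// The left shift first discards the input bytes that should not take part
// (e.g. u<<(64-56) keeps only the lowest 7 bytes), the multiply by a large
// odd prime mixes those bytes into the top bits, and the right shift keeps
// the top h bits, so the result always fits a table of 1<<h entries. The
// "&31"/"&63" masks keep the shift amount in range, which lets the compiler
// drop its shift-overflow checks. A hedged sketch of how the encoders below
// use these hashes to index their match tables:
//
//	const lTableBits = 17                     // table of 1<<17 entries
//	var lTable [1 << lTableBits]uint32
//	cv := load64(src, s)                      // eight source bytes at position s
//	lTable[hash7(cv, lTableBits)] = uint32(s) // remember position s for this 7-byte key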

// encodeBlockBetterGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterGo(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 17
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
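			// Editorial note: the leading "false" on the next line makes this
			// repeat-check branch dead code; it never executes as written.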
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						if src[s] == src[candidate] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}
				// Index in-between
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint32(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint32(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// The long candidate was hashed on 7 bytes, so a 4-byte hit
			// there likely extends further; take it.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal to or worse than the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Bail if we do not have space for more.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		// lTable could be postponed, but very minor difference.
		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
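
// Editorial sketch of the contract documented above: a caller writes the
// uvarint length of src first, then invokes the block encoder, and treats a
// zero return as "would not compress", falling back to one literal run.
// exampleEncode is hypothetical (it is not this package's Encode) and
// assumes dst was sized with MaxEncodedLen and that encoding/binary is
// imported:
//
//	func exampleEncode(dst, src []byte) []byte {
//		d := binary.PutUvarint(dst, uint64(len(src)))
//		if n := encodeBlockBetterGo(dst[d:], src); n > 0 {
//			return dst[:d+n]
//		}
//		d += emitLiteral(dst[d:], src) // incompressible: store as literals
//		return dst[:d]
//	}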

// encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0
	const maxSkip = 100

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = (s-nextEmit)>>7 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}

			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal to or worse than the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, s-base)
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Bail if we do not have space for more.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
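
// Editorial note on the skipping heuristic above: the step
// nextS = s + (s-nextEmit)>>7 + 1 advances one byte at a time for the first
// 128 bytes since the last emit, two bytes for the next 128, and so on.
// This Snappy-compatible variant additionally caps the step at maxSkip (100)
// to bound the worst case on incompressible input. A standalone sketch of
// the same calculation (exampleNextS is illustrative only):
//
//	func exampleNextS(s, nextEmit, maxSkip int) int {
//		step := (s-nextEmit)>>7 + 1
//		if step > maxSkip {
//			step = maxSkip
//		}
//		return s + step
//	}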

// encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 17
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits

		maxAhead = 8 // maximum bytes ahead without checking sLimit

		debug = false
	)

	sLimit := len(src) - inputMargin
	if sLimit > MaxDictSrcOffset-maxAhead {
		sLimit = MaxDictSrcOffset - maxAhead
	}
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	dict.initBetter()

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form can begin with a copy from the dictionary, so we
	// start looking for hash matches at s == 0.
	s := 0
	cv := load64(src, s)

	// Initialize repeat to the distance back to the dictionary's default
	// repeat position, so a leading repeat can match it.
	repeat := len(dict.dict) - dict.repeat

	// While in dict
searchDict:
	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				break searchDict
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			dictL := int(dict.betterTableLong[hashL])
			dictS := int(dict.betterTableShort[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if s != 0 {
				if cv == valLong {
					goto emitMatch
				}
				if cv == valShort {
					candidateL = candidateS
					goto emitMatch
				}
			}

			// Check dict repeat.
			if repeat >= s+4 {
				candidate := len(dict.dict) - repeat + s
				if candidate > 0 && uint32(cv) == load32(dict.dict, candidate) {
					// Extend back
					base := s
					for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
						i--
						base--
					}
					d += emitLiteral(dst[d:], src[nextEmit:base])
					if debug && nextEmit != base {
						fmt.Println("emitted ", base-nextEmit, "literals")
					}
					s += 4
					candidate += 4
					for candidate < len(dict.dict)-8 && s <= len(src)-8 {
						if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
							s += bits.TrailingZeros64(diff) >> 3
							break
						}
						s += 8
						candidate += 8
					}
					d += emitRepeat(dst[d:], repeat, s-base)
					if debug {
						fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
					}
					nextEmit = s
					if s >= sLimit {
						break searchDict
					}
					// Index in-between
					index0 := base + 1
					index1 := s - 2

					cv = load64(src, s)
					for index0 < index1 {
						cv0 := load64(src, index0)
						cv1 := load64(src, index1)
						lTable[hash7(cv0, lTableBits)] = uint32(index0)
						sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

						lTable[hash7(cv1, lTableBits)] = uint32(index1)
						sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
						index0 += 2
						index1 -= 2
					}
					continue
				}
			}
			// Don't try to find match at s==0
			if s == 0 {
				cv = load64(src, nextS)
				s = nextS
				continue
			}

			// The long candidate was hashed on 7 bytes, so a 4-byte hit
			// there likely extends further; take it.
			if uint32(cv) == uint32(valLong) {
				goto emitMatch
			}

			// Check the long dictionary candidate.
			if uint32(cv) == load32(dict.dict, dictL) {
				candidateL = dictL
				goto emitDict
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					goto emitMatch
				}
				// Use our short candidate.
				candidateL = candidateS
				goto emitMatch
			}
			if uint32(cv) == load32(dict.dict, dictS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					goto emitMatch
				}
				candidateL = dictS
				goto emitDict
			}
			cv = load64(src, nextS)
			s = nextS
		}
	emitDict:
		{
			if debug {
				if load32(dict.dict, candidateL) != load32(src, s) {
					panic("dict emit mismatch")
				}
			}
			// Extend backwards.
			// The top bytes will be rechecked to get the full match.
			for candidateL > 0 && s > nextEmit && dict.dict[candidateL-1] == src[s-1] {
				candidateL--
				s--
			}

			// Bail if we exceed the maximum size.
			if d+(s-nextEmit) > dstLimit {
				return 0
			}

			// A 4-byte match has been found. We'll later see if more than 4 bytes
			// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
			// them as literal bytes.

			d += emitLiteral(dst[d:], src[nextEmit:s])
			if debug && nextEmit != s {
				fmt.Println("emitted ", s-nextEmit, "literals")
			}
			{
				// Invariant: we have a 4-byte match at s, and no need to emit any
				// literal bytes prior to s.
				base := s
				offset := s + (len(dict.dict)) - candidateL

				// Extend the 4-byte match as long as possible.
				s += 4
				candidateL += 4
				for s <= len(src)-8 && len(dict.dict)-candidateL >= 8 {
					if diff := load64(src, s) ^ load64(dict.dict, candidateL); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidateL += 8
				}

				if repeat == offset {
					if debug {
						fmt.Println("emitted dict repeat, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
					}
					d += emitRepeat(dst[d:], offset, s-base)
				} else {
					if debug {
						fmt.Println("emitted dict copy, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
					}
					// Matches longer than 64 are split.
					if s <= sLimit || s-base < 8 {
						d += emitCopy(dst[d:], offset, s-base)
					} else {
						// Split to ensure we don't start a copy within next block.
						d += emitCopy(dst[d:], offset, 4)
						d += emitRepeat(dst[d:], offset, s-base-4)
					}
					repeat = offset
				}
				if false {
					// Validate match.
					if s <= candidateL {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := dict.dict[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}

				nextEmit = s
				if s >= sLimit {
					break searchDict
				}

				if d > dstLimit {
					// Bail if we do not have space for more.
					return 0
				}

				// Index short & long
				index0 := base + 1
				index1 := s - 2

				cv0 := load64(src, index0)
				cv1 := load64(src, index1)
				lTable[hash7(cv0, lTableBits)] = uint32(index0)
				sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

				lTable[hash7(cv1, lTableBits)] = uint32(index1)
				sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
				index0 += 1
				index1 -= 1
				cv = load64(src, s)

				// Index every second long in between.
				for index0 < index1 {
					lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
					lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
					index0 += 2
					index1 -= 2
				}
			}
			continue
		}
	emitMatch:

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal to or worse than the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if debug && nextEmit != base {
			fmt.Println("emitted ", base-nextEmit, "literals")
		}
		if repeat == offset {
			if debug {
				fmt.Println("emitted match repeat, length", s-base, "offset:", offset, "s:", s)
			}
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			if debug {
				fmt.Println("emitted match copy, length", s-base, "offset:", offset, "s:", s)
			}
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Bail if we do not have space for more.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

	// Search without dict:
	if repeat > s {
		repeat = 0
	}

	// No more dict
	sLimit = len(src) - inputMargin
	if s >= sLimit {
		goto emitRemainder
	}
	cv = load64(src, s)
	if debug {
		fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
	}
	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
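			// Editorial note: as in encodeBlockBetterGo, the leading "false"
			// on the next line makes this repeat-check branch dead code.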
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						if src[s] == src[candidate] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}
				// Index in-between
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint32(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint32(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// The long candidate was hashed on 7 bytes, so a 4-byte hit
			// there likely extends further; take it.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal to or worse than the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Bail if we do not have space for more.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
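
// Editorial summary of encodeBlockBetterDict above: it runs in two phases.
// While s is below MaxDictSrcOffset-maxAhead, the searchDict loop consults
// both the block tables and dict.betterTableLong/dict.betterTableShort, and
// a match at dictionary position candidateL is encoded with the distance
// offset = s + len(dict.dict) - candidateL, reaching back past the start of
// src into the dictionary. Past that point dictionary offsets can no longer
// be referenced, so sLimit is restored and the remainder is encoded exactly
// like encodeBlockBetterGo. Hypothetical numbers, following that formula: a
// 1024-byte dictionary and a match found at dict position 900 while encoding
// src position 40 gives offset = 40 + 1024 - 900 = 164, i.e. 124 bytes
// before the start of src, which is dict position 1024 - 124 = 900.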