diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encode_all.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/s2/encode_all.go | 1048 |
1 files changed, 1048 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encode_all.go b/vendor/github.com/klauspost/compress/s2/encode_all.go new file mode 100644 index 0000000..5e57995 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/encode_all.go | |||
@@ -0,0 +1,1048 @@ | |||
1 | // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||
2 | // Copyright (c) 2019 Klaus Post. All rights reserved. | ||
3 | // Use of this source code is governed by a BSD-style | ||
4 | // license that can be found in the LICENSE file. | ||
5 | |||
6 | package s2 | ||
7 | |||
8 | import ( | ||
9 | "bytes" | ||
10 | "encoding/binary" | ||
11 | "fmt" | ||
12 | "math/bits" | ||
13 | ) | ||
14 | |||
// load32 reads a little-endian uint32 from b starting at byte offset i.
// It panics (index out of range) if fewer than 4 bytes are available.
func load32(b []byte, i int) uint32 {
	v := b[i : i+4 : i+4]
	return uint32(v[0]) | uint32(v[1])<<8 | uint32(v[2])<<16 | uint32(v[3])<<24
}
18 | |||
// load64 reads a little-endian uint64 from b starting at byte offset i.
// It panics (index out of range) if fewer than 8 bytes are available.
func load64(b []byte, i int) uint64 {
	v := b[i : i+8 : i+8]
	return uint64(v[0]) | uint64(v[1])<<8 | uint64(v[2])<<16 | uint64(v[3])<<24 |
		uint64(v[4])<<32 | uint64(v[5])<<40 | uint64(v[6])<<48 | uint64(v[7])<<56
}
22 | |||
// hash6 returns a hash of the lowest 6 bytes of u, reduced to h bits so it
// fits a table of size 1<<h. h should be < 64 and is preferably a constant
// so the shifts fold at compile time.
func hash6(u uint64, h uint8) uint32 {
	const prime6bytes = 227718039650203
	// Shift the two high bytes out (keep 48 bits), multiply by a prime,
	// then keep the top h bits of the product.
	return uint32(((u << 16) * prime6bytes) >> ((64 - h) & 63))
}
29 | |||
30 | func encodeGo(dst, src []byte) []byte { | ||
31 | if n := MaxEncodedLen(len(src)); n < 0 { | ||
32 | panic(ErrTooLarge) | ||
33 | } else if len(dst) < n { | ||
34 | dst = make([]byte, n) | ||
35 | } | ||
36 | |||
37 | // The block starts with the varint-encoded length of the decompressed bytes. | ||
38 | d := binary.PutUvarint(dst, uint64(len(src))) | ||
39 | |||
40 | if len(src) == 0 { | ||
41 | return dst[:d] | ||
42 | } | ||
43 | if len(src) < minNonLiteralBlockSize { | ||
44 | d += emitLiteral(dst[d:], src) | ||
45 | return dst[:d] | ||
46 | } | ||
47 | n := encodeBlockGo(dst[d:], src) | ||
48 | if n > 0 { | ||
49 | d += n | ||
50 | return dst[:d] | ||
51 | } | ||
52 | // Not compressible | ||
53 | d += emitLiteral(dst[d:], src) | ||
54 | return dst[:d] | ||
55 | } | ||
56 | |||
// encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockGo(dst, src []byte) (d int) {
	// Initialize the hash table.
	const (
		tableBits    = 14
		maxTableSize = 1 << tableBits

		debug = false
	)

	// Maps hash6 of recent 6-byte sequences to their position in src.
	var table [maxTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1

	for {
		candidate := 0
		for {
			// Next src position to check.
			// The step grows by 1 for every 64 bytes scanned without a match,
			// so incompressible input is skipped over faster.
			nextS := s + (s-nextEmit)>>6 + 4
			if nextS > sLimit {
				goto emitRemainder
			}
			// Probe two table slots (for positions s and s+1) from one
			// 8-byte load, and index both positions.
			hash0 := hash6(cv, tableBits)
			hash1 := hash6(cv>>8, tableBits)
			candidate = int(table[hash0])
			candidate2 := int(table[hash1])
			table[hash0] = uint32(s)
			table[hash1] = uint32(s + 1)
			hash2 := hash6(cv>>16, tableBits)

			// Check repeat at offset checkRep.
			const checkRep = 1
			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						// First differing byte index ends the match.
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				if debug {
					// Validate match.
					if s <= candidate {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := src[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}
				if nextEmit > 0 {
					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
					d += emitRepeat(dst[d:], repeat, s-base)
				} else {
					// First match, cannot be repeat.
					d += emitCopy(dst[d:], repeat, s-base)
				}
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			// Try the three candidates (positions s, s+1, s+2) in order;
			// break with `candidate` and `s` set for the match loop below.
			if uint32(cv) == load32(src, candidate) {
				break
			}
			candidate = int(table[hash2])
			if uint32(cv>>8) == load32(src, candidate2) {
				table[hash2] = uint32(s + 2)
				candidate = candidate2
				s++
				break
			}
			table[hash2] = uint32(s + 2)
			if uint32(cv>>16) == load32(src, candidate) {
				s += 2
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards.
		// The top bytes will be rechecked to get the full match.
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopy(dst[d:], repeat, s-base)
			if debug {
				// Validate match.
				if s <= candidate {
					panic("s <= candidate")
				}
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}

			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if debug && s == candidate {
				panic("s == candidate")
			}
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
274 | |||
// encodeBlockSnappyGo encodes a non-empty src to a guaranteed-large-enough
// dst, mirroring encodeBlockGo but emitting copies via emitCopyNoRepeat only
// (no S2 repeat codes), which keeps the output Snappy-compatible.
// It assumes the varint-encoded decompressed length has already been written,
// and — presumably the same preconditions as encodeBlockGo —
// len(dst) >= MaxEncodedLen(len(src)) and
// minNonLiteralBlockSize <= len(src) <= maxBlockSize (TODO confirm).
// Returns the number of bytes written, or 0 if not compressible to dstLimit.
func encodeBlockSnappyGo(dst, src []byte) (d int) {
	// Initialize the hash table.
	const (
		tableBits    = 14
		maxTableSize = 1 << tableBits
	)

	// Maps hash6 of recent 6-byte sequences to their position in src.
	var table [maxTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1

	for {
		candidate := 0
		for {
			// Next src position to check.
			// The step grows the longer we go without finding a match.
			nextS := s + (s-nextEmit)>>6 + 4
			if nextS > sLimit {
				goto emitRemainder
			}
			// Probe two table slots (positions s and s+1) from one 8-byte load.
			hash0 := hash6(cv, tableBits)
			hash1 := hash6(cv>>8, tableBits)
			candidate = int(table[hash0])
			candidate2 := int(table[hash1])
			table[hash0] = uint32(s)
			table[hash1] = uint32(s + 1)
			hash2 := hash6(cv>>16, tableBits)

			// Check repeat at offset checkRep.
			const checkRep = 1
			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}

				// Snappy has no repeat opcode, so always emit a full copy.
				d += emitCopyNoRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			// Try the three candidates (positions s, s+1, s+2) in order.
			if uint32(cv) == load32(src, candidate) {
				break
			}
			candidate = int(table[hash2])
			if uint32(cv>>8) == load32(src, candidate2) {
				table[hash2] = uint32(s + 2)
				candidate = candidate2
				s++
				break
			}
			table[hash2] = uint32(s + 2)
			if uint32(cv>>16) == load32(src, candidate) {
				s += 2
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopyNoRepeat(dst[d:], repeat, s-base)
			if false {
				// Validate match. (Disabled; flip to true when debugging.)
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}

			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
459 | |||
// encodeBlockDictGo encodes a non-empty src to a guaranteed-large-enough dst,
// using a preset dictionary for matches in the early part of the block. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
//
// The function runs in two phases: while s is within MaxDictSrcOffset it
// searches both dict.fastTable and the local table (searchDict loop); after
// that it falls back to the same src-only search as encodeBlockGo.
func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
	// Initialize the hash table.
	const (
		tableBits    = 14
		maxTableSize = 1 << tableBits
		maxAhead     = 8 // maximum bytes ahead without checking sLimit

		debug = false
	)
	// Ensure the dictionary's fast hash table is built.
	dict.initFast()

	// Maps hash6 of recent 6-byte sequences to their position in src.
	var table [maxTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if sLimit > MaxDictSrcOffset-maxAhead {
		sLimit = MaxDictSrcOffset - maxAhead
	}

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form can start with a dict entry (copy or repeat).
	s := 0

	// Convert dict repeat to offset
	repeat := len(dict.dict) - dict.repeat
	cv := load64(src, 0)

	// While in dict
searchDict:
	for {
		// Next src position to check
		nextS := s + (s-nextEmit)>>6 + 4
		hash0 := hash6(cv, tableBits)
		hash1 := hash6(cv>>8, tableBits)
		if nextS > sLimit {
			if debug {
				fmt.Println("slimit reached", s, nextS)
			}
			break searchDict
		}
		// Candidates from both the dictionary table and the local src table.
		candidateDict := int(dict.fastTable[hash0])
		candidateDict2 := int(dict.fastTable[hash1])
		candidate2 := int(table[hash1])
		candidate := int(table[hash0])
		table[hash0] = uint32(s)
		table[hash1] = uint32(s + 1)
		hash2 := hash6(cv>>16, tableBits)

		// Check repeat at offset checkRep.
		const checkRep = 1

		if repeat > s {
			// Repeat offset still points into the dictionary.
			candidate := len(dict.dict) - repeat + s
			if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
				// Extend back
				base := s
				for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])
				if debug && nextEmit != base {
					fmt.Println("emitted ", base-nextEmit, "literals")
				}
				s += 4
				candidate += 4
				for candidate < len(dict.dict)-8 && s <= len(src)-8 {
					if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				d += emitRepeat(dst[d:], repeat, s-base)
				if debug {
					fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
				}
				nextEmit = s
				if s >= sLimit {
					break searchDict
				}
				cv = load64(src, s)
				continue
			}
		} else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
			// Repeat offset points back into src.
			base := s + checkRep
			// Extend back
			for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
				i--
				base--
			}
			d += emitLiteral(dst[d:], src[nextEmit:base])
			if debug && nextEmit != base {
				fmt.Println("emitted ", base-nextEmit, "literals")
			}

			// Extend forward
			candidate := s - repeat + 4 + checkRep
			s += 4 + checkRep
			for s <= sLimit {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}
			if debug {
				// Validate match.
				if s <= candidate {
					panic("s <= candidate")
				}
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}

			if nextEmit > 0 {
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
			} else {
				// First match, cannot be repeat.
				d += emitCopy(dst[d:], repeat, s-base)
			}

			nextEmit = s
			if s >= sLimit {
				break searchDict
			}
			if debug {
				fmt.Println("emitted reg repeat", s-base, "s:", s)
			}
			cv = load64(src, s)
			continue searchDict
		}
		if s == 0 {
			cv = load64(src, nextS)
			s = nextS
			continue searchDict
		}
		// Start with table. These matches will always be closer.
		if uint32(cv) == load32(src, candidate) {
			goto emitMatch
		}
		candidate = int(table[hash2])
		if uint32(cv>>8) == load32(src, candidate2) {
			table[hash2] = uint32(s + 2)
			candidate = candidate2
			s++
			goto emitMatch
		}

		// Check dict. Dicts have longer offsets, so we want longer matches.
		if cv == load64(dict.dict, candidateDict) {
			table[hash2] = uint32(s + 2)
			goto emitDict
		}

		candidateDict = int(dict.fastTable[hash2])
		// Check if upper 7 bytes match
		if candidateDict2 >= 1 {
			if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
				table[hash2] = uint32(s + 2)
				candidateDict = candidateDict2
				s++
				goto emitDict
			}
		}

		table[hash2] = uint32(s + 2)
		if uint32(cv>>16) == load32(src, candidate) {
			s += 2
			goto emitMatch
		}
		if candidateDict >= 2 {
			// Check if upper 6 bytes match
			if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
				s += 2
				goto emitDict
			}
		}

		cv = load64(src, nextS)
		s = nextS
		continue searchDict

	emitDict:
		{
			if debug {
				if load32(dict.dict, candidateDict) != load32(src, s) {
					panic("dict emit mismatch")
				}
			}
			// Extend backwards.
			// The top bytes will be rechecked to get the full match.
			for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
				candidateDict--
				s--
			}

			// Bail if we exceed the maximum size.
			if d+(s-nextEmit) > dstLimit {
				return 0
			}

			// A 4-byte match has been found. We'll later see if more than 4 bytes
			// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
			// them as literal bytes.

			d += emitLiteral(dst[d:], src[nextEmit:s])
			if debug && nextEmit != s {
				fmt.Println("emitted ", s-nextEmit, "literals")
			}
			{
				// Invariant: we have a 4-byte match at s, and no need to emit any
				// literal bytes prior to s.
				base := s
				// Offset counted from the end of the dictionary through src.
				repeat = s + (len(dict.dict)) - candidateDict

				// Extend the 4-byte match as long as possible.
				s += 4
				candidateDict += 4
				for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
					if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidateDict += 8
				}

				// Matches longer than 64 are split.
				if s <= sLimit || s-base < 8 {
					d += emitCopy(dst[d:], repeat, s-base)
				} else {
					// Split to ensure we don't start a copy within next block
					d += emitCopy(dst[d:], repeat, 4)
					d += emitRepeat(dst[d:], repeat, s-base-4)
				}
				if false {
					// Validate match. (Disabled; flip to true when debugging.)
					if s <= candidate {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := dict.dict[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}
				if debug {
					fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
				}
				nextEmit = s
				if s >= sLimit {
					break searchDict
				}

				if d > dstLimit {
					// Do we have space for more, if not bail.
					return 0
				}

				// Index and continue loop to try new candidate.
				x := load64(src, s-2)
				m2Hash := hash6(x, tableBits)
				currHash := hash6(x>>8, tableBits)
				table[m2Hash] = uint32(s - 2)
				table[currHash] = uint32(s - 1)
				cv = load64(src, s)
			}
			continue
		}
	emitMatch:

		// Extend backwards.
		// The top bytes will be rechecked to get the full match.
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])
		if debug && nextEmit != s {
			fmt.Println("emitted ", s-nextEmit, "literals")
		}
		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopy(dst[d:], repeat, s-base)
			if debug {
				// Validate match.
				if s <= candidate {
					panic("s <= candidate")
				}
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}
			if debug {
				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
			}
			nextEmit = s
			if s >= sLimit {
				break searchDict
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if debug && s == candidate {
				panic("s == candidate")
			}
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

	// Search without dict:
	if repeat > s {
		repeat = 0
	}

	// No more dict
	sLimit = len(src) - inputMargin
	if s >= sLimit {
		goto emitRemainder
	}
	if debug {
		fmt.Println("non-dict matching at", s, "repeat:", repeat)
	}
	cv = load64(src, s)
	if debug {
		fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
	}
	// From here on this mirrors encodeBlockGo's src-only search, except the
	// repeat check is guarded by repeat > 0.
	for {
		candidate := 0
		for {
			// Next src position to check
			nextS := s + (s-nextEmit)>>6 + 4
			if nextS > sLimit {
				goto emitRemainder
			}
			hash0 := hash6(cv, tableBits)
			hash1 := hash6(cv>>8, tableBits)
			candidate = int(table[hash0])
			candidate2 := int(table[hash1])
			table[hash0] = uint32(s)
			table[hash1] = uint32(s + 1)
			hash2 := hash6(cv>>16, tableBits)

			// Check repeat at offset checkRep.
			const checkRep = 1
			if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])
				if debug && nextEmit != base {
					fmt.Println("emitted ", base-nextEmit, "literals")
				}
				// Extend forward
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				if debug {
					// Validate match.
					if s <= candidate {
						panic("s <= candidate")
					}
					a := src[base:s]
					b := src[base-repeat : base-repeat+(s-base)]
					if !bytes.Equal(a, b) {
						panic("mismatch")
					}
				}
				if nextEmit > 0 {
					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
					d += emitRepeat(dst[d:], repeat, s-base)
				} else {
					// First match, cannot be repeat.
					d += emitCopy(dst[d:], repeat, s-base)
				}
				if debug {
					fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
				}
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			if uint32(cv) == load32(src, candidate) {
				break
			}
			candidate = int(table[hash2])
			if uint32(cv>>8) == load32(src, candidate2) {
				table[hash2] = uint32(s + 2)
				candidate = candidate2
				s++
				break
			}
			table[hash2] = uint32(s + 2)
			if uint32(cv>>16) == load32(src, candidate) {
				s += 2
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards.
		// The top bytes will be rechecked to get the full match.
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])
		if debug && nextEmit != s {
			fmt.Println("emitted ", s-nextEmit, "literals")
		}
		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopy(dst[d:], repeat, s-base)
			if debug {
				// Validate match.
				if s <= candidate {
					panic("s <= candidate")
				}
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}
			if debug {
				fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
			}
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if debug && s == candidate {
				panic("s == candidate")
			}
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
		if debug && nextEmit != s {
			fmt.Println("emitted ", len(src)-nextEmit, "literals")
		}
	}
	return d
}