diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encode_best.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/s2/encode_best.go | 796 |
1 files changed, 796 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go new file mode 100644 index 0000000..47bac74 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/encode_best.go | |||
@@ -0,0 +1,796 @@ | |||
1 | // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||
2 | // Copyright (c) 2019 Klaus Post. All rights reserved. | ||
3 | // Use of this source code is governed by a BSD-style | ||
4 | // license that can be found in the LICENSE file. | ||
5 | |||
6 | package s2 | ||
7 | |||
8 | import ( | ||
9 | "fmt" | ||
10 | "math" | ||
11 | "math/bits" | ||
12 | ) | ||
13 | |||
14 | // encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It | ||
15 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
16 | // been written. | ||
17 | // | ||
18 | // It also assumes that: | ||
19 | // | ||
20 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
21 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
22 | func encodeBlockBest(dst, src []byte, dict *Dict) (d int) { | ||
23 | // Initialize the hash tables. | ||
24 | const ( | ||
25 | // Long hash matches. | ||
26 | lTableBits = 19 | ||
27 | maxLTableSize = 1 << lTableBits | ||
28 | |||
29 | // Short hash matches. | ||
30 | sTableBits = 16 | ||
31 | maxSTableSize = 1 << sTableBits | ||
32 | |||
33 | inputMargin = 8 + 2 | ||
34 | |||
35 | debug = false | ||
36 | ) | ||
37 | |||
38 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
39 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
40 | // looking for copies. | ||
41 | sLimit := len(src) - inputMargin | ||
42 | if len(src) < minNonLiteralBlockSize { | ||
43 | return 0 | ||
44 | } | ||
45 | sLimitDict := len(src) - inputMargin | ||
46 | if sLimitDict > MaxDictSrcOffset-inputMargin { | ||
47 | sLimitDict = MaxDictSrcOffset - inputMargin | ||
48 | } | ||
49 | |||
50 | var lTable [maxLTableSize]uint64 | ||
51 | var sTable [maxSTableSize]uint64 | ||
52 | |||
53 | // Bail if we can't compress to at least this. | ||
54 | dstLimit := len(src) - 5 | ||
55 | |||
56 | // nextEmit is where in src the next emitLiteral should start from. | ||
57 | nextEmit := 0 | ||
58 | |||
59 | // The encoded form must start with a literal, as there are no previous | ||
60 | // bytes to copy, so we start looking for hash matches at s == 1. | ||
61 | s := 1 | ||
62 | repeat := 1 | ||
63 | if dict != nil { | ||
64 | dict.initBest() | ||
65 | s = 0 | ||
66 | repeat = len(dict.dict) - dict.repeat | ||
67 | } | ||
68 | cv := load64(src, s) | ||
69 | |||
70 | // We search for a repeat at -1, but don't output repeats when nextEmit == 0 | ||
71 | const lowbitMask = 0xffffffff | ||
72 | getCur := func(x uint64) int { | ||
73 | return int(x & lowbitMask) | ||
74 | } | ||
75 | getPrev := func(x uint64) int { | ||
76 | return int(x >> 32) | ||
77 | } | ||
78 | const maxSkip = 64 | ||
79 | |||
80 | for { | ||
81 | type match struct { | ||
82 | offset int | ||
83 | s int | ||
84 | length int | ||
85 | score int | ||
86 | rep, dict bool | ||
87 | } | ||
88 | var best match | ||
89 | for { | ||
90 | // Next src position to check | ||
91 | nextS := (s-nextEmit)>>8 + 1 | ||
92 | if nextS > maxSkip { | ||
93 | nextS = s + maxSkip | ||
94 | } else { | ||
95 | nextS += s | ||
96 | } | ||
97 | if nextS > sLimit { | ||
98 | goto emitRemainder | ||
99 | } | ||
100 | if dict != nil && s >= MaxDictSrcOffset { | ||
101 | dict = nil | ||
102 | if repeat > s { | ||
103 | repeat = math.MinInt32 | ||
104 | } | ||
105 | } | ||
106 | hashL := hash8(cv, lTableBits) | ||
107 | hashS := hash4(cv, sTableBits) | ||
108 | candidateL := lTable[hashL] | ||
109 | candidateS := sTable[hashS] | ||
110 | |||
111 | score := func(m match) int { | ||
112 | // Matches that are longer forward are penalized since we must emit it as a literal. | ||
113 | score := m.length - m.s | ||
114 | if nextEmit == m.s { | ||
115 | // If we do not have to emit literals, we save 1 byte | ||
116 | score++ | ||
117 | } | ||
118 | offset := m.s - m.offset | ||
119 | if m.rep { | ||
120 | return score - emitRepeatSize(offset, m.length) | ||
121 | } | ||
122 | return score - emitCopySize(offset, m.length) | ||
123 | } | ||
124 | |||
125 | matchAt := func(offset, s int, first uint32, rep bool) match { | ||
126 | if best.length != 0 && best.s-best.offset == s-offset { | ||
127 | // Don't retest if we have the same offset. | ||
128 | return match{offset: offset, s: s} | ||
129 | } | ||
130 | if load32(src, offset) != first { | ||
131 | return match{offset: offset, s: s} | ||
132 | } | ||
133 | m := match{offset: offset, s: s, length: 4 + offset, rep: rep} | ||
134 | s += 4 | ||
135 | for s < len(src) { | ||
136 | if len(src)-s < 8 { | ||
137 | if src[s] == src[m.length] { | ||
138 | m.length++ | ||
139 | s++ | ||
140 | continue | ||
141 | } | ||
142 | break | ||
143 | } | ||
144 | if diff := load64(src, s) ^ load64(src, m.length); diff != 0 { | ||
145 | m.length += bits.TrailingZeros64(diff) >> 3 | ||
146 | break | ||
147 | } | ||
148 | s += 8 | ||
149 | m.length += 8 | ||
150 | } | ||
151 | m.length -= offset | ||
152 | m.score = score(m) | ||
153 | if m.score <= -m.s { | ||
154 | // Eliminate if no savings, we might find a better one. | ||
155 | m.length = 0 | ||
156 | } | ||
157 | return m | ||
158 | } | ||
159 | matchDict := func(candidate, s int, first uint32, rep bool) match { | ||
160 | if s >= MaxDictSrcOffset { | ||
161 | return match{offset: candidate, s: s} | ||
162 | } | ||
163 | // Calculate offset as if in continuous array with s | ||
164 | offset := -len(dict.dict) + candidate | ||
165 | if best.length != 0 && best.s-best.offset == s-offset && !rep { | ||
166 | // Don't retest if we have the same offset. | ||
167 | return match{offset: offset, s: s} | ||
168 | } | ||
169 | |||
170 | if load32(dict.dict, candidate) != first { | ||
171 | return match{offset: offset, s: s} | ||
172 | } | ||
173 | m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true} | ||
174 | s += 4 | ||
175 | if !rep { | ||
176 | for s < sLimitDict && m.length < len(dict.dict) { | ||
177 | if len(src)-s < 8 || len(dict.dict)-m.length < 8 { | ||
178 | if src[s] == dict.dict[m.length] { | ||
179 | m.length++ | ||
180 | s++ | ||
181 | continue | ||
182 | } | ||
183 | break | ||
184 | } | ||
185 | if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 { | ||
186 | m.length += bits.TrailingZeros64(diff) >> 3 | ||
187 | break | ||
188 | } | ||
189 | s += 8 | ||
190 | m.length += 8 | ||
191 | } | ||
192 | } else { | ||
193 | for s < len(src) && m.length < len(dict.dict) { | ||
194 | if len(src)-s < 8 || len(dict.dict)-m.length < 8 { | ||
195 | if src[s] == dict.dict[m.length] { | ||
196 | m.length++ | ||
197 | s++ | ||
198 | continue | ||
199 | } | ||
200 | break | ||
201 | } | ||
202 | if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 { | ||
203 | m.length += bits.TrailingZeros64(diff) >> 3 | ||
204 | break | ||
205 | } | ||
206 | s += 8 | ||
207 | m.length += 8 | ||
208 | } | ||
209 | } | ||
210 | m.length -= candidate | ||
211 | m.score = score(m) | ||
212 | if m.score <= -m.s { | ||
213 | // Eliminate if no savings, we might find a better one. | ||
214 | m.length = 0 | ||
215 | } | ||
216 | return m | ||
217 | } | ||
218 | |||
219 | bestOf := func(a, b match) match { | ||
220 | if b.length == 0 { | ||
221 | return a | ||
222 | } | ||
223 | if a.length == 0 { | ||
224 | return b | ||
225 | } | ||
226 | as := a.score + b.s | ||
227 | bs := b.score + a.s | ||
228 | if as >= bs { | ||
229 | return a | ||
230 | } | ||
231 | return b | ||
232 | } | ||
233 | |||
234 | if s > 0 { | ||
235 | best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false)) | ||
236 | best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false)) | ||
237 | best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false)) | ||
238 | } | ||
239 | if dict != nil { | ||
240 | candidateL := dict.bestTableLong[hashL] | ||
241 | candidateS := dict.bestTableShort[hashS] | ||
242 | best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) | ||
243 | best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false)) | ||
244 | best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) | ||
245 | best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false)) | ||
246 | } | ||
247 | { | ||
248 | if (dict == nil || repeat <= s) && repeat > 0 { | ||
249 | best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true)) | ||
250 | } else if s-repeat < -4 && dict != nil { | ||
251 | candidate := len(dict.dict) - (repeat - s) | ||
252 | best = bestOf(best, matchDict(candidate, s, uint32(cv), true)) | ||
253 | candidate++ | ||
254 | best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true)) | ||
255 | } | ||
256 | |||
257 | if best.length > 0 { | ||
258 | hashS := hash4(cv>>8, sTableBits) | ||
259 | // s+1 | ||
260 | nextShort := sTable[hashS] | ||
261 | s := s + 1 | ||
262 | cv := load64(src, s) | ||
263 | hashL := hash8(cv, lTableBits) | ||
264 | nextLong := lTable[hashL] | ||
265 | best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false)) | ||
266 | best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false)) | ||
267 | best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false)) | ||
268 | best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) | ||
269 | |||
270 | // Dict at + 1 | ||
271 | if dict != nil { | ||
272 | candidateL := dict.bestTableLong[hashL] | ||
273 | candidateS := dict.bestTableShort[hashS] | ||
274 | |||
275 | best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) | ||
276 | best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) | ||
277 | } | ||
278 | |||
279 | // s+2 | ||
280 | if true { | ||
281 | hashS := hash4(cv>>8, sTableBits) | ||
282 | |||
283 | nextShort = sTable[hashS] | ||
284 | s++ | ||
285 | cv = load64(src, s) | ||
286 | hashL := hash8(cv, lTableBits) | ||
287 | nextLong = lTable[hashL] | ||
288 | |||
289 | if (dict == nil || repeat <= s) && repeat > 0 { | ||
290 | // Repeat at + 2 | ||
291 | best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true)) | ||
292 | } else if repeat-s > 4 && dict != nil { | ||
293 | candidate := len(dict.dict) - (repeat - s) | ||
294 | best = bestOf(best, matchDict(candidate, s, uint32(cv), true)) | ||
295 | } | ||
296 | best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false)) | ||
297 | best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false)) | ||
298 | best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false)) | ||
299 | best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) | ||
300 | |||
301 | // Dict at +2 | ||
302 | // Very small gain | ||
303 | if dict != nil { | ||
304 | candidateL := dict.bestTableLong[hashL] | ||
305 | candidateS := dict.bestTableShort[hashS] | ||
306 | |||
307 | best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) | ||
308 | best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) | ||
309 | } | ||
310 | } | ||
311 | // Search for a match at best match end, see if that is better. | ||
312 | // Allow some bytes at the beginning to mismatch. | ||
313 | // Sweet spot is around 1-2 bytes, but depends on input. | ||
314 | // The skipped bytes are tested in Extend backwards, | ||
315 | // and still picked up as part of the match if they do. | ||
316 | const skipBeginning = 2 | ||
317 | const skipEnd = 1 | ||
318 | if sAt := best.s + best.length - skipEnd; sAt < sLimit { | ||
319 | |||
320 | sBack := best.s + skipBeginning - skipEnd | ||
321 | backL := best.length - skipBeginning | ||
322 | // Load initial values | ||
323 | cv = load64(src, sBack) | ||
324 | |||
325 | // Grab candidates... | ||
326 | next := lTable[hash8(load64(src, sAt), lTableBits)] | ||
327 | |||
328 | if checkAt := getCur(next) - backL; checkAt > 0 { | ||
329 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) | ||
330 | } | ||
331 | if checkAt := getPrev(next) - backL; checkAt > 0 { | ||
332 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) | ||
333 | } | ||
334 | // Disabled: Extremely small gain | ||
335 | if false { | ||
336 | next = sTable[hash4(load64(src, sAt), sTableBits)] | ||
337 | if checkAt := getCur(next) - backL; checkAt > 0 { | ||
338 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) | ||
339 | } | ||
340 | if checkAt := getPrev(next) - backL; checkAt > 0 { | ||
341 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) | ||
342 | } | ||
343 | } | ||
344 | } | ||
345 | } | ||
346 | } | ||
347 | |||
348 | // Update table | ||
349 | lTable[hashL] = uint64(s) | candidateL<<32 | ||
350 | sTable[hashS] = uint64(s) | candidateS<<32 | ||
351 | |||
352 | if best.length > 0 { | ||
353 | break | ||
354 | } | ||
355 | |||
356 | cv = load64(src, nextS) | ||
357 | s = nextS | ||
358 | } | ||
359 | |||
360 | // Extend backwards, not needed for repeats... | ||
361 | s = best.s | ||
362 | if !best.rep && !best.dict { | ||
363 | for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] { | ||
364 | best.offset-- | ||
365 | best.length++ | ||
366 | s-- | ||
367 | } | ||
368 | } | ||
369 | if false && best.offset >= s { | ||
370 | panic(fmt.Errorf("t %d >= s %d", best.offset, s)) | ||
371 | } | ||
372 | // Bail if we exceed the maximum size. | ||
373 | if d+(s-nextEmit) > dstLimit { | ||
374 | return 0 | ||
375 | } | ||
376 | |||
377 | base := s | ||
378 | offset := s - best.offset | ||
379 | s += best.length | ||
380 | |||
381 | if offset > 65535 && s-base <= 5 && !best.rep { | ||
382 | // Bail if the match is equal or worse to the encoding. | ||
383 | s = best.s + 1 | ||
384 | if s >= sLimit { | ||
385 | goto emitRemainder | ||
386 | } | ||
387 | cv = load64(src, s) | ||
388 | continue | ||
389 | } | ||
390 | if debug && nextEmit != base { | ||
391 | fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base) | ||
392 | } | ||
393 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
394 | if best.rep { | ||
395 | if nextEmit > 0 || best.dict { | ||
396 | if debug { | ||
397 | fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) | ||
398 | } | ||
399 | // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. | ||
400 | d += emitRepeat(dst[d:], offset, best.length) | ||
401 | } else { | ||
402 | // First match without dict cannot be a repeat. | ||
403 | if debug { | ||
404 | fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) | ||
405 | } | ||
406 | d += emitCopy(dst[d:], offset, best.length) | ||
407 | } | ||
408 | } else { | ||
409 | if debug { | ||
410 | fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) | ||
411 | } | ||
412 | d += emitCopy(dst[d:], offset, best.length) | ||
413 | } | ||
414 | repeat = offset | ||
415 | |||
416 | nextEmit = s | ||
417 | if s >= sLimit { | ||
418 | goto emitRemainder | ||
419 | } | ||
420 | |||
421 | if d > dstLimit { | ||
422 | // Do we have space for more, if not bail. | ||
423 | return 0 | ||
424 | } | ||
425 | // Fill tables... | ||
426 | for i := best.s + 1; i < s; i++ { | ||
427 | cv0 := load64(src, i) | ||
428 | long0 := hash8(cv0, lTableBits) | ||
429 | short0 := hash4(cv0, sTableBits) | ||
430 | lTable[long0] = uint64(i) | lTable[long0]<<32 | ||
431 | sTable[short0] = uint64(i) | sTable[short0]<<32 | ||
432 | } | ||
433 | cv = load64(src, s) | ||
434 | } | ||
435 | |||
436 | emitRemainder: | ||
437 | if nextEmit < len(src) { | ||
438 | // Bail if we exceed the maximum size. | ||
439 | if d+len(src)-nextEmit > dstLimit { | ||
440 | return 0 | ||
441 | } | ||
442 | if debug && nextEmit != s { | ||
443 | fmt.Println("emitted ", len(src)-nextEmit, "literals") | ||
444 | } | ||
445 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
446 | } | ||
447 | return d | ||
448 | } | ||
449 | |||
450 | // encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It | ||
451 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
452 | // been written. | ||
453 | // | ||
454 | // It also assumes that: | ||
455 | // | ||
456 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
457 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
458 | func encodeBlockBestSnappy(dst, src []byte) (d int) { | ||
459 | // Initialize the hash tables. | ||
460 | const ( | ||
461 | // Long hash matches. | ||
462 | lTableBits = 19 | ||
463 | maxLTableSize = 1 << lTableBits | ||
464 | |||
465 | // Short hash matches. | ||
466 | sTableBits = 16 | ||
467 | maxSTableSize = 1 << sTableBits | ||
468 | |||
469 | inputMargin = 8 + 2 | ||
470 | ) | ||
471 | |||
472 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
473 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
474 | // looking for copies. | ||
475 | sLimit := len(src) - inputMargin | ||
476 | if len(src) < minNonLiteralBlockSize { | ||
477 | return 0 | ||
478 | } | ||
479 | |||
480 | var lTable [maxLTableSize]uint64 | ||
481 | var sTable [maxSTableSize]uint64 | ||
482 | |||
483 | // Bail if we can't compress to at least this. | ||
484 | dstLimit := len(src) - 5 | ||
485 | |||
486 | // nextEmit is where in src the next emitLiteral should start from. | ||
487 | nextEmit := 0 | ||
488 | |||
489 | // The encoded form must start with a literal, as there are no previous | ||
490 | // bytes to copy, so we start looking for hash matches at s == 1. | ||
491 | s := 1 | ||
492 | cv := load64(src, s) | ||
493 | |||
494 | // We search for a repeat at -1, but don't output repeats when nextEmit == 0 | ||
495 | repeat := 1 | ||
496 | const lowbitMask = 0xffffffff | ||
497 | getCur := func(x uint64) int { | ||
498 | return int(x & lowbitMask) | ||
499 | } | ||
500 | getPrev := func(x uint64) int { | ||
501 | return int(x >> 32) | ||
502 | } | ||
503 | const maxSkip = 64 | ||
504 | |||
505 | for { | ||
506 | type match struct { | ||
507 | offset int | ||
508 | s int | ||
509 | length int | ||
510 | score int | ||
511 | } | ||
512 | var best match | ||
513 | for { | ||
514 | // Next src position to check | ||
515 | nextS := (s-nextEmit)>>8 + 1 | ||
516 | if nextS > maxSkip { | ||
517 | nextS = s + maxSkip | ||
518 | } else { | ||
519 | nextS += s | ||
520 | } | ||
521 | if nextS > sLimit { | ||
522 | goto emitRemainder | ||
523 | } | ||
524 | hashL := hash8(cv, lTableBits) | ||
525 | hashS := hash4(cv, sTableBits) | ||
526 | candidateL := lTable[hashL] | ||
527 | candidateS := sTable[hashS] | ||
528 | |||
529 | score := func(m match) int { | ||
530 | // Matches that are longer forward are penalized since we must emit it as a literal. | ||
531 | score := m.length - m.s | ||
532 | if nextEmit == m.s { | ||
533 | // If we do not have to emit literals, we save 1 byte | ||
534 | score++ | ||
535 | } | ||
536 | offset := m.s - m.offset | ||
537 | |||
538 | return score - emitCopyNoRepeatSize(offset, m.length) | ||
539 | } | ||
540 | |||
541 | matchAt := func(offset, s int, first uint32) match { | ||
542 | if best.length != 0 && best.s-best.offset == s-offset { | ||
543 | // Don't retest if we have the same offset. | ||
544 | return match{offset: offset, s: s} | ||
545 | } | ||
546 | if load32(src, offset) != first { | ||
547 | return match{offset: offset, s: s} | ||
548 | } | ||
549 | m := match{offset: offset, s: s, length: 4 + offset} | ||
550 | s += 4 | ||
551 | for s <= sLimit { | ||
552 | if diff := load64(src, s) ^ load64(src, m.length); diff != 0 { | ||
553 | m.length += bits.TrailingZeros64(diff) >> 3 | ||
554 | break | ||
555 | } | ||
556 | s += 8 | ||
557 | m.length += 8 | ||
558 | } | ||
559 | m.length -= offset | ||
560 | m.score = score(m) | ||
561 | if m.score <= -m.s { | ||
562 | // Eliminate if no savings, we might find a better one. | ||
563 | m.length = 0 | ||
564 | } | ||
565 | return m | ||
566 | } | ||
567 | |||
568 | bestOf := func(a, b match) match { | ||
569 | if b.length == 0 { | ||
570 | return a | ||
571 | } | ||
572 | if a.length == 0 { | ||
573 | return b | ||
574 | } | ||
575 | as := a.score + b.s | ||
576 | bs := b.score + a.s | ||
577 | if as >= bs { | ||
578 | return a | ||
579 | } | ||
580 | return b | ||
581 | } | ||
582 | |||
583 | best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv))) | ||
584 | best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv))) | ||
585 | best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv))) | ||
586 | |||
587 | { | ||
588 | best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8))) | ||
589 | if best.length > 0 { | ||
590 | // s+1 | ||
591 | nextShort := sTable[hash4(cv>>8, sTableBits)] | ||
592 | s := s + 1 | ||
593 | cv := load64(src, s) | ||
594 | nextLong := lTable[hash8(cv, lTableBits)] | ||
595 | best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv))) | ||
596 | best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv))) | ||
597 | best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv))) | ||
598 | best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv))) | ||
599 | // Repeat at + 2 | ||
600 | best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8))) | ||
601 | |||
602 | // s+2 | ||
603 | if true { | ||
604 | nextShort = sTable[hash4(cv>>8, sTableBits)] | ||
605 | s++ | ||
606 | cv = load64(src, s) | ||
607 | nextLong = lTable[hash8(cv, lTableBits)] | ||
608 | best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv))) | ||
609 | best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv))) | ||
610 | best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv))) | ||
611 | best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv))) | ||
612 | } | ||
613 | // Search for a match at best match end, see if that is better. | ||
614 | if sAt := best.s + best.length; sAt < sLimit { | ||
615 | sBack := best.s | ||
616 | backL := best.length | ||
617 | // Load initial values | ||
618 | cv = load64(src, sBack) | ||
619 | // Search for mismatch | ||
620 | next := lTable[hash8(load64(src, sAt), lTableBits)] | ||
621 | //next := sTable[hash4(load64(src, sAt), sTableBits)] | ||
622 | |||
623 | if checkAt := getCur(next) - backL; checkAt > 0 { | ||
624 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv))) | ||
625 | } | ||
626 | if checkAt := getPrev(next) - backL; checkAt > 0 { | ||
627 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv))) | ||
628 | } | ||
629 | } | ||
630 | } | ||
631 | } | ||
632 | |||
633 | // Update table | ||
634 | lTable[hashL] = uint64(s) | candidateL<<32 | ||
635 | sTable[hashS] = uint64(s) | candidateS<<32 | ||
636 | |||
637 | if best.length > 0 { | ||
638 | break | ||
639 | } | ||
640 | |||
641 | cv = load64(src, nextS) | ||
642 | s = nextS | ||
643 | } | ||
644 | |||
645 | // Extend backwards, not needed for repeats... | ||
646 | s = best.s | ||
647 | if true { | ||
648 | for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] { | ||
649 | best.offset-- | ||
650 | best.length++ | ||
651 | s-- | ||
652 | } | ||
653 | } | ||
654 | if false && best.offset >= s { | ||
655 | panic(fmt.Errorf("t %d >= s %d", best.offset, s)) | ||
656 | } | ||
657 | // Bail if we exceed the maximum size. | ||
658 | if d+(s-nextEmit) > dstLimit { | ||
659 | return 0 | ||
660 | } | ||
661 | |||
662 | base := s | ||
663 | offset := s - best.offset | ||
664 | |||
665 | s += best.length | ||
666 | |||
667 | if offset > 65535 && s-base <= 5 { | ||
668 | // Bail if the match is equal or worse to the encoding. | ||
669 | s = best.s + 1 | ||
670 | if s >= sLimit { | ||
671 | goto emitRemainder | ||
672 | } | ||
673 | cv = load64(src, s) | ||
674 | continue | ||
675 | } | ||
676 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
677 | d += emitCopyNoRepeat(dst[d:], offset, best.length) | ||
678 | repeat = offset | ||
679 | |||
680 | nextEmit = s | ||
681 | if s >= sLimit { | ||
682 | goto emitRemainder | ||
683 | } | ||
684 | |||
685 | if d > dstLimit { | ||
686 | // Do we have space for more, if not bail. | ||
687 | return 0 | ||
688 | } | ||
689 | // Fill tables... | ||
690 | for i := best.s + 1; i < s; i++ { | ||
691 | cv0 := load64(src, i) | ||
692 | long0 := hash8(cv0, lTableBits) | ||
693 | short0 := hash4(cv0, sTableBits) | ||
694 | lTable[long0] = uint64(i) | lTable[long0]<<32 | ||
695 | sTable[short0] = uint64(i) | sTable[short0]<<32 | ||
696 | } | ||
697 | cv = load64(src, s) | ||
698 | } | ||
699 | |||
700 | emitRemainder: | ||
701 | if nextEmit < len(src) { | ||
702 | // Bail if we exceed the maximum size. | ||
703 | if d+len(src)-nextEmit > dstLimit { | ||
704 | return 0 | ||
705 | } | ||
706 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
707 | } | ||
708 | return d | ||
709 | } | ||
710 | |||
711 | // emitCopySize returns the size to encode the offset+length | ||
712 | // | ||
713 | // It assumes that: | ||
714 | // | ||
715 | // 1 <= offset && offset <= math.MaxUint32 | ||
716 | // 4 <= length && length <= 1 << 24 | ||
717 | func emitCopySize(offset, length int) int { | ||
718 | if offset >= 65536 { | ||
719 | i := 0 | ||
720 | if length > 64 { | ||
721 | length -= 64 | ||
722 | if length >= 4 { | ||
723 | // Emit remaining as repeats | ||
724 | return 5 + emitRepeatSize(offset, length) | ||
725 | } | ||
726 | i = 5 | ||
727 | } | ||
728 | if length == 0 { | ||
729 | return i | ||
730 | } | ||
731 | return i + 5 | ||
732 | } | ||
733 | |||
734 | // Offset no more than 2 bytes. | ||
735 | if length > 64 { | ||
736 | if offset < 2048 { | ||
737 | // Emit 8 bytes, then rest as repeats... | ||
738 | return 2 + emitRepeatSize(offset, length-8) | ||
739 | } | ||
740 | // Emit remaining as repeats, at least 4 bytes remain. | ||
741 | return 3 + emitRepeatSize(offset, length-60) | ||
742 | } | ||
743 | if length >= 12 || offset >= 2048 { | ||
744 | return 3 | ||
745 | } | ||
746 | // Emit the remaining copy, encoded as 2 bytes. | ||
747 | return 2 | ||
748 | } | ||
749 | |||
750 | // emitCopyNoRepeatSize returns the size to encode the offset+length | ||
751 | // | ||
752 | // It assumes that: | ||
753 | // | ||
754 | // 1 <= offset && offset <= math.MaxUint32 | ||
755 | // 4 <= length && length <= 1 << 24 | ||
756 | func emitCopyNoRepeatSize(offset, length int) int { | ||
757 | if offset >= 65536 { | ||
758 | return 5 + 5*(length/64) | ||
759 | } | ||
760 | |||
761 | // Offset no more than 2 bytes. | ||
762 | if length > 64 { | ||
763 | // Emit remaining as repeats, at least 4 bytes remain. | ||
764 | return 3 + 3*(length/60) | ||
765 | } | ||
766 | if length >= 12 || offset >= 2048 { | ||
767 | return 3 | ||
768 | } | ||
769 | // Emit the remaining copy, encoded as 2 bytes. | ||
770 | return 2 | ||
771 | } | ||
772 | |||
773 | // emitRepeatSize returns the number of bytes required to encode a repeat. | ||
774 | // Length must be at least 4 and < 1<<24 | ||
775 | func emitRepeatSize(offset, length int) int { | ||
776 | // Repeat offset, make length cheaper | ||
777 | if length <= 4+4 || (length < 8+4 && offset < 2048) { | ||
778 | return 2 | ||
779 | } | ||
780 | if length < (1<<8)+4+4 { | ||
781 | return 3 | ||
782 | } | ||
783 | if length < (1<<16)+(1<<8)+4 { | ||
784 | return 4 | ||
785 | } | ||
786 | const maxRepeat = (1 << 24) - 1 | ||
787 | length -= (1 << 16) - 4 | ||
788 | left := 0 | ||
789 | if length > maxRepeat { | ||
790 | left = length - maxRepeat + 4 | ||
791 | } | ||
792 | if left > 0 { | ||
793 | return 5 + emitRepeatSize(offset, left) | ||
794 | } | ||
795 | return 5 | ||
796 | } | ||