diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encode_best.go')
| -rw-r--r-- | vendor/github.com/klauspost/compress/s2/encode_best.go | 796 |
1 files changed, 796 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go new file mode 100644 index 0000000..47bac74 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/encode_best.go | |||
| @@ -0,0 +1,796 @@ | |||
| 1 | // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||
| 2 | // Copyright (c) 2019 Klaus Post. All rights reserved. | ||
| 3 | // Use of this source code is governed by a BSD-style | ||
| 4 | // license that can be found in the LICENSE file. | ||
| 5 | |||
| 6 | package s2 | ||
| 7 | |||
| 8 | import ( | ||
| 9 | "fmt" | ||
| 10 | "math" | ||
| 11 | "math/bits" | ||
| 12 | ) | ||
| 13 | |||
| 14 | // encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It | ||
| 15 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
| 16 | // been written. | ||
| 17 | // | ||
| 18 | // It also assumes that: | ||
| 19 | // | ||
| 20 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
| 21 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
| 22 | func encodeBlockBest(dst, src []byte, dict *Dict) (d int) { | ||
| 23 | // Initialize the hash tables. | ||
| 24 | const ( | ||
| 25 | // Long hash matches. | ||
| 26 | lTableBits = 19 | ||
| 27 | maxLTableSize = 1 << lTableBits | ||
| 28 | |||
| 29 | // Short hash matches. | ||
| 30 | sTableBits = 16 | ||
| 31 | maxSTableSize = 1 << sTableBits | ||
| 32 | |||
| 33 | inputMargin = 8 + 2 | ||
| 34 | |||
| 35 | debug = false | ||
| 36 | ) | ||
| 37 | |||
| 38 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
| 39 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
| 40 | // looking for copies. | ||
| 41 | sLimit := len(src) - inputMargin | ||
| 42 | if len(src) < minNonLiteralBlockSize { | ||
| 43 | return 0 | ||
| 44 | } | ||
| 45 | sLimitDict := len(src) - inputMargin | ||
| 46 | if sLimitDict > MaxDictSrcOffset-inputMargin { | ||
| 47 | sLimitDict = MaxDictSrcOffset - inputMargin | ||
| 48 | } | ||
| 49 | |||
| 50 | var lTable [maxLTableSize]uint64 | ||
| 51 | var sTable [maxSTableSize]uint64 | ||
| 52 | |||
| 53 | // Bail if we can't compress to at least this. | ||
| 54 | dstLimit := len(src) - 5 | ||
| 55 | |||
| 56 | // nextEmit is where in src the next emitLiteral should start from. | ||
| 57 | nextEmit := 0 | ||
| 58 | |||
| 59 | // The encoded form must start with a literal, as there are no previous | ||
| 60 | // bytes to copy, so we start looking for hash matches at s == 1. | ||
| 61 | s := 1 | ||
| 62 | repeat := 1 | ||
| 63 | if dict != nil { | ||
| 64 | dict.initBest() | ||
| 65 | s = 0 | ||
| 66 | repeat = len(dict.dict) - dict.repeat | ||
| 67 | } | ||
| 68 | cv := load64(src, s) | ||
| 69 | |||
| 70 | // We search for a repeat at -1, but don't output repeats when nextEmit == 0 | ||
| 71 | const lowbitMask = 0xffffffff | ||
| 72 | getCur := func(x uint64) int { | ||
| 73 | return int(x & lowbitMask) | ||
| 74 | } | ||
| 75 | getPrev := func(x uint64) int { | ||
| 76 | return int(x >> 32) | ||
| 77 | } | ||
| 78 | const maxSkip = 64 | ||
| 79 | |||
| 80 | for { | ||
| 81 | type match struct { | ||
| 82 | offset int | ||
| 83 | s int | ||
| 84 | length int | ||
| 85 | score int | ||
| 86 | rep, dict bool | ||
| 87 | } | ||
| 88 | var best match | ||
| 89 | for { | ||
| 90 | // Next src position to check | ||
| 91 | nextS := (s-nextEmit)>>8 + 1 | ||
| 92 | if nextS > maxSkip { | ||
| 93 | nextS = s + maxSkip | ||
| 94 | } else { | ||
| 95 | nextS += s | ||
| 96 | } | ||
| 97 | if nextS > sLimit { | ||
| 98 | goto emitRemainder | ||
| 99 | } | ||
| 100 | if dict != nil && s >= MaxDictSrcOffset { | ||
| 101 | dict = nil | ||
| 102 | if repeat > s { | ||
| 103 | repeat = math.MinInt32 | ||
| 104 | } | ||
| 105 | } | ||
| 106 | hashL := hash8(cv, lTableBits) | ||
| 107 | hashS := hash4(cv, sTableBits) | ||
| 108 | candidateL := lTable[hashL] | ||
| 109 | candidateS := sTable[hashS] | ||
| 110 | |||
| 111 | score := func(m match) int { | ||
| 112 | // Matches that are longer forward are penalized since we must emit it as a literal. | ||
| 113 | score := m.length - m.s | ||
| 114 | if nextEmit == m.s { | ||
| 115 | // If we do not have to emit literals, we save 1 byte | ||
| 116 | score++ | ||
| 117 | } | ||
| 118 | offset := m.s - m.offset | ||
| 119 | if m.rep { | ||
| 120 | return score - emitRepeatSize(offset, m.length) | ||
| 121 | } | ||
| 122 | return score - emitCopySize(offset, m.length) | ||
| 123 | } | ||
| 124 | |||
| 125 | matchAt := func(offset, s int, first uint32, rep bool) match { | ||
| 126 | if best.length != 0 && best.s-best.offset == s-offset { | ||
| 127 | // Don't retest if we have the same offset. | ||
| 128 | return match{offset: offset, s: s} | ||
| 129 | } | ||
| 130 | if load32(src, offset) != first { | ||
| 131 | return match{offset: offset, s: s} | ||
| 132 | } | ||
| 133 | m := match{offset: offset, s: s, length: 4 + offset, rep: rep} | ||
| 134 | s += 4 | ||
| 135 | for s < len(src) { | ||
| 136 | if len(src)-s < 8 { | ||
| 137 | if src[s] == src[m.length] { | ||
| 138 | m.length++ | ||
| 139 | s++ | ||
| 140 | continue | ||
| 141 | } | ||
| 142 | break | ||
| 143 | } | ||
| 144 | if diff := load64(src, s) ^ load64(src, m.length); diff != 0 { | ||
| 145 | m.length += bits.TrailingZeros64(diff) >> 3 | ||
| 146 | break | ||
| 147 | } | ||
| 148 | s += 8 | ||
| 149 | m.length += 8 | ||
| 150 | } | ||
| 151 | m.length -= offset | ||
| 152 | m.score = score(m) | ||
| 153 | if m.score <= -m.s { | ||
| 154 | // Eliminate if no savings, we might find a better one. | ||
| 155 | m.length = 0 | ||
| 156 | } | ||
| 157 | return m | ||
| 158 | } | ||
| 159 | matchDict := func(candidate, s int, first uint32, rep bool) match { | ||
| 160 | if s >= MaxDictSrcOffset { | ||
| 161 | return match{offset: candidate, s: s} | ||
| 162 | } | ||
| 163 | // Calculate offset as if in continuous array with s | ||
| 164 | offset := -len(dict.dict) + candidate | ||
| 165 | if best.length != 0 && best.s-best.offset == s-offset && !rep { | ||
| 166 | // Don't retest if we have the same offset. | ||
| 167 | return match{offset: offset, s: s} | ||
| 168 | } | ||
| 169 | |||
| 170 | if load32(dict.dict, candidate) != first { | ||
| 171 | return match{offset: offset, s: s} | ||
| 172 | } | ||
| 173 | m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true} | ||
| 174 | s += 4 | ||
| 175 | if !rep { | ||
| 176 | for s < sLimitDict && m.length < len(dict.dict) { | ||
| 177 | if len(src)-s < 8 || len(dict.dict)-m.length < 8 { | ||
| 178 | if src[s] == dict.dict[m.length] { | ||
| 179 | m.length++ | ||
| 180 | s++ | ||
| 181 | continue | ||
| 182 | } | ||
| 183 | break | ||
| 184 | } | ||
| 185 | if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 { | ||
| 186 | m.length += bits.TrailingZeros64(diff) >> 3 | ||
| 187 | break | ||
| 188 | } | ||
| 189 | s += 8 | ||
| 190 | m.length += 8 | ||
| 191 | } | ||
| 192 | } else { | ||
| 193 | for s < len(src) && m.length < len(dict.dict) { | ||
| 194 | if len(src)-s < 8 || len(dict.dict)-m.length < 8 { | ||
| 195 | if src[s] == dict.dict[m.length] { | ||
| 196 | m.length++ | ||
| 197 | s++ | ||
| 198 | continue | ||
| 199 | } | ||
| 200 | break | ||
| 201 | } | ||
| 202 | if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 { | ||
| 203 | m.length += bits.TrailingZeros64(diff) >> 3 | ||
| 204 | break | ||
| 205 | } | ||
| 206 | s += 8 | ||
| 207 | m.length += 8 | ||
| 208 | } | ||
| 209 | } | ||
| 210 | m.length -= candidate | ||
| 211 | m.score = score(m) | ||
| 212 | if m.score <= -m.s { | ||
| 213 | // Eliminate if no savings, we might find a better one. | ||
| 214 | m.length = 0 | ||
| 215 | } | ||
| 216 | return m | ||
| 217 | } | ||
| 218 | |||
| 219 | bestOf := func(a, b match) match { | ||
| 220 | if b.length == 0 { | ||
| 221 | return a | ||
| 222 | } | ||
| 223 | if a.length == 0 { | ||
| 224 | return b | ||
| 225 | } | ||
| 226 | as := a.score + b.s | ||
| 227 | bs := b.score + a.s | ||
| 228 | if as >= bs { | ||
| 229 | return a | ||
| 230 | } | ||
| 231 | return b | ||
| 232 | } | ||
| 233 | |||
| 234 | if s > 0 { | ||
| 235 | best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false)) | ||
| 236 | best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false)) | ||
| 237 | best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false)) | ||
| 238 | } | ||
| 239 | if dict != nil { | ||
| 240 | candidateL := dict.bestTableLong[hashL] | ||
| 241 | candidateS := dict.bestTableShort[hashS] | ||
| 242 | best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) | ||
| 243 | best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false)) | ||
| 244 | best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) | ||
| 245 | best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false)) | ||
| 246 | } | ||
| 247 | { | ||
| 248 | if (dict == nil || repeat <= s) && repeat > 0 { | ||
| 249 | best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true)) | ||
| 250 | } else if s-repeat < -4 && dict != nil { | ||
| 251 | candidate := len(dict.dict) - (repeat - s) | ||
| 252 | best = bestOf(best, matchDict(candidate, s, uint32(cv), true)) | ||
| 253 | candidate++ | ||
| 254 | best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true)) | ||
| 255 | } | ||
| 256 | |||
| 257 | if best.length > 0 { | ||
| 258 | hashS := hash4(cv>>8, sTableBits) | ||
| 259 | // s+1 | ||
| 260 | nextShort := sTable[hashS] | ||
| 261 | s := s + 1 | ||
| 262 | cv := load64(src, s) | ||
| 263 | hashL := hash8(cv, lTableBits) | ||
| 264 | nextLong := lTable[hashL] | ||
| 265 | best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false)) | ||
| 266 | best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false)) | ||
| 267 | best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false)) | ||
| 268 | best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) | ||
| 269 | |||
| 270 | // Dict at + 1 | ||
| 271 | if dict != nil { | ||
| 272 | candidateL := dict.bestTableLong[hashL] | ||
| 273 | candidateS := dict.bestTableShort[hashS] | ||
| 274 | |||
| 275 | best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) | ||
| 276 | best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) | ||
| 277 | } | ||
| 278 | |||
| 279 | // s+2 | ||
| 280 | if true { | ||
| 281 | hashS := hash4(cv>>8, sTableBits) | ||
| 282 | |||
| 283 | nextShort = sTable[hashS] | ||
| 284 | s++ | ||
| 285 | cv = load64(src, s) | ||
| 286 | hashL := hash8(cv, lTableBits) | ||
| 287 | nextLong = lTable[hashL] | ||
| 288 | |||
| 289 | if (dict == nil || repeat <= s) && repeat > 0 { | ||
| 290 | // Repeat at + 2 | ||
| 291 | best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true)) | ||
| 292 | } else if repeat-s > 4 && dict != nil { | ||
| 293 | candidate := len(dict.dict) - (repeat - s) | ||
| 294 | best = bestOf(best, matchDict(candidate, s, uint32(cv), true)) | ||
| 295 | } | ||
| 296 | best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false)) | ||
| 297 | best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false)) | ||
| 298 | best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false)) | ||
| 299 | best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) | ||
| 300 | |||
| 301 | // Dict at +2 | ||
| 302 | // Very small gain | ||
| 303 | if dict != nil { | ||
| 304 | candidateL := dict.bestTableLong[hashL] | ||
| 305 | candidateS := dict.bestTableShort[hashS] | ||
| 306 | |||
| 307 | best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) | ||
| 308 | best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) | ||
| 309 | } | ||
| 310 | } | ||
| 311 | // Search for a match at best match end, see if that is better. | ||
| 312 | // Allow some bytes at the beginning to mismatch. | ||
| 313 | // Sweet spot is around 1-2 bytes, but depends on input. | ||
| 314 | // The skipped bytes are tested in Extend backwards, | ||
| 315 | // and still picked up as part of the match if they do. | ||
| 316 | const skipBeginning = 2 | ||
| 317 | const skipEnd = 1 | ||
| 318 | if sAt := best.s + best.length - skipEnd; sAt < sLimit { | ||
| 319 | |||
| 320 | sBack := best.s + skipBeginning - skipEnd | ||
| 321 | backL := best.length - skipBeginning | ||
| 322 | // Load initial values | ||
| 323 | cv = load64(src, sBack) | ||
| 324 | |||
| 325 | // Grab candidates... | ||
| 326 | next := lTable[hash8(load64(src, sAt), lTableBits)] | ||
| 327 | |||
| 328 | if checkAt := getCur(next) - backL; checkAt > 0 { | ||
| 329 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) | ||
| 330 | } | ||
| 331 | if checkAt := getPrev(next) - backL; checkAt > 0 { | ||
| 332 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) | ||
| 333 | } | ||
| 334 | // Disabled: Extremely small gain | ||
| 335 | if false { | ||
| 336 | next = sTable[hash4(load64(src, sAt), sTableBits)] | ||
| 337 | if checkAt := getCur(next) - backL; checkAt > 0 { | ||
| 338 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) | ||
| 339 | } | ||
| 340 | if checkAt := getPrev(next) - backL; checkAt > 0 { | ||
| 341 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) | ||
| 342 | } | ||
| 343 | } | ||
| 344 | } | ||
| 345 | } | ||
| 346 | } | ||
| 347 | |||
| 348 | // Update table | ||
| 349 | lTable[hashL] = uint64(s) | candidateL<<32 | ||
| 350 | sTable[hashS] = uint64(s) | candidateS<<32 | ||
| 351 | |||
| 352 | if best.length > 0 { | ||
| 353 | break | ||
| 354 | } | ||
| 355 | |||
| 356 | cv = load64(src, nextS) | ||
| 357 | s = nextS | ||
| 358 | } | ||
| 359 | |||
| 360 | // Extend backwards, not needed for repeats... | ||
| 361 | s = best.s | ||
| 362 | if !best.rep && !best.dict { | ||
| 363 | for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] { | ||
| 364 | best.offset-- | ||
| 365 | best.length++ | ||
| 366 | s-- | ||
| 367 | } | ||
| 368 | } | ||
| 369 | if false && best.offset >= s { | ||
| 370 | panic(fmt.Errorf("t %d >= s %d", best.offset, s)) | ||
| 371 | } | ||
| 372 | // Bail if we exceed the maximum size. | ||
| 373 | if d+(s-nextEmit) > dstLimit { | ||
| 374 | return 0 | ||
| 375 | } | ||
| 376 | |||
| 377 | base := s | ||
| 378 | offset := s - best.offset | ||
| 379 | s += best.length | ||
| 380 | |||
| 381 | if offset > 65535 && s-base <= 5 && !best.rep { | ||
| 382 | // Bail if the match is equal or worse to the encoding. | ||
| 383 | s = best.s + 1 | ||
| 384 | if s >= sLimit { | ||
| 385 | goto emitRemainder | ||
| 386 | } | ||
| 387 | cv = load64(src, s) | ||
| 388 | continue | ||
| 389 | } | ||
| 390 | if debug && nextEmit != base { | ||
| 391 | fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base) | ||
| 392 | } | ||
| 393 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
| 394 | if best.rep { | ||
| 395 | if nextEmit > 0 || best.dict { | ||
| 396 | if debug { | ||
| 397 | fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) | ||
| 398 | } | ||
| 399 | // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. | ||
| 400 | d += emitRepeat(dst[d:], offset, best.length) | ||
| 401 | } else { | ||
| 402 | // First match without dict cannot be a repeat. | ||
| 403 | if debug { | ||
| 404 | fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) | ||
| 405 | } | ||
| 406 | d += emitCopy(dst[d:], offset, best.length) | ||
| 407 | } | ||
| 408 | } else { | ||
| 409 | if debug { | ||
| 410 | fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) | ||
| 411 | } | ||
| 412 | d += emitCopy(dst[d:], offset, best.length) | ||
| 413 | } | ||
| 414 | repeat = offset | ||
| 415 | |||
| 416 | nextEmit = s | ||
| 417 | if s >= sLimit { | ||
| 418 | goto emitRemainder | ||
| 419 | } | ||
| 420 | |||
| 421 | if d > dstLimit { | ||
| 422 | // Do we have space for more, if not bail. | ||
| 423 | return 0 | ||
| 424 | } | ||
| 425 | // Fill tables... | ||
| 426 | for i := best.s + 1; i < s; i++ { | ||
| 427 | cv0 := load64(src, i) | ||
| 428 | long0 := hash8(cv0, lTableBits) | ||
| 429 | short0 := hash4(cv0, sTableBits) | ||
| 430 | lTable[long0] = uint64(i) | lTable[long0]<<32 | ||
| 431 | sTable[short0] = uint64(i) | sTable[short0]<<32 | ||
| 432 | } | ||
| 433 | cv = load64(src, s) | ||
| 434 | } | ||
| 435 | |||
| 436 | emitRemainder: | ||
| 437 | if nextEmit < len(src) { | ||
| 438 | // Bail if we exceed the maximum size. | ||
| 439 | if d+len(src)-nextEmit > dstLimit { | ||
| 440 | return 0 | ||
| 441 | } | ||
| 442 | if debug && nextEmit != s { | ||
| 443 | fmt.Println("emitted ", len(src)-nextEmit, "literals") | ||
| 444 | } | ||
| 445 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
| 446 | } | ||
| 447 | return d | ||
| 448 | } | ||
| 449 | |||
| 450 | // encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It | ||
| 451 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
| 452 | // been written. | ||
| 453 | // | ||
| 454 | // It also assumes that: | ||
| 455 | // | ||
| 456 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
| 457 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
| 458 | func encodeBlockBestSnappy(dst, src []byte) (d int) { | ||
| 459 | // Initialize the hash tables. | ||
| 460 | const ( | ||
| 461 | // Long hash matches. | ||
| 462 | lTableBits = 19 | ||
| 463 | maxLTableSize = 1 << lTableBits | ||
| 464 | |||
| 465 | // Short hash matches. | ||
| 466 | sTableBits = 16 | ||
| 467 | maxSTableSize = 1 << sTableBits | ||
| 468 | |||
| 469 | inputMargin = 8 + 2 | ||
| 470 | ) | ||
| 471 | |||
| 472 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
| 473 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
| 474 | // looking for copies. | ||
| 475 | sLimit := len(src) - inputMargin | ||
| 476 | if len(src) < minNonLiteralBlockSize { | ||
| 477 | return 0 | ||
| 478 | } | ||
| 479 | |||
| 480 | var lTable [maxLTableSize]uint64 | ||
| 481 | var sTable [maxSTableSize]uint64 | ||
| 482 | |||
| 483 | // Bail if we can't compress to at least this. | ||
| 484 | dstLimit := len(src) - 5 | ||
| 485 | |||
| 486 | // nextEmit is where in src the next emitLiteral should start from. | ||
| 487 | nextEmit := 0 | ||
| 488 | |||
| 489 | // The encoded form must start with a literal, as there are no previous | ||
| 490 | // bytes to copy, so we start looking for hash matches at s == 1. | ||
| 491 | s := 1 | ||
| 492 | cv := load64(src, s) | ||
| 493 | |||
| 494 | // We search for a repeat at -1, but don't output repeats when nextEmit == 0 | ||
| 495 | repeat := 1 | ||
| 496 | const lowbitMask = 0xffffffff | ||
| 497 | getCur := func(x uint64) int { | ||
| 498 | return int(x & lowbitMask) | ||
| 499 | } | ||
| 500 | getPrev := func(x uint64) int { | ||
| 501 | return int(x >> 32) | ||
| 502 | } | ||
| 503 | const maxSkip = 64 | ||
| 504 | |||
| 505 | for { | ||
| 506 | type match struct { | ||
| 507 | offset int | ||
| 508 | s int | ||
| 509 | length int | ||
| 510 | score int | ||
| 511 | } | ||
| 512 | var best match | ||
| 513 | for { | ||
| 514 | // Next src position to check | ||
| 515 | nextS := (s-nextEmit)>>8 + 1 | ||
| 516 | if nextS > maxSkip { | ||
| 517 | nextS = s + maxSkip | ||
| 518 | } else { | ||
| 519 | nextS += s | ||
| 520 | } | ||
| 521 | if nextS > sLimit { | ||
| 522 | goto emitRemainder | ||
| 523 | } | ||
| 524 | hashL := hash8(cv, lTableBits) | ||
| 525 | hashS := hash4(cv, sTableBits) | ||
| 526 | candidateL := lTable[hashL] | ||
| 527 | candidateS := sTable[hashS] | ||
| 528 | |||
| 529 | score := func(m match) int { | ||
| 530 | // Matches that are longer forward are penalized since we must emit it as a literal. | ||
| 531 | score := m.length - m.s | ||
| 532 | if nextEmit == m.s { | ||
| 533 | // If we do not have to emit literals, we save 1 byte | ||
| 534 | score++ | ||
| 535 | } | ||
| 536 | offset := m.s - m.offset | ||
| 537 | |||
| 538 | return score - emitCopyNoRepeatSize(offset, m.length) | ||
| 539 | } | ||
| 540 | |||
| 541 | matchAt := func(offset, s int, first uint32) match { | ||
| 542 | if best.length != 0 && best.s-best.offset == s-offset { | ||
| 543 | // Don't retest if we have the same offset. | ||
| 544 | return match{offset: offset, s: s} | ||
| 545 | } | ||
| 546 | if load32(src, offset) != first { | ||
| 547 | return match{offset: offset, s: s} | ||
| 548 | } | ||
| 549 | m := match{offset: offset, s: s, length: 4 + offset} | ||
| 550 | s += 4 | ||
| 551 | for s <= sLimit { | ||
| 552 | if diff := load64(src, s) ^ load64(src, m.length); diff != 0 { | ||
| 553 | m.length += bits.TrailingZeros64(diff) >> 3 | ||
| 554 | break | ||
| 555 | } | ||
| 556 | s += 8 | ||
| 557 | m.length += 8 | ||
| 558 | } | ||
| 559 | m.length -= offset | ||
| 560 | m.score = score(m) | ||
| 561 | if m.score <= -m.s { | ||
| 562 | // Eliminate if no savings, we might find a better one. | ||
| 563 | m.length = 0 | ||
| 564 | } | ||
| 565 | return m | ||
| 566 | } | ||
| 567 | |||
| 568 | bestOf := func(a, b match) match { | ||
| 569 | if b.length == 0 { | ||
| 570 | return a | ||
| 571 | } | ||
| 572 | if a.length == 0 { | ||
| 573 | return b | ||
| 574 | } | ||
| 575 | as := a.score + b.s | ||
| 576 | bs := b.score + a.s | ||
| 577 | if as >= bs { | ||
| 578 | return a | ||
| 579 | } | ||
| 580 | return b | ||
| 581 | } | ||
| 582 | |||
| 583 | best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv))) | ||
| 584 | best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv))) | ||
| 585 | best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv))) | ||
| 586 | |||
| 587 | { | ||
| 588 | best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8))) | ||
| 589 | if best.length > 0 { | ||
| 590 | // s+1 | ||
| 591 | nextShort := sTable[hash4(cv>>8, sTableBits)] | ||
| 592 | s := s + 1 | ||
| 593 | cv := load64(src, s) | ||
| 594 | nextLong := lTable[hash8(cv, lTableBits)] | ||
| 595 | best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv))) | ||
| 596 | best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv))) | ||
| 597 | best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv))) | ||
| 598 | best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv))) | ||
| 599 | // Repeat at + 2 | ||
| 600 | best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8))) | ||
| 601 | |||
| 602 | // s+2 | ||
| 603 | if true { | ||
| 604 | nextShort = sTable[hash4(cv>>8, sTableBits)] | ||
| 605 | s++ | ||
| 606 | cv = load64(src, s) | ||
| 607 | nextLong = lTable[hash8(cv, lTableBits)] | ||
| 608 | best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv))) | ||
| 609 | best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv))) | ||
| 610 | best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv))) | ||
| 611 | best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv))) | ||
| 612 | } | ||
| 613 | // Search for a match at best match end, see if that is better. | ||
| 614 | if sAt := best.s + best.length; sAt < sLimit { | ||
| 615 | sBack := best.s | ||
| 616 | backL := best.length | ||
| 617 | // Load initial values | ||
| 618 | cv = load64(src, sBack) | ||
| 619 | // Search for mismatch | ||
| 620 | next := lTable[hash8(load64(src, sAt), lTableBits)] | ||
| 621 | //next := sTable[hash4(load64(src, sAt), sTableBits)] | ||
| 622 | |||
| 623 | if checkAt := getCur(next) - backL; checkAt > 0 { | ||
| 624 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv))) | ||
| 625 | } | ||
| 626 | if checkAt := getPrev(next) - backL; checkAt > 0 { | ||
| 627 | best = bestOf(best, matchAt(checkAt, sBack, uint32(cv))) | ||
| 628 | } | ||
| 629 | } | ||
| 630 | } | ||
| 631 | } | ||
| 632 | |||
| 633 | // Update table | ||
| 634 | lTable[hashL] = uint64(s) | candidateL<<32 | ||
| 635 | sTable[hashS] = uint64(s) | candidateS<<32 | ||
| 636 | |||
| 637 | if best.length > 0 { | ||
| 638 | break | ||
| 639 | } | ||
| 640 | |||
| 641 | cv = load64(src, nextS) | ||
| 642 | s = nextS | ||
| 643 | } | ||
| 644 | |||
| 645 | // Extend backwards, not needed for repeats... | ||
| 646 | s = best.s | ||
| 647 | if true { | ||
| 648 | for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] { | ||
| 649 | best.offset-- | ||
| 650 | best.length++ | ||
| 651 | s-- | ||
| 652 | } | ||
| 653 | } | ||
| 654 | if false && best.offset >= s { | ||
| 655 | panic(fmt.Errorf("t %d >= s %d", best.offset, s)) | ||
| 656 | } | ||
| 657 | // Bail if we exceed the maximum size. | ||
| 658 | if d+(s-nextEmit) > dstLimit { | ||
| 659 | return 0 | ||
| 660 | } | ||
| 661 | |||
| 662 | base := s | ||
| 663 | offset := s - best.offset | ||
| 664 | |||
| 665 | s += best.length | ||
| 666 | |||
| 667 | if offset > 65535 && s-base <= 5 { | ||
| 668 | // Bail if the match is equal or worse to the encoding. | ||
| 669 | s = best.s + 1 | ||
| 670 | if s >= sLimit { | ||
| 671 | goto emitRemainder | ||
| 672 | } | ||
| 673 | cv = load64(src, s) | ||
| 674 | continue | ||
| 675 | } | ||
| 676 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
| 677 | d += emitCopyNoRepeat(dst[d:], offset, best.length) | ||
| 678 | repeat = offset | ||
| 679 | |||
| 680 | nextEmit = s | ||
| 681 | if s >= sLimit { | ||
| 682 | goto emitRemainder | ||
| 683 | } | ||
| 684 | |||
| 685 | if d > dstLimit { | ||
| 686 | // Do we have space for more, if not bail. | ||
| 687 | return 0 | ||
| 688 | } | ||
| 689 | // Fill tables... | ||
| 690 | for i := best.s + 1; i < s; i++ { | ||
| 691 | cv0 := load64(src, i) | ||
| 692 | long0 := hash8(cv0, lTableBits) | ||
| 693 | short0 := hash4(cv0, sTableBits) | ||
| 694 | lTable[long0] = uint64(i) | lTable[long0]<<32 | ||
| 695 | sTable[short0] = uint64(i) | sTable[short0]<<32 | ||
| 696 | } | ||
| 697 | cv = load64(src, s) | ||
| 698 | } | ||
| 699 | |||
| 700 | emitRemainder: | ||
| 701 | if nextEmit < len(src) { | ||
| 702 | // Bail if we exceed the maximum size. | ||
| 703 | if d+len(src)-nextEmit > dstLimit { | ||
| 704 | return 0 | ||
| 705 | } | ||
| 706 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
| 707 | } | ||
| 708 | return d | ||
| 709 | } | ||
| 710 | |||
| 711 | // emitCopySize returns the size to encode the offset+length | ||
| 712 | // | ||
| 713 | // It assumes that: | ||
| 714 | // | ||
| 715 | // 1 <= offset && offset <= math.MaxUint32 | ||
| 716 | // 4 <= length && length <= 1 << 24 | ||
| 717 | func emitCopySize(offset, length int) int { | ||
| 718 | if offset >= 65536 { | ||
| 719 | i := 0 | ||
| 720 | if length > 64 { | ||
| 721 | length -= 64 | ||
| 722 | if length >= 4 { | ||
| 723 | // Emit remaining as repeats | ||
| 724 | return 5 + emitRepeatSize(offset, length) | ||
| 725 | } | ||
| 726 | i = 5 | ||
| 727 | } | ||
| 728 | if length == 0 { | ||
| 729 | return i | ||
| 730 | } | ||
| 731 | return i + 5 | ||
| 732 | } | ||
| 733 | |||
| 734 | // Offset no more than 2 bytes. | ||
| 735 | if length > 64 { | ||
| 736 | if offset < 2048 { | ||
| 737 | // Emit 8 bytes, then rest as repeats... | ||
| 738 | return 2 + emitRepeatSize(offset, length-8) | ||
| 739 | } | ||
| 740 | // Emit remaining as repeats, at least 4 bytes remain. | ||
| 741 | return 3 + emitRepeatSize(offset, length-60) | ||
| 742 | } | ||
| 743 | if length >= 12 || offset >= 2048 { | ||
| 744 | return 3 | ||
| 745 | } | ||
| 746 | // Emit the remaining copy, encoded as 2 bytes. | ||
| 747 | return 2 | ||
| 748 | } | ||
| 749 | |||
| 750 | // emitCopyNoRepeatSize returns the size to encode the offset+length | ||
| 751 | // | ||
| 752 | // It assumes that: | ||
| 753 | // | ||
| 754 | // 1 <= offset && offset <= math.MaxUint32 | ||
| 755 | // 4 <= length && length <= 1 << 24 | ||
| 756 | func emitCopyNoRepeatSize(offset, length int) int { | ||
| 757 | if offset >= 65536 { | ||
| 758 | return 5 + 5*(length/64) | ||
| 759 | } | ||
| 760 | |||
| 761 | // Offset no more than 2 bytes. | ||
| 762 | if length > 64 { | ||
| 763 | // Emit remaining as repeats, at least 4 bytes remain. | ||
| 764 | return 3 + 3*(length/60) | ||
| 765 | } | ||
| 766 | if length >= 12 || offset >= 2048 { | ||
| 767 | return 3 | ||
| 768 | } | ||
| 769 | // Emit the remaining copy, encoded as 2 bytes. | ||
| 770 | return 2 | ||
| 771 | } | ||
| 772 | |||
| 773 | // emitRepeatSize returns the number of bytes required to encode a repeat. | ||
| 774 | // Length must be at least 4 and < 1<<24 | ||
| 775 | func emitRepeatSize(offset, length int) int { | ||
| 776 | // Repeat offset, make length cheaper | ||
| 777 | if length <= 4+4 || (length < 8+4 && offset < 2048) { | ||
| 778 | return 2 | ||
| 779 | } | ||
| 780 | if length < (1<<8)+4+4 { | ||
| 781 | return 3 | ||
| 782 | } | ||
| 783 | if length < (1<<16)+(1<<8)+4 { | ||
| 784 | return 4 | ||
| 785 | } | ||
| 786 | const maxRepeat = (1 << 24) - 1 | ||
| 787 | length -= (1 << 16) - 4 | ||
| 788 | left := 0 | ||
| 789 | if length > maxRepeat { | ||
| 790 | left = length - maxRepeat + 4 | ||
| 791 | } | ||
| 792 | if left > 0 { | ||
| 793 | return 5 + emitRepeatSize(offset, left) | ||
| 794 | } | ||
| 795 | return 5 | ||
| 796 | } | ||