diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encode_all.go')
| -rw-r--r-- | vendor/github.com/klauspost/compress/s2/encode_all.go | 1048 |
1 files changed, 1048 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encode_all.go b/vendor/github.com/klauspost/compress/s2/encode_all.go new file mode 100644 index 0000000..5e57995 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/encode_all.go | |||
| @@ -0,0 +1,1048 @@ | |||
| 1 | // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||
| 2 | // Copyright (c) 2019 Klaus Post. All rights reserved. | ||
| 3 | // Use of this source code is governed by a BSD-style | ||
| 4 | // license that can be found in the LICENSE file. | ||
| 5 | |||
| 6 | package s2 | ||
| 7 | |||
| 8 | import ( | ||
| 9 | "bytes" | ||
| 10 | "encoding/binary" | ||
| 11 | "fmt" | ||
| 12 | "math/bits" | ||
| 13 | ) | ||
| 14 | |||
| 15 | func load32(b []byte, i int) uint32 { | ||
| 16 | return binary.LittleEndian.Uint32(b[i:]) | ||
| 17 | } | ||
| 18 | |||
| 19 | func load64(b []byte, i int) uint64 { | ||
| 20 | return binary.LittleEndian.Uint64(b[i:]) | ||
| 21 | } | ||
| 22 | |||
| 23 | // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. | ||
| 24 | // Preferably h should be a constant and should always be <64. | ||
| 25 | func hash6(u uint64, h uint8) uint32 { | ||
| 26 | const prime6bytes = 227718039650203 | ||
| 27 | return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63)) | ||
| 28 | } | ||
| 29 | |||
| 30 | func encodeGo(dst, src []byte) []byte { | ||
| 31 | if n := MaxEncodedLen(len(src)); n < 0 { | ||
| 32 | panic(ErrTooLarge) | ||
| 33 | } else if len(dst) < n { | ||
| 34 | dst = make([]byte, n) | ||
| 35 | } | ||
| 36 | |||
| 37 | // The block starts with the varint-encoded length of the decompressed bytes. | ||
| 38 | d := binary.PutUvarint(dst, uint64(len(src))) | ||
| 39 | |||
| 40 | if len(src) == 0 { | ||
| 41 | return dst[:d] | ||
| 42 | } | ||
| 43 | if len(src) < minNonLiteralBlockSize { | ||
| 44 | d += emitLiteral(dst[d:], src) | ||
| 45 | return dst[:d] | ||
| 46 | } | ||
| 47 | n := encodeBlockGo(dst[d:], src) | ||
| 48 | if n > 0 { | ||
| 49 | d += n | ||
| 50 | return dst[:d] | ||
| 51 | } | ||
| 52 | // Not compressible | ||
| 53 | d += emitLiteral(dst[d:], src) | ||
| 54 | return dst[:d] | ||
| 55 | } | ||
| 56 | |||
| 57 | // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It | ||
| 58 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
| 59 | // been written. | ||
| 60 | // | ||
| 61 | // It also assumes that: | ||
| 62 | // | ||
| 63 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
| 64 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
| 65 | func encodeBlockGo(dst, src []byte) (d int) { | ||
| 66 | // Initialize the hash table. | ||
| 67 | const ( | ||
| 68 | tableBits = 14 | ||
| 69 | maxTableSize = 1 << tableBits | ||
| 70 | |||
| 71 | debug = false | ||
| 72 | ) | ||
| 73 | |||
| 74 | var table [maxTableSize]uint32 | ||
| 75 | |||
| 76 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
| 77 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
| 78 | // looking for copies. | ||
| 79 | sLimit := len(src) - inputMargin | ||
| 80 | |||
| 81 | // Bail if we can't compress to at least this. | ||
| 82 | dstLimit := len(src) - len(src)>>5 - 5 | ||
| 83 | |||
| 84 | // nextEmit is where in src the next emitLiteral should start from. | ||
| 85 | nextEmit := 0 | ||
| 86 | |||
| 87 | // The encoded form must start with a literal, as there are no previous | ||
| 88 | // bytes to copy, so we start looking for hash matches at s == 1. | ||
| 89 | s := 1 | ||
| 90 | cv := load64(src, s) | ||
| 91 | |||
| 92 | // We search for a repeat at -1, but don't output repeats when nextEmit == 0 | ||
| 93 | repeat := 1 | ||
| 94 | |||
| 95 | for { | ||
| 96 | candidate := 0 | ||
| 97 | for { | ||
| 98 | // Next src position to check | ||
| 99 | nextS := s + (s-nextEmit)>>6 + 4 | ||
| 100 | if nextS > sLimit { | ||
| 101 | goto emitRemainder | ||
| 102 | } | ||
| 103 | hash0 := hash6(cv, tableBits) | ||
| 104 | hash1 := hash6(cv>>8, tableBits) | ||
| 105 | candidate = int(table[hash0]) | ||
| 106 | candidate2 := int(table[hash1]) | ||
| 107 | table[hash0] = uint32(s) | ||
| 108 | table[hash1] = uint32(s + 1) | ||
| 109 | hash2 := hash6(cv>>16, tableBits) | ||
| 110 | |||
| 111 | // Check repeat at offset checkRep. | ||
| 112 | const checkRep = 1 | ||
| 113 | if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { | ||
| 114 | base := s + checkRep | ||
| 115 | // Extend back | ||
| 116 | for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { | ||
| 117 | i-- | ||
| 118 | base-- | ||
| 119 | } | ||
| 120 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
| 121 | |||
| 122 | // Extend forward | ||
| 123 | candidate := s - repeat + 4 + checkRep | ||
| 124 | s += 4 + checkRep | ||
| 125 | for s <= sLimit { | ||
| 126 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
| 127 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 128 | break | ||
| 129 | } | ||
| 130 | s += 8 | ||
| 131 | candidate += 8 | ||
| 132 | } | ||
| 133 | if debug { | ||
| 134 | // Validate match. | ||
| 135 | if s <= candidate { | ||
| 136 | panic("s <= candidate") | ||
| 137 | } | ||
| 138 | a := src[base:s] | ||
| 139 | b := src[base-repeat : base-repeat+(s-base)] | ||
| 140 | if !bytes.Equal(a, b) { | ||
| 141 | panic("mismatch") | ||
| 142 | } | ||
| 143 | } | ||
| 144 | if nextEmit > 0 { | ||
| 145 | // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. | ||
| 146 | d += emitRepeat(dst[d:], repeat, s-base) | ||
| 147 | } else { | ||
| 148 | // First match, cannot be repeat. | ||
| 149 | d += emitCopy(dst[d:], repeat, s-base) | ||
| 150 | } | ||
| 151 | nextEmit = s | ||
| 152 | if s >= sLimit { | ||
| 153 | goto emitRemainder | ||
| 154 | } | ||
| 155 | |||
| 156 | cv = load64(src, s) | ||
| 157 | continue | ||
| 158 | } | ||
| 159 | |||
| 160 | if uint32(cv) == load32(src, candidate) { | ||
| 161 | break | ||
| 162 | } | ||
| 163 | candidate = int(table[hash2]) | ||
| 164 | if uint32(cv>>8) == load32(src, candidate2) { | ||
| 165 | table[hash2] = uint32(s + 2) | ||
| 166 | candidate = candidate2 | ||
| 167 | s++ | ||
| 168 | break | ||
| 169 | } | ||
| 170 | table[hash2] = uint32(s + 2) | ||
| 171 | if uint32(cv>>16) == load32(src, candidate) { | ||
| 172 | s += 2 | ||
| 173 | break | ||
| 174 | } | ||
| 175 | |||
| 176 | cv = load64(src, nextS) | ||
| 177 | s = nextS | ||
| 178 | } | ||
| 179 | |||
| 180 | // Extend backwards. | ||
| 181 | // The top bytes will be rechecked to get the full match. | ||
| 182 | for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { | ||
| 183 | candidate-- | ||
| 184 | s-- | ||
| 185 | } | ||
| 186 | |||
| 187 | // Bail if we exceed the maximum size. | ||
| 188 | if d+(s-nextEmit) > dstLimit { | ||
| 189 | return 0 | ||
| 190 | } | ||
| 191 | |||
| 192 | // A 4-byte match has been found. We'll later see if more than 4 bytes | ||
| 193 | // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit | ||
| 194 | // them as literal bytes. | ||
| 195 | |||
| 196 | d += emitLiteral(dst[d:], src[nextEmit:s]) | ||
| 197 | |||
| 198 | // Call emitCopy, and then see if another emitCopy could be our next | ||
| 199 | // move. Repeat until we find no match for the input immediately after | ||
| 200 | // what was consumed by the last emitCopy call. | ||
| 201 | // | ||
| 202 | // If we exit this loop normally then we need to call emitLiteral next, | ||
| 203 | // though we don't yet know how big the literal will be. We handle that | ||
| 204 | // by proceeding to the next iteration of the main loop. We also can | ||
| 205 | // exit this loop via goto if we get close to exhausting the input. | ||
| 206 | for { | ||
| 207 | // Invariant: we have a 4-byte match at s, and no need to emit any | ||
| 208 | // literal bytes prior to s. | ||
| 209 | base := s | ||
| 210 | repeat = base - candidate | ||
| 211 | |||
| 212 | // Extend the 4-byte match as long as possible. | ||
| 213 | s += 4 | ||
| 214 | candidate += 4 | ||
| 215 | for s <= len(src)-8 { | ||
| 216 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
| 217 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 218 | break | ||
| 219 | } | ||
| 220 | s += 8 | ||
| 221 | candidate += 8 | ||
| 222 | } | ||
| 223 | |||
| 224 | d += emitCopy(dst[d:], repeat, s-base) | ||
| 225 | if debug { | ||
| 226 | // Validate match. | ||
| 227 | if s <= candidate { | ||
| 228 | panic("s <= candidate") | ||
| 229 | } | ||
| 230 | a := src[base:s] | ||
| 231 | b := src[base-repeat : base-repeat+(s-base)] | ||
| 232 | if !bytes.Equal(a, b) { | ||
| 233 | panic("mismatch") | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | nextEmit = s | ||
| 238 | if s >= sLimit { | ||
| 239 | goto emitRemainder | ||
| 240 | } | ||
| 241 | |||
| 242 | if d > dstLimit { | ||
| 243 | // Do we have space for more, if not bail. | ||
| 244 | return 0 | ||
| 245 | } | ||
| 246 | // Check for an immediate match, otherwise start search at s+1 | ||
| 247 | x := load64(src, s-2) | ||
| 248 | m2Hash := hash6(x, tableBits) | ||
| 249 | currHash := hash6(x>>16, tableBits) | ||
| 250 | candidate = int(table[currHash]) | ||
| 251 | table[m2Hash] = uint32(s - 2) | ||
| 252 | table[currHash] = uint32(s) | ||
| 253 | if debug && s == candidate { | ||
| 254 | panic("s == candidate") | ||
| 255 | } | ||
| 256 | if uint32(x>>16) != load32(src, candidate) { | ||
| 257 | cv = load64(src, s+1) | ||
| 258 | s++ | ||
| 259 | break | ||
| 260 | } | ||
| 261 | } | ||
| 262 | } | ||
| 263 | |||
| 264 | emitRemainder: | ||
| 265 | if nextEmit < len(src) { | ||
| 266 | // Bail if we exceed the maximum size. | ||
| 267 | if d+len(src)-nextEmit > dstLimit { | ||
| 268 | return 0 | ||
| 269 | } | ||
| 270 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
| 271 | } | ||
| 272 | return d | ||
| 273 | } | ||
| 274 | |||
| 275 | func encodeBlockSnappyGo(dst, src []byte) (d int) { | ||
| 276 | // Initialize the hash table. | ||
| 277 | const ( | ||
| 278 | tableBits = 14 | ||
| 279 | maxTableSize = 1 << tableBits | ||
| 280 | ) | ||
| 281 | |||
| 282 | var table [maxTableSize]uint32 | ||
| 283 | |||
| 284 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
| 285 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
| 286 | // looking for copies. | ||
| 287 | sLimit := len(src) - inputMargin | ||
| 288 | |||
| 289 | // Bail if we can't compress to at least this. | ||
| 290 | dstLimit := len(src) - len(src)>>5 - 5 | ||
| 291 | |||
| 292 | // nextEmit is where in src the next emitLiteral should start from. | ||
| 293 | nextEmit := 0 | ||
| 294 | |||
| 295 | // The encoded form must start with a literal, as there are no previous | ||
| 296 | // bytes to copy, so we start looking for hash matches at s == 1. | ||
| 297 | s := 1 | ||
| 298 | cv := load64(src, s) | ||
| 299 | |||
| 300 | // We search for a repeat at -1, but don't output repeats when nextEmit == 0 | ||
| 301 | repeat := 1 | ||
| 302 | |||
| 303 | for { | ||
| 304 | candidate := 0 | ||
| 305 | for { | ||
| 306 | // Next src position to check | ||
| 307 | nextS := s + (s-nextEmit)>>6 + 4 | ||
| 308 | if nextS > sLimit { | ||
| 309 | goto emitRemainder | ||
| 310 | } | ||
| 311 | hash0 := hash6(cv, tableBits) | ||
| 312 | hash1 := hash6(cv>>8, tableBits) | ||
| 313 | candidate = int(table[hash0]) | ||
| 314 | candidate2 := int(table[hash1]) | ||
| 315 | table[hash0] = uint32(s) | ||
| 316 | table[hash1] = uint32(s + 1) | ||
| 317 | hash2 := hash6(cv>>16, tableBits) | ||
| 318 | |||
| 319 | // Check repeat at offset checkRep. | ||
| 320 | const checkRep = 1 | ||
| 321 | if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { | ||
| 322 | base := s + checkRep | ||
| 323 | // Extend back | ||
| 324 | for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { | ||
| 325 | i-- | ||
| 326 | base-- | ||
| 327 | } | ||
| 328 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
| 329 | |||
| 330 | // Extend forward | ||
| 331 | candidate := s - repeat + 4 + checkRep | ||
| 332 | s += 4 + checkRep | ||
| 333 | for s <= sLimit { | ||
| 334 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
| 335 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 336 | break | ||
| 337 | } | ||
| 338 | s += 8 | ||
| 339 | candidate += 8 | ||
| 340 | } | ||
| 341 | |||
| 342 | d += emitCopyNoRepeat(dst[d:], repeat, s-base) | ||
| 343 | nextEmit = s | ||
| 344 | if s >= sLimit { | ||
| 345 | goto emitRemainder | ||
| 346 | } | ||
| 347 | |||
| 348 | cv = load64(src, s) | ||
| 349 | continue | ||
| 350 | } | ||
| 351 | |||
| 352 | if uint32(cv) == load32(src, candidate) { | ||
| 353 | break | ||
| 354 | } | ||
| 355 | candidate = int(table[hash2]) | ||
| 356 | if uint32(cv>>8) == load32(src, candidate2) { | ||
| 357 | table[hash2] = uint32(s + 2) | ||
| 358 | candidate = candidate2 | ||
| 359 | s++ | ||
| 360 | break | ||
| 361 | } | ||
| 362 | table[hash2] = uint32(s + 2) | ||
| 363 | if uint32(cv>>16) == load32(src, candidate) { | ||
| 364 | s += 2 | ||
| 365 | break | ||
| 366 | } | ||
| 367 | |||
| 368 | cv = load64(src, nextS) | ||
| 369 | s = nextS | ||
| 370 | } | ||
| 371 | |||
| 372 | // Extend backwards | ||
| 373 | for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { | ||
| 374 | candidate-- | ||
| 375 | s-- | ||
| 376 | } | ||
| 377 | |||
| 378 | // Bail if we exceed the maximum size. | ||
| 379 | if d+(s-nextEmit) > dstLimit { | ||
| 380 | return 0 | ||
| 381 | } | ||
| 382 | |||
| 383 | // A 4-byte match has been found. We'll later see if more than 4 bytes | ||
| 384 | // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit | ||
| 385 | // them as literal bytes. | ||
| 386 | |||
| 387 | d += emitLiteral(dst[d:], src[nextEmit:s]) | ||
| 388 | |||
| 389 | // Call emitCopy, and then see if another emitCopy could be our next | ||
| 390 | // move. Repeat until we find no match for the input immediately after | ||
| 391 | // what was consumed by the last emitCopy call. | ||
| 392 | // | ||
| 393 | // If we exit this loop normally then we need to call emitLiteral next, | ||
| 394 | // though we don't yet know how big the literal will be. We handle that | ||
| 395 | // by proceeding to the next iteration of the main loop. We also can | ||
| 396 | // exit this loop via goto if we get close to exhausting the input. | ||
| 397 | for { | ||
| 398 | // Invariant: we have a 4-byte match at s, and no need to emit any | ||
| 399 | // literal bytes prior to s. | ||
| 400 | base := s | ||
| 401 | repeat = base - candidate | ||
| 402 | |||
| 403 | // Extend the 4-byte match as long as possible. | ||
| 404 | s += 4 | ||
| 405 | candidate += 4 | ||
| 406 | for s <= len(src)-8 { | ||
| 407 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
| 408 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 409 | break | ||
| 410 | } | ||
| 411 | s += 8 | ||
| 412 | candidate += 8 | ||
| 413 | } | ||
| 414 | |||
| 415 | d += emitCopyNoRepeat(dst[d:], repeat, s-base) | ||
| 416 | if false { | ||
| 417 | // Validate match. | ||
| 418 | a := src[base:s] | ||
| 419 | b := src[base-repeat : base-repeat+(s-base)] | ||
| 420 | if !bytes.Equal(a, b) { | ||
| 421 | panic("mismatch") | ||
| 422 | } | ||
| 423 | } | ||
| 424 | |||
| 425 | nextEmit = s | ||
| 426 | if s >= sLimit { | ||
| 427 | goto emitRemainder | ||
| 428 | } | ||
| 429 | |||
| 430 | if d > dstLimit { | ||
| 431 | // Do we have space for more, if not bail. | ||
| 432 | return 0 | ||
| 433 | } | ||
| 434 | // Check for an immediate match, otherwise start search at s+1 | ||
| 435 | x := load64(src, s-2) | ||
| 436 | m2Hash := hash6(x, tableBits) | ||
| 437 | currHash := hash6(x>>16, tableBits) | ||
| 438 | candidate = int(table[currHash]) | ||
| 439 | table[m2Hash] = uint32(s - 2) | ||
| 440 | table[currHash] = uint32(s) | ||
| 441 | if uint32(x>>16) != load32(src, candidate) { | ||
| 442 | cv = load64(src, s+1) | ||
| 443 | s++ | ||
| 444 | break | ||
| 445 | } | ||
| 446 | } | ||
| 447 | } | ||
| 448 | |||
| 449 | emitRemainder: | ||
| 450 | if nextEmit < len(src) { | ||
| 451 | // Bail if we exceed the maximum size. | ||
| 452 | if d+len(src)-nextEmit > dstLimit { | ||
| 453 | return 0 | ||
| 454 | } | ||
| 455 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
| 456 | } | ||
| 457 | return d | ||
| 458 | } | ||
| 459 | |||
| 460 | // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It | ||
| 461 | // assumes that the varint-encoded length of the decompressed bytes has already | ||
| 462 | // been written. | ||
| 463 | // | ||
| 464 | // It also assumes that: | ||
| 465 | // | ||
| 466 | // len(dst) >= MaxEncodedLen(len(src)) && | ||
| 467 | // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||
| 468 | func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) { | ||
| 469 | // Initialize the hash table. | ||
| 470 | const ( | ||
| 471 | tableBits = 14 | ||
| 472 | maxTableSize = 1 << tableBits | ||
| 473 | maxAhead = 8 // maximum bytes ahead without checking sLimit | ||
| 474 | |||
| 475 | debug = false | ||
| 476 | ) | ||
| 477 | dict.initFast() | ||
| 478 | |||
| 479 | var table [maxTableSize]uint32 | ||
| 480 | |||
| 481 | // sLimit is when to stop looking for offset/length copies. The inputMargin | ||
| 482 | // lets us use a fast path for emitLiteral in the main loop, while we are | ||
| 483 | // looking for copies. | ||
| 484 | sLimit := len(src) - inputMargin | ||
| 485 | if sLimit > MaxDictSrcOffset-maxAhead { | ||
| 486 | sLimit = MaxDictSrcOffset - maxAhead | ||
| 487 | } | ||
| 488 | |||
| 489 | // Bail if we can't compress to at least this. | ||
| 490 | dstLimit := len(src) - len(src)>>5 - 5 | ||
| 491 | |||
| 492 | // nextEmit is where in src the next emitLiteral should start from. | ||
| 493 | nextEmit := 0 | ||
| 494 | |||
| 495 | // The encoded form can start with a dict entry (copy or repeat). | ||
| 496 | s := 0 | ||
| 497 | |||
| 498 | // Convert dict repeat to offset | ||
| 499 | repeat := len(dict.dict) - dict.repeat | ||
| 500 | cv := load64(src, 0) | ||
| 501 | |||
| 502 | // While in dict | ||
| 503 | searchDict: | ||
| 504 | for { | ||
| 505 | // Next src position to check | ||
| 506 | nextS := s + (s-nextEmit)>>6 + 4 | ||
| 507 | hash0 := hash6(cv, tableBits) | ||
| 508 | hash1 := hash6(cv>>8, tableBits) | ||
| 509 | if nextS > sLimit { | ||
| 510 | if debug { | ||
| 511 | fmt.Println("slimit reached", s, nextS) | ||
| 512 | } | ||
| 513 | break searchDict | ||
| 514 | } | ||
| 515 | candidateDict := int(dict.fastTable[hash0]) | ||
| 516 | candidateDict2 := int(dict.fastTable[hash1]) | ||
| 517 | candidate2 := int(table[hash1]) | ||
| 518 | candidate := int(table[hash0]) | ||
| 519 | table[hash0] = uint32(s) | ||
| 520 | table[hash1] = uint32(s + 1) | ||
| 521 | hash2 := hash6(cv>>16, tableBits) | ||
| 522 | |||
| 523 | // Check repeat at offset checkRep. | ||
| 524 | const checkRep = 1 | ||
| 525 | |||
| 526 | if repeat > s { | ||
| 527 | candidate := len(dict.dict) - repeat + s | ||
| 528 | if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) { | ||
| 529 | // Extend back | ||
| 530 | base := s | ||
| 531 | for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; { | ||
| 532 | i-- | ||
| 533 | base-- | ||
| 534 | } | ||
| 535 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
| 536 | if debug && nextEmit != base { | ||
| 537 | fmt.Println("emitted ", base-nextEmit, "literals") | ||
| 538 | } | ||
| 539 | s += 4 | ||
| 540 | candidate += 4 | ||
| 541 | for candidate < len(dict.dict)-8 && s <= len(src)-8 { | ||
| 542 | if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 { | ||
| 543 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 544 | break | ||
| 545 | } | ||
| 546 | s += 8 | ||
| 547 | candidate += 8 | ||
| 548 | } | ||
| 549 | d += emitRepeat(dst[d:], repeat, s-base) | ||
| 550 | if debug { | ||
| 551 | fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s) | ||
| 552 | } | ||
| 553 | nextEmit = s | ||
| 554 | if s >= sLimit { | ||
| 555 | break searchDict | ||
| 556 | } | ||
| 557 | cv = load64(src, s) | ||
| 558 | continue | ||
| 559 | } | ||
| 560 | } else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { | ||
| 561 | base := s + checkRep | ||
| 562 | // Extend back | ||
| 563 | for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { | ||
| 564 | i-- | ||
| 565 | base-- | ||
| 566 | } | ||
| 567 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
| 568 | if debug && nextEmit != base { | ||
| 569 | fmt.Println("emitted ", base-nextEmit, "literals") | ||
| 570 | } | ||
| 571 | |||
| 572 | // Extend forward | ||
| 573 | candidate := s - repeat + 4 + checkRep | ||
| 574 | s += 4 + checkRep | ||
| 575 | for s <= sLimit { | ||
| 576 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
| 577 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 578 | break | ||
| 579 | } | ||
| 580 | s += 8 | ||
| 581 | candidate += 8 | ||
| 582 | } | ||
| 583 | if debug { | ||
| 584 | // Validate match. | ||
| 585 | if s <= candidate { | ||
| 586 | panic("s <= candidate") | ||
| 587 | } | ||
| 588 | a := src[base:s] | ||
| 589 | b := src[base-repeat : base-repeat+(s-base)] | ||
| 590 | if !bytes.Equal(a, b) { | ||
| 591 | panic("mismatch") | ||
| 592 | } | ||
| 593 | } | ||
| 594 | |||
| 595 | if nextEmit > 0 { | ||
| 596 | // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. | ||
| 597 | d += emitRepeat(dst[d:], repeat, s-base) | ||
| 598 | } else { | ||
| 599 | // First match, cannot be repeat. | ||
| 600 | d += emitCopy(dst[d:], repeat, s-base) | ||
| 601 | } | ||
| 602 | |||
| 603 | nextEmit = s | ||
| 604 | if s >= sLimit { | ||
| 605 | break searchDict | ||
| 606 | } | ||
| 607 | if debug { | ||
| 608 | fmt.Println("emitted reg repeat", s-base, "s:", s) | ||
| 609 | } | ||
| 610 | cv = load64(src, s) | ||
| 611 | continue searchDict | ||
| 612 | } | ||
| 613 | if s == 0 { | ||
| 614 | cv = load64(src, nextS) | ||
| 615 | s = nextS | ||
| 616 | continue searchDict | ||
| 617 | } | ||
| 618 | // Start with table. These matches will always be closer. | ||
| 619 | if uint32(cv) == load32(src, candidate) { | ||
| 620 | goto emitMatch | ||
| 621 | } | ||
| 622 | candidate = int(table[hash2]) | ||
| 623 | if uint32(cv>>8) == load32(src, candidate2) { | ||
| 624 | table[hash2] = uint32(s + 2) | ||
| 625 | candidate = candidate2 | ||
| 626 | s++ | ||
| 627 | goto emitMatch | ||
| 628 | } | ||
| 629 | |||
| 630 | // Check dict. Dicts have longer offsets, so we want longer matches. | ||
| 631 | if cv == load64(dict.dict, candidateDict) { | ||
| 632 | table[hash2] = uint32(s + 2) | ||
| 633 | goto emitDict | ||
| 634 | } | ||
| 635 | |||
| 636 | candidateDict = int(dict.fastTable[hash2]) | ||
| 637 | // Check if upper 7 bytes match | ||
| 638 | if candidateDict2 >= 1 { | ||
| 639 | if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) { | ||
| 640 | table[hash2] = uint32(s + 2) | ||
| 641 | candidateDict = candidateDict2 | ||
| 642 | s++ | ||
| 643 | goto emitDict | ||
| 644 | } | ||
| 645 | } | ||
| 646 | |||
| 647 | table[hash2] = uint32(s + 2) | ||
| 648 | if uint32(cv>>16) == load32(src, candidate) { | ||
| 649 | s += 2 | ||
| 650 | goto emitMatch | ||
| 651 | } | ||
| 652 | if candidateDict >= 2 { | ||
| 653 | // Check if upper 6 bytes match | ||
| 654 | if cv^load64(dict.dict, candidateDict-2) < (1 << 16) { | ||
| 655 | s += 2 | ||
| 656 | goto emitDict | ||
| 657 | } | ||
| 658 | } | ||
| 659 | |||
| 660 | cv = load64(src, nextS) | ||
| 661 | s = nextS | ||
| 662 | continue searchDict | ||
| 663 | |||
| 664 | emitDict: | ||
| 665 | { | ||
| 666 | if debug { | ||
| 667 | if load32(dict.dict, candidateDict) != load32(src, s) { | ||
| 668 | panic("dict emit mismatch") | ||
| 669 | } | ||
| 670 | } | ||
| 671 | // Extend backwards. | ||
| 672 | // The top bytes will be rechecked to get the full match. | ||
| 673 | for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] { | ||
| 674 | candidateDict-- | ||
| 675 | s-- | ||
| 676 | } | ||
| 677 | |||
| 678 | // Bail if we exceed the maximum size. | ||
| 679 | if d+(s-nextEmit) > dstLimit { | ||
| 680 | return 0 | ||
| 681 | } | ||
| 682 | |||
| 683 | // A 4-byte match has been found. We'll later see if more than 4 bytes | ||
| 684 | // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit | ||
| 685 | // them as literal bytes. | ||
| 686 | |||
| 687 | d += emitLiteral(dst[d:], src[nextEmit:s]) | ||
| 688 | if debug && nextEmit != s { | ||
| 689 | fmt.Println("emitted ", s-nextEmit, "literals") | ||
| 690 | } | ||
| 691 | { | ||
| 692 | // Invariant: we have a 4-byte match at s, and no need to emit any | ||
| 693 | // literal bytes prior to s. | ||
| 694 | base := s | ||
| 695 | repeat = s + (len(dict.dict)) - candidateDict | ||
| 696 | |||
| 697 | // Extend the 4-byte match as long as possible. | ||
| 698 | s += 4 | ||
| 699 | candidateDict += 4 | ||
| 700 | for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 { | ||
| 701 | if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 { | ||
| 702 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 703 | break | ||
| 704 | } | ||
| 705 | s += 8 | ||
| 706 | candidateDict += 8 | ||
| 707 | } | ||
| 708 | |||
| 709 | // Matches longer than 64 are split. | ||
| 710 | if s <= sLimit || s-base < 8 { | ||
| 711 | d += emitCopy(dst[d:], repeat, s-base) | ||
| 712 | } else { | ||
| 713 | // Split to ensure we don't start a copy within next block | ||
| 714 | d += emitCopy(dst[d:], repeat, 4) | ||
| 715 | d += emitRepeat(dst[d:], repeat, s-base-4) | ||
| 716 | } | ||
| 717 | if false { | ||
| 718 | // Validate match. | ||
| 719 | if s <= candidate { | ||
| 720 | panic("s <= candidate") | ||
| 721 | } | ||
| 722 | a := src[base:s] | ||
| 723 | b := dict.dict[base-repeat : base-repeat+(s-base)] | ||
| 724 | if !bytes.Equal(a, b) { | ||
| 725 | panic("mismatch") | ||
| 726 | } | ||
| 727 | } | ||
| 728 | if debug { | ||
| 729 | fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s) | ||
| 730 | } | ||
| 731 | nextEmit = s | ||
| 732 | if s >= sLimit { | ||
| 733 | break searchDict | ||
| 734 | } | ||
| 735 | |||
| 736 | if d > dstLimit { | ||
| 737 | // Do we have space for more, if not bail. | ||
| 738 | return 0 | ||
| 739 | } | ||
| 740 | |||
| 741 | // Index and continue loop to try new candidate. | ||
| 742 | x := load64(src, s-2) | ||
| 743 | m2Hash := hash6(x, tableBits) | ||
| 744 | currHash := hash6(x>>8, tableBits) | ||
| 745 | table[m2Hash] = uint32(s - 2) | ||
| 746 | table[currHash] = uint32(s - 1) | ||
| 747 | cv = load64(src, s) | ||
| 748 | } | ||
| 749 | continue | ||
| 750 | } | ||
| 751 | emitMatch: | ||
| 752 | |||
| 753 | // Extend backwards. | ||
| 754 | // The top bytes will be rechecked to get the full match. | ||
| 755 | for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { | ||
| 756 | candidate-- | ||
| 757 | s-- | ||
| 758 | } | ||
| 759 | |||
| 760 | // Bail if we exceed the maximum size. | ||
| 761 | if d+(s-nextEmit) > dstLimit { | ||
| 762 | return 0 | ||
| 763 | } | ||
| 764 | |||
| 765 | // A 4-byte match has been found. We'll later see if more than 4 bytes | ||
| 766 | // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit | ||
| 767 | // them as literal bytes. | ||
| 768 | |||
| 769 | d += emitLiteral(dst[d:], src[nextEmit:s]) | ||
| 770 | if debug && nextEmit != s { | ||
| 771 | fmt.Println("emitted ", s-nextEmit, "literals") | ||
| 772 | } | ||
| 773 | // Call emitCopy, and then see if another emitCopy could be our next | ||
| 774 | // move. Repeat until we find no match for the input immediately after | ||
| 775 | // what was consumed by the last emitCopy call. | ||
| 776 | // | ||
| 777 | // If we exit this loop normally then we need to call emitLiteral next, | ||
| 778 | // though we don't yet know how big the literal will be. We handle that | ||
| 779 | // by proceeding to the next iteration of the main loop. We also can | ||
| 780 | // exit this loop via goto if we get close to exhausting the input. | ||
| 781 | for { | ||
| 782 | // Invariant: we have a 4-byte match at s, and no need to emit any | ||
| 783 | // literal bytes prior to s. | ||
| 784 | base := s | ||
| 785 | repeat = base - candidate | ||
| 786 | |||
| 787 | // Extend the 4-byte match as long as possible. | ||
| 788 | s += 4 | ||
| 789 | candidate += 4 | ||
| 790 | for s <= len(src)-8 { | ||
| 791 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
| 792 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 793 | break | ||
| 794 | } | ||
| 795 | s += 8 | ||
| 796 | candidate += 8 | ||
| 797 | } | ||
| 798 | |||
| 799 | d += emitCopy(dst[d:], repeat, s-base) | ||
| 800 | if debug { | ||
| 801 | // Validate match. | ||
| 802 | if s <= candidate { | ||
| 803 | panic("s <= candidate") | ||
| 804 | } | ||
| 805 | a := src[base:s] | ||
| 806 | b := src[base-repeat : base-repeat+(s-base)] | ||
| 807 | if !bytes.Equal(a, b) { | ||
| 808 | panic("mismatch") | ||
| 809 | } | ||
| 810 | } | ||
| 811 | if debug { | ||
| 812 | fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s) | ||
| 813 | } | ||
| 814 | nextEmit = s | ||
| 815 | if s >= sLimit { | ||
| 816 | break searchDict | ||
| 817 | } | ||
| 818 | |||
| 819 | if d > dstLimit { | ||
| 820 | // Do we have space for more, if not bail. | ||
| 821 | return 0 | ||
| 822 | } | ||
| 823 | // Check for an immediate match, otherwise start search at s+1 | ||
| 824 | x := load64(src, s-2) | ||
| 825 | m2Hash := hash6(x, tableBits) | ||
| 826 | currHash := hash6(x>>16, tableBits) | ||
| 827 | candidate = int(table[currHash]) | ||
| 828 | table[m2Hash] = uint32(s - 2) | ||
| 829 | table[currHash] = uint32(s) | ||
| 830 | if debug && s == candidate { | ||
| 831 | panic("s == candidate") | ||
| 832 | } | ||
| 833 | if uint32(x>>16) != load32(src, candidate) { | ||
| 834 | cv = load64(src, s+1) | ||
| 835 | s++ | ||
| 836 | break | ||
| 837 | } | ||
| 838 | } | ||
| 839 | } | ||
| 840 | |||
| 841 | // Search without dict: | ||
| 842 | if repeat > s { | ||
| 843 | repeat = 0 | ||
| 844 | } | ||
| 845 | |||
| 846 | // No more dict | ||
| 847 | sLimit = len(src) - inputMargin | ||
| 848 | if s >= sLimit { | ||
| 849 | goto emitRemainder | ||
| 850 | } | ||
| 851 | if debug { | ||
| 852 | fmt.Println("non-dict matching at", s, "repeat:", repeat) | ||
| 853 | } | ||
| 854 | cv = load64(src, s) | ||
| 855 | if debug { | ||
| 856 | fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s) | ||
| 857 | } | ||
| 858 | for { | ||
| 859 | candidate := 0 | ||
| 860 | for { | ||
| 861 | // Next src position to check | ||
| 862 | nextS := s + (s-nextEmit)>>6 + 4 | ||
| 863 | if nextS > sLimit { | ||
| 864 | goto emitRemainder | ||
| 865 | } | ||
| 866 | hash0 := hash6(cv, tableBits) | ||
| 867 | hash1 := hash6(cv>>8, tableBits) | ||
| 868 | candidate = int(table[hash0]) | ||
| 869 | candidate2 := int(table[hash1]) | ||
| 870 | table[hash0] = uint32(s) | ||
| 871 | table[hash1] = uint32(s + 1) | ||
| 872 | hash2 := hash6(cv>>16, tableBits) | ||
| 873 | |||
| 874 | // Check repeat at offset checkRep. | ||
| 875 | const checkRep = 1 | ||
| 876 | if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { | ||
| 877 | base := s + checkRep | ||
| 878 | // Extend back | ||
| 879 | for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { | ||
| 880 | i-- | ||
| 881 | base-- | ||
| 882 | } | ||
| 883 | d += emitLiteral(dst[d:], src[nextEmit:base]) | ||
| 884 | if debug && nextEmit != base { | ||
| 885 | fmt.Println("emitted ", base-nextEmit, "literals") | ||
| 886 | } | ||
| 887 | // Extend forward | ||
| 888 | candidate := s - repeat + 4 + checkRep | ||
| 889 | s += 4 + checkRep | ||
| 890 | for s <= sLimit { | ||
| 891 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
| 892 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 893 | break | ||
| 894 | } | ||
| 895 | s += 8 | ||
| 896 | candidate += 8 | ||
| 897 | } | ||
| 898 | if debug { | ||
| 899 | // Validate match. | ||
| 900 | if s <= candidate { | ||
| 901 | panic("s <= candidate") | ||
| 902 | } | ||
| 903 | a := src[base:s] | ||
| 904 | b := src[base-repeat : base-repeat+(s-base)] | ||
| 905 | if !bytes.Equal(a, b) { | ||
| 906 | panic("mismatch") | ||
| 907 | } | ||
| 908 | } | ||
| 909 | if nextEmit > 0 { | ||
| 910 | // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. | ||
| 911 | d += emitRepeat(dst[d:], repeat, s-base) | ||
| 912 | } else { | ||
| 913 | // First match, cannot be repeat. | ||
| 914 | d += emitCopy(dst[d:], repeat, s-base) | ||
| 915 | } | ||
| 916 | if debug { | ||
| 917 | fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s) | ||
| 918 | } | ||
| 919 | nextEmit = s | ||
| 920 | if s >= sLimit { | ||
| 921 | goto emitRemainder | ||
| 922 | } | ||
| 923 | |||
| 924 | cv = load64(src, s) | ||
| 925 | continue | ||
| 926 | } | ||
| 927 | |||
| 928 | if uint32(cv) == load32(src, candidate) { | ||
| 929 | break | ||
| 930 | } | ||
| 931 | candidate = int(table[hash2]) | ||
| 932 | if uint32(cv>>8) == load32(src, candidate2) { | ||
| 933 | table[hash2] = uint32(s + 2) | ||
| 934 | candidate = candidate2 | ||
| 935 | s++ | ||
| 936 | break | ||
| 937 | } | ||
| 938 | table[hash2] = uint32(s + 2) | ||
| 939 | if uint32(cv>>16) == load32(src, candidate) { | ||
| 940 | s += 2 | ||
| 941 | break | ||
| 942 | } | ||
| 943 | |||
| 944 | cv = load64(src, nextS) | ||
| 945 | s = nextS | ||
| 946 | } | ||
| 947 | |||
| 948 | // Extend backwards. | ||
| 949 | // The top bytes will be rechecked to get the full match. | ||
| 950 | for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { | ||
| 951 | candidate-- | ||
| 952 | s-- | ||
| 953 | } | ||
| 954 | |||
| 955 | // Bail if we exceed the maximum size. | ||
| 956 | if d+(s-nextEmit) > dstLimit { | ||
| 957 | return 0 | ||
| 958 | } | ||
| 959 | |||
| 960 | // A 4-byte match has been found. We'll later see if more than 4 bytes | ||
| 961 | // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit | ||
| 962 | // them as literal bytes. | ||
| 963 | |||
| 964 | d += emitLiteral(dst[d:], src[nextEmit:s]) | ||
| 965 | if debug && nextEmit != s { | ||
| 966 | fmt.Println("emitted ", s-nextEmit, "literals") | ||
| 967 | } | ||
| 968 | // Call emitCopy, and then see if another emitCopy could be our next | ||
| 969 | // move. Repeat until we find no match for the input immediately after | ||
| 970 | // what was consumed by the last emitCopy call. | ||
| 971 | // | ||
| 972 | // If we exit this loop normally then we need to call emitLiteral next, | ||
| 973 | // though we don't yet know how big the literal will be. We handle that | ||
| 974 | // by proceeding to the next iteration of the main loop. We also can | ||
| 975 | // exit this loop via goto if we get close to exhausting the input. | ||
| 976 | for { | ||
| 977 | // Invariant: we have a 4-byte match at s, and no need to emit any | ||
| 978 | // literal bytes prior to s. | ||
| 979 | base := s | ||
| 980 | repeat = base - candidate | ||
| 981 | |||
| 982 | // Extend the 4-byte match as long as possible. | ||
| 983 | s += 4 | ||
| 984 | candidate += 4 | ||
| 985 | for s <= len(src)-8 { | ||
| 986 | if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { | ||
| 987 | s += bits.TrailingZeros64(diff) >> 3 | ||
| 988 | break | ||
| 989 | } | ||
| 990 | s += 8 | ||
| 991 | candidate += 8 | ||
| 992 | } | ||
| 993 | |||
| 994 | d += emitCopy(dst[d:], repeat, s-base) | ||
| 995 | if debug { | ||
| 996 | // Validate match. | ||
| 997 | if s <= candidate { | ||
| 998 | panic("s <= candidate") | ||
| 999 | } | ||
| 1000 | a := src[base:s] | ||
| 1001 | b := src[base-repeat : base-repeat+(s-base)] | ||
| 1002 | if !bytes.Equal(a, b) { | ||
| 1003 | panic("mismatch") | ||
| 1004 | } | ||
| 1005 | } | ||
| 1006 | if debug { | ||
| 1007 | fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s) | ||
| 1008 | } | ||
| 1009 | nextEmit = s | ||
| 1010 | if s >= sLimit { | ||
| 1011 | goto emitRemainder | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | if d > dstLimit { | ||
| 1015 | // Do we have space for more, if not bail. | ||
| 1016 | return 0 | ||
| 1017 | } | ||
| 1018 | // Check for an immediate match, otherwise start search at s+1 | ||
| 1019 | x := load64(src, s-2) | ||
| 1020 | m2Hash := hash6(x, tableBits) | ||
| 1021 | currHash := hash6(x>>16, tableBits) | ||
| 1022 | candidate = int(table[currHash]) | ||
| 1023 | table[m2Hash] = uint32(s - 2) | ||
| 1024 | table[currHash] = uint32(s) | ||
| 1025 | if debug && s == candidate { | ||
| 1026 | panic("s == candidate") | ||
| 1027 | } | ||
| 1028 | if uint32(x>>16) != load32(src, candidate) { | ||
| 1029 | cv = load64(src, s+1) | ||
| 1030 | s++ | ||
| 1031 | break | ||
| 1032 | } | ||
| 1033 | } | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | emitRemainder: | ||
| 1037 | if nextEmit < len(src) { | ||
| 1038 | // Bail if we exceed the maximum size. | ||
| 1039 | if d+len(src)-nextEmit > dstLimit { | ||
| 1040 | return 0 | ||
| 1041 | } | ||
| 1042 | d += emitLiteral(dst[d:], src[nextEmit:]) | ||
| 1043 | if debug && nextEmit != s { | ||
| 1044 | fmt.Println("emitted ", len(src)-nextEmit, "literals") | ||
| 1045 | } | ||
| 1046 | } | ||
| 1047 | return d | ||
| 1048 | } | ||