From 404aeae4545d2426c089a5f8d5e82dae56f5212b Mon Sep 17 00:00:00 2001 From: Rutger Broekhoff Date: Fri, 29 Dec 2023 21:31:53 +0100 Subject: Make Nix builds work --- .../klauspost/compress/s2/encode_best.go | 796 +++++++++++++++++++++ 1 file changed, 796 insertions(+) create mode 100644 vendor/github.com/klauspost/compress/s2/encode_best.go (limited to 'vendor/github.com/klauspost/compress/s2/encode_best.go') diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go new file mode 100644 index 0000000..47bac74 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/encode_best.go @@ -0,0 +1,796 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Copyright (c) 2019 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s2 + +import ( + "fmt" + "math" + "math/bits" +) + +// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlockBest(dst, src []byte, dict *Dict) (d int) { + // Initialize the hash tables. + const ( + // Long hash matches. + lTableBits = 19 + maxLTableSize = 1 << lTableBits + + // Short hash matches. + sTableBits = 16 + maxSTableSize = 1 << sTableBits + + inputMargin = 8 + 2 + + debug = false + ) + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + if len(src) < minNonLiteralBlockSize { + return 0 + } + sLimitDict := len(src) - inputMargin + if sLimitDict > MaxDictSrcOffset-inputMargin { + sLimitDict = MaxDictSrcOffset - inputMargin + } + + var lTable [maxLTableSize]uint64 + var sTable [maxSTableSize]uint64 + + // Bail if we can't compress to at least this. + dstLimit := len(src) - 5 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + repeat := 1 + if dict != nil { + dict.initBest() + s = 0 + repeat = len(dict.dict) - dict.repeat + } + cv := load64(src, s) + + // We search for a repeat at -1, but don't output repeats when nextEmit == 0 + const lowbitMask = 0xffffffff + getCur := func(x uint64) int { + return int(x & lowbitMask) + } + getPrev := func(x uint64) int { + return int(x >> 32) + } + const maxSkip = 64 + + for { + type match struct { + offset int + s int + length int + score int + rep, dict bool + } + var best match + for { + // Next src position to check + nextS := (s-nextEmit)>>8 + 1 + if nextS > maxSkip { + nextS = s + maxSkip + } else { + nextS += s + } + if nextS > sLimit { + goto emitRemainder + } + if dict != nil && s >= MaxDictSrcOffset { + dict = nil + if repeat > s { + repeat = math.MinInt32 + } + } + hashL := hash8(cv, lTableBits) + hashS := hash4(cv, sTableBits) + candidateL := lTable[hashL] + candidateS := sTable[hashS] + + score := func(m match) int { + // Matches that are longer forward are penalized since we must emit it as a literal. + score := m.length - m.s + if nextEmit == m.s { + // If we do not have to emit literals, we save 1 byte + score++ + } + offset := m.s - m.offset + if m.rep { + return score - emitRepeatSize(offset, m.length) + } + return score - emitCopySize(offset, m.length) + } + + matchAt := func(offset, s int, first uint32, rep bool) match { + if best.length != 0 && best.s-best.offset == s-offset { + // Don't retest if we have the same offset. + return match{offset: offset, s: s} + } + if load32(src, offset) != first { + return match{offset: offset, s: s} + } + m := match{offset: offset, s: s, length: 4 + offset, rep: rep} + s += 4 + for s < len(src) { + if len(src)-s < 8 { + if src[s] == src[m.length] { + m.length++ + s++ + continue + } + break + } + if diff := load64(src, s) ^ load64(src, m.length); diff != 0 { + m.length += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + m.length += 8 + } + m.length -= offset + m.score = score(m) + if m.score <= -m.s { + // Eliminate if no savings, we might find a better one. + m.length = 0 + } + return m + } + matchDict := func(candidate, s int, first uint32, rep bool) match { + if s >= MaxDictSrcOffset { + return match{offset: candidate, s: s} + } + // Calculate offset as if in continuous array with s + offset := -len(dict.dict) + candidate + if best.length != 0 && best.s-best.offset == s-offset && !rep { + // Don't retest if we have the same offset. + return match{offset: offset, s: s} + } + + if load32(dict.dict, candidate) != first { + return match{offset: offset, s: s} + } + m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true} + s += 4 + if !rep { + for s < sLimitDict && m.length < len(dict.dict) { + if len(src)-s < 8 || len(dict.dict)-m.length < 8 { + if src[s] == dict.dict[m.length] { + m.length++ + s++ + continue + } + break + } + if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 { + m.length += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + m.length += 8 + } + } else { + for s < len(src) && m.length < len(dict.dict) { + if len(src)-s < 8 || len(dict.dict)-m.length < 8 { + if src[s] == dict.dict[m.length] { + m.length++ + s++ + continue + } + break + } + if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 { + m.length += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + m.length += 8 + } + } + m.length -= candidate + m.score = score(m) + if m.score <= -m.s { + // Eliminate if no savings, we might find a better one. + m.length = 0 + } + return m + } + + bestOf := func(a, b match) match { + if b.length == 0 { + return a + } + if a.length == 0 { + return b + } + as := a.score + b.s + bs := b.score + a.s + if as >= bs { + return a + } + return b + } + + if s > 0 { + best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false)) + best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false)) + best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false)) + } + if dict != nil { + candidateL := dict.bestTableLong[hashL] + candidateS := dict.bestTableShort[hashS] + best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false)) + } + { + if (dict == nil || repeat <= s) && repeat > 0 { + best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true)) + } else if s-repeat < -4 && dict != nil { + candidate := len(dict.dict) - (repeat - s) + best = bestOf(best, matchDict(candidate, s, uint32(cv), true)) + candidate++ + best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true)) + } + + if best.length > 0 { + hashS := hash4(cv>>8, sTableBits) + // s+1 + nextShort := sTable[hashS] + s := s + 1 + cv := load64(src, s) + hashL := hash8(cv, lTableBits) + nextLong := lTable[hashL] + best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false)) + best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false)) + best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false)) + best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) + + // Dict at + 1 + if dict != nil { + candidateL := dict.bestTableLong[hashL] + candidateS := dict.bestTableShort[hashS] + + best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) + } + + // s+2 + if true { + hashS := hash4(cv>>8, sTableBits) + + nextShort = sTable[hashS] + s++ + cv = load64(src, s) + hashL := hash8(cv, lTableBits) + nextLong = lTable[hashL] + + if (dict == nil || repeat <= s) && repeat > 0 { + // Repeat at + 2 + best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true)) + } else if repeat-s > 4 && dict != nil { + candidate := len(dict.dict) - (repeat - s) + best = bestOf(best, matchDict(candidate, s, uint32(cv), true)) + } + best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false)) + best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false)) + best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false)) + best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) + + // Dict at +2 + // Very small gain + if dict != nil { + candidateL := dict.bestTableLong[hashL] + candidateS := dict.bestTableShort[hashS] + + best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false)) + best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false)) + } + } + // Search for a match at best match end, see if that is better. + // Allow some bytes at the beginning to mismatch. + // Sweet spot is around 1-2 bytes, but depends on input. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 + const skipEnd = 1 + if sAt := best.s + best.length - skipEnd; sAt < sLimit { + + sBack := best.s + skipBeginning - skipEnd + backL := best.length - skipBeginning + // Load initial values + cv = load64(src, sBack) + + // Grab candidates... + next := lTable[hash8(load64(src, sAt), lTableBits)] + + if checkAt := getCur(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) + } + if checkAt := getPrev(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) + } + // Disabled: Extremely small gain + if false { + next = sTable[hash4(load64(src, sAt), sTableBits)] + if checkAt := getCur(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) + } + if checkAt := getPrev(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) + } + } + } + } + } + + // Update table + lTable[hashL] = uint64(s) | candidateL<<32 + sTable[hashS] = uint64(s) | candidateS<<32 + + if best.length > 0 { + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards, not needed for repeats... + s = best.s + if !best.rep && !best.dict { + for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] { + best.offset-- + best.length++ + s-- + } + } + if false && best.offset >= s { + panic(fmt.Errorf("t %d >= s %d", best.offset, s)) + } + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + base := s + offset := s - best.offset + s += best.length + + if offset > 65535 && s-base <= 5 && !best.rep { + // Bail if the match is equal or worse to the encoding. + s = best.s + 1 + if s >= sLimit { + goto emitRemainder + } + cv = load64(src, s) + continue + } + if debug && nextEmit != base { + fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base) + } + d += emitLiteral(dst[d:], src[nextEmit:base]) + if best.rep { + if nextEmit > 0 || best.dict { + if debug { + fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) + } + // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. + d += emitRepeat(dst[d:], offset, best.length) + } else { + // First match without dict cannot be a repeat. + if debug { + fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) + } + d += emitCopy(dst[d:], offset, best.length) + } + } else { + if debug { + fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best) + } + d += emitCopy(dst[d:], offset, best.length) + } + repeat = offset + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + // Fill tables... + for i := best.s + 1; i < s; i++ { + cv0 := load64(src, i) + long0 := hash8(cv0, lTableBits) + short0 := hash4(cv0, sTableBits) + lTable[long0] = uint64(i) | lTable[long0]<<32 + sTable[short0] = uint64(i) | sTable[short0]<<32 + } + cv = load64(src, s) + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + if debug && nextEmit != s { + fmt.Println("emitted ", len(src)-nextEmit, "literals") + } + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} + +// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlockBestSnappy(dst, src []byte) (d int) { + // Initialize the hash tables. + const ( + // Long hash matches. + lTableBits = 19 + maxLTableSize = 1 << lTableBits + + // Short hash matches. + sTableBits = 16 + maxSTableSize = 1 << sTableBits + + inputMargin = 8 + 2 + ) + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + if len(src) < minNonLiteralBlockSize { + return 0 + } + + var lTable [maxLTableSize]uint64 + var sTable [maxSTableSize]uint64 + + // Bail if we can't compress to at least this. + dstLimit := len(src) - 5 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + cv := load64(src, s) + + // We search for a repeat at -1, but don't output repeats when nextEmit == 0 + repeat := 1 + const lowbitMask = 0xffffffff + getCur := func(x uint64) int { + return int(x & lowbitMask) + } + getPrev := func(x uint64) int { + return int(x >> 32) + } + const maxSkip = 64 + + for { + type match struct { + offset int + s int + length int + score int + } + var best match + for { + // Next src position to check + nextS := (s-nextEmit)>>8 + 1 + if nextS > maxSkip { + nextS = s + maxSkip + } else { + nextS += s + } + if nextS > sLimit { + goto emitRemainder + } + hashL := hash8(cv, lTableBits) + hashS := hash4(cv, sTableBits) + candidateL := lTable[hashL] + candidateS := sTable[hashS] + + score := func(m match) int { + // Matches that are longer forward are penalized since we must emit it as a literal. + score := m.length - m.s + if nextEmit == m.s { + // If we do not have to emit literals, we save 1 byte + score++ + } + offset := m.s - m.offset + + return score - emitCopyNoRepeatSize(offset, m.length) + } + + matchAt := func(offset, s int, first uint32) match { + if best.length != 0 && best.s-best.offset == s-offset { + // Don't retest if we have the same offset. + return match{offset: offset, s: s} + } + if load32(src, offset) != first { + return match{offset: offset, s: s} + } + m := match{offset: offset, s: s, length: 4 + offset} + s += 4 + for s <= sLimit { + if diff := load64(src, s) ^ load64(src, m.length); diff != 0 { + m.length += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + m.length += 8 + } + m.length -= offset + m.score = score(m) + if m.score <= -m.s { + // Eliminate if no savings, we might find a better one. + m.length = 0 + } + return m + } + + bestOf := func(a, b match) match { + if b.length == 0 { + return a + } + if a.length == 0 { + return b + } + as := a.score + b.s + bs := b.score + a.s + if as >= bs { + return a + } + return b + } + + best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv))) + best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv))) + best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv))) + + { + best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8))) + if best.length > 0 { + // s+1 + nextShort := sTable[hash4(cv>>8, sTableBits)] + s := s + 1 + cv := load64(src, s) + nextLong := lTable[hash8(cv, lTableBits)] + best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv))) + best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv))) + best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv))) + best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv))) + // Repeat at + 2 + best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8))) + + // s+2 + if true { + nextShort = sTable[hash4(cv>>8, sTableBits)] + s++ + cv = load64(src, s) + nextLong = lTable[hash8(cv, lTableBits)] + best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv))) + best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv))) + best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv))) + best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv))) + } + // Search for a match at best match end, see if that is better. + if sAt := best.s + best.length; sAt < sLimit { + sBack := best.s + backL := best.length + // Load initial values + cv = load64(src, sBack) + // Search for mismatch + next := lTable[hash8(load64(src, sAt), lTableBits)] + //next := sTable[hash4(load64(src, sAt), sTableBits)] + + if checkAt := getCur(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv))) + } + if checkAt := getPrev(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv))) + } + } + } + } + + // Update table + lTable[hashL] = uint64(s) | candidateL<<32 + sTable[hashS] = uint64(s) | candidateS<<32 + + if best.length > 0 { + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards, not needed for repeats... + s = best.s + if true { + for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] { + best.offset-- + best.length++ + s-- + } + } + if false && best.offset >= s { + panic(fmt.Errorf("t %d >= s %d", best.offset, s)) + } + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + base := s + offset := s - best.offset + + s += best.length + + if offset > 65535 && s-base <= 5 { + // Bail if the match is equal or worse to the encoding. + s = best.s + 1 + if s >= sLimit { + goto emitRemainder + } + cv = load64(src, s) + continue + } + d += emitLiteral(dst[d:], src[nextEmit:base]) + d += emitCopyNoRepeat(dst[d:], offset, best.length) + repeat = offset + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + // Fill tables... + for i := best.s + 1; i < s; i++ { + cv0 := load64(src, i) + long0 := hash8(cv0, lTableBits) + short0 := hash4(cv0, sTableBits) + lTable[long0] = uint64(i) | lTable[long0]<<32 + sTable[short0] = uint64(i) | sTable[short0]<<32 + } + cv = load64(src, s) + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} + +// emitCopySize returns the size to encode the offset+length +// +// It assumes that: +// +// 1 <= offset && offset <= math.MaxUint32 +// 4 <= length && length <= 1 << 24 +func emitCopySize(offset, length int) int { + if offset >= 65536 { + i := 0 + if length > 64 { + length -= 64 + if length >= 4 { + // Emit remaining as repeats + return 5 + emitRepeatSize(offset, length) + } + i = 5 + } + if length == 0 { + return i + } + return i + 5 + } + + // Offset no more than 2 bytes. + if length > 64 { + if offset < 2048 { + // Emit 8 bytes, then rest as repeats... + return 2 + emitRepeatSize(offset, length-8) + } + // Emit remaining as repeats, at least 4 bytes remain. + return 3 + emitRepeatSize(offset, length-60) + } + if length >= 12 || offset >= 2048 { + return 3 + } + // Emit the remaining copy, encoded as 2 bytes. + return 2 +} + +// emitCopyNoRepeatSize returns the size to encode the offset+length +// +// It assumes that: +// +// 1 <= offset && offset <= math.MaxUint32 +// 4 <= length && length <= 1 << 24 +func emitCopyNoRepeatSize(offset, length int) int { + if offset >= 65536 { + return 5 + 5*(length/64) + } + + // Offset no more than 2 bytes. + if length > 64 { + // Emit remaining as repeats, at least 4 bytes remain. + return 3 + 3*(length/60) + } + if length >= 12 || offset >= 2048 { + return 3 + } + // Emit the remaining copy, encoded as 2 bytes. + return 2 +} + +// emitRepeatSize returns the number of bytes required to encode a repeat. +// Length must be at least 4 and < 1<<24 +func emitRepeatSize(offset, length int) int { + // Repeat offset, make length cheaper + if length <= 4+4 || (length < 8+4 && offset < 2048) { + return 2 + } + if length < (1<<8)+4+4 { + return 3 + } + if length < (1<<16)+(1<<8)+4 { + return 4 + } + const maxRepeat = (1 << 24) - 1 + length -= (1 << 16) - 4 + left := 0 + if length > maxRepeat { + left = length - maxRepeat + 4 + } + if left > 0 { + return 5 + emitRepeatSize(offset, left) + } + return 5 +} -- cgit v1.2.3