diff options
| author | Rutger Broekhoff | 2023-12-29 21:31:53 +0100 |
|---|---|---|
| committer | Rutger Broekhoff | 2023-12-29 21:31:53 +0100 |
| commit | 404aeae4545d2426c089a5f8d5e82dae56f5212b (patch) | |
| tree | 2d84e00af272b39fc04f3795ae06bc48970e57b5 /vendor/github.com/klauspost/compress/s2/dict.go | |
| parent | 209d8b0187ed025dec9ac149ebcced3462877bff (diff) | |
| download | gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.tar.gz gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.zip | |
Make Nix builds work
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/dict.go')
| -rw-r--r-- | vendor/github.com/klauspost/compress/s2/dict.go | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/dict.go b/vendor/github.com/klauspost/compress/s2/dict.go new file mode 100644 index 0000000..f125ad0 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/dict.go | |||
| @@ -0,0 +1,350 @@ | |||
| 1 | // Copyright (c) 2022+ Klaus Post. All rights reserved. | ||
| 2 | // Use of this source code is governed by a BSD-style | ||
| 3 | // license that can be found in the LICENSE file. | ||
| 4 | |||
| 5 | package s2 | ||
| 6 | |||
| 7 | import ( | ||
| 8 | "bytes" | ||
| 9 | "encoding/binary" | ||
| 10 | "sync" | ||
| 11 | ) | ||
| 12 | |||
| 13 | const ( | ||
| 14 | // MinDictSize is the minimum dictionary size when repeat has been read. | ||
| 15 | MinDictSize = 16 | ||
| 16 | |||
| 17 | // MaxDictSize is the maximum dictionary size when repeat has been read. | ||
| 18 | MaxDictSize = 65536 | ||
| 19 | |||
| 20 | // MaxDictSrcOffset is the maximum offset where a dictionary entry can start. | ||
| 21 | MaxDictSrcOffset = 65535 | ||
| 22 | ) | ||
| 23 | |||
| 24 | // Dict contains a dictionary that can be used for encoding and decoding s2 | ||
| 25 | type Dict struct { | ||
| 26 | dict []byte | ||
| 27 | repeat int // Repeat as index of dict | ||
| 28 | |||
| 29 | fast, better, best sync.Once | ||
| 30 | fastTable *[1 << 14]uint16 | ||
| 31 | |||
| 32 | betterTableShort *[1 << 14]uint16 | ||
| 33 | betterTableLong *[1 << 17]uint16 | ||
| 34 | |||
| 35 | bestTableShort *[1 << 16]uint32 | ||
| 36 | bestTableLong *[1 << 19]uint32 | ||
| 37 | } | ||
| 38 | |||
| 39 | // NewDict will read a dictionary. | ||
| 40 | // It will return nil if the dictionary is invalid. | ||
| 41 | func NewDict(dict []byte) *Dict { | ||
| 42 | if len(dict) == 0 { | ||
| 43 | return nil | ||
| 44 | } | ||
| 45 | var d Dict | ||
| 46 | // Repeat is the first value of the dict | ||
| 47 | r, n := binary.Uvarint(dict) | ||
| 48 | if n <= 0 { | ||
| 49 | return nil | ||
| 50 | } | ||
| 51 | dict = dict[n:] | ||
| 52 | d.dict = dict | ||
| 53 | if cap(d.dict) < len(d.dict)+16 { | ||
| 54 | d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) | ||
| 55 | } | ||
| 56 | if len(dict) < MinDictSize || len(dict) > MaxDictSize { | ||
| 57 | return nil | ||
| 58 | } | ||
| 59 | d.repeat = int(r) | ||
| 60 | if d.repeat > len(dict) { | ||
| 61 | return nil | ||
| 62 | } | ||
| 63 | return &d | ||
| 64 | } | ||
| 65 | |||
| 66 | // Bytes will return a serialized version of the dictionary. | ||
| 67 | // The output can be sent to NewDict. | ||
| 68 | func (d *Dict) Bytes() []byte { | ||
| 69 | dst := make([]byte, binary.MaxVarintLen16+len(d.dict)) | ||
| 70 | return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...) | ||
| 71 | } | ||
| 72 | |||
| 73 | // MakeDict will create a dictionary. | ||
| 74 | // 'data' must be at least MinDictSize. | ||
| 75 | // If data is longer than MaxDictSize only the last MaxDictSize bytes will be used. | ||
| 76 | // If searchStart is set the start repeat value will be set to the last | ||
| 77 | // match of this content. | ||
| 78 | // If no matches are found, it will attempt to find shorter matches. | ||
| 79 | // This content should match the typical start of a block. | ||
| 80 | // If at least 4 bytes cannot be matched, repeat is set to start of block. | ||
| 81 | func MakeDict(data []byte, searchStart []byte) *Dict { | ||
| 82 | if len(data) == 0 { | ||
| 83 | return nil | ||
| 84 | } | ||
| 85 | if len(data) > MaxDictSize { | ||
| 86 | data = data[len(data)-MaxDictSize:] | ||
| 87 | } | ||
| 88 | var d Dict | ||
| 89 | dict := data | ||
| 90 | d.dict = dict | ||
| 91 | if cap(d.dict) < len(d.dict)+16 { | ||
| 92 | d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) | ||
| 93 | } | ||
| 94 | if len(dict) < MinDictSize { | ||
| 95 | return nil | ||
| 96 | } | ||
| 97 | |||
| 98 | // Find the longest match possible, last entry if multiple. | ||
| 99 | for s := len(searchStart); s > 4; s-- { | ||
| 100 | if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 { | ||
| 101 | d.repeat = idx | ||
| 102 | break | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | return &d | ||
| 107 | } | ||
| 108 | |||
| 109 | // MakeDictManual will create a dictionary. | ||
| 110 | // 'data' must be at least MinDictSize and less than or equal to MaxDictSize. | ||
| 111 | // A manual first repeat index into data must be provided. | ||
| 112 | // It must be less than len(data)-8. | ||
| 113 | func MakeDictManual(data []byte, firstIdx uint16) *Dict { | ||
| 114 | if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize { | ||
| 115 | return nil | ||
| 116 | } | ||
| 117 | var d Dict | ||
| 118 | dict := data | ||
| 119 | d.dict = dict | ||
| 120 | if cap(d.dict) < len(d.dict)+16 { | ||
| 121 | d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) | ||
| 122 | } | ||
| 123 | |||
| 124 | d.repeat = int(firstIdx) | ||
| 125 | return &d | ||
| 126 | } | ||
| 127 | |||
| 128 | // Encode returns the encoded form of src. The returned slice may be a sub- | ||
| 129 | // slice of dst if dst was large enough to hold the entire encoded block. | ||
| 130 | // Otherwise, a newly allocated slice will be returned. | ||
| 131 | // | ||
| 132 | // The dst and src must not overlap. It is valid to pass a nil dst. | ||
| 133 | // | ||
| 134 | // The blocks will require the same amount of memory to decode as encoding, | ||
| 135 | // and does not make for concurrent decoding. | ||
| 136 | // Also note that blocks do not contain CRC information, so corruption may be undetected. | ||
| 137 | // | ||
| 138 | // If you need to encode larger amounts of data, consider using | ||
| 139 | // the streaming interface which gives all of these features. | ||
| 140 | func (d *Dict) Encode(dst, src []byte) []byte { | ||
| 141 | if n := MaxEncodedLen(len(src)); n < 0 { | ||
| 142 | panic(ErrTooLarge) | ||
| 143 | } else if cap(dst) < n { | ||
| 144 | dst = make([]byte, n) | ||
| 145 | } else { | ||
| 146 | dst = dst[:n] | ||
| 147 | } | ||
| 148 | |||
| 149 | // The block starts with the varint-encoded length of the decompressed bytes. | ||
| 150 | dstP := binary.PutUvarint(dst, uint64(len(src))) | ||
| 151 | |||
| 152 | if len(src) == 0 { | ||
| 153 | return dst[:dstP] | ||
| 154 | } | ||
| 155 | if len(src) < minNonLiteralBlockSize { | ||
| 156 | dstP += emitLiteral(dst[dstP:], src) | ||
| 157 | return dst[:dstP] | ||
| 158 | } | ||
| 159 | n := encodeBlockDictGo(dst[dstP:], src, d) | ||
| 160 | if n > 0 { | ||
| 161 | dstP += n | ||
| 162 | return dst[:dstP] | ||
| 163 | } | ||
| 164 | // Not compressible | ||
| 165 | dstP += emitLiteral(dst[dstP:], src) | ||
| 166 | return dst[:dstP] | ||
| 167 | } | ||
| 168 | |||
| 169 | // EncodeBetter returns the encoded form of src. The returned slice may be a sub- | ||
| 170 | // slice of dst if dst was large enough to hold the entire encoded block. | ||
| 171 | // Otherwise, a newly allocated slice will be returned. | ||
| 172 | // | ||
| 173 | // EncodeBetter compresses better than Encode but typically with a | ||
| 174 | // 10-40% speed decrease on both compression and decompression. | ||
| 175 | // | ||
| 176 | // The dst and src must not overlap. It is valid to pass a nil dst. | ||
| 177 | // | ||
| 178 | // The blocks will require the same amount of memory to decode as encoding, | ||
| 179 | // and does not make for concurrent decoding. | ||
| 180 | // Also note that blocks do not contain CRC information, so corruption may be undetected. | ||
| 181 | // | ||
| 182 | // If you need to encode larger amounts of data, consider using | ||
| 183 | // the streaming interface which gives all of these features. | ||
| 184 | func (d *Dict) EncodeBetter(dst, src []byte) []byte { | ||
| 185 | if n := MaxEncodedLen(len(src)); n < 0 { | ||
| 186 | panic(ErrTooLarge) | ||
| 187 | } else if len(dst) < n { | ||
| 188 | dst = make([]byte, n) | ||
| 189 | } | ||
| 190 | |||
| 191 | // The block starts with the varint-encoded length of the decompressed bytes. | ||
| 192 | dstP := binary.PutUvarint(dst, uint64(len(src))) | ||
| 193 | |||
| 194 | if len(src) == 0 { | ||
| 195 | return dst[:dstP] | ||
| 196 | } | ||
| 197 | if len(src) < minNonLiteralBlockSize { | ||
| 198 | dstP += emitLiteral(dst[dstP:], src) | ||
| 199 | return dst[:dstP] | ||
| 200 | } | ||
| 201 | n := encodeBlockBetterDict(dst[dstP:], src, d) | ||
| 202 | if n > 0 { | ||
| 203 | dstP += n | ||
| 204 | return dst[:dstP] | ||
| 205 | } | ||
| 206 | // Not compressible | ||
| 207 | dstP += emitLiteral(dst[dstP:], src) | ||
| 208 | return dst[:dstP] | ||
| 209 | } | ||
| 210 | |||
| 211 | // EncodeBest returns the encoded form of src. The returned slice may be a sub- | ||
| 212 | // slice of dst if dst was large enough to hold the entire encoded block. | ||
| 213 | // Otherwise, a newly allocated slice will be returned. | ||
| 214 | // | ||
| 215 | // EncodeBest compresses as good as reasonably possible but with a | ||
| 216 | // big speed decrease. | ||
| 217 | // | ||
| 218 | // The dst and src must not overlap. It is valid to pass a nil dst. | ||
| 219 | // | ||
| 220 | // The blocks will require the same amount of memory to decode as encoding, | ||
| 221 | // and does not make for concurrent decoding. | ||
| 222 | // Also note that blocks do not contain CRC information, so corruption may be undetected. | ||
| 223 | // | ||
| 224 | // If you need to encode larger amounts of data, consider using | ||
| 225 | // the streaming interface which gives all of these features. | ||
| 226 | func (d *Dict) EncodeBest(dst, src []byte) []byte { | ||
| 227 | if n := MaxEncodedLen(len(src)); n < 0 { | ||
| 228 | panic(ErrTooLarge) | ||
| 229 | } else if len(dst) < n { | ||
| 230 | dst = make([]byte, n) | ||
| 231 | } | ||
| 232 | |||
| 233 | // The block starts with the varint-encoded length of the decompressed bytes. | ||
| 234 | dstP := binary.PutUvarint(dst, uint64(len(src))) | ||
| 235 | |||
| 236 | if len(src) == 0 { | ||
| 237 | return dst[:dstP] | ||
| 238 | } | ||
| 239 | if len(src) < minNonLiteralBlockSize { | ||
| 240 | dstP += emitLiteral(dst[dstP:], src) | ||
| 241 | return dst[:dstP] | ||
| 242 | } | ||
| 243 | n := encodeBlockBest(dst[dstP:], src, d) | ||
| 244 | if n > 0 { | ||
| 245 | dstP += n | ||
| 246 | return dst[:dstP] | ||
| 247 | } | ||
| 248 | // Not compressible | ||
| 249 | dstP += emitLiteral(dst[dstP:], src) | ||
| 250 | return dst[:dstP] | ||
| 251 | } | ||
| 252 | |||
| 253 | // Decode returns the decoded form of src. The returned slice may be a sub- | ||
| 254 | // slice of dst if dst was large enough to hold the entire decoded block. | ||
| 255 | // Otherwise, a newly allocated slice will be returned. | ||
| 256 | // | ||
| 257 | // The dst and src must not overlap. It is valid to pass a nil dst. | ||
| 258 | func (d *Dict) Decode(dst, src []byte) ([]byte, error) { | ||
| 259 | dLen, s, err := decodedLen(src) | ||
| 260 | if err != nil { | ||
| 261 | return nil, err | ||
| 262 | } | ||
| 263 | if dLen <= cap(dst) { | ||
| 264 | dst = dst[:dLen] | ||
| 265 | } else { | ||
| 266 | dst = make([]byte, dLen) | ||
| 267 | } | ||
| 268 | if s2DecodeDict(dst, src[s:], d) != 0 { | ||
| 269 | return nil, ErrCorrupt | ||
| 270 | } | ||
| 271 | return dst, nil | ||
| 272 | } | ||
| 273 | |||
| 274 | func (d *Dict) initFast() { | ||
| 275 | d.fast.Do(func() { | ||
| 276 | const ( | ||
| 277 | tableBits = 14 | ||
| 278 | maxTableSize = 1 << tableBits | ||
| 279 | ) | ||
| 280 | |||
| 281 | var table [maxTableSize]uint16 | ||
| 282 | // We stop so any entry of length 8 can always be read. | ||
| 283 | for i := 0; i < len(d.dict)-8-2; i += 3 { | ||
| 284 | x0 := load64(d.dict, i) | ||
| 285 | h0 := hash6(x0, tableBits) | ||
| 286 | h1 := hash6(x0>>8, tableBits) | ||
| 287 | h2 := hash6(x0>>16, tableBits) | ||
| 288 | table[h0] = uint16(i) | ||
| 289 | table[h1] = uint16(i + 1) | ||
| 290 | table[h2] = uint16(i + 2) | ||
| 291 | } | ||
| 292 | d.fastTable = &table | ||
| 293 | }) | ||
| 294 | } | ||
| 295 | |||
| 296 | func (d *Dict) initBetter() { | ||
| 297 | d.better.Do(func() { | ||
| 298 | const ( | ||
| 299 | // Long hash matches. | ||
| 300 | lTableBits = 17 | ||
| 301 | maxLTableSize = 1 << lTableBits | ||
| 302 | |||
| 303 | // Short hash matches. | ||
| 304 | sTableBits = 14 | ||
| 305 | maxSTableSize = 1 << sTableBits | ||
| 306 | ) | ||
| 307 | |||
| 308 | var lTable [maxLTableSize]uint16 | ||
| 309 | var sTable [maxSTableSize]uint16 | ||
| 310 | |||
| 311 | // We stop so any entry of length 8 can always be read. | ||
| 312 | for i := 0; i < len(d.dict)-8; i++ { | ||
| 313 | cv := load64(d.dict, i) | ||
| 314 | lTable[hash7(cv, lTableBits)] = uint16(i) | ||
| 315 | sTable[hash4(cv, sTableBits)] = uint16(i) | ||
| 316 | } | ||
| 317 | d.betterTableShort = &sTable | ||
| 318 | d.betterTableLong = &lTable | ||
| 319 | }) | ||
| 320 | } | ||
| 321 | |||
| 322 | func (d *Dict) initBest() { | ||
| 323 | d.best.Do(func() { | ||
| 324 | const ( | ||
| 325 | // Long hash matches. | ||
| 326 | lTableBits = 19 | ||
| 327 | maxLTableSize = 1 << lTableBits | ||
| 328 | |||
| 329 | // Short hash matches. | ||
| 330 | sTableBits = 16 | ||
| 331 | maxSTableSize = 1 << sTableBits | ||
| 332 | ) | ||
| 333 | |||
| 334 | var lTable [maxLTableSize]uint32 | ||
| 335 | var sTable [maxSTableSize]uint32 | ||
| 336 | |||
| 337 | // We stop so any entry of length 8 can always be read. | ||
| 338 | for i := 0; i < len(d.dict)-8; i++ { | ||
| 339 | cv := load64(d.dict, i) | ||
| 340 | hashL := hash8(cv, lTableBits) | ||
| 341 | hashS := hash4(cv, sTableBits) | ||
| 342 | candidateL := lTable[hashL] | ||
| 343 | candidateS := sTable[hashS] | ||
| 344 | lTable[hashL] = uint32(i) | candidateL<<16 | ||
| 345 | sTable[hashS] = uint32(i) | candidateS<<16 | ||
| 346 | } | ||
| 347 | d.bestTableShort = &sTable | ||
| 348 | d.bestTableLong = &lTable | ||
| 349 | }) | ||
| 350 | } | ||