diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/dict.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/s2/dict.go | 350 |
1 files changed, 0 insertions, 350 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/dict.go b/vendor/github.com/klauspost/compress/s2/dict.go deleted file mode 100644 index f125ad0..0000000 --- a/vendor/github.com/klauspost/compress/s2/dict.go +++ /dev/null | |||
@@ -1,350 +0,0 @@ | |||
1 | // Copyright (c) 2022+ Klaus Post. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | package s2 | ||
6 | |||
7 | import ( | ||
8 | "bytes" | ||
9 | "encoding/binary" | ||
10 | "sync" | ||
11 | ) | ||
12 | |||
13 | const ( | ||
14 | // MinDictSize is the minimum dictionary size when repeat has been read. | ||
15 | MinDictSize = 16 | ||
16 | |||
17 | // MaxDictSize is the maximum dictionary size when repeat has been read. | ||
18 | MaxDictSize = 65536 | ||
19 | |||
20 | // MaxDictSrcOffset is the maximum offset where a dictionary entry can start. | ||
21 | MaxDictSrcOffset = 65535 | ||
22 | ) | ||
23 | |||
24 | // Dict contains a dictionary that can be used for encoding and decoding s2 | ||
25 | type Dict struct { | ||
26 | dict []byte | ||
27 | repeat int // Repeat as index of dict | ||
28 | |||
29 | fast, better, best sync.Once | ||
30 | fastTable *[1 << 14]uint16 | ||
31 | |||
32 | betterTableShort *[1 << 14]uint16 | ||
33 | betterTableLong *[1 << 17]uint16 | ||
34 | |||
35 | bestTableShort *[1 << 16]uint32 | ||
36 | bestTableLong *[1 << 19]uint32 | ||
37 | } | ||
38 | |||
39 | // NewDict will read a dictionary. | ||
40 | // It will return nil if the dictionary is invalid. | ||
41 | func NewDict(dict []byte) *Dict { | ||
42 | if len(dict) == 0 { | ||
43 | return nil | ||
44 | } | ||
45 | var d Dict | ||
46 | // Repeat is the first value of the dict | ||
47 | r, n := binary.Uvarint(dict) | ||
48 | if n <= 0 { | ||
49 | return nil | ||
50 | } | ||
51 | dict = dict[n:] | ||
52 | d.dict = dict | ||
53 | if cap(d.dict) < len(d.dict)+16 { | ||
54 | d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) | ||
55 | } | ||
56 | if len(dict) < MinDictSize || len(dict) > MaxDictSize { | ||
57 | return nil | ||
58 | } | ||
59 | d.repeat = int(r) | ||
60 | if d.repeat > len(dict) { | ||
61 | return nil | ||
62 | } | ||
63 | return &d | ||
64 | } | ||
65 | |||
66 | // Bytes will return a serialized version of the dictionary. | ||
67 | // The output can be sent to NewDict. | ||
68 | func (d *Dict) Bytes() []byte { | ||
69 | dst := make([]byte, binary.MaxVarintLen16+len(d.dict)) | ||
70 | return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...) | ||
71 | } | ||
72 | |||
73 | // MakeDict will create a dictionary. | ||
74 | // 'data' must be at least MinDictSize. | ||
75 | // If data is longer than MaxDictSize only the last MaxDictSize bytes will be used. | ||
76 | // If searchStart is set the start repeat value will be set to the last | ||
77 | // match of this content. | ||
78 | // If no matches are found, it will attempt to find shorter matches. | ||
79 | // This content should match the typical start of a block. | ||
80 | // If at least 4 bytes cannot be matched, repeat is set to start of block. | ||
81 | func MakeDict(data []byte, searchStart []byte) *Dict { | ||
82 | if len(data) == 0 { | ||
83 | return nil | ||
84 | } | ||
85 | if len(data) > MaxDictSize { | ||
86 | data = data[len(data)-MaxDictSize:] | ||
87 | } | ||
88 | var d Dict | ||
89 | dict := data | ||
90 | d.dict = dict | ||
91 | if cap(d.dict) < len(d.dict)+16 { | ||
92 | d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) | ||
93 | } | ||
94 | if len(dict) < MinDictSize { | ||
95 | return nil | ||
96 | } | ||
97 | |||
98 | // Find the longest match possible, last entry if multiple. | ||
99 | for s := len(searchStart); s > 4; s-- { | ||
100 | if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 { | ||
101 | d.repeat = idx | ||
102 | break | ||
103 | } | ||
104 | } | ||
105 | |||
106 | return &d | ||
107 | } | ||
108 | |||
109 | // MakeDictManual will create a dictionary. | ||
110 | // 'data' must be at least MinDictSize and less than or equal to MaxDictSize. | ||
111 | // A manual first repeat index into data must be provided. | ||
112 | // It must be less than len(data)-8. | ||
113 | func MakeDictManual(data []byte, firstIdx uint16) *Dict { | ||
114 | if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize { | ||
115 | return nil | ||
116 | } | ||
117 | var d Dict | ||
118 | dict := data | ||
119 | d.dict = dict | ||
120 | if cap(d.dict) < len(d.dict)+16 { | ||
121 | d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) | ||
122 | } | ||
123 | |||
124 | d.repeat = int(firstIdx) | ||
125 | return &d | ||
126 | } | ||
127 | |||
128 | // Encode returns the encoded form of src. The returned slice may be a sub- | ||
129 | // slice of dst if dst was large enough to hold the entire encoded block. | ||
130 | // Otherwise, a newly allocated slice will be returned. | ||
131 | // | ||
132 | // The dst and src must not overlap. It is valid to pass a nil dst. | ||
133 | // | ||
134 | // The blocks will require the same amount of memory to decode as encoding, | ||
135 | // and does not make for concurrent decoding. | ||
136 | // Also note that blocks do not contain CRC information, so corruption may be undetected. | ||
137 | // | ||
138 | // If you need to encode larger amounts of data, consider using | ||
139 | // the streaming interface which gives all of these features. | ||
140 | func (d *Dict) Encode(dst, src []byte) []byte { | ||
141 | if n := MaxEncodedLen(len(src)); n < 0 { | ||
142 | panic(ErrTooLarge) | ||
143 | } else if cap(dst) < n { | ||
144 | dst = make([]byte, n) | ||
145 | } else { | ||
146 | dst = dst[:n] | ||
147 | } | ||
148 | |||
149 | // The block starts with the varint-encoded length of the decompressed bytes. | ||
150 | dstP := binary.PutUvarint(dst, uint64(len(src))) | ||
151 | |||
152 | if len(src) == 0 { | ||
153 | return dst[:dstP] | ||
154 | } | ||
155 | if len(src) < minNonLiteralBlockSize { | ||
156 | dstP += emitLiteral(dst[dstP:], src) | ||
157 | return dst[:dstP] | ||
158 | } | ||
159 | n := encodeBlockDictGo(dst[dstP:], src, d) | ||
160 | if n > 0 { | ||
161 | dstP += n | ||
162 | return dst[:dstP] | ||
163 | } | ||
164 | // Not compressible | ||
165 | dstP += emitLiteral(dst[dstP:], src) | ||
166 | return dst[:dstP] | ||
167 | } | ||
168 | |||
169 | // EncodeBetter returns the encoded form of src. The returned slice may be a sub- | ||
170 | // slice of dst if dst was large enough to hold the entire encoded block. | ||
171 | // Otherwise, a newly allocated slice will be returned. | ||
172 | // | ||
173 | // EncodeBetter compresses better than Encode but typically with a | ||
174 | // 10-40% speed decrease on both compression and decompression. | ||
175 | // | ||
176 | // The dst and src must not overlap. It is valid to pass a nil dst. | ||
177 | // | ||
178 | // The blocks will require the same amount of memory to decode as encoding, | ||
179 | // and does not make for concurrent decoding. | ||
180 | // Also note that blocks do not contain CRC information, so corruption may be undetected. | ||
181 | // | ||
182 | // If you need to encode larger amounts of data, consider using | ||
183 | // the streaming interface which gives all of these features. | ||
184 | func (d *Dict) EncodeBetter(dst, src []byte) []byte { | ||
185 | if n := MaxEncodedLen(len(src)); n < 0 { | ||
186 | panic(ErrTooLarge) | ||
187 | } else if len(dst) < n { | ||
188 | dst = make([]byte, n) | ||
189 | } | ||
190 | |||
191 | // The block starts with the varint-encoded length of the decompressed bytes. | ||
192 | dstP := binary.PutUvarint(dst, uint64(len(src))) | ||
193 | |||
194 | if len(src) == 0 { | ||
195 | return dst[:dstP] | ||
196 | } | ||
197 | if len(src) < minNonLiteralBlockSize { | ||
198 | dstP += emitLiteral(dst[dstP:], src) | ||
199 | return dst[:dstP] | ||
200 | } | ||
201 | n := encodeBlockBetterDict(dst[dstP:], src, d) | ||
202 | if n > 0 { | ||
203 | dstP += n | ||
204 | return dst[:dstP] | ||
205 | } | ||
206 | // Not compressible | ||
207 | dstP += emitLiteral(dst[dstP:], src) | ||
208 | return dst[:dstP] | ||
209 | } | ||
210 | |||
211 | // EncodeBest returns the encoded form of src. The returned slice may be a sub- | ||
212 | // slice of dst if dst was large enough to hold the entire encoded block. | ||
213 | // Otherwise, a newly allocated slice will be returned. | ||
214 | // | ||
215 | // EncodeBest compresses as good as reasonably possible but with a | ||
216 | // big speed decrease. | ||
217 | // | ||
218 | // The dst and src must not overlap. It is valid to pass a nil dst. | ||
219 | // | ||
220 | // The blocks will require the same amount of memory to decode as encoding, | ||
221 | // and does not make for concurrent decoding. | ||
222 | // Also note that blocks do not contain CRC information, so corruption may be undetected. | ||
223 | // | ||
224 | // If you need to encode larger amounts of data, consider using | ||
225 | // the streaming interface which gives all of these features. | ||
226 | func (d *Dict) EncodeBest(dst, src []byte) []byte { | ||
227 | if n := MaxEncodedLen(len(src)); n < 0 { | ||
228 | panic(ErrTooLarge) | ||
229 | } else if len(dst) < n { | ||
230 | dst = make([]byte, n) | ||
231 | } | ||
232 | |||
233 | // The block starts with the varint-encoded length of the decompressed bytes. | ||
234 | dstP := binary.PutUvarint(dst, uint64(len(src))) | ||
235 | |||
236 | if len(src) == 0 { | ||
237 | return dst[:dstP] | ||
238 | } | ||
239 | if len(src) < minNonLiteralBlockSize { | ||
240 | dstP += emitLiteral(dst[dstP:], src) | ||
241 | return dst[:dstP] | ||
242 | } | ||
243 | n := encodeBlockBest(dst[dstP:], src, d) | ||
244 | if n > 0 { | ||
245 | dstP += n | ||
246 | return dst[:dstP] | ||
247 | } | ||
248 | // Not compressible | ||
249 | dstP += emitLiteral(dst[dstP:], src) | ||
250 | return dst[:dstP] | ||
251 | } | ||
252 | |||
253 | // Decode returns the decoded form of src. The returned slice may be a sub- | ||
254 | // slice of dst if dst was large enough to hold the entire decoded block. | ||
255 | // Otherwise, a newly allocated slice will be returned. | ||
256 | // | ||
257 | // The dst and src must not overlap. It is valid to pass a nil dst. | ||
258 | func (d *Dict) Decode(dst, src []byte) ([]byte, error) { | ||
259 | dLen, s, err := decodedLen(src) | ||
260 | if err != nil { | ||
261 | return nil, err | ||
262 | } | ||
263 | if dLen <= cap(dst) { | ||
264 | dst = dst[:dLen] | ||
265 | } else { | ||
266 | dst = make([]byte, dLen) | ||
267 | } | ||
268 | if s2DecodeDict(dst, src[s:], d) != 0 { | ||
269 | return nil, ErrCorrupt | ||
270 | } | ||
271 | return dst, nil | ||
272 | } | ||
273 | |||
274 | func (d *Dict) initFast() { | ||
275 | d.fast.Do(func() { | ||
276 | const ( | ||
277 | tableBits = 14 | ||
278 | maxTableSize = 1 << tableBits | ||
279 | ) | ||
280 | |||
281 | var table [maxTableSize]uint16 | ||
282 | // We stop so any entry of length 8 can always be read. | ||
283 | for i := 0; i < len(d.dict)-8-2; i += 3 { | ||
284 | x0 := load64(d.dict, i) | ||
285 | h0 := hash6(x0, tableBits) | ||
286 | h1 := hash6(x0>>8, tableBits) | ||
287 | h2 := hash6(x0>>16, tableBits) | ||
288 | table[h0] = uint16(i) | ||
289 | table[h1] = uint16(i + 1) | ||
290 | table[h2] = uint16(i + 2) | ||
291 | } | ||
292 | d.fastTable = &table | ||
293 | }) | ||
294 | } | ||
295 | |||
296 | func (d *Dict) initBetter() { | ||
297 | d.better.Do(func() { | ||
298 | const ( | ||
299 | // Long hash matches. | ||
300 | lTableBits = 17 | ||
301 | maxLTableSize = 1 << lTableBits | ||
302 | |||
303 | // Short hash matches. | ||
304 | sTableBits = 14 | ||
305 | maxSTableSize = 1 << sTableBits | ||
306 | ) | ||
307 | |||
308 | var lTable [maxLTableSize]uint16 | ||
309 | var sTable [maxSTableSize]uint16 | ||
310 | |||
311 | // We stop so any entry of length 8 can always be read. | ||
312 | for i := 0; i < len(d.dict)-8; i++ { | ||
313 | cv := load64(d.dict, i) | ||
314 | lTable[hash7(cv, lTableBits)] = uint16(i) | ||
315 | sTable[hash4(cv, sTableBits)] = uint16(i) | ||
316 | } | ||
317 | d.betterTableShort = &sTable | ||
318 | d.betterTableLong = &lTable | ||
319 | }) | ||
320 | } | ||
321 | |||
322 | func (d *Dict) initBest() { | ||
323 | d.best.Do(func() { | ||
324 | const ( | ||
325 | // Long hash matches. | ||
326 | lTableBits = 19 | ||
327 | maxLTableSize = 1 << lTableBits | ||
328 | |||
329 | // Short hash matches. | ||
330 | sTableBits = 16 | ||
331 | maxSTableSize = 1 << sTableBits | ||
332 | ) | ||
333 | |||
334 | var lTable [maxLTableSize]uint32 | ||
335 | var sTable [maxSTableSize]uint32 | ||
336 | |||
337 | // We stop so any entry of length 8 can always be read. | ||
338 | for i := 0; i < len(d.dict)-8; i++ { | ||
339 | cv := load64(d.dict, i) | ||
340 | hashL := hash8(cv, lTableBits) | ||
341 | hashS := hash4(cv, sTableBits) | ||
342 | candidateL := lTable[hashL] | ||
343 | candidateS := sTable[hashS] | ||
344 | lTable[hashL] = uint32(i) | candidateL<<16 | ||
345 | sTable[hashS] = uint32(i) | candidateS<<16 | ||
346 | } | ||
347 | d.bestTableShort = &sTable | ||
348 | d.bestTableLong = &lTable | ||
349 | }) | ||
350 | } | ||