Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encode.go')
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encode.go  393
1 file changed, 393 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go
new file mode 100644
index 0000000..0c9088a
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/s2/encode.go
@@ -0,0 +1,393 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"encoding/binary"
	"math"
	"math/bits"
)

// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for
// encoding, and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func Encode(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlock(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
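
// The function below is an editor's illustrative sketch, not part of the upstream
// vendored file. It shows the basic block round trip: Encode produces a block and
// Decode (the block decoder assumed to be defined elsewhere in this package with the
// signature Decode(dst, src []byte) ([]byte, error)) restores the original bytes.
func encodeRoundTripSketch(src []byte) ([]byte, error) {
	encoded := Encode(nil, src) // nil dst: Encode allocates a buffer of MaxEncodedLen(len(src))
	return Decode(nil, encoded) // returns a slice equal to src on success
}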

// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the compressed output size.
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
	if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
		return -1
	}
	if len(src) <= 1024 {
		d = calcBlockSizeSmall(src)
	} else {
		d = calcBlockSize(src)
	}

	if d == 0 {
		return -1
	}
	// Size of the varint encoded block size.
	d += (bits.Len64(uint64(len(src))) + 7) / 7

	if d >= len(src) {
		return -1
	}
	return d
}
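
// Editor's illustrative sketch (not upstream code): compare the fast estimate with the
// size of an actual Encode call on the same input. The estimate skips output generation,
// so a real encode usually compresses at least as well as the estimate suggests.
func estimateVsActualSketch(src []byte) (estimate, actual int) {
	estimate = EstimateBlockSize(src) // -1 when no improvement is expected
	actual = len(Encode(nil, src))    // size of a real fast-mode block
	return estimate, actual
}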

// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBetter compresses better than Encode but typically with a
// 10-40% speed decrease on both compression and decompression.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for
// encoding, and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeBetter(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBetter(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBest compresses as well as reasonably possible but with a
// big speed decrease.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for
// encoding, and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeBest(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBest(dst[d:], src, nil)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for
// encoding, and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeSnappy(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
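
// Editor's illustrative sketch (not upstream code): EncodeSnappy output stays within
// the Snappy wire format, so any Snappy block decoder can read it. Since S2 is a
// superset of Snappy, the Decode function assumed to exist elsewhere in this package
// is expected to handle these blocks as well.
func snappyCompatSketch(src []byte) ([]byte, error) {
	encoded := EncodeSnappy(nil, src) // Snappy-compatible block
	return Decode(nil, encoded)       // S2 decoder also accepts plain Snappy blocks
}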

// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for
// encoding, and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeSnappyBetter(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockBetterSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for
// encoding, and do not allow for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeSnappyBest(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}

	n := encodeBlockBestSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
// If the destination is nil or too small, a new one will be allocated.
// The blocks are not validated, so garbage in = garbage out.
// dst may not overlap block data.
// Any data in dst is preserved as is, so it will not be considered a block.
func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
	totalSize := uint64(0)
	compSize := 0
	for _, b := range blocks {
		l, hdr, err := decodedLen(b)
		if err != nil {
			return nil, err
		}
		totalSize += uint64(l)
		compSize += len(b) - hdr
	}
	if totalSize == 0 {
		dst = append(dst, 0)
		return dst, nil
	}
	if totalSize > math.MaxUint32 {
		return nil, ErrTooLarge
	}
	var tmp [binary.MaxVarintLen32]byte
	hdrSize := binary.PutUvarint(tmp[:], totalSize)
	wantSize := hdrSize + compSize

	if cap(dst)-len(dst) < wantSize {
		dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
	}
	dst = append(dst, tmp[:hdrSize]...)
	for _, b := range blocks {
		_, hdr, err := decodedLen(b)
		if err != nil {
			return nil, err
		}
		dst = append(dst, b[hdr:]...)
	}
	return dst, nil
}
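
// Editor's illustrative sketch (not upstream code): two independently encoded blocks are
// joined into a single block whose decoded output is the concatenation of the inputs.
// Decode is the block decoder assumed to exist elsewhere in this package.
func concatBlocksSketch(a, b []byte) ([]byte, error) {
	joined, err := ConcatBlocks(nil, Encode(nil, a), Encode(nil, b))
	if err != nil {
		return nil, err
	}
	return Decode(nil, joined) // yields append(a, b...) on success
}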

// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 8

// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// will be accepted by the encoder.
const minNonLiteralBlockSize = 32

const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)

// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
// Blocks this big are highly discouraged, though.
// Half the size on 32 bit systems.
const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5

// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
// 32 bit platforms will have lower thresholds for rejecting big content.
func MaxEncodedLen(srcLen int) int {
	n := uint64(srcLen)
	if intReduction == 1 {
		// 32 bits
		if n > math.MaxInt32 {
			// Also includes negative.
			return -1
		}
	} else if n > 0xffffffff {
		// 64 bits
		// Also includes negative.
		return -1
	}
	// Size of the varint encoded block size.
	n = n + uint64((bits.Len64(n)+7)/7)

	// Add maximum size of encoding block as literals.
	n += uint64(literalExtraSize(int64(srcLen)))
	if intReduction == 1 {
		// 32 bits
		if n > math.MaxInt32 {
			return -1
		}
	} else if n > 0xffffffff {
		// 64 bits
		// Also includes negative.
		return -1
	}
	return int(n)
}
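
// Editor's illustrative sketch (not upstream code): callers that want to avoid an
// allocation inside Encode can size dst with MaxEncodedLen up front; Encode then
// reuses the buffer because cap(dst) >= MaxEncodedLen(len(src)).
func preallocSketch(src []byte) []byte {
	n := MaxEncodedLen(len(src))
	if n < 0 {
		return nil // src is too large to encode on this platform
	}
	dst := make([]byte, 0, n) // capacity is what Encode checks
	return Encode(dst, src)
}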