aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/klauspost/compress/s2/lz4convert.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/lz4convert.go')
-rw-r--r--vendor/github.com/klauspost/compress/s2/lz4convert.go585
1 files changed, 585 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/lz4convert.go b/vendor/github.com/klauspost/compress/s2/lz4convert.go
new file mode 100644
index 0000000..46ed908
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/s2/lz4convert.go
@@ -0,0 +1,585 @@
1// Copyright (c) 2022 Klaus Post. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package s2
6
7import (
8 "encoding/binary"
9 "errors"
10 "fmt"
11)
12
13// LZ4Converter provides conversion from LZ4 blocks as defined here:
14// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
15type LZ4Converter struct {
16}
17
18// ErrDstTooSmall is returned when provided destination is too small.
19var ErrDstTooSmall = errors.New("s2: destination too small")
20
21// ConvertBlock will convert an LZ4 block and append it as an S2
22// block without block length to dst.
23// The uncompressed size is returned as well.
24// dst must have capacity to contain the entire compressed block.
25func (l *LZ4Converter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
26 if len(src) == 0 {
27 return dst, 0, nil
28 }
29 const debug = false
30 const inline = true
31 const lz4MinMatch = 4
32
33 s, d := 0, len(dst)
34 dst = dst[:cap(dst)]
35 if !debug && hasAmd64Asm {
36 res, sz := cvtLZ4BlockAsm(dst[d:], src)
37 if res < 0 {
38 const (
39 errCorrupt = -1
40 errDstTooSmall = -2
41 )
42 switch res {
43 case errCorrupt:
44 return nil, 0, ErrCorrupt
45 case errDstTooSmall:
46 return nil, 0, ErrDstTooSmall
47 default:
48 return nil, 0, fmt.Errorf("unexpected result: %d", res)
49 }
50 }
51 if d+sz > len(dst) {
52 return nil, 0, ErrDstTooSmall
53 }
54 return dst[:d+sz], res, nil
55 }
56
57 dLimit := len(dst) - 10
58 var lastOffset uint16
59 var uncompressed int
60 if debug {
61 fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
62 }
63
64 for {
65 if s >= len(src) {
66 return dst[:d], 0, ErrCorrupt
67 }
68 // Read literal info
69 token := src[s]
70 ll := int(token >> 4)
71 ml := int(lz4MinMatch + (token & 0xf))
72
73 // If upper nibble is 15, literal length is extended
74 if token >= 0xf0 {
75 for {
76 s++
77 if s >= len(src) {
78 if debug {
79 fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
80 }
81 return dst[:d], 0, ErrCorrupt
82 }
83 val := src[s]
84 ll += int(val)
85 if val != 255 {
86 break
87 }
88 }
89 }
90 // Skip past token
91 if s+ll >= len(src) {
92 if debug {
93 fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
94 }
95 return nil, 0, ErrCorrupt
96 }
97 s++
98 if ll > 0 {
99 if d+ll > dLimit {
100 return nil, 0, ErrDstTooSmall
101 }
102 if debug {
103 fmt.Printf("emit %d literals\n", ll)
104 }
105 d += emitLiteralGo(dst[d:], src[s:s+ll])
106 s += ll
107 uncompressed += ll
108 }
109
110 // Check if we are done...
111 if s == len(src) && ml == lz4MinMatch {
112 break
113 }
114 // 2 byte offset
115 if s >= len(src)-2 {
116 if debug {
117 fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
118 }
119 return nil, 0, ErrCorrupt
120 }
121 offset := binary.LittleEndian.Uint16(src[s:])
122 s += 2
123 if offset == 0 {
124 if debug {
125 fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
126 }
127 return nil, 0, ErrCorrupt
128 }
129 if int(offset) > uncompressed {
130 if debug {
131 fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
132 }
133 return nil, 0, ErrCorrupt
134 }
135
136 if ml == lz4MinMatch+15 {
137 for {
138 if s >= len(src) {
139 if debug {
140 fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
141 }
142 return nil, 0, ErrCorrupt
143 }
144 val := src[s]
145 s++
146 ml += int(val)
147 if val != 255 {
148 if s >= len(src) {
149 if debug {
150 fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
151 }
152 return nil, 0, ErrCorrupt
153 }
154 break
155 }
156 }
157 }
158 if offset == lastOffset {
159 if debug {
160 fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
161 }
162 if !inline {
163 d += emitRepeat16(dst[d:], offset, ml)
164 } else {
165 length := ml
166 dst := dst[d:]
167 for len(dst) > 5 {
168 // Repeat offset, make length cheaper
169 length -= 4
170 if length <= 4 {
171 dst[0] = uint8(length)<<2 | tagCopy1
172 dst[1] = 0
173 d += 2
174 break
175 }
176 if length < 8 && offset < 2048 {
177 // Encode WITH offset
178 dst[1] = uint8(offset)
179 dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
180 d += 2
181 break
182 }
183 if length < (1<<8)+4 {
184 length -= 4
185 dst[2] = uint8(length)
186 dst[1] = 0
187 dst[0] = 5<<2 | tagCopy1
188 d += 3
189 break
190 }
191 if length < (1<<16)+(1<<8) {
192 length -= 1 << 8
193 dst[3] = uint8(length >> 8)
194 dst[2] = uint8(length >> 0)
195 dst[1] = 0
196 dst[0] = 6<<2 | tagCopy1
197 d += 4
198 break
199 }
200 const maxRepeat = (1 << 24) - 1
201 length -= 1 << 16
202 left := 0
203 if length > maxRepeat {
204 left = length - maxRepeat + 4
205 length = maxRepeat - 4
206 }
207 dst[4] = uint8(length >> 16)
208 dst[3] = uint8(length >> 8)
209 dst[2] = uint8(length >> 0)
210 dst[1] = 0
211 dst[0] = 7<<2 | tagCopy1
212 if left > 0 {
213 d += 5 + emitRepeat16(dst[5:], offset, left)
214 break
215 }
216 d += 5
217 break
218 }
219 }
220 } else {
221 if debug {
222 fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
223 }
224 if !inline {
225 d += emitCopy16(dst[d:], offset, ml)
226 } else {
227 length := ml
228 dst := dst[d:]
229 for len(dst) > 5 {
230 // Offset no more than 2 bytes.
231 if length > 64 {
232 off := 3
233 if offset < 2048 {
234 // emit 8 bytes as tagCopy1, rest as repeats.
235 dst[1] = uint8(offset)
236 dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
237 length -= 8
238 off = 2
239 } else {
240 // Emit a length 60 copy, encoded as 3 bytes.
241 // Emit remaining as repeat value (minimum 4 bytes).
242 dst[2] = uint8(offset >> 8)
243 dst[1] = uint8(offset)
244 dst[0] = 59<<2 | tagCopy2
245 length -= 60
246 }
247 // Emit remaining as repeats, at least 4 bytes remain.
248 d += off + emitRepeat16(dst[off:], offset, length)
249 break
250 }
251 if length >= 12 || offset >= 2048 {
252 // Emit the remaining copy, encoded as 3 bytes.
253 dst[2] = uint8(offset >> 8)
254 dst[1] = uint8(offset)
255 dst[0] = uint8(length-1)<<2 | tagCopy2
256 d += 3
257 break
258 }
259 // Emit the remaining copy, encoded as 2 bytes.
260 dst[1] = uint8(offset)
261 dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
262 d += 2
263 break
264 }
265 }
266 lastOffset = offset
267 }
268 uncompressed += ml
269 if d > dLimit {
270 return nil, 0, ErrDstTooSmall
271 }
272 }
273
274 return dst[:d], uncompressed, nil
275}
276
277// ConvertBlockSnappy will convert an LZ4 block and append it
278// as a Snappy block without block length to dst.
279// The uncompressed size is returned as well.
280// dst must have capacity to contain the entire compressed block.
281func (l *LZ4Converter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
282 if len(src) == 0 {
283 return dst, 0, nil
284 }
285 const debug = false
286 const lz4MinMatch = 4
287
288 s, d := 0, len(dst)
289 dst = dst[:cap(dst)]
290 // Use assembly when possible
291 if !debug && hasAmd64Asm {
292 res, sz := cvtLZ4BlockSnappyAsm(dst[d:], src)
293 if res < 0 {
294 const (
295 errCorrupt = -1
296 errDstTooSmall = -2
297 )
298 switch res {
299 case errCorrupt:
300 return nil, 0, ErrCorrupt
301 case errDstTooSmall:
302 return nil, 0, ErrDstTooSmall
303 default:
304 return nil, 0, fmt.Errorf("unexpected result: %d", res)
305 }
306 }
307 if d+sz > len(dst) {
308 return nil, 0, ErrDstTooSmall
309 }
310 return dst[:d+sz], res, nil
311 }
312
313 dLimit := len(dst) - 10
314 var uncompressed int
315 if debug {
316 fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
317 }
318
319 for {
320 if s >= len(src) {
321 return nil, 0, ErrCorrupt
322 }
323 // Read literal info
324 token := src[s]
325 ll := int(token >> 4)
326 ml := int(lz4MinMatch + (token & 0xf))
327
328 // If upper nibble is 15, literal length is extended
329 if token >= 0xf0 {
330 for {
331 s++
332 if s >= len(src) {
333 if debug {
334 fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
335 }
336 return nil, 0, ErrCorrupt
337 }
338 val := src[s]
339 ll += int(val)
340 if val != 255 {
341 break
342 }
343 }
344 }
345 // Skip past token
346 if s+ll >= len(src) {
347 if debug {
348 fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
349 }
350 return nil, 0, ErrCorrupt
351 }
352 s++
353 if ll > 0 {
354 if d+ll > dLimit {
355 return nil, 0, ErrDstTooSmall
356 }
357 if debug {
358 fmt.Printf("emit %d literals\n", ll)
359 }
360 d += emitLiteralGo(dst[d:], src[s:s+ll])
361 s += ll
362 uncompressed += ll
363 }
364
365 // Check if we are done...
366 if s == len(src) && ml == lz4MinMatch {
367 break
368 }
369 // 2 byte offset
370 if s >= len(src)-2 {
371 if debug {
372 fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
373 }
374 return nil, 0, ErrCorrupt
375 }
376 offset := binary.LittleEndian.Uint16(src[s:])
377 s += 2
378 if offset == 0 {
379 if debug {
380 fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
381 }
382 return nil, 0, ErrCorrupt
383 }
384 if int(offset) > uncompressed {
385 if debug {
386 fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
387 }
388 return nil, 0, ErrCorrupt
389 }
390
391 if ml == lz4MinMatch+15 {
392 for {
393 if s >= len(src) {
394 if debug {
395 fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
396 }
397 return nil, 0, ErrCorrupt
398 }
399 val := src[s]
400 s++
401 ml += int(val)
402 if val != 255 {
403 if s >= len(src) {
404 if debug {
405 fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
406 }
407 return nil, 0, ErrCorrupt
408 }
409 break
410 }
411 }
412 }
413 if debug {
414 fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
415 }
416 length := ml
417 // d += emitCopyNoRepeat(dst[d:], int(offset), ml)
418 for length > 0 {
419 if d >= dLimit {
420 return nil, 0, ErrDstTooSmall
421 }
422
423 // Offset no more than 2 bytes.
424 if length > 64 {
425 // Emit a length 64 copy, encoded as 3 bytes.
426 dst[d+2] = uint8(offset >> 8)
427 dst[d+1] = uint8(offset)
428 dst[d+0] = 63<<2 | tagCopy2
429 length -= 64
430 d += 3
431 continue
432 }
433 if length >= 12 || offset >= 2048 || length < 4 {
434 // Emit the remaining copy, encoded as 3 bytes.
435 dst[d+2] = uint8(offset >> 8)
436 dst[d+1] = uint8(offset)
437 dst[d+0] = uint8(length-1)<<2 | tagCopy2
438 d += 3
439 break
440 }
441 // Emit the remaining copy, encoded as 2 bytes.
442 dst[d+1] = uint8(offset)
443 dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
444 d += 2
445 break
446 }
447 uncompressed += ml
448 if d > dLimit {
449 return nil, 0, ErrDstTooSmall
450 }
451 }
452
453 return dst[:d], uncompressed, nil
454}
455
456// emitRepeat writes a repeat chunk and returns the number of bytes written.
457// Length must be at least 4 and < 1<<24
458func emitRepeat16(dst []byte, offset uint16, length int) int {
459 // Repeat offset, make length cheaper
460 length -= 4
461 if length <= 4 {
462 dst[0] = uint8(length)<<2 | tagCopy1
463 dst[1] = 0
464 return 2
465 }
466 if length < 8 && offset < 2048 {
467 // Encode WITH offset
468 dst[1] = uint8(offset)
469 dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
470 return 2
471 }
472 if length < (1<<8)+4 {
473 length -= 4
474 dst[2] = uint8(length)
475 dst[1] = 0
476 dst[0] = 5<<2 | tagCopy1
477 return 3
478 }
479 if length < (1<<16)+(1<<8) {
480 length -= 1 << 8
481 dst[3] = uint8(length >> 8)
482 dst[2] = uint8(length >> 0)
483 dst[1] = 0
484 dst[0] = 6<<2 | tagCopy1
485 return 4
486 }
487 const maxRepeat = (1 << 24) - 1
488 length -= 1 << 16
489 left := 0
490 if length > maxRepeat {
491 left = length - maxRepeat + 4
492 length = maxRepeat - 4
493 }
494 dst[4] = uint8(length >> 16)
495 dst[3] = uint8(length >> 8)
496 dst[2] = uint8(length >> 0)
497 dst[1] = 0
498 dst[0] = 7<<2 | tagCopy1
499 if left > 0 {
500 return 5 + emitRepeat16(dst[5:], offset, left)
501 }
502 return 5
503}
504
505// emitCopy writes a copy chunk and returns the number of bytes written.
506//
507// It assumes that:
508//
509// dst is long enough to hold the encoded bytes
510// 1 <= offset && offset <= math.MaxUint16
511// 4 <= length && length <= math.MaxUint32
512func emitCopy16(dst []byte, offset uint16, length int) int {
513 // Offset no more than 2 bytes.
514 if length > 64 {
515 off := 3
516 if offset < 2048 {
517 // emit 8 bytes as tagCopy1, rest as repeats.
518 dst[1] = uint8(offset)
519 dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
520 length -= 8
521 off = 2
522 } else {
523 // Emit a length 60 copy, encoded as 3 bytes.
524 // Emit remaining as repeat value (minimum 4 bytes).
525 dst[2] = uint8(offset >> 8)
526 dst[1] = uint8(offset)
527 dst[0] = 59<<2 | tagCopy2
528 length -= 60
529 }
530 // Emit remaining as repeats, at least 4 bytes remain.
531 return off + emitRepeat16(dst[off:], offset, length)
532 }
533 if length >= 12 || offset >= 2048 {
534 // Emit the remaining copy, encoded as 3 bytes.
535 dst[2] = uint8(offset >> 8)
536 dst[1] = uint8(offset)
537 dst[0] = uint8(length-1)<<2 | tagCopy2
538 return 3
539 }
540 // Emit the remaining copy, encoded as 2 bytes.
541 dst[1] = uint8(offset)
542 dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
543 return 2
544}
545
546// emitLiteral writes a literal chunk and returns the number of bytes written.
547//
548// It assumes that:
549//
550// dst is long enough to hold the encoded bytes
551// 0 <= len(lit) && len(lit) <= math.MaxUint32
552func emitLiteralGo(dst, lit []byte) int {
553 if len(lit) == 0 {
554 return 0
555 }
556 i, n := 0, uint(len(lit)-1)
557 switch {
558 case n < 60:
559 dst[0] = uint8(n)<<2 | tagLiteral
560 i = 1
561 case n < 1<<8:
562 dst[1] = uint8(n)
563 dst[0] = 60<<2 | tagLiteral
564 i = 2
565 case n < 1<<16:
566 dst[2] = uint8(n >> 8)
567 dst[1] = uint8(n)
568 dst[0] = 61<<2 | tagLiteral
569 i = 3
570 case n < 1<<24:
571 dst[3] = uint8(n >> 16)
572 dst[2] = uint8(n >> 8)
573 dst[1] = uint8(n)
574 dst[0] = 62<<2 | tagLiteral
575 i = 4
576 default:
577 dst[4] = uint8(n >> 24)
578 dst[3] = uint8(n >> 16)
579 dst[2] = uint8(n >> 8)
580 dst[1] = uint8(n)
581 dst[0] = 63<<2 | tagLiteral
582 i = 5
583 }
584 return i + copy(dst[i:], lit)
585}