From 8db41da676ac8368ef7c2549d56239a5ff5eedde Mon Sep 17 00:00:00 2001
From: Rutger Broekhoff
Date: Tue, 2 Jan 2024 18:56:31 +0100
Subject: Delete vendor directory

---
 .../klauspost/compress/s2/decode_arm64.s           | 574 ---------------------
 1 file changed, 574 deletions(-)
 delete mode 100644 vendor/github.com/klauspost/compress/s2/decode_arm64.s

(limited to 'vendor/github.com/klauspost/compress/s2/decode_arm64.s')

diff --git a/vendor/github.com/klauspost/compress/s2/decode_arm64.s b/vendor/github.com/klauspost/compress/s2/decode_arm64.s
deleted file mode 100644
index 4b63d50..0000000
--- a/vendor/github.com/klauspost/compress/s2/decode_arm64.s
+++ /dev/null
@@ -1,574 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-#define R_TMP0 R2
-#define R_TMP1 R3
-#define R_LEN R4
-#define R_OFF R5
-#define R_SRC R6
-#define R_DST R7
-#define R_DBASE R8
-#define R_DLEN R9
-#define R_DEND R10
-#define R_SBASE R11
-#define R_SLEN R12
-#define R_SEND R13
-#define R_TMP2 R14
-#define R_TMP3 R15
-
-// TEST_SRC will check if R_SRC is <= SRC_END
-#define TEST_SRC() \
-	CMP R_SEND, R_SRC \
-	BGT errCorrupt
-
-// MOVD R_SRC, R_TMP1
-// SUB  R_SBASE, R_TMP1, R_TMP1
-// CMP  R_SLEN, R_TMP1
-// BGT  errCorrupt
-
-// The asm code generally follows the pure Go code in decode_other.go, except
-// where marked with a "!!!".
-
-// func decode(dst, src []byte) int
-//
-// All local variables fit into registers. The non-zero stack size is only to
-// spill registers and push args when issuing a CALL. The register allocation:
-//	- R_TMP0	scratch
-//	- R_TMP1	scratch
-//	- R_LEN	length or x
-//	- R_OFF	offset
-//	- R_SRC	&src[s]
-//	- R_DST	&dst[d]
-//	+ R_DBASE	dst_base
-//	+ R_DLEN	dst_len
-//	+ R_DEND	dst_base + dst_len
-//	+ R_SBASE	src_base
-//	+ R_SLEN	src_len
-//	+ R_SEND	src_base + src_len
-//	- R_TMP2	used by doCopy
-//	- R_TMP3	used by doCopy
-//
-// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
-// function, and after a CALL returns, and are not otherwise modified.
-//
-// The d variable is implicitly R_DST - R_DBASE,  and len(dst)-d is R_DEND - R_DST.
-// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
-TEXT ·s2Decode(SB), NOSPLIT, $56-64
-	// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
-	MOVD dst_base+0(FP), R_DBASE
-	MOVD dst_len+8(FP), R_DLEN
-	MOVD R_DBASE, R_DST
-	MOVD R_DBASE, R_DEND
-	ADD  R_DLEN, R_DEND, R_DEND
-	MOVD src_base+24(FP), R_SBASE
-	MOVD src_len+32(FP), R_SLEN
-	MOVD R_SBASE, R_SRC
-	MOVD R_SBASE, R_SEND
-	ADD  R_SLEN, R_SEND, R_SEND
-	MOVD $0, R_OFF
-
-loop:
-	// for s < len(src)
-	CMP R_SEND, R_SRC
-	BEQ end
-
-	// R_LEN = uint32(src[s])
-	//
-	// switch src[s] & 0x03
-	MOVBU (R_SRC), R_LEN
-	MOVW  R_LEN, R_TMP1
-	ANDW  $3, R_TMP1
-	MOVW  $1, R1
-	CMPW  R1, R_TMP1
-	BGE   tagCopy
-
-	// ----------------------------------------
-	// The code below handles literal tags.
-
-	// case tagLiteral:
-	// x := uint32(src[s] >> 2)
-	// switch
-	MOVW $60, R1
-	LSRW $2, R_LEN, R_LEN
-	CMPW R_LEN, R1
-	BLS  tagLit60Plus
-
-	// case x < 60:
-	// s++
-	ADD $1, R_SRC, R_SRC
-
-doLit:
-	// This is the end of the inner "switch", when we have a literal tag.
-	//
-	// We assume that R_LEN == x and x fits in a uint32, where x is the variable
-	// used in the pure Go decode_other.go code.
-
-	// length = int(x) + 1
-	//
-	// Unlike the pure Go code, we don't need to check if length <= 0 because
-	// R_LEN can hold 64 bits, so the increment cannot overflow.
-	ADD $1, R_LEN, R_LEN
-
-	// Prepare to check if copying length bytes will run past the end of dst or
-	// src.
-	//
-	// R_TMP0 = len(dst) - d
-	// R_TMP1 = len(src) - s
-	MOVD R_DEND, R_TMP0
-	SUB  R_DST, R_TMP0, R_TMP0
-	MOVD R_SEND, R_TMP1
-	SUB  R_SRC, R_TMP1, R_TMP1
-
-	// !!! Try a faster technique for short (16 or fewer bytes) copies.
-	//
-	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
-	//   goto callMemmove // Fall back on calling runtime·memmove.
-	// }
-	//
-	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
-	// against 21 instead of 16, because it cannot assume that all of its input
-	// is contiguous in memory and so it needs to leave enough source bytes to
-	// read the next tag without refilling buffers, but Go's Decode assumes
-	// contiguousness (the src argument is a []byte).
-	CMP $16, R_LEN
-	BGT callMemmove
-	CMP $16, R_TMP0
-	BLT callMemmove
-	CMP $16, R_TMP1
-	BLT callMemmove
-
-	// !!! Implement the copy from src to dst as a 16-byte load and store.
-	// (Decode's documentation says that dst and src must not overlap.)
-	//
-	// This always copies 16 bytes, instead of only length bytes, but that's
-	// OK. If the input is a valid Snappy encoding then subsequent iterations
-	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
-	// non-nil error), so the overrun will be ignored.
-	//
-	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
-	// 16-byte loads and stores. This technique probably wouldn't be as
-	// effective on architectures that are fussier about alignment.
-	LDP 0(R_SRC), (R_TMP2, R_TMP3)
-	STP (R_TMP2, R_TMP3), 0(R_DST)
-
-	// d += length
-	// s += length
-	ADD R_LEN, R_DST, R_DST
-	ADD R_LEN, R_SRC, R_SRC
-	B   loop
-
-callMemmove:
-	// if length > len(dst)-d || length > len(src)-s { etc }
-	CMP R_TMP0, R_LEN
-	BGT errCorrupt
-	CMP R_TMP1, R_LEN
-	BGT errCorrupt
-
-	// copy(dst[d:], src[s:s+length])
-	//
-	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
-	// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
-	// three registers to the stack, to save local variables across the CALL.
-	MOVD R_DST, 8(RSP)
-	MOVD R_SRC, 16(RSP)
-	MOVD R_LEN, 24(RSP)
-	MOVD R_DST, 32(RSP)
-	MOVD R_SRC, 40(RSP)
-	MOVD R_LEN, 48(RSP)
-	MOVD R_OFF, 56(RSP)
-	CALL runtime·memmove(SB)
-
-	// Restore local variables: unspill registers from the stack and
-	// re-calculate R_DBASE-R_SEND.
-	MOVD 32(RSP), R_DST
-	MOVD 40(RSP), R_SRC
-	MOVD 48(RSP), R_LEN
-	MOVD 56(RSP), R_OFF
-	MOVD dst_base+0(FP), R_DBASE
-	MOVD dst_len+8(FP), R_DLEN
-	MOVD R_DBASE, R_DEND
-	ADD  R_DLEN, R_DEND, R_DEND
-	MOVD src_base+24(FP), R_SBASE
-	MOVD src_len+32(FP), R_SLEN
-	MOVD R_SBASE, R_SEND
-	ADD  R_SLEN, R_SEND, R_SEND
-
-	// d += length
-	// s += length
-	ADD R_LEN, R_DST, R_DST
-	ADD R_LEN, R_SRC, R_SRC
-	B   loop
-
-tagLit60Plus:
-	// !!! This fragment does the
-	//
-	// s += x - 58; if uint(s) > uint(len(src)) { etc }
-	//
-	// checks. In the asm version, we code it once instead of once per switch case.
-	ADD R_LEN, R_SRC, R_SRC
-	SUB $58, R_SRC, R_SRC
-	TEST_SRC()
-
-	// case x == 60:
-	MOVW $61, R1
-	CMPW R1, R_LEN
-	BEQ  tagLit61
-	BGT  tagLit62Plus
-
-	// x = uint32(src[s-1])
-	MOVBU -1(R_SRC), R_LEN
-	B     doLit
-
-tagLit61:
-	// case x == 61:
-	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
-	MOVHU -2(R_SRC), R_LEN
-	B     doLit
-
-tagLit62Plus:
-	CMPW $62, R_LEN
-	BHI  tagLit63
-
-	// case x == 62:
-	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
-	MOVHU -3(R_SRC), R_LEN
-	MOVBU -1(R_SRC), R_TMP1
-	ORR   R_TMP1<<16, R_LEN
-	B     doLit
-
-tagLit63:
-	// case x == 63:
-	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
-	MOVWU -4(R_SRC), R_LEN
-	B     doLit
-
-	// The code above handles literal tags.
-	// ----------------------------------------
-	// The code below handles copy tags.
-
-tagCopy4:
-	// case tagCopy4:
-	// s += 5
-	ADD $5, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	MOVD R_SRC, R_TMP1
-	SUB  R_SBASE, R_TMP1, R_TMP1
-	CMP  R_SLEN, R_TMP1
-	BGT  errCorrupt
-
-	// length = 1 + int(src[s-5])>>2
-	MOVD $1, R1
-	ADD  R_LEN>>2, R1, R_LEN
-
-	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
-	MOVWU -4(R_SRC), R_OFF
-	B     doCopy
-
-tagCopy2:
-	// case tagCopy2:
-	// s += 3
-	ADD $3, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// length = 1 + int(src[s-3])>>2
-	MOVD $1, R1
-	ADD  R_LEN>>2, R1, R_LEN
-
-	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-	MOVHU -2(R_SRC), R_OFF
-	B     doCopy
-
-tagCopy:
-	// We have a copy tag. We assume that:
-	//	- R_TMP1 == src[s] & 0x03
-	//	- R_LEN == src[s]
-	CMP $2, R_TMP1
-	BEQ tagCopy2
-	BGT tagCopy4
-
-	// case tagCopy1:
-	// s += 2
-	ADD $2, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-	// Calculate offset in R_TMP0 in case it is a repeat.
-	MOVD  R_LEN, R_TMP0
-	AND   $0xe0, R_TMP0
-	MOVBU -1(R_SRC), R_TMP1
-	ORR   R_TMP0<<3, R_TMP1, R_TMP0
-
-	// length = 4 + int(src[s-2])>>2&0x7
-	MOVD $7, R1
-	AND  R_LEN>>2, R1, R_LEN
-	ADD  $4, R_LEN, R_LEN
-
-	// check if repeat code with offset 0.
-	CMP $0, R_TMP0
-	BEQ repeatCode
-
-	// This is a regular copy, transfer our temporary value to R_OFF (offset)
-	MOVD R_TMP0, R_OFF
-	B    doCopy
-
-	// This is a repeat code.
-repeatCode:
-	// If length < 9, reuse last offset, with the length already calculated.
-	CMP $9, R_LEN
-	BLT doCopyRepeat
-	BEQ repeatLen1
-	CMP $10, R_LEN
-	BEQ repeatLen2
-
-repeatLen3:
-	// s +=3
-	ADD $3, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + 65540
-	MOVBU -1(R_SRC), R_TMP0
-	MOVHU -3(R_SRC), R_LEN
-	ORR   R_TMP0<<16, R_LEN, R_LEN
-	ADD   $65540, R_LEN, R_LEN
-	B     doCopyRepeat
-
-repeatLen2:
-	// s +=2
-	ADD $2, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + 260
-	MOVHU -2(R_SRC), R_LEN
-	ADD   $260, R_LEN, R_LEN
-	B     doCopyRepeat
-
-repeatLen1:
-	// s +=1
-	ADD $1, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// length = src[s-1] + 8
-	MOVBU -1(R_SRC), R_LEN
-	ADD   $8, R_LEN, R_LEN
-	B     doCopyRepeat
-
-doCopy:
-	// This is the end of the outer "switch", when we have a copy tag.
-	//
-	// We assume that:
-	//	- R_LEN == length && R_LEN > 0
-	//	- R_OFF == offset
-
-	// if d < offset { etc }
-	MOVD R_DST, R_TMP1
-	SUB  R_DBASE, R_TMP1, R_TMP1
-	CMP  R_OFF, R_TMP1
-	BLT  errCorrupt
-
-	// Repeat values can skip the test above, since any offset > 0 will be in dst.
-doCopyRepeat:
-
-	// if offset <= 0 { etc }
-	CMP $0, R_OFF
-	BLE errCorrupt
-
-	// if length > len(dst)-d { etc }
-	MOVD R_DEND, R_TMP1
-	SUB  R_DST, R_TMP1, R_TMP1
-	CMP  R_TMP1, R_LEN
-	BGT  errCorrupt
-
-	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
-	//
-	// Set:
-	//	- R_TMP2 = len(dst)-d
-	//	- R_TMP3 = &dst[d-offset]
-	MOVD R_DEND, R_TMP2
-	SUB  R_DST, R_TMP2, R_TMP2
-	MOVD R_DST, R_TMP3
-	SUB  R_OFF, R_TMP3, R_TMP3
-
-	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
-	//
-	// First, try using two 8-byte load/stores, similar to the doLit technique
-	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
-	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
-	// and not one 16-byte load/store, and the first store has to be before the
-	// second load, due to the overlap if offset is in the range [8, 16).
-	//
-	// if length > 16 || offset < 8 || len(dst)-d < 16 {
-	//   goto slowForwardCopy
-	// }
-	// copy 16 bytes
-	// d += length
-	CMP  $16, R_LEN
-	BGT  slowForwardCopy
-	CMP  $8, R_OFF
-	BLT  slowForwardCopy
-	CMP  $16, R_TMP2
-	BLT  slowForwardCopy
-	MOVD 0(R_TMP3), R_TMP0
-	MOVD R_TMP0, 0(R_DST)
-	MOVD 8(R_TMP3), R_TMP1
-	MOVD R_TMP1, 8(R_DST)
-	ADD  R_LEN, R_DST, R_DST
-	B    loop
-
-slowForwardCopy:
-	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
-	// can still try 8-byte load stores, provided we can overrun up to 10 extra
-	// bytes. As above, the overrun will be fixed up by subsequent iterations
-	// of the outermost loop.
-	//
-	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
-	// commentary says:
-	//
-	// ----
-	//
-	// The main part of this loop is a simple copy of eight bytes at a time
-	// until we've copied (at least) the requested amount of bytes.  However,
-	// if d and d-offset are less than eight bytes apart (indicating a
-	// repeating pattern of length < 8), we first need to expand the pattern in
-	// order to get the correct results. For instance, if the buffer looks like
-	// this, with the eight-byte <d-offset> and <d> patterns marked as
-	// intervals:
-	//
-	//    abxxxxxxxxxxxx
-	//    [------]           d-offset
-	//      [------]         d
-	//
-	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
-	// once, after which we can move <d> two bytes without moving <d-offset>:
-	//
-	//    ababxxxxxxxxxx
-	//    [------]           d-offset
-	//        [------]       d
-	//
-	// and repeat the exercise until the two no longer overlap.
-	//
-	// This allows us to do very well in the special case of one single byte
-	// repeated many times, without taking a big hit for more general cases.
-	//
-	// The worst case of extra writing past the end of the match occurs when
-	// offset == 1 and length == 1; the last copy will read from byte positions
-	// [0..7] and write to [4..11], whereas it was only supposed to write to
-	// position 1. Thus, ten excess bytes.
-	//
-	// ----
-	//
-	// That "10 byte overrun" worst case is confirmed by Go's
-	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
-	// and finishSlowForwardCopy algorithm.
-	//
-	// if length > len(dst)-d-10 {
-	//   goto verySlowForwardCopy
-	// }
-	SUB $10, R_TMP2, R_TMP2
-	CMP R_TMP2, R_LEN
-	BGT verySlowForwardCopy
-
-	// We want to keep the offset, so we use R_TMP2 from here.
-	MOVD R_OFF, R_TMP2
-
-makeOffsetAtLeast8:
-	// !!! As above, expand the pattern so that offset >= 8 and we can use
-	// 8-byte load/stores.
-	//
-	// for offset < 8 {
-	//   copy 8 bytes from dst[d-offset:] to dst[d:]
-	//   length -= offset
-	//   d      += offset
-	//   offset += offset
-	//   // The two previous lines together means that d-offset, and therefore
-	//   // R_TMP3, is unchanged.
-	// }
-	CMP  $8, R_TMP2
-	BGE  fixUpSlowForwardCopy
-	MOVD (R_TMP3), R_TMP1
-	MOVD R_TMP1, (R_DST)
-	SUB  R_TMP2, R_LEN, R_LEN
-	ADD  R_TMP2, R_DST, R_DST
-	ADD  R_TMP2, R_TMP2, R_TMP2
-	B    makeOffsetAtLeast8
-
-fixUpSlowForwardCopy:
-	// !!! Add length (which might be negative now) to d (implied by R_DST being
-	// &dst[d]) so that d ends up at the right place when we jump back to the
-	// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
-	// length is positive, copying the remaining length bytes will write to the
-	// right place.
-	MOVD R_DST, R_TMP0
-	ADD  R_LEN, R_DST, R_DST
-
-finishSlowForwardCopy:
-	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
-	// length means that we overrun, but as above, that will be fixed up by
-	// subsequent iterations of the outermost loop.
-	MOVD $0, R1
-	CMP  R1, R_LEN
-	BLE  loop
-	MOVD (R_TMP3), R_TMP1
-	MOVD R_TMP1, (R_TMP0)
-	ADD  $8, R_TMP3, R_TMP3
-	ADD  $8, R_TMP0, R_TMP0
-	SUB  $8, R_LEN, R_LEN
-	B    finishSlowForwardCopy
-
-verySlowForwardCopy:
-	// verySlowForwardCopy is a simple implementation of forward copy. In C
-	// parlance, this is a do/while loop instead of a while loop, since we know
-	// that length > 0. In Go syntax:
-	//
-	// for {
-	//   dst[d] = dst[d - offset]
-	//   d++
-	//   length--
-	//   if length == 0 {
-	//     break
-	//   }
-	// }
-	MOVB (R_TMP3), R_TMP1
-	MOVB R_TMP1, (R_DST)
-	ADD  $1, R_TMP3, R_TMP3
-	ADD  $1, R_DST, R_DST
-	SUB  $1, R_LEN, R_LEN
-	CBNZ R_LEN, verySlowForwardCopy
-	B    loop
-
-	// The code above handles copy tags.
-	// ----------------------------------------
-
-end:
-	// This is the end of the "for s < len(src)".
-	//
-	// if d != len(dst) { etc }
-	CMP R_DEND, R_DST
-	BNE errCorrupt
-
-	// return 0
-	MOVD $0, ret+48(FP)
-	RET
-
-errCorrupt:
-	// return decodeErrCodeCorrupt
-	MOVD $1, R_TMP0
-	MOVD R_TMP0, ret+48(FP)
-	RET
-- 
cgit v1.2.3