aboutsummaryrefslogtreecommitdiffstats
path: root/script/synckv6/oeuf-synckv6.sh
diff options
context:
space:
mode:
author: Rutger Broekhoff, 2024-05-02 20:27:40 +0200
committer: Rutger Broekhoff, 2024-05-02 20:27:40 +0200
commit: 17a3ea880402338420699e03bcb24181e4ff3924 (patch)
tree: da666ef91e0b60d20aa0b01529644c136fd1f4ab /script/synckv6/oeuf-synckv6.sh
download: oeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz
          oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip
Initial commit
Based on dc4ba6a
Diffstat (limited to 'script/synckv6/oeuf-synckv6.sh')
-rwxr-xr-x script/synckv6/oeuf-synckv6.sh 43
1 files changed, 43 insertions, 0 deletions
diff --git a/script/synckv6/oeuf-synckv6.sh b/script/synckv6/oeuf-synckv6.sh
new file mode 100755
index 0000000..6b24347
--- /dev/null
+++ b/script/synckv6/oeuf-synckv6.sh
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
#
# Copy KV6 Parquet files from an S3 bucket into the current directory.
#
# The bucket is listed with `rclone ls`. An object is copied only when the
# bucket also contains a companion object named "<object>.meta.json" whose
# name ends in ".parquet.meta.json"; all other objects are left alone.
#
# Required environment variables:
#   S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY  credentials, re-exported as the
#                                           AWS_* variables for --s3-env-auth
#   S3_PROVIDER, S3_REGION, S3_ENDPOINT     passed to the rclone S3 backend
#   S3_BUCKET                               bucket to list and copy from

set -eu
set -o pipefail

# Switch command tracing off *before* the credentials are expanded, so an
# inherited `-x` cannot echo them.
set +x

export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"

# Connection flags shared by both rclone invocations.
rclone_s3_args=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)
remote=":s3:$S3_BUCKET"

# Capture the listing in a plain assignment so that a failing `rclone ls`
# aborts the script via `set -e`. (The exit status of a process substitution
# is not checked, which would silently yield an empty listing.)
listing=$(rclone ls "${rclone_s3_args[@]}" "$remote")

all_files=()
declare -A metafiles=()

# Each listing line is "<size> <path>"; the size column is not needed.
while IFS=' ' read -r _size filename; do
  # The here-string yields one empty line when the listing is empty.
  [[ -n "$filename" ]] || continue
  if [[ "$filename" == *.parquet.meta.json ]]; then
    metafiles["$filename"]=1
  else
    all_files+=("$filename")
  fi
done <<< "$listing"

files=()
# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array instead of
# tripping `set -u` on Bash releases older than 4.4.
for filename in ${all_files[@]+"${all_files[@]}"}; do
  meta_key="$filename.meta.json"
  # ${...+set} tests key existence without re-evaluating the subscript.
  if [[ -n "${metafiles[$meta_key]+set}" ]]; then
    files+=("$filename")
  fi
done

echo "Found ${#files[@]} relevant KV6 Parquet files"
if (( ${#files[@]} == 0 )); then
  echo "Nothing to sync"
  exit 0
fi
echo "Synching this directory with these files"

# Feed the selected object names to rclone on stdin, one per line.
printf '%s\n' "${files[@]}" | rclone copy \
  "${rclone_s3_args[@]}" \
  --progress \
  --files-from - \
  "$remote" ./