blob: 6b24347ed7043937d200519da9654dc0d30df23b (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
#!/usr/bin/env bash
#
# Copy KV6 Parquet files from an S3 bucket into the current directory.
#
# The bucket is listed with `rclone ls`. An object is copied only when the
# listing also contains a companion object named "<object>.meta.json" whose
# name ends in ".parquet.meta.json". The companion objects themselves are
# never copied.
# NOTE(review): presumably the meta file marks a fully written Parquet
# file - confirm with whoever produces the bucket contents.
#
# Required environment variables:
#   S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY  credentials, re-exported under
#                                           the AWS_* names for --s3-env-auth
#   S3_PROVIDER, S3_REGION, S3_ENDPOINT     passed to rclone's S3 backend
#   S3_BUCKET                               bucket (remote path) to read from
#
# Requires Bash 4+ (associative arrays) and rclone on PATH.
set -eu
set -o pipefail

# Turn tracing off *before* touching the credentials, so an inherited `-x`
# (e.g. `bash -x script`) cannot print the secrets.
set +x
export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"

# Connection flags shared by every rclone invocation below.
rclone_s3_flags=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)
remote=":s3:$S3_BUCKET"

# Capture the listing with a command substitution instead of reading from
# <( ... ): the exit status of a process substitution is invisible to
# `set -e`, so a failed listing would look like an empty bucket.
listing=$(rclone ls "${rclone_s3_flags[@]}" "$remote")

all_files=()
declare -A metafiles=()
# Each listing line is "<size> <path>"; the size column is not needed.
while IFS=' ' read -r _size filename; do
  # The here-string yields one empty line when the listing is empty.
  [[ -n "$filename" ]] || continue
  if [[ "$filename" == *.parquet.meta.json ]]; then
    metafiles["$filename"]=1
  else
    all_files+=("$filename")
  fi
done <<<"$listing"

# Second pass: a meta object may appear before or after its data object in
# the listing, so the selection can only happen once everything is read.
files=()
# The ${arr[@]+...} form keeps `set -u` quiet on empty arrays (Bash < 4.4).
for filename in ${all_files[@]+"${all_files[@]}"}; do
  meta_key="$filename.meta.json"
  # ${map[key]+x} tests for key presence without re-evaluating the subscript
  # the way `[[ -v map[...] ]]` can for names with special characters.
  if [[ -n "${metafiles[$meta_key]+x}" ]]; then
    files+=("$filename")
  fi
done

echo "Found ${#files[@]} relevant KV6 Parquet files"

if ((${#files[@]} > 0)); then
  echo "Synching this directory with these files"
  # The selected paths are fed to rclone on stdin, one per line.
  printf '%s\n' "${files[@]}" \
    | rclone copy \
      "${rclone_s3_flags[@]}" \
      --progress \
      --files-from - \
      "$remote" ./
fi
|