#!/usr/bin/env bash
#
# oeuf-synckv6.sh - copy the relevant KV6 Parquet files from an S3 bucket
# into the current working directory using rclone.
#
# Provenance: script/synckv6/oeuf-synckv6.sh, added by commit
# 17a3ea880402338420699e03bcb24181e4ff3924 ("Initial commit", based on
# dc4ba6a), Rutger Broekhoff, Thu, 2 May 2024 20:27:40 +0200.
#
# Selection rule: an object NAME is copied only when the bucket also holds
# an object called "NAME.meta.json", and only names ending in
# ".parquet.meta.json" are treated as metadata objects. Everything else in
# the bucket is ignored.
# NOTE(review): presumably the metadata object marks a Parquet file as
# complete - confirm with whatever produces these files.
#
# Required environment:
#   S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY  credentials, handed to rclone
#                                           through the AWS_* variables
#   S3_PROVIDER, S3_REGION, S3_ENDPOINT     rclone S3 backend settings
#   S3_BUCKET                               bucket to list and copy from
#
# Exit status: non-zero when a required variable is missing or when either
# rclone invocation fails; zero otherwise (including "nothing to copy").

set -eu
set -o pipefail
# Turn command tracing off BEFORE any credential is expanded, so running the
# script under `bash -x` does not print the secrets.
set +x

# Fail early, with a readable message, when configuration is missing.
: "${S3_ACCESS_KEY_ID:?S3_ACCESS_KEY_ID must be set}"
: "${S3_SECRET_ACCESS_KEY:?S3_SECRET_ACCESS_KEY must be set}"
: "${S3_PROVIDER:?S3_PROVIDER must be set}"
: "${S3_REGION:?S3_REGION must be set}"
: "${S3_ENDPOINT:?S3_ENDPOINT must be set}"
: "${S3_BUCKET:?S3_BUCKET must be set}"

# rclone is run with --s3-env-auth, so it reads the credentials from here.
export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"

# Backend flags shared by the listing and the copy.
rclone_s3_flags=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)
remote=":s3:$S3_BUCKET"

# Capture the listing in a variable instead of a process substitution: the
# exit status of `< <(cmd)` is ignored by `set -e`, which would make a failed
# listing look like an empty bucket.
listing="$(rclone ls "${rclone_s3_flags[@]}" "$remote")"

all_files=()
declare -A metafiles=()

# Each listing line is "<size> <path>"; IFS=' ' drops the padding in front of
# the size and leaves the remainder of the line (spaces included) as the path.
while IFS=' ' read -r _size filename; do
  # An empty listing still yields one blank line through the here-string.
  [[ -n "$filename" ]] || continue
  if [[ "$filename" == *.parquet.meta.json ]]; then
    metafiles["$filename"]=1
  else
    all_files+=("$filename")
  fi
done <<<"$listing"

files=()
# ${arr[@]+...} keeps `set -u` quiet about an empty array on Bash < 4.4.
for filename in ${all_files[@]+"${all_files[@]}"}; do
  meta_key="$filename.meta.json"
  # Test membership with a parameter expansion rather than `[[ -v arr[key] ]]`:
  # -v evaluates the subscript a second time, which is unsafe for object names
  # containing characters such as `]` or `$(`.
  if [[ -n "${metafiles[$meta_key]+present}" ]]; then
    files+=("$filename")
  fi
done

echo "Found ${#files[@]} relevant KV6 Parquet files"

# Do not call rclone with a list that contains nothing but a blank line.
if ((${#files[@]} == 0)); then
  echo "Nothing to copy"
  exit 0
fi

echo "Synching this directory with these files"

# One path per line on stdin, consumed by `--files-from -`.
printf '%s\n' "${files[@]}" \
  | rclone copy \
    "${rclone_s3_flags[@]}" \
    --progress \
    --files-from - \
    "$remote" ./