From 17a3ea880402338420699e03bcb24181e4ff3924 Mon Sep 17 00:00:00 2001
From: Rutger Broekhoff
Date: Thu, 2 May 2024 20:27:40 +0200
Subject: Initial commit

Based on dc4ba6a
---
 script/synckv6/default.nix     | 15 +++++++++
 script/synckv6/oeuf-synckv6.sh | 71 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 script/synckv6/default.nix
 create mode 100755 script/synckv6/oeuf-synckv6.sh

(limited to 'script/synckv6')

diff --git a/script/synckv6/default.nix b/script/synckv6/default.nix
new file mode 100644
index 0000000..95a9331
--- /dev/null
+++ b/script/synckv6/default.nix
@@ -0,0 +1,15 @@
+{ pkgs ? import <nixpkgs> { } }: with pkgs;
+
+stdenv.mkDerivation {
+  name = "oeuf-synckv6";
+  src = ./.;
+
+  buildInputs = [ bash rclone ];
+  nativeBuildInputs = [ makeWrapper ];
+  installPhase = ''
+    mkdir -p $out/bin
+    cp oeuf-synckv6.sh $out/bin/oeuf-synckv6
+    wrapProgram $out/bin/oeuf-synckv6 \
+      --prefix PATH : ${lib.makeBinPath [ bash rclone ]}
+  '';
+}
diff --git a/script/synckv6/oeuf-synckv6.sh b/script/synckv6/oeuf-synckv6.sh
new file mode 100755
index 0000000..6b24347
--- /dev/null
+++ b/script/synckv6/oeuf-synckv6.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+# Syncs KV6 Parquet files from an S3 bucket into the current directory.
+#
+# Only objects that have a matching "<name>.meta.json" object in the bucket
+# are copied; other objects are ignored.
+#
+# Required environment variables:
+#   S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_PROVIDER, S3_REGION,
+#   S3_ENDPOINT, S3_BUCKET
+
+set -eu
+set -o pipefail
+
+# Make sure command tracing is off before any credentials are touched.
+set +x
+
+# Fail early if a variable is missing. Inside a command or process
+# substitution, `set -u` would only abort the subshell, not this script.
+for var in S3_ACCESS_KEY_ID S3_SECRET_ACCESS_KEY S3_PROVIDER S3_REGION \
+  S3_ENDPOINT S3_BUCKET; do
+  : "${!var?environment variable $var must be set}"
+done
+
+export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
+export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"
+
+rclone_s3_args=(
+  --s3-provider "$S3_PROVIDER"
+  --s3-region "$S3_REGION"
+  --s3-endpoint "$S3_ENDPOINT"
+  --s3-env-auth
+)
+remote=":s3:${S3_BUCKET}"
+
+# Capture the listing first: a failing `rclone ls` aborts the script here,
+# whereas the exit status of a process substitution would go unnoticed.
+listing=$(rclone ls "${rclone_s3_args[@]}" "$remote")
+
+all_files=()
+declare -A metafiles=()
+
+# Each line of `rclone ls` output is "<size> <path>".
+while IFS=' ' read -r _size filename; do
+  # An empty listing still yields one blank line from the here-string.
+  [[ -n "$filename" ]] || continue
+  if [[ "$filename" == *.parquet.meta.json ]]; then
+    metafiles["$filename"]=1
+  else
+    all_files+=("$filename")
+  fi
+done <<< "$listing"
+
+files=()
+for filename in "${all_files[@]}"; do
+  # `${array[key]+x}` tests for the key without re-evaluating the subscript,
+  # unlike `[[ -v array[key] ]]` on older bash versions.
+  key="${filename}.meta.json"
+  if [[ -n "${metafiles[$key]+x}" ]]; then
+    files+=("$filename")
+  fi
+done
+
+echo "Found ${#files[@]} relevant KV6 Parquet files"
+echo "Synching this directory with these files"
+
+printf '%s\n' "${files[@]}" | rclone copy \
+  "${rclone_s3_args[@]}" \
+  --progress \
+  --files-from - \
+  "$remote" ./
-- 
cgit v1.2.3