From 17a3ea880402338420699e03bcb24181e4ff3924 Mon Sep 17 00:00:00 2001
From: Rutger Broekhoff
Date: Thu, 2 May 2024 20:27:40 +0200
Subject: Initial commit

Based on dc4ba6a
---
 script/archiver/default.nix      | 17 +++++++++++++++++
 script/archiver/oeuf-archiver.sh | 38 ++++++++++++++++++++++++++++++++++++++
 script/synckv6/default.nix       | 17 +++++++++++++++++
 script/synckv6/oeuf-synckv6.sh   | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 132 insertions(+)
 create mode 100644 script/archiver/default.nix
 create mode 100755 script/archiver/oeuf-archiver.sh
 create mode 100644 script/synckv6/default.nix
 create mode 100755 script/synckv6/oeuf-synckv6.sh

(limited to 'script')

diff --git a/script/archiver/default.nix b/script/archiver/default.nix
new file mode 100644
index 0000000..4a464e0
--- /dev/null
+++ b/script/archiver/default.nix
@@ -0,0 +1,17 @@
+# Packages oeuf-archiver.sh as the `oeuf-archiver` executable, wrapped so that
+# bash, rclone and oeuf-bundleparquet are prepended to its PATH.
+{ pkgs ? import <nixpkgs> { } }: with pkgs;
+
+stdenv.mkDerivation {
+  name = "oeuf-archiver";
+  src = ./.;
+
+  buildInputs = [ bash rclone oeuf-bundleparquet ];
+  nativeBuildInputs = [ makeWrapper ];
+  installPhase = ''
+    mkdir -p $out/bin
+    cp oeuf-archiver.sh $out/bin/oeuf-archiver
+    wrapProgram $out/bin/oeuf-archiver \
+      --prefix PATH : ${lib.makeBinPath [ bash rclone oeuf-bundleparquet ]}
+  '';
+}
diff --git a/script/archiver/oeuf-archiver.sh b/script/archiver/oeuf-archiver.sh
new file mode 100755
index 0000000..478d4d9
--- /dev/null
+++ b/script/archiver/oeuf-archiver.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+#
+# Runs oeuf-bundleparquet, then moves every ./merged/oeuf-*.parquet file and
+# its accompanying .meta.json file into the S3 bucket using rclone.
+#
+# Required environment: S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_PROVIDER,
+# S3_REGION, S3_ENDPOINT, S3_BUCKET.
+
+set -eux
+set -o pipefail
+
+# This option prevents the loop from running
+# if it does not match any files
+shopt -s nullglob
+
+oeuf-bundleparquet
+
+export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
+set +x # Don't print the secret access key to the log
+export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"
+set -x
+
+# Flags shared by every rclone invocation below.
+rclone_flags=(
+  --s3-provider "$S3_PROVIDER"
+  --s3-region "$S3_REGION"
+  --s3-endpoint "$S3_ENDPOINT"
+  --s3-env-auth
+)
+
+for file in ./merged/oeuf-*.parquet; do
+  # The metadata file goes first; the Parquet file is only moved once that
+  # succeeded. These are separate statements on purpose: a failing command on
+  # the left of `&&` is exempt from `set -e`, so chaining them would let a
+  # failed metadata upload pass silently.
+  rclone move "${rclone_flags[@]}" "$file.meta.json" ":s3:$S3_BUCKET"
+  rclone move "${rclone_flags[@]}" "$file" ":s3:$S3_BUCKET"
+done
diff --git a/script/synckv6/default.nix b/script/synckv6/default.nix
new file mode 100644
index 0000000..95a9331
--- /dev/null
+++ b/script/synckv6/default.nix
@@ -0,0 +1,17 @@
+# Packages oeuf-synckv6.sh as the `oeuf-synckv6` executable, wrapped so that
+# bash and rclone are prepended to its PATH.
+{ pkgs ? import <nixpkgs> { } }: with pkgs;
+
+stdenv.mkDerivation {
+  name = "oeuf-synckv6";
+  src = ./.;
+
+  buildInputs = [ bash rclone ];
+  nativeBuildInputs = [ makeWrapper ];
+  installPhase = ''
+    mkdir -p $out/bin
+    cp oeuf-synckv6.sh $out/bin/oeuf-synckv6
+    wrapProgram $out/bin/oeuf-synckv6 \
+      --prefix PATH : ${lib.makeBinPath [ bash rclone ]}
+  '';
+}
diff --git a/script/synckv6/oeuf-synckv6.sh b/script/synckv6/oeuf-synckv6.sh
new file mode 100755
index 0000000..6b24347
--- /dev/null
+++ b/script/synckv6/oeuf-synckv6.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+#
+# Copies Parquet files from the S3 bucket into the current directory using
+# rclone. An object is only copied if the bucket also contains a matching
+# NAME.parquet.meta.json object next to NAME.parquet.
+#
+# Required environment: S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_PROVIDER,
+# S3_REGION, S3_ENDPOINT, S3_BUCKET.
+
+set -eu
+set -o pipefail
+
+export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
+export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"
+
+set +x
+
+# Flags shared by every rclone invocation below.
+rclone_flags=(
+  --s3-provider "$S3_PROVIDER"
+  --s3-region "$S3_REGION"
+  --s3-endpoint "$S3_ENDPOINT"
+  --s3-env-auth
+)
+
+# Capture the listing in a variable instead of reading it from a process
+# substitution: the exit status of a process substitution is discarded, so a
+# failed listing would otherwise look like an empty bucket.
+listing=$(rclone ls "${rclone_flags[@]}" ":s3:$S3_BUCKET")
+
+all_files=()
+declare -A metafiles=()
+
+# Each line is split into a first field (ignored) and the remaining path.
+while IFS=' ' read -r _ filename; do
+  [[ -n "$filename" ]] || continue # an empty listing yields one blank line
+  if [[ "$filename" == *.parquet.meta.json ]]; then
+    metafiles["$filename"]=1
+  else
+    all_files+=("$filename")
+  fi
+done <<<"$listing"
+
+files=()
+for filename in "${all_files[@]}"; do
+  # The `+set` expansion tests for the key without evaluating the subscript a
+  # second time, which `[[ -v array[key] ]]` can do.
+  if [[ -n ${metafiles[${filename}.meta.json]+set} ]]; then
+    files+=("$filename")
+  fi
+done
+
+echo "Found ${#files[@]} relevant KV6 Parquet files"
+echo "Synching this directory with these files"
+
+printf "%s\n" "${files[@]}" | rclone copy \
+  "${rclone_flags[@]}" \
+  --progress \
+  --files-from - \
+  ":s3:$S3_BUCKET" ./
-- 
cgit v1.2.3