diff options
author | Rutger Broekhoff | 2024-05-02 20:27:40 +0200 |
---|---|---|
committer | Rutger Broekhoff | 2024-05-02 20:27:40 +0200 |
commit | 17a3ea880402338420699e03bcb24181e4ff3924 (patch) | |
tree | da666ef91e0b60d20aa0b01529644c136fd1f4ab /script/synckv6 | |
download | oeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip |
Initial commit
Based on dc4ba6a
Diffstat (limited to 'script/synckv6')
-rw-r--r-- | script/synckv6/default.nix | 15 | ||||
-rwxr-xr-x | script/synckv6/oeuf-synckv6.sh | 43 |
2 files changed, 58 insertions, 0 deletions
diff --git a/script/synckv6/default.nix b/script/synckv6/default.nix new file mode 100644 index 0000000..95a9331 --- /dev/null +++ b/script/synckv6/default.nix | |||
@@ -0,0 +1,15 @@ | |||
# Nix expression for the oeuf-synckv6 script.
# Takes an optional `pkgs` argument (defaults to importing <nixpkgs>) and
# brings its attributes into scope with `with pkgs;`.
1 | { pkgs ? import <nixpkgs> { } }: with pkgs; | ||
2 | |||
3 | stdenv.mkDerivation { | ||
4 | name = "oeuf-synckv6"; | ||
# The source is the directory containing this file, which is where
# oeuf-synckv6.sh is copied from in installPhase.
5 | src = ./.; | ||
6 | |||
# bash and rclone are listed both here and in the PATH prefix below.
7 | buildInputs = [ bash rclone ]; | ||
# NOTE(review): makeWrapper presumably supplies the `wrapProgram` command
# used in installPhase; confirm against the nixpkgs manual.
8 | nativeBuildInputs = [ makeWrapper ]; | ||
# Install: copy the script to $out/bin under the name `oeuf-synckv6` (without
# the .sh suffix), then wrap it so PATH is prefixed with the bin directories
# of bash and rclone.
9 | installPhase = '' | ||
10 | mkdir -p $out/bin | ||
11 | cp oeuf-synckv6.sh $out/bin/oeuf-synckv6 | ||
12 | wrapProgram $out/bin/oeuf-synckv6 \ | ||
13 | --prefix PATH : ${lib.makeBinPath [ bash rclone ]} | ||
14 | ''; | ||
15 | } | ||
diff --git a/script/synckv6/oeuf-synckv6.sh b/script/synckv6/oeuf-synckv6.sh new file mode 100755 index 0000000..6b24347 --- /dev/null +++ b/script/synckv6/oeuf-synckv6.sh | |||
@@ -0,0 +1,43 @@ | |||
#!/usr/bin/env bash
#
# oeuf-synckv6: copy KV6 Parquet files from an S3 bucket into the current
# directory. An object is copied only when the bucket also contains a
# companion object named "<object>.meta.json" (and that companion's name ends
# in ".parquet.meta.json").
#
# Required environment variables:
#   S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY   re-exported as AWS_* (used with
#                                            rclone's --s3-env-auth)
#   S3_PROVIDER, S3_REGION, S3_ENDPOINT      forwarded to rclone's --s3-* flags
#   S3_BUCKET                                bucket to list and copy from
#
# Exit status: non-zero if a required variable is unset or if either rclone
# invocation fails; zero otherwise (including when there is nothing to copy).

set -eu
set -o pipefail

export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"

# Keep command tracing switched off from here on.
set +x

# Connection flags shared by both rclone calls. Building them here, in the
# main shell, means an unset S3_* variable aborts the script immediately
# (set -u) instead of failing unnoticed inside a subshell.
s3_flags=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)
remote=":s3:${S3_BUCKET}"

# Capture the listing with a plain command substitution so that a failing
# `rclone ls` stops the script (set -e). The exit status of a process
# substitution would be silently discarded.
listing=$(rclone ls "${s3_flags[@]}" "$remote")

all_files=()
declare -A metafiles=()

# Each listing line is "<size> <path>"; the size column is not needed.
while IFS=' ' read -r _ filename; do
  # An empty listing still yields one blank line via the here-string.
  [[ -n "$filename" ]] || continue
  if [[ "$filename" == *.parquet.meta.json ]]; then
    metafiles["$filename"]=1
  else
    # Quoted so names with spaces or glob characters stay one element.
    all_files+=("$filename")
  fi
done <<<"$listing"

# Keep only the objects that have a companion metadata object.
files=()
# The ${arr[@]+...} form keeps `set -u` happy on Bash releases that treat an
# empty array as unset.
for filename in ${all_files[@]+"${all_files[@]}"}; do
  meta_key="${filename}.meta.json"
  # ${arr[key]+set} tests key existence without re-evaluating the subscript,
  # which `[[ -v arr[...] ]]` does on older Bash versions.
  if [[ -n "${metafiles[$meta_key]+set}" ]]; then
    files+=("$filename")
  fi
done

echo "Found ${#files[@]} relevant KV6 Parquet files"

# With nothing selected there is nothing to copy; skip rclone rather than
# feeding it a lone blank line on stdin.
if ((${#files[@]} == 0)); then
  echo "Nothing to sync"
  exit 0
fi

echo "Synching this directory with these files"

# One path per line on stdin, consumed by --files-from.
printf '%s\n' "${files[@]}" \
  | rclone copy \
    "${s3_flags[@]}" \
    --progress \
    --files-from - \
    "$remote" ./