diff options
Diffstat (limited to 'script')
-rw-r--r-- | script/archiver/default.nix | 15 | ||||
-rwxr-xr-x | script/archiver/oeuf-archiver.sh | 31 | ||||
-rw-r--r-- | script/synckv6/default.nix | 15 | ||||
-rwxr-xr-x | script/synckv6/oeuf-synckv6.sh | 43 |
4 files changed, 104 insertions, 0 deletions
diff --git a/script/archiver/default.nix b/script/archiver/default.nix new file mode 100644 index 0000000..4a464e0 --- /dev/null +++ b/script/archiver/default.nix | |||
@@ -0,0 +1,15 @@ | |||
# Packages the oeuf-archiver shell script as $out/bin/oeuf-archiver, wrapped
# so that the tools it calls are found on PATH.
{ pkgs ? import <nixpkgs> { } }: with pkgs;

let
  # Programs the script invokes at run time. The list is shared between the
  # build inputs and the PATH prefix written into the wrapper.
  # NOTE(review): oeuf-bundleparquet is resolved through `with pkgs`, so the
  # package set passed in must provide it (presumably via an overlay) — confirm.
  runtimeDeps = [ bash rclone oeuf-bundleparquet ];
in
stdenv.mkDerivation {
  name = "oeuf-archiver";
  src = ./.;

  buildInputs = runtimeDeps;
  nativeBuildInputs = [ makeWrapper ];

  # Copy the script into place under its final name, then wrap it so the
  # runtime tools above are prepended to PATH.
  installPhase = ''
    mkdir -p $out/bin
    cp oeuf-archiver.sh $out/bin/oeuf-archiver
    wrapProgram $out/bin/oeuf-archiver \
      --prefix PATH : ${lib.makeBinPath runtimeDeps}
  '';
}
diff --git a/script/archiver/oeuf-archiver.sh b/script/archiver/oeuf-archiver.sh new file mode 100755 index 0000000..478d4d9 --- /dev/null +++ b/script/archiver/oeuf-archiver.sh | |||
@@ -0,0 +1,31 @@ | |||
#!/usr/bin/env bash
#
# Runs oeuf-bundleparquet and then moves every ./merged/oeuf-*.parquet file,
# together with its "<file>.meta.json" companion, to an S3 bucket with rclone.
#
# Required environment variables:
#   S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY  credentials exported for rclone
#   S3_PROVIDER, S3_REGION, S3_ENDPOINT     rclone S3 backend settings
#   S3_BUCKET                               destination bucket
#
# Exit status: non-zero if oeuf-bundleparquet fails or if any file could not
# be moved.

set -eux
set -o pipefail

# This option prevents the loop from running
# if it does not match any files
shopt -s nullglob

oeuf-bundleparquet

export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
set +x # Don't print the secret access key to the log
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"
set -x

# Options shared by every rclone invocation. --s3-env-auth tells rclone to
# take the credentials from the AWS_* variables exported above.
rclone_opts=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)

failed=0
for file in ./merged/oeuf-*.parquet; do
  # The metadata file is moved first; the Parquet file only follows when that
  # succeeded. `set -e` does not act on a failure inside an `&&` list, so the
  # result is checked explicitly: a failed file is reported and remembered,
  # and the remaining files are still attempted.
  if ! {
    rclone move "${rclone_opts[@]}" "$file.meta.json" ":s3:$S3_BUCKET" &&
      rclone move "${rclone_opts[@]}" "$file" ":s3:$S3_BUCKET"
  }; then
    echo "Failed to archive $file" >&2
    failed=1
  fi
done

exit "$failed"
diff --git a/script/synckv6/default.nix b/script/synckv6/default.nix new file mode 100644 index 0000000..95a9331 --- /dev/null +++ b/script/synckv6/default.nix | |||
@@ -0,0 +1,15 @@ | |||
# Packages the oeuf-synckv6 shell script as $out/bin/oeuf-synckv6, wrapped so
# that the tools it calls are found on PATH.
{ pkgs ? import <nixpkgs> { } }: with pkgs;

let
  # Programs the script invokes at run time. The list is shared between the
  # build inputs and the PATH prefix written into the wrapper.
  runtimeDeps = [ bash rclone ];
in
stdenv.mkDerivation {
  name = "oeuf-synckv6";
  src = ./.;

  buildInputs = runtimeDeps;
  nativeBuildInputs = [ makeWrapper ];

  # Copy the script into place under its final name, then wrap it so the
  # runtime tools above are prepended to PATH.
  installPhase = ''
    mkdir -p $out/bin
    cp oeuf-synckv6.sh $out/bin/oeuf-synckv6
    wrapProgram $out/bin/oeuf-synckv6 \
      --prefix PATH : ${lib.makeBinPath runtimeDeps}
  '';
}
diff --git a/script/synckv6/oeuf-synckv6.sh b/script/synckv6/oeuf-synckv6.sh new file mode 100755 index 0000000..6b24347 --- /dev/null +++ b/script/synckv6/oeuf-synckv6.sh | |||
@@ -0,0 +1,43 @@ | |||
#!/usr/bin/env bash
#
# Copies KV6 Parquet files from an S3 bucket into the current directory.
# A file is only copied when the bucket also contains a matching
# "<file>.meta.json" object whose name ends in ".parquet.meta.json".
#
# Required environment variables:
#   S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY  credentials exported for rclone
#   S3_PROVIDER, S3_REGION, S3_ENDPOINT     rclone S3 backend settings
#   S3_BUCKET                               source bucket

set -eu
set -o pipefail

# Switch tracing off before the credentials are expanded, so that running the
# script under `bash -x` does not print the secret access key.
set +x
export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"

# Options shared by every rclone invocation. --s3-env-auth tells rclone to
# take the credentials from the AWS_* variables exported above.
rclone_opts=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)

# Capture the listing in a variable rather than reading it from a process
# substitution: the exit status of a process substitution is ignored, so a
# failing `rclone ls` would otherwise be mistaken for an empty bucket. As a
# plain assignment, the failure aborts the script through `set -e`.
listing=$(rclone ls "${rclone_opts[@]}" ":s3:$S3_BUCKET")

all_files=()
declare -A metafiles=()

# Each listing line is "<size> <path>". The size is not needed; everything
# after it (including any spaces) ends up in $filename.
while IFS=' ' read -r _size filename; do
  # The here-string yields one empty line when the listing is empty.
  [[ -n "$filename" ]] || continue
  if [[ "$filename" == *.parquet.meta.json ]]; then
    metafiles["$filename"]=1
  else
    all_files+=("$filename")
  fi
done <<<"$listing"

# Keep only the files for which a metadata object was listed as well.
files=()
for filename in "${all_files[@]}"; do
  meta_key="$filename.meta.json"
  # ${var+word} tests for existence without evaluating the subscript again.
  if [[ -n "${metafiles[$meta_key]+set}" ]]; then
    files+=("$filename")
  fi
done

echo "Found ${#files[@]} relevant KV6 Parquet files"

# With an empty array printf would still emit one blank line, so skip the
# copy altogether when there is nothing to fetch.
if ((${#files[@]} == 0)); then
  exit 0
fi

echo "Synching this directory with these files"

# Feed the selected names to rclone on stdin, one per line.
printf '%s\n' "${files[@]}" | rclone copy \
  "${rclone_opts[@]}" \
  --progress \
  --files-from - \
  ":s3:$S3_BUCKET" ./