From a7f9c8de31231b9fd9c67c57db659f7b01f1a3b0 Mon Sep 17 00:00:00 2001 From: Rutger Broekhoff Date: Mon, 29 Apr 2024 19:18:56 +0200 Subject: Rename crates (and therefore commands) --- Cargo.lock | 70 +-- Cargo.toml | 8 +- common/Cargo.toml | 10 - common/src/lib.rs | 348 ----------- docs/man/gitolfs3-authenticate.1 | 4 +- docs/man/gitolfs3-server.1 | 4 +- docs/man/gitolfs3-shell.1 | 4 +- git-lfs-authenticate/Cargo.toml | 10 - git-lfs-authenticate/src/main.rs | 133 ----- gitolfs3-authenticate/Cargo.toml | 10 + gitolfs3-authenticate/src/main.rs | 134 +++++ gitolfs3-common/Cargo.toml | 10 + gitolfs3-common/src/lib.rs | 348 +++++++++++ gitolfs3-server/Cargo.toml | 19 + gitolfs3-server/src/main.rs | 1154 +++++++++++++++++++++++++++++++++++++ gitolfs3-shell/Cargo.toml | 6 + gitolfs3-shell/src/main.rs | 145 +++++ server/Cargo.toml | 19 - server/src/main.rs | 1151 ------------------------------------ shell/Cargo.toml | 6 - shell/src/main.rs | 143 ----- 21 files changed, 1871 insertions(+), 1865 deletions(-) delete mode 100644 common/Cargo.toml delete mode 100644 common/src/lib.rs delete mode 100644 git-lfs-authenticate/Cargo.toml delete mode 100644 git-lfs-authenticate/src/main.rs create mode 100644 gitolfs3-authenticate/Cargo.toml create mode 100644 gitolfs3-authenticate/src/main.rs create mode 100644 gitolfs3-common/Cargo.toml create mode 100644 gitolfs3-common/src/lib.rs create mode 100644 gitolfs3-server/Cargo.toml create mode 100644 gitolfs3-server/src/main.rs create mode 100644 gitolfs3-shell/Cargo.toml create mode 100644 gitolfs3-shell/src/main.rs delete mode 100644 server/Cargo.toml delete mode 100644 server/src/main.rs delete mode 100644 shell/Cargo.toml delete mode 100644 shell/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 5f04d2d..00f856e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -624,16 +624,6 @@ dependencies = [ "windows-targets 0.52.5", ] -[[package]] -name = "common" -version = "0.1.0" -dependencies = [ - "chrono", - "hmac-sha256", - "serde", - "subtle", -] - [[package]] name = "const-oid" version = "0.9.6" @@ -887,15 +877,48 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] -name = "git-lfs-authenticate" +name = "gitolfs3-authenticate" version = "0.1.0" dependencies = [ "anyhow", "chrono", - "common", + "gitolfs3-common", + "serde_json", +] + +[[package]] +name = "gitolfs3-common" +version = "0.1.0" +dependencies = [ + "chrono", + "hmac-sha256", + "serde", + "subtle", +] + +[[package]] +name = "gitolfs3-server" +version = "0.1.0" +dependencies = [ + "aws-config", + "aws-sdk-s3", + "axum", + "base64", + "chrono", + "gitolfs3-common", + "mime", + "serde", "serde_json", + "tokio", + "tokio-util", + "tower", + "tracing-subscriber", ] +[[package]] +name = "gitolfs3-shell" +version = "0.1.0" + [[package]] name = "group" version = "0.12.1" @@ -1726,25 +1749,6 @@ dependencies = [ "serde", ] -[[package]] -name = "server" -version = "0.1.0" -dependencies = [ - "aws-config", - "aws-sdk-s3", - "axum", - "base64", - "chrono", - "common", - "mime", - "serde", - "serde_json", - "tokio", - "tokio-util", - "tower", - "tracing-subscriber", -] - [[package]] name = "sha1" version = "0.10.6" @@ -1776,10 +1780,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "shell" -version = "0.1.0" - [[package]] name = "signal-hook-registry" version = "1.4.2" diff --git a/Cargo.toml b/Cargo.toml index 6439e6b..2fac4ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,8 @@ [workspace] 
resolver = "2" members = [ - "common", - "git-lfs-authenticate", - "server", - "shell", + "gitolfs3-common", + "gitolfs3-authenticate", + "gitolfs3-server", + "gitolfs3-shell", ] diff --git a/common/Cargo.toml b/common/Cargo.toml deleted file mode 100644 index 20d9bdd..0000000 --- a/common/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "common" -version = "0.1.0" -edition = "2021" - -[dependencies] -chrono = "0.4" -hmac-sha256 = "1.1" -subtle = "2.5" -serde = { version = "1", features = ["derive"] } diff --git a/common/src/lib.rs b/common/src/lib.rs deleted file mode 100644 index 917f566..0000000 --- a/common/src/lib.rs +++ /dev/null @@ -1,348 +0,0 @@ -use chrono::{DateTime, Utc}; -use serde::{de, Deserialize, Serialize}; -use std::{ - fmt::{self, Write}, - ops, - str::FromStr, -}; -use subtle::ConstantTimeEq; - -#[repr(u8)] -enum AuthType { - BatchApi = 1, - Download = 2, -} - -#[derive(Debug, Copy, Clone)] -pub struct Claims<'a> { - pub specific_claims: SpecificClaims, - pub repo_path: &'a str, - pub expires_at: DateTime, -} - -#[derive(Debug, Copy, Clone)] -pub enum SpecificClaims { - BatchApi(Operation), - Download(Oid), -} - -pub type Oid = Digest<32>; - -#[derive(Debug, Eq, PartialEq, Copy, Clone, Serialize, Deserialize)] -#[repr(u8)] -pub enum Operation { - #[serde(rename = "download")] - Download = 1, - #[serde(rename = "upload")] - Upload = 2, -} - -/// Returns None if the claims are invalid. Repo path length may be no more than 100 bytes. -pub fn generate_tag(claims: Claims, key: impl AsRef<[u8]>) -> Option> { - if claims.repo_path.len() > 100 { - return None; - } - - let mut hmac = hmac_sha256::HMAC::new(key); - match claims.specific_claims { - SpecificClaims::BatchApi(operation) => { - hmac.update([AuthType::BatchApi as u8]); - hmac.update([operation as u8]); - } - SpecificClaims::Download(oid) => { - hmac.update([AuthType::Download as u8]); - hmac.update(oid.as_bytes()); - } - } - hmac.update([claims.repo_path.len() as u8]); - hmac.update(claims.repo_path.as_bytes()); - hmac.update(claims.expires_at.timestamp().to_be_bytes()); - Some(hmac.finalize().into()) -} - -#[derive(Debug, PartialEq, Eq, Copy, Clone)] -pub struct ParseOperationError; - -impl fmt::Display for ParseOperationError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "operation should be 'download' or 'upload'") - } -} - -impl FromStr for Operation { - type Err = ParseOperationError; - - fn from_str(s: &str) -> Result { - match s { - "upload" => Ok(Self::Upload), - "download" => Ok(Self::Download), - _ => Err(ParseOperationError), - } - } -} - -/// None means out of range. 
-fn decode_nibble(c: u8) -> Option { - if c.is_ascii_digit() { - Some(c - b'0') - } else if (b'a'..=b'f').contains(&c) { - Some(c - b'a' + 10) - } else if (b'A'..=b'F').contains(&c) { - Some(c - b'A' + 10) - } else { - None - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct HexByte(pub u8); - -impl<'de> Deserialize<'de> for HexByte { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let str = <&str>::deserialize(deserializer)?; - let &[b1, b2] = str.as_bytes() else { - return Err(de::Error::invalid_length( - str.len(), - &"two hexadecimal characters", - )); - }; - let (Some(b1), Some(b2)) = (decode_nibble(b1), decode_nibble(b2)) else { - return Err(de::Error::invalid_value( - de::Unexpected::Str(str), - &"two hexadecimal characters", - )); - }; - Ok(HexByte((b1 << 4) | b2)) - } -} - -impl fmt::Display for HexByte { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let &HexByte(b) = self; - HexFmt(&[b]).fmt(f) - } -} - -#[derive(Debug, PartialEq, Eq, Copy, Clone)] -pub enum ParseHexError { - UnevenNibbles, - InvalidCharacter, - TooShort, - TooLong, -} - -impl fmt::Display for ParseHexError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::UnevenNibbles => { - write!(f, "uneven amount of nibbles (chars in range [a-zA-Z0-9])") - } - Self::InvalidCharacter => write!(f, "non-hex character encountered"), - Self::TooShort => write!(f, "unexpected end of hex sequence"), - Self::TooLong => write!(f, "longer hex sequence than expected"), - } - } -} - -#[derive(Debug)] -pub enum ReadHexError { - Io(std::io::Error), - Format(ParseHexError), -} - -impl fmt::Display for ReadHexError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::Io(e) => e.fmt(f), - Self::Format(e) => e.fmt(f), - } - } -} - -fn parse_hex_exact(value: &str, buf: &mut [u8]) -> Result<(), ParseHexError> { - if value.bytes().len() % 2 == 1 { - return Err(ParseHexError::UnevenNibbles); - } - if value.bytes().len() < 2 * buf.len() { - return Err(ParseHexError::TooShort); - } - if value.bytes().len() > 2 * buf.len() { - return Err(ParseHexError::TooLong); - } - for (i, c) in value.bytes().enumerate() { - if let Some(b) = decode_nibble(c) { - if i % 2 == 0 { - buf[i / 2] = b << 4; - } else { - buf[i / 2] |= b; - } - } else { - return Err(ParseHexError::InvalidCharacter); - } - } - Ok(()) -} - -pub type Key = SafeByteArray<64>; - -pub fn load_key(path: &str) -> Result { - let key_str = std::fs::read_to_string(path).map_err(ReadHexError::Io)?; - key_str.trim().parse().map_err(ReadHexError::Format) -} - -pub struct SafeByteArray { - inner: [u8; N], -} - -impl SafeByteArray { - pub fn new() -> Self { - Self { inner: [0; N] } - } -} - -impl Default for SafeByteArray { - fn default() -> Self { - Self::new() - } -} - -impl AsRef<[u8]> for SafeByteArray { - fn as_ref(&self) -> &[u8] { - &self.inner - } -} - -impl AsMut<[u8]> for SafeByteArray { - fn as_mut(&mut self) -> &mut [u8] { - &mut self.inner - } -} - -impl Drop for SafeByteArray { - fn drop(&mut self) { - self.inner.fill(0) - } -} - -impl FromStr for SafeByteArray { - type Err = ParseHexError; - - fn from_str(value: &str) -> Result { - let mut sba = Self { inner: [0u8; N] }; - parse_hex_exact(value, &mut sba.inner)?; - Ok(sba) - } -} - -pub struct HexFmt>(pub B); - -impl> fmt::Display for HexFmt { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let HexFmt(buf) = self; - for b in buf.as_ref() { - let (high, low) = (b >> 4, b & 0xF); - 
let highc = if high < 10 { - high + b'0' - } else { - high - 10 + b'a' - }; - let lowc = if low < 10 { - low + b'0' - } else { - low - 10 + b'a' - }; - f.write_char(highc as char)?; - f.write_char(lowc as char)?; - } - Ok(()) - } -} - -#[derive(Debug, Copy, Clone)] -pub struct Digest { - inner: [u8; N], -} - -impl ops::Index for Digest { - type Output = u8; - - fn index(&self, index: usize) -> &Self::Output { - &self.inner[index] - } -} - -impl Digest { - pub fn as_bytes(&self) -> &[u8; N] { - &self.inner - } -} - -impl fmt::Display for Digest { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - HexFmt(&self.inner).fmt(f) - } -} - -impl Digest { - pub fn new(data: [u8; N]) -> Self { - Self { inner: data } - } -} - -impl From<[u8; N]> for Digest { - fn from(value: [u8; N]) -> Self { - Self::new(value) - } -} - -impl From> for [u8; N] { - fn from(val: Digest) -> Self { - val.inner - } -} - -impl FromStr for Digest { - type Err = ParseHexError; - - fn from_str(value: &str) -> Result { - let mut buf = [0u8; N]; - parse_hex_exact(value, &mut buf)?; - Ok(buf.into()) - } -} - -impl ConstantTimeEq for Digest { - fn ct_eq(&self, other: &Self) -> subtle::Choice { - self.inner.ct_eq(&other.inner) - } -} - -impl PartialEq for Digest { - fn eq(&self, other: &Self) -> bool { - self.ct_eq(other).into() - } -} - -impl Eq for Digest {} - -impl<'de, const N: usize> Deserialize<'de> for Digest { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let hex = <&str>::deserialize(deserializer)?; - Digest::from_str(hex).map_err(de::Error::custom) - } -} - -impl Serialize for Digest { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serializer.serialize_str(&format!("{self}")) - } -} diff --git a/docs/man/gitolfs3-authenticate.1 b/docs/man/gitolfs3-authenticate.1 index 67151e1..38bf28f 100644 --- a/docs/man/gitolfs3-authenticate.1 +++ b/docs/man/gitolfs3-authenticate.1 @@ -1,8 +1,8 @@ .TH GITOLFS3-AUTHENTICATE 1 2024-04-29 Gitolfs3 "Gitolfs3 Manual" .SH NAME -git-lfs-authenticate \- Git LFS authentication agent for Gitolfs3 +gitolfs3-authenticate \- Git LFS authentication agent for Gitolfs3 .SH SYNOPSIS -.B git-lfs-authenticate +.B gitolfs3-authenticate upload/download .SH DESCRIPTION diff --git a/docs/man/gitolfs3-server.1 b/docs/man/gitolfs3-server.1 index 4f384cf..f141bce 100644 --- a/docs/man/gitolfs3-server.1 +++ b/docs/man/gitolfs3-server.1 @@ -1,8 +1,8 @@ .TH GITOLFS3-SERVER 1 2024-04-29 Gitolfs3 "Gitolfs3 Manual" .SH NAME -server \- Gitolfs3 Git LFS server +gitolfs3-server \- Gitolfs3 Git LFS server .SH SYNOPSIS -.B server +.B gitolfs3-server .SH DESCRIPTION .B server is the Gitolfs3 Git LFS server. 
It is primarily configured using environment diff --git a/docs/man/gitolfs3-shell.1 b/docs/man/gitolfs3-shell.1 index 56eac31..a626682 100644 --- a/docs/man/gitolfs3-shell.1 +++ b/docs/man/gitolfs3-shell.1 @@ -1,8 +1,8 @@ .TH GITOLFS3-SHELL 1 2024-04-29 Gitolfs3 "Gitolfs3 Manual" .SH NAME -shell \- Gitolfs3 shell +gitolfs3-shell \- Gitolfs3 shell .SH SYNOPSIS -.B shell +.B gitolfs3-shell -c .SH DESCRIPTION .B shell diff --git a/git-lfs-authenticate/Cargo.toml b/git-lfs-authenticate/Cargo.toml deleted file mode 100644 index 15feba8..0000000 --- a/git-lfs-authenticate/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "git-lfs-authenticate" -version = "0.1.0" -edition = "2021" - -[dependencies] -anyhow = "1.0" -chrono = "0.4" -common = { path = "../common" } -serde_json = "1" diff --git a/git-lfs-authenticate/src/main.rs b/git-lfs-authenticate/src/main.rs deleted file mode 100644 index c9094a1..0000000 --- a/git-lfs-authenticate/src/main.rs +++ /dev/null @@ -1,133 +0,0 @@ -use anyhow::{anyhow, bail, Result}; -use chrono::Utc; -use serde_json::json; -use std::{process::ExitCode, time::Duration}; - -fn main() -> ExitCode { - let config = match Config::load() { - Ok(config) => config, - Err(e) => { - eprintln!("Error: {e}"); - return ExitCode::from(2); - } - }; - - let (repo_name, operation) = match parse_cmdline() { - Ok(args) => args, - Err(e) => { - eprintln!("Error: {e}\n"); - eprintln!("Usage: git-lfs-authenticate upload/download"); - // Exit code 2 signifies bad usage of CLI. - return ExitCode::from(2); - } - }; - - if !repo_exists(&repo_name) { - eprintln!("Error: repository does not exist"); - return ExitCode::FAILURE; - } - - let expires_at = Utc::now() + Duration::from_secs(5 * 60); - let Some(tag) = common::generate_tag( - common::Claims { - specific_claims: common::SpecificClaims::BatchApi(operation), - repo_path: &repo_name, - expires_at, - }, - config.key, - ) else { - eprintln!("Failed to generate validation tag"); - return ExitCode::FAILURE; - }; - - let response = json!({ - "header": { - "Authorization": format!( - "Gitolfs3-Hmac-Sha256 {tag} {}", - expires_at.timestamp() - ), - }, - "expires_at": expires_at.to_rfc3339_opts(chrono::SecondsFormat::Secs, true), - "href": format!("{}{}/info/lfs", config.href_base, repo_name), - }); - println!("{}", response); - - ExitCode::SUCCESS -} - -struct Config { - href_base: String, - key: common::Key, -} - -impl Config { - fn load() -> Result { - let Ok(href_base) = std::env::var("GITOLFS3_HREF_BASE") else { - bail!("configured base URL not provided"); - }; - if !href_base.ends_with('/') { - bail!("configured base URL does not end with a slash"); - } - - let Ok(key_path) = std::env::var("GITOLFS3_KEY_PATH") else { - bail!("key path not provided"); - }; - let key = common::load_key(&key_path).map_err(|e| anyhow!("failed to load key: {e}"))?; - - Ok(Self { href_base, key }) - } -} - -fn parse_cmdline() -> Result<(String, common::Operation)> { - let [repo_path, op_str] = get_cmdline_args::<2>()?; - let op: common::Operation = op_str - .parse() - .map_err(|e| anyhow!("unknown operation: {e}"))?; - validate_repo_path(&repo_path).map_err(|e| anyhow!("invalid repository name: {e}"))?; - Ok((repo_path.to_string(), op)) -} - -fn get_cmdline_args() -> Result<[String; N]> { - let args = std::env::args(); - if args.len() - 1 != N { - bail!("got {} argument(s), expected {}", args.len() - 1, N); - } - - // Does not allocate. 
- const EMPTY_STRING: String = String::new(); - let mut values = [EMPTY_STRING; N]; - - // Skip the first element; we do not care about the program name. - for (i, arg) in args.skip(1).enumerate() { - values[i] = arg - } - Ok(values) -} - -fn validate_repo_path(path: &str) -> Result<()> { - if path.len() > 100 { - bail!("too long (more than 100 characters)"); - } - if path.contains("//") - || path.contains("/./") - || path.contains("/../") - || path.starts_with("./") - || path.starts_with("../") - { - bail!("contains one or more path elements '.' and '..'"); - } - if path.starts_with('/') { - bail!("starts with '/', which is not allowed"); - } - if !path.ends_with(".git") { - bail!("missed '.git' suffix"); - } - Ok(()) -} - -fn repo_exists(name: &str) -> bool { - match std::fs::metadata(name) { - Ok(metadata) => metadata.is_dir(), - _ => false, - } -} diff --git a/gitolfs3-authenticate/Cargo.toml b/gitolfs3-authenticate/Cargo.toml new file mode 100644 index 0000000..5725abc --- /dev/null +++ b/gitolfs3-authenticate/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "gitolfs3-authenticate" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0" +chrono = "0.4" +gitolfs3-common = { path = "../gitolfs3-common" } +serde_json = "1" diff --git a/gitolfs3-authenticate/src/main.rs b/gitolfs3-authenticate/src/main.rs new file mode 100644 index 0000000..771f185 --- /dev/null +++ b/gitolfs3-authenticate/src/main.rs @@ -0,0 +1,134 @@ +use anyhow::{anyhow, bail, Result}; +use chrono::Utc; +use gitolfs3_common::{generate_tag, load_key, Claims, Key, Operation, SpecificClaims}; +use serde_json::json; +use std::{process::ExitCode, time::Duration}; + +fn main() -> ExitCode { + let config = match Config::load() { + Ok(config) => config, + Err(e) => { + eprintln!("Error: {e}"); + return ExitCode::from(2); + } + }; + + let (repo_name, operation) = match parse_cmdline() { + Ok(args) => args, + Err(e) => { + eprintln!("Error: {e}\n"); + eprintln!("Usage: git-lfs-authenticate upload/download"); + // Exit code 2 signifies bad usage of CLI. 
+            return ExitCode::from(2);
+        }
+    };
+
+    if !repo_exists(&repo_name) {
+        eprintln!("Error: repository does not exist");
+        return ExitCode::FAILURE;
+    }
+
+    let expires_at = Utc::now() + Duration::from_secs(5 * 60);
+    let Some(tag) = generate_tag(
+        Claims {
+            specific_claims: SpecificClaims::BatchApi(operation),
+            repo_path: &repo_name,
+            expires_at,
+        },
+        config.key,
+    ) else {
+        eprintln!("Failed to generate validation tag");
+        return ExitCode::FAILURE;
+    };
+
+    let response = json!({
+        "header": {
+            "Authorization": format!(
+                "Gitolfs3-Hmac-Sha256 {tag} {}",
+                expires_at.timestamp()
+            ),
+        },
+        "expires_at": expires_at.to_rfc3339_opts(chrono::SecondsFormat::Secs, true),
+        "href": format!("{}{}/info/lfs", config.href_base, repo_name),
+    });
+    println!("{}", response);
+
+    ExitCode::SUCCESS
+}
+
+struct Config {
+    href_base: String,
+    key: Key,
+}
+
+impl Config {
+    fn load() -> Result<Self> {
+        let Ok(href_base) = std::env::var("GITOLFS3_HREF_BASE") else {
+            bail!("configured base URL not provided");
+        };
+        if !href_base.ends_with('/') {
+            bail!("configured base URL does not end with a slash");
+        }
+
+        let Ok(key_path) = std::env::var("GITOLFS3_KEY_PATH") else {
+            bail!("key path not provided");
+        };
+        let key = load_key(&key_path).map_err(|e| anyhow!("failed to load key: {e}"))?;
+
+        Ok(Self { href_base, key })
+    }
+}
+
+fn parse_cmdline() -> Result<(String, Operation)> {
+    let [repo_path, op_str] = get_cmdline_args::<2>()?;
+    let op: Operation = op_str
+        .parse()
+        .map_err(|e| anyhow!("unknown operation: {e}"))?;
+    validate_repo_path(&repo_path).map_err(|e| anyhow!("invalid repository name: {e}"))?;
+    Ok((repo_path.to_string(), op))
+}
+
+fn get_cmdline_args<const N: usize>() -> Result<[String; N]> {
+    let args = std::env::args();
+    if args.len() - 1 != N {
+        bail!("got {} argument(s), expected {}", args.len() - 1, N);
+    }
+
+    // Does not allocate.
+    const EMPTY_STRING: String = String::new();
+    let mut values = [EMPTY_STRING; N];
+
+    // Skip the first element; we do not care about the program name.
+    for (i, arg) in args.skip(1).enumerate() {
+        values[i] = arg
+    }
+    Ok(values)
+}
+
+fn validate_repo_path(path: &str) -> Result<()> {
+    if path.len() > 100 {
+        bail!("too long (more than 100 characters)");
+    }
+    if path.contains("//")
+        || path.contains("/./")
+        || path.contains("/../")
+        || path.starts_with("./")
+        || path.starts_with("../")
+    {
+        bail!("contains one or more path elements '.' and '..'");
+    }
+    if path.starts_with('/') {
+        bail!("starts with '/', which is not allowed");
+    }
+    if !path.ends_with(".git") {
+        bail!("missed '.git' suffix");
+    }
+    Ok(())
+}
+
+fn repo_exists(name: &str) -> bool {
+    match std::fs::metadata(name) {
+        Ok(metadata) => metadata.is_dir(),
+        _ => false,
+    }
+}
diff --git a/gitolfs3-common/Cargo.toml b/gitolfs3-common/Cargo.toml
new file mode 100644
index 0000000..5724732
--- /dev/null
+++ b/gitolfs3-common/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "gitolfs3-common"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+chrono = "0.4"
+hmac-sha256 = "1.1"
+subtle = "2.5"
+serde = { version = "1", features = ["derive"] }
diff --git a/gitolfs3-common/src/lib.rs b/gitolfs3-common/src/lib.rs
new file mode 100644
index 0000000..917f566
--- /dev/null
+++ b/gitolfs3-common/src/lib.rs
@@ -0,0 +1,348 @@
+use chrono::{DateTime, Utc};
+use serde::{de, Deserialize, Serialize};
+use std::{
+    fmt::{self, Write},
+    ops,
+    str::FromStr,
+};
+use subtle::ConstantTimeEq;
+
+#[repr(u8)]
+enum AuthType {
+    BatchApi = 1,
+    Download = 2,
+}
+
+#[derive(Debug, Copy, Clone)]
+pub struct Claims<'a> {
+    pub specific_claims: SpecificClaims,
+    pub repo_path: &'a str,
+    pub expires_at: DateTime<Utc>,
+}
+
+#[derive(Debug, Copy, Clone)]
+pub enum SpecificClaims {
+    BatchApi(Operation),
+    Download(Oid),
+}
+
+pub type Oid = Digest<32>;
+
+#[derive(Debug, Eq, PartialEq, Copy, Clone, Serialize, Deserialize)]
+#[repr(u8)]
+pub enum Operation {
+    #[serde(rename = "download")]
+    Download = 1,
+    #[serde(rename = "upload")]
+    Upload = 2,
+}
+
+/// Returns None if the claims are invalid. Repo path length may be no more than 100 bytes.
+pub fn generate_tag(claims: Claims, key: impl AsRef<[u8]>) -> Option<Digest<32>> {
+    if claims.repo_path.len() > 100 {
+        return None;
+    }
+
+    let mut hmac = hmac_sha256::HMAC::new(key);
+    match claims.specific_claims {
+        SpecificClaims::BatchApi(operation) => {
+            hmac.update([AuthType::BatchApi as u8]);
+            hmac.update([operation as u8]);
+        }
+        SpecificClaims::Download(oid) => {
+            hmac.update([AuthType::Download as u8]);
+            hmac.update(oid.as_bytes());
+        }
+    }
+    hmac.update([claims.repo_path.len() as u8]);
+    hmac.update(claims.repo_path.as_bytes());
+    hmac.update(claims.expires_at.timestamp().to_be_bytes());
+    Some(hmac.finalize().into())
+}
+
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub struct ParseOperationError;
+
+impl fmt::Display for ParseOperationError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "operation should be 'download' or 'upload'")
+    }
+}
+
+impl FromStr for Operation {
+    type Err = ParseOperationError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "upload" => Ok(Self::Upload),
+            "download" => Ok(Self::Download),
+            _ => Err(ParseOperationError),
+        }
+    }
+}
+
+/// None means out of range.
+fn decode_nibble(c: u8) -> Option<u8> {
+    if c.is_ascii_digit() {
+        Some(c - b'0')
+    } else if (b'a'..=b'f').contains(&c) {
+        Some(c - b'a' + 10)
+    } else if (b'A'..=b'F').contains(&c) {
+        Some(c - b'A' + 10)
+    } else {
+        None
+    }
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct HexByte(pub u8);
+
+impl<'de> Deserialize<'de> for HexByte {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let str = <&str>::deserialize(deserializer)?;
+        let &[b1, b2] = str.as_bytes() else {
+            return Err(de::Error::invalid_length(
+                str.len(),
+                &"two hexadecimal characters",
+            ));
+        };
+        let (Some(b1), Some(b2)) = (decode_nibble(b1), decode_nibble(b2)) else {
+            return Err(de::Error::invalid_value(
+                de::Unexpected::Str(str),
+                &"two hexadecimal characters",
+            ));
+        };
+        Ok(HexByte((b1 << 4) | b2))
+    }
+}
+
+impl fmt::Display for HexByte {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let &HexByte(b) = self;
+        HexFmt(&[b]).fmt(f)
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub enum ParseHexError {
+    UnevenNibbles,
+    InvalidCharacter,
+    TooShort,
+    TooLong,
+}
+
+impl fmt::Display for ParseHexError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::UnevenNibbles => {
+                write!(f, "uneven amount of nibbles (chars in range [a-zA-Z0-9])")
+            }
+            Self::InvalidCharacter => write!(f, "non-hex character encountered"),
+            Self::TooShort => write!(f, "unexpected end of hex sequence"),
+            Self::TooLong => write!(f, "longer hex sequence than expected"),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum ReadHexError {
+    Io(std::io::Error),
+    Format(ParseHexError),
+}
+
+impl fmt::Display for ReadHexError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Self::Io(e) => e.fmt(f),
+            Self::Format(e) => e.fmt(f),
+        }
+    }
+}
+
+fn parse_hex_exact(value: &str, buf: &mut [u8]) -> Result<(), ParseHexError> {
+    if value.bytes().len() % 2 == 1 {
+        return Err(ParseHexError::UnevenNibbles);
+    }
+    if value.bytes().len() < 2 * buf.len() {
+        return Err(ParseHexError::TooShort);
+    }
+    if value.bytes().len() > 2 * buf.len() {
+        return Err(ParseHexError::TooLong);
+    }
+    for (i, c) in value.bytes().enumerate() {
+        if let Some(b) = decode_nibble(c) {
+            if i % 2 == 0 {
+                buf[i / 2] = b << 4;
+            } else {
+                buf[i / 2] |= b;
+            }
+        } else {
+            return Err(ParseHexError::InvalidCharacter);
+        }
+    }
+    Ok(())
+}
+
+pub type Key = SafeByteArray<64>;
+
+pub fn load_key(path: &str) -> Result<Key, ReadHexError> {
+    let key_str = std::fs::read_to_string(path).map_err(ReadHexError::Io)?;
+    key_str.trim().parse().map_err(ReadHexError::Format)
+}
+
+pub struct SafeByteArray<const N: usize> {
+    inner: [u8; N],
+}
+
+impl<const N: usize> SafeByteArray<N> {
+    pub fn new() -> Self {
+        Self { inner: [0; N] }
+    }
+}
+
+impl<const N: usize> Default for SafeByteArray<N> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<const N: usize> AsRef<[u8]> for SafeByteArray<N> {
+    fn as_ref(&self) -> &[u8] {
+        &self.inner
+    }
+}
+
+impl<const N: usize> AsMut<[u8]> for SafeByteArray<N> {
+    fn as_mut(&mut self) -> &mut [u8] {
+        &mut self.inner
+    }
+}
+
+impl<const N: usize> Drop for SafeByteArray<N> {
+    fn drop(&mut self) {
+        self.inner.fill(0)
+    }
+}
+
+impl<const N: usize> FromStr for SafeByteArray<N> {
+    type Err = ParseHexError;
+
+    fn from_str(value: &str) -> Result<Self, Self::Err> {
+        let mut sba = Self { inner: [0u8; N] };
+        parse_hex_exact(value, &mut sba.inner)?;
+        Ok(sba)
+    }
+}
+
+pub struct HexFmt<B: AsRef<[u8]>>(pub B);
+
+impl<B: AsRef<[u8]>> fmt::Display for HexFmt<B> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let HexFmt(buf) = self;
+        for b in buf.as_ref() {
+            let (high, low) = (b >> 4, b & 0xF);
let highc = if high < 10 { + high + b'0' + } else { + high - 10 + b'a' + }; + let lowc = if low < 10 { + low + b'0' + } else { + low - 10 + b'a' + }; + f.write_char(highc as char)?; + f.write_char(lowc as char)?; + } + Ok(()) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct Digest { + inner: [u8; N], +} + +impl ops::Index for Digest { + type Output = u8; + + fn index(&self, index: usize) -> &Self::Output { + &self.inner[index] + } +} + +impl Digest { + pub fn as_bytes(&self) -> &[u8; N] { + &self.inner + } +} + +impl fmt::Display for Digest { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + HexFmt(&self.inner).fmt(f) + } +} + +impl Digest { + pub fn new(data: [u8; N]) -> Self { + Self { inner: data } + } +} + +impl From<[u8; N]> for Digest { + fn from(value: [u8; N]) -> Self { + Self::new(value) + } +} + +impl From> for [u8; N] { + fn from(val: Digest) -> Self { + val.inner + } +} + +impl FromStr for Digest { + type Err = ParseHexError; + + fn from_str(value: &str) -> Result { + let mut buf = [0u8; N]; + parse_hex_exact(value, &mut buf)?; + Ok(buf.into()) + } +} + +impl ConstantTimeEq for Digest { + fn ct_eq(&self, other: &Self) -> subtle::Choice { + self.inner.ct_eq(&other.inner) + } +} + +impl PartialEq for Digest { + fn eq(&self, other: &Self) -> bool { + self.ct_eq(other).into() + } +} + +impl Eq for Digest {} + +impl<'de, const N: usize> Deserialize<'de> for Digest { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let hex = <&str>::deserialize(deserializer)?; + Digest::from_str(hex).map_err(de::Error::custom) + } +} + +impl Serialize for Digest { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(&format!("{self}")) + } +} diff --git a/gitolfs3-server/Cargo.toml b/gitolfs3-server/Cargo.toml new file mode 100644 index 0000000..04edeea --- /dev/null +++ b/gitolfs3-server/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "gitolfs3-server" +version = "0.1.0" +edition = "2021" + +[dependencies] +aws-config = { version = "1.1.2" } +aws-sdk-s3 = "1.12.0" +axum = "0.7" +base64 = "0.21" +chrono = { version = "0.4", features = ["serde"] } +gitolfs3-common = { path = "../gitolfs3-common" } +mime = "0.3" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tokio = { version = "1.35", features = ["full"] } +tokio-util = "0.7" +tower = "0.4" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/gitolfs3-server/src/main.rs b/gitolfs3-server/src/main.rs new file mode 100644 index 0000000..b05a0c8 --- /dev/null +++ b/gitolfs3-server/src/main.rs @@ -0,0 +1,1154 @@ +use aws_sdk_s3::{error::SdkError, operation::head_object::HeadObjectOutput}; +use axum::{ + async_trait, + extract::{rejection, FromRequest, FromRequestParts, OriginalUri, Path, Request, State}, + http::{header, request::Parts, HeaderMap, HeaderValue, StatusCode, Uri}, + response::{IntoResponse, Response}, + routing::{get, post}, + Extension, Json, Router, ServiceExt, +}; +use base64::prelude::*; +use chrono::{DateTime, Utc}; +use gitolfs3_common::{ + generate_tag, load_key, Claims, Digest, HexByte, Key, Oid, Operation, SpecificClaims, +}; +use serde::{ + de::{self, DeserializeOwned}, + Deserialize, Serialize, +}; +use std::{ + collections::{HashMap, HashSet}, + process::ExitCode, + sync::Arc, +}; +use tokio::io::AsyncWriteExt; +use tower::Layer; + +#[tokio::main] +async fn main() -> ExitCode { + tracing_subscriber::fmt::init(); + + let conf = match Config::load() { + Ok(conf) => conf, + 
Err(e) => { + println!("Error: {e}"); + return ExitCode::from(2); + } + }; + + let dl_limiter = DownloadLimiter::new(conf.download_limit).await; + let dl_limiter = Arc::new(tokio::sync::Mutex::new(dl_limiter)); + + let resetter_dl_limiter = dl_limiter.clone(); + tokio::spawn(async move { + loop { + println!("Resetting download counter in one hour"); + tokio::time::sleep(std::time::Duration::from_secs(3600)).await; + println!("Resetting download counter"); + resetter_dl_limiter.lock().await.reset().await; + } + }); + + let shared_state = Arc::new(AppState { + s3_client: conf.s3_client, + s3_bucket: conf.s3_bucket, + authz_conf: conf.authz_conf, + base_url: conf.base_url, + dl_limiter, + }); + let app = Router::new() + .route("/batch", post(batch)) + .route("/:oid0/:oid1/:oid", get(obj_download)) + .with_state(shared_state); + + let middleware = axum::middleware::map_request(rewrite_url); + let app_with_middleware = middleware.layer(app); + + let listener = match tokio::net::TcpListener::bind(conf.listen_addr).await { + Ok(listener) => listener, + Err(e) => { + println!("Failed to listen: {e}"); + return ExitCode::FAILURE; + } + }; + + match axum::serve(listener, app_with_middleware.into_make_service()).await { + Ok(_) => ExitCode::SUCCESS, + Err(e) => { + println!("Error serving: {e}"); + ExitCode::FAILURE + } + } +} + +#[derive(Clone)] +struct RepositoryName(String); + +struct RepositoryNameRejection; + +impl IntoResponse for RepositoryNameRejection { + fn into_response(self) -> Response { + (StatusCode::INTERNAL_SERVER_ERROR, "Missing repository name").into_response() + } +} + +#[async_trait] +impl FromRequestParts for RepositoryName { + type Rejection = RepositoryNameRejection; + + async fn from_request_parts(parts: &mut Parts, state: &S) -> Result { + let Ok(Extension(repo_name)) = Extension::::from_request_parts(parts, state).await + else { + return Err(RepositoryNameRejection); + }; + Ok(repo_name) + } +} + +async fn rewrite_url( + mut req: axum::http::Request, +) -> Result, StatusCode> { + let uri = req.uri(); + let original_uri = OriginalUri(uri.clone()); + + let Some(path_and_query) = uri.path_and_query() else { + // L @ no path & query + return Err(StatusCode::BAD_REQUEST); + }; + let Some((repo, path)) = path_and_query.path().split_once("/info/lfs/objects") else { + return Err(StatusCode::NOT_FOUND); + }; + let repo = repo + .trim_start_matches('/') + .trim_end_matches('/') + .to_string(); + if !path.starts_with('/') || !repo.ends_with(".git") { + return Err(StatusCode::NOT_FOUND); + } + + let mut parts = uri.clone().into_parts(); + parts.path_and_query = match path_and_query.query() { + None => path.try_into().ok(), + Some(q) => format!("{path}?{q}").try_into().ok(), + }; + let Ok(new_uri) = Uri::from_parts(parts) else { + return Err(StatusCode::INTERNAL_SERVER_ERROR); + }; + + *req.uri_mut() = new_uri; + req.extensions_mut().insert(original_uri); + req.extensions_mut().insert(RepositoryName(repo)); + + Ok(req) +} + +struct AppState { + s3_client: aws_sdk_s3::Client, + s3_bucket: String, + authz_conf: AuthorizationConfig, + // Should not end with a slash. 
+ base_url: String, + dl_limiter: Arc>, +} + +struct Env { + s3_access_key_id: String, + s3_secret_access_key: String, + s3_bucket: String, + s3_region: String, + s3_endpoint: String, + base_url: String, + key_path: String, + listen_host: String, + listen_port: String, + download_limit: String, + trusted_forwarded_hosts: String, +} + +fn require_env(name: &str) -> Result { + std::env::var(name) + .map_err(|_| format!("environment variable {name} should be defined and valid")) +} + +impl Env { + fn load() -> Result { + Ok(Env { + s3_secret_access_key: require_env("GITOLFS3_S3_SECRET_ACCESS_KEY_FILE")?, + s3_access_key_id: require_env("GITOLFS3_S3_ACCESS_KEY_ID_FILE")?, + s3_region: require_env("GITOLFS3_S3_REGION")?, + s3_endpoint: require_env("GITOLFS3_S3_ENDPOINT")?, + s3_bucket: require_env("GITOLFS3_S3_BUCKET")?, + base_url: require_env("GITOLFS3_BASE_URL")?, + key_path: require_env("GITOLFS3_KEY_PATH")?, + listen_host: require_env("GITOLFS3_LISTEN_HOST")?, + listen_port: require_env("GITOLFS3_LISTEN_PORT")?, + download_limit: require_env("GITOLFS3_DOWNLOAD_LIMIT")?, + trusted_forwarded_hosts: std::env::var("GITOLFS3_TRUSTED_FORWARDED_HOSTS") + .unwrap_or_default(), + }) + } +} + +fn get_s3_client(env: &Env) -> Result { + let access_key_id = std::fs::read_to_string(&env.s3_access_key_id)?; + let secret_access_key = std::fs::read_to_string(&env.s3_secret_access_key)?; + + let credentials = aws_sdk_s3::config::Credentials::new( + access_key_id, + secret_access_key, + None, + None, + "gitolfs3-env", + ); + let config = aws_config::SdkConfig::builder() + .behavior_version(aws_config::BehaviorVersion::latest()) + .region(aws_config::Region::new(env.s3_region.clone())) + .endpoint_url(&env.s3_endpoint) + .credentials_provider(aws_sdk_s3::config::SharedCredentialsProvider::new( + credentials, + )) + .build(); + Ok(aws_sdk_s3::Client::new(&config)) +} + +struct Config { + listen_addr: (String, u16), + base_url: String, + authz_conf: AuthorizationConfig, + s3_client: aws_sdk_s3::Client, + s3_bucket: String, + download_limit: u64, +} + +impl Config { + fn load() -> Result { + let env = match Env::load() { + Ok(env) => env, + Err(e) => return Err(format!("failed to load configuration: {e}")), + }; + + let s3_client = match get_s3_client(&env) { + Ok(s3_client) => s3_client, + Err(e) => return Err(format!("failed to create S3 client: {e}")), + }; + let key = match load_key(&env.key_path) { + Ok(key) => key, + Err(e) => return Err(format!("failed to load Gitolfs3 key: {e}")), + }; + + let trusted_forwarded_hosts: HashSet = env + .trusted_forwarded_hosts + .split(',') + .map(|s| s.to_owned()) + .filter(|s| !s.is_empty()) + .collect(); + let base_url = env.base_url.trim_end_matches('/').to_string(); + + let Ok(listen_port): Result = env.listen_port.parse() else { + return Err("configured GITOLFS3_LISTEN_PORT is invalid".to_string()); + }; + let Ok(download_limit): Result = env.download_limit.parse() else { + return Err("configured GITOLFS3_DOWNLOAD_LIMIT is invalid".to_string()); + }; + + Ok(Self { + listen_addr: (env.listen_host, listen_port), + base_url, + authz_conf: AuthorizationConfig { + key, + trusted_forwarded_hosts, + }, + s3_client, + s3_bucket: env.s3_bucket, + download_limit, + }) + } +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Copy)] +enum TransferAdapter { + #[serde(rename = "basic")] + Basic, + #[serde(other)] + Unknown, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Copy)] +enum HashAlgo { + #[serde(rename = "sha256")] + Sha256, + 
#[serde(other)]
+    Unknown,
+}
+
+impl Default for HashAlgo {
+    fn default() -> Self {
+        Self::Sha256
+    }
+}
+
+#[derive(Debug, Deserialize, PartialEq, Eq, Clone)]
+struct BatchRequestObject {
+    oid: Oid,
+    size: i64,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+struct BatchRef {
+    name: String,
+}
+
+fn default_transfers() -> Vec<TransferAdapter> {
+    vec![TransferAdapter::Basic]
+}
+
+#[derive(Debug, Deserialize, PartialEq, Eq, Clone)]
+struct BatchRequest {
+    operation: Operation,
+    #[serde(default = "default_transfers")]
+    transfers: Vec<TransferAdapter>,
+    objects: Vec<BatchRequestObject>,
+    #[serde(default)]
+    hash_algo: HashAlgo,
+}
+
+#[derive(Debug, Clone)]
+struct GitLfsJson<T>(Json<T>);
+
+const LFS_MIME: &str = "application/vnd.git-lfs+json";
+
+enum GitLfsJsonRejection {
+    Json(rejection::JsonRejection),
+    MissingGitLfsJsonContentType,
+}
+
+impl IntoResponse for GitLfsJsonRejection {
+    fn into_response(self) -> Response {
+        match self {
+            Self::Json(rej) => rej.into_response(),
+            Self::MissingGitLfsJsonContentType => make_error_resp(
+                StatusCode::UNSUPPORTED_MEDIA_TYPE,
+                &format!("Expected request with `Content-Type: {LFS_MIME}`"),
+            )
+            .into_response(),
+        }
+    }
+}
+
+fn is_git_lfs_json_mimetype(mimetype: &str) -> bool {
+    let Ok(mime) = mimetype.parse::<mime::Mime>() else {
+        return false;
+    };
+    if mime.type_() != mime::APPLICATION
+        || mime.subtype() != "vnd.git-lfs"
+        || mime.suffix() != Some(mime::JSON)
+    {
+        return false;
+    }
+    match mime.get_param(mime::CHARSET) {
+        Some(mime::UTF_8) | None => true,
+        Some(_) => false,
+    }
+}
+
+fn has_git_lfs_json_content_type(req: &Request) -> bool {
+    let Some(content_type) = req.headers().get(header::CONTENT_TYPE) else {
+        return false;
+    };
+    let Ok(content_type) = content_type.to_str() else {
+        return false;
+    };
+    is_git_lfs_json_mimetype(content_type)
+}
+
+#[async_trait]
+impl<T, S> FromRequest<S> for GitLfsJson<T>
+where
+    T: DeserializeOwned,
+    S: Send + Sync,
+{
+    type Rejection = GitLfsJsonRejection;
+
+    async fn from_request(req: Request, state: &S) -> Result<Self, Self::Rejection> {
+        if !has_git_lfs_json_content_type(&req) {
+            return Err(GitLfsJsonRejection::MissingGitLfsJsonContentType);
+        }
+        Json::<T>::from_request(req, state)
+            .await
+            .map(GitLfsJson)
+            .map_err(GitLfsJsonRejection::Json)
+    }
+}
+
+impl<T: Serialize> IntoResponse for GitLfsJson<T> {
+    fn into_response(self) -> Response {
+        let GitLfsJson(json) = self;
+        let mut resp = json.into_response();
+        resp.headers_mut().insert(
+            header::CONTENT_TYPE,
+            HeaderValue::from_static("application/vnd.git-lfs+json; charset=utf-8"),
+        );
+        resp
+    }
+}
+
+#[derive(Debug, Serialize)]
+struct GitLfsErrorData<'a> {
+    message: &'a str,
+}
+
+type GitLfsErrorResponse<'a> = (StatusCode, GitLfsJson<GitLfsErrorData<'a>>);
+
+const fn make_error_resp(code: StatusCode, message: &str) -> GitLfsErrorResponse {
+    (code, GitLfsJson(Json(GitLfsErrorData { message })))
+}
+
+#[derive(Debug, Serialize, Clone)]
+struct BatchResponseObjectAction {
+    href: String,
+    #[serde(skip_serializing_if = "HashMap::is_empty")]
+    header: HashMap<String, String>,
+    expires_at: DateTime<Utc>,
+}
+
+#[derive(Default, Debug, Serialize, Clone)]
+struct BatchResponseObjectActions {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    upload: Option<BatchResponseObjectAction>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    download: Option<BatchResponseObjectAction>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    verify: Option<BatchResponseObjectAction>,
+}
+
+#[derive(Debug, Clone, Serialize)]
+struct BatchResponseObjectError {
+    code: u16,
+    message: String,
+}
+
+#[derive(Debug, Serialize, Clone)]
+struct BatchResponseObject {
+    oid: Oid,
+    size: i64,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    authenticated: Option<bool>,
+    actions: BatchResponseObjectActions,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    error: Option<BatchResponseObjectError>,
+}
+
+impl BatchResponseObject {
+    fn error(obj: &BatchRequestObject, code: StatusCode, message: String) -> BatchResponseObject {
+        BatchResponseObject {
+            oid: obj.oid,
+            size: obj.size,
+            authenticated: None,
+            actions: Default::default(),
+            error: Some(BatchResponseObjectError {
+                code: code.as_u16(),
+                message,
+            }),
+        }
+    }
+}
+
+#[derive(Debug, Serialize, Clone)]
+struct BatchResponse {
+    transfer: TransferAdapter,
+    objects: Vec<BatchResponseObject>,
+    hash_algo: HashAlgo,
+}
+
+fn validate_checksum(oid: Oid, obj: &HeadObjectOutput) -> bool {
+    if let Some(checksum) = obj.checksum_sha256() {
+        if let Ok(checksum) = BASE64_STANDARD.decode(checksum) {
+            if let Ok(checksum32b) = TryInto::<[u8; 32]>::try_into(checksum) {
+                return Oid::from(checksum32b) == oid;
+            }
+        }
+    }
+    true
+}
+
+fn validate_size(expected: i64, obj: &HeadObjectOutput) -> bool {
+    if let Some(length) = obj.content_length() {
+        return length == expected;
+    }
+    true
+}
+
+async fn handle_upload_object(
+    state: &AppState,
+    repo: &str,
+    obj: &BatchRequestObject,
+) -> Option<BatchResponseObject> {
+    let (oid0, oid1) = (HexByte(obj.oid[0]), HexByte(obj.oid[1]));
+    let full_path = format!("{repo}/lfs/objects/{}/{}/{}", oid0, oid1, obj.oid);
+
+    match state
+        .s3_client
+        .head_object()
+        .bucket(&state.s3_bucket)
+        .key(full_path.clone())
+        .checksum_mode(aws_sdk_s3::types::ChecksumMode::Enabled)
+        .send()
+        .await
+    {
+        Ok(result) => {
+            if validate_size(obj.size, &result) && validate_checksum(obj.oid, &result) {
+                return None;
+            }
+        }
+        Err(SdkError::ServiceError(e)) if e.err().is_not_found() => {}
+        Err(e) => {
+            println!("Failed to HeadObject (repo {repo}, OID {}): {e}", obj.oid);
+            return Some(BatchResponseObject::error(
+                obj,
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "Failed to query object information".to_string(),
+            ));
+        }
+    };
+
+    let expires_in = std::time::Duration::from_secs(5 * 60);
+    let expires_at = Utc::now() + expires_in;
+
+    let Ok(config) = aws_sdk_s3::presigning::PresigningConfig::expires_in(expires_in) else {
+        return Some(BatchResponseObject::error(
+            obj,
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "Failed to generate upload URL".to_string(),
+        ));
+    };
+    let Ok(presigned) = state
+        .s3_client
+        .put_object()
+        .bucket(&state.s3_bucket)
+        .key(full_path)
+        .checksum_sha256(obj.oid.to_string())
+        .content_length(obj.size)
+        .presigned(config)
+        .await
+    else {
+        return Some(BatchResponseObject::error(
+            obj,
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "Failed to generate upload URL".to_string(),
+        ));
+    };
+    Some(BatchResponseObject {
+        oid: obj.oid,
+        size: obj.size,
+        authenticated: Some(true),
+        actions: BatchResponseObjectActions {
+            upload: Some(BatchResponseObjectAction {
+                header: presigned
+                    .headers()
+                    .map(|(k, v)| (k.to_owned(), v.to_owned()))
+                    .collect(),
+                expires_at,
+                href: presigned.uri().to_string(),
+            }),
+            ..Default::default()
+        },
+        error: None,
+    })
+}
+
+async fn handle_download_object(
+    state: &AppState,
+    repo: &str,
+    obj: &BatchRequestObject,
+    trusted: bool,
+) -> BatchResponseObject {
+    let (oid0, oid1) = (HexByte(obj.oid[0]), HexByte(obj.oid[1]));
+    let full_path = format!("{repo}/lfs/objects/{}/{}/{}", oid0, oid1, obj.oid);
+
+    let result = match state
+        .s3_client
+        .head_object()
+        .bucket(&state.s3_bucket)
+        .key(&full_path)
+        .checksum_mode(aws_sdk_s3::types::ChecksumMode::Enabled)
+        .send()
+        .await
+    {
+        Ok(result) => result,
+        Err(e) => {
+            println!("Failed to HeadObject (repo {repo}, OID {}): {e}", obj.oid);
+            return BatchResponseObject::error(
+                obj,
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "Failed to query object information".to_string(),
+            );
+        }
+    };
+
+    // Scaleway actually doesn't provide SHA256 support, but maybe in the future :)
+    if !validate_checksum(obj.oid, &result) {
+        return BatchResponseObject::error(
+            obj,
+            StatusCode::UNPROCESSABLE_ENTITY,
+            "Object corrupted".to_string(),
+        );
+    }
+    if !validate_size(obj.size, &result) {
+        return BatchResponseObject::error(
+            obj,
+            StatusCode::UNPROCESSABLE_ENTITY,
+            "Incorrect size specified (or object corrupted)".to_string(),
+        );
+    }
+
+    let expires_in = std::time::Duration::from_secs(5 * 60);
+    let expires_at = Utc::now() + expires_in;
+
+    if trusted {
+        let Ok(config) = aws_sdk_s3::presigning::PresigningConfig::expires_in(expires_in) else {
+            return BatchResponseObject::error(
+                obj,
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "Failed to generate download URL".to_string(),
+            );
+        };
+        let Ok(presigned) = state
+            .s3_client
+            .get_object()
+            .bucket(&state.s3_bucket)
+            .key(full_path)
+            .presigned(config)
+            .await
+        else {
+            return BatchResponseObject::error(
+                obj,
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "Failed to generate download URL".to_string(),
+            );
+        };
+        return BatchResponseObject {
+            oid: obj.oid,
+            size: obj.size,
+            authenticated: Some(true),
+            actions: BatchResponseObjectActions {
+                download: Some(BatchResponseObjectAction {
+                    header: presigned
+                        .headers()
+                        .map(|(k, v)| (k.to_owned(), v.to_owned()))
+                        .collect(),
+                    expires_at,
+                    href: presigned.uri().to_string(),
+                }),
+                ..Default::default()
+            },
+            error: None,
+        };
+    }
+
+    if let Some(content_length) = result.content_length() {
+        if content_length > 0 {
+            match state
+                .dl_limiter
+                .lock()
+                .await
+                .request(content_length as u64)
+                .await
+            {
+                Ok(true) => {}
+                Ok(false) => {
+                    return BatchResponseObject::error(
+                        obj,
+                        StatusCode::SERVICE_UNAVAILABLE,
+                        "Public LFS downloads temporarily unavailable".to_string(),
+                    );
+                }
+                Err(e) => {
+                    println!("Failed to request {content_length} bytes from download limiter: {e}");
+                    return BatchResponseObject::error(
+                        obj,
+                        StatusCode::INTERNAL_SERVER_ERROR,
+                        "Internal server error".to_string(),
+                    );
+                }
+            }
+        }
+    }
+
+    let Some(tag) = generate_tag(
+        Claims {
+            specific_claims: SpecificClaims::Download(obj.oid),
+            repo_path: repo,
+            expires_at,
+        },
+        &state.authz_conf.key,
+    ) else {
+        return BatchResponseObject::error(
+            obj,
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "Internal server error".to_string(),
+        );
+    };
+
+    let upload_path = format!(
+        "{repo}/info/lfs/objects/{}/{}/{}",
+        HexByte(obj.oid[0]),
+        HexByte(obj.oid[1]),
+        obj.oid,
+    );
+
+    BatchResponseObject {
+        oid: obj.oid,
+        size: obj.size,
+        authenticated: Some(true),
+        actions: BatchResponseObjectActions {
+            download: Some(BatchResponseObjectAction {
+                header: {
+                    let mut map = HashMap::new();
+                    map.insert(
+                        "Authorization".to_string(),
+                        format!("Gitolfs3-Hmac-Sha256 {tag} {}", expires_at.timestamp()),
+                    );
+                    map
+                },
+                expires_at,
+                href: format!("{}/{upload_path}", state.base_url),
+            }),
+            ..Default::default()
+        },
+        error: None,
+    }
}
+
+struct AuthorizationConfig {
+    trusted_forwarded_hosts: HashSet<String>,
+    key: Key,
+}
+
+struct Trusted(bool);
+
+fn forwarded_from_trusted_host(
+    headers: &HeaderMap,
+    trusted: &HashSet<String>,
+) -> Result<bool, GitLfsErrorResponse<'static>> {
+    if let Some(forwarded_host) = headers.get("X-Forwarded-Host") {
+        if let Ok(forwarded_host) = forwarded_host.to_str() {
+            if trusted.contains(forwarded_host) {
+                return Ok(true);
+            }
+        } else {
+            return Err(make_error_resp(
+                StatusCode::NOT_FOUND,
+                "Invalid X-Forwarded-Host header",
+            ));
+        }
+    }
+    Ok(false)
+}
+
+const REPO_NOT_FOUND: GitLfsErrorResponse =
+    make_error_resp(StatusCode::NOT_FOUND, "Repository not found");
+
+fn authorize_batch(
+    conf: &AuthorizationConfig,
+    repo_path: &str,
+    public: bool,
+    operation: Operation,
+    headers: &HeaderMap,
+) -> Result<Trusted, GitLfsErrorResponse<'static>> {
+    // - No authentication required for downloading exported repos
+    // - When authenticated:
+    //   - Download / upload over presigned URLs
+    // - When accessing over Tailscale:
+    //   - No authentication required for downloading from any repo
+
+    let claims = VerifyClaimsInput {
+        specific_claims: SpecificClaims::BatchApi(operation),
+        repo_path,
+    };
+    if !verify_claims(conf, &claims, headers)? {
+        return authorize_batch_unauthenticated(conf, public, operation, headers);
+    }
+    Ok(Trusted(true))
+}
+
+fn authorize_batch_unauthenticated(
+    conf: &AuthorizationConfig,
+    public: bool,
+    operation: Operation,
+    headers: &HeaderMap,
+) -> Result<Trusted, GitLfsErrorResponse<'static>> {
+    let trusted = forwarded_from_trusted_host(headers, &conf.trusted_forwarded_hosts)?;
+    match operation {
+        Operation::Upload => {
+            // Trusted users can clone all repositories (by virtue of accessing the server via a
+            // trusted network). However, they cannot push without proper authentication. Untrusted
+            // users who are also not authenticated should not need to know which repositories exist.
+            // Therefore, we tell untrusted && unauthenticated users that the repo doesn't exist, but
+            // tell trusted users that they need to authenticate.
+            if !trusted {
+                return Err(REPO_NOT_FOUND);
+            }
+            Err(make_error_resp(
+                StatusCode::FORBIDDEN,
+                "Authentication required to upload",
+            ))
+        }
+        Operation::Download => {
+            // Again, trusted users can see all repos. For untrusted users, we first need to check
+            // whether the repo is public before we authorize. If the user is untrusted and the
+            // repo isn't public, we just act like it doesn't even exist.
+            if !trusted {
+                if !public {
+                    return Err(REPO_NOT_FOUND);
+                }
+                return Ok(Trusted(false));
+            }
+            Ok(Trusted(true))
+        }
+    }
+}
+
+fn repo_exists(name: &str) -> bool {
+    let Ok(metadata) = std::fs::metadata(name) else {
+        return false;
+    };
+    metadata.is_dir()
+}
+
+fn is_repo_public(name: &str) -> Option<bool> {
+    if !repo_exists(name) {
+        return None;
+    }
+    match std::fs::metadata(format!("{name}/git-daemon-export-ok")) {
+        Ok(metadata) if metadata.is_file() => Some(true),
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Some(false),
+        _ => None,
+    }
+}
+
+async fn batch(
+    State(state): State<Arc<AppState>>,
+    headers: HeaderMap,
+    RepositoryName(repo): RepositoryName,
+    GitLfsJson(Json(payload)): GitLfsJson<BatchRequest>,
+) -> Response {
+    let Some(public) = is_repo_public(&repo) else {
+        return REPO_NOT_FOUND.into_response();
+    };
+    let Trusted(trusted) = match authorize_batch(
+        &state.authz_conf,
+        &repo,
+        public,
+        payload.operation,
+        &headers,
+    ) {
+        Ok(authn) => authn,
+        Err(e) => return e.into_response(),
+    };
+
+    if !headers
+        .get_all("Accept")
+        .iter()
+        .filter_map(|v| v.to_str().ok())
+        .any(is_git_lfs_json_mimetype)
+    {
+        let message = format!("Expected `{LFS_MIME}` in list of acceptable response media types");
+        return make_error_resp(StatusCode::NOT_ACCEPTABLE, &message).into_response();
+    }
+
+    if payload.hash_algo != HashAlgo::Sha256 {
+        let message = "Unsupported hashing algorithm specified";
+        return make_error_resp(StatusCode::CONFLICT, message).into_response();
+    }
+    if !payload.transfers.is_empty() && !payload.transfers.contains(&TransferAdapter::Basic) {
+        let message = "Unsupported transfer adapter specified (supported: basic)";
+        return make_error_resp(StatusCode::CONFLICT, message).into_response();
+    }
+
+    let mut resp = BatchResponse {
+        transfer: TransferAdapter::Basic,
+        objects: vec![],
+        hash_algo: HashAlgo::Sha256,
+    };
+    for obj in payload.objects {
+        match payload.operation {
+            Operation::Download => resp
+                .objects
+                .push(handle_download_object(&state, &repo, &obj, trusted).await),
+            Operation::Upload => {
+                if let Some(obj_resp) = handle_upload_object(&state, &repo, &obj).await {
+                    resp.objects.push(obj_resp);
+                }
+            }
+        };
+    }
+    GitLfsJson(Json(resp)).into_response()
+}
+
+#[derive(Deserialize, Copy, Clone)]
+#[serde(remote = "Self")]
+struct FileParams {
+    oid0: HexByte,
+    oid1: HexByte,
+    oid: Oid,
+}
+
+impl<'de> Deserialize<'de> for FileParams {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let unchecked @ FileParams {
+            oid0: HexByte(oid0),
+            oid1: HexByte(oid1),
+            oid,
+        } = FileParams::deserialize(deserializer)?;
+        if oid0 != oid.as_bytes()[0] {
+            return Err(de::Error::custom(
+                "first OID path part does not match first byte of full OID",
+            ));
+        }
+        if oid1 != oid.as_bytes()[1] {
+            return Err(de::Error::custom(
+                "second OID path part does not match second byte of full OID",
+            ));
+        }
+        Ok(unchecked)
+    }
+}
+
+pub struct VerifyClaimsInput<'a> {
+    pub specific_claims: SpecificClaims,
+    pub repo_path: &'a str,
+}
+
+fn verify_claims(
+    conf: &AuthorizationConfig,
+    claims: &VerifyClaimsInput,
+    headers: &HeaderMap,
+) -> Result<bool, GitLfsErrorResponse<'static>> {
+    const INVALID_AUTHZ_HEADER: GitLfsErrorResponse =
+        make_error_resp(StatusCode::BAD_REQUEST, "Invalid authorization header");
+
+    let Some(authz) = headers.get(header::AUTHORIZATION) else {
+        return Ok(false);
+    };
+    let authz = authz.to_str().map_err(|_| INVALID_AUTHZ_HEADER)?;
+    let val = authz
+        .strip_prefix("Gitolfs3-Hmac-Sha256 ")
+        .ok_or(INVALID_AUTHZ_HEADER)?;
+    let (tag, expires_at) =
val.split_once(' ').ok_or(INVALID_AUTHZ_HEADER)?; + let tag: Digest<32> = tag.parse().map_err(|_| INVALID_AUTHZ_HEADER)?; + let expires_at: i64 = expires_at.parse().map_err(|_| INVALID_AUTHZ_HEADER)?; + let expires_at = DateTime::::from_timestamp(expires_at, 0).ok_or(INVALID_AUTHZ_HEADER)?; + let expected_tag = generate_tag( + Claims { + specific_claims: claims.specific_claims, + repo_path: claims.repo_path, + expires_at, + }, + &conf.key, + ) + .ok_or_else(|| make_error_resp(StatusCode::INTERNAL_SERVER_ERROR, "Internal server error"))?; + if tag != expected_tag { + return Err(INVALID_AUTHZ_HEADER); + } + + Ok(true) +} + +fn authorize_get( + conf: &AuthorizationConfig, + repo_path: &str, + oid: Oid, + headers: &HeaderMap, +) -> Result<(), GitLfsErrorResponse<'static>> { + let claims = VerifyClaimsInput { + specific_claims: SpecificClaims::Download(oid), + repo_path, + }; + if !verify_claims(conf, &claims, headers)? { + return Err(make_error_resp( + StatusCode::UNAUTHORIZED, + "Repository not found", + )); + } + Ok(()) +} + +async fn obj_download( + State(state): State>, + headers: HeaderMap, + RepositoryName(repo): RepositoryName, + Path(FileParams { oid0, oid1, oid }): Path, +) -> Response { + if let Err(e) = authorize_get(&state.authz_conf, &repo, oid, &headers) { + return e.into_response(); + } + + let full_path = format!("{repo}/lfs/objects/{}/{}/{}", oid0, oid1, oid); + let result = match state + .s3_client + .get_object() + .bucket(&state.s3_bucket) + .key(full_path) + .checksum_mode(aws_sdk_s3::types::ChecksumMode::Enabled) + .send() + .await + { + Ok(result) => result, + Err(e) => { + println!("Failed to GetObject (repo {repo}, OID {oid}): {e}"); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + "Failed to query object information", + ) + .into_response(); + } + }; + + let mut headers = header::HeaderMap::new(); + if let Some(content_type) = result.content_type { + let Ok(header_value) = content_type.try_into() else { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + "Object has invalid content type", + ) + .into_response(); + }; + headers.insert(header::CONTENT_TYPE, header_value); + } + if let Some(content_length) = result.content_length { + headers.insert(header::CONTENT_LENGTH, content_length.into()); + } + + let async_read = result.body.into_async_read(); + let stream = tokio_util::io::ReaderStream::new(async_read); + let body = axum::body::Body::from_stream(stream); + + (headers, body).into_response() +} + +struct DownloadLimiter { + current: u64, + limit: u64, +} + +impl DownloadLimiter { + async fn new(limit: u64) -> DownloadLimiter { + let dlimit_str = match tokio::fs::read_to_string(".gitolfs3-dlimit").await { + Ok(dlimit_str) => dlimit_str, + Err(e) => { + println!("Failed to read download counter, assuming 0: {e}"); + return DownloadLimiter { current: 0, limit }; + } + }; + let current: u64 = match dlimit_str + .parse() + .map_err(|e| tokio::io::Error::new(tokio::io::ErrorKind::InvalidData, e)) + { + Ok(current) => current, + Err(e) => { + println!("Failed to read download counter, assuming 0: {e}"); + return DownloadLimiter { current: 0, limit }; + } + }; + DownloadLimiter { current, limit } + } + + async fn request(&mut self, n: u64) -> tokio::io::Result { + if self.current + n > self.limit { + return Ok(false); + } + self.current += n; + self.write_new_count().await?; + Ok(true) + } + + async fn reset(&mut self) { + self.current = 0; + if let Err(e) = self.write_new_count().await { + println!("Failed to reset download counter: {e}"); + } + } + + async fn 
write_new_count(&self) -> tokio::io::Result<()> { + let cwd = tokio::fs::File::open(std::env::current_dir()?).await?; + let mut file = tokio::fs::File::create(".gitolfs3-dlimit.tmp").await?; + file.write_all(self.current.to_string().as_bytes()).await?; + file.sync_all().await?; + tokio::fs::rename(".gitolfs3-dlimit.tmp", ".gitolfs3-dlimit").await?; + cwd.sync_all().await + } +} + +#[test] +fn test_mimetype() { + assert!(is_git_lfs_json_mimetype("application/vnd.git-lfs+json")); + assert!(!is_git_lfs_json_mimetype("application/vnd.git-lfs")); + assert!(!is_git_lfs_json_mimetype("application/json")); + assert!(is_git_lfs_json_mimetype( + "application/vnd.git-lfs+json; charset=utf-8" + )); + assert!(is_git_lfs_json_mimetype( + "application/vnd.git-lfs+json; charset=UTF-8" + )); + assert!(!is_git_lfs_json_mimetype( + "application/vnd.git-lfs+json; charset=ISO-8859-1" + )); +} + +#[test] +fn test_deserialize() { + let json = r#"{"operation":"upload","objects":[{"oid":"8f4123f9a7181f488c5e111d82cefd992e461ae5df01fd2254399e6e670b2d3c","size":170904}], + "transfers":["lfs-standalone-file","basic","ssh"],"ref":{"name":"refs/heads/main"},"hash_algo":"sha256"}"#; + let expected = BatchRequest { + operation: Operation::Upload, + objects: vec![BatchRequestObject { + oid: "8f4123f9a7181f488c5e111d82cefd992e461ae5df01fd2254399e6e670b2d3c" + .parse() + .unwrap(), + size: 170904, + }], + transfers: vec![ + TransferAdapter::Unknown, + TransferAdapter::Basic, + TransferAdapter::Unknown, + ], + hash_algo: HashAlgo::Sha256, + }; + assert_eq!( + serde_json::from_str::(json).unwrap(), + expected + ); +} + +#[test] +fn test_validate_claims() { + let key = "00232f7a019bd34e3921ee6c5f04caf48a4489d1be5d1999038950a7054e0bfea369ce2becc0f13fd3c69f8af2384a25b7ac2d52eb52c33722f3c00c50d4c9c2"; + let key: Key = key.parse().unwrap(); + + let claims = Claims { + expires_at: Utc::now() + std::time::Duration::from_secs(5 * 60), + repo_path: "lfs-test.git", + specific_claims: SpecificClaims::BatchApi(Operation::Download), + }; + let tag = generate_tag(claims, &key).unwrap(); + let header_value = format!( + "Gitolfs3-Hmac-Sha256 {tag} {}", + claims.expires_at.timestamp() + ); + + let conf = AuthorizationConfig { + key, + trusted_forwarded_hosts: HashSet::new(), + }; + let verification_claims = VerifyClaimsInput { + repo_path: claims.repo_path, + specific_claims: claims.specific_claims, + }; + let mut headers = HeaderMap::new(); + headers.insert(header::AUTHORIZATION, header_value.try_into().unwrap()); + + assert!(verify_claims(&conf, &verification_claims, &headers).unwrap()); +} diff --git a/gitolfs3-shell/Cargo.toml b/gitolfs3-shell/Cargo.toml new file mode 100644 index 0000000..c0b5d3a --- /dev/null +++ b/gitolfs3-shell/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "gitolfs3-shell" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/gitolfs3-shell/src/main.rs b/gitolfs3-shell/src/main.rs new file mode 100644 index 0000000..f0c5f34 --- /dev/null +++ b/gitolfs3-shell/src/main.rs @@ -0,0 +1,145 @@ +use std::{os::unix::process::CommandExt, process::ExitCode}; + +fn main() -> ExitCode { + let bad_usage = ExitCode::from(2); + + let mut args = std::env::args().skip(1); + if args.next() != Some("-c".to_string()) { + eprintln!("Expected usage: shell -c "); + return bad_usage; + } + let Some(cmd) = args.next() else { + eprintln!("Missing argument for argument '-c'"); + return bad_usage; + }; + if args.next().is_some() { + eprintln!("Too many arguments passed"); + return bad_usage; + } + + let Some(mut cmd) = 
parse_cmd(&cmd) else { + eprintln!("Bad command"); + return bad_usage; + }; + + let Some(mut program) = cmd.drain(0..1).next() else { + eprintln!("Bad command"); + return bad_usage; + }; + if program == "git" { + let Some(subcommand) = cmd.drain(0..1).next() else { + eprintln!("Bad command"); + return bad_usage; + }; + program.push('-'); + program.push_str(&subcommand); + } + + let mut args = Vec::new(); + + let git_cmds = ["git-receive-pack", "git-upload-archive", "git-upload-pack"]; + if git_cmds.contains(&program.as_str()) { + if cmd.len() != 1 { + eprintln!("Bad command"); + return bad_usage; + } + let repository = cmd[0].trim_start_matches('/'); + args.push(repository); + } else if program == "git-lfs-authenticate" { + program.clear(); + program.push_str("gitolfs3-authenticate"); + if cmd.len() != 2 { + eprintln!("Bad command"); + return bad_usage; + } + let repository = cmd[0].trim_start_matches('/'); + args.push(repository); + args.push(&cmd[1]); + } else { + eprintln!("Unknown command"); + return bad_usage; + } + + let e = std::process::Command::new(program).args(args).exec(); + eprintln!("Error: {e}"); + ExitCode::FAILURE +} + +fn parse_cmd(mut cmd: &str) -> Option<Vec<String>> { + let mut args = Vec::<String>::new(); + + cmd = cmd.trim_matches(is_posix_space); + while !cmd.is_empty() { + if cmd.starts_with('\'') { + let (arg, remaining) = parse_sq(cmd)?; + args.push(arg); + cmd = remaining.trim_start_matches(is_posix_space); + } else if let Some((arg, remaining)) = cmd.split_once(is_posix_space) { + args.push(arg.to_owned()); + cmd = remaining.trim_start_matches(is_posix_space); + } else { + args.push(cmd.to_owned()); + cmd = ""; + } + } + + Some(args) +} + +fn is_posix_space(c: char) -> bool { + // Form feed: 0x0c + // Vertical tab: 0x0b + c == ' ' || c == '\x0c' || c == '\n' || c == '\r' || c == '\t' || c == '\x0b' +} + +fn parse_sq(s: &str) -> Option<(String, &str)> { + #[derive(PartialEq, Eq)] + enum SqState { + Quoted, + Unquoted { may_escape: bool }, + UnquotedEscaped, + } + + let mut result = String::new(); + let mut state = SqState::Unquoted { may_escape: false }; + let mut remaining = ""; + for (i, c) in s.char_indices() { + match state { + SqState::Unquoted { may_escape: false } => { + if c != '\'' { + return None; + } + state = SqState::Quoted + } + SqState::Quoted => { + if c == '\'' { + state = SqState::Unquoted { may_escape: true }; + continue; + } + result.push(c); + } + SqState::Unquoted { may_escape: true } => { + if is_posix_space(c) { + remaining = &s[i..]; + break; + } + if c != '\\' { + return None; + } + state = SqState::UnquotedEscaped; + } + SqState::UnquotedEscaped => { + if c != '\\' && c != '!'
{ + return None; + } + result.push(c); + state = SqState::Unquoted { may_escape: false }; + } + } + } + + if state != (SqState::Unquoted { may_escape: true }) { + return None; + } + Some((result, remaining)) +} diff --git a/server/Cargo.toml b/server/Cargo.toml deleted file mode 100644 index edb76d8..0000000 --- a/server/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "server" -version = "0.1.0" -edition = "2021" - -[dependencies] -aws-config = { version = "1.1.2" } -aws-sdk-s3 = "1.12.0" -axum = "0.7" -base64 = "0.21" -chrono = { version = "0.4", features = ["serde"] } -common = { path = "../common" } -mime = "0.3" -serde = { version = "1", features = ["derive"] } -serde_json = "1" -tokio = { version = "1.35", features = ["full"] } -tokio-util = "0.7" -tower = "0.4" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/server/src/main.rs b/server/src/main.rs deleted file mode 100644 index 7371c0d..0000000 --- a/server/src/main.rs +++ /dev/null @@ -1,1151 +0,0 @@ -use aws_sdk_s3::{error::SdkError, operation::head_object::HeadObjectOutput}; -use axum::{ - async_trait, - extract::{rejection, FromRequest, FromRequestParts, OriginalUri, Path, Request, State}, - http::{header, request::Parts, HeaderMap, HeaderValue, StatusCode, Uri}, - response::{IntoResponse, Response}, - routing::{get, post}, - Extension, Json, Router, ServiceExt, -}; -use base64::prelude::*; -use chrono::{DateTime, Utc}; -use serde::{ - de::{self, DeserializeOwned}, - Deserialize, Serialize, -}; -use std::{ - collections::{HashMap, HashSet}, - process::ExitCode, - sync::Arc, -}; -use tokio::io::AsyncWriteExt; -use tower::Layer; - -#[tokio::main] -async fn main() -> ExitCode { - tracing_subscriber::fmt::init(); - - let conf = match Config::load() { - Ok(conf) => conf, - Err(e) => { - println!("Error: {e}"); - return ExitCode::from(2); - } - }; - - let dl_limiter = DownloadLimiter::new(conf.download_limit).await; - let dl_limiter = Arc::new(tokio::sync::Mutex::new(dl_limiter)); - - let resetter_dl_limiter = dl_limiter.clone(); - tokio::spawn(async move { - loop { - println!("Resetting download counter in one hour"); - tokio::time::sleep(std::time::Duration::from_secs(3600)).await; - println!("Resetting download counter"); - resetter_dl_limiter.lock().await.reset().await; - } - }); - - let shared_state = Arc::new(AppState { - s3_client: conf.s3_client, - s3_bucket: conf.s3_bucket, - authz_conf: conf.authz_conf, - base_url: conf.base_url, - dl_limiter, - }); - let app = Router::new() - .route("/batch", post(batch)) - .route("/:oid0/:oid1/:oid", get(obj_download)) - .with_state(shared_state); - - let middleware = axum::middleware::map_request(rewrite_url); - let app_with_middleware = middleware.layer(app); - - let listener = match tokio::net::TcpListener::bind(conf.listen_addr).await { - Ok(listener) => listener, - Err(e) => { - println!("Failed to listen: {e}"); - return ExitCode::FAILURE; - } - }; - - match axum::serve(listener, app_with_middleware.into_make_service()).await { - Ok(_) => ExitCode::SUCCESS, - Err(e) => { - println!("Error serving: {e}"); - ExitCode::FAILURE - } - } -} - -#[derive(Clone)] -struct RepositoryName(String); - -struct RepositoryNameRejection; - -impl IntoResponse for RepositoryNameRejection { - fn into_response(self) -> Response { - (StatusCode::INTERNAL_SERVER_ERROR, "Missing repository name").into_response() - } -} - -#[async_trait] -impl<S: Send + Sync> FromRequestParts<S> for RepositoryName { - type Rejection = RepositoryNameRejection; - - async fn from_request_parts(parts:
&mut Parts, state: &S) -> Result<Self, Self::Rejection> { - let Ok(Extension(repo_name)) = Extension::<Self>::from_request_parts(parts, state).await - else { - return Err(RepositoryNameRejection); - }; - Ok(repo_name) - } -} - -async fn rewrite_url<B>( - mut req: axum::http::Request<B>,-) -> Result<axum::http::Request<B>, StatusCode> { - let uri = req.uri(); - let original_uri = OriginalUri(uri.clone()); - - let Some(path_and_query) = uri.path_and_query() else { - // L @ no path & query - return Err(StatusCode::BAD_REQUEST); - }; - let Some((repo, path)) = path_and_query.path().split_once("/info/lfs/objects") else { - return Err(StatusCode::NOT_FOUND); - }; - let repo = repo - .trim_start_matches('/') - .trim_end_matches('/') - .to_string(); - if !path.starts_with('/') || !repo.ends_with(".git") { - return Err(StatusCode::NOT_FOUND); - } - - let mut parts = uri.clone().into_parts(); - parts.path_and_query = match path_and_query.query() { - None => path.try_into().ok(), - Some(q) => format!("{path}?{q}").try_into().ok(), - }; - let Ok(new_uri) = Uri::from_parts(parts) else { - return Err(StatusCode::INTERNAL_SERVER_ERROR); - }; - - *req.uri_mut() = new_uri; - req.extensions_mut().insert(original_uri); - req.extensions_mut().insert(RepositoryName(repo)); - - Ok(req) -} - -struct AppState { - s3_client: aws_sdk_s3::Client, - s3_bucket: String, - authz_conf: AuthorizationConfig, - // Should not end with a slash. - base_url: String, - dl_limiter: Arc<tokio::sync::Mutex<DownloadLimiter>>, -} - -struct Env { - s3_access_key_id: String, - s3_secret_access_key: String, - s3_bucket: String, - s3_region: String, - s3_endpoint: String, - base_url: String, - key_path: String, - listen_host: String, - listen_port: String, - download_limit: String, - trusted_forwarded_hosts: String, -} - -fn require_env(name: &str) -> Result<String, String> { - std::env::var(name) - .map_err(|_| format!("environment variable {name} should be defined and valid")) -} - -impl Env { - fn load() -> Result<Env, String> { - Ok(Env { - s3_secret_access_key: require_env("GITOLFS3_S3_SECRET_ACCESS_KEY_FILE")?, - s3_access_key_id: require_env("GITOLFS3_S3_ACCESS_KEY_ID_FILE")?, - s3_region: require_env("GITOLFS3_S3_REGION")?, - s3_endpoint: require_env("GITOLFS3_S3_ENDPOINT")?, - s3_bucket: require_env("GITOLFS3_S3_BUCKET")?, - base_url: require_env("GITOLFS3_BASE_URL")?, - key_path: require_env("GITOLFS3_KEY_PATH")?, - listen_host: require_env("GITOLFS3_LISTEN_HOST")?, - listen_port: require_env("GITOLFS3_LISTEN_PORT")?, - download_limit: require_env("GITOLFS3_DOWNLOAD_LIMIT")?, - trusted_forwarded_hosts: std::env::var("GITOLFS3_TRUSTED_FORWARDED_HOSTS") - .unwrap_or_default(), - }) - } -} - -fn get_s3_client(env: &Env) -> Result<aws_sdk_s3::Client, std::io::Error> { - let access_key_id = std::fs::read_to_string(&env.s3_access_key_id)?; - let secret_access_key = std::fs::read_to_string(&env.s3_secret_access_key)?; - - let credentials = aws_sdk_s3::config::Credentials::new( - access_key_id, - secret_access_key, - None, - None, - "gitolfs3-env", - ); - let config = aws_config::SdkConfig::builder() - .behavior_version(aws_config::BehaviorVersion::latest()) - .region(aws_config::Region::new(env.s3_region.clone())) - .endpoint_url(&env.s3_endpoint) - .credentials_provider(aws_sdk_s3::config::SharedCredentialsProvider::new( - credentials, - )) - .build(); - Ok(aws_sdk_s3::Client::new(&config)) -} - -struct Config { - listen_addr: (String, u16), - base_url: String, - authz_conf: AuthorizationConfig, - s3_client: aws_sdk_s3::Client, - s3_bucket: String, - download_limit: u64, -} - -impl Config { - fn load() -> Result<Self, String> { - let env = match Env::load() { - Ok(env) => env, - Err(e) => return
Err(format!("failed to load configuration: {e}")), - }; - - let s3_client = match get_s3_client(&env) { - Ok(s3_client) => s3_client, - Err(e) => return Err(format!("failed to create S3 client: {e}")), - }; - let key = match common::load_key(&env.key_path) { - Ok(key) => key, - Err(e) => return Err(format!("failed to load Gitolfs3 key: {e}")), - }; - - let trusted_forwarded_hosts: HashSet = env - .trusted_forwarded_hosts - .split(',') - .map(|s| s.to_owned()) - .filter(|s| !s.is_empty()) - .collect(); - let base_url = env.base_url.trim_end_matches('/').to_string(); - - let Ok(listen_port): Result = env.listen_port.parse() else { - return Err("configured GITOLFS3_LISTEN_PORT is invalid".to_string()); - }; - let Ok(download_limit): Result = env.download_limit.parse() else { - return Err("configured GITOLFS3_DOWNLOAD_LIMIT is invalid".to_string()); - }; - - Ok(Self { - listen_addr: (env.listen_host, listen_port), - base_url, - authz_conf: AuthorizationConfig { - key, - trusted_forwarded_hosts, - }, - s3_client, - s3_bucket: env.s3_bucket, - download_limit, - }) - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Copy)] -enum TransferAdapter { - #[serde(rename = "basic")] - Basic, - #[serde(other)] - Unknown, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Copy)] -enum HashAlgo { - #[serde(rename = "sha256")] - Sha256, - #[serde(other)] - Unknown, -} - -impl Default for HashAlgo { - fn default() -> Self { - Self::Sha256 - } -} - -#[derive(Debug, Deserialize, PartialEq, Eq, Clone)] -struct BatchRequestObject { - oid: common::Oid, - size: i64, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -struct BatchRef { - name: String, -} - -fn default_transfers() -> Vec { - vec![TransferAdapter::Basic] -} - -#[derive(Debug, Deserialize, PartialEq, Eq, Clone)] -struct BatchRequest { - operation: common::Operation, - #[serde(default = "default_transfers")] - transfers: Vec, - objects: Vec, - #[serde(default)] - hash_algo: HashAlgo, -} - -#[derive(Debug, Clone)] -struct GitLfsJson(Json); - -const LFS_MIME: &str = "application/vnd.git-lfs+json"; - -enum GitLfsJsonRejection { - Json(rejection::JsonRejection), - MissingGitLfsJsonContentType, -} - -impl IntoResponse for GitLfsJsonRejection { - fn into_response(self) -> Response { - match self { - Self::Json(rej) => rej.into_response(), - Self::MissingGitLfsJsonContentType => make_error_resp( - StatusCode::UNSUPPORTED_MEDIA_TYPE, - &format!("Expected request with `Content-Type: {LFS_MIME}`"), - ) - .into_response(), - } - } -} - -fn is_git_lfs_json_mimetype(mimetype: &str) -> bool { - let Ok(mime) = mimetype.parse::() else { - return false; - }; - if mime.type_() != mime::APPLICATION - || mime.subtype() != "vnd.git-lfs" - || mime.suffix() != Some(mime::JSON) - { - return false; - } - match mime.get_param(mime::CHARSET) { - Some(mime::UTF_8) | None => true, - Some(_) => false, - } -} - -fn has_git_lfs_json_content_type(req: &Request) -> bool { - let Some(content_type) = req.headers().get(header::CONTENT_TYPE) else { - return false; - }; - let Ok(content_type) = content_type.to_str() else { - return false; - }; - is_git_lfs_json_mimetype(content_type) -} - -#[async_trait] -impl FromRequest for GitLfsJson -where - T: DeserializeOwned, - S: Send + Sync, -{ - type Rejection = GitLfsJsonRejection; - - async fn from_request(req: Request, state: &S) -> Result { - if !has_git_lfs_json_content_type(&req) { - return Err(GitLfsJsonRejection::MissingGitLfsJsonContentType); - } - Json::::from_request(req, state) - .await - 
.map(GitLfsJson) - .map_err(GitLfsJsonRejection::Json) - } -} - -impl<T: Serialize> IntoResponse for GitLfsJson<T> { - fn into_response(self) -> Response { - let GitLfsJson(json) = self; - let mut resp = json.into_response(); - resp.headers_mut().insert( - header::CONTENT_TYPE, - HeaderValue::from_static("application/vnd.git-lfs+json; charset=utf-8"), - ); - resp - } -} - -#[derive(Debug, Serialize)] -struct GitLfsErrorData<'a> { - message: &'a str, -} - -type GitLfsErrorResponse<'a> = (StatusCode, GitLfsJson<GitLfsErrorData<'a>>); - -const fn make_error_resp(code: StatusCode, message: &str) -> GitLfsErrorResponse { - (code, GitLfsJson(Json(GitLfsErrorData { message }))) -} - -#[derive(Debug, Serialize, Clone)] -struct BatchResponseObjectAction { - href: String, - #[serde(skip_serializing_if = "HashMap::is_empty")] - header: HashMap<String, String>, - expires_at: DateTime<Utc>, -} - -#[derive(Default, Debug, Serialize, Clone)] -struct BatchResponseObjectActions { - #[serde(skip_serializing_if = "Option::is_none")] - upload: Option<BatchResponseObjectAction>, - #[serde(skip_serializing_if = "Option::is_none")] - download: Option<BatchResponseObjectAction>, - #[serde(skip_serializing_if = "Option::is_none")] - verify: Option<BatchResponseObjectAction>, -} - -#[derive(Debug, Clone, Serialize)] -struct BatchResponseObjectError { - code: u16, - message: String, -} - -#[derive(Debug, Serialize, Clone)] -struct BatchResponseObject { - oid: common::Oid, - size: i64, - #[serde(skip_serializing_if = "Option::is_none")] - authenticated: Option<bool>, - actions: BatchResponseObjectActions, - #[serde(skip_serializing_if = "Option::is_none")] - error: Option<BatchResponseObjectError>, -} - -impl BatchResponseObject { - fn error(obj: &BatchRequestObject, code: StatusCode, message: String) -> BatchResponseObject { - BatchResponseObject { - oid: obj.oid, - size: obj.size, - authenticated: None, - actions: Default::default(), - error: Some(BatchResponseObjectError { - code: code.as_u16(), - message, - }), - } - } -} - -#[derive(Debug, Serialize, Clone)] -struct BatchResponse { - transfer: TransferAdapter, - objects: Vec<BatchResponseObject>, - hash_algo: HashAlgo, -} - -fn validate_checksum(oid: common::Oid, obj: &HeadObjectOutput) -> bool { - if let Some(checksum) = obj.checksum_sha256() { - if let Ok(checksum) = BASE64_STANDARD.decode(checksum) { - if let Ok(checksum32b) = TryInto::<[u8; 32]>::try_into(checksum) { - return common::Oid::from(checksum32b) == oid; - } - } - } - true -} - -fn validate_size(expected: i64, obj: &HeadObjectOutput) -> bool { - if let Some(length) = obj.content_length() { - return length == expected; - } - true -} - -async fn handle_upload_object( - state: &AppState, - repo: &str, - obj: &BatchRequestObject, -) -> Option<BatchResponseObject> { - let (oid0, oid1) = (common::HexByte(obj.oid[0]), common::HexByte(obj.oid[1])); - let full_path = format!("{repo}/lfs/objects/{}/{}/{}", oid0, oid1, obj.oid); - - match state - .s3_client - .head_object() - .bucket(&state.s3_bucket) - .key(full_path.clone()) - .checksum_mode(aws_sdk_s3::types::ChecksumMode::Enabled) - .send() - .await - { - Ok(result) => { - if validate_size(obj.size, &result) && validate_checksum(obj.oid, &result) { - return None; - } - } - Err(SdkError::ServiceError(e)) if e.err().is_not_found() => {} - Err(e) => { - println!("Failed to HeadObject (repo {repo}, OID {}): {e}", obj.oid); - return Some(BatchResponseObject::error( - obj, - StatusCode::INTERNAL_SERVER_ERROR, - "Failed to query object information".to_string(), - )); - } - }; - - let expires_in = std::time::Duration::from_secs(5 * 60); - let expires_at = Utc::now() + expires_in; - - let Ok(config) = aws_sdk_s3::presigning::PresigningConfig::expires_in(expires_in) else {
- return Some(BatchResponseObject::error( - obj, - StatusCode::INTERNAL_SERVER_ERROR, - "Failed to generate upload URL".to_string(), - )); - }; - let Ok(presigned) = state - .s3_client - .put_object() - .bucket(&state.s3_bucket) - .key(full_path) - .checksum_sha256(obj.oid.to_string()) - .content_length(obj.size) - .presigned(config) - .await - else { - return Some(BatchResponseObject::error( - obj, - StatusCode::INTERNAL_SERVER_ERROR, - "Failed to generate upload URL".to_string(), - )); - }; - Some(BatchResponseObject { - oid: obj.oid, - size: obj.size, - authenticated: Some(true), - actions: BatchResponseObjectActions { - upload: Some(BatchResponseObjectAction { - header: presigned - .headers() - .map(|(k, v)| (k.to_owned(), v.to_owned())) - .collect(), - expires_at, - href: presigned.uri().to_string(), - }), - ..Default::default() - }, - error: None, - }) -} - -async fn handle_download_object( - state: &AppState, - repo: &str, - obj: &BatchRequestObject, - trusted: bool, -) -> BatchResponseObject { - let (oid0, oid1) = (common::HexByte(obj.oid[0]), common::HexByte(obj.oid[1])); - let full_path = format!("{repo}/lfs/objects/{}/{}/{}", oid0, oid1, obj.oid); - - let result = match state - .s3_client - .head_object() - .bucket(&state.s3_bucket) - .key(&full_path) - .checksum_mode(aws_sdk_s3::types::ChecksumMode::Enabled) - .send() - .await - { - Ok(result) => result, - Err(e) => { - println!("Failed to HeadObject (repo {repo}, OID {}): {e}", obj.oid); - return BatchResponseObject::error( - obj, - StatusCode::INTERNAL_SERVER_ERROR, - "Failed to query object information".to_string(), - ); - } - }; - - // Scaleway actually doesn't provide SHA256 support, but maybe in the future :) - if !validate_checksum(obj.oid, &result) { - return BatchResponseObject::error( - obj, - StatusCode::UNPROCESSABLE_ENTITY, - "Object corrupted".to_string(), - ); - } - if !validate_size(obj.size, &result) { - return BatchResponseObject::error( - obj, - StatusCode::UNPROCESSABLE_ENTITY, - "Incorrect size specified (or object corrupted)".to_string(), - ); - } - - let expires_in = std::time::Duration::from_secs(5 * 60); - let expires_at = Utc::now() + expires_in; - - if trusted { - let Ok(config) = aws_sdk_s3::presigning::PresigningConfig::expires_in(expires_in) else { - return BatchResponseObject::error( - obj, - StatusCode::INTERNAL_SERVER_ERROR, - "Failed to generate upload URL".to_string(), - ); - }; - let Ok(presigned) = state - .s3_client - .get_object() - .bucket(&state.s3_bucket) - .key(full_path) - .presigned(config) - .await - else { - return BatchResponseObject::error( - obj, - StatusCode::INTERNAL_SERVER_ERROR, - "Failed to generate upload URL".to_string(), - ); - }; - return BatchResponseObject { - oid: obj.oid, - size: obj.size, - authenticated: Some(true), - actions: BatchResponseObjectActions { - download: Some(BatchResponseObjectAction { - header: presigned - .headers() - .map(|(k, v)| (k.to_owned(), v.to_owned())) - .collect(), - expires_at, - href: presigned.uri().to_string(), - }), - ..Default::default() - }, - error: None, - }; - } - - if let Some(content_length) = result.content_length() { - if content_length > 0 { - match state - .dl_limiter - .lock() - .await - .request(content_length as u64) - .await - { - Ok(true) => {} - Ok(false) => { - return BatchResponseObject::error( - obj, - StatusCode::SERVICE_UNAVAILABLE, - "Public LFS downloads temporarily unavailable".to_string(), - ); - } - Err(e) => { - println!("Failed to request {content_length} bytes from download limiter: {e}"); - return
BatchResponseObject::error( - obj, - StatusCode::INTERNAL_SERVER_ERROR, - "Internal server error".to_string(), - ); - } - } - } - } - - let Some(tag) = common::generate_tag( - common::Claims { - specific_claims: common::SpecificClaims::Download(obj.oid), - repo_path: repo, - expires_at, - }, - &state.authz_conf.key, - ) else { - return BatchResponseObject::error( - obj, - StatusCode::INTERNAL_SERVER_ERROR, - "Internal server error".to_string(), - ); - }; - - let upload_path = format!( - "{repo}/info/lfs/objects/{}/{}/{}", - common::HexByte(obj.oid[0]), - common::HexByte(obj.oid[1]), - obj.oid, - ); - - BatchResponseObject { - oid: obj.oid, - size: obj.size, - authenticated: Some(true), - actions: BatchResponseObjectActions { - download: Some(BatchResponseObjectAction { - header: { - let mut map = HashMap::new(); - map.insert( - "Authorization".to_string(), - format!("Gitolfs3-Hmac-Sha256 {tag} {}", expires_at.timestamp()), - ); - map - }, - expires_at, - href: format!("{}/{upload_path}", state.base_url), - }), - ..Default::default() - }, - error: None, - } -} - -struct AuthorizationConfig { - trusted_forwarded_hosts: HashSet<String>, - key: common::Key, -} - -struct Trusted(bool); - -fn forwarded_from_trusted_host( - headers: &HeaderMap, - trusted: &HashSet<String>,-) -> Result<bool, GitLfsErrorResponse<'static>> { - if let Some(forwarded_host) = headers.get("X-Forwarded-Host") { - if let Ok(forwarded_host) = forwarded_host.to_str() { - if trusted.contains(forwarded_host) { - return Ok(true); - } - } else { - return Err(make_error_resp( - StatusCode::NOT_FOUND, - "Invalid X-Forwarded-Host header", - )); - } - } - Ok(false) -} - -const REPO_NOT_FOUND: GitLfsErrorResponse = - make_error_resp(StatusCode::NOT_FOUND, "Repository not found"); - -fn authorize_batch( - conf: &AuthorizationConfig, - repo_path: &str, - public: bool, - operation: common::Operation, - headers: &HeaderMap, -) -> Result<Trusted, GitLfsErrorResponse<'static>> { - // - No authentication required for downloading exported repos - // - When authenticated: - // - Download / upload over presigned URLs - // - When accessing over Tailscale: - // - No authentication required for downloading from any repo - - let claims = VerifyClaimsInput { - specific_claims: common::SpecificClaims::BatchApi(operation), - repo_path, - }; - if !verify_claims(conf, &claims, headers)? { - return authorize_batch_unauthenticated(conf, public, operation, headers); - } - Ok(Trusted(true)) -} - -fn authorize_batch_unauthenticated( - conf: &AuthorizationConfig, - public: bool, - operation: common::Operation, - headers: &HeaderMap, -) -> Result<Trusted, GitLfsErrorResponse<'static>> { - let trusted = forwarded_from_trusted_host(headers, &conf.trusted_forwarded_hosts)?; - match operation { - common::Operation::Upload => { - // Trusted users can clone all repositories (by virtue of accessing the server via a - // trusted network). However, they cannot push without proper authentication. Untrusted - // users who are also not authenticated should not need to know which repositories exist. - // Therefore, we tell untrusted && unauthenticated users that the repo doesn't exist, but - // tell trusted users that they need to authenticate. - if !trusted { - return Err(REPO_NOT_FOUND); - } - Err(make_error_resp( - StatusCode::FORBIDDEN, - "Authentication required to upload", - )) - } - common::Operation::Download => { - // Again, trusted users can see all repos. For untrusted users, we first need to check - // whether the repo is public before we authorize. If the user is untrusted and the - // repo isn't public, we just act like it doesn't even exist.
- if !trusted { - if !public { - return Err(REPO_NOT_FOUND); - } - return Ok(Trusted(false)); - } - Ok(Trusted(true)) - } - } -} - -fn repo_exists(name: &str) -> bool { - let Ok(metadata) = std::fs::metadata(name) else { - return false; - }; - metadata.is_dir() -} - -fn is_repo_public(name: &str) -> Option<bool> { - if !repo_exists(name) { - return None; - } - match std::fs::metadata(format!("{name}/git-daemon-export-ok")) { - Ok(metadata) if metadata.is_file() => Some(true), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Some(false), - _ => None, - } -} - -async fn batch( - State(state): State<Arc<AppState>>, - headers: HeaderMap, - RepositoryName(repo): RepositoryName, - GitLfsJson(Json(payload)): GitLfsJson<BatchRequest>, -) -> Response { - let Some(public) = is_repo_public(&repo) else { - return REPO_NOT_FOUND.into_response(); - }; - let Trusted(trusted) = match authorize_batch( - &state.authz_conf, - &repo, - public, - payload.operation, - &headers, - ) { - Ok(authn) => authn, - Err(e) => return e.into_response(), - }; - - if !headers - .get_all("Accept") - .iter() - .filter_map(|v| v.to_str().ok()) - .any(is_git_lfs_json_mimetype) - { - let message = format!("Expected `{LFS_MIME}` in list of acceptable response media types"); - return make_error_resp(StatusCode::NOT_ACCEPTABLE, &message).into_response(); - } - - if payload.hash_algo != HashAlgo::Sha256 { - let message = "Unsupported hashing algorithm specified"; - return make_error_resp(StatusCode::CONFLICT, message).into_response(); - } - if !payload.transfers.is_empty() && !payload.transfers.contains(&TransferAdapter::Basic) { - let message = "Unsupported transfer adapter specified (supported: basic)"; - return make_error_resp(StatusCode::CONFLICT, message).into_response(); - } - - let mut resp = BatchResponse { - transfer: TransferAdapter::Basic, - objects: vec![], - hash_algo: HashAlgo::Sha256, - }; - for obj in payload.objects { - match payload.operation { - common::Operation::Download => resp - .objects - .push(handle_download_object(&state, &repo, &obj, trusted).await), - common::Operation::Upload => { - if let Some(obj_resp) = handle_upload_object(&state, &repo, &obj).await { - resp.objects.push(obj_resp); - } - } - }; - } - GitLfsJson(Json(resp)).into_response() -} - -#[derive(Deserialize, Copy, Clone)] -#[serde(remote = "Self")] -struct FileParams { - oid0: common::HexByte, - oid1: common::HexByte, - oid: common::Oid, -} - -impl<'de> Deserialize<'de> for FileParams { - fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> - where - D: serde::Deserializer<'de>, - { - let unchecked @ FileParams { - oid0: common::HexByte(oid0), - oid1: common::HexByte(oid1), - oid, - } = FileParams::deserialize(deserializer)?; - if oid0 != oid.as_bytes()[0] { - return Err(de::Error::custom( - "first OID path part does not match first byte of full OID", - )); - } - if oid1 != oid.as_bytes()[1] { - return Err(de::Error::custom( - "second OID path part does not match second byte of full OID", - )); - } - Ok(unchecked) - } -} - -pub struct VerifyClaimsInput<'a> { - pub specific_claims: common::SpecificClaims, - pub repo_path: &'a str, -} - -fn verify_claims( - conf: &AuthorizationConfig, - claims: &VerifyClaimsInput, - headers: &HeaderMap, -) -> Result<bool, GitLfsErrorResponse<'static>> { - const INVALID_AUTHZ_HEADER: GitLfsErrorResponse = - make_error_resp(StatusCode::BAD_REQUEST, "Invalid authorization header"); - - let Some(authz) = headers.get(header::AUTHORIZATION) else { - return Ok(false); - }; - let authz = authz.to_str().map_err(|_| INVALID_AUTHZ_HEADER)?; - let val = authz -
.strip_prefix("Gitolfs3-Hmac-Sha256 ") - .ok_or(INVALID_AUTHZ_HEADER)?; - let (tag, expires_at) = val.split_once(' ').ok_or(INVALID_AUTHZ_HEADER)?; - let tag: common::Digest<32> = tag.parse().map_err(|_| INVALID_AUTHZ_HEADER)?; - let expires_at: i64 = expires_at.parse().map_err(|_| INVALID_AUTHZ_HEADER)?; - let expires_at = DateTime::::from_timestamp(expires_at, 0).ok_or(INVALID_AUTHZ_HEADER)?; - let expected_tag = common::generate_tag( - common::Claims { - specific_claims: claims.specific_claims, - repo_path: claims.repo_path, - expires_at, - }, - &conf.key, - ) - .ok_or_else(|| make_error_resp(StatusCode::INTERNAL_SERVER_ERROR, "Internal server error"))?; - if tag != expected_tag { - return Err(INVALID_AUTHZ_HEADER); - } - - Ok(true) -} - -fn authorize_get( - conf: &AuthorizationConfig, - repo_path: &str, - oid: common::Oid, - headers: &HeaderMap, -) -> Result<(), GitLfsErrorResponse<'static>> { - let claims = VerifyClaimsInput { - specific_claims: common::SpecificClaims::Download(oid), - repo_path, - }; - if !verify_claims(conf, &claims, headers)? { - return Err(make_error_resp( - StatusCode::UNAUTHORIZED, - "Repository not found", - )); - } - Ok(()) -} - -async fn obj_download( - State(state): State>, - headers: HeaderMap, - RepositoryName(repo): RepositoryName, - Path(FileParams { oid0, oid1, oid }): Path, -) -> Response { - if let Err(e) = authorize_get(&state.authz_conf, &repo, oid, &headers) { - return e.into_response(); - } - - let full_path = format!("{repo}/lfs/objects/{}/{}/{}", oid0, oid1, oid); - let result = match state - .s3_client - .get_object() - .bucket(&state.s3_bucket) - .key(full_path) - .checksum_mode(aws_sdk_s3::types::ChecksumMode::Enabled) - .send() - .await - { - Ok(result) => result, - Err(e) => { - println!("Failed to GetObject (repo {repo}, OID {oid}): {e}"); - return ( - StatusCode::INTERNAL_SERVER_ERROR, - "Failed to query object information", - ) - .into_response(); - } - }; - - let mut headers = header::HeaderMap::new(); - if let Some(content_type) = result.content_type { - let Ok(header_value) = content_type.try_into() else { - return ( - StatusCode::INTERNAL_SERVER_ERROR, - "Object has invalid content type", - ) - .into_response(); - }; - headers.insert(header::CONTENT_TYPE, header_value); - } - if let Some(content_length) = result.content_length { - headers.insert(header::CONTENT_LENGTH, content_length.into()); - } - - let async_read = result.body.into_async_read(); - let stream = tokio_util::io::ReaderStream::new(async_read); - let body = axum::body::Body::from_stream(stream); - - (headers, body).into_response() -} - -struct DownloadLimiter { - current: u64, - limit: u64, -} - -impl DownloadLimiter { - async fn new(limit: u64) -> DownloadLimiter { - let dlimit_str = match tokio::fs::read_to_string(".gitolfs3-dlimit").await { - Ok(dlimit_str) => dlimit_str, - Err(e) => { - println!("Failed to read download counter, assuming 0: {e}"); - return DownloadLimiter { current: 0, limit }; - } - }; - let current: u64 = match dlimit_str - .parse() - .map_err(|e| tokio::io::Error::new(tokio::io::ErrorKind::InvalidData, e)) - { - Ok(current) => current, - Err(e) => { - println!("Failed to read download counter, assuming 0: {e}"); - return DownloadLimiter { current: 0, limit }; - } - }; - DownloadLimiter { current, limit } - } - - async fn request(&mut self, n: u64) -> tokio::io::Result { - if self.current + n > self.limit { - return Ok(false); - } - self.current += n; - self.write_new_count().await?; - Ok(true) - } - - async fn reset(&mut self) { - self.current = 
0; - if let Err(e) = self.write_new_count().await { - println!("Failed to reset download counter: {e}"); - } - } - - async fn write_new_count(&self) -> tokio::io::Result<()> { - let cwd = tokio::fs::File::open(std::env::current_dir()?).await?; - let mut file = tokio::fs::File::create(".gitolfs3-dlimit.tmp").await?; - file.write_all(self.current.to_string().as_bytes()).await?; - file.sync_all().await?; - tokio::fs::rename(".gitolfs3-dlimit.tmp", ".gitolfs3-dlimit").await?; - cwd.sync_all().await - } -} - -#[test] -fn test_mimetype() { - assert!(is_git_lfs_json_mimetype("application/vnd.git-lfs+json")); - assert!(!is_git_lfs_json_mimetype("application/vnd.git-lfs")); - assert!(!is_git_lfs_json_mimetype("application/json")); - assert!(is_git_lfs_json_mimetype( - "application/vnd.git-lfs+json; charset=utf-8" - )); - assert!(is_git_lfs_json_mimetype( - "application/vnd.git-lfs+json; charset=UTF-8" - )); - assert!(!is_git_lfs_json_mimetype( - "application/vnd.git-lfs+json; charset=ISO-8859-1" - )); -} - -#[test] -fn test_deserialize() { - let json = r#"{"operation":"upload","objects":[{"oid":"8f4123f9a7181f488c5e111d82cefd992e461ae5df01fd2254399e6e670b2d3c","size":170904}], - "transfers":["lfs-standalone-file","basic","ssh"],"ref":{"name":"refs/heads/main"},"hash_algo":"sha256"}"#; - let expected = BatchRequest { - operation: common::Operation::Upload, - objects: vec![BatchRequestObject { - oid: "8f4123f9a7181f488c5e111d82cefd992e461ae5df01fd2254399e6e670b2d3c" - .parse() - .unwrap(), - size: 170904, - }], - transfers: vec![ - TransferAdapter::Unknown, - TransferAdapter::Basic, - TransferAdapter::Unknown, - ], - hash_algo: HashAlgo::Sha256, - }; - assert_eq!( - serde_json::from_str::<BatchRequest>(json).unwrap(), - expected - ); -} - -#[test] -fn test_validate_claims() { - let key = "00232f7a019bd34e3921ee6c5f04caf48a4489d1be5d1999038950a7054e0bfea369ce2becc0f13fd3c69f8af2384a25b7ac2d52eb52c33722f3c00c50d4c9c2"; - let key: common::Key = key.parse().unwrap(); - - let claims = common::Claims { - expires_at: Utc::now() + std::time::Duration::from_secs(5 * 60), - repo_path: "lfs-test.git", - specific_claims: common::SpecificClaims::BatchApi(common::Operation::Download), - }; - let tag = common::generate_tag(claims, &key).unwrap(); - let header_value = format!( - "Gitolfs3-Hmac-Sha256 {tag} {}", - claims.expires_at.timestamp() - ); - - let conf = AuthorizationConfig { - key, - trusted_forwarded_hosts: HashSet::new(), - }; - let verification_claims = VerifyClaimsInput { - repo_path: claims.repo_path, - specific_claims: claims.specific_claims, - }; - let mut headers = HeaderMap::new(); - headers.insert(header::AUTHORIZATION, header_value.try_into().unwrap()); - - assert!(verify_claims(&conf, &verification_claims, &headers).unwrap()); -} diff --git a/shell/Cargo.toml b/shell/Cargo.toml deleted file mode 100644 index 0dcb6d6..0000000 --- a/shell/Cargo.toml +++ /dev/null @@ -1,6 +0,0 @@ -[package] -name = "shell" -version = "0.1.0" -edition = "2021" - -[dependencies] diff --git a/shell/src/main.rs b/shell/src/main.rs deleted file mode 100644 index 4a98828..0000000 --- a/shell/src/main.rs +++ /dev/null @@ -1,143 +0,0 @@ -use std::{os::unix::process::CommandExt, process::ExitCode}; - -fn main() -> ExitCode { - let bad_usage = ExitCode::from(2); - - let mut args = std::env::args().skip(1); - if args.next() != Some("-c".to_string()) { - eprintln!("Expected usage: shell -c <command>"); - return bad_usage; - } - let Some(cmd) = args.next() else { - eprintln!("Missing argument for argument '-c'"); - return bad_usage; - }; - if
args.next().is_some() { - eprintln!("Too many arguments passed"); - return bad_usage; - } - - let Some(mut cmd) = parse_cmd(&cmd) else { - eprintln!("Bad command"); - return bad_usage; - }; - - let Some(mut program) = cmd.drain(0..1).next() else { - eprintln!("Bad command"); - return bad_usage; - }; - if program == "git" { - let Some(subcommand) = cmd.drain(0..1).next() else { - eprintln!("Bad command"); - return bad_usage; - }; - program.push('-'); - program.push_str(&subcommand); - } - - let mut args = Vec::new(); - - let git_cmds = ["git-receive-pack", "git-upload-archive", "git-upload-pack"]; - if git_cmds.contains(&program.as_str()) { - if cmd.len() != 1 { - eprintln!("Bad command"); - return bad_usage; - } - let repository = cmd[0].trim_start_matches('/'); - args.push(repository); - } else if program == "git-lfs-authenticate" { - if cmd.len() != 2 { - eprintln!("Bad command"); - return bad_usage; - } - let repository = cmd[0].trim_start_matches('/'); - args.push(repository); - args.push(&cmd[1]); - } else { - eprintln!("Unknown command"); - return bad_usage; - } - - let e = std::process::Command::new(program).args(args).exec(); - eprintln!("Error: {e}"); - ExitCode::FAILURE -} - -fn parse_cmd(mut cmd: &str) -> Option<Vec<String>> { - let mut args = Vec::<String>::new(); - - cmd = cmd.trim_matches(is_posix_space); - while !cmd.is_empty() { - if cmd.starts_with('\'') { - let (arg, remaining) = parse_sq(cmd)?; - args.push(arg); - cmd = remaining.trim_start_matches(is_posix_space); - } else if let Some((arg, remaining)) = cmd.split_once(is_posix_space) { - args.push(arg.to_owned()); - cmd = remaining.trim_start_matches(is_posix_space); - } else { - args.push(cmd.to_owned()); - cmd = ""; - } - } - - Some(args) -} - -fn is_posix_space(c: char) -> bool { - // Form feed: 0x0c - // Vertical tab: 0x0b - c == ' ' || c == '\x0c' || c == '\n' || c == '\r' || c == '\t' || c == '\x0b' -} - -fn parse_sq(s: &str) -> Option<(String, &str)> { - #[derive(PartialEq, Eq)] - enum SqState { - Quoted, - Unquoted { may_escape: bool }, - UnquotedEscaped, - } - - let mut result = String::new(); - let mut state = SqState::Unquoted { may_escape: false }; - let mut remaining = ""; - for (i, c) in s.char_indices() { - match state { - SqState::Unquoted { may_escape: false } => { - if c != '\'' { - return None; - } - state = SqState::Quoted - } - SqState::Quoted => { - if c == '\'' { - state = SqState::Unquoted { may_escape: true }; - continue; - } - result.push(c); - } - SqState::Unquoted { may_escape: true } => { - if is_posix_space(c) { - remaining = &s[i..]; - break; - } - if c != '\\' { - return None; - } - state = SqState::UnquotedEscaped; - } - SqState::UnquotedEscaped => { - if c != '\\' && c != '!' { - return None; - } - result.push(c); - state = SqState::Unquoted { may_escape: false }; - } - } - } - - if state != (SqState::Unquoted { may_escape: true }) { - return None; - } - Some((result, remaining)) -} -- cgit v1.2.3
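
Editor's note (not part of the patch): the quoting accepted by parse_cmd/parse_sq above is the single-quote form Git emits when invoking remote commands over SSH, where a special character is handled by closing the quote, backslash-escaping it, and reopening. A minimal sketch of that grammar as a unit test against the parse_cmd shown in the patch; the test name and inputs are illustrative:

#[test]
fn test_parse_cmd_quoting() {
    // Plain invocation as sent by Git over SSH.
    assert_eq!(
        parse_cmd("git-upload-pack '/foo.git'"),
        Some(vec!["git-upload-pack".to_string(), "/foo.git".to_string()])
    );
    // A '!' is escaped outside the quotes: close quote, \!, reopen.
    assert_eq!(parse_cmd(r"'a'\!'b.git'"), Some(vec!["a!b.git".to_string()]));
    // Unterminated quoting is rejected.
    assert_eq!(parse_cmd("'oops"), None);
}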
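Likewise, the Authorization scheme checked by verify_claims is "Gitolfs3-Hmac-Sha256 <tag> <unix timestamp>", as exercised by test_validate_claims. A rough sketch, assuming the gitolfs3-common crate as a dependency, of how a holder of the shared key could mint such a header for a download Batch API request; download_authz_header is a hypothetical helper, not part of the patch:

use gitolfs3_common::{generate_tag, Claims, Key, Operation, SpecificClaims};

fn download_authz_header(key: &Key, repo_path: &str) -> Option<String> {
    // Five minutes of validity, mirroring the expiry the server puts on presigned URLs.
    let expires_at = chrono::Utc::now() + std::time::Duration::from_secs(5 * 60);
    let tag = generate_tag(
        Claims {
            specific_claims: SpecificClaims::BatchApi(Operation::Download),
            repo_path,
            expires_at,
        },
        key,
    )?;
    // verify_claims() recomputes this tag from the presented claims and rejects on mismatch.
    Some(format!("Gitolfs3-Hmac-Sha256 {tag} {}", expires_at.timestamp()))
}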