diff options
author | Rutger Broekhoff | 2023-12-29 20:29:22 +0100 |
---|---|---|
committer | Rutger Broekhoff | 2023-12-29 20:29:22 +0100 |
commit | 77f852e06d3be7cb558be73d6edc98d30cb52d65 (patch) | |
tree | d217dc9faf144cc188340c9ce6324fd3f862e765 /cmd | |
parent | 0b2fb488e1ccb877f16bad2f413494824f43d9c8 (diff) | |
download | gitolfs3-77f852e06d3be7cb558be73d6edc98d30cb52d65.tar.gz gitolfs3-77f852e06d3be7cb558be73d6edc98d30cb52d65.zip |
Write basic read-only public Git LFS server
The 'integration' with Gitolite is honestly pretty bad and should not be
taken very seriously: it runs the 'gitolite access' command to check if
some user (e.g., daemon/nobody) should be able to read from the
repository. Based on this, it grants access to objects stored in S3, by
generating Presigned GetObject URLs using the S3 API.
Of course, this integration with Gitolite (especially when using the
daemon user to check if the user should be able to read) is not very
'high-value':
1. If we already make use of the daemon pseudo-user to control access to
public repositories, we may as well check for the existence of
git-daemon-export-ok files. In case they exist, we simply assume that
the repository is meant to be shown on the public internet and that
therefore the LFS archive should also be considered 'open to the
public'.
2. The way that Gitolite commands are currently run, this program breaks
when not running under the git user without extra configuration;
Gitolite decides where repositories are based on the HOME environment
variable. This program currently does not set this. This could be set
by the CGI server (or fcgiwrap) and would unbreak the system.
There's no support for any more advanced kind of authn/authz. Uploading
is also not supported yet. That's still to come.
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/git-lfs-server/main.go | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/cmd/git-lfs-server/main.go b/cmd/git-lfs-server/main.go index 06ab7d0..c5b47a3 100644 --- a/cmd/git-lfs-server/main.go +++ b/cmd/git-lfs-server/main.go | |||
@@ -1 +1,330 @@ | |||
1 | package main | 1 | package main |
2 | |||
3 | import ( | ||
4 | "context" | ||
5 | "encoding/json" | ||
6 | "errors" | ||
7 | "fmt" | ||
8 | "net/http" | ||
9 | "net/http/cgi" | ||
10 | "net/url" | ||
11 | "os" | ||
12 | "os/exec" | ||
13 | "path" | ||
14 | "regexp" | ||
15 | "slices" | ||
16 | "strconv" | ||
17 | "strings" | ||
18 | "time" | ||
19 | "unicode" | ||
20 | |||
21 | "github.com/minio/minio-go/v7" | ||
22 | "github.com/minio/minio-go/v7/pkg/credentials" | ||
23 | ) | ||
24 | |||
// operation is a Git LFS Batch API operation kind ("download"/"upload").
type operation string

// transferAdapter identifies an LFS transfer mechanism.
type transferAdapter string

// hashAlgo names the hash algorithm used for LFS object IDs.
type hashAlgo string

const (
	operationDownload    operation       = "download"
	operationUpload      operation       = "upload"
	transferAdapterBasic transferAdapter = "basic"
	hashAlgoSHA256       hashAlgo        = "sha256"
)

// lfsMIME is the media type the Git LFS Batch API requires for both
// request and response bodies.
const lfsMIME = "application/vnd.git-lfs+json"
37 | |||
// batchRef identifies the ref a batch request pertains to.
type batchRef struct {
	Name string `json:"name"`
}

// batchRequestObject is a single requested object: its OID (hex hash)
// and size in bytes.
type batchRequestObject struct {
	OID  string `json:"oid"`
	Size uint64 `json:"size"`
}

// batchRequest is the JSON body of a Git LFS Batch API request.
type batchRequest struct {
	Operation operation            `json:"operation"`
	Transfers []transferAdapter    `json:"transfers,omitempty"`
	Ref       *batchRef            `json:"ref,omitempty"`
	Objects   []batchRequestObject `json:"objects"`
	HashAlgo  hashAlgo             `json:"hash_algo,omitempty"`
}
54 | |||
55 | type RFC3339SecondsTime time.Time | ||
56 | |||
57 | func (t RFC3339SecondsTime) MarshalJSON() ([]byte, error) { | ||
58 | b := make([]byte, 0, len(time.RFC3339)+len(`""`)) | ||
59 | b = append(b, '"') | ||
60 | b = time.Time(t).AppendFormat(b, time.RFC3339) | ||
61 | b = append(b, '"') | ||
62 | return b, nil | ||
63 | } | ||
64 | |||
65 | type SecondDuration time.Duration | ||
66 | |||
67 | func (d SecondDuration) MarshalJSON() ([]byte, error) { | ||
68 | var b []byte | ||
69 | b = strconv.AppendInt(b, int64(time.Duration(d).Seconds()), 10) | ||
70 | return b, nil | ||
71 | } | ||
72 | |||
// batchAction tells the client how to perform one operation on an
// object: the URL to hit, extra headers, and expiry information.
type batchAction struct {
	HRef      *url.URL            `json:"href"`
	Header    map[string]string   `json:"header,omitempty"`
	ExpiresIn *SecondDuration     `json:"expires_in,omitempty"`
	ExpiresAt *RFC3339SecondsTime `json:"expires_at,omitempty"`
}

// batchError is a per-object error entry in a batch response.
type batchError struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}

// batchResponseObject is one object entry in a batch response; on
// failure Error is set instead of Actions.
type batchResponseObject struct {
	OID           string                    `json:"oid"`
	Size          uint64                    `json:"size"`
	Authenticated *bool                     `json:"authenticated"`
	Actions       map[operation]batchAction `json:"actions,omitempty"`
	Error         *batchError               `json:"error,omitempty"`
}

// batchResponse is the JSON body of a Git LFS Batch API response.
type batchResponse struct {
	Transfer transferAdapter       `json:"transfer,omitempty"`
	Objects  []batchResponseObject `json:"objects"`
	HashAlgo hashAlgo              `json:"hash_algo,omitempty"`
}
98 | |||
// re matches Batch API request paths of the form
// "<repo>.git/info/lfs/objects/batch", capturing the repository part.
var re = regexp.MustCompile(`^([a-zA-Z0-9-_/]+)\.git/info/lfs/objects/batch$`)

// handler serves the read-only LFS Batch API backed by an S3 bucket,
// using Gitolite to decide whether anonymous reads are allowed.
type handler struct {
	mc       *minio.Client // S3 client used for stat + presigned GET URLs
	bucket   string        // bucket holding the LFS objects
	anonUser string        // Gitolite user checked for public read access
}
106 | |||
// isValidSHA256Hash reports whether hash is a 64-character lowercase
// hexadecimal SHA-256 digest. Callers must lowercase the OID before
// calling; uppercase hex digits are rejected here, enforcing the
// lowercase-only contract (unicode.ASCII_Hex_Digit alone would also
// admit 'A'-'F').
func isValidSHA256Hash(hash string) bool {
	if len(hash) != 64 {
		return false
	}
	for _, c := range hash {
		if !unicode.Is(unicode.ASCII_Hex_Digit, c) || unicode.IsUpper(c) {
			return false
		}
	}
	return true
}
119 | |||
// lfsError is the JSON error body defined by the Git LFS API.
type lfsError struct {
	Message          string `json:"message"`
	DocumentationURL string `json:"documentation_url,omitempty"`
	RequestID        string `json:"request_id,omitempty"`
}
125 | |||
126 | func makeRespError(w http.ResponseWriter, message string, code int) { | ||
127 | w.Header().Set("Content-Type", lfsMIME) | ||
128 | w.WriteHeader(code) | ||
129 | json.NewEncoder(w).Encode(lfsError{Message: message}) | ||
130 | } | ||
131 | |||
132 | func makeObjError(obj parsedBatchObject, message string, code int) batchResponseObject { | ||
133 | return batchResponseObject{ | ||
134 | OID: obj.fullHash, | ||
135 | Size: obj.size, | ||
136 | Error: &batchError{ | ||
137 | Message: message, | ||
138 | Code: code, | ||
139 | }, | ||
140 | } | ||
141 | } | ||
142 | |||
// handleDownloadObject builds the batch-response entry for one object:
// it stats the object in S3, sanity-checks checksum and size, and on
// success returns a presigned GET URL valid for 24 hours.
func (h *handler) handleDownloadObject(ctx context.Context, repo string, obj parsedBatchObject) batchResponseObject {
	// Objects live at <repo>/<first two hex chars>/<next two>/<full hash>.
	fullPath := path.Join(repo, obj.firstByte, obj.secondByte, obj.fullHash)
	expiresIn := time.Hour * 24
	expiresInSeconds := SecondDuration(expiresIn)

	info, err := h.mc.StatObject(ctx, h.bucket, fullPath, minio.StatObjectOptions{Checksum: true})
	if err != nil {
		var resp minio.ErrorResponse
		if errors.As(err, &resp) && resp.StatusCode == http.StatusNotFound {
			return makeObjError(obj, "Object does not exist", http.StatusNotFound)
		}
		// TODO: consider not making this an object-specific, but rather a
		// generic error such that the entire Batch API request fails.
		return makeObjError(obj, "Failed to query object information", http.StatusInternalServerError)
	}
	// NOTE(review): S3 SHA-256 checksums are commonly base64-encoded,
	// while obj.fullHash is lowercase hex — verify these representations
	// actually match, otherwise this branch flags every checksummed
	// object as corrupted.
	if info.ChecksumSHA256 != "" && strings.ToLower(info.ChecksumSHA256) != obj.fullHash {
		return makeObjError(obj, "Corrupted file", http.StatusUnprocessableEntity)
	}
	if uint64(info.Size) != obj.size {
		return makeObjError(obj, "Incorrect size specified for object", http.StatusUnprocessableEntity)
	}

	presigned, err := h.mc.PresignedGetObject(ctx, h.bucket, fullPath, expiresIn, url.Values{})
	if err != nil {
		// TODO: consider not making this an object-specific, but rather a
		// generic error such that the entire Batch API request fails.
		return makeObjError(obj, "Failed to generate action href", http.StatusInternalServerError)
	}

	authenticated := true
	return batchResponseObject{
		OID:           obj.fullHash,
		Size:          obj.size,
		Authenticated: &authenticated,
		Actions: map[operation]batchAction{
			operationDownload: {
				HRef:      presigned,
				ExpiresIn: &expiresInSeconds,
			},
		},
	}
}
185 | |||
// parsedBatchObject is a validated, lowercased object ID split into the
// path components used for S3 storage.
type parsedBatchObject struct {
	firstByte  string // first two hex chars of the OID
	secondByte string // next two hex chars of the OID
	fullHash   string // full 64-char lowercase hex OID
	size       uint64 // object size in bytes, as claimed by the client
}
192 | |||
193 | func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { | ||
194 | submatches := re.FindStringSubmatch(r.URL.Path) | ||
195 | if len(submatches) != 1 { | ||
196 | makeRespError(w, "Not found", http.StatusNotFound) | ||
197 | return | ||
198 | } | ||
199 | repo := strings.TrimPrefix("/", path.Clean(submatches[0])) | ||
200 | |||
201 | if !slices.Contains(r.Header.Values("Accept"), lfsMIME) { | ||
202 | makeRespError(w, "Expected "+lfsMIME+" in list of acceptable response media types", http.StatusNotAcceptable) | ||
203 | return | ||
204 | } | ||
205 | if r.Header.Get("Content-Type") != lfsMIME { | ||
206 | makeRespError(w, "Expected request Content-Type to be "+lfsMIME, http.StatusUnsupportedMediaType) | ||
207 | return | ||
208 | } | ||
209 | |||
210 | var body batchRequest | ||
211 | if err := json.NewDecoder(r.Body).Decode(&body); err != nil { | ||
212 | makeRespError(w, "Failed to parse request body as JSON", http.StatusBadRequest) | ||
213 | return | ||
214 | } | ||
215 | |||
216 | if body.HashAlgo != hashAlgoSHA256 { | ||
217 | makeRespError(w, "Unsupported hash algorithm specified", http.StatusConflict) | ||
218 | return | ||
219 | } | ||
220 | |||
221 | // TODO: handle authentication | ||
222 | // right now, we're just trying to make everything publically accessible | ||
223 | if body.Operation == operationUpload { | ||
224 | makeRespError(w, "Upload operations are currently not supported", http.StatusForbidden) | ||
225 | return | ||
226 | } | ||
227 | |||
228 | if len(body.Transfers) != 0 && !slices.Contains(body.Transfers, transferAdapterBasic) { | ||
229 | makeRespError(w, "Unsupported transfer adapter specified (supported: basic)", http.StatusConflict) | ||
230 | return | ||
231 | } | ||
232 | |||
233 | gitoliteArgs := []string{"access", "-q", repo, h.anonUser, "R"} | ||
234 | if body.Ref != nil && body.Ref.Name != "" { | ||
235 | gitoliteArgs = append(gitoliteArgs, body.Ref.Name) | ||
236 | } | ||
237 | cmd := exec.Command("gitolite", gitoliteArgs...) | ||
238 | err := cmd.Run() | ||
239 | permGranted := err == nil | ||
240 | var exitErr *exec.ExitError | ||
241 | if err != nil && !errors.As(err, &exitErr) { | ||
242 | makeRespError(w, "Failed to query access information", http.StatusInternalServerError) | ||
243 | return | ||
244 | } | ||
245 | if !permGranted { | ||
246 | // TODO: when handling authorization, make sure to return 403 Forbidden | ||
247 | // here when the user *does* have read permissions, but is not allowed | ||
248 | // to write when requesting an upload operation. | ||
249 | makeRespError(w, "Repository not found", http.StatusNotFound) | ||
250 | return | ||
251 | } | ||
252 | |||
253 | var objects []parsedBatchObject | ||
254 | for _, obj := range body.Objects { | ||
255 | oid := strings.ToLower(obj.OID) | ||
256 | if !isValidSHA256Hash(oid) { | ||
257 | makeRespError(w, "Invalid hash format in object ID", http.StatusBadRequest) | ||
258 | return | ||
259 | } | ||
260 | objects = append(objects, parsedBatchObject{ | ||
261 | firstByte: oid[:2], | ||
262 | secondByte: oid[2:4], | ||
263 | fullHash: oid, | ||
264 | size: obj.Size, | ||
265 | }) | ||
266 | } | ||
267 | |||
268 | resp := batchResponse{ | ||
269 | Transfer: transferAdapterBasic, | ||
270 | HashAlgo: hashAlgoSHA256, | ||
271 | } | ||
272 | for _, obj := range objects { | ||
273 | resp.Objects = append(resp.Objects, h.handleDownloadObject(r.Context(), repo, obj)) | ||
274 | } | ||
275 | |||
276 | w.Header().Set("Content-Type", lfsMIME) | ||
277 | w.WriteHeader(http.StatusOK) | ||
278 | json.NewEncoder(w).Encode(resp) | ||
279 | } | ||
280 | |||
// die prints "Error: " followed by the formatted message and a newline
// to stderr, then terminates the process with exit status 1.
func die(msg string, args ...any) {
	formatted := fmt.Sprintf(msg, args...)
	fmt.Fprintf(os.Stderr, "Error: %s\n", formatted)
	os.Exit(1)
}
287 | |||
288 | func main() { | ||
289 | endpoint := os.Getenv("S3_ENDPOINT") | ||
290 | accessKeyID := os.Getenv("S3_ACCESS_KEY_ID") | ||
291 | secretAccessKey := os.Getenv("S3_SECRET_ACCESS_KEY") | ||
292 | bucket := os.Getenv("S3_BUCKET") | ||
293 | anonUser := os.Getenv("ANON_USER") | ||
294 | |||
295 | if endpoint == "" { | ||
296 | die("Expected environment variable S3_ENDPOINT to be set") | ||
297 | } | ||
298 | if accessKeyID == "" { | ||
299 | die("Expected environment variable S3_ACCESS_KEY_ID to be set") | ||
300 | } | ||
301 | if secretAccessKey == "" { | ||
302 | die("Expected environment variable S3_SECRET_ACCESS_KEY to be set") | ||
303 | } | ||
304 | if bucket == "" { | ||
305 | die("Expected environment variable S3_BUCKET to be set") | ||
306 | } | ||
307 | if anonUser == "" { | ||
308 | die("Expected environment variable ANON_USER to be set") | ||
309 | } | ||
310 | |||
311 | mc, err := minio.New(endpoint, &minio.Options{ | ||
312 | Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""), | ||
313 | Secure: true, | ||
314 | }) | ||
315 | if err != nil { | ||
316 | die("Failed to create S3 client") | ||
317 | } | ||
318 | |||
319 | if err = cgi.Serve(&handler{mc, bucket, anonUser}); err != nil { | ||
320 | die("Failed to serve CGI: %s", err) | ||
321 | } | ||
322 | } | ||
323 | |||
324 | // Directory structure: | ||
325 | // - lfs/ | ||
326 | // - locks/ | ||
327 | // - objects/ | ||
328 | // - <1st OID byte> | ||
329 | // - <2nd OID byte> | ||
330 | // - <OID hash> <- this is the object | ||