aboutsummaryrefslogtreecommitdiffstats
path: root/cmd
diff options
context:
space:
mode:
authorLibravatar Rutger Broekhoff2023-12-29 20:29:22 +0100
committerLibravatar Rutger Broekhoff2023-12-29 20:29:22 +0100
commit77f852e06d3be7cb558be73d6edc98d30cb52d65 (patch)
treed217dc9faf144cc188340c9ce6324fd3f862e765 /cmd
parent0b2fb488e1ccb877f16bad2f413494824f43d9c8 (diff)
downloadgitolfs3-77f852e06d3be7cb558be73d6edc98d30cb52d65.tar.gz
gitolfs3-77f852e06d3be7cb558be73d6edc98d30cb52d65.zip
Write basic read-only public Git LFS server
The 'integration' with Gitolite is honestly pretty bad and should not be taken very seriously: it runs the 'gitolite access' command to check if some user (e.g., daemon/nobody) should be able to read from the repository. Based on this, it grants access to objects stored in S3, by generating Presigned GetObject URLs using the S3 API. Of course, this integration with Gitolite (especially when using the daemon user to check if the user should be able to read) is not very 'high-value': 1. If we already make use of the daemon pseudo-user to control access to public repositories, we may as well check for the existence of git-daemon-export-ok files. In case they exist, we simply assume that the repository is meant to be shown on the public internet and that therefore the LFS archive should also be considered 'open to the public'. 2. The way that Gitolite commands are currently run, this program breaks when not running under the git user without extra configuration; Gitolite decides where repositories are based on the HOME environment variable. This program currently does not set this. This could be set by the CGI server (or fcgiwrap) and would unbreak the system. There's no support for any more advanced kind of authn/authz. Uploading is also not supported yet. That's still to come.
Diffstat (limited to 'cmd')
-rw-r--r--cmd/git-lfs-server/main.go329
1 files changed, 329 insertions, 0 deletions
diff --git a/cmd/git-lfs-server/main.go b/cmd/git-lfs-server/main.go
index 06ab7d0..c5b47a3 100644
--- a/cmd/git-lfs-server/main.go
+++ b/cmd/git-lfs-server/main.go
@@ -1 +1,330 @@
1package main 1package main
2
import (
	"context"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/http/cgi"
	"net/url"
	"os"
	"os/exec"
	"path"
	"regexp"
	"slices"
	"strconv"
	"strings"
	"time"
	"unicode"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
)
24
// operation is a Git LFS Batch API operation kind ("download" or "upload").
type operation string

// transferAdapter identifies a Git LFS transfer mechanism; only "basic" is supported.
type transferAdapter string

// hashAlgo names the hash algorithm used for object IDs; only "sha256" is supported.
type hashAlgo string

const (
	operationDownload    operation       = "download"
	operationUpload      operation       = "upload"
	transferAdapterBasic transferAdapter = "basic"
	hashAlgoSHA256       hashAlgo        = "sha256"
)

// lfsMIME is the media type the Git LFS Batch API mandates for both
// request and response bodies.
const lfsMIME = "application/vnd.git-lfs+json"
37
// batchRef identifies the ref a Batch API request applies to.
type batchRef struct {
	Name string `json:"name"`
}

// batchRequestObject describes a single object the client wants to
// transfer: its SHA-256 OID and its size in bytes.
type batchRequestObject struct {
	OID  string `json:"oid"`
	Size uint64 `json:"size"`
}

// batchRequest is the JSON body of a Git LFS Batch API request.
type batchRequest struct {
	Operation operation            `json:"operation"`
	Transfers []transferAdapter    `json:"transfers,omitempty"`
	Ref       *batchRef            `json:"ref,omitempty"`
	Objects   []batchRequestObject `json:"objects"`
	HashAlgo  hashAlgo             `json:"hash_algo,omitempty"`
}
54
55type RFC3339SecondsTime time.Time
56
57func (t RFC3339SecondsTime) MarshalJSON() ([]byte, error) {
58 b := make([]byte, 0, len(time.RFC3339)+len(`""`))
59 b = append(b, '"')
60 b = time.Time(t).AppendFormat(b, time.RFC3339)
61 b = append(b, '"')
62 return b, nil
63}
64
65type SecondDuration time.Duration
66
67func (d SecondDuration) MarshalJSON() ([]byte, error) {
68 var b []byte
69 b = strconv.AppendInt(b, int64(time.Duration(d).Seconds()), 10)
70 return b, nil
71}
72
// batchAction describes how a client can perform a transfer operation:
// the URL to use, extra request headers, and when the URL expires.
type batchAction struct {
	HRef   *url.URL          `json:"href"`
	Header map[string]string `json:"header,omitempty"`
	// ExpiresIn is a validity period in seconds; ExpiresAt is an absolute
	// expiry timestamp. This server only populates ExpiresIn.
	ExpiresIn *SecondDuration     `json:"expires_in,omitempty"`
	ExpiresAt *RFC3339SecondsTime `json:"expires_at,omitempty"`
}

// batchError is a per-object error in a Batch API response.
type batchError struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}

// batchResponseObject is the per-object part of a Batch API response:
// either a set of actions the client may perform, or an error.
type batchResponseObject struct {
	OID           string                    `json:"oid"`
	Size          uint64                    `json:"size"`
	Authenticated *bool                     `json:"authenticated"`
	Actions       map[operation]batchAction `json:"actions,omitempty"`
	Error         *batchError               `json:"error,omitempty"`
}

// batchResponse is the JSON body of a Git LFS Batch API response.
type batchResponse struct {
	Transfer transferAdapter       `json:"transfer,omitempty"`
	Objects  []batchResponseObject `json:"objects"`
	HashAlgo hashAlgo              `json:"hash_algo,omitempty"`
}
98
// re matches Batch API request paths and captures the repository path
// (everything before ".git").
var re = regexp.MustCompile(`^([a-zA-Z0-9-_/]+)\.git/info/lfs/objects/batch$`)

// handler serves the Git LFS Batch API backed by an S3 bucket, using
// Gitolite to decide whether the anonymous user may read a repository.
type handler struct {
	mc       *minio.Client // S3 client used for stat and presigned GET URLs
	bucket   string        // S3 bucket holding the LFS objects
	anonUser string        // Gitolite user representing anonymous access
}
106
107// Requires lowercase hash
108func isValidSHA256Hash(hash string) bool {
109 if len(hash) != 64 {
110 return false
111 }
112 for _, c := range hash {
113 if !unicode.Is(unicode.ASCII_Hex_Digit, c) {
114 return false
115 }
116 }
117 return true
118}
119
// lfsError is the top-level JSON error document defined by the Git LFS API.
type lfsError struct {
	Message          string `json:"message"`
	DocumentationURL string `json:"documentation_url,omitempty"`
	RequestID        string `json:"request_id,omitempty"`
}
125
126func makeRespError(w http.ResponseWriter, message string, code int) {
127 w.Header().Set("Content-Type", lfsMIME)
128 w.WriteHeader(code)
129 json.NewEncoder(w).Encode(lfsError{Message: message})
130}
131
132func makeObjError(obj parsedBatchObject, message string, code int) batchResponseObject {
133 return batchResponseObject{
134 OID: obj.fullHash,
135 Size: obj.size,
136 Error: &batchError{
137 Message: message,
138 Code: code,
139 },
140 }
141}
142
143func (h *handler) handleDownloadObject(ctx context.Context, repo string, obj parsedBatchObject) batchResponseObject {
144 fullPath := path.Join(repo, obj.firstByte, obj.secondByte, obj.fullHash)
145 expiresIn := time.Hour * 24
146 expiresInSeconds := SecondDuration(expiresIn)
147
148 info, err := h.mc.StatObject(ctx, h.bucket, fullPath, minio.StatObjectOptions{Checksum: true})
149 if err != nil {
150 var resp minio.ErrorResponse
151 if errors.As(err, &resp) && resp.StatusCode == http.StatusNotFound {
152 return makeObjError(obj, "Object does not exist", http.StatusNotFound)
153 }
154 // TODO: consider not making this an object-specific, but rather a
155 // generic error such that the entire Batch API request fails.
156 return makeObjError(obj, "Failed to query object information", http.StatusInternalServerError)
157 }
158 if info.ChecksumSHA256 != "" && strings.ToLower(info.ChecksumSHA256) != obj.fullHash {
159 return makeObjError(obj, "Corrupted file", http.StatusUnprocessableEntity)
160 }
161 if uint64(info.Size) != obj.size {
162 return makeObjError(obj, "Incorrect size specified for object", http.StatusUnprocessableEntity)
163 }
164
165 presigned, err := h.mc.PresignedGetObject(ctx, h.bucket, fullPath, expiresIn, url.Values{})
166 if err != nil {
167 // TODO: consider not making this an object-specific, but rather a
168 // generic error such that the entire Batch API request fails.
169 return makeObjError(obj, "Failed to generate action href", http.StatusInternalServerError)
170 }
171
172 authenticated := true
173 return batchResponseObject{
174 OID: obj.fullHash,
175 Size: obj.size,
176 Authenticated: &authenticated,
177 Actions: map[operation]batchAction{
178 operationDownload: {
179 HRef: presigned,
180 ExpiresIn: &expiresInSeconds,
181 },
182 },
183 }
184}
185
// parsedBatchObject is a validated, lowercased batch request object,
// with the leading OID bytes split out for the on-bucket path layout
// (objects/<1st byte>/<2nd byte>/<hash>).
type parsedBatchObject struct {
	firstByte  string // first two hex characters of the OID
	secondByte string // next two hex characters of the OID
	fullHash   string // full lowercase hex SHA-256 OID
	size       uint64 // expected object size in bytes
}
192
193func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
194 submatches := re.FindStringSubmatch(r.URL.Path)
195 if len(submatches) != 1 {
196 makeRespError(w, "Not found", http.StatusNotFound)
197 return
198 }
199 repo := strings.TrimPrefix("/", path.Clean(submatches[0]))
200
201 if !slices.Contains(r.Header.Values("Accept"), lfsMIME) {
202 makeRespError(w, "Expected "+lfsMIME+" in list of acceptable response media types", http.StatusNotAcceptable)
203 return
204 }
205 if r.Header.Get("Content-Type") != lfsMIME {
206 makeRespError(w, "Expected request Content-Type to be "+lfsMIME, http.StatusUnsupportedMediaType)
207 return
208 }
209
210 var body batchRequest
211 if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
212 makeRespError(w, "Failed to parse request body as JSON", http.StatusBadRequest)
213 return
214 }
215
216 if body.HashAlgo != hashAlgoSHA256 {
217 makeRespError(w, "Unsupported hash algorithm specified", http.StatusConflict)
218 return
219 }
220
221 // TODO: handle authentication
222 // right now, we're just trying to make everything publically accessible
223 if body.Operation == operationUpload {
224 makeRespError(w, "Upload operations are currently not supported", http.StatusForbidden)
225 return
226 }
227
228 if len(body.Transfers) != 0 && !slices.Contains(body.Transfers, transferAdapterBasic) {
229 makeRespError(w, "Unsupported transfer adapter specified (supported: basic)", http.StatusConflict)
230 return
231 }
232
233 gitoliteArgs := []string{"access", "-q", repo, h.anonUser, "R"}
234 if body.Ref != nil && body.Ref.Name != "" {
235 gitoliteArgs = append(gitoliteArgs, body.Ref.Name)
236 }
237 cmd := exec.Command("gitolite", gitoliteArgs...)
238 err := cmd.Run()
239 permGranted := err == nil
240 var exitErr *exec.ExitError
241 if err != nil && !errors.As(err, &exitErr) {
242 makeRespError(w, "Failed to query access information", http.StatusInternalServerError)
243 return
244 }
245 if !permGranted {
246 // TODO: when handling authorization, make sure to return 403 Forbidden
247 // here when the user *does* have read permissions, but is not allowed
248 // to write when requesting an upload operation.
249 makeRespError(w, "Repository not found", http.StatusNotFound)
250 return
251 }
252
253 var objects []parsedBatchObject
254 for _, obj := range body.Objects {
255 oid := strings.ToLower(obj.OID)
256 if !isValidSHA256Hash(oid) {
257 makeRespError(w, "Invalid hash format in object ID", http.StatusBadRequest)
258 return
259 }
260 objects = append(objects, parsedBatchObject{
261 firstByte: oid[:2],
262 secondByte: oid[2:4],
263 fullHash: oid,
264 size: obj.Size,
265 })
266 }
267
268 resp := batchResponse{
269 Transfer: transferAdapterBasic,
270 HashAlgo: hashAlgoSHA256,
271 }
272 for _, obj := range objects {
273 resp.Objects = append(resp.Objects, h.handleDownloadObject(r.Context(), repo, obj))
274 }
275
276 w.Header().Set("Content-Type", lfsMIME)
277 w.WriteHeader(http.StatusOK)
278 json.NewEncoder(w).Encode(resp)
279}
280
// die prints "Error: " followed by the formatted message to stderr and
// terminates the process with exit status 1.
func die(msg string, args ...any) {
	fmt.Fprintf(os.Stderr, "Error: "+msg+"\n", args...)
	os.Exit(1)
}
287
288func main() {
289 endpoint := os.Getenv("S3_ENDPOINT")
290 accessKeyID := os.Getenv("S3_ACCESS_KEY_ID")
291 secretAccessKey := os.Getenv("S3_SECRET_ACCESS_KEY")
292 bucket := os.Getenv("S3_BUCKET")
293 anonUser := os.Getenv("ANON_USER")
294
295 if endpoint == "" {
296 die("Expected environment variable S3_ENDPOINT to be set")
297 }
298 if accessKeyID == "" {
299 die("Expected environment variable S3_ACCESS_KEY_ID to be set")
300 }
301 if secretAccessKey == "" {
302 die("Expected environment variable S3_SECRET_ACCESS_KEY to be set")
303 }
304 if bucket == "" {
305 die("Expected environment variable S3_BUCKET to be set")
306 }
307 if anonUser == "" {
308 die("Expected environment variable ANON_USER to be set")
309 }
310
311 mc, err := minio.New(endpoint, &minio.Options{
312 Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""),
313 Secure: true,
314 })
315 if err != nil {
316 die("Failed to create S3 client")
317 }
318
319 if err = cgi.Serve(&handler{mc, bucket, anonUser}); err != nil {
320 die("Failed to serve CGI: %s", err)
321 }
322}
323
// Directory structure:
// - lfs/
//   - locks/
//   - objects/
//     - <1st OID byte>
//       - <2nd OID byte>
//         - <OID hash> <- this is the object