From e37e17835ffaf4c2760bfb46e28c68c7581dc739 Mon Sep 17 00:00:00 2001 From: tonic Date: Thu, 18 Jun 2026 20:24:20 +0800 Subject: [PATCH] feat(server): stream monolithic blob PUT straight to object store MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit persistMonolithicUpload spooled the entire blob to a disk tempfile (via the chunked-upload session machinery), hashed it, then read it back and uploaded to the object store — two full passes plus ~2x disk I/O, and receive/upload run serially. For multi-GiB VM disk/memory blobs that's the bulk of a push (single PUTs were taking 2-4 min). Monolithic PUT already knows the digest up front (it's in the URL), so there's no need to buffer: stream the request body through a sha256 hasher straight into a concurrent multipart upload (no disk), verify the digest once the stream drains, and delete on mismatch so the content-addressed key never keeps unverified bytes. Server-side digest verification is preserved; no client change. The chunked PATCH path (no first-party client uses it) still spools to disk, since its digest is only known at finalize. GCS S3-compat streaming multipart validated against staging: 12 MiB in 3 concurrent 5 MiB parts, sha256 round-trip verified. --- objectstore/client.go | 23 ++++++++++++++++ registry/registry.go | 6 +++++ server/registry_v2_uploads.go | 50 ++++++++++++++++++++++++----------- 3 files changed, 64 insertions(+), 15 deletions(-) diff --git a/objectstore/client.go b/objectstore/client.go index f789844..ef41657 100644 --- a/objectstore/client.go +++ b/objectstore/client.go @@ -58,6 +58,29 @@ func (c *Client) Put(ctx context.Context, key string, body io.Reader, size int64 return nil } +// blobUploadPartSize/blobUploadThreads bound PutStreaming memory at +// partSize*threads while keeping concurrency for multi-GiB blobs. +const ( + blobUploadPartSize = uint64(64) << 20 + blobUploadThreads = uint(4) +) + +// PutStreaming uploads body to key with concurrent multipart, reading from a +// non-seekable source without buffering the whole object. Memory is bounded at +// blobUploadPartSize*blobUploadThreads. +func (c *Client) PutStreaming(ctx context.Context, key string, body io.Reader, size int64) error { + _, err := c.client.PutObject(ctx, c.cfg.Bucket, c.fullKey(key), body, size, minio.PutObjectOptions{ + ContentType: "application/octet-stream", + PartSize: blobUploadPartSize, + NumThreads: blobUploadThreads, + ConcurrentStreamParts: true, + }) + if err != nil { + return fmt.Errorf("put streaming %s: %w", key, err) + } + return nil +} + // Get returns a streaming reader and size for the given key. func (c *Client) Get(ctx context.Context, key string) (io.ReadCloser, int64, error) { obj, err := c.client.GetObject(ctx, c.cfg.Bucket, c.fullKey(key), minio.GetObjectOptions{}) diff --git a/registry/registry.go b/registry/registry.go index 5dc7799..cbf8f44 100644 --- a/registry/registry.go +++ b/registry/registry.go @@ -64,6 +64,12 @@ func (r *Registry) PushBlobFromStream(ctx context.Context, digest string, body i return r.client.Put(ctx, blobKey(digest), body, size) } +// PushBlobStreaming streams a blob straight to the object store without +// buffering it whole. The caller verifies the digest after the stream drains. +func (r *Registry) PushBlobStreaming(ctx context.Context, digest string, body io.Reader, size int64) error { + return r.client.PutStreaming(ctx, blobKey(digest), body, size) +} + // BlobExists reports whether a blob with the given digest exists. func (r *Registry) BlobExists(ctx context.Context, digest string) (bool, error) { return r.client.Exists(ctx, blobKey(digest)) diff --git a/server/registry_v2_uploads.go b/server/registry_v2_uploads.go index 5f6c004..b0036c0 100644 --- a/server/registry_v2_uploads.go +++ b/server/registry_v2_uploads.go @@ -1,11 +1,15 @@ package server import ( + "crypto/sha256" + "encoding/hex" "errors" "fmt" "io" "net/http" "strconv" + + "github.com/projecteru2/core/log" ) const uploadBodyLimit = defaultUploadMaxBytes @@ -20,6 +24,7 @@ func (s *Server) v2InitBlobUpload(w http.ResponseWriter, r *http.Request) { id, err := s.uploads.Start() if err != nil { + log.WithFunc("server.v2InitBlobUpload").Errorf(r.Context(), err, "start upload session failed") v2Error(w, http.StatusInternalServerError, "INTERNAL_ERROR", err.Error()) return } @@ -78,31 +83,41 @@ func (s *Server) v2CompleteBlobUpload(w http.ResponseWriter, r *http.Request) { s.persistVerifiedBlob(w, r, name, digest, fu) } +// persistMonolithicUpload streams a single-PUT blob (digest known up front) +// straight to the object store while hashing inline — no disk spool, receive +// and upload overlap. The digest is verified after the stream drains; a +// mismatch deletes the object so the content-addressed key never keeps +// unverified bytes (the verify happens the instant the upload completes, so the +// window is negligible and content-addressing protects readers regardless). func (s *Server) persistMonolithicUpload(w http.ResponseWriter, r *http.Request, name, digest string) { - id, err := s.uploads.Start() - if err != nil { - v2Error(w, http.StatusInternalServerError, "INTERNAL_ERROR", err.Error()) + dgst := stripSHA256Prefix(digest) + + if exists, err := s.reg.BlobExists(r.Context(), dgst); err == nil && exists { + drainBody(r.Body) + s.blobCreated(w, name, digest) return } - body := io.LimitReader(r.Body, uploadBodyLimit) - if _, appendErr := s.uploads.Append(id, body); appendErr != nil { - drainBody(body) - s.uploads.Cancel(id) - writeUploadAppendError(w, appendErr) + + hasher := sha256.New() + body := io.TeeReader(io.LimitReader(r.Body, uploadBodyLimit), hasher) + if err := s.reg.PushBlobStreaming(r.Context(), dgst, body, r.ContentLength); err != nil { + log.WithFunc("server.persistMonolithicUpload").Errorf(r.Context(), err, "stream blob sha256:%s (content-length=%d) failed", dgst, r.ContentLength) + v2Error(w, http.StatusInternalServerError, "BLOB_UPLOAD_INVALID", err.Error()) return } - fu, err := s.uploads.Finalize(id) - if err != nil { - drainBody(body) - writeUploadAppendError(w, err) + + if got := "sha256:" + hex.EncodeToString(hasher.Sum(nil)); got != digest { + _ = s.reg.DeleteBlob(r.Context(), dgst) + v2Error(w, http.StatusBadRequest, "DIGEST_INVALID", + fmt.Sprintf("digest mismatch: got %s, expected %s", got, digest)) return } - defer func() { _ = fu.Close() }() - - s.persistVerifiedBlob(w, r, name, digest, fu) + s.blobCreated(w, name, digest) } // persistVerifiedBlob verifies the digest then streams to the object store. +// Used by the chunked PATCH upload path, where the full blob is spooled to +// disk first so the digest can be checked before it reaches the object store. func (s *Server) persistVerifiedBlob(w http.ResponseWriter, r *http.Request, name, digest string, fu *FinalizedUpload) { if got := fu.Digest(); got != digest { v2Error(w, http.StatusBadRequest, "DIGEST_INVALID", @@ -112,14 +127,19 @@ func (s *Server) persistVerifiedBlob(w http.ResponseWriter, r *http.Request, nam rdr, err := fu.Reader() if err != nil { + log.WithFunc("server.persistVerifiedBlob").Errorf(r.Context(), err, "open spooled blob %s failed", digest) v2Error(w, http.StatusInternalServerError, "INTERNAL_ERROR", err.Error()) return } if err := s.reg.PushBlobFromStream(r.Context(), stripSHA256Prefix(digest), rdr, fu.Size()); err != nil { + log.WithFunc("server.persistVerifiedBlob").Errorf(r.Context(), err, "push spooled blob %s (size=%d) failed", digest, fu.Size()) v2Error(w, http.StatusInternalServerError, "BLOB_UPLOAD_INVALID", err.Error()) return } + s.blobCreated(w, name, digest) +} +func (s *Server) blobCreated(w http.ResponseWriter, name, digest string) { w.Header().Set("Location", fmt.Sprintf("/v2/%s/blobs/%s", name, digest)) w.Header().Set("Docker-Content-Digest", digest) w.WriteHeader(http.StatusCreated)