From c38d88005319c832a2903fabe6e3b42220a41ac0 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 8 May 2026 18:08:36 +0200 Subject: [PATCH 01/33] wip --- Cargo.lock | 1 + clients/rust/Cargo.toml | 6 + clients/rust/src/client.rs | 24 ++ clients/rust/src/error.rs | 11 + clients/rust/src/get.rs | 22 +- clients/rust/src/lib.rs | 4 + clients/rust/src/multipart.rs | 325 ++++++++++++++++++ clients/rust/src/put.rs | 96 +++++- clients/rust/tests/common/mod.rs | 37 ++ clients/rust/tests/e2e.rs | 32 +- clients/rust/tests/multipart.rs | 246 +++++++++++++ objectstore-server/src/endpoints/multipart.rs | 69 +--- objectstore-server/tests/multipart.rs | 47 +-- objectstore-types/Cargo.toml | 1 + objectstore-types/src/lib.rs | 1 + objectstore-types/src/multipart.rs | 92 +++++ 16 files changed, 866 insertions(+), 148 deletions(-) create mode 100644 clients/rust/src/multipart.rs create mode 100644 clients/rust/tests/common/mod.rs create mode 100644 clients/rust/tests/multipart.rs create mode 100644 objectstore-types/src/multipart.rs diff --git a/Cargo.lock b/Cargo.lock index 67445a39..96669a6c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2702,6 +2702,7 @@ version = "0.1.9" dependencies = [ "http 1.4.0", "humantime", + "humantime-serde", "insta", "mediatype", "serde", diff --git a/clients/rust/Cargo.toml b/clients/rust/Cargo.toml index 79bdee5a..25d1a2fd 100644 --- a/clients/rust/Cargo.toml +++ b/clients/rust/Cargo.toml @@ -39,6 +39,12 @@ zstd = "0.13.3" [features] default = ["native-tls", "hickory-dns"] +multipart-low-level = [] rustls = ["reqwest/rustls"] native-tls = ["reqwest/native-tls"] hickory-dns = ["reqwest/hickory-dns"] + +[[test]] +name = "multipart" +path = "tests/multipart.rs" +required-features = ["multipart-low-level"] diff --git a/clients/rust/src/client.rs b/clients/rust/src/client.rs index 2d4eced4..df5594a4 100644 --- a/clients/rust/src/client.rs +++ b/clients/rust/src/client.rs @@ -493,6 +493,30 @@ impl Session { let builder = 
self.client.reqwest.post(url); self.prepare_builder(builder) } + + pub(crate) fn request_url( + &self, + method: reqwest::Method, + url: Url, + ) -> crate::Result { + let builder = self.client.reqwest.request(method, url); + self.prepare_builder(builder) + } + + pub(crate) fn multipart_url(&self, prefix: &str, object_key: &str) -> Url { + let mut url = self.client.service_url.clone(); + let mut segments = url.path_segments_mut().unwrap(); + segments + .push("v1") + .push(prefix) + .push(&self.scope.usecase.name) + .push(&self.scope.scopes.as_api_path().to_string()); + if !object_key.is_empty() { + segments.extend(object_key.split("/")); + } + drop(segments); + url + } } #[cfg(test)] diff --git a/clients/rust/src/error.rs b/clients/rust/src/error.rs index 098bb79a..f3588330 100644 --- a/clients/rust/src/error.rs +++ b/clients/rust/src/error.rs @@ -44,6 +44,17 @@ pub enum Error { /// The error message. message: String, }, + /// Error returned by the multipart complete endpoint in its response body. + /// + /// The complete endpoint always returns HTTP 200 (following the S3 pattern) + /// but may signal failure in the JSON body. + #[error("multipart complete failed ({code}): {message}")] + MultipartComplete { + /// Error code from the server. + code: String, + /// Human-readable error message. + message: String, + }, } /// A convenience alias that defaults our [`Error`] type. 
diff --git a/clients/rust/src/get.rs b/clients/rust/src/get.rs index cdddc096..f23e4679 100644 --- a/clients/rust/src/get.rs +++ b/clients/rust/src/get.rs @@ -128,7 +128,9 @@ pub(crate) fn maybe_decompress( match (metadata.compression, decompress && !encoding_accepted) { (Some(Compression::Zstd), true) => { metadata.compression = None; - ReaderStream::new(ZstdDecoder::new(StreamReader::new(stream))).boxed() + let mut decoder = ZstdDecoder::new(StreamReader::new(stream)); + decoder.multiple_members(true); + ReaderStream::new(decoder).boxed() } _ => stream, } @@ -230,4 +232,22 @@ mod tests { assert_eq!(collect(out).await, payload); assert_eq!(metadata.compression, None); } + + #[tokio::test] + async fn zstd_concatenated_frames_decompress() { + let payload1 = b"hello "; + let payload2 = b"world"; + let compressed1 = collect(compressed_zstd_stream(payload1)).await; + let compressed2 = collect(compressed_zstd_stream(payload2)).await; + let stream = futures_util::stream::iter([ + Ok::<_, std::io::Error>(bytes::Bytes::from(compressed1)), + Ok::<_, std::io::Error>(bytes::Bytes::from(compressed2)), + ]) + .boxed(); + + let mut metadata = zstd_metadata(); + let out = maybe_decompress(stream, &mut metadata, true, &[]); + assert_eq!(collect(out).await, b"hello world"); + assert_eq!(metadata.compression, None); + } } diff --git a/clients/rust/src/lib.rs b/clients/rust/src/lib.rs index 37758eff..b58802bf 100644 --- a/clients/rust/src/lib.rs +++ b/clients/rust/src/lib.rs @@ -10,6 +10,8 @@ mod get; mod head; mod key; mod many; +#[cfg(feature = "multipart-low-level")] +mod multipart; mod put; pub mod utils; @@ -23,4 +25,6 @@ pub use get::*; pub use head::*; pub use key::*; pub use many::*; +#[cfg(feature = "multipart-low-level")] +pub use multipart::*; pub use put::*; diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs new file mode 100644 index 00000000..f82f34e4 --- /dev/null +++ b/clients/rust/src/multipart.rs @@ -0,0 +1,325 @@ +use std::borrow::Cow; +use 
std::collections::BTreeMap; +use std::io::Cursor; + +use async_compression::tokio::bufread::ZstdEncoder; +use bytes::Bytes; +use futures_util::{StreamExt, TryStreamExt}; +use objectstore_types::metadata::Metadata; +use objectstore_types::multipart::{ + CompleteErrorDetail, CompletePart, CompleteRequest, CompleteSuccessResponse, InitiateResponse, + ListPartsResponse, UploadPartResponse, +}; +use reqwest::Body; +use serde::Deserialize; +use tokio_util::io::{ReaderStream, StreamReader}; + +use crate::put::MetadataBuilder; +use crate::{ClientStream, ObjectKey, Session}; + +pub use objectstore_types::multipart::CompletePart as MultipartCompletePart; + +#[derive(Deserialize)] +#[serde(untagged)] +enum CompleteResponse { + Error { error: CompleteErrorDetail }, + Success(CompleteSuccessResponse), +} + +impl Session { + /// Creates a builder for initiating a multipart upload. + pub fn create_multipart_upload(&self) -> InitiateBuilder { + let metadata = Metadata { + expiration_policy: self.scope.usecase().expiration_policy(), + // Multipart part uploads are sent as raw bytes. Callers must opt in + // explicitly if they want metadata that advertises compression. + compression: None, + ..Default::default() + }; + + InitiateBuilder { + session: self.clone(), + metadata, + key: None, + } + } +} + +/// A builder for initiating a multipart upload. +#[derive(Debug)] +pub struct InitiateBuilder { + session: Session, + metadata: Metadata, + key: Option, +} + +impl MetadataBuilder for InitiateBuilder { + fn metadata_mut(&mut self) -> &mut Metadata { + &mut self.metadata + } +} + +impl InitiateBuilder { + /// Sets an explicit object key. + /// + /// If a key is specified, the object will be stored under that key. Otherwise, the Objectstore + /// server will automatically assign a random key, which is then returned from the initiate + /// request. 
+ pub fn key(mut self, key: impl Into) -> Self { + self.key = Some(key.into()).filter(|k| !k.is_empty()); + self + } + + /// Sets an explicit compression algorithm. + /// + /// Multipart uploads default to no compression, even if the session usecase + /// has a default compression configured. When set, each uploaded part is + /// compressed client-side before it is sent. + pub fn compression(self, compression: impl Into>) -> Self { + MetadataBuilder::compression(self, compression) + } + + /// Sets the expiration policy. + pub fn expiration_policy(self, expiration_policy: crate::ExpirationPolicy) -> Self { + MetadataBuilder::expiration_policy(self, expiration_policy) + } + + /// Sets the content type. + pub fn content_type(self, content_type: impl Into>) -> Self { + MetadataBuilder::content_type(self, content_type) + } + + /// Sets the origin. + pub fn origin(self, origin: impl Into) -> Self { + MetadataBuilder::origin(self, origin) + } + + /// Sets the custom metadata map. + pub fn set_metadata(self, metadata: impl Into>) -> Self { + MetadataBuilder::set_metadata(self, metadata) + } + + /// Appends a key/value to the custom metadata. + pub fn append_metadata(self, key: impl Into, value: impl Into) -> Self { + MetadataBuilder::append_metadata(self, key, value) + } + + /// Sends the initiate request and returns a [`MultipartUpload`] handle. 
+ pub async fn send(self) -> crate::Result { + let method = match self.key { + Some(_) => reqwest::Method::PUT, + None => reqwest::Method::POST, + }; + + let key_str = self.key.as_deref().unwrap_or_default(); + let url = self.session.multipart_url("objects:multipart", key_str); + let mut builder = self.session.request_url(method, url)?; + + builder = builder.headers(self.metadata.to_headers("")?); + + let response: InitiateResponse = builder.send().await?.error_for_status()?.json().await?; + + Ok(MultipartUpload { + session: self.session, + key: response.key, + upload_id: response.upload_id, + compression: self.metadata.compression, + }) + } +} + +/// Handle to an in-progress multipart upload. +/// +/// Returned by [`InitiateBuilder::send`]. Use it to upload parts, list parts, +/// and complete or abort the upload. +#[derive(Debug)] +pub struct MultipartUpload { + session: Session, + key: String, + upload_id: String, + compression: Option, +} + +impl MultipartUpload { + /// Returns the upload session identifier. + pub fn id(&self) -> &str { + &self.upload_id + } + + /// Returns the object key. + pub fn key(&self) -> &str { + &self.key + } + + /// Uploads a part from an in-memory buffer. + /// + /// An optional base64-encoded Content-MD5 digest can be provided for + /// server-side integrity verification. When compression is enabled on this + /// upload, the digest must match the transmitted compressed part bytes. + pub async fn put( + &self, + body: impl Into, + part_number: u32, + content_md5: Option<&str>, + ) -> crate::Result { + let (body, content_length) = self + .prepare_part_body(MultipartPart::Buffer(body.into()), None) + .await?; + self.upload_part(body, content_length, part_number, content_md5) + .await + } + + /// Uploads a part from a stream. The caller must provide the exact content length. + /// + /// An optional base64-encoded Content-MD5 digest can be provided for + /// server-side integrity verification. 
When compression is enabled on this + /// upload, the digest must match the transmitted compressed part bytes. + pub async fn put_stream( + &self, + stream: ClientStream, + content_length: u64, + part_number: u32, + content_md5: Option<&str>, + ) -> crate::Result { + let (body, content_length) = self + .prepare_part_body(MultipartPart::Stream(stream), Some(content_length)) + .await?; + self.upload_part(body, content_length, part_number, content_md5) + .await + } + + async fn prepare_part_body( + &self, + part: MultipartPart, + content_length: Option, + ) -> crate::Result<(Body, u64)> { + match (self.compression, part) { + (None, MultipartPart::Buffer(bytes)) => Ok((bytes.clone().into(), bytes.len() as u64)), + (None, MultipartPart::Stream(stream)) => Ok(( + Body::wrap_stream(stream), + content_length.expect("stream parts require content_length"), + )), + (Some(crate::Compression::Zstd), MultipartPart::Buffer(bytes)) => { + let stream = ReaderStream::new(ZstdEncoder::new(Cursor::new(bytes))) + .map_err(std::io::Error::other) + .boxed(); + let compressed = collect_stream_bytes(stream).await?; + Ok((compressed.clone().into(), compressed.len() as u64)) + } + (Some(crate::Compression::Zstd), MultipartPart::Stream(stream)) => { + let stream = ReaderStream::new(ZstdEncoder::new(StreamReader::new(stream))) + .map_err(std::io::Error::other) + .boxed(); + let compressed = collect_stream_bytes(stream).await?; + Ok((compressed.clone().into(), compressed.len() as u64)) + } + } + } + + async fn upload_part( + &self, + body: Body, + content_length: u64, + part_number: u32, + content_md5: Option<&str>, + ) -> crate::Result { + let mut url = self + .session + .multipart_url("objects:multipart:parts", &self.key); + url.query_pairs_mut() + .append_pair("upload_id", &self.upload_id) + .append_pair("part_number", &part_number.to_string()); + + let mut builder = self + .session + .request_url(reqwest::Method::PUT, url)? 
+ .header(reqwest::header::CONTENT_LENGTH, content_length) + .body(body); + + if let Some(md5) = content_md5 { + builder = builder.header("content-md5", md5); + } + + let response: UploadPartResponse = builder.send().await?.error_for_status()?.json().await?; + Ok(response.etag) + } + + /// Lists the parts that have been uploaded for this multipart upload. + pub async fn list_parts( + &self, + max_parts: Option, + part_number_marker: Option, + ) -> crate::Result { + let mut url = self + .session + .multipart_url("objects:multipart:parts", &self.key); + { + let mut pairs = url.query_pairs_mut(); + pairs.append_pair("upload_id", &self.upload_id); + if let Some(max) = max_parts { + pairs.append_pair("max_parts", &max.to_string()); + } + if let Some(marker) = part_number_marker { + pairs.append_pair("part_number_marker", &marker.to_string()); + } + } + + let builder = self.session.request_url(reqwest::Method::GET, url)?; + + let response: ListPartsResponse = builder.send().await?.error_for_status()?.json().await?; + Ok(response) + } + + /// Aborts this multipart upload, discarding any uploaded parts. + pub async fn abort(self) -> crate::Result<()> { + let mut url = self.session.multipart_url("objects:multipart", &self.key); + url.query_pairs_mut() + .append_pair("upload_id", &self.upload_id); + + let builder = self.session.request_url(reqwest::Method::DELETE, url)?; + builder.send().await?.error_for_status()?; + Ok(()) + } + + /// Completes the multipart upload, assembling all parts into the final object. + /// + /// Returns the final object key on success. The server may return an error in + /// the response body even with HTTP 200 (following the S3 pattern), which is + /// surfaced as [`crate::Error::MultipartComplete`]. 
+ pub async fn complete(self, parts: Vec) -> crate::Result { + let mut url = self + .session + .multipart_url("objects:multipart:complete", &self.key); + url.query_pairs_mut() + .append_pair("upload_id", &self.upload_id); + + let builder = self + .session + .request_url(reqwest::Method::POST, url)? + .json(&CompleteRequest { parts }); + + // The complete endpoint streams whitespace as keepalive before the JSON + // payload. serde_json (used by reqwest's .json()) skips leading whitespace, + // so we can deserialize directly. + // + // The response is always HTTP 200 (S3 pattern) — errors are in the body. + let response = builder.send().await?.error_for_status()?; + match response.json::().await? { + CompleteResponse::Success(s) => Ok(s.key), + CompleteResponse::Error { error } => Err(crate::Error::MultipartComplete { + code: error.code, + message: error.message, + }), + } + } +} + +enum MultipartPart { + Buffer(Bytes), + Stream(ClientStream), +} + +async fn collect_stream_bytes(stream: ClientStream) -> crate::Result { + let bytes = stream.try_collect::().await?; + Ok(bytes.freeze()) +} diff --git a/clients/rust/src/put.rs b/clients/rust/src/put.rs index a1abc50e..bfb4c643 100644 --- a/clients/rust/src/put.rs +++ b/clients/rust/src/put.rs @@ -17,6 +17,64 @@ pub use objectstore_types::metadata::{Compression, ExpirationPolicy}; use crate::{ClientStream, ObjectKey, Session}; +/// A builder that carries per-object [`Metadata`]. +/// +/// This trait provides the common configuration methods shared by every builder +/// that attaches metadata to an upload (e.g. [`PutBuilder`], `InitiateBuilder`). +pub trait MetadataBuilder: Sized { + /// Returns a mutable reference to the metadata being built. + fn metadata_mut(&mut self) -> &mut Metadata; + + /// Sets an explicit compression algorithm to be used for this payload. 
+ /// + /// [`None`] should be used if no compression should be performed by the client, + /// either because the payload is uncompressible (such as a media format), or if the user + /// will handle any kind of compression, without the clients knowledge. + /// + /// By default, the compression algorithm set on this Session's Usecase is used. + fn compression(mut self, compression: impl Into>) -> Self { + self.metadata_mut().compression = compression.into(); + self + } + + /// Sets the expiration policy of the object to be uploaded. + /// + /// By default, the expiration policy set on this Session's Usecase is used. + fn expiration_policy(mut self, expiration_policy: ExpirationPolicy) -> Self { + self.metadata_mut().expiration_policy = expiration_policy; + self + } + + /// Sets the content type of the object to be uploaded. + /// + /// You can use the utility function [`crate::utils::guess_mime_type`] to attempt to guess a + /// `content_type` based on magic bytes. + fn content_type(mut self, content_type: impl Into>) -> Self { + self.metadata_mut().content_type = content_type.into(); + self + } + + /// Sets the origin of the object, typically the IP address of the original source. + fn origin(mut self, origin: impl Into) -> Self { + self.metadata_mut().origin = Some(origin.into()); + self + } + + /// This sets the custom metadata to the provided map. + /// + /// It will clear any previously set metadata. + fn set_metadata(mut self, metadata: impl Into>) -> Self { + self.metadata_mut().custom = metadata.into(); + self + } + + /// Appends the `key`/`value` to the custom metadata of this object. + fn append_metadata(mut self, key: impl Into, value: impl Into) -> Self { + self.metadata_mut().custom.insert(key.into(), value.into()); + self + } +} + /// The response returned from the service after uploading an object. 
#[derive(Debug, Deserialize)] pub struct PutResponse { @@ -107,6 +165,12 @@ pub struct PutBuilder { pub(crate) body: PutBody, } +impl MetadataBuilder for PutBuilder { + fn metadata_mut(&mut self) -> &mut Metadata { + &mut self.metadata + } +} + impl PutBuilder { /// Sets an explicit object key. /// @@ -124,26 +188,23 @@ impl PutBuilder { /// will handle any kind of compression, without the clients knowledge. /// /// By default, the compression algorithm set on this Session's Usecase is used. - pub fn compression(mut self, compression: impl Into>) -> Self { - self.metadata.compression = compression.into(); - self + pub fn compression(self, compression: impl Into>) -> Self { + MetadataBuilder::compression(self, compression) } /// Sets the expiration policy of the object to be uploaded. /// /// By default, the expiration policy set on this Session's Usecase is used. - pub fn expiration_policy(mut self, expiration_policy: ExpirationPolicy) -> Self { - self.metadata.expiration_policy = expiration_policy; - self + pub fn expiration_policy(self, expiration_policy: ExpirationPolicy) -> Self { + MetadataBuilder::expiration_policy(self, expiration_policy) } /// Sets the content type of the object to be uploaded. /// /// You can use the utility function [`crate::utils::guess_mime_type`] to attempt to guess a /// `content_type` based on magic bytes. - pub fn content_type(mut self, content_type: impl Into>) -> Self { - self.metadata.content_type = content_type.into(); - self + pub fn content_type(self, content_type: impl Into>) -> Self { + MetadataBuilder::content_type(self, content_type) } /// Sets the origin of the object, typically the IP address of the original source. 
@@ -163,23 +224,20 @@ impl PutBuilder { /// .unwrap(); /// # } /// ``` - pub fn origin(mut self, origin: impl Into) -> Self { - self.metadata.origin = Some(origin.into()); - self + pub fn origin(self, origin: impl Into) -> Self { + MetadataBuilder::origin(self, origin) } /// This sets the custom metadata to the provided map. /// /// It will clear any previously set metadata. - pub fn set_metadata(mut self, metadata: impl Into>) -> Self { - self.metadata.custom = metadata.into(); - self + pub fn set_metadata(self, metadata: impl Into>) -> Self { + MetadataBuilder::set_metadata(self, metadata) } - /// Appends they `key`/`value` to the custom metadata of this object. - pub fn append_metadata(mut self, key: impl Into, value: impl Into) -> Self { - self.metadata.custom.insert(key.into(), value.into()); - self + /// Appends the `key`/`value` to the custom metadata of this object. + pub fn append_metadata(self, key: impl Into, value: impl Into) -> Self { + MetadataBuilder::append_metadata(self, key, value) } } diff --git a/clients/rust/tests/common/mod.rs b/clients/rust/tests/common/mod.rs new file mode 100644 index 00000000..c4e4513d --- /dev/null +++ b/clients/rust/tests/common/mod.rs @@ -0,0 +1,37 @@ +#![allow(dead_code)] + +use std::sync::LazyLock; + +use objectstore_client::{Client, SecretKey, Session, TokenGenerator, Usecase}; +use objectstore_test::server::{TEST_EDDSA_KID, TEST_EDDSA_PRIVKEY_PATH, TestServer, config}; + +pub static TEST_EDDSA_PRIVKEY: LazyLock = + LazyLock::new(|| std::fs::read_to_string(&*TEST_EDDSA_PRIVKEY_PATH).unwrap()); + +pub async fn test_server() -> TestServer { + TestServer::with_config(config::Config { + auth: config::AuthZ { + enforce: true, + ..Default::default() + }, + ..Default::default() + }) + .await +} + +pub fn test_token_generator() -> TokenGenerator { + TokenGenerator::new(SecretKey { + kid: TEST_EDDSA_KID.into(), + secret_key: TEST_EDDSA_PRIVKEY.clone(), + }) + .unwrap() +} + +pub fn test_session(server: &TestServer) -> Session 
{ + let client = Client::builder(server.url("/")) + .token(test_token_generator()) + .build() + .unwrap(); + let usecase = Usecase::new("usecase"); + client.session(usecase.for_organization(12345)).unwrap() +} diff --git a/clients/rust/tests/e2e.rs b/clients/rust/tests/e2e.rs index 88a90d80..c3db3a03 100644 --- a/clients/rust/tests/e2e.rs +++ b/clients/rust/tests/e2e.rs @@ -1,20 +1,17 @@ +mod common; + use std::collections::{BTreeMap, HashSet}; use std::io::Write as _; -use std::sync::LazyLock; +use common::{TEST_EDDSA_PRIVKEY, test_server, test_token_generator}; use futures_util::StreamExt as _; use jsonwebtoken::{Algorithm, EncodingKey, Header, encode, get_current_timestamp}; -use objectstore_client::{ - Client, Error, OperationResult, Permission, SecretKey, TokenGenerator, Usecase, -}; -use objectstore_test::server::{TEST_EDDSA_KID, TEST_EDDSA_PRIVKEY_PATH, TestServer, config}; +use objectstore_client::{Client, Error, OperationResult, Permission, Usecase}; +use objectstore_test::server::TEST_EDDSA_KID; use objectstore_types::metadata::Compression; use reqwest::StatusCode; use serde::Serialize; -pub static TEST_EDDSA_PRIVKEY: LazyLock = - LazyLock::new(|| std::fs::read_to_string(&*TEST_EDDSA_PRIVKEY_PATH).unwrap()); - #[derive(Serialize)] struct JwtClaims { exp: u64, @@ -53,25 +50,6 @@ fn sign_static_token(usecase: &str, scopes: &[(&str, &str)]) -> String { encode(&header, &claims, &encoding_key).unwrap() } -async fn test_server() -> TestServer { - TestServer::with_config(config::Config { - auth: config::AuthZ { - enforce: true, - ..Default::default() - }, - ..Default::default() - }) - .await -} - -fn test_token_generator() -> TokenGenerator { - TokenGenerator::new(SecretKey { - kid: TEST_EDDSA_KID.into(), - secret_key: TEST_EDDSA_PRIVKEY.clone(), - }) - .unwrap() -} - #[tokio::test] async fn stores_uncompressed() { let server = test_server().await; diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs new file mode 100644 index 
00000000..0c59bdee --- /dev/null +++ b/clients/rust/tests/multipart.rs @@ -0,0 +1,246 @@ +//! End-to-end tests for the multipart upload client API. + +mod common; + +use common::{test_server, test_session}; +use objectstore_client::{Compression, Error, MultipartCompletePart}; + +#[tokio::test] +async fn full_upload_flow() { + let server = test_server().await; + let session = test_session(&server); + + let upload = session + .create_multipart_upload() + .key("multipart-test-key") + .send() + .await + .unwrap(); + + assert_eq!(upload.key(), "multipart-test-key"); + assert!(!upload.id().is_empty()); + + let part1_data = b"hello "; + let part2_data = b"world!"; + + let etag1 = upload.put(part1_data.as_slice(), 1, None).await.unwrap(); + let etag2 = upload.put(part2_data.as_slice(), 2, None).await.unwrap(); + + assert!(!etag1.is_empty()); + assert!(!etag2.is_empty()); + + let key = upload + .complete(vec![ + MultipartCompletePart { + part_number: 1, + etag: etag1, + }, + MultipartCompletePart { + part_number: 2, + etag: etag2, + }, + ]) + .await + .unwrap(); + + assert_eq!(key, "multipart-test-key"); + + let response = session.get(&key).send().await.unwrap().unwrap(); + assert_eq!(response.metadata.compression, None); + let payload = response.payload().await.unwrap(); + assert_eq!(payload, "hello world!"); +} + +#[tokio::test] +async fn compressed_upload_flow() { + let server = test_server().await; + let session = test_session(&server); + + let upload = session + .create_multipart_upload() + .key("multipart-compressed-key") + .compression(Compression::Zstd) + .send() + .await + .unwrap(); + + let part1_data = b"hello "; + let part2_data = b"world!"; + + let etag1 = upload.put(part1_data.as_slice(), 1, None).await.unwrap(); + let etag2 = upload.put(part2_data.as_slice(), 2, None).await.unwrap(); + + let key = upload + .complete(vec![ + MultipartCompletePart { + part_number: 1, + etag: etag1, + }, + MultipartCompletePart { + part_number: 2, + etag: etag2, + }, + ]) + 
.await + .unwrap(); + + let response = session + .get(&key) + .decompress(false) + .send() + .await + .unwrap() + .unwrap(); + assert_eq!(response.metadata.compression, Some(Compression::Zstd)); + + let mut expected = zstd::encode_all(&part1_data[..], 0).unwrap(); + expected.extend(zstd::encode_all(&part2_data[..], 0).unwrap()); + assert_eq!( + response.payload().await.unwrap().as_ref(), + expected.as_slice() + ); + + let response = session.get(&key).send().await.unwrap().unwrap(); + assert_eq!(response.metadata.compression, None); + assert_eq!(response.payload().await.unwrap(), "hello world!"); +} + +#[tokio::test] +async fn server_generated_key() { + let server = test_server().await; + let session = test_session(&server); + + let upload = session + .create_multipart_upload() + .compression(None) + .send() + .await + .unwrap(); + + assert!(!upload.key().is_empty()); + + let etag = upload.put(b"data".as_slice(), 1, None).await.unwrap(); + + let key = upload + .complete(vec![MultipartCompletePart { + part_number: 1, + etag, + }]) + .await + .unwrap(); + + assert!(!key.is_empty()); + + let response = session.get(&key).send().await.unwrap().unwrap(); + assert_eq!(response.payload().await.unwrap(), "data"); +} + +#[tokio::test] +async fn list_parts() { + let server = test_server().await; + let session = test_session(&server); + + let upload = session + .create_multipart_upload() + .key("list-parts-key") + .compression(None) + .send() + .await + .unwrap(); + + upload.put(b"part-one".as_slice(), 1, None).await.unwrap(); + upload.put(b"part-two".as_slice(), 2, None).await.unwrap(); + + let parts = upload.list_parts(None, None).await.unwrap(); + assert_eq!(parts.parts.len(), 2); + assert!(parts.parts.contains_key(&1)); + assert!(parts.parts.contains_key(&2)); + assert_eq!(parts.parts[&1].size, 8); + assert_eq!(parts.parts[&2].size, 8); + assert!(!parts.is_truncated); + + upload.abort().await.unwrap(); +} + +#[tokio::test] +async fn abort_upload() { + let server = 
test_server().await; + let session = test_session(&server); + + let upload = session + .create_multipart_upload() + .key("abort-key") + .compression(None) + .send() + .await + .unwrap(); + + upload.put(b"some data".as_slice(), 1, None).await.unwrap(); + upload.abort().await.unwrap(); +} + +#[tokio::test] +async fn metadata_preserved() { + let server = test_server().await; + let session = test_session(&server); + + let upload = session + .create_multipart_upload() + .key("metadata-key") + .compression(None) + .content_type("text/plain") + .origin("203.0.113.42") + .append_metadata("my-key".to_string(), "my-value".to_string()) + .send() + .await + .unwrap(); + + let etag = upload.put(b"payload".as_slice(), 1, None).await.unwrap(); + + let key = upload + .complete(vec![MultipartCompletePart { + part_number: 1, + etag, + }]) + .await + .unwrap(); + + let response = session.get(&key).send().await.unwrap().unwrap(); + assert_eq!(response.metadata.content_type, "text/plain"); + assert_eq!(response.metadata.origin.as_deref(), Some("203.0.113.42")); + assert_eq!( + response.metadata.custom.get("my-key").map(String::as_str), + Some("my-value") + ); +} + +#[tokio::test] +async fn complete_with_bad_etag() { + let server = test_server().await; + let session = test_session(&server); + + let upload = session + .create_multipart_upload() + .key("bad-etag-key") + .compression(None) + .send() + .await + .unwrap(); + + upload.put(b"real data".as_slice(), 1, None).await.unwrap(); + + let result = upload + .complete(vec![MultipartCompletePart { + part_number: 1, + etag: "bogus-etag".to_string(), + }]) + .await; + + match result { + Err(Error::MultipartComplete { code, message }) => { + assert!(!code.is_empty(), "error code should not be empty"); + assert!(!message.is_empty(), "error message should not be empty"); + } + other => panic!("expected MultipartComplete error, got: {other:?}"), + } +} diff --git a/objectstore-server/src/endpoints/multipart.rs 
b/objectstore-server/src/endpoints/multipart.rs index a9a5f17b..5a12454a 100644 --- a/objectstore-server/src/endpoints/multipart.rs +++ b/objectstore-server/src/endpoints/multipart.rs @@ -1,4 +1,3 @@ -use std::collections::BTreeMap; use std::convert::Infallible; use std::time::{Duration, SystemTime}; @@ -15,9 +14,13 @@ use http::header; use objectstore_service::error::Error as ServiceError; use objectstore_service::id::{ObjectContext, ObjectId}; use objectstore_service::multipart::{CompletedPart, PartNumber, UploadId}; +use objectstore_types::auth::Permission; use objectstore_types::metadata::Metadata; -use serde::{Deserialize, Serialize}; - +use objectstore_types::multipart::{ + CompleteErrorDetail, CompleteErrorResponse, CompleteRequest, CompleteSuccessResponse, + InitiateResponse, ListPartsResponse, PartInfo, UploadPartResponse, +}; +use serde::Deserialize; use crate::auth::AuthAwareService; use crate::endpoints::common::{ApiError, ApiResult}; use crate::extractors::Xt; @@ -66,62 +69,12 @@ struct ListPartsQuery { part_number_marker: Option, } -// --- Request/Response types --- - -#[derive(Debug, Serialize)] -struct InitiateResponse { - key: String, - upload_id: UploadId, -} - -#[derive(Debug, Serialize)] -struct UploadPartResponse { - etag: String, -} - -#[derive(Debug, Serialize)] -struct PartInfo { - etag: String, - #[serde(with = "humantime_serde")] - last_modified: SystemTime, - size: u64, -} - -#[derive(Debug, Serialize)] -struct ListPartsResponse { - parts: BTreeMap, - is_truncated: bool, - #[serde(skip_serializing_if = "Option::is_none")] - next_part_number_marker: Option, +fn validate_part_number(part_number: u32) -> ApiResult<()> { + if part_number == 0 { + return Err(ApiError::Client("part_number must be >= 1".into())); + } + Ok(()) } - -#[derive(Debug, Deserialize)] -struct CompletePartRequest { - part_number: PartNumber, - etag: String, -} - -#[derive(Debug, Deserialize)] -struct CompleteRequest { - parts: Vec, -} - -#[derive(Debug, Serialize)] 
-struct CompleteSuccessResponse { - key: String, -} - -#[derive(Debug, Serialize)] -struct CompleteErrorDetail { - code: String, - message: String, -} - -#[derive(Debug, Serialize)] -struct CompleteErrorResponse { - error: CompleteErrorDetail, -} - // --- Handlers --- async fn initiate_put( diff --git a/objectstore-server/tests/multipart.rs b/objectstore-server/tests/multipart.rs index eaa70b24..1fe8d791 100644 --- a/objectstore-server/tests/multipart.rs +++ b/objectstore-server/tests/multipart.rs @@ -3,49 +3,10 @@ use anyhow::Result; use objectstore_server::config::{AuthZ, Config}; use objectstore_test::server::TestServer; -use serde::Deserialize; - -#[derive(Debug, Deserialize)] -struct InitiateResponse { - key: String, - upload_id: String, -} - -#[derive(Debug, Deserialize)] -struct UploadPartResponse { - etag: String, -} - -#[derive(Debug, Deserialize)] -#[allow(dead_code)] -struct PartInfo { - etag: String, - last_modified: String, - size: u64, -} - -#[derive(Debug, Deserialize)] -struct ListPartsResponse { - parts: std::collections::BTreeMap, - is_truncated: bool, - next_part_number_marker: Option, -} - -#[derive(Debug, Deserialize)] -struct CompleteSuccessResponse { - key: String, -} - -#[derive(Debug, Deserialize)] -struct CompleteErrorDetail { - code: String, - message: String, -} - -#[derive(Debug, Deserialize)] -struct CompleteErrorResponse { - error: CompleteErrorDetail, -} +use objectstore_types::multipart::{ + CompleteErrorResponse, CompleteSuccessResponse, InitiateResponse, ListPartsResponse, + UploadPartResponse, +}; async fn test_server() -> TestServer { TestServer::with_config(Config { diff --git a/objectstore-types/Cargo.toml b/objectstore-types/Cargo.toml index c29e094e..bed43716 100644 --- a/objectstore-types/Cargo.toml +++ b/objectstore-types/Cargo.toml @@ -12,6 +12,7 @@ publish = true [dependencies] http = { workspace = true } humantime = { workspace = true } +humantime-serde = { workspace = true } mediatype = "0.21.0" serde = { workspace = 
true } thiserror = { workspace = true } diff --git a/objectstore-types/src/lib.rs b/objectstore-types/src/lib.rs index 2b7f9e28..13d2946b 100644 --- a/objectstore-types/src/lib.rs +++ b/objectstore-types/src/lib.rs @@ -32,4 +32,5 @@ pub mod auth; pub mod metadata; +pub mod multipart; pub mod scope; diff --git a/objectstore-types/src/multipart.rs b/objectstore-types/src/multipart.rs new file mode 100644 index 00000000..eeaa8734 --- /dev/null +++ b/objectstore-types/src/multipart.rs @@ -0,0 +1,92 @@ +//! Types for the multipart upload protocol. +//! +//! These types are shared between the server (which serializes them) and the +//! client (which deserializes them), ensuring both sides agree on the JSON +//! wire format. + +use std::collections::BTreeMap; +use std::time::SystemTime; + +use serde::{Deserialize, Serialize}; + +/// Response from initiating a multipart upload. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InitiateResponse { + /// The object key (server-generated or user-provided). + pub key: String, + /// The upload session identifier for subsequent requests. + pub upload_id: String, +} + +/// Response from uploading a single part. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UploadPartResponse { + /// Opaque identifier of the uploaded part. + pub etag: String, +} + +/// Information about a single uploaded part, as returned by list-parts. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PartInfo { + /// Opaque identifier of the part. + pub etag: String, + /// When the part was last modified. + #[serde(with = "humantime_serde")] + pub last_modified: SystemTime, + /// Size of the part in bytes. + pub size: u64, +} + +/// Response from listing parts of a multipart upload. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ListPartsResponse { + /// Map of part number to part information. + pub parts: BTreeMap, + /// Whether the response was truncated. 
+ pub is_truncated: bool, + /// Marker for the next page of results, if truncated. + #[serde(skip_serializing_if = "Option::is_none")] + pub next_part_number_marker: Option, +} + +/// A single part reference used in the complete request. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompletePart { + /// The part number. + pub part_number: u32, + /// The etag returned when this part was uploaded. + pub etag: String, +} + +/// Request body for completing a multipart upload. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompleteRequest { + /// Ordered list of all parts that make up the object. + pub parts: Vec, +} + +/// Successful response from completing a multipart upload. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompleteSuccessResponse { + /// The final object key. + pub key: String, +} + +/// Detail of an error that occurred during multipart completion. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompleteErrorDetail { + /// Error code. + pub code: String, + /// Human-readable error message. + pub message: String, +} + +/// Error response from completing a multipart upload. +/// +/// The complete endpoint returns HTTP 200 regardless of success or failure +/// (following the S3 pattern), so errors are communicated in the response body. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompleteErrorResponse { + /// The error detail. 
+ pub error: CompleteErrorDetail, +} From 393ea969334351f2dc79f96059e1c7590193fdd6 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 8 May 2026 18:40:08 +0200 Subject: [PATCH 02/33] wip --- clients/rust/src/client.rs | 56 +++++++++++++++------- clients/rust/src/multipart.rs | 87 ++++++++++++++++++++--------------- 2 files changed, 88 insertions(+), 55 deletions(-) diff --git a/clients/rust/src/client.rs b/clients/rust/src/client.rs index df5594a4..5e76f728 100644 --- a/clients/rust/src/client.rs +++ b/clients/rust/src/client.rs @@ -464,6 +464,40 @@ impl Session { url } + fn multipart_url( + &self, + suffix: Option<&'static str>, + object_key: Option<&str>, + query_pairs: Vec<(&str, String)>, + ) -> Url { + let mut url = self.client.service_url.clone(); + + // `path_segments_mut` can only error if the url is cannot-be-a-base, + // and we check that in `ClientBuilder::new`, therefore this will never panic. + let mut segments = url.path_segments_mut().unwrap(); + segments + .push("v1") + .push(match suffix { + Some("parts") => "objects:multipart:parts", + Some("complete") => "objects:multipart:complete", + _ => "objects:multipart", + }) + .push(&self.scope.usecase.name) + .push(&self.scope.scopes.as_api_path().to_string()); + if let Some(object_key) = object_key.filter(|key| !key.is_empty()) { + segments.extend(object_key.split("/")); + } + drop(segments); + { + let mut pairs = url.query_pairs_mut(); + for (key, value) in query_pairs { + pairs.append_pair(key, &value); + } + } + + url + } + fn prepare_builder(&self, mut builder: RequestBuilder) -> crate::Result { if let Some(token) = self.mint_token()? 
{ builder = builder.header("x-os-auth", format!("Bearer {token}")); @@ -494,29 +528,17 @@ impl Session { self.prepare_builder(builder) } - pub(crate) fn request_url( + pub(crate) fn multipart_request( &self, method: reqwest::Method, - url: Url, + action: Option<&'static str>, + object_key: Option<&str>, + query_pairs: Vec<(&str, String)>, ) -> crate::Result { + let url = self.multipart_url(action, object_key, query_pairs); let builder = self.client.reqwest.request(method, url); self.prepare_builder(builder) } - - pub(crate) fn multipart_url(&self, prefix: &str, object_key: &str) -> Url { - let mut url = self.client.service_url.clone(); - let mut segments = url.path_segments_mut().unwrap(); - segments - .push("v1") - .push(prefix) - .push(&self.scope.usecase.name) - .push(&self.scope.scopes.as_api_path().to_string()); - if !object_key.is_empty() { - segments.extend(object_key.split("/")); - } - drop(segments); - url - } } #[cfg(test)] diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index f82f34e4..d5a3443f 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -111,9 +111,9 @@ impl InitiateBuilder { None => reqwest::Method::POST, }; - let key_str = self.key.as_deref().unwrap_or_default(); - let url = self.session.multipart_url("objects:multipart", key_str); - let mut builder = self.session.request_url(method, url)?; + let mut builder = + self.session + .multipart_request(method, None, self.key.as_deref(), vec![])?; builder = builder.headers(self.metadata.to_headers("")?); @@ -223,16 +223,17 @@ impl MultipartUpload { part_number: u32, content_md5: Option<&str>, ) -> crate::Result { - let mut url = self - .session - .multipart_url("objects:multipart:parts", &self.key); - url.query_pairs_mut() - .append_pair("upload_id", &self.upload_id) - .append_pair("part_number", &part_number.to_string()); - let mut builder = self .session - .request_url(reqwest::Method::PUT, url)? 
+ .multipart_request( + reqwest::Method::PUT, + Some("parts"), + Some(&self.key), + vec![ + ("upload_id", self.upload_id.clone()), + ("part_number", part_number.to_string()), + ], + )? .header(reqwest::header::CONTENT_LENGTH, content_length) .body(body); @@ -250,21 +251,20 @@ impl MultipartUpload { max_parts: Option, part_number_marker: Option, ) -> crate::Result { - let mut url = self - .session - .multipart_url("objects:multipart:parts", &self.key); - { - let mut pairs = url.query_pairs_mut(); - pairs.append_pair("upload_id", &self.upload_id); - if let Some(max) = max_parts { - pairs.append_pair("max_parts", &max.to_string()); - } - if let Some(marker) = part_number_marker { - pairs.append_pair("part_number_marker", &marker.to_string()); - } + let mut params = vec![("upload_id", self.upload_id.clone())]; + if let Some(max) = max_parts { + params.push(("max_parts", max.to_string())); + } + if let Some(marker) = part_number_marker { + params.push(("part_number_marker", marker.to_string())); } - let builder = self.session.request_url(reqwest::Method::GET, url)?; + let builder = self.session.multipart_request( + reqwest::Method::GET, + Some("parts"), + Some(&self.key), + params, + )?; let response: ListPartsResponse = builder.send().await?.error_for_status()?.json().await?; Ok(response) @@ -272,11 +272,18 @@ impl MultipartUpload { /// Aborts this multipart upload, discarding any uploaded parts. 
pub async fn abort(self) -> crate::Result<()> { - let mut url = self.session.multipart_url("objects:multipart", &self.key); - url.query_pairs_mut() - .append_pair("upload_id", &self.upload_id); - - let builder = self.session.request_url(reqwest::Method::DELETE, url)?; + let MultipartUpload { + session, + key, + upload_id, + compression: _, + } = self; + let builder = session.multipart_request( + reqwest::Method::DELETE, + None, + Some(&key), + vec![("upload_id", upload_id)], + )?; builder.send().await?.error_for_status()?; Ok(()) } @@ -287,15 +294,19 @@ impl MultipartUpload { /// the response body even with HTTP 200 (following the S3 pattern), which is /// surfaced as [`crate::Error::MultipartComplete`]. pub async fn complete(self, parts: Vec) -> crate::Result { - let mut url = self - .session - .multipart_url("objects:multipart:complete", &self.key); - url.query_pairs_mut() - .append_pair("upload_id", &self.upload_id); - - let builder = self - .session - .request_url(reqwest::Method::POST, url)? + let MultipartUpload { + session, + key, + upload_id, + compression: _, + } = self; + let builder = session + .multipart_request( + reqwest::Method::POST, + Some("complete"), + Some(&key), + vec![("upload_id", upload_id)], + )? .json(&CompleteRequest { parts }); // The complete endpoint streams whitespace as keepalive before the JSON From 8d0a49d4642e6abdc19162d9f0afd000c7fdb319 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 8 May 2026 18:53:41 +0200 Subject: [PATCH 03/33] wip --- clients/rust/src/error.rs | 9 +++------ objectstore-types/src/multipart.rs | 9 +-------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/clients/rust/src/error.rs b/clients/rust/src/error.rs index f3588330..7f80885a 100644 --- a/clients/rust/src/error.rs +++ b/clients/rust/src/error.rs @@ -44,15 +44,12 @@ pub enum Error { /// The error message. 
message: String, }, - /// Error returned by the multipart complete endpoint in its response body. - /// - /// The complete endpoint always returns HTTP 200 (following the S3 pattern) - /// but may signal failure in the JSON body. + /// Error returned when attempting to complete a multipart upload. #[error("multipart complete failed ({code}): {message}")] MultipartComplete { - /// Error code from the server. + /// The error code or kind. code: String, - /// Human-readable error message. + /// The error message. message: String, }, } diff --git a/objectstore-types/src/multipart.rs b/objectstore-types/src/multipart.rs index eeaa8734..f63519fe 100644 --- a/objectstore-types/src/multipart.rs +++ b/objectstore-types/src/multipart.rs @@ -1,8 +1,4 @@ //! Types for the multipart upload protocol. -//! -//! These types are shared between the server (which serializes them) and the -//! client (which deserializes them), ensuring both sides agree on the JSON -//! wire format. use std::collections::BTreeMap; use std::time::SystemTime; @@ -68,7 +64,7 @@ pub struct CompleteRequest { /// Successful response from completing a multipart upload. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CompleteSuccessResponse { - /// The final object key. + /// The object key. pub key: String, } @@ -82,9 +78,6 @@ pub struct CompleteErrorDetail { } /// Error response from completing a multipart upload. -/// -/// The complete endpoint returns HTTP 200 regardless of success or failure -/// (following the S3 pattern), so errors are communicated in the response body. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CompleteErrorResponse { /// The error detail. 
From c20008fd8463ac01d96f9f4127da7b6607e602eb Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 8 May 2026 18:54:45 +0200 Subject: [PATCH 04/33] wip --- clients/rust/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/clients/rust/Cargo.toml b/clients/rust/Cargo.toml index 25d1a2fd..6371c594 100644 --- a/clients/rust/Cargo.toml +++ b/clients/rust/Cargo.toml @@ -40,6 +40,7 @@ zstd = "0.13.3" [features] default = ["native-tls", "hickory-dns"] multipart-low-level = [] + rustls = ["reqwest/rustls"] native-tls = ["reqwest/native-tls"] hickory-dns = ["reqwest/hickory-dns"] From 5928985b23874d9f1d6871b735c5ce804ef7396e Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 8 May 2026 18:56:48 +0200 Subject: [PATCH 05/33] wip --- clients/rust/src/multipart.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index d5a3443f..ae825ddc 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -31,9 +31,7 @@ impl Session { pub fn create_multipart_upload(&self) -> InitiateBuilder { let metadata = Metadata { expiration_policy: self.scope.usecase().expiration_policy(), - // Multipart part uploads are sent as raw bytes. Callers must opt in - // explicitly if they want metadata that advertises compression. - compression: None, + compression: Some(self.scope.usecase().compression()), ..Default::default() }; @@ -72,9 +70,8 @@ impl InitiateBuilder { /// Sets an explicit compression algorithm. /// - /// Multipart uploads default to no compression, even if the session usecase - /// has a default compression configured. When set, each uploaded part is - /// compressed client-side before it is sent. + /// By default, the compression algorithm set on this Session's Usecase is used. + /// When set, each uploaded part is compressed client-side before it is sent. 
pub fn compression(self, compression: impl Into>) -> Self { MetadataBuilder::compression(self, compression) } From 5c12e5e179d080fd48cb88a10a5ece9e69161ede Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 8 May 2026 19:08:11 +0200 Subject: [PATCH 06/33] wip --- Cargo.lock | 1 + clients/rust/Cargo.toml | 1 + clients/rust/src/multipart.rs | 23 +++++++++++-------- objectstore-server/src/endpoints/multipart.rs | 2 ++ 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 96669a6c..0ca8c54b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2566,6 +2566,7 @@ name = "objectstore-client" version = "0.1.9" dependencies = [ "async-compression", + "base64", "bytes", "futures-util", "infer", diff --git a/clients/rust/Cargo.toml b/clients/rust/Cargo.toml index 6371c594..f7349e1f 100644 --- a/clients/rust/Cargo.toml +++ b/clients/rust/Cargo.toml @@ -12,6 +12,7 @@ publish = true [dependencies] async-compression = { version = "0.4.27", features = ["tokio", "zstd"] } +base64 = "0.22.1" percent-encoding = { workspace = true } bytes = { workspace = true } futures-util = { workspace = true } diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index ae825ddc..46fb8042 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -150,14 +150,14 @@ impl MultipartUpload { /// Uploads a part from an in-memory buffer. /// - /// An optional base64-encoded Content-MD5 digest can be provided for - /// server-side integrity verification. When compression is enabled on this - /// upload, the digest must match the transmitted compressed part bytes. + /// An optional raw MD5 digest can be provided for server-side integrity + /// verification. When compression is enabled on this upload, the digest + /// must match the transmitted compressed part bytes. 
pub async fn put( &self, body: impl Into, part_number: u32, - content_md5: Option<&str>, + content_md5: Option<&[u8; 16]>, ) -> crate::Result { let (body, content_length) = self .prepare_part_body(MultipartPart::Buffer(body.into()), None) @@ -168,15 +168,15 @@ impl MultipartUpload { /// Uploads a part from a stream. The caller must provide the exact content length. /// - /// An optional base64-encoded Content-MD5 digest can be provided for - /// server-side integrity verification. When compression is enabled on this - /// upload, the digest must match the transmitted compressed part bytes. + /// An optional raw MD5 digest can be provided for server-side integrity + /// verification. When compression is enabled on this upload, the digest + /// must match the transmitted compressed part bytes. pub async fn put_stream( &self, stream: ClientStream, content_length: u64, part_number: u32, - content_md5: Option<&str>, + content_md5: Option<&[u8; 16]>, ) -> crate::Result { let (body, content_length) = self .prepare_part_body(MultipartPart::Stream(stream), Some(content_length)) @@ -218,8 +218,10 @@ impl MultipartUpload { body: Body, content_length: u64, part_number: u32, - content_md5: Option<&str>, + content_md5: Option<&[u8; 16]>, ) -> crate::Result { + use base64::Engine; + let mut builder = self .session .multipart_request( @@ -235,7 +237,8 @@ impl MultipartUpload { .body(body); if let Some(md5) = content_md5 { - builder = builder.header("content-md5", md5); + let encoded = base64::engine::general_purpose::STANDARD.encode(md5); + builder = builder.header("content-md5", encoded); } let response: UploadPartResponse = builder.send().await?.error_for_status()?.json().await?; diff --git a/objectstore-server/src/endpoints/multipart.rs b/objectstore-server/src/endpoints/multipart.rs index 5a12454a..965c6ec7 100644 --- a/objectstore-server/src/endpoints/multipart.rs +++ b/objectstore-server/src/endpoints/multipart.rs @@ -135,6 +135,8 @@ async fn upload_part( .and_then(|v| 
v.parse::().ok()) .ok_or_else(|| ApiError::Client("Content-Length header is required".into()))?; + // Content-MD5 must be base64-encoded per RFC 1864; passed through to the + // storage backend for integrity verification. let content_md5 = headers .get("content-md5") .and_then(|v| v.to_str().ok()) From b418b629791cabbdcd4ed72deaac75f576ebf569 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 8 May 2026 19:26:20 +0200 Subject: [PATCH 07/33] wip --- clients/rust/src/lib.rs | 2 + clients/rust/src/macros.rs | 64 +++++++++++++ clients/rust/src/multipart.rs | 159 ++++++++------------------------ clients/rust/src/put.rs | 130 +------------------------- clients/rust/tests/multipart.rs | 8 +- 5 files changed, 115 insertions(+), 248 deletions(-) create mode 100644 clients/rust/src/macros.rs diff --git a/clients/rust/src/lib.rs b/clients/rust/src/lib.rs index b58802bf..f5068697 100644 --- a/clients/rust/src/lib.rs +++ b/clients/rust/src/lib.rs @@ -2,6 +2,8 @@ #![warn(missing_docs)] #![warn(missing_debug_implementations)] +#[macro_use] +mod macros; mod auth; mod client; mod delete; diff --git a/clients/rust/src/macros.rs b/clients/rust/src/macros.rs new file mode 100644 index 00000000..a0728c5d --- /dev/null +++ b/clients/rust/src/macros.rs @@ -0,0 +1,64 @@ +macro_rules! metadata_builder_methods { + ($metadata_field:ident) => { + /// Sets the compression algorithm recorded in this object's metadata. + /// + /// For single-object uploads ([`PutBuilder`](crate::PutBuilder)), the client + /// compresses the payload automatically before sending it. + /// + /// For multipart uploads ([`InitiateBuilder`](crate::InitiateBuilder)), this + /// only records the algorithm in metadata — the caller is responsible for + /// pre-compressing each part. + /// + /// Pass [`None`] to disable compression entirely (e.g. for already-compressed + /// media formats, or when handling compression externally). 
+ /// + /// By default, the compression algorithm set on this Session's Usecase is used. + pub fn compression(mut self, compression: impl Into>) -> Self { + self.$metadata_field.compression = compression.into(); + self + } + + /// Sets the expiration policy of the object to be uploaded. + /// + /// By default, the expiration policy set on this Session's Usecase is used. + pub fn expiration_policy(mut self, expiration_policy: $crate::ExpirationPolicy) -> Self { + self.$metadata_field.expiration_policy = expiration_policy; + self + } + + /// Sets the content type of the object to be uploaded. + /// + /// You can use the utility function [`crate::utils::guess_mime_type`] to attempt to guess a + /// `content_type` based on magic bytes. + pub fn content_type( + mut self, + content_type: impl Into>, + ) -> Self { + self.$metadata_field.content_type = content_type.into(); + self + } + + /// Sets the origin of the object, typically the IP address of the original source. + pub fn origin(mut self, origin: impl Into) -> Self { + self.$metadata_field.origin = Some(origin.into()); + self + } + + /// This sets the custom metadata to the provided map. + /// + /// It will clear any previously set metadata. + pub fn set_metadata( + mut self, + metadata: impl Into>, + ) -> Self { + self.$metadata_field.custom = metadata.into(); + self + } + + /// Appends the `key`/`value` to the custom metadata of this object. 
+ pub fn append_metadata(mut self, key: impl Into, value: impl Into) -> Self { + self.$metadata_field.custom.insert(key.into(), value.into()); + self + } + }; +} diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 46fb8042..50424e0c 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -1,10 +1,4 @@ -use std::borrow::Cow; -use std::collections::BTreeMap; -use std::io::Cursor; - -use async_compression::tokio::bufread::ZstdEncoder; use bytes::Bytes; -use futures_util::{StreamExt, TryStreamExt}; use objectstore_types::metadata::Metadata; use objectstore_types::multipart::{ CompleteErrorDetail, CompletePart, CompleteRequest, CompleteSuccessResponse, InitiateResponse, @@ -12,9 +6,7 @@ use objectstore_types::multipart::{ }; use reqwest::Body; use serde::Deserialize; -use tokio_util::io::{ReaderStream, StreamReader}; -use crate::put::MetadataBuilder; use crate::{ClientStream, ObjectKey, Session}; pub use objectstore_types::multipart::CompletePart as MultipartCompletePart; @@ -28,6 +20,11 @@ enum CompleteResponse { impl Session { /// Creates a builder for initiating a multipart upload. + /// + /// The returned [`InitiateBuilder`] inherits the session's default compression + /// and expiration settings. Unlike single-object uploads, the client does + /// **not** compress parts — the caller must pre-compress each part to match + /// the compression algorithm set in metadata. pub fn create_multipart_upload(&self) -> InitiateBuilder { let metadata = Metadata { expiration_policy: self.scope.usecase().expiration_policy(), @@ -44,6 +41,12 @@ impl Session { } /// A builder for initiating a multipart upload. +/// +/// Metadata set here (compression, expiration, content type, etc.) is sent to +/// the server when [`send`](Self::send) is called. 
Note that unlike +/// single-object uploads, the client does **not** compress parts automatically — +/// if compression is configured, the caller must pre-compress each part before +/// uploading it via [`MultipartUpload::put`] or [`MultipartUpload::put_stream`]. #[derive(Debug)] pub struct InitiateBuilder { session: Session, @@ -51,13 +54,9 @@ pub struct InitiateBuilder { key: Option, } -impl MetadataBuilder for InitiateBuilder { - fn metadata_mut(&mut self) -> &mut Metadata { - &mut self.metadata - } -} - impl InitiateBuilder { + metadata_builder_methods!(metadata); + /// Sets an explicit object key. /// /// If a key is specified, the object will be stored under that key. Otherwise, the Objectstore @@ -68,39 +67,6 @@ impl InitiateBuilder { self } - /// Sets an explicit compression algorithm. - /// - /// By default, the compression algorithm set on this Session's Usecase is used. - /// When set, each uploaded part is compressed client-side before it is sent. - pub fn compression(self, compression: impl Into>) -> Self { - MetadataBuilder::compression(self, compression) - } - - /// Sets the expiration policy. - pub fn expiration_policy(self, expiration_policy: crate::ExpirationPolicy) -> Self { - MetadataBuilder::expiration_policy(self, expiration_policy) - } - - /// Sets the content type. - pub fn content_type(self, content_type: impl Into>) -> Self { - MetadataBuilder::content_type(self, content_type) - } - - /// Sets the origin. - pub fn origin(self, origin: impl Into) -> Self { - MetadataBuilder::origin(self, origin) - } - - /// Sets the custom metadata map. - pub fn set_metadata(self, metadata: impl Into>) -> Self { - MetadataBuilder::set_metadata(self, metadata) - } - - /// Appends a key/value to the custom metadata. - pub fn append_metadata(self, key: impl Into, value: impl Into) -> Self { - MetadataBuilder::append_metadata(self, key, value) - } - /// Sends the initiate request and returns a [`MultipartUpload`] handle. 
pub async fn send(self) -> crate::Result { let method = match self.key { @@ -120,7 +86,6 @@ impl InitiateBuilder { session: self.session, key: response.key, upload_id: response.upload_id, - compression: self.metadata.compression, }) } } @@ -129,12 +94,17 @@ impl InitiateBuilder { /// /// Returned by [`InitiateBuilder::send`]. Use it to upload parts, list parts, /// and complete or abort the upload. +/// +/// Parts are uploaded as-is — the client does **not** compress them. If the +/// upload was initiated with compression metadata, the caller is responsible +/// for pre-compressing each part before calling [`put`](Self::put) or +/// [`put_stream`](Self::put_stream). `content_length` and `content_md5` always +/// refer to the bytes actually transmitted. #[derive(Debug)] pub struct MultipartUpload { session: Session, key: String, upload_id: String, - compression: Option, } impl MultipartUpload { @@ -151,26 +121,24 @@ impl MultipartUpload { /// Uploads a part from an in-memory buffer. /// /// An optional raw MD5 digest can be provided for server-side integrity - /// verification. When compression is enabled on this upload, the digest - /// must match the transmitted compressed part bytes. + /// verification. The digest must match the bytes being transmitted. pub async fn put( &self, body: impl Into, part_number: u32, content_md5: Option<&[u8; 16]>, ) -> crate::Result { - let (body, content_length) = self - .prepare_part_body(MultipartPart::Buffer(body.into()), None) - .await?; - self.upload_part(body, content_length, part_number, content_md5) + let bytes = body.into(); + let content_length = bytes.len() as u64; + self.upload_part(bytes.into(), content_length, part_number, content_md5) .await } - /// Uploads a part from a stream. The caller must provide the exact content length. + /// Uploads a part from a stream. /// + /// The caller must provide the exact `content_length` of the stream. 
/// An optional raw MD5 digest can be provided for server-side integrity - /// verification. When compression is enabled on this upload, the digest - /// must match the transmitted compressed part bytes. + /// verification. The digest must match the bytes being transmitted. pub async fn put_stream( &self, stream: ClientStream, @@ -178,39 +146,13 @@ impl MultipartUpload { part_number: u32, content_md5: Option<&[u8; 16]>, ) -> crate::Result { - let (body, content_length) = self - .prepare_part_body(MultipartPart::Stream(stream), Some(content_length)) - .await?; - self.upload_part(body, content_length, part_number, content_md5) - .await - } - - async fn prepare_part_body( - &self, - part: MultipartPart, - content_length: Option, - ) -> crate::Result<(Body, u64)> { - match (self.compression, part) { - (None, MultipartPart::Buffer(bytes)) => Ok((bytes.clone().into(), bytes.len() as u64)), - (None, MultipartPart::Stream(stream)) => Ok(( - Body::wrap_stream(stream), - content_length.expect("stream parts require content_length"), - )), - (Some(crate::Compression::Zstd), MultipartPart::Buffer(bytes)) => { - let stream = ReaderStream::new(ZstdEncoder::new(Cursor::new(bytes))) - .map_err(std::io::Error::other) - .boxed(); - let compressed = collect_stream_bytes(stream).await?; - Ok((compressed.clone().into(), compressed.len() as u64)) - } - (Some(crate::Compression::Zstd), MultipartPart::Stream(stream)) => { - let stream = ReaderStream::new(ZstdEncoder::new(StreamReader::new(stream))) - .map_err(std::io::Error::other) - .boxed(); - let compressed = collect_stream_bytes(stream).await?; - Ok((compressed.clone().into(), compressed.len() as u64)) - } - } + self.upload_part( + Body::wrap_stream(stream), + content_length, + part_number, + content_md5, + ) + .await } async fn upload_part( @@ -272,17 +214,11 @@ impl MultipartUpload { /// Aborts this multipart upload, discarding any uploaded parts. 
pub async fn abort(self) -> crate::Result<()> { - let MultipartUpload { - session, - key, - upload_id, - compression: _, - } = self; - let builder = session.multipart_request( + let builder = self.session.multipart_request( reqwest::Method::DELETE, None, - Some(&key), - vec![("upload_id", upload_id)], + Some(&self.key), + vec![("upload_id", self.upload_id)], )?; builder.send().await?.error_for_status()?; Ok(()) @@ -294,18 +230,13 @@ impl MultipartUpload { /// the response body even with HTTP 200 (following the S3 pattern), which is /// surfaced as [`crate::Error::MultipartComplete`]. pub async fn complete(self, parts: Vec) -> crate::Result { - let MultipartUpload { - session, - key, - upload_id, - compression: _, - } = self; - let builder = session + let builder = self + .session .multipart_request( reqwest::Method::POST, Some("complete"), - Some(&key), - vec![("upload_id", upload_id)], + Some(&self.key), + vec![("upload_id", self.upload_id)], )? .json(&CompleteRequest { parts }); @@ -324,13 +255,3 @@ impl MultipartUpload { } } } - -enum MultipartPart { - Buffer(Bytes), - Stream(ClientStream), -} - -async fn collect_stream_bytes(stream: ClientStream) -> crate::Result { - let bytes = stream.try_collect::().await?; - Ok(bytes.freeze()) -} diff --git a/clients/rust/src/put.rs b/clients/rust/src/put.rs index bfb4c643..e2c8c240 100644 --- a/clients/rust/src/put.rs +++ b/clients/rust/src/put.rs @@ -1,7 +1,6 @@ use std::fmt; use std::io::{self, Cursor}; use std::path::PathBuf; -use std::{borrow::Cow, collections::BTreeMap}; use async_compression::tokio::bufread::ZstdEncoder; use bytes::Bytes; @@ -13,67 +12,7 @@ use tokio::fs::File; use tokio::io::{AsyncRead, BufReader}; use tokio_util::io::{ReaderStream, StreamReader}; -pub use objectstore_types::metadata::{Compression, ExpirationPolicy}; - -use crate::{ClientStream, ObjectKey, Session}; - -/// A builder that carries per-object [`Metadata`]. 
-/// -/// This trait provides the common configuration methods shared by every builder -/// that attaches metadata to an upload (e.g. [`PutBuilder`], `InitiateBuilder`). -pub trait MetadataBuilder: Sized { - /// Returns a mutable reference to the metadata being built. - fn metadata_mut(&mut self) -> &mut Metadata; - - /// Sets an explicit compression algorithm to be used for this payload. - /// - /// [`None`] should be used if no compression should be performed by the client, - /// either because the payload is uncompressible (such as a media format), or if the user - /// will handle any kind of compression, without the clients knowledge. - /// - /// By default, the compression algorithm set on this Session's Usecase is used. - fn compression(mut self, compression: impl Into>) -> Self { - self.metadata_mut().compression = compression.into(); - self - } - - /// Sets the expiration policy of the object to be uploaded. - /// - /// By default, the expiration policy set on this Session's Usecase is used. - fn expiration_policy(mut self, expiration_policy: ExpirationPolicy) -> Self { - self.metadata_mut().expiration_policy = expiration_policy; - self - } - - /// Sets the content type of the object to be uploaded. - /// - /// You can use the utility function [`crate::utils::guess_mime_type`] to attempt to guess a - /// `content_type` based on magic bytes. - fn content_type(mut self, content_type: impl Into>) -> Self { - self.metadata_mut().content_type = content_type.into(); - self - } - - /// Sets the origin of the object, typically the IP address of the original source. - fn origin(mut self, origin: impl Into) -> Self { - self.metadata_mut().origin = Some(origin.into()); - self - } - - /// This sets the custom metadata to the provided map. - /// - /// It will clear any previously set metadata. 
- fn set_metadata(mut self, metadata: impl Into>) -> Self { - self.metadata_mut().custom = metadata.into(); - self - } - - /// Appends the `key`/`value` to the custom metadata of this object. - fn append_metadata(mut self, key: impl Into, value: impl Into) -> Self { - self.metadata_mut().custom.insert(key.into(), value.into()); - self - } -} +use crate::{ClientStream, Compression, ObjectKey, Session}; /// The response returned from the service after uploading an object. #[derive(Debug, Deserialize)] @@ -165,13 +104,9 @@ pub struct PutBuilder { pub(crate) body: PutBody, } -impl MetadataBuilder for PutBuilder { - fn metadata_mut(&mut self) -> &mut Metadata { - &mut self.metadata - } -} - impl PutBuilder { + metadata_builder_methods!(metadata); + /// Sets an explicit object key. /// /// If a key is specified, the object will be stored under that key. Otherwise, the Objectstore @@ -180,65 +115,6 @@ impl PutBuilder { self.key = Some(key.into()).filter(|k| !k.is_empty()); self } - - /// Sets an explicit compression algorithm to be used for this payload. - /// - /// [`None`] should be used if no compression should be performed by the client, - /// either because the payload is uncompressible (such as a media format), or if the user - /// will handle any kind of compression, without the clients knowledge. - /// - /// By default, the compression algorithm set on this Session's Usecase is used. - pub fn compression(self, compression: impl Into>) -> Self { - MetadataBuilder::compression(self, compression) - } - - /// Sets the expiration policy of the object to be uploaded. - /// - /// By default, the expiration policy set on this Session's Usecase is used. - pub fn expiration_policy(self, expiration_policy: ExpirationPolicy) -> Self { - MetadataBuilder::expiration_policy(self, expiration_policy) - } - - /// Sets the content type of the object to be uploaded. 
- /// - /// You can use the utility function [`crate::utils::guess_mime_type`] to attempt to guess a - /// `content_type` based on magic bytes. - pub fn content_type(self, content_type: impl Into>) -> Self { - MetadataBuilder::content_type(self, content_type) - } - - /// Sets the origin of the object, typically the IP address of the original source. - /// - /// This is an optional but encouraged field that tracks where the payload was - /// originally obtained from. For example, the IP address of the Sentry SDK or CLI - /// that uploaded the data. - /// - /// # Example - /// - /// ```no_run - /// # async fn example(session: objectstore_client::Session) { - /// session.put("data") - /// .origin("203.0.113.42") - /// .send() - /// .await - /// .unwrap(); - /// # } - /// ``` - pub fn origin(self, origin: impl Into) -> Self { - MetadataBuilder::origin(self, origin) - } - - /// This sets the custom metadata to the provided map. - /// - /// It will clear any previously set metadata. - pub fn set_metadata(self, metadata: impl Into>) -> Self { - MetadataBuilder::set_metadata(self, metadata) - } - - /// Appends the `key`/`value` to the custom metadata of this object. - pub fn append_metadata(self, key: impl Into, value: impl Into) -> Self { - MetadataBuilder::append_metadata(self, key, value) - } } /// Compresses the body if compression is specified. diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index 0c59bdee..cd505cd0 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -67,8 +67,12 @@ async fn compressed_upload_flow() { let part1_data = b"hello "; let part2_data = b"world!"; - let etag1 = upload.put(part1_data.as_slice(), 1, None).await.unwrap(); - let etag2 = upload.put(part2_data.as_slice(), 2, None).await.unwrap(); + // Caller is responsible for pre-compressing parts. 
+ let part1_compressed = zstd::encode_all(&part1_data[..], 0).unwrap(); + let part2_compressed = zstd::encode_all(&part2_data[..], 0).unwrap(); + + let etag1 = upload.put(part1_compressed, 1, None).await.unwrap(); + let etag2 = upload.put(part2_compressed, 2, None).await.unwrap(); let key = upload .complete(vec![ From 8b43e4a07bd03b371c4a374d30b79a5771349368 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 11:06:15 +0200 Subject: [PATCH 08/33] wip --- .envrc | 2 +- clients/rust/src/lib.rs | 2 - clients/rust/src/macros.rs | 64 ------------------------------- clients/rust/src/multipart.rs | 54 +++++++++++++++++++++++++- clients/rust/src/put.rs | 72 +++++++++++++++++++++++++++++++++-- pyproject.toml | 3 ++ uv.lock | 4 ++ 7 files changed, 129 insertions(+), 72 deletions(-) delete mode 100644 clients/rust/src/macros.rs diff --git a/.envrc b/.envrc index ead083d1..d2373afb 100644 --- a/.envrc +++ b/.envrc @@ -8,4 +8,4 @@ fi uv sync --all-packages --all-groups source .venv/bin/activate -source_env_if_exists .envrc.private +source_env_if_exists "$(git rev-parse --show-toplevel)/.envrc.private" diff --git a/clients/rust/src/lib.rs b/clients/rust/src/lib.rs index f5068697..b58802bf 100644 --- a/clients/rust/src/lib.rs +++ b/clients/rust/src/lib.rs @@ -2,8 +2,6 @@ #![warn(missing_docs)] #![warn(missing_debug_implementations)] -#[macro_use] -mod macros; mod auth; mod client; mod delete; diff --git a/clients/rust/src/macros.rs b/clients/rust/src/macros.rs deleted file mode 100644 index a0728c5d..00000000 --- a/clients/rust/src/macros.rs +++ /dev/null @@ -1,64 +0,0 @@ -macro_rules! metadata_builder_methods { - ($metadata_field:ident) => { - /// Sets the compression algorithm recorded in this object's metadata. - /// - /// For single-object uploads ([`PutBuilder`](crate::PutBuilder)), the client - /// compresses the payload automatically before sending it. 
- /// - /// For multipart uploads ([`InitiateBuilder`](crate::InitiateBuilder)), this - /// only records the algorithm in metadata — the caller is responsible for - /// pre-compressing each part. - /// - /// Pass [`None`] to disable compression entirely (e.g. for already-compressed - /// media formats, or when handling compression externally). - /// - /// By default, the compression algorithm set on this Session's Usecase is used. - pub fn compression(mut self, compression: impl Into>) -> Self { - self.$metadata_field.compression = compression.into(); - self - } - - /// Sets the expiration policy of the object to be uploaded. - /// - /// By default, the expiration policy set on this Session's Usecase is used. - pub fn expiration_policy(mut self, expiration_policy: $crate::ExpirationPolicy) -> Self { - self.$metadata_field.expiration_policy = expiration_policy; - self - } - - /// Sets the content type of the object to be uploaded. - /// - /// You can use the utility function [`crate::utils::guess_mime_type`] to attempt to guess a - /// `content_type` based on magic bytes. - pub fn content_type( - mut self, - content_type: impl Into>, - ) -> Self { - self.$metadata_field.content_type = content_type.into(); - self - } - - /// Sets the origin of the object, typically the IP address of the original source. - pub fn origin(mut self, origin: impl Into) -> Self { - self.$metadata_field.origin = Some(origin.into()); - self - } - - /// This sets the custom metadata to the provided map. - /// - /// It will clear any previously set metadata. - pub fn set_metadata( - mut self, - metadata: impl Into>, - ) -> Self { - self.$metadata_field.custom = metadata.into(); - self - } - - /// Appends the `key`/`value` to the custom metadata of this object. 
- pub fn append_metadata(mut self, key: impl Into, value: impl Into) -> Self { - self.$metadata_field.custom.insert(key.into(), value.into()); - self - } - }; -} diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 50424e0c..92ff283e 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -1,3 +1,6 @@ +use std::borrow::Cow; +use std::collections::BTreeMap; + use bytes::Bytes; use objectstore_types::metadata::Metadata; use objectstore_types::multipart::{ @@ -55,8 +58,6 @@ pub struct InitiateBuilder { } impl InitiateBuilder { - metadata_builder_methods!(metadata); - /// Sets an explicit object key. /// /// If a key is specified, the object will be stored under that key. Otherwise, the Objectstore @@ -67,6 +68,55 @@ impl InitiateBuilder { self } + /// Sets the compression algorithm recorded in this object's metadata. + /// + /// Unlike single-object uploads, the client does **not** compress multipart + /// parts automatically. The caller is responsible for pre-compressing each + /// part to match this algorithm before uploading it via + /// [`MultipartUpload::put`] or [`MultipartUpload::put_stream`]. + /// + /// Pass [`None`] to disable compression entirely. + /// + /// By default, the compression algorithm set on this Session's Usecase is used. + pub fn compression(mut self, compression: impl Into>) -> Self { + self.metadata.compression = compression.into(); + self + } + + /// Sets the expiration policy of the object to be uploaded. + /// + /// By default, the expiration policy set on this Session's Usecase is used. + pub fn expiration_policy(mut self, expiration_policy: crate::ExpirationPolicy) -> Self { + self.metadata.expiration_policy = expiration_policy; + self + } + + /// Sets the content type of the object to be uploaded. 
+ pub fn content_type(mut self, content_type: impl Into>) -> Self { + self.metadata.content_type = content_type.into(); + self + } + + /// Sets the origin of the object, typically the IP address of the original source. + pub fn origin(mut self, origin: impl Into) -> Self { + self.metadata.origin = Some(origin.into()); + self + } + + /// Sets the custom metadata to the provided map. + /// + /// It will clear any previously set metadata. + pub fn set_metadata(mut self, metadata: impl Into>) -> Self { + self.metadata.custom = metadata.into(); + self + } + + /// Appends the `key`/`value` to the custom metadata of this object. + pub fn append_metadata(mut self, key: impl Into, value: impl Into) -> Self { + self.metadata.custom.insert(key.into(), value.into()); + self + } + /// Sends the initiate request and returns a [`MultipartUpload`] handle. pub async fn send(self) -> crate::Result { let method = match self.key { diff --git a/clients/rust/src/put.rs b/clients/rust/src/put.rs index e2c8c240..a1abc50e 100644 --- a/clients/rust/src/put.rs +++ b/clients/rust/src/put.rs @@ -1,6 +1,7 @@ use std::fmt; use std::io::{self, Cursor}; use std::path::PathBuf; +use std::{borrow::Cow, collections::BTreeMap}; use async_compression::tokio::bufread::ZstdEncoder; use bytes::Bytes; @@ -12,7 +13,9 @@ use tokio::fs::File; use tokio::io::{AsyncRead, BufReader}; use tokio_util::io::{ReaderStream, StreamReader}; -use crate::{ClientStream, Compression, ObjectKey, Session}; +pub use objectstore_types::metadata::{Compression, ExpirationPolicy}; + +use crate::{ClientStream, ObjectKey, Session}; /// The response returned from the service after uploading an object. #[derive(Debug, Deserialize)] @@ -105,8 +108,6 @@ pub struct PutBuilder { } impl PutBuilder { - metadata_builder_methods!(metadata); - /// Sets an explicit object key. /// /// If a key is specified, the object will be stored under that key. 
Otherwise, the Objectstore @@ -115,6 +116,71 @@ impl PutBuilder { self.key = Some(key.into()).filter(|k| !k.is_empty()); self } + + /// Sets an explicit compression algorithm to be used for this payload. + /// + /// [`None`] should be used if no compression should be performed by the client, + /// either because the payload is uncompressible (such as a media format), or if the user + /// will handle any kind of compression, without the clients knowledge. + /// + /// By default, the compression algorithm set on this Session's Usecase is used. + pub fn compression(mut self, compression: impl Into>) -> Self { + self.metadata.compression = compression.into(); + self + } + + /// Sets the expiration policy of the object to be uploaded. + /// + /// By default, the expiration policy set on this Session's Usecase is used. + pub fn expiration_policy(mut self, expiration_policy: ExpirationPolicy) -> Self { + self.metadata.expiration_policy = expiration_policy; + self + } + + /// Sets the content type of the object to be uploaded. + /// + /// You can use the utility function [`crate::utils::guess_mime_type`] to attempt to guess a + /// `content_type` based on magic bytes. + pub fn content_type(mut self, content_type: impl Into>) -> Self { + self.metadata.content_type = content_type.into(); + self + } + + /// Sets the origin of the object, typically the IP address of the original source. + /// + /// This is an optional but encouraged field that tracks where the payload was + /// originally obtained from. For example, the IP address of the Sentry SDK or CLI + /// that uploaded the data. + /// + /// # Example + /// + /// ```no_run + /// # async fn example(session: objectstore_client::Session) { + /// session.put("data") + /// .origin("203.0.113.42") + /// .send() + /// .await + /// .unwrap(); + /// # } + /// ``` + pub fn origin(mut self, origin: impl Into) -> Self { + self.metadata.origin = Some(origin.into()); + self + } + + /// This sets the custom metadata to the provided map. 
+ /// + /// It will clear any previously set metadata. + pub fn set_metadata(mut self, metadata: impl Into>) -> Self { + self.metadata.custom = metadata.into(); + self + } + + /// Appends they `key`/`value` to the custom metadata of this object. + pub fn append_metadata(mut self, key: impl Into, value: impl Into) -> Self { + self.metadata.custom.insert(key.into(), value.into()); + self + } } /// Compresses the body if compression is specified. diff --git a/pyproject.toml b/pyproject.toml index c9479f32..cb605d38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,9 @@ name = "objectstore" version = "0.0.0" requires-python = ">=3.11" +dependencies = [ + "devservices>=1.2.2", +] [tool.uv] required-version = ">=0.9.18" diff --git a/uv.lock b/uv.lock index bce46f30..fce2d962 100644 --- a/uv.lock +++ b/uv.lock @@ -288,6 +288,9 @@ wheels = [ name = "objectstore" version = "0.0.0" source = { virtual = "." } +dependencies = [ + { name = "devservices" }, +] [package.dev-dependencies] dev = [ @@ -304,6 +307,7 @@ docs = [ ] [package.metadata] +requires-dist = [{ name = "devservices", specifier = ">=1.2.2" }] [package.metadata.requires-dev] dev = [ From 2ab6b42fdd03e16d73f42874f6c0844b6866f7c7 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 11:25:12 +0200 Subject: [PATCH 09/33] revert unrelated changes --- .envrc | 2 +- pyproject.toml | 3 --- uv.lock | 4 ---- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.envrc b/.envrc index d2373afb..ead083d1 100644 --- a/.envrc +++ b/.envrc @@ -8,4 +8,4 @@ fi uv sync --all-packages --all-groups source .venv/bin/activate -source_env_if_exists "$(git rev-parse --show-toplevel)/.envrc.private" +source_env_if_exists .envrc.private diff --git a/pyproject.toml b/pyproject.toml index cb605d38..c9479f32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,9 +2,6 @@ name = "objectstore" version = "0.0.0" requires-python = ">=3.11" -dependencies = [ - "devservices>=1.2.2", 
-] [tool.uv] required-version = ">=0.9.18" diff --git a/uv.lock b/uv.lock index fce2d962..bce46f30 100644 --- a/uv.lock +++ b/uv.lock @@ -288,9 +288,6 @@ wheels = [ name = "objectstore" version = "0.0.0" source = { virtual = "." } -dependencies = [ - { name = "devservices" }, -] [package.dev-dependencies] dev = [ @@ -307,7 +304,6 @@ docs = [ ] [package.metadata] -requires-dist = [{ name = "devservices", specifier = ">=1.2.2" }] [package.metadata.requires-dev] dev = [ From 8f51b3fc5869054ed7c6a0e94f292a77ee81522d Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 11:33:46 +0200 Subject: [PATCH 10/33] fix(test): set compression(None) in full_upload_flow multipart test The test was inheriting the usecase default (zstd) compression but uploading raw uncompressed parts, causing "Unknown frame descriptor" on read-back. --- clients/rust/tests/multipart.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index cd505cd0..42477364 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -13,6 +13,7 @@ async fn full_upload_flow() { let upload = session .create_multipart_upload() .key("multipart-test-key") + .compression(None) .send() .await .unwrap(); From 0571711e3661934bdbfb4143758f16bab440a325 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 11:56:54 +0200 Subject: [PATCH 11/33] fix(test): pre-compress parts in full_upload_flow multipart test Instead of disabling compression, send properly zstd-compressed parts so the test exercises the multiple_members concatenated-frame decompression path that the client uses for multipart objects. 
--- clients/rust/tests/multipart.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index 42477364..4be198d9 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -13,7 +13,6 @@ async fn full_upload_flow() { let upload = session .create_multipart_upload() .key("multipart-test-key") - .compression(None) .send() .await .unwrap(); @@ -24,8 +23,12 @@ async fn full_upload_flow() { let part1_data = b"hello "; let part2_data = b"world!"; - let etag1 = upload.put(part1_data.as_slice(), 1, None).await.unwrap(); - let etag2 = upload.put(part2_data.as_slice(), 2, None).await.unwrap(); + // Multipart uploads don't auto-compress; caller must pre-compress each part. + let part1_compressed = zstd::encode_all(&part1_data[..], 0).unwrap(); + let part2_compressed = zstd::encode_all(&part2_data[..], 0).unwrap(); + + let etag1 = upload.put(part1_compressed, 1, None).await.unwrap(); + let etag2 = upload.put(part2_compressed, 2, None).await.unwrap(); assert!(!etag1.is_empty()); assert!(!etag2.is_empty()); @@ -46,6 +49,7 @@ async fn full_upload_flow() { assert_eq!(key, "multipart-test-key"); + // The client decompresses concatenated zstd frames (multiple_members) transparently. 
let response = session.get(&key).send().await.unwrap().unwrap(); assert_eq!(response.metadata.compression, None); let payload = response.payload().await.unwrap(); From 5b7e1a0b500e4d232a99faec626c3f31c2d81690 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 12:06:34 +0200 Subject: [PATCH 12/33] revert unrelated changes --- pyproject.toml | 2 +- uv.lock | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c9479f32..7b8e789a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ members = [ [dependency-groups] dev = [ "devservices>=1.2.2", - "pytest>=9.0.3", + "pytest>=8.3.3", "pytest-cov>=4.1.0", "mypy>=1.17.1", "ruff>=0.14.2", diff --git a/uv.lock b/uv.lock index bce46f30..7f7c01ce 100644 --- a/uv.lock +++ b/uv.lock @@ -121,15 +121,15 @@ toml = [ [[package]] name = "cryptography" -version = "46.0.7" +version = "46.0.5" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.7-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:ea42cbe97209df307fdc3b155f1b6fa2577c0defa8f1f7d3be7d31d189108ad4" }, - { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.7-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:73510b83623e080a2c35c62c15298096e2a5dc8d51c3b4e1740211839d0dea77" }, - { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.7-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:420b1e4109cc95f0e5700eed79908cef9268265c773d3a66f7af1eef53d409ef" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad" }, + { url = 
"https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed" }, ] [[package]] @@ -310,7 +310,7 @@ dev = [ { name = "devservices", specifier = ">=1.2.2" }, { name = "mypy", specifier = ">=1.17.1" }, { name = "pre-commit", specifier = ">=4.2.0" }, - { name = "pytest", specifier = ">=9.0.3" }, + { name = "pytest", specifier = ">=8.3.3" }, { name = "pytest-cov", specifier = ">=4.1.0" }, { name = "ruff", specifier = ">=0.14.2" }, ] @@ -400,10 +400,10 @@ wheels = [ [[package]] name = "pygments" -version = "2.20.0" +version = "2.19.2" source = { registry = "https://pypi.devinfra.sentry.io/simple" } wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" }, ] [[package]] @@ -421,17 +421,16 @@ crypto = [ [[package]] name = "pytest" -version = "9.0.3" +version = "8.3.3" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, - { name = "pygments" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2" }, ] [[package]] @@ -470,7 +469,7 @@ wheels = [ 
[[package]] name = "requests" -version = "2.33.0" +version = "2.32.5" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "certifi" }, @@ -479,7 +478,7 @@ dependencies = [ { name = "urllib3" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b" }, + { url = "https://pypi.devinfra.sentry.io/wheels/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6" }, ] [[package]] From b5ee8fca4a9e3621da186a67ddbfd338bfbd6049 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 12:07:19 +0200 Subject: [PATCH 13/33] revert unrelated changes --- objectstore-server/src/endpoints/multipart.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/objectstore-server/src/endpoints/multipart.rs b/objectstore-server/src/endpoints/multipart.rs index 965c6ec7..5a12454a 100644 --- a/objectstore-server/src/endpoints/multipart.rs +++ b/objectstore-server/src/endpoints/multipart.rs @@ -135,8 +135,6 @@ async fn upload_part( .and_then(|v| v.parse::().ok()) .ok_or_else(|| ApiError::Client("Content-Length header is required".into()))?; - // Content-MD5 must be base64-encoded per RFC 1864; passed through to the - // storage backend for integrity verification. 
let content_md5 = headers .get("content-md5") .and_then(|v| v.to_str().ok()) From 7ed21301f98735e7ef2570efedce08519816eaf0 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 12:38:16 +0200 Subject: [PATCH 14/33] improve --- clients/rust/src/client.rs | 10 ++-- clients/rust/src/multipart.rs | 86 +++++++++++++++++++-------------- clients/rust/tests/multipart.rs | 14 +++--- 3 files changed, 62 insertions(+), 48 deletions(-) diff --git a/clients/rust/src/client.rs b/clients/rust/src/client.rs index 5e76f728..0db37785 100644 --- a/clients/rust/src/client.rs +++ b/clients/rust/src/client.rs @@ -468,7 +468,7 @@ impl Session { &self, suffix: Option<&'static str>, object_key: Option<&str>, - query_pairs: Vec<(&str, String)>, + query_pairs: Option>, ) -> Url { let mut url = self.client.service_url.clone(); @@ -490,8 +490,10 @@ impl Session { drop(segments); { let mut pairs = url.query_pairs_mut(); - for (key, value) in query_pairs { - pairs.append_pair(key, &value); + if let Some(query_pairs) = query_pairs { + for (key, value) in query_pairs { + pairs.append_pair(key, &value); + } } } @@ -533,7 +535,7 @@ impl Session { method: reqwest::Method, action: Option<&'static str>, object_key: Option<&str>, - query_pairs: Vec<(&str, String)>, + query_pairs: Option>, ) -> crate::Result { let url = self.multipart_url(action, object_key, query_pairs); let builder = self.client.reqwest.request(method, url); diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 92ff283e..e137fdc7 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -24,18 +24,25 @@ enum CompleteResponse { impl Session { /// Creates a builder for initiating a multipart upload. /// - /// The returned [`InitiateBuilder`] inherits the session's default compression - /// and expiration settings. 
Unlike single-object uploads, the client does - /// **not** compress parts — the caller must pre-compress each part to match - /// the compression algorithm set in metadata. - pub fn create_multipart_upload(&self) -> InitiateBuilder { + /// The returned [`InitiateMultipartBuilder`] inherits the session's default compression + /// and expiration settings. + /// + /// IMPORTANT: unlike single-object uploads, the client does not automatically compress the + /// contents of [`MultipartUpload::put`]/[`MultipartUpload::put_stream`] based on the + /// configured `compression`. + /// The caller is responsible to compress the payload in accordance with the configured + /// `compression`. + /// That's because we require `content_length` on each part to be the length of the compressed + /// content, which we wouldn't be able to know beforehand if `objectstore_client` automatically + /// compressed payloads on the fly. + pub fn initiate_multipart_upload(&self) -> InitiateMultipartBuilder { let metadata = Metadata { expiration_policy: self.scope.usecase().expiration_policy(), compression: Some(self.scope.usecase().compression()), ..Default::default() }; - InitiateBuilder { + InitiateMultipartBuilder { session: self.clone(), metadata, key: None, @@ -44,25 +51,18 @@ impl Session { } /// A builder for initiating a multipart upload. -/// -/// Metadata set here (compression, expiration, content type, etc.) is sent to -/// the server when [`send`](Self::send) is called. Note that unlike -/// single-object uploads, the client does **not** compress parts automatically — -/// if compression is configured, the caller must pre-compress each part before -/// uploading it via [`MultipartUpload::put`] or [`MultipartUpload::put_stream`]. #[derive(Debug)] -pub struct InitiateBuilder { +pub struct InitiateMultipartBuilder { session: Session, metadata: Metadata, key: Option, } -impl InitiateBuilder { +impl InitiateMultipartBuilder { /// Sets an explicit object key. 
/// /// If a key is specified, the object will be stored under that key. Otherwise, the Objectstore - /// server will automatically assign a random key, which is then returned from the initiate - /// request. + /// server will automatically assign a random key, which is then returned from this request. pub fn key(mut self, key: impl Into) -> Self { self.key = Some(key.into()).filter(|k| !k.is_empty()); self @@ -70,12 +70,11 @@ impl InitiateBuilder { /// Sets the compression algorithm recorded in this object's metadata. /// - /// Unlike single-object uploads, the client does **not** compress multipart - /// parts automatically. The caller is responsible for pre-compressing each - /// part to match this algorithm before uploading it via - /// [`MultipartUpload::put`] or [`MultipartUpload::put_stream`]. - /// - /// Pass [`None`] to disable compression entirely. + /// IMPORTANT: unlike single-object uploads, the client does not automatically compress the + /// contents of [`MultipartUpload::put`]/[`MultipartUpload::put_stream`] based on the + /// configured `compression`. + /// The caller is responsible to compress the payload in accordance with the configured + /// `compression`. /// /// By default, the compression algorithm set on this Session's Usecase is used. pub fn compression(mut self, compression: impl Into>) -> Self { @@ -92,12 +91,31 @@ impl InitiateBuilder { } /// Sets the content type of the object to be uploaded. + /// + /// You can use the utility function [`crate::utils::guess_mime_type`] to attempt to guess a + /// `content_type` based on magic bytes. pub fn content_type(mut self, content_type: impl Into>) -> Self { self.metadata.content_type = content_type.into(); self } /// Sets the origin of the object, typically the IP address of the original source. + /// + /// This is an optional but encouraged field that tracks where the payload was + /// originally obtained from. For example, the IP address of the Sentry SDK or CLI + /// that uploaded the data. 
+ /// + /// # Example + /// + /// ```no_run + /// # async fn example(session: objectstore_client::Session) { + /// session.initiate_multipart_upload() + /// .origin("203.0.113.42") + /// .send() + /// .await + /// .unwrap(); + /// # } + /// ``` pub fn origin(mut self, origin: impl Into) -> Self { self.metadata.origin = Some(origin.into()); self @@ -126,7 +144,7 @@ impl InitiateBuilder { let mut builder = self.session - .multipart_request(method, None, self.key.as_deref(), vec![])?; + .multipart_request(method, None, self.key.as_deref(), None)?; builder = builder.headers(self.metadata.to_headers("")?); @@ -140,16 +158,10 @@ impl InitiateBuilder { } } -/// Handle to an in-progress multipart upload. -/// -/// Returned by [`InitiateBuilder::send`]. Use it to upload parts, list parts, -/// and complete or abort the upload. +/// Represents an ongoing MultipartUpload, tied to a specific [`Session`] and `upload_id`. /// -/// Parts are uploaded as-is — the client does **not** compress them. If the -/// upload was initiated with compression metadata, the caller is responsible -/// for pre-compressing each part before calling [`put`](Self::put) or -/// [`put_stream`](Self::put_stream). `content_length` and `content_md5` always -/// refer to the bytes actually transmitted. +/// Create a Session using [`Session::initiate_multipart_upload`] or +/// [`Session:resume_multipart_upload`]. #[derive(Debug)] pub struct MultipartUpload { session: Session, @@ -220,10 +232,10 @@ impl MultipartUpload { reqwest::Method::PUT, Some("parts"), Some(&self.key), - vec![ + Some(vec![ ("upload_id", self.upload_id.clone()), ("part_number", part_number.to_string()), - ], + ]), )? 
.header(reqwest::header::CONTENT_LENGTH, content_length) .body(body); @@ -255,7 +267,7 @@ impl MultipartUpload { reqwest::Method::GET, Some("parts"), Some(&self.key), - params, + Some(params), )?; let response: ListPartsResponse = builder.send().await?.error_for_status()?.json().await?; @@ -268,7 +280,7 @@ impl MultipartUpload { reqwest::Method::DELETE, None, Some(&self.key), - vec![("upload_id", self.upload_id)], + Some(vec![("upload_id", self.upload_id)]), )?; builder.send().await?.error_for_status()?; Ok(()) @@ -286,7 +298,7 @@ impl MultipartUpload { reqwest::Method::POST, Some("complete"), Some(&self.key), - vec![("upload_id", self.upload_id)], + Some(vec![("upload_id", self.upload_id)]), )? .json(&CompleteRequest { parts }); diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index 4be198d9..52b2c4f8 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -11,7 +11,7 @@ async fn full_upload_flow() { let session = test_session(&server); let upload = session - .create_multipart_upload() + .initiate_multipart_upload() .key("multipart-test-key") .send() .await @@ -62,7 +62,7 @@ async fn compressed_upload_flow() { let session = test_session(&server); let upload = session - .create_multipart_upload() + .initiate_multipart_upload() .key("multipart-compressed-key") .compression(Compression::Zstd) .send() @@ -120,7 +120,7 @@ async fn server_generated_key() { let session = test_session(&server); let upload = session - .create_multipart_upload() + .initiate_multipart_upload() .compression(None) .send() .await @@ -150,7 +150,7 @@ async fn list_parts() { let session = test_session(&server); let upload = session - .create_multipart_upload() + .initiate_multipart_upload() .key("list-parts-key") .compression(None) .send() @@ -177,7 +177,7 @@ async fn abort_upload() { let session = test_session(&server); let upload = session - .create_multipart_upload() + .initiate_multipart_upload() .key("abort-key") .compression(None) 
.send() @@ -194,7 +194,7 @@ async fn metadata_preserved() { let session = test_session(&server); let upload = session - .create_multipart_upload() + .initiate_multipart_upload() .key("metadata-key") .compression(None) .content_type("text/plain") @@ -229,7 +229,7 @@ async fn complete_with_bad_etag() { let session = test_session(&server); let upload = session - .create_multipart_upload() + .initiate_multipart_upload() .key("bad-etag-key") .compression(None) .send() From adb0317bbfd1d1c5a3a30aeff4b63345dc338d9f Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 12:42:15 +0200 Subject: [PATCH 15/33] improve --- clients/rust/src/multipart.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index e137fdc7..cee1c97b 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -48,6 +48,22 @@ impl Session { key: None, } } + + /// Resumes an existing multipart upload from its key and upload ID. + /// + /// This reconstructs a [`MultipartUpload`] handle from previously obtained identifiers. + /// Use this to resume an upload after a process restart or to continue an upload initiated elsewhere. + pub fn resume_multipart_upload( + &self, + key: impl Into, + upload_id: impl Into, + ) -> MultipartUpload { + MultipartUpload { + session: self.clone(), + key: key.into(), + upload_id: upload_id.into(), + } + } } /// A builder for initiating a multipart upload. 
From 0452d6ea9d3138d120a4819eba28664dc71993f2 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 13:37:27 +0200 Subject: [PATCH 16/33] improve --- clients/rust/src/multipart.rs | 111 ++++++++++++------ clients/rust/tests/multipart.rs | 15 ++- objectstore-server/src/endpoints/multipart.rs | 2 +- objectstore-types/src/multipart.rs | 14 ++- 4 files changed, 91 insertions(+), 51 deletions(-) diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index cee1c97b..5d7242f8 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -1,7 +1,9 @@ use std::borrow::Cow; use std::collections::BTreeMap; +use base64::Engine as _; use bytes::Bytes; +use futures_util::StreamExt as _; use objectstore_types::metadata::Metadata; use objectstore_types::multipart::{ CompleteErrorDetail, CompletePart, CompleteRequest, CompleteSuccessResponse, InitiateResponse, @@ -9,10 +11,15 @@ use objectstore_types::multipart::{ }; use reqwest::Body; use serde::Deserialize; +use tokio::io::AsyncRead; +use tokio_util::io::ReaderStream; use crate::{ClientStream, ObjectKey, Session}; pub use objectstore_types::multipart::CompletePart as MultipartCompletePart; +pub use objectstore_types::multipart::ETag; +pub use objectstore_types::multipart::PartInfo; +pub use objectstore_types::multipart::UploadId; #[derive(Deserialize)] #[serde(untagged)] @@ -51,12 +58,13 @@ impl Session { /// Resumes an existing multipart upload from its key and upload ID. /// - /// This reconstructs a [`MultipartUpload`] handle from previously obtained identifiers. + /// This reconstructs a [`MultipartUpload`] handle from previously obtained identifiers, and + /// doesn't make any network calls. /// Use this to resume an upload after a process restart or to continue an upload initiated elsewhere. 
pub fn resume_multipart_upload( &self, key: impl Into, - upload_id: impl Into, + upload_id: impl Into, ) -> MultipartUpload { MultipartUpload { session: self.clone(), @@ -174,74 +182,95 @@ impl InitiateMultipartBuilder { } } -/// Represents an ongoing MultipartUpload, tied to a specific [`Session`] and `upload_id`. +/// Represents an ongoing Multipart Upload, tied to a specific [`Session`] and [`UploadId`]. /// -/// Create a Session using [`Session::initiate_multipart_upload`] or -/// [`Session:resume_multipart_upload`]. +/// Create a Multipart Upload handle using [`Session::initiate_multipart_upload`] or [`Session:resume_multipart_upload`]. #[derive(Debug)] pub struct MultipartUpload { session: Session, key: String, - upload_id: String, + upload_id: UploadId, } impl MultipartUpload { /// Returns the upload session identifier. - pub fn id(&self) -> &str { + pub fn upload_id(&self) -> &UploadId { &self.upload_id } - /// Returns the object key. - pub fn key(&self) -> &str { + /// Returns the key of the object that this upload will create. + pub fn key(&self) -> &ObjectKey { &self.key } - /// Uploads a part from an in-memory buffer. + /// Uploads a part using a [`Bytes`]-like payload. /// - /// An optional raw MD5 digest can be provided for server-side integrity - /// verification. The digest must match the bytes being transmitted. + /// IMPORTANT: unlike single-object uploads, the client does not automatically compress + /// contents based on this upload's `Metadata::compression`. + /// The caller is responsible to compress the payload in accordance with the `compression`, + /// and, optionally, to pass the `content_md5` of the compressed payload. 
pub async fn put( &self, body: impl Into, part_number: u32, content_md5: Option<&[u8; 16]>, - ) -> crate::Result { + ) -> crate::Result { let bytes = body.into(); let content_length = bytes.len() as u64; - self.upload_part(bytes.into(), content_length, part_number, content_md5) + self.upload_part(bytes.into(), part_number, content_length, content_md5) .await } - /// Uploads a part from a stream. + /// Uploads a part using a streaming payload. /// - /// The caller must provide the exact `content_length` of the stream. - /// An optional raw MD5 digest can be provided for server-side integrity - /// verification. The digest must match the bytes being transmitted. + /// IMPORTANT: unlike single-object uploads, the client does not automatically compress + /// contents based on this upload's `Metadata::compression`. + /// The caller is responsible to compress the payload in accordance with the `compression`, + /// and to pass the `content_length` and, optionally, `content_md5` of the compressed payload. pub async fn put_stream( &self, stream: ClientStream, - content_length: u64, part_number: u32, + content_length: u64, content_md5: Option<&[u8; 16]>, - ) -> crate::Result { + ) -> crate::Result { self.upload_part( Body::wrap_stream(stream), - content_length, part_number, + content_length, content_md5, ) .await } + /// Uploads a part from an [`AsyncRead`] source. + /// + /// IMPORTANT: unlike single-object uploads, the client does not automatically compress + /// contents based on this upload's `Metadata::compression`. + /// The caller is responsible to compress the payload in accordance with the `compression`, + /// and to pass the `content_length` and, optionally, `content_md5` of the compressed payload. 
+ pub async fn put_read( + &self, + reader: R, + part_number: u32, + content_length: u64, + content_md5: Option<&[u8; 16]>, + ) -> crate::Result + where + R: AsyncRead + Send + Sync + 'static, + { + let stream = ReaderStream::new(reader).boxed(); + self.put_stream(stream, part_number, content_length, content_md5) + .await + } + async fn upload_part( &self, body: Body, - content_length: u64, part_number: u32, + content_length: u64, content_md5: Option<&[u8; 16]>, - ) -> crate::Result { - use base64::Engine; - + ) -> crate::Result { let mut builder = self .session .multipart_request( @@ -265,8 +294,23 @@ impl MultipartUpload { Ok(response.etag) } - /// Lists the parts that have been uploaded for this multipart upload. - pub async fn list_parts( + /// Lists all parts that have been uploaded for this multipart upload. + pub async fn list_parts(&self) -> crate::Result> { + let mut all_parts = BTreeMap::new(); + let mut marker = None; + + loop { + let page = self.list_parts_page(None, marker).await?; + all_parts.extend(page.parts); + + if !page.is_truncated { + return Ok(all_parts); + } + marker = page.next_part_number_marker; + } + } + + async fn list_parts_page( &self, max_parts: Option, part_number_marker: Option, @@ -290,7 +334,7 @@ impl MultipartUpload { Ok(response) } - /// Aborts this multipart upload, discarding any uploaded parts. + /// Aborts this multipart upload. pub async fn abort(self) -> crate::Result<()> { let builder = self.session.multipart_request( reqwest::Method::DELETE, @@ -303,11 +347,7 @@ impl MultipartUpload { } /// Completes the multipart upload, assembling all parts into the final object. - /// - /// Returns the final object key on success. The server may return an error in - /// the response body even with HTTP 200 (following the S3 pattern), which is - /// surfaced as [`crate::Error::MultipartComplete`]. 
- pub async fn complete(self, parts: Vec) -> crate::Result { + pub async fn complete(self, parts: Vec) -> crate::Result { let builder = self .session .multipart_request( @@ -318,11 +358,6 @@ impl MultipartUpload { )? .json(&CompleteRequest { parts }); - // The complete endpoint streams whitespace as keepalive before the JSON - // payload. serde_json (used by reqwest's .json()) skips leading whitespace, - // so we can deserialize directly. - // - // The response is always HTTP 200 (S3 pattern) — errors are in the body. let response = builder.send().await?.error_for_status()?; match response.json::().await? { CompleteResponse::Success(s) => Ok(s.key), diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index 52b2c4f8..a5e80432 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -18,7 +18,7 @@ async fn full_upload_flow() { .unwrap(); assert_eq!(upload.key(), "multipart-test-key"); - assert!(!upload.id().is_empty()); + assert!(!upload.upload_id().is_empty()); let part1_data = b"hello "; let part2_data = b"world!"; @@ -160,13 +160,12 @@ async fn list_parts() { upload.put(b"part-one".as_slice(), 1, None).await.unwrap(); upload.put(b"part-two".as_slice(), 2, None).await.unwrap(); - let parts = upload.list_parts(None, None).await.unwrap(); - assert_eq!(parts.parts.len(), 2); - assert!(parts.parts.contains_key(&1)); - assert!(parts.parts.contains_key(&2)); - assert_eq!(parts.parts[&1].size, 8); - assert_eq!(parts.parts[&2].size, 8); - assert!(!parts.is_truncated); + let parts = upload.list_parts().await.unwrap(); + assert_eq!(parts.len(), 2); + assert!(parts.contains_key(&1)); + assert!(parts.contains_key(&2)); + assert_eq!(parts[&1].size, 8); + assert_eq!(parts[&2].size, 8); upload.abort().await.unwrap(); } diff --git a/objectstore-server/src/endpoints/multipart.rs b/objectstore-server/src/endpoints/multipart.rs index 5a12454a..052d5d61 100644 --- a/objectstore-server/src/endpoints/multipart.rs +++ 
b/objectstore-server/src/endpoints/multipart.rs @@ -18,7 +18,7 @@ use objectstore_types::auth::Permission; use objectstore_types::metadata::Metadata; use objectstore_types::multipart::{ CompleteErrorDetail, CompleteErrorResponse, CompleteRequest, CompleteSuccessResponse, - InitiateResponse, ListPartsResponse, PartInfo, UploadPartResponse, + InitiateResponse, ListPartsResponse, PartInfo, UploadId, UploadPartResponse, }; use serde::Deserialize; use crate::auth::AuthAwareService; diff --git a/objectstore-types/src/multipart.rs b/objectstore-types/src/multipart.rs index f63519fe..23cedebb 100644 --- a/objectstore-types/src/multipart.rs +++ b/objectstore-types/src/multipart.rs @@ -5,27 +5,33 @@ use std::time::SystemTime; use serde::{Deserialize, Serialize}; +/// Identifier for a multipart upload session. +pub type UploadId = String; + +/// Opaque entity tag identifying a specific version of an uploaded part. +pub type ETag = String; + /// Response from initiating a multipart upload. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InitiateResponse { /// The object key (server-generated or user-provided). pub key: String, /// The upload session identifier for subsequent requests. - pub upload_id: String, + pub upload_id: UploadId, } /// Response from uploading a single part. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct UploadPartResponse { /// Opaque identifier of the uploaded part. - pub etag: String, + pub etag: ETag, } /// Information about a single uploaded part, as returned by list-parts. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PartInfo { /// Opaque identifier of the part. - pub etag: String, + pub etag: ETag, /// When the part was last modified. #[serde(with = "humantime_serde")] pub last_modified: SystemTime, @@ -51,7 +57,7 @@ pub struct CompletePart { /// The part number. pub part_number: u32, /// The etag returned when this part was uploaded. 
- pub etag: String, + pub etag: ETag, } /// Request body for completing a multipart upload. From 59b2decaebd17aa3e8413416cc533bf5682c580a Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 13:40:59 +0200 Subject: [PATCH 17/33] improve --- clients/rust/Cargo.toml | 4 ++-- clients/rust/src/lib.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clients/rust/Cargo.toml b/clients/rust/Cargo.toml index f7349e1f..b4bbb31f 100644 --- a/clients/rust/Cargo.toml +++ b/clients/rust/Cargo.toml @@ -40,7 +40,7 @@ zstd = "0.13.3" [features] default = ["native-tls", "hickory-dns"] -multipart-low-level = [] +multipart = [] rustls = ["reqwest/rustls"] native-tls = ["reqwest/native-tls"] @@ -49,4 +49,4 @@ hickory-dns = ["reqwest/hickory-dns"] [[test]] name = "multipart" path = "tests/multipart.rs" -required-features = ["multipart-low-level"] +required-features = ["multipart"] diff --git a/clients/rust/src/lib.rs b/clients/rust/src/lib.rs index b58802bf..3d497f18 100644 --- a/clients/rust/src/lib.rs +++ b/clients/rust/src/lib.rs @@ -10,7 +10,7 @@ mod get; mod head; mod key; mod many; -#[cfg(feature = "multipart-low-level")] +#[cfg(feature = "multipart")] mod multipart; mod put; pub mod utils; @@ -25,6 +25,6 @@ pub use get::*; pub use head::*; pub use key::*; pub use many::*; -#[cfg(feature = "multipart-low-level")] +#[cfg(feature = "multipart")] pub use multipart::*; pub use put::*; From 697d21e87585891c2a2a019cb859c6e8079eeb2a Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 13:41:42 +0200 Subject: [PATCH 18/33] fix: correct broken intra-doc link in multipart.rs --- clients/rust/src/multipart.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 5d7242f8..dec2e477 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -184,7 +184,7 @@ impl 
InitiateMultipartBuilder { /// Represents an ongoing Multipart Upload, tied to a specific [`Session`] and [`UploadId`]. /// -/// Create a Multipart Upload handle using [`Session::initiate_multipart_upload`] or [`Session:resume_multipart_upload`]. +/// Create a Multipart Upload handle using [`Session::initiate_multipart_upload`] or [`Session::resume_multipart_upload`]. #[derive(Debug)] pub struct MultipartUpload { session: Session, From 2d1828ea14d0c5d901a8fdf7cef17181d3d3f276 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 13:45:42 +0200 Subject: [PATCH 19/33] feat(rust-client): Make Usecase compression optional Allow callers to disable compression at the Usecase level by passing None to with_compression(). Previously the method only accepted a Compression value, making it impossible to set "no compression" as the usecase-wide default. The internal field and compression() accessor are now Option<Compression>. The default remains Compression::Zstd. Per-operation builders are unaffected since they already accepted Option<Compression>.
Co-Authored-By: Claude Sonnet 4.6 --- clients/rust/src/client.rs | 12 ++++++------ clients/rust/src/multipart.rs | 2 +- clients/rust/src/put.rs | 2 +- clients/rust/tests/multipart.rs | 13 ++++++++++--- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/clients/rust/src/client.rs b/clients/rust/src/client.rs index 0db37785..f552da10 100644 --- a/clients/rust/src/client.rs +++ b/clients/rust/src/client.rs @@ -151,7 +151,7 @@ impl ClientBuilder { #[derive(Debug, Clone)] pub struct Usecase { name: Arc, - compression: Compression, + compression: Option, expiration_policy: ExpirationPolicy, } @@ -160,7 +160,7 @@ impl Usecase { pub fn new(name: &str) -> Self { Self { name: name.into(), - compression: Compression::Zstd, + compression: Some(Compression::Zstd), expiration_policy: Default::default(), } } @@ -173,7 +173,7 @@ impl Usecase { /// Returns the compression algorithm to use for operations within this usecase. #[inline] - pub fn compression(&self) -> Compression { + pub fn compression(&self) -> Option { self.compression } @@ -181,10 +181,10 @@ impl Usecase { /// /// It's still possible to override this default on each operation's builder. /// - /// By default, [`Compression::Zstd`] is used. - pub fn with_compression(self, compression: Compression) -> Self { + /// By default, [`Compression::Zstd`] is used. Pass [`None`] to disable compression. 
+ pub fn with_compression(self, compression: impl Into>) -> Self { Self { - compression, + compression: compression.into(), ..self } } diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index dec2e477..e930312a 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -45,7 +45,7 @@ impl Session { pub fn initiate_multipart_upload(&self) -> InitiateMultipartBuilder { let metadata = Metadata { expiration_policy: self.scope.usecase().expiration_policy(), - compression: Some(self.scope.usecase().compression()), + compression: self.scope.usecase().compression(), ..Default::default() }; diff --git a/clients/rust/src/put.rs b/clients/rust/src/put.rs index a1abc50e..465039af 100644 --- a/clients/rust/src/put.rs +++ b/clients/rust/src/put.rs @@ -41,7 +41,7 @@ impl Session { fn put_body(&self, body: PutBody) -> PutBuilder { let metadata = Metadata { expiration_policy: self.scope.usecase().expiration_policy(), - compression: Some(self.scope.usecase().compression()), + compression: self.scope.usecase().compression(), ..Default::default() }; diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index a5e80432..9e1817e6 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -3,12 +3,19 @@ mod common; use common::{test_server, test_session}; -use objectstore_client::{Compression, Error, MultipartCompletePart}; +use objectstore_client::{Client, Compression, Error, MultipartCompletePart, Usecase}; + +use crate::common::test_token_generator; #[tokio::test] -async fn full_upload_flow() { +async fn full_upload_uncompressed() { let server = test_server().await; - let session = test_session(&server); + let client = Client::builder(server.url("/")) + .token(test_token_generator()) + .build() + .unwrap(); + let usecase = Usecase::new("usecase").with_compression(None); + let session = client.session(usecase.for_organization(12345)).unwrap(); let upload = session 
.initiate_multipart_upload() From 7e5fdf63915f4c421dbea2e5530199445932842a Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 14:33:07 +0200 Subject: [PATCH 20/33] improve --- clients/rust/README.md | 83 ++++++++++++ clients/rust/src/multipart.rs | 30 ++-- clients/rust/tests/multipart.rs | 128 +++++++++--------- objectstore-server/src/endpoints/multipart.rs | 12 +- objectstore-server/tests/multipart.rs | 13 +- objectstore-types/src/multipart.rs | 16 ++- 6 files changed, 189 insertions(+), 93 deletions(-) diff --git a/clients/rust/README.md b/clients/rust/README.md index cf8dde88..217188e9 100644 --- a/clients/rust/README.md +++ b/clients/rust/README.md @@ -128,6 +128,89 @@ session.put("payload") .send().await?; ``` +### Multipart Upload API + +For large objects, use multipart uploads to upload parts concurrently with bounded +parallelism. + +**Important:** unlike single-object uploads, multipart uploads do **not** auto-compress. +The caller must pre-compress each part according to the compression set as part of the metadata +when initiating the upload. + +```rust,ignore +use futures_util::StreamExt as _; +use futures_util::stream; +use objectstore_client::{Compression, MultipartCompletePart}; + +let upload = session + .initiate_multipart_upload() + .key("my-large-object") + .compression(Compression::Zstd) + .send() + .await?; + +let parts: Vec<(Vec, u32)> = vec![ + (zstd::encode_all(&part1_data[..], 0)?, 1), + (zstd::encode_all(&part2_data[..], 0)?, 2), +]; + +let results: Vec<_> = stream::iter( + parts + .into_iter() + .map(|(data, part_number)| upload.put(data, part_number, None)), +) +.buffer_unordered(8) +.collect() +.await; + +let mut done = Vec::new(); +let mut errors = Vec::new(); +for result in results { + match result { + Ok(part) => done.push(part), + Err(e) => errors.push(e), + } +} + +if !errors.is_empty() { + // reupload failed parts... 
+} + +let key = upload.complete(done).await?; +// or +upload.abort().await?; +``` + +You can also resume an in-progress multipart upload, e.g. after a process restart. + +```rust,ignore +use futures_util::{StreamExt as _, TryStreamExt as _}; +use futures_util::stream; +use objectstore_client::MultipartCompletePart; + +let upload = session.resume_multipart_upload("my-large-object", saved_upload_id); + +let existing = upload.list_parts().await?; +let total_parts = 10; +let uploaded: Vec = existing.iter().map(|p| p.part_number).collect(); +let missing: Vec = (1..=total_parts) + .filter(|n| !uploaded.contains(n)) + .collect(); + +let mut done: Vec<_> = stream::iter( + missing + .into_iter() + .map(|part_number| upload.put(get_part_data(part_number), part_number, None)), +) +.buffer_unordered(8) +.try_collect() +.await?; + +done.extend(existing.into_iter().map(MultipartCompletePart::from)); + +let key = upload.complete(done).await?; +``` + ### Many API The Many API allows you to enqueue multiple requests that the client can execute using Objectstore's batch endpoint, minimizing network overhead. 
diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index e930312a..479f37bb 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -6,7 +6,7 @@ use bytes::Bytes; use futures_util::StreamExt as _; use objectstore_types::metadata::Metadata; use objectstore_types::multipart::{ - CompleteErrorDetail, CompletePart, CompleteRequest, CompleteSuccessResponse, InitiateResponse, + CompleteErrorDetail, CompleteRequest, CompleteSuccessResponse, InitiateResponse, ListPartsResponse, UploadPartResponse, }; use reqwest::Body; @@ -16,7 +16,7 @@ use tokio_util::io::ReaderStream; use crate::{ClientStream, ObjectKey, Session}; -pub use objectstore_types::multipart::CompletePart as MultipartCompletePart; +pub use objectstore_types::multipart::CompletePart; pub use objectstore_types::multipart::ETag; pub use objectstore_types::multipart::PartInfo; pub use objectstore_types::multipart::UploadId; @@ -214,7 +214,7 @@ impl MultipartUpload { body: impl Into, part_number: u32, content_md5: Option<&[u8; 16]>, - ) -> crate::Result { + ) -> crate::Result { let bytes = body.into(); let content_length = bytes.len() as u64; self.upload_part(bytes.into(), part_number, content_length, content_md5) @@ -233,7 +233,7 @@ impl MultipartUpload { part_number: u32, content_length: u64, content_md5: Option<&[u8; 16]>, - ) -> crate::Result { + ) -> crate::Result { self.upload_part( Body::wrap_stream(stream), part_number, @@ -255,7 +255,7 @@ impl MultipartUpload { part_number: u32, content_length: u64, content_md5: Option<&[u8; 16]>, - ) -> crate::Result + ) -> crate::Result where R: AsyncRead + Send + Sync + 'static, { @@ -270,7 +270,7 @@ impl MultipartUpload { part_number: u32, content_length: u64, content_md5: Option<&[u8; 16]>, - ) -> crate::Result { + ) -> crate::Result { let mut builder = self .session .multipart_request( @@ -291,12 +291,15 @@ impl MultipartUpload { } let response: UploadPartResponse = 
builder.send().await?.error_for_status()?.json().await?; - Ok(response.etag) + Ok(CompletePart { + part_number, + etag: response.etag, + }) } /// Lists all parts that have been uploaded for this multipart upload. - pub async fn list_parts(&self) -> crate::Result> { - let mut all_parts = BTreeMap::new(); + pub async fn list_parts(&self) -> crate::Result> { + let mut all_parts = Vec::new(); let mut marker = None; loop { @@ -347,7 +350,10 @@ impl MultipartUpload { } /// Completes the multipart upload, assembling all parts into the final object. - pub async fn complete(self, parts: Vec) -> crate::Result { + pub async fn complete( + self, + parts: impl IntoIterator, + ) -> crate::Result { let builder = self .session .multipart_request( @@ -356,7 +362,9 @@ impl MultipartUpload { Some(&self.key), Some(vec![("upload_id", self.upload_id)]), )? - .json(&CompleteRequest { parts }); + .json(&CompleteRequest { + parts: parts.into_iter().collect(), + }); let response = builder.send().await?.error_for_status()?; match response.json::().await? { diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index 9e1817e6..c6205e56 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -3,7 +3,9 @@ mod common; use common::{test_server, test_session}; -use objectstore_client::{Client, Compression, Error, MultipartCompletePart, Usecase}; +use futures_util::StreamExt as _; +use futures_util::stream; +use objectstore_client::{Client, CompletePart, Compression, Error, Usecase}; use crate::common::test_token_generator; @@ -27,32 +29,32 @@ async fn full_upload_uncompressed() { assert_eq!(upload.key(), "multipart-test-key"); assert!(!upload.upload_id().is_empty()); - let part1_data = b"hello "; - let part2_data = b"world!"; - // Multipart uploads don't auto-compress; caller must pre-compress each part. 
- let part1_compressed = zstd::encode_all(&part1_data[..], 0).unwrap(); - let part2_compressed = zstd::encode_all(&part2_data[..], 0).unwrap(); - - let etag1 = upload.put(part1_compressed, 1, None).await.unwrap(); - let etag2 = upload.put(part2_compressed, 2, None).await.unwrap(); - - assert!(!etag1.is_empty()); - assert!(!etag2.is_empty()); - - let key = upload - .complete(vec![ - MultipartCompletePart { - part_number: 1, - etag: etag1, - }, - MultipartCompletePart { - part_number: 2, - etag: etag2, - }, - ]) - .await - .unwrap(); + let parts_data: Vec<(Vec, u32)> = vec![ + (zstd::encode_all(&b"hello "[..], 0).unwrap(), 1), + (zstd::encode_all(&b"world!"[..], 0).unwrap(), 2), + ]; + + let results: Vec<_> = stream::iter( + parts_data + .into_iter() + .map(|(data, part_number)| upload.put(data, part_number, None)), + ) + .buffer_unordered(2) + .collect() + .await; + + let mut parts = Vec::new(); + let mut errors = Vec::new(); + for result in results { + match result { + Ok(part) => parts.push(part), + Err(e) => errors.push(e), + } + } + assert!(errors.is_empty(), "part uploads failed: {errors:?}"); + + let key = upload.complete(parts).await.unwrap(); assert_eq!(key, "multipart-test-key"); @@ -80,25 +82,31 @@ async fn compressed_upload_flow() { let part2_data = b"world!"; // Caller is responsible for pre-compressing parts. 
- let part1_compressed = zstd::encode_all(&part1_data[..], 0).unwrap(); - let part2_compressed = zstd::encode_all(&part2_data[..], 0).unwrap(); - - let etag1 = upload.put(part1_compressed, 1, None).await.unwrap(); - let etag2 = upload.put(part2_compressed, 2, None).await.unwrap(); - - let key = upload - .complete(vec![ - MultipartCompletePart { - part_number: 1, - etag: etag1, - }, - MultipartCompletePart { - part_number: 2, - etag: etag2, - }, - ]) - .await - .unwrap(); + let parts_data: Vec<(Vec, u32)> = vec![ + (zstd::encode_all(&part1_data[..], 0).unwrap(), 1), + (zstd::encode_all(&part2_data[..], 0).unwrap(), 2), + ]; + + let results: Vec<_> = stream::iter( + parts_data + .into_iter() + .map(|(data, part_number)| upload.put(data, part_number, None)), + ) + .buffer_unordered(2) + .collect() + .await; + + let mut parts = Vec::new(); + let mut errors = Vec::new(); + for result in results { + match result { + Ok(part) => parts.push(part), + Err(e) => errors.push(e), + } + } + assert!(errors.is_empty(), "part uploads failed: {errors:?}"); + + let key = upload.complete(parts).await.unwrap(); let response = session .get(&key) @@ -135,15 +143,9 @@ async fn server_generated_key() { assert!(!upload.key().is_empty()); - let etag = upload.put(b"data".as_slice(), 1, None).await.unwrap(); + let part = upload.put(b"data".as_slice(), 1, None).await.unwrap(); - let key = upload - .complete(vec![MultipartCompletePart { - part_number: 1, - etag, - }]) - .await - .unwrap(); + let key = upload.complete([part]).await.unwrap(); assert!(!key.is_empty()); @@ -169,10 +171,10 @@ async fn list_parts() { let parts = upload.list_parts().await.unwrap(); assert_eq!(parts.len(), 2); - assert!(parts.contains_key(&1)); - assert!(parts.contains_key(&2)); - assert_eq!(parts[&1].size, 8); - assert_eq!(parts[&2].size, 8); + assert_eq!(parts[0].part_number, 1); + assert_eq!(parts[1].part_number, 2); + assert_eq!(parts[0].size, 8); + assert_eq!(parts[1].size, 8); upload.abort().await.unwrap(); } @@ 
-210,15 +212,9 @@ async fn metadata_preserved() { .await .unwrap(); - let etag = upload.put(b"payload".as_slice(), 1, None).await.unwrap(); + let part = upload.put(b"payload".as_slice(), 1, None).await.unwrap(); - let key = upload - .complete(vec![MultipartCompletePart { - part_number: 1, - etag, - }]) - .await - .unwrap(); + let key = upload.complete([part]).await.unwrap(); let response = session.get(&key).send().await.unwrap().unwrap(); assert_eq!(response.metadata.content_type, "text/plain"); @@ -245,7 +241,7 @@ async fn complete_with_bad_etag() { upload.put(b"real data".as_slice(), 1, None).await.unwrap(); let result = upload - .complete(vec![MultipartCompletePart { + .complete(vec![CompletePart { part_number: 1, etag: "bogus-etag".to_string(), }]) diff --git a/objectstore-server/src/endpoints/multipart.rs b/objectstore-server/src/endpoints/multipart.rs index 052d5d61..35926175 100644 --- a/objectstore-server/src/endpoints/multipart.rs +++ b/objectstore-server/src/endpoints/multipart.rs @@ -172,13 +172,11 @@ async fn list_parts( let parts = response .parts .into_iter() - .map(|p| { - let info = PartInfo { - etag: p.etag, - last_modified: p.last_modified, - size: p.size, - }; - (p.part_number, info) + .map(|p| PartInfo { + part_number: p.part_number, + etag: p.etag, + last_modified: p.last_modified, + size: p.size, }) .collect(); diff --git a/objectstore-server/tests/multipart.rs b/objectstore-server/tests/multipart.rs index 1fe8d791..376fa515 100644 --- a/objectstore-server/tests/multipart.rs +++ b/objectstore-server/tests/multipart.rs @@ -153,10 +153,10 @@ async fn test_multipart_full_flow() -> Result<()> { assert_eq!(response.status(), reqwest::StatusCode::OK); let list: ListPartsResponse = response.json().await?; assert_eq!(list.parts.len(), 2); - assert!(list.parts.contains_key(&1)); - assert!(list.parts.contains_key(&2)); - assert_eq!(list.parts[&1].size, part1_data.len() as u64); - assert_eq!(list.parts[&2].size, part2_data.len() as u64); + 
assert_eq!(list.parts[0].part_number, 1); + assert_eq!(list.parts[1].part_number, 2); + assert_eq!(list.parts[0].size, part1_data.len() as u64); + assert_eq!(list.parts[1].size, part2_data.len() as u64); assert!(!list.is_truncated); // 5. Complete @@ -394,8 +394,9 @@ async fn test_upload_part_overwrite() -> Result<()> { assert_eq!(response.status(), reqwest::StatusCode::OK); let list: ListPartsResponse = response.json().await?; assert_eq!(list.parts.len(), 1); - assert_eq!(list.parts[&1].etag, second_etag.etag); - assert_eq!(list.parts[&1].size, 6); + assert_eq!(list.parts[0].part_number, 1); + assert_eq!(list.parts[0].etag, second_etag.etag); + assert_eq!(list.parts[0].size, 6); // 5. Complete with the overwritten part complete_and_assert( diff --git a/objectstore-types/src/multipart.rs b/objectstore-types/src/multipart.rs index 23cedebb..831e7107 100644 --- a/objectstore-types/src/multipart.rs +++ b/objectstore-types/src/multipart.rs @@ -1,6 +1,5 @@ //! Types for the multipart upload protocol. -use std::collections::BTreeMap; use std::time::SystemTime; use serde::{Deserialize, Serialize}; @@ -30,6 +29,8 @@ pub struct UploadPartResponse { /// Information about a single uploaded part, as returned by list-parts. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PartInfo { + /// The part number. + pub part_number: u32, /// Opaque identifier of the part. pub etag: ETag, /// When the part was last modified. @@ -39,11 +40,20 @@ pub struct PartInfo { pub size: u64, } +impl From for CompletePart { + fn from(info: PartInfo) -> Self { + Self { + part_number: info.part_number, + etag: info.etag, + } + } +} + /// Response from listing parts of a multipart upload. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ListPartsResponse { - /// Map of part number to part information. - pub parts: BTreeMap, + /// Parts uploaded so far. + pub parts: Vec, /// Whether the response was truncated. 
pub is_truncated: bool, /// Marker for the next page of results, if truncated. From d0518be9a5393bc17e7cdb5604866c6179338117 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 14:37:15 +0200 Subject: [PATCH 21/33] fix(rust-client): don't zstd-compress parts in uncompressed multipart test The full_upload_uncompressed test was sending zstd-compressed parts while the upload metadata had compression=None, so GET returned the raw compressed bytes instead of the expected plaintext. --- clients/rust/tests/multipart.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index c6205e56..674f8c1f 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -29,11 +29,7 @@ async fn full_upload_uncompressed() { assert_eq!(upload.key(), "multipart-test-key"); assert!(!upload.upload_id().is_empty()); - // Multipart uploads don't auto-compress; caller must pre-compress each part. - let parts_data: Vec<(Vec, u32)> = vec![ - (zstd::encode_all(&b"hello "[..], 0).unwrap(), 1), - (zstd::encode_all(&b"world!"[..], 0).unwrap(), 2), - ]; + let parts_data: Vec<(&[u8], u32)> = vec![(b"hello ", 1), (b"world!", 2)]; let results: Vec<_> = stream::iter( parts_data @@ -58,7 +54,6 @@ async fn full_upload_uncompressed() { assert_eq!(key, "multipart-test-key"); - // The client decompresses concatenated zstd frames (multiple_members) transparently. 
let response = session.get(&key).send().await.unwrap().unwrap(); assert_eq!(response.metadata.compression, None); let payload = response.payload().await.unwrap(); From cd8ba4939740bfcd96cc04ce1e9faf68353069a7 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 14:59:20 +0200 Subject: [PATCH 22/33] improve --- clients/rust/tests/multipart.rs | 42 +++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index 674f8c1f..6038ef6f 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -10,13 +10,14 @@ use objectstore_client::{Client, CompletePart, Compression, Error, Usecase}; use crate::common::test_token_generator; #[tokio::test] -async fn full_upload_uncompressed() { +async fn test_full_upload_uncompressed() { let server = test_server().await; let client = Client::builder(server.url("/")) .token(test_token_generator()) .build() .unwrap(); let usecase = Usecase::new("usecase").with_compression(None); + let session = client.session(usecase.for_organization(12345)).unwrap(); let upload = session @@ -54,14 +55,20 @@ async fn full_upload_uncompressed() { assert_eq!(key, "multipart-test-key"); - let response = session.get(&key).send().await.unwrap().unwrap(); + let response = session + .get(&key) + .decompress(false) + .send() + .await + .unwrap() + .unwrap(); assert_eq!(response.metadata.compression, None); let payload = response.payload().await.unwrap(); assert_eq!(payload, "hello world!"); } #[tokio::test] -async fn compressed_upload_flow() { +async fn test_full_upload_compressed() { let server = test_server().await; let session = test_session(&server); @@ -76,7 +83,6 @@ async fn compressed_upload_flow() { let part1_data = b"hello "; let part2_data = b"world!"; - // Caller is responsible for pre-compressing parts. 
let parts_data: Vec<(Vec, u32)> = vec![ (zstd::encode_all(&part1_data[..], 0).unwrap(), 1), (zstd::encode_all(&part2_data[..], 0).unwrap(), 2), @@ -125,7 +131,7 @@ async fn compressed_upload_flow() { } #[tokio::test] -async fn server_generated_key() { +async fn test_server_generated_key() { let server = test_server().await; let session = test_session(&server); @@ -149,7 +155,7 @@ async fn server_generated_key() { } #[tokio::test] -async fn list_parts() { +async fn test_list_parts() { let server = test_server().await; let session = test_session(&server); @@ -161,28 +167,34 @@ async fn list_parts() { .await .unwrap(); - upload.put(b"part-one".as_slice(), 1, None).await.unwrap(); upload.put(b"part-two".as_slice(), 2, None).await.unwrap(); + upload.put(b"part-one".as_slice(), 1, None).await.unwrap(); let parts = upload.list_parts().await.unwrap(); assert_eq!(parts.len(), 2); - assert_eq!(parts[0].part_number, 1); - assert_eq!(parts[1].part_number, 2); - assert_eq!(parts[0].size, 8); - assert_eq!(parts[1].size, 8); + + let p1 = parts + .iter() + .find(|p| p.part_number == 1) + .expect("missing part 1"); + let p2 = parts + .iter() + .find(|p| p.part_number == 2) + .expect("missing part 2"); + assert_eq!(p1.size, 8); + assert_eq!(p2.size, 8); upload.abort().await.unwrap(); } #[tokio::test] -async fn abort_upload() { +async fn test_abort() { let server = test_server().await; let session = test_session(&server); let upload = session .initiate_multipart_upload() .key("abort-key") - .compression(None) .send() .await .unwrap(); @@ -192,7 +204,7 @@ async fn abort_upload() { } #[tokio::test] -async fn metadata_preserved() { +async fn test_metadata_preserved() { let server = test_server().await; let session = test_session(&server); @@ -221,7 +233,7 @@ async fn metadata_preserved() { } #[tokio::test] -async fn complete_with_bad_etag() { +async fn test_complete_with_bad_etag() { let server = test_server().await; let session = test_session(&server); From 
9b399c19fd71e5b696b5f87210c054a3baac177f Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 15:03:13 +0200 Subject: [PATCH 23/33] improve --- clients/rust/Cargo.toml | 4 ++-- clients/rust/src/client.rs | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clients/rust/Cargo.toml b/clients/rust/Cargo.toml index b4bbb31f..6d3031d3 100644 --- a/clients/rust/Cargo.toml +++ b/clients/rust/Cargo.toml @@ -12,7 +12,7 @@ publish = true [dependencies] async-compression = { version = "0.4.27", features = ["tokio", "zstd"] } -base64 = "0.22.1" +base64 = { version = "0.22.1", optional = true } percent-encoding = { workspace = true } bytes = { workspace = true } futures-util = { workspace = true } @@ -40,7 +40,7 @@ zstd = "0.13.3" [features] default = ["native-tls", "hickory-dns"] -multipart = [] +multipart = ["dep:base64"] rustls = ["reqwest/rustls"] native-tls = ["reqwest/native-tls"] diff --git a/clients/rust/src/client.rs b/clients/rust/src/client.rs index f552da10..3b6ac42b 100644 --- a/clients/rust/src/client.rs +++ b/clients/rust/src/client.rs @@ -464,6 +464,7 @@ impl Session { url } + #[cfg(feature = "multipart")] fn multipart_url( &self, suffix: Option<&'static str>, @@ -530,6 +531,7 @@ impl Session { self.prepare_builder(builder) } + #[cfg(feature = "multipart")] pub(crate) fn multipart_request( &self, method: reqwest::Method, From d86cecb5c73c5555650713b148d1f89a5846a5e2 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 15:10:00 +0200 Subject: [PATCH 24/33] improve --- clients/rust/src/multipart.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 479f37bb..3e491e09 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -310,6 +310,11 @@ impl MultipartUpload { return Ok(all_parts); } marker = page.next_part_number_marker; + if marker.is_none() { + return 
Err(crate::Error::MalformedResponse( + "server returned is_truncated=true but no next_part_number_marker".into(), + )); + } } } From b1a695f6decfc33da43bcc80e30bbabeed127740 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 11 May 2026 15:10:28 +0200 Subject: [PATCH 25/33] improve --- clients/rust/src/multipart.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 3e491e09..2261d437 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -312,7 +312,7 @@ impl MultipartUpload { marker = page.next_part_number_marker; if marker.is_none() { return Err(crate::Error::MalformedResponse( - "server returned is_truncated=true but no next_part_number_marker".into(), + "server returned is_truncated=true but no next_part_number_marker. Please report a bug.".into(), )); } } From d84038fa02c2c45c8eaa086fbbaaaf0a6bd00ec0 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Wed, 13 May 2026 14:30:19 +0200 Subject: [PATCH 26/33] improve --- clients/rust/src/multipart.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 2261d437..16d26c45 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -359,6 +359,9 @@ impl MultipartUpload { self, parts: impl IntoIterator, ) -> crate::Result { + let mut parts: Vec<_> = parts.into_iter().collect(); + parts.sort_by_key(|p| p.part_number); + let builder = self .session .multipart_request( @@ -367,9 +370,7 @@ impl MultipartUpload { Some(&self.key), Some(vec![("upload_id", self.upload_id)]), )? - .json(&CompleteRequest { - parts: parts.into_iter().collect(), - }); + .json(&CompleteRequest { parts }); let response = builder.send().await?.error_for_status()?; match response.json::().await? 
{ From a2515a696f706a0ea4a926c67be3f447680430cf Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Wed, 13 May 2026 15:20:55 +0200 Subject: [PATCH 27/33] fix --- clients/rust/src/client.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/clients/rust/src/client.rs b/clients/rust/src/client.rs index 3b6ac42b..7a8848f9 100644 --- a/clients/rust/src/client.rs +++ b/clients/rust/src/client.rs @@ -489,12 +489,10 @@ impl Session { segments.extend(object_key.split("/")); } drop(segments); - { + if let Some(query_pairs) = query_pairs { let mut pairs = url.query_pairs_mut(); - if let Some(query_pairs) = query_pairs { - for (key, value) in query_pairs { - pairs.append_pair(key, &value); - } + for (key, value) in query_pairs { + pairs.append_pair(key, &value); } } From b1e6a4281d914fb184b89758715b23590cc0c3da Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Wed, 27 May 2026 12:59:51 +0200 Subject: [PATCH 28/33] add comment on decoder.multiple_members(true) --- clients/rust/src/get.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clients/rust/src/get.rs b/clients/rust/src/get.rs index f23e4679..5c4cf446 100644 --- a/clients/rust/src/get.rs +++ b/clients/rust/src/get.rs @@ -129,6 +129,9 @@ pub(crate) fn maybe_decompress( (Some(Compression::Zstd), true) => { metadata.compression = None; let mut decoder = ZstdDecoder::new(StreamReader::new(stream)); + // Multipart uploads with compression, when each part is compressed individually, + // will consist of multiple concatenated zstd frames. + // This allows the client to handle automatic decompression for these objects transparently. 
decoder.multiple_members(true); ReaderStream::new(decoder).boxed() } From 175e3fcfa4fae84c155e4f907e503dbac48a3e71 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 11:57:56 +0200 Subject: [PATCH 29/33] revert spurious pyproject.toml and uv.lock changes These were carried over from the old base branch during the restack and downgraded several Python dependencies. --- pyproject.toml | 2 +- uv.lock | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7b8e789a..c9479f32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ members = [ [dependency-groups] dev = [ "devservices>=1.2.2", - "pytest>=8.3.3", + "pytest>=9.0.3", "pytest-cov>=4.1.0", "mypy>=1.17.1", "ruff>=0.14.2", diff --git a/uv.lock b/uv.lock index 7f7c01ce..bce46f30 100644 --- a/uv.lock +++ b/uv.lock @@ -121,15 +121,15 @@ toml = [ [[package]] name = "cryptography" -version = "46.0.5" +version = "46.0.7" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad" }, - { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263" }, - { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.7-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:ea42cbe97209df307fdc3b155f1b6fa2577c0defa8f1f7d3be7d31d189108ad4" }, + { url = 
"https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.7-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:73510b83623e080a2c35c62c15298096e2a5dc8d51c3b4e1740211839d0dea77" }, + { url = "https://pypi.devinfra.sentry.io/wheels/cryptography-46.0.7-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:420b1e4109cc95f0e5700eed79908cef9268265c773d3a66f7af1eef53d409ef" }, ] [[package]] @@ -310,7 +310,7 @@ dev = [ { name = "devservices", specifier = ">=1.2.2" }, { name = "mypy", specifier = ">=1.17.1" }, { name = "pre-commit", specifier = ">=4.2.0" }, - { name = "pytest", specifier = ">=8.3.3" }, + { name = "pytest", specifier = ">=9.0.3" }, { name = "pytest-cov", specifier = ">=4.1.0" }, { name = "ruff", specifier = ">=0.14.2" }, ] @@ -400,10 +400,10 @@ wheels = [ [[package]] name = "pygments" -version = "2.19.2" +version = "2.20.0" source = { registry = "https://pypi.devinfra.sentry.io/simple" } wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176" }, ] [[package]] @@ -421,16 +421,17 @@ crypto = [ [[package]] name = "pytest" -version = "8.3.3" +version = "9.0.3" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, + { name = "pygments" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2" }, + { url = "https://pypi.devinfra.sentry.io/wheels/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9" }, ] [[package]] @@ -469,7 +470,7 @@ wheels = [ 
[[package]] name = "requests" -version = "2.32.5" +version = "2.33.0" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "certifi" }, @@ -478,7 +479,7 @@ dependencies = [ { name = "urllib3" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6" }, + { url = "https://pypi.devinfra.sentry.io/wheels/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b" }, ] [[package]] From db3745fa0be58e946664fc2484ab43457c48ed18 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 12:05:28 +0200 Subject: [PATCH 30/33] ref: move UploadId and PartNumber to objectstore-types Moves the validated `UploadId` newtype and `PartNumber` (`NonZeroU32`) type alias from `objectstore-service` to `objectstore-types` so they can be shared between the server and Rust client. Updates all wire format types to use the stricter types, removing the need for conversions at the server endpoint boundary. 
--- clients/rust/src/multipart.rs | 19 ++--- clients/rust/tests/multipart.rs | 47 ++++++++---- objectstore-server/src/endpoints/multipart.rs | 19 ++--- objectstore-server/tests/multipart.rs | 12 ++-- objectstore-service/src/backend/gcs.rs | 2 +- objectstore-service/src/backend/tiered.rs | 4 +- objectstore-service/src/error.rs | 5 ++ objectstore-service/src/multipart.rs | 62 +--------------- objectstore-types/src/multipart.rs | 72 +++++++++++++++++-- 9 files changed, 133 insertions(+), 109 deletions(-) diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 16d26c45..b3ad145d 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -19,6 +19,7 @@ use crate::{ClientStream, ObjectKey, Session}; pub use objectstore_types::multipart::CompletePart; pub use objectstore_types::multipart::ETag; pub use objectstore_types::multipart::PartInfo; +pub use objectstore_types::multipart::PartNumber; pub use objectstore_types::multipart::UploadId; #[derive(Deserialize)] @@ -212,7 +213,7 @@ impl MultipartUpload { pub async fn put( &self, body: impl Into, - part_number: u32, + part_number: PartNumber, content_md5: Option<&[u8; 16]>, ) -> crate::Result { let bytes = body.into(); @@ -230,7 +231,7 @@ impl MultipartUpload { pub async fn put_stream( &self, stream: ClientStream, - part_number: u32, + part_number: PartNumber, content_length: u64, content_md5: Option<&[u8; 16]>, ) -> crate::Result { @@ -252,7 +253,7 @@ impl MultipartUpload { pub async fn put_read( &self, reader: R, - part_number: u32, + part_number: PartNumber, content_length: u64, content_md5: Option<&[u8; 16]>, ) -> crate::Result @@ -267,7 +268,7 @@ impl MultipartUpload { async fn upload_part( &self, body: Body, - part_number: u32, + part_number: PartNumber, content_length: u64, content_md5: Option<&[u8; 16]>, ) -> crate::Result { @@ -278,7 +279,7 @@ impl MultipartUpload { Some("parts"), Some(&self.key), Some(vec![ - ("upload_id", self.upload_id.clone()), + ("upload_id", 
self.upload_id.to_string()), ("part_number", part_number.to_string()), ]), )? @@ -321,9 +322,9 @@ impl MultipartUpload { async fn list_parts_page( &self, max_parts: Option, - part_number_marker: Option, + part_number_marker: Option, ) -> crate::Result { - let mut params = vec![("upload_id", self.upload_id.clone())]; + let mut params: Vec<(&str, String)> = vec![("upload_id", self.upload_id.to_string())]; if let Some(max) = max_parts { params.push(("max_parts", max.to_string())); } @@ -348,7 +349,7 @@ impl MultipartUpload { reqwest::Method::DELETE, None, Some(&self.key), - Some(vec![("upload_id", self.upload_id)]), + Some(vec![("upload_id", self.upload_id.to_string())]), )?; builder.send().await?.error_for_status()?; Ok(()) @@ -368,7 +369,7 @@ impl MultipartUpload { reqwest::Method::POST, Some("complete"), Some(&self.key), - Some(vec![("upload_id", self.upload_id)]), + Some(vec![("upload_id", self.upload_id.to_string())]), )? .json(&CompleteRequest { parts }); diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index 6038ef6f..29574563 100644 --- a/clients/rust/tests/multipart.rs +++ b/clients/rust/tests/multipart.rs @@ -5,10 +5,14 @@ mod common; use common::{test_server, test_session}; use futures_util::StreamExt as _; use futures_util::stream; -use objectstore_client::{Client, CompletePart, Compression, Error, Usecase}; +use objectstore_client::{Client, CompletePart, Compression, Error, PartNumber, Usecase}; use crate::common::test_token_generator; +fn pn(n: u32) -> PartNumber { + PartNumber::new(n).unwrap() +} + #[tokio::test] async fn test_full_upload_uncompressed() { let server = test_server().await; @@ -30,7 +34,7 @@ async fn test_full_upload_uncompressed() { assert_eq!(upload.key(), "multipart-test-key"); assert!(!upload.upload_id().is_empty()); - let parts_data: Vec<(&[u8], u32)> = vec![(b"hello ", 1), (b"world!", 2)]; + let parts_data: Vec<(&[u8], PartNumber)> = vec![(b"hello ", pn(1)), (b"world!", pn(2))]; let results: Vec<_> = 
stream::iter( parts_data @@ -83,9 +87,9 @@ async fn test_full_upload_compressed() { let part1_data = b"hello "; let part2_data = b"world!"; - let parts_data: Vec<(Vec, u32)> = vec![ - (zstd::encode_all(&part1_data[..], 0).unwrap(), 1), - (zstd::encode_all(&part2_data[..], 0).unwrap(), 2), + let parts_data: Vec<(Vec, PartNumber)> = vec![ + (zstd::encode_all(&part1_data[..], 0).unwrap(), pn(1)), + (zstd::encode_all(&part2_data[..], 0).unwrap(), pn(2)), ]; let results: Vec<_> = stream::iter( @@ -144,7 +148,7 @@ async fn test_server_generated_key() { assert!(!upload.key().is_empty()); - let part = upload.put(b"data".as_slice(), 1, None).await.unwrap(); + let part = upload.put(b"data".as_slice(), pn(1), None).await.unwrap(); let key = upload.complete([part]).await.unwrap(); @@ -167,19 +171,25 @@ async fn test_list_parts() { .await .unwrap(); - upload.put(b"part-two".as_slice(), 2, None).await.unwrap(); - upload.put(b"part-one".as_slice(), 1, None).await.unwrap(); + upload + .put(b"part-two".as_slice(), pn(2), None) + .await + .unwrap(); + upload + .put(b"part-one".as_slice(), pn(1), None) + .await + .unwrap(); let parts = upload.list_parts().await.unwrap(); assert_eq!(parts.len(), 2); let p1 = parts .iter() - .find(|p| p.part_number == 1) + .find(|p| p.part_number == pn(1)) .expect("missing part 1"); let p2 = parts .iter() - .find(|p| p.part_number == 2) + .find(|p| p.part_number == pn(2)) .expect("missing part 2"); assert_eq!(p1.size, 8); assert_eq!(p2.size, 8); @@ -199,7 +209,10 @@ async fn test_abort() { .await .unwrap(); - upload.put(b"some data".as_slice(), 1, None).await.unwrap(); + upload + .put(b"some data".as_slice(), pn(1), None) + .await + .unwrap(); upload.abort().await.unwrap(); } @@ -219,7 +232,10 @@ async fn test_metadata_preserved() { .await .unwrap(); - let part = upload.put(b"payload".as_slice(), 1, None).await.unwrap(); + let part = upload + .put(b"payload".as_slice(), pn(1), None) + .await + .unwrap(); let key = 
upload.complete([part]).await.unwrap(); @@ -245,11 +261,14 @@ async fn test_complete_with_bad_etag() { .await .unwrap(); - upload.put(b"real data".as_slice(), 1, None).await.unwrap(); + upload + .put(b"real data".as_slice(), pn(1), None) + .await + .unwrap(); let result = upload .complete(vec![CompletePart { - part_number: 1, + part_number: pn(1), etag: "bogus-etag".to_string(), }]) .await; diff --git a/objectstore-server/src/endpoints/multipart.rs b/objectstore-server/src/endpoints/multipart.rs index 35926175..c9d40d26 100644 --- a/objectstore-server/src/endpoints/multipart.rs +++ b/objectstore-server/src/endpoints/multipart.rs @@ -1,6 +1,11 @@ use std::convert::Infallible; use std::time::{Duration, SystemTime}; +use crate::auth::AuthAwareService; +use crate::endpoints::common::{ApiError, ApiResult}; +use crate::extractors::Xt; +use crate::extractors::body::MeteredBody; +use crate::state::ServiceState; use axum::body::Body; use axum::extract::{Query, State}; use axum::http::{HeaderMap, StatusCode}; @@ -14,18 +19,12 @@ use http::header; use objectstore_service::error::Error as ServiceError; use objectstore_service::id::{ObjectContext, ObjectId}; use objectstore_service::multipart::{CompletedPart, PartNumber, UploadId}; -use objectstore_types::auth::Permission; use objectstore_types::metadata::Metadata; use objectstore_types::multipart::{ CompleteErrorDetail, CompleteErrorResponse, CompleteRequest, CompleteSuccessResponse, - InitiateResponse, ListPartsResponse, PartInfo, UploadId, UploadPartResponse, + InitiateResponse, ListPartsResponse, PartInfo, UploadPartResponse, }; use serde::Deserialize; -use crate::auth::AuthAwareService; -use crate::endpoints::common::{ApiError, ApiResult}; -use crate::extractors::Xt; -use crate::extractors::body::MeteredBody; -use crate::state::ServiceState; pub fn router() -> Router { let initiate_no_key = routing::post(initiate_post); @@ -69,12 +68,6 @@ struct ListPartsQuery { part_number_marker: Option, } -fn 
validate_part_number(part_number: u32) -> ApiResult<()> { - if part_number == 0 { - return Err(ApiError::Client("part_number must be >= 1".into())); - } - Ok(()) -} // --- Handlers --- async fn initiate_put( diff --git a/objectstore-server/tests/multipart.rs b/objectstore-server/tests/multipart.rs index 376fa515..0dbd28cd 100644 --- a/objectstore-server/tests/multipart.rs +++ b/objectstore-server/tests/multipart.rs @@ -5,9 +5,13 @@ use objectstore_server::config::{AuthZ, Config}; use objectstore_test::server::TestServer; use objectstore_types::multipart::{ CompleteErrorResponse, CompleteSuccessResponse, InitiateResponse, ListPartsResponse, - UploadPartResponse, + PartNumber, UploadPartResponse, }; +fn pn(n: u32) -> PartNumber { + PartNumber::new(n).unwrap() +} + async fn test_server() -> TestServer { TestServer::with_config(Config { auth: AuthZ { @@ -153,8 +157,8 @@ async fn test_multipart_full_flow() -> Result<()> { assert_eq!(response.status(), reqwest::StatusCode::OK); let list: ListPartsResponse = response.json().await?; assert_eq!(list.parts.len(), 2); - assert_eq!(list.parts[0].part_number, 1); - assert_eq!(list.parts[1].part_number, 2); + assert_eq!(list.parts[0].part_number, pn(1)); + assert_eq!(list.parts[1].part_number, pn(2)); assert_eq!(list.parts[0].size, part1_data.len() as u64); assert_eq!(list.parts[1].size, part2_data.len() as u64); assert!(!list.is_truncated); @@ -394,7 +398,7 @@ async fn test_upload_part_overwrite() -> Result<()> { assert_eq!(response.status(), reqwest::StatusCode::OK); let list: ListPartsResponse = response.json().await?; assert_eq!(list.parts.len(), 1); - assert_eq!(list.parts[0].part_number, 1); + assert_eq!(list.parts[0].part_number, pn(1)); assert_eq!(list.parts[0].etag, second_etag.etag); assert_eq!(list.parts[0].size, 6); diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 80174148..43cc45c1 100644 --- a/objectstore-service/src/backend/gcs.rs +++ 
b/objectstore-service/src/backend/gcs.rs @@ -733,7 +733,7 @@ impl TryFrom for InitiateMultipartResponse { type Error = crate::error::Error; fn try_from(r: XmlInitiateMultipartUploadResponse) -> crate::error::Result { - UploadId::new(r.upload_id) + Ok(UploadId::new(r.upload_id)?) } } diff --git a/objectstore-service/src/backend/tiered.rs b/objectstore-service/src/backend/tiered.rs index 7d63dd54..1385e01d 100644 --- a/objectstore-service/src/backend/tiered.rs +++ b/objectstore-service/src/backend/tiered.rs @@ -580,7 +580,9 @@ impl TryInto for TieredUploadId { fn try_into(self) -> Result { let json = serde_json::to_vec(&self).map_err(|e| Error::serde("encoding multipart token", e))?; - UploadId::new(base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(json)) + Ok(UploadId::new( + base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(json), + )?) } } diff --git a/objectstore-service/src/error.rs b/objectstore-service/src/error.rs index 27344abe..46e38e28 100644 --- a/objectstore-service/src/error.rs +++ b/objectstore-service/src/error.rs @@ -91,6 +91,10 @@ pub enum Error { /// The functionality is not implemented by this instance of the service. #[error("not implemented")] NotImplemented, + + /// Invalid upload ID (e.g. path traversal attempt). + #[error(transparent)] + InvalidUploadId(#[from] objectstore_types::multipart::InvalidUploadId), } impl Error { @@ -147,6 +151,7 @@ impl Error { Self::Dropped => Level::ERROR, Self::UnexpectedTombstone => Level::ERROR, Self::NotImplemented => Level::ERROR, + Self::InvalidUploadId(_) => Level::DEBUG, Self::Generic { .. } => Level::ERROR, } } diff --git a/objectstore-service/src/multipart.rs b/objectstore-service/src/multipart.rs index 1cbe465a..b06e4814 100644 --- a/objectstore-service/src/multipart.rs +++ b/objectstore-service/src/multipart.rs @@ -1,68 +1,8 @@ //! Shared types for Objectstore's multipart upload protocol. 
-use std::fmt; -use std::ops::Deref; -use std::path::{Component, Path}; use std::time::SystemTime; -use serde::{Deserialize, Deserializer, Serialize}; - -use crate::error::Error; - -/// Identifier for an in-progress multipart upload. -/// -/// Validated on construction: non-empty and free of path-traversal components -/// (`..`, leading `/`, etc.), so it is always safe to use as a single path segment. -#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)] -#[serde(transparent)] -pub struct UploadId(String); - -impl UploadId { - /// Returns the upload ID as a string slice. - pub fn as_str(&self) -> &str { - &self.0 - } - - /// Creates a new `UploadId` after validating the input. - pub fn new(s: String) -> Result { - if s.is_empty() { - return Err(Error::generic("upload_id must not be empty")); - } - for component in Path::new(&s).components() { - if !matches!(component, Component::Normal(_)) { - return Err(Error::generic(format!("invalid upload_id: {s}"))); - } - } - Ok(Self(s)) - } -} - -impl Deref for UploadId { - type Target = str; - fn deref(&self) -> &str { - &self.0 - } -} - -impl fmt::Display for UploadId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl<'de> Deserialize<'de> for UploadId { - fn deserialize(deserializer: D) -> std::result::Result - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - Self::new(s).map_err(serde::de::Error::custom) - } -} -/// 1-indexed position of a part within its multipart upload. -pub type PartNumber = std::num::NonZeroU32; -/// Opaque per-part identifier returned by the backend after a successful part upload. -pub type ETag = String; +pub use objectstore_types::multipart::{ETag, InvalidUploadId, PartNumber, UploadId}; /// Description of one part in the response to /// [`MultipartUploadBackend::list_parts`](crate::backend::common::MultipartUploadBackend::list_parts). 
diff --git a/objectstore-types/src/multipart.rs b/objectstore-types/src/multipart.rs index 831e7107..b56cf44e 100644 --- a/objectstore-types/src/multipart.rs +++ b/objectstore-types/src/multipart.rs @@ -1,15 +1,75 @@ //! Types for the multipart upload protocol. +use std::fmt; +use std::num::NonZeroU32; +use std::ops::Deref; +use std::path::{Component, Path}; use std::time::SystemTime; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; -/// Identifier for a multipart upload session. -pub type UploadId = String; +/// 1-indexed position of a part within its multipart upload. +pub type PartNumber = NonZeroU32; /// Opaque entity tag identifying a specific version of an uploaded part. pub type ETag = String; +/// Identifier for an in-progress multipart upload. +/// +/// Validated on construction: non-empty and free of path-traversal components +/// (`..`, leading `/`, etc.), so it is always safe to use as a single path segment. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)] +#[serde(transparent)] +pub struct UploadId(String); + +/// Error returned when an [`UploadId`] fails validation. +#[derive(Debug, thiserror::Error)] +#[error("invalid upload_id: {0}")] +pub struct InvalidUploadId(String); + +impl UploadId { + /// Creates a new `UploadId` after validating the input. + pub fn new(s: String) -> Result { + if s.is_empty() { + return Err(InvalidUploadId("must not be empty".into())); + } + for component in Path::new(&s).components() { + if !matches!(component, Component::Normal(_)) { + return Err(InvalidUploadId(s)); + } + } + Ok(Self(s)) + } + + /// Returns the upload ID as a string slice. 
+ pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl Deref for UploadId { + type Target = str; + fn deref(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for UploadId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl<'de> Deserialize<'de> for UploadId { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Self::new(s).map_err(serde::de::Error::custom) + } +} + /// Response from initiating a multipart upload. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InitiateResponse { @@ -30,7 +90,7 @@ pub struct UploadPartResponse { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PartInfo { /// The part number. - pub part_number: u32, + pub part_number: PartNumber, /// Opaque identifier of the part. pub etag: ETag, /// When the part was last modified. @@ -58,14 +118,14 @@ pub struct ListPartsResponse { pub is_truncated: bool, /// Marker for the next page of results, if truncated. #[serde(skip_serializing_if = "Option::is_none")] - pub next_part_number_marker: Option, + pub next_part_number_marker: Option, } /// A single part reference used in the complete request. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CompletePart { /// The part number. - pub part_number: u32, + pub part_number: PartNumber, /// The etag returned when this part was uploaded. pub etag: ETag, } From 8b8396ed339915544f96587720e8ad6a3c30bd4c Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 12:17:07 +0200 Subject: [PATCH 31/33] ref(rust-client): accept u32/String at the public API boundary The client methods (put, put_stream, put_read, resume_multipart_upload) accept u32 for part numbers and String for upload IDs, converting to the stricter internal types (PartNumber, UploadId) at the boundary. This keeps the public API simple while maintaining type safety internally. 
--- clients/rust/README.md | 10 ++++---- clients/rust/src/error.rs | 3 +++ clients/rust/src/multipart.rs | 22 +++++++++------- clients/rust/tests/multipart.rs | 45 ++++++++++----------------------- 4 files changed, 34 insertions(+), 46 deletions(-) diff --git a/clients/rust/README.md b/clients/rust/README.md index 217188e9..67ff98fa 100644 --- a/clients/rust/README.md +++ b/clients/rust/README.md @@ -140,7 +140,7 @@ when initiating the upload. ```rust,ignore use futures_util::StreamExt as _; use futures_util::stream; -use objectstore_client::{Compression, MultipartCompletePart}; +use objectstore_client::Compression; let upload = session .initiate_multipart_upload() @@ -186,13 +186,13 @@ You can also resume an in-progress multipart upload, e.g. after a process restar ```rust,ignore use futures_util::{StreamExt as _, TryStreamExt as _}; use futures_util::stream; -use objectstore_client::MultipartCompletePart; +use objectstore_client::CompletePart; -let upload = session.resume_multipart_upload("my-large-object", saved_upload_id); +let upload = session.resume_multipart_upload("my-large-object", saved_upload_id)?; let existing = upload.list_parts().await?; let total_parts = 10; -let uploaded: Vec = existing.iter().map(|p| p.part_number).collect(); +let uploaded: Vec = existing.iter().map(|p| p.part_number.get()).collect(); let missing: Vec = (1..=total_parts) .filter(|n| !uploaded.contains(n)) .collect(); @@ -206,7 +206,7 @@ let mut done: Vec<_> = stream::iter( .try_collect() .await?; -done.extend(existing.into_iter().map(MultipartCompletePart::from)); +done.extend(existing.into_iter().map(CompletePart::from)); let key = upload.complete(done).await?; ``` diff --git a/clients/rust/src/error.rs b/clients/rust/src/error.rs index 7f80885a..14eeef61 100644 --- a/clients/rust/src/error.rs +++ b/clients/rust/src/error.rs @@ -44,6 +44,9 @@ pub enum Error { /// The error message. message: String, }, + /// Invalid argument passed to a client method. 
+ #[error("invalid argument: {0}")] + InvalidArgument(String), /// Error returned when attempting to complete a multipart upload. #[error("multipart complete failed ({code}): {message}")] MultipartComplete { diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index b3ad145d..4c688744 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -65,13 +65,14 @@ impl Session { pub fn resume_multipart_upload( &self, key: impl Into, - upload_id: impl Into, - ) -> MultipartUpload { - MultipartUpload { + upload_id: impl Into, + ) -> crate::Result { + Ok(MultipartUpload { session: self.clone(), key: key.into(), - upload_id: upload_id.into(), - } + upload_id: UploadId::new(upload_id.into()) + .map_err(|e| crate::Error::InvalidArgument(e.to_string()))?, + }) } } @@ -213,7 +214,7 @@ impl MultipartUpload { pub async fn put( &self, body: impl Into, - part_number: PartNumber, + part_number: u32, content_md5: Option<&[u8; 16]>, ) -> crate::Result { let bytes = body.into(); @@ -231,7 +232,7 @@ impl MultipartUpload { pub async fn put_stream( &self, stream: ClientStream, - part_number: PartNumber, + part_number: u32, content_length: u64, content_md5: Option<&[u8; 16]>, ) -> crate::Result { @@ -253,7 +254,7 @@ impl MultipartUpload { pub async fn put_read( &self, reader: R, - part_number: PartNumber, + part_number: u32, content_length: u64, content_md5: Option<&[u8; 16]>, ) -> crate::Result @@ -268,10 +269,13 @@ impl MultipartUpload { async fn upload_part( &self, body: Body, - part_number: PartNumber, + part_number: u32, content_length: u64, content_md5: Option<&[u8; 16]>, ) -> crate::Result { + let part_number = PartNumber::new(part_number) + .ok_or_else(|| crate::Error::InvalidArgument("part_number must be >= 1".into()))?; + let mut builder = self .session .multipart_request( diff --git a/clients/rust/tests/multipart.rs b/clients/rust/tests/multipart.rs index 29574563..08610656 100644 --- a/clients/rust/tests/multipart.rs +++ 
b/clients/rust/tests/multipart.rs @@ -9,10 +9,6 @@ use objectstore_client::{Client, CompletePart, Compression, Error, PartNumber, U use crate::common::test_token_generator; -fn pn(n: u32) -> PartNumber { - PartNumber::new(n).unwrap() -} - #[tokio::test] async fn test_full_upload_uncompressed() { let server = test_server().await; @@ -34,7 +30,7 @@ async fn test_full_upload_uncompressed() { assert_eq!(upload.key(), "multipart-test-key"); assert!(!upload.upload_id().is_empty()); - let parts_data: Vec<(&[u8], PartNumber)> = vec![(b"hello ", pn(1)), (b"world!", pn(2))]; + let parts_data: Vec<(&[u8], u32)> = vec![(b"hello ", 1), (b"world!", 2)]; let results: Vec<_> = stream::iter( parts_data @@ -87,9 +83,9 @@ async fn test_full_upload_compressed() { let part1_data = b"hello "; let part2_data = b"world!"; - let parts_data: Vec<(Vec, PartNumber)> = vec![ - (zstd::encode_all(&part1_data[..], 0).unwrap(), pn(1)), - (zstd::encode_all(&part2_data[..], 0).unwrap(), pn(2)), + let parts_data: Vec<(Vec, u32)> = vec![ + (zstd::encode_all(&part1_data[..], 0).unwrap(), 1), + (zstd::encode_all(&part2_data[..], 0).unwrap(), 2), ]; let results: Vec<_> = stream::iter( @@ -148,7 +144,7 @@ async fn test_server_generated_key() { assert!(!upload.key().is_empty()); - let part = upload.put(b"data".as_slice(), pn(1), None).await.unwrap(); + let part = upload.put(b"data".as_slice(), 1, None).await.unwrap(); let key = upload.complete([part]).await.unwrap(); @@ -171,25 +167,19 @@ async fn test_list_parts() { .await .unwrap(); - upload - .put(b"part-two".as_slice(), pn(2), None) - .await - .unwrap(); - upload - .put(b"part-one".as_slice(), pn(1), None) - .await - .unwrap(); + upload.put(b"part-two".as_slice(), 2, None).await.unwrap(); + upload.put(b"part-one".as_slice(), 1, None).await.unwrap(); let parts = upload.list_parts().await.unwrap(); assert_eq!(parts.len(), 2); let p1 = parts .iter() - .find(|p| p.part_number == pn(1)) + .find(|p| p.part_number.get() == 1) .expect("missing part 1"); let p2 
= parts .iter() - .find(|p| p.part_number == pn(2)) + .find(|p| p.part_number.get() == 2) .expect("missing part 2"); assert_eq!(p1.size, 8); assert_eq!(p2.size, 8); @@ -209,10 +199,7 @@ async fn test_abort() { .await .unwrap(); - upload - .put(b"some data".as_slice(), pn(1), None) - .await - .unwrap(); + upload.put(b"some data".as_slice(), 1, None).await.unwrap(); upload.abort().await.unwrap(); } @@ -232,10 +219,7 @@ async fn test_metadata_preserved() { .await .unwrap(); - let part = upload - .put(b"payload".as_slice(), pn(1), None) - .await - .unwrap(); + let part = upload.put(b"payload".as_slice(), 1, None).await.unwrap(); let key = upload.complete([part]).await.unwrap(); @@ -261,14 +245,11 @@ async fn test_complete_with_bad_etag() { .await .unwrap(); - upload - .put(b"real data".as_slice(), pn(1), None) - .await - .unwrap(); + upload.put(b"real data".as_slice(), 1, None).await.unwrap(); let result = upload .complete(vec![CompletePart { - part_number: pn(1), + part_number: PartNumber::new(1).unwrap(), etag: "bogus-etag".to_string(), }]) .await; From 59da6dd021982a65afcea262c3c1c0a71a7b730e Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 12:19:05 +0200 Subject: [PATCH 32/33] ref(rust-client): use specific error variants for validation failures Replace InvalidArgument with InvalidPartNumber and InvalidUploadId. --- clients/rust/src/error.rs | 9 ++++++--- clients/rust/src/multipart.rs | 7 +++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/clients/rust/src/error.rs b/clients/rust/src/error.rs index 14eeef61..341ea47d 100644 --- a/clients/rust/src/error.rs +++ b/clients/rust/src/error.rs @@ -44,9 +44,12 @@ pub enum Error { /// The error message. message: String, }, - /// Invalid argument passed to a client method. - #[error("invalid argument: {0}")] - InvalidArgument(String), + /// Error when part number validation fails (must be >= 1). 
+ #[error("invalid part number: {0}")] + InvalidPartNumber(u32), + /// Error when upload ID validation fails. + #[error(transparent)] + InvalidUploadId(#[from] objectstore_types::multipart::InvalidUploadId), /// Error returned when attempting to complete a multipart upload. #[error("multipart complete failed ({code}): {message}")] MultipartComplete { diff --git a/clients/rust/src/multipart.rs b/clients/rust/src/multipart.rs index 4c688744..e5c6c690 100644 --- a/clients/rust/src/multipart.rs +++ b/clients/rust/src/multipart.rs @@ -70,8 +70,7 @@ impl Session { Ok(MultipartUpload { session: self.clone(), key: key.into(), - upload_id: UploadId::new(upload_id.into()) - .map_err(|e| crate::Error::InvalidArgument(e.to_string()))?, + upload_id: UploadId::new(upload_id.into())?, }) } } @@ -273,8 +272,8 @@ impl MultipartUpload { content_length: u64, content_md5: Option<&[u8; 16]>, ) -> crate::Result { - let part_number = PartNumber::new(part_number) - .ok_or_else(|| crate::Error::InvalidArgument("part_number must be >= 1".into()))?; + let part_number = + PartNumber::new(part_number).ok_or(crate::Error::InvalidPartNumber(part_number))?; let mut builder = self .session From bb111e40260695ab8379df7ed95d427ec54518ff Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 12:54:32 +0200 Subject: [PATCH 33/33] fix(server): map InvalidUploadId to 400 Bad Request Previously fell through to the catch-all 500 Internal Server Error. 
--- objectstore-server/src/endpoints/common.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/objectstore-server/src/endpoints/common.rs b/objectstore-server/src/endpoints/common.rs index 365f8e62..ab6a8240 100644 --- a/objectstore-server/src/endpoints/common.rs +++ b/objectstore-server/src/endpoints/common.rs @@ -93,6 +93,7 @@ impl ApiError { ApiError::Service(ServiceError::Client(_)) => StatusCode::BAD_REQUEST, ApiError::Service(ServiceError::Metadata(_)) => StatusCode::BAD_REQUEST, + ApiError::Service(ServiceError::InvalidUploadId(_)) => StatusCode::BAD_REQUEST, ApiError::Service(ServiceError::AtCapacity) => StatusCode::TOO_MANY_REQUESTS, ApiError::Service(ServiceError::NotImplemented) => StatusCode::NOT_IMPLEMENTED, ApiError::Service(_) => {