From 3cbf44e823351759c5cba5dbdc4c47da196ec8ba Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 26 May 2026 16:47:22 +0200 Subject: [PATCH 01/45] feat: Support HTTP Range requests for single-object GETs Thread `Option` through all `get_object` paths and return `ContentRange` in every response so the HTTP handler can choose between 200, 206, and 416 with correct headers. Backends that store objects remotely (GCS, S3) forward the Range header and parse the response. Local-filesystem uses seek-based partial reads. In-memory and Bigtable ignore the range and always return the full payload with 200, which is RFC-compliant. --- objectstore-server/src/auth/service.rs | 9 +- objectstore-server/src/endpoints/batch.rs | 2 +- objectstore-server/src/endpoints/common.rs | 3 + objectstore-server/src/endpoints/objects.rs | 57 ++- objectstore-server/tests/range_requests.rs | 240 +++++++++++ objectstore-service/src/backend/bigtable.rs | 60 +-- objectstore-service/src/backend/common.rs | 26 +- objectstore-service/src/backend/gcs.rs | 69 +++- objectstore-service/src/backend/in_memory.rs | 22 +- objectstore-service/src/backend/local_fs.rs | 39 +- .../src/backend/s3_compatible.rs | 56 ++- objectstore-service/src/backend/testing.rs | 32 +- objectstore-service/src/backend/tiered.rs | 35 +- objectstore-service/src/error.rs | 8 + objectstore-service/src/service.rs | 33 +- objectstore-service/src/streaming.rs | 2 +- objectstore-types/src/lib.rs | 1 + objectstore-types/src/range.rs | 372 ++++++++++++++++++ 18 files changed, 941 insertions(+), 125 deletions(-) create mode 100644 objectstore-server/tests/range_requests.rs create mode 100644 objectstore-types/src/range.rs diff --git a/objectstore-server/src/auth/service.rs b/objectstore-server/src/auth/service.rs index 5d51fb9f..b91a3956 100644 --- a/objectstore-server/src/auth/service.rs +++ b/objectstore-server/src/auth/service.rs @@ -3,6 +3,7 @@ use objectstore_service::service::{DeleteResponse, GetResponse, InsertResponse, use objectstore_service::{ClientStream, StorageService}; use objectstore_types::auth::Permission; use objectstore_types::metadata::Metadata; +use objectstore_types::range::ByteRange; use crate::auth::{AuthContext, AuthError}; use crate::endpoints::common::ApiResult; @@ -106,9 +107,13 @@ impl AuthAwareService { } /// Auth-aware wrapper around [`StorageService::get_object`]. - pub async fn get_object(&self, id: ObjectId) -> ApiResult { + pub async fn get_object( + &self, + id: ObjectId, + range: Option, + ) -> ApiResult { self.assert_authorized(Permission::ObjectRead, id.context())?; - Ok(self.service.get_object(id).await?) + Ok(self.service.get_object(id, range).await?) } /// Auth-aware wrapper around [`StorageService::delete_object`]. diff --git a/objectstore-server/src/endpoints/batch.rs b/objectstore-server/src/endpoints/batch.rs index 13d20ac0..c75ece11 100644 --- a/objectstore-server/src/endpoints/batch.rs +++ b/objectstore-server/src/endpoints/batch.rs @@ -147,7 +147,7 @@ async fn convert_to_part( match result { Ok(OpResponse::Got { key, - response: Some((metadata, stream)), + response: Some((metadata, _content_range, stream)), }) => got_to_part(idx, key, metadata, stream, state, context) .await .unwrap_or_else(|e| create_error_part(idx, &e)), diff --git a/objectstore-server/src/endpoints/common.rs b/objectstore-server/src/endpoints/common.rs index a9111d89..f281e1e6 100644 --- a/objectstore-server/src/endpoints/common.rs +++ b/objectstore-server/src/endpoints/common.rs @@ -89,6 +89,9 @@ impl ApiError { ApiError::Service(ServiceError::Client(_)) => StatusCode::BAD_REQUEST, ApiError::Service(ServiceError::Metadata(_)) => StatusCode::BAD_REQUEST, + ApiError::Service(ServiceError::RangeNotSatisfiable { .. }) => { + StatusCode::RANGE_NOT_SATISFIABLE + } ApiError::Service(ServiceError::AtCapacity) => StatusCode::TOO_MANY_REQUESTS, ApiError::Service(_) => { objectstore_log::error!(!!self, "error handling request"); diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index e5d0a8ae..68b36aeb 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -9,6 +9,7 @@ use axum::{Json, Router}; use objectstore_service::error::Error as ServiceError; use objectstore_service::id::{ObjectContext, ObjectId}; use objectstore_types::metadata::Metadata; +use objectstore_types::range::{ByteRange, RangeError}; use serde::Serialize; use crate::auth::AuthAwareService; @@ -64,15 +65,63 @@ async fn object_get( service: AuthAwareService, State(state): State, Xt(id): Xt, + headers: HeaderMap, ) -> ApiResult { + let byte_range = match headers.get(http::header::RANGE) { + Some(value) => { + let header_str = value + .to_str() + .map_err(|_| ApiError::Client("invalid Range header".into()))?; + match ByteRange::parse(header_str) { + Ok(range) => Some(range), + Err(RangeError::UnknownUnit) => None, + Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), + } + } + None => None, + }; + let context = id.context().clone(); - let Some((metadata, stream)) = service.get_object(id).await? else { - return Ok(StatusCode::NOT_FOUND.into_response()); + let result = service.get_object(id, byte_range).await; + + let (metadata, content_range, stream) = match result { + Ok(Some(tuple)) => tuple, + Ok(None) => return Ok(StatusCode::NOT_FOUND.into_response()), + Err(ApiError::Service(ServiceError::RangeNotSatisfiable { total })) => { + return Ok(( + StatusCode::RANGE_NOT_SATISFIABLE, + [(http::header::CONTENT_RANGE, format!("bytes */{total}"))], + [(http::header::ACCEPT_RANGES, "bytes")], + ) + .into_response()); + } + Err(e) => return Err(e), }; + let stream = state.meter_stream(stream, &context); + let metadata_headers = metadata.to_headers("").map_err(ServiceError::from)?; - let headers = metadata.to_headers("").map_err(ServiceError::from)?; - Ok((headers, Body::from_stream(stream)).into_response()) + let status = if content_range.is_full() { + StatusCode::OK + } else { + StatusCode::PARTIAL_CONTENT + }; + + let mut response = (status, metadata_headers, Body::from_stream(stream)).into_response(); + let resp_headers = response.headers_mut(); + resp_headers.insert(http::header::ACCEPT_RANGES, "bytes".parse().unwrap()); + resp_headers.insert( + http::header::CONTENT_LENGTH, + content_range.len().to_string().parse().unwrap(), + ); + if !content_range.is_full() { + resp_headers.insert( + http::header::CONTENT_RANGE, + content_range.to_header_value().parse().unwrap(), + ); + } + + Ok(response) } async fn object_head(service: AuthAwareService, Xt(id): Xt) -> ApiResult { diff --git a/objectstore-server/tests/range_requests.rs b/objectstore-server/tests/range_requests.rs new file mode 100644 index 00000000..8808c288 --- /dev/null +++ b/objectstore-server/tests/range_requests.rs @@ -0,0 +1,240 @@ +//! End-to-end tests for HTTP Range request support. + +use anyhow::Result; +use objectstore_server::config::{AuthZ, Config}; +use objectstore_test::server::TestServer; + +async fn setup() -> (TestServer, String) { + let server = TestServer::with_config(Config { + auth: AuthZ { + enforce: false, + ..Default::default() + }, + ..Default::default() + }) + .await; + + let client = reqwest::Client::new(); + let payload = "Hello, Range Requests!"; // 22 bytes + + let resp = client + .post(server.url("/v1/objects/test/org=1/")) + .header("content-type", "text/plain") + .body(payload) + .send() + .await + .unwrap(); + assert_eq!(resp.status(), reqwest::StatusCode::CREATED); + let body: serde_json::Value = resp.json().await.unwrap(); + let key = body["key"].as_str().unwrap().to_string(); + + (server, key) +} + +#[tokio::test] +async fn no_range_returns_200_with_accept_ranges() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::OK); + assert_eq!( + resp.headers().get("accept-ranges").unwrap().to_str()?, + "bytes" + ); + assert!( + resp.headers().get("content-range").is_none(), + "200 response must not include Content-Range" + ); + + let body = resp.text().await?; + assert_eq!(body, "Hello, Range Requests!"); + Ok(()) +} + +#[tokio::test] +async fn range_prefix_returns_206() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "bytes=0-4") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::PARTIAL_CONTENT); + assert_eq!( + resp.headers().get("accept-ranges").unwrap().to_str()?, + "bytes" + ); + assert_eq!( + resp.headers().get("content-range").unwrap().to_str()?, + "bytes 0-4/22" + ); + assert_eq!(resp.headers().get("content-length").unwrap().to_str()?, "5"); + + let body = resp.text().await?; + assert_eq!(body, "Hello"); + Ok(()) +} + +#[tokio::test] +async fn range_suffix_returns_206() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "bytes=-9") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::PARTIAL_CONTENT); + assert_eq!( + resp.headers().get("content-range").unwrap().to_str()?, + "bytes 13-21/22" + ); + + let body = resp.text().await?; + assert_eq!(body, "Requests!"); + Ok(()) +} + +#[tokio::test] +async fn range_from_offset_returns_206() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "bytes=7-") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::PARTIAL_CONTENT); + assert_eq!( + resp.headers().get("content-range").unwrap().to_str()?, + "bytes 7-21/22" + ); + assert_eq!( + resp.headers().get("content-length").unwrap().to_str()?, + "15" + ); + + let body = resp.text().await?; + assert_eq!(body, "Range Requests!"); + Ok(()) +} + +#[tokio::test] +async fn unknown_range_unit_returns_200_full_body() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "items=0-10") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::OK); + let body = resp.text().await?; + assert_eq!(body, "Hello, Range Requests!"); + Ok(()) +} + +#[tokio::test] +async fn invalid_bytes_range_returns_400() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "bytes=abc-def") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + Ok(()) +} + +#[tokio::test] +async fn multi_range_returns_400() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "bytes=0-4, 10-14") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + Ok(()) +} + +#[tokio::test] +async fn unsatisfiable_range_returns_416() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "bytes=100-200") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::RANGE_NOT_SATISFIABLE); + assert_eq!( + resp.headers().get("content-range").unwrap().to_str()?, + "bytes */22" + ); + assert_eq!( + resp.headers().get("accept-ranges").unwrap().to_str()?, + "bytes" + ); + Ok(()) +} + +#[tokio::test] +async fn range_on_nonexistent_object_returns_404() -> Result<()> { + let (server, _key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url("/v1/objects/test/org=1/nonexistent")) + .header("range", "bytes=0-10") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::NOT_FOUND); + Ok(()) +} + +#[tokio::test] +async fn full_range_returns_200() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + // Request the full object as a range — should still get 200 since it's the full content. + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "bytes=0-21") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::OK); + assert!( + resp.headers().get("content-range").is_none(), + "full-object range should be 200 without Content-Range" + ); + + let body = resp.text().await?; + assert_eq!(body, "Hello, Range Requests!"); + Ok(()) +} diff --git a/objectstore-service/src/backend/bigtable.rs b/objectstore-service/src/backend/bigtable.rs index 56b609fb..9645bb91 100644 --- a/objectstore-service/src/backend/bigtable.rs +++ b/objectstore-service/src/backend/bigtable.rs @@ -38,6 +38,7 @@ use bigtable_rs::google::bigtable::v2::{self, mutation}; use bytes::Bytes; use futures_util::TryStreamExt; use objectstore_types::metadata::{ExpirationPolicy, Metadata}; +use objectstore_types::range::{ByteRange, ContentRange}; use serde::{Deserialize, Serialize}; use tonic::Code; @@ -909,9 +910,11 @@ impl Backend for BigTableBackend { } #[tracing::instrument(level = "trace", fields(?id), skip_all)] - async fn get_object(&self, id: &ObjectId) -> Result { - match self.get_tiered_object(id).await? { - TieredGet::Object(metadata, payload) => Ok(Some((metadata, payload))), + async fn get_object(&self, id: &ObjectId, range: Option) -> Result { + match self.get_tiered_object(id, range).await? { + TieredGet::Object(metadata, content_range, payload) => { + Ok(Some((metadata, content_range, payload))) + } TieredGet::Tombstone(_) => Err(Error::UnexpectedTombstone), TieredGet::NotFound => Ok(None), } @@ -988,7 +991,11 @@ impl HighVolumeBackend for BigTableBackend { } #[tracing::instrument(level = "trace", fields(?id), skip_all)] - async fn get_tiered_object(&self, id: &ObjectId) -> Result { + async fn get_tiered_object( + &self, + id: &ObjectId, + _range: Option, + ) -> Result { objectstore_log::debug!("Reading from Bigtable backend"); let path = id.as_storage_path().to_string().into_bytes(); @@ -1008,7 +1015,8 @@ impl HighVolumeBackend for BigTableBackend { RowData::Object { metadata, payload } => { let mut metadata = metadata; metadata.size = Some(payload.len()); - TieredGet::Object(metadata, crate::stream::single(payload)) + let content_range = ContentRange::full(payload.len() as u64); + TieredGet::Object(metadata, content_range, crate::stream::single(payload)) } }) } @@ -1351,7 +1359,7 @@ mod tests { .put_object(&id, &metadata, stream::single("hello, world")) .await?; - let (obj_meta, stream) = backend.get_object(&id).await?.unwrap(); + let (obj_meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; assert_eq!(payload, b"hello, world"); assert_eq!(obj_meta.content_type, metadata.content_type); @@ -1370,7 +1378,7 @@ mod tests { let backend = create_test_backend().await?; let id = make_id(); - assert!(backend.get_object(&id).await?.is_none()); + assert!(backend.get_object(&id, None).await?.is_none()); assert!(backend.get_metadata(&id).await?.is_none()); backend.delete_object(&id).await?; @@ -1396,7 +1404,7 @@ mod tests { .put_object(&id, &second_metadata, stream::single("world")) .await?; - let (meta, stream) = backend.get_object(&id).await?.unwrap(); + let (meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; assert_eq!(payload, b"world"); assert_eq!(meta.custom, second_metadata.custom); @@ -1413,7 +1421,7 @@ mod tests { create_object(&backend, &id, &metadata, b"hello", SystemTime::now()).await?; backend.delete_object(&id).await?; - assert!(backend.get_object(&id).await?.is_none()); + assert!(backend.get_object(&id, None).await?.is_none()); Ok(()) } @@ -1442,7 +1450,7 @@ mod tests { create_object(&backend, &id1, &metadata, b"hello, world", past_now).await?; // get_object reads the stale row, triggers bump, and returns the pre-bump metadata. - let (pre_obj_meta, _) = backend.get_object(&id1).await?.unwrap(); + let (pre_obj_meta, _, _) = backend.get_object(&id1, None).await?.unwrap(); let pre_obj_expiry = pre_obj_meta.time_expires.unwrap(); // A second get_metadata reads the freshly bumped row. @@ -1467,7 +1475,7 @@ mod tests { assert!(post_expiry > pre_expiry, "bump should extend expiry"); // Payload must be intact after the loaded=false bump (which re-fetches the payload). - let (_, stream) = backend.get_object(&id2).await?.unwrap(); + let (_, _, stream) = backend.get_object(&id2, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; assert_eq!(payload, b"hello, world"); @@ -1517,7 +1525,7 @@ mod tests { }; create_object(&backend, &id, &metadata, b"hello, world", SystemTime::now()).await?; - assert!(backend.get_object(&id).await?.is_none()); + assert!(backend.get_object(&id, None).await?.is_none()); Ok(()) } @@ -1536,7 +1544,7 @@ mod tests { }; create_object(&backend, &id, &metadata, b"hello, world", SystemTime::now()).await?; - assert!(backend.get_object(&id).await?.is_none()); + assert!(backend.get_object(&id, None).await?.is_none()); Ok(()) } @@ -1556,7 +1564,7 @@ mod tests { // empty let id = make_id(); assert!(matches!( - backend.get_tiered_object(&id).await?, + backend.get_tiered_object(&id, None).await?, TieredGet::NotFound )); assert!(matches!( @@ -1573,7 +1581,9 @@ mod tests { }; create_object(&backend, &id, &put_meta, b"payload", SystemTime::now()).await?; - let TieredGet::Object(obj_meta, obj_stream) = backend.get_tiered_object(&id).await? else { + let TieredGet::Object(obj_meta, _, obj_stream) = + backend.get_tiered_object(&id, None).await? + else { panic!("expected TieredGet::Object"); }; let obj_payload = stream::read_to_vec(obj_stream).await?; @@ -1596,7 +1606,7 @@ mod tests { }; create_tombstone(&backend, &hv_id, &tombstone, SystemTime::now()).await?; - match backend.get_tiered_object(&hv_id).await? { + match backend.get_tiered_object(&hv_id, None).await? { TieredGet::Tombstone(get_t) => assert_eq!(get_t.target, lt_id), other => panic!("expected TieredGet::Tombstone, got {other:?}"), } @@ -1624,7 +1634,7 @@ mod tests { .put_non_tombstone(&id, &metadata, Bytes::from_static(b"first")) .await?; assert_eq!(result, None, "expected None on empty row"); - let (_, stream) = backend.get_object(&id).await?.unwrap(); + let (_, _, stream) = backend.get_object(&id, None).await?.unwrap(); assert_eq!(&stream::read_to_vec(stream).await?, b"first"); // object: put_non_tombstone on existing object replaces payload, returns None. @@ -1634,7 +1644,7 @@ mod tests { .put_non_tombstone(&id, &metadata, Bytes::from_static(b"new")) .await?; assert_eq!(result, None, "expected None when overwriting object"); - let (_, stream) = backend.get_object(&id).await?.unwrap(); + let (_, _, stream) = backend.get_object(&id, None).await?.unwrap(); assert_eq!(&stream::read_to_vec(stream).await?, b"new"); // tombstone: put_non_tombstone returns Some(Tombstone) and leaves tombstone intact. @@ -1682,7 +1692,7 @@ mod tests { let metadata = Metadata::default(); create_object(&backend, &id, &metadata, b"hello, world", SystemTime::now()).await?; assert_eq!(backend.delete_non_tombstone(&id).await?, None); - assert!(backend.get_object(&id).await?.is_none()); + assert!(backend.get_object(&id, None).await?.is_none()); // tombstone let id = make_id(); @@ -1736,14 +1746,14 @@ mod tests { }; assert_eq!(t.target, lt_id, "target must round-trip via r column"); assert_eq!(t.expiration_policy, expiration_policy); - match backend.get_tiered_object(&hv_id).await? { + match backend.get_tiered_object(&hv_id, None).await? { TieredGet::Tombstone(t) => assert_eq!(t.target, lt_id, "round-trip via r column"), other => panic!("expected TieredGet::Tombstone, got {other:?}"), } // Legacy reads must error rather than leak tombstone data. assert!(matches!( - backend.get_object(&hv_id).await, + backend.get_object(&hv_id, None).await, Err(Error::UnexpectedTombstone) )); assert!(matches!( @@ -1842,7 +1852,7 @@ mod tests { .compare_and_write(&id, Some(<_id), write.clone()) .await?; assert!(swapped, "expected CAS success with correct target"); - let TieredGet::Object(_, stream) = backend.get_tiered_object(&id).await? else { + let TieredGet::Object(_, _, stream) = backend.get_tiered_object(&id, None).await? else { panic!("expected inline object after swap"); }; assert_eq!(&stream::read_to_vec(stream).await?, payload.as_ref()); @@ -1865,7 +1875,7 @@ mod tests { let committed = backend.compare_and_write(&id, None, write).await?; assert!(committed, "expected CAS success on empty row"); - let TieredGet::Object(_, stream) = backend.get_tiered_object(&id).await? else { + let TieredGet::Object(_, _, stream) = backend.get_tiered_object(&id, None).await? else { panic!("expected Object after CAS-create"); }; assert_eq!(&stream::read_to_vec(stream).await?, payload.as_ref()); @@ -1946,7 +1956,7 @@ mod tests { }; assert_eq!(t.expiration_policy, ExpirationPolicy::Manual); assert!(matches!( - backend.get_tiered_object(&id).await?, + backend.get_tiered_object(&id, None).await?, TieredGet::Tombstone(_) )); @@ -2137,7 +2147,7 @@ mod tests { "put_non_tombstone must succeed (return None) over an expired tombstone" ); - let (_, stream) = backend.get_object(&id).await?.unwrap(); + let (_, _, stream) = backend.get_object(&id, None).await?.unwrap(); assert_eq!(&stream::read_to_vec(stream).await?, b"data"); Ok(()) diff --git a/objectstore-service/src/backend/common.rs b/objectstore-service/src/backend/common.rs index 6b7a676a..03ff7124 100644 --- a/objectstore-service/src/backend/common.rs +++ b/objectstore-service/src/backend/common.rs @@ -3,6 +3,7 @@ use std::fmt; use objectstore_types::metadata::{ExpirationPolicy, Metadata}; +use objectstore_types::range::{ByteRange, ContentRange}; use bytes::Bytes; @@ -22,7 +23,7 @@ pub const USER_AGENT: &str = concat!("sentry-objectstore/", env!("CARGO_PKG_VERS /// Backend response for put operations. pub type PutResponse = (); /// Backend response for get operations. -pub type GetResponse = Option<(Metadata, PayloadStream)>; +pub type GetResponse = Option<(Metadata, ContentRange, PayloadStream)>; /// Backend response for metadata-only get operations. pub type MetadataResponse = Option; /// Backend response for delete operations. @@ -42,15 +43,20 @@ pub trait Backend: fmt::Debug + Send + Sync + 'static { stream: ClientStream, ) -> Result; - /// Retrieves an object at the given path, returning its metadata and a stream of bytes. - async fn get_object(&self, id: &ObjectId) -> Result; + /// Retrieves an object at the given path, returning its metadata, the + /// content range describing which bytes are in the stream, and the payload. + /// + /// When `range` is `Some`, backends that support partial reads should + /// return only the requested byte range. Backends that do not support + /// partial reads may ignore the range and return the full object. + async fn get_object(&self, id: &ObjectId, range: Option) -> Result; /// Retrieves only the metadata for an object, without the payload. async fn get_metadata(&self, id: &ObjectId) -> Result { Ok(self - .get_object(id) + .get_object(id, None) .await? - .map(|(metadata, _stream)| metadata)) + .map(|(metadata, _range, _stream)| metadata)) } /// Deletes the object at the given path. @@ -145,7 +151,10 @@ pub trait HighVolumeBackend: Backend { /// /// Returns [`TieredGet::Tombstone`] instead of synthesizing a tombstone /// object, making the caller's routing logic a compile-time distinction. - async fn get_tiered_object(&self, id: &ObjectId) -> Result; + /// + /// See [`Backend::get_object`] for the semantics of `range`. + async fn get_tiered_object(&self, id: &ObjectId, range: Option) + -> Result; /// Retrieves only metadata with explicit tombstone awareness. /// @@ -200,7 +209,7 @@ pub struct Tombstone { /// Typed response from [`HighVolumeBackend::get_tiered_object`]. pub enum TieredGet { /// A real object was found. - Object(Metadata, PayloadStream), + Object(Metadata, ContentRange, PayloadStream), /// A redirect tombstone was found; the real object lives in the long-term backend. Tombstone(Tombstone), /// No entry exists at this key. @@ -210,9 +219,10 @@ pub enum TieredGet { impl fmt::Debug for TieredGet { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - TieredGet::Object(metadata, _stream) => f + TieredGet::Object(metadata, content_range, _stream) => f .debug_tuple("Object") .field(metadata) + .field(content_range) .finish_non_exhaustive(), TieredGet::Tombstone(info) => f.debug_tuple("Tombstone").field(info).finish(), TieredGet::NotFound => write!(f, "NotFound"), diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index eddef040..53ec8c00 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -10,6 +10,7 @@ use anyhow::Context; use futures_util::{StreamExt, TryStreamExt}; use gcp_auth::TokenProvider; use objectstore_types::metadata::{ExpirationPolicy, Metadata}; +use objectstore_types::range::{ByteRange, ContentRange}; use reqwest::header::HeaderName; use reqwest::{Body, IntoUrl, Method, RequestBuilder, StatusCode, Url, header, multipart}; use serde::{Deserialize, Serialize}; @@ -654,7 +655,7 @@ impl Backend for GcsBackend { } #[tracing::instrument(level = "trace", fields(?id), skip_all)] - async fn get_object(&self, id: &ObjectId) -> Result { + async fn get_object(&self, id: &ObjectId, range: Option) -> Result { objectstore_log::debug!("Reading from GCS backend"); let object_url = self.object_url(id)?; @@ -665,23 +666,51 @@ impl Backend for GcsBackend { let mut download_url = object_url; download_url.query_pairs_mut().append_pair("alt", "media"); + let range_header = range.map(|r| r.to_header_value()); + let payload_response = self .with_retry("get_payload", || async { - self.request(Method::GET, download_url.clone()) - .await? + let mut req = self.request(Method::GET, download_url.clone()).await?; + if let Some(ref value) = range_header { + req = req.header(header::RANGE, value); + } + let resp = req .send() .await - .and_then(|r| r.error_for_status()) + .map_err(|e| Error::reqwest("GCS: get payload", e))?; + + if resp.status() == StatusCode::RANGE_NOT_SATISFIABLE { + let total = resp + .headers() + .get(header::CONTENT_RANGE) + .and_then(|v| v.to_str().ok()) + .and_then(ContentRange::parse_unsatisfiable_total) + .unwrap_or(0); + return Err(Error::RangeNotSatisfiable { total }); + } + + resp.error_for_status() .map_err(|e| Error::reqwest("GCS: get payload", e)) }) .await?; + let content_range = if payload_response.status() == StatusCode::PARTIAL_CONTENT { + payload_response + .headers() + .get(header::CONTENT_RANGE) + .and_then(|v| v.to_str().ok()) + .and_then(ContentRange::parse) + .unwrap_or_else(|| ContentRange::full(metadata.size.unwrap_or(0) as u64)) + } else { + ContentRange::full(metadata.size.unwrap_or(0) as u64) + }; + let stream = payload_response .bytes_stream() .map_err(io::Error::other) .boxed(); - Ok(Some((metadata, stream))) + Ok(Some((metadata, content_range, stream))) } #[tracing::instrument(level = "trace", fields(?id), skip_all)] @@ -1063,7 +1092,7 @@ mod tests { .put_object(&id, &metadata, stream::single("hello, world")) .await?; - let (meta, stream) = backend.get_object(&id).await?.unwrap(); + let (meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; let str_payload = str::from_utf8(&payload).unwrap(); @@ -1081,7 +1110,7 @@ mod tests { let backend = create_test_backend().await?; let id = make_id(); - let result = backend.get_object(&id).await?; + let result = backend.get_object(&id, None).await?; assert!(result.is_none()); Ok(()) @@ -1120,7 +1149,7 @@ mod tests { .put_object(&id, &metadata, stream::single("world")) .await?; - let (meta, stream) = backend.get_object(&id).await?.unwrap(); + let (meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; let str_payload = str::from_utf8(&payload).unwrap(); @@ -1143,7 +1172,7 @@ mod tests { backend.delete_object(&id).await?; - let result = backend.get_object(&id).await?; + let result = backend.get_object(&id, None).await?; assert!(result.is_none()); Ok(()) @@ -1166,7 +1195,7 @@ mod tests { .put_object(&id, &metadata, stream::single("hello, world")) .await?; - let result = backend.get_object(&id).await?; + let result = backend.get_object(&id, None).await?; assert!(result.is_none()); Ok(()) @@ -1189,7 +1218,7 @@ mod tests { .put_object(&id, &metadata, stream::single("hello, world")) .await?; - let result = backend.get_object(&id).await?; + let result = backend.get_object(&id, None).await?; assert!(result.is_none()); Ok(()) @@ -1266,7 +1295,7 @@ mod tests { ); // Verify the payload is still intact after the bump. - let (_, stream) = backend.get_object(&id).await?.unwrap(); + let (_, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; assert_eq!(&payload, b"hello, world"); @@ -1326,7 +1355,7 @@ mod tests { .put_object(&id, &metadata, stream::single(compressed.clone())) .await?; - let (meta, stream) = backend.get_object(&id).await?.unwrap(); + let (meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; assert_eq!(meta.compression, Some(Compression::Zstd)); @@ -1376,7 +1405,7 @@ mod tests { .await?; assert!(result.is_none(), "expected no error on complete"); - let (meta, stream) = backend.get_object(&id).await?.unwrap(); + let (meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; assert_eq!(payload, data); assert_eq!(meta.content_type, "text/plain".to_string()); @@ -1461,7 +1490,7 @@ mod tests { assert!(result.is_none(), "expected no error on complete"); // Object exists after complete - let (_meta, stream) = backend.get_object(&id).await?.unwrap(); + let (_meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; let mut expected = Vec::new(); expected.extend_from_slice(&part1); @@ -1542,7 +1571,7 @@ mod tests { assert!(result.is_none(), "expected no error on complete"); // Verify reassembly order matches part numbers, not upload order. - let (_meta, stream) = backend.get_object(&id).await?.unwrap(); + let (_meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; let mut expected = Vec::new(); expected.extend_from_slice(&part1); @@ -1619,7 +1648,7 @@ mod tests { backend.abort_multipart(&id, &upload_id).await?; // Object should not exist after abort. - let result = backend.get_object(&id).await?; + let result = backend.get_object(&id, None).await?; assert!(result.is_none(), "object should not exist after abort"); Ok(()) @@ -1671,7 +1700,7 @@ mod tests { multipart_put(&backend, &id, &metadata, "hello, world").await?; - let result = backend.get_object(&id).await?; + let result = backend.get_object(&id, None).await?; assert!(result.is_none()); Ok(()) @@ -1688,7 +1717,7 @@ mod tests { multipart_put(&backend, &id, &metadata, "hello, world").await?; - let result = backend.get_object(&id).await?; + let result = backend.get_object(&id, None).await?; assert!(result.is_none()); Ok(()) @@ -1712,7 +1741,7 @@ mod tests { multipart_put(&backend, &id, &metadata, compressed.clone()).await?; - let (meta, stream) = backend.get_object(&id).await?.unwrap(); + let (meta, _, stream) = backend.get_object(&id, None).await?.unwrap(); let payload = stream::read_to_vec(stream).await?; assert_eq!(meta.compression, Some(Compression::Zstd)); diff --git a/objectstore-service/src/backend/in_memory.rs b/objectstore-service/src/backend/in_memory.rs index 253aafcd..6560ee17 100644 --- a/objectstore-service/src/backend/in_memory.rs +++ b/objectstore-service/src/backend/in_memory.rs @@ -9,6 +9,8 @@ use std::collections::{BTreeMap, HashMap}; use std::sync::{Arc, Mutex}; use std::time::SystemTime; +use objectstore_types::range::{ByteRange, ContentRange}; + use bytes::{Bytes, BytesMut}; use futures_util::TryStreamExt; use objectstore_types::metadata::Metadata; @@ -118,15 +120,16 @@ impl super::common::Backend for InMemoryBackend { Ok(()) } - async fn get_object(&self, id: &ObjectId) -> Result { + async fn get_object(&self, id: &ObjectId, _range: Option) -> Result { let entry = self.store.lock().unwrap().get(id).cloned(); match entry { None => Ok(None), Some(StoreEntry::Tombstone(_)) => Err(Error::UnexpectedTombstone), Some(StoreEntry::Object(mut metadata, bytes)) => { metadata.size = Some(bytes.len()); + let content_range = ContentRange::full(bytes.len() as u64); let stream = crate::stream::single(bytes); - Ok(Some((metadata, stream))) + Ok(Some((metadata, content_range, stream))) } } } @@ -156,14 +159,19 @@ impl HighVolumeBackend for InMemoryBackend { Ok(None) } - async fn get_tiered_object(&self, id: &ObjectId) -> Result { + async fn get_tiered_object( + &self, + id: &ObjectId, + _range: Option, + ) -> Result { let entry = self.store.lock().unwrap().get(id).cloned(); Ok(match entry { None => TieredGet::NotFound, Some(StoreEntry::Tombstone(tombstone)) => TieredGet::Tombstone(tombstone), Some(StoreEntry::Object(mut metadata, bytes)) => { metadata.size = Some(bytes.len()); - TieredGet::Object(metadata, crate::stream::single(bytes)) + let content_range = ContentRange::full(bytes.len() as u64); + TieredGet::Object(metadata, content_range, crate::stream::single(bytes)) } }) } @@ -512,7 +520,7 @@ mod tests { .unwrap(); assert!(result.is_none(), "expected no error on complete"); - let (meta, body) = backend.get_object(&id).await.unwrap().unwrap(); + let (meta, _, body) = backend.get_object(&id, None).await.unwrap().unwrap(); let payload = stream::read_to_vec(body).await.unwrap(); assert_eq!(payload, data); assert_eq!(meta.content_type, "text/plain".to_string()); @@ -593,7 +601,7 @@ mod tests { .unwrap(); assert!(result.is_none()); - let (_, body) = backend.get_object(&id).await.unwrap().unwrap(); + let (_, _, body) = backend.get_object(&id, None).await.unwrap().unwrap(); let payload = stream::read_to_vec(body).await.unwrap(); assert_eq!(payload, b"aaaabbbbcc"); } @@ -669,7 +677,7 @@ mod tests { backend.abort_multipart(&id, &upload_id).await.unwrap(); - let result = backend.get_object(&id).await.unwrap(); + let result = backend.get_object(&id, None).await.unwrap(); assert!(result.is_none(), "object should not exist after abort"); } diff --git a/objectstore-service/src/backend/local_fs.rs b/objectstore-service/src/backend/local_fs.rs index c4518a18..88f31d94 100644 --- a/objectstore-service/src/backend/local_fs.rs +++ b/objectstore-service/src/backend/local_fs.rs @@ -7,8 +7,9 @@ use std::time::SystemTime; use futures_util::StreamExt; use objectstore_types::metadata::Metadata; +use objectstore_types::range::{ByteRange, ContentRange}; use tokio::fs::OpenOptions; -use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader, BufWriter}; +use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader, BufWriter}; use tokio_util::io::{ReaderStream, StreamReader}; use crate::backend::common::{ @@ -115,7 +116,7 @@ impl Backend for LocalFsBackend { // TODO: Return `Ok(None)` if object is found but past expiry #[tracing::instrument(level = "trace", fields(?id), skip_all)] - async fn get_object(&self, id: &ObjectId) -> Result { + async fn get_object(&self, id: &ObjectId, range: Option) -> Result { objectstore_log::debug!("Reading from local_fs backend"); let path = self.path.join(id.as_storage_path().to_string()); let file = match OpenOptions::new().read(true).open(path).await { @@ -141,8 +142,30 @@ impl Backend for LocalFsBackend { .ok_or_else(|| Error::generic("local-fs file corrupted: shorter than header"))?; metadata.size = Some(payload_size as usize); - let stream = ReaderStream::new(reader); - Ok(Some((metadata, stream.boxed()))) + let content_range = match range { + Some(byte_range) => match byte_range.resolve(payload_size) { + Some(cr) => cr, + None => { + return Err(Error::RangeNotSatisfiable { + total: payload_size, + }); + } + }, + None => ContentRange::full(payload_size), + }; + + if content_range.is_full() { + let stream = ReaderStream::new(reader); + return Ok(Some((metadata, content_range, stream.boxed()))); + } + + use tokio::io::AsyncSeekExt; + reader + .seek(std::io::SeekFrom::Current(content_range.start as i64)) + .await?; + let limited = reader.take(content_range.len()); + let stream = ReaderStream::new(limited); + Ok(Some((metadata, content_range, stream.boxed()))) } #[tracing::instrument(level = "trace", fields(?id), skip_all)] @@ -460,7 +483,7 @@ mod tests { .await .unwrap(); - let (read_metadata, stream) = backend.get_object(&id).await.unwrap().unwrap(); + let (read_metadata, _, stream) = backend.get_object(&id, None).await.unwrap().unwrap(); let file_contents: BytesMut = stream.try_collect().await.unwrap(); assert_eq!( @@ -578,7 +601,7 @@ mod tests { .unwrap(); assert!(result.is_none(), "expected no error on complete"); - let (meta, body) = backend.get_object(&id).await.unwrap().unwrap(); + let (meta, _, body) = backend.get_object(&id, None).await.unwrap().unwrap(); let payload: BytesMut = body.try_collect().await.unwrap(); assert_eq!(payload.as_ref(), data); assert_eq!(meta.content_type, "text/plain".to_string()); @@ -659,7 +682,7 @@ mod tests { .unwrap(); assert!(result.is_none()); - let (_, body) = backend.get_object(&id).await.unwrap().unwrap(); + let (_, _, body) = backend.get_object(&id, None).await.unwrap().unwrap(); let payload: BytesMut = body.try_collect().await.unwrap(); assert_eq!(payload.as_ref(), b"aaaabbbbcc"); } @@ -735,7 +758,7 @@ mod tests { backend.abort_multipart(&id, &upload_id).await.unwrap(); - let result = backend.get_object(&id).await.unwrap(); + let result = backend.get_object(&id, None).await.unwrap(); assert!(result.is_none(), "object should not exist after abort"); } diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 51069406..6346beee 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -5,6 +5,7 @@ use std::{fmt, io}; use futures_util::{StreamExt, TryStreamExt}; use objectstore_types::metadata::{ExpirationPolicy, Metadata}; +use objectstore_types::range::{ByteRange, ContentRange}; use reqwest::header::HeaderMap; use reqwest::{Body, IntoUrl, Method, RequestBuilder, StatusCode}; @@ -163,24 +164,34 @@ where &self, method: Method, id: &ObjectId, + extra_headers: Option, ) -> Result> { let object_url = self.object_url(id); - let response = self - .request(method, &object_url) - .await? - .send() - .await - .map_err(|cause| Error::Reqwest { - context: "S3: failed to send request".to_string(), - cause, - })?; + let mut builder = self.request(method, &object_url).await?; + if let Some(headers) = extra_headers { + builder = builder.headers(headers); + } + let response = builder.send().await.map_err(|cause| Error::Reqwest { + context: "S3: failed to send request".to_string(), + cause, + })?; if response.status() == StatusCode::NOT_FOUND { objectstore_log::debug!("Object not found"); return Ok(None); } + if response.status() == StatusCode::RANGE_NOT_SATISFIABLE { + let total = response + .headers() + .get(reqwest::header::CONTENT_RANGE) + .and_then(|v| v.to_str().ok()) + .and_then(ContentRange::parse_unsatisfiable_total) + .unwrap_or(0); + return Err(Error::RangeNotSatisfiable { total }); + } + let response = response .error_for_status() .map_err(|cause| Error::Reqwest { @@ -294,21 +305,40 @@ impl Backend for S3CompatibleBackend { } #[tracing::instrument(level = "trace", fields(?id), skip_all)] - async fn get_object(&self, id: &ObjectId) -> Result { + async fn get_object(&self, id: &ObjectId, range: Option) -> Result { objectstore_log::debug!("Reading from s3_compatible backend"); - let Some((metadata, response)) = self.request_object(Method::GET, id).await? else { + let extra_headers = range.map(|r| { + let mut headers = reqwest::header::HeaderMap::new(); + headers.insert(reqwest::header::RANGE, r.to_header_value().parse().unwrap()); + headers + }); + + let Some((metadata, response)) = + self.request_object(Method::GET, id, extra_headers).await? + else { return Ok(None); }; + let content_range = if response.status() == StatusCode::PARTIAL_CONTENT { + response + .headers() + .get(reqwest::header::CONTENT_RANGE) + .and_then(|v| v.to_str().ok()) + .and_then(ContentRange::parse) + .unwrap_or_else(|| ContentRange::full(metadata.size.unwrap_or(0) as u64)) + } else { + ContentRange::full(metadata.size.unwrap_or(0) as u64) + }; + let stream = response.bytes_stream().map_err(io::Error::other); - Ok(Some((metadata, stream.boxed()))) + Ok(Some((metadata, content_range, stream.boxed()))) } #[tracing::instrument(level = "trace", fields(?id), skip_all)] async fn get_metadata(&self, id: &ObjectId) -> Result { objectstore_log::debug!("Reading metadata from s3_compatible backend"); - let response = self.request_object(Method::HEAD, id).await?; + let response = self.request_object(Method::HEAD, id, None).await?; Ok(response.map(|(metadata, _)| metadata)) } diff --git a/objectstore-service/src/backend/testing.rs b/objectstore-service/src/backend/testing.rs index 4b767e21..14265bc0 100644 --- a/objectstore-service/src/backend/testing.rs +++ b/objectstore-service/src/backend/testing.rs @@ -39,6 +39,8 @@ use std::fmt; use bytes::Bytes; use objectstore_types::metadata::Metadata; +use objectstore_types::range::ByteRange; + use crate::backend::common::{ Backend, DeleteResponse, GetResponse, HighVolumeBackend, MetadataResponse, MultipartUploadBackend, PutResponse, TieredGet, TieredMetadata, TieredWrite, Tombstone, @@ -81,8 +83,13 @@ pub trait Hooks: fmt::Debug + Send + Sync + 'static { } /// Intercepts [`Backend::get_object`]. Default delegates to `inner`. - async fn get_object(&self, inner: &InMemoryBackend, id: &ObjectId) -> Result { - inner.get_object(id).await + async fn get_object( + &self, + inner: &InMemoryBackend, + id: &ObjectId, + range: Option, + ) -> Result { + inner.get_object(id, range).await } /// Intercepts [`Backend::get_metadata`]. Default delegates to `inner`. @@ -122,8 +129,13 @@ pub trait Hooks: fmt::Debug + Send + Sync + 'static { } /// Intercepts [`HighVolumeBackend::get_tiered_object`]. Default delegates to `inner`. - async fn get_tiered_object(&self, inner: &InMemoryBackend, id: &ObjectId) -> Result { - inner.get_tiered_object(id).await + async fn get_tiered_object( + &self, + inner: &InMemoryBackend, + id: &ObjectId, + range: Option, + ) -> Result { + inner.get_tiered_object(id, range).await } /// Intercepts [`HighVolumeBackend::get_tiered_metadata`]. Default delegates to `inner`. @@ -280,8 +292,8 @@ impl Backend for TestBackend { .await } - async fn get_object(&self, id: &ObjectId) -> Result { - self.hooks.get_object(&self.inner, id).await + async fn get_object(&self, id: &ObjectId, range: Option) -> Result { + self.hooks.get_object(&self.inner, id, range).await } async fn get_metadata(&self, id: &ObjectId) -> Result { @@ -310,8 +322,12 @@ impl HighVolumeBackend for TestBackend { .await } - async fn get_tiered_object(&self, id: &ObjectId) -> Result { - self.hooks.get_tiered_object(&self.inner, id).await + async fn get_tiered_object( + &self, + id: &ObjectId, + range: Option, + ) -> Result { + self.hooks.get_tiered_object(&self.inner, id, range).await } async fn get_tiered_metadata(&self, id: &ObjectId) -> Result { diff --git a/objectstore-service/src/backend/tiered.rs b/objectstore-service/src/backend/tiered.rs index 4d329c3e..3ee1ee1a 100644 --- a/objectstore-service/src/backend/tiered.rs +++ b/objectstore-service/src/backend/tiered.rs @@ -105,6 +105,7 @@ use base64::Engine as _; use bytes::Bytes; use futures_util::{Stream, StreamExt}; use objectstore_types::metadata::Metadata; +use objectstore_types::range::ByteRange; use serde::{Deserialize, Serialize}; use crate::backend::changelog::{Change, ChangeGuard, ChangeLog, ChangeManager, ChangePhase}; @@ -411,17 +412,21 @@ impl Backend for TieredStorage { Ok(()) } - async fn get_object(&self, id: &ObjectId) -> Result { + async fn get_object(&self, id: &ObjectId, range: Option) -> Result { let start = Instant::now(); - let hv_result = self.inner.high_volume.get_tiered_object(id).await?; + let hv_result = self.inner.high_volume.get_tiered_object(id, range).await?; let (result, backend_choice) = match hv_result { TieredGet::NotFound => (None, BackendChoice::HighVolume), - TieredGet::Object(metadata, stream) => { - (Some((metadata, stream)), BackendChoice::HighVolume) - } + TieredGet::Object(metadata, content_range, stream) => ( + Some((metadata, content_range, stream)), + BackendChoice::HighVolume, + ), TieredGet::Tombstone(tombstone) => ( - self.inner.long_term.get_object(&tombstone.target).await?, + self.inner + .long_term + .get_object(&tombstone.target, range) + .await?, BackendChoice::LongTerm, ), }; @@ -434,7 +439,7 @@ impl Backend for TieredStorage { backend_type = backend_type, ); - if let Some((ref metadata, _)) = result { + if let Some((ref metadata, _, _)) = result { if let Some(size) = metadata.size { objectstore_metrics::record!( "get.size" = size, @@ -870,7 +875,7 @@ mod tests { let (storage, _hv, _lt, _) = make_tiered_storage(); let id = make_id("does-not-exist"); - assert!(storage.get_object(&id).await.unwrap().is_none()); + assert!(storage.get_object(&id, None).await.unwrap().is_none()); assert!(storage.get_metadata(&id).await.unwrap().is_none()); } @@ -898,7 +903,7 @@ mod tests { assert!(hv.contains(&id), "expected in high-volume"); assert!(!lt.contains(&id), "leaked to long-term"); - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload); @@ -941,7 +946,7 @@ mod tests { assert_eq!(lt_meta.expiration_policy, metadata_in.expiration_policy); // get_object follows the tombstone and returns the correct payload. - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload); @@ -1014,7 +1019,7 @@ mod tests { lt.get(<_id_1).expect_not_found(); lt.get(<_id_2).expect_object(); - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload2); } @@ -1034,7 +1039,7 @@ mod tests { storage.delete_object(&id).await.unwrap(); hv.get(&id).expect_not_found(); - assert!(storage.get_object(&id).await.unwrap().is_none()); + assert!(storage.get_object(&id, None).await.unwrap().is_none()); } #[tokio::test] @@ -1106,7 +1111,7 @@ mod tests { // The orphaned GCS blob remains but the object is unreachable through the service. assert!( - storage.get_object(&id).await.unwrap().is_none(), + storage.get_object(&id, None).await.unwrap().is_none(), "object should be unreachable after tombstone is deleted" ); } @@ -1259,7 +1264,7 @@ mod tests { lt.remove(<_id); assert!( - storage.get_object(&id).await.unwrap().is_none(), + storage.get_object(&id, None).await.unwrap().is_none(), "orphan tombstone should resolve to None on get_object" ); assert!( @@ -1296,7 +1301,7 @@ mod tests { .unwrap(); // get_object must follow the tombstone and find the object via the lt_id target. - let (_, s) = storage.get_object(&hv_id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&hv_id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload); diff --git a/objectstore-service/src/error.rs b/objectstore-service/src/error.rs index 186a22d9..f59bba70 100644 --- a/objectstore-service/src/error.rs +++ b/objectstore-service/src/error.rs @@ -73,6 +73,13 @@ pub enum Error { #[error("unexpected tombstone")] UnexpectedTombstone, + /// The requested byte range is not satisfiable for the object's size. + #[error("range not satisfiable (object size: {total} bytes)")] + RangeNotSatisfiable { + /// Total size of the object in bytes. + total: u64, + }, + /// The service has reached its concurrency limit and cannot accept more operations. #[error("concurrency limit reached")] AtCapacity, @@ -132,6 +139,7 @@ impl Error { // Malformed client input at DEBUG level Self::Client(_) => Level::DEBUG, Self::Metadata(_) => Level::DEBUG, + Self::RangeNotSatisfiable { .. } => Level::DEBUG, // Like rate limits, we treat capacity errors as warnings Self::AtCapacity => Level::WARN, // All other errors are service or backend failures diff --git a/objectstore-service/src/service.rs b/objectstore-service/src/service.rs index 9ec10fba..0cab30fe 100644 --- a/objectstore-service/src/service.rs +++ b/objectstore-service/src/service.rs @@ -9,6 +9,7 @@ use std::future::Future; use std::sync::Arc; use objectstore_types::metadata::Metadata; +use objectstore_types::range::{ByteRange, ContentRange}; use crate::backend::common::Backend; use crate::concurrency::ConcurrencyLimiter; @@ -18,7 +19,7 @@ use crate::stream::{ClientStream, PayloadStream}; use crate::streaming::StreamExecutor; /// Service response for [`StorageService::get_object`]. -pub type GetResponse = Option<(Metadata, PayloadStream)>; +pub type GetResponse = Option<(Metadata, ContentRange, PayloadStream)>; /// Service response for [`StorageService::get_metadata`]. pub type MetadataResponse = Option; /// Service response for [`StorageService::insert_object`]. @@ -206,9 +207,13 @@ impl StorageService { } /// Streams the contents of an object. - pub async fn get_object(&self, id: ObjectId) -> Result { + /// + /// When `range` is `Some`, backends that support partial reads return only + /// the requested byte range. The returned [`ContentRange`] always describes + /// which bytes are in the stream, whether partial or full. + pub async fn get_object(&self, id: ObjectId, range: Option) -> Result { let inner = Arc::clone(&self.inner); - self.spawn("get", async move { inner.get_object(&id).await }) + self.spawn("get", async move { inner.get_object(&id, range).await }) .await } @@ -243,6 +248,7 @@ mod tests { use bytes::BytesMut; use futures_util::TryStreamExt; use objectstore_types::metadata::Metadata; + use objectstore_types::range::ByteRange; use objectstore_types::scope::{Scope, Scopes}; use super::*; @@ -298,7 +304,7 @@ mod tests { .await .unwrap(); - let (_metadata, stream) = service.get_object(key).await.unwrap().unwrap(); + let (_metadata, _, stream) = service.get_object(key, None).await.unwrap().unwrap(); let file_contents: BytesMut = stream.try_collect().await.unwrap(); assert_eq!(file_contents.as_ref(), b"oh hai!"); @@ -324,7 +330,7 @@ mod tests { .await .unwrap(); - let (_metadata, stream) = service.get_object(key).await.unwrap().unwrap(); + let (_metadata, _, stream) = service.get_object(key, None).await.unwrap().unwrap(); let file_contents: BytesMut = stream.try_collect().await.unwrap(); assert_eq!(file_contents.as_ref(), b"oh hai!"); @@ -367,7 +373,7 @@ mod tests { .unwrap(); // Sanity: the object is readable through the service (follows the tombstone). - let (_, stream) = service.get_object(id.clone()).await.unwrap().unwrap(); + let (_, _, stream) = service.get_object(id.clone(), None).await.unwrap().unwrap(); let body: BytesMut = stream.try_collect().await.unwrap(); assert_eq!(body.len(), payload_len); @@ -375,11 +381,11 @@ mod tests { service.delete_object(id.clone()).await.unwrap(); // The tombstone in BigTable should be gone, so the service returns None. - let after_delete = service.get_object(id.clone()).await.unwrap(); + let after_delete = service.get_object(id.clone(), None).await.unwrap(); assert!(after_delete.is_none(), "tombstone not deleted"); // The real object in GCS must also be gone — no orphan. - let orphan = gcs_backend.get_object(&id).await.unwrap(); + let orphan = gcs_backend.get_object(&id, None).await.unwrap(); assert!(orphan.is_none(), "object leaked"); } @@ -399,7 +405,7 @@ mod tests { .await .unwrap(); - let (_, stream) = service.get_object(id).await.unwrap().unwrap(); + let (_, _, stream) = service.get_object(id, None).await.unwrap().unwrap(); let body: BytesMut = stream.try_collect().await.unwrap(); assert_eq!(body.as_ref(), b"hello world"); } @@ -423,7 +429,7 @@ mod tests { service.delete_object(id.clone()).await.unwrap(); - let after = service.get_object(id).await.unwrap(); + let after = service.get_object(id, None).await.unwrap(); assert!(after.is_none()); } @@ -436,6 +442,7 @@ mod tests { &self, _inner: &InMemoryBackend, _id: &ObjectId, + _range: Option, ) -> Result { panic!("intentional panic in get_object"); } @@ -446,7 +453,7 @@ mod tests { let service = StorageService::new(Box::new(TestBackend::new(PanicOnGet))); let id = ObjectId::new(make_context(), "panic-test".into()); - let result = service.get_object(id).await; + let result = service.get_object(id, None).await; let Err(Error::Panic(msg)) = result else { panic!("expected Panic error"); @@ -642,11 +649,11 @@ mod tests { // First operation panics — the permit must still be released. let id = ObjectId::new(make_context(), "panic-permit".into()); - let result = service.get_object(id.clone()).await; + let result = service.get_object(id.clone(), None).await; assert!(matches!(result, Err(Error::Panic(_)))); // Second operation should succeed in acquiring the permit (not AtCapacity). - let result = service.get_object(id).await; + let result = service.get_object(id, None).await; assert!( !matches!(result, Err(Error::AtCapacity)), "permit was not released after panic" diff --git a/objectstore-service/src/streaming.rs b/objectstore-service/src/streaming.rs index 6bd4ed15..597f4a53 100644 --- a/objectstore-service/src/streaming.rs +++ b/objectstore-service/src/streaming.rs @@ -289,7 +289,7 @@ async fn execute_operation( match op { Operation::Get(get) => { let id = ObjectId::new(context, get.key); - let response = backend.get_object(&id).await?; + let response = backend.get_object(&id, None).await?; Ok(OpResponse::Got { key: id.key, response, diff --git a/objectstore-types/src/lib.rs b/objectstore-types/src/lib.rs index 2b7f9e28..ebb5c0c5 100644 --- a/objectstore-types/src/lib.rs +++ b/objectstore-types/src/lib.rs @@ -32,4 +32,5 @@ pub mod auth; pub mod metadata; +pub mod range; pub mod scope; diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs new file mode 100644 index 00000000..cc9c669b --- /dev/null +++ b/objectstore-types/src/range.rs @@ -0,0 +1,372 @@ +//! HTTP Range request types for partial content retrieval. +//! +//! [`ByteRange`] represents a parsed single byte-range from a `Range` HTTP +//! header. [`ContentRange`] describes which portion of an object is present in +//! a response body, used to build `Content-Range` response headers and choose +//! between 200 and 206 status codes. + +use std::fmt; + +/// A single byte-range parsed from the `Range` HTTP request header. +/// +/// Only the `bytes` range unit is supported, and only a single range specifier +/// (multi-range requests are rejected at parse time). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ByteRange { + /// `bytes=N-M` — from byte offset N to byte offset M (both inclusive). + FromTo(u64, u64), + /// `bytes=N-` — from byte offset N to the end of the representation. + From(u64), + /// `bytes=-N` — the last N bytes of the representation. + Suffix(u64), +} + +impl ByteRange { + /// Parses a `Range` header value into a [`ByteRange`]. + /// + /// Only `bytes=` ranges with a single specifier are accepted. Multi-range + /// requests (containing commas) and non-`bytes` units are rejected. + pub fn parse(header: &str) -> Result { + let spec = header + .strip_prefix("bytes=") + .ok_or(RangeError::UnknownUnit)?; + + if spec.contains(',') { + return Err(RangeError::MultiRangeNotSupported); + } + + let (start_str, end_str) = spec.split_once('-').ok_or(RangeError::InvalidRange)?; + + if start_str.is_empty() { + // bytes=-N (suffix) + let n: u64 = end_str.parse().map_err(|_| RangeError::InvalidRange)?; + if n == 0 { + return Err(RangeError::InvalidRange); + } + Ok(ByteRange::Suffix(n)) + } else if end_str.is_empty() { + // bytes=N- (from offset to end) + let start: u64 = start_str.parse().map_err(|_| RangeError::InvalidRange)?; + Ok(ByteRange::From(start)) + } else { + // bytes=N-M + let start: u64 = start_str.parse().map_err(|_| RangeError::InvalidRange)?; + let end: u64 = end_str.parse().map_err(|_| RangeError::InvalidRange)?; + if start > end { + return Err(RangeError::InvalidRange); + } + Ok(ByteRange::FromTo(start, end)) + } + } + + /// Formats this range as a `Range` header value (e.g. `bytes=0-499`). + pub fn to_header_value(&self) -> String { + match self { + ByteRange::FromTo(s, e) => format!("bytes={s}-{e}"), + ByteRange::From(s) => format!("bytes={s}-"), + ByteRange::Suffix(n) => format!("bytes=-{n}"), + } + } + + /// Resolves this range against a known total size, returning the concrete + /// byte offsets and total, or `None` if the range is unsatisfiable. + pub fn resolve(self, total: u64) -> Option { + if total == 0 { + return None; + } + + let (start, end) = match self { + ByteRange::FromTo(s, e) => { + if s >= total { + return None; + } + (s, e.min(total - 1)) + } + ByteRange::From(s) => { + if s >= total { + return None; + } + (s, total - 1) + } + ByteRange::Suffix(n) => { + let start = total.saturating_sub(n); + (start, total - 1) + } + }; + + Some(ContentRange { start, end, total }) + } +} + +/// Describes which bytes of the full object are present in the response body. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ContentRange { + /// Byte offset of the first byte in the body (inclusive). + pub start: u64, + /// Byte offset of the last byte in the body (inclusive). + pub end: u64, + /// Total size of the complete object in bytes. + pub total: u64, +} + +impl ContentRange { + /// Creates a [`ContentRange`] representing the entire object. + pub fn full(total: u64) -> Self { + Self { + start: 0, + end: total.saturating_sub(1), + total, + } + } + + /// Parses a `Content-Range` response header value (e.g. `bytes 0-499/1234`). + pub fn parse(header: &str) -> Option { + let rest = header.strip_prefix("bytes ")?; + let (range_part, total_str) = rest.split_once('/')?; + let total: u64 = total_str.parse().ok()?; + let (start_str, end_str) = range_part.split_once('-')?; + let start: u64 = start_str.parse().ok()?; + let end: u64 = end_str.parse().ok()?; + Some(Self { start, end, total }) + } + + /// Parses the total from an unsatisfiable `Content-Range` header (`bytes */1234`). + pub fn parse_unsatisfiable_total(header: &str) -> Option { + let rest = header.strip_prefix("bytes */")?; + rest.parse().ok() + } + + /// Returns the number of bytes in this range. + pub fn len(&self) -> u64 { + if self.total == 0 { + return 0; + } + self.end - self.start + 1 + } + + /// Returns `true` if this range covers the entire object. + pub fn is_full(&self) -> bool { + self.total == 0 || (self.start == 0 && self.len() == self.total) + } + + /// Formats the value for a `Content-Range` response header. + pub fn to_header_value(&self) -> String { + format!("bytes {}-{}/{}", self.start, self.end, self.total) + } +} + +impl fmt::Display for ContentRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "bytes {}-{}/{}", self.start, self.end, self.total) + } +} + +/// Errors that can occur when parsing a `Range` header. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RangeError { + /// The header contained multiple range specifiers separated by commas. + MultiRangeNotSupported, + /// The header value could not be parsed as a valid byte range. + InvalidRange, + /// The range unit is not `bytes` (e.g. `items=0-10`). Per RFC 9110, unknown + /// units should be ignored and the request served as a normal full-body response. + UnknownUnit, +} + +impl fmt::Display for RangeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RangeError::MultiRangeNotSupported => { + write!(f, "multi-range requests are not supported") + } + RangeError::InvalidRange => write!(f, "invalid Range header"), + RangeError::UnknownUnit => write!(f, "unknown range unit"), + } + } +} + +impl std::error::Error for RangeError {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_from_to() { + assert_eq!( + ByteRange::parse("bytes=0-499"), + Ok(ByteRange::FromTo(0, 499)) + ); + } + + #[test] + fn parse_from() { + assert_eq!(ByteRange::parse("bytes=500-"), Ok(ByteRange::From(500))); + } + + #[test] + fn parse_suffix() { + assert_eq!(ByteRange::parse("bytes=-100"), Ok(ByteRange::Suffix(100))); + } + + #[test] + fn parse_rejects_multi_range() { + assert_eq!( + ByteRange::parse("bytes=0-10, 20-30"), + Err(RangeError::MultiRangeNotSupported) + ); + } + + #[test] + fn parse_returns_unknown_unit_for_non_bytes() { + assert_eq!(ByteRange::parse("items=0-10"), Err(RangeError::UnknownUnit)); + } + + #[test] + fn parse_rejects_inverted_range() { + assert_eq!( + ByteRange::parse("bytes=500-100"), + Err(RangeError::InvalidRange) + ); + } + + #[test] + fn parse_rejects_zero_suffix() { + assert_eq!(ByteRange::parse("bytes=-0"), Err(RangeError::InvalidRange)); + } + + #[test] + fn resolve_from_to() { + let range = ByteRange::FromTo(0, 499).resolve(1000); + assert_eq!( + range, + Some(ContentRange { + start: 0, + end: 499, + total: 1000 + }) + ); + } + + #[test] + fn resolve_from_to_clamped() { + let range = ByteRange::FromTo(0, 9999).resolve(500); + assert_eq!( + range, + Some(ContentRange { + start: 0, + end: 499, + total: 500 + }) + ); + } + + #[test] + fn resolve_from() { + let range = ByteRange::From(500).resolve(1000); + assert_eq!( + range, + Some(ContentRange { + start: 500, + end: 999, + total: 1000 + }) + ); + } + + #[test] + fn resolve_suffix() { + let range = ByteRange::Suffix(100).resolve(1000); + assert_eq!( + range, + Some(ContentRange { + start: 900, + end: 999, + total: 1000 + }) + ); + } + + #[test] + fn resolve_suffix_larger_than_total() { + let range = ByteRange::Suffix(2000).resolve(1000); + assert_eq!( + range, + Some(ContentRange { + start: 0, + end: 999, + total: 1000 + }) + ); + } + + #[test] + fn resolve_unsatisfiable() { + assert_eq!(ByteRange::FromTo(1000, 2000).resolve(500), None); + assert_eq!(ByteRange::From(500).resolve(500), None); + assert_eq!(ByteRange::FromTo(0, 0).resolve(0), None); + } + + #[test] + fn content_range_full() { + let cr = ContentRange::full(1000); + assert_eq!(cr.start, 0); + assert_eq!(cr.end, 999); + assert_eq!(cr.total, 1000); + assert_eq!(cr.len(), 1000); + assert!(cr.is_full()); + assert_eq!(cr.to_header_value(), "bytes 0-999/1000"); + } + + #[test] + fn content_range_partial_is_not_full() { + let cr = ContentRange { + start: 0, + end: 499, + total: 1000, + }; + assert!(!cr.is_full()); + assert_eq!(cr.len(), 500); + } + + #[test] + fn content_range_full_zero_bytes() { + let cr = ContentRange::full(0); + assert_eq!(cr.len(), 0); + assert!(cr.is_full()); + } + + #[test] + fn byte_range_to_header_value() { + assert_eq!(ByteRange::FromTo(0, 499).to_header_value(), "bytes=0-499"); + assert_eq!(ByteRange::From(500).to_header_value(), "bytes=500-"); + assert_eq!(ByteRange::Suffix(100).to_header_value(), "bytes=-100"); + } + + #[test] + fn content_range_parse() { + assert_eq!( + ContentRange::parse("bytes 0-499/1234"), + Some(ContentRange { + start: 0, + end: 499, + total: 1234 + }) + ); + assert_eq!(ContentRange::parse("bytes */1234"), None); + assert_eq!(ContentRange::parse("invalid"), None); + } + + #[test] + fn content_range_parse_unsatisfiable_total() { + assert_eq!( + ContentRange::parse_unsatisfiable_total("bytes */1234"), + Some(1234) + ); + assert_eq!( + ContentRange::parse_unsatisfiable_total("bytes 0-499/1234"), + None + ); + assert_eq!(ContentRange::parse_unsatisfiable_total("invalid"), None); + } +} From e1fa2277ddf558d9c63dabfdc35d63b816d5f7f7 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 26 May 2026 16:58:45 +0200 Subject: [PATCH 02/45] improve --- objectstore-types/src/range.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index cc9c669b..f9881585 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -144,6 +144,10 @@ impl ContentRange { self.end - self.start + 1 } + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + /// Returns `true` if this range covers the entire object. pub fn is_full(&self) -> bool { self.total == 0 || (self.start == 0 && self.len() == self.total) From d0fbeb665a1b50c4aa46d63c1978a185066d7adc Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 26 May 2026 16:59:05 +0200 Subject: [PATCH 03/45] improve --- objectstore-types/src/range.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index f9881585..f44de76d 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -144,6 +144,7 @@ impl ContentRange { self.end - self.start + 1 } + /// Returns `true` if this range is empty. pub fn is_empty(&self) -> bool { self.len() == 0 } From f01d018ba9892340b988774dc027ffc9b8b639b4 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 26 May 2026 17:04:28 +0200 Subject: [PATCH 04/45] fix: Case-insensitive range unit matching and multi-range fallback - Parse Range header unit case-insensitively per RFC 9110 (e.g. `Bytes=0-4` now works instead of being ignored) - Multi-range requests fall back to full 200 response instead of returning 400, matching RFC semantics for unsupported features --- objectstore-server/src/endpoints/objects.rs | 2 +- objectstore-server/tests/range_requests.rs | 23 +++++++++++++++++++-- objectstore-types/src/range.rs | 12 ++++++++++- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 68b36aeb..17276638 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -74,7 +74,7 @@ async fn object_get( .map_err(|_| ApiError::Client("invalid Range header".into()))?; match ByteRange::parse(header_str) { Ok(range) => Some(range), - Err(RangeError::UnknownUnit) => None, + Err(RangeError::UnknownUnit | RangeError::MultiRangeNotSupported) => None, Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), } } diff --git a/objectstore-server/tests/range_requests.rs b/objectstore-server/tests/range_requests.rs index 8808c288..3409d3ca 100644 --- a/objectstore-server/tests/range_requests.rs +++ b/objectstore-server/tests/range_requests.rs @@ -164,7 +164,7 @@ async fn invalid_bytes_range_returns_400() -> Result<()> { } #[tokio::test] -async fn multi_range_returns_400() -> Result<()> { +async fn multi_range_falls_back_to_full_body() -> Result<()> { let (server, key) = setup().await; let client = reqwest::Client::new(); @@ -174,7 +174,9 @@ async fn multi_range_returns_400() -> Result<()> { .send() .await?; - assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); + assert_eq!(resp.status(), reqwest::StatusCode::OK); + let body = resp.text().await?; + assert_eq!(body, "Hello, Range Requests!"); Ok(()) } @@ -238,3 +240,20 @@ async fn full_range_returns_200() -> Result<()> { assert_eq!(body, "Hello, Range Requests!"); Ok(()) } + +#[tokio::test] +async fn case_insensitive_range_unit() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .header("range", "Bytes=0-4") + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::PARTIAL_CONTENT); + let body = resp.text().await?; + assert_eq!(body, "Hello"); + Ok(()) +} diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index f44de76d..2a6630a8 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -27,7 +27,8 @@ impl ByteRange { /// Only `bytes=` ranges with a single specifier are accepted. Multi-range /// requests (containing commas) and non-`bytes` units are rejected. pub fn parse(header: &str) -> Result { - let spec = header + let lower = header.to_ascii_lowercase(); + let spec = lower .strip_prefix("bytes=") .ok_or(RangeError::UnknownUnit)?; @@ -222,6 +223,15 @@ mod tests { ); } + #[test] + fn parse_case_insensitive() { + assert_eq!( + ByteRange::parse("Bytes=0-499"), + Ok(ByteRange::FromTo(0, 499)) + ); + assert_eq!(ByteRange::parse("BYTES=100-"), Ok(ByteRange::From(100))); + } + #[test] fn parse_returns_unknown_unit_for_non_bytes() { assert_eq!(ByteRange::parse("items=0-10"), Err(RangeError::UnknownUnit)); From f209283d23c104861ab1439ae97dc315dff453db Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 26 May 2026 17:14:29 +0200 Subject: [PATCH 05/45] fix: Harden Content-Length handling for edge cases - Only set explicit Content-Length when content_range.total > 0, avoiding a Content-Length: 0 regression when metadata.size is unknown. - Use response Content-Length (not metadata.size) in 206 fallback paths for GCS and S3 so Content-Length matches the body if Content-Range is missing from a 206 response. --- objectstore-server/src/endpoints/objects.rs | 10 ++++++---- objectstore-service/src/backend/gcs.rs | 4 +++- objectstore-service/src/backend/s3_compatible.rs | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 17276638..39ddf72d 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -110,10 +110,12 @@ async fn object_get( let mut response = (status, metadata_headers, Body::from_stream(stream)).into_response(); let resp_headers = response.headers_mut(); resp_headers.insert(http::header::ACCEPT_RANGES, "bytes".parse().unwrap()); - resp_headers.insert( - http::header::CONTENT_LENGTH, - content_range.len().to_string().parse().unwrap(), - ); + if content_range.total > 0 { + resp_headers.insert( + http::header::CONTENT_LENGTH, + content_range.len().to_string().parse().unwrap(), + ); + } if !content_range.is_full() { resp_headers.insert( http::header::CONTENT_RANGE, diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 53ec8c00..2e93d36e 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -700,7 +700,9 @@ impl Backend for GcsBackend { .get(header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) .and_then(ContentRange::parse) - .unwrap_or_else(|| ContentRange::full(metadata.size.unwrap_or(0) as u64)) + .unwrap_or_else(|| { + ContentRange::full(payload_response.content_length().unwrap_or(0)) + }) } else { ContentRange::full(metadata.size.unwrap_or(0) as u64) }; diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 6346beee..48cef530 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -326,7 +326,7 @@ impl Backend for S3CompatibleBackend { .get(reqwest::header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) .and_then(ContentRange::parse) - .unwrap_or_else(|| ContentRange::full(metadata.size.unwrap_or(0) as u64)) + .unwrap_or_else(|| ContentRange::full(response.content_length().unwrap_or(0))) } else { ContentRange::full(metadata.size.unwrap_or(0) as u64) }; From 1c5f6cbd17a46ac3499ae6b505a1b46d31354c7b Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 26 May 2026 17:37:24 +0200 Subject: [PATCH 06/45] improve --- objectstore-server/src/endpoints/objects.rs | 4 +--- objectstore-service/src/backend/gcs.rs | 7 ++++--- objectstore-service/src/backend/s3_compatible.rs | 5 ++++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 39ddf72d..17542cbb 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -110,13 +110,11 @@ async fn object_get( let mut response = (status, metadata_headers, Body::from_stream(stream)).into_response(); let resp_headers = response.headers_mut(); resp_headers.insert(http::header::ACCEPT_RANGES, "bytes".parse().unwrap()); - if content_range.total > 0 { + if !content_range.is_full() { resp_headers.insert( http::header::CONTENT_LENGTH, content_range.len().to_string().parse().unwrap(), ); - } - if !content_range.is_full() { resp_headers.insert( http::header::CONTENT_RANGE, content_range.to_header_value().parse().unwrap(), diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 2e93d36e..64f6ed17 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -700,9 +700,10 @@ impl Backend for GcsBackend { .get(header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) .and_then(ContentRange::parse) - .unwrap_or_else(|| { - ContentRange::full(payload_response.content_length().unwrap_or(0)) - }) + .ok_or_else(|| Error::Generic { + context: "GCS: 206 response missing valid Content-Range header".to_owned(), + cause: None, + })? } else { ContentRange::full(metadata.size.unwrap_or(0) as u64) }; diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 48cef530..8fa3bf4e 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -326,7 +326,10 @@ impl Backend for S3CompatibleBackend { .get(reqwest::header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) .and_then(ContentRange::parse) - .unwrap_or_else(|| ContentRange::full(response.content_length().unwrap_or(0))) + .ok_or_else(|| Error::Generic { + context: "S3: 206 response missing valid Content-Range header".to_owned(), + cause: None, + })? } else { ContentRange::full(metadata.size.unwrap_or(0) as u64) }; From e8dd7f73b520d7fe2f5c131ca0eab8c187ea1d19 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 10:15:11 +0200 Subject: [PATCH 07/45] improve --- objectstore-server/src/endpoints/objects.rs | 7 +- objectstore-service/src/backend/gcs.rs | 6 +- .../src/backend/s3_compatible.rs | 2 +- objectstore-types/src/range.rs | 181 ++++++++++-------- 4 files changed, 104 insertions(+), 92 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 17542cbb..dd6c4f87 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -72,7 +72,7 @@ async fn object_get( let header_str = value .to_str() .map_err(|_| ApiError::Client("invalid Range header".into()))?; - match ByteRange::parse(header_str) { + match ByteRange::try_from(header_str) { Ok(range) => Some(range), Err(RangeError::UnknownUnit | RangeError::MultiRangeNotSupported) => None, Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), @@ -115,10 +115,7 @@ async fn object_get( http::header::CONTENT_LENGTH, content_range.len().to_string().parse().unwrap(), ); - resp_headers.insert( - http::header::CONTENT_RANGE, - content_range.to_header_value().parse().unwrap(), - ); + resp_headers.insert(http::header::CONTENT_RANGE, content_range.to_header_value()); } Ok(response) diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 64f6ed17..f8109bba 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -666,13 +666,11 @@ impl Backend for GcsBackend { let mut download_url = object_url; download_url.query_pairs_mut().append_pair("alt", "media"); - let range_header = range.map(|r| r.to_header_value()); - let payload_response = self .with_retry("get_payload", || async { let mut req = self.request(Method::GET, download_url.clone()).await?; - if let Some(ref value) = range_header { - req = req.header(header::RANGE, value); + if let Some(r) = range { + req = req.header(header::RANGE, r.to_header_value()); } let resp = req .send() diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 8fa3bf4e..307e7612 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -310,7 +310,7 @@ impl Backend for S3CompatibleBackend { let extra_headers = range.map(|r| { let mut headers = reqwest::header::HeaderMap::new(); - headers.insert(reqwest::header::RANGE, r.to_header_value().parse().unwrap()); + headers.insert(reqwest::header::RANGE, r.to_header_value()); headers }); diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index 2a6630a8..004fec89 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -1,83 +1,45 @@ -//! HTTP Range request types for partial content retrieval. -//! -//! [`ByteRange`] represents a parsed single byte-range from a `Range` HTTP -//! header. [`ContentRange`] describes which portion of an object is present in -//! a response body, used to build `Content-Range` response headers and choose -//! between 200 and 206 status codes. +//! Types for HTTP range requests. use std::fmt; -/// A single byte-range parsed from the `Range` HTTP request header. -/// -/// Only the `bytes` range unit is supported, and only a single range specifier -/// (multi-range requests are rejected at parse time). +use http::header::HeaderValue; + +/// A byte range. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ByteRange { - /// `bytes=N-M` — from byte offset N to byte offset M (both inclusive). - FromTo(u64, u64), - /// `bytes=N-` — from byte offset N to the end of the representation. + /// Bounded range with start and end, inclusive + Inclusive(u64, u64), + /// From offset X onwards From(u64), - /// `bytes=-N` — the last N bytes of the representation. - Suffix(u64), + /// Last X bytes + Last(u64), } impl ByteRange { - /// Parses a `Range` header value into a [`ByteRange`]. + /// Formats this range as a `Range` request header value (e.g. `bytes=0-499`). /// - /// Only `bytes=` ranges with a single specifier are accepted. Multi-range - /// requests (containing commas) and non-`bytes` units are rejected. - pub fn parse(header: &str) -> Result { - let lower = header.to_ascii_lowercase(); - let spec = lower - .strip_prefix("bytes=") - .ok_or(RangeError::UnknownUnit)?; - - if spec.contains(',') { - return Err(RangeError::MultiRangeNotSupported); - } - - let (start_str, end_str) = spec.split_once('-').ok_or(RangeError::InvalidRange)?; - - if start_str.is_empty() { - // bytes=-N (suffix) - let n: u64 = end_str.parse().map_err(|_| RangeError::InvalidRange)?; - if n == 0 { - return Err(RangeError::InvalidRange); - } - Ok(ByteRange::Suffix(n)) - } else if end_str.is_empty() { - // bytes=N- (from offset to end) - let start: u64 = start_str.parse().map_err(|_| RangeError::InvalidRange)?; - Ok(ByteRange::From(start)) - } else { - // bytes=N-M - let start: u64 = start_str.parse().map_err(|_| RangeError::InvalidRange)?; - let end: u64 = end_str.parse().map_err(|_| RangeError::InvalidRange)?; - if start > end { - return Err(RangeError::InvalidRange); - } - Ok(ByteRange::FromTo(start, end)) - } - } - - /// Formats this range as a `Range` header value (e.g. `bytes=0-499`). - pub fn to_header_value(&self) -> String { - match self { - ByteRange::FromTo(s, e) => format!("bytes={s}-{e}"), + /// The returned value is always valid ASCII and can be inserted directly + /// into an HTTP header map. + pub fn to_header_value(&self) -> HeaderValue { + let s = match self { + ByteRange::Inclusive(s, e) => format!("bytes={s}-{e}"), ByteRange::From(s) => format!("bytes={s}-"), - ByteRange::Suffix(n) => format!("bytes=-{n}"), - } + ByteRange::Last(n) => format!("bytes=-{n}"), + }; + // SAFETY: the format only contains ASCII digits, hyphens, and the + // literal prefix "bytes=", which are all valid header value bytes. + HeaderValue::from_str(&s).expect("ByteRange always produces a valid header value") } /// Resolves this range against a known total size, returning the concrete - /// byte offsets and total, or `None` if the range is unsatisfiable. + /// byte offsets and total, or [`None`] if the range is unsatisfiable. pub fn resolve(self, total: u64) -> Option { if total == 0 { return None; } let (start, end) = match self { - ByteRange::FromTo(s, e) => { + ByteRange::Inclusive(s, e) => { if s >= total { return None; } @@ -89,7 +51,7 @@ impl ByteRange { } (s, total - 1) } - ByteRange::Suffix(n) => { + ByteRange::Last(n) => { let start = total.saturating_sub(n); (start, total - 1) } @@ -99,6 +61,52 @@ impl ByteRange { } } +/// Parses a `Range` request header string into a [`ByteRange`]. +/// +/// Only `bytes=` ranges with a single specifier are accepted. Multi-range +/// requests (containing commas) and non-`bytes` units are rejected. +/// +/// Converting a [`http::header::HeaderValue`] to `&str` via +/// [`HeaderValue::to_str`] is the caller's responsibility, since that +/// conversion can fail when the value contains non-visible-ASCII bytes. +impl TryFrom<&str> for ByteRange { + type Error = RangeError; + + fn try_from(header: &str) -> Result { + let lower = header.to_ascii_lowercase(); + let spec = lower + .strip_prefix("bytes=") + .ok_or(RangeError::UnknownUnit)?; + + if spec.contains(',') { + return Err(RangeError::MultiRangeNotSupported); + } + + let (start_str, end_str) = spec.split_once('-').ok_or(RangeError::InvalidRange)?; + + if start_str.is_empty() { + // bytes=-N (suffix / last N bytes) + let n: u64 = end_str.parse().map_err(|_| RangeError::InvalidRange)?; + if n == 0 { + return Err(RangeError::InvalidRange); + } + Ok(ByteRange::Last(n)) + } else if end_str.is_empty() { + // bytes=N- (from offset to end) + let start: u64 = start_str.parse().map_err(|_| RangeError::InvalidRange)?; + Ok(ByteRange::From(start)) + } else { + // bytes=N-M (inclusive range) + let start: u64 = start_str.parse().map_err(|_| RangeError::InvalidRange)?; + let end: u64 = end_str.parse().map_err(|_| RangeError::InvalidRange)?; + if start > end { + return Err(RangeError::InvalidRange); + } + Ok(ByteRange::Inclusive(start, end)) + } + } +} + /// Describes which bytes of the full object are present in the response body. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ContentRange { @@ -156,8 +164,14 @@ impl ContentRange { } /// Formats the value for a `Content-Range` response header. - pub fn to_header_value(&self) -> String { - format!("bytes {}-{}/{}", self.start, self.end, self.total) + /// + /// The returned value is always valid ASCII and can be inserted directly + /// into an HTTP header map. + pub fn to_header_value(&self) -> HeaderValue { + let s = format!("bytes {}-{}/{}", self.start, self.end, self.total); + // SAFETY: the format only contains ASCII digits, spaces, hyphens, + // and slashes, which are all valid header value bytes. + HeaderValue::from_str(&s).expect("ContentRange always produces a valid header value") } } @@ -200,25 +214,25 @@ mod tests { #[test] fn parse_from_to() { assert_eq!( - ByteRange::parse("bytes=0-499"), - Ok(ByteRange::FromTo(0, 499)) + ByteRange::try_from("bytes=0-499"), + Ok(ByteRange::Inclusive(0, 499)) ); } #[test] fn parse_from() { - assert_eq!(ByteRange::parse("bytes=500-"), Ok(ByteRange::From(500))); + assert_eq!(ByteRange::try_from("bytes=500-"), Ok(ByteRange::From(500))); } #[test] fn parse_suffix() { - assert_eq!(ByteRange::parse("bytes=-100"), Ok(ByteRange::Suffix(100))); + assert_eq!(ByteRange::try_from("bytes=-100"), Ok(ByteRange::Last(100))); } #[test] fn parse_rejects_multi_range() { assert_eq!( - ByteRange::parse("bytes=0-10, 20-30"), + ByteRange::try_from("bytes=0-10, 20-30"), Err(RangeError::MultiRangeNotSupported) ); } @@ -226,33 +240,33 @@ mod tests { #[test] fn parse_case_insensitive() { assert_eq!( - ByteRange::parse("Bytes=0-499"), - Ok(ByteRange::FromTo(0, 499)) + ByteRange::try_from("Bytes=0-499"), + Ok(ByteRange::Inclusive(0, 499)) ); - assert_eq!(ByteRange::parse("BYTES=100-"), Ok(ByteRange::From(100))); + assert_eq!(ByteRange::try_from("BYTES=100-"), Ok(ByteRange::From(100))); } #[test] fn parse_returns_unknown_unit_for_non_bytes() { - assert_eq!(ByteRange::parse("items=0-10"), Err(RangeError::UnknownUnit)); + assert_eq!(ByteRange::try_from("items=0-10"), Err(RangeError::UnknownUnit)); } #[test] fn parse_rejects_inverted_range() { assert_eq!( - ByteRange::parse("bytes=500-100"), + ByteRange::try_from("bytes=500-100"), Err(RangeError::InvalidRange) ); } #[test] fn parse_rejects_zero_suffix() { - assert_eq!(ByteRange::parse("bytes=-0"), Err(RangeError::InvalidRange)); + assert_eq!(ByteRange::try_from("bytes=-0"), Err(RangeError::InvalidRange)); } #[test] fn resolve_from_to() { - let range = ByteRange::FromTo(0, 499).resolve(1000); + let range = ByteRange::Inclusive(0, 499).resolve(1000); assert_eq!( range, Some(ContentRange { @@ -265,7 +279,7 @@ mod tests { #[test] fn resolve_from_to_clamped() { - let range = ByteRange::FromTo(0, 9999).resolve(500); + let range = ByteRange::Inclusive(0, 9999).resolve(500); assert_eq!( range, Some(ContentRange { @@ -291,7 +305,7 @@ mod tests { #[test] fn resolve_suffix() { - let range = ByteRange::Suffix(100).resolve(1000); + let range = ByteRange::Last(100).resolve(1000); assert_eq!( range, Some(ContentRange { @@ -304,7 +318,7 @@ mod tests { #[test] fn resolve_suffix_larger_than_total() { - let range = ByteRange::Suffix(2000).resolve(1000); + let range = ByteRange::Last(2000).resolve(1000); assert_eq!( range, Some(ContentRange { @@ -317,9 +331,9 @@ mod tests { #[test] fn resolve_unsatisfiable() { - assert_eq!(ByteRange::FromTo(1000, 2000).resolve(500), None); + assert_eq!(ByteRange::Inclusive(1000, 2000).resolve(500), None); assert_eq!(ByteRange::From(500).resolve(500), None); - assert_eq!(ByteRange::FromTo(0, 0).resolve(0), None); + assert_eq!(ByteRange::Inclusive(0, 0).resolve(0), None); } #[test] @@ -353,9 +367,12 @@ mod tests { #[test] fn byte_range_to_header_value() { - assert_eq!(ByteRange::FromTo(0, 499).to_header_value(), "bytes=0-499"); + assert_eq!( + ByteRange::Inclusive(0, 499).to_header_value(), + "bytes=0-499" + ); assert_eq!(ByteRange::From(500).to_header_value(), "bytes=500-"); - assert_eq!(ByteRange::Suffix(100).to_header_value(), "bytes=-100"); + assert_eq!(ByteRange::Last(100).to_header_value(), "bytes=-100"); } #[test] From 4a2da3d6ade6cb65695c623c1056d79c0177c2f8 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 10:30:08 +0200 Subject: [PATCH 08/45] refactor: Simplify range request code and consolidate tests Reduce test boilerplate by consolidating 20 unit tests into 7 with identical coverage. Remove redundant integration test already covered by unit tests. Use HeaderValue::from_static, delegate to Display impl, and pass ByteRange directly instead of an intermediate HeaderMap. --- objectstore-server/src/endpoints/objects.rs | 14 +- objectstore-server/tests/range_requests.rs | 17 -- .../src/backend/s3_compatible.rs | 16 +- objectstore-types/src/range.rs | 176 +++++------------- 4 files changed, 60 insertions(+), 163 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index dd6c4f87..27d7e159 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -100,17 +100,21 @@ async fn object_get( let stream = state.meter_stream(stream, &context); let metadata_headers = metadata.to_headers("").map_err(ServiceError::from)?; + let is_partial = !content_range.is_full(); - let status = if content_range.is_full() { - StatusCode::OK - } else { + let status = if is_partial { StatusCode::PARTIAL_CONTENT + } else { + StatusCode::OK }; let mut response = (status, metadata_headers, Body::from_stream(stream)).into_response(); let resp_headers = response.headers_mut(); - resp_headers.insert(http::header::ACCEPT_RANGES, "bytes".parse().unwrap()); - if !content_range.is_full() { + resp_headers.insert( + http::header::ACCEPT_RANGES, + http::header::HeaderValue::from_static("bytes"), + ); + if is_partial { resp_headers.insert( http::header::CONTENT_LENGTH, content_range.len().to_string().parse().unwrap(), diff --git a/objectstore-server/tests/range_requests.rs b/objectstore-server/tests/range_requests.rs index 3409d3ca..9307f776 100644 --- a/objectstore-server/tests/range_requests.rs +++ b/objectstore-server/tests/range_requests.rs @@ -240,20 +240,3 @@ async fn full_range_returns_200() -> Result<()> { assert_eq!(body, "Hello, Range Requests!"); Ok(()) } - -#[tokio::test] -async fn case_insensitive_range_unit() -> Result<()> { - let (server, key) = setup().await; - let client = reqwest::Client::new(); - - let resp = client - .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) - .header("range", "Bytes=0-4") - .send() - .await?; - - assert_eq!(resp.status(), reqwest::StatusCode::PARTIAL_CONTENT); - let body = resp.text().await?; - assert_eq!(body, "Hello"); - Ok(()) -} diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 307e7612..9c9d99e5 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -164,13 +164,13 @@ where &self, method: Method, id: &ObjectId, - extra_headers: Option, + range: Option, ) -> Result> { let object_url = self.object_url(id); let mut builder = self.request(method, &object_url).await?; - if let Some(headers) = extra_headers { - builder = builder.headers(headers); + if let Some(r) = range { + builder = builder.header(reqwest::header::RANGE, r.to_header_value()); } let response = builder.send().await.map_err(|cause| Error::Reqwest { context: "S3: failed to send request".to_string(), @@ -308,15 +308,7 @@ impl Backend for S3CompatibleBackend { async fn get_object(&self, id: &ObjectId, range: Option) -> Result { objectstore_log::debug!("Reading from s3_compatible backend"); - let extra_headers = range.map(|r| { - let mut headers = reqwest::header::HeaderMap::new(); - headers.insert(reqwest::header::RANGE, r.to_header_value()); - headers - }); - - let Some((metadata, response)) = - self.request_object(Method::GET, id, extra_headers).await? - else { + let Some((metadata, response)) = self.request_object(Method::GET, id, range).await? else { return Ok(None); }; diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index 004fec89..19f40078 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -26,8 +26,6 @@ impl ByteRange { ByteRange::From(s) => format!("bytes={s}-"), ByteRange::Last(n) => format!("bytes=-{n}"), }; - // SAFETY: the format only contains ASCII digits, hyphens, and the - // literal prefix "bytes=", which are all valid header value bytes. HeaderValue::from_str(&s).expect("ByteRange always produces a valid header value") } @@ -168,10 +166,8 @@ impl ContentRange { /// The returned value is always valid ASCII and can be inserted directly /// into an HTTP header map. pub fn to_header_value(&self) -> HeaderValue { - let s = format!("bytes {}-{}/{}", self.start, self.end, self.total); - // SAFETY: the format only contains ASCII digits, spaces, hyphens, - // and slashes, which are all valid header value bytes. - HeaderValue::from_str(&s).expect("ContentRange always produces a valid header value") + HeaderValue::from_str(&self.to_string()) + .expect("ContentRange always produces a valid header value") } } @@ -212,33 +208,14 @@ mod tests { use super::*; #[test] - fn parse_from_to() { + fn parse_valid_ranges() { assert_eq!( ByteRange::try_from("bytes=0-499"), Ok(ByteRange::Inclusive(0, 499)) ); - } - - #[test] - fn parse_from() { assert_eq!(ByteRange::try_from("bytes=500-"), Ok(ByteRange::From(500))); - } - - #[test] - fn parse_suffix() { assert_eq!(ByteRange::try_from("bytes=-100"), Ok(ByteRange::Last(100))); - } - - #[test] - fn parse_rejects_multi_range() { - assert_eq!( - ByteRange::try_from("bytes=0-10, 20-30"), - Err(RangeError::MultiRangeNotSupported) - ); - } - - #[test] - fn parse_case_insensitive() { + // Case insensitive assert_eq!( ByteRange::try_from("Bytes=0-499"), Ok(ByteRange::Inclusive(0, 499)) @@ -247,86 +224,33 @@ mod tests { } #[test] - fn parse_returns_unknown_unit_for_non_bytes() { - assert_eq!(ByteRange::try_from("items=0-10"), Err(RangeError::UnknownUnit)); - } - - #[test] - fn parse_rejects_inverted_range() { + fn parse_invalid_ranges() { assert_eq!( - ByteRange::try_from("bytes=500-100"), - Err(RangeError::InvalidRange) + ByteRange::try_from("bytes=0-10, 20-30"), + Err(RangeError::MultiRangeNotSupported) ); - } - - #[test] - fn parse_rejects_zero_suffix() { - assert_eq!(ByteRange::try_from("bytes=-0"), Err(RangeError::InvalidRange)); - } - - #[test] - fn resolve_from_to() { - let range = ByteRange::Inclusive(0, 499).resolve(1000); assert_eq!( - range, - Some(ContentRange { - start: 0, - end: 499, - total: 1000 - }) + ByteRange::try_from("items=0-10"), + Err(RangeError::UnknownUnit) ); - } - - #[test] - fn resolve_from_to_clamped() { - let range = ByteRange::Inclusive(0, 9999).resolve(500); assert_eq!( - range, - Some(ContentRange { - start: 0, - end: 499, - total: 500 - }) - ); - } - - #[test] - fn resolve_from() { - let range = ByteRange::From(500).resolve(1000); - assert_eq!( - range, - Some(ContentRange { - start: 500, - end: 999, - total: 1000 - }) + ByteRange::try_from("bytes=500-100"), + Err(RangeError::InvalidRange) ); - } - - #[test] - fn resolve_suffix() { - let range = ByteRange::Last(100).resolve(1000); assert_eq!( - range, - Some(ContentRange { - start: 900, - end: 999, - total: 1000 - }) + ByteRange::try_from("bytes=-0"), + Err(RangeError::InvalidRange) ); } #[test] - fn resolve_suffix_larger_than_total() { - let range = ByteRange::Last(2000).resolve(1000); - assert_eq!( - range, - Some(ContentRange { - start: 0, - end: 999, - total: 1000 - }) - ); + fn resolve_satisfiable() { + let cr = |start, end, total| Some(ContentRange { start, end, total }); + assert_eq!(ByteRange::Inclusive(0, 499).resolve(1000), cr(0, 499, 1000)); + assert_eq!(ByteRange::Inclusive(0, 9999).resolve(500), cr(0, 499, 500)); + assert_eq!(ByteRange::From(500).resolve(1000), cr(500, 999, 1000)); + assert_eq!(ByteRange::Last(100).resolve(1000), cr(900, 999, 1000)); + assert_eq!(ByteRange::Last(2000).resolve(1000), cr(0, 999, 1000)); } #[test] @@ -337,60 +261,54 @@ mod tests { } #[test] - fn content_range_full() { - let cr = ContentRange::full(1000); - assert_eq!(cr.start, 0); - assert_eq!(cr.end, 999); - assert_eq!(cr.total, 1000); - assert_eq!(cr.len(), 1000); - assert!(cr.is_full()); - assert_eq!(cr.to_header_value(), "bytes 0-999/1000"); - } + fn content_range_properties() { + let full = ContentRange::full(1000); + assert_eq!( + full, + ContentRange { + start: 0, + end: 999, + total: 1000 + } + ); + assert_eq!(full.len(), 1000); + assert!(full.is_full()); - #[test] - fn content_range_partial_is_not_full() { - let cr = ContentRange { + let partial = ContentRange { start: 0, end: 499, total: 1000, }; - assert!(!cr.is_full()); - assert_eq!(cr.len(), 500); - } + assert_eq!(partial.len(), 500); + assert!(!partial.is_full()); - #[test] - fn content_range_full_zero_bytes() { - let cr = ContentRange::full(0); - assert_eq!(cr.len(), 0); - assert!(cr.is_full()); + let zero = ContentRange::full(0); + assert_eq!(zero.len(), 0); + assert!(zero.is_full()); } #[test] - fn byte_range_to_header_value() { + fn header_value_roundtrips() { assert_eq!( ByteRange::Inclusive(0, 499).to_header_value(), "bytes=0-499" ); assert_eq!(ByteRange::From(500).to_header_value(), "bytes=500-"); assert_eq!(ByteRange::Last(100).to_header_value(), "bytes=-100"); - } - #[test] - fn content_range_parse() { - assert_eq!( - ContentRange::parse("bytes 0-499/1234"), - Some(ContentRange { - start: 0, - end: 499, - total: 1234 - }) - ); + let cr = ContentRange { + start: 0, + end: 499, + total: 1234, + }; + assert_eq!(cr.to_header_value(), "bytes 0-499/1234"); + assert_eq!(ContentRange::parse("bytes 0-499/1234"), Some(cr)); assert_eq!(ContentRange::parse("bytes */1234"), None); assert_eq!(ContentRange::parse("invalid"), None); } #[test] - fn content_range_parse_unsatisfiable_total() { + fn parse_unsatisfiable_total() { assert_eq!( ContentRange::parse_unsatisfiable_total("bytes */1234"), Some(1234) From d12a3781a20733ad79028a3b33ef4f8fd095976f Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 10:40:54 +0200 Subject: [PATCH 09/45] fix: Update new tiered.rs tests for range request API changes Tests added to main after this branch diverged still used the old get_object signature (1 arg, 2-tuple return). Update them to pass None for the range parameter and destructure the 3-tuple. --- objectstore-service/src/backend/tiered.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/objectstore-service/src/backend/tiered.rs b/objectstore-service/src/backend/tiered.rs index 3ee1ee1a..d679409d 100644 --- a/objectstore-service/src/backend/tiered.rs +++ b/objectstore-service/src/backend/tiered.rs @@ -1597,7 +1597,7 @@ mod tests { ); // get_object should follow the tombstone and return the payload. - let (got_meta, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (got_meta, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload); assert_eq!(got_meta.content_type, "application/octet-stream"); @@ -1682,7 +1682,7 @@ mod tests { .unwrap(); assert!(error.is_none()); - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); let mut expected = Vec::new(); @@ -1722,7 +1722,7 @@ mod tests { hv.get(&id).expect_not_found(); // The object should not be reachable. - assert!(storage.get_object(&id).await.unwrap().is_none()); + assert!(storage.get_object(&id, None).await.unwrap().is_none()); } #[tokio::test] @@ -1833,7 +1833,7 @@ mod tests { lt.get(&new_lt_id).expect_object(); // Assert the contents of the new revision. - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload2); } @@ -2063,7 +2063,7 @@ mod tests { storage.join().await; // The object is there. - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload); @@ -2086,7 +2086,7 @@ mod tests { assert!(remaining.is_empty()); // The object is still there after recovery. - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload); } @@ -2200,7 +2200,7 @@ mod tests { storage.join().await; // The object is there. - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload); @@ -2223,7 +2223,7 @@ mod tests { assert!(remaining.is_empty()); // The object is there after recovery. - let (_, s) = storage.get_object(&id).await.unwrap().unwrap(); + let (_, _, s) = storage.get_object(&id, None).await.unwrap().unwrap(); let body = stream::read_to_vec(s).await.unwrap(); assert_eq!(body, payload); } From d03594c5bf725cec30887e7c7c3e29a0891bcdaf Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 15:16:15 +0200 Subject: [PATCH 10/45] improve --- objectstore-server/src/endpoints/objects.rs | 2 +- objectstore-types/src/range.rs | 160 ++++++++------------ 2 files changed, 65 insertions(+), 97 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 27d7e159..7307baf3 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -74,7 +74,7 @@ async fn object_get( .map_err(|_| ApiError::Client("invalid Range header".into()))?; match ByteRange::try_from(header_str) { Ok(range) => Some(range), - Err(RangeError::UnknownUnit | RangeError::MultiRangeNotSupported) => None, + Err(RangeError::InvalidUnit(_) | RangeError::MultiRange) => None, Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), } } diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index 19f40078..04122ee3 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -3,41 +3,73 @@ use std::fmt; use http::header::HeaderValue; +use thiserror::Error; -/// A byte range. +/// Specifier for a single byte range. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ByteRange { /// Bounded range with start and end, inclusive - Inclusive(u64, u64), + Bounded(u64, u64), /// From offset X onwards From(u64), /// Last X bytes Last(u64), } +/// Parses a `Range` request header value into a [`ByteRange`]. +/// +/// Only `bytes=` ranges with a single specifier are accepted. +/// Multiple ranges and non-`bytes` units are rejected. +impl TryFrom<&str> for ByteRange { + type Error = RangeError; + + fn try_from(value: &str) -> Result { + let lower = value.to_ascii_lowercase(); + let Some(spec) = lower.strip_prefix("bytes=") else { + let unit = lower.split_once('=').map_or(&*lower, |(u, _)| u); + return Err(RangeError::InvalidUnit(unit.to_owned())); + }; + if spec.contains(',') { + return Err(RangeError::MultiRange); + } + + let (start, end) = spec.split_once('-').ok_or(RangeError::Invalid)?; + if end.is_empty() { + let start: u64 = start.parse().map_err(|_| RangeError::Invalid)?; + Ok(ByteRange::From(start)) + } else if start.is_empty() { + let last: u64 = end.parse().map_err(|_| RangeError::Invalid)?; + Ok(ByteRange::Last(last)) + } else { + let start: u64 = start.parse().map_err(|_| RangeError::Invalid)?; + let end: u64 = end.parse().map_err(|_| RangeError::Invalid)?; + if start > end { + return Err(RangeError::Invalid); + } + Ok(ByteRange::Bounded(start, end)) + } + } +} + impl ByteRange { /// Formats this range as a `Range` request header value (e.g. `bytes=0-499`). - /// - /// The returned value is always valid ASCII and can be inserted directly - /// into an HTTP header map. pub fn to_header_value(&self) -> HeaderValue { let s = match self { - ByteRange::Inclusive(s, e) => format!("bytes={s}-{e}"), - ByteRange::From(s) => format!("bytes={s}-"), + ByteRange::Bounded(a, b) => format!("bytes={a}-{b}"), + ByteRange::From(n) => format!("bytes={n}-"), ByteRange::Last(n) => format!("bytes=-{n}"), }; - HeaderValue::from_str(&s).expect("ByteRange always produces a valid header value") + HeaderValue::from_str(&s).expect("always a valid header value") } - /// Resolves this range against a known total size, returning the concrete - /// byte offsets and total, or [`None`] if the range is unsatisfiable. + /// Resolves this range against a known total size. pub fn resolve(self, total: u64) -> Option { if total == 0 { return None; } let (start, end) = match self { - ByteRange::Inclusive(s, e) => { + ByteRange::Bounded(s, e) => { if s >= total { return None; } @@ -59,52 +91,6 @@ impl ByteRange { } } -/// Parses a `Range` request header string into a [`ByteRange`]. -/// -/// Only `bytes=` ranges with a single specifier are accepted. Multi-range -/// requests (containing commas) and non-`bytes` units are rejected. -/// -/// Converting a [`http::header::HeaderValue`] to `&str` via -/// [`HeaderValue::to_str`] is the caller's responsibility, since that -/// conversion can fail when the value contains non-visible-ASCII bytes. -impl TryFrom<&str> for ByteRange { - type Error = RangeError; - - fn try_from(header: &str) -> Result { - let lower = header.to_ascii_lowercase(); - let spec = lower - .strip_prefix("bytes=") - .ok_or(RangeError::UnknownUnit)?; - - if spec.contains(',') { - return Err(RangeError::MultiRangeNotSupported); - } - - let (start_str, end_str) = spec.split_once('-').ok_or(RangeError::InvalidRange)?; - - if start_str.is_empty() { - // bytes=-N (suffix / last N bytes) - let n: u64 = end_str.parse().map_err(|_| RangeError::InvalidRange)?; - if n == 0 { - return Err(RangeError::InvalidRange); - } - Ok(ByteRange::Last(n)) - } else if end_str.is_empty() { - // bytes=N- (from offset to end) - let start: u64 = start_str.parse().map_err(|_| RangeError::InvalidRange)?; - Ok(ByteRange::From(start)) - } else { - // bytes=N-M (inclusive range) - let start: u64 = start_str.parse().map_err(|_| RangeError::InvalidRange)?; - let end: u64 = end_str.parse().map_err(|_| RangeError::InvalidRange)?; - if start > end { - return Err(RangeError::InvalidRange); - } - Ok(ByteRange::Inclusive(start, end)) - } - } -} - /// Describes which bytes of the full object are present in the response body. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ContentRange { @@ -178,31 +164,19 @@ impl fmt::Display for ContentRange { } /// Errors that can occur when parsing a `Range` header. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Error)] pub enum RangeError { - /// The header contained multiple range specifiers separated by commas. - MultiRangeNotSupported, - /// The header value could not be parsed as a valid byte range. - InvalidRange, - /// The range unit is not `bytes` (e.g. `items=0-10`). Per RFC 9110, unknown - /// units should be ignored and the request served as a normal full-body response. - UnknownUnit, -} - -impl fmt::Display for RangeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - RangeError::MultiRangeNotSupported => { - write!(f, "multi-range requests are not supported") - } - RangeError::InvalidRange => write!(f, "invalid Range header"), - RangeError::UnknownUnit => write!(f, "unknown range unit"), - } - } + /// The value could not be parsed as a valid byte range. + #[error("invalid byte range")] + Invalid, + /// The value contained multiple range specifiers separated by commas. + #[error("expected single byte range, found multipart range")] + MultiRange, + /// The range unit is invalid + #[error("invalid range unit: {0}, expected: bytes")] + InvalidUnit(String), } -impl std::error::Error for RangeError {} - #[cfg(test)] mod tests { use super::*; @@ -211,14 +185,14 @@ mod tests { fn parse_valid_ranges() { assert_eq!( ByteRange::try_from("bytes=0-499"), - Ok(ByteRange::Inclusive(0, 499)) + Ok(ByteRange::Bounded(0, 499)) ); assert_eq!(ByteRange::try_from("bytes=500-"), Ok(ByteRange::From(500))); assert_eq!(ByteRange::try_from("bytes=-100"), Ok(ByteRange::Last(100))); // Case insensitive assert_eq!( ByteRange::try_from("Bytes=0-499"), - Ok(ByteRange::Inclusive(0, 499)) + Ok(ByteRange::Bounded(0, 499)) ); assert_eq!(ByteRange::try_from("BYTES=100-"), Ok(ByteRange::From(100))); } @@ -227,27 +201,24 @@ mod tests { fn parse_invalid_ranges() { assert_eq!( ByteRange::try_from("bytes=0-10, 20-30"), - Err(RangeError::MultiRangeNotSupported) + Err(RangeError::MultiRange) ); assert_eq!( ByteRange::try_from("items=0-10"), - Err(RangeError::UnknownUnit) + Err(RangeError::InvalidUnit("items".into())) ); assert_eq!( ByteRange::try_from("bytes=500-100"), - Err(RangeError::InvalidRange) - ); - assert_eq!( - ByteRange::try_from("bytes=-0"), - Err(RangeError::InvalidRange) + Err(RangeError::Invalid) ); + assert_eq!(ByteRange::try_from("bytes=-0"), Err(RangeError::Invalid)); } #[test] fn resolve_satisfiable() { let cr = |start, end, total| Some(ContentRange { start, end, total }); - assert_eq!(ByteRange::Inclusive(0, 499).resolve(1000), cr(0, 499, 1000)); - assert_eq!(ByteRange::Inclusive(0, 9999).resolve(500), cr(0, 499, 500)); + assert_eq!(ByteRange::Bounded(0, 499).resolve(1000), cr(0, 499, 1000)); + assert_eq!(ByteRange::Bounded(0, 9999).resolve(500), cr(0, 499, 500)); assert_eq!(ByteRange::From(500).resolve(1000), cr(500, 999, 1000)); assert_eq!(ByteRange::Last(100).resolve(1000), cr(900, 999, 1000)); assert_eq!(ByteRange::Last(2000).resolve(1000), cr(0, 999, 1000)); @@ -255,9 +226,9 @@ mod tests { #[test] fn resolve_unsatisfiable() { - assert_eq!(ByteRange::Inclusive(1000, 2000).resolve(500), None); + assert_eq!(ByteRange::Bounded(1000, 2000).resolve(500), None); assert_eq!(ByteRange::From(500).resolve(500), None); - assert_eq!(ByteRange::Inclusive(0, 0).resolve(0), None); + assert_eq!(ByteRange::Bounded(0, 0).resolve(0), None); } #[test] @@ -289,10 +260,7 @@ mod tests { #[test] fn header_value_roundtrips() { - assert_eq!( - ByteRange::Inclusive(0, 499).to_header_value(), - "bytes=0-499" - ); + assert_eq!(ByteRange::Bounded(0, 499).to_header_value(), "bytes=0-499"); assert_eq!(ByteRange::From(500).to_header_value(), "bytes=500-"); assert_eq!(ByteRange::Last(100).to_header_value(), "bytes=-100"); From a1c11fd575f825287860aa33c0df308876b74cee Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 15:33:27 +0200 Subject: [PATCH 11/45] improve --- objectstore-service/src/service.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/objectstore-service/src/service.rs b/objectstore-service/src/service.rs index 48f3eb8f..1aac35c2 100644 --- a/objectstore-service/src/service.rs +++ b/objectstore-service/src/service.rs @@ -210,11 +210,7 @@ impl StorageService { .await } - /// Streams the contents of an object. - /// - /// When `range` is `Some`, backends that support partial reads return only - /// the requested byte range. The returned [`ContentRange`] always describes - /// which bytes are in the stream, whether partial or full. + /// Streams (part of) the contents of an object. pub async fn get_object(&self, id: ObjectId, range: Option) -> Result { let inner = Arc::clone(&self.inner); self.spawn("get", async move { inner.get_object(&id, range).await }) From 2762e5bbb492297d412530b2d43757e5270ba53e Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 15:34:47 +0200 Subject: [PATCH 12/45] fix(range): Reject bytes=-0 as invalid and use Content-Range total for S3 partial size bytes=-0 (last 0 bytes) is semantically meaningless and should be rejected with RangeError::Invalid rather than parsed as Last(0). For S3 206 responses, Content-Length reflects the partial body length, not the full object size. Read metadata.size from the Content-Range total field instead so the metadata accurately represents the full object. Co-Authored-By: Claude Sonnet 4.6 --- objectstore-service/src/backend/s3_compatible.rs | 12 +++++++++++- objectstore-types/src/range.rs | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 9c9d99e5..54047677 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -201,7 +201,17 @@ where let headers = response.headers(); let mut metadata = Metadata::from_headers(headers, GCS_CUSTOM_PREFIX)?; - metadata.size = response.content_length().map(|len| len as usize); + // For partial responses, Content-Length is the range body length, not the full object size. + // Use the Content-Range total instead. + metadata.size = if response.status() == StatusCode::PARTIAL_CONTENT { + headers + .get(reqwest::header::CONTENT_RANGE) + .and_then(|v| v.to_str().ok()) + .and_then(ContentRange::parse) + .map(|cr| cr.total as usize) + } else { + response.content_length().map(|len| len as usize) + }; // TODO: Schedule into background persistently so this doesn't get lost on restarts if let ExpirationPolicy::TimeToIdle(tti) = metadata.expiration_policy { diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index 04122ee3..7ca578a2 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -39,6 +39,9 @@ impl TryFrom<&str> for ByteRange { Ok(ByteRange::From(start)) } else if start.is_empty() { let last: u64 = end.parse().map_err(|_| RangeError::Invalid)?; + if last == 0 { + return Err(RangeError::Invalid); + } Ok(ByteRange::Last(last)) } else { let start: u64 = start.parse().map_err(|_| RangeError::Invalid)?; From 1f6caf07ef678d12835996c9f5868314293c623b Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 15:57:26 +0200 Subject: [PATCH 13/45] improve --- objectstore-service/src/backend/common.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/objectstore-service/src/backend/common.rs b/objectstore-service/src/backend/common.rs index 185c87bb..17af1cc2 100644 --- a/objectstore-service/src/backend/common.rs +++ b/objectstore-service/src/backend/common.rs @@ -44,12 +44,8 @@ pub trait Backend: fmt::Debug + Send + Sync + 'static { stream: ClientStream, ) -> Result; - /// Retrieves an object at the given path, returning its metadata, the - /// content range describing which bytes are in the stream, and the payload. - /// - /// When `range` is `Some`, backends that support partial reads should - /// return only the requested byte range. Backends that do not support - /// partial reads may ignore the range and return the full object. + /// Retrieves (part of) an object at the given path, returning its metadata, a description of + /// the part being returned, and the payload. async fn get_object(&self, id: &ObjectId, range: Option) -> Result; /// Retrieves only the metadata for an object, without the payload. @@ -156,12 +152,10 @@ pub trait HighVolumeBackend: Backend { payload: Bytes, ) -> Result>; - /// Retrieves an object with explicit tombstone awareness. + /// Retrieves (part of) an object with explicit tombstone awareness. /// /// Returns [`TieredGet::Tombstone`] instead of synthesizing a tombstone /// object, making the caller's routing logic a compile-time distinction. - /// - /// See [`Backend::get_object`] for the semantics of `range`. async fn get_tiered_object(&self, id: &ObjectId, range: Option) -> Result; From 02743ad82b9bac39b186c1475ca0d89d0c3c9e09 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 16:08:21 +0200 Subject: [PATCH 14/45] improve --- objectstore-service/src/backend/tiered.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/objectstore-service/src/backend/tiered.rs b/objectstore-service/src/backend/tiered.rs index 02e71d9f..c596ae19 100644 --- a/objectstore-service/src/backend/tiered.rs +++ b/objectstore-service/src/backend/tiered.rs @@ -443,17 +443,13 @@ impl Backend for TieredStorage { backend_type = backend_type, ); - if let Some((ref metadata, _, _)) = result { - if let Some(size) = metadata.size { - objectstore_metrics::record!( - "get.size" = size, - usecase = id.usecase().to_owned(), - backend_choice = backend_choice.as_str(), - backend_type = backend_type, - ); - } else { - objectstore_log::warn!(backend_type, "Missing object size"); - } + if let Some((_, ref content_range, _)) = result { + objectstore_metrics::record!( + "get.size" = content_range.len(), + usecase = id.usecase().to_owned(), + backend_choice = backend_choice.as_str(), + backend_type = backend_type, + ); } Ok(result) From 7d629c8efc276ad6db88cf718dc7fea726b5ba28 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Fri, 29 May 2026 16:36:58 +0200 Subject: [PATCH 15/45] improve --- objectstore-server/src/endpoints/objects.rs | 15 ++++++++------- objectstore-types/src/range.rs | 8 ++++++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 7307baf3..7c42c689 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -100,26 +100,27 @@ async fn object_get( let stream = state.meter_stream(stream, &context); let metadata_headers = metadata.to_headers("").map_err(ServiceError::from)?; - let is_partial = !content_range.is_full(); + let is_partial = !content_range.is_full(); let status = if is_partial { StatusCode::PARTIAL_CONTENT } else { StatusCode::OK }; - let mut response = (status, metadata_headers, Body::from_stream(stream)).into_response(); - let resp_headers = response.headers_mut(); - resp_headers.insert( + + let headers = response.headers_mut(); + headers.insert( http::header::ACCEPT_RANGES, http::header::HeaderValue::from_static("bytes"), ); if is_partial { - resp_headers.insert( + // When `is_partial == false`, `CONTENT_LENGTH is already part of `metadata_headers`. + headers.insert( http::header::CONTENT_LENGTH, - content_range.len().to_string().parse().unwrap(), + content_range.len_to_header_value(), ); - resp_headers.insert(http::header::CONTENT_RANGE, content_range.to_header_value()); + headers.insert(http::header::CONTENT_RANGE, content_range.to_header_value()); } Ok(response) diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index 7ca578a2..6c16f470 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -155,8 +155,12 @@ impl ContentRange { /// The returned value is always valid ASCII and can be inserted directly /// into an HTTP header map. pub fn to_header_value(&self) -> HeaderValue { - HeaderValue::from_str(&self.to_string()) - .expect("ContentRange always produces a valid header value") + HeaderValue::from_str(&self.to_string()).expect("always a valid header value") + } + + /// Formats the length of this range for a `Content-Length` response header. + pub fn len_to_header_value(&self) -> HeaderValue { + HeaderValue::from_str(&self.len().to_string()).expect("always a valid header value") } } From 3fb69754618019a4bbd00fc1a2a8edc2a1b61ff1 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:10:13 +0200 Subject: [PATCH 16/45] improve range types --- objectstore-server/src/endpoints/objects.rs | 2 +- objectstore-service/src/backend/gcs.rs | 2 +- .../src/backend/s3_compatible.rs | 4 +- objectstore-types/src/range.rs | 210 ++++++++++-------- 4 files changed, 116 insertions(+), 102 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 7c42c689..b3bb6063 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -72,7 +72,7 @@ async fn object_get( let header_str = value .to_str() .map_err(|_| ApiError::Client("invalid Range header".into()))?; - match ByteRange::try_from(header_str) { + match header_str.parse::() { Ok(range) => Some(range), Err(RangeError::InvalidUnit(_) | RangeError::MultiRange) => None, Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 50188ad8..73d97398 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -702,7 +702,7 @@ impl Backend for GcsBackend { .headers() .get(header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) - .and_then(ContentRange::parse) + .and_then(|s| s.parse::().ok()) .ok_or_else(|| Error::Generic { context: "GCS: 206 response missing valid Content-Range header".to_owned(), cause: None, diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 54047677..b999b9e2 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -207,7 +207,7 @@ where headers .get(reqwest::header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) - .and_then(ContentRange::parse) + .and_then(|s| s.parse::().ok()) .map(|cr| cr.total as usize) } else { response.content_length().map(|len| len as usize) @@ -327,7 +327,7 @@ impl Backend for S3CompatibleBackend { .headers() .get(reqwest::header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) - .and_then(ContentRange::parse) + .and_then(|s| s.parse::().ok()) .ok_or_else(|| Error::Generic { context: "S3: 206 response missing valid Content-Range header".to_owned(), cause: None, diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index 6c16f470..7e148253 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -1,6 +1,7 @@ //! Types for HTTP range requests. use std::fmt; +use std::str::FromStr; use http::header::HeaderValue; use thiserror::Error; @@ -16,14 +17,69 @@ pub enum ByteRange { Last(u64), } +impl ByteRange { + /// Formats this range for a `Range` request header. + pub fn to_header_value(&self) -> HeaderValue { + let s = match self { + ByteRange::Bounded(a, b) => format!("bytes={a}-{b}"), + ByteRange::From(n) => format!("bytes={n}-"), + ByteRange::Last(n) => format!("bytes=-{n}"), + }; + HeaderValue::from_str(&s).expect("always a valid header value") + } + + /// Resolves this range against a known total size, returning `None` if + /// unsatisfiable (the object is empty, or the start offset is past the end). + pub fn resolve(self, total: u64) -> Option { + if total == 0 { + return None; + } + + let (start, end) = match self { + ByteRange::Bounded(start, end) => { + if start >= total { + return None; + } + (start, end.min(total - 1)) // clamp + } + ByteRange::From(start) => { + if start >= total { + return None; + } + (start, total - 1) + } + ByteRange::Last(negative_start) => { + let start = total.saturating_sub(negative_start); + (start, total - 1) + } + }; + + Some(ContentRange { start, end, total }) + } +} + +/// Errors that can occur when parsing a `Range` header. +#[derive(Debug, Clone, PartialEq, Eq, Error)] +pub enum RangeError { + /// The value could not be parsed as a valid byte range. + #[error("invalid byte range")] + Invalid, + /// The value contained multiple range specifiers separated by commas. + #[error("expected single byte range, found multipart range")] + MultiRange, + /// The range unit is invalid + #[error("invalid range unit: {0}, expected: bytes")] + InvalidUnit(String), +} + /// Parses a `Range` request header value into a [`ByteRange`]. /// /// Only `bytes=` ranges with a single specifier are accepted. /// Multiple ranges and non-`bytes` units are rejected. -impl TryFrom<&str> for ByteRange { - type Error = RangeError; +impl FromStr for ByteRange { + type Err = RangeError; - fn try_from(value: &str) -> Result { + fn from_str(value: &str) -> Result { let lower = value.to_ascii_lowercase(); let Some(spec) = lower.strip_prefix("bytes=") else { let unit = lower.split_once('=').map_or(&*lower, |(u, _)| u); @@ -54,46 +110,6 @@ impl TryFrom<&str> for ByteRange { } } -impl ByteRange { - /// Formats this range as a `Range` request header value (e.g. `bytes=0-499`). - pub fn to_header_value(&self) -> HeaderValue { - let s = match self { - ByteRange::Bounded(a, b) => format!("bytes={a}-{b}"), - ByteRange::From(n) => format!("bytes={n}-"), - ByteRange::Last(n) => format!("bytes=-{n}"), - }; - HeaderValue::from_str(&s).expect("always a valid header value") - } - - /// Resolves this range against a known total size. - pub fn resolve(self, total: u64) -> Option { - if total == 0 { - return None; - } - - let (start, end) = match self { - ByteRange::Bounded(s, e) => { - if s >= total { - return None; - } - (s, e.min(total - 1)) - } - ByteRange::From(s) => { - if s >= total { - return None; - } - (s, total - 1) - } - ByteRange::Last(n) => { - let start = total.saturating_sub(n); - (start, total - 1) - } - }; - - Some(ContentRange { start, end, total }) - } -} - /// Describes which bytes of the full object are present in the response body. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ContentRange { @@ -105,8 +121,26 @@ pub struct ContentRange { pub total: u64, } +/// Parses a `Content-Range` response header value into a [`ContentRange`]. +impl FromStr for ContentRange { + type Err = RangeError; + + fn from_str(s: &str) -> Result { + let parse = || { + let rest = s.strip_prefix("bytes ")?; + let (range_part, total_str) = rest.split_once('/')?; + let total: u64 = total_str.parse().ok()?; + let (start_str, end_str) = range_part.split_once('-')?; + let start: u64 = start_str.parse().ok()?; + let end: u64 = end_str.parse().ok()?; + Some(Self { start, end, total }) + }; + parse().ok_or(RangeError::Invalid) + } +} + impl ContentRange { - /// Creates a [`ContentRange`] representing the entire object. + /// Creates a [`ContentRange`] representing an entire object. pub fn full(total: u64) -> Self { Self { start: 0, @@ -115,18 +149,12 @@ impl ContentRange { } } - /// Parses a `Content-Range` response header value (e.g. `bytes 0-499/1234`). - pub fn parse(header: &str) -> Option { - let rest = header.strip_prefix("bytes ")?; - let (range_part, total_str) = rest.split_once('/')?; - let total: u64 = total_str.parse().ok()?; - let (start_str, end_str) = range_part.split_once('-')?; - let start: u64 = start_str.parse().ok()?; - let end: u64 = end_str.parse().ok()?; - Some(Self { start, end, total }) - } - - /// Parses the total from an unsatisfiable `Content-Range` header (`bytes */1234`). + /// Parses the total from an unsatisfiable `Content-Range` response header value. + /// + /// An unsatisfiable `Content-Range` header value is of the form `bytes */1234`, where `1234` + /// represents the total size of the object. + /// This is communicated back to the client, so that it can make requests that make sense for + /// that total. pub fn parse_unsatisfiable_total(header: &str) -> Option { let rest = header.strip_prefix("bytes */")?; rest.parse().ok() @@ -147,10 +175,10 @@ impl ContentRange { /// Returns `true` if this range covers the entire object. pub fn is_full(&self) -> bool { - self.total == 0 || (self.start == 0 && self.len() == self.total) + self.start == 0 && self.len() == self.total } - /// Formats the value for a `Content-Range` response header. + /// Formats this range for a `Content-Range` response header. /// /// The returned value is always valid ASCII and can be inserted directly /// into an HTTP header map. @@ -170,20 +198,6 @@ impl fmt::Display for ContentRange { } } -/// Errors that can occur when parsing a `Range` header. -#[derive(Debug, Clone, PartialEq, Eq, Error)] -pub enum RangeError { - /// The value could not be parsed as a valid byte range. - #[error("invalid byte range")] - Invalid, - /// The value contained multiple range specifiers separated by commas. - #[error("expected single byte range, found multipart range")] - MultiRange, - /// The range unit is invalid - #[error("invalid range unit: {0}, expected: bytes")] - InvalidUnit(String), -} - #[cfg(test)] mod tests { use super::*; @@ -191,34 +205,34 @@ mod tests { #[test] fn parse_valid_ranges() { assert_eq!( - ByteRange::try_from("bytes=0-499"), + "bytes=0-499".parse::(), Ok(ByteRange::Bounded(0, 499)) ); - assert_eq!(ByteRange::try_from("bytes=500-"), Ok(ByteRange::From(500))); - assert_eq!(ByteRange::try_from("bytes=-100"), Ok(ByteRange::Last(100))); + assert_eq!("bytes=500-".parse::(), Ok(ByteRange::From(500))); + assert_eq!("bytes=-100".parse::(), Ok(ByteRange::Last(100))); // Case insensitive assert_eq!( - ByteRange::try_from("Bytes=0-499"), + "Bytes=0-499".parse::(), Ok(ByteRange::Bounded(0, 499)) ); - assert_eq!(ByteRange::try_from("BYTES=100-"), Ok(ByteRange::From(100))); + assert_eq!("BYTES=100-".parse::(), Ok(ByteRange::From(100))); } #[test] fn parse_invalid_ranges() { assert_eq!( - ByteRange::try_from("bytes=0-10, 20-30"), + "bytes=0-10, 20-30".parse::(), Err(RangeError::MultiRange) ); assert_eq!( - ByteRange::try_from("items=0-10"), + "items=0-10".parse::(), Err(RangeError::InvalidUnit("items".into())) ); assert_eq!( - ByteRange::try_from("bytes=500-100"), + "bytes=500-100".parse::(), Err(RangeError::Invalid) ); - assert_eq!(ByteRange::try_from("bytes=-0"), Err(RangeError::Invalid)); + assert_eq!("bytes=-0".parse::(), Err(RangeError::Invalid)); } #[test] @@ -239,7 +253,7 @@ mod tests { } #[test] - fn content_range_properties() { + fn content_range_methods() { let full = ContentRange::full(1000); assert_eq!( full, @@ -265,6 +279,19 @@ mod tests { assert!(zero.is_full()); } + #[test] + fn parse_unsatisfiable_total() { + assert_eq!( + ContentRange::parse_unsatisfiable_total("bytes */1234"), + Some(1234) + ); + assert_eq!( + ContentRange::parse_unsatisfiable_total("bytes 0-499/1234"), + None + ); + assert_eq!(ContentRange::parse_unsatisfiable_total("invalid"), None); + } + #[test] fn header_value_roundtrips() { assert_eq!(ByteRange::Bounded(0, 499).to_header_value(), "bytes=0-499"); @@ -277,21 +304,8 @@ mod tests { total: 1234, }; assert_eq!(cr.to_header_value(), "bytes 0-499/1234"); - assert_eq!(ContentRange::parse("bytes 0-499/1234"), Some(cr)); - assert_eq!(ContentRange::parse("bytes */1234"), None); - assert_eq!(ContentRange::parse("invalid"), None); - } - - #[test] - fn parse_unsatisfiable_total() { - assert_eq!( - ContentRange::parse_unsatisfiable_total("bytes */1234"), - Some(1234) - ); - assert_eq!( - ContentRange::parse_unsatisfiable_total("bytes 0-499/1234"), - None - ); - assert_eq!(ContentRange::parse_unsatisfiable_total("invalid"), None); + assert_eq!("bytes 0-499/1234".parse::(), Ok(cr)); + assert!("bytes */1234".parse::().is_err()); + assert!("invalid".parse::().is_err()); } } From 4025c2a296125b1af15454830385e49a22cf573b Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:17:46 +0200 Subject: [PATCH 17/45] improve range types a bit more --- objectstore-server/src/endpoints/objects.rs | 4 ++- objectstore-types/src/range.rs | 32 ++++++++++++++------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index b3bb6063..67b58d3d 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -74,6 +74,8 @@ async fn object_get( .map_err(|_| ApiError::Client("invalid Range header".into()))?; match header_str.parse::() { Ok(range) => Some(range), + // If we don't know the unit or the client wants multiple ranges, fall back to + // returning the whole object. Err(RangeError::InvalidUnit(_) | RangeError::MultiRange) => None, Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), } @@ -85,7 +87,7 @@ async fn object_get( let result = service.get_object(id, byte_range).await; let (metadata, content_range, stream) = match result { - Ok(Some(tuple)) => tuple, + Ok(Some(result)) => result, Ok(None) => return Ok(StatusCode::NOT_FOUND.into_response()), Err(ApiError::Service(ServiceError::RangeNotSatisfiable { total })) => { return Ok(( diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index 7e148253..ad692d0b 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -149,17 +149,6 @@ impl ContentRange { } } - /// Parses the total from an unsatisfiable `Content-Range` response header value. - /// - /// An unsatisfiable `Content-Range` header value is of the form `bytes */1234`, where `1234` - /// represents the total size of the object. - /// This is communicated back to the client, so that it can make requests that make sense for - /// that total. - pub fn parse_unsatisfiable_total(header: &str) -> Option { - let rest = header.strip_prefix("bytes */")?; - rest.parse().ok() - } - /// Returns the number of bytes in this range. pub fn len(&self) -> u64 { if self.total == 0 { @@ -190,6 +179,23 @@ impl ContentRange { pub fn len_to_header_value(&self) -> HeaderValue { HeaderValue::from_str(&self.len().to_string()).expect("always a valid header value") } + + /// Parses the total from an unsatisfiable `Content-Range` response header value. + /// + /// An unsatisfiable `Content-Range` header value is of the form `bytes */1234`, where `1234` + /// represents the total size of the object. + /// This is communicated back to the client, so that it can make requests that make sense for + /// that total. + pub fn parse_unsatisfiable_total(header: &str) -> Option { + let rest = header.strip_prefix("bytes */")?; + rest.parse().ok() + } + + /// Formats `total` as the total size of the object in an unsatisfiable `Content-Range` response header. + pub fn unsatisfiable_total_to_header_value(total: u64) -> HeaderValue { + HeaderValue::from_str(format!("bytes */{total}").as_str()) + .expect("always a valid header value") + } } impl fmt::Display for ContentRange { @@ -307,5 +313,9 @@ mod tests { assert_eq!("bytes 0-499/1234".parse::(), Ok(cr)); assert!("bytes */1234".parse::().is_err()); assert!("invalid".parse::().is_err()); + assert_eq!( + ContentRange::unsatisfiable_total_to_header_value(1234), + "bytes */1234" + ); } } From 676cf89b3701f34dc25c801ca19a0075aa10b55a Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:32:36 +0200 Subject: [PATCH 18/45] improve endpoint --- objectstore-server/src/endpoints/objects.rs | 47 ++++++++++++++------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 67b58d3d..d199641b 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -6,10 +6,11 @@ use axum::http::{HeaderMap, StatusCode}; use axum::response::{IntoResponse, Response}; use axum::routing; use axum::{Json, Router}; +use http::HeaderValue; use objectstore_service::error::Error as ServiceError; use objectstore_service::id::{ObjectContext, ObjectId}; use objectstore_types::metadata::Metadata; -use objectstore_types::range::{ByteRange, RangeError}; +use objectstore_types::range::{ByteRange, ContentRange, RangeError}; use serde::Serialize; use crate::auth::AuthAwareService; @@ -17,6 +18,13 @@ use crate::endpoints::common::{ApiError, ApiResult}; use crate::extractors::{Xt, body::MeteredBody}; use crate::state::ServiceState; +fn insert_accept_ranges(response: &mut Response) { + response.headers_mut().insert( + http::header::ACCEPT_RANGES, + HeaderValue::from_static("bytes"), + ); +} + pub fn router() -> Router { let collection_routes = routing::post(objects_post); let object_routes = routing::get(object_get) @@ -74,9 +82,15 @@ async fn object_get( .map_err(|_| ApiError::Client("invalid Range header".into()))?; match header_str.parse::() { Ok(range) => Some(range), - // If we don't know the unit or the client wants multiple ranges, fall back to - // returning the whole object. - Err(RangeError::InvalidUnit(_) | RangeError::MultiRange) => None, + // If the client wants multiple ranges, fall back to returning the whole object. + Err(RangeError::MultiRange) => { + objectstore_log::warn!( + "received range request with multiple range specifiers, ignoring" + ); + None + } + // The client requested an invalid unit or sent a malformed header. + // We could fall back, but better fail hard and let them know. Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), } } @@ -90,12 +104,16 @@ async fn object_get( Ok(Some(result)) => result, Ok(None) => return Ok(StatusCode::NOT_FOUND.into_response()), Err(ApiError::Service(ServiceError::RangeNotSatisfiable { total })) => { - return Ok(( + let mut response = ( StatusCode::RANGE_NOT_SATISFIABLE, - [(http::header::CONTENT_RANGE, format!("bytes */{total}"))], - [(http::header::ACCEPT_RANGES, "bytes")], + [( + http::header::CONTENT_RANGE, + ContentRange::unsatisfiable_total_to_header_value(total), + )], ) - .into_response()); + .into_response(); + insert_accept_ranges(&mut response); + return Ok(response); } Err(e) => return Err(e), }; @@ -111,13 +129,10 @@ async fn object_get( }; let mut response = (status, metadata_headers, Body::from_stream(stream)).into_response(); - let headers = response.headers_mut(); - headers.insert( - http::header::ACCEPT_RANGES, - http::header::HeaderValue::from_static("bytes"), - ); + insert_accept_ranges(&mut response); if is_partial { - // When `is_partial == false`, `CONTENT_LENGTH is already part of `metadata_headers`. + // When `is_partial == false`, `CONTENT_LENGTH` is already part of `metadata_headers`. + let headers = response.headers_mut(); headers.insert( http::header::CONTENT_LENGTH, content_range.len_to_header_value(), @@ -135,7 +150,9 @@ async fn object_head(service: AuthAwareService, Xt(id): Xt) -> ApiResu let headers = metadata.to_headers("").map_err(ServiceError::from)?; - Ok((StatusCode::NO_CONTENT, headers).into_response()) + let mut response = (StatusCode::NO_CONTENT, headers).into_response(); + insert_accept_ranges(&mut response); + Ok(response) } async fn object_put( From 7f653122289f1b98b751d78bfc7ffde533aa9a17 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:34:08 +0200 Subject: [PATCH 19/45] improve endpoint --- objectstore-server/src/endpoints/common.rs | 8 ++++++++ objectstore-server/src/endpoints/objects.rs | 10 +--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/objectstore-server/src/endpoints/common.rs b/objectstore-server/src/endpoints/common.rs index ae39b89e..e611dd6c 100644 --- a/objectstore-server/src/endpoints/common.rs +++ b/objectstore-server/src/endpoints/common.rs @@ -5,6 +5,7 @@ use std::error::Error; use axum::Json; use axum::http::StatusCode; use axum::response::{IntoResponse, Response}; +use http::HeaderValue; use objectstore_service::error::Error as ServiceError; use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -118,3 +119,10 @@ impl IntoResponse for ApiError { (self.status(), Json(body)).into_response() } } + +pub fn insert_accept_ranges(response: &mut Response) { + response.headers_mut().insert( + http::header::ACCEPT_RANGES, + HeaderValue::from_static("bytes"), + ); +} diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index d199641b..b97b6838 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -6,7 +6,6 @@ use axum::http::{HeaderMap, StatusCode}; use axum::response::{IntoResponse, Response}; use axum::routing; use axum::{Json, Router}; -use http::HeaderValue; use objectstore_service::error::Error as ServiceError; use objectstore_service::id::{ObjectContext, ObjectId}; use objectstore_types::metadata::Metadata; @@ -14,17 +13,10 @@ use objectstore_types::range::{ByteRange, ContentRange, RangeError}; use serde::Serialize; use crate::auth::AuthAwareService; -use crate::endpoints::common::{ApiError, ApiResult}; +use crate::endpoints::common::{ApiError, ApiResult, insert_accept_ranges}; use crate::extractors::{Xt, body::MeteredBody}; use crate::state::ServiceState; -fn insert_accept_ranges(response: &mut Response) { - response.headers_mut().insert( - http::header::ACCEPT_RANGES, - HeaderValue::from_static("bytes"), - ); -} - pub fn router() -> Router { let collection_routes = routing::post(objects_post); let object_routes = routing::get(object_get) From b47fb8cd969c75fc46b366a48cdf1d76050cef9c Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:41:05 +0200 Subject: [PATCH 20/45] implement in inmemory backend too --- objectstore-server/src/endpoints/common.rs | 1 + objectstore-service/src/backend/in_memory.rs | 39 ++++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/objectstore-server/src/endpoints/common.rs b/objectstore-server/src/endpoints/common.rs index e611dd6c..06b6014e 100644 --- a/objectstore-server/src/endpoints/common.rs +++ b/objectstore-server/src/endpoints/common.rs @@ -120,6 +120,7 @@ impl IntoResponse for ApiError { } } +/// Inserts `Accept-Ranges: bytes` into the response headers. pub fn insert_accept_ranges(response: &mut Response) { response.headers_mut().insert( http::header::ACCEPT_RANGES, diff --git a/objectstore-service/src/backend/in_memory.rs b/objectstore-service/src/backend/in_memory.rs index d3a58c0d..69770f5d 100644 --- a/objectstore-service/src/backend/in_memory.rs +++ b/objectstore-service/src/backend/in_memory.rs @@ -124,16 +124,30 @@ impl super::common::Backend for InMemoryBackend { Ok(()) } - async fn get_object(&self, id: &ObjectId, _range: Option) -> Result { + async fn get_object(&self, id: &ObjectId, range: Option) -> Result { let entry = self.store.lock().unwrap().get(id).cloned(); match entry { None => Ok(None), Some(StoreEntry::Tombstone(_)) => Err(Error::UnexpectedTombstone), Some(StoreEntry::Object(mut metadata, bytes)) => { + let total = bytes.len() as u64; metadata.size = Some(bytes.len()); - let content_range = ContentRange::full(bytes.len() as u64); - let stream = crate::stream::single(bytes); - Ok(Some((metadata, content_range, stream))) + let content_range = match range { + Some(r) => r + .resolve(total) + .ok_or(Error::RangeNotSatisfiable { total })?, + None => ContentRange::full(total), + }; + let payload = if content_range.is_full() { + bytes + } else { + bytes.slice(content_range.start as usize..=content_range.end as usize) + }; + Ok(Some(( + metadata, + content_range, + crate::stream::single(payload), + ))) } } } @@ -166,16 +180,27 @@ impl HighVolumeBackend for InMemoryBackend { async fn get_tiered_object( &self, id: &ObjectId, - _range: Option, + range: Option, ) -> Result { let entry = self.store.lock().unwrap().get(id).cloned(); Ok(match entry { None => TieredGet::NotFound, Some(StoreEntry::Tombstone(tombstone)) => TieredGet::Tombstone(tombstone), Some(StoreEntry::Object(mut metadata, bytes)) => { + let total = bytes.len() as u64; metadata.size = Some(bytes.len()); - let content_range = ContentRange::full(bytes.len() as u64); - TieredGet::Object(metadata, content_range, crate::stream::single(bytes)) + let content_range = match range { + Some(r) => r + .resolve(total) + .ok_or(Error::RangeNotSatisfiable { total })?, + None => ContentRange::full(total), + }; + let payload = if content_range.is_full() { + bytes + } else { + bytes.slice(content_range.start as usize..=content_range.end as usize) + }; + TieredGet::Object(metadata, content_range, crate::stream::single(payload)) } }) } From dab4e62b599de632179180220ba37098f69ec20c Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:44:10 +0200 Subject: [PATCH 21/45] improve --- objectstore-service/src/backend/local_fs.rs | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/objectstore-service/src/backend/local_fs.rs b/objectstore-service/src/backend/local_fs.rs index 3edfff60..75edef39 100644 --- a/objectstore-service/src/backend/local_fs.rs +++ b/objectstore-service/src/backend/local_fs.rs @@ -10,7 +10,7 @@ use futures_util::StreamExt; use objectstore_types::metadata::Metadata; use objectstore_types::range::{ByteRange, ContentRange}; use tokio::fs::OpenOptions; -use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader, BufWriter}; +use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader, BufWriter}; use tokio_util::io::{ReaderStream, StreamReader}; use crate::backend::common::{ @@ -159,18 +159,18 @@ impl Backend for LocalFsBackend { None => ContentRange::full(payload_size), }; - if content_range.is_full() { + let stream = if content_range.is_full() { let stream = ReaderStream::new(reader); - return Ok(Some((metadata, content_range, stream.boxed()))); - } - - use tokio::io::AsyncSeekExt; - reader - .seek(std::io::SeekFrom::Current(content_range.start as i64)) - .await?; - let limited = reader.take(content_range.len()); - let stream = ReaderStream::new(limited); - Ok(Some((metadata, content_range, stream.boxed()))) + stream.boxed() + } else { + reader + .seek(std::io::SeekFrom::Current(content_range.start as i64)) + .await?; + let limited = reader.take(content_range.len()); + let stream = ReaderStream::new(limited); + stream.boxed() + }; + Ok(Some((metadata, content_range, stream))) } #[tracing::instrument(level = "trace", fields(?id), skip_all)] From 94e432b18cb11874a68d9b94da424e9252bf9219 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:46:26 +0200 Subject: [PATCH 22/45] improve localfs --- objectstore-service/src/backend/local_fs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/objectstore-service/src/backend/local_fs.rs b/objectstore-service/src/backend/local_fs.rs index 75edef39..03efe0f9 100644 --- a/objectstore-service/src/backend/local_fs.rs +++ b/objectstore-service/src/backend/local_fs.rs @@ -149,7 +149,7 @@ impl Backend for LocalFsBackend { let content_range = match range { Some(byte_range) => match byte_range.resolve(payload_size) { - Some(cr) => cr, + Some(content_range) => content_range, None => { return Err(Error::RangeNotSatisfiable { total: payload_size, From 9ce1aea65b9ff2aa6ab7495bdcbabcb4192bfa15 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:51:44 +0200 Subject: [PATCH 23/45] improve gcs --- objectstore-service/src/backend/gcs.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 73d97398..092f3b66 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -683,12 +683,20 @@ impl Backend for GcsBackend { .map_err(|e| Error::reqwest("GCS: get payload", e))?; if resp.status() == StatusCode::RANGE_NOT_SATISFIABLE { - let total = resp + let header = resp .headers() .get(header::CONTENT_RANGE) - .and_then(|v| v.to_str().ok()) + .and_then(|v| v.to_str().ok().map(str::to_owned)); + let total = header + .as_deref() .and_then(ContentRange::parse_unsatisfiable_total) - .unwrap_or(0); + .unwrap_or_else(|| { + objectstore_log::error!( + value = header, + "GCS: got 416 response with invalid Content-Range header", + ); + 0 + }); return Err(Error::RangeNotSatisfiable { total }); } From 6abeae8506022da364a417febcd5ebb684469488 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:57:00 +0200 Subject: [PATCH 24/45] improve gcs --- objectstore-service/src/backend/gcs.rs | 28 ++++++++++++++------------ 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 092f3b66..18494013 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -683,21 +683,23 @@ impl Backend for GcsBackend { .map_err(|e| Error::reqwest("GCS: get payload", e))?; if resp.status() == StatusCode::RANGE_NOT_SATISFIABLE { - let header = resp + let raw = resp .headers() .get(header::CONTENT_RANGE) - .and_then(|v| v.to_str().ok().map(str::to_owned)); - let total = header - .as_deref() - .and_then(ContentRange::parse_unsatisfiable_total) - .unwrap_or_else(|| { - objectstore_log::error!( - value = header, - "GCS: got 416 response with invalid Content-Range header", - ); - 0 - }); - return Err(Error::RangeNotSatisfiable { total }); + .and_then(|v| v.to_str().ok()); + let total = raw.and_then(ContentRange::parse_unsatisfiable_total); + match total { + Some(total) => return Err(Error::RangeNotSatisfiable { total }), + None => { + return Err(Error::Generic { + context: format!( + "GCS: 416 response with invalid Content-Range: {:?}", + raw + ), + cause: None, + }); + } + } } resp.error_for_status() From 0ee726369639e84ccf55ae38017c0a9981e08028 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:15:37 +0200 Subject: [PATCH 25/45] improve --- objectstore-server/src/endpoints/objects.rs | 1 - objectstore-service/src/backend/tiered.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index b97b6838..9e424a95 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -123,7 +123,6 @@ async fn object_get( insert_accept_ranges(&mut response); if is_partial { - // When `is_partial == false`, `CONTENT_LENGTH` is already part of `metadata_headers`. let headers = response.headers_mut(); headers.insert( http::header::CONTENT_LENGTH, diff --git a/objectstore-service/src/backend/tiered.rs b/objectstore-service/src/backend/tiered.rs index c596ae19..dc93650f 100644 --- a/objectstore-service/src/backend/tiered.rs +++ b/objectstore-service/src/backend/tiered.rs @@ -445,7 +445,7 @@ impl Backend for TieredStorage { if let Some((_, ref content_range, _)) = result { objectstore_metrics::record!( - "get.size" = content_range.len(), + "get.size" = content_range.total, usecase = id.usecase().to_owned(), backend_choice = backend_choice.as_str(), backend_type = backend_type, From 367aa2f7480af19de7a129386ad260bc88b7d30d Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:18:54 +0200 Subject: [PATCH 26/45] improve --- objectstore-service/src/backend/in_memory.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/objectstore-service/src/backend/in_memory.rs b/objectstore-service/src/backend/in_memory.rs index 69770f5d..b5314d51 100644 --- a/objectstore-service/src/backend/in_memory.rs +++ b/objectstore-service/src/backend/in_memory.rs @@ -133,7 +133,7 @@ impl super::common::Backend for InMemoryBackend { let total = bytes.len() as u64; metadata.size = Some(bytes.len()); let content_range = match range { - Some(r) => r + Some(range) => range .resolve(total) .ok_or(Error::RangeNotSatisfiable { total })?, None => ContentRange::full(total), @@ -190,7 +190,7 @@ impl HighVolumeBackend for InMemoryBackend { let total = bytes.len() as u64; metadata.size = Some(bytes.len()); let content_range = match range { - Some(r) => r + Some(range) => range .resolve(total) .ok_or(Error::RangeNotSatisfiable { total })?, None => ContentRange::full(total), From fdeb8463bceac74109982482b007aed9bda044fc Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:23:27 +0200 Subject: [PATCH 27/45] s3 like gcs --- .../src/backend/s3_compatible.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index b999b9e2..7d759767 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -183,13 +183,20 @@ where } if response.status() == StatusCode::RANGE_NOT_SATISFIABLE { - let total = response + let raw = response .headers() .get(reqwest::header::CONTENT_RANGE) - .and_then(|v| v.to_str().ok()) - .and_then(ContentRange::parse_unsatisfiable_total) - .unwrap_or(0); - return Err(Error::RangeNotSatisfiable { total }); + .and_then(|v| v.to_str().ok()); + let total = raw.and_then(ContentRange::parse_unsatisfiable_total); + match total { + Some(total) => return Err(Error::RangeNotSatisfiable { total }), + None => { + return Err(Error::Generic { + context: format!("S3: 416 response with invalid Content-Range: {:?}", raw), + cause: None, + }); + } + } } let response = response From cedf351170e10a966334a0aa594a346c2d6e37d0 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:27:44 +0200 Subject: [PATCH 28/45] improve --- objectstore-service/src/backend/tiered.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/objectstore-service/src/backend/tiered.rs b/objectstore-service/src/backend/tiered.rs index dc93650f..c596ae19 100644 --- a/objectstore-service/src/backend/tiered.rs +++ b/objectstore-service/src/backend/tiered.rs @@ -445,7 +445,7 @@ impl Backend for TieredStorage { if let Some((_, ref content_range, _)) = result { objectstore_metrics::record!( - "get.size" = content_range.total, + "get.size" = content_range.len(), usecase = id.usecase().to_owned(), backend_choice = backend_choice.as_str(), backend_type = backend_type, From e707a5c530bfbb92f166edaa77977f4fe0d7e90e Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:31:07 +0200 Subject: [PATCH 29/45] improve --- objectstore-service/src/backend/s3_compatible.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 7d759767..7ba9d9e1 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -208,8 +208,6 @@ where let headers = response.headers(); let mut metadata = Metadata::from_headers(headers, GCS_CUSTOM_PREFIX)?; - // For partial responses, Content-Length is the range body length, not the full object size. - // Use the Content-Range total instead. metadata.size = if response.status() == StatusCode::PARTIAL_CONTENT { headers .get(reqwest::header::CONTENT_RANGE) @@ -219,6 +217,7 @@ where } else { response.content_length().map(|len| len as usize) }; + objectstore_log::warn!("S3: failed to retrieve metadata.size"); // TODO: Schedule into background persistently so this doesn't get lost on restarts if let ExpirationPolicy::TimeToIdle(tti) = metadata.expiration_policy { From 83a1899c25a9b13d9962bb46cf011b917fb9423e Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:41:14 +0200 Subject: [PATCH 30/45] improve s3 --- .../src/backend/s3_compatible.rs | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 7ba9d9e1..9410dfd0 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -165,7 +165,7 @@ where method: Method, id: &ObjectId, range: Option, - ) -> Result> { + ) -> Result> { let object_url = self.object_url(id); let mut builder = self.request(method, &object_url).await?; @@ -208,16 +208,27 @@ where let headers = response.headers(); let mut metadata = Metadata::from_headers(headers, GCS_CUSTOM_PREFIX)?; - metadata.size = if response.status() == StatusCode::PARTIAL_CONTENT { + + let content_range = if response.status() == StatusCode::PARTIAL_CONTENT { headers .get(reqwest::header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) .and_then(|s| s.parse::().ok()) - .map(|cr| cr.total as usize) + .ok_or_else(|| Error::Generic { + context: "S3: 206 response missing valid Content-Range header".to_owned(), + cause: None, + })? } else { - response.content_length().map(|len| len as usize) + let total = match response.content_length() { + Some(len) => len, + None => { + objectstore_log::warn!("S3: 200 response missing Content-Length header"); + 0 + } + }; + ContentRange::full(total) }; - objectstore_log::warn!("S3: failed to retrieve metadata.size"); + metadata.size = Some(content_range.total as usize); // TODO: Schedule into background persistently so this doesn't get lost on restarts if let ExpirationPolicy::TimeToIdle(tti) = metadata.expiration_policy { @@ -235,7 +246,7 @@ where } } - Ok(Some((metadata, response))) + Ok(Some((metadata, content_range, response))) } /// Issues a request to update the metadata for the given object. @@ -324,24 +335,12 @@ impl Backend for S3CompatibleBackend { async fn get_object(&self, id: &ObjectId, range: Option) -> Result { objectstore_log::debug!("Reading from s3_compatible backend"); - let Some((metadata, response)) = self.request_object(Method::GET, id, range).await? else { + let Some((metadata, content_range, response)) = + self.request_object(Method::GET, id, range).await? + else { return Ok(None); }; - let content_range = if response.status() == StatusCode::PARTIAL_CONTENT { - response - .headers() - .get(reqwest::header::CONTENT_RANGE) - .and_then(|v| v.to_str().ok()) - .and_then(|s| s.parse::().ok()) - .ok_or_else(|| Error::Generic { - context: "S3: 206 response missing valid Content-Range header".to_owned(), - cause: None, - })? - } else { - ContentRange::full(metadata.size.unwrap_or(0) as u64) - }; - let stream = response.bytes_stream().map_err(io::Error::other); Ok(Some((metadata, content_range, stream.boxed()))) } @@ -350,7 +349,7 @@ impl Backend for S3CompatibleBackend { async fn get_metadata(&self, id: &ObjectId) -> Result { objectstore_log::debug!("Reading metadata from s3_compatible backend"); let response = self.request_object(Method::HEAD, id, None).await?; - Ok(response.map(|(metadata, _)| metadata)) + Ok(response.map(|(metadata, _, _)| metadata)) } #[tracing::instrument(level = "trace", fields(?id), skip_all)] From 61e7cec92ddfabd976bd09e60e9d05da1d11533b Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 16:07:24 +0200 Subject: [PATCH 31/45] fix --- objectstore-server/src/endpoints/objects.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 9e424a95..dc56661c 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -74,7 +74,14 @@ async fn object_get( .map_err(|_| ApiError::Client("invalid Range header".into()))?; match header_str.parse::() { Ok(range) => Some(range), + // We are free to decide whether we want to fall back to the whole object or error. + // Per RFC 9110: + // > A server that supports range requests MAY ignore or reject a Range header + // field that contains an invalid ranges-specifier [...] + // // If the client wants multiple ranges, fall back to returning the whole object. + // We might support multiple ranges in the future, so log a warning to let us know + // clients are trying to do this. Err(RangeError::MultiRange) => { objectstore_log::warn!( "received range request with multiple range specifiers, ignoring" @@ -82,7 +89,8 @@ async fn object_get( None } // The client requested an invalid unit or sent a malformed header. - // We could fall back, but better fail hard and let them know. + // We could fall back, but better fail hard and let them know they requested + // something we won't support. Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), } } From ee6fa4a8717ce86ddd2e704fc43b0c765ddb352f Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 16:08:45 +0200 Subject: [PATCH 32/45] fix --- objectstore-server/tests/range_requests.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/objectstore-server/tests/range_requests.rs b/objectstore-server/tests/range_requests.rs index 9307f776..bd05aa0c 100644 --- a/objectstore-server/tests/range_requests.rs +++ b/objectstore-server/tests/range_requests.rs @@ -132,7 +132,7 @@ async fn range_from_offset_returns_206() -> Result<()> { } #[tokio::test] -async fn unknown_range_unit_returns_200_full_body() -> Result<()> { +async fn unknown_range_unit_returns_400() -> Result<()> { let (server, key) = setup().await; let client = reqwest::Client::new(); @@ -142,9 +142,7 @@ async fn unknown_range_unit_returns_200_full_body() -> Result<()> { .send() .await?; - assert_eq!(resp.status(), reqwest::StatusCode::OK); - let body = resp.text().await?; - assert_eq!(body, "Hello, Range Requests!"); + assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST); Ok(()) } From 8d40bb4553066e7c40028b437df7503b228eb687 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 16:15:29 +0200 Subject: [PATCH 33/45] add a test --- objectstore-server/tests/range_requests.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/objectstore-server/tests/range_requests.rs b/objectstore-server/tests/range_requests.rs index bd05aa0c..4d5cf613 100644 --- a/objectstore-server/tests/range_requests.rs +++ b/objectstore-server/tests/range_requests.rs @@ -56,6 +56,24 @@ async fn no_range_returns_200_with_accept_ranges() -> Result<()> { Ok(()) } +#[tokio::test] +async fn head_returns_accept_ranges() -> Result<()> { + let (server, key) = setup().await; + let client = reqwest::Client::new(); + + let resp = client + .head(server.url(&format!("/v1/objects/test/org=1/{key}"))) + .send() + .await?; + + assert_eq!(resp.status(), reqwest::StatusCode::NO_CONTENT); + assert_eq!( + resp.headers().get("accept-ranges").unwrap().to_str()?, + "bytes" + ); + Ok(()) +} + #[tokio::test] async fn range_prefix_returns_206() -> Result<()> { let (server, key) = setup().await; From a47e329eb1506baa7bc451e15f7b40dbfa9d6977 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 16:31:26 +0200 Subject: [PATCH 34/45] fix --- .../src/backend/s3_compatible.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 9410dfd0..033b7103 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -210,25 +210,28 @@ where let mut metadata = Metadata::from_headers(headers, GCS_CUSTOM_PREFIX)?; let content_range = if response.status() == StatusCode::PARTIAL_CONTENT { - headers + let range = headers .get(reqwest::header::CONTENT_RANGE) .and_then(|v| v.to_str().ok()) .and_then(|s| s.parse::().ok()) .ok_or_else(|| Error::Generic { context: "S3: 206 response missing valid Content-Range header".to_owned(), cause: None, - })? + })?; + metadata.size = Some(range.total as usize); + range } else { - let total = match response.content_length() { - Some(len) => len, + match response.content_length() { + Some(len) => { + metadata.size = Some(len as usize); + ContentRange::full(len) + } None => { objectstore_log::warn!("S3: 200 response missing Content-Length header"); - 0 + ContentRange::full(metadata.size.unwrap_or(0) as u64) } - }; - ContentRange::full(total) + } }; - metadata.size = Some(content_range.total as usize); // TODO: Schedule into background persistently so this doesn't get lost on restarts if let ExpirationPolicy::TimeToIdle(tti) = metadata.expiration_policy { From 5eb797c9aba1aadc67f9943072202e12f47edc76 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 16:58:17 +0200 Subject: [PATCH 35/45] add tests --- objectstore-service/src/backend/local_fs.rs | 96 +++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/objectstore-service/src/backend/local_fs.rs b/objectstore-service/src/backend/local_fs.rs index 03efe0f9..757c73ea 100644 --- a/objectstore-service/src/backend/local_fs.rs +++ b/objectstore-service/src/backend/local_fs.rs @@ -756,6 +756,102 @@ mod tests { backend.abort_multipart(&id, &upload_id).await.unwrap(); } + #[tokio::test] + async fn get_object_range_bounded() { + let (_tempdir, backend) = make_backend(); + let id = make_id(); + let metadata = Metadata::default(); + + let payload = b"Hello, range requests!"; + backend + .put_object(&id, &metadata, stream::single(payload.to_vec())) + .await + .unwrap(); + + // Request bytes 7-11 → "range" + let (_, content_range, body) = backend + .get_object(&id, Some(ByteRange::Bounded(7, 11))) + .await + .unwrap() + .unwrap(); + let data: BytesMut = body.try_collect().await.unwrap(); + + assert_eq!(data.as_ref(), b"range"); + assert_eq!(content_range.start, 7); + assert_eq!(content_range.end, 11); + assert_eq!(content_range.total, payload.len() as u64); + } + + #[tokio::test] + async fn get_object_range_from() { + let (_tempdir, backend) = make_backend(); + let id = make_id(); + let metadata = Metadata::default(); + + let payload = b"Hello, range requests!"; + backend + .put_object(&id, &metadata, stream::single(payload.to_vec())) + .await + .unwrap(); + + // Request bytes 7- → "range requests!" + let (_, content_range, body) = backend + .get_object(&id, Some(ByteRange::From(7))) + .await + .unwrap() + .unwrap(); + let data: BytesMut = body.try_collect().await.unwrap(); + + assert_eq!(data.as_ref(), b"range requests!"); + assert_eq!(content_range.start, 7); + assert_eq!(content_range.end, 21); + assert_eq!(content_range.total, payload.len() as u64); + } + + #[tokio::test] + async fn get_object_range_last() { + let (_tempdir, backend) = make_backend(); + let id = make_id(); + let metadata = Metadata::default(); + + let payload = b"Hello, range requests!"; + backend + .put_object(&id, &metadata, stream::single(payload.to_vec())) + .await + .unwrap(); + + // Request last 9 bytes → "requests!" + let (_, content_range, body) = backend + .get_object(&id, Some(ByteRange::Last(9))) + .await + .unwrap() + .unwrap(); + let data: BytesMut = body.try_collect().await.unwrap(); + + assert_eq!(data.as_ref(), b"requests!"); + assert_eq!(content_range.start, 13); + assert_eq!(content_range.end, 21); + assert_eq!(content_range.total, payload.len() as u64); + } + + #[tokio::test] + async fn get_object_range_unsatisfiable() { + let (_tempdir, backend) = make_backend(); + let id = make_id(); + let metadata = Metadata::default(); + + backend + .put_object(&id, &metadata, stream::single(b"short".to_vec())) + .await + .unwrap(); + + match backend.get_object(&id, Some(ByteRange::From(100))).await { + Err(Error::RangeNotSatisfiable { total: 5 }) => {} + Err(other) => panic!("expected RangeNotSatisfiable, got: {other:?}"), + Ok(_) => panic!("expected RangeNotSatisfiable, got Ok"), + } + } + #[tokio::test] async fn multipart_abort() { let (_tempdir, backend) = make_backend(); From 302b1c40ee5a22187b47a6137af8b43378d483b7 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Mon, 1 Jun 2026 17:09:06 +0200 Subject: [PATCH 36/45] reject invalid bounds --- objectstore-types/src/range.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index ad692d0b..f3fe0a48 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -133,6 +133,9 @@ impl FromStr for ContentRange { let (start_str, end_str) = range_part.split_once('-')?; let start: u64 = start_str.parse().ok()?; let end: u64 = end_str.parse().ok()?; + if start > end || end >= total { + return None; + } Some(Self { start, end, total }) }; parse().ok_or(RangeError::Invalid) @@ -313,6 +316,10 @@ mod tests { assert_eq!("bytes 0-499/1234".parse::(), Ok(cr)); assert!("bytes */1234".parse::().is_err()); assert!("invalid".parse::().is_err()); + // Inverted bounds + assert!("bytes 499-0/1234".parse::().is_err()); + // End beyond total + assert!("bytes 0-1234/1234".parse::().is_err()); assert_eq!( ContentRange::unsatisfiable_total_to_header_value(1234), "bytes */1234" From 0e21c593bc2996d8020a0a0e84bae85be0b2d7b3 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 2 Jun 2026 12:57:28 +0200 Subject: [PATCH 37/45] improve --- objectstore-server/src/endpoints/objects.rs | 31 +++++++------ objectstore-server/tests/range_requests.rs | 23 ---------- objectstore-service/src/backend/bigtable.rs | 5 +-- objectstore-service/src/backend/common.rs | 4 +- objectstore-service/src/backend/gcs.rs | 22 ++++----- objectstore-service/src/backend/in_memory.rs | 42 ++++++++--------- objectstore-service/src/backend/local_fs.rs | 39 +++++++--------- .../src/backend/s3_compatible.rs | 16 +++---- objectstore-service/src/backend/tiered.rs | 19 +++++--- objectstore-service/src/service.rs | 2 +- objectstore-types/src/range.rs | 45 +++---------------- 11 files changed, 96 insertions(+), 152 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index dc56661c..1bc2f78a 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -121,23 +121,26 @@ async fn object_get( let stream = state.meter_stream(stream, &context); let metadata_headers = metadata.to_headers("").map_err(ServiceError::from)?; - let is_partial = !content_range.is_full(); - let status = if is_partial { - StatusCode::PARTIAL_CONTENT - } else { - StatusCode::OK + let mut response = match content_range { + Some(ref content_range) => { + let mut resp = ( + StatusCode::PARTIAL_CONTENT, + metadata_headers, + Body::from_stream(stream), + ) + .into_response(); + let headers = resp.headers_mut(); + headers.insert( + http::header::CONTENT_LENGTH, + content_range.len_to_header_value(), + ); + headers.insert(http::header::CONTENT_RANGE, content_range.to_header_value()); + resp + } + None => (StatusCode::OK, metadata_headers, Body::from_stream(stream)).into_response(), }; - let mut response = (status, metadata_headers, Body::from_stream(stream)).into_response(); insert_accept_ranges(&mut response); - if is_partial { - let headers = response.headers_mut(); - headers.insert( - http::header::CONTENT_LENGTH, - content_range.len_to_header_value(), - ); - headers.insert(http::header::CONTENT_RANGE, content_range.to_header_value()); - } Ok(response) } diff --git a/objectstore-server/tests/range_requests.rs b/objectstore-server/tests/range_requests.rs index 4d5cf613..ca5afd4d 100644 --- a/objectstore-server/tests/range_requests.rs +++ b/objectstore-server/tests/range_requests.rs @@ -233,26 +233,3 @@ async fn range_on_nonexistent_object_returns_404() -> Result<()> { assert_eq!(resp.status(), reqwest::StatusCode::NOT_FOUND); Ok(()) } - -#[tokio::test] -async fn full_range_returns_200() -> Result<()> { - let (server, key) = setup().await; - let client = reqwest::Client::new(); - - // Request the full object as a range — should still get 200 since it's the full content. - let resp = client - .get(server.url(&format!("/v1/objects/test/org=1/{key}"))) - .header("range", "bytes=0-21") - .send() - .await?; - - assert_eq!(resp.status(), reqwest::StatusCode::OK); - assert!( - resp.headers().get("content-range").is_none(), - "full-object range should be 200 without Content-Range" - ); - - let body = resp.text().await?; - assert_eq!(body, "Hello, Range Requests!"); - Ok(()) -} diff --git a/objectstore-service/src/backend/bigtable.rs b/objectstore-service/src/backend/bigtable.rs index 9645bb91..480bc7cc 100644 --- a/objectstore-service/src/backend/bigtable.rs +++ b/objectstore-service/src/backend/bigtable.rs @@ -38,7 +38,7 @@ use bigtable_rs::google::bigtable::v2::{self, mutation}; use bytes::Bytes; use futures_util::TryStreamExt; use objectstore_types::metadata::{ExpirationPolicy, Metadata}; -use objectstore_types::range::{ByteRange, ContentRange}; +use objectstore_types::range::ByteRange; use serde::{Deserialize, Serialize}; use tonic::Code; @@ -1015,8 +1015,7 @@ impl HighVolumeBackend for BigTableBackend { RowData::Object { metadata, payload } => { let mut metadata = metadata; metadata.size = Some(payload.len()); - let content_range = ContentRange::full(payload.len() as u64); - TieredGet::Object(metadata, content_range, crate::stream::single(payload)) + TieredGet::Object(metadata, None, crate::stream::single(payload)) } }) } diff --git a/objectstore-service/src/backend/common.rs b/objectstore-service/src/backend/common.rs index 17af1cc2..87705b63 100644 --- a/objectstore-service/src/backend/common.rs +++ b/objectstore-service/src/backend/common.rs @@ -24,7 +24,7 @@ pub const USER_AGENT: &str = concat!("sentry-objectstore/", env!("CARGO_PKG_VERS /// Backend response for put operations. pub type PutResponse = (); /// Backend response for get operations. -pub type GetResponse = Option<(Metadata, ContentRange, PayloadStream)>; +pub type GetResponse = Option<(Metadata, Option, PayloadStream)>; /// Backend response for metadata-only get operations. pub type MetadataResponse = Option; /// Backend response for delete operations. @@ -212,7 +212,7 @@ pub struct Tombstone { /// Typed response from [`HighVolumeBackend::get_tiered_object`]. pub enum TieredGet { /// A real object was found. - Object(Metadata, ContentRange, PayloadStream), + Object(Metadata, Option, PayloadStream), /// A redirect tombstone was found; the real object lives in the long-term backend. Tombstone(Tombstone), /// No entry exists at this key. diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 18494013..70f7b73a 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -708,17 +708,19 @@ impl Backend for GcsBackend { .await?; let content_range = if payload_response.status() == StatusCode::PARTIAL_CONTENT { - payload_response - .headers() - .get(header::CONTENT_RANGE) - .and_then(|v| v.to_str().ok()) - .and_then(|s| s.parse::().ok()) - .ok_or_else(|| Error::Generic { - context: "GCS: 206 response missing valid Content-Range header".to_owned(), - cause: None, - })? + Some( + payload_response + .headers() + .get(header::CONTENT_RANGE) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + .ok_or_else(|| Error::Generic { + context: "GCS: 206 response missing valid Content-Range header".to_owned(), + cause: None, + })?, + ) } else { - ContentRange::full(metadata.size.unwrap_or(0) as u64) + None }; let stream = payload_response diff --git a/objectstore-service/src/backend/in_memory.rs b/objectstore-service/src/backend/in_memory.rs index b5314d51..4e855c22 100644 --- a/objectstore-service/src/backend/in_memory.rs +++ b/objectstore-service/src/backend/in_memory.rs @@ -9,7 +9,7 @@ use std::collections::{BTreeMap, HashMap}; use std::sync::{Arc, Mutex}; use std::time::SystemTime; -use objectstore_types::range::{ByteRange, ContentRange}; +use objectstore_types::range::ByteRange; use bytes::{Bytes, BytesMut}; use futures_util::TryStreamExt; @@ -132,16 +132,16 @@ impl super::common::Backend for InMemoryBackend { Some(StoreEntry::Object(mut metadata, bytes)) => { let total = bytes.len() as u64; metadata.size = Some(bytes.len()); - let content_range = match range { - Some(range) => range - .resolve(total) - .ok_or(Error::RangeNotSatisfiable { total })?, - None => ContentRange::full(total), - }; - let payload = if content_range.is_full() { - bytes - } else { - bytes.slice(content_range.start as usize..=content_range.end as usize) + let (content_range, payload) = match range { + Some(range) => { + let content_range = range + .resolve(total) + .ok_or(Error::RangeNotSatisfiable { total })?; + let sliced = + bytes.slice(content_range.start as usize..=content_range.end as usize); + (Some(content_range), sliced) + } + None => (None, bytes), }; Ok(Some(( metadata, @@ -189,16 +189,16 @@ impl HighVolumeBackend for InMemoryBackend { Some(StoreEntry::Object(mut metadata, bytes)) => { let total = bytes.len() as u64; metadata.size = Some(bytes.len()); - let content_range = match range { - Some(range) => range - .resolve(total) - .ok_or(Error::RangeNotSatisfiable { total })?, - None => ContentRange::full(total), - }; - let payload = if content_range.is_full() { - bytes - } else { - bytes.slice(content_range.start as usize..=content_range.end as usize) + let (content_range, payload) = match range { + Some(range) => { + let content_range = range + .resolve(total) + .ok_or(Error::RangeNotSatisfiable { total })?; + let sliced = + bytes.slice(content_range.start as usize..=content_range.end as usize); + (Some(content_range), sliced) + } + None => (None, bytes), }; TieredGet::Object(metadata, content_range, crate::stream::single(payload)) } diff --git a/objectstore-service/src/backend/local_fs.rs b/objectstore-service/src/backend/local_fs.rs index 757c73ea..5d35e440 100644 --- a/objectstore-service/src/backend/local_fs.rs +++ b/objectstore-service/src/backend/local_fs.rs @@ -8,7 +8,7 @@ use std::time::SystemTime; use futures_util::StreamExt; use objectstore_types::metadata::Metadata; -use objectstore_types::range::{ByteRange, ContentRange}; +use objectstore_types::range::ByteRange; use tokio::fs::OpenOptions; use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader, BufWriter}; use tokio_util::io::{ReaderStream, StreamReader}; @@ -147,28 +147,20 @@ impl Backend for LocalFsBackend { .ok_or_else(|| Error::generic("local-fs file corrupted: shorter than header"))?; metadata.size = Some(payload_size as usize); - let content_range = match range { - Some(byte_range) => match byte_range.resolve(payload_size) { - Some(content_range) => content_range, - None => { - return Err(Error::RangeNotSatisfiable { + let (content_range, stream) = match range { + Some(byte_range) => { + let content_range = byte_range + .resolve(payload_size) + .ok_or(Error::RangeNotSatisfiable { total: payload_size, - }); - } - }, - None => ContentRange::full(payload_size), - }; - - let stream = if content_range.is_full() { - let stream = ReaderStream::new(reader); - stream.boxed() - } else { - reader - .seek(std::io::SeekFrom::Current(content_range.start as i64)) - .await?; - let limited = reader.take(content_range.len()); - let stream = ReaderStream::new(limited); - stream.boxed() + })?; + reader + .seek(std::io::SeekFrom::Current(content_range.start as i64)) + .await?; + let limited = reader.take(content_range.len()); + (Some(content_range), ReaderStream::new(limited).boxed()) + } + None => (None, ReaderStream::new(reader).boxed()), }; Ok(Some((metadata, content_range, stream))) } @@ -777,6 +769,7 @@ mod tests { let data: BytesMut = body.try_collect().await.unwrap(); assert_eq!(data.as_ref(), b"range"); + let content_range = content_range.unwrap(); assert_eq!(content_range.start, 7); assert_eq!(content_range.end, 11); assert_eq!(content_range.total, payload.len() as u64); @@ -803,6 +796,7 @@ mod tests { let data: BytesMut = body.try_collect().await.unwrap(); assert_eq!(data.as_ref(), b"range requests!"); + let content_range = content_range.unwrap(); assert_eq!(content_range.start, 7); assert_eq!(content_range.end, 21); assert_eq!(content_range.total, payload.len() as u64); @@ -829,6 +823,7 @@ mod tests { let data: BytesMut = body.try_collect().await.unwrap(); assert_eq!(data.as_ref(), b"requests!"); + let content_range = content_range.unwrap(); assert_eq!(content_range.start, 13); assert_eq!(content_range.end, 21); assert_eq!(content_range.total, payload.len() as u64); diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 033b7103..8bc68297 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -165,7 +165,7 @@ where method: Method, id: &ObjectId, range: Option, - ) -> Result> { + ) -> Result, reqwest::Response)>> { let object_url = self.object_url(id); let mut builder = self.request(method, &object_url).await?; @@ -219,18 +219,12 @@ where cause: None, })?; metadata.size = Some(range.total as usize); - range + Some(range) } else { - match response.content_length() { - Some(len) => { - metadata.size = Some(len as usize); - ContentRange::full(len) - } - None => { - objectstore_log::warn!("S3: 200 response missing Content-Length header"); - ContentRange::full(metadata.size.unwrap_or(0) as u64) - } + if let Some(len) = response.content_length() { + metadata.size = Some(len as usize); } + None }; // TODO: Schedule into background persistently so this doesn't get lost on restarts diff --git a/objectstore-service/src/backend/tiered.rs b/objectstore-service/src/backend/tiered.rs index c596ae19..c540f5d1 100644 --- a/objectstore-service/src/backend/tiered.rs +++ b/objectstore-service/src/backend/tiered.rs @@ -443,13 +443,18 @@ impl Backend for TieredStorage { backend_type = backend_type, ); - if let Some((_, ref content_range, _)) = result { - objectstore_metrics::record!( - "get.size" = content_range.len(), - usecase = id.usecase().to_owned(), - backend_choice = backend_choice.as_str(), - backend_type = backend_type, - ); + if let Some((ref metadata, ref content_range, _)) = result { + let size = content_range + .map(|cr| cr.len() as usize) + .or(metadata.size); + if let Some(size) = size { + objectstore_metrics::record!( + "get.size" = size, + usecase = id.usecase().to_owned(), + backend_choice = backend_choice.as_str(), + backend_type = backend_type, + ); + } } Ok(result) diff --git a/objectstore-service/src/service.rs b/objectstore-service/src/service.rs index 1aac35c2..d705cbf6 100644 --- a/objectstore-service/src/service.rs +++ b/objectstore-service/src/service.rs @@ -23,7 +23,7 @@ use crate::stream::{ClientStream, PayloadStream}; use crate::streaming::StreamExecutor; /// Service response for [`StorageService::get_object`]. -pub type GetResponse = Option<(Metadata, ContentRange, PayloadStream)>; +pub type GetResponse = Option<(Metadata, Option, PayloadStream)>; /// Service response for [`StorageService::get_metadata`]. pub type MetadataResponse = Option; /// Service response for [`StorageService::insert_object`]. diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index f3fe0a48..201b4cb8 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -142,34 +142,13 @@ impl FromStr for ContentRange { } } +#[allow(clippy::len_without_is_empty)] // A valid ContentRange is never empty. impl ContentRange { - /// Creates a [`ContentRange`] representing an entire object. - pub fn full(total: u64) -> Self { - Self { - start: 0, - end: total.saturating_sub(1), - total, - } - } - /// Returns the number of bytes in this range. pub fn len(&self) -> u64 { - if self.total == 0 { - return 0; - } self.end - self.start + 1 } - /// Returns `true` if this range is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns `true` if this range covers the entire object. - pub fn is_full(&self) -> bool { - self.start == 0 && self.len() == self.total - } - /// Formats this range for a `Content-Range` response header. /// /// The returned value is always valid ASCII and can be inserted directly @@ -262,18 +241,13 @@ mod tests { } #[test] - fn content_range_methods() { - let full = ContentRange::full(1000); - assert_eq!( - full, - ContentRange { - start: 0, - end: 999, - total: 1000 - } - ); + fn content_range_len() { + let full = ContentRange { + start: 0, + end: 999, + total: 1000, + }; assert_eq!(full.len(), 1000); - assert!(full.is_full()); let partial = ContentRange { start: 0, @@ -281,11 +255,6 @@ mod tests { total: 1000, }; assert_eq!(partial.len(), 500); - assert!(!partial.is_full()); - - let zero = ContentRange::full(0); - assert_eq!(zero.len(), 0); - assert!(zero.is_full()); } #[test] From 20034e1dfb62f05e7604d0f288cfd36947d87cfe Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 2 Jun 2026 13:04:37 +0200 Subject: [PATCH 38/45] improve --- objectstore-types/src/range.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/objectstore-types/src/range.rs b/objectstore-types/src/range.rs index 201b4cb8..531aa48b 100644 --- a/objectstore-types/src/range.rs +++ b/objectstore-types/src/range.rs @@ -142,7 +142,10 @@ impl FromStr for ContentRange { } } -#[allow(clippy::len_without_is_empty)] // A valid ContentRange is never empty. +#[expect( + clippy::len_without_is_empty, + reason = "A valid ContentRange is never empty" +)] impl ContentRange { /// Returns the number of bytes in this range. pub fn len(&self) -> u64 { From 271d62c34e438c75b5d13855ba9a4e4916aa4025 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 2 Jun 2026 13:06:36 +0200 Subject: [PATCH 39/45] clippy --- objectstore-service/src/backend/local_fs.rs | 11 ++++++----- objectstore-service/src/backend/tiered.rs | 4 +--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/objectstore-service/src/backend/local_fs.rs b/objectstore-service/src/backend/local_fs.rs index 5d35e440..2a221a79 100644 --- a/objectstore-service/src/backend/local_fs.rs +++ b/objectstore-service/src/backend/local_fs.rs @@ -149,11 +149,12 @@ impl Backend for LocalFsBackend { let (content_range, stream) = match range { Some(byte_range) => { - let content_range = byte_range - .resolve(payload_size) - .ok_or(Error::RangeNotSatisfiable { - total: payload_size, - })?; + let content_range = + byte_range + .resolve(payload_size) + .ok_or(Error::RangeNotSatisfiable { + total: payload_size, + })?; reader .seek(std::io::SeekFrom::Current(content_range.start as i64)) .await?; diff --git a/objectstore-service/src/backend/tiered.rs b/objectstore-service/src/backend/tiered.rs index c540f5d1..ccb8d5ee 100644 --- a/objectstore-service/src/backend/tiered.rs +++ b/objectstore-service/src/backend/tiered.rs @@ -444,9 +444,7 @@ impl Backend for TieredStorage { ); if let Some((ref metadata, ref content_range, _)) = result { - let size = content_range - .map(|cr| cr.len() as usize) - .or(metadata.size); + let size = content_range.map(|cr| cr.len() as usize).or(metadata.size); if let Some(size) = size { objectstore_metrics::record!( "get.size" = size, From aaa82218dbb033d2d659552291f01ba7f208f09a Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 2 Jun 2026 13:10:36 +0200 Subject: [PATCH 40/45] improve --- objectstore-service/src/backend/gcs.rs | 26 ++++++++----------- .../src/backend/s3_compatible.rs | 2 ++ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index 70f7b73a..ade54142 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -707,21 +707,17 @@ impl Backend for GcsBackend { }) .await?; - let content_range = if payload_response.status() == StatusCode::PARTIAL_CONTENT { - Some( - payload_response - .headers() - .get(header::CONTENT_RANGE) - .and_then(|v| v.to_str().ok()) - .and_then(|s| s.parse::().ok()) - .ok_or_else(|| Error::Generic { - context: "GCS: 206 response missing valid Content-Range header".to_owned(), - cause: None, - })?, - ) - } else { - None - }; + let content_range = (payload_response.status() == StatusCode::PARTIAL_CONTENT).then_some( + payload_response + .headers() + .get(header::CONTENT_RANGE) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + .ok_or_else(|| Error::Generic { + context: "GCS: 206 response missing valid Content-Range header".to_owned(), + cause: None, + })?, + ); let stream = payload_response .bytes_stream() diff --git a/objectstore-service/src/backend/s3_compatible.rs b/objectstore-service/src/backend/s3_compatible.rs index 8bc68297..9818597c 100644 --- a/objectstore-service/src/backend/s3_compatible.rs +++ b/objectstore-service/src/backend/s3_compatible.rs @@ -223,6 +223,8 @@ where } else { if let Some(len) = response.content_length() { metadata.size = Some(len as usize); + } else { + objectstore_log::warn!("S3: 200 response missing Content-Length header"); } None }; From 0aa575c7ac765b210f759437e1b2996bb09c25d2 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Tue, 2 Jun 2026 13:25:43 +0200 Subject: [PATCH 41/45] improve --- objectstore-service/src/backend/gcs.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/objectstore-service/src/backend/gcs.rs b/objectstore-service/src/backend/gcs.rs index ade54142..70f7b73a 100644 --- a/objectstore-service/src/backend/gcs.rs +++ b/objectstore-service/src/backend/gcs.rs @@ -707,17 +707,21 @@ impl Backend for GcsBackend { }) .await?; - let content_range = (payload_response.status() == StatusCode::PARTIAL_CONTENT).then_some( - payload_response - .headers() - .get(header::CONTENT_RANGE) - .and_then(|v| v.to_str().ok()) - .and_then(|s| s.parse::().ok()) - .ok_or_else(|| Error::Generic { - context: "GCS: 206 response missing valid Content-Range header".to_owned(), - cause: None, - })?, - ); + let content_range = if payload_response.status() == StatusCode::PARTIAL_CONTENT { + Some( + payload_response + .headers() + .get(header::CONTENT_RANGE) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + .ok_or_else(|| Error::Generic { + context: "GCS: 206 response missing valid Content-Range header".to_owned(), + cause: None, + })?, + ) + } else { + None + }; let stream = payload_response .bytes_stream() From 04d1b78d44df924199ac0dfa2096f88d27afb180 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:36:31 +0200 Subject: [PATCH 42/45] move range header parsing to its own extractor --- objectstore-server/src/endpoints/objects.rs | 34 ++----------- .../src/extractors/byte_range.rs | 49 +++++++++++++++++++ objectstore-server/src/extractors/mod.rs | 1 + 3 files changed, 53 insertions(+), 31 deletions(-) create mode 100644 objectstore-server/src/extractors/byte_range.rs diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 1bc2f78a..732cdb62 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -9,11 +9,12 @@ use axum::{Json, Router}; use objectstore_service::error::Error as ServiceError; use objectstore_service::id::{ObjectContext, ObjectId}; use objectstore_types::metadata::Metadata; -use objectstore_types::range::{ByteRange, ContentRange, RangeError}; +use objectstore_types::range::{ContentRange, RangeError}; use serde::Serialize; use crate::auth::AuthAwareService; use crate::endpoints::common::{ApiError, ApiResult, insert_accept_ranges}; +use crate::extractors::byte_range::OptionalByteRange; use crate::extractors::{Xt, body::MeteredBody}; use crate::state::ServiceState; @@ -65,38 +66,9 @@ async fn object_get( service: AuthAwareService, State(state): State, Xt(id): Xt, + OptionalByteRange(byte_range): OptionalByteRange, headers: HeaderMap, ) -> ApiResult { - let byte_range = match headers.get(http::header::RANGE) { - Some(value) => { - let header_str = value - .to_str() - .map_err(|_| ApiError::Client("invalid Range header".into()))?; - match header_str.parse::() { - Ok(range) => Some(range), - // We are free to decide whether we want to fall back to the whole object or error. - // Per RFC 9110: - // > A server that supports range requests MAY ignore or reject a Range header - // field that contains an invalid ranges-specifier [...] - // - // If the client wants multiple ranges, fall back to returning the whole object. - // We might support multiple ranges in the future, so log a warning to let us know - // clients are trying to do this. - Err(RangeError::MultiRange) => { - objectstore_log::warn!( - "received range request with multiple range specifiers, ignoring" - ); - None - } - // The client requested an invalid unit or sent a malformed header. - // We could fall back, but better fail hard and let them know they requested - // something we won't support. - Err(e) => return Err(ApiError::Client(format!("invalid Range header: {e}"))), - } - } - None => None, - }; - let context = id.context().clone(); let result = service.get_object(id, byte_range).await; diff --git a/objectstore-server/src/extractors/byte_range.rs b/objectstore-server/src/extractors/byte_range.rs new file mode 100644 index 00000000..c0d96670 --- /dev/null +++ b/objectstore-server/src/extractors/byte_range.rs @@ -0,0 +1,49 @@ +//! Axum extractor for range requests. + +use axum::extract::FromRequestParts; +use http::request::Parts; +use objectstore_types::range::{ByteRange, RangeError}; + +use crate::{endpoints::common::ApiError, state::ServiceState}; + +/// Extractor that parses the `Range` request header into an optional [`ByteRange`]. +#[derive(Debug, Clone)] +pub struct OptionalByteRange(pub Option); + +impl FromRequestParts for OptionalByteRange { + type Rejection = ApiError; + + async fn from_request_parts( + parts: &mut Parts, + _state: &ServiceState, + ) -> Result { + let headers = &parts.headers; + let Some(range) = headers.get(http::header::RANGE) else { + return Ok(Self(None)); + }; + let range = range + .to_str() + .map_err(|_| ApiError::Client("invalid Range header".into()))?; + + match range.parse::() { + Ok(range) => Ok(Self(Some(range))), + // Per RFC 9110: + // > A server that supports range requests MAY ignore or reject a Range header + // field that contains an invalid ranges-specifier [...] + // + // If the client wants multiple ranges, fall back to returning the whole object. + // We might support multiple ranges in the future, so log a warning to let us know + // clients are trying to do this. + Err(RangeError::MultiRange) => { + objectstore_log::warn!( + "received range request with multiple range specifiers, ignoring" + ); + Ok(Self(None)) + } + // The client requested an invalid unit or sent a malformed header. + // We could fall back, but better fail hard and let them know they requested + // something we won't support. + Err(err) => Err(ApiError::Client(format!("invalid Range header: {err}"))), + } + } +} diff --git a/objectstore-server/src/extractors/mod.rs b/objectstore-server/src/extractors/mod.rs index 74ec0ba9..6a5e760d 100644 --- a/objectstore-server/src/extractors/mod.rs +++ b/objectstore-server/src/extractors/mod.rs @@ -2,6 +2,7 @@ pub mod batch; pub mod body; +pub mod byte_range; pub mod downstream_service; mod id; mod service; From c0939f7c6aaa0d9a8160cda4f0bbed0eae92aaf5 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:36:54 +0200 Subject: [PATCH 43/45] fmt --- objectstore-server/src/endpoints/objects.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/objectstore-server/src/endpoints/objects.rs b/objectstore-server/src/endpoints/objects.rs index 732cdb62..8f172e31 100644 --- a/objectstore-server/src/endpoints/objects.rs +++ b/objectstore-server/src/endpoints/objects.rs @@ -9,7 +9,7 @@ use axum::{Json, Router}; use objectstore_service::error::Error as ServiceError; use objectstore_service::id::{ObjectContext, ObjectId}; use objectstore_types::metadata::Metadata; -use objectstore_types::range::{ContentRange, RangeError}; +use objectstore_types::range::ContentRange; use serde::Serialize; use crate::auth::AuthAwareService; @@ -67,7 +67,7 @@ async fn object_get( State(state): State, Xt(id): Xt, OptionalByteRange(byte_range): OptionalByteRange, - headers: HeaderMap, + _headers: HeaderMap, ) -> ApiResult { let context = id.context().clone(); let result = service.get_object(id, byte_range).await; From 3ae2f45918cb3f85035a13a286443aca455de360 Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:29:26 +0200 Subject: [PATCH 44/45] improve --- objectstore-server/src/extractors/byte_range.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/objectstore-server/src/extractors/byte_range.rs b/objectstore-server/src/extractors/byte_range.rs index c0d96670..975e29fd 100644 --- a/objectstore-server/src/extractors/byte_range.rs +++ b/objectstore-server/src/extractors/byte_range.rs @@ -41,8 +41,8 @@ impl FromRequestParts for OptionalByteRange { Ok(Self(None)) } // The client requested an invalid unit or sent a malformed header. - // We could fall back, but better fail hard and let them know they requested - // something we won't support. + // We could fall back, but better fail hard and let them know they send something + // invalid. Err(err) => Err(ApiError::Client(format!("invalid Range header: {err}"))), } } From 8f63a33bc9187dc96bb71ee54fa6890749719fef Mon Sep 17 00:00:00 2001 From: lcian <17258265+lcian@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:30:45 +0200 Subject: [PATCH 45/45] improve --- objectstore-server/src/extractors/byte_range.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/objectstore-server/src/extractors/byte_range.rs b/objectstore-server/src/extractors/byte_range.rs index 975e29fd..9f39dc77 100644 --- a/objectstore-server/src/extractors/byte_range.rs +++ b/objectstore-server/src/extractors/byte_range.rs @@ -41,7 +41,7 @@ impl FromRequestParts for OptionalByteRange { Ok(Self(None)) } // The client requested an invalid unit or sent a malformed header. - // We could fall back, but better fail hard and let them know they send something + // We could fall back, but better fail hard and let them know they sent something // invalid. Err(err) => Err(ApiError::Client(format!("invalid Range header: {err}"))), }