Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions docs/configuration/storage-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Quickwit currently supports four types of storage providers:

Storage URIs refer to different storage providers identified by a URI "protocol" or "scheme". Quickwit supports the following storage URI protocols:
- `s3://` for Amazon S3 and S3-compatible storage providers
- `s3+<name>://` for additional S3-compatible backends configured under `storage.s3.named.<name>` (see [Named S3 backends](#named-s3-backends))
- `azure://` for Azure Blob Storage
- `file://` for local file systems
- `gs://` for Google Cloud Storage
Expand Down Expand Up @@ -104,6 +105,42 @@ storage:
endpoint: https://storage.googleapis.com
```

#### Named S3 backends

In addition to the primary `s3:` block, you can declare any number of additional S3-compatible backends under `storage.s3.named.<name>`. Each entry is an independent endpoint with its own credentials, region, and flags. Indexes route to a named backend via the URI scheme `s3+<name>://bucket/path` (plain `s3://` continues to use the primary endpoint).

Each named entry accepts the same fields as the primary `s3:` block, *except* `named` itself (no recursion). If `access_key_id` / `secret_access_key` are omitted on a named entry, the global AWS SDK credential chain is used (env vars, instance metadata, etc.).

Named backends are self-contained: the process-wide `QW_S3_ENDPOINT` and `QW_S3_FORCE_PATH_STYLE_ACCESS` overrides apply to the primary `s3:` backend only. A named backend always uses its own `endpoint` and `force_path_style_access` values.

```yaml
storage:
  s3:
    # Primary backend — addressed by plain `s3://...` URIs.
    endpoint: https://s3.us-east-1.amazonaws.com
    region: us-east-1
    named:
      # Addressed by `s3+secondary://bucket/path` URIs.
      secondary:
        endpoint: https://s3.eu-west-3.amazonaws.com
        region: eu-west-3
        access_key_id: ${SECONDARY_S3_ACCESS_KEY_ID}
        secret_access_key: ${SECONDARY_S3_SECRET_ACCESS_KEY}
      # Addressed by `s3+seaweed://bucket/path` URIs. Falls back to the
      # global AWS SDK credentials when keys are omitted.
      seaweed:
        endpoint: http://seaweedfs-s3:8333
        region: us-east-1
        force_path_style_access: true
```

An index pointed at a named backend declares its URI accordingly:

```yaml
index_id: logs-eu
index_uri: s3+secondary://logs-bucket/logs-eu
```

### Azure storage configuration

| Property | Description | Default value |
Expand Down
85 changes: 79 additions & 6 deletions quickwit/quickwit-common/src/uri.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ impl FromStr for Protocol {
"ram" => Ok(Protocol::Ram),
"s3" => Ok(Protocol::S3),
"gs" => Ok(Protocol::Google),
// `s3+<name>://...` for a named S3-compatible backend configured under
// `storage.s3.named.<name>`. Routes to the same factory as `s3://`.
s if s.starts_with("s3+") && s.len() > 3 => Ok(Protocol::S3),
Comment thread
papaharry marked this conversation as resolved.
_ => bail!("unknown URI protocol `{protocol}`"),
}
}
Expand Down Expand Up @@ -186,13 +189,33 @@ impl Uri {
let parent_path = path.parent()?;

Some(Self {
uri: format!("{protocol}{PROTOCOL_SEPARATOR}{}", parent_path.display()),
// Preserve the scheme verbatim so an `s3+<name>` qualifier survives.
uri: format!(
"{}{PROTOCOL_SEPARATOR}{}",
self.scheme(),
parent_path.display()
),
protocol,
})
}

/// Returns the scheme exactly as written in the URI string, i.e. everything
/// before the protocol separator.
///
/// For named S3 backends this keeps the `s3+<name>` qualifier, so the result
/// can differ from the canonical protocol string (e.g. `s3+alt` rather than
/// `s3`). When the URI string contains no separator, the canonical protocol
/// string is returned instead.
fn scheme(&self) -> &str {
    self.uri
        .split_once(PROTOCOL_SEPARATOR)
        .map_or_else(|| self.protocol.as_str(), |(prefix, _rest)| prefix)
}

/// Returns the part of the URI string that follows the protocol separator,
/// viewed as a `Path`. If the string contains no separator, the whole URI
/// string is used as the path.
fn path(&self) -> &Path {
// NOTE(review): the statement directly below appears to be the pre-change
// body left visible by the diff rendering. It slices by the length of the
// canonical protocol string, which the comment further down describes as
// too short for `s3+<name>` schemes — confirm it is absent from the real file.
Path::new(&self.uri[self.protocol.as_str().len() + PROTOCOL_SEPARATOR.len()..])
// Slice at the actual `://` separator rather than assuming the scheme
// equals the canonical protocol — `s3+<name>` schemes are longer.
let path = match self.uri.split_once(PROTOCOL_SEPARATOR) {
Some((_scheme, path)) => path,
None => &self.uri,
};
Path::new(path)
}

/// Returns the last component of the URI.
Expand Down Expand Up @@ -262,9 +285,13 @@ impl Uri {
if uri_str.is_empty() {
bail!("failed to parse empty URI");
}
let (protocol, mut path) = match uri_str.split_once(PROTOCOL_SEPARATOR) {
None => (Protocol::File, uri_str.to_string()),
Some((protocol, path)) => (Protocol::from_str(protocol)?, path.to_string()),
let (scheme_opt, protocol, mut path) = match uri_str.split_once(PROTOCOL_SEPARATOR) {
None => (None, Protocol::File, uri_str.to_string()),
Some((scheme, path)) => (
Some(scheme.to_string()),
Protocol::from_str(scheme)?,
path.to_string(),
),
};
if protocol == Protocol::File {
if path.starts_with('~') {
Expand Down Expand Up @@ -292,8 +319,14 @@ impl Uri {
.to_string_lossy()
.to_string();
}
// Preserve `s3+<name>` qualifier so the storage resolver can route to
// the named backend; other schemes normalize to canonical form.
let display_scheme: &str = match scheme_opt.as_deref() {
Some(s) if s.starts_with("s3+") => s,
_ => protocol.as_str(),
};
Ok(Self {
uri: format!("{protocol}{PROTOCOL_SEPARATOR}{path}"),
uri: format!("{display_scheme}{PROTOCOL_SEPARATOR}{path}"),
protocol,
})
}
Expand Down Expand Up @@ -663,6 +696,26 @@ mod tests {
);
}

#[test]
fn test_uri_named_s3_scheme() {
    // A named-backend scheme (`s3+<name>`) must pass through every accessor
    // unchanged: the string form keeps the qualifier, the protocol is still
    // reported as S3, and `parent` / `file_name` work from the real scheme
    // rather than the canonical `s3`.
    let nested = Uri::for_test("s3+alt://bucket/foo/bar");
    assert_eq!(nested.as_str(), "s3+alt://bucket/foo/bar");
    assert_eq!(nested.protocol(), Protocol::S3);
    let nested_parent = nested.parent().unwrap();
    assert_eq!(nested_parent, "s3+alt://bucket/foo");
    let nested_leaf = nested.file_name().unwrap();
    assert_eq!(nested_leaf, Path::new("bar"));

    // Backend names containing a dash behave the same way.
    let dashed = Uri::for_test("s3+with-dash://bucket/key");
    let dashed_parent = dashed.parent().unwrap();
    assert_eq!(dashed_parent, "s3+with-dash://bucket");
    let dashed_leaf = dashed.file_name().unwrap();
    assert_eq!(dashed_leaf, Path::new("key"));

    // Same guard as for plain `s3`: a URI that names only the bucket has no
    // parent, with or without a trailing slash.
    let bucket_only = Uri::for_test("s3+alt://bucket");
    assert!(bucket_only.parent().is_none());
    let bucket_with_slash = Uri::for_test("s3+alt://bucket/");
    assert!(bucket_with_slash.parent().is_none());
}

#[test]
fn test_uri_file_name() {
assert!(Uri::for_test("file:///").file_name().is_none());
Expand Down Expand Up @@ -812,4 +865,24 @@ mod tests {
serde_json::Value::String("s3://bucket/key".to_string())
);
}

#[test]
fn test_uri_s3_named_preserved() {
    // Named S3-compatible backends (`storage.s3.named.<name>`) are selected
    // through the `s3+<name>` qualifier in the URI scheme, so the qualifier
    // has to survive parsing, serialization and deserialization. Without
    // that, URI normalization would strip it and every `s3+<name>://` URI
    // would silently resolve to the primary endpoint.
    const NAMED_URI: &str = "s3+alt://bucket/key";

    // Parsing keeps the qualifier but still classifies the URI as S3.
    let parsed = Uri::from_str(NAMED_URI).unwrap();
    assert_eq!(parsed.protocol(), Protocol::S3);
    assert_eq!(parsed.as_str(), NAMED_URI);

    // Serialization emits the qualified string verbatim.
    let serialized = serde_json::to_value(&parsed).unwrap();
    let expected = serde_json::Value::String(NAMED_URI.to_string());
    assert_eq!(serialized, expected);

    // Deserialization restores both the qualified string and the protocol.
    let restored: Uri = serde_json::from_value(serialized).unwrap();
    assert_eq!(restored.as_str(), NAMED_URI);
    assert_eq!(restored.protocol(), Protocol::S3);
}
}
Loading