From fdf632fe15434bf9cf3929f6d458a208010917a8 Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 28 Apr 2026 13:36:34 -0500 Subject: [PATCH 1/7] asset proxy spec --- ...-04-28-multi-backend-asset-proxy-design.md | 660 ++++++++++++++++++ 1 file changed, 660 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-28-multi-backend-asset-proxy-design.md diff --git a/docs/superpowers/specs/2026-04-28-multi-backend-asset-proxy-design.md b/docs/superpowers/specs/2026-04-28-multi-backend-asset-proxy-design.md new file mode 100644 index 000000000..730360d2b --- /dev/null +++ b/docs/superpowers/specs/2026-04-28-multi-backend-asset-proxy-design.md @@ -0,0 +1,660 @@ +# Multi-backend Asset Proxy Design + +> Proposed design for path-based first-party asset proxy routing. +> Date: 2026-04-28. + +--- + +## Goal + +Allow Trusted Server to proxy selected first-party asset paths to a different +backend origin than `publisher.origin_url`. + +Example: + +- incoming URL: `https://www.example.com/.images/foo.jpg?w=1200` +- matched rule: `prefix = "/.images/"` +- asset origin: `https://some.fastly-service.com` +- upstream URL: `https://some.fastly-service.com/.images/foo.jpg?w=1200` + +This should happen transparently for normal inbound requests, without requiring +`/first-party/proxy` signed URLs. + +--- + +## Problem + +Today, unknown routes fall through to the publisher proxy path and always go to +one backend: + +- `settings.publisher.origin_url` + +That works for HTML and general publisher-origin traffic, but it does not allow +specific first-party asset namespaces to be served by a separate backend such +as an image CDN, Fastly service, or dedicated asset origin. + +Publishers need to keep asset URLs on their first-party domain while routing +certain path prefixes to a different backend. 
+ +--- + +## Scope + +### In scope + +- Path-prefix-based routing for first-party asset requests +- Multiple configured asset-route rules +- Per-rule alternate `origin_url` +- Transparent proxying for ordinary inbound `GET`/`HEAD` requests +- Preservation of the incoming path and query string +- Raw response pass-through from the matched asset origin +- Deterministic longest-prefix route selection +- Request routing that happens after built-in and integration routes, but + before publisher-origin fallback + +### Out of scope + +- Regex-based route matching +- Path rewrite / prefix replacement +- Cookie, consent, HTML, CSS, or JS rewriting on asset-route responses +- Redirect following for asset routes +- Special cache policy overrides +- Non-`GET` / non-`HEAD` methods +- Per-route header customization +- Health checks, fallback chains, or origin failover + +--- + +## Product Requirements + +### 1. Transparent inbound routing + +The feature applies to normal inbound requests handled by Trusted Server. + +It is **not** an extension of `/first-party/proxy` and does **not** require URL +signing. + +If an incoming request path matches a configured asset route, Trusted Server +proxies it directly to that route's configured origin. + +### 2. Match on simple path prefixes + +Routes are configured as simple prefixes, not regexes. + +Examples: + +- valid: `/.images/` +- valid: `/static/` +- invalid: `.images/` +- invalid: `images/` + +Rule matching is performed against the request path only. Query strings are +ignored for matching. + +### 3. Preserve path and query exactly + +When a rule matches, Trusted Server replaces only the upstream origin +(scheme/host/port) and preserves the rest of the request URL exactly. + +Example: + +- inbound: `/.images/foo/bar.jpg?auto=webp&width=1200` +- upstream path/query: `/.images/foo/bar.jpg?auto=webp&width=1200` + +There is no path rewrite in v1. + +### 4. Multiple rules supported + +Configuration supports multiple asset-route entries. 
+ +Example use cases: + +- `/.images/` → image CDN +- `/static/assets/` → static asset backend +- `/_next/image/` → specialized image transformer + +### 5. Longest matching prefix wins + +If multiple routes match a path, the most specific route wins. + +Example: + +- `/.images/` → backend A +- `/.images/special/` → backend B +- request `/.images/special/x.jpg` → backend B + +### 6. Only `GET` and `HEAD` + +Asset-route matching only applies to `GET` and `HEAD` requests. + +All other methods continue through existing route handling and publisher +fallback behavior unchanged. + +### 7. Explicit routes win first + +Built-in Trusted Server routes and registered integration routes must retain +higher precedence than asset-route matching. + +Asset routes act only inside the fallback proxy space. They must not shadow: + +- `/auction` +- `/first-party/*` +- `/.well-known/*` +- admin routes +- registered integration routes + +### 8. Raw pass-through behavior + +Matched asset routes bypass the publisher-page processing pipeline. + +Specifically, asset-route handling does **not** perform: + +- EC generation / consent pipeline work +- cookie mutation +- HTML rewriting +- CSS rewriting +- URL rewriting +- RSC processing +- post-processing +- redirect following + +The route behaves as a lean transport proxy. + +### 9. Upstream errors are not masked + +If the matched asset origin returns a response, that response is returned to the +client as-is. + +If the asset origin cannot be reached or backend setup fails, Trusted Server +returns the existing error behavior for that failure class. + +It must **not** silently fall back to `publisher.origin_url`. + +### 10. Preserve upstream cache semantics + +Trusted Server passes through upstream cache headers unchanged, including: + +- `Cache-Control` +- `ETag` +- `Last-Modified` +- `Expires` +- `Vary` + +There is no v1 cache override layer. + +### 11. 
Preserve redirect semantics + +If the asset origin returns a redirect (`301`, `302`, `303`, `307`, `308`), +Trusted Server returns that redirect to the client as-is. + +It does not follow redirects server-side. + +### 12. Preserve `HEAD` semantics + +A `HEAD` request to a matched asset route is proxied upstream as `HEAD` and +returned without body synthesis. + +--- + +## Configuration Design + +Asset routes live under `[proxy]` in `trusted-server.toml`. + +### Proposed shape + +```toml +[proxy] +certificate_check = true + +[[proxy.asset_routes]] +prefix = "/.images/" +origin_url = "https://some.fastly-service.com" + +[[proxy.asset_routes]] +prefix = "/static/assets/" +origin_url = "https://assets.example.net" +``` + +### Field definitions + +#### `prefix` + +- required +- string +- must start with `/` +- matched against the request path only +- case-sensitive, using normal request-path semantics + +#### `origin_url` + +- required +- string +- absolute `http` or `https` URL +- must not include a trailing slash +- used as the upstream scheme/host/port base +- request path and query are preserved from the incoming request + +### Validation rules + +#### Hard validation errors + +These should fail configuration loading: + +- `prefix` missing +- `prefix` does not start with `/` +- `origin_url` missing +- `origin_url` is not an absolute `http`/`https` URL +- `origin_url` has a trailing slash + +#### Warning-only validation + +Duplicate exact prefixes should not fail startup. + +Instead: + +- log a warning for later duplicates +- keep behavior deterministic +- exact duplicate prefixes use the **first configured rule** + +This preserves production availability while surfacing misconfiguration. + +--- + +## Proposed Data Model + +Add a new route type under proxy settings. 
+ +```rust +pub struct ProxyAssetRoute { + pub prefix: String, + pub origin_url: String, +} + +pub struct Proxy { + pub certificate_check: bool, + pub allowed_domains: Vec<String>, + pub asset_routes: Vec<ProxyAssetRoute>, +} +``` + +### Runtime helper behavior + +A helper should normalize and validate asset routes during settings preparation. + +Recommended responsibilities: + +- validate each route +- warn on duplicate exact prefixes +- provide longest-prefix matching for a path +- provide deterministic duplicate behavior + +--- + +## Request Routing Design + +### Current baseline + +Today the top-level request router behaves roughly as follows: + +1. match built-in routes +2. match integration routes +3. otherwise proxy to `publisher.origin_url` + +### Proposed routing order + +1. match built-in Trusted Server routes +2. match integration routes +3. if method is `GET` or `HEAD`, try asset-route match +4. if asset route matched, proxy to that asset origin +5. otherwise fall through to existing publisher-origin proxy path + +### Why this placement + +This preserves current application route behavior while allowing targeted +origin overrides for fallback asset paths. + +Asset routes should not become a general-purpose top-level router that can +interfere with core product endpoints. + +--- + +## Matching Algorithm + +### Inputs + +- HTTP method +- request path +- configured `asset_routes` + +### Matching rules + +1. Ignore all asset routes unless method is `GET` or `HEAD` +2. Compare request path against each configured `prefix` +3. A route matches when `request_path.starts_with(prefix)` +4. Select the match with the longest `prefix` +5. 
If multiple routes have the same exact prefix, the first configured route + wins and later duplicates only warn + +### Examples + +#### Example 1: simple match + +Rules: + +- `/.images/` → `https://img.fastly.example` + +Request: + +- `GET /.images/photo.jpg?w=1000` + +Result: + +- proxy to `https://img.fastly.example/.images/photo.jpg?w=1000` + +#### Example 2: longest prefix + +Rules: + +- `/.images/` → A +- `/.images/special/` → B + +Request: + +- `GET /.images/special/banner.png` + +Result: + +- route B wins + +#### Example 3: wrong method + +Rules: + +- `/.images/` → A + +Request: + +- `POST /.images/upload` + +Result: + +- no asset-route match; continue existing routing behavior + +--- + +## Proxy Behavior + +### Upstream URL construction + +For a matched asset route: + +1. take the matched rule's `origin_url` +2. preserve the incoming request path exactly +3. preserve the incoming query string exactly +4. build the upstream request URL from those components + +Example: + +- origin: `https://some.fastly-service.com` +- path: `/.images/foo.jpg` +- query: `auto=webp&width=800` +- upstream: `https://some.fastly-service.com/.images/foo.jpg?auto=webp&width=800` + +### Backend selection + +The route should use the existing dynamic-backend mechanism already used +elsewhere in Trusted Server. + +Backend creation should be derived from the matched `origin_url` and +`settings.proxy.certificate_check`. + +### Host header + +The upstream `Host` header must be set to the matched asset origin host, +not the original first-party host. + +This is necessary for CDN and origin correctness. + +### Method forwarding + +- incoming `GET` → upstream `GET` +- incoming `HEAD` → upstream `HEAD` + +No method rewriting. + +### Header forwarding + +Forward a minimal curated set of request headers, aligned with existing proxy +helper behavior where possible. 
+ +Recommended v1 header set: + +- `Accept` +- `Accept-Encoding` +- `Accept-Language` +- `User-Agent` +- `Referer` +- `X-Forwarded-For` + +Avoid broad header tunneling in v1. + +### Redirects + +Do not follow redirects. + +If upstream returns a redirect, return it to the client. + +### Response handling + +Treat the response as raw pass-through: + +- preserve status code +- preserve response body bytes +- preserve response headers, including cache headers +- do not inspect content type for rewriting +- do not run creative, HTML, CSS, or RSC processors + +--- + +## Interaction with Existing Publisher Proxy + +The existing publisher proxy path is HTML-aware and consent-aware. It includes: + +- cookie parsing +- EC generation / forwarding +- consent context construction +- response rewriting and post-processing +- origin fallback through `publisher.origin_url` + +The new asset-route path is intentionally separate. + +### Design principle + +Use the publisher proxy for pages and general publisher-origin traffic. +Use asset-route proxying for configured static/asset namespaces. + +This separation keeps the asset path lean and avoids introducing page-proxy +behavior into CDN-style traffic. + +--- + +## Failure Semantics + +### Upstream returns HTTP response + +Return it as-is. + +Examples: + +- `404 Not Found` → return `404` +- `500 Internal Server Error` → return `500` +- `302 Found` → return `302` + +### Upstream unreachable / backend failure + +Return the normal Trusted Server error behavior for backend/proxy failure. + +Do **not** retry against `publisher.origin_url`. +Do **not** silently fall back. + +### Misconfiguration + +- invalid `prefix` / invalid `origin_url` → configuration error +- duplicate exact `prefix` → warning only + +--- + +## Observability + +At minimum, log enough information to diagnose routing decisions. 
+ +Recommended log points: + +- asset route matched: request path, matched prefix, target origin +- duplicate exact prefix detected at startup +- asset proxy backend creation failure +- asset upstream request failure +- asset route skipped due to unsupported method + +Logging should use the project's normal `log` macros. + +--- + +## Security Considerations + +### 1. Limited scope + +This feature is not an arbitrary open proxy. It only routes to origins that are +statically configured in `trusted-server.toml`. + +### 2. No redirect following + +Returning redirects as-is avoids introducing redirect-chain SSRF concerns for +this feature. + +### 3. Minimal header forwarding + +Forwarding a curated header set reduces risk from hop-by-hop headers or +unexpected application headers being tunneled upstream. + +### 4. No signed-URL trust expansion + +This feature does not reuse `/first-party/proxy` URL-signing behavior. It is a +separate static routing mechanism. + +--- + +## Acceptance Criteria + +### Configuration + +- `trusted-server.toml` accepts `[[proxy.asset_routes]]` +- each route requires `prefix` and `origin_url` +- invalid `prefix` fails config load +- invalid `origin_url` fails config load +- duplicate exact prefixes log warnings but do not fail startup + +### Routing + +- built-in routes still win over asset routes +- integration routes still win over asset routes +- asset routes are evaluated before publisher-origin fallback +- only `GET` and `HEAD` requests participate +- longest matching prefix wins +- exact duplicate prefixes resolve deterministically to the first configured rule + +### Proxy semantics + +- matched requests preserve path and query exactly +- matched requests use the asset origin's scheme/host/port +- upstream `Host` header matches asset origin host +- redirects are returned to the client, not followed +- cache headers pass through unchanged +- no fallback to `publisher.origin_url` on asset origin failure +- `HEAD` remains `HEAD` + +### 
Response processing + +- matched asset routes bypass publisher consent/cookie/rewriting logic +- matched asset routes behave as raw pass-through + +--- + +## Recommended Tests + +### Settings tests + +- parses multiple `[[proxy.asset_routes]]` entries +- rejects prefix without leading `/` +- rejects `origin_url` with trailing slash +- rejects non-absolute `origin_url` +- warns on duplicate exact prefixes + +### Route-selection tests + +- no match for unsupported method +- match by prefix +- longest-prefix wins +- exact duplicate prefix resolves to first rule +- query string does not affect matching + +### Adapter/router tests + +- built-in route precedence over asset route +- integration route precedence over asset route +- unmatched path still falls through to publisher proxy + +### Proxy-construction tests + +- path preserved exactly +- query preserved exactly +- upstream host header uses asset origin host +- `HEAD` preserved +- redirect response returned as-is + +--- + +## Implementation Notes + +A minimal implementation should avoid changing the existing publisher proxy +behavior more than necessary. + +Recommended implementation outline: + +1. Add `ProxyAssetRoute` and `Proxy.asset_routes` to settings +2. Add normalization / validation / duplicate-warning logic +3. Add a path-matching helper that selects the longest prefix +4. Add a lean asset-proxy handler that: + - builds a backend from matched `origin_url` + - preserves path + query + - forwards a minimal header set + - does not follow redirects + - returns raw upstream response +5. Insert asset-route handling into top-level routing after explicit routes and + before publisher fallback +6. 
Add focused tests for config, matching, precedence, and proxy construction + +--- + +## Future Extensions + +Potential future work, intentionally excluded from v1: + +- regex path matching +- path rewrite rules +- per-route custom headers +- per-route cache overrides +- per-route certificate-check options +- per-route method allowlists +- route metrics / counters +- fallback chains across multiple origins + +--- + +## Open Questions + +None blocking for v1. + +The only follow-up item already identified is broader project-wide work to make +misconfiguration handling more consistent across Trusted Server, but that is not +required to implement this feature. From 53cdf53da854a5be9f97df2ae89f4d3f4e27390a Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 28 Apr 2026 14:11:13 -0500 Subject: [PATCH 2/7] Add path-based asset backend proxy routing --- .../trusted-server-adapter-fastly/src/main.rs | 120 +++--- .../src/route_tests.rs | 91 ++++- crates/trusted-server-core/src/proxy.rs | 373 +++++++++++++++++- crates/trusted-server-core/src/settings.rs | 253 +++++++++++- trusted-server.toml | 9 + 5 files changed, 791 insertions(+), 55 deletions(-) diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 2d0924636..2f27eaa6c 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -16,8 +16,8 @@ use trusted_server_core::http_util::sanitize_forwarded_headers; use trusted_server_core::integrations::IntegrationRegistry; use trusted_server_core::platform::RuntimeServices; use trusted_server_core::proxy::{ - handle_first_party_click, handle_first_party_proxy, handle_first_party_proxy_rebuild, - handle_first_party_proxy_sign, + handle_asset_proxy_request, handle_first_party_click, handle_first_party_proxy, + handle_first_party_proxy_rebuild, handle_first_party_proxy_sign, }; use trusted_server_core::publisher::{ handle_publisher_request, handle_tsjs_dynamic, 
stream_publisher_body, PublisherResponse, @@ -150,6 +150,11 @@ async fn route_request( let path = req.get_path().to_string(); let method = req.get_method().clone(); + let matched_asset_route = match &method { + &Method::GET | &Method::HEAD => settings.asset_route_for_path(&path), + _ => None, + }; + // Match known routes and handle them let result = match (method, path.as_str()) { // Serve the tsjs library @@ -202,62 +207,73 @@ async fn route_request( })) }), - // No known route matched, proxy to publisher origin as fallback + // No known route matched, proxy to an asset origin or publisher origin as fallback _ => { - log::info!( - "No known route matched for path: {}, proxying to publisher origin", - path - ); - - match runtime_services_for_consent_route(settings, runtime_services) { - Ok(publisher_services) => { - match handle_publisher_request( - settings, - integration_registry, - &publisher_services, - req, - ) { - Ok(PublisherResponse::Stream { - mut response, - body, - params, - }) => { - // Streaming path: finalize headers, then stream body to client. 
- finalize_response(settings, geo_info.as_ref(), &mut response); - let mut streaming_body = response.stream_to_client(); - if let Err(e) = stream_publisher_body( + if let Some(asset_route) = matched_asset_route { + log::info!( + "No explicit route matched for path: {}, proxying via asset route prefix {} to {}", + path, + asset_route.prefix, + asset_route.origin_url + ); + handle_asset_proxy_request(settings, runtime_services, req, &asset_route.origin_url) + .await + } else { + log::info!( + "No known route matched for path: {}, proxying to publisher origin", + path + ); + + match runtime_services_for_consent_route(settings, runtime_services) { + Ok(publisher_services) => { + match handle_publisher_request( + settings, + integration_registry, + &publisher_services, + req, + ) { + Ok(PublisherResponse::Stream { + mut response, body, - &mut streaming_body, - &params, - settings, - integration_registry, - ) { - // Headers already committed. Log and abort — client - // sees a truncated response. Standard proxy behavior. - log::error!("Streaming processing failed: {e:?}"); - drop(streaming_body); - } else if let Err(e) = streaming_body.finish() { - log::error!("Failed to finish streaming body: {e}"); + params, + }) => { + // Streaming path: finalize headers, then stream body to client. + finalize_response(settings, geo_info.as_ref(), &mut response); + let mut streaming_body = response.stream_to_client(); + if let Err(e) = stream_publisher_body( + body, + &mut streaming_body, + &params, + settings, + integration_registry, + ) { + // Headers already committed. Log and abort — client + // sees a truncated response. Standard proxy behavior. 
+ log::error!("Streaming processing failed: {e:?}"); + drop(streaming_body); + } else if let Err(e) = streaming_body.finish() { + log::error!("Failed to finish streaming body: {e}"); + } + // Response already sent via stream_to_client() + return None; + } + Ok(PublisherResponse::PassThrough { mut response, body }) => { + // Binary pass-through: reattach body and send via send_to_client(). + // This preserves Content-Length and avoids chunked encoding overhead. + // Fastly streams the body from its internal buffer — no WASM + // memory buffering occurs. + response.set_body(body); + Ok(response) + } + Ok(PublisherResponse::Buffered(response)) => Ok(response), + Err(e) => { + log::error!("Failed to proxy to publisher origin: {:?}", e); + Err(e) } - // Response already sent via stream_to_client() - return None; - } - Ok(PublisherResponse::PassThrough { mut response, body }) => { - // Binary pass-through: reattach body and send via send_to_client(). - // This preserves Content-Length and avoids chunked encoding overhead. - // Fastly streams the body from its internal buffer — no WASM - // memory buffering occurs. 
- response.set_body(body); - Ok(response) - } - Ok(PublisherResponse::Buffered(response)) => Ok(response), - Err(e) => { - log::error!("Failed to proxy to publisher origin: {:?}", e); - Err(e) } } + Err(e) => Err(e), } - Err(e) => Err(e), } } }; diff --git a/crates/trusted-server-adapter-fastly/src/route_tests.rs b/crates/trusted-server-adapter-fastly/src/route_tests.rs index 0fd0113f8..87718b89c 100644 --- a/crates/trusted-server-adapter-fastly/src/route_tests.rs +++ b/crates/trusted-server-adapter-fastly/src/route_tests.rs @@ -14,7 +14,7 @@ use trusted_server_core::platform::{ StoreName, }; use trusted_server_core::request_signing::JWKS_CONFIG_STORE_NAME; -use trusted_server_core::settings::Settings; +use trusted_server_core::settings::{ProxyAssetRoute, Settings}; use super::route_request; @@ -249,3 +249,92 @@ fn configured_missing_consent_store_only_breaks_consent_routes() { "should scope consent store failures to the consent-dependent routes" ); } + +#[test] +fn asset_routes_bypass_publisher_consent_dependencies() { + let mut settings = create_test_settings(); + settings.proxy.asset_routes = vec![ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + }]; + let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); + let integration_registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + + let asset_req = Request::get("https://test.com/.images/logo.png?auto=webp"); + let asset_services = test_runtime_services(&asset_req); + let asset_resp = futures::executor::block_on(route_request( + &settings, + &orchestrator, + &integration_registry, + &asset_services, + asset_req, + )) + .expect("should return an error response for asset proxy requests"); + assert_eq!( + asset_resp.get_status(), + StatusCode::BAD_GATEWAY, + "should bypass publisher consent dependencies and fail only on the missing upstream client" + ); +} + +#[test] +fn 
built_in_routes_take_precedence_over_asset_routes() { + let mut settings = create_test_settings(); + settings.proxy.asset_routes = vec![ProxyAssetRoute { + prefix: "/.well-known/".to_string(), + origin_url: "https://assets.example.com".to_string(), + }]; + let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); + let integration_registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + + let req = Request::get("https://test.com/.well-known/trusted-server.json"); + let services = test_runtime_services(&req); + let resp = futures::executor::block_on(route_request( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + )) + .expect("should route discovery request"); + assert_eq!( + resp.get_status(), + StatusCode::OK, + "should keep explicit built-in routes ahead of asset routes" + ); +} + +#[test] +fn integration_routes_take_precedence_over_asset_routes() { + let mut settings = create_test_settings(); + settings.proxy.asset_routes = vec![ProxyAssetRoute { + prefix: "/prebid.js".to_string(), + origin_url: "https://assets.example.com".to_string(), + }]; + let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); + let integration_registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + + let req = Request::get("https://test.com/prebid.js"); + let services = test_runtime_services(&req); + let mut resp = futures::executor::block_on(route_request( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + )) + .expect("should route integration request"); + assert_eq!( + resp.get_status(), + StatusCode::OK, + "should keep explicit integration routes ahead of asset routes" + ); + assert_eq!( + resp.take_body_str(), + "// Script overridden by Trusted Server\n", + "should serve the integration response instead of proxying to the asset origin" + ); +} diff --git 
a/crates/trusted-server-core/src/proxy.rs b/crates/trusted-server-core/src/proxy.rs index d1da3aa74..0782061f0 100644 --- a/crates/trusted-server-core/src/proxy.rs +++ b/crates/trusted-server-core/src/proxy.rs @@ -39,6 +39,25 @@ const PROXY_FORWARD_HEADERS: [header::HeaderName; 5] = [ HEADER_X_FORWARDED_FOR, ]; +/// Curated request headers preserved for asset proxying. +/// +/// Unlike the HTML publisher fallback, asset requests need cache validation and +/// byte-range semantics to keep 304/206 responses working for browsers. +const ASSET_PROXY_FORWARD_HEADERS: [header::HeaderName; 12] = [ + HEADER_USER_AGENT, + HEADER_ACCEPT, + HEADER_ACCEPT_ENCODING, + HEADER_ACCEPT_LANGUAGE, + HEADER_REFERER, + HEADER_X_FORWARDED_FOR, + header::IF_NONE_MATCH, + header::IF_MODIFIED_SINCE, + header::IF_MATCH, + header::IF_UNMODIFIED_SINCE, + header::RANGE, + header::IF_RANGE, +]; + /// Convert a platform-neutral response into a [`fastly::Response`] for downstream processing. /// /// Shared with `auction/orchestrator.rs`. 
Both files will migrate off `fastly::Response` @@ -494,6 +513,156 @@ pub async fn proxy_request( .await } +fn default_port_for_scheme(scheme: &str) -> Option<u16> { + match scheme { + "http" => Some(80), + "https" => Some(443), + _ => None, + } +} + +fn build_asset_proxy_target_url( + origin_url: &str, + path: &str, + query: &str, +) -> Result<url::Url, Report<TrustedServerError>> { + let mut target_url = url::Url::parse(origin_url).change_context(TrustedServerError::Proxy { + message: format!("Invalid asset origin_url: {origin_url}"), + })?; + + let scheme = target_url.scheme().to_ascii_lowercase(); + if scheme != "http" && scheme != "https" { + return Err(Report::new(TrustedServerError::Proxy { + message: format!("Unsupported asset origin_url scheme: {scheme}"), + })); + } + + if target_url.host_str().is_none() { + return Err(Report::new(TrustedServerError::Proxy { + message: "Missing host in asset origin_url".to_string(), + })); + } + + target_url.set_path(path); + if query.is_empty() { + target_url.set_query(None); + } else { + target_url.set_query(Some(query)); + } + + Ok(target_url) +} + +fn asset_origin_host_header( + target_url: &url::Url, +) -> Result<HeaderValue, Report<TrustedServerError>> { + let scheme = target_url.scheme().to_ascii_lowercase(); + let host = target_url.host_str().ok_or_else(|| { + Report::new(TrustedServerError::Proxy { + message: "Missing host in asset target URL".to_string(), + }) + })?; + let resolved_port = target_url.port_or_known_default().ok_or_else(|| { + Report::new(TrustedServerError::Proxy { + message: format!("Unsupported asset target URL scheme: {scheme}"), + }) + })?; + let host_header = if Some(resolved_port) == default_port_for_scheme(&scheme) { + host.to_string() + } else { + format!("{host}:{resolved_port}") + }; + + HeaderValue::from_str(&host_header).change_context(TrustedServerError::InvalidHeaderValue { + message: format!("invalid asset Host header value: {host_header}"), + }) +} + +/// Proxy a configured first-party asset path to its matched asset origin. 
+/// +/// This is a lean raw pass-through path: it preserves status/body/headers, +/// does not follow redirects, and bypasses publisher-page processing. +/// +/// # Errors +/// +/// Returns an error if the configured origin URL is invalid, backend +/// registration fails, or the upstream request cannot be sent. +pub async fn handle_asset_proxy_request( + settings: &Settings, + services: &RuntimeServices, + req: Request, + origin_url: &str, +) -> Result> { + let target_url = build_asset_proxy_target_url( + origin_url, + req.get_path(), + req.get_query_str().unwrap_or(""), + )?; + let scheme = target_url.scheme().to_ascii_lowercase(); + let host = target_url.host_str().ok_or_else(|| { + Report::new(TrustedServerError::Proxy { + message: "Missing host in asset target URL".to_string(), + }) + })?; + + let backend_name = services + .backend() + .ensure(&PlatformBackendSpec { + scheme, + host: host.to_string(), + port: target_url.port(), + certificate_check: settings.proxy.certificate_check, + first_byte_timeout: DEFAULT_FIRST_BYTE_TIMEOUT, + }) + .change_context(TrustedServerError::Proxy { + message: "asset backend registration failed".to_string(), + })?; + + let mut builder = edge_request_builder().method(req.get_method().clone()).uri( + target_url + .as_str() + .parse::() + .change_context(TrustedServerError::Proxy { + message: "invalid asset target URL".to_string(), + })?, + ); + + let mut outbound_headers = http::HeaderMap::new(); + for header_name in ASSET_PROXY_FORWARD_HEADERS { + if let Some(value) = req.get_header(&header_name) { + outbound_headers.insert(header_name, value.clone()); + } + } + outbound_headers.insert(header::HOST, asset_origin_host_header(&target_url)?); + + for (name, value) in &outbound_headers { + builder = builder.header(name, value); + } + + let edge_req = + builder + .body(EdgeBody::from(Vec::new())) + .change_context(TrustedServerError::Proxy { + message: "failed to build asset proxy request".to_string(), + })?; + + let platform_resp = 
services + .http_client() + .send(PlatformHttpRequest::new(edge_req, backend_name)) + .await + .change_context(TrustedServerError::Proxy { + message: "Failed to proxy asset request".to_string(), + })?; + + let mut response = platform_response_to_fastly(platform_resp)?; + + // Asset origins must not be able to set first-party cookies on the + // publisher domain through this proxy path. + response.remove_header(header::SET_COOKIE); + + Ok(response) +} + /// Upserts the `ts-ec` query parameter on a URL, replacing any existing value. fn upsert_ec_query_param(url: &mut url::Url, ec_id: &str) { let mut pairs: Vec<(String, String)> = url @@ -1247,9 +1416,10 @@ fn reconstruct_and_validate_signed_target( #[cfg(test)] mod tests { - use std::sync::Arc; + use std::sync::{Arc, Mutex}; use super::{ + asset_origin_host_header, build_asset_proxy_target_url, handle_asset_proxy_request, handle_first_party_click, handle_first_party_proxy, handle_first_party_proxy_rebuild, handle_first_party_proxy_sign, is_host_allowed, proxy_request, rebuild_response_with_body, reconstruct_and_validate_signed_target, redirect_is_permitted, ProxyRequestConfig, @@ -1279,6 +1449,10 @@ mod tests { /// `select` return `PlatformError::Unsupported`. 
struct StreamingResponseHttpClient; + struct StaticResponseHttpClient { + response_headers: Vec<(header::HeaderName, HeaderValue)>, + } + #[async_trait::async_trait(?Send)] impl PlatformHttpClient for StreamingResponseHttpClient { async fn send( @@ -1310,6 +1484,38 @@ mod tests { } } + #[async_trait::async_trait(?Send)] + impl PlatformHttpClient for StaticResponseHttpClient { + async fn send( + &self, + _request: PlatformHttpRequest, + ) -> Result> { + let mut builder = edge_response_builder().status(StatusCode::OK); + for (name, value) in &self.response_headers { + builder = builder.header(name.as_str(), value.as_bytes()); + } + let edge_response = builder + .body(EdgeBody::from(Vec::new())) + .expect("should build static test response"); + + Ok(PlatformResponse::new(edge_response).with_backend_name("stub-backend")) + } + + async fn send_async( + &self, + _request: PlatformHttpRequest, + ) -> Result> { + Err(Report::new(PlatformError::Unsupported)) + } + + async fn select( + &self, + _pending_requests: Vec, + ) -> Result> { + Err(Report::new(PlatformError::Unsupported)) + } + } + #[tokio::test] async fn proxy_missing_param_returns_400() { let settings = create_test_settings(); @@ -2089,6 +2295,171 @@ mod tests { ); } + #[test] + fn build_asset_proxy_target_url_preserves_path_and_query() { + let target_url = build_asset_proxy_target_url( + "https://assets.example.com", + "/.images/foo.jpg", + "auto=webp&width=800", + ) + .expect("should build asset target URL"); + + assert_eq!( + target_url.as_str(), + "https://assets.example.com/.images/foo.jpg?auto=webp&width=800", + "should preserve the incoming path and query exactly" + ); + } + + #[test] + fn asset_origin_host_header_omits_standard_port() { + let target_url = url::Url::parse("https://assets.example.com/.images/foo.jpg") + .expect("should parse URL"); + let host = asset_origin_host_header(&target_url).expect("should compute Host header"); + assert_eq!( + host.to_str().expect("should serialize Host header"), + 
"assets.example.com", + "should omit standard HTTPS port from Host header" + ); + } + + #[test] + fn asset_origin_host_header_includes_non_standard_port() { + let target_url = url::Url::parse("https://assets.example.com:8443/.images/foo.jpg") + .expect("should parse URL"); + let host = asset_origin_host_header(&target_url).expect("should compute Host header"); + assert_eq!( + host.to_str().expect("should serialize Host header"), + "assets.example.com:8443", + "should include non-standard port in Host header" + ); + } + + #[tokio::test] + async fn handle_asset_proxy_request_forwards_asset_headers_and_host() { + use crate::platform::test_support::StubHttpClient; + + let stub = Arc::new(StubHttpClient::new()); + stub.push_response(200, b"ok".to_vec()); + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let settings = create_test_settings(); + let mut req = Request::new( + Method::GET, + "https://www.example.com/.images/foo.jpg?auto=webp", + ); + req.set_header(header::USER_AGENT, "asset-agent/1.0"); + req.set_header(header::ACCEPT, "image/avif,image/webp,image/*,*/*;q=0.8"); + req.set_header(header::ACCEPT_ENCODING, "gzip, br"); + req.set_header(header::ACCEPT_LANGUAGE, "en-US"); + req.set_header(header::REFERER, "https://www.example.com/article"); + req.set_header(header::IF_NONE_MATCH, "\"asset-etag\""); + req.set_header(header::IF_MODIFIED_SINCE, "Thu, 13 Mar 2025 08:00:00 GMT"); + req.set_header(header::IF_MATCH, "\"asset-precondition\""); + req.set_header(header::IF_UNMODIFIED_SINCE, "Thu, 13 Mar 2025 09:00:00 GMT"); + req.set_header(header::RANGE, "bytes=0-1023"); + req.set_header(header::IF_RANGE, "\"asset-range\""); + req.set_header(header::HeaderName::from_static("x-custom-test"), "drop-me"); + + let response = handle_asset_proxy_request( + &settings, + &services, + req, + "https://assets.example.com:8443", + ) + .await + .expect("should proxy asset request"); + assert_eq!(response.get_status(), StatusCode::OK); + + let 
all_headers = stub.recorded_request_headers(); + assert_eq!(all_headers.len(), 1, "should have captured one request"); + let sent = &all_headers[0]; + let header_value = |name: &str| -> Option { + sent.iter().find(|(n, _)| n == name).map(|(_, v)| v.clone()) + }; + + assert_eq!( + header_value("user-agent").as_deref(), + Some("asset-agent/1.0"), + "should forward User-Agent" + ); + assert_eq!( + header_value("accept-encoding").as_deref(), + Some("gzip, br"), + "should preserve the incoming Accept-Encoding" + ); + assert_eq!( + header_value("if-none-match").as_deref(), + Some("\"asset-etag\""), + "should forward conditional ETag validation headers" + ); + assert_eq!( + header_value("if-modified-since").as_deref(), + Some("Thu, 13 Mar 2025 08:00:00 GMT"), + "should forward conditional date validation headers" + ); + assert_eq!( + header_value("if-match").as_deref(), + Some("\"asset-precondition\""), + "should forward precondition headers" + ); + assert_eq!( + header_value("if-unmodified-since").as_deref(), + Some("Thu, 13 Mar 2025 09:00:00 GMT"), + "should forward date precondition headers" + ); + assert_eq!( + header_value("range").as_deref(), + Some("bytes=0-1023"), + "should forward byte-range requests" + ); + assert_eq!( + header_value("if-range").as_deref(), + Some("\"asset-range\""), + "should forward range validators" + ); + assert_eq!( + header_value("host").as_deref(), + Some("assets.example.com:8443"), + "should override Host to the asset origin host" + ); + assert!( + header_value("x-custom-test").is_none(), + "should not forward unrelated custom headers" + ); + } + + #[tokio::test] + async fn handle_asset_proxy_request_strips_set_cookie_from_response() { + let services = build_services_with_http_client(Arc::new(StaticResponseHttpClient { + response_headers: vec![ + ( + header::SET_COOKIE, + HeaderValue::from_static("asset=1; Path=/; Secure"), + ), + (header::ETAG, HeaderValue::from_static("\"asset-etag\"")), + ], + }) as Arc); + let settings = 
create_test_settings(); + let req = Request::new(Method::GET, "https://www.example.com/.images/foo.jpg"); + + let response = + handle_asset_proxy_request(&settings, &services, req, "https://assets.example.com") + .await + .expect("should proxy asset request"); + + assert!( + response.get_header(header::SET_COOKIE).is_none(), + "should strip upstream Set-Cookie headers from asset responses" + ); + assert_eq!( + response.get_header_str(header::ETAG), + Some("\"asset-etag\""), + "should preserve safe cache validator headers on asset responses" + ); + } + #[tokio::test] async fn proxy_request_returns_error_for_streaming_platform_response_body() { let services = build_services_with_http_client( diff --git a/crates/trusted-server-core/src/settings.rs b/crates/trusted-server-core/src/settings.rs index 785492620..a515c6f58 100644 --- a/crates/trusted-server-core/src/settings.rs +++ b/crates/trusted-server-core/src/settings.rs @@ -3,7 +3,7 @@ use error_stack::{Report, ResultExt}; use regex::Regex; use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize}; use serde_json::Value as JsonValue; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::ops::{Deref, DerefMut}; use std::sync::OnceLock; use url::Url; @@ -333,6 +333,46 @@ fn default_request_signing_enabled() -> bool { false } +#[derive(Debug, Default, Clone, Deserialize, Serialize)] +pub struct ProxyAssetRoute { + pub prefix: String, + pub origin_url: String, +} + +impl ProxyAssetRoute { + fn normalize(&mut self) { + self.prefix = self.prefix.trim().to_string(); + self.origin_url = self.origin_url.trim().to_string(); + } + + /// Eagerly validate runtime-only asset-route configuration. + /// + /// # Errors + /// + /// Returns a configuration error if the asset-route prefix or origin URL is invalid. 
+ pub fn prepare_runtime(&self) -> Result<(), Report> { + validate_asset_route_prefix(&self.prefix).map_err(|err| { + Report::new(TrustedServerError::Configuration { + message: format!( + "proxy.asset_routes prefix `{}` is invalid: {err}", + self.prefix + ), + }) + })?; + + validate_proxy_origin_url(&self.origin_url).map_err(|err| { + Report::new(TrustedServerError::Configuration { + message: format!( + "proxy.asset_routes origin_url `{}` is invalid: {err}", + self.origin_url + ), + }) + })?; + + Ok(()) + } +} + #[derive(Debug, Clone, Deserialize, Serialize)] pub struct Proxy { /// Enable TLS certificate verification when proxying to HTTPS origins. @@ -351,6 +391,9 @@ pub struct Proxy { /// initiated by signed first-party proxy URLs. #[serde(default, deserialize_with = "vec_from_seq_or_map")] pub allowed_domains: Vec, + /// Path-prefix-based asset proxy routes evaluated before publisher fallback. + #[serde(default, deserialize_with = "vec_from_seq_or_map")] + pub asset_routes: Vec, } fn default_certificate_check() -> bool { @@ -362,6 +405,7 @@ impl Default for Proxy { Self { certificate_check: default_certificate_check(), allowed_domains: Vec::new(), + asset_routes: Vec::new(), } } } @@ -396,6 +440,52 @@ impl Proxy { "proxy.allowed_domains is empty: all redirect destinations are permitted (open mode)" ); } + + for route in &mut self.asset_routes { + route.normalize(); + } + + let mut seen_prefixes = HashSet::new(); + for route in &self.asset_routes { + if !route.prefix.is_empty() && !seen_prefixes.insert(route.prefix.clone()) { + log::warn!( + "proxy.asset_routes contains duplicate prefix `{}`; the first configured route will be used", + route.prefix + ); + } + } + } + + /// Eagerly validate runtime-only proxy settings artifacts. + /// + /// # Errors + /// + /// Returns a configuration error if any configured asset route is invalid. 
+ pub fn prepare_runtime(&self) -> Result<(), Report> { + for route in &self.asset_routes { + route.prepare_runtime()?; + } + + Ok(()) + } + + /// Resolve the longest matching asset route for the given request path. + #[must_use] + pub fn asset_route_for_path(&self, path: &str) -> Option<&ProxyAssetRoute> { + let mut best_match: Option<&ProxyAssetRoute> = None; + + for route in &self.asset_routes { + if !path.starts_with(&route.prefix) { + continue; + } + + match best_match { + Some(current) if current.prefix.len() >= route.prefix.len() => {} + _ => best_match = Some(route), + } + } + + best_match } } @@ -500,6 +590,8 @@ impl Settings { /// /// Returns a configuration error if any cached runtime artifact cannot be prepared. pub fn prepare_runtime(&self) -> Result<(), Report> { + self.proxy.prepare_runtime()?; + for handler in &self.handlers { handler.prepare_runtime()?; } @@ -507,6 +599,12 @@ impl Settings { Ok(()) } + /// Resolve the longest matching asset route for the request path. + #[must_use] + pub fn asset_route_for_path(&self, path: &str) -> Option<&ProxyAssetRoute> { + self.proxy.asset_route_for_path(path) + } + /// Resolve the first handler whose regex matches the request path. 
/// /// # Errors @@ -628,6 +726,45 @@ fn validate_redacted_not_empty(value: &Redacted) -> Result<(), Validatio Ok(()) } +fn validate_asset_route_prefix(value: &str) -> Result<(), ValidationError> { + if !value.starts_with('/') { + let mut err = ValidationError::new("invalid_prefix"); + err.add_param("value".into(), &value); + err.message = Some("asset-route prefix must start with '/'".into()); + return Err(err); + } + + Ok(()) +} + +fn validate_proxy_origin_url(value: &str) -> Result<(), ValidationError> { + validate_no_trailing_slash(value)?; + + let parsed = Url::parse(value).map_err(|parse_error| { + let mut err = ValidationError::new("invalid_origin_url"); + err.add_param("value".into(), &value); + err.add_param("message".into(), &parse_error.to_string()); + err.message = Some("origin_url must be an absolute http or https URL".into()); + err + })?; + + if !matches!(parsed.scheme(), "http" | "https") { + let mut err = ValidationError::new("invalid_origin_url_scheme"); + err.add_param("value".into(), &value); + err.message = Some("origin_url must use http or https".into()); + return Err(err); + } + + if parsed.host_str().is_none() { + let mut err = ValidationError::new("missing_origin_host"); + err.add_param("value".into(), &value); + err.message = Some("origin_url must include a host".into()); + return Err(err); + } + + Ok(()) +} + fn validate_path(value: &str) -> Result<(), ValidationError> { Regex::new(value).map(|_| ()).map_err(|err| { let mut validation_error = ValidationError::new("invalid_regex"); @@ -1655,6 +1792,7 @@ mod tests { " AD.EXAMPLE.COM ".to_string(), "*.Example.Org".to_string(), ], + asset_routes: vec![], }; proxy.normalize(); assert_eq!( @@ -1674,6 +1812,7 @@ mod tests { "".to_string(), "cdn.example.com".to_string(), ], + asset_routes: vec![], }; proxy.normalize(); assert_eq!( @@ -1688,6 +1827,7 @@ mod tests { let mut proxy = Proxy { certificate_check: true, allowed_domains: vec!["*".to_string(), "tracker.com".to_string()], + asset_routes: 
vec![], }; proxy.normalize(); assert_eq!( @@ -1702,6 +1842,7 @@ mod tests { let mut proxy = Proxy { certificate_check: true, allowed_domains: vec!["*".to_string()], + asset_routes: vec![], }; proxy.normalize(); assert!( @@ -1715,6 +1856,7 @@ mod tests { let mut proxy = Proxy { certificate_check: true, allowed_domains: vec![" ".to_string(), "\t".to_string()], + asset_routes: vec![], }; proxy.normalize(); assert!( @@ -1723,6 +1865,79 @@ mod tests { ); } + #[test] + fn proxy_normalize_trims_asset_routes() { + let mut proxy = Proxy { + certificate_check: true, + allowed_domains: vec![], + asset_routes: vec![ProxyAssetRoute { + prefix: " /.images/ ".to_string(), + origin_url: " https://assets.example.com ".to_string(), + }], + }; + proxy.normalize(); + assert_eq!( + proxy.asset_routes[0].prefix, "/.images/", + "should trim asset-route prefix" + ); + assert_eq!( + proxy.asset_routes[0].origin_url, "https://assets.example.com", + "should trim asset-route origin_url" + ); + } + + #[test] + fn proxy_asset_route_for_path_prefers_longest_prefix() { + let proxy = Proxy { + certificate_check: true, + allowed_domains: vec![], + asset_routes: vec![ + ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://a.example.com".to_string(), + }, + ProxyAssetRoute { + prefix: "/.images/special/".to_string(), + origin_url: "https://b.example.com".to_string(), + }, + ], + }; + + let route = proxy + .asset_route_for_path("/.images/special/banner.png") + .expect("should match a configured asset route"); + assert_eq!( + route.origin_url, "https://b.example.com", + "should prefer the most specific prefix" + ); + } + + #[test] + fn proxy_asset_route_for_path_keeps_first_duplicate_prefix() { + let proxy = Proxy { + certificate_check: true, + allowed_domains: vec![], + asset_routes: vec![ + ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://first.example.com".to_string(), + }, + ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: 
"https://second.example.com".to_string(), + }, + ], + }; + + let route = proxy + .asset_route_for_path("/.images/banner.png") + .expect("should match duplicate prefixes deterministically"); + assert_eq!( + route.origin_url, "https://first.example.com", + "should keep the first configured duplicate prefix" + ); + } + #[test] fn proxy_normalize_applied_by_from_toml() { let toml_str = crate_test_settings_str() @@ -1741,6 +1956,42 @@ mod tests { ); } + #[test] + fn proxy_asset_route_validation_rejects_prefix_without_leading_slash() { + let toml_str = crate_test_settings_str() + + r#" + [proxy] + + [[proxy.asset_routes]] + prefix = ".images/" + origin_url = "https://assets.example.com" + "#; + let err = + Settings::from_toml(&toml_str).expect_err("should reject invalid asset-route prefix"); + assert!( + format!("{err:?}").contains("asset-route prefix must start with '/'"), + "should mention the prefix validation failure: {err:?}" + ); + } + + #[test] + fn proxy_asset_route_validation_rejects_non_http_origin_url() { + let toml_str = crate_test_settings_str() + + r#" + [proxy] + + [[proxy.asset_routes]] + prefix = "/.images/" + origin_url = "ftp://assets.example.com" + "#; + let err = Settings::from_toml(&toml_str) + .expect_err("should reject non-http asset-route origin_url"); + assert!( + format!("{err:?}").contains("origin_url must use http or https"), + "should mention the origin_url validation failure: {err:?}" + ); + } + #[test] fn proxy_normalize_applied_by_from_toml_and_env() { let toml_str = crate_test_settings_str() diff --git a/trusted-server.toml b/trusted-server.toml index d9189aaa2..a3a2803e5 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -144,6 +144,15 @@ rewrite_script = true # Defaults to true. Set to false only for local development with self-signed certificates. # certificate_check = true +# Configure first-party asset paths that should proxy to a different backend origin. 
+# Matching is path-prefix-based and the longest matching prefix wins. +# Only GET/HEAD requests participate. Built-in and integration routes still take precedence. +# Trusted Server preserves the incoming path and query string and only swaps the origin. +# +# [[proxy.asset_routes]] +# prefix = "/.images/" +# origin_url = "https://some.fastly-service.com" +# # Restrict redirect destinations for the first-party proxy to an explicit domain allowlist. # Supports exact match ("example.com") and subdomain wildcard prefix ("*.example.com"). # Wildcard prefix also matches the apex domain ("*.example.com" matches "example.com"). From 03569f7af5933ac7bde977206cf4f2261b0f641c Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 28 Apr 2026 16:12:48 -0500 Subject: [PATCH 3/7] fix clippy --- crates/trusted-server-core/src/proxy.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trusted-server-core/src/proxy.rs b/crates/trusted-server-core/src/proxy.rs index 0782061f0..b3ae6a99a 100644 --- a/crates/trusted-server-core/src/proxy.rs +++ b/crates/trusted-server-core/src/proxy.rs @@ -1416,7 +1416,7 @@ fn reconstruct_and_validate_signed_target( #[cfg(test)] mod tests { - use std::sync::{Arc, Mutex}; + use std::sync::Arc; use super::{ asset_origin_host_header, build_asset_proxy_target_url, handle_asset_proxy_request, From 8e3abc05214386bd5f678e8602305f5bede37fd6 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 4 May 2026 12:53:22 -0500 Subject: [PATCH 4/7] Add configurable asset path rewrites --- .../trusted-server-adapter-fastly/src/main.rs | 3 +- .../src/route_tests.rs | 3 + crates/trusted-server-core/src/proxy.rs | 140 +++++++-- crates/trusted-server-core/src/publisher.rs | 8 +- crates/trusted-server-core/src/settings.rs | 290 +++++++++++++++++- trusted-server.toml | 21 +- 6 files changed, 427 insertions(+), 38 deletions(-) diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 
2f27eaa6c..46c8df32f 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -216,8 +216,7 @@ async fn route_request( asset_route.prefix, asset_route.origin_url ); - handle_asset_proxy_request(settings, runtime_services, req, &asset_route.origin_url) - .await + handle_asset_proxy_request(settings, runtime_services, req, asset_route).await } else { log::info!( "No known route matched for path: {}, proxying to publisher origin", diff --git a/crates/trusted-server-adapter-fastly/src/route_tests.rs b/crates/trusted-server-adapter-fastly/src/route_tests.rs index 87718b89c..200a99986 100644 --- a/crates/trusted-server-adapter-fastly/src/route_tests.rs +++ b/crates/trusted-server-adapter-fastly/src/route_tests.rs @@ -256,6 +256,7 @@ fn asset_routes_bypass_publisher_consent_dependencies() { settings.proxy.asset_routes = vec![ProxyAssetRoute { prefix: "/.images/".to_string(), origin_url: "https://assets.example.com".to_string(), + ..Default::default() }]; let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); let integration_registry = @@ -284,6 +285,7 @@ fn built_in_routes_take_precedence_over_asset_routes() { settings.proxy.asset_routes = vec![ProxyAssetRoute { prefix: "/.well-known/".to_string(), origin_url: "https://assets.example.com".to_string(), + ..Default::default() }]; let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); let integration_registry = @@ -312,6 +314,7 @@ fn integration_routes_take_precedence_over_asset_routes() { settings.proxy.asset_routes = vec![ProxyAssetRoute { prefix: "/prebid.js".to_string(), origin_url: "https://assets.example.com".to_string(), + ..Default::default() }]; let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); let integration_registry = diff --git a/crates/trusted-server-core/src/proxy.rs b/crates/trusted-server-core/src/proxy.rs index 
b3ae6a99a..a30f61f2f 100644 --- a/crates/trusted-server-core/src/proxy.rs +++ b/crates/trusted-server-core/src/proxy.rs @@ -18,7 +18,7 @@ use crate::error::TrustedServerError; use crate::platform::{ PlatformBackendSpec, PlatformHttpRequest, PlatformResponse, RuntimeServices, }; -use crate::settings::Settings; +use crate::settings::{ProxyAssetRoute, Settings}; use crate::streaming_processor::{Compression, PipelineConfig, StreamProcessor, StreamingPipeline}; /// Chunk size used for streaming content through the rewrite pipeline. @@ -522,13 +522,14 @@ fn default_port_for_scheme(scheme: &str) -> Option { } fn build_asset_proxy_target_url( - origin_url: &str, + route: &ProxyAssetRoute, path: &str, query: &str, ) -> Result> { - let mut target_url = url::Url::parse(origin_url).change_context(TrustedServerError::Proxy { - message: format!("Invalid asset origin_url: {origin_url}"), - })?; + let mut target_url = + url::Url::parse(&route.origin_url).change_context(TrustedServerError::Proxy { + message: format!("Invalid asset origin_url: {}", route.origin_url), + })?; let scheme = target_url.scheme().to_ascii_lowercase(); if scheme != "http" && scheme != "https" { @@ -543,7 +544,8 @@ fn build_asset_proxy_target_url( })); } - target_url.set_path(path); + let target_path = route.target_path_for(path)?; + target_url.set_path(&target_path); if query.is_empty() { target_url.set_query(None); } else { @@ -591,13 +593,10 @@ pub async fn handle_asset_proxy_request( settings: &Settings, services: &RuntimeServices, req: Request, - origin_url: &str, + route: &ProxyAssetRoute, ) -> Result> { - let target_url = build_asset_proxy_target_url( - origin_url, - req.get_path(), - req.get_query_str().unwrap_or(""), - )?; + let target_url = + build_asset_proxy_target_url(route, req.get_path(), req.get_query_str().unwrap_or(""))?; let scheme = target_url.scheme().to_ascii_lowercase(); let host = target_url.host_str().ok_or_else(|| { Report::new(TrustedServerError::Proxy { @@ -1433,6 +1432,7 @@ mod 
tests { PlatformError, PlatformHttpClient, PlatformHttpRequest, PlatformPendingRequest, PlatformResponse, PlatformSelectResult, }; + use crate::settings::ProxyAssetRoute; use crate::test_support::tests::create_test_settings; use bytes::Bytes; use edgezero_core::body::Body as EdgeBody; @@ -2297,12 +2297,14 @@ mod tests { #[test] fn build_asset_proxy_target_url_preserves_path_and_query() { - let target_url = build_asset_proxy_target_url( - "https://assets.example.com", - "/.images/foo.jpg", - "auto=webp&width=800", - ) - .expect("should build asset target URL"); + let route = ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + ..Default::default() + }; + let target_url = + build_asset_proxy_target_url(&route, "/.images/foo.jpg", "auto=webp&width=800") + .expect("should build asset target URL"); assert_eq!( target_url.as_str(), @@ -2311,6 +2313,80 @@ mod tests { ); } + #[test] + fn build_asset_proxy_target_url_applies_cloudinary_style_rewrite() { + let route = ProxyAssetRoute { + prefix: "/.image/".to_string(), + origin_url: "https://thearenagroup-prod-res.cloudinary.com".to_string(), + path_pattern: Some(r"^/\.image/(.*)/[^/]+\.([^/.]+)$".to_string()), + target_path: Some("/image/upload/$1.$2".to_string()), + }; + let target_url = build_asset_proxy_target_url( + &route, + "/.image/c_fit,w_1440/MjA/example.jpg", + "auto=webp", + ) + .expect("should build rewritten asset target URL"); + + assert_eq!( + target_url.as_str(), + "https://thearenagroup-prod-res.cloudinary.com/image/upload/c_fit,w_1440/MjA.jpg?auto=webp", + "should rewrite the path generically while preserving query parameters" + ); + } + + #[test] + fn build_asset_proxy_target_url_applies_raven_static_prefix_rewrite() { + let route = ProxyAssetRoute { + prefix: "/_next/static/".to_string(), + origin_url: "http://raven-assets.example.com".to_string(), + path_pattern: Some(r"^(.*)$".to_string()), + target_path: Some("/_network$1".to_string()), + }; + 
let target_url = build_asset_proxy_target_url(&route, "/_next/static/chunks/app.js", "") + .expect("should build rewritten Raven asset target URL"); + + assert_eq!( + target_url.as_str(), + "http://raven-assets.example.com/_network/_next/static/chunks/app.js", + "should prepend the configured upstream path prefix" + ); + } + + #[test] + fn build_asset_proxy_target_url_errors_when_rewrite_pattern_misses() { + let route = ProxyAssetRoute { + prefix: "/.image/".to_string(), + origin_url: "https://assets.example.com".to_string(), + path_pattern: Some(r"^/\.image/(.*)\.jpg$".to_string()), + target_path: Some("/image/upload/$1.jpg".to_string()), + }; + let err = build_asset_proxy_target_url(&route, "/.image/foo.png", "") + .expect_err("should reject paths that do not match the configured rewrite"); + + assert!( + format!("{err:?}").contains("did not match path_pattern"), + "should explain the rewrite miss: {err:?}" + ); + } + + #[test] + fn build_asset_proxy_target_url_errors_when_rewrite_omits_leading_slash() { + let route = ProxyAssetRoute { + prefix: "/assets/".to_string(), + origin_url: "https://assets.example.com".to_string(), + path_pattern: Some(r"^/assets/(.*)$".to_string()), + target_path: Some("$1".to_string()), + }; + let err = build_asset_proxy_target_url(&route, "/assets/app.js", "") + .expect_err("should reject rewritten paths without a leading slash"); + + assert!( + format!("{err:?}").contains("must start with '/'"), + "should explain the invalid rewritten path: {err:?}" + ); + } + #[test] fn asset_origin_host_header_omits_standard_port() { let target_url = url::Url::parse("https://assets.example.com/.images/foo.jpg") @@ -2362,14 +2438,14 @@ mod tests { req.set_header(header::IF_RANGE, "\"asset-range\""); req.set_header(header::HeaderName::from_static("x-custom-test"), "drop-me"); - let response = handle_asset_proxy_request( - &settings, - &services, - req, - "https://assets.example.com:8443", - ) - .await - .expect("should proxy asset request"); + let 
route = ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com:8443".to_string(), + ..Default::default() + }; + let response = handle_asset_proxy_request(&settings, &services, req, &route) + .await + .expect("should proxy asset request"); assert_eq!(response.get_status(), StatusCode::OK); let all_headers = stub.recorded_request_headers(); @@ -2444,10 +2520,14 @@ mod tests { let settings = create_test_settings(); let req = Request::new(Method::GET, "https://www.example.com/.images/foo.jpg"); - let response = - handle_asset_proxy_request(&settings, &services, req, "https://assets.example.com") - .await - .expect("should proxy asset request"); + let route = ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + ..Default::default() + }; + let response = handle_asset_proxy_request(&settings, &services, req, &route) + .await + .expect("should proxy asset request"); assert!( response.get_header(header::SET_COOKIE).is_none(), diff --git a/crates/trusted-server-core/src/publisher.rs b/crates/trusted-server-core/src/publisher.rs index 5bcef6941..6c95543c5 100644 --- a/crates/trusted-server-core/src/publisher.rs +++ b/crates/trusted-server-core/src/publisher.rs @@ -514,15 +514,17 @@ pub fn handle_publisher_request( settings.proxy.certificate_check, )?; let origin_host = settings.publisher.origin_host(); + let origin_host_header = settings.publisher.origin_host_header_value(); log::debug!( - "Proxying to dynamic backend: {} (from {})", + "Proxying to dynamic backend: {} (from {}, Host: {})", backend_name, - settings.publisher.origin_url + settings.publisher.origin_url, + origin_host_header ); // Only advertise encodings the rewrite pipeline can decode and re-encode. 
restrict_accept_encoding(&mut req); - req.set_header("host", &origin_host); + req.set_header("host", &origin_host_header); let mut response = req .send(&backend_name) diff --git a/crates/trusted-server-core/src/settings.rs b/crates/trusted-server-core/src/settings.rs index a515c6f58..8a9e83762 100644 --- a/crates/trusted-server-core/src/settings.rs +++ b/crates/trusted-server-core/src/settings.rs @@ -24,6 +24,9 @@ pub struct Publisher { pub cookie_domain: String, #[validate(custom(function = validate_no_trailing_slash))] pub origin_url: String, + /// Optional upstream Host header value used when connecting to an origin + /// whose routing host differs from the backend host. + pub origin_host_header: Option, /// Secret used to encrypt/decrypt proxied URLs in `/first-party/proxy`. /// Keep this secret stable to allow existing links to decode. #[validate(custom(function = validate_redacted_not_empty))] @@ -43,6 +46,41 @@ impl Publisher { .any(|p| p.eq_ignore_ascii_case(proxy_secret)) } + fn normalize(&mut self) { + self.origin_host_header = self + .origin_host_header + .take() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()); + } + + /// Eagerly validate runtime-only publisher configuration. + /// + /// # Errors + /// + /// Returns a configuration error if the configured origin Host header is invalid. + pub fn prepare_runtime(&self) -> Result<(), Report> { + if let Some(host_header) = &self.origin_host_header { + validate_host_header_value(host_header).map_err(|err| { + Report::new(TrustedServerError::Configuration { + message: format!( + "publisher.origin_host_header `{host_header}` is invalid: {err}" + ), + }) + })?; + } + + Ok(()) + } + + /// Returns the upstream Host header to send to the publisher origin. + #[must_use] + pub fn origin_host_header_value(&self) -> String { + self.origin_host_header + .clone() + .unwrap_or_else(|| self.origin_host()) + } + /// Extracts the host (including port if present) from the `origin_url`. 
/// /// # Examples @@ -54,6 +92,7 @@ impl Publisher { /// domain: "example.com".to_string(), /// cookie_domain: ".example.com".to_string(), /// origin_url: "https://origin.example.com:8080".to_string(), + /// origin_host_header: None, /// proxy_secret: Redacted::new("proxy-secret".to_string()), /// }; /// assert_eq!(publisher.origin_host(), "origin.example.com:8080"); @@ -337,19 +376,97 @@ fn default_request_signing_enabled() -> bool { pub struct ProxyAssetRoute { pub prefix: String, pub origin_url: String, + /// Optional regex matched against the incoming request path before proxying. + pub path_pattern: Option, + /// Optional regex replacement used with [`Self::path_pattern`] to build the upstream path. + pub target_path: Option, } impl ProxyAssetRoute { fn normalize(&mut self) { self.prefix = self.prefix.trim().to_string(); self.origin_url = self.origin_url.trim().to_string(); + self.path_pattern = self + .path_pattern + .take() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()); + self.target_path = self + .target_path + .take() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()); + } + + fn compiled_path_pattern(&self) -> Result, Report> { + let Some(pattern) = self.path_pattern.as_deref() else { + return Ok(None); + }; + + Regex::new(pattern).map(Some).map_err(|err| { + Report::new(TrustedServerError::Configuration { + message: format!( + "proxy.asset_routes path_pattern `{pattern}` failed to compile: {err}" + ), + }) + }) + } + + /// Rewrite a matched request path to the configured upstream target path. + /// + /// # Errors + /// + /// Returns a proxy/configuration error if the rewrite is incomplete, does not + /// match the request path, or produces a path that does not start with `/`. 
+ pub fn target_path_for(&self, path: &str) -> Result> { + match (&self.path_pattern, &self.target_path) { + (None, None) => Ok(path.to_string()), + (Some(_), Some(target_path)) => { + let regex = self.compiled_path_pattern()?.ok_or_else(|| { + Report::new(TrustedServerError::Configuration { + message: format!( + "proxy.asset_routes prefix `{}` has a target_path without path_pattern", + self.prefix + ), + }) + })?; + + if !regex.is_match(path) { + return Err(Report::new(TrustedServerError::Proxy { + message: format!( + "asset path `{path}` matched prefix `{}` but did not match path_pattern", + self.prefix + ), + })); + } + + let rewritten = regex.replace(path, target_path.as_str()).into_owned(); + if !rewritten.starts_with('/') { + return Err(Report::new(TrustedServerError::Configuration { + message: format!( + "proxy.asset_routes prefix `{}` rewrote `{path}` to `{rewritten}`, which must start with '/'", + self.prefix + ), + })); + } + + Ok(rewritten) + } + _ => Err(Report::new(TrustedServerError::Configuration { + message: format!( + "proxy.asset_routes prefix `{}` must configure path_pattern and target_path together", + self.prefix + ), + })), + } } /// Eagerly validate runtime-only asset-route configuration. /// /// # Errors /// - /// Returns a configuration error if the asset-route prefix or origin URL is invalid. + /// Returns a configuration error if the asset-route prefix, origin URL, or + /// path rewrite settings are invalid. 
pub fn prepare_runtime(&self) -> Result<(), Report> { validate_asset_route_prefix(&self.prefix).map_err(|err| { Report::new(TrustedServerError::Configuration { @@ -369,7 +486,19 @@ impl ProxyAssetRoute { }) })?; - Ok(()) + match (&self.path_pattern, &self.target_path) { + (None, None) | (Some(_), Some(_)) => {} + _ => { + return Err(Report::new(TrustedServerError::Configuration { + message: format!( + "proxy.asset_routes prefix `{}` must configure path_pattern and target_path together", + self.prefix + ), + })); + } + } + + self.compiled_path_pattern().map(|_| ()) } } @@ -531,6 +660,7 @@ impl Settings { message: "Failed to deserialize TOML configuration".to_string(), })?; + settings.publisher.normalize(); settings.proxy.normalize(); settings.consent.validate(); settings.prepare_runtime()?; @@ -569,6 +699,7 @@ impl Settings { })?; settings.integrations.normalize(); + settings.publisher.normalize(); settings.proxy.normalize(); settings.consent.validate(); @@ -590,6 +721,7 @@ impl Settings { /// /// Returns a configuration error if any cached runtime artifact cannot be prepared. 
pub fn prepare_runtime(&self) -> Result<(), Report> { + self.publisher.prepare_runtime()?; self.proxy.prepare_runtime()?; for handler in &self.handlers { @@ -726,6 +858,18 @@ fn validate_redacted_not_empty(value: &Redacted) -> Result<(), Validatio Ok(()) } +fn validate_host_header_value(value: &str) -> Result<(), ValidationError> { + if value.is_empty() || value.contains(['\0', '\n', '\r']) { + let mut err = ValidationError::new("invalid_host_header"); + err.add_param("value".into(), &value); + err.message = + Some("host header must be non-empty and must not contain control characters".into()); + return Err(err); + } + + Ok(()) +} + fn validate_asset_route_prefix(value: &str) -> Result<(), ValidationError> { if !value.starts_with('/') { let mut err = ValidationError::new("invalid_prefix"); @@ -1363,6 +1507,7 @@ mod tests { domain: "example.com".to_string(), cookie_domain: ".example.com".to_string(), origin_url: "https://origin.example.com:8080".to_string(), + origin_host_header: None, proxy_secret: Redacted::new("test-secret".to_string()), }; assert_eq!(publisher.origin_host(), "origin.example.com:8080"); @@ -1372,6 +1517,7 @@ mod tests { domain: "example.com".to_string(), cookie_domain: ".example.com".to_string(), origin_url: "https://origin.example.com".to_string(), + origin_host_header: None, proxy_secret: Redacted::new("test-secret".to_string()), }; assert_eq!(publisher.origin_host(), "origin.example.com"); @@ -1381,6 +1527,7 @@ mod tests { domain: "example.com".to_string(), cookie_domain: ".example.com".to_string(), origin_url: "http://localhost:9090".to_string(), + origin_host_header: None, proxy_secret: Redacted::new("test-secret".to_string()), }; assert_eq!(publisher.origin_host(), "localhost:9090"); @@ -1390,6 +1537,7 @@ mod tests { domain: "example.com".to_string(), cookie_domain: ".example.com".to_string(), origin_url: "localhost:9090".to_string(), + origin_host_header: None, proxy_secret: Redacted::new("test-secret".to_string()), }; 
assert_eq!(publisher.origin_host(), "localhost:9090"); @@ -1399,6 +1547,7 @@ mod tests { domain: "example.com".to_string(), cookie_domain: ".example.com".to_string(), origin_url: "http://192.168.1.1:8080".to_string(), + origin_host_header: None, proxy_secret: Redacted::new("test-secret".to_string()), }; assert_eq!(publisher.origin_host(), "192.168.1.1:8080"); @@ -1408,11 +1557,47 @@ mod tests { domain: "example.com".to_string(), cookie_domain: ".example.com".to_string(), origin_url: "http://[::1]:8080".to_string(), + origin_host_header: None, proxy_secret: Redacted::new("test-secret".to_string()), }; assert_eq!(publisher.origin_host(), "[::1]:8080"); } + #[test] + fn publisher_origin_host_header_defaults_to_origin_host() { + let publisher = Publisher { + domain: "example.com".to_string(), + cookie_domain: ".example.com".to_string(), + origin_url: "https://origin.example.com".to_string(), + origin_host_header: None, + proxy_secret: Redacted::new("test-secret".to_string()), + }; + + assert_eq!( + publisher.origin_host_header_value(), + "origin.example.com", + "should preserve existing Host header behavior by default" + ); + } + + #[test] + fn publisher_origin_host_header_uses_configured_value() { + let mut publisher = Publisher { + domain: "autoblog.com".to_string(), + cookie_domain: ".autoblog.com".to_string(), + origin_url: "https://raven-public.prod.saymedia.com".to_string(), + origin_host_header: Some(" autoblog.com ".to_string()), + proxy_secret: Redacted::new("test-secret".to_string()), + }; + publisher.normalize(); + + assert_eq!( + publisher.origin_host_header_value(), + "autoblog.com", + "should use the normalized configured upstream Host header" + ); + } + #[test] fn test_integration_settings_from_env() { use crate::integrations::testlight::TestlightConfig; @@ -1873,6 +2058,7 @@ mod tests { asset_routes: vec![ProxyAssetRoute { prefix: " /.images/ ".to_string(), origin_url: " https://assets.example.com ".to_string(), + ..Default::default() }], }; 
proxy.normalize(); @@ -1886,6 +2072,102 @@ mod tests { ); } + #[test] + fn proxy_normalize_trims_asset_route_rewrite_fields() { + let mut proxy = Proxy { + certificate_check: true, + allowed_domains: vec![], + asset_routes: vec![ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + path_pattern: Some(" ^/(.*)$ ".to_string()), + target_path: Some(" /rewritten/$1 ".to_string()), + }], + }; + proxy.normalize(); + + assert_eq!( + proxy.asset_routes[0].path_pattern.as_deref(), + Some("^/(.*)$"), + "should trim asset-route path_pattern" + ); + assert_eq!( + proxy.asset_routes[0].target_path.as_deref(), + Some("/rewritten/$1"), + "should trim asset-route target_path" + ); + } + + #[test] + fn proxy_asset_route_rewrite_fields_parse_from_toml() { + let toml_str = crate_test_settings_str() + + r#" + [proxy] + + [[proxy.asset_routes]] + prefix = "/.image/" + origin_url = "https://assets.example.com" + path_pattern = "^/\\.image/(.*)/[^/]+\\.([^/.]+)$" + target_path = "/image/upload/$1.$2" + "#; + let settings = Settings::from_toml(&toml_str).expect("should parse asset route rewrite"); + let route = settings + .asset_route_for_path("/.image/options/id/example.jpg") + .expect("should match configured asset route"); + + assert_eq!( + route.path_pattern.as_deref(), + Some(r"^/\.image/(.*)/[^/]+\.([^/.]+)$"), + "should preserve the configured rewrite pattern" + ); + assert_eq!( + route.target_path.as_deref(), + Some("/image/upload/$1.$2"), + "should preserve the configured replacement" + ); + } + + #[test] + fn proxy_asset_route_validation_rejects_incomplete_rewrite() { + let toml_str = crate_test_settings_str() + + r#" + [proxy] + + [[proxy.asset_routes]] + prefix = "/.image/" + origin_url = "https://assets.example.com" + path_pattern = "^/\\.image/(.*)$" + "#; + let err = Settings::from_toml(&toml_str) + .expect_err("should reject incomplete asset route rewrite"); + + assert!( + format!("{err:?}").contains("must configure 
path_pattern and target_path together"), + "should mention the incomplete rewrite configuration: {err:?}" + ); + } + + #[test] + fn proxy_asset_route_validation_rejects_invalid_path_pattern() { + let toml_str = crate_test_settings_str() + + r#" + [proxy] + + [[proxy.asset_routes]] + prefix = "/.image/" + origin_url = "https://assets.example.com" + path_pattern = "[" + target_path = "/image/upload/$1" + "#; + let err = Settings::from_toml(&toml_str) + .expect_err("should reject invalid asset route path_pattern"); + + assert!( + format!("{err:?}").contains("failed to compile"), + "should mention the invalid regex: {err:?}" + ); + } + #[test] fn proxy_asset_route_for_path_prefers_longest_prefix() { let proxy = Proxy { @@ -1895,10 +2177,12 @@ mod tests { ProxyAssetRoute { prefix: "/.images/".to_string(), origin_url: "https://a.example.com".to_string(), + ..Default::default() }, ProxyAssetRoute { prefix: "/.images/special/".to_string(), origin_url: "https://b.example.com".to_string(), + ..Default::default() }, ], }; @@ -1921,10 +2205,12 @@ mod tests { ProxyAssetRoute { prefix: "/.images/".to_string(), origin_url: "https://first.example.com".to_string(), + ..Default::default() }, ProxyAssetRoute { prefix: "/.images/".to_string(), origin_url: "https://second.example.com".to_string(), + ..Default::default() }, ], }; diff --git a/trusted-server.toml b/trusted-server.toml index a3a2803e5..751da28e2 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -12,6 +12,9 @@ password = "changeme" domain = "test-publisher.com" cookie_domain = ".test-publisher.com" origin_url = "https://origin.test-publisher.com" +# Optional: override the upstream Host header while still connecting to origin_url. +# Useful when an origin routes sites by Host but the backend DNS name differs. 
+# origin_host_header = "test-publisher.com" proxy_secret = "change-me-proxy-secret" [edge_cookie] @@ -147,12 +150,28 @@ rewrite_script = true # Configure first-party asset paths that should proxy to a different backend origin. # Matching is path-prefix-based and the longest matching prefix wins. # Only GET/HEAD requests participate. Built-in and integration routes still take precedence. -# Trusted Server preserves the incoming path and query string and only swaps the origin. +# Trusted Server preserves the incoming query string. By default it also preserves +# the incoming path, but path_pattern/target_path can generically rewrite paths +# before sending them upstream. # # [[proxy.asset_routes]] # prefix = "/.images/" # origin_url = "https://some.fastly-service.com" # +# Example: Cloudinary-style first-party image path rewrite. +# [[proxy.asset_routes]] +# prefix = "/.image/" +# origin_url = "https://thearenagroup-prod-res.cloudinary.com" +# path_pattern = "^/\\.image/(.*)/[^/]+\\.([^/.]+)$" +# target_path = "/image/upload/$1.$2" +# +# Example: Raven shared static assets stored under an upstream /_network prefix. +# [[proxy.asset_routes]] +# prefix = "/_next/static/" +# origin_url = "http://thearenagroup-raven-prod-public-assets.s3-website-us-west-2.amazonaws.com" +# path_pattern = "^(.*)$" +# target_path = "/_network$1" +# # Restrict redirect destinations for the first-party proxy to an explicit domain allowlist. # Supports exact match ("example.com") and subdomain wildcard prefix ("*.example.com"). # Wildcard prefix also matches the apex domain ("*.example.com" matches "example.com"). 
From 126e7fe8862a9728ee4c306e9b5e9b149e350be7 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 4 May 2026 13:57:58 -0500 Subject: [PATCH 5/7] Honor publisher origin host overrides --- crates/trusted-server-core/src/backend.rs | 77 ++++++++++++++++++++- crates/trusted-server-core/src/publisher.rs | 8 ++- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/crates/trusted-server-core/src/backend.rs b/crates/trusted-server-core/src/backend.rs index 468a3f830..145096167 100644 --- a/crates/trusted-server-core/src/backend.rs +++ b/crates/trusted-server-core/src/backend.rs @@ -46,6 +46,7 @@ pub struct BackendConfig<'a> { port: Option, certificate_check: bool, first_byte_timeout: Duration, + override_host: Option<&'a str>, } impl<'a> BackendConfig<'a> { @@ -61,6 +62,7 @@ impl<'a> BackendConfig<'a> { port: None, certificate_check: true, first_byte_timeout: DEFAULT_FIRST_BYTE_TIMEOUT, + override_host: None, } } @@ -79,6 +81,14 @@ impl<'a> BackendConfig<'a> { self } + /// Override the Host header sent upstream while keeping the backend target, + /// TLS SNI, and certificate verification tied to [`Self::host`]. + #[must_use] + pub fn override_host(mut self, override_host: Option<&'a str>) -> Self { + self.override_host = override_host; + self + } + /// Set the maximum time to wait for the first byte of the response. /// /// Defaults to 15 seconds. 
For latency-sensitive paths like auction @@ -109,6 +119,14 @@ impl<'a> BackendConfig<'a> { message: "host contains control characters".to_string(), })); } + if self + .override_host + .is_some_and(|host| host.is_empty() || host.chars().any(char::is_control)) + { + return Err(Report::new(TrustedServerError::Proxy { + message: "override host is empty or contains control characters".to_string(), + })); + } if self.scheme.chars().any(char::is_control) { return Err(Report::new(TrustedServerError::Proxy { message: "scheme contains control characters".to_string(), @@ -125,11 +143,17 @@ impl<'a> BackendConfig<'a> { } else { "_nocert" }; + let override_host_suffix = self + .override_host + .filter(|host| !host.is_empty()) + .map(|host| format!("_oh_{}", host.replace(['.', ':'], "_"))) + .unwrap_or_default(); let timeout_ms = self.first_byte_timeout.as_millis(); let backend_name = format!( - "backend_{}{}_t{}", + "backend_{}{}{}_t{}", name_base.replace(['.', ':'], "_"), cert_suffix, + override_host_suffix, timeout_ms ); @@ -165,11 +189,12 @@ impl<'a> BackendConfig<'a> { let host_with_port = format!("{}:{}", self.host, target_port); - let host_header = compute_host_header(self.scheme, self.host, target_port); + let default_host_header = compute_host_header(self.scheme, self.host, target_port); + let host_header = self.override_host.unwrap_or(&default_host_header); // Target base is host[:port]; SSL is enabled only for https scheme let mut builder = Backend::builder(&backend_name, &host_with_port) - .override_host(&host_header) + .override_host(host_header) .connect_timeout(Duration::from_secs(1)) .first_byte_timeout(self.first_byte_timeout) .between_bytes_timeout(Duration::from_secs(10)); @@ -278,6 +303,26 @@ impl<'a> BackendConfig<'a> { origin_url: &str, certificate_check: bool, first_byte_timeout: Duration, + ) -> Result> { + Self::from_url_with_first_byte_timeout_and_override_host( + origin_url, + certificate_check, + first_byte_timeout, + None, + ) + } + + /// Parse an 
origin URL and ensure a dynamic backend with an optional upstream Host override. + /// + /// # Errors + /// + /// Returns an error if the URL cannot be parsed or lacks a host, or if + /// backend creation fails. + pub fn from_url_with_first_byte_timeout_and_override_host( + origin_url: &str, + certificate_check: bool, + first_byte_timeout: Duration, + override_host: Option<&str>, ) -> Result> { let (scheme, host, port) = Self::parse_origin(origin_url)?; @@ -285,6 +330,7 @@ impl<'a> BackendConfig<'a> { .port(port) .certificate_check(certificate_check) .first_byte_timeout(first_byte_timeout) + .override_host(override_host) .ensure() } @@ -400,6 +446,31 @@ mod tests { assert_eq!(name, "backend_http_example_org_80_t15000"); } + #[test] + fn override_host_changes_backend_name() { + let (name, _) = BackendConfig::new("https", "raven-public.prod.saymedia.com") + .override_host(Some("www.autoblog.com")) + .compute_name() + .expect("should compute name with Host override"); + + assert_eq!( + name, "backend_https_raven-public_prod_saymedia_com_443_oh_www_autoblog_com_t15000", + "should isolate dynamic backends with different Host overrides" + ); + } + + #[test] + fn error_on_override_host_with_control_characters() { + let err = BackendConfig::new("https", "origin.example.com") + .override_host(Some("www.example.com\nINFO fake log entry")) + .predict_name() + .expect_err("should reject override host containing newline"); + assert!( + err.to_string().contains("override host"), + "should report invalid override host in error message" + ); + } + #[test] fn error_on_host_with_control_characters() { let err = BackendConfig::new("https", "evil.com\nINFO fake log entry") diff --git a/crates/trusted-server-core/src/publisher.rs b/crates/trusted-server-core/src/publisher.rs index 6c95543c5..c32f96296 100644 --- a/crates/trusted-server-core/src/publisher.rs +++ b/crates/trusted-server-core/src/publisher.rs @@ -509,12 +509,14 @@ pub fn handle_publisher_request( let ec_allowed = 
allows_ec_creation(&consent_context); log::debug!("Proxy ec_allowed: {}", ec_allowed); - let backend_name = BackendConfig::from_url( + let origin_host = settings.publisher.origin_host(); + let origin_host_header = settings.publisher.origin_host_header_value(); + let backend_name = BackendConfig::from_url_with_first_byte_timeout_and_override_host( &settings.publisher.origin_url, settings.proxy.certificate_check, + crate::backend::DEFAULT_FIRST_BYTE_TIMEOUT, + Some(&origin_host_header), )?; - let origin_host = settings.publisher.origin_host(); - let origin_host_header = settings.publisher.origin_host_header_value(); log::debug!( "Proxying to dynamic backend: {} (from {}, Host: {})", From 1efc12deaa7b186c44283b891069fba28f5883e6 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 4 May 2026 14:10:30 -0500 Subject: [PATCH 6/7] Rewrite publisher public origin URLs --- crates/trusted-server-core/src/publisher.rs | 14 +++--- crates/trusted-server-core/src/settings.rs | 55 +++++++++++++++++++++ 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/crates/trusted-server-core/src/publisher.rs b/crates/trusted-server-core/src/publisher.rs index c32f96296..6c9f56efc 100644 --- a/crates/trusted-server-core/src/publisher.rs +++ b/crates/trusted-server-core/src/publisher.rs @@ -509,8 +509,8 @@ pub fn handle_publisher_request( let ec_allowed = allows_ec_creation(&consent_context); log::debug!("Proxy ec_allowed: {}", ec_allowed); - let origin_host = settings.publisher.origin_host(); let origin_host_header = settings.publisher.origin_host_header_value(); + let origin_rewrite_url = settings.publisher.origin_rewrite_url(); let backend_name = BackendConfig::from_url_with_first_byte_timeout_and_override_host( &settings.publisher.origin_url, settings.proxy.certificate_check, @@ -611,7 +611,7 @@ pub fn handle_publisher_request( ResponseRoute::Stream => { log::debug!( "Streaming response - Content-Type: {}, Content-Encoding: {}, Request Host: {}, Origin Host: {}", - content_type, 
content_encoding, request_host, origin_host + content_type, content_encoding, request_host, origin_host_header ); let body = response.take_body(); @@ -622,8 +622,8 @@ pub fn handle_publisher_request( body, params: OwnedProcessResponseParams { content_encoding, - origin_host, - origin_url: settings.publisher.origin_url.clone(), + origin_host: origin_host_header, + origin_url: origin_rewrite_url, request_host: request_host.to_string(), request_scheme: request_scheme.to_string(), content_type, @@ -633,14 +633,14 @@ pub fn handle_publisher_request( ResponseRoute::BufferedProcessed => { log::debug!( "Buffered response - Content-Type: {}, Content-Encoding: {}, Request Host: {}, Origin Host: {}", - content_type, content_encoding, request_host, origin_host + content_type, content_encoding, request_host, origin_host_header ); let body = response.take_body(); let params = ProcessResponseParams { content_encoding: &content_encoding, - origin_host: &origin_host, - origin_url: &settings.publisher.origin_url, + origin_host: &origin_host_header, + origin_url: &origin_rewrite_url, request_host, request_scheme, settings, diff --git a/crates/trusted-server-core/src/settings.rs b/crates/trusted-server-core/src/settings.rs index 8a9e83762..4785aaa42 100644 --- a/crates/trusted-server-core/src/settings.rs +++ b/crates/trusted-server-core/src/settings.rs @@ -81,6 +81,26 @@ impl Publisher { .unwrap_or_else(|| self.origin_host()) } + /// Returns the public origin URL whose URLs should be rewritten to the request host. + /// + /// When `origin_host_header` is configured, the backend connection target + /// (`origin_url`) may be an internal routing host while page content still + /// references the public origin host. In that case, rewrite against the + /// configured Host header using the origin URL's scheme. 
+ #[must_use] + pub fn origin_rewrite_url(&self) -> String { + let Some(host_header) = self.origin_host_header.as_deref() else { + return self.origin_url.clone(); + }; + + let scheme = Url::parse(&self.origin_url) + .ok() + .map(|url| url.scheme().to_string()) + .unwrap_or_else(|| "https".to_string()); + + format!("{scheme}://{host_header}") + } + /// Extracts the host (including port if present) from the `origin_url`. /// /// # Examples @@ -1598,6 +1618,41 @@ mod tests { ); } + #[test] + fn publisher_origin_rewrite_url_uses_configured_host_with_origin_scheme() { + let mut publisher = Publisher { + domain: "autoblog.com".to_string(), + cookie_domain: ".autoblog.com".to_string(), + origin_url: "https://raven-public.prod.saymedia.com".to_string(), + origin_host_header: Some("www.autoblog.com".to_string()), + proxy_secret: Redacted::new("test-secret".to_string()), + }; + publisher.normalize(); + + assert_eq!( + publisher.origin_rewrite_url(), + "https://www.autoblog.com", + "should rewrite public-origin URLs instead of backend routing host URLs" + ); + } + + #[test] + fn publisher_origin_rewrite_url_defaults_to_origin_url_without_host_override() { + let publisher = Publisher { + domain: "example.com".to_string(), + cookie_domain: ".example.com".to_string(), + origin_url: "https://origin.example.com".to_string(), + origin_host_header: None, + proxy_secret: Redacted::new("test-secret".to_string()), + }; + + assert_eq!( + publisher.origin_rewrite_url(), + "https://origin.example.com", + "should preserve existing rewrite behavior without a Host override" + ); + } + #[test] fn test_integration_settings_from_env() { use crate::integrations::testlight::TestlightConfig; From b2edefe21a631c871826bddfbb2ca4c4e238f19c Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 5 May 2026 07:42:03 -0500 Subject: [PATCH 7/7] Address asset proxy review feedback --- .../trusted-server-adapter-fastly/src/main.rs | 7 +- .../src/route_tests.rs | 339 +++++++++++++++++- 
.../src/platform/test_support.rs | 48 ++- crates/trusted-server-core/src/proxy.rs | 83 ++--- crates/trusted-server-core/src/settings.rs | 88 ++++- trusted-server.toml | 1 + 6 files changed, 495 insertions(+), 71 deletions(-) diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 46c8df32f..afad18489 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -150,10 +150,9 @@ async fn route_request( let path = req.get_path().to_string(); let method = req.get_method().clone(); - let matched_asset_route = match &method { - &Method::GET | &Method::HEAD => settings.asset_route_for_path(&path), - _ => None, - }; + let matched_asset_route = matches!(method, Method::GET | Method::HEAD) + .then(|| settings.asset_route_for_path(&path)) + .flatten(); // Match known routes and handle them let result = match (method, path.as_str()) { diff --git a/crates/trusted-server-adapter-fastly/src/route_tests.rs b/crates/trusted-server-adapter-fastly/src/route_tests.rs index 200a99986..64f3cec9c 100644 --- a/crates/trusted-server-adapter-fastly/src/route_tests.rs +++ b/crates/trusted-server-adapter-fastly/src/route_tests.rs @@ -1,9 +1,11 @@ use std::net::IpAddr; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; +use edgezero_core::body::Body as EdgeBody; +use edgezero_core::http::response_builder as edge_response_builder; use edgezero_core::key_value_store::NoopKvStore; use error_stack::Report; -use fastly::http::StatusCode; +use fastly::http::{header, Method, StatusCode}; use fastly::Request; use trusted_server_core::auction::build_orchestrator; use trusted_server_core::integrations::IntegrationRegistry; @@ -85,6 +87,92 @@ impl PlatformBackend for NoopBackend { struct NoopHttpClient; +struct RecordingHttpClient { + calls: Mutex>, + response_status: StatusCode, + response_headers: Vec<(String, String)>, +} + +impl RecordingHttpClient { + fn new(response_status: 
StatusCode) -> Self { + Self { + calls: Mutex::new(Vec::new()), + response_status, + response_headers: Vec::new(), + } + } + + fn with_response_headers( + mut self, + headers: Vec<(impl Into, impl Into)>, + ) -> Self { + self.response_headers = headers + .into_iter() + .map(|(name, value)| (name.into(), value.into())) + .collect(); + self + } +} + +struct RecordedHttpCall { + method: Method, + uri: String, + backend_name: String, +} + +struct FixedBackend; + +impl PlatformBackend for FixedBackend { + fn predict_name(&self, spec: &PlatformBackendSpec) -> Result> { + Ok(format!("{}-{}", spec.scheme, spec.host)) + } + + fn ensure(&self, spec: &PlatformBackendSpec) -> Result> { + self.predict_name(spec) + } +} + +#[async_trait::async_trait(?Send)] +impl PlatformHttpClient for RecordingHttpClient { + async fn send( + &self, + request: PlatformHttpRequest, + ) -> Result> { + self.calls + .lock() + .expect("should lock calls") + .push(RecordedHttpCall { + method: request.request.method().clone(), + uri: request.request.uri().to_string(), + backend_name: request.backend_name, + }); + + let mut builder = edge_response_builder().status(self.response_status); + for (name, value) in &self.response_headers { + builder = builder.header(name, value); + } + let edge_response = builder + .body(EdgeBody::from(Vec::new())) + .map_err(|_| Report::new(PlatformError::HttpClient))?; + + Ok(PlatformResponse::new(edge_response)) + } + + async fn send_async( + &self, + _request: PlatformHttpRequest, + ) -> Result> { + Err(Report::new(PlatformError::Unsupported)) + } + + async fn select( + &self, + _pending_requests: Vec, + ) -> Result> { + Err(Report::new(PlatformError::Unsupported)) + } +} + #[async_trait::async_trait(?Send)] impl PlatformHttpClient for NoopHttpClient { async fn send( @@ -163,12 +251,24 @@ fn create_test_settings() -> Settings { } fn test_runtime_services(req: &Request) -> RuntimeServices { + test_runtime_services_with_http_client( + req, + Arc::new(NoopBackend), + 
Arc::new(NoopHttpClient) as Arc, + ) +} + +fn test_runtime_services_with_http_client( + req: &Request, + backend: Arc, + http_client: Arc, +) -> RuntimeServices { RuntimeServices::builder() .config_store(Arc::new(StubJwksConfigStore)) .secret_store(Arc::new(NoopSecretStore)) .kv_store(Arc::new(NoopKvStore) as Arc) - .backend(Arc::new(NoopBackend)) - .http_client(Arc::new(NoopHttpClient)) + .backend(backend) + .http_client(http_client) .geo(Arc::new(NoopGeo)) .client_info(ClientInfo { client_ip: req.get_client_ip_addr(), @@ -279,6 +379,237 @@ fn asset_routes_bypass_publisher_consent_dependencies() { ); } +#[test] +fn asset_origin_failure_does_not_fall_back_to_publisher_origin() { + let mut settings = create_test_settings(); + settings.proxy.asset_routes = vec![ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + ..Default::default() + }]; + let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); + let integration_registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + + let req = Request::get("https://test.com/.images/logo.png"); + let http_client = Arc::new(RecordingHttpClient::new(StatusCode::OK)); + let services = test_runtime_services_with_http_client( + &req, + Arc::new(NoopBackend), + Arc::clone(&http_client) as Arc, + ); + + let resp = futures::executor::block_on(route_request( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + )) + .expect("should return an error response for failed asset origin"); + + assert_eq!( + resp.get_status(), + StatusCode::BAD_GATEWAY, + "should stop asset-origin backend failures at the asset proxy path" + ); + assert!( + http_client + .calls + .lock() + .expect("should lock recorded calls") + .is_empty(), + "should not invoke the publisher origin when asset backend registration fails" + ); +} + +#[test] +fn asset_routes_proxy_head_requests() { + let mut settings = 
create_test_settings(); + settings.proxy.asset_routes = vec![ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + ..Default::default() + }]; + let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); + let integration_registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + + let req = Request::head("https://test.com/.images/logo.png"); + let http_client = Arc::new(RecordingHttpClient::new(StatusCode::NO_CONTENT)); + let services = test_runtime_services_with_http_client( + &req, + Arc::new(FixedBackend), + Arc::clone(&http_client) as Arc, + ); + + let resp = futures::executor::block_on(route_request( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + )) + .expect("should route HEAD asset request"); + + assert_eq!( + resp.get_status(), + StatusCode::NO_CONTENT, + "should pass through asset-origin HEAD response status" + ); + let calls = http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert_eq!(calls.len(), 1, "should send exactly one asset request"); + assert_eq!( + calls[0].method, + Method::HEAD, + "should forward HEAD upstream" + ); + assert!( + calls[0].backend_name.contains("assets.example.com"), + "should send to the asset backend, got {}", + calls[0].backend_name + ); +} + +#[test] +fn asset_routes_ignore_query_string_for_matching() { + let mut settings = create_test_settings(); + settings.proxy.asset_routes = vec![ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + ..Default::default() + }]; + let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); + let integration_registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + + let req = Request::get("https://test.com/.images/logo.png?auto=webp"); + let http_client = 
Arc::new(RecordingHttpClient::new(StatusCode::OK)); + let services = test_runtime_services_with_http_client( + &req, + Arc::new(FixedBackend), + Arc::clone(&http_client) as Arc, + ); + + let resp = futures::executor::block_on(route_request( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + )) + .expect("should route asset request with query string"); + + assert_eq!( + resp.get_status(), + StatusCode::OK, + "should match by path only" + ); + let calls = http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert_eq!(calls.len(), 1, "should send exactly one asset request"); + assert!( + calls[0].uri.ends_with("/.images/logo.png?auto=webp"), + "should preserve query on the upstream asset request, got {}", + calls[0].uri + ); +} + +#[test] +fn asset_routes_pass_redirect_responses_through() { + let mut settings = create_test_settings(); + settings.proxy.asset_routes = vec![ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + ..Default::default() + }]; + let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); + let integration_registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + + let req = Request::get("https://test.com/.images/logo.png"); + let http_client = Arc::new( + RecordingHttpClient::new(StatusCode::FOUND).with_response_headers(vec![( + header::LOCATION.as_str(), + "https://cdn.example.com/logo.png", + )]), + ); + let services = test_runtime_services_with_http_client( + &req, + Arc::new(FixedBackend), + Arc::clone(&http_client) as Arc, + ); + + let resp = futures::executor::block_on(route_request( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + )) + .expect("should route redirecting asset request"); + + assert_eq!( + resp.get_status(), + StatusCode::FOUND, + "should pass redirect status through without following it" + ); + assert_eq!( + 
resp.get_header_str(header::LOCATION), + Some("https://cdn.example.com/logo.png"), + "should preserve asset-origin redirect location" + ); +} + +#[test] +fn asset_routes_skip_non_get_head_requests() { + let mut settings = create_test_settings(); + settings.proxy.asset_routes = vec![ProxyAssetRoute { + prefix: "/.images/".to_string(), + origin_url: "https://assets.example.com".to_string(), + ..Default::default() + }]; + let orchestrator = build_orchestrator(&settings).expect("should build auction orchestrator"); + let integration_registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + + let req = Request::post("https://test.com/.images/logo.png"); + let http_client = Arc::new(RecordingHttpClient::new(StatusCode::OK)); + let services = test_runtime_services_with_http_client( + &req, + Arc::new(FixedBackend), + Arc::clone(&http_client) as Arc, + ); + + let resp = futures::executor::block_on(route_request( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + )) + .expect("should route non-asset POST request"); + + assert_ne!( + resp.get_status(), + StatusCode::OK, + "should not return the asset-origin response for POST requests" + ); + assert!( + http_client + .calls + .lock() + .expect("should lock recorded calls") + .is_empty(), + "should not send POST requests through asset routing" + ); +} + #[test] fn built_in_routes_take_precedence_over_asset_routes() { let mut settings = create_test_settings(); diff --git a/crates/trusted-server-core/src/platform/test_support.rs b/crates/trusted-server-core/src/platform/test_support.rs index 0d67bd8a4..d6786b3b6 100644 --- a/crates/trusted-server-core/src/platform/test_support.rs +++ b/crates/trusted-server-core/src/platform/test_support.rs @@ -129,7 +129,7 @@ struct StubPendingResponse { /// Test stub for [`PlatformHttpClient`] that records call backend names and /// returns pre-queued canned responses for `send`, `send_async`, and `select`. 
/// -/// Responses are stored as `(status_code, body_bytes)` to remain [`Send`]. +/// Responses are stored as status/body/header parts to remain [`Send`]. /// [`PlatformResponse`] contains [`edgezero_core::body::Body`] which wraps a /// `LocalBoxStream` that is `!Send`, so it cannot be stored directly in a /// `Mutex` field. @@ -140,12 +140,17 @@ struct StubPendingResponse { /// sites. pub(crate) struct StubHttpClient { calls: Mutex>, - // (status_code, body_bytes) — kept Send by avoiding Body::Stream - responses: Mutex)>>, + responses: Mutex>, // Headers captured per send call, stored as (name, value) string pairs. request_headers: Mutex>>, } +struct StubHttpResponse { + status: u16, + body: Vec, + headers: Vec<(String, String)>, +} + impl StubHttpClient { pub fn new() -> Self { Self { @@ -157,10 +162,28 @@ impl StubHttpClient { /// Queue a canned response by status code and body bytes. pub fn push_response(&self, status: u16, body: Vec) { + self.push_response_with_headers(status, body, Vec::<(String, String)>::new()); + } + + /// Queue a canned response with headers. + pub fn push_response_with_headers( + &self, + status: u16, + body: Vec, + headers: Vec<(impl Into, impl Into)>, + ) { + let headers = headers + .into_iter() + .map(|(name, value)| (name.into(), value.into())) + .collect(); self.responses .lock() .expect("should lock responses") - .push_back((status, body)); + .push_back(StubHttpResponse { + status, + body, + headers, + }); } /// Return backend names recorded across all `send` calls, in order. 
@@ -207,16 +230,19 @@ impl PlatformHttpClient for StubHttpClient { .expect("should lock request_headers") .push(headers); - let (status, body_bytes) = self + let response = self .responses .lock() .expect("should lock responses") .pop_front() .ok_or_else(|| Report::new(PlatformError::HttpClient))?; - let edge_response = edgezero_core::http::response_builder() - .status(status) - .body(edgezero_core::body::Body::from(body_bytes)) + let mut builder = edgezero_core::http::response_builder().status(response.status); + for (name, value) in response.headers { + builder = builder.header(name, value); + } + let edge_response = builder + .body(edgezero_core::body::Body::from(response.body)) .change_context(PlatformError::HttpClient)?; Ok(PlatformResponse::new(edge_response)) @@ -232,7 +258,7 @@ impl PlatformHttpClient for StubHttpClient { .expect("should lock calls") .push(backend_name.clone()); - let (status, body_bytes) = self + let response = self .responses .lock() .expect("should lock responses") @@ -241,8 +267,8 @@ impl PlatformHttpClient for StubHttpClient { let pending = StubPendingResponse { backend_name: backend_name.clone(), - status, - body: body_bytes, + status: response.status, + body: response.body, }; Ok(PlatformPendingRequest::new(pending).with_backend_name(backend_name)) } diff --git a/crates/trusted-server-core/src/proxy.rs b/crates/trusted-server-core/src/proxy.rs index a30f61f2f..9bd523220 100644 --- a/crates/trusted-server-core/src/proxy.rs +++ b/crates/trusted-server-core/src/proxy.rs @@ -531,7 +531,7 @@ fn build_asset_proxy_target_url( message: format!("Invalid asset origin_url: {}", route.origin_url), })?; - let scheme = target_url.scheme().to_ascii_lowercase(); + let scheme = target_url.scheme(); if scheme != "http" && scheme != "https" { return Err(Report::new(TrustedServerError::Proxy { message: format!("Unsupported asset origin_url scheme: {scheme}"), @@ -558,7 +558,7 @@ fn build_asset_proxy_target_url( fn asset_origin_host_header( target_url: 
&url::Url, ) -> Result> { - let scheme = target_url.scheme().to_ascii_lowercase(); + let scheme = target_url.scheme(); let host = target_url.host_str().ok_or_else(|| { Report::new(TrustedServerError::Proxy { message: "Missing host in asset target URL".to_string(), @@ -569,7 +569,7 @@ fn asset_origin_host_header( message: format!("Unsupported asset target URL scheme: {scheme}"), }) })?; - let host_header = if Some(resolved_port) == default_port_for_scheme(&scheme) { + let host_header = if Some(resolved_port) == default_port_for_scheme(scheme) { host.to_string() } else { format!("{host}:{resolved_port}") @@ -597,7 +597,7 @@ pub async fn handle_asset_proxy_request( ) -> Result> { let target_url = build_asset_proxy_target_url(route, req.get_path(), req.get_query_str().unwrap_or(""))?; - let scheme = target_url.scheme().to_ascii_lowercase(); + let scheme = target_url.scheme(); let host = target_url.host_str().ok_or_else(|| { Report::new(TrustedServerError::Proxy { message: "Missing host in asset target URL".to_string(), @@ -607,7 +607,7 @@ pub async fn handle_asset_proxy_request( let backend_name = services .backend() .ensure(&PlatformBackendSpec { - scheme, + scheme: scheme.to_string(), host: host.to_string(), port: target_url.port(), certificate_check: settings.proxy.certificate_check, @@ -655,9 +655,10 @@ pub async fn handle_asset_proxy_request( let mut response = platform_response_to_fastly(platform_resp)?; - // Asset origins must not be able to set first-party cookies on the - // publisher domain through this proxy path. + // Asset origins must not be able to set first-party cookies or publisher + // domain transport security policy through this proxy path. 
response.remove_header(header::SET_COOKIE); + response.remove_header(header::STRICT_TRANSPORT_SECURITY); Ok(response) } @@ -1427,7 +1428,9 @@ mod tests { use crate::constants::HEADER_ACCEPT; use crate::creative; use crate::error::{IntoHttpResponse, TrustedServerError}; - use crate::platform::test_support::{build_services_with_http_client, noop_services}; + use crate::platform::test_support::{ + build_services_with_http_client, noop_services, StubHttpClient, + }; use crate::platform::{ PlatformError, PlatformHttpClient, PlatformHttpRequest, PlatformPendingRequest, PlatformResponse, PlatformSelectResult, @@ -1449,10 +1452,6 @@ mod tests { /// `select` return `PlatformError::Unsupported`. struct StreamingResponseHttpClient; - struct StaticResponseHttpClient { - response_headers: Vec<(header::HeaderName, HeaderValue)>, - } - #[async_trait::async_trait(?Send)] impl PlatformHttpClient for StreamingResponseHttpClient { async fn send( @@ -1484,38 +1483,6 @@ mod tests { } } - #[async_trait::async_trait(?Send)] - impl PlatformHttpClient for StaticResponseHttpClient { - async fn send( - &self, - _request: PlatformHttpRequest, - ) -> Result> { - let mut builder = edge_response_builder().status(StatusCode::OK); - for (name, value) in &self.response_headers { - builder = builder.header(name.as_str(), value.as_bytes()); - } - let edge_response = builder - .body(EdgeBody::from(Vec::new())) - .expect("should build static test response"); - - Ok(PlatformResponse::new(edge_response).with_backend_name("stub-backend")) - } - - async fn send_async( - &self, - _request: PlatformHttpRequest, - ) -> Result> { - Err(Report::new(PlatformError::Unsupported)) - } - - async fn select( - &self, - _pending_requests: Vec, - ) -> Result> { - Err(Report::new(PlatformError::Unsupported)) - } - } - #[tokio::test] async fn proxy_missing_param_returns_400() { let settings = create_test_settings(); @@ -2507,16 +2474,24 @@ mod tests { } #[tokio::test] - async fn 
handle_asset_proxy_request_strips_set_cookie_from_response() { - let services = build_services_with_http_client(Arc::new(StaticResponseHttpClient { - response_headers: vec![ + async fn handle_asset_proxy_request_strips_unsafe_response_headers() { + let stub = Arc::new(StubHttpClient::new()); + stub.push_response_with_headers( + 200, + Vec::new(), + vec![ + (header::SET_COOKIE.as_str(), "asset=1; Path=/; Secure"), + (header::SET_COOKIE.as_str(), "other=2; Path=/; Secure"), ( - header::SET_COOKIE, - HeaderValue::from_static("asset=1; Path=/; Secure"), + header::STRICT_TRANSPORT_SECURITY.as_str(), + "max-age=31536000; includeSubDomains; preload", ), - (header::ETAG, HeaderValue::from_static("\"asset-etag\"")), + (header::ETAG.as_str(), "\"asset-etag\""), ], - }) as Arc); + ); + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); let settings = create_test_settings(); let req = Request::new(Method::GET, "https://www.example.com/.images/foo.jpg"); @@ -2533,6 +2508,12 @@ mod tests { response.get_header(header::SET_COOKIE).is_none(), "should strip upstream Set-Cookie headers from asset responses" ); + assert!( + response + .get_header(header::STRICT_TRANSPORT_SECURITY) + .is_none(), + "should strip upstream HSTS headers from asset responses" + ); assert_eq!( response.get_header_str(header::ETAG), Some("\"asset-etag\""), diff --git a/crates/trusted-server-core/src/settings.rs b/crates/trusted-server-core/src/settings.rs index 4785aaa42..784c20d62 100644 --- a/crates/trusted-server-core/src/settings.rs +++ b/crates/trusted-server-core/src/settings.rs @@ -392,13 +392,27 @@ fn default_request_signing_enabled() -> bool { false } +/// A path-prefix asset route that proxies matched first-party requests to an alternate origin. #[derive(Debug, Default, Clone, Deserialize, Serialize)] pub struct ProxyAssetRoute { + /// Path prefix matched against the incoming request path. Must start with `/`. 
+ /// + /// Matching uses string-prefix semantics, not path-segment semantics. Include + /// a trailing `/` unless you intentionally want `/static` to match paths such + /// as `/staticfile.js`. pub prefix: String, + /// Absolute `http` or `https` origin used for upstream requests. + /// + /// Only the scheme, host, and port are used. Any path or query configured on + /// this URL is rejected because the incoming request path/query, or the + /// configured rewrite result, replaces them at runtime. pub origin_url: String, /// Optional regex matched against the incoming request path before proxying. pub path_pattern: Option, /// Optional regex replacement used with [`Self::path_pattern`] to build the upstream path. + /// + /// Must be configured together with [`Self::path_pattern`] and must produce a + /// path that starts with `/`. pub target_path: Option, } @@ -607,6 +621,9 @@ impl Proxy { /// Eagerly validate runtime-only proxy settings artifacts. /// + /// Asset-route validation lives here so regex compilation and origin URL + /// semantic checks fail fast alongside other runtime-prepared settings. + /// /// # Errors /// /// Returns a configuration error if any configured asset route is invalid. 
@@ -865,7 +882,7 @@ fn validate_no_trailing_slash(value: &str) -> Result<(), ValidationError> { if value.ends_with('/') { let mut err = ValidationError::new("trailing_slash"); err.add_param("value".into(), &value); - err.message = Some("origin_url must not end with '/'".into()); + err.message = Some("origin_url must not include a trailing slash".into()); return Err(err); } Ok(()) @@ -926,6 +943,21 @@ fn validate_proxy_origin_url(value: &str) -> Result<(), ValidationError> { return Err(err); } + if !matches!(parsed.path(), "" | "/") { + let mut err = ValidationError::new("origin_url_has_path"); + err.add_param("value".into(), &value); + err.message = + Some("origin_url must not include a path; only scheme/host/port are used".into()); + return Err(err); + } + + if parsed.query().is_some() { + let mut err = ValidationError::new("origin_url_has_query"); + err.add_param("value".into(), &value); + err.message = Some("origin_url must not include a query string".into()); + return Err(err); + } + Ok(()) } @@ -2333,6 +2365,60 @@ mod tests { ); } + #[test] + fn proxy_asset_route_validation_rejects_origin_url_path() { + let toml_str = crate_test_settings_str() + + r#" + [proxy] + + [[proxy.asset_routes]] + prefix = "/.images/" + origin_url = "https://assets.example.com/api" + "#; + let err = Settings::from_toml(&toml_str) + .expect_err("should reject asset-route origin_url with path"); + assert!( + format!("{err:?}").contains("origin_url must not include a path"), + "should mention the origin_url path validation failure: {err:?}" + ); + } + + #[test] + fn proxy_asset_route_validation_rejects_origin_url_query() { + let toml_str = crate_test_settings_str() + + r#" + [proxy] + + [[proxy.asset_routes]] + prefix = "/.images/" + origin_url = "https://assets.example.com?token=abc" + "#; + let err = Settings::from_toml(&toml_str) + .expect_err("should reject asset-route origin_url with query"); + assert!( + format!("{err:?}").contains("origin_url must not include a query string"), + 
"should mention the origin_url query validation failure: {err:?}" + ); + } + + #[test] + fn proxy_asset_route_validation_accepts_origin_url_host_and_port() { + let toml_str = crate_test_settings_str() + + r#" + [proxy] + + [[proxy.asset_routes]] + prefix = "/.images/" + origin_url = "https://assets.example.com:8443" + "#; + let settings = + Settings::from_toml(&toml_str).expect("should accept asset-route origin host and port"); + assert_eq!( + settings.proxy.asset_routes[0].origin_url, "https://assets.example.com:8443", + "should preserve valid origin URL with non-standard port" + ); + } + #[test] fn proxy_normalize_applied_by_from_toml_and_env() { let toml_str = crate_test_settings_str() diff --git a/trusted-server.toml b/trusted-server.toml index 751da28e2..5c29228b8 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -149,6 +149,7 @@ rewrite_script = true # Configure first-party asset paths that should proxy to a different backend origin. # Matching is path-prefix-based and the longest matching prefix wins. +# Include a trailing / unless you intentionally want /static to also match paths such as /staticfile.js. # Only GET/HEAD requests participate. Built-in and integration routes still take precedence. # Trusted Server preserves the incoming query string. By default it also preserves # the incoming path, but path_pattern/target_path can generically rewrite paths