Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion cmd/atenet/internal/router/xds.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import (
endpointgrpc "github.com/envoyproxy/go-control-plane/envoy/service/endpoint/v3"
listenergrpc "github.com/envoyproxy/go-control-plane/envoy/service/listener/v3"
routegrpc "github.com/envoyproxy/go-control-plane/envoy/service/route/v3"
typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
"github.com/envoyproxy/go-control-plane/pkg/cache/types"
cachev3 "github.com/envoyproxy/go-control-plane/pkg/cache/v3"
resourcev3 "github.com/envoyproxy/go-control-plane/pkg/resource/v3"
Expand Down Expand Up @@ -145,7 +146,6 @@ func (x *XdsServer) UpdateSnapshot() error {
resourcev3.RouteType: routes,
resourcev3.ListenerType: listeners,
})

if err != nil {
return fmt.Errorf("failed to build xDS Snapshot: %w", err)
}
Expand Down Expand Up @@ -264,6 +264,21 @@ func (x *XdsServer) buildDynamicForwardProxyCluster() *clusterv3.Cluster {
TypedConfig: clusterConfigAny,
},
},
CircuitBreakers: &clusterv3.CircuitBreakers{
Thresholds: []*clusterv3.CircuitBreakers_Thresholds{
{
Priority: corev3.RoutingPriority_DEFAULT,
RetryBudget: &clusterv3.CircuitBreakers_Thresholds_RetryBudget{
// Envoy's default budget: concurrent retries may use up to 20% of active
// and pending requests, so the retry limit scales with load.
BudgetPercent: &typev3.Percent{Value: 20.0},
// Floor for low-traffic periods, raised above the default
// of 3 so a burst of simultaneous first-request resumes
// is not throttled when overall load is low.
MinRetryConcurrency: wrapperspb.UInt32(20),
},
},
},
},
}
}

Expand All @@ -287,6 +302,22 @@ func (x *XdsServer) buildRoutes() *routev3.RouteConfiguration {
Cluster: "dynamic_forward_proxy_cluster",
},
Timeout: durationpb.New(10 * time.Second),
// A request can arrive at the router in the brief window
// after an actor is resumed but before its workload is
// accepting connections, or while a pooled upstream
// connection to a just-suspended actor is going stale.
// Either case surfaces as an upstream reset/connection
// failure before response headers. Retry these transient
// failures (with backoff) so the request lands once the
// listener is ready instead of returning a 503.
RetryPolicy: &routev3.RetryPolicy{
RetryOn: "reset,connect-failure",
NumRetries: wrapperspb.UInt32(5),
RetryBackOff: &routev3.RetryPolicy_RetryBackOff{
BaseInterval: durationpb.New(50 * time.Millisecond),
MaxInterval: durationpb.New(1 * time.Second),
},
},
},
},
},
Expand Down
23 changes: 23 additions & 0 deletions cmd/atenet/internal/router/xds_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,17 @@ func TestXdsServer_UpdateSnapshot(t *testing.T) {
if c.GetName() != "dynamic_forward_proxy_cluster" {
t.Errorf("Expected 'dynamic_forward_proxy_cluster', got %s", c.GetName())
}

// A retry budget must replace Envoy's static default of 3 concurrent
// retries so simultaneous first-request resumes are not throttled.
thresholds := c.GetCircuitBreakers().GetThresholds()
if len(thresholds) != 1 {
t.Fatalf("Expected 1 circuit-breaker threshold, got %d", len(thresholds))
}
budget := thresholds[0].GetRetryBudget()
if budget == nil {
t.Fatal("Expected a retry budget on the dynamic_forward_proxy cluster, got none")
}
}

// Verify Virtual Hosts generated inside Route configuration
Expand Down Expand Up @@ -116,6 +127,18 @@ func TestXdsServer_UpdateSnapshot(t *testing.T) {
if fallbackRoute.GetMatch().GetPrefix() != "/" {
t.Errorf("Expected path mapping prefix '/', got '%s'", fallbackRoute.GetMatch().GetPrefix())
}

// Transient upstream resets/connection failures during actor resume must
// be retried rather than surfaced as 503s.
retry := fallbackRoute.GetRoute().GetRetryPolicy()
if retry == nil {
t.Fatal("Expected a retry policy on the actor route, got none")
}
for _, on := range []string{"reset", "connect-failure"} {
if !strings.Contains(retry.GetRetryOn(), on) {
t.Errorf("Expected retry_on to include %q, got %q", on, retry.GetRetryOn())
}
}
}

// Verify listeners generated
Expand Down