Skip to content

Commit 0c1cb3d

Browse files
authored
Return 503 on hitting distributor instance limits (#6387)
* Return 503 on hitting distributor client inflight request limit Signed-off-by: Anna Tran <trananna@amazon.com> * Return HTTP 503 on hitting distributor instance limits Signed-off-by: Anna Tran <trananna@amazon.com> --------- Signed-off-by: Anna Tran <trananna@amazon.com>
1 parent 53b0ff3 commit 0c1cb3d

File tree

3 files changed

+12
-15
lines changed

3 files changed

+12
-15
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
* [ENHANCEMENT] Distributor: Add new `cortex_distributor_inflight_client_requests` metric to track number of ingester client inflight requests. #6358
4343
* [ENHANCEMENT] Distributor: Expose `cortex_label_size_bytes` native histogram metric. #6372
4444
* [ENHANCEMENT] Add new option `-server.grpc_server-num-stream-workers` to configure the number of worker goroutines that should be used to process incoming streams. #6386
45+
* [ENHANCEMENT] Distributor: Return HTTP 5XX instead of HTTP 4XX when instance limits are hit. #6358
4546
* [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224
4647
* [BUGFIX] Ruler: Allow rule evaluation to complete during shutdown. #6326
4748
* [BUGFIX] Ring: update ring with new ip address when instance is lost, rejoins, but heartbeat is disabled #6271

pkg/distributor/distributor.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,6 @@ var (
5151
// Validation errors.
5252
errInvalidShardingStrategy = errors.New("invalid sharding strategy")
5353
errInvalidTenantShardSize = errors.New("invalid tenant shard size. The value must be greater than or equal to 0")
54-
55-
// Distributor instance limits errors.
56-
errTooManyInflightPushRequests = errors.New("too many inflight push requests in distributor")
57-
errMaxSamplesPushRateLimitReached = errors.New("distributor's samples push rate limit reached")
58-
errTooManyInflightClientRequests = errors.New("too many inflight ingester client requests in distributor")
5954
)
6055

6156
const (
@@ -668,19 +663,19 @@ func (d *Distributor) Push(ctx context.Context, req *cortexpb.WriteRequest) (*co
668663
d.incomingMetadata.WithLabelValues(userID).Add(float64(len(req.Metadata)))
669664

670665
if d.cfg.InstanceLimits.MaxInflightPushRequests > 0 && inflight > int64(d.cfg.InstanceLimits.MaxInflightPushRequests) {
671-
return nil, errTooManyInflightPushRequests
666+
return nil, httpgrpc.Errorf(http.StatusServiceUnavailable, "too many inflight push requests in distributor")
672667
}
673668

674669
if d.cfg.InstanceLimits.MaxIngestionRate > 0 {
675670
if rate := d.ingestionRate.Rate(); rate >= d.cfg.InstanceLimits.MaxIngestionRate {
676-
return nil, errMaxSamplesPushRateLimitReached
671+
return nil, httpgrpc.Errorf(http.StatusServiceUnavailable, "distributor's samples push rate limit reached")
677672
}
678673
}
679674

680675
// only reject requests at this stage to allow distributor to finish sending the current batch request to all ingesters
681676
// even if we've exceeded the MaxInflightClientRequests in the `doBatch`
682677
if d.cfg.InstanceLimits.MaxInflightClientRequests > 0 && d.inflightClientRequests.Load() > int64(d.cfg.InstanceLimits.MaxInflightClientRequests) {
683-
return nil, errTooManyInflightClientRequests
678+
return nil, httpgrpc.Errorf(http.StatusServiceUnavailable, "too many inflight ingester client requests in distributor")
684679
}
685680

686681
removeReplica := false

pkg/distributor/distributor_test.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,7 @@ func TestDistributor_PushInstanceLimits(t *testing.T) {
840840
preInflight: 101,
841841
inflightLimit: 101,
842842
pushes: []testPush{
843-
{samples: 100, expectedError: errTooManyInflightPushRequests},
843+
{samples: 100, expectedError: httpgrpc.Errorf(http.StatusServiceUnavailable, "too many inflight push requests in distributor")},
844844
},
845845
},
846846
"below inflight client limit": {
@@ -863,7 +863,8 @@ func TestDistributor_PushInstanceLimits(t *testing.T) {
863863
preInflightClient: 103,
864864
inflightClientLimit: 101,
865865
pushes: []testPush{
866-
{samples: 100, expectedError: errTooManyInflightClientRequests},
866+
{samples: 100, expectedError: httpgrpc.Errorf(http.StatusServiceUnavailable,
867+
"too many inflight ingester client requests in distributor")},
867868
},
868869
},
869870
"below ingestion rate limit": {
@@ -892,7 +893,7 @@ func TestDistributor_PushInstanceLimits(t *testing.T) {
892893
ingestionRateLimit: 1000,
893894

894895
pushes: []testPush{
895-
{samples: 100, expectedError: errMaxSamplesPushRateLimitReached},
896+
{samples: 100, expectedError: httpgrpc.Errorf(http.StatusServiceUnavailable, "distributor's samples push rate limit reached")},
896897
{samples: 100, expectedError: nil},
897898
},
898899
},
@@ -902,10 +903,10 @@ func TestDistributor_PushInstanceLimits(t *testing.T) {
902903
ingestionRateLimit: 1000,
903904

904905
pushes: []testPush{
905-
{samples: 5000, expectedError: nil}, // after push, rate = 500 + 0.2*(5000-500) = 1400
906-
{samples: 5000, expectedError: errMaxSamplesPushRateLimitReached}, // after push, rate = 1400 + 0.2*(0 - 1400) = 1120
907-
{samples: 5000, expectedError: errMaxSamplesPushRateLimitReached}, // after push, rate = 1120 + 0.2*(0 - 1120) = 896
908-
{samples: 5000, expectedError: nil}, // 896 is below 1000, so this push succeeds, new rate = 896 + 0.2*(5000-896) = 1716.8
906+
{samples: 5000, expectedError: nil}, // after push, rate = 500 + 0.2*(5000-500) = 1400
907+
{samples: 5000, expectedError: httpgrpc.Errorf(http.StatusServiceUnavailable, "distributor's samples push rate limit reached")}, // after push, rate = 1400 + 0.2*(0 - 1400) = 1120
908+
{samples: 5000, expectedError: httpgrpc.Errorf(http.StatusServiceUnavailable, "distributor's samples push rate limit reached")}, // after push, rate = 1120 + 0.2*(0 - 1120) = 896
909+
{samples: 5000, expectedError: nil}, // 896 is below 1000, so this push succeeds, new rate = 896 + 0.2*(5000-896) = 1716.8
909910
},
910911
},
911912
}

0 commit comments

Comments
 (0)