Skip to content

Commit bc1b5be

Browse files
authored
Ruler: Add support for per-user external labels (#6340)
* Ruler: Add support for per-user external labels
  Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>
* Add more test cases
  Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>
* Update the cache before updating the rule manager
  Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>
---------
Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>
1 parent 5e6907d commit bc1b5be

16 files changed

+380
-37
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* [FEATURE] Store Gateway: Add an in-memory chunk cache. #6245
1717
* [FEATURE] Chunk Cache: Support multi level cache and add metrics. #6249
1818
* [FEATURE] Distributor: Accept multiple HA Tracker pairs in the same request. #6256
19+
* [FEATURE] Ruler: Add support for per-user external labels #6340
1920
* [ENHANCEMENT] Ingester: Add metrics to track succeed/failed native histograms. #6370
2021
* [ENHANCEMENT] Query Frontend/Querier: Add an experimental flag `-querier.enable-promql-experimental-functions` to enable experimental promQL functions. #6355
2122
* [ENHANCEMENT] OTLP: Add `-distributor.otlp-max-recv-msg-size` flag to limit OTLP request size in bytes. #6333

docs/configuration/config-file-reference.md

+3
Original file line numberDiff line numberDiff line change
@@ -3546,6 +3546,9 @@ query_rejection:
35463546
# CLI flag: -ruler.query-offset
35473547
[ruler_query_offset: <duration> | default = 0s]
35483548

3549+
# external labels for alerting rules
3550+
[ruler_external_labels: <map of string (labelName) to string (labelValue)> | default = []]
3551+
35493552
# The default tenant's shard size when the shuffle-sharding strategy is used.
35503553
# Must be set when the store-gateway sharding is enabled with the
35513554
# shuffle-sharding strategy. When this setting is specified in the per-tenant

pkg/cortex/modules.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,8 @@ func (t *Cortex) initRuntimeConfig() (services.Service, error) {
154154
// no need to initialize module if load path is empty
155155
return nil, nil
156156
}
157-
t.Cfg.RuntimeConfig.Loader = loadRuntimeConfig
157+
runtimeConfigLoader := runtimeConfigLoader{cfg: t.Cfg}
158+
t.Cfg.RuntimeConfig.Loader = runtimeConfigLoader.load
158159

159160
// make sure to set default limits before we start loading configuration into memory
160161
validation.SetDefaultLimitsForYAMLUnmarshalling(t.Cfg.LimitsConfig)
@@ -612,14 +613,14 @@ func (t *Cortex) initRuler() (serv services.Service, err error) {
612613
}
613614

614615
managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Cfg.ExternalPusher, t.Cfg.ExternalQueryable, queryEngine, t.Overrides, metrics, prometheus.DefaultRegisterer)
615-
manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger)
616+
manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, t.Overrides, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger)
616617
} else {
617618
rulerRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "ruler"}, prometheus.DefaultRegisterer)
618619
// TODO: Consider wrapping logger to differentiate from querier module logger
619620
queryable, _, engine := querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, rulerRegisterer, util_log.Logger)
620621

621622
managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Distributor, queryable, engine, t.Overrides, metrics, prometheus.DefaultRegisterer)
622-
manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger)
623+
manager, err = ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, t.Overrides, managerFactory, metrics, prometheus.DefaultRegisterer, util_log.Logger)
623624
}
624625

625626
if err != nil {

pkg/cortex/runtime_config.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ func (l *runtimeConfigTenantLimits) AllByUserID() map[string]*validation.Limits
5858
return nil
5959
}
6060

61-
func loadRuntimeConfig(r io.Reader) (interface{}, error) {
61+
// runtimeConfigLoader carries the Config that its load method consults
// (Distributor.ShardByAllLabels) when validating the per-tenant limits read
// from the runtime configuration.
type runtimeConfigLoader struct {
62+
cfg Config
63+
}
64+
65+
func (l runtimeConfigLoader) load(r io.Reader) (interface{}, error) {
6266
var overrides = &RuntimeConfigValues{}
6367

6468
decoder := yaml.NewDecoder(r)
@@ -74,6 +78,12 @@ func loadRuntimeConfig(r io.Reader) (interface{}, error) {
7478
return nil, errMultipleDocuments
7579
}
7680

81+
for _, ul := range overrides.TenantLimits {
82+
if err := ul.Validate(l.cfg.Distributor.ShardByAllLabels); err != nil {
83+
return nil, err
84+
}
85+
}
86+
7787
return overrides, nil
7888
}
7989

pkg/cortex/runtime_config_test.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"github.com/stretchr/testify/assert"
88
"github.com/stretchr/testify/require"
99

10+
"github.com/cortexproject/cortex/pkg/distributor"
1011
"github.com/cortexproject/cortex/pkg/util/validation"
1112
)
1213

@@ -28,7 +29,8 @@ overrides:
2829
'1235': *id001
2930
'1236': *id001
3031
`)
31-
runtimeCfg, err := loadRuntimeConfig(yamlFile)
32+
loader := runtimeConfigLoader{cfg: Config{Distributor: distributor.Config{ShardByAllLabels: true}}}
33+
runtimeCfg, err := loader.load(yamlFile)
3234
require.NoError(t, err)
3335

3436
limits := validation.Limits{
@@ -51,7 +53,7 @@ func TestLoadRuntimeConfig_ShouldLoadEmptyFile(t *testing.T) {
5153
yamlFile := strings.NewReader(`
5254
# This is an empty YAML.
5355
`)
54-
actual, err := loadRuntimeConfig(yamlFile)
56+
actual, err := runtimeConfigLoader{}.load(yamlFile)
5557
require.NoError(t, err)
5658
assert.Equal(t, &RuntimeConfigValues{}, actual)
5759
}
@@ -60,7 +62,7 @@ func TestLoadRuntimeConfig_MissingPointerFieldsAreNil(t *testing.T) {
6062
yamlFile := strings.NewReader(`
6163
# This is an empty YAML.
6264
`)
63-
actual, err := loadRuntimeConfig(yamlFile)
65+
actual, err := runtimeConfigLoader{}.load(yamlFile)
6466
require.NoError(t, err)
6567

6668
actualCfg, ok := actual.(*RuntimeConfigValues)
@@ -102,7 +104,7 @@ overrides:
102104
}
103105

104106
for _, tc := range cases {
105-
actual, err := loadRuntimeConfig(strings.NewReader(tc))
107+
actual, err := runtimeConfigLoader{}.load(strings.NewReader(tc))
106108
assert.Equal(t, errMultipleDocuments, err)
107109
assert.Nil(t, actual)
108110
}

pkg/ruler/api_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ func TestRuler_LimitsPerGroup(t *testing.T) {
454454
r := newTestRuler(t, cfg, store, nil)
455455
defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck
456456

457-
r.limits = ruleLimits{maxRuleGroups: 1, maxRulesPerRuleGroup: 1}
457+
r.limits = &ruleLimits{maxRuleGroups: 1, maxRulesPerRuleGroup: 1}
458458

459459
a := NewAPI(r, r.store, log.NewNopLogger())
460460

@@ -508,7 +508,7 @@ func TestRuler_RulerGroupLimits(t *testing.T) {
508508
r := newTestRuler(t, cfg, store, nil)
509509
defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck
510510

511-
r.limits = ruleLimits{maxRuleGroups: 1, maxRulesPerRuleGroup: 1}
511+
r.limits = &ruleLimits{maxRuleGroups: 1, maxRulesPerRuleGroup: 1}
512512

513513
a := NewAPI(r, r.store, log.NewNopLogger())
514514

pkg/ruler/compat.go

+1
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ type RulesLimits interface {
153153
RulerMaxRulesPerRuleGroup(userID string) int
154154
RulerQueryOffset(userID string) time.Duration
155155
DisabledRuleGroups(userID string) validation.DisabledRuleGroups
156+
RulerExternalLabels(userID string) labels.Labels
156157
}
157158

158159
// EngineQueryFunc returns a new engine query function validating max queryLength.

pkg/ruler/compat_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ func TestPusherErrors(t *testing.T) {
287287
writes := prometheus.NewCounter(prometheus.CounterOpts{})
288288
failures := prometheus.NewCounter(prometheus.CounterOpts{})
289289

290-
pa := NewPusherAppendable(pusher, "user-1", ruleLimits{}, writes, failures)
290+
pa := NewPusherAppendable(pusher, "user-1", &ruleLimits{}, writes, failures)
291291

292292
lbls, err := parser.ParseMetric("foo_bar")
293293
require.NoError(t, err)

pkg/ruler/external_labels.go

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package ruler
2+
3+
import (
4+
"sync"
5+
6+
"github.com/prometheus/prometheus/model/labels"
7+
)
8+
9+
// userExternalLabels merges per-user external labels with the global external
// labels and caches the merged result per user, so that update can report
// whether a user's merged label set has changed since it was last stored.
10+
type userExternalLabels struct {
11+
// global is the base label set every user's labels are merged onto.
global labels.Labels
12+
// limits supplies each user's own external labels via RulerExternalLabels.
limits RulesLimits
13+
// builder is scratch space reused by update; update only touches it while holding mtx.
builder *labels.Builder
14+
15+
// mtx guards users and the shared builder.
mtx sync.Mutex
16+
// users maps a user ID to the merged label set last stored by update.
users map[string]labels.Labels
17+
}
18+
19+
// newUserExternalLabels returns a userExternalLabels that merges onto the
// given global labels and reads per-user labels from limits. It starts with
// an empty per-user cache and a fresh label builder.
func newUserExternalLabels(global labels.Labels, limits RulesLimits) *userExternalLabels {
20+
return &userExternalLabels{
21+
global: global,
22+
limits: limits,
23+
builder: labels.NewBuilder(nil),
24+
25+
mtx: sync.Mutex{},
26+
users: map[string]labels.Labels{},
27+
}
28+
}
29+
30+
// get returns the cached merged label set for userID and whether an entry
// exists. It does not consult limits or recompute anything.
func (e *userExternalLabels) get(userID string) (labels.Labels, bool) {
31+
e.mtx.Lock()
32+
defer e.mtx.Unlock()
33+
lset, ok := e.users[userID]
34+
return lset, ok
35+
}
36+
37+
// update recomputes the merged external labels for userID and refreshes the
// cache when they changed.
//
// It returns the merged label set and true if that set differs from the
// cached one (the cache is then overwritten), or the merged set and false if
// the cache already held an equal set.
func (e *userExternalLabels) update(userID string) (labels.Labels, bool) {
38+
// The per-user labels are fetched before the lock is taken.
lset := e.limits.RulerExternalLabels(userID)
39+
40+
e.mtx.Lock()
41+
defer e.mtx.Unlock()
42+
43+
// Start from the global labels, then apply the user's labels on top, so a
// user label with the same name replaces the global value.
e.builder.Reset(e.global)
44+
for _, l := range lset {
45+
e.builder.Set(l.Name, l.Value)
46+
}
47+
lset = e.builder.Labels()
48+
49+
// A user with no cached entry compares against the zero-value label set.
if !labels.Equal(e.users[userID], lset) {
50+
e.users[userID] = lset
51+
return lset, true
52+
}
53+
return lset, false
54+
}
55+
56+
// remove drops the cached merged label set for the given user, if any.
func (e *userExternalLabels) remove(user string) {
57+
e.mtx.Lock()
58+
defer e.mtx.Unlock()
59+
delete(e.users, user)
60+
}
61+
62+
// cleanup removes every cached per-user label set, leaving the cache empty.
func (e *userExternalLabels) cleanup() {
63+
e.mtx.Lock()
64+
defer e.mtx.Unlock()
65+
for user := range e.users {
66+
delete(e.users, user)
67+
}
68+
}

pkg/ruler/external_labels_test.go

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package ruler
2+
3+
import (
4+
"testing"
5+
6+
"github.com/prometheus/prometheus/model/labels"
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
// TestUserExternalLabels exercises get, update, remove and cleanup on a single
// userExternalLabels instance whose global labels are {from="cortex"}.
//
// The subtests share that instance and the same user ID, so each case starts
// from the cache state left by the previous one unless removeBeforeTest is set.
func TestUserExternalLabels(t *testing.T) {
11+
limits := ruleLimits{}
12+
e := newUserExternalLabels(labels.FromStrings("from", "cortex"), &limits)
13+
14+
tests := []struct {
15+
name string
16+
// removeBeforeTest removes the user's cached entry before the case runs.
removeBeforeTest bool
17+
// exists is the expected result of get before update is called.
exists bool
18+
// userExternalLabels is what the limits return for the user in this case.
userExternalLabels labels.Labels
19+
// expectedExternalLabels is the merged set update must return.
expectedExternalLabels labels.Labels
20+
}{
21+
{
22+
name: "global labels only",
23+
removeBeforeTest: false,
24+
exists: false,
25+
userExternalLabels: nil,
26+
expectedExternalLabels: labels.FromStrings("from", "cortex"),
27+
},
28+
{
29+
name: "local labels without overriding",
30+
removeBeforeTest: true,
31+
exists: false,
32+
userExternalLabels: labels.FromStrings("tag", "local"),
33+
expectedExternalLabels: labels.FromStrings("from", "cortex", "tag", "local"),
34+
},
35+
{
36+
name: "local labels that override globals",
37+
removeBeforeTest: false,
38+
exists: true,
39+
userExternalLabels: labels.FromStrings("from", "cloud", "tag", "local"),
40+
expectedExternalLabels: labels.FromStrings("from", "cloud", "tag", "local"),
41+
},
42+
}
43+
44+
const userID = "test-user"
45+
for _, data := range tests {
46+
data := data
47+
t.Run(data.name, func(t *testing.T) {
48+
if data.removeBeforeTest {
49+
e.remove(userID)
50+
}
51+
_, exists := e.get(userID)
52+
require.Equal(t, data.exists, exists)
53+
54+
// Change what the limits report, then expect the first update to see a
// change and the second, identical update to report no change.
limits.externalLabels = data.userExternalLabels
55+
lset, ok := e.update(userID)
56+
require.True(t, ok)
57+
require.Equal(t, data.expectedExternalLabels, lset)
58+
lset1, ok := e.update(userID)
59+
require.False(t, ok) // Not updated.
60+
require.Equal(t, data.expectedExternalLabels, lset1)
61+
})
62+
}
63+
64+
// The last case left an entry cached; cleanup must remove it.
_, ok := e.get(userID)
65+
require.True(t, ok)
66+
e.cleanup()
67+
_, ok = e.get(userID)
68+
require.False(t, ok)
69+
}

0 commit comments

Comments
 (0)