diff --git a/CHANGELOG.md b/CHANGELOG.md index dbdc9144bb0..af1f7c6287e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## master / unreleased * [CHANGE] Querier: Make query time range configurations per-tenant: `query_ingesters_within`, `query_store_after`, and `shuffle_sharding_ingesters_lookback_period`. Uses `model.Duration` instead of `time.Duration` to support serialization but has minimum unit of 1ms (nanoseconds/microseconds not supported). #7160 +* [CHANGE] Cache: Setting `-blocks-storage.bucket-store.metadata-cache.bucket-index-content-ttl` to 0 will disable the bucket-index cache. #7446 * [FEATURE] Ruler: Add per-tenant `ruler_alert_generator_url_template` runtime config option to customize alert generator URLs using Go templates. Supports Grafana Explore, Perses, and other UIs. #7302 * [FEATURE] Distributor: Add experimental `-distributor.enable-start-timestamp` flag for Prometheus Remote Write 2.0. When enabled, `StartTimestamp (ST)` is ingested. #7371 * [FEATURE] Memberlist: Add `-memberlist.cluster-label` and `-memberlist.cluster-label-verification-disabled` to prevent accidental cross-cluster gossip joins and support rolling label rollout. #7385 diff --git a/docs/blocks-storage/querier.md b/docs/blocks-storage/querier.md index 5ceb537848d..57833b875ef 100644 --- a/docs/blocks-storage/querier.md +++ b/docs/blocks-storage/querier.md @@ -1412,7 +1412,7 @@ blocks_storage: # CLI flag: -blocks-storage.bucket-store.metadata-cache.block-index-attributes-ttl [block_index_attributes_ttl: | default = 168h] - # How long to cache content of the bucket index. + # How long to cache content of the bucket index. 0 disables caching # CLI flag: -blocks-storage.bucket-store.metadata-cache.bucket-index-content-ttl [bucket_index_content_ttl: | default = 5m] diff --git a/docs/blocks-storage/store-gateway.md b/docs/blocks-storage/store-gateway.md index 22e21cec67f..870773ff3a6 100644 --- a/docs/blocks-storage/store-gateway.md +++ b/docs/blocks-storage/store-gateway.md @@ -1487,7 +1487,7 @@ blocks_storage: # CLI flag: -blocks-storage.bucket-store.metadata-cache.block-index-attributes-ttl [block_index_attributes_ttl: | default = 168h] - # How long to cache content of the bucket index. + # How long to cache content of the bucket index. 0 disables caching # CLI flag: -blocks-storage.bucket-store.metadata-cache.bucket-index-content-ttl [bucket_index_content_ttl: | default = 5m] diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 8c2adf98027..2091fb52ff9 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2113,7 +2113,7 @@ bucket_store: # CLI flag: -blocks-storage.bucket-store.metadata-cache.block-index-attributes-ttl [block_index_attributes_ttl: | default = 168h] - # How long to cache content of the bucket index. + # How long to cache content of the bucket index. 0 disables caching # CLI flag: -blocks-storage.bucket-store.metadata-cache.bucket-index-content-ttl [bucket_index_content_ttl: | default = 5m] diff --git a/pkg/storage/tsdb/caching_bucket.go b/pkg/storage/tsdb/caching_bucket.go index 2d6b936bed4..d998c6ff53e 100644 --- a/pkg/storage/tsdb/caching_bucket.go +++ b/pkg/storage/tsdb/caching_bucket.go @@ -177,7 +177,7 @@ func (cfg *MetadataCacheConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix f.IntVar(&cfg.MetafileMaxSize, prefix+"metafile-max-size-bytes", 1*1024*1024, "Maximum size of metafile content to cache in bytes. Caching will be skipped if the content exceeds this size. This is useful to avoid network round trip for large content if the configured caching backend has an hard limit on cached items size (in this case, you should set this limit to the same limit in the caching backend).") f.DurationVar(&cfg.MetafileAttributesTTL, prefix+"metafile-attributes-ttl", 168*time.Hour, "How long to cache attributes of the block metafile.") f.DurationVar(&cfg.BlockIndexAttributesTTL, prefix+"block-index-attributes-ttl", 168*time.Hour, "How long to cache attributes of the block index.") - f.DurationVar(&cfg.BucketIndexContentTTL, prefix+"bucket-index-content-ttl", 5*time.Minute, "How long to cache content of the bucket index.") + f.DurationVar(&cfg.BucketIndexContentTTL, prefix+"bucket-index-content-ttl", 5*time.Minute, "How long to cache content of the bucket index. 0 disables caching") f.IntVar(&cfg.BucketIndexMaxSize, prefix+"bucket-index-max-size-bytes", 1*1024*1024, "Maximum size of bucket index content to cache in bytes. Caching will be skipped if the content exceeds this size. This is useful to avoid network round trip for large content if the configured caching backend has an hard limit on cached items size (in this case, you should set this limit to the same limit in the caching backend).") f.DurationVar(&cfg.PartitionedGroupsListTTL, prefix+"partitioned-groups-list-ttl", 0, "How long to cache list of partitioned groups for an user. 0 disables caching") } @@ -245,7 +245,10 @@ func CreateCachingBucket(chunksConfig ChunksCacheConfig, metadataConfig Metadata cfg.CacheGet("metafile", metadataCache, matchers.GetMetafileMatcher(), metadataConfig.MetafileMaxSize, metadataConfig.MetafileContentTTL, metadataConfig.MetafileExistsTTL, metadataConfig.MetafileDoesntExistTTL) cfg.CacheAttributes("metafile", metadataCache, matchers.GetMetafileMatcher(), metadataConfig.MetafileAttributesTTL) cfg.CacheAttributes("block-index", metadataCache, matchers.GetBlockIndexMatcher(), metadataConfig.BlockIndexAttributesTTL) - cfg.CacheGet("bucket-index", metadataCache, matchers.GetBucketIndexMatcher(), metadataConfig.BucketIndexMaxSize, metadataConfig.BucketIndexContentTTL /* do not cache exist / not exist: */, 0, 0) + + if metadataConfig.BucketIndexContentTTL > 0 { + cfg.CacheGet("bucket-index", metadataCache, matchers.GetBucketIndexMatcher(), metadataConfig.BucketIndexMaxSize, metadataConfig.BucketIndexContentTTL /* do not cache exist / not exist: */, 0, 0) + } codec := snappyIterCodec{storecache.JSONIterCodec{}} cfg.CacheIter("tenants-iter", metadataCache, matchers.GetTenantsIterMatcher(), metadataConfig.TenantsListTTL, codec, "") @@ -296,8 +299,11 @@ func CreateCachingBucketForCompactor(metadataConfig MetadataCacheConfig, cleaner if !cleaner { cfg.CacheExists("metafile", metadataCache, matchers.GetMetafileMatcher(), metadataConfig.MetafileExistsTTL, metadataConfig.MetafileDoesntExistTTL) cfg.CacheGet("metafile", metadataCache, matchers.GetMetafileMatcher(), metadataConfig.MetafileMaxSize, metadataConfig.MetafileContentTTL, metadataConfig.MetafileExistsTTL, metadataConfig.MetafileDoesntExistTTL) - cfg.CacheGet("bucket-index", metadataCache, matchers.GetBucketIndexMatcher(), metadataConfig.BucketIndexMaxSize, metadataConfig.BucketIndexContentTTL /* do not cache exist / not exist: */, 0, 0) cfg.CacheIter("tenant-blocks-iter", metadataCache, matchers.GetTenantBlocksIterMatcher(), metadataConfig.TenantBlocksListTTL, codec, "") + + if metadataConfig.BucketIndexContentTTL > 0 { + cfg.CacheGet("bucket-index", metadataCache, matchers.GetBucketIndexMatcher(), metadataConfig.BucketIndexMaxSize, metadataConfig.BucketIndexContentTTL /* do not cache exist / not exist: */, 0, 0) + } } else { // Cache only GET for metadata and don't cache exists and not exists. cfg.CacheGet("metafile", metadataCache, matchers.GetMetafileMatcher(), metadataConfig.MetafileMaxSize, metadataConfig.MetafileContentTTL, 0, 0) diff --git a/pkg/storage/tsdb/caching_bucket_test.go b/pkg/storage/tsdb/caching_bucket_test.go index ff4e1869ca8..efd25e8d625 100644 --- a/pkg/storage/tsdb/caching_bucket_test.go +++ b/pkg/storage/tsdb/caching_bucket_test.go @@ -1,13 +1,39 @@ package tsdb import ( + "bytes" + "context" "fmt" + "io" "testing" + "time" + "github.com/go-kit/log" "github.com/oklog/ulid/v2" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" ) +type countingBucket struct { + objstore.Bucket + getCount int64 +} + +func (b *countingBucket) Get(ctx context.Context, name string) (io.ReadCloser, error) { + b.getCount++ + return b.Bucket.Get(ctx, name) +} + +func (b *countingBucket) WithExpectedErrs(fn objstore.IsOpFailureExpectedFunc) objstore.Bucket { + return b +} + +func (b *countingBucket) ReaderWithExpectedErrs(fn objstore.IsOpFailureExpectedFunc) objstore.BucketReader { + return b +} + func Test_BucketCacheBackendValidation(t *testing.T) { tests := map[string]struct { cfg BucketCacheBackend @@ -129,6 +155,120 @@ func Test_BucketCacheBackendValidation(t *testing.T) { } } +func Test_BucketIndexCache(t *testing.T) { + const bucketIndexFile = "user1/bucket-index.json.gz" + const fileContent = "test-content" + ctx := context.Background() + + tests := map[string]struct { + ttl time.Duration + expectCached bool + }{ + "TTL > 0 caches bucket-index": { + ttl: 5 * time.Minute, + expectCached: true, + }, + "TTL = 0 does not cache bucket-index": { + ttl: 0, + expectCached: false, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + inmem := objstore.NewInMemBucket() + require.NoError(t, inmem.Upload(ctx, bucketIndexFile, bytes.NewReader([]byte(fileContent)))) + + wrappedBucket := &countingBucket{Bucket: inmem} + metadataCfg := MetadataCacheConfig{ + BucketCacheBackend: BucketCacheBackend{ + Backend: CacheBackendInMemory, + InMemory: InMemoryBucketCacheConfig{MaxSizeBytes: 1024 * 1024}, + }, + BucketIndexContentTTL: tc.ttl, + BucketIndexMaxSize: 1024 * 1024, + } + + bkt, err := CreateCachingBucket(ChunksCacheConfig{}, metadataCfg, ParquetLabelsCacheConfig{}, NewMatchers(), wrappedBucket, log.NewNopLogger(), prometheus.NewRegistry()) + require.NoError(t, err) + + r, err := bkt.Get(ctx, bucketIndexFile) + require.NoError(t, err) + _, _ = io.ReadAll(r) + _ = r.Close() + assert.Equal(t, int64(1), wrappedBucket.getCount) + + r, err = bkt.Get(ctx, bucketIndexFile) + require.NoError(t, err) + _, _ = io.ReadAll(r) + _ = r.Close() + + if tc.expectCached { + assert.Equal(t, int64(1), wrappedBucket.getCount, "second Get should be served by the cache") + } else { + assert.Equal(t, int64(2), wrappedBucket.getCount, "second Get should be served by the bucket") + } + }) + } +} + +func Test_BucketIndexCacheForCompactor(t *testing.T) { + const bucketIndexFile = "user1/bucket-index.json.gz" + const fileContent = "test-content" + ctx := context.Background() + + tests := map[string]struct { + ttl time.Duration + expectCached bool + }{ + "TTL > 0 caches bucket-index": { + ttl: 5 * time.Minute, + expectCached: true, + }, + "TTL = 0 does not cache bucket-index": { + ttl: 0, + expectCached: false, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + inmem := objstore.NewInMemBucket() + require.NoError(t, inmem.Upload(ctx, bucketIndexFile, bytes.NewReader([]byte(fileContent)))) + + wrappedBucket := &countingBucket{Bucket: inmem} + metadataCfg := MetadataCacheConfig{ + BucketCacheBackend: BucketCacheBackend{ + Backend: CacheBackendInMemory, + InMemory: InMemoryBucketCacheConfig{MaxSizeBytes: 1024 * 1024}, + }, + BucketIndexContentTTL: tc.ttl, + BucketIndexMaxSize: 1024 * 1024, + } + + bkt, err := CreateCachingBucketForCompactor(metadataCfg, false, wrappedBucket, log.NewNopLogger(), prometheus.NewRegistry()) + require.NoError(t, err) + + r, err := bkt.Get(ctx, bucketIndexFile) + require.NoError(t, err) + _, _ = io.ReadAll(r) + _ = r.Close() + assert.Equal(t, int64(1), wrappedBucket.getCount) + + r, err = bkt.Get(ctx, bucketIndexFile) + require.NoError(t, err) + _, _ = io.ReadAll(r) + _ = r.Close() + + if tc.expectCached { + assert.Equal(t, int64(1), wrappedBucket.getCount, "second Get should be served by the cache") + } else { + assert.Equal(t, int64(2), wrappedBucket.getCount, "second Get should be served by the bucket") + } + }) + } +} + func TestIsTenantDir(t *testing.T) { assert.False(t, isTenantBlocksDir("")) assert.True(t, isTenantBlocksDir("test")) diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json index 84724f8d76e..2521b40aca8 100644 --- a/schemas/cortex-config-schema.json +++ b/schemas/cortex-config-schema.json @@ -1888,7 +1888,7 @@ }, "bucket_index_content_ttl": { "default": "5m0s", - "description": "How long to cache content of the bucket index.", + "description": "How long to cache content of the bucket index. 0 disables caching", "type": "string", "x-cli-flag": "blocks-storage.bucket-store.metadata-cache.bucket-index-content-ttl", "x-format": "duration"