From 802051bc2011949a3b6f7d580d2d12e0c3be8ba1 Mon Sep 17 00:00:00 2001 From: "alex.berger" Date: Mon, 27 Apr 2026 11:28:42 +0200 Subject: [PATCH 1/2] Fix/Compactor handle not-found error in Attributes when updating block index. Handle not-found and access-denied errors from `Attributes()` in bucket index updater, preventing a stale cached `Get()` from causing the entire cleanup cycle to fail when `meta.json` has been deleted from object storage. - Return ErrBlockMetaNotFound if meta.json attributes are missing - Extend MockBucketFailure to support AttributesFailures for testing - Add test to verify updater skips blocks with missing meta attributes Fixes https://github.com/cortexproject/cortex/issues/7453 Signed-off-by: alex.berger --- CHANGELOG.md | 1 + pkg/storage/tsdb/bucketindex/updater.go | 6 ++++ pkg/storage/tsdb/bucketindex/updater_test.go | 34 ++++++++++++++++++++ pkg/util/testutil/objstore.go | 18 ++++++++--- 4 files changed, 54 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95d688766ce..064118f52f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ * [BUGFIX] Memberlist: Skip nil values delivered by `WatchPrefix` when a key is deleted, preventing a panic in the HA tracker caused by a failed type assertion on a nil interface value. #7429 * [BUGFIX] Tenant Federation: Fix `unsupported character` error when `tenant-federation.regex-matcher-enabled` is enabled and the input regex matches 0 or 1 existing tenant. #7424 * [BUGFIX] Packaging: Fix RPM and deb packages to install the binary to `/usr/bin`, install the systemd unit to the correct system path (`/usr/lib/systemd/system` for RPM, `/lib/systemd/system` for deb), and mark the sysconfig/default env file as a config file so it is not overwritten on upgrade. 
#7445 +* [BUGFIX] Compactor: Handle not-found and access-denied errors from `Attributes()` in bucket index updater, preventing a stale cached `Get()` from causing the entire cleanup cycle to fail when `meta.json` has been deleted from object storage. #7453 ## 1.21.0 in progress diff --git a/pkg/storage/tsdb/bucketindex/updater.go b/pkg/storage/tsdb/bucketindex/updater.go index 505dca0be12..42f5fb1620f 100644 --- a/pkg/storage/tsdb/bucketindex/updater.go +++ b/pkg/storage/tsdb/bucketindex/updater.go @@ -180,6 +180,12 @@ func (w *Updater) updateBlockIndexEntry(ctx context.Context, id ulid.ULID) (*Blo // Get the meta.json attributes. attrs, err := w.bkt.Attributes(ctx, metaFile) + if w.bkt.IsObjNotFoundErr(err) { + return nil, ErrBlockMetaNotFound + } + if w.bkt.IsAccessDeniedErr(err) { + return nil, errBlockMetaKeyAccessDeniedErr + } if err != nil { return nil, errors.Wrapf(err, "read meta file attributes: %v", metaFile) } diff --git a/pkg/storage/tsdb/bucketindex/updater_test.go b/pkg/storage/tsdb/bucketindex/updater_test.go index dc38b2f77c7..d9b0d559fb4 100644 --- a/pkg/storage/tsdb/bucketindex/updater_test.go +++ b/pkg/storage/tsdb/bucketindex/updater_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "os" "path" "strings" "testing" @@ -184,6 +185,39 @@ func TestUpdater_UpdateIndex_ShouldNotIncreaseOperationFailureMetricCustomerKey( `), "thanos_objstore_bucket_operation_failures_total")) } +func TestUpdater_UpdateIndex_ShouldSkipBlockWithMetaAttributesNotFound(t *testing.T) { + const userID = "user-1" + + bkt, _ := testutil.PrepareFilesystemBucket(t) + + ctx := context.Background() + logger := log.NewNopLogger() + + // Mock some blocks in the storage. + bkt = BucketWithGlobalMarkers(bkt) + block1 := testutil.MockStorageBlock(t, bkt, userID, 10, 20) + block2 := testutil.MockStorageBlock(t, bkt, userID, 20, 30) + + // Simulate a race condition where Get() on meta.json succeeds but Attributes() returns not-found. 
+ // This can happen in object stores like S3 when meta.json is deleted between the Get and Attributes calls. + bkt = &testutil.MockBucketFailure{ + Bucket: bkt, + AttributesFailures: map[string]error{ + path.Join(userID, block2.ULID.String(), "meta.json"): os.ErrNotExist, + }, + } + + w := NewUpdater(bkt, userID, nil, logger) + idx, partials, _, err := w.UpdateIndex(ctx, nil) + require.NoError(t, err) + assertBucketIndexEqual(t, idx, bkt, userID, + []tsdb.BlockMeta{block1}, + []*metadata.DeletionMark{}) + + assert.Len(t, partials, 1) + assert.True(t, errors.Is(partials[block2.ULID], ErrBlockMetaNotFound)) +} + func TestUpdater_UpdateIndex_ShouldSkipBlocksWithCorruptedMeta(t *testing.T) { const userID = "user-1" diff --git a/pkg/util/testutil/objstore.go b/pkg/util/testutil/objstore.go index 0892d19b6fe..efd9fb9aa66 100644 --- a/pkg/util/testutil/objstore.go +++ b/pkg/util/testutil/objstore.go @@ -35,9 +35,10 @@ func PrepareFilesystemBucket(t testing.TB) (objstore.InstrumentedBucket, string) type MockBucketFailure struct { objstore.Bucket - DeleteFailures []string - GetFailures map[string]error - UploadFailures map[string]error + DeleteFailures []string + GetFailures map[string]error + UploadFailures map[string]error + AttributesFailures map[string]error UploadCalls atomic.Int32 GetCalls atomic.Int32 @@ -92,9 +93,16 @@ func (m *MockBucketFailure) Upload(ctx context.Context, name string, r io.Reader return m.Bucket.Upload(ctx, name, r, opts...) 
} +func (m *MockBucketFailure) Attributes(ctx context.Context, name string) (objstore.ObjectAttributes, error) { + if e, ok := m.AttributesFailures[name]; ok { + return objstore.ObjectAttributes{}, e + } + return m.Bucket.Attributes(ctx, name) +} + func (m *MockBucketFailure) WithExpectedErrs(expectedFunc objstore.IsOpFailureExpectedFunc) objstore.Bucket { if ibkt, ok := m.Bucket.(objstore.InstrumentedBucket); ok { - return &MockBucketFailure{Bucket: ibkt.WithExpectedErrs(expectedFunc), DeleteFailures: m.DeleteFailures, GetFailures: m.GetFailures} + return &MockBucketFailure{Bucket: ibkt.WithExpectedErrs(expectedFunc), DeleteFailures: m.DeleteFailures, GetFailures: m.GetFailures, AttributesFailures: m.AttributesFailures} } return m @@ -102,7 +110,7 @@ func (m *MockBucketFailure) WithExpectedErrs(expectedFunc objstore.IsOpFailureEx func (m *MockBucketFailure) ReaderWithExpectedErrs(expectedFunc objstore.IsOpFailureExpectedFunc) objstore.BucketReader { if ibkt, ok := m.Bucket.(objstore.InstrumentedBucket); ok { - return &MockBucketFailure{Bucket: ibkt.WithExpectedErrs(expectedFunc), DeleteFailures: m.DeleteFailures, GetFailures: m.GetFailures} + return &MockBucketFailure{Bucket: ibkt.WithExpectedErrs(expectedFunc), DeleteFailures: m.DeleteFailures, GetFailures: m.GetFailures, AttributesFailures: m.AttributesFailures} } return m From 59b474a7e7a9747d344d466b2f1587231885d9ca Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:13:36 -0700 Subject: [PATCH 2/2] Update CHANGELOG entry to reference PR #7454 Signed-off-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 479540a45a8..86c025b70f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ * [BUGFIX] Metrics Helper: Fix non-deterministic bucket order in merged histograms by sorting buckets after map iteration, matching Prometheus client 
library behavior. #7380 * [BUGFIX] Distributor: Return HTTP 401 Unauthorized when tenant ID resolution fails in the Prometheus Remote Write 2.0 path. #7389 * [BUGFIX] Packaging: Fix RPM and deb packages to install the binary to `/usr/bin`, install the systemd unit to the correct system path (`/usr/lib/systemd/system` for RPM, `/lib/systemd/system` for deb), and mark the sysconfig/default env file as a config file so it is not overwritten on upgrade. #7445 -* [BUGFIX] Compactor: Handle not-found and access-denied errors from `Attributes()` in bucket index updater, preventing a stale cached `Get()` from causing the entire cleanup cycle to fail when `meta.json` has been deleted from object storage. #7453 +* [BUGFIX] Compactor: Handle not-found and access-denied errors from `Attributes()` in bucket index updater, preventing a stale cached `Get()` from causing the entire cleanup cycle to fail when `meta.json` has been deleted from object storage. #7454 * [BUGFIX] gRPC: Fix panic when `grpc_compression` is set to `snappy` on ingester client or store-gateway client configurations. #7459 ## 1.21.0 2026-04-24