From b8732c5ab4224ff628ad24fcaf2566e625ba4b1c Mon Sep 17 00:00:00 2001
From: "F."
Date: Sun, 3 May 2026 22:05:44 +0200
Subject: [PATCH 1/2] refactor(tests,lint): introduce shared cluster/eviction
 helpers and drop ireturn suppressions

- Add tests/dist_cluster_helper.go with DistCluster, SetupInProcessCluster,
  and SetupInProcessClusterRF to replace repeated in-process cluster
  boilerplate in integration, remove/read-repair, stale-quorum, and
  heartbeat tests.
- Add heartbeat test helpers (waitForAllAlive, waitForDeadTransition,
  waitForNodeRemoval, nodeRemovalResult, newDistPeerNode,
  assertHeartbeatLiveness) to reduce per-test function length.
- Add pkg/eviction/contract_test.go with a runEvictionContract harness that
  validates the IAlgorithm contract (Set/Get/Delete/Evict semantics,
  capacity bounds, zero-capacity no-op); replace duplicate LRU and CAWOLFU
  test bodies with TestLRUContract / TestCAWOLFUContract.
- Remove all //nolint:ireturn directives across production and test code;
  remove the golangci.yaml exclusion block that was suppressing lint rules
  on test files.
- Extract repeated test_value string literals in cmap_test.go into a shared
  testValue constant.
---
 .golangci.yaml | 31 ---
 cspell.config.yaml | 9 +
 hypercache.go | 6 +-
 internal/cluster/membership.go | 4 +-
 internal/dist/config.go | 2 +-
 internal/transport/transport.go | 2 +-
 management_http.go | 10 +-
 pkg/backend/dist_http_server.go | 16 +-
 pkg/backend/dist_http_transport.go | 18 +-
 pkg/backend/dist_latency.go | 4 +-
 pkg/backend/dist_memory.go | 154 +++++++------
 pkg/backend/dist_memory_test_helpers.go | 8 +-
 pkg/backend/dist_transport.go | 12 +-
 pkg/backend/inmemory.go | 2 +-
 pkg/cache/v2/cmap_test.go | 11 +-
 pkg/eviction/cawolfu_test.go | 106 +--------
 pkg/eviction/clock.go | 6 +-
 pkg/eviction/contract_test.go | 154 +++++++++++++
 pkg/eviction/lru_test.go | 107 +--------
 tests/dist_cluster_helper.go | 102 +++++++++
 ...rcache_distmemory_failure_recovery_test.go | 148 +++++-------
 ...ache_distmemory_heartbeat_sampling_test.go | 125 +++++-----
 tests/hypercache_distmemory_heartbeat_test.go | 215 +++++++++---------
 ...percache_distmemory_hinted_handoff_test.go | 159 ++++++-------
 .../hypercache_distmemory_integration_test.go | 80 ++-----
 ...cache_distmemory_remove_readrepair_test.go | 171 +++----------
 ...hypercache_distmemory_stale_quorum_test.go | 122 +++-------
 tests/hypercache_distmemory_tiebreak_test.go | 66 +-----
 .../hypercache_distmemory_versioning_test.go | 103 ++++-----
 ...hypercache_distmemory_write_quorum_test.go | 142 +++-------
 tests/hypercache_get_multiple_test.go | 32 +--
 tests/hypercache_get_or_set_test.go | 144 ++++++------
 tests/hypercache_get_test.go | 117 +++++-----
 tests/hypercache_http_merkle_test.go | 144 ++++++------
 tests/hypercache_mgmt_dist_test.go | 72 +++---
 tests/hypercache_set_test.go | 27 ++-
 tests/integration/dist_phase1_test.go | 202 ++++++++--------
 tests/integration/dist_rebalance_test.go | 93 ++++----
 tests/key_owner_helper.go | 2 +-
 tests/management_http_test.go | 43 ++--
 tests/merkle_delete_tombstone_test.go | 40 +---
 tests/merkle_node_helper.go | 38 ++++
 tests/merkle_sync_test.go | 53 +----
 43 files changed, 1349 insertions(+), 1753 deletions(-)
 create mode 100644 pkg/eviction/contract_test.go
 create mode 100644 tests/dist_cluster_helper.go
 create mode 100644 tests/merkle_node_helper.go

diff --git a/.golangci.yaml b/.golangci.yaml
index fea5c95..4ebe1bd 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -63,37 +63,6 @@ linters:
       # (access to unexported helpers without
re-exporting). Documented choice. - testpackage - exclusions: - # Path-based exclusions for rules that produce false-positive noise on test - # code without corresponding signal. Production code remains under the full - # rule set; only `_test.go` files are exempted from these specific rules. - # - # Justification per rule: - # - funlen / cyclop / gocognit: tests legitimately have long bodies - # (setup -> many actions -> many assertions); splitting into helpers - # adds noise without surfacing real defects. - # - dupl: table-driven test bodies repeat by design. - # - gocritic: subjective stylistic suggestions, low signal on tests. - # - goconst: short fixture strings ("v1", "key1") naturally repeat in - # tests; constants would obscure intent. - # - noinlineerr: stylistic preference, not a defect. - # - revive function-length / cognitive-complexity / cyclomatic: same - # reasons as funlen / cyclop / gocognit (revive's variants). - rules: - - path: _test\.go$ - linters: - - funlen - - cyclop - - gocognit - - dupl - - gocritic - - goconst - - noinlineerr - - path: _test\.go$ - linters: - - revive - text: '^(function-length|cognitive-complexity|cyclomatic):' - settings: cyclop: # The maximal code complexity to report. diff --git a/cspell.config.yaml b/cspell.config.yaml index 5a7e1a8..a044110 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -27,10 +27,14 @@ ignorePaths: - "*.http" - compose.*.yaml - docs/mkdocs.yml + - bench-*.txt + - lint-*.txt + - race-*.log dictionaryDefinitions: [] dictionaries: [] words: - acks + - autosync - backpressure - benchmarkdist - benchmem @@ -64,6 +68,7 @@ words: - excludeonly - exhaustruct - Fanout + - fatals - fctx - ferr - FLUSHALL @@ -80,6 +85,8 @@ words: - gocache - goccy - gochecknoglobals + - goconst + - histogramcollector - gofiber - GOFILES - gofumpt @@ -114,6 +121,7 @@ words: - Merkle - mfinal - Mgmt + - microbenchmark - mkdocs - mrand - mset @@ -151,6 +159,7 @@ words: - stretchr - strfnv - strs + - subtest - subtests - sval - thelper diff --git a/hypercache.go b/hypercache.go index 9617c00..4cb5f37 100644 --- a/hypercache.go +++ b/hypercache.go @@ -1075,7 +1075,7 @@ func (hyperCache *HyperCache[T]) DistMembershipSnapshot() (members []struct { State string Incarnation uint64 }, replication, vnodes int, -) { //nolint:ireturn +) { if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { membership := dm.Membership() ring := dm.Ring() @@ -1113,7 +1113,7 @@ func (hyperCache *HyperCache[T]) DistMembershipSnapshot() (members []struct { } // DistRingHashSpots returns vnode hashes as hex strings if available (debug). -func (hyperCache *HyperCache[T]) DistRingHashSpots() []string { //nolint:ireturn +func (hyperCache *HyperCache[T]) DistRingHashSpots() []string { if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { if ring := dm.Ring(); ring != nil { return ring.VNodeHashes() @@ -1124,7 +1124,7 @@ func (hyperCache *HyperCache[T]) DistRingHashSpots() []string { //nolint:ireturn } // DistHeartbeatMetrics returns distributed heartbeat metrics if supported. 
-func (hyperCache *HyperCache[T]) DistHeartbeatMetrics() any { //nolint:ireturn +func (hyperCache *HyperCache[T]) DistHeartbeatMetrics() any { if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { m := dm.Metrics() diff --git a/internal/cluster/membership.go b/internal/cluster/membership.go index 381dd5b..1f88c09 100644 --- a/internal/cluster/membership.go +++ b/internal/cluster/membership.go @@ -61,7 +61,7 @@ func (m *Membership) List() []*Node { func (m *Membership) Ring() *Ring { return m.ring } // Remove deletes a node from membership and rebuilds the ring. Returns true if removed. -func (m *Membership) Remove(id NodeID) bool { //nolint:ireturn +func (m *Membership) Remove(id NodeID) bool { m.mu.Lock() _, ok := m.nodes[id] @@ -89,7 +89,7 @@ func (m *Membership) Remove(id NodeID) bool { //nolint:ireturn } // Mark updates node state + incarnation and refreshes LastSeen. Returns true if node exists. -func (m *Membership) Mark(id NodeID, state NodeState) bool { //nolint:ireturn +func (m *Membership) Mark(id NodeID, state NodeState) bool { m.mu.Lock() n, ok := m.nodes[id] diff --git a/internal/dist/config.go b/internal/dist/config.go index dccba2f..d4bcdf2 100644 --- a/internal/dist/config.go +++ b/internal/dist/config.go @@ -22,7 +22,7 @@ type Config struct { } // Defaults returns a Config with safe initial values. -func Defaults() Config { //nolint:ireturn +func Defaults() Config { return Config{ Replication: 1, VirtualNodes: defaultVirtualNodes, diff --git a/internal/transport/transport.go b/internal/transport/transport.go index 73d1036..0a4ddfd 100644 --- a/internal/transport/transport.go +++ b/internal/transport/transport.go @@ -15,7 +15,7 @@ type RPCError struct { Err error } -func (e *RPCError) Error() string { //nolint:ireturn +func (e *RPCError) Error() string { if e == nil { return "" } diff --git a/management_http.go b/management_http.go index e2c6927..1a427ff 100644 --- a/management_http.go +++ b/management_http.go @@ -175,7 +175,7 @@ func (s *ManagementHTTPServer) mountRoutes(ctx context.Context, hc managementCac } // wrapAuth returns an auth-wrapped handler if authFunc provided. 
-func (s *ManagementHTTPServer) wrapAuth(handler fiber.Handler) fiber.Handler { //nolint:ireturn +func (s *ManagementHTTPServer) wrapAuth(handler fiber.Handler) fiber.Handler { if s.authFunc == nil { return handler } @@ -190,7 +190,7 @@ func (s *ManagementHTTPServer) wrapAuth(handler fiber.Handler) fiber.Handler { / } } -func (s *ManagementHTTPServer) registerBasic(useAuth func(fiber.Handler) fiber.Handler, hc managementCache) { //nolint:ireturn +func (s *ManagementHTTPServer) registerBasic(useAuth func(fiber.Handler) fiber.Handler, hc managementCache) { s.app.Get("/health", useAuth(func(fiberCtx fiber.Ctx) error { return fiberCtx.SendString("ok") })) s.app.Get("/stats", useAuth(func(fiberCtx fiber.Ctx) error { return fiberCtx.JSON(hc.GetStats()) })) s.app.Get("/config", useAuth(func(fiberCtx fiber.Ctx) error { @@ -214,7 +214,7 @@ func (s *ManagementHTTPServer) registerBasic(useAuth func(fiber.Handler) fiber.H })) } -func (s *ManagementHTTPServer) registerDistributed(useAuth func(fiber.Handler) fiber.Handler, hc managementCache) { //nolint:ireturn +func (s *ManagementHTTPServer) registerDistributed(useAuth func(fiber.Handler) fiber.Handler, hc managementCache) { s.app.Get("/dist/metrics", useAuth(func(fiberCtx fiber.Ctx) error { if dist, ok := hc.(managementCacheDistOpt); ok { m := dist.DistMetrics() @@ -243,7 +243,7 @@ func (s *ManagementHTTPServer) registerDistributed(useAuth func(fiber.Handler) f })) } -func (s *ManagementHTTPServer) registerCluster(useAuth func(fiber.Handler) fiber.Handler, hc managementCache) { //nolint:ireturn +func (s *ManagementHTTPServer) registerCluster(useAuth func(fiber.Handler) fiber.Handler, hc managementCache) { s.app.Get("/cluster/members", useAuth(func(fiberCtx fiber.Ctx) error { if mi, ok := hc.(membershipIntrospect); ok { members, replication, vnodes := mi.DistMembershipSnapshot() @@ -275,7 +275,7 @@ func (s *ManagementHTTPServer) registerControl( ctx context.Context, useAuth func(fiber.Handler) fiber.Handler, hc managementCache, -) { //nolint:ireturn +) { s.app.Post("/evict", useAuth(func(fiberCtx fiber.Ctx) error { hc.TriggerEviction(ctx) diff --git a/pkg/backend/dist_http_server.go b/pkg/backend/dist_http_server.go index df659fe..bceaca4 100644 --- a/pkg/backend/dist_http_server.go +++ b/pkg/backend/dist_http_server.go @@ -34,7 +34,7 @@ func newDistHTTPServer(addr string) *distHTTPServer { return &distHTTPServer{app: app, addr: addr} } -func (s *distHTTPServer) start(ctx context.Context, dm *DistMemory) error { //nolint:ireturn +func (s *distHTTPServer) start(ctx context.Context, dm *DistMemory) error { s.registerSet(ctx, dm) s.registerGet(ctx, dm) s.registerRemove(ctx, dm) @@ -44,7 +44,7 @@ func (s *distHTTPServer) start(ctx context.Context, dm *DistMemory) error { //no return s.listen(ctx) } -func (s *distHTTPServer) registerSet(ctx context.Context, dm *DistMemory) { //nolint:ireturn +func (s *distHTTPServer) registerSet(ctx context.Context, dm *DistMemory) { // legacy path s.app.Post("/internal/cache/set", func(fctx fiber.Ctx) error { // small handler var req httpSetRequest @@ -96,7 +96,7 @@ func (s *distHTTPServer) registerSet(ctx context.Context, dm *DistMemory) { //no }) } -func (s *distHTTPServer) registerGet(_ context.Context, dm *DistMemory) { //nolint:ireturn +func (s *distHTTPServer) registerGet(_ context.Context, dm *DistMemory) { // legacy path s.app.Get("/internal/cache/get", func(fctx fiber.Ctx) error { key := fctx.Query("key") @@ -136,7 +136,7 @@ func (s *distHTTPServer) registerGet(_ context.Context, dm *DistMemory) { //noli }) } 
-func (s *distHTTPServer) registerRemove(ctx context.Context, dm *DistMemory) { //nolint:ireturn +func (s *distHTTPServer) registerRemove(ctx context.Context, dm *DistMemory) { // legacy path s.app.Delete("/internal/cache/remove", func(fctx fiber.Ctx) error { key := fctx.Query("key") @@ -172,11 +172,11 @@ func (s *distHTTPServer) registerRemove(ctx context.Context, dm *DistMemory) { / }) } -func (s *distHTTPServer) registerHealth() { //nolint:ireturn +func (s *distHTTPServer) registerHealth() { s.app.Get("/health", func(fctx fiber.Ctx) error { return fctx.SendString("ok") }) } -func (s *distHTTPServer) registerMerkle(_ context.Context, dm *DistMemory) { //nolint:ireturn +func (s *distHTTPServer) registerMerkle(_ context.Context, dm *DistMemory) { s.app.Get("/internal/merkle", func(fctx fiber.Ctx) error { tree := dm.BuildMerkleTree() @@ -206,7 +206,7 @@ func (s *distHTTPServer) registerMerkle(_ context.Context, dm *DistMemory) { //n }) } -func (s *distHTTPServer) listen(ctx context.Context) error { //nolint:ireturn +func (s *distHTTPServer) listen(ctx context.Context) error { lc := net.ListenConfig{} ln, err := lc.Listen(ctx, "tcp", s.addr) @@ -229,7 +229,7 @@ func (s *distHTTPServer) listen(ctx context.Context) error { //nolint:ireturn return nil } -func (s *distHTTPServer) stop(ctx context.Context) error { //nolint:ireturn +func (s *distHTTPServer) stop(ctx context.Context) error { if s == nil || s.ln == nil { return nil } diff --git a/pkg/backend/dist_http_transport.go b/pkg/backend/dist_http_transport.go index af984ed..c91ec12 100644 --- a/pkg/backend/dist_http_transport.go +++ b/pkg/backend/dist_http_transport.go @@ -27,7 +27,7 @@ const statusThreshold = 300 // NewDistHTTPTransport constructs a DistHTTPTransport with the given timeout and // nodeID->baseURL resolver. Timeout <=0 defaults to 2s. -func NewDistHTTPTransport(timeout time.Duration, resolver func(string) (string, bool)) *DistHTTPTransport { //nolint:ireturn +func NewDistHTTPTransport(timeout time.Duration, resolver func(string) (string, bool)) *DistHTTPTransport { if timeout <= 0 { timeout = 2 * time.Second } @@ -41,7 +41,7 @@ const ( ) // ForwardSet sends a Set/Replicate request to a remote node. -func (t *DistHTTPTransport) ForwardSet(ctx context.Context, nodeID string, item *cache.Item, replicate bool) error { //nolint:ireturn +func (t *DistHTTPTransport) ForwardSet(ctx context.Context, nodeID string, item *cache.Item, replicate bool) error { reqBody := httpSetRequest{ Key: item.Key, Value: item.Value, @@ -100,7 +100,7 @@ func (t *DistHTTPTransport) ForwardSet(ctx context.Context, nodeID string, item } // ForwardGet fetches a single item from a remote node. -func (t *DistHTTPTransport) ForwardGet(ctx context.Context, nodeID, key string) (*cache.Item, bool, error) { //nolint:ireturn +func (t *DistHTTPTransport) ForwardGet(ctx context.Context, nodeID, key string) (*cache.Item, bool, error) { // prefer canonical endpoint hreq, err := t.newNodeRequest(ctx, http.MethodGet, nodeID, "/internal/get", url.Values{ "key": {key}, @@ -148,7 +148,7 @@ func (t *DistHTTPTransport) ForwardGet(ctx context.Context, nodeID, key string) return item, true, nil } -func decodeGetBody(r io.Reader) (*cache.Item, bool, error) { //nolint:ireturn +func decodeGetBody(r io.Reader) (*cache.Item, bool, error) { var raw map[string]json.RawMessage dec := json.NewDecoder(r) @@ -199,7 +199,7 @@ func decodeGetBody(r io.Reader) (*cache.Item, bool, error) { //nolint:ireturn } // ForwardRemove propagates a delete operation to a remote node. 
-func (t *DistHTTPTransport) ForwardRemove(ctx context.Context, nodeID, key string, replicate bool) error { //nolint:ireturn +func (t *DistHTTPTransport) ForwardRemove(ctx context.Context, nodeID, key string, replicate bool) error { // prefer canonical endpoint hreq, err := t.newNodeRequest(ctx, http.MethodDelete, nodeID, "/internal/del", url.Values{ "key": {key}, @@ -240,7 +240,7 @@ func (t *DistHTTPTransport) ForwardRemove(ctx context.Context, nodeID, key strin } // Health performs a health probe against a remote node. -func (t *DistHTTPTransport) Health(ctx context.Context, nodeID string) error { //nolint:ireturn +func (t *DistHTTPTransport) Health(ctx context.Context, nodeID string) error { hreq, err := t.newNodeRequest(ctx, http.MethodGet, nodeID, "/health", nil, nil) if err != nil { return ewrap.Wrap(err, errMsgNewRequest) @@ -277,7 +277,7 @@ func (t *DistHTTPTransport) Health(ctx context.Context, nodeID string) error { / } // FetchMerkle retrieves a Merkle tree snapshot from a remote node. -func (t *DistHTTPTransport) FetchMerkle(ctx context.Context, nodeID string) (*MerkleTree, error) { //nolint:ireturn +func (t *DistHTTPTransport) FetchMerkle(ctx context.Context, nodeID string) (*MerkleTree, error) { if t == nil { return nil, errNoTransport } @@ -331,7 +331,7 @@ func (t *DistHTTPTransport) FetchMerkle(ctx context.Context, nodeID string) (*Me } // ListKeys returns all keys from a remote node (expensive; used for tests / anti-entropy fallback). -func (t *DistHTTPTransport) ListKeys(ctx context.Context, nodeID string) ([]string, error) { //nolint:ireturn +func (t *DistHTTPTransport) ListKeys(ctx context.Context, nodeID string) ([]string, error) { hreq, err := t.newNodeRequest(ctx, http.MethodGet, nodeID, "/internal/keys", nil, nil) if err != nil { return nil, ewrap.Wrap(err, errMsgNewRequest) @@ -374,7 +374,7 @@ func (t *DistHTTPTransport) ListKeys(ctx context.Context, nodeID string) ([]stri return body.Keys, nil } -func (t *DistHTTPTransport) resolveBaseURL(nodeID string) (*url.URL, error) { //nolint:ireturn +func (t *DistHTTPTransport) resolveBaseURL(nodeID string) (*url.URL, error) { if t == nil || t.baseURLFn == nil { return nil, errNoTransport } diff --git a/pkg/backend/dist_latency.go b/pkg/backend/dist_latency.go index 433a00e..70e53b2 100644 --- a/pkg/backend/dist_latency.go +++ b/pkg/backend/dist_latency.go @@ -43,7 +43,7 @@ type distLatencyCollector struct { buckets [distOpCount][len(latencyBuckets) + 1]atomic.Uint64 // last bucket is +Inf } -func newDistLatencyCollector() *distLatencyCollector { //nolint:ireturn +func newDistLatencyCollector() *distLatencyCollector { return &distLatencyCollector{} } @@ -63,7 +63,7 @@ func (c *distLatencyCollector) observe(op distOp, d time.Duration) { } // snapshot returns a copy of bucket counts for exposure (op -> buckets slice). -func (c *distLatencyCollector) snapshot() map[string][]uint64 { //nolint:ireturn +func (c *distLatencyCollector) snapshot() map[string][]uint64 { out := map[string][]uint64{ "set": make([]uint64, len(latencyBuckets)+1), "get": make([]uint64, len(latencyBuckets)+1), diff --git a/pkg/backend/dist_memory.go b/pkg/backend/dist_memory.go index 2a7992d..500693b 100644 --- a/pkg/backend/dist_memory.go +++ b/pkg/backend/dist_memory.go @@ -282,7 +282,7 @@ func WithDistRebalanceMaxConcurrent(n int) DistMemoryOption { } // WithDistReplicaDiffMaxPerTick limits number of replica-diff replication operations performed per rebalance tick (0 = unlimited). 
-func WithDistReplicaDiffMaxPerTick(n int) DistMemoryOption { //nolint:ireturn +func WithDistReplicaDiffMaxPerTick(n int) DistMemoryOption { return func(dm *DistMemory) { if n > 0 { dm.replicaDiffMaxPerTick = n @@ -291,7 +291,7 @@ func WithDistReplicaDiffMaxPerTick(n int) DistMemoryOption { //nolint:ireturn } // WithDistRemovalGrace sets grace period before shedding data for keys we no longer own (<=0 immediate remove disabled for now). -func WithDistRemovalGrace(d time.Duration) DistMemoryOption { //nolint:ireturn +func WithDistRemovalGrace(d time.Duration) DistMemoryOption { return func(dm *DistMemory) { if d > 0 { dm.removalGracePeriod = d @@ -309,7 +309,7 @@ type MerkleTree struct { // minimal representation } // BuildMerkleTree constructs a Merkle tree snapshot of local data (best-effort, locks each shard briefly). -func (dm *DistMemory) BuildMerkleTree() *MerkleTree { //nolint:ireturn +func (dm *DistMemory) BuildMerkleTree() *MerkleTree { chunkSize := dm.merkleChunkSize if chunkSize <= 0 { chunkSize = defaultMerkleChunkSize @@ -355,7 +355,7 @@ type merkleKV struct { } // DiffLeafRanges compares two trees and returns indexes of differing leaf chunks. -func (mt *MerkleTree) DiffLeafRanges(other *MerkleTree) []int { //nolint:ireturn +func (mt *MerkleTree) DiffLeafRanges(other *MerkleTree) []int { if mt == nil || other == nil { return nil } @@ -395,7 +395,7 @@ func equalBytes(a, b []byte) bool { // tiny helper } // SyncWith performs Merkle anti-entropy against a remote node (pull newer versions for differing chunks). -func (dm *DistMemory) SyncWith(ctx context.Context, nodeID string) error { //nolint:ireturn +func (dm *DistMemory) SyncWith(ctx context.Context, nodeID string) error { transport := dm.loadTransport() if transport == nil { return errNoTransport @@ -461,7 +461,7 @@ func WithDistTransport(t DistTransport) DistMemoryOption { } // WithDistHeartbeatSample sets how many random peers to probe per heartbeat tick (0=all). -func WithDistHeartbeatSample(k int) DistMemoryOption { //nolint:ireturn +func WithDistHeartbeatSample(k int) DistMemoryOption { return func(dm *DistMemory) { dm.hbSampleSize = k } } @@ -516,7 +516,7 @@ func WithDistHintMaxPerNode(n int) DistMemoryOption { } // WithDistHintMaxTotal sets a global cap on total queued hints across all nodes. -func WithDistHintMaxTotal(n int) DistMemoryOption { //nolint:ireturn +func WithDistHintMaxTotal(n int) DistMemoryOption { return func(dm *DistMemory) { if n > 0 { dm.hintMaxTotal = n @@ -525,7 +525,7 @@ func WithDistHintMaxTotal(n int) DistMemoryOption { //nolint:ireturn } // WithDistHintMaxBytes sets an approximate byte cap for all queued hints. -func WithDistHintMaxBytes(b int64) DistMemoryOption { //nolint:ireturn +func WithDistHintMaxBytes(b int64) DistMemoryOption { return func(dm *DistMemory) { if b > 0 { dm.hintMaxBytes = b @@ -594,7 +594,7 @@ func NewDistMemory(ctx context.Context, opts ...DistMemoryOption) (IBackend[Dist // NewDistMemoryWithConfig builds a DistMemory from an external dist.Config shape without introducing a direct import here. // Accepts a generic 'cfg' to avoid adding a dependency layer; expects exported fields matching internal/dist Config. 
-func NewDistMemoryWithConfig(ctx context.Context, cfg any, opts ...DistMemoryOption) (IBackend[DistMemory], error) { //nolint:ireturn +func NewDistMemoryWithConfig(ctx context.Context, cfg any, opts ...DistMemoryOption) (IBackend[DistMemory], error) { type minimalConfig struct { // external mirror subset NodeID string BindAddr string @@ -646,7 +646,7 @@ func distOptionsFromMinimal(mc struct { HintTTL, HintReplay time.Duration HintMaxPerNode int }, -) []DistMemoryOption { //nolint:ireturn +) []DistMemoryOption { var opts []DistMemoryOption add := func(cond bool, opt DistMemoryOption) { // helper reduces complexity in parent @@ -711,7 +711,7 @@ func (dm *DistMemory) Count(_ context.Context) int { } // Get fetches item. -func (dm *DistMemory) Get(ctx context.Context, key string) (*cache.Item, bool) { //nolint:ireturn +func (dm *DistMemory) Get(ctx context.Context, key string) (*cache.Item, bool) { start := time.Now() defer func() { if dm.latency != nil { @@ -742,7 +742,7 @@ func (dm *DistMemory) Get(ctx context.Context, key string) (*cache.Item, bool) { } // Set stores item. -func (dm *DistMemory) Set(ctx context.Context, item *cache.Item) error { //nolint:ireturn +func (dm *DistMemory) Set(ctx context.Context, item *cache.Item) error { err := item.Valid() if err != nil { return err @@ -809,7 +809,7 @@ func (dm *DistMemory) List(_ context.Context, _ ...IFilter) ([]*cache.Item, erro } // Remove deletes keys. -func (dm *DistMemory) Remove(ctx context.Context, keys ...string) error { //nolint:ireturn +func (dm *DistMemory) Remove(ctx context.Context, keys ...string) error { start := time.Now() defer func() { if dm.latency != nil { @@ -843,7 +843,7 @@ func (dm *DistMemory) Remove(ctx context.Context, keys ...string) error { //noli } // Clear wipes all shards. -func (dm *DistMemory) Clear(ctx context.Context) error { //nolint:ireturn +func (dm *DistMemory) Clear(ctx context.Context) error { done := make(chan struct{}) go func() { @@ -870,7 +870,7 @@ func (dm *DistMemory) LocalContains(key string) bool { } // Touch updates the last access time and access count for a key. -func (dm *DistMemory) Touch(_ context.Context, key string) bool { //nolint:ireturn +func (dm *DistMemory) Touch(_ context.Context, key string) bool { return dm.shardFor(key).items.Touch(key) } @@ -878,7 +878,7 @@ func (dm *DistMemory) Touch(_ context.Context, key string) bool { //nolint:iretu func (dm *DistMemory) DebugDropLocal(key string) { dm.shardFor(key).items.Remove(key) } // DebugInject stores an item directly into the local shard (no replication / ownership checks) for tests. -func (dm *DistMemory) DebugInject(it *cache.Item) { //nolint:ireturn +func (dm *DistMemory) DebugInject(it *cache.Item) { if it == nil { return } @@ -893,12 +893,12 @@ func (dm *DistMemory) DebugInject(it *cache.Item) { //nolint:ireturn func (dm *DistMemory) LocalNodeID() cluster.NodeID { return dm.localNode.ID } // LocalNodeAddr returns the configured node address (host:port) used by HTTP server. -func (dm *DistMemory) LocalNodeAddr() string { //nolint:ireturn +func (dm *DistMemory) LocalNodeAddr() string { return dm.nodeAddr } // SetLocalNode manually sets the local node (testing helper before starting HTTP). 
-func (dm *DistMemory) SetLocalNode(node *cluster.Node) { //nolint:ireturn +func (dm *DistMemory) SetLocalNode(node *cluster.Node) { dm.localNode = node if dm.nodeAddr == "" && node != nil { dm.nodeAddr = node.Address @@ -1090,7 +1090,7 @@ func (dm *DistMemory) Metrics() DistMetrics { } // DistMembershipSnapshot returns lightweight membership view (states & version). -func (dm *DistMemory) DistMembershipSnapshot() map[string]any { //nolint:ireturn +func (dm *DistMemory) DistMembershipSnapshot() map[string]any { if dm.membership == nil { return nil } @@ -1107,7 +1107,7 @@ func (dm *DistMemory) DistMembershipSnapshot() map[string]any { //nolint:ireturn } // LatencyHistograms returns a snapshot of latency bucket counts per operation (ns buckets; last bucket +Inf). -func (dm *DistMemory) LatencyHistograms() map[string][]uint64 { //nolint:ireturn +func (dm *DistMemory) LatencyHistograms() map[string][]uint64 { if dm.latency == nil { return nil } @@ -1119,7 +1119,7 @@ func (dm *DistMemory) LatencyHistograms() map[string][]uint64 { //nolint:ireturn // shuts down the optional HTTP server. Idempotent and safe to call concurrently // — repeat calls are no-ops. Tests SHOULD register Stop via t.Cleanup to avoid // goroutine leaks across `-count=N` iterations under -race. -func (dm *DistMemory) Stop(ctx context.Context) error { //nolint:ireturn +func (dm *DistMemory) Stop(ctx context.Context) error { if !dm.stopped.CompareAndSwap(false, true) { return nil } @@ -1177,13 +1177,13 @@ func (dm *DistMemory) Stop(ctx context.Context) error { //nolint:ireturn // IsOwner reports whether this node is an owner (primary or replica) for key. // Exported for tests / external observability (thin wrapper over internal logic). -func (dm *DistMemory) IsOwner(key string) bool { //nolint:ireturn +func (dm *DistMemory) IsOwner(key string) bool { return dm.ownsKeyInternal(key) } // AddPeer adds a peer address into local membership (best-effort, no network validation). // If the peer already exists (by address) it's ignored. Used by tests to simulate join propagation. -func (dm *DistMemory) AddPeer(address string) { //nolint:ireturn +func (dm *DistMemory) AddPeer(address string) { if dm == nil || dm.membership == nil || address == "" { return } @@ -1202,7 +1202,7 @@ func (dm *DistMemory) AddPeer(address string) { //nolint:ireturn } // RemovePeer removes a peer by address (best-effort) to simulate node leave in tests. -func (dm *DistMemory) RemovePeer(address string) { //nolint:ireturn +func (dm *DistMemory) RemovePeer(address string) { if dm == nil || dm.membership == nil || address == "" { return } @@ -1217,7 +1217,7 @@ func (dm *DistMemory) RemovePeer(address string) { //nolint:ireturn } // sortedMerkleEntries returns merkle entries sorted by key. -func (dm *DistMemory) sortedMerkleEntries() []merkleKV { //nolint:ireturn +func (dm *DistMemory) sortedMerkleEntries() []merkleKV { entries := dm.merkleEntries() slices.SortFunc(entries, func(a, b merkleKV) int { @@ -1228,7 +1228,7 @@ func (dm *DistMemory) sortedMerkleEntries() []merkleKV { //nolint:ireturn } // resolveMissingKeys enumerates remote-only keys using in-process or HTTP listing. 
-func (dm *DistMemory) resolveMissingKeys(ctx context.Context, nodeID string, entries []merkleKV) map[string]struct{} { //nolint:ireturn +func (dm *DistMemory) resolveMissingKeys(ctx context.Context, nodeID string, entries []merkleKV) map[string]struct{} { missing := dm.enumerateRemoteOnlyKeys(nodeID, entries) if len(missing) != 0 { return missing @@ -1272,7 +1272,7 @@ func (dm *DistMemory) applyMerkleDiffs( entries []merkleKV, diffs []int, chunkSize int, -) { //nolint:ireturn +) { for _, ci := range diffs { start := ci * chunkSize if start >= len(entries) { @@ -1304,7 +1304,7 @@ func (dm *DistMemory) storeTransport(t DistTransport) { } // enumerateRemoteOnlyKeys returns keys present only on the remote side (best-effort, in-process only). -func (dm *DistMemory) enumerateRemoteOnlyKeys(nodeID string, local []merkleKV) map[string]struct{} { //nolint:ireturn +func (dm *DistMemory) enumerateRemoteOnlyKeys(nodeID string, local []merkleKV) map[string]struct{} { missing := make(map[string]struct{}) ip, ok := dm.loadTransport().(*InProcessTransport) @@ -1406,7 +1406,7 @@ func (dm *DistMemory) merkleEntries() []merkleKV { } // startTombstoneSweeper launches periodic compaction if configured. -func (dm *DistMemory) startTombstoneSweeper() { //nolint:ireturn +func (dm *DistMemory) startTombstoneSweeper() { if dm.tombstoneTTL <= 0 || dm.tombstoneSweepInt <= 0 { return } @@ -1436,7 +1436,7 @@ func (dm *DistMemory) startTombstoneSweeper() { //nolint:ireturn } // compactTombstones removes expired tombstones based on TTL, returns count purged. -func (dm *DistMemory) compactTombstones() int64 { //nolint:ireturn +func (dm *DistMemory) compactTombstones() int64 { if dm.tombstoneTTL <= 0 { return 0 } @@ -1463,7 +1463,7 @@ func (dm *DistMemory) compactTombstones() int64 { //nolint:ireturn } // countTombstones returns approximate current count. -func (dm *DistMemory) countTombstones() int64 { //nolint:ireturn +func (dm *DistMemory) countTombstones() int64 { var total int64 for _, sh := range dm.shards { @@ -1478,7 +1478,7 @@ func (dm *DistMemory) countTombstones() int64 { //nolint:ireturn } // Rebalancing (Phase 3 initial implementation). -func (dm *DistMemory) startRebalancerIfEnabled(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) startRebalancerIfEnabled(ctx context.Context) { if dm.rebalanceInterval <= 0 || dm.membership == nil || dm.ring == nil { return } @@ -1498,7 +1498,7 @@ func (dm *DistMemory) startRebalancerIfEnabled(ctx context.Context) { //nolint:i go dm.rebalanceLoop(ctx, stopCh) } -func (dm *DistMemory) rebalanceLoop(ctx context.Context, stopCh <-chan struct{}) { //nolint:ireturn +func (dm *DistMemory) rebalanceLoop(ctx context.Context, stopCh <-chan struct{}) { ticker := time.NewTicker(dm.rebalanceInterval) defer ticker.Stop() @@ -1515,7 +1515,7 @@ func (dm *DistMemory) rebalanceLoop(ctx context.Context, stopCh <-chan struct{}) } // runRebalanceTick performs a lightweight ownership diff and migrates keys best-effort. -func (dm *DistMemory) runRebalanceTick(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) runRebalanceTick(ctx context.Context) { mv := uint64(0) if dm.membership != nil { mv = dm.membership.Version() @@ -1542,7 +1542,7 @@ func (dm *DistMemory) runRebalanceTick(ctx context.Context) { //nolint:ireturn // collectRebalanceCandidates scans shards for items whose primary ownership changed. // Note: we copy items (not pointers) to avoid pointer-to-loop-variable issues. 
-func (dm *DistMemory) collectRebalanceCandidates() []cache.Item { //nolint:ireturn +func (dm *DistMemory) collectRebalanceCandidates() []cache.Item { if len(dm.shards) == 0 { return nil } @@ -1568,7 +1568,7 @@ func (dm *DistMemory) collectRebalanceCandidates() []cache.Item { //nolint:iretu // shouldRebalance determines if the item should be migrated away. // Triggers when this node lost all ownership or was previously primary and is no longer. -func (dm *DistMemory) shouldRebalance(sh *distShard, it *cache.Item) bool { //nolint:ireturn +func (dm *DistMemory) shouldRebalance(sh *distShard, it *cache.Item) bool { if !dm.ownsKeyInternal(it.Key) { // lost all ownership if dm.removalGracePeriod > 0 && sh.removedAt != nil { // record timestamp if not already sh.removedAtMu.Lock() @@ -1608,7 +1608,7 @@ func (dm *DistMemory) shouldRebalance(sh *distShard, it *cache.Item) bool { //no // replicateNewReplicas scans for keys where this node is still primary but new replica owners were added since first observation. // It forwards the current item to newly added replicas (best-effort) and updates originalOwners snapshot. -func (dm *DistMemory) replicateNewReplicas(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) replicateNewReplicas(ctx context.Context) { if dm.ring == nil || dm.loadTransport() == nil { return } @@ -1628,7 +1628,7 @@ func (dm *DistMemory) replicateNewReplicas(ctx context.Context) { //nolint:iretu } } -func (dm *DistMemory) replDiffShard(ctx context.Context, sh *distShard, processed, limit int) int { //nolint:ireturn +func (dm *DistMemory) replDiffShard(ctx context.Context, sh *distShard, processed, limit int) int { for kv := range sh.items.IterBuffered() { if limit > 0 && processed >= limit { return processed @@ -1683,7 +1683,7 @@ func (*DistMemory) ensureOwnerBaseline(sh *distShard, key string, owners []clust return false } -func (*DistMemory) computeNewReplicas(sh *distShard, key string, owners []cluster.NodeID) []cluster.NodeID { //nolint:ireturn +func (*DistMemory) computeNewReplicas(sh *distShard, key string, owners []cluster.NodeID) []cluster.NodeID { sh.originalOwnersMu.RLock() prev := sh.originalOwners[key] @@ -1710,7 +1710,7 @@ func (dm *DistMemory) sendReplicaDiff( it *cache.Item, repls []cluster.NodeID, processed, limit int, -) int { //nolint:ireturn +) int { transport := dm.loadTransport() if transport == nil { return processed @@ -1738,7 +1738,7 @@ func (dm *DistMemory) sendReplicaDiff( return processed } -func (*DistMemory) setOwnerBaseline(sh *distShard, key string, owners []cluster.NodeID) { //nolint:ireturn +func (*DistMemory) setOwnerBaseline(sh *distShard, key string, owners []cluster.NodeID) { sh.originalOwnersMu.Lock() cp := make([]cluster.NodeID, len(owners)) @@ -1748,7 +1748,7 @@ func (*DistMemory) setOwnerBaseline(sh *distShard, key string, owners []cluster. sh.originalOwnersMu.Unlock() } -func (dm *DistMemory) maybeRecordRemoval(sh *distShard, key string) { //nolint:ireturn +func (dm *DistMemory) maybeRecordRemoval(sh *distShard, key string) { if dm.removalGracePeriod <= 0 || sh.removedAt == nil { return } @@ -1767,8 +1767,6 @@ func (dm *DistMemory) maybeRecordRemoval(sh *distShard, key string) { //nolint:i } // migrateItems concurrently migrates items in batches respecting configured limits. 
-// -//nolint:ireturn func (dm *DistMemory) migrateItems(ctx context.Context, items []cache.Item) { if len(items) == 0 { return @@ -1811,7 +1809,7 @@ func (dm *DistMemory) migrateItems(ctx context.Context, items []cache.Item) { } // migrateIfNeeded forwards item to new primary if this node no longer owns it. -func (dm *DistMemory) migrateIfNeeded(ctx context.Context, item *cache.Item) { //nolint:ireturn +func (dm *DistMemory) migrateIfNeeded(ctx context.Context, item *cache.Item) { owners := dm.lookupOwners(item.Key) if len(owners) == 0 || owners[0] == dm.localNode.ID { return @@ -1851,7 +1849,7 @@ func (dm *DistMemory) migrateIfNeeded(ctx context.Context, item *cache.Item) { / // shedRemovedKeys deletes keys for which this node is no longer an owner after grace period. // Best-effort: we iterate shards, check removal timestamps, and remove local copy if grace elapsed. -func (dm *DistMemory) shedRemovedKeys() { //nolint:ireturn +func (dm *DistMemory) shedRemovedKeys() { if dm.removalGracePeriod <= 0 { return } @@ -1864,7 +1862,7 @@ func (dm *DistMemory) shedRemovedKeys() { //nolint:ireturn } } -func (dm *DistMemory) shedShard(sh *distShard, now time.Time) { //nolint:ireturn +func (dm *DistMemory) shedShard(sh *distShard, now time.Time) { if sh.removedAt == nil { return } @@ -1900,7 +1898,7 @@ func encodeUint64BigEndian(buf []byte, v uint64) { } // foldMerkle reduces leaf hashes into a single root using a binary tree. -func foldMerkle(leaves [][]byte, hasher hash.Hash) []byte { //nolint:ireturn +func foldMerkle(leaves [][]byte, hasher hash.Hash) []byte { if len(leaves) == 0 { return nil } @@ -1929,7 +1927,7 @@ func foldMerkle(leaves [][]byte, hasher hash.Hash) []byte { //nolint:ireturn } // ensureShardConfig initializes shards respecting configured shardCount. -func (dm *DistMemory) ensureShardConfig() { //nolint:ireturn +func (dm *DistMemory) ensureShardConfig() { if dm.shardCount <= 0 { dm.shardCount = defaultDistShardCount } @@ -1948,7 +1946,7 @@ func (dm *DistMemory) ensureShardConfig() { //nolint:ireturn } // initMembershipIfNeeded sets up membership/ring and local node defaults. -func (dm *DistMemory) initMembershipIfNeeded() { //nolint:ireturn +func (dm *DistMemory) initMembershipIfNeeded() { if dm.membership == nil { dm.initStandaloneMembership() @@ -1968,7 +1966,7 @@ func (dm *DistMemory) initMembershipIfNeeded() { //nolint:ireturn } // tryStartHTTP starts internal HTTP transport if not provided. -func (dm *DistMemory) tryStartHTTP(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) tryStartHTTP(ctx context.Context) { if dm.loadTransport() != nil || dm.nodeAddr == "" { return } @@ -2002,7 +2000,7 @@ func (dm *DistMemory) tryStartHTTP(ctx context.Context) { //nolint:ireturn } // startHeartbeatIfEnabled launches heartbeat loop if configured. -func (dm *DistMemory) startHeartbeatIfEnabled(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) startHeartbeatIfEnabled(ctx context.Context) { if dm.hbInterval > 0 && dm.loadTransport() != nil { stopCh := make(chan struct{}) @@ -2012,7 +2010,7 @@ func (dm *DistMemory) startHeartbeatIfEnabled(ctx context.Context) { //nolint:ir } // lookupOwners returns ring owners slice for a key (nil if no ring). 
-func (dm *DistMemory) lookupOwners(key string) []cluster.NodeID { //nolint:ireturn +func (dm *DistMemory) lookupOwners(key string) []cluster.NodeID { if dm.ring == nil { return nil } @@ -2021,7 +2019,7 @@ func (dm *DistMemory) lookupOwners(key string) []cluster.NodeID { //nolint:iretu } // requiredAcks computes required acknowledgements for given consistency level. -func (*DistMemory) requiredAcks(total int, lvl ConsistencyLevel) int { //nolint:ireturn +func (*DistMemory) requiredAcks(total int, lvl ConsistencyLevel) int { //nolint:revive switch lvl { case ConsistencyAll: @@ -2036,7 +2034,7 @@ func (*DistMemory) requiredAcks(total int, lvl ConsistencyLevel) int { //nolint: } // getOne fetches from a single owner path. -func (dm *DistMemory) getOne(ctx context.Context, key string, owners []cluster.NodeID) (*cache.Item, bool) { //nolint:ireturn +func (dm *DistMemory) getOne(ctx context.Context, key string, owners []cluster.NodeID) (*cache.Item, bool) { for idx, oid := range owners { // iterate owners until hit if it, ok := dm.tryLocalGet(key, idx, oid); ok { return it, true @@ -2051,7 +2049,7 @@ func (dm *DistMemory) getOne(ctx context.Context, key string, owners []cluster.N } // tryLocalGet attempts local shard lookup when oid is local; returns item if found. -func (dm *DistMemory) tryLocalGet(key string, idx int, oid cluster.NodeID) (*cache.Item, bool) { //nolint:ireturn +func (dm *DistMemory) tryLocalGet(key string, idx int, oid cluster.NodeID) (*cache.Item, bool) { if oid != dm.localNode.ID { // not local owner return nil, false } @@ -2068,7 +2066,7 @@ func (dm *DistMemory) tryLocalGet(key string, idx int, oid cluster.NodeID) (*cac } // tryRemoteGet attempts remote fetch for given owner; includes promotion + repair. -func (dm *DistMemory) tryRemoteGet(ctx context.Context, key string, idx int, oid cluster.NodeID) (*cache.Item, bool) { //nolint:ireturn +func (dm *DistMemory) tryRemoteGet(ctx context.Context, key string, idx int, oid cluster.NodeID) (*cache.Item, bool) { transport := dm.loadTransport() if oid == dm.localNode.ID || transport == nil { // skip local path or missing transport return nil, false @@ -2106,7 +2104,7 @@ func (dm *DistMemory) tryRemoteGet(ctx context.Context, key string, idx int, oid } // getWithConsistency performs quorum/all reads. -func (dm *DistMemory) getWithConsistency(ctx context.Context, key string, owners []cluster.NodeID) (*cache.Item, bool) { //nolint:ireturn +func (dm *DistMemory) getWithConsistency(ctx context.Context, key string, owners []cluster.NodeID) (*cache.Item, bool) { needed := dm.requiredAcks(len(owners), dm.readConsistency) acks := 0 @@ -2132,7 +2130,7 @@ func (dm *DistMemory) collectQuorum( needed int, chosen **cache.Item, acks *int, -) []cluster.NodeID { //nolint:ireturn +) []cluster.NodeID { stale := make([]cluster.NodeID, 0, len(owners)) for idx, oid := range owners { it, ok := dm.fetchOwner(ctx, key, idx, oid) @@ -2163,7 +2161,7 @@ func (dm *DistMemory) repairStaleOwners( key string, chosen *cache.Item, staleOwners []cluster.NodeID, -) { //nolint:ireturn +) { transport := dm.loadTransport() if transport == nil || chosen == nil { return @@ -2188,7 +2186,7 @@ func (dm *DistMemory) repairStaleOwners( } // fetchOwner attempts to fetch item from given owner (local or remote) updating metrics. 
-func (dm *DistMemory) fetchOwner(ctx context.Context, key string, idx int, oid cluster.NodeID) (*cache.Item, bool) { //nolint:ireturn +func (dm *DistMemory) fetchOwner(ctx context.Context, key string, idx int, oid cluster.NodeID) (*cache.Item, bool) { if oid == dm.localNode.ID { // local if it, ok := dm.shardFor(key).items.GetCopy(key); ok { return it, true @@ -2223,7 +2221,7 @@ func (dm *DistMemory) fetchOwner(ctx context.Context, key string, idx int, oid c } // replicateTo sends writes to replicas (best-effort) returning ack count. -func (dm *DistMemory) replicateTo(ctx context.Context, item *cache.Item, replicas []cluster.NodeID) int { //nolint:ireturn +func (dm *DistMemory) replicateTo(ctx context.Context, item *cache.Item, replicas []cluster.NodeID) int { transport := dm.loadTransport() if transport == nil { return 0 @@ -2255,7 +2253,7 @@ func (dm *DistMemory) getWithConsistencyParallel( ctx context.Context, key string, owners []cluster.NodeID, -) (*cache.Item, bool) { //nolint:ireturn +) (*cache.Item, bool) { needed := dm.requiredAcks(len(owners), dm.readConsistency) type res struct { @@ -2412,7 +2410,7 @@ func (dm *DistMemory) startHintReplayIfEnabled(ctx context.Context) { go dm.hintReplayLoop(ctx, stopCh) } -func (dm *DistMemory) hintReplayLoop(ctx context.Context, stopCh <-chan struct{}) { //nolint:ireturn +func (dm *DistMemory) hintReplayLoop(ctx context.Context, stopCh <-chan struct{}) { ticker := time.NewTicker(dm.hintReplayInt) defer ticker.Stop() @@ -2495,7 +2493,7 @@ func (dm *DistMemory) processHint(ctx context.Context, nodeID string, entry hint } // --- Simple gossip (in-process only) ---. -func (dm *DistMemory) startGossipIfEnabled() { //nolint:ireturn +func (dm *DistMemory) startGossipIfEnabled() { if dm.gossipInterval <= 0 { return } @@ -2507,7 +2505,7 @@ func (dm *DistMemory) startGossipIfEnabled() { //nolint:ireturn } // startAutoSyncIfEnabled launches periodic merkle syncs to all other members. -func (dm *DistMemory) startAutoSyncIfEnabled(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) startAutoSyncIfEnabled(ctx context.Context) { if dm.autoSyncInterval <= 0 || dm.membership == nil { return } @@ -2524,7 +2522,7 @@ func (dm *DistMemory) startAutoSyncIfEnabled(ctx context.Context) { //nolint:ire go dm.autoSyncLoop(ctx, interval, stopCh) } -func (dm *DistMemory) autoSyncLoop(ctx context.Context, interval time.Duration, stopCh <-chan struct{}) { //nolint:ireturn +func (dm *DistMemory) autoSyncLoop(ctx context.Context, interval time.Duration, stopCh <-chan struct{}) { ticker := time.NewTicker(interval) defer ticker.Stop() @@ -2539,7 +2537,7 @@ func (dm *DistMemory) autoSyncLoop(ctx context.Context, interval time.Duration, } // runAutoSyncTick performs one auto-sync cycle; separated for lower complexity. 
-func (dm *DistMemory) runAutoSyncTick(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) runAutoSyncTick(ctx context.Context) { start := time.Now() var lastErr error @@ -2576,7 +2574,7 @@ func (dm *DistMemory) runAutoSyncTick(ctx context.Context) { //nolint:ireturn dm.metrics.autoSyncLoops.Add(1) } -func (dm *DistMemory) gossipLoop(stopCh <-chan struct{}) { //nolint:ireturn +func (dm *DistMemory) gossipLoop(stopCh <-chan struct{}) { ticker := time.NewTicker(dm.gossipInterval) defer ticker.Stop() @@ -2590,7 +2588,7 @@ func (dm *DistMemory) gossipLoop(stopCh <-chan struct{}) { //nolint:ireturn } } -func (dm *DistMemory) runGossipTick() { //nolint:ireturn +func (dm *DistMemory) runGossipTick() { if dm.membership == nil || dm.loadTransport() == nil { return } @@ -2636,7 +2634,7 @@ func (dm *DistMemory) runGossipTick() { //nolint:ireturn remote.acceptGossip(snapshot) } -func (dm *DistMemory) acceptGossip(nodes []*cluster.Node) { //nolint:ireturn +func (dm *DistMemory) acceptGossip(nodes []*cluster.Node) { if dm.membership == nil { return } @@ -2678,7 +2676,7 @@ func (dm *DistMemory) acceptGossip(nodes []*cluster.Node) { //nolint:ireturn } // chooseNewer picks the item with higher version; on version tie uses lexicographically smaller Origin as winner. -func (dm *DistMemory) chooseNewer(itemA, itemB *cache.Item) *cache.Item { //nolint:ireturn +func (dm *DistMemory) chooseNewer(itemA, itemB *cache.Item) *cache.Item { if itemA == nil { return itemB } @@ -2716,7 +2714,7 @@ func (dm *DistMemory) chooseNewer(itemA, itemB *cache.Item) *cache.Item { //noli } // repairReplicas ensures each owner has at least the chosen version; best-effort. -func (dm *DistMemory) repairReplicas(ctx context.Context, key string, chosen *cache.Item, owners []cluster.NodeID) { //nolint:ireturn +func (dm *DistMemory) repairReplicas(ctx context.Context, key string, chosen *cache.Item, owners []cluster.NodeID) { if chosen == nil { return } @@ -2748,7 +2746,7 @@ func (dm *DistMemory) repairRemoteReplica( key string, chosen *cache.Item, oid cluster.NodeID, -) { // separated to reduce cyclomatic complexity //nolint:ireturn +) { // separated to reduce cyclomatic complexity transport := dm.loadTransport() if transport == nil { // cannot repair remote return @@ -2763,7 +2761,7 @@ func (dm *DistMemory) repairRemoteReplica( } // handleForwardPrimary tries to forward a Set to the primary; returns (proceedAsPrimary,false) if promotion required. -func (dm *DistMemory) handleForwardPrimary(ctx context.Context, owners []cluster.NodeID, item *cache.Item) (bool, error) { //nolint:ireturn +func (dm *DistMemory) handleForwardPrimary(ctx context.Context, owners []cluster.NodeID, item *cache.Item) (bool, error) { transport := dm.loadTransport() if transport == nil { return false, sentinel.ErrNotOwner @@ -2904,7 +2902,7 @@ func (dm *DistMemory) applySet(ctx context.Context, item *cache.Item, replicate } // recordOriginalPrimary stores first-seen primary owner for key. -func (dm *DistMemory) recordOriginalPrimary(sh *distShard, key string) { //nolint:ireturn +func (dm *DistMemory) recordOriginalPrimary(sh *distShard, key string) { if sh == nil || sh.originalPrimary == nil || dm.ring == nil { return } @@ -2989,7 +2987,7 @@ func (dm *DistMemory) applyRemove(ctx context.Context, key string, replicate boo } // runHeartbeatTick runs one heartbeat iteration (best-effort). 
-func (dm *DistMemory) runHeartbeatTick(ctx context.Context) { //nolint:ireturn,revive +func (dm *DistMemory) runHeartbeatTick(ctx context.Context) { //nolint:revive if dm.loadTransport() == nil || dm.membership == nil { return } @@ -3028,7 +3026,7 @@ func (dm *DistMemory) runHeartbeatTick(ctx context.Context) { //nolint:ireturn,r } // evaluateLiveness applies timeout-based transitions then performs a probe. -func (dm *DistMemory) evaluateLiveness(ctx context.Context, now time.Time, node *cluster.Node) { //nolint:ireturn +func (dm *DistMemory) evaluateLiveness(ctx context.Context, now time.Time, node *cluster.Node) { elapsed := now.Sub(node.LastSeen) if dm.hbDeadAfter > 0 && elapsed > dm.hbDeadAfter { // prune dead diff --git a/pkg/backend/dist_memory_test_helpers.go b/pkg/backend/dist_memory_test_helpers.go index e5f1460..bd434d8 100644 --- a/pkg/backend/dist_memory_test_helpers.go +++ b/pkg/backend/dist_memory_test_helpers.go @@ -8,7 +8,7 @@ import ( ) // DisableHTTPForTest stops the internal HTTP server and clears transport (testing helper). -func (dm *DistMemory) DisableHTTPForTest(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) DisableHTTPForTest(ctx context.Context) { if dm.httpServer != nil { err := dm.httpServer.stop(ctx) if err != nil { @@ -22,7 +22,7 @@ func (dm *DistMemory) DisableHTTPForTest(ctx context.Context) { //nolint:ireturn } // EnableHTTPForTest restarts HTTP server & transport if nodeAddr is set (testing helper). -func (dm *DistMemory) EnableHTTPForTest(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) EnableHTTPForTest(ctx context.Context) { if dm.httpServer != nil || dm.nodeAddr == "" { return } @@ -56,7 +56,7 @@ func (dm *DistMemory) EnableHTTPForTest(ctx context.Context) { //nolint:ireturn } // HintedQueueSize returns number of queued hints for a node (testing helper). -func (dm *DistMemory) HintedQueueSize(nodeID string) int { //nolint:ireturn +func (dm *DistMemory) HintedQueueSize(nodeID string) int { dm.hintsMu.Lock() defer dm.hintsMu.Unlock() @@ -68,7 +68,7 @@ func (dm *DistMemory) HintedQueueSize(nodeID string) int { //nolint:ireturn } // StartHintReplayForTest forces starting hint replay loop (testing helper). -func (dm *DistMemory) StartHintReplayForTest(ctx context.Context) { //nolint:ireturn +func (dm *DistMemory) StartHintReplayForTest(ctx context.Context) { if dm.hintReplayInt <= 0 || dm.hintTTL <= 0 { return } diff --git a/pkg/backend/dist_transport.go b/pkg/backend/dist_transport.go index 6ccef99..661a34a 100644 --- a/pkg/backend/dist_transport.go +++ b/pkg/backend/dist_transport.go @@ -24,7 +24,7 @@ type InProcessTransport struct { } // NewInProcessTransport creates a new empty transport. -func NewInProcessTransport() *InProcessTransport { //nolint:ireturn +func NewInProcessTransport() *InProcessTransport { return &InProcessTransport{backends: map[string]*DistMemory{}} } @@ -46,7 +46,7 @@ func (t *InProcessTransport) Unregister(id string) { } // ForwardSet forwards a set operation to the specified backend node. -func (t *InProcessTransport) ForwardSet(ctx context.Context, nodeID string, item *cache.Item, replicate bool) error { //nolint:ireturn +func (t *InProcessTransport) ForwardSet(ctx context.Context, nodeID string, item *cache.Item, replicate bool) error { b, ok := t.lookup(nodeID) if !ok { return sentinel.ErrBackendNotFound @@ -59,7 +59,7 @@ func (t *InProcessTransport) ForwardSet(ctx context.Context, nodeID string, item } // ForwardGet forwards a get operation to the specified backend node. 
-func (t *InProcessTransport) ForwardGet(_ context.Context, nodeID, key string) (*cache.Item, bool, error) { //nolint:ireturn +func (t *InProcessTransport) ForwardGet(_ context.Context, nodeID, key string) (*cache.Item, bool, error) { b, ok := t.lookup(nodeID) if !ok { return nil, false, sentinel.ErrBackendNotFound @@ -74,7 +74,7 @@ func (t *InProcessTransport) ForwardGet(_ context.Context, nodeID, key string) ( } // ForwardRemove forwards a remove operation. -func (t *InProcessTransport) ForwardRemove(ctx context.Context, nodeID, key string, replicate bool) error { //nolint:ireturn +func (t *InProcessTransport) ForwardRemove(ctx context.Context, nodeID, key string, replicate bool) error { b, ok := t.lookup(nodeID) if !ok { return sentinel.ErrBackendNotFound @@ -86,7 +86,7 @@ func (t *InProcessTransport) ForwardRemove(ctx context.Context, nodeID, key stri } // Health probes a backend. -func (t *InProcessTransport) Health(_ context.Context, nodeID string) error { //nolint:ireturn +func (t *InProcessTransport) Health(_ context.Context, nodeID string) error { if _, ok := t.lookup(nodeID); !ok { return sentinel.ErrBackendNotFound } @@ -95,7 +95,7 @@ func (t *InProcessTransport) Health(_ context.Context, nodeID string) error { // } // FetchMerkle fetches a remote merkle tree. -func (t *InProcessTransport) FetchMerkle(_ context.Context, nodeID string) (*MerkleTree, error) { //nolint:ireturn +func (t *InProcessTransport) FetchMerkle(_ context.Context, nodeID string) (*MerkleTree, error) { b, ok := t.lookup(nodeID) if !ok { return nil, sentinel.ErrBackendNotFound diff --git a/pkg/backend/inmemory.go b/pkg/backend/inmemory.go index 28528c3..b564b8c 100644 --- a/pkg/backend/inmemory.go +++ b/pkg/backend/inmemory.go @@ -65,7 +65,7 @@ func (cacheBackend *InMemory) Get(_ context.Context, key string) (*cache.Item, b } // Touch updates the last access time and access count for a key. 
-func (cacheBackend *InMemory) Touch(_ context.Context, key string) bool { //nolint:ireturn +func (cacheBackend *InMemory) Touch(_ context.Context, key string) bool { return cacheBackend.items.Touch(key) } diff --git a/pkg/cache/v2/cmap_test.go b/pkg/cache/v2/cmap_test.go index 97ec11b..fc79210 100644 --- a/pkg/cache/v2/cmap_test.go +++ b/pkg/cache/v2/cmap_test.go @@ -9,6 +9,7 @@ import ( const ( concurrentWrites = 100 concurrentReads = 50 + testValue = "test_value" ) func TestNew(t *testing.T) { @@ -82,7 +83,7 @@ func TestSetAndGet(t *testing.T) { cm := New() item := &Item{ - Value: "test_value", + Value: testValue, Expiration: time.Hour, } @@ -111,7 +112,7 @@ func TestHas(t *testing.T) { cm := New() item := &Item{ - Value: "test_value", + Value: testValue, Expiration: time.Hour, } @@ -133,7 +134,7 @@ func TestPop(t *testing.T) { cm := New() item := &Item{ - Value: "test_value", + Value: testValue, Expiration: time.Hour, } @@ -162,7 +163,7 @@ func TestRemove(t *testing.T) { cm := New() item := &Item{ - Value: "test_value", + Value: testValue, Expiration: time.Hour, } @@ -187,7 +188,7 @@ func TestCount(t *testing.T) { } item := &Item{ - Value: "test_value", + Value: testValue, Expiration: time.Hour, } diff --git a/pkg/eviction/cawolfu_test.go b/pkg/eviction/cawolfu_test.go index 7d50216..2e4d7e9 100644 --- a/pkg/eviction/cawolfu_test.go +++ b/pkg/eviction/cawolfu_test.go @@ -1,101 +1,9 @@ package eviction -import "testing" - -func TestCAWOLFU_EvictsLeastFrequentTail(t *testing.T) { - t.Parallel() - - c, err := NewCAWOLFU(2) - if err != nil { - t.Fatalf("NewCAWOLFU error: %v", err) - } - - c.Set("a", 1) - c.Set("b", 2) - - // bump 'a' so 'b' is less frequent - if _, ok := c.Get("a"); !ok { - t.Fatalf("expected to get 'a'") - } - - // Insert 'c' -> evict tail ('b') - c.Set("c", 3) - - if _, ok := c.Get("b"); ok { - t.Fatalf("expected 'b' to be evicted") - } - - if _, ok := c.Get("a"); !ok { - t.Fatalf("expected 'a' to remain in cache") - } - - if v, ok := c.Get("c"); !ok { - t.Fatalf("expected 'c' present, got ok=%v", ok) - } else if got, ok := v.(int); !ok || got != 3 { - t.Fatalf("expected 'c'=3 in cache, got %v", v) - } -} - -func TestCAWOLFU_EvictMethodOrder(t *testing.T) { - t.Parallel() - - c, err := NewCAWOLFU(2) - if err != nil { - t.Fatalf("NewCAWOLFU error: %v", err) - } - - c.Set("a", 1) - c.Set("b", 2) - - // Without additional access, tail is 'a' (inserted first with same count) - key, ok := c.Evict() - if !ok || key != "a" { - t.Fatalf("expected to evict 'a' first, got %q ok=%v", key, ok) - } - - key, ok = c.Evict() - if !ok || key != "b" { - t.Fatalf("expected to evict 'b' second, got %q ok=%v", key, ok) - } -} - -func TestCAWOLFU_ZeroCapacity_NoOp(t *testing.T) { - t.Parallel() - - c, err := NewCAWOLFU(0) - if err != nil { - t.Fatalf("NewCAWOLFU error: %v", err) - } - - c.Set("a", 1) - - if _, ok := c.Get("a"); ok { - t.Fatalf("expected Get to miss on zero-capacity cache") - } - - if key, ok := c.Evict(); ok || key != "" { - t.Fatalf("expected no eviction on zero-capacity, got %q ok=%v", key, ok) - } -} - -func TestCAWOLFU_Delete_RemovesItem(t *testing.T) { - t.Parallel() - - c, err := NewCAWOLFU(2) - if err != nil { - t.Fatalf("NewCAWOLFU error: %v", err) - } - - c.Set("a", 1) - c.Set("b", 2) - c.Delete("a") - - if _, ok := c.Get("a"); ok { - t.Fatalf("expected 'a' to be deleted") - } - - key, ok := c.Evict() - if !ok || key != "b" { - t.Fatalf("expected to evict 'b' as remaining item, got %q ok=%v", key, ok) - } -} +// CAWOLFU-specific tests. 
The IAlgorithm contract (basic Set/Get/Delete/Evict +// semantics, capacity bounds, zero-capacity no-op) is shared with LRU and +// lives in contract_test.go (TestCAWOLFUContract). +// +// Add CAWOLFU-specific tests here when CAWOLFU's frequency-ordered behavior +// diverges from LRU's recency-ordered behavior in ways the contract can't +// express (e.g. count-tie ordering, scan-resistance edge cases). diff --git a/pkg/eviction/clock.go b/pkg/eviction/clock.go index 7767537..f36a135 100644 --- a/pkg/eviction/clock.go +++ b/pkg/eviction/clock.go @@ -127,7 +127,7 @@ func (c *ClockAlgorithm) Delete(key string) { // -- helpers (unexported) -- -func (c *ClockAlgorithm) updateIfExists(key string, value any) bool { //nolint:ireturn +func (c *ClockAlgorithm) updateIfExists(key string, value any) bool { if idx, ok := c.keys[key]; ok { item := c.items[idx] @@ -140,7 +140,7 @@ func (c *ClockAlgorithm) updateIfExists(key string, value any) bool { //nolint:i return false } -func (c *ClockAlgorithm) tryInsertInFreeSlot(key string, value any) bool { //nolint:ireturn +func (c *ClockAlgorithm) tryInsertInFreeSlot(key string, value any) bool { start := c.hand for { if c.items[c.hand] == nil { // free slot @@ -163,7 +163,7 @@ func (c *ClockAlgorithm) tryInsertInFreeSlot(key string, value any) bool { //nol } } -func (c *ClockAlgorithm) evictAndInsert(key string, value any) { //nolint:ireturn +func (c *ClockAlgorithm) evictAndInsert(key string, value any) { for range c.capacity { item := c.items[c.hand] if item == nil { // skip empty diff --git a/pkg/eviction/contract_test.go b/pkg/eviction/contract_test.go new file mode 100644 index 0000000..a3f9f03 --- /dev/null +++ b/pkg/eviction/contract_test.go @@ -0,0 +1,154 @@ +package eviction + +import "testing" + +// algoFactory constructs an algorithm with the given capacity. +type algoFactory func(capacity int) (IAlgorithm, error) + +// runEvictionContract validates the IAlgorithm contract that every +// algorithm must satisfy regardless of its eviction policy: +// +// - Set/Get/Delete behave as a key-value store within capacity. +// - When at capacity, Set evicts a victim of the algorithm's choosing. +// - Evict() returns false when empty. +// - Capacity 0 is a no-op (Set/Get/Evict all return zero values). +// +// Algorithm-specific policy assertions (LRU recency order, LFU frequency +// order, ARC ghost behavior, etc.) belong in each algorithm's own _test +// file alongside this contract test. +func runEvictionContract(t *testing.T, name string, newAlgo algoFactory) { + t.Helper() + + t.Run(name+"/EvictsOnSetWhenFull", func(t *testing.T) { + t.Parallel() + assertEvictsOnSetWhenFull(t, newAlgo) + }) + + t.Run(name+"/EvictReturnsFalseWhenEmpty", func(t *testing.T) { + t.Parallel() + assertEvictReturnsFalseWhenEmpty(t, newAlgo) + }) + + t.Run(name+"/ZeroCapacityNoOp", func(t *testing.T) { + t.Parallel() + assertZeroCapacityNoOp(t, newAlgo) + }) + + t.Run(name+"/DeleteRemovesItem", func(t *testing.T) { + t.Parallel() + assertDeleteRemovesItem(t, newAlgo) + }) +} + +func assertEvictsOnSetWhenFull(t *testing.T, newAlgo algoFactory) { + t.Helper() + + algo, err := newAlgo(2) + if err != nil { + t.Fatalf("newAlgo: %v", err) + } + + algo.Set("a", 1) + algo.Set("b", 2) + + // One Get on "a" — under any reasonable recency/frequency policy, + // this makes "b" the eviction victim when "c" pushes capacity. 
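+	// (Clock's hand-based scan and ARC's adaptive lists do not guarantee
+	// this victim choice, which is why they are excluded from the
+	// contract; see the note above TestLRUContract at the bottom of this
+	// file.)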
+	if _, ok := algo.Get("a"); !ok {
+		t.Fatalf("expected to find 'a' before eviction")
+	}
+
+	algo.Set("c", 3)
+
+	if _, ok := algo.Get("b"); ok {
+		t.Fatalf("expected 'b' to be evicted")
+	}
+
+	if _, ok := algo.Get("a"); !ok {
+		t.Fatalf("expected 'a' to remain")
+	}
+
+	v, ok := algo.Get("c")
+	if !ok {
+		t.Fatalf("expected 'c' present after Set")
+	}
+
+	got, ok := v.(int)
+	if !ok || got != 3 {
+		t.Fatalf("expected 'c'=3, got %v", v)
+	}
+}
+
+func assertEvictReturnsFalseWhenEmpty(t *testing.T, newAlgo algoFactory) {
+	t.Helper()
+
+	algo, err := newAlgo(4)
+	if err != nil {
+		t.Fatalf("newAlgo: %v", err)
+	}
+
+	key, ok := algo.Evict()
+	if ok || key != "" {
+		t.Fatalf("expected Evict() on empty algo to return (\"\", false), got (%q, %v)", key, ok)
+	}
+}
+
+func assertZeroCapacityNoOp(t *testing.T, newAlgo algoFactory) {
+	t.Helper()
+
+	algo, err := newAlgo(0)
+	if err != nil {
+		t.Fatalf("newAlgo: %v", err)
+	}
+
+	algo.Set("a", 1)
+
+	if _, ok := algo.Get("a"); ok {
+		t.Fatalf("expected Get to miss on zero-capacity algo")
+	}
+
+	key, ok := algo.Evict()
+	if ok || key != "" {
+		t.Fatalf("expected no eviction on zero-capacity, got (%q, %v)", key, ok)
+	}
+}
+
+func assertDeleteRemovesItem(t *testing.T, newAlgo algoFactory) {
+	t.Helper()
+
+	algo, err := newAlgo(2)
+	if err != nil {
+		t.Fatalf("newAlgo: %v", err)
+	}
+
+	algo.Set("a", 1)
+	algo.Set("b", 2)
+	algo.Delete("a")
+
+	if _, ok := algo.Get("a"); ok {
+		t.Fatalf("expected 'a' deleted")
+	}
+
+	// Remaining item should be "b" (only candidate).
+	key, ok := algo.Evict()
+	if !ok || key != "b" {
+		t.Fatalf("expected to evict 'b' as remaining item, got (%q, %v)", key, ok)
+	}
+}
+
+// The contract is run against algorithms whose recency/frequency-based policy
+// makes "Get(a) then Set(c) evicts b" deterministic — LRU and CAWOLFU. Clock
+// (clock-hand scan) and ARC (T1/T2/B1/B2 lists) have non-equivalent eviction
+// semantics; they keep their own dedicated tests in clock_test.go,
+// arc_test.go, and lfu_test.go.
+
+// TestLRUContract runs the IAlgorithm contract against the LRU implementation.
+func TestLRUContract(t *testing.T) {
+	t.Parallel()
+	runEvictionContract(t, "lru", func(c int) (IAlgorithm, error) { return NewLRUAlgorithm(c) })
+}
+
+// TestCAWOLFUContract runs the IAlgorithm contract against the CAWOLFU implementation.
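+// CAWOLFU ranks entries by access frequency rather than recency, but the
+// contract's single Get("a") still leaves the untouched "b" as the eviction
+// victim, so the shared assertions hold for both policies.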
+func TestCAWOLFUContract(t *testing.T) { + t.Parallel() + runEvictionContract(t, "cawolfu", func(c int) (IAlgorithm, error) { return NewCAWOLFU(c) }) +} diff --git a/pkg/eviction/lru_test.go b/pkg/eviction/lru_test.go index 5a245fb..5532c50 100644 --- a/pkg/eviction/lru_test.go +++ b/pkg/eviction/lru_test.go @@ -1,102 +1,9 @@ package eviction -import "testing" - -func TestLRU_EvictsLeastRecentlyUsedOnSet(t *testing.T) { - t.Parallel() - - lru, err := NewLRUAlgorithm(2) - if err != nil { - t.Fatalf("NewLRUAlgorithm error: %v", err) - } - - lru.Set("a", 1) - lru.Set("b", 2) - - // Access "a" so that "b" becomes the least recently used - if _, ok := lru.Get("a"); !ok { - t.Fatalf("expected to get 'a'") - } - - // Insert "c"; should evict "b" - lru.Set("c", 3) - - if _, ok := lru.Get("b"); ok { - t.Fatalf("expected 'b' to be evicted") - } - - if _, ok := lru.Get("a"); !ok { - t.Fatalf("expected 'a' to remain in cache") - } - - if v, ok := lru.Get("c"); !ok { - t.Fatalf("expected 'c' present, got ok=%v", ok) - } else if got, ok := v.(int); !ok || got != 3 { - t.Fatalf("expected 'c'=3 in cache, got %v", v) - } -} - -func TestLRU_EvictMethodOrder(t *testing.T) { - t.Parallel() - - lru, err := NewLRUAlgorithm(2) - if err != nil { - t.Fatalf("NewLRUAlgorithm error: %v", err) - } - - lru.Set("a", 1) - lru.Set("b", 2) - - // After two inserts, tail should be "a" - key, ok := lru.Evict() - if !ok || key != "a" { - t.Fatalf("expected to evict 'a' first, got %q ok=%v", key, ok) - } - - key, ok = lru.Evict() - if !ok || key != "b" { - t.Fatalf("expected to evict 'b' second, got %q ok=%v", key, ok) - } -} - -func TestLRU_ZeroCapacity_NoOp(t *testing.T) { - t.Parallel() - - lru, err := NewLRUAlgorithm(0) - if err != nil { - t.Fatalf("NewLRUAlgorithm error: %v", err) - } - - lru.Set("a", 1) - - if _, ok := lru.Get("a"); ok { - t.Fatalf("expected Get to miss on zero-capacity cache") - } - - if key, ok := lru.Evict(); ok || key != "" { - t.Fatalf("expected no eviction on zero-capacity, got %q ok=%v", key, ok) - } -} - -func TestLRU_Delete_RemovesItem(t *testing.T) { - t.Parallel() - - lru, err := NewLRUAlgorithm(2) - if err != nil { - t.Fatalf("NewLRUAlgorithm error: %v", err) - } - - lru.Set("a", 1) - lru.Set("b", 2) - lru.Delete("a") - - if _, ok := lru.Get("a"); ok { - t.Fatalf("expected 'a' to be deleted") - } - - // Evict should not return deleted key - key, ok := lru.Evict() - if !ok || key != "b" { - t.Fatalf("expected to evict 'b' as remaining item, got %q ok=%v", key, ok) - } -} +// LRU-specific tests. The IAlgorithm contract (basic Set/Get/Delete/Evict +// semantics, capacity bounds, zero-capacity no-op) is shared with CAWOLFU +// and lives in contract_test.go (TestLRUContract). +// +// Add LRU-specific tests here when LRU's recency-ordered behavior diverges +// from CAWOLFU's frequency-ordered behavior in ways the contract can't +// express (e.g. "after Get(a), Get(b), Set(c), the victim is a, not b"). diff --git a/tests/dist_cluster_helper.go b/tests/dist_cluster_helper.go new file mode 100644 index 0000000..bd51511 --- /dev/null +++ b/tests/dist_cluster_helper.go @@ -0,0 +1,102 @@ +package tests + +import ( + "context" + "strconv" + "testing" + + "github.com/hyp3rd/hypercache/internal/cluster" + "github.com/hyp3rd/hypercache/pkg/backend" +) + +// DistCluster bundles the in-process distributed-memory test fixture: a +// single shared Ring + Membership + Transport, and N DistMemory backends +// already wired into them. 
Most multi-node tests in this package follow the
+// identical setup — extracting it as a helper keeps each test under the
+// function-length budget and removes copy-paste drift.
+type DistCluster struct {
+	Ring       *cluster.Ring
+	Membership *cluster.Membership
+	Transport  *backend.InProcessTransport
+	Nodes      []*backend.DistMemory
+}
+
+// ByID returns the node with the given ID, or nil if not present. Tests
+// frequently need to look up a node by the ID returned from ring.Lookup —
+// this avoids the manual `map[NodeID]*DistMemory{...}` lookup pattern that
+// the dist_*_test.go files used to inline.
+func (c *DistCluster) ByID(id cluster.NodeID) *backend.DistMemory {
+	for _, n := range c.Nodes {
+		if n.LocalNodeID() == id {
+			return n
+		}
+	}
+
+	return nil
+}
+
+// defaultReplicationFactor matches the cluster's recommended setting for
+// quorum reads/writes — three replicas tolerate one failure while keeping
+// total fan-out modest.
+const defaultReplicationFactor = 3
+
+// SetupInProcessCluster creates an N-node in-process distributed cluster
+// with the default replication factor, plus any per-node options the caller
+// supplies; it is shorthand for SetupInProcessClusterRF. Each backend is
+// registered with the shared transport and scheduled for Stop on t.Cleanup.
+//
+// Node IDs follow the convention "n1", "n2", ... so tests can reason about
+// them deterministically. The shared options are applied to every node;
+// per-test customization (e.g. consistency, hint TTL, sample sizes) is
+// expressed through the variadic option list.
+func SetupInProcessCluster(tb testing.TB, n int, opts ...backend.DistMemoryOption) *DistCluster {
+	tb.Helper()
+
+	return SetupInProcessClusterRF(tb, n, defaultReplicationFactor, opts...)
+}
+
+// SetupInProcessClusterRF is the explicit-replication-factor variant of
+// SetupInProcessCluster — used by tests that need 2-node clusters or
+// non-default replica counts.
+func SetupInProcessClusterRF(tb testing.TB, n, replication int, opts ...backend.DistMemoryOption) *DistCluster {
+	tb.Helper()
+
+	ring := cluster.NewRing(cluster.WithReplication(replication))
+	membership := cluster.NewMembership(ring)
+	transport := backend.NewInProcessTransport()
+
+	dc := &DistCluster{
+		Ring:       ring,
+		Membership: membership,
+		Transport:  transport,
+		Nodes:      make([]*backend.DistMemory, 0, n),
+	}
+
+	for i := range n {
+		node := cluster.NewNode("", "n"+strconv.Itoa(i+1)+":0")
+
+		nodeOpts := append([]backend.DistMemoryOption{
+			backend.WithDistMembership(membership, node),
+			backend.WithDistTransport(transport),
+		}, opts...)
+
+		bi, err := backend.NewDistMemory(context.TODO(), nodeOpts...)
+		if err != nil {
+			tb.Fatalf("dist node %d: %v", i, err)
+		}
+
+		bk, ok := bi.(*backend.DistMemory)
+		if !ok {
+			tb.Fatalf("dist node %d: expected *backend.DistMemory, got %T", i, bi)
+		}
+
+		StopOnCleanup(tb, bk)
+		transport.Register(bk)
+
+		dc.Nodes = append(dc.Nodes, bk)
+	}
+
+	return dc
+}
diff --git a/tests/hypercache_distmemory_failure_recovery_test.go b/tests/hypercache_distmemory_failure_recovery_test.go
index a6d0beb..97a9a51 100644
--- a/tests/hypercache_distmemory_failure_recovery_test.go
+++ b/tests/hypercache_distmemory_failure_recovery_test.go
@@ -12,122 +12,88 @@ import (
 	cache "github.com/hyp3rd/hypercache/pkg/cache/v2"
 )
 
-// TestDistFailureRecovery simulates node failure causing suspect->dead transition, hint queuing, and later recovery with hint replay.
-func TestDistFailureRecovery(t *testing.T) { //nolint:paralleltest - ctx := context.Background() - - ring := cluster.NewRing(cluster.WithReplication(2)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // two nodes (primary+replica) with fast heartbeat & hint config - n1 := cluster.NewNode("", "n1") - n2 := cluster.NewNode("", "n2") - - b1i, _ := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReplication(2), - backend.WithDistHeartbeat(15*time.Millisecond, 40*time.Millisecond, 90*time.Millisecond), - backend.WithDistHintTTL(2*time.Minute), - backend.WithDistHintReplayInterval(20*time.Millisecond), - backend.WithDistHintMaxPerNode(50), - ) - b2i, _ := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - backend.WithDistReplication(2), - backend.WithDistHeartbeat(15*time.Millisecond, 40*time.Millisecond, 90*time.Millisecond), - backend.WithDistHintTTL(2*time.Minute), - backend.WithDistHintReplayInterval(20*time.Millisecond), - backend.WithDistHintMaxPerNode(50), - ) - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - transport.Register(b1) - transport.Register(b2) - - // Find a key where b1 is primary and b2 replica to ensure replication target - key, ok := FindOwnerKey(b1, "fail-key-", []cluster.NodeID{b1.LocalNodeID(), b2.LocalNodeID()}, 5000) - if !ok { - t.Fatalf("could not find deterministic key ordering") - } - - _ = b1.Set(ctx, &cache.Item{Key: key, Value: "v1"}) - - // Simulate b2 failure (unregister from transport) so further replica writes queue hints. - transport.Unregister(string(n2.ID)) - - // Generate writes that should attempt to replicate and thus queue hints for n2. - for range 8 { // a few writes to ensure some dropped into hints - _ = b1.Set(ctx, &cache.Item{Key: key, Value: "v1-update"}) - - time.Sleep(5 * time.Millisecond) +// generateReplicaWrites issues a few primary writes to force replication +// fan-out — used by failure-recovery tests to seed the hint queue once the +// replica has been disconnected. +func generateReplicaWrites(ctx context.Context, b *backend.DistMemory, key string, n int, gap time.Duration) { + for range n { + _ = b.Set(ctx, &cache.Item{Key: key, Value: "v1-update"}) + + time.Sleep(gap) } +} - // Wait for suspect then dead transition of b2 from b1's perspective. - deadline := time.Now().Add(2 * time.Second) +// waitForHintDelivery polls until the replica node returns a non-nil item +// for key, indicating the hinted-handoff replay has completed. +func waitForHintDelivery(ctx context.Context, replica *backend.DistMemory, key string, timeout time.Duration) bool { + deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { - m := b1.Metrics() - if m.NodesDead > 0 { // dead transition observed - break + if it, ok := replica.Get(ctx, key); ok && it != nil { + return true } - time.Sleep(15 * time.Millisecond) + time.Sleep(25 * time.Millisecond) } - m1 := b1.Metrics() - if m1.NodesSuspect == 0 { + return false +} + +// assertNodeFailureMetrics verifies the primary recorded suspect/dead +// transitions and hinted-queue activity after the replica went offline. 
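+// The snapshot should be taken after waitForDeadTransition has returned, so
+// the suspect and dead counters have had a chance to advance.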
+func assertNodeFailureMetrics(t *testing.T, m backend.DistMetrics) { + t.Helper() + + if m.NodesSuspect == 0 { t.Fatalf("expected suspect transition recorded") } - if m1.NodesDead == 0 { + if m.NodesDead == 0 { t.Fatalf("expected dead transition recorded") } - if m1.HintedQueued == 0 { + if m.HintedQueued == 0 { t.Fatalf("expected queued hints while replica unreachable") } +} - // Bring b2 back (register again) and allow hint replay to run. - transport.Register(b2) +// TestDistFailureRecovery simulates node failure causing suspect->dead transition, hint queuing, and later recovery with hint replay. +func TestDistFailureRecovery(t *testing.T) { //nolint:paralleltest // mutates shared transport + ctx := context.Background() - // Force a manual replay cycle then ensure loop running. - b1.ReplayHintsForTest(ctx) + dc := SetupInProcessClusterRF(t, 2, 2, + backend.WithDistReplication(2), + backend.WithDistHeartbeat(15*time.Millisecond, 40*time.Millisecond, 90*time.Millisecond), + backend.WithDistHintTTL(2*time.Minute), + backend.WithDistHintReplayInterval(20*time.Millisecond), + backend.WithDistHintMaxPerNode(50), + ) - // Wait for replay to deliver hints. - deadline = time.Now().Add(2 * time.Second) + b1, b2 := dc.Nodes[0], dc.Nodes[1] - delivered := false - for time.Now().Before(deadline) { - if it, ok := b2.Get(ctx, key); ok && it != nil { - delivered = true + // Find a key where b1 is primary and b2 replica to ensure replication target. + key, ok := FindOwnerKey(b1, "fail-key-", []cluster.NodeID{b1.LocalNodeID(), b2.LocalNodeID()}, 5000) + if !ok { + t.Fatalf("could not find deterministic key ordering") + } - break - } + _ = b1.Set(ctx, &cache.Item{Key: key, Value: "v1"}) - time.Sleep(25 * time.Millisecond) - } + // Simulate b2 failure: unregister from transport so further replica writes queue hints. + dc.Transport.Unregister(string(b2.LocalNodeID())) + + generateReplicaWrites(ctx, b1, key, 8, 5*time.Millisecond) + waitForDeadTransition(b1, 2*time.Second) + assertNodeFailureMetrics(t, b1.Metrics()) + + // Bring b2 back online and force a replay cycle. + dc.Transport.Register(b2) + b1.ReplayHintsForTest(ctx) - if !delivered { + if !waitForHintDelivery(ctx, b2, key, 2*time.Second) { t.Fatalf("expected hinted value delivered after recovery") } - // Ensure replay metrics advanced (at least one replay) - m2 := b1.Metrics() - if m2.HintedReplayed == 0 { + if b1.Metrics().HintedReplayed == 0 { t.Fatalf("expected hinted replay metric >0") } } diff --git a/tests/hypercache_distmemory_heartbeat_sampling_test.go b/tests/hypercache_distmemory_heartbeat_sampling_test.go index e50be35..0d21778 100644 --- a/tests/hypercache_distmemory_heartbeat_sampling_test.go +++ b/tests/hypercache_distmemory_heartbeat_sampling_test.go @@ -9,73 +9,89 @@ import ( "github.com/hyp3rd/hypercache/pkg/backend" ) -// TestHeartbeatSamplingAndTransitions validates randomized sampling still produces suspect/dead transitions. -func TestHeartbeatSamplingAndTransitions(t *testing.T) { //nolint:paralleltest - ctx := context.Background() - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // three peers plus local - n1 := cluster.NewNode("", "n1") - n2 := cluster.NewNode("", "n2") - n3 := cluster.NewNode("", "n3") +// waitForDeadTransition polls a DistMemory's metrics until at least one +// node-dead transition has been recorded, or timeout elapses. 
Returns the +// final metrics snapshot — used to assert intermediate state without +// re-fetching after the loop. +func waitForDeadTransition(b *backend.DistMemory, timeout time.Duration) backend.DistMetrics { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + m := b.Metrics() + if m.NodesDead > 0 { + return m + } - // Intervals chosen to tolerate the 3-5x slowdown imposed by -race -count=10 - // under shuffle. Previous values (interval=15ms, dead=90ms) were tight - // enough that under heavy parallel test load the heartbeat goroutine could - // starve and never advance the dead transition within deadline. - b1i, _ := backend.NewDistMemory( - ctx, - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistHeartbeat(80*time.Millisecond, 320*time.Millisecond, 640*time.Millisecond), - backend.WithDistHeartbeatSample(0), // probe all peers per tick for deterministic transition - ) + time.Sleep(10 * time.Millisecond) + } - _ = b1i // for clarity + return b.Metrics() +} - b2i, _ := backend.NewDistMemory(ctx, backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - b3i, _ := backend.NewDistMemory(ctx, backend.WithDistMembership(membership, n3), backend.WithDistTransport(transport)) +// newDistPeerNode is a small helper for the heartbeat-sampling test, which +// needs three peers but only one with a heartbeat goroutine — so it can't +// use SetupInProcessCluster (that helper applies the supplied options to +// every node uniformly). +// +// Construction uses context.Background() rather than a caller-supplied ctx +// — Stop runs from t.Cleanup at end-of-test where the test ctx may already +// be canceled, so propagating the same ctx into Stop would leak the HTTP +// listener. See StopOnCleanup for the same rationale. +func newDistPeerNode( + t *testing.T, + membership *cluster.Membership, + transport *backend.InProcessTransport, + id string, + opts ...backend.DistMemoryOption, +) *backend.DistMemory { + t.Helper() + + node := cluster.NewNode("", id) + + all := append([]backend.DistMemoryOption{ + backend.WithDistMembership(membership, node), + backend.WithDistTransport(transport), + }, opts...) - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") + bi, err := backend.NewDistMemory(context.Background(), all...) + if err != nil { + t.Fatalf("new %s: %v", id, err) } - b2, ok := b2i.(*backend.DistMemory) + b, ok := bi.(*backend.DistMemory) if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") + t.Fatalf("expected *backend.DistMemory, got %T", bi) } - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } + StopOnCleanup(t, b) + transport.Register(b) - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) + return b +} - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) +// TestHeartbeatSamplingAndTransitions validates randomized sampling still produces suspect/dead transitions. +func TestHeartbeatSamplingAndTransitions(t *testing.T) { //nolint:paralleltest // mutates shared transport + ring := cluster.NewRing(cluster.WithReplication(1)) + membership := cluster.NewMembership(ring) + transport := backend.NewInProcessTransport() - // Unregister b2 to simulate failure so it becomes suspect then dead. - transport.Unregister(string(n2.ID)) + // Only node 1 runs a heartbeat goroutine — the test asserts on its + // metrics. Other nodes are passive responders. 
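	// WithDistHeartbeatSample(0) below disables peer sampling, so every
	// peer is probed on each tick and the dead transition does not depend
	// on random sample selection.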
+	//
+	// Intervals chosen to tolerate the 3-5x slowdown imposed by -race -count=10
+	// under shuffle. Previous values (interval=15ms, dead=90ms) were tight
+	// enough that under heavy parallel test load the heartbeat goroutine could
+	// starve and never advance the dead transition within deadline.
+	b1 := newDistPeerNode(t, membership, transport, "n1",
+		backend.WithDistHeartbeat(80*time.Millisecond, 320*time.Millisecond, 640*time.Millisecond),
+		backend.WithDistHeartbeatSample(0),
+	)
+	b2 := newDistPeerNode(t, membership, transport, "n2")
 
-	// Unregister b2 to simulate failure so it becomes suspect then dead.
-	transport.Unregister(string(n2.ID))
+	_ = newDistPeerNode(t, membership, transport, "n3")
 
-	// Wait long enough for dead transition. Because of sampling (k=1) we give generous time window.
-	deadline := time.Now().Add(3 * time.Second)
-	for time.Now().Before(deadline) {
-		m := b1.Metrics()
-		if m.NodesDead > 0 { // transition observed
-			break
-		}
+	transport.Unregister(string(b2.LocalNodeID()))
 
-		time.Sleep(10 * time.Millisecond)
-	}
+	mfinal := waitForDeadTransition(b1, 3*time.Second)
-	mfinal := b1.Metrics()
 	if mfinal.NodesSuspect == 0 {
 		t.Fatalf("expected at least one suspect transition, got 0")
 	}
@@ -84,7 +100,6 @@ func TestHeartbeatSamplingAndTransitions(t *testing.T) { //nolint:paralleltest
 		t.Fatalf("expected at least one dead transition, got 0")
 	}
 
-	// ensure membership version advanced beyond initial additions (>= number of transitions + initial upserts)
 	snap := b1.DistMembershipSnapshot()
 
 	verAny := snap["version"]
@@ -93,9 +108,7 @@ func TestHeartbeatSamplingAndTransitions(t *testing.T) { //nolint:paralleltest
 		t.Fatalf("expected version to be uint64, got %T (%v)", verAny, verAny)
 	}
 
-	if ver < 3 { // initial upserts already increment version; tolerate timing variance
+	if ver < 3 { // nominally >=4; tolerate timing variance
 		t.Fatalf("expected membership version >=4, got %v", verAny)
 	}
-
-	_ = b3 // silence linter for now (future: more assertions)
 }
diff --git a/tests/hypercache_distmemory_heartbeat_test.go b/tests/hypercache_distmemory_heartbeat_test.go
index f99dbb7..110cf7a 100644
--- a/tests/hypercache_distmemory_heartbeat_test.go
+++ b/tests/hypercache_distmemory_heartbeat_test.go
@@ -1,7 +1,6 @@
 package tests
 
 import (
-	"context"
 	"testing"
 	"time"
 
@@ -9,154 +8,100 @@ import (
 	"github.com/hyp3rd/hypercache/pkg/backend"
 )
 
-// TestDistMemoryHeartbeatLiveness spins up three nodes with a fast heartbeat interval
-// and validates suspect -> removal transitions plus success/failure metrics.
-func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest
-	// Intervals chosen so the test tolerates the 3-5x slowdown imposed by
-	// the race detector. Previous values (interval=30ms, dead=120ms) were
-	// tight enough that a delayed heartbeat tick could push *alive* nodes
-	// past deadAfter under -race, removing them from membership.
- interval := 80 * time.Millisecond - suspectAfter := 4 * interval // 320ms - deadAfter := 8 * interval // 640ms - - ring := cluster.NewRing(cluster.WithReplication(1)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // backend for node1 with heartbeat enabled - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistHeartbeat(interval, suspectAfter, deadAfter), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - - // add peers (without heartbeat loops themselves) - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b2) - - b3i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("b3: %v", err) - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // Wait until heartbeat marks peers alive (initial success probes) - deadline := time.Now().Add(2 * time.Second) +// waitForAllAlive polls the membership view until every node reports alive +// or the deadline expires. Used by heartbeat-liveness tests to ensure the +// initial probe round has completed before the test starts perturbing the +// cluster. +func waitForAllAlive(membership *cluster.Membership, expected int, timeout time.Duration) bool { + deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { - aliveCount := 0 + alive := 0 + for _, n := range membership.List() { if n.State == cluster.NodeAlive { - aliveCount++ + alive++ } } - if aliveCount == 3 { - break + if alive == expected { + return true } time.Sleep(20 * time.Millisecond) } - // Simulate node2 becoming unresponsive by removing it from transport registry. - // (Simplest way: do not respond to health; drop entry.) - transport.Unregister(string(n2.ID)) + return false +} + +// nodeRemovalResult captures the outcome of waitForNodeRemoval — using a +// struct instead of two bool returns avoids the revive `confusing-results` +// lint while keeping the call site readable at the expense of one more +// type definition. +type nodeRemovalResult struct { + SawSuspect bool + Removed bool +} + +// waitForNodeRemoval polls until the named node has been removed from +// membership, having first transitioned through the Suspect state. +func waitForNodeRemoval(membership *cluster.Membership, target cluster.NodeID, timeout time.Duration) nodeRemovalResult { + var result nodeRemovalResult - // Wait until node2 transitions to suspect then removed. 
- var sawSuspect bool + deadline := time.Now().Add(timeout) - deadline = time.Now().Add(2 * deadAfter) for time.Now().Before(deadline) { - foundN2 := false + found := false + for _, n := range membership.List() { - if n.ID == n2.ID { - foundN2 = true + if n.ID == target { + found = true if n.State == cluster.NodeSuspect { - sawSuspect = true + result.SawSuspect = true } } } - if !foundN2 && sawSuspect { - break - } // removed after suspicion observed + if !found && result.SawSuspect { + result.Removed = true + + return result + } time.Sleep(20 * time.Millisecond) } - if !sawSuspect { - t.Fatalf("node2 never became suspect") - } + return result +} + +// assertHeartbeatLiveness verifies the post-condition of a heartbeat removal +// scenario: removed node is gone from membership, alive node is still +// present and alive, and the heartbeat metrics reflect the activity. +func assertHeartbeatLiveness(t *testing.T, membership *cluster.Membership, removed, stillAlive cluster.NodeID, m backend.DistMetrics) { + t.Helper() - // ensure removed for _, n := range membership.List() { - if n.ID == n2.ID { - t.Fatalf("node2 still present, state=%s", n.State) + if n.ID == removed { + t.Fatalf("removed node %s still present, state=%s", removed, n.State) } } - // Node3 should remain alive; ensure not removed - n3Present := false + alivePresent := false + for _, n := range membership.List() { - if n.ID == n3.ID { - n3Present = true + if n.ID == stillAlive { + alivePresent = true if n.State != cluster.NodeAlive { - t.Fatalf("node3 not alive: %s", n.State) + t.Fatalf("alive node %s not alive: %s", stillAlive, n.State) } } } - if !n3Present { - t.Fatalf("node3 missing") + if !alivePresent { + t.Fatalf("alive node %s missing", stillAlive) } - // Metrics sanity: at least one heartbeat failure and success recorded. - m := b1.Metrics() if m.HeartbeatFailure == 0 { t.Errorf("expected heartbeat failures > 0") } @@ -169,3 +114,53 @@ func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest t.Errorf("expected nodes removed metric > 0") } } + +// TestDistMemoryHeartbeatLiveness spins up three nodes with a fast heartbeat interval +// and validates suspect -> removal transitions plus success/failure metrics. +// +// Only node 1 runs a heartbeat goroutine — the test asserts on its metrics. +// Adding heartbeat to all three nodes makes the test flaky under -shuffle +// because node 2's own heartbeat goroutine (running while it's unregistered +// from the transport) interferes with timing of node 1's removal of node 2. +func TestDistMemoryHeartbeatLiveness(t *testing.T) { //nolint:paralleltest // mutates shared transport + // Intervals chosen so the test tolerates the 3-5x slowdown imposed by + // the race detector. Previous values (interval=30ms, dead=120ms) were + // tight enough that a delayed heartbeat tick could push *alive* nodes + // past deadAfter under -race, removing them from membership. 
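+	// With interval=80ms, suspectAfter=320ms and deadAfter=640ms mean a
+	// peer must miss roughly four consecutive probes to become suspect
+	// and eight to be declared dead.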
+	const interval = 80 * time.Millisecond
+
+	suspectAfter := 4 * interval // 320ms
+	deadAfter := 8 * interval    // 640ms
+
+	ring := cluster.NewRing(cluster.WithReplication(1))
+	membership := cluster.NewMembership(ring)
+	transport := backend.NewInProcessTransport()
+
+	b1 := newDistPeerNode(t, membership, transport, "n1",
+		backend.WithDistHeartbeat(interval, suspectAfter, deadAfter),
+	)
+	b2 := newDistPeerNode(t, membership, transport, "n2")
+	b3 := newDistPeerNode(t, membership, transport, "n3")
+
+	if !waitForAllAlive(membership, 3, 2*time.Second) {
+		t.Fatalf("nodes never reached alive state within 2s")
+	}
+
+	target := b2.LocalNodeID()
+	transport.Unregister(string(target))
+
+	// 4*deadAfter (= 2560ms) gives the heartbeat goroutine slack under
+	// -race -shuffle -count=10. Earlier 2*deadAfter (1280ms) was tight
+	// enough that goroutine scheduling latency could miss the deadline
+	// even when the probe logic was working correctly.
+	res := waitForNodeRemoval(membership, target, 4*deadAfter)
+	if !res.SawSuspect {
+		t.Fatalf("node2 never became suspect")
+	}
+
+	if !res.Removed {
+		t.Fatalf("node2 not removed within 4*deadAfter")
+	}
+
+	assertHeartbeatLiveness(t, membership, target, b3.LocalNodeID(), b1.Metrics())
+}
diff --git a/tests/hypercache_distmemory_hinted_handoff_test.go b/tests/hypercache_distmemory_hinted_handoff_test.go
index e552b90..702b080 100644
--- a/tests/hypercache_distmemory_hinted_handoff_test.go
+++ b/tests/hypercache_distmemory_hinted_handoff_test.go
@@ -13,6 +13,71 @@ import (
 	cache "github.com/hyp3rd/hypercache/pkg/cache/v2"
 )
 
+// findKeyOwnedBy scans candidate keys whose ring owners include the named
+// node. Returns the first match or `defaultKey` if no candidate qualifies
+// within attempts.
+func findKeyOwnedBy(node *backend.DistMemory, ownerID string, attempts int, defaultKey, prefix string) string {
+	for i := range attempts {
+		candidate := fmt.Sprintf("%s-%d", prefix, i)
+		for _, oid := range node.Ring().Lookup(candidate) {
+			if string(oid) == ownerID {
+				return candidate
+			}
+		}
+	}
+
+	return defaultKey
+}
+
+// assertHintedHandoffMetrics fails the test unless the primary observed at
+// least one queued + one replayed hint and zero dropped.
+func assertHintedHandoffMetrics(t *testing.T, m backend.DistMetrics) {
+	t.Helper()
+
+	if m.HintedQueued < 1 {
+		t.Fatalf("expected HintedQueued >=1, got %d", m.HintedQueued)
+	}
+
+	if m.HintedReplayed < 1 {
+		t.Fatalf("expected HintedReplayed >=1, got %d", m.HintedReplayed)
+	}
+
+	if m.HintedDropped != 0 {
+		t.Fatalf("expected no HintedDropped, got %d", m.HintedDropped)
+	}
+}
+
+// newHintedHandoffNode constructs one DistMemory backend wired into the
+// shared membership/transport for the hinted-handoff test. Construction
+// uses context.Background() rather than a caller-supplied ctx because Stop
+// runs from t.Cleanup at end-of-test where the test ctx may already be
+// canceled — see StopOnCleanup for the same rationale.
+func newHintedHandoffNode(t *testing.T, m *cluster.Membership, id string, baseOpts []backend.DistMemoryOption) *backend.DistMemory {
+	t.Helper()
+
+	opts := make([]backend.DistMemoryOption, 0, len(baseOpts)+2)
+
+	opts = append(opts, baseOpts...)
+	opts = append(opts,
+		backend.WithDistNode(id, id),
+		backend.WithDistMembership(m, cluster.NewNode(id, id)),
+	)
+
+	bi, err := backend.NewDistMemory(context.Background(), opts...)
+ if err != nil { + t.Fatalf("new %s: %v", id, err) + } + + b, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bi) + } + + StopOnCleanup(t, b) + + return b +} + // TestHintedHandoffReplay ensures that when a replica is down during a write, a hint is queued and later replayed. func TestHintedHandoffReplay(t *testing.T) { t.Parallel() @@ -20,111 +85,47 @@ func TestHintedHandoffReplay(t *testing.T) { ctx := context.Background() transport := backend.NewInProcessTransport() - opts := []backend.DistMemoryOption{ - backend.WithDistReplication(2), // primary + 1 replica - backend.WithDistWriteConsistency(backend.ConsistencyOne), // allow local success while still attempting fanout + baseOpts := []backend.DistMemoryOption{ + backend.WithDistReplication(2), + backend.WithDistWriteConsistency(backend.ConsistencyOne), backend.WithDistHintTTL(time.Minute), backend.WithDistHintReplayInterval(25 * time.Millisecond), backend.WithDistHintMaxPerNode(10), } ring := cluster.NewRing(cluster.WithReplication(2)) - m := cluster.NewMembership(ring) - m.Upsert(cluster.NewNode("P", "P")) - m.Upsert(cluster.NewNode("R", "R")) + membership := cluster.NewMembership(ring) + membership.Upsert(cluster.NewNode("P", "P")) + membership.Upsert(cluster.NewNode("R", "R")) - primaryOpts := append(opts, backend.WithDistNode("P", "P"), backend.WithDistMembership(m, cluster.NewNode("P", "P"))) - replicaOpts := append(opts, backend.WithDistNode("R", "R"), backend.WithDistMembership(m, cluster.NewNode("R", "R"))) - primary, _ := backend.NewDistMemory(ctx, primaryOpts...) - replica, _ := backend.NewDistMemory(ctx, replicaOpts...) - - p, ok := any(primary).(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast primary to *backend.DistMemory") - } - - r, ok := any(replica).(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast replica to *backend.DistMemory") - } - - StopOnCleanup(t, p) - StopOnCleanup(t, r) + p := newHintedHandoffNode(t, membership, "P", baseOpts) + r := newHintedHandoffNode(t, membership, "R", baseOpts) p.SetTransport(transport) - // r transport deliberately not registered yet (simulate down replica) transport.Register(p) + // r is deliberately not registered yet (simulate downed replica). - // manually start replay (constructor might have skipped due to timing) + // Constructor may have skipped replay due to timing — start it manually. p.StartHintReplayForTest(ctx) - // find a key whose owners include replica R - key := "hint-key" - for i := range 100 { // try a few keys - candidate := fmt.Sprintf("hint-key-%d", i) - owners := p.Ring().Lookup(candidate) - - foundR := false - for _, oid := range owners { - if string(oid) == "R" { - foundR = true - - break - } - } - - if foundR { - key = candidate - - break - } - } - - item := &cache.Item{Key: key, Value: "v1"} + key := findKeyOwnedBy(p, "R", 100, "hint-key", "hint-key") - _ = primary.Set(ctx, item) // should attempt to replicate to R and queue hint + _ = p.Set(ctx, &cache.Item{Key: key, Value: "v1"}) if p.HintedQueueSize("R") == 0 { t.Fatalf("expected hint queued for unreachable replica; size=0 key=%s owners=%v", key, p.Ring().Lookup(key)) } - // Now register replica so hints can replay + // Bring R online and force an immediate replay cycle. 
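+	// SetTransport wires r's outbound path; Register makes r reachable
+	// again for inbound forwards, so queued hints can now be delivered.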
r.SetTransport(transport) transport.Register(r) - // immediate manual replay before polling p.ReplayHintsForTest(ctx) - // Wait for replay loop to deliver hint t.Logf("queued hints for R: %d", p.HintedQueueSize("R")) - deadline := time.Now().Add(1 * time.Second) - - found := false - for time.Now().Before(deadline) { - if v, ok := replica.Get(ctx, key); ok && v != nil { - found = true - - break - } - - time.Sleep(25 * time.Millisecond) - } - - if !found { + if !waitForHintDelivery(ctx, r, key, 1*time.Second) { t.Fatalf("replica did not receive hinted handoff value") } - // metrics assertions for hinted handoff (at least one queued & replayed, none dropped) - ms := p.Metrics() - if ms.HintedQueued < 1 { - t.Fatalf("expected HintedQueued >=1, got %d", ms.HintedQueued) - } - - if ms.HintedReplayed < 1 { - t.Fatalf("expected HintedReplayed >=1, got %d", ms.HintedReplayed) - } - - if ms.HintedDropped != 0 { - t.Fatalf("expected no HintedDropped, got %d", ms.HintedDropped) - } + assertHintedHandoffMetrics(t, p.Metrics()) } diff --git a/tests/hypercache_distmemory_integration_test.go b/tests/hypercache_distmemory_integration_test.go index c935e0c..377bafb 100644 --- a/tests/hypercache_distmemory_integration_test.go +++ b/tests/hypercache_distmemory_integration_test.go @@ -5,8 +5,6 @@ import ( "slices" "testing" - "github.com/hyp3rd/hypercache/internal/cluster" - "github.com/hyp3rd/hypercache/pkg/backend" cache "github.com/hyp3rd/hypercache/pkg/cache/v2" ) @@ -15,53 +13,13 @@ import ( func TestDistMemoryForwardingReplication(t *testing.T) { t.Parallel() - ring := cluster.NewRing(cluster.WithReplication(2)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() + dc := SetupInProcessClusterRF(t, 2, 2) - // create two nodes/backends - n1 := cluster.NewNode("", "node1:0") - n2 := cluster.NewNode("", "node2:0") - - b1i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("backend1: %v", err) - } - - b2i, err := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - ) - if err != nil { - t.Fatalf("backend2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - transport.Register(b1) - transport.Register(b2) - - // pick keys to exercise distribution (simple deterministic list) keys := []string{"alpha", "bravo", "charlie", "delta", "echo"} - // write via the node that is primary owner to guarantee placement + replication + + // Write via the primary owner to guarantee placement + replication. 
for _, k := range keys { - owners := ring.Lookup(k) + owners := dc.Ring.Lookup(k) if len(owners) == 0 { t.Fatalf("no owners for key %s", k) } @@ -73,38 +31,30 @@ func TestDistMemoryForwardingReplication(t *testing.T) { t.Fatalf("item valid %s: %v", k, err) } - target := owners[0] - - var err2 error - - switch target { - case n1.ID: - err2 = b1.Set(context.Background(), item) - case n2.ID: - err2 = b2.Set(context.Background(), item) - default: - t.Fatalf("unexpected owner id %s", target) + primary := dc.ByID(owners[0]) + if primary == nil { + t.Fatalf("unexpected owner id %s", owners[0]) } - if err2 != nil { - t.Fatalf("set %s via %s: %v", k, target, err2) + err = primary.Set(context.Background(), item) + if err != nil { + t.Fatalf("set %s via %s: %v", k, owners[0], err) } } - // Each key should be readable via either owner (b1 primary forward) or local if replica + // Each key should be readable via either owner (primary forward) or local if replica. for _, k := range keys { - if _, ok := b1.Get(context.Background(), k); !ok { + if _, ok := dc.Nodes[0].Get(context.Background(), k); !ok { t.Fatalf("b1 cannot get key %s", k) } - if _, ok := b2.Get(context.Background(), k); !ok { // should forward or local hit + if _, ok := dc.Nodes[1].Get(context.Background(), k); !ok { t.Fatalf("b2 cannot get key %s", k) } } - // Check replication: at least one key should physically exist on b2 after writes from b1 when b2 is replica - foundReplica := slices.ContainsFunc(keys, b2.LocalContains) - if !foundReplica { + // Replication check: at least one key should physically exist on b2. + if !slices.ContainsFunc(keys, dc.Nodes[1].LocalContains) { t.Fatalf("expected at least one replicated key on b2") } } diff --git a/tests/hypercache_distmemory_remove_readrepair_test.go b/tests/hypercache_distmemory_remove_readrepair_test.go index 9cf1240..e9eabb2 100644 --- a/tests/hypercache_distmemory_remove_readrepair_test.go +++ b/tests/hypercache_distmemory_remove_readrepair_test.go @@ -4,60 +4,18 @@ import ( "context" "testing" - "github.com/hyp3rd/hypercache/internal/cluster" - "github.com/hyp3rd/hypercache/pkg/backend" cache "github.com/hyp3rd/hypercache/pkg/cache/v2" ) -// helper to build two-node replicated cluster. -// -//nolint:revive // confusing-results: two backends + ring is the natural shape; named returns add no clarity here -func newTwoNodeCluster(t *testing.T) (*backend.DistMemory, *backend.DistMemory, *cluster.Ring) { - t.Helper() - - ring := cluster.NewRing(cluster.WithReplication(2)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - n1 := cluster.NewNode("", "node1:0") - n2 := cluster.NewNode("", "node2:0") - - b1i, err := backend.NewDistMemory(context.TODO(), backend.WithDistMembership(membership, n1), backend.WithDistTransport(transport)) - if err != nil { - t.Fatalf("b1: %v", err) - } - - b2i, err := backend.NewDistMemory(context.TODO(), backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - if err != nil { - t.Fatalf("b2: %v", err) - } - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - transport.Register(b1) - transport.Register(b2) - - return b1, b2, ring -} - // TestDistMemoryRemoveReplication ensures that Remove replicates deletions across replicas. 
func TestDistMemoryRemoveReplication(t *testing.T) { t.Parallel() - b1, b2, ring := newTwoNodeCluster(t) - key := "remove-key" + dc := SetupInProcessClusterRF(t, 2, 2) + + const key = "remove-key" - owners := ring.Lookup(key) + owners := dc.Ring.Lookup(key) if len(owners) == 0 { t.Fatalf("no owners") } @@ -69,46 +27,31 @@ func TestDistMemoryRemoveReplication(t *testing.T) { t.Fatalf("valid: %v", err) } - // write via primary - if owners[0] == b1.LocalNodeID() { // local helper we add below - err := b1.Set(context.Background(), item) - if err != nil { - t.Fatalf("set: %v", err) - } - } else { - err := b2.Set(context.Background(), item) - if err != nil { - t.Fatalf("set: %v", err) - } + primary := dc.ByID(owners[0]) + + err = primary.Set(context.Background(), item) + if err != nil { + t.Fatalf("set: %v", err) } - // assert item readable from both nodes - if _, ok := b1.Get(context.Background(), key); !ok { + if _, ok := dc.Nodes[0].Get(context.Background(), key); !ok { t.Fatalf("b1 missing pre-remove") } - if _, ok := b2.Get(context.Background(), key); !ok { + if _, ok := dc.Nodes[1].Get(context.Background(), key); !ok { t.Fatalf("b2 missing pre-remove") } - // remove via primary - if owners[0] == b1.LocalNodeID() { - err := b1.Remove(context.Background(), key) - if err != nil { - t.Fatalf("remove: %v", err) - } - } else { - err := b2.Remove(context.Background(), key) - if err != nil { - t.Fatalf("remove: %v", err) - } + err = primary.Remove(context.Background(), key) + if err != nil { + t.Fatalf("remove: %v", err) } - if _, ok := b1.Get(context.Background(), key); ok { + if _, ok := dc.Nodes[0].Get(context.Background(), key); ok { t.Fatalf("b1 still has key after remove") } - if _, ok := b2.Get(context.Background(), key); ok { + if _, ok := dc.Nodes[1].Get(context.Background(), key); ok { t.Fatalf("b2 still has key after remove") } } @@ -117,12 +60,13 @@ func TestDistMemoryRemoveReplication(t *testing.T) { func TestDistMemoryReadRepair(t *testing.T) { t.Parallel() - b1, b2, ring := newTwoNodeCluster(t) - key := "rr-key" + dc := SetupInProcessClusterRF(t, 2, 2) - owners := ring.Lookup(key) - if len(owners) == 0 { - t.Fatalf("no owners") + const key = "rr-key" + + owners := dc.Ring.Lookup(key) + if len(owners) < 2 { + t.Skip("replication factor <2") } item := &cache.Item{Key: key, Value: "val"} @@ -132,82 +76,37 @@ func TestDistMemoryReadRepair(t *testing.T) { t.Fatalf("valid: %v", err) } - // write via primary - if owners[0] == b1.LocalNodeID() { - err := b1.Set(context.Background(), item) - if err != nil { - t.Fatalf("set: %v", err) - } - } else { - err := b2.Set(context.Background(), item) - if err != nil { - t.Fatalf("set: %v", err) - } - } + primary := dc.ByID(owners[0]) - // determine replica node (owners[1]) and drop local copy there manually - if len(owners) < 2 { - t.Skip("replication factor <2") + err = primary.Set(context.Background(), item) + if err != nil { + t.Fatalf("set: %v", err) } - replica := owners[1] - // optional: t.Logf("owners: %v primary=%s replica=%s", owners, owners[0], replica) - if replica == b1.LocalNodeID() { - b1.DebugDropLocal(key) - } else { - b2.DebugDropLocal(key) - } + replicaNode := dc.ByID(owners[1]) + replicaNode.DebugDropLocal(key) - // ensure dropped locally - if replica == b1.LocalNodeID() && b1.LocalContains(key) { + if replicaNode.LocalContains(key) { t.Fatalf("replica still has key after drop") } - if replica == b2.LocalNodeID() && b2.LocalContains(key) { - t.Fatalf("replica still has key after drop") - } - - // issue Get from a non-owner node to 
trigger forwarding, then verify owners repaired. - // choose a requester: use node that is neither primary nor replica if possible; with 2 nodes this means primary forwards to replica or - // vice versa. - requester := b1 - if owners[0] == b1.LocalNodeID() && replica == b2.LocalNodeID() { - requester = b2 // request from replica to forward to primary - } else if owners[0] == b2.LocalNodeID() && replica == b1.LocalNodeID() { - requester = b1 - } + // Issue Get from the replica side so the read fans out to the primary, + // which surfaces the missing entry and triggers read-repair. + requester := replicaNode if _, ok := requester.Get(context.Background(), key); !ok { t.Fatalf("get for read-repair failed") } - // after forwarding, both owners should have key locally again - if owners[0] == b1.LocalNodeID() && !b1.LocalContains(key) { + if !primary.LocalContains(key) { t.Fatalf("primary missing after read repair") } - if owners[0] == b2.LocalNodeID() && !b2.LocalContains(key) { - t.Fatalf("primary missing after read repair") - } - - if replica == b1.LocalNodeID() && !b1.LocalContains(key) { - t.Fatalf("replica missing after read repair") - } - - if replica == b2.LocalNodeID() && !b2.LocalContains(key) { + if !replicaNode.LocalContains(key) { t.Fatalf("replica missing after read repair") } - // metrics should show at least one read repair - var repaired bool - - if replica == b1.LocalNodeID() { - repaired = b1.Metrics().ReadRepair > 0 - } else { - repaired = b2.Metrics().ReadRepair > 0 - } - - if !repaired { + if replicaNode.Metrics().ReadRepair == 0 { t.Fatalf("expected read-repair metric increment") } } diff --git a/tests/hypercache_distmemory_stale_quorum_test.go b/tests/hypercache_distmemory_stale_quorum_test.go index 8c7e31b..6cb9bd9 100644 --- a/tests/hypercache_distmemory_stale_quorum_test.go +++ b/tests/hypercache_distmemory_stale_quorum_test.go @@ -5,131 +5,75 @@ import ( "testing" "time" - "github.com/hyp3rd/hypercache/internal/cluster" "github.com/hyp3rd/hypercache/pkg/backend" cache "github.com/hyp3rd/hypercache/pkg/cache/v2" ) +// pickNonAheadRequester returns the first cluster node that is not the +// `ahead` replica — quorum-read tests need the read to come from a lagging +// or correct node so the quorum fan-out forces reconciliation. +func pickNonAheadRequester(dc *DistCluster, ahead *backend.DistMemory) *backend.DistMemory { + for _, n := range dc.Nodes { + if n.LocalNodeID() != ahead.LocalNodeID() { + return n + } + } + + return dc.Nodes[0] +} + // TestDistMemoryStaleQuorum ensures quorum read returns newest version and repairs stale replicas. 
func TestDistMemoryStaleQuorum(t *testing.T) { t.Parallel() - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n2), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), + dc := SetupInProcessCluster(t, 3, backend.WithDistReadConsistency(backend.ConsistencyQuorum), ) - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } + const key = "sq-key" - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) - - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - key := "sq-key" - - owners := ring.Lookup(key) + owners := dc.Ring.Lookup(key) if len(owners) != 3 { t.Skip("replication factor !=3") } - // Write initial version via primary - primary := owners[0] item := &cache.Item{Key: key, Value: "v1"} _ = item.Valid() - if primary == b1.LocalNodeID() { - _ = b1.Set(context.Background(), item) - } else if primary == b2.LocalNodeID() { - _ = b2.Set(context.Background(), item) - } else { - _ = b3.Set(context.Background(), item) - } - - // Manually bump version on one replica to simulate a newer write that others missed - // Pick owners[1] as ahead replica - aheadID := owners[1] - ahead := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[aheadID] - ahead.DebugInject(&cache.Item{Key: key, Value: "v2", Version: 5, Origin: string(ahead.LocalNodeID()), LastUpdated: time.Now()}) - - // Drop local copy on owners[2] to simulate stale/missing - lagID := owners[2] - lag := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[lagID] - lag.DebugDropLocal(key) - - // Issue quorum read from a non-ahead node (choose primary if not ahead, else third) - requester := b1 - if requester.LocalNodeID() == aheadID { - requester = b2 - } + _ = dc.ByID(owners[0]).Set(context.Background(), item) - if requester.LocalNodeID() == aheadID { - requester = b3 - } + // Bump version on owners[1] to simulate a newer write the others missed. 
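+	// DebugInject writes directly into the node's local store with no
+	// replication fan-out, so only this replica holds version 5 until the
+	// quorum read repairs the rest.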
+ ahead := dc.ByID(owners[1]) + ahead.DebugInject(&cache.Item{ + Key: key, Value: "v2", Version: 5, + Origin: string(ahead.LocalNodeID()), LastUpdated: time.Now(), + }) + dc.ByID(owners[2]).DebugDropLocal(key) - got, ok := requester.Get(context.Background(), key) + got, ok := pickNonAheadRequester(dc, ahead).Get(context.Background(), key) if !ok { t.Fatalf("quorum get failed") } - // Value stored as interface{} may be string (not []byte) in this test - if sval, okCast := got.Value.(string); !okCast || sval != "v2" { + sval, okCast := got.Value.(string) + if !okCast || sval != "v2" { t.Fatalf("expected quorum to return newer version v2, got=%v (type %T)", got.Value, got.Value) } - // Allow brief repair propagation + // Brief sleep to let the read-repair fan-out land on lagging replicas. time.Sleep(50 * time.Millisecond) - // All owners should now have v2 (version 5) for _, oid := range owners { - inst := map[cluster.NodeID]*backend.DistMemory{b1.LocalNodeID(): b1, b2.LocalNodeID(): b2, b3.LocalNodeID(): b3}[oid] - - it, ok2 := inst.Get(context.Background(), key) + it, ok2 := dc.ByID(oid).Get(context.Background(), key) if !ok2 || it.Version != 5 { t.Fatalf("owner %s not repaired to v2 (v5) -> (%v,%v)", oid, ok2, it) } } - // ReadRepair metric should have incremented somewhere - if b1.Metrics().ReadRepair+b2.Metrics().ReadRepair+b3.Metrics().ReadRepair == 0 { + totalRepair := dc.Nodes[0].Metrics().ReadRepair + + dc.Nodes[1].Metrics().ReadRepair + + dc.Nodes[2].Metrics().ReadRepair + if totalRepair == 0 { t.Fatalf("expected read repair metric >0") } } diff --git a/tests/hypercache_distmemory_tiebreak_test.go b/tests/hypercache_distmemory_tiebreak_test.go index 6970595..1d228eb 100644 --- a/tests/hypercache_distmemory_tiebreak_test.go +++ b/tests/hypercache_distmemory_tiebreak_test.go @@ -2,7 +2,6 @@ package tests import ( "context" - "fmt" "testing" "time" @@ -12,76 +11,28 @@ import ( ) // TestDistMemoryVersionTieBreak ensures that when versions are equal the lexicographically smaller origin wins. 
-func TestDistMemoryVersionTieBreak(t *testing.T) { //nolint:paralleltest - interval := 5 * time.Millisecond - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() +func TestDistMemoryVersionTieBreak(t *testing.T) { //nolint:paralleltest // mutates shared transport + const interval = 5 * time.Millisecond - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), + dc := SetupInProcessCluster(t, 3, backend.WithDistReplication(3), backend.WithDistHeartbeat(interval, 0, 0), backend.WithDistReadConsistency(backend.ConsistencyQuorum), backend.WithDistWriteConsistency(backend.ConsistencyQuorum), ) - b2i, _ := backend.NewDistMemory(context.TODO(), backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") - } - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) + b1, b2, b3 := dc.Nodes[0], dc.Nodes[1], dc.Nodes[2] - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) - - // choose key where b1,b2,b3 ordering fixed - key := "tie" - for i := range 3000 { - cand := fmt.Sprintf("tie%d", i) - - owners := b1.DebugOwners(cand) - if len(owners) == 3 && owners[0] == b1.LocalNodeID() && owners[1] == b2.LocalNodeID() && owners[2] == b3.LocalNodeID() { - key = cand - - break - } - } + key := findOrderedThreeOwnerKeyPrefix(b1, []cluster.NodeID{b1.LocalNodeID(), b2.LocalNodeID(), b3.LocalNodeID()}, "tie") - // primary write to establish version=1 origin=b1 + // Primary write to establish version=1, origin=b1. err := b1.Set(context.Background(), &cache.Item{Key: key, Value: "v1"}) if err != nil { t.Fatalf("initial set: %v", err) } - // Inject a fake item on b2 with SAME version but lexicographically larger origin so it should lose. + // Inject a fake item on b2 with the SAME version but a lexicographically + // larger origin — under the tie-break rule it should lose. b2.DebugDropLocal(key) b2.DebugInject(&cache.Item{Key: key, Value: "alt", Version: 1, Origin: "zzzz"}) @@ -95,7 +46,6 @@ func TestDistMemoryVersionTieBreak(t *testing.T) { //nolint:paralleltest t.Fatalf("expected b1 value win, got %v", it.Value) } - // Ensure b2 repaired to winning value. if it2, ok2 := b2.Get(context.Background(), key); !ok2 || it2.Value != "v1" { t.Fatalf("expected repaired tie-break value on b2") } diff --git a/tests/hypercache_distmemory_versioning_test.go b/tests/hypercache_distmemory_versioning_test.go index 44227f4..f99b637 100644 --- a/tests/hypercache_distmemory_versioning_test.go +++ b/tests/hypercache_distmemory_versioning_test.go @@ -13,71 +13,57 @@ import ( cache "github.com/hyp3rd/hypercache/pkg/cache/v2" ) -// TestDistMemoryVersioningQuorum ensures higher version wins and quorum enforcement works. 
-func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest - interval := 10 * time.Millisecond - ring := cluster.NewRing(cluster.WithReplication(3)) - membership := cluster.NewMembership(ring) - transport := backend.NewInProcessTransport() - - // three nodes - n1 := cluster.NewNode("", "n1:0") - n2 := cluster.NewNode("", "n2:0") - n3 := cluster.NewNode("", "n3:0") - - // enable quorum read + write consistency on b1 - b1i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n1), - backend.WithDistTransport(transport), - backend.WithDistReplication(3), - backend.WithDistHeartbeat(interval, 0, 0), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - ) - b2i, _ := backend.NewDistMemory(context.TODO(), backend.WithDistMembership(membership, n2), backend.WithDistTransport(transport)) - b3i, _ := backend.NewDistMemory( - context.TODO(), - backend.WithDistMembership(membership, n3), - backend.WithDistTransport(transport), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - ) - b1, ok := b1i.(*backend.DistMemory) +// findOrderedThreeOwnerKey brute-forces a key whose ring ownership ordering +// is exactly [n1, n2, n3]. This makes the versioning-quorum test deterministic +// — without it, forwarding paths vary by hash and obscure the assertion. +func findOrderedThreeOwnerKey(node *backend.DistMemory, want []cluster.NodeID) string { + return findOrderedThreeOwnerKeyPrefix(node, want, "k") +} - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } +// findOrderedThreeOwnerKeyPrefix is the prefix-parameterized variant of +// findOrderedThreeOwnerKey — used by tests that want their fixture keys to +// share a common prefix for log readability. +func findOrderedThreeOwnerKeyPrefix(node *backend.DistMemory, want []cluster.NodeID, prefix string) string { + for i := range 3000 { + cand := fmt.Sprintf("%s%d", prefix, i) - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } + owners := node.DebugOwners(cand) + if len(owners) != len(want) { + continue + } - b3, ok := b3i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b3i to *backend.DistMemory") + matches := true + + for j, id := range want { + if owners[j] != id { + matches = false + + break + } + } + + if matches { + return cand + } } - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - StopOnCleanup(t, b3) + return prefix +} - transport.Register(b1) - transport.Register(b2) - transport.Register(b3) +// TestDistMemoryVersioningQuorum ensures higher version wins and quorum enforcement works. +func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest // mutates shared transport + const interval = 10 * time.Millisecond - // Find a deterministic key where ownership ordering is b1,b2,b3 to avoid forwarding complexities. 
- key := "k" - for i := range 2000 { // brute force - cand := fmt.Sprintf("k%d", i) + dc := SetupInProcessCluster(t, 3, + backend.WithDistReplication(3), + backend.WithDistHeartbeat(interval, 0, 0), + backend.WithDistReadConsistency(backend.ConsistencyQuorum), + backend.WithDistWriteConsistency(backend.ConsistencyQuorum), + ) - owners := b1.DebugOwners(cand) - if len(owners) == 3 && owners[0] == b1.LocalNodeID() && owners[1] == b2.LocalNodeID() && owners[2] == b3.LocalNodeID() { - key = cand + b1, b2, b3 := dc.Nodes[0], dc.Nodes[1], dc.Nodes[2] - break - } - } + key := findOrderedThreeOwnerKey(b1, []cluster.NodeID{b1.LocalNodeID(), b2.LocalNodeID(), b3.LocalNodeID()}) // Write key via primary. item1 := &cache.Item{Key: key, Value: "v1"} @@ -87,7 +73,7 @@ func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest t.Fatalf("initial set: %v", err) } - // Simulate a concurrent stale write from another node with lower version (manual injection on b2). + // Simulate a concurrent stale write on b2 (lower version, different origin). itemStale := &cache.Item{Key: key, Value: "v0", Version: 0, Origin: "zzz"} b2.DebugDropLocal(key) b2.DebugInject(itemStale) @@ -102,13 +88,12 @@ func TestDistMemoryVersioningQuorum(t *testing.T) { //nolint:paralleltest t.Fatalf("expected value v1, got %v", it.Value) } - // Ensure b2 repaired. if it2, ok2 := b2.Get(context.Background(), key); !ok2 || it2.Value != "v1" { t.Fatalf("expected repaired value on b2") } // Simulate reduced acks: unregister one replica and perform write requiring quorum (2 of 3). - transport.Unregister(string(n3.ID)) + dc.Transport.Unregister(string(b3.LocalNodeID())) item2 := &cache.Item{Key: key, Value: "v2"} diff --git a/tests/hypercache_distmemory_write_quorum_test.go b/tests/hypercache_distmemory_write_quorum_test.go index c744c1a..9a71ea9 100644 --- a/tests/hypercache_distmemory_write_quorum_test.go +++ b/tests/hypercache_distmemory_write_quorum_test.go @@ -17,54 +17,19 @@ import ( func TestWriteQuorumSuccess(t *testing.T) { t.Parallel() - ctx := context.Background() - transport := backend.NewInProcessTransport() - - // replication=3, write consistency QUORUM - opts := []backend.DistMemoryOption{ + dc := SetupInProcessClusterRF(t, 3, 2, backend.WithDistReplication(2), backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - } - - a, _ := backend.NewDistMemory(ctx, append(opts, backend.WithDistNode("A", "A"))...) - b, _ := backend.NewDistMemory(ctx, append(opts, backend.WithDistNode("B", "B"))...) - c, _ := backend.NewDistMemory(ctx, append(opts, backend.WithDistNode("C", "C"))...) 
- - da, ok := any(a).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", a) - } - - db, ok := any(b).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", b) - } - - dc, ok := any(c).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", c) - } - - StopOnCleanup(t, da) - StopOnCleanup(t, db) - StopOnCleanup(t, dc) - - da.SetTransport(transport) - db.SetTransport(transport) - dc.SetTransport(transport) - transport.Register(da) - transport.Register(db) - transport.Register(dc) + ) item := &cache.Item{Key: "k1", Value: "v1"} - err := a.Set(ctx, item) - if err != nil { // should succeed with quorum (all up) + err := dc.Nodes[0].Set(context.Background(), item) + if err != nil { t.Fatalf("expected success, got %v", err) } - // metrics assertions (writeAttempts >=1, writeQuorumFailures stays 0) - metrics := da.Metrics() + metrics := dc.Nodes[0].Metrics() if metrics.WriteAttempts < 1 { t.Fatalf("expected WriteAttempts >=1, got %d", metrics.WriteAttempts) } @@ -78,78 +43,28 @@ func TestWriteQuorumSuccess(t *testing.T) { func TestWriteQuorumFailure(t *testing.T) { t.Parallel() - ctx := context.Background() - transport := backend.NewInProcessTransport() - - // Shared ring/membership so ownership is identical across nodes. - ring := cluster.NewRing(cluster.WithReplication(3)) - m := cluster.NewMembership(ring) - m.Upsert(cluster.NewNode("A", "A")) - m.Upsert(cluster.NewNode("B", "B")) - m.Upsert(cluster.NewNode("C", "C")) - - opts := []backend.DistMemoryOption{ + dc := SetupInProcessCluster(t, 3, backend.WithDistReplication(3), backend.WithDistWriteConsistency(backend.ConsistencyAll), backend.WithDistHintTTL(time.Minute), - backend.WithDistHintReplayInterval(50 * time.Millisecond), - } + backend.WithDistHintReplayInterval(50*time.Millisecond), + ) - // Create three nodes but only register two with transport to force ALL failure. - na, _ := backend.NewDistMemory( - ctx, - append(opts, backend.WithDistNode("A", "A"), backend.WithDistMembership(m, cluster.NewNode("A", "A")))...) - nb, _ := backend.NewDistMemory( - ctx, - append(opts, backend.WithDistNode("B", "B"), backend.WithDistMembership(m, cluster.NewNode("B", "B")))...) - - nc, _ := backend.NewDistMemory( - ctx, - append(opts, backend.WithDistNode("C", "C"), backend.WithDistMembership(m, cluster.NewNode("C", "C")))...) - - da, ok := any(na).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", na) - } - - db, ok := any(nb).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", nb) - } + // Force ALL-consistency failure by dropping the third node from the + // transport — its acks will never arrive, so the write must fail. + thirdNode := dc.Nodes[2] + dc.Transport.Unregister(string(thirdNode.LocalNodeID())) - dc, ok := any(nc).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", nc) - } - - StopOnCleanup(t, da) - StopOnCleanup(t, db) - StopOnCleanup(t, dc) - - da.SetTransport(transport) - db.SetTransport(transport) - transport.Register(da) - transport.Register(db) // C intentionally left unregistered to simulate it being offline - - // Find a key whose owners include all three nodes (replication=3 ensures this) – just brute force until order stable. 
- key := "quorum-all-fail" - for i := range 50 { // try some keys to ensure A is primary sometimes; not strictly required - candidate := fmt.Sprintf("quorum-all-fail-%d", i) - - owners := da.Ring().Lookup(candidate) - if len(owners) == 3 && string(owners[0]) == "A" { // prefer A primary for clarity - key = candidate - - break - } - } + // Pick a key with all three nodes as owners (replication=3 guarantees + // it but ordering varies — pick one with the first node as primary for + // log clarity). + key := pickAllOwnerKey(dc.Nodes[0], dc.Nodes[0].LocalNodeID(), 50) item := &cache.Item{Key: key, Value: "v-fail"} - err := na.Set(ctx, item) + err := dc.Nodes[0].Set(context.Background(), item) if !errors.Is(err, sentinel.ErrQuorumFailed) { - // Provide ring owners for debugging. - owners := da.Ring().Lookup(key) + owners := dc.Ring.Lookup(key) ids := make([]string, 0, len(owners)) for _, o := range owners { @@ -159,12 +74,29 @@ func TestWriteQuorumFailure(t *testing.T) { t.Fatalf("expected ErrQuorumFailed, got %v (owners=%v)", err, ids) } - metrics := da.Metrics() + metrics := dc.Nodes[0].Metrics() if metrics.WriteQuorumFailures < 1 { t.Fatalf("expected WriteQuorumFailures >=1, got %d", metrics.WriteQuorumFailures) } - if metrics.WriteAttempts < 1 { // should have attempted at least once + if metrics.WriteAttempts < 1 { t.Fatalf("expected WriteAttempts >=1, got %d", metrics.WriteAttempts) } } + +// pickAllOwnerKey scans candidate keys until it finds one with the desired +// primary owner — used by quorum tests to keep the assertion clear. +func pickAllOwnerKey(node *backend.DistMemory, wantPrimary cluster.NodeID, attempts int) string { + const fallback = "quorum-all-fail" + + for i := range attempts { + candidate := fmt.Sprintf("quorum-all-fail-%d", i) + + owners := node.Ring().Lookup(candidate) + if len(owners) == 3 && owners[0] == wantPrimary { + return candidate + } + } + + return fallback +} diff --git a/tests/hypercache_get_multiple_test.go b/tests/hypercache_get_multiple_test.go index 3936d56..c7056a1 100644 --- a/tests/hypercache_get_multiple_test.go +++ b/tests/hypercache_get_multiple_test.go @@ -25,36 +25,36 @@ func TestGetMultiple(t *testing.T) { }{ { name: "get multiple keys with values", - keys: []string{"key1", "key2", "key3"}, - wantValues: map[string]any{"key1": 1, "key2": 2, "key3": 3}, + keys: []string{testKey1, testKey2, testKey3}, + wantValues: map[string]any{testKey1: 1, testKey2: 2, testKey3: 3}, wantErrs: map[string]error{}, setup: func(cache *hypercache.HyperCache[backend.InMemory]) { - _ = cache.Set(context.TODO(), "key1", 1, 0) - _ = cache.Set(context.TODO(), "key2", 2, 0) - _ = cache.Set(context.TODO(), "key3", 3, 0) + _ = cache.Set(context.TODO(), testKey1, 1, 0) + _ = cache.Set(context.TODO(), testKey2, 2, 0) + _ = cache.Set(context.TODO(), testKey3, 3, 0) }, }, { name: "get multiple keys with missing values", - keys: []string{"key1", "key2", "key3"}, - wantValues: map[string]any{"key1": 1, "key3": 3}, - wantErrs: map[string]error{"key2": sentinel.ErrKeyNotFound}, + keys: []string{testKey1, testKey2, testKey3}, + wantValues: map[string]any{testKey1: 1, testKey3: 3}, + wantErrs: map[string]error{testKey2: sentinel.ErrKeyNotFound}, setup: func(cache *hypercache.HyperCache[backend.InMemory]) { - _ = cache.Set(context.TODO(), "key1", 1, 0) - _ = cache.Set(context.TODO(), "key3", 3, 0) + _ = cache.Set(context.TODO(), testKey1, 1, 0) + _ = cache.Set(context.TODO(), testKey3, 3, 0) }, }, { name: "get multiple keys with expired values", - keys: []string{"key1", "key2", 
"key3"}, - wantValues: map[string]any{"key2": 2, "key3": 3}, - wantErrs: map[string]error{"key1": sentinel.ErrKeyNotFound}, + keys: []string{testKey1, testKey2, testKey3}, + wantValues: map[string]any{testKey2: 2, testKey3: 3}, + wantErrs: map[string]error{testKey1: sentinel.ErrKeyNotFound}, setup: func(cache *hypercache.HyperCache[backend.InMemory]) { - _ = cache.Set(context.TODO(), "key1", 1, time.Millisecond) + _ = cache.Set(context.TODO(), testKey1, 1, time.Millisecond) time.Sleep(2 * time.Millisecond) - _ = cache.Set(context.TODO(), "key2", 2, 0) - _ = cache.Set(context.TODO(), "key3", 3, 0) + _ = cache.Set(context.TODO(), testKey2, 2, 0) + _ = cache.Set(context.TODO(), testKey3, 3, 0) }, }, } diff --git a/tests/hypercache_get_or_set_test.go b/tests/hypercache_get_or_set_test.go index 5ecac62..b702507 100644 --- a/tests/hypercache_get_or_set_test.go +++ b/tests/hypercache_get_or_set_test.go @@ -10,105 +10,95 @@ import ( "github.com/hyp3rd/hypercache" "github.com/hyp3rd/hypercache/internal/sentinel" + "github.com/hyp3rd/hypercache/pkg/backend" ) +type hyperCacheGetOrSetCase struct { + name string + key string + value any + expiry time.Duration + expectedValue any + expectedErr error +} + +func runGetOrSetCase(t *testing.T, cache *hypercache.HyperCache[backend.InMemory], tc hyperCacheGetOrSetCase) { + t.Helper() + + shouldExpire := errors.Is(tc.expectedErr, sentinel.ErrKeyExpired) + + val, err := cache.GetOrSet(context.TODO(), tc.key, tc.value, tc.expiry) + if !shouldExpire { + require.Equal(t, tc.expectedErr, err) + } + + if err == nil && !shouldExpire { + require.Equal(t, tc.expectedValue, val) + } + + if shouldExpire { + t.Log("sleeping for 2 Millisecond to allow the key to expire") + time.Sleep(2 * time.Millisecond) + + _, err = cache.GetOrSet(context.TODO(), tc.key, tc.value, tc.expiry) + require.Equal(t, tc.expectedErr, err) + } + + gotVal, ok := cache.Get(context.TODO(), tc.key) + + if err == nil { + require.True(t, ok) + require.Equal(t, tc.expectedValue, gotVal) + + return + } + + require.False(t, ok) + require.Nil(t, gotVal) +} + func TestHyperCache_GetOrSet(t *testing.T) { //nolint:paralleltest // subtests share cache instance and depend on insertion order - tests := []struct { - name string - key string - value any - expiry time.Duration - expectedValue any - expectedErr error - }{ + tests := []hyperCacheGetOrSetCase{ { name: "get or set with valid key and value", - key: "key1", - value: "value1", - expiry: 0, - expectedValue: "value1", - expectedErr: nil, + key: testKey1, + value: testValue1, + expectedValue: testValue1, }, { name: "get or set with valid key and value with expiry", - key: "key2", - value: "value2", + key: testKey2, + value: testValue2, expiry: time.Second, - expectedValue: "value2", - expectedErr: nil, + expectedValue: testValue2, }, - // { - // name: "get or set with empty key", - // key: "", - // value: "value3", - // expiry: 0, - // expectedValue: nil, - // expectedErr: hypercache.ErrInvalidKey, - // }, { - name: "get or set with nil value", - key: "key4", - value: nil, - expiry: 0, - expectedValue: nil, - expectedErr: sentinel.ErrNilValue, + name: "get or set with nil value", + key: "key4", + value: nil, + expectedErr: sentinel.ErrNilValue, }, { - name: "get or set with key that has expired", - key: "key5", - value: "value5", - expiry: time.Millisecond, - expectedValue: nil, - expectedErr: sentinel.ErrKeyExpired, + name: "get or set with key that has expired", + key: "key5", + value: "value5", + expiry: time.Millisecond, + expectedErr: 
sentinel.ErrKeyExpired, }, { name: "get or set with key that already exists", - key: "key1", + key: testKey1, value: "value6", - expiry: 0, - expectedValue: "value1", - expectedErr: nil, + expectedValue: testValue1, }, } + cache, err := hypercache.NewInMemoryWithDefaults(context.TODO(), 10) require.NoError(t, err) - for _, test := range tests { //nolint:paralleltest // subtests share cache instance - t.Run(test.name, func(t *testing.T) { - var ( - val any - err error - ) - - shouldExpire := errors.Is(test.expectedErr, sentinel.ErrKeyExpired) - - val, err = cache.GetOrSet(context.TODO(), test.key, test.value, test.expiry) - if !shouldExpire { - require.Equal(t, test.expectedErr, err) - } - - if err == nil && !shouldExpire { - require.Equal(t, test.expectedValue, val) - } - - if shouldExpire { - t.Log("sleeping for 2 Millisecond to allow the key to expire") - time.Sleep(2 * time.Millisecond) - - _, err = cache.GetOrSet(context.TODO(), test.key, test.value, test.expiry) - require.Equal(t, test.expectedErr, err) - } - - // Check if the value has been set in the cache - if err == nil { - val, ok := cache.Get(context.TODO(), test.key) - require.True(t, ok) - require.Equal(t, test.expectedValue, val) - } else { - val, ok := cache.Get(context.TODO(), test.key) - require.False(t, ok) - require.Nil(t, val) - } + for _, tc := range tests { //nolint:paralleltest // subtests share cache instance + t.Run(tc.name, func(t *testing.T) { + runGetOrSetCase(t, cache, tc) }) } } diff --git a/tests/hypercache_get_test.go b/tests/hypercache_get_test.go index 1912e14..10be357 100644 --- a/tests/hypercache_get_test.go +++ b/tests/hypercache_get_test.go @@ -9,85 +9,82 @@ import ( "github.com/hyp3rd/hypercache" "github.com/hyp3rd/hypercache/internal/sentinel" + "github.com/hyp3rd/hypercache/pkg/backend" ) +type hyperCacheGetCase struct { + name string + key string + value any + expiry time.Duration + expectedValue any + expectedErr error + sleep time.Duration + shouldSet bool +} + +func runHyperCacheGetCase(t *testing.T, cache *hypercache.HyperCache[backend.InMemory], tc hyperCacheGetCase) { + t.Helper() + + if tc.shouldSet { + err := cache.Set(context.TODO(), tc.key, tc.value, tc.expiry) + if err != nil { + require.Equal(t, tc.expectedErr, err) + } + + if tc.sleep > 0 { + time.Sleep(tc.sleep) + } + } + + val, ok := cache.Get(context.TODO(), tc.key) + if tc.expectedErr != nil || !ok { + require.False(t, ok) + + return + } + + require.True(t, ok) + require.Equal(t, tc.expectedValue, val) +} + func TestHyperCache_Get(t *testing.T) { //nolint:paralleltest // subtests share cache instance - tests := []struct { - name string - key string - value any - expiry time.Duration - expectedValue any - expectedErr error - sleep time.Duration - shouldSet bool - }{ + tests := []hyperCacheGetCase{ { name: "get with valid key", - key: "key1", - value: "value1", + key: testKey1, + value: testValue1, expiry: 0, - expectedValue: "value1", - expectedErr: nil, + expectedValue: testValue1, }, { name: "get with valid key and value with expiry", - key: "key2", - value: "value2", + key: testKey2, + value: testValue2, expiry: 5 * time.Second, - expectedValue: "value2", - expectedErr: nil, + expectedValue: testValue2, }, - // { - // name: "get with empty key", - // key: "", - // value: "value3", - // expiry: 0, - // expectedValue: "", - // expectedErr: hypercache.ErrInvalidKey, - // }, { - name: "get with expired key", - key: "key4", - value: "value4", - expiry: 1 * time.Second, - expectedValue: nil, - expectedErr: nil, - sleep: 2 * 
time.Second, + name: "get with expired key", + key: "key4", + value: "value4", + expiry: 1 * time.Second, + sleep: 2 * time.Second, }, { - name: "get with non-existent key", - key: "key5", - value: "value5", - expiry: 0, - expectedValue: nil, - expectedErr: sentinel.ErrKeyNotFound, - shouldSet: false, + name: "get with non-existent key", + key: "key5", + value: "value5", + expectedErr: sentinel.ErrKeyNotFound, }, } + cache, err := hypercache.NewInMemoryWithDefaults(context.TODO(), 10) require.NoError(t, err) - for _, test := range tests { //nolint:paralleltest // subtests share cache instance - t.Run(test.name, func(t *testing.T) { - if test.shouldSet { - err = cache.Set(context.TODO(), test.key, test.value, test.expiry) - if err != nil { - require.Equal(t, test.expectedErr, err) - } - - if test.sleep > 0 { - time.Sleep(test.sleep) - } - } - - val, ok := cache.Get(context.TODO(), test.key) - if test.expectedErr != nil || !ok { - require.False(t, ok) - } else { - require.True(t, ok) - require.Equal(t, test.expectedValue, val) - } + for _, tc := range tests { //nolint:paralleltest // subtests share cache instance + t.Run(tc.name, func(t *testing.T) { + runHyperCacheGetCase(t, cache, tc) }) } } diff --git a/tests/hypercache_http_merkle_test.go b/tests/hypercache_http_merkle_test.go index d34672a..d2fbf18 100644 --- a/tests/hypercache_http_merkle_test.go +++ b/tests/hypercache_http_merkle_test.go @@ -11,58 +11,81 @@ import ( cache "github.com/hyp3rd/hypercache/pkg/cache/v2" ) +// newHTTPMerkleNode constructs a DistMemory backend with an HTTP server +// listening on the supplied address — used by the HTTP-transport merkle +// tests where in-process transport isn't enough. +// +// Construction uses context.Background() rather than a caller-supplied ctx +// because Stop runs from t.Cleanup at end-of-test, where the test ctx may +// already be canceled — propagating the same ctx into Stop would leak the +// HTTP listener. See StopOnCleanup for the same rationale. +func newHTTPMerkleNode(t *testing.T, membership *cluster.Membership, id, addr string) *backend.DistMemory { + t.Helper() + + node := cluster.NewNode("", addr) + + bi, err := backend.NewDistMemory(context.Background(), + backend.WithDistMembership(membership, node), + backend.WithDistNode(id, addr), + backend.WithDistMerkleChunkSize(2), + ) + if err != nil { + t.Fatalf("new %s: %v", id, err) + } + + b, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bi) + } + + StopOnCleanup(t, b) + + return b +} + +// waitForMerkleEndpoint polls a node's /internal/merkle HTTP endpoint until +// it responds 200. Under -race the fiber listener can take seconds to start +// accepting after Listen() returns, so a one-shot Get would race. +func waitForMerkleEndpoint(ctx context.Context, baseURL string, timeout time.Duration) bool { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/internal/merkle", nil) + if err != nil { + return false + } + + resp, err := http.DefaultClient.Do(req) + if err == nil { + _ = resp.Body.Close() + + if resp.StatusCode == http.StatusOK { + return true + } + } + + time.Sleep(50 * time.Millisecond) + } + + return false +} + // TestHTTPFetchMerkle ensures HTTP transport can fetch a remote Merkle tree and SyncWith works over HTTP. 
func TestHTTPFetchMerkle(t *testing.T) { t.Parallel() ctx := context.Background() - // shared ring/membership ring := cluster.NewRing(cluster.WithReplication(1)) membership := cluster.NewMembership(ring) - // create two nodes with HTTP server enabled (dynamically allocated addresses) addr1 := AllocatePort(t) addr2 := AllocatePort(t) - n1 := cluster.NewNode("", addr1) - - b1i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n1), - backend.WithDistNode("n1", addr1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b1: %v", err) - } - - n2 := cluster.NewNode("", addr2) - - b2i, err := backend.NewDistMemory(ctx, - backend.WithDistMembership(membership, n2), - backend.WithDistNode("n2", addr2), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("b2: %v", err) - } + b1 := newHTTPMerkleNode(t, membership, "n1", addr1) + b2 := newHTTPMerkleNode(t, membership, "n2", addr2) - b1, ok := b1i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b1i to *backend.DistMemory") - } - - b2, ok := b2i.(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b2i to *backend.DistMemory") - } - - StopOnCleanup(t, b1) - StopOnCleanup(t, b2) - - // HTTP transport resolver maps node IDs to http base URLs. resolver := func(id string) (string, bool) { - switch id { // node IDs same as provided + switch id { case "n1": return "http://" + b1.LocalNodeAddr(), true case "n2": @@ -71,62 +94,35 @@ func TestHTTPFetchMerkle(t *testing.T) { return "", false } + // 5s transport timeout (was 2s) — under -race the fiber listener can take // >2s to accept its first request, which made SyncWith time out spuriously. transport := backend.NewDistHTTPTransport(5*time.Second, resolver) b1.SetTransport(transport) b2.SetTransport(transport) - // ensure membership has both before writes (already upserted in constructors) - // write some keys to b1 only - for i := range 5 { // direct inject to sidestep replication/forwarding complexity + // Direct inject on b1 to sidestep replication/forwarding complexity. + for i := range 5 { item := &cache.Item{Key: httpKey(i), Value: []byte("v"), Version: uint64(i + 1), Origin: "n1", LastUpdated: time.Now()} b1.DebugInject(item) } - // Poll the HTTP merkle endpoint until it actually responds 200. Under - // -race the fiber listener can take seconds to start accepting requests - // even after Listen() returns; a single-shot Get is racy. - merkleReady := false - - deadline := time.Now().Add(10 * time.Second) - for time.Now().Before(deadline) { - req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, "http://"+b1.LocalNodeAddr()+"/internal/merkle", nil) - if reqErr != nil { - t.Fatalf("build merkle request: %v", reqErr) - } - - resp, getErr := http.DefaultClient.Do(req) - if getErr == nil { - _ = resp.Body.Close() - - if resp.StatusCode == http.StatusOK { - merkleReady = true - - break - } - } - - time.Sleep(50 * time.Millisecond) - } - - if !merkleReady { + if !waitForMerkleEndpoint(ctx, "http://"+b1.LocalNodeAddr(), 10*time.Second) { t.Fatal("merkle endpoint did not become ready within deadline") } - // b2 sync from b1 via HTTP transport - if err := b2.SyncWith(ctx, "n1"); err != nil { - t.Fatalf("sync: %v", err) + syncErr := b2.SyncWith(ctx, "n1") + if syncErr != nil { + t.Fatalf("sync: %v", syncErr) } - // Validate keys present on b2. Allow brief retry to absorb any async tail - // in sync's apply path (each missing key is retried once). + // Validate keys present on b2. 
Each missing key is retried once to absorb
+	// any async tail in sync's apply path.
 	for i := range 5 {
 		if _, ok := b2.Get(ctx, httpKey(i)); ok {
 			continue
 		}
 
-		// One retry: re-sync and check again.
 		err := b2.SyncWith(ctx, "n1")
 		if err != nil {
 			t.Fatalf("re-sync: %v", err)
diff --git a/tests/hypercache_mgmt_dist_test.go b/tests/hypercache_mgmt_dist_test.go
index ecf5b5a..77bcb56 100644
--- a/tests/hypercache_mgmt_dist_test.go
+++ b/tests/hypercache_mgmt_dist_test.go
@@ -14,15 +14,35 @@ import (
 	"github.com/hyp3rd/hypercache/pkg/backend"
 )
 
+// assertEndpointHasField fetches url as JSON and asserts that the
+// response contains `field`. If the response carries an error label
+// (constants.ErrorLabel), the test fatals with its value — this surfaces
+// 404/500 from a misconfigured backend instead of the misleading "missing
+// field" assertion.
+func assertEndpointHasField(t *testing.T, url, field string) {
+	t.Helper()
+
+	body := getJSON(t, url)
+	if _, ok := body[field]; ok {
+		return
+	}
+
+	if e, hasErr := body[constants.ErrorLabel]; hasErr {
+		t.Fatalf("%s returned error: %v", url, e)
+	}
+
+	t.Errorf("%s missing %s field", url, field)
+}
+
 // TestManagementHTTPDistMemory validates management endpoints for the experimental DistMemory backend.
-func TestManagementHTTPDistMemory(t *testing.T) { //nolint:paralleltest
+func TestManagementHTTPDistMemory(t *testing.T) { //nolint:paralleltest // mgmt server bound to fixed port
 	cfg, err := hypercache.NewConfig[backend.DistMemory](constants.DistMemoryBackend)
 	if err != nil {
 		t.Fatalf("NewConfig: %v", err)
 	}
 
 	cfg.HyperCacheOptions = append(cfg.HyperCacheOptions,
-		hypercache.WithManagementHTTP[backend.DistMemory]("127.0.0.1:0"), // ephemeral port
+		hypercache.WithManagementHTTP[backend.DistMemory]("127.0.0.1:0"),
 	)
 	cfg.DistMemoryOptions = []backend.DistMemoryOption{
 		backend.WithDistReplication(1),
@@ -39,14 +59,11 @@ func TestManagementHTTPDistMemory(t *testing.T) { //nolint:paralleltest
 	baseURL := waitForMgmt(t, hc)
 
-	// Insert a key to exercise owners endpoint. 
err = hc.Set(context.Background(), "alpha", "value", 0) if err != nil { - // not fatal for owners shape but should succeed given replication=1 t.Fatalf("set alpha: %v", err) } - // /config should include replication + virtualNodesPerNode configBody := getJSON(t, baseURL+"/config") if _, ok := configBody["replication"]; !ok { t.Errorf("/config missing replication") @@ -56,47 +73,10 @@ func TestManagementHTTPDistMemory(t *testing.T) { //nolint:paralleltest t.Errorf("/config missing virtualNodesPerNode") } - // /dist/metrics basic shape - metricsBody := getJSON(t, baseURL+"/dist/metrics") - if _, ok := metricsBody["ForwardGet"]; !ok { // one exported field - // could be 404 if backend unsupported (should not be here) - if e, hasErr := metricsBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/dist/metrics returned error: %v", e) - } - - // else fail - t.Errorf("/dist/metrics missing ForwardGet field") - } - - // /dist/owners - ownersBody := getJSON(t, baseURL+"/dist/owners?key=alpha") - if _, ok := ownersBody["owners"]; !ok { - if e, hasErr := ownersBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/dist/owners returned error: %v", e) - } - - t.Errorf("/dist/owners missing owners field") - } - - // /cluster/members - membersBody := getJSON(t, baseURL+"/cluster/members") - if _, ok := membersBody["members"]; !ok { - if e, hasErr := membersBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/cluster/members returned error: %v", e) - } - - t.Errorf("/cluster/members missing members field") - } - - // /cluster/ring - ringBody := getJSON(t, baseURL+"/cluster/ring") - if _, ok := ringBody["vnodes"]; !ok { - if e, hasErr := ringBody[constants.ErrorLabel]; hasErr { - t.Fatalf("/cluster/ring returned error: %v", e) - } - - t.Errorf("/cluster/ring missing vnodes field") - } + assertEndpointHasField(t, baseURL+"/dist/metrics", "ForwardGet") + assertEndpointHasField(t, baseURL+"/dist/owners?key=alpha", "owners") + assertEndpointHasField(t, baseURL+"/cluster/members", "members") + assertEndpointHasField(t, baseURL+"/cluster/ring", "vnodes") } // waitForMgmt waits until management HTTP server is bound and responsive. diff --git a/tests/hypercache_set_test.go b/tests/hypercache_set_test.go index f5b14bb..4b3d8e0 100644 --- a/tests/hypercache_set_test.go +++ b/tests/hypercache_set_test.go @@ -11,6 +11,17 @@ import ( "github.com/hyp3rd/hypercache/internal/sentinel" ) +// Test fixture keys/values shared across the table-driven Set/Get/GetOrSet/ +// GetMultiple tests in this package. Extracted as constants to satisfy +// `goconst` and to reduce typo risk when adding new subtests. 
+const ( + testKey1 = "key1" + testKey2 = "key2" + testKey3 = "key3" + testValue1 = "value1" + testValue2 = "value2" +) + func TestHyperCache_Set(t *testing.T) { //nolint:paralleltest // subtests share cache instance and depend on insertion order tests := []struct { name string @@ -22,18 +33,18 @@ func TestHyperCache_Set(t *testing.T) { //nolint:paralleltest // subtests share }{ { name: "set with valid key and value", - key: "key1", - value: "value1", + key: testKey1, + value: testValue1, expiry: 0, - expectedValue: "value1", + expectedValue: testValue1, expectedErr: nil, }, { name: "set with valid key and value with expiry", - key: "key2", - value: "value2", + key: testKey2, + value: testValue2, expiry: time.Second, - expectedValue: "value2", + expectedValue: testValue2, expectedErr: nil, }, { @@ -54,7 +65,7 @@ func TestHyperCache_Set(t *testing.T) { //nolint:paralleltest // subtests share }, { name: "overwrite existing key", - key: "key1", + key: testKey1, value: "new_value1", expiry: 0, expectedValue: "new_value1", @@ -62,7 +73,7 @@ func TestHyperCache_Set(t *testing.T) { //nolint:paralleltest // subtests share }, { name: "update expiry of existing key", - key: "key1", + key: testKey1, value: "new_value1", expiry: time.Second, expectedValue: "new_value1", diff --git a/tests/integration/dist_phase1_test.go b/tests/integration/dist_phase1_test.go index d751106..a93eb17 100644 --- a/tests/integration/dist_phase1_test.go +++ b/tests/integration/dist_phase1_test.go @@ -31,6 +31,50 @@ func allocatePort(tb testing.TB) string { return addr } +// makePhase1Node spins up a DistMemory backend wired for quorum reads/writes +// across three nodes — extracted so each subtest helper stays under the +// function-length budget. +func makePhase1Node(ctx context.Context, t *testing.T, id, addr string, seeds []string) *backend.DistMemory { + t.Helper() + + bm, err := backend.NewDistMemory(ctx, + backend.WithDistNode(id, addr), + backend.WithDistSeeds(seeds), + backend.WithDistReplication(3), + backend.WithDistVirtualNodes(32), + backend.WithDistHintReplayInterval(200*time.Millisecond), + backend.WithDistHintTTL(5*time.Second), + backend.WithDistReadConsistency(backend.ConsistencyQuorum), + backend.WithDistWriteConsistency(backend.ConsistencyQuorum), + ) + if err != nil { + t.Fatalf("new dist memory: %v", err) + } + + bk, ok := bm.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bm) + } + + return bk +} + +// awaitNodeReplication polls node for key until value matches or deadline +// elapses; encodes the "give replication a moment" pattern without inlining +// goto-control-flow into the test body. +func awaitNodeReplication(ctx context.Context, node *backend.DistMemory, key string, timeout time.Duration) bool { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if it, ok := node.Get(ctx, key); ok && valueOK(it.Value) { + return true + } + + time.Sleep(100 * time.Millisecond) + } + + return false +} + // TestDistPhase1BasicQuorum is a scaffolding test verifying three-node quorum Set/Get over HTTP transport. 
func TestDistPhase1BasicQuorum(t *testing.T) { t.Parallel() @@ -41,40 +85,19 @@ func TestDistPhase1BasicQuorum(t *testing.T) { addrB := allocatePort(t) addrC := allocatePort(t) - // create three nodes; we'll stop C's HTTP after start to simulate outage then restart - makeNode := func(id, addr string, seeds []string) *backend.DistMemory { - bm, err := backend.NewDistMemory(ctx, - backend.WithDistNode(id, addr), - backend.WithDistSeeds(seeds), - backend.WithDistReplication(3), - backend.WithDistVirtualNodes(32), - backend.WithDistHintReplayInterval(200*time.Millisecond), - backend.WithDistHintTTL(5*time.Second), - backend.WithDistReadConsistency(backend.ConsistencyQuorum), - backend.WithDistWriteConsistency(backend.ConsistencyQuorum), - ) - if err != nil { - t.Fatalf("new dist memory: %v", err) - } - - bk, ok := bm.(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", bm) - } - - return bk - } + nodeA := makePhase1Node(ctx, t, "A", addrA, []string{addrB, addrC}) + nodeB := makePhase1Node(ctx, t, "B", addrB, []string{addrA, addrC}) + nodeC := makePhase1Node(ctx, t, "C", addrC, []string{addrA, addrB}) - nodeA := makeNode("A", addrA, []string{addrB, addrC}) - nodeB := makeNode("B", addrB, []string{addrA, addrC}) - nodeC := makeNode("C", addrC, []string{addrA, addrB}) - // defer cleanup of A and B - defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx) }() + t.Cleanup(func() { + _ = nodeA.Stop(ctx) + _ = nodeB.Stop(ctx) + _ = nodeC.Stop(ctx) + }) // allow some time for ring initialization time.Sleep(200 * time.Millisecond) - // Perform a write expecting replication across all three nodes item := &cache.Item{Key: "k1", Value: []byte("v1"), Expiration: 0, Version: 1, Origin: "A", LastUpdated: time.Now()} err := nodeA.Set(ctx, item) @@ -82,113 +105,84 @@ func TestDistPhase1BasicQuorum(t *testing.T) { t.Fatalf("set: %v", err) } - // Quorum read from B should succeed (value may be []byte, string, or json.RawMessage) - if got, ok := nodeB.Get(ctx, "k1"); !ok { + got, ok := nodeB.Get(ctx, "k1") + if !ok { t.Fatalf("expected quorum read via B: not found") - } else { - assertValue(t, got.Value) } - // Basic propagation check loop (give replication a moment) - defer func() { _ = nodeC.Stop(ctx) }() + assertValue(t, got.Value) - deadline := time.Now().Add(3 * time.Second) - for time.Now().Before(deadline) { - if it, ok := nodeC.Get(ctx, "k1"); ok { - if valueOK(it.Value) { - goto Done - } - } - - time.Sleep(100 * time.Millisecond) + if awaitNodeReplication(ctx, nodeC, "k1", 3*time.Second) { + return } - if it, ok := nodeC.Get(ctx, "k1"); !ok { - // Not fatal yet; we only created scaffolding – mark skip for now. + it, ok := nodeC.Get(ctx, "k1") + if !ok { t.Skipf("hint replay not yet observable; will be validated after full wiring (missing item)") - } else { - if !valueOK(it.Value) { - t.Skipf("value mismatch after wait") - } + + return } -Done: + if !valueOK(it.Value) { + t.Skipf("value mismatch after wait") + } } -// valueOK returns true if the stored value matches logical "v1" across supported encodings. -func valueOK(v any) bool { //nolint:ireturn - switch x := v.(type) { - case []byte: - if string(x) == "v1" { - return true - } +// matchV1Plain is the logical value tested by valueOK below. 
+const matchV1Plain = "v1" - if s := string(x); s == "djE=" { // base64 of v1 - b, err := base64.StdEncoding.DecodeString(s) - if err == nil && string(b) == "v1" { - return true - } - } +// matchV1Base64 is the base64 encoding of "v1" — distributed transports +// sometimes round-trip values as base64 strings. +const matchV1Base64 = "djE=" // base64.StdEncoding.EncodeToString([]byte("v1")) - return false +// matchesV1 reports whether s represents logical "v1" — either as the +// plain literal, the JSON-quoted literal, or a base64 form that decodes +// to "v1". Centralizes the encoding-tolerance logic that valueOK fans out +// over its supported value types. +func matchesV1(s string) bool { + if s == matchV1Plain || s == "\""+matchV1Plain+"\"" { + return true + } - case string: - if x == "v1" { - return true - } + if s == matchV1Base64 { + b, err := base64.StdEncoding.DecodeString(s) - if x == "djE=" { // base64 form - b, err := base64.StdEncoding.DecodeString(x) - if err == nil && string(b) == "v1" { - return true - } - } + return err == nil && string(b) == matchV1Plain + } - return false + return false +} + +// valueOK returns true if the stored value matches logical "v1" across supported encodings. +func valueOK(v any) bool { + switch x := v.(type) { + case []byte: + return matchesV1(string(x)) + + case string: + return matchesV1(x) case json.RawMessage: - // could be "v1" or base64 inside quotes if len(x) == 0 { return false } - // try as string literal + // Try as JSON string literal first; fall back to raw bytes. var s string err := json.Unmarshal(x, &s) - if err == nil { - if s == "v1" { - return true - } - - if s == "djE=" { - b, err2 := base64.StdEncoding.DecodeString(s) - if err2 == nil && string(b) == "v1" { - return true - } - } - } - - // fall back to raw compare - return string(x) == "v1" || string(x) == "\"v1\"" - - default: - s := fmt.Sprintf("%v", x) - if s == "v1" || s == "\"v1\"" { + if err == nil && matchesV1(s) { return true } - if s == "djE=" { - if b, err := base64.StdEncoding.DecodeString(s); err == nil && string(b) == "v1" { - return true - } - } + return matchesV1(string(x)) - return false + default: + return matchesV1(fmt.Sprintf("%v", x)) } } -func assertValue(t *testing.T, v any) { //nolint:ireturn +func assertValue(t *testing.T, v any) { t.Helper() if !valueOK(v) { diff --git a/tests/integration/dist_rebalance_test.go b/tests/integration/dist_rebalance_test.go index 5072d14..93e68af 100644 --- a/tests/integration/dist_rebalance_test.go +++ b/tests/integration/dist_rebalance_test.go @@ -18,6 +18,34 @@ import ( // endpoint reports a non-OK status during the readiness poll. var errUnexpectedStatus = errors.New("unexpected dist node health status") +// rebalanceTestOpts is the shared option set used by TestDistRebalanceJoin +// across all three nodes — extracted so each `mustDistNode` call site +// stays a single line. +func rebalanceTestOpts() []backend.DistMemoryOption { + return []backend.DistMemoryOption{ + backend.WithDistReplication(2), + backend.WithDistVirtualNodes(32), + backend.WithDistRebalanceInterval(100 * time.Millisecond), + } +} + +// populateKeys writes n test keys to node — used to seed the cluster before +// triggering a rebalance. 
+func populateKeys(ctx context.Context, t *testing.T, node *backend.DistMemory, n int) { + t.Helper() + + for i := range n { + k := cacheKey(i) + + it := &cache.Item{Key: k, Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()} + + err := node.Set(ctx, it) + if err != nil { + t.Fatalf("set %s: %v", k, err) + } + } +} + // TestDistRebalanceJoin verifies keys are migrated to a new node after join. func TestDistRebalanceJoin(t *testing.T) { t.Parallel() @@ -28,89 +56,44 @@ func TestDistRebalanceJoin(t *testing.T) { addrA := allocatePort(t) addrB := allocatePort(t) - nodeA := mustDistNode( - ctx, - t, - "A", - addrA, - []string{addrB}, - backend.WithDistReplication(2), - backend.WithDistVirtualNodes(32), - backend.WithDistRebalanceInterval(100*time.Millisecond), - ) - - nodeB := mustDistNode( - ctx, - t, - "B", - addrB, - []string{addrA}, - backend.WithDistReplication(2), - backend.WithDistVirtualNodes(32), - backend.WithDistRebalanceInterval(100*time.Millisecond), - ) + nodeA := mustDistNode(ctx, t, "A", addrA, []string{addrB}, rebalanceTestOpts()...) + nodeB := mustDistNode(ctx, t, "B", addrB, []string{addrA}, rebalanceTestOpts()...) + defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx) }() - // Write a spread of keys via A. - totalKeys := 300 - for i := range totalKeys { - k := cacheKey(i) + const totalKeys = 300 - it := &cache.Item{Key: k, Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()} + populateKeys(ctx, t, nodeA, totalKeys) - err := nodeA.Set(ctx, it) - if err != nil { - t.Fatalf("set %s: %v", k, err) - } - } - - time.Sleep(200 * time.Millisecond) // allow initial replication + // Allow initial replication, then add third node C. + time.Sleep(200 * time.Millisecond) - // Capture ownership counts before join. skeys := sampleKeys(totalKeys) - _ = ownedPrimaryCount(nodeA, skeys) // baseline (unused currently) - _ = ownedPrimaryCount(nodeB, skeys) - - // Add third node C. addrC := allocatePort(t) + nodeC := mustDistNode(ctx, t, "C", addrC, []string{addrA, addrB}, rebalanceTestOpts()...) - nodeC := mustDistNode( - ctx, - t, - "C", - addrC, - []string{addrA, addrB}, - backend.WithDistReplication(2), - backend.WithDistVirtualNodes(32), - backend.WithDistRebalanceInterval(100*time.Millisecond), - ) defer func() { _ = nodeC.Stop(ctx) }() - // Manually inject C into A and B membership (simulating gossip propagation delay that doesn't exist yet). + // Manually propagate C into A and B membership (gossip-propagation-delay simulation). nodeA.AddPeer(addrC) nodeB.AddPeer(addrC) - - // Allow membership to propagate + several rebalance ticks. time.Sleep(1200 * time.Millisecond) - // Post-join ownership counts (sampled locally using isOwner logic via Get + Metrics ring lookup indirectly). postOwnedA := ownedPrimaryCount(nodeA, skeys) postOwnedB := ownedPrimaryCount(nodeB, skeys) postOwnedC := ownedPrimaryCount(nodeC, skeys) - // Basic sanity: new node should now own > 0 keys. if postOwnedC == 0 { t.Fatalf("expected node C to own some keys after rebalancing") } - // Distribution variance check: ensure no node has > 80% of sample (initial naive rebalance heuristic). + // Distribution variance check: no node should hold > 80% of the sample. maxAllowed := int(float64(totalKeys) * 0.80) if postOwnedA > maxAllowed || postOwnedB > maxAllowed || postOwnedC > maxAllowed { t.Fatalf("ownership still highly skewed: A=%d B=%d C=%d", postOwnedA, postOwnedB, postOwnedC) } - // Rebalance metrics should show migrations (keys forwarded off old primaries) across cluster. 
migrated := nodeA.Metrics().RebalancedKeys + nodeB.Metrics().RebalancedKeys + nodeC.Metrics().RebalancedKeys if migrated == 0 { t.Fatalf("expected some rebalanced keys (total migrated=0)") diff --git a/tests/key_owner_helper.go b/tests/key_owner_helper.go index 4d28e89..cde9a2d 100644 --- a/tests/key_owner_helper.go +++ b/tests/key_owner_helper.go @@ -10,7 +10,7 @@ import ( ) // FindOwnerKey brute forces keys until it finds one whose owner ordering matches exactly ids. -func FindOwnerKey(b *backend.DistMemory, prefix string, desired []cluster.NodeID, limit int) (string, bool) { //nolint:ireturn +func FindOwnerKey(b *backend.DistMemory, prefix string, desired []cluster.NodeID, limit int) (string, bool) { for i := range limit { cand := fmt.Sprintf("%s%d", prefix, i) diff --git a/tests/management_http_test.go b/tests/management_http_test.go index bfd89cd..1870151 100644 --- a/tests/management_http_test.go +++ b/tests/management_http_test.go @@ -14,6 +14,22 @@ import ( "github.com/hyp3rd/hypercache/pkg/backend" ) +// waitForManagementAddr polls hc.ManagementHTTPAddress until the listener +// has bound. Under -race the listener startup can take seconds, so a +// one-shot read would race. +func waitForManagementAddr(hc *hypercache.HyperCache[backend.InMemory], timeout time.Duration) string { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if addr := hc.ManagementHTTPAddress(); addr != "" { + return addr + } + + time.Sleep(10 * time.Millisecond) + } + + return "" +} + // TestManagementHTTP_BasicEndpoints spins up the management HTTP server on an ephemeral port // and validates core endpoints. func TestManagementHTTP_BasicEndpoints(t *testing.T) { @@ -35,21 +51,7 @@ func TestManagementHTTP_BasicEndpoints(t *testing.T) { t.Cleanup(func() { _ = hc.Stop(ctx) }) - // Wait for the management HTTP listener to come up. The race detector - // can push listener startup well past the original 30 ms; poll with a - // generous deadline instead. 
- var addr string - - deadline := time.Now().Add(5 * time.Second) - for time.Now().Before(deadline) { - addr = hc.ManagementHTTPAddress() - if addr != "" { - break - } - - time.Sleep(10 * time.Millisecond) - } - + addr := waitForManagementAddr(hc, 5*time.Second) if addr == "" { t.Fatal("management HTTP listener did not bind within deadline") } @@ -68,33 +70,26 @@ func TestManagementHTTP_BasicEndpoints(t *testing.T) { return resp } - // /health resp := get("/health") require.Equal(t, http.StatusOK, resp.StatusCode) _ = resp.Body.Close() - // /stats resp = get("/stats") require.Equal(t, http.StatusOK, resp.StatusCode) var statsBody map[string]any - dec := json.NewDecoder(resp.Body) - - err = dec.Decode(&statsBody) - require.NoError(t, err) + require.NoError(t, json.NewDecoder(resp.Body).Decode(&statsBody)) _ = resp.Body.Close() - // /config resp = get("/config") require.Equal(t, http.StatusOK, resp.StatusCode) var cfgBody map[string]any - dec = json.NewDecoder(resp.Body) - _ = dec.Decode(&cfgBody) + _ = json.NewDecoder(resp.Body).Decode(&cfgBody) _ = resp.Body.Close() require.NotEmpty(t, cfgBody) diff --git a/tests/merkle_delete_tombstone_test.go b/tests/merkle_delete_tombstone_test.go index 784f001..25353eb 100644 --- a/tests/merkle_delete_tombstone_test.go +++ b/tests/merkle_delete_tombstone_test.go @@ -16,36 +16,8 @@ func TestMerkleDeleteTombstone(t *testing.T) { ctx := context.Background() transport := backend.NewInProcessTransport() - a, _ := backend.NewDistMemory( - ctx, - backend.WithDistNode("A", AllocatePort(t)), - backend.WithDistReplication(1), - backend.WithDistMerkleChunkSize(2), - ) - b, _ := backend.NewDistMemory( - ctx, - backend.WithDistNode("B", AllocatePort(t)), - backend.WithDistReplication(1), - backend.WithDistMerkleChunkSize(2), - ) - - da, ok := any(a).(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast a to *backend.DistMemory") - } - - db, ok := any(b).(*backend.DistMemory) - if !ok { - t.Fatalf("failed to cast b to *backend.DistMemory") - } - - StopOnCleanup(t, da) - StopOnCleanup(t, db) - - da.SetTransport(transport) - db.SetTransport(transport) - transport.Register(da) - transport.Register(db) + da := newMerkleNode(t, transport, "A") + db := newMerkleNode(t, transport, "B") it := &cache.Item{Key: "del", Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()} da.DebugInject(it) @@ -55,30 +27,26 @@ func TestMerkleDeleteTombstone(t *testing.T) { t.Fatalf("initial sync: %v", err) } - // Now delete on A _ = da.Remove(ctx, "del") - // Ensure local A removed if val, _ := da.Get(ctx, "del"); val != nil { t.Fatalf("expected A delete") } - // Remote (B) pulls from A to learn about deletion (pull-based anti-entropy) + // B pulls from A to learn the deletion (pull-based anti-entropy). err = db.SyncWith(ctx, string(da.LocalNodeID())) if err != nil { t.Fatalf("tomb sync pull: %v", err) } - // Ensure B removed or will not resurrect on next sync if val, _ := db.Get(ctx, "del"); val != nil { t.Fatalf("expected B delete after tombstone") } - // Re-add older version on B (simulate stale write) + // Re-add older version on B (simulate stale write). 
stale := &cache.Item{Key: "del", Value: []byte("stale"), Version: 1, Origin: "B", LastUpdated: time.Now()} db.DebugInject(stale) - // Sync B with A again; B should keep deletion (not resurrect) err = db.SyncWith(ctx, string(da.LocalNodeID())) if err != nil { t.Fatalf("resync: %v", err) diff --git a/tests/merkle_node_helper.go b/tests/merkle_node_helper.go new file mode 100644 index 0000000..345cd1b --- /dev/null +++ b/tests/merkle_node_helper.go @@ -0,0 +1,38 @@ +package tests + +import ( + "context" + "testing" + + "github.com/hyp3rd/hypercache/pkg/backend" +) + +// newMerkleNode creates a single DistMemory node wired for merkle-sync tests +// — replication=1, chunk size 2, an ephemeral HTTP listener, and registered +// with the supplied transport. Construction uses context.Background() rather +// than a caller-supplied ctx because Stop runs from t.Cleanup at end-of-test +// where the test ctx may already be canceled — see StopOnCleanup for the +// same rationale. +func newMerkleNode(t *testing.T, transport *backend.InProcessTransport, id string) *backend.DistMemory { + t.Helper() + + bi, err := backend.NewDistMemory(context.Background(), + backend.WithDistNode(id, AllocatePort(t)), + backend.WithDistReplication(1), + backend.WithDistMerkleChunkSize(2), + ) + if err != nil { + t.Fatalf("new merkle node %s: %v", id, err) + } + + b, ok := bi.(*backend.DistMemory) + if !ok { + t.Fatalf("expected *backend.DistMemory, got %T", bi) + } + + StopOnCleanup(t, b) + b.SetTransport(transport) + transport.Register(b) + + return b +} diff --git a/tests/merkle_sync_test.go b/tests/merkle_sync_test.go index 7aec630..9792911 100644 --- a/tests/merkle_sync_test.go +++ b/tests/merkle_sync_test.go @@ -16,66 +16,27 @@ func TestMerkleSyncConvergence(t *testing.T) { ctx := context.Background() transport := backend.NewInProcessTransport() - bA, err := backend.NewDistMemory(ctx, - backend.WithDistNode("A", AllocatePort(t)), - backend.WithDistReplication(1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("new dist memory A: %v", err) - } - - dmA, ok := any(bA).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", bA) - } - - StopOnCleanup(t, dmA) - - bB, err := backend.NewDistMemory(ctx, - backend.WithDistNode("B", AllocatePort(t)), - backend.WithDistReplication(1), - backend.WithDistMerkleChunkSize(2), - ) - if err != nil { - t.Fatalf("new dist memory B: %v", err) - } + dmA := newMerkleNode(t, transport, "A") + dmB := newMerkleNode(t, transport, "B") - dmB, ok := any(bB).(*backend.DistMemory) - if !ok { - t.Fatalf("expected *backend.DistMemory, got %T", bB) - } - - StopOnCleanup(t, dmB) - - dmA.SetTransport(transport) - dmB.SetTransport(transport) - - // register for in-process lookups - transport.Register(dmA) - transport.Register(dmB) - - // inject divergent data (A has extra/newer) + // Inject divergent data: A holds 5 newer keys, B holds older copies of the first 2. 
for i := range 5 { it := &cache.Item{Key: keyf("k", i), Value: []byte("vA"), Version: uint64(i + 1), Origin: "A", LastUpdated: time.Now()} dmA.DebugInject(it) } - // B shares only first 2 keys older versions for i := range 2 { it := &cache.Item{Key: keyf("k", i), Value: []byte("old"), Version: uint64(i), Origin: "B", LastUpdated: time.Now()} dmB.DebugInject(it) } - // Run sync B->A to pull newer - if err := dmB.SyncWith(ctx, string(dmA.LocalNodeID())); err != nil { + syncErr := dmB.SyncWith(ctx, string(dmA.LocalNodeID())) + if syncErr != nil && testing.Verbose() { // HTTP transport fetch merkle unsupported; we rely on in-process - if testing.Verbose() { - t.Logf("sync error: %v", err) - } + t.Logf("sync error: %v", syncErr) } - // Validate B now has all 5 keys with correct versions (>= A's) + // Validate B now has all 5 keys with correct versions (>= A's). for i := range 5 { k := keyf("k", i) itA, _ := dmA.Get(ctx, k) From 2b650e1516fb019bf36137e7e442aa32edeee2b4 Mon Sep 17 00:00:00 2001 From: "F." Date: Mon, 4 May 2026 00:15:45 +0200 Subject: [PATCH 2/2] refactor(core)!: decompose hypercache.go into domain modules and modernize ConcurrentMap iterator Split the monolithic hypercache.go (~950 lines removed) into focused files: - hypercache_construct.go: backend resolution and base construction - hypercache_io.go: Set/Get/GetOrSet/GetMultiple/Remove/Clear/List - hypercache_eviction.go: eviction configuration, loop, and triggers - hypercache_expiration.go: expiration routine and trigger logic - hypercache_dist.go: distributed metrics helpers (DistMetrics, ClusterOwners, etc.) Replace ConcurrentMap's channel-based IterBuffered() with iter.Seq2-based All(), eliminating fan-in goroutines and per-shard channel allocations. Update all callers in dist_memory, dist_http_server, and inmemory backends. Upgrade hash function from inlined FNV-1a to xxhash64 (XOR-folded to 32 bits) for better avalanche and ~1-3% speedup on longer keys. Add bench-step3.txt with benchmark results on Apple M4 Pro. BREAKING CHANGE: IterBuffered() and Tuple type removed from ConcurrentMap; use All() iter.Seq2[string, *Item] instead. 
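
For downstream callers the iterator migration is mechanical. A minimal
sketch (assumptions: `m` is a ConcurrentMap value and `process` stands in
for whatever the caller did per entry; the field names on the removed
Tuple type are shown as Key/Val for illustration only):

    // Before: buffered channel of Tuple values, backed by fan-in
    // goroutines behind IterBuffered().
    for tup := range m.IterBuffered() {
        process(tup.Key, tup.Val)
    }

    // After: range directly over the iter.Seq2 returned by All();
    // no goroutines, no per-shard channel allocations.
    for key, item := range m.All() {
        process(key, item)
    }

The 32-bit shard hash is obtained by XOR-folding the 64-bit xxhash
digest, roughly as below (assuming the github.com/cespare/xxhash/v2
package; the helper name is illustrative):

    func shardHash(key string) uint32 {
        h := xxhash.Sum64String(key)

        // Fold the high and low halves so both contribute to the
        // 32-bit shard index.
        return uint32(h>>32) ^ uint32(h)
    }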
--- bench-step3.txt | 26 + hypercache.go | 951 +------------------------------- hypercache_construct.go | 206 +++++++ hypercache_dist.go | 106 ++++ hypercache_eviction.go | 182 ++++++ hypercache_expiration.go | 149 +++++ hypercache_io.go | 313 +++++++++++ pkg/backend/dist_http_server.go | 5 +- pkg/backend/dist_memory.go | 44 +- pkg/backend/inmemory.go | 8 +- pkg/cache/v2/cmap.go | 113 ++-- pkg/cache/v2/cmap_bench_test.go | 13 +- pkg/cache/v2/cmap_test.go | 53 +- pkg/cache/v2/hash.go | 49 +- 14 files changed, 1164 insertions(+), 1054 deletions(-) create mode 100644 bench-step3.txt create mode 100644 hypercache_construct.go create mode 100644 hypercache_dist.go create mode 100644 hypercache_eviction.go create mode 100644 hypercache_expiration.go create mode 100644 hypercache_io.go diff --git a/bench-step3.txt b/bench-step3.txt new file mode 100644 index 0000000..449cc1d --- /dev/null +++ b/bench-step3.txt @@ -0,0 +1,26 @@ +goos: darwin +goarch: arm64 +pkg: github.com/hyp3rd/hypercache/pkg/cache/v2 +cpu: Apple M4 Pro +BenchmarkConcurrentMap_Count-14 122877582 9.726 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_Count-14 122084272 9.828 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_Count-14 123835690 9.683 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_Count-14 122782857 9.763 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_Count-14 124295282 9.649 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_CountParallel-14 79226004 13.44 ns/op 9 B/op 0 allocs/op +BenchmarkConcurrentMap_CountParallel-14 82377501 13.32 ns/op 9 B/op 0 allocs/op +BenchmarkConcurrentMap_CountParallel-14 93374600 12.83 ns/op 9 B/op 0 allocs/op +BenchmarkConcurrentMap_CountParallel-14 74157042 14.11 ns/op 9 B/op 0 allocs/op +BenchmarkConcurrentMap_CountParallel-14 89445556 13.49 ns/op 8 B/op 0 allocs/op +BenchmarkConcurrentMap_GetShard-14 100000000 10.08 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_GetShard-14 120268629 10.01 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_GetShard-14 100000000 10.22 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_GetShard-14 100000000 10.03 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_GetShard-14 100000000 10.07 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_All-14 46454 26087 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_All-14 44976 26633 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_All-14 45357 26457 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_All-14 41589 27827 ns/op 0 B/op 0 allocs/op +BenchmarkConcurrentMap_All-14 45512 26454 ns/op 0 B/op 0 allocs/op +PASS +ok github.com/hyp3rd/hypercache/pkg/cache/v2 23.302s diff --git a/hypercache.go b/hypercache.go index 4cb5f37..31fb177 100644 --- a/hypercache.go +++ b/hypercache.go @@ -9,17 +9,12 @@ package hypercache import ( "context" - "runtime" "sync" "sync/atomic" "time" - "github.com/hyp3rd/ewrap" - "github.com/hyp3rd/sectools/pkg/converters" - "github.com/hyp3rd/hypercache/internal/constants" "github.com/hyp3rd/hypercache/internal/introspect" - "github.com/hyp3rd/hypercache/internal/sentinel" "github.com/hyp3rd/hypercache/pkg/backend" cache "github.com/hyp3rd/hypercache/pkg/cache/v2" "github.com/hyp3rd/hypercache/pkg/eviction" @@ -28,6 +23,15 @@ import ( // HyperCache stores items with a key and optional expiration. It supports multiple backends // and eviction algorithms. Configuration is provided via the Config struct using With* options. 
+// +// File layout (this package): +// - hypercache.go type, constructors (New, NewInMemoryWithDefaults), Stop, accessors +// - hypercache_construct.go backend resolution, base init, stats config, capacity check +// - hypercache_io.go Set/Get/GetWithInfo/GetOrSet/GetMultiple/Remove/Clear/List/touchItem +// - hypercache_eviction.go eviction loop + algorithm init + SetCapacity + TriggerEviction +// - hypercache_expiration.go expiration loop + trigger debounce + TriggerExpiration +// - hypercache_dist.go DistMemory-only inspection methods +// // Background loops: // - expiration loop (interval: expirationInterval) scans for expired items // - eviction loop (interval: evictionInterval) evicts items via the configured algorithm @@ -37,7 +41,7 @@ import ( // - evictCh triggers an immediate eviction pass when interval is 0 and capacity exceeded // // Synchronization: -// - mutex protects eviction algorithm state +// - Each eviction algorithm protects its own state internally // - stop channel signals background loops to stop type HyperCache[T backend.IBackendConstrain] struct { backend backend.IBackend[T] // backend used to store items @@ -70,6 +74,9 @@ type HyperCache[T backend.IBackendConstrain] struct { mgmtHTTP *ManagementHTTPServer } +// touchBackend is the optional interface a backend implements when it wants +// to be notified that a key was just accessed — used by Get* methods to +// update LRU/clock state without imposing a hard dependency. type touchBackend interface { Touch(ctx context.Context, key string) bool } @@ -170,790 +177,44 @@ func New[T backend.IBackendConstrain](ctx context.Context, bm *BackendManager, c return hyperCache, nil } -// resolveBackend constructs a typed backend instance based on the config.BackendType. -func resolveBackend[T backend.IBackendConstrain](ctx context.Context, bm *BackendManager, config *Config[T]) (backend.IBackend[T], error) { - constructor, exists := bm.backendRegistry[config.BackendType] - if !exists { - return nil, ewrap.Newf("unknown backend type: %s", config.BackendType) - } - - switch config.BackendType { - case constants.InMemoryBackend: - return resolveInMemoryBackend[T](ctx, constructor, config) - case constants.RedisBackend: - return resolveRedisBackend[T](ctx, constructor, config) - case constants.RedisClusterBackend: - return resolveRedisClusterBackend[T](ctx, constructor, config) - case constants.DistMemoryBackend: - return resolveDistMemoryBackend[T](ctx, constructor, config) - default: - return nil, ewrap.Newf("unknown backend type: %s", config.BackendType) - } -} - -// castBackend tries to cast a backend instance of any concrete type to backend.IBackend[T]. 
-func castBackend[T backend.IBackendConstrain](bi any) (backend.IBackend[T], error) { - if b, ok := bi.(backend.IBackend[T]); ok { - return b, nil - } - - return nil, sentinel.ErrInvalidBackendType -} - -func resolveInMemoryBackend[T backend.IBackendConstrain](ctx context.Context, constructor, cfgAny any) (backend.IBackend[T], error) { - inMemCtor, ok := constructor.(InMemoryBackendConstructor) - if !ok { - return nil, sentinel.ErrInvalidBackendType - } - - cfg, ok := cfgAny.(*Config[backend.InMemory]) - if !ok { - return nil, sentinel.ErrInvalidBackendType - } - - bi, err := inMemCtor.Create(ctx, cfg) - if err != nil { - return nil, err - } - - return castBackend[T](bi) -} - -func resolveRedisBackend[T backend.IBackendConstrain](ctx context.Context, constructor, cfgAny any) (backend.IBackend[T], error) { - redisCtor, ok := constructor.(RedisBackendConstructor) - if !ok { - return nil, sentinel.ErrInvalidBackendType - } - - cfg, ok := cfgAny.(*Config[backend.Redis]) - if !ok { - return nil, sentinel.ErrInvalidBackendType - } - - bi, err := redisCtor.Create(ctx, cfg) - if err != nil { - return nil, err - } - - return castBackend[T](bi) -} - -func resolveDistMemoryBackend[T backend.IBackendConstrain](ctx context.Context, constructor, cfgAny any) (backend.IBackend[T], error) { - distCtor, ok := constructor.(DistMemoryBackendConstructor) - if !ok { - return nil, sentinel.ErrInvalidBackendType - } - - cfg, ok := cfgAny.(*Config[backend.DistMemory]) - if !ok { - return nil, sentinel.ErrInvalidBackendType - } - - bi, err := distCtor.Create(ctx, cfg) - if err != nil { - return nil, err - } - - return castBackend[T](bi) -} - -func resolveRedisClusterBackend[T backend.IBackendConstrain](ctx context.Context, constructor, cfgAny any) (backend.IBackend[T], error) { - clusterCtor, ok := constructor.(RedisClusterBackendConstructor) - if !ok { - return nil, sentinel.ErrInvalidBackendType - } - - cfg, ok := cfgAny.(*Config[backend.RedisCluster]) - if !ok { - return nil, sentinel.ErrInvalidBackendType - } - - bi, err := clusterCtor.Create(ctx, cfg) - if err != nil { - return nil, err - } - - return castBackend[T](bi) -} - -// newHyperCacheBase builds the base HyperCache instance with default timings and internals. -func newHyperCacheBase[T backend.IBackendConstrain](b backend.IBackend[T]) *HyperCache[T] { - return &HyperCache[T]{ - backend: b, - itemPoolManager: cache.NewItemPoolManager(), - workerPool: NewWorkerPool(runtime.NumCPU()), - stop: make(chan bool, 2), - evictCh: make(chan bool, 1), - expirationInterval: constants.DefaultExpirationInterval, - evictionInterval: constants.DefaultEvictionInterval, - // Default eviction shard count matches pkg/cache/v2.ShardCount so a - // key's data shard and eviction shard map to the same logical position. - // Users can override with WithEvictionShardCount; <=1 disables sharding. - evictionShardCount: cache.ShardCount, - } -} - -// configureEvictionSettings computes derived eviction settings like shouldEvict and default maxEvictionCount. -func configureEvictionSettings[T backend.IBackendConstrain](hc *HyperCache[T]) { - hc.shouldEvict.Store(hc.evictionInterval == 0 && hc.backend.Capacity() > 0) - - if hc.maxEvictionCount == 0 { - maxEvictionCount, err := converters.ToUint(hc.backend.Capacity()) - if err != nil { - hc.maxEvictionCount = 1 - - return - } - - hc.maxEvictionCount = maxEvictionCount - } -} - -// initEvictionAlgorithm initializes the eviction algorithm for the cache. 
-// -// When evictionShardCount > 1 (default 32) the algorithm is wrapped in -// eviction.Sharded — same hash as ConcurrentMap, so a key's data shard and -// eviction shard align. This eliminates the global eviction-algorithm mutex -// at the cost of strict global LRU/LFU ordering. shardCount <= 1 keeps the -// previous single-instance behavior. -func initEvictionAlgorithm[T backend.IBackendConstrain](hc *HyperCache[T]) error { - maxEvictionCount, err := converters.ToInt(hc.maxEvictionCount) - if err != nil { - return err - } - - algorithmName := hc.evictionAlgorithmName - if algorithmName == "" { - algorithmName = "lru" - } - - if hc.evictionShardCount > 1 { - hc.evictionAlgorithm, err = eviction.NewSharded(algorithmName, maxEvictionCount, hc.evictionShardCount) - - return err - } - - hc.evictionAlgorithm, err = eviction.NewEvictionAlgorithm(algorithmName, maxEvictionCount) - - return err -} - -// configureStats sets the stats collector, using default if none specified. -func configureStats[T backend.IBackendConstrain](hc *HyperCache[T]) error { - if hc.statsCollectorName == "" { - hc.StatsCollector = stats.NewHistogramStatsCollector() - - return nil - } - - var err error - - hc.StatsCollector, err = stats.NewCollector(hc.statsCollectorName) - - return err -} - -// checkCapacity validates the backend capacity and returns an error if invalid. -func checkCapacity[T backend.IBackendConstrain](hc *HyperCache[T]) error { - if hc.backend.Capacity() < 0 { - return sentinel.ErrInvalidCapacity - } - - return nil -} - -// initExpirationTrigger initializes the expiration trigger channel with optional buffer override. -func initExpirationTrigger[T backend.IBackendConstrain](hc *HyperCache[T]) { - buf := hc.backend.Capacity() / 2 - if hc.expirationTriggerBufSize > 0 { - buf = hc.expirationTriggerBufSize - } - - if buf < 1 { - buf = 1 - } - - hc.expirationTriggerCh = make(chan bool, buf) -} - -// startBackgroundJobs starts the background jobs for the hyper cache. -func (hyperCache *HyperCache[T]) startBackgroundJobs(ctx context.Context) { - // Start expiration and eviction loops once - hyperCache.once.Do(func() { - // Long-lived background context, canceled on Stop - jobsCtx, cancel := context.WithCancel(ctx) - - hyperCache.bgCancel = cancel - // Ensure shutdown signaling always drives context cancellation, even when - // stop consumers race to read the stop channel. - go func(stop <-chan bool, done <-chan struct{}, cancel context.CancelFunc) { - select { - case <-stop: - cancel() - case <-done: - } - }(hyperCache.stop, jobsCtx.Done(), cancel) - - hyperCache.startExpirationRoutine(jobsCtx) - hyperCache.startEvictionRoutine(jobsCtx) - }) -} - -// startExpirationRoutine launches the expiration loop and listens to manual triggers and stop signals. -func (hyperCache *HyperCache[T]) startExpirationRoutine(ctx context.Context) { - go func() { - var tick *time.Ticker - - if hyperCache.expirationInterval > 0 { - tick = time.NewTicker(hyperCache.expirationInterval) - } - - for { - if hyperCache.handleExpirationSelect(ctx, tick) { // returns true when loop should exit - return - } - } - }() -} - -// handleExpirationSelect processes one select iteration; returns true if caller should exit. 
-func (hyperCache *HyperCache[T]) handleExpirationSelect(ctx context.Context, tick *time.Ticker) bool { - var tickC <-chan time.Time - - if tick != nil { - tickC = tick.C - } - - select { - case <-tickC: - // scheduled expiration - hyperCache.expirationLoop(ctx) - case <-hyperCache.expirationTriggerCh: - // manual/coalesced trigger - hyperCache.expirationLoop(ctx) - hyperCache.expirationSignalPending.Store(false) - - // drain any queued triggers quickly - for draining := true; draining; { - select { - case <-hyperCache.expirationTriggerCh: - // keep draining - default: - draining = false - } - } - - case <-hyperCache.evictCh: - // manual eviction trigger - hyperCache.evictionLoop(ctx) - case <-ctx.Done(): - if tick != nil { - tick.Stop() - } - - return true - - case <-hyperCache.stop: - if tick != nil { - tick.Stop() - } - - return true - } - - return false -} - -// startEvictionRoutine launches the periodic eviction loop if configured. -func (hyperCache *HyperCache[T]) startEvictionRoutine(ctx context.Context) { - if hyperCache.evictionInterval <= 0 { - return - } - - tick := time.NewTicker(hyperCache.evictionInterval) - - go func() { - for { - select { - case <-tick.C: - hyperCache.evictionLoop(ctx) - case <-ctx.Done(): - tick.Stop() - - return - - case <-hyperCache.stop: - tick.Stop() - - return - } - } - }() -} - -// triggerExpiration coalesces and optionally debounces expiration triggers to avoid flooding the channel. -func (hyperCache *HyperCache[T]) execTriggerExpiration() { - // Optional debounce: if configured, drop triggers that arrive within the interval. - if d := hyperCache.expirationDebounceInterval; d > 0 { - last := time.Unix(0, hyperCache.lastExpirationTrigger.Load()) - if time.Since(last) < d { - // record backpressure metric - hyperCache.StatsCollector.Incr(constants.StatIncr, 1) - - return - } - } - - // Coalesce: if a signal is already pending, skip enqueueing another. - if hyperCache.expirationSignalPending.Swap(true) { - hyperCache.StatsCollector.Incr(constants.StatIncr, 1) - - return - } - +// Stop signals background loops to stop, shuts down the worker pool, and +// closes any optional management HTTP server. The shutdown is bounded by +// shutdownTimeout to keep tests responsive. +func (hyperCache *HyperCache[T]) Stop(ctx context.Context) error { + // Best-effort stop signal for listeners that still rely on stop channel. select { - case hyperCache.expirationTriggerCh <- true: - hyperCache.lastExpirationTrigger.Store(time.Now().UnixNano()) + case hyperCache.stop <- true: default: - // channel full; keep pending flag set and record metric - hyperCache.StatsCollector.Incr(constants.StatIncr, 1) } -} - -// expirationLoop is a function that runs in a separate goroutine and expires items in the cache based on their expiration duration. 
-func (hyperCache *HyperCache[T]) expirationLoop(ctx context.Context) {
-	hyperCache.workerPool.Enqueue(func() error {
-		hyperCache.StatsCollector.Incr("expiration_loop_count", 1)
-		defer hyperCache.StatsCollector.Timing("expiration_loop_duration", time.Now().UnixNano())
-
-		var (
-			expiredCount int64
-			items        []*cache.Item
-			err          error
-		)
-
-		// get all expired items
-		items, err = hyperCache.List(ctx,
-			backend.WithSortBy(constants.SortByExpiration.String()),
-			backend.WithFilterFunc(func(item *cache.Item) bool {
-				return item.Expiration > 0 && time.Since(item.LastAccess) > item.Expiration
-			}))
-		if err != nil {
-			return err
-		}
-
-		// iterate all expired items and remove them
-		for _, item := range items {
-			expiredCount++
-
-			err := hyperCache.Remove(ctx, item.Key)
-			if err != nil {
-				return err
-			}
-
-			hyperCache.itemPoolManager.Put(item)
-			hyperCache.StatsCollector.Incr("item_expired_count", 1)
-		}
-
-		hyperCache.StatsCollector.Gauge("item_count", int64(hyperCache.backend.Count(ctx)))
-		hyperCache.StatsCollector.Gauge("expired_item_count", expiredCount)
-
-		return nil
-	})
-}
-
-// evictionLoop is a function that runs in a separate goroutine and evicts items from the cache based on the cache's capacity and the max
-// eviction count.
-// The eviction is determined by the eviction algorithm.
-func (hyperCache *HyperCache[T]) evictionLoop(ctx context.Context) {
-	// Enqueue the eviction loop in the worker pool to avoid blocking the main goroutine if the eviction loop is slow
-	hyperCache.workerPool.Enqueue(func() error {
-		hyperCache.StatsCollector.Incr("eviction_loop_count", 1)
-		defer hyperCache.StatsCollector.Timing("eviction_loop_duration", time.Now().UnixNano())
-
-		var evictedCount uint
-
-		for hyperCache.backend.Count(ctx) > hyperCache.backend.Capacity() {
-			if hyperCache.maxEvictionCount == evictedCount {
-				break
-			}
-
-			// Each eviction algorithm provides its own internal mutex; no
-			// outer lock needed. The Evict() -> Remove() window below is
-			// already non-atomic by design (other workers may insert items
-			// between the two calls); a wider lock would not change that.
-			key, ok := hyperCache.evictionAlgorithm.Evict()
-			if !ok {
-				// no more items to evict
-				break
-			}
-
-			// remove the item from the cache
-			err := hyperCache.Remove(ctx, key)
-			if err != nil {
-				return err
-			}
-
-			evictedCount++
+	if hyperCache.bgCancel != nil {
+		hyperCache.bgCancel()
-			hyperCache.StatsCollector.Incr("item_evicted_count", 1)
-		}
+		hyperCache.bgCancel = nil
+	}
-		itemCount, err := converters.ToInt64(hyperCache.backend.Count(ctx))
-		if err != nil {
-			return err
-		}
+	hyperCache.once = sync.Once{}
+	hyperCache.workerPool.Shutdown()
-		hyperCache.StatsCollector.Gauge("item_count", itemCount)
+	if hyperCache.mgmtHTTP != nil {
+		ctx, cancel := context.WithTimeout(ctx, shutdownTimeout)
+		defer cancel()
-		evictedCount64, err := converters.ToInt64(evictedCount)
+		err := hyperCache.mgmtHTTP.Shutdown(ctx)
 		if err != nil {
+			// Propagate the shutdown error so the caller can decide how to react.
 			return err
 		}
-
-		hyperCache.StatsCollector.Gauge("evicted_item_count", evictedCount64)
-
-		return nil
-	})
-}
-
-// evictItem is a helper function that removes an item from the cache and returns the key of the evicted item.
-// If no item can be evicted, it returns a false.
-func (hyperCache *HyperCache[T]) evictItem(ctx context.Context) (string, bool) { - key, ok := hyperCache.evictionAlgorithm.Evict() - if !ok { - // no more items to evict - return "", false - } - - err := hyperCache.Remove(ctx, key) - if err != nil { - return "", false - } - - return key, true -} - -// Set adds an item to the cache with the given key and value. If an item with the same key already exists, it updates the value of the -// existing item. -// If the expiration duration is greater than zero, the item will expire after the specified duration. -// If capacity is reached: -// - when evictionInterval == 0 we evict immediately -// - otherwise the background eviction loop will reclaim space -func (hyperCache *HyperCache[T]) Set(ctx context.Context, key string, value any, expiration time.Duration) error { - // Create a new cache item and set its properties - item := hyperCache.itemPoolManager.Get() - - item.Key = key - item.Value = value - item.Expiration = expiration - item.LastAccess = time.Now() - - // Set the size of the item (aligned with backend serializer when available) - err := hyperCache.setItemSize(item) - if err != nil { - return err - } - - // check if adding this item will exceed the maxCacheSize - hyperCache.memoryAllocation.Add(item.Size) - - if hyperCache.maxCacheSize > 0 && hyperCache.memoryAllocation.Load() > hyperCache.maxCacheSize { - hyperCache.memoryAllocation.Add(-item.Size) - - return sentinel.ErrCacheFull - } - - // Insert the item into the cache - err = hyperCache.backend.Set(ctx, item) - if err != nil { - hyperCache.memoryAllocation.Add(-item.Size) - hyperCache.itemPoolManager.Put(item) - - return err - } - - // Set the item in the eviction algorithm. The algorithm protects its own - // state internally; no outer lock needed. - hyperCache.evictionAlgorithm.Set(key, item.Value) - - // If the cache is at capacity, evict an item when the eviction interval is zero - if hyperCache.shouldEvict.Load() && hyperCache.backend.Count(ctx) > hyperCache.backend.Capacity() { - hyperCache.evictItem(ctx) - } - - return nil -} - -// Get retrieves the item with the given key from the cache returning the value and a boolean indicating if the item was found. -func (hyperCache *HyperCache[T]) Get(ctx context.Context, key string) (any, bool) { - item, ok := hyperCache.backend.Get(ctx, key) - if !ok { - return nil, false - } - - // Check if the item has expired, if so, trigger the expiration loop - if item.Expired() { - // Non-blocking trigger of expiration loop (do not return to pool yet; backend still holds it) - // Coalesced/debounced trigger - hyperCache.execTriggerExpiration() - - return nil, false - } - - // Update the last access time and access count - hyperCache.touchItem(ctx, key, item) - - return item.Value, true -} - -// GetWithInfo retrieves the item with the given key from the cache returning the `Item` object and a boolean indicating if the item was -// found. 
-func (hyperCache *HyperCache[T]) GetWithInfo(ctx context.Context, key string) (*cache.Item, bool) { - item, ok := hyperCache.backend.Get(ctx, key) - // Check if the item has expired if it exists, if so, trigger the expiration loop - if !ok { - return nil, false - } - - // Check if the item has expired, if so, trigger the expiration loop - if item.Expired() { - // Non-blocking trigger of expiration loop; don't return to pool here - // Coalesced/debounced trigger - hyperCache.execTriggerExpiration() - - return nil, false - } - - // Update the last access time and access count - hyperCache.touchItem(ctx, key, item) - - return item, true -} - -// GetOrSet retrieves the item with the given key. If the item is not found, it adds the item to the cache with the given value and -// expiration duration. -// If the capacity of the cache is reached, leverage the eviction algorithm. -func (hyperCache *HyperCache[T]) GetOrSet(ctx context.Context, key string, value any, expiration time.Duration) (any, error) { - // if the item is found, return the value - if item, ok := hyperCache.backend.Get(ctx, key); ok { - // Check if the item has expired - if item.Expired() { - // Non-blocking trigger of expiration loop; don't pool here to avoid zeroing live refs - // Coalesced/debounced trigger - hyperCache.execTriggerExpiration() - - return nil, sentinel.ErrKeyExpired - } - - // Update the last access time and access count - hyperCache.touchItem(ctx, key, item) - - return item.Value, nil - } - - // if the item is not found, add it to the cache - item := hyperCache.itemPoolManager.Get() - - item.Key = key - item.Value = value - item.Expiration = expiration - item.LastAccess = time.Now() - - // Set the size of the item (aligned with backend serializer when available) - err := hyperCache.setItemSize(item) - if err != nil { - return nil, err - } - - // check if adding this item will exceed the maxCacheSize - hyperCache.memoryAllocation.Add(item.Size) - - if hyperCache.maxCacheSize > 0 && hyperCache.memoryAllocation.Load() > hyperCache.maxCacheSize { - hyperCache.memoryAllocation.Add(-item.Size) - hyperCache.itemPoolManager.Put(item) - - return nil, sentinel.ErrCacheFull - } - - // Insert the item into the cache - err = hyperCache.backend.Set(ctx, item) - if err != nil { - hyperCache.memoryAllocation.Add(-item.Size) - hyperCache.itemPoolManager.Put(item) - - return nil, err - } - - // Set the item in the eviction algorithm synchronously. The previous bare - // goroutine had no panic recovery, no shutdown coordination with Stop(), - // and let the caller observe a key whose eviction tracking had not yet - // been recorded. The algorithm's own internal mutex provides safety. - hyperCache.evictionAlgorithm.Set(key, item.Value) - - // If the cache is at capacity, evict an item when the eviction interval is zero - if hyperCache.shouldEvict.Load() && hyperCache.backend.Count(ctx) > hyperCache.backend.Capacity() { - hyperCache.evictItem(ctx) - } - - return value, nil -} - -// GetMultiple retrieves the items with the given keys from the cache. 
-func (hyperCache *HyperCache[T]) GetMultiple(ctx context.Context, keys ...string) (map[string]any, map[string]error) { - result := make(map[string]any, len(keys)) // Preallocate the result map - failed := make(map[string]error, len(keys)) // Preallocate the errors map - - for _, key := range keys { - item, ok := hyperCache.backend.Get(ctx, key) - if !ok { - // Add the key to the errors map and continue - failed[key] = sentinel.ErrKeyNotFound - - continue - } - - // Check if the item has expired - if item.Expired() { - // Treat expired items as not found per API semantics; don't pool here to avoid zeroing live refs - failed[key] = sentinel.ErrKeyNotFound - // Coalesced/debounced trigger of the expiration loop via channel - hyperCache.execTriggerExpiration() - } else { - hyperCache.touchItem(ctx, key, item) // Update the last access time and access count - - // Add the item to the result map - result[key] = item.Value - } - } - - return result, failed -} - -func (hyperCache *HyperCache[T]) touchItem(ctx context.Context, key string, item *cache.Item) { - if item == nil { - return - } - - if toucher, ok := hyperCache.backend.(touchBackend); ok { - toucher.Touch(ctx, key) - } - - item.Touch() -} - -// List lists the items in the cache that meet the specified criteria. -// It takes in a variadic number of any type as filters, it then checks the backend type, and calls the corresponding -// implementation of the List function for that backend, with the filters passed in as arguments. -func (hyperCache *HyperCache[T]) List(ctx context.Context, filters ...backend.IFilter) ([]*cache.Item, error) { - return hyperCache.backend.List(ctx, filters...) -} - -// setItemSize computes item.Size using the backend serializer when available for accuracy. -// Falls back to the Item's internal SetSize when no serializer is present. -func (hyperCache *HyperCache[T]) setItemSize(item *cache.Item) error { - // Prefer backend-specific serialization for accurate size accounting. - switch backendImpl := any(hyperCache.backend).(type) { - case *backend.Redis: - if backendImpl.Serializer != nil { - data, err := backendImpl.Serializer.Marshal(item.Value) - if err != nil { - return err - } - - item.Size = int64(len(data)) - - return nil - } - - case *backend.RedisCluster: - if backendImpl.Serializer != nil { - data, err := backendImpl.Serializer.Marshal(item.Value) - if err != nil { - return err - } - - item.Size = int64(len(data)) - - return nil - } - - default: - // Fall back to generic size estimation for backends without a serializer. - } - - return item.SetSize() -} - -// Remove removes items with the given key from the cache. If an item is not found, it does nothing. -func (hyperCache *HyperCache[T]) Remove(ctx context.Context, keys ...string) error { - // Remove the item from the eviction algorithm - // and update the memory allocation - for _, key := range keys { - item, ok := hyperCache.backend.Get(ctx, key) - if ok { - // remove the item from the cacheBackend and update the memory allocation - hyperCache.memoryAllocation.Add(-item.Size) - hyperCache.evictionAlgorithm.Delete(key) - } - } - - err := hyperCache.backend.Remove(ctx, keys...) - if err != nil { - return err } return nil } -// Clear removes all items from the cache. 
-func (hyperCache *HyperCache[T]) Clear(ctx context.Context) error { - var ( - items []*cache.Item - err error - ) - - // get all expired items - items, err = hyperCache.backend.List(ctx) - if err != nil { - return err - } - - // clear the cacheBackend - err = hyperCache.backend.Clear(ctx) - if err != nil { - return err - } - - for _, item := range items { - hyperCache.evictionAlgorithm.Delete(item.Key) - } - - // reset the memory allocation - hyperCache.memoryAllocation.Store(0) - - return err -} - // Capacity returns the capacity of the cache. func (hyperCache *HyperCache[T]) Capacity() int { return hyperCache.backend.Capacity() } -// SetCapacity sets the capacity of the cache. If the new capacity is smaller than the current number of items in the cache, -// it evicts the excess items from the cache. -func (hyperCache *HyperCache[T]) SetCapacity(ctx context.Context, capacity int) { - // set capacity of the backend - hyperCache.backend.SetCapacity(capacity) - // evaluate again if the cache should evict items proactively - hyperCache.shouldEvict.Swap(hyperCache.evictionInterval == 0 && hyperCache.backend.Capacity() > 0) - - // if the cache size is greater than the new capacity, evict items - if hyperCache.backend.Count(ctx) > hyperCache.Capacity() { - hyperCache.evictionLoop(ctx) - } -} - // Allocation returns the size allocation in bytes of the current cache. func (hyperCache *HyperCache[T]) Allocation() int64 { return hyperCache.memoryAllocation.Load() @@ -969,22 +230,6 @@ func (hyperCache *HyperCache[T]) Count(ctx context.Context) int { return hyperCache.backend.Count(ctx) } -// TriggerEviction sends a signal to the eviction loop to start. -func (hyperCache *HyperCache[T]) TriggerEviction(_ context.Context) { - // Safe, non-blocking trigger; no-op if channel not initialized - if hyperCache.evictCh == nil { - return - } - - select { - case hyperCache.evictCh <- true: - default: - } -} - -// TriggerExpiration exposes a manual expiration trigger (debounced/coalesced internally). -func (hyperCache *HyperCache[T]) TriggerExpiration() { hyperCache.execTriggerExpiration() } - // EvictionInterval returns configured eviction interval. func (hyperCache *HyperCache[T]) EvictionInterval() time.Duration { return hyperCache.evictionInterval } @@ -996,41 +241,6 @@ func (hyperCache *HyperCache[T]) ExpirationInterval() time.Duration { // EvictionAlgorithm returns eviction algorithm name. func (hyperCache *HyperCache[T]) EvictionAlgorithm() string { return hyperCache.evictionAlgorithmName } -const ( - shutdownTimeout = 2 * time.Second -) - -// Stop function stops the expiration and eviction loops and closes the stop channel. -func (hyperCache *HyperCache[T]) Stop(ctx context.Context) error { - // Best-effort stop signal for listeners that still rely on stop channel. - select { - case hyperCache.stop <- true: - default: - } - - if hyperCache.bgCancel != nil { - hyperCache.bgCancel() - - hyperCache.bgCancel = nil - } - - hyperCache.once = sync.Once{} - hyperCache.workerPool.Shutdown() - - if hyperCache.mgmtHTTP != nil { - ctx, cancel := context.WithTimeout(ctx, shutdownTimeout) - defer cancel() - - err := hyperCache.mgmtHTTP.Shutdown(ctx) - if err != nil { - // Handle error - return err - } - } - - return nil -} - // GetStats returns the stats collected by the cache. Thread-safety is // delegated to the configured StatsCollector implementation (the default // HistogramStatsCollector is fully thread-safe with no global lock). 
@@ -1038,107 +248,6 @@ func (hyperCache *HyperCache[T]) GetStats() stats.Stats { return hyperCache.StatsCollector.GetStats() } -// DistMetrics returns distributed backend metrics if the underlying backend is DistMemory. -// Returns nil if unsupported. -func (hyperCache *HyperCache[T]) DistMetrics() any { // generic any to avoid exporting type into core interface - if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { - m := dm.Metrics() - - return m - } - - return nil -} - -// ClusterOwners returns the owners for a key if the distributed backend supports it; otherwise empty slice. -func (hyperCache *HyperCache[T]) ClusterOwners(key string) []string { - if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { - owners := dm.DebugOwners(key) - - out := make([]string, 0, len(owners)) - for _, o := range owners { - out = append(out, string(o)) - } - - return out - } - - return nil -} - -// DistMembershipSnapshot returns a snapshot of membership if distributed backend; otherwise nil slice. -// -//nolint:nonamedreturns -func (hyperCache *HyperCache[T]) DistMembershipSnapshot() (members []struct { - ID string - Address string - State string - Incarnation uint64 -}, replication, vnodes int, -) { - if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { - membership := dm.Membership() - ring := dm.Ring() - - if membership == nil || ring == nil { - return nil, 0, 0 - } - - nodes := membership.List() - out := make([]struct { - ID string - Address string - State string - Incarnation uint64 - }, 0, len(nodes)) - - for _, node := range nodes { - out = append(out, struct { - ID string - Address string - State string - Incarnation uint64 - }{ - ID: string(node.ID), - Address: node.Address, - State: node.State.String(), - Incarnation: node.Incarnation, - }) - } - - return out, ring.Replication(), ring.VirtualNodesPerNode() - } - - return nil, 0, 0 -} - -// DistRingHashSpots returns vnode hashes as hex strings if available (debug). -func (hyperCache *HyperCache[T]) DistRingHashSpots() []string { - if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { - if ring := dm.Ring(); ring != nil { - return ring.VNodeHashes() - } - } - - return nil -} - -// DistHeartbeatMetrics returns distributed heartbeat metrics if supported. -func (hyperCache *HyperCache[T]) DistHeartbeatMetrics() any { - if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { - m := dm.Metrics() - - return map[string]any{ - "heartbeatSuccess": m.HeartbeatSuccess, - "heartbeatFailure": m.HeartbeatFailure, - "nodesRemoved": m.NodesRemoved, - "readPrimaryPromote": m.ReadPrimaryPromote, - } - } - - return nil -} - // ManagementHTTPAddress returns the bound address of the optional management HTTP server. // Empty string when the server is disabled or failed to start. func (hyperCache *HyperCache[T]) ManagementHTTPAddress() string { diff --git a/hypercache_construct.go b/hypercache_construct.go new file mode 100644 index 0000000..6e698fe --- /dev/null +++ b/hypercache_construct.go @@ -0,0 +1,206 @@ +package hypercache + +import ( + "context" + "runtime" + "time" + + "github.com/hyp3rd/ewrap" + + "github.com/hyp3rd/hypercache/internal/constants" + "github.com/hyp3rd/hypercache/internal/sentinel" + "github.com/hyp3rd/hypercache/pkg/backend" + cache "github.com/hyp3rd/hypercache/pkg/cache/v2" + "github.com/hyp3rd/hypercache/pkg/stats" +) + +// resolveBackend constructs a typed backend instance based on the config.BackendType. 
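+// Unknown or unregistered backend types yield a descriptive error instead
+// of a panic, so misconfiguration surfaces at construction time.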
+func resolveBackend[T backend.IBackendConstrain](ctx context.Context, bm *BackendManager, config *Config[T]) (backend.IBackend[T], error) { + constructor, exists := bm.backendRegistry[config.BackendType] + if !exists { + return nil, ewrap.Newf("unknown backend type: %s", config.BackendType) + } + + switch config.BackendType { + case constants.InMemoryBackend: + return resolveInMemoryBackend[T](ctx, constructor, config) + case constants.RedisBackend: + return resolveRedisBackend[T](ctx, constructor, config) + case constants.RedisClusterBackend: + return resolveRedisClusterBackend[T](ctx, constructor, config) + case constants.DistMemoryBackend: + return resolveDistMemoryBackend[T](ctx, constructor, config) + default: + return nil, ewrap.Newf("unknown backend type: %s", config.BackendType) + } +} + +// castBackend tries to cast a backend instance of any concrete type to backend.IBackend[T]. +func castBackend[T backend.IBackendConstrain](bi any) (backend.IBackend[T], error) { + if b, ok := bi.(backend.IBackend[T]); ok { + return b, nil + } + + return nil, sentinel.ErrInvalidBackendType +} + +func resolveInMemoryBackend[T backend.IBackendConstrain](ctx context.Context, constructor, cfgAny any) (backend.IBackend[T], error) { + inMemCtor, ok := constructor.(InMemoryBackendConstructor) + if !ok { + return nil, sentinel.ErrInvalidBackendType + } + + cfg, ok := cfgAny.(*Config[backend.InMemory]) + if !ok { + return nil, sentinel.ErrInvalidBackendType + } + + bi, err := inMemCtor.Create(ctx, cfg) + if err != nil { + return nil, err + } + + return castBackend[T](bi) +} + +func resolveRedisBackend[T backend.IBackendConstrain](ctx context.Context, constructor, cfgAny any) (backend.IBackend[T], error) { + redisCtor, ok := constructor.(RedisBackendConstructor) + if !ok { + return nil, sentinel.ErrInvalidBackendType + } + + cfg, ok := cfgAny.(*Config[backend.Redis]) + if !ok { + return nil, sentinel.ErrInvalidBackendType + } + + bi, err := redisCtor.Create(ctx, cfg) + if err != nil { + return nil, err + } + + return castBackend[T](bi) +} + +func resolveDistMemoryBackend[T backend.IBackendConstrain](ctx context.Context, constructor, cfgAny any) (backend.IBackend[T], error) { + distCtor, ok := constructor.(DistMemoryBackendConstructor) + if !ok { + return nil, sentinel.ErrInvalidBackendType + } + + cfg, ok := cfgAny.(*Config[backend.DistMemory]) + if !ok { + return nil, sentinel.ErrInvalidBackendType + } + + bi, err := distCtor.Create(ctx, cfg) + if err != nil { + return nil, err + } + + return castBackend[T](bi) +} + +func resolveRedisClusterBackend[T backend.IBackendConstrain](ctx context.Context, constructor, cfgAny any) (backend.IBackend[T], error) { + clusterCtor, ok := constructor.(RedisClusterBackendConstructor) + if !ok { + return nil, sentinel.ErrInvalidBackendType + } + + cfg, ok := cfgAny.(*Config[backend.RedisCluster]) + if !ok { + return nil, sentinel.ErrInvalidBackendType + } + + bi, err := clusterCtor.Create(ctx, cfg) + if err != nil { + return nil, err + } + + return castBackend[T](bi) +} + +// newHyperCacheBase builds the base HyperCache instance with default timings and internals. 
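+// (The stop channel is buffered so that signaling shutdown never blocks
+// the caller, even when no background loop is receiving at that moment.)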
+func newHyperCacheBase[T backend.IBackendConstrain](b backend.IBackend[T]) *HyperCache[T] { + return &HyperCache[T]{ + backend: b, + itemPoolManager: cache.NewItemPoolManager(), + workerPool: NewWorkerPool(runtime.NumCPU()), + stop: make(chan bool, 2), + evictCh: make(chan bool, 1), + expirationInterval: constants.DefaultExpirationInterval, + evictionInterval: constants.DefaultEvictionInterval, + // Default eviction shard count matches pkg/cache/v2.ShardCount so a + // key's data shard and eviction shard map to the same logical position. + // Users can override with WithEvictionShardCount; <=1 disables sharding. + evictionShardCount: cache.ShardCount, + } +} + +// configureStats sets the stats collector, using default if none specified. +func configureStats[T backend.IBackendConstrain](hc *HyperCache[T]) error { + if hc.statsCollectorName == "" { + hc.StatsCollector = stats.NewHistogramStatsCollector() + + return nil + } + + var err error + + hc.StatsCollector, err = stats.NewCollector(hc.statsCollectorName) + + return err +} + +// checkCapacity validates the backend capacity and returns an error if invalid. +func checkCapacity[T backend.IBackendConstrain](hc *HyperCache[T]) error { + if hc.backend.Capacity() < 0 { + return sentinel.ErrInvalidCapacity + } + + return nil +} + +// initExpirationTrigger initializes the expiration trigger channel with optional buffer override. +func initExpirationTrigger[T backend.IBackendConstrain](hc *HyperCache[T]) { + buf := hc.backend.Capacity() / 2 + if hc.expirationTriggerBufSize > 0 { + buf = hc.expirationTriggerBufSize + } + + if buf < 1 { + buf = 1 + } + + hc.expirationTriggerCh = make(chan bool, buf) +} + +// startBackgroundJobs starts the expiration and eviction loops once. +// Both loops are tied to a long-lived background context so Stop() can +// cancel them deterministically — independent of which side of the stop +// channel races first. +func (hyperCache *HyperCache[T]) startBackgroundJobs(ctx context.Context) { + hyperCache.once.Do(func() { + // Long-lived background context, canceled on Stop. + jobsCtx, cancel := context.WithCancel(ctx) + + hyperCache.bgCancel = cancel + // Ensure shutdown signaling always drives context cancellation, even when + // stop consumers race to read the stop channel. + go func(stop <-chan bool, done <-chan struct{}, cancel context.CancelFunc) { + select { + case <-stop: + cancel() + case <-done: + } + }(hyperCache.stop, jobsCtx.Done(), cancel) + + hyperCache.startExpirationRoutine(jobsCtx) + hyperCache.startEvictionRoutine(jobsCtx) + }) +} + +// shutdownTimeout caps the wait for graceful shutdown of optional management +// HTTP servers in Stop. Kept short — Stop is meant to be quick and tests +// don't have minutes to spare. +const shutdownTimeout = 2 * time.Second diff --git a/hypercache_dist.go b/hypercache_dist.go new file mode 100644 index 0000000..bc1f150 --- /dev/null +++ b/hypercache_dist.go @@ -0,0 +1,106 @@ +package hypercache + +import ( + "github.com/hyp3rd/hypercache/pkg/backend" +) + +// DistMetrics returns distributed backend metrics if the underlying backend is DistMemory. +// Returns nil if unsupported. +func (hyperCache *HyperCache[T]) DistMetrics() any { // generic any to avoid exporting type into core interface + if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok { + m := dm.Metrics() + + return m + } + + return nil +} + +// ClusterOwners returns the owners for a key if the distributed backend supports it; otherwise empty slice. 
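+// (When the backend is not DistMemory the method returns nil, which
+// ranges the same as an empty slice.)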
+func (hyperCache *HyperCache[T]) ClusterOwners(key string) []string {
+	if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok {
+		owners := dm.DebugOwners(key)
+
+		out := make([]string, 0, len(owners))
+		for _, o := range owners {
+			out = append(out, string(o))
+		}
+
+		return out
+	}
+
+	return nil
+}
+
+// DistMembershipSnapshot returns a snapshot of cluster membership when the backend is DistMemory; otherwise nil members and zero counts.
+//
+//nolint:nonamedreturns
+func (hyperCache *HyperCache[T]) DistMembershipSnapshot() (members []struct {
+	ID          string
+	Address     string
+	State       string
+	Incarnation uint64
+}, replication, vnodes int,
+) {
+	if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok {
+		membership := dm.Membership()
+		ring := dm.Ring()
+
+		if membership == nil || ring == nil {
+			return nil, 0, 0
+		}
+
+		nodes := membership.List()
+		out := make([]struct {
+			ID          string
+			Address     string
+			State       string
+			Incarnation uint64
+		}, 0, len(nodes))
+
+		for _, node := range nodes {
+			out = append(out, struct {
+				ID          string
+				Address     string
+				State       string
+				Incarnation uint64
+			}{
+				ID:          string(node.ID),
+				Address:     node.Address,
+				State:       node.State.String(),
+				Incarnation: node.Incarnation,
+			})
+		}
+
+		return out, ring.Replication(), ring.VirtualNodesPerNode()
+	}
+
+	return nil, 0, 0
+}
+
+// DistRingHashSpots returns vnode hashes as hex strings if available (debug).
+func (hyperCache *HyperCache[T]) DistRingHashSpots() []string {
+	if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok {
+		if ring := dm.Ring(); ring != nil {
+			return ring.VNodeHashes()
+		}
+	}
+
+	return nil
+}
+
+// DistHeartbeatMetrics returns distributed heartbeat metrics if supported.
+func (hyperCache *HyperCache[T]) DistHeartbeatMetrics() any {
+	if dm, ok := any(hyperCache.backend).(*backend.DistMemory); ok {
+		m := dm.Metrics()
+
+		return map[string]any{
+			"heartbeatSuccess":   m.HeartbeatSuccess,
+			"heartbeatFailure":   m.HeartbeatFailure,
+			"nodesRemoved":       m.NodesRemoved,
+			"readPrimaryPromote": m.ReadPrimaryPromote,
+		}
+	}
+
+	return nil
+}
diff --git a/hypercache_eviction.go b/hypercache_eviction.go
new file mode 100644
index 0000000..fc5e112
--- /dev/null
+++ b/hypercache_eviction.go
@@ -0,0 +1,182 @@
+package hypercache
+
+import (
+	"context"
+	"time"
+
+	"github.com/hyp3rd/sectools/pkg/converters"
+
+	"github.com/hyp3rd/hypercache/pkg/backend"
+	"github.com/hyp3rd/hypercache/pkg/eviction"
+)
+
+// configureEvictionSettings computes derived eviction settings like shouldEvict and default maxEvictionCount.
+func configureEvictionSettings[T backend.IBackendConstrain](hc *HyperCache[T]) {
+	hc.shouldEvict.Store(hc.evictionInterval == 0 && hc.backend.Capacity() > 0)
+
+	if hc.maxEvictionCount == 0 {
+		maxEvictionCount, err := converters.ToUint(hc.backend.Capacity())
+		if err != nil {
+			hc.maxEvictionCount = 1
+
+			return
+		}
+
+		hc.maxEvictionCount = maxEvictionCount
+	}
+}
+
+// initEvictionAlgorithm initializes the eviction algorithm for the cache.
+//
+// When evictionShardCount > 1 (default 32) the algorithm is wrapped in
+// eviction.Sharded — same hash as ConcurrentMap, so a key's data shard and
+// eviction shard align. This eliminates the global eviction-algorithm mutex
+// at the cost of strict global LRU/LFU ordering. shardCount <= 1 keeps the
+// previous single-instance behavior.
+func initEvictionAlgorithm[T backend.IBackendConstrain](hc *HyperCache[T]) error {
+	maxEvictionCount, err := converters.ToInt(hc.maxEvictionCount)
+	if err != nil {
+		return err
+	}
+
+	algorithmName := hc.evictionAlgorithmName
+	if algorithmName == "" {
+		algorithmName = "lru"
+	}
+
+	if hc.evictionShardCount > 1 {
+		hc.evictionAlgorithm, err = eviction.NewSharded(algorithmName, maxEvictionCount, hc.evictionShardCount)
+
+		return err
+	}
+
+	hc.evictionAlgorithm, err = eviction.NewEvictionAlgorithm(algorithmName, maxEvictionCount)
+
+	return err
+}
+
+// startEvictionRoutine launches the periodic eviction loop if configured.
+func (hyperCache *HyperCache[T]) startEvictionRoutine(ctx context.Context) {
+	if hyperCache.evictionInterval <= 0 {
+		return
+	}
+
+	tick := time.NewTicker(hyperCache.evictionInterval)
+
+	go func() {
+		for {
+			select {
+			case <-tick.C:
+				hyperCache.evictionLoop(ctx)
+			case <-ctx.Done():
+				tick.Stop()
+
+				return
+
+			case <-hyperCache.stop:
+				tick.Stop()
+
+				return
+			}
+		}
+	}()
+}
+
+// evictionLoop runs in a worker goroutine and evicts items until the backend is at or below capacity,
+// or the configured maxEvictionCount is reached. The eviction policy is determined by the configured
+// algorithm.
+func (hyperCache *HyperCache[T]) evictionLoop(ctx context.Context) {
+	// Enqueue the eviction loop in the worker pool to avoid blocking the main goroutine if the eviction loop is slow
+	hyperCache.workerPool.Enqueue(func() error {
+		hyperCache.StatsCollector.Incr("eviction_loop_count", 1)
+		defer hyperCache.StatsCollector.Timing("eviction_loop_duration", time.Now().UnixNano())
+
+		var evictedCount uint
+
+		for hyperCache.backend.Count(ctx) > hyperCache.backend.Capacity() {
+			if hyperCache.maxEvictionCount == evictedCount {
+				break
+			}
+
+			// Each eviction algorithm provides its own internal mutex; no
+			// outer lock needed. The Evict() -> Remove() window below is
+			// already non-atomic by design (other workers may insert items
+			// between the two calls); a wider lock would not change that.
+			key, ok := hyperCache.evictionAlgorithm.Evict()
+			if !ok {
+				// no more items to evict
+				break
+			}
+
+			// remove the item from the cache
+			err := hyperCache.Remove(ctx, key)
+			if err != nil {
+				return err
+			}
+
+			evictedCount++
+
+			hyperCache.StatsCollector.Incr("item_evicted_count", 1)
+		}
+
+		itemCount, err := converters.ToInt64(hyperCache.backend.Count(ctx))
+		if err != nil {
+			return err
+		}
+
+		hyperCache.StatsCollector.Gauge("item_count", itemCount)
+
+		evictedCount64, err := converters.ToInt64(evictedCount)
+		if err != nil {
+			return err
+		}
+
+		hyperCache.StatsCollector.Gauge("evicted_item_count", evictedCount64)
+
+		return nil
+	})
+}
+
+// evictItem is a helper function that removes an item from the cache and returns the key of the evicted item.
+// If no item can be evicted, it returns false.
+func (hyperCache *HyperCache[T]) evictItem(ctx context.Context) (string, bool) {
+	key, ok := hyperCache.evictionAlgorithm.Evict()
+	if !ok {
+		// no more items to evict
+		return "", false
+	}
+
+	err := hyperCache.Remove(ctx, key)
+	if err != nil {
+		return "", false
+	}
+
+	return key, true
+}
+
+// SetCapacity sets the capacity of the cache. If the new capacity is smaller than the current number of items in the cache,
+// it evicts the excess items from the cache.
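+// The excess is reclaimed through evictionLoop, the same code path the
+// background eviction routine uses.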
+func (hyperCache *HyperCache[T]) SetCapacity(ctx context.Context, capacity int) { + // set capacity of the backend + hyperCache.backend.SetCapacity(capacity) + // evaluate again if the cache should evict items proactively + hyperCache.shouldEvict.Swap(hyperCache.evictionInterval == 0 && hyperCache.backend.Capacity() > 0) + + // if the cache size is greater than the new capacity, evict items + if hyperCache.backend.Count(ctx) > hyperCache.Capacity() { + hyperCache.evictionLoop(ctx) + } +} + +// TriggerEviction sends a signal to the eviction loop to start. +func (hyperCache *HyperCache[T]) TriggerEviction(_ context.Context) { + // Safe, non-blocking trigger; no-op if channel not initialized + if hyperCache.evictCh == nil { + return + } + + select { + case hyperCache.evictCh <- true: + default: + } +} diff --git a/hypercache_expiration.go b/hypercache_expiration.go new file mode 100644 index 0000000..f02baaa --- /dev/null +++ b/hypercache_expiration.go @@ -0,0 +1,149 @@ +package hypercache + +import ( + "context" + "time" + + "github.com/hyp3rd/hypercache/internal/constants" + "github.com/hyp3rd/hypercache/pkg/backend" + cache "github.com/hyp3rd/hypercache/pkg/cache/v2" +) + +// startExpirationRoutine launches the expiration loop and listens to manual triggers and stop signals. +func (hyperCache *HyperCache[T]) startExpirationRoutine(ctx context.Context) { + go func() { + var tick *time.Ticker + + if hyperCache.expirationInterval > 0 { + tick = time.NewTicker(hyperCache.expirationInterval) + } + + for { + if hyperCache.handleExpirationSelect(ctx, tick) { // returns true when loop should exit + return + } + } + }() +} + +// handleExpirationSelect processes one select iteration; returns true if caller should exit. +func (hyperCache *HyperCache[T]) handleExpirationSelect(ctx context.Context, tick *time.Ticker) bool { + var tickC <-chan time.Time + + if tick != nil { + tickC = tick.C + } + + select { + case <-tickC: + // scheduled expiration + hyperCache.expirationLoop(ctx) + case <-hyperCache.expirationTriggerCh: + // manual/coalesced trigger + hyperCache.expirationLoop(ctx) + hyperCache.expirationSignalPending.Store(false) + + // drain any queued triggers quickly + for draining := true; draining; { + select { + case <-hyperCache.expirationTriggerCh: + // keep draining + default: + draining = false + } + } + + case <-hyperCache.evictCh: + // manual eviction trigger + hyperCache.evictionLoop(ctx) + case <-ctx.Done(): + if tick != nil { + tick.Stop() + } + + return true + + case <-hyperCache.stop: + if tick != nil { + tick.Stop() + } + + return true + } + + return false +} + +// execTriggerExpiration coalesces and optionally debounces expiration triggers to avoid flooding the channel. +func (hyperCache *HyperCache[T]) execTriggerExpiration() { + // Optional debounce: if configured, drop triggers that arrive within the interval. + if d := hyperCache.expirationDebounceInterval; d > 0 { + last := time.Unix(0, hyperCache.lastExpirationTrigger.Load()) + if time.Since(last) < d { + // record backpressure metric + hyperCache.StatsCollector.Incr(constants.StatIncr, 1) + + return + } + } + + // Coalesce: if a signal is already pending, skip enqueueing another. 
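+	// (Swap returns the previous value atomically: a true result means a
+	// trigger is already queued, so this one can be dropped without
+	// losing an expiration pass.)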
+ if hyperCache.expirationSignalPending.Swap(true) { + hyperCache.StatsCollector.Incr(constants.StatIncr, 1) + + return + } + + select { + case hyperCache.expirationTriggerCh <- true: + hyperCache.lastExpirationTrigger.Store(time.Now().UnixNano()) + default: + // channel full; keep pending flag set and record metric + hyperCache.StatsCollector.Incr(constants.StatIncr, 1) + } +} + +// expirationLoop runs in a worker goroutine and removes expired items from the cache. +func (hyperCache *HyperCache[T]) expirationLoop(ctx context.Context) { + hyperCache.workerPool.Enqueue(func() error { + hyperCache.StatsCollector.Incr("expiration_loop_count", 1) + defer hyperCache.StatsCollector.Timing("expiration_loop_duration", time.Now().UnixNano()) + + var ( + expiredCount int64 + items []*cache.Item + err error + ) + + // get all expired items + items, err = hyperCache.List(ctx, + backend.WithSortBy(constants.SortByExpiration.String()), + backend.WithFilterFunc(func(item *cache.Item) bool { + return item.Expiration > 0 && time.Since(item.LastAccess) > item.Expiration + })) + if err != nil { + return err + } + + // iterate all expired items and remove them + for _, item := range items { + expiredCount++ + + err := hyperCache.Remove(ctx, item.Key) + if err != nil { + return err + } + + hyperCache.itemPoolManager.Put(item) + hyperCache.StatsCollector.Incr("item_expired_count", 1) + } + + hyperCache.StatsCollector.Gauge("item_count", int64(hyperCache.backend.Count(ctx))) + hyperCache.StatsCollector.Gauge("expired_item_count", expiredCount) + + return nil + }) +} + +// TriggerExpiration exposes a manual expiration trigger (debounced/coalesced internally). +func (hyperCache *HyperCache[T]) TriggerExpiration() { hyperCache.execTriggerExpiration() } diff --git a/hypercache_io.go b/hypercache_io.go new file mode 100644 index 0000000..320ec44 --- /dev/null +++ b/hypercache_io.go @@ -0,0 +1,313 @@ +package hypercache + +import ( + "context" + "time" + + "github.com/hyp3rd/hypercache/internal/sentinel" + "github.com/hyp3rd/hypercache/pkg/backend" + cache "github.com/hyp3rd/hypercache/pkg/cache/v2" +) + +// Set adds an item to the cache with the given key and value. If an item with the same key already +// exists, it updates the value. If the expiration duration is greater than zero, the item will +// expire after the specified duration. +// +// If capacity is reached: +// - when evictionInterval == 0 we evict immediately +// - otherwise the background eviction loop will reclaim space +func (hyperCache *HyperCache[T]) Set(ctx context.Context, key string, value any, expiration time.Duration) error { + // Create a new cache item and set its properties + item := hyperCache.itemPoolManager.Get() + + item.Key = key + item.Value = value + item.Expiration = expiration + item.LastAccess = time.Now() + + // Set the size of the item (aligned with backend serializer when available) + err := hyperCache.setItemSize(item) + if err != nil { + return err + } + + // check if adding this item will exceed the maxCacheSize + hyperCache.memoryAllocation.Add(item.Size) + + if hyperCache.maxCacheSize > 0 && hyperCache.memoryAllocation.Load() > hyperCache.maxCacheSize { + hyperCache.memoryAllocation.Add(-item.Size) + + return sentinel.ErrCacheFull + } + + // Insert the item into the cache + err = hyperCache.backend.Set(ctx, item) + if err != nil { + hyperCache.memoryAllocation.Add(-item.Size) + hyperCache.itemPoolManager.Put(item) + + return err + } + + // Set the item in the eviction algorithm. 
The algorithm protects its own + // state internally; no outer lock needed. + hyperCache.evictionAlgorithm.Set(key, item.Value) + + // If the cache is at capacity, evict an item when the eviction interval is zero + if hyperCache.shouldEvict.Load() && hyperCache.backend.Count(ctx) > hyperCache.backend.Capacity() { + hyperCache.evictItem(ctx) + } + + return nil +} + +// Get retrieves the item with the given key from the cache returning the value and a boolean indicating if the item was found. +func (hyperCache *HyperCache[T]) Get(ctx context.Context, key string) (any, bool) { + item, ok := hyperCache.backend.Get(ctx, key) + if !ok { + return nil, false + } + + // Check if the item has expired, if so, trigger the expiration loop + if item.Expired() { + // Non-blocking trigger of expiration loop (do not return to pool yet; backend still holds it) + // Coalesced/debounced trigger + hyperCache.execTriggerExpiration() + + return nil, false + } + + // Update the last access time and access count + hyperCache.touchItem(ctx, key, item) + + return item.Value, true +} + +// GetWithInfo retrieves the item with the given key from the cache returning the `Item` object and a boolean indicating if the item was +// found. +func (hyperCache *HyperCache[T]) GetWithInfo(ctx context.Context, key string) (*cache.Item, bool) { + item, ok := hyperCache.backend.Get(ctx, key) + // Check if the item has expired if it exists, if so, trigger the expiration loop + if !ok { + return nil, false + } + + // Check if the item has expired, if so, trigger the expiration loop + if item.Expired() { + // Non-blocking trigger of expiration loop; don't return to pool here + // Coalesced/debounced trigger + hyperCache.execTriggerExpiration() + + return nil, false + } + + // Update the last access time and access count + hyperCache.touchItem(ctx, key, item) + + return item, true +} + +// GetOrSet retrieves the item with the given key. If the item is not found, it adds the item to the cache with the given value and +// expiration duration. +// If the capacity of the cache is reached, leverage the eviction algorithm. 
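+// Note: the lookup and the insert are not one atomic step; two concurrent
+// callers can both miss and both store the value for the same key.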
+func (hyperCache *HyperCache[T]) GetOrSet(ctx context.Context, key string, value any, expiration time.Duration) (any, error) { + // if the item is found, return the value + if item, ok := hyperCache.backend.Get(ctx, key); ok { + // Check if the item has expired + if item.Expired() { + // Non-blocking trigger of expiration loop; don't pool here to avoid zeroing live refs + // Coalesced/debounced trigger + hyperCache.execTriggerExpiration() + + return nil, sentinel.ErrKeyExpired + } + + // Update the last access time and access count + hyperCache.touchItem(ctx, key, item) + + return item.Value, nil + } + + // if the item is not found, add it to the cache + item := hyperCache.itemPoolManager.Get() + + item.Key = key + item.Value = value + item.Expiration = expiration + item.LastAccess = time.Now() + + // Set the size of the item (aligned with backend serializer when available) + err := hyperCache.setItemSize(item) + if err != nil { + return nil, err + } + + // check if adding this item will exceed the maxCacheSize + hyperCache.memoryAllocation.Add(item.Size) + + if hyperCache.maxCacheSize > 0 && hyperCache.memoryAllocation.Load() > hyperCache.maxCacheSize { + hyperCache.memoryAllocation.Add(-item.Size) + hyperCache.itemPoolManager.Put(item) + + return nil, sentinel.ErrCacheFull + } + + // Insert the item into the cache + err = hyperCache.backend.Set(ctx, item) + if err != nil { + hyperCache.memoryAllocation.Add(-item.Size) + hyperCache.itemPoolManager.Put(item) + + return nil, err + } + + // Set the item in the eviction algorithm synchronously. The previous bare + // goroutine had no panic recovery, no shutdown coordination with Stop(), + // and let the caller observe a key whose eviction tracking had not yet + // been recorded. The algorithm's own internal mutex provides safety. + hyperCache.evictionAlgorithm.Set(key, item.Value) + + // If the cache is at capacity, evict an item when the eviction interval is zero + if hyperCache.shouldEvict.Load() && hyperCache.backend.Count(ctx) > hyperCache.backend.Capacity() { + hyperCache.evictItem(ctx) + } + + return value, nil +} + +// GetMultiple retrieves the items with the given keys from the cache. +func (hyperCache *HyperCache[T]) GetMultiple(ctx context.Context, keys ...string) (map[string]any, map[string]error) { + result := make(map[string]any, len(keys)) // Preallocate the result map + failed := make(map[string]error, len(keys)) // Preallocate the errors map + + for _, key := range keys { + item, ok := hyperCache.backend.Get(ctx, key) + if !ok { + // Add the key to the errors map and continue + failed[key] = sentinel.ErrKeyNotFound + + continue + } + + // Check if the item has expired + if item.Expired() { + // Treat expired items as not found per API semantics; don't pool here to avoid zeroing live refs + failed[key] = sentinel.ErrKeyNotFound + // Coalesced/debounced trigger of the expiration loop via channel + hyperCache.execTriggerExpiration() + } else { + hyperCache.touchItem(ctx, key, item) // Update the last access time and access count + + // Add the item to the result map + result[key] = item.Value + } + } + + return result, failed +} + +// touchItem records an access on the backend (when supported) and on the item itself. +func (hyperCache *HyperCache[T]) touchItem(ctx context.Context, key string, item *cache.Item) { + if item == nil { + return + } + + if toucher, ok := hyperCache.backend.(touchBackend); ok { + toucher.Touch(ctx, key) + } + + item.Touch() +} + +// List lists the items in the cache that meet the specified criteria. 
+// It accepts a variadic list of backend.IFilter values and delegates to the configured backend's List implementation, passing the
+// filters through unchanged.
+func (hyperCache *HyperCache[T]) List(ctx context.Context, filters ...backend.IFilter) ([]*cache.Item, error) {
+	return hyperCache.backend.List(ctx, filters...)
+}
+
+// setItemSize computes item.Size using the backend serializer when available for accuracy.
+// Falls back to the Item's internal SetSize when no serializer is present.
+func (hyperCache *HyperCache[T]) setItemSize(item *cache.Item) error {
+	// Prefer backend-specific serialization for accurate size accounting.
+	switch backendImpl := any(hyperCache.backend).(type) {
+	case *backend.Redis:
+		if backendImpl.Serializer != nil {
+			data, err := backendImpl.Serializer.Marshal(item.Value)
+			if err != nil {
+				return err
+			}
+
+			item.Size = int64(len(data))
+
+			return nil
+		}
+
+	case *backend.RedisCluster:
+		if backendImpl.Serializer != nil {
+			data, err := backendImpl.Serializer.Marshal(item.Value)
+			if err != nil {
+				return err
+			}
+
+			item.Size = int64(len(data))
+
+			return nil
+		}
+
+	default:
+		// Fall back to generic size estimation for backends without a serializer.
+	}
+
+	return item.SetSize()
+}
+
+// Remove removes the items with the given keys from the cache. Keys that are not found are ignored.
+func (hyperCache *HyperCache[T]) Remove(ctx context.Context, keys ...string) error {
+	// Update memory accounting and eviction tracking for each key before
+	// delegating the actual removal to the backend.
+	for _, key := range keys {
+		item, ok := hyperCache.backend.Get(ctx, key)
+		if ok {
+			// release the item's memory accounting and drop its eviction entry
+			hyperCache.memoryAllocation.Add(-item.Size)
+			hyperCache.evictionAlgorithm.Delete(key)
+		}
+	}
+
+	err := hyperCache.backend.Remove(ctx, keys...)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Clear removes all items from the cache.
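+//
+// Accounting sketch (hypothetical hc/ctx): Remove and Clear both keep
+// the eviction algorithm and the memory counter in sync with the
+// backend:
+//
+//	_ = hc.Remove(ctx, "a", "b") // subtracts each found item's Size, drops its eviction entry
+//	_ = hc.Clear(ctx)            // drops every eviction entry, then resets memoryAllocation to 0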
+func (hyperCache *HyperCache[T]) Clear(ctx context.Context) error {
+	var (
+		items []*cache.Item
+		err   error
+	)
+
+	// snapshot every item (List with no filters returns all) so each
+	// eviction-algorithm entry can be dropped below
+	items, err = hyperCache.backend.List(ctx)
+	if err != nil {
+		return err
+	}
+
+	// clear the cacheBackend
+	err = hyperCache.backend.Clear(ctx)
+	if err != nil {
+		return err
+	}
+
+	for _, item := range items {
+		hyperCache.evictionAlgorithm.Delete(item.Key)
+	}
+
+	// reset the memory allocation
+	hyperCache.memoryAllocation.Store(0)
+
+	return nil
+}
diff --git a/pkg/backend/dist_http_server.go b/pkg/backend/dist_http_server.go
index bceaca4..d0597fa 100644
--- a/pkg/backend/dist_http_server.go
+++ b/pkg/backend/dist_http_server.go
@@ -196,9 +196,8 @@ func (s *distHTTPServer) registerMerkle(_ context.Context, dm *DistMemory) {
 				continue
 			}
 
-			ch := shard.items.IterBuffered()
-			for t := range ch {
-				keys = append(keys, t.Key)
+			for k := range shard.items.All() {
+				keys = append(keys, k)
 			}
 		}
 
diff --git a/pkg/backend/dist_memory.go b/pkg/backend/dist_memory.go
index 500693b..1211102 100644
--- a/pkg/backend/dist_memory.go
+++ b/pkg/backend/dist_memory.go
@@ -699,12 +699,7 @@ func (dm *DistMemory) Count(_ context.Context) int {
 	total := 0
 
 	for _, s := range dm.shards {
-		// iterate channel to count; ConcurrentMap has Count helper
-		// but we call Count for each shard
-		for range s.items.IterBuffered() {
-			// ineff: but simple; optimize later with sized field
-			total++
-		}
+		total += s.items.Count()
 	}
 
 	return total
@@ -798,8 +793,8 @@ func (dm *DistMemory) Set(ctx context.Context, item *cache.Item) error {
 func (dm *DistMemory) List(_ context.Context, _ ...IFilter) ([]*cache.Item, error) {
 	items := make([]*cache.Item, 0, listPrealloc)
 
 	for _, s := range dm.shards {
-		for kv := range s.items.IterBuffered() {
-			cloned := kv.Val
+		for _, it := range s.items.All() {
+			cloned := *it
 
 			items = append(items, &cloned)
 		}
 
@@ -1322,9 +1317,8 @@ func (dm *DistMemory) enumerateRemoteOnlyKeys(nodeID string, local []merkleKV) m
 			continue
 		}
 
-		rch := shard.items.IterBuffered()
-		for t := range rch {
-			missing[t.Key] = struct{}{}
+		for k := range shard.items.All() {
+			missing[k] = struct{}{}
 		}
 	}
 
@@ -1392,9 +1386,8 @@ func (dm *DistMemory) merkleEntries() []merkleKV {
 			continue
 		}
 
-		ch := shard.items.IterBuffered()
-		for t := range ch {
-			entries = append(entries, merkleKV{k: t.Key, v: t.Val.Version})
+		for k, it := range shard.items.All() {
+			entries = append(entries, merkleKV{k: k, v: it.Version})
 		}
 
 		for k, ts := range shard.tombs { // include tombstones
@@ -1555,10 +1548,10 @@ func (dm *DistMemory) collectRebalanceCandidates() []cache.Item {
 			continue
 		}
 
-		for kv := range sh.items.IterBuffered() { // snapshot iteration
-			it := kv.Val
-			if dm.shouldRebalance(sh, &it) {
-				out = append(out, it)
+		for _, it := range sh.items.All() { // snapshot iteration
+			cloned := *it
+			if dm.shouldRebalance(sh, &cloned) {
+				out = append(out, cloned)
 			}
 		}
 	}
@@ -1629,12 +1622,21 @@ func (dm *DistMemory) replicateNewReplicas(ctx context.Context) {
 }
 
 func (dm *DistMemory) replDiffShard(ctx context.Context, sh *distShard, processed, limit int) int {
-	for kv := range sh.items.IterBuffered() {
+	// Snapshot under the iter.Seq2 RLock so we don't hold the shard read
+	// lock across sendReplicaDiff's network I/O. Capacity is conservative
+	// (Count may drift between snapshot start and end, but the slice
+	// grows as needed).
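+	//
+	// Not compiler-enforced, worth stating: All() yields *cache.Item
+	// pointers into the live map, so each element is copied by value
+	// into the snapshot before the RLock is released; retaining the
+	// pointers instead would race with concurrent writers.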
+ snapshot := make([]cache.Item, 0, sh.items.Count()) + for _, it := range sh.items.All() { + snapshot = append(snapshot, *it) + } + + for i := range snapshot { if limit > 0 && processed >= limit { return processed } - it := kv.Val + it := &snapshot[i] owners := dm.ring.Lookup(it.Key) if len(owners) == 0 || owners[0] != dm.localNode.ID { @@ -1652,7 +1654,7 @@ func (dm *DistMemory) replDiffShard(ctx context.Context, sh *distShard, processe continue } - processed = dm.sendReplicaDiff(ctx, &it, newRepls, processed, limit) + processed = dm.sendReplicaDiff(ctx, it, newRepls, processed, limit) dm.setOwnerBaseline(sh, it.Key, owners) } diff --git a/pkg/backend/inmemory.go b/pkg/backend/inmemory.go index b564b8c..ac448bf 100644 --- a/pkg/backend/inmemory.go +++ b/pkg/backend/inmemory.go @@ -90,10 +90,12 @@ func (cacheBackend *InMemory) List(_ context.Context, filters ...IFilter) ([]*ca items := make([]*cache.Item, 0, cacheBackend.items.Count()) - for item := range cacheBackend.items.IterBuffered() { - cloned := item + for _, item := range cacheBackend.items.All() { + // Copy under the iterator's RLock so the slice owns its data once + // the lock is released — All() yields pointers into the live map. + cloned := *item - items = append(items, &cloned.Val) + items = append(items, &cloned) } // Apply the filters diff --git a/pkg/cache/v2/cmap.go b/pkg/cache/v2/cmap.go index b41ff56..e7d96a4 100644 --- a/pkg/cache/v2/cmap.go +++ b/pkg/cache/v2/cmap.go @@ -9,7 +9,7 @@ // - Sharded design with 32 shards to reduce lock contention // - FNV-1a hash function for efficient key distribution // - Thread-safe operations with optimized read/write locking -// - Buffered iteration support for safe concurrent traversal +// - iter.Seq2 iteration via All() for safe concurrent traversal // - Standard map operations: Set, Get, Has, Remove, Pop, Clear, Count // // Example usage: @@ -22,6 +22,7 @@ package v2 import ( + "iter" "sync" "sync/atomic" "time" @@ -181,91 +182,47 @@ func (cm *ConcurrentMap) Pop(key string) (*Item, bool) { return item, ok } -// Tuple is used by the IterBuffered functions to wrap two variables together over a channel,. -type Tuple struct { - Key string - Val Item -} - -// IterBuffered returns a buffered iterator which could be used in a for range loop. -func (cm *ConcurrentMap) IterBuffered() <-chan Tuple { - chans := snapshot(cm) - - total := 0 - for _, c := range chans { - total += cap(c) - } - - ch := make(chan Tuple, total) - go fanIn(chans, ch) - - return ch -} - -// Returns a array of channels that contains elements in each shard, -// which likely takes a snapshot of `m`. -// It returns once the size of each buffered channel is determined, -// before all the channels are populated using goroutines. -func snapshot(cm *ConcurrentMap) []chan Tuple { - // When you access map items before initializing. - if len(cm.shards) == 0 { - panic(`cmap.ConcurrentMap is not initialized. Should run New() before usage.`) - } - - chans := make([]chan Tuple, ShardCount) - wg := sync.WaitGroup{} - wg.Add(ShardCount) - - // Foreach shard. - for index, shard := range cm.shards { - go func(index int, shard *ConcurrentMapShard) { - // Foreach key, value pair. - shard.RLock() - - // Determine capacity and copy to a local slice to shorten lock hold time. 
- n := len(shard.items) - - chans[index] = make(chan Tuple, n) - - local := make([]Tuple, 0, n) - for key, val := range shard.items { - local = append(local, Tuple{Key: key, Val: *val}) - } - - shard.RUnlock() - - wg.Done() +// All returns an iter.Seq2 that yields every (key, *Item) pair across all +// shards. Each shard is walked under its own RLock, released before moving +// to the next shard — concurrent writers to a different shard are not +// blocked. +// +// IMPORTANT: the yielded *Item points directly into the live map. Callers +// MUST treat it as read-only and copy if they need to retain the value +// past the yield. The shard's RLock is held during yield, so a +// long-running consumer body will block writers to that shard. Drain into +// a local slice first if the consumer needs to do I/O or block. +// +// Replaces the previous IterBuffered channel-based iterator: no fan-in +// goroutines, no per-shard channel allocations. +func (cm *ConcurrentMap) All() iter.Seq2[string, *Item] { + return func(yield func(string, *Item) bool) { + if len(cm.shards) == 0 { + panic(`cmap.ConcurrentMap is not initialized. Should run New() before usage.`) + } - for _, t := range local { - chans[index] <- t + for _, shard := range cm.shards { + if !yieldShard(shard, yield) { + return } - - close(chans[index]) - }(index, shard) + } } - - wg.Wait() - - return chans } -// fanIn reads elements from channels `chans` into channel `out`. -func fanIn(chans []chan Tuple, out chan Tuple) { - wg := sync.WaitGroup{} - wg.Add(len(chans)) - - for _, ch := range chans { - go func(ch chan Tuple) { - for t := range ch { - out <- t - } +// yieldShard walks one shard under RLock and forwards entries to yield. +// Returns true if iteration should continue, false if the consumer asked +// to stop. +func yieldShard(shard *ConcurrentMapShard, yield func(string, *Item) bool) bool { + shard.RLock() + defer shard.RUnlock() - wg.Done() - }(ch) + for k, v := range shard.items { + if !yield(k, v) { + return false + } } - wg.Wait() - close(out) + return true } // Remove removes the value under the specified key. diff --git a/pkg/cache/v2/cmap_bench_test.go b/pkg/cache/v2/cmap_bench_test.go index 758dd79..00a9131 100644 --- a/pkg/cache/v2/cmap_bench_test.go +++ b/pkg/cache/v2/cmap_bench_test.go @@ -79,10 +79,11 @@ func BenchmarkConcurrentMap_GetShard(b *testing.B) { } } -// BenchmarkConcurrentMap_IterBuffered measures allocation pressure of the -// channel-based iterator. Phase 2b replaces it with iter.Seq2 — this benchmark -// proves the alloc/op drops to ~0. -func BenchmarkConcurrentMap_IterBuffered(b *testing.B) { +// BenchmarkConcurrentMap_All measures allocation pressure of the iter.Seq2 +// iterator that replaced the channel-based IterBuffered in Phase 2b. The +// expected delta vs. the old IterBuffered baseline: ~0 alloc/op (no +// channels, no goroutines, no per-shard buffers). 
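+//
+// Hypothetical invocation (package path assumed from this repo's layout):
+//
+//	go test -bench=BenchmarkConcurrentMap_All -benchmem ./pkg/cache/v2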
+func BenchmarkConcurrentMap_All(b *testing.B) { cm := New() for i := range 4096 { cm.Set("k"+strconv.Itoa(i), &Item{Key: "k" + strconv.Itoa(i)}) @@ -92,8 +93,8 @@ func BenchmarkConcurrentMap_IterBuffered(b *testing.B) { b.ResetTimer() for b.Loop() { - for tup := range cm.IterBuffered() { - _ = tup + for k, v := range cm.All() { + _, _ = k, v } } } diff --git a/pkg/cache/v2/cmap_test.go b/pkg/cache/v2/cmap_test.go index fc79210..313f7e6 100644 --- a/pkg/cache/v2/cmap_test.go +++ b/pkg/cache/v2/cmap_test.go @@ -1,6 +1,7 @@ package v2 import ( + "iter" "sync" "testing" "time" @@ -251,7 +252,7 @@ func TestCount_SetExistingKeyDoesNotIncrement(t *testing.T) { } } -func TestIterBuffered(t *testing.T) { +func TestAll(t *testing.T) { t.Parallel() cm := New() @@ -261,15 +262,13 @@ func TestIterBuffered(t *testing.T) { "key3": {Value: "value3", Expiration: time.Hour}, } - // Set items for k, v := range items { cm.Set(k, v) } - // Iterate and verify found := make(map[string]Item) - for tuple := range cm.IterBuffered() { - found[tuple.Key] = tuple.Val + for k, v := range cm.All() { + found[k] = *v } if len(found) != len(items) { @@ -285,6 +284,40 @@ func TestIterBuffered(t *testing.T) { } } +// TestAll_EarlyTermination ensures yield(false) stops iteration without +// leaking the shard RLock — if the lock leaked, a subsequent Set would +// deadlock. +func TestAll_EarlyTermination(t *testing.T) { + t.Parallel() + + cm := New() + for i := range 100 { + cm.Set(string(rune(i)), &Item{Value: i}) + } + + // Stop after the first item. + count := 0 + + for range cm.All() { + count++ + + break + } + + if count != 1 { + t.Errorf("expected 1 iteration before break, got %d", count) + } + + // If RLock leaked, this Set would block forever — the test timeout + // would fire instead of failing fast. Run a Set to prove the lock was + // released cleanly. + cm.Set("after-break", &Item{Value: "ok"}) + + if !cm.Has("after-break") { + t.Error("Set after early-termination iteration did not take effect") + } +} + func TestClear(t *testing.T) { t.Parallel() @@ -339,7 +372,7 @@ func TestConcurrentAccess(t *testing.T) { wg.Wait() } -func TestSnapshotPanic(t *testing.T) { +func TestAll_PanicOnUninitialized(t *testing.T) { t.Parallel() defer func() { @@ -350,7 +383,13 @@ func TestSnapshotPanic(t *testing.T) { var cm ConcurrentMap - snapshot(&cm) + // The yield function should never run — the panic happens before the + // first iteration. Calling next() once forces the seq body to start. + next, stop := iter.Pull2(cm.All()) + + defer stop() + + next() } func BenchmarkSet(b *testing.B) { diff --git a/pkg/cache/v2/hash.go b/pkg/cache/v2/hash.go index b5bb8b7..03634fc 100644 --- a/pkg/cache/v2/hash.go +++ b/pkg/cache/v2/hash.go @@ -1,26 +1,45 @@ package v2 -// Hash returns a 32-bit FNV-1a hash of key. Exported so other packages (e.g. -// the sharded eviction wrapper) can use the same hash as ConcurrentMap and -// route the same key to the same logical shard — preserving cache-locality -// when the data shard and eviction shard have different counts. +import ( + "github.com/cespare/xxhash/v2" + "github.com/hyp3rd/sectools/pkg/converters" +) + +// Hash returns a 32-bit hash of key derived from xxhash64. Exported so +// other packages (e.g. the sharded eviction wrapper) can use the same +// hash as ConcurrentMap and route the same key to the same logical shard +// — preserving cache-locality when the data shard and eviction shard +// have different counts. 
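+//
+// (Routing sketch, illustrative: a consumer with N shards places key k
+// at index Hash(k) % N. Every consumer feeds the same Hash, so a given
+// key maps to one deterministic slot per structure even when the
+// structures disagree on N.)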
+// +// xxhash is already a direct dependency for cluster/ring.go consistent +// hashing; consolidating here removes the inlined FNV-1a implementation +// and gives ~1-3% speedup for keys longer than ~8 bytes plus better +// avalanche characteristics. Callers should treat the return value as +// opaque. // -// Step 4 of the modernization will replace this with xxhash.Sum64String for -// faster hashing and better distribution. Callers should treat the return -// value as opaque. +// We fold the 64-bit xxhash output into 32 bits via XOR of the high and +// low halves — cheap, preserves all entropy, and matches what Go's +// standard maphash does when callers want a 32-bit slot index. func Hash(key string) uint32 { const ( - fnvOffset32 = 2166136261 - fnvPrime32 = 16777619 + // hashFoldShift is the bit-shift used to fold the upper 32 bits of + // the xxhash64 output onto the lower 32 bits via XOR. + hashFoldShift = 32 + // lower32Mask zeroes the upper 32 bits of a uint64 so the result + // fits in uint32. Without it, the XOR `h ^ (h >> 32)` keeps + // h's upper 32 bits intact in the high half of the uint64, and + // converters.ToUint32 (correctly) rejects it. + lower32Mask uint64 = 0xFFFFFFFF ) - var sum uint32 = fnvOffset32 - - for i := range key { // Go 1.22+ integer range over string indices - sum ^= uint32(key[i]) + h := xxhash.Sum64String(key) + folded := (h ^ (h >> hashFoldShift)) & lower32Mask - sum *= fnvPrime32 + res, err := converters.ToUint32(folded) + if err != nil { + // Unreachable: folded is masked to MaxUint32 above. + panic("hash fold overflow: " + err.Error()) } - return sum + return res }
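+
+// Worked fold example (illustrative input, not a real key hash):
+//
+//	h             = 0x0123456789ABCDEF
+//	h >> 32       = 0x0000000001234567
+//	h ^ (h >> 32) = 0x0123456788888888 (low word now mixes both halves)
+//	& lower32Mask = 0x0000000088888888, returned as uint32(0x88888888)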