-
Notifications
You must be signed in to change notification settings - Fork 2
perf(store): add ELASTICKV_FSM_SYNC_MODE for FSM apply fsync opt-out #592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
88910f9
56af1db
1236dae
de873f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ import ( | |
| "os" | ||
| "path/filepath" | ||
| "strconv" | ||
| "strings" | ||
| "sync" | ||
|
|
||
| "github.com/cockroachdb/errors" | ||
|
|
@@ -61,6 +62,31 @@ const ( | |
| // mebibyteShift converts MiB to bytes via x << mebibyteShift. Named to | ||
| // avoid a magic-number lint violation on the shift amount. | ||
| mebibyteShift = 20 | ||
|
|
||
| // fsmSyncModeEnv selects the Pebble WriteOptions used on the FSM | ||
| // commit path (ApplyMutations, DeletePrefixAt). Values: | ||
| // | ||
| // "sync" (default) — b.Commit(pebble.Sync); every committed raft | ||
| // entry triggers an fsync on the Pebble WAL. | ||
| // Strongest local durability; slowest. | ||
| // "nosync" — b.Commit(pebble.NoSync); the Pebble WAL | ||
| // still records the write, but is not fsynced. | ||
| // Durability still holds because the raft WAL | ||
| // (etcd/raft) fsyncs the committed entry | ||
| // upstream, and on restart the raft log is | ||
| // replayed from the last FSM-snapshot index; | ||
| // any apply that did not reach Pebble's | ||
| // fsync'd region is re-applied. | ||
| // | ||
| // The default is "sync" so production behaviour is unchanged without | ||
| // an explicit opt-in. See docs/fsm_sync_mode.md (or the PR body) for | ||
| // the full durability argument. | ||
| fsmSyncModeEnv = "ELASTICKV_FSM_SYNC_MODE" | ||
|
|
||
| // fsmSyncModeSync / fsmSyncModeNoSync are the accepted values for | ||
| // fsmSyncModeEnv. Any other value falls back to the default. | ||
| fsmSyncModeSync = "sync" | ||
| fsmSyncModeNoSync = "nosync" | ||
| ) | ||
|
|
||
| // pebbleCacheBytes is the effective per-store Pebble block-cache capacity, | ||
|
|
@@ -78,6 +104,23 @@ func init() { | |
| pebbleCacheBytes = resolvePebbleCacheBytes(os.Getenv(pebbleCacheMBEnv)) | ||
| } | ||
|
|
||
| // resolveFSMApplyWriteOpts parses an ELASTICKV_FSM_SYNC_MODE value and | ||
| // returns both the *pebble.WriteOptions used on the FSM commit path and | ||
| // the canonical label name. Surrounding whitespace is trimmed and case | ||
| // is normalised. Empty, malformed, or unrecognised values fall back to | ||
| // the default ("sync"). | ||
| // | ||
| // Unexported: called only from within this package; tests call it directly. | ||
| func resolveFSMApplyWriteOpts(envVal string) (*pebble.WriteOptions, string) { | ||
| switch strings.ToLower(strings.TrimSpace(envVal)) { | ||
| case fsmSyncModeNoSync: | ||
| return pebble.NoSync, fsmSyncModeNoSync | ||
| case "", fsmSyncModeSync: | ||
| return pebble.Sync, fsmSyncModeSync | ||
| default: | ||
| return pebble.Sync, fsmSyncModeSync | ||
| } | ||
|
Comment on lines
+114
to
+121
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The switch statement can be simplified by merging the redundant cases that fall back to the default sync behavior. This improves readability and maintainability by removing unnecessary code paths.
func resolveFSMApplyWriteOpts(envVal string) (*pebble.WriteOptions, string) {
switch strings.ToLower(strings.TrimSpace(envVal)) {
case fsmSyncModeNoSync:
return pebble.NoSync, fsmSyncModeNoSync
default:
return pebble.Sync, fsmSyncModeSync
}
}
References
|
||
| } | ||
|
|
||
| // resolvePebbleCacheBytes parses an ELASTICKV_PEBBLE_CACHE_MB value and | ||
| // returns the resolved cache size in bytes. Empty, malformed, or | ||
| // out-of-range values are rejected and fall back to the default rather | ||
|
|
@@ -127,6 +170,18 @@ type pebbleStore struct { | |
| // detected inside ApplyMutations. Polled by the monitoring | ||
| // WriteConflictCollector; not part of the authoritative OCC path. | ||
| writeConflicts *writeConflictCounter | ||
| // fsmApplyWriteOpts is the Pebble WriteOptions value applied on the | ||
| // FSM commit path (ApplyMutations, DeletePrefixAt). Resolved once | ||
| // from ELASTICKV_FSM_SYNC_MODE in NewPebbleStore and then treated | ||
| // as read-only for the store's lifetime. The default is pebble.Sync; | ||
| // operators may opt into pebble.NoSync when the raft WAL's | ||
| // durability is considered sufficient. | ||
| fsmApplyWriteOpts *pebble.WriteOptions | ||
| // fsmApplySyncModeLabel is the human-readable label corresponding | ||
| // to fsmApplyWriteOpts ("sync" or "nosync"). Kept alongside the | ||
| // write-options pointer so monitoring (elastickv_fsm_apply_sync_mode) | ||
| // and log lines stay in sync with the resolved mode. | ||
| fsmApplySyncModeLabel string | ||
| } | ||
|
|
||
| // Ensure pebbleStore implements MVCCStore and RetentionController. | ||
|
|
@@ -173,12 +228,15 @@ func defaultPebbleOptionsWithCache() (*pebble.Options, *pebble.Cache) { | |
|
|
||
| // NewPebbleStore creates a new Pebble-backed MVCC store. | ||
| func NewPebbleStore(dir string, opts ...PebbleStoreOption) (MVCCStore, error) { | ||
| fsmOpts, fsmLabel := resolveFSMApplyWriteOpts(os.Getenv(fsmSyncModeEnv)) | ||
| s := &pebbleStore{ | ||
| dir: dir, | ||
| log: slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ | ||
| Level: slog.LevelWarn, | ||
| })), | ||
| writeConflicts: newWriteConflictCounter(), | ||
| writeConflicts: newWriteConflictCounter(), | ||
| fsmApplyWriteOpts: fsmOpts, | ||
| fsmApplySyncModeLabel: fsmLabel, | ||
| } | ||
| for _, opt := range opts { | ||
| opt(s) | ||
|
|
@@ -424,6 +482,15 @@ func (s *pebbleStore) LastCommitTS() uint64 { | |
| return s.lastCommitTS | ||
| } | ||
|
|
||
| // FSMApplySyncModeLabel returns the resolved FSM sync-mode label | ||
| // ("sync" or "nosync") for this store. Consumed by monitoring to | ||
| // surface the current durability posture as a gauge with a mode label. | ||
| // The value is fixed for the store's lifetime (resolved once from | ||
| // ELASTICKV_FSM_SYNC_MODE in NewPebbleStore) so no locking is needed. | ||
| func (s *pebbleStore) FSMApplySyncModeLabel() string { | ||
| return s.fsmApplySyncModeLabel | ||
| } | ||
|
|
||
| func (s *pebbleStore) MinRetainedTS() uint64 { | ||
| s.mtx.RLock() | ||
| defer s.mtx.RUnlock() | ||
|
|
@@ -1053,7 +1120,11 @@ func (s *pebbleStore) ApplyMutations(ctx context.Context, mutations []*KVPairMut | |
| s.mtx.Unlock() | ||
| return err | ||
| } | ||
| if err := b.Commit(pebble.Sync); err != nil { | ||
| // s.fsmApplyWriteOpts is Sync by default. Operators may opt in to NoSync | ||
| // via ELASTICKV_FSM_SYNC_MODE=nosync when the raft WAL's durability is | ||
| // considered sufficient (raft-log replay from the last FSM snapshot | ||
| // re-applies any entries lost from Pebble after a crash). | ||
| if err := b.Commit(s.fsmApplyWriteOpts); err != nil { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Switching Useful? React with 👍 / 👎. |
||
| s.mtx.Unlock() | ||
| return errors.WithStack(err) | ||
| } | ||
|
|
@@ -1105,7 +1176,9 @@ func (s *pebbleStore) DeletePrefixAt(ctx context.Context, prefix []byte, exclude | |
| if err := setPebbleUint64InBatch(batch, metaLastCommitTSBytes, newLastTS); err != nil { | ||
| return err | ||
| } | ||
| if err := batch.Commit(pebble.Sync); err != nil { | ||
| // See ApplyMutations for the durability argument behind | ||
| // s.fsmApplyWriteOpts (ELASTICKV_FSM_SYNC_MODE). | ||
| if err := batch.Commit(s.fsmApplyWriteOpts); err != nil { | ||
| return errors.WithStack(err) | ||
| } | ||
| s.updateLastCommitTS(newLastTS) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.