-
Notifications
You must be signed in to change notification settings - Fork 2
kv/lease: adopt CLOCK_MONOTONIC_RAW for lease-read path (#551) #604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5b00411
54a6f98
551ceaa
03edaa1
20a30ed
587717e
c5c27b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| // Package monoclock exposes a monotonic-raw clock for the lease-read | ||
| // path. | ||
| // | ||
| // Go's time.Now() returns a wall-clock value plus a monotonic reading | ||
| // backed by the kernel's CLOCK_MONOTONIC (Linux) or its equivalent, | ||
| // which is rate-adjusted ("slewed") by NTP at up to 500 ppm. That slew | ||
| // is small in steady state (~0.35 ms over a 700 ms lease window), but | ||
| // the safety case for leader-local lease reads should not depend on NTP | ||
| // being well-behaved: a misconfigured or abused time daemon can push | ||
| // the slew rate far past the nominal 500 ppm cap, and other monotonic | ||
| // time sources (e.g. CLOCK_MONOTONIC_COARSE) can compound the error. | ||
| // CLOCK_MONOTONIC_RAW is immune to NTP rate adjustment and step events | ||
| // and is what TiKV's lease path uses. | ||
| // | ||
| // Instant values are opaque int64 nanosecond counters. They are only | ||
| // comparable within the same process lifetime and MUST NOT be | ||
| // persisted, serialized, or sent over the wire — the zero point is | ||
| // arbitrary and changes across processes. Callers that need an | ||
| // externally-meaningful timestamp should sample time.Now() separately; | ||
| // Instant is only for intra-process lease-safety reasoning. | ||
| package monoclock | ||
|
|
||
| import "time" | ||
|
|
||
// Instant is a single sample of the monotonic-raw clock, held as an
// opaque nanosecond counter. An Instant whose counter is 0 (the Go zero
// value) stands for "no reading" and is equal to Zero.
type Instant struct{ ns int64 }
|
|
||
| // Zero is the unset Instant. | ||
| var Zero = Instant{} | ||
|
|
||
| // Now returns the current monotonic-raw instant. | ||
| func Now() Instant { return Instant{ns: nowNanos()} } | ||
|
|
||
| // IsZero reports whether i is the zero Instant. | ||
| func (i Instant) IsZero() bool { return i.ns == 0 } | ||
|
|
||
| // After reports whether i is strictly after j. | ||
| func (i Instant) After(j Instant) bool { return i.ns > j.ns } | ||
|
|
||
| // Before reports whether i is strictly before j. | ||
| func (i Instant) Before(j Instant) bool { return i.ns < j.ns } | ||
|
|
||
| // Sub returns i - j as a Duration. Meaningful only when neither i nor | ||
| // j is the zero Instant; callers must guard with IsZero first. | ||
| func (i Instant) Sub(j Instant) time.Duration { return time.Duration(i.ns - j.ns) } | ||
|
|
||
| // Add returns i advanced by d. | ||
| func (i Instant) Add(d time.Duration) Instant { return Instant{ns: i.ns + int64(d)} } | ||
|
|
||
| // Nanos returns the raw int64 counter. Intended for atomic.Int64 | ||
| // storage where a whole Instant struct cannot be stored atomically | ||
| // (see internal/raftengine/etcd/quorum_ack.go). | ||
| func (i Instant) Nanos() int64 { return i.ns } | ||
|
|
||
| // FromNanos reconstructs an Instant from a raw counter previously | ||
| // obtained via Nanos(). Counterpart to Nanos; the same intra-process | ||
| // caveats apply. | ||
| func FromNanos(ns int64) Instant { return Instant{ns: ns} } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| //go:build !(linux || darwin) | ||
|
|
||
| package monoclock | ||
|
|
||
| import "time" | ||
|
|
||
// epoch anchors the fallback monotonic counter. time.Since uses Go's
// runtime monotonic component and is step-immune, though unlike
// CLOCK_MONOTONIC_RAW it is still subject to NTP rate adjustment. On
// platforms where golang.org/x/sys/unix does not export
// CLOCK_MONOTONIC_RAW (FreeBSD, Windows, Plan 9, ...) this is the
// closest portable substitute; lease safety on those platforms
// therefore matches the pre-#551 behaviour. Linux and Darwin use
// the raw clock (monoclock_unix.go).
var epoch = time.Now()

// nowNanos returns the nanoseconds elapsed since epoch, plus one.
//
// The +1 keeps the result strictly positive. time.Since(epoch) can be
// exactly 0 when the monotonic clock has not ticked since epoch was
// captured (coarse timers, first call right after package init), and a
// 0 counter is the zero Instant, which means "no reading". Instants are
// opaque with an arbitrary origin, so shifting every reading by one
// nanosecond does not change any comparison or difference.
func nowNanos() int64 {
	return int64(time.Since(epoch)) + 1
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| package monoclock | ||
|
|
||
| import ( | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| func TestInstant_ZeroIsZero(t *testing.T) { | ||
| t.Parallel() | ||
| require.True(t, Zero.IsZero()) | ||
| var i Instant | ||
| require.True(t, i.IsZero()) | ||
| require.True(t, FromNanos(0).IsZero()) | ||
| } | ||
|
|
||
| func TestNow_IsNonZeroAndMonotonic(t *testing.T) { | ||
| t.Parallel() | ||
| // CLOCK_MONOTONIC_RAW must advance across two Now() calls (modulo | ||
| // nanosecond-granularity ties; use a sleep to ensure monotonic | ||
| // progress). A regression that returns 0 or runs the clock | ||
| // backwards would break every lease-read safety guard. | ||
| a := Now() | ||
| require.False(t, a.IsZero(), "Now must return non-zero instant on supported platforms") | ||
| time.Sleep(100 * time.Microsecond) | ||
| b := Now() | ||
| require.False(t, b.Before(a), "monotonic-raw clock must not regress across calls") | ||
| require.True(t, b.After(a) || b == a) | ||
| } | ||
|
|
||
| func TestInstant_AddAndSub(t *testing.T) { | ||
| t.Parallel() | ||
| base := FromNanos(1_000_000) | ||
| later := base.Add(250 * time.Millisecond) | ||
| require.True(t, later.After(base)) | ||
| require.Equal(t, 250*time.Millisecond, later.Sub(base)) | ||
| require.Equal(t, -250*time.Millisecond, base.Sub(later)) | ||
| } | ||
|
|
||
| func TestInstant_NanosRoundtrip(t *testing.T) { | ||
| t.Parallel() | ||
| i := FromNanos(42) | ||
| require.Equal(t, int64(42), i.Nanos()) | ||
| } | ||
|
|
||
| func TestInstant_BeforeAfterOrdering(t *testing.T) { | ||
| t.Parallel() | ||
| a := FromNanos(100) | ||
| b := FromNanos(200) | ||
| require.True(t, a.Before(b)) | ||
| require.True(t, b.After(a)) | ||
| require.False(t, a.After(b)) | ||
| require.False(t, b.Before(a)) | ||
| // Equal instants: neither Before nor After. | ||
| c := FromNanos(100) | ||
| require.False(t, a.Before(c)) | ||
| require.False(t, a.After(c)) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| //go:build linux || darwin | ||
|
|
||
| package monoclock | ||
|
|
||
| import "golang.org/x/sys/unix" | ||
|
|
||
| // nowNanos reads CLOCK_MONOTONIC_RAW via clock_gettime(3). Only Linux | ||
| // and Darwin export this constant in golang.org/x/sys/unix; FreeBSD | ||
| // lacks the binding (its kernel exposes CLOCK_MONOTONIC_PRECISE, a | ||
| // different clock) and all other platforms use the portable fallback | ||
| // in monoclock_fallback.go. | ||
| // | ||
| // A non-nil error from ClockGettime should be essentially impossible | ||
| // on supported platforms — the syscall fails only on invalid clock | ||
| // IDs (compile-time constant here) or EFAULT on the timespec pointer | ||
| // (stack-allocated here). The realistic failure mode is a | ||
| // seccomp/sandbox profile that denies clock_gettime. We return 0 in | ||
| // that case: callers (leaseState.valid, engineLeaseAckValid) treat a | ||
| // zero Instant as "clock unavailable" and force the slow path, so a | ||
| // persistent syscall failure cannot leave a warmed lease valid. | ||
| func nowNanos() int64 { | ||
| var ts unix.Timespec | ||
| if err := unix.ClockGettime(unix.CLOCK_MONOTONIC_RAW, &ts); err != nil { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Selecting Useful? React with 👍 / 👎. |
||
| return 0 | ||
|
Comment on lines
+23
to
+24
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Returning Useful? React with 👍 / 👎. |
||
| } | ||
| return ts.Nano() | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the design doc to describe the implementation that ships in this PR.
Section 3.1 still shows the old
gen/expiryNanos atomic layout, and the Section 3.5 pseudocode still describes only the caller-side lease path. The code in kv/lease_state.go and kv/coordinator.go now uses atomic.Pointer[leaseSlot] plus the primary LastQuorumAck() fast path, so the document is already out of sync. Also applies to: 272-304
🤖 Prompt for AI Agents