From db029bd6e0e47f226c79f6feb4f6c9c2a508b020 Mon Sep 17 00:00:00 2001 From: Jon Cope Date: Tue, 7 Apr 2026 11:04:59 -0500 Subject: [PATCH 1/2] fix(kube-apiserver): Add fail-fast RBAC bootstrap hook deadlock detection On MicroShift restart, the RBAC bootstrap hook can deadlock when etcd contains existing data. The hook uses context.TODO() for API calls, which has no timeout. When the loopback client hangs, this creates a circular dependency where the hook waits for the API server while the API server waits for the hook to complete. This change adds a parallel deadlock detector that: - Monitors /readyz/poststarthook/rbac/bootstrap-roles specifically - Checks if etcd is healthy while the hook is stuck - Detects deadlock in ~15 seconds instead of waiting 60 seconds - Stops microshift-etcd.scope and fails kube-apiserver startup with an error, so that MicroShift can restart and recover from the deadlock This breaks the crash loop by detecting the condition early and taking recovery action at the MicroShift level, without requiring changes to vendored upstream Kubernetes code. Related upstream issues: kubernetes/kubernetes#86715, #97119 Co-Authored-By: Claude Opus 4.5 --- pkg/controllers/kube-apiserver.go | 135 +++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/pkg/controllers/kube-apiserver.go b/pkg/controllers/kube-apiserver.go index 70cb89dcdc..d11b2afcaa 100644 --- a/pkg/controllers/kube-apiserver.go +++ b/pkg/controllers/kube-apiserver.go @@ -23,6 +23,7 @@ import ( "io" "net" "os" + "os/exec" "path/filepath" "strconv" "strings" @@ -55,6 +56,12 @@ import ( const ( kubeAPIStartupTimeout = 60 + // rbacHookDeadlockTimeout is the time to wait for the RBAC bootstrap hook + // before declaring a deadlock. This is shorter than kubeAPIStartupTimeout + // to allow for faster recovery. 
+ rbacHookDeadlockTimeout = 15 + // rbacHookCheckInterval is how often to check the RBAC hook status + rbacHookCheckInterval = 2 ) var ( @@ -348,7 +355,13 @@ func (s *KubeAPIServer) Run(ctx context.Context, ready chan<- struct{}, stopped return err } - // run readiness check + // Channel to signal RBAC hook deadlock detection + rbacDeadlockDetected := make(chan struct{}) + + // Run RBAC hook deadlock detector + go s.detectRBACHookDeadlock(ctx, restClient, rbacDeadlockDetected) + + // Run standard readiness check go func() { err := wait.PollUntilContextTimeout(ctx, time.Second, kubeAPIStartupTimeout*time.Second, true, func(ctx context.Context) (bool, error) { var status int @@ -420,7 +433,127 @@ func (s *KubeAPIServer) Run(ctx context.Context, ready chan<- struct{}, stopped return err case perr := <-panicChannel: panic(perr) + case <-rbacDeadlockDetected: + klog.Error("RBAC bootstrap hook deadlock detected - restarting microshift-etcd.scope to recover") + if err := restartMicroshiftEtcdScope(); err != nil { + klog.Errorf("Failed to restart microshift-etcd.scope: %v", err) + } + return fmt.Errorf("RBAC bootstrap hook deadlock detected after %d seconds", rbacHookDeadlockTimeout) + } +} + +// detectRBACHookDeadlock monitors the RBAC bootstrap hook status and detects deadlock conditions. +// A deadlock is detected when: +// 1. The RBAC hook is not completing (stuck in "not finished" state) +// 2. etcd is healthy and responsive +// This indicates the circular dependency where the hook waits for API server +// while API server waits for the hook. 
+func (s *KubeAPIServer) detectRBACHookDeadlock(ctx context.Context, restClient rest.Interface, deadlockDetected chan<- struct{}) { + // Wait a few seconds before starting detection to allow normal startup + select { + case <-ctx.Done(): + return + case <-time.After(5 * time.Second): } + + checkCount := 0 + maxChecks := (rbacHookDeadlockTimeout - 5) / rbacHookCheckInterval // Account for initial delay + + for checkCount < maxChecks { + select { + case <-ctx.Done(): + return + case <-time.After(rbacHookCheckInterval * time.Second): + } + + checkCount++ + + // Check RBAC hook status + var status int + err := restClient.Get().AbsPath("/readyz/poststarthook/rbac/bootstrap-roles").Do(ctx).StatusCode(&status).Error() + + // If hook is ready, no deadlock + if err == nil && status == 200 { + klog.V(4).Info("RBAC bootstrap hook completed successfully") + return + } + + // Hook not ready - check if etcd is healthy + etcdHealthy, etcdErr := isEtcdHealthy(ctx) + if etcdErr != nil { + klog.V(4).Infof("Could not check etcd health: %v", etcdErr) + continue + } + + if etcdHealthy { + klog.Warningf("RBAC bootstrap hook not ready (check %d/%d), but etcd is healthy - potential deadlock", + checkCount, maxChecks) + } else { + // etcd not healthy - not a deadlock, just waiting for etcd + klog.V(4).Infof("RBAC hook waiting, etcd not yet healthy (check %d/%d)", checkCount, maxChecks) + // Reset counter since this isn't a deadlock condition + checkCount = 0 + } + } + + // Reached max checks with etcd healthy but hook not completing - deadlock detected + klog.Error("RBAC bootstrap hook deadlock confirmed: etcd healthy but hook not completing") + close(deadlockDetected) +} + +// isEtcdHealthy checks if etcd is responsive by attempting to connect and get status. 
+func isEtcdHealthy(ctx context.Context) (bool, error) { + certsDir := cryptomaterial.CertsDirectory(config.DataDir) + etcdAPIServerClientCertDir := cryptomaterial.EtcdAPIServerClientCertDir(certsDir) + + tlsInfo := transport.TLSInfo{ + CertFile: cryptomaterial.ClientCertPath(etcdAPIServerClientCertDir), + KeyFile: cryptomaterial.ClientKeyPath(etcdAPIServerClientCertDir), + TrustedCAFile: cryptomaterial.CACertPath(cryptomaterial.EtcdSignerDir(certsDir)), + } + tlsConfig, err := tlsInfo.ClientConfig() + if err != nil { + return false, fmt.Errorf("failed to create TLS config: %w", err) + } + + // Use a short timeout for health check + checkCtx, cancel := context.WithTimeout(ctx, 2*time.Second) + defer cancel() + + client, err := clientv3.New(clientv3.Config{ + Endpoints: []string{"https://localhost:2379"}, + DialTimeout: 1 * time.Second, + TLS: tlsConfig, + Context: checkCtx, + }) + if err != nil { + return false, fmt.Errorf("failed to create etcd client: %w", err) + } + defer func() { _ = client.Close() }() + + _, err = client.Status(checkCtx, "localhost:2379") + if err != nil { + return false, nil // etcd not healthy, but not an error condition + } + + return true, nil +} + +// restartMicroshiftEtcdScope restarts the microshift-etcd.scope to recover from deadlock. +// This forces a clean restart of etcd which can help break the circular dependency. 
+func restartMicroshiftEtcdScope() error { + klog.Info("Stopping microshift-etcd.scope for recovery") + + stopCmd := exec.Command("systemctl", "stop", "microshift-etcd.scope") + if out, err := stopCmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to stop microshift-etcd.scope: %w, output: %s", err, string(out)) + } + + // Wait briefly for cleanup + time.Sleep(1 * time.Second) + + klog.Info("microshift-etcd.scope stopped - MicroShift will restart") + return nil } func discoverEtcdServers(ctx context.Context, kubeconfigPath string) ([]string, error) { From 6de022ec95b6193e1f4190572e783fce8a3f69f1 Mon Sep 17 00:00:00 2001 From: Jon Cope Date: Tue, 7 Apr 2026 15:35:24 -0500 Subject: [PATCH 2/2] fix(kube-apiserver): Add wall-clock deadline to prevent flapping Add rbacHookMaxWaitDuration (30s) as an absolute deadline that cannot be reset by etcd health state changes. This prevents a flapping etcd from extending the deadlock detection indefinitely. The existing checkCount logic is preserved for detecting deadlock when etcd is consistently healthy, but the wall-clock deadline provides a hard upper bound regardless of etcd state transitions. Co-Authored-By: Claude Opus 4.5 --- pkg/controllers/kube-apiserver.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pkg/controllers/kube-apiserver.go b/pkg/controllers/kube-apiserver.go index d11b2afcaa..c0bcd4f47f 100644 --- a/pkg/controllers/kube-apiserver.go +++ b/pkg/controllers/kube-apiserver.go @@ -62,6 +62,10 @@ const ( rbacHookDeadlockTimeout = 15 // rbacHookCheckInterval is how often to check the RBAC hook status rbacHookCheckInterval = 2 + // rbacHookMaxWaitDuration is the absolute maximum time to wait for the RBAC hook + // regardless of etcd health state changes. This prevents flapping from extending + // detection indefinitely. 
+ rbacHookMaxWaitDuration = 30 * time.Second ) var ( @@ -456,10 +460,18 @@ func (s *KubeAPIServer) detectRBACHookDeadlock(ctx context.Context, restClient r case <-time.After(5 * time.Second): } + // Track wall-clock deadline to prevent flapping from extending detection indefinitely + startTime := time.Now() checkCount := 0 maxChecks := (rbacHookDeadlockTimeout - 5) / rbacHookCheckInterval // Account for initial delay for checkCount < maxChecks { + // Check absolute deadline first - this cannot be reset by etcd state changes + if time.Since(startTime) >= rbacHookMaxWaitDuration { + klog.Errorf("RBAC bootstrap hook exceeded maximum wait duration of %v", rbacHookMaxWaitDuration) + break + } + select { case <-ctx.Done(): return @@ -486,18 +498,20 @@ func (s *KubeAPIServer) detectRBACHookDeadlock(ctx context.Context, restClient r } if etcdHealthy { - klog.Warningf("RBAC bootstrap hook not ready (check %d/%d), but etcd is healthy - potential deadlock", - checkCount, maxChecks) + klog.Warningf("RBAC bootstrap hook not ready (check %d/%d, elapsed %v), but etcd is healthy - potential deadlock", + checkCount, maxChecks, time.Since(startTime).Round(time.Second)) } else { // etcd not healthy - not a deadlock, just waiting for etcd klog.V(4).Infof("RBAC hook waiting, etcd not yet healthy (check %d/%d)", checkCount, maxChecks) // Reset counter since this isn't a deadlock condition + // Note: wall-clock deadline (startTime) is NOT reset - flapping cannot extend indefinitely checkCount = 0 } } // Reached max checks with etcd healthy but hook not completing - deadlock detected - klog.Error("RBAC bootstrap hook deadlock confirmed: etcd healthy but hook not completing") + klog.Errorf("RBAC bootstrap hook deadlock confirmed after %v: etcd healthy but hook not completing", + time.Since(startTime).Round(time.Second)) close(deadlockDetected) }