From 443ac62f5499e86b1e200935accdc52f33a90228 Mon Sep 17 00:00:00 2001 From: matt423 Date: Thu, 4 Jun 2026 15:07:40 +0100 Subject: [PATCH 1/3] Resume web CLI session on return instead of killing it on inactivity A backgrounded terminal was hard-closed after 5 minutes with close code 4002, which handleWebSocketClose treats as non-recoverable: the sessionId was purged and the user had to press Enter to start a brand-new session, losing their shell. Reported as random "disconnects" when a tab is left open in the background. Now the inactivity close preserves the session: the sessionId is kept and the session is transparently resumed (reconnect carries the sessionId) as soon as the tab is foregrounded again. If the server rejects the resume (it reaped the detached session), we fall back to a fresh session rather than dead-ending on the manual prompt. The previously-hardcoded 5-minute timeout is now configurable via a new inactivityTimeoutMs prop, which also makes the pause/resume cycle quick to exercise locally. DX-1379 Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/AblyCliTerminal.inactivity.test.tsx | 467 ++++++++++++++++++ .../react-web-cli/src/AblyCliTerminal.tsx | 215 ++++++-- 2 files changed, 643 insertions(+), 39 deletions(-) create mode 100644 packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx diff --git a/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx b/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx new file mode 100644 index 000000000..f659aa572 --- /dev/null +++ b/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx @@ -0,0 +1,467 @@ +import React from "react"; +import { render, act } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { vi, describe, test, expect, beforeEach, afterEach } from "vitest"; + +// Mock global-reconnect so reconnect bookkeeping is inert in these tests. +vi.mock("./global-reconnect", () => ({ + getBackoffDelay: vi.fn(() => 0), + resetState: vi.fn(), + increment: vi.fn(), + cancelReconnect: vi.fn(), + scheduleReconnect: vi.fn(), + getAttempts: vi.fn(() => 0), + getMaxAttempts: vi.fn(() => 15), + isMaxAttemptsReached: vi.fn(() => false), + isCancelledState: vi.fn(() => false), + setCountdownCallback: vi.fn(), + successfulConnectionReset: vi.fn(), +})); + +vi.mock("./terminal-box", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + drawBox: vi.fn(), + clearBox: vi.fn(), + updateLine: vi.fn(), + updateSpinner: vi.fn(), + }; +}); + +vi.mock("@xterm/xterm", () => ({ + Terminal: vi.fn().mockImplementation(function () { + return { + open: vi.fn(), + write: vi.fn(), + writeln: vi.fn(), + reset: vi.fn(), + focus: vi.fn(), + clear: vi.fn(), + onData: vi.fn(), + onResize: vi.fn(), + dispose: vi.fn(), + loadAddon: vi.fn(), + options: {}, + element: null, + textarea: null, + scrollToBottom: vi.fn(), + attachCustomKeyEventHandler: vi.fn(), + buffer: { active: { cursorX: 0, cursorY: 0 } }, + }; + }), +})); + +vi.mock("@xterm/addon-fit", () => ({ + FitAddon: vi.fn().mockImplementation(function () { + return { + fit: vi.fn(), + proposeDimensions: vi.fn(() => ({ cols: 80, rows: 24 })), + }; + }), +})); + +vi.mock("lucide-react", () => ({ + SplitSquareHorizontal: () => null, + X: () => null, +})); + +vi.mock("./utils/crypto", () => ({ + hashCredentials: vi.fn(async (apiKey?: string, accessToken?: string) => { + return `hash-${apiKey || ""}:${accessToken || ""}`; + }), +})); + +// Import AFTER mocks. Note: use-terminal-visibility is deliberately NOT mocked — +// we drive the real hook via IntersectionObserver + document.visibilityState so +// visibility changes trigger real re-renders. +import { AblyCliTerminal } from "./AblyCliTerminal"; +import { CONTROL_MESSAGE_PREFIX } from "./terminal-shared"; + +const createControlMessage = (payload: unknown) => + CONTROL_MESSAGE_PREFIX + JSON.stringify(payload); + +const WS_URL = "wss://web-cli-terminal.ably-dev.com"; +const URL_HOST = new URL(WS_URL).host; +const SIGNED_CONFIG = JSON.stringify({ + apiKey: "app.key:secret", + accessToken: "tok", + timestamp: 1, +}); +const SIGNATURE = "test-signature"; +const INACTIVITY_MS = 100; + +// --- WebSocket mock --------------------------------------------------------- +type Listener = (ev: unknown) => void; +let sockets: FakeWebSocket[] = []; + +class FakeWebSocket { + static CONNECTING = 0; + static OPEN = 1; + static CLOSING = 2; + static CLOSED = 3; + url: string; + readyState = FakeWebSocket.CONNECTING; + listeners: Record = { + open: [], + message: [], + close: [], + error: [], + }; + send = vi.fn(); + close = vi.fn((code?: number, reason?: string) => { + if (this.readyState === FakeWebSocket.CLOSED) return; + this.readyState = FakeWebSocket.CLOSED; + this.dispatch("close", { code, reason, wasClean: true }); + }); + constructor(url: string) { + this.url = url; + sockets.push(this); + } + addEventListener(type: string, cb: Listener) { + (this.listeners[type] ||= []).push(cb); + } + removeEventListener(type: string, cb: Listener) { + this.listeners[type] = (this.listeners[type] || []).filter((l) => l !== cb); + } + dispatchEvent() { + return true; + } + dispatch(type: string, ev: unknown) { + (this.listeners[type] || []).forEach((l) => l(ev)); + } + fireOpen() { + this.readyState = FakeWebSocket.OPEN; + this.dispatch("open", {}); + } + fireMessage(data: unknown) { + this.dispatch("message", { data }); + } + // Mirror real WebSocket semantics: readyState flips to CLOSED before onclose. + fireClose(code: number, reason: string) { + this.readyState = FakeWebSocket.CLOSED; + this.dispatch("close", { code, reason, wasClean: true }); + } +} + +// --- visibility plumbing ---------------------------------------------------- +class FakeIntersectionObserver { + cb: (entries: Array<{ isIntersecting: boolean }>) => void; + constructor(cb: (entries: Array<{ isIntersecting: boolean }>) => void) { + this.cb = cb; + } + observe() { + this.cb([{ isIntersecting: true }]); + } + unobserve() {} + disconnect() {} +} + +let visibility = "visible"; +function setVisibility(v: string) { + visibility = v; + document.dispatchEvent(new Event("visibilitychange")); +} + +const flush = () => + act(async () => { + await Promise.resolve(); + await Promise.resolve(); + }); + +beforeEach(() => { + sockets = []; + visibility = "visible"; + sessionStorage.clear(); + Object.defineProperty(document, "visibilityState", { + configurable: true, + get: () => visibility, + }); + vi.stubGlobal("WebSocket", FakeWebSocket); + vi.stubGlobal("IntersectionObserver", FakeIntersectionObserver); + vi.useFakeTimers(); +}); + +afterEach(() => { + vi.useRealTimers(); + vi.unstubAllGlobals(); + vi.clearAllMocks(); +}); + +async function renderConnected(sessionId: string) { + await act(async () => { + render( + , + ); + }); + await flush(); + // First socket should have been created while visible. + expect(sockets.length).toBe(1); + const ws = sockets[0]; + await act(async () => { + ws.fireOpen(); + }); + await flush(); + // Server sends hello with a sessionId -> client stores it. + await act(async () => { + ws.fireMessage(createControlMessage({ type: "hello", sessionId })); + }); + await flush(); + return ws; +} + +describe("DX-1379: inactivity pause & resume-on-return", () => { + test("pause on background preserves the session (no purge) and closes with 4900", async () => { + const ws = await renderConnected("sess-1"); + expect(sessionStorage.getItem(`ably.cli.sessionId.${URL_HOST}`)).toBe( + "sess-1", + ); + + // Background the tab. + await act(async () => { + setVisibility("hidden"); + }); + await flush(); + + // Cross the (short) inactivity threshold. + await act(async () => { + vi.advanceTimersByTime(INACTIVITY_MS + 10); + }); + await flush(); + + // Private 4900 pause code, distinct from the server's 4002 "resume rejected". + const inactivityClose = ws.close.mock.calls.find( + ([code, reason]) => code === 4900 && reason === "inactivity-timeout", + ); + expect(inactivityClose).toBeTruthy(); + + // Session is PRESERVED for resume — not purged. + expect(sessionStorage.getItem(`ably.cli.sessionId.${URL_HOST}`)).toBe( + "sess-1", + ); + }); + + test("a brief background blip (returns before the timeout) does not pause", async () => { + const ws = await renderConnected("sess-blip"); + + await act(async () => { + setVisibility("hidden"); + }); + await flush(); + // Return before the threshold — the inactivity timer should be cancelled. + await act(async () => { + vi.advanceTimersByTime(INACTIVITY_MS / 2); + }); + await act(async () => { + setVisibility("visible"); + }); + await flush(); + // Now run well past the original threshold. + await act(async () => { + vi.advanceTimersByTime(INACTIVITY_MS * 2); + }); + await flush(); + + // No pause close, socket still open, no extra sockets opened. + expect(ws.close.mock.calls.some(([code]) => code === 4900)).toBe(false); + expect(ws.readyState).toBe(FakeWebSocket.OPEN); + expect(sockets.length).toBe(1); + }); + + test("honours a custom inactivityTimeoutMs (not the 5-minute default)", async () => { + const CUSTOM = 300; + await act(async () => { + render( + , + ); + }); + await flush(); + const ws = sockets[0]; + await act(async () => { + ws.fireOpen(); + }); + await flush(); + + await act(async () => { + setVisibility("hidden"); + }); + await flush(); + // Before the custom threshold: no pause. + await act(async () => { + vi.advanceTimersByTime(CUSTOM - 50); + }); + expect(ws.close.mock.calls.some(([code]) => code === 4900)).toBe(false); + // After it: paused. + await act(async () => { + vi.advanceTimersByTime(100); + }); + await flush(); + expect(ws.close.mock.calls.some(([code]) => code === 4900)).toBe(true); + }); + + test("a token-expiry (4008) during a resume is surfaced, not swallowed as a failed resume", async () => { + await renderConnected("sess-4008"); + + await act(async () => { + setVisibility("hidden"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(INACTIVITY_MS + 10); + }); + await flush(); + + const socketsAfterPause = sockets.length; + await act(async () => { + setVisibility("visible"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(50); + }); + await flush(); + + const resumeSocket = sockets[socketsAfterPause]; + const socketsBeforeClose = sockets.length; + await act(async () => { + resumeSocket.fireOpen(); + }); + await flush(); + // Server rejects with token-expired (NOT the 4002 resume-rejected code). + await act(async () => { + resumeSocket.fireClose(4008, "token expired"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(50); + }); + await flush(); + + // The session is purged (4008 is non-recoverable) and — crucially — we do + // NOT silently spin up a fresh session (that would hide the token error and + // loop against the same expired token). + expect(sessionStorage.getItem(`ably.cli.sessionId.${URL_HOST}`)).toBeNull(); + expect(sockets.length).toBe(socketsBeforeClose); + }); + + test("returning to the tab resumes the session (reconnect carries the sessionId)", async () => { + const ws = await renderConnected("sess-2"); + + await act(async () => { + setVisibility("hidden"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(INACTIVITY_MS + 10); + }); + await flush(); + expect(ws.readyState).toBe(FakeWebSocket.CLOSED); + + const socketsAfterPause = sockets.length; + + // Foreground the tab -> auto-resume. + await act(async () => { + setVisibility("visible"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(50); // resume uses a 20ms micro-delay + }); + await flush(); + + // A fresh socket was opened to resume. + expect(sockets.length).toBe(socketsAfterPause + 1); + const resumeSocket = sockets[socketsAfterPause]; + + await act(async () => { + resumeSocket.fireOpen(); + }); + await flush(); + + // The auth payload sent on (re)open carries the preserved sessionId. + const authSend = resumeSocket.send.mock.calls + .map(([raw]) => { + try { + return JSON.parse(raw as string); + } catch { + return null; + } + }) + .find((p) => p && p.sessionId); + expect(authSend?.sessionId).toBe("sess-2"); + }); + + test("a rejected resume falls back to a fresh session", async () => { + await renderConnected("sess-3"); + + await act(async () => { + setVisibility("hidden"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(INACTIVITY_MS + 10); + }); + await flush(); + + await act(async () => { + setVisibility("visible"); + }); + await flush(); + const socketsAfterPause = sockets.length; + + await act(async () => { + vi.advanceTimersByTime(50); + }); + await flush(); + + const resumeSocket = sockets[socketsAfterPause]; + const socketsBeforeReject = sockets.length; + + // Server rejects the resume (session was reaped): close with 4002 but NOT + // our "inactivity-timeout" reason. + await act(async () => { + resumeSocket.fireOpen(); + }); + await flush(); + await act(async () => { + resumeSocket.fireClose(4002, "session resume rejected"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(50); + }); + await flush(); + + // The stale session is purged and a brand-new session is started. + expect(sessionStorage.getItem(`ably.cli.sessionId.${URL_HOST}`)).toBeNull(); + expect(sockets.length).toBe(socketsBeforeReject + 1); + const freshSocket = sockets[socketsBeforeReject]; + await act(async () => { + freshSocket.fireOpen(); + }); + await flush(); + const freshAuth = freshSocket.send.mock.calls + .map(([raw]) => { + try { + return JSON.parse(raw as string); + } catch { + return null; + } + }) + .find(Boolean); + // Fresh session: no sessionId in the auth payload. + expect(freshAuth?.sessionId).toBeUndefined(); + }); +}); diff --git a/packages/react-web-cli/src/AblyCliTerminal.tsx b/packages/react-web-cli/src/AblyCliTerminal.tsx index 5b1c379f4..295b9f85b 100644 --- a/packages/react-web-cli/src/AblyCliTerminal.tsx +++ b/packages/react-web-cli/src/AblyCliTerminal.tsx @@ -119,6 +119,13 @@ export interface AblyCliTerminalProperties { */ resumeOnReload?: boolean; maxReconnectAttempts?: number; + /** + * Idle time (ms) after which a backgrounded session is paused (the websocket + * is closed to free server resources). When the tab is foregrounded again the + * session is resumed automatically. Defaults to 5 minutes. Lower it (e.g. in + * dev) to exercise the pause/resume cycle without waiting. + */ + inactivityTimeoutMs?: number; /** * When true, enables split-screen mode with a second independent terminal. * A split icon will be displayed in the top-right corner when in single-pane mode. @@ -165,6 +172,29 @@ if (globalThis.window !== undefined) { // stable between renders and doesn't trigger exhaustive-deps warnings. const spinnerFrames = ["● ", " ● ", " ●", " ● "]; +// Close codes that should *not* trigger automatic reconnection because they +// represent explicit server-side rejections. Codes such as 1005 (No Status) or +// 1006 (Abnormal Closure) can legitimately occur when the server is temporarily +// unreachable, so they are intentionally excluded (treated as recoverable). +// Single source of truth so the primary and secondary close handlers can't drift. +// 4900 (client-initiated inactivity pause) is deliberately NOT here — it is +// handled as a recoverable pause/resume, never a terminal disconnect. +const BASE_NON_RECOVERABLE_CLOSE_CODES = [ + 4001, // Policy violation (e.g. invalid credentials) + 4008, // Token expired + 1013, // Try again later – the server is telling us not to retry + 4002, // Session resume rejected + 4000, // Generic server error + 4003, // Rate limit exceeded + 4004, // Unsupported protocol version + 4009, // Server at capacity +] as const; + +// Private (4900-range) close code the client uses to pause a backgrounded +// session. Distinct from the server's 4002 "resume rejected" so the two are +// never confused. +const INACTIVITY_PAUSE_CLOSE_CODE = 4900; + const AblyCliTerminalInner = ( { websocketUrl, @@ -176,6 +206,7 @@ const AblyCliTerminalInner = ( onSessionId, resumeOnReload, maxReconnectAttempts, + inactivityTimeoutMs, enableSplitScreen = false, showSplitControl = true, }: AblyCliTerminalProperties, @@ -546,6 +577,12 @@ const AblyCliTerminalInner = ( // Ref to track manual reconnect prompt visibility inside stable event handlers const showManualReconnectPromptReference = useRef(false); + // Set when the session was paused due to background inactivity (rather than a + // server-side disconnect). Drives automatic resume when the tab is foregrounded. + const pausedForInactivityReference = useRef(false); + // Set while an inactivity-resume attempt is in flight, so a failed resume can + // fall back to a fresh session instead of dead-ending on the manual prompt. + const resumeAttemptReference = useRef(false); // Guard to ensure we do NOT double-count a failed attempt when both the // `error` and the subsequent `close` events fire for the *same* socket. const reconnectScheduledThisCycleReference = useRef(false); @@ -600,6 +637,11 @@ const AblyCliTerminalInner = ( const updateConnectionStatusAndExpose = useCallback( (status: ConnectionStatus) => { // updateConnectionStatusAndExpose debug removed + // A successful connection clears any in-flight inactivity-resume guard so + // a later unrelated disconnect isn't misread as a failed resume. + if (status === "connected") { + resumeAttemptReference.current = false; + } setComponentConnectionStatusState(status); // (window as any).componentConnectionStatusForTest = status; // Keep for direct inspection if needed, but primary is below // console.log(`[AblyCLITerminal] (window as any).componentConnectionStatusForTest SET TO: ${status}`); @@ -1283,6 +1325,13 @@ const AblyCliTerminalInner = ( debugLog( "⚠️ DIAGNOSTIC: Terminal not visible, skipping connection attempt", ); + // If this skipped attempt was a resume (tab re-hidden inside the resume + // delay), re-arm the pause so the next foreground resumes again rather + // than stranding a preserved-but-unconnected session. + if (resumeAttemptReference.current) { + resumeAttemptReference.current = false; + pausedForInactivityReference.current = true; + } return; } @@ -1745,7 +1794,8 @@ const AblyCliTerminalInner = ( const userClosedTerminal = event.reason === "user-closed-primary" || event.reason === "user-closed-secondary" || - event.reason === "manual-reconnect"; + event.reason === "manual-reconnect" || + event.reason === "resume-supersede"; if (userClosedTerminal) { debugLog( @@ -1754,6 +1804,25 @@ const AblyCliTerminalInner = ( return; // Don't try to reconnect if user closed the terminal intentionally } + // Client-initiated pause: the inactivity timer closed the socket because + // the tab was backgrounded. Preserve the session (don't purge sessionId) + // so it can be resumed automatically when the tab is foregrounded again. + if (event.code === INACTIVITY_PAUSE_CLOSE_CODE) { + debugLog( + "[AblyCLITerminal] Session paused for inactivity - preserving session for resume on return", + ); + grCancelReconnect(); + grResetState(); + pausedForInactivityReference.current = true; + if (term.current) { + term.current.writeln( + "\r\nSession paused while in the background. It will resume automatically when you return.", + ); + } + updateConnectionStatusAndExpose("disconnected"); + return; + } + // Close codes that should *not* trigger automatic reconnection because // they represent explicit server-side rejections or client-initiated // terminations. Codes such as 1005 (No Status) or 1006 (Abnormal @@ -1761,18 +1830,9 @@ const AblyCliTerminalInner = ( // unreachable – for example when the terminal server is still // starting up. Those cases should be treated as recoverable so they // are intentionally **excluded** from this list. - const NON_RECOVERABLE_CLOSE_CODES = new Set([ - 4001, // Policy violation (e.g. invalid credentials) - 4008, // Token expired - 1013, // Try again later – the server is telling us not to retry - 4002, // Session resume rejected - 4000, // Generic server error - 4003, // Rate limit exceeded - 4004, // Unsupported protocol version - 4009, // Server at capacity - // Note: 1005 removed - it's used for both graceful exit AND network disconnections - // We should handle exit commands differently, not by close code - ]); + const NON_RECOVERABLE_CLOSE_CODES = new Set( + BASE_NON_RECOVERABLE_CLOSE_CODES, + ); const inactivityRegex = /inactiv|timed out/i; if (event.code === 1000 && inactivityRegex.test(event.reason)) { @@ -1780,6 +1840,42 @@ const AblyCliTerminalInner = ( } if (NON_RECOVERABLE_CLOSE_CODES.has(event.code)) { + // A *resume rejection* (4002: the server reaped the detached session + // while the tab was backgrounded) should transparently fall back to a + // fresh session rather than dead-ending on the manual reconnect prompt. + // Gate strictly on 4002 so an unrelated failure during a resume (e.g. + // 4008 token expired, 4001 policy) is NOT swallowed — those must surface + // their real error instead of silently starting a doomed fresh session. + if (resumeAttemptReference.current && event.code === 4002) { + resumeAttemptReference.current = false; + debugLog( + "[AblyCLITerminal] Resume rejected by server - starting a fresh session", + ); + if (resumeOnReload && globalThis.window !== undefined) { + const urlDomain = new URL(websocketUrl).host; + globalThis.sessionStorage.removeItem( + `ably.cli.sessionId.${urlDomain}`, + ); + globalThis.sessionStorage.removeItem( + `ably.cli.credentialHash.${urlDomain}`, + ); + } + setSessionId(null); + sessionIdReference.current = null; + grResetState(); + grSuccessfulConnectionReset(); + clearPtyBuffer(); + setTimeout(() => { + connectWebSocketReference.current?.(); + startConnectingAnimation(false); + }, 20); + return; + } + + // Not a resume rejection: this is a genuine terminal disconnect. Clear + // the resume guard so it can't mis-fire on a later close. + resumeAttemptReference.current = false; + grCancelReconnect(); grResetState(); updateConnectionStatusAndExpose("disconnected"); @@ -1917,6 +2013,7 @@ const AblyCliTerminalInner = ( resumeOnReload, sessionId, clearConnectionTimeout, + clearPtyBuffer, ], ); @@ -2536,7 +2633,7 @@ const AblyCliTerminalInner = ( // Visibility & inactivity timer logic // ----------------------------------------------------------------------------------- - const INACTIVITY_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes + const INACTIVITY_TIMEOUT_MS = inactivityTimeoutMs ?? 5 * 60 * 1000; // default 5 minutes const inactivityTimerReference = useRef | null>( null, ); @@ -2551,25 +2648,23 @@ const AblyCliTerminalInner = ( const startInactivityTimer = useCallback(() => { clearInactivityTimer(); inactivityTimerReference.current = setTimeout(() => { - // Auto-terminate session due to prolonged invisibility + // Pause the session due to prolonged invisibility. Closing with + // "inactivity-timeout" lets handleWebSocketClose preserve the sessionId so + // it can be resumed when the tab is foregrounded again (see the resume + // effect below). If there is no open socket to close, mark the pause + // directly so the resume path still fires. if ( socketReference.current && socketReference.current.readyState === WebSocket.OPEN ) { - socketReference.current.close(4002, "inactivity-timeout"); - } - // Inform the user inside the terminal UI - if (term.current) { - term.current.writeln( - `\r\nSession terminated after ${INACTIVITY_TIMEOUT_MS / 60_000} minutes of inactivity.`, + socketReference.current.close( + INACTIVITY_PAUSE_CLOSE_CODE, + "inactivity-timeout", ); - term.current.writeln("Press ⏎ to start a new session."); + } else { + pausedForInactivityReference.current = true; + updateConnectionStatusAndExpose("disconnected"); } - grCancelReconnect(); - grResetState(); - setShowManualReconnectPrompt(true); - showManualReconnectPromptReference.current = true; - updateConnectionStatusAndExpose("disconnected"); }, INACTIVITY_TIMEOUT_MS); }, [ INACTIVITY_TIMEOUT_MS, @@ -2577,10 +2672,53 @@ const AblyCliTerminalInner = ( updateConnectionStatusAndExpose, ]); + // Resume a session that was paused for inactivity, preserving the sessionId so + // the server can reattach us to the existing PTY. Mirrors the manual-reconnect + // path but deliberately does NOT forget the session. + const resumeFromInactivity = useCallback(() => { + pausedForInactivityReference.current = false; + resumeAttemptReference.current = true; + showManualReconnectPromptReference.current = false; + setShowManualReconnectPrompt(false); + clearAnimationMessages(); + + // Defensively close any lingering socket without forgetting the session. + if ( + socketReference.current && + socketReference.current.readyState !== WebSocket.CLOSED + ) { + try { + socketReference.current.close(1000, "resume-supersede"); + } catch { + /* ignore */ + } + socketReference.current = null; + } + + // Give the browser a micro-task to mark the socket CLOSED before reconnect. + setTimeout(() => { + grResetState(); + grSuccessfulConnectionReset(); + setConnectionStartTime(null); + setShowInstallInstructions(false); + clearInstallInstructionsTimer(); + connectWebSocketReference.current?.(); + startConnectingAnimation(false); + }, 20); + }, [ + clearAnimationMessages, + clearInstallInstructionsTimer, + startConnectingAnimation, + ]); + // Manage the timer whenever visibility changes useEffect(() => { if (isVisible) { clearInactivityTimer(); + // Returning to a paused session: resume it automatically. + if (pausedForInactivityReference.current) { + resumeFromInactivity(); + } return; } // If not visible start countdown only if there is an active/open socket @@ -2590,7 +2728,12 @@ const AblyCliTerminalInner = ( ) { startInactivityTimer(); } - }, [isVisible, startInactivityTimer, clearInactivityTimer]); + }, [ + isVisible, + startInactivityTimer, + clearInactivityTimer, + resumeFromInactivity, + ]); useEffect(() => () => clearInactivityTimer(), [clearInactivityTimer]); @@ -3142,16 +3285,9 @@ const AblyCliTerminalInner = ( updateSecondaryConnectionStatus("disconnected"); // Check if this is a non-recoverable error - const NON_RECOVERABLE_CLOSE_CODES = new Set([ - 4001, // Policy violation (e.g. invalid credentials) - 4008, // Token expired - 1013, // Try again later - 4002, // Session resume rejected - 4000, // Generic server error - 4003, // Rate limit exceeded - 4004, // Unsupported protocol version - 4009, // Server at capacity - ]); + const NON_RECOVERABLE_CLOSE_CODES = new Set( + BASE_NON_RECOVERABLE_CLOSE_CODES, + ); if (NON_RECOVERABLE_CLOSE_CODES.has(event.code)) { // Clear the secondary session ID as it's no longer valid @@ -3167,7 +3303,8 @@ const AblyCliTerminalInner = ( // Check if this was a user-initiated close const userClosedTerminal = event.reason === "user-closed-secondary" || - event.reason === "manual-reconnect"; + event.reason === "manual-reconnect" || + event.reason === "resume-supersede"; if (!userClosedTerminal && secondaryTerm.current) { let title = "DISCONNECTED"; From e9404f1c0a6ffa3fb0ed274022382bf2aa97e05f Mon Sep 17 00:00:00 2001 From: matt423 Date: Thu, 4 Jun 2026 15:54:09 +0100 Subject: [PATCH 2/3] Refresh credentials before each handshake so reconnects don't expire MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed configs are short-lived (the terminal server rejects configs older than a few minutes). A resume-on-return — or any auto-reconnect more than a few minutes into a session — reused the original signed config and was rejected with "Config expired", forcing a noisy retry. Add an optional async refreshCredentials() prop that the component awaits in the open handler, just before sending the auth payload, for both the primary and secondary (split-screen) terminals. Embedders return a fresh signed config; when absent or on error we fall back to the signedConfig/signature props, so behaviour is unchanged for consumers that don't supply it. DX-1379 Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/AblyCliTerminal.inactivity.test.tsx | 118 ++++++++ .../react-web-cli/src/AblyCliTerminal.tsx | 265 +++++++++++------- 2 files changed, 280 insertions(+), 103 deletions(-) diff --git a/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx b/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx index f659aa572..d7399a68f 100644 --- a/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx +++ b/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx @@ -464,4 +464,122 @@ describe("DX-1379: inactivity pause & resume-on-return", () => { // Fresh session: no sessionId in the auth payload. expect(freshAuth?.sessionId).toBeUndefined(); }); + + test("resume uses freshly-refreshed credentials for the handshake", async () => { + const freshConfig = JSON.stringify({ + apiKey: "app.key:fresh", + accessToken: "fresh-tok", + timestamp: 2, + }); + const refreshCredentials = vi + .fn() + .mockResolvedValue({ signedConfig: freshConfig, signature: "fresh-sig" }); + + await act(async () => { + render( + , + ); + }); + await flush(); + const ws = sockets[0]; + await act(async () => { + ws.fireOpen(); + }); + await flush(); + await act(async () => { + ws.fireMessage( + createControlMessage({ type: "hello", sessionId: "sess-4" }), + ); + }); + await flush(); + + await act(async () => { + setVisibility("hidden"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(INACTIVITY_MS + 10); + }); + await flush(); + + const socketsAfterPause = sockets.length; + await act(async () => { + setVisibility("visible"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(50); + }); + await flush(); + + const resumeSocket = sockets[socketsAfterPause]; + await act(async () => { + resumeSocket.fireOpen(); + }); + await flush(); + + expect(refreshCredentials).toHaveBeenCalled(); + const authSend = resumeSocket.send.mock.calls + .map(([raw]) => { + try { + return JSON.parse(raw as string); + } catch { + return null; + } + }) + .find((p) => p && p.config); + // The handshake carries the refreshed signed config, not the stale prop. + expect(authSend?.config).toBe(freshConfig); + expect(authSend?.signature).toBe("fresh-sig"); + expect(authSend?.sessionId).toBe("sess-4"); + }); + + test.each([ + ["returns null", vi.fn().mockResolvedValue(null)], + ["throws", vi.fn().mockRejectedValue(new Error("offline"))], + ])( + "falls back to the prop credentials when refreshCredentials %s", + async (_label, refreshCredentials) => { + await act(async () => { + render( + , + ); + }); + await flush(); + const ws = sockets[0]; + await act(async () => { + ws.fireOpen(); + }); + await flush(); + + expect(refreshCredentials).toHaveBeenCalled(); + const authSend = ws.send.mock.calls + .map(([raw]) => { + try { + return JSON.parse(raw as string); + } catch { + return null; + } + }) + .find((p) => p && p.config); + // Refresh produced nothing usable, so the handshake still goes out using + // the static prop config rather than failing/hanging. + expect(authSend?.config).toBe(SIGNED_CONFIG); + expect(authSend?.signature).toBe(SIGNATURE); + }, + ); }); diff --git a/packages/react-web-cli/src/AblyCliTerminal.tsx b/packages/react-web-cli/src/AblyCliTerminal.tsx index 295b9f85b..55318f639 100644 --- a/packages/react-web-cli/src/AblyCliTerminal.tsx +++ b/packages/react-web-cli/src/AblyCliTerminal.tsx @@ -126,6 +126,17 @@ export interface AblyCliTerminalProperties { * dev) to exercise the pause/resume cycle without waiting. */ inactivityTimeoutMs?: number; + /** + * Called right before each (re)connection handshake to obtain fresh auth. + * Short-lived signed configs expire (the terminal server rejects configs + * older than a few minutes), so a resume or reconnect after the tab has been + * idle would otherwise fail with "Config expired". Return fresh credentials to + * use for the handshake, or null/undefined to keep the current + * signedConfig/signature props. Errors are swallowed and fall back to props. + */ + refreshCredentials?: () => Promise< + { signedConfig: string; signature: string } | null | undefined + >; /** * When true, enables split-screen mode with a second independent terminal. * A split icon will be displayed in the top-right corner when in single-pane mode. @@ -207,6 +218,7 @@ const AblyCliTerminalInner = ( resumeOnReload, maxReconnectAttempts, inactivityTimeoutMs, + refreshCredentials, enableSplitScreen = false, showSplitControl = true, }: AblyCliTerminalProperties, @@ -583,6 +595,41 @@ const AblyCliTerminalInner = ( // Set while an inactivity-resume attempt is in flight, so a failed resume can // fall back to a fresh session instead of dead-ending on the manual prompt. const resumeAttemptReference = useRef(false); + + // Effective auth used for handshakes. Starts from the props and is replaced by + // refreshCredentials() output (when provided) so resumes/reconnects after the + // tab has been idle use a fresh, non-expired signed config. + const refreshCredentialsReference = useRef(refreshCredentials); + const effectiveSignedConfigReference = useRef(signedConfig); + const effectiveSignatureReference = useRef(signature); + useEffect(() => { + refreshCredentialsReference.current = refreshCredentials; + }, [refreshCredentials]); + useEffect(() => { + effectiveSignedConfigReference.current = signedConfig; + }, [signedConfig]); + useEffect(() => { + effectiveSignatureReference.current = signature; + }, [signature]); + + // Pull fresh credentials before a handshake; falls back to current props. + const refreshAuth = useCallback(async () => { + const fn = refreshCredentialsReference.current; + if (!fn) return; + try { + const fresh = await fn(); + if (fresh?.signedConfig && fresh?.signature) { + effectiveSignedConfigReference.current = fresh.signedConfig; + effectiveSignatureReference.current = fresh.signature; + } + } catch (error) { + debugLog( + "[AblyCLITerminal] refreshCredentials failed; using existing auth", + error, + ); + } + }, []); + // Guard to ensure we do NOT double-count a failed attempt when both the // `error` and the subsequent `close` events fire for the *same* socket. const reconnectScheduledThisCycleReference = useRef(false); @@ -1465,87 +1512,97 @@ const AblyCliTerminalInner = ( const socketReference = useRef(null); // Ref to hold the current socket for cleanup - const handleWebSocketOpen = useCallback(() => { - // console.log('[AblyCLITerminal] WebSocket opened'); - // Clear connection timeout since we successfully connected - clearConnectionTimeout(); + const handleWebSocketOpen = useCallback( + async (sock: WebSocket) => { + // `sock` is the socket that fired this open event. We send auth on it + // specifically (not socketReference.current) because refreshAuth() awaits a + // network call, during which a newer reconnect could have replaced the ref. + // console.log('[AblyCLITerminal] WebSocket opened'); + // Clear connection timeout since we successfully connected + clearConnectionTimeout(); - // Do not reset reconnection attempts here; wait until terminal prompt confirms full session - setShowManualReconnectPrompt(false); + // Do not reset reconnection attempts here; wait until terminal prompt confirms full session + setShowManualReconnectPrompt(false); - // Only clear buffer for new sessions, not when resuming - if (sessionId) { - debugLog( - `⚠️ DIAGNOSTIC: Skipping PTY buffer clear for resumed session ${sessionId}`, - ); - // For resumed sessions, we might already be at a prompt - // Check if we need to activate the session immediately - if ( - connectionStatusReference.current === "connected" && - !isSessionActiveReference.current - ) { + // Only clear buffer for new sessions, not when resuming + if (sessionId) { debugLog( - `⚠️ DIAGNOSTIC: Resumed session but not active - checking for existing prompt`, + `⚠️ DIAGNOSTIC: Skipping PTY buffer clear for resumed session ${sessionId}`, ); + // For resumed sessions, we might already be at a prompt + // Check if we need to activate the session immediately + if ( + connectionStatusReference.current === "connected" && + !isSessionActiveReference.current + ) { + debugLog( + `⚠️ DIAGNOSTIC: Resumed session but not active - checking for existing prompt`, + ); + } + } else { + clearPtyBuffer(); // Clear buffer for new session prompt detection + debugLog(`⚠️ DIAGNOSTIC: Cleared PTY buffer for new session`); } - } else { - clearPtyBuffer(); // Clear buffer for new session prompt detection - debugLog(`⚠️ DIAGNOSTIC: Cleared PTY buffer for new session`); - } - - debugLog( - "⚠️ DIAGNOSTIC: WebSocket open handler started - tracking initialization sequence", - ); - if (term.current) { - debugLog("⚠️ DIAGNOSTIC: Focusing terminal"); - term.current.focus(); - // Don't send the initial command yet - wait for prompt detection - } + debugLog( + "⚠️ DIAGNOSTIC: WebSocket open handler started - tracking initialization sequence", + ); - // Send auth payload with signed config - const payload = createAuthPayload(sessionId, signedConfig, signature); + if (term.current) { + debugLog("⚠️ DIAGNOSTIC: Focusing terminal"); + term.current.focus(); + // Don't send the initial command yet - wait for prompt detection + } - debugLog( - `⚠️ DIAGNOSTIC: Preparing to send auth payload with env vars: ${JSON.stringify(payload.environmentVariables)}`, - ); - debugLog( - `⚠️ DIAGNOSTIC: Auth payload includes sessionId: ${payload.sessionId || "none (new session)"}`, - ); + // Refresh credentials before authenticating so a resume/reconnect after the + // tab has been idle doesn't hand the server an expired signed config. + await refreshAuth(); - if ( - socketReference.current && - socketReference.current.readyState === WebSocket.OPEN - ) { - debugLog("⚠️ DIAGNOSTIC: Sending auth payload to server"); - socketReference.current.send(JSON.stringify(payload)); - } + // Send auth payload with signed config + const payload = createAuthPayload( + sessionId, + effectiveSignedConfigReference.current, + effectiveSignatureReference.current, + ); - // Set up initial command to be sent when prompt is detected - // Skip initial command if we're resuming an existing session - if (initialCommand && !sessionId) { debugLog( - `⚠️ DIAGNOSTIC: Initial command present: "${initialCommand}" - will be sent when prompt is detected`, + `⚠️ DIAGNOSTIC: Preparing to send auth payload with env vars: ${JSON.stringify(payload.environmentVariables)}`, ); - pendingInitialCommandReference.current = initialCommand; - } else if (initialCommand && sessionId) { debugLog( - `⚠️ DIAGNOSTIC: Skipping initial command for resumed session ${sessionId}`, + `⚠️ DIAGNOSTIC: Auth payload includes sessionId: ${payload.sessionId || "none (new session)"}`, ); - } else if (!initialCommand) { - debugLog("⚠️ DIAGNOSTIC: No initial command provided"); - } - // persistence handled by dedicated useEffect - debugLog("WebSocket OPEN handler completed. sessionId:", sessionId); - }, [ - initialCommand, - clearPtyBuffer, - sessionId, - clearConnectionTimeout, - signedConfig, - signature, - ]); + if (sock.readyState === WebSocket.OPEN) { + debugLog("⚠️ DIAGNOSTIC: Sending auth payload to server"); + sock.send(JSON.stringify(payload)); + } + + // Set up initial command to be sent when prompt is detected + // Skip initial command if we're resuming an existing session + if (initialCommand && !sessionId) { + debugLog( + `⚠️ DIAGNOSTIC: Initial command present: "${initialCommand}" - will be sent when prompt is detected`, + ); + pendingInitialCommandReference.current = initialCommand; + } else if (initialCommand && sessionId) { + debugLog( + `⚠️ DIAGNOSTIC: Skipping initial command for resumed session ${sessionId}`, + ); + } else if (!initialCommand) { + debugLog("⚠️ DIAGNOSTIC: No initial command provided"); + } + + // persistence handled by dedicated useEffect + debugLog("WebSocket OPEN handler completed. sessionId:", sessionId); + }, + [ + initialCommand, + clearPtyBuffer, + sessionId, + clearConnectionTimeout, + refreshAuth, + ], + ); const handleWebSocketMessage = useCallback( async (event: MessageEvent) => { @@ -2549,7 +2606,10 @@ const AblyCliTerminalInner = ( const messageListener = (event: MessageEvent) => { void handleWebSocketMessage(event); }; - socket.addEventListener("open", handleWebSocketOpen); + const openListener = () => { + void handleWebSocketOpen(socket); + }; + socket.addEventListener("open", openListener); socket.addEventListener("message", messageListener); socket.addEventListener("close", handleWebSocketClose); socket.addEventListener("error", handleWebSocketError); @@ -2559,7 +2619,7 @@ const AblyCliTerminalInner = ( debugLog( "[AblyCLITerminal] Cleaning up WebSocket event listeners for old socket.", ); - socket.removeEventListener("open", handleWebSocketOpen); + socket.removeEventListener("open", openListener); socket.removeEventListener("message", messageListener); socket.removeEventListener("close", handleWebSocketClose); socket.removeEventListener("error", handleWebSocketError); @@ -3033,39 +3093,44 @@ const AblyCliTerminalInner = ( // WebSocket open handler newSocket.addEventListener("open", () => { - debugLog("[AblyCLITerminal] Secondary WebSocket opened"); - - // Clear any reconnect prompt - setSecondaryShowManualReconnectPrompt(false); - secondaryShowManualReconnectPromptReference.current = false; + void (async () => { + debugLog("[AblyCLITerminal] Secondary WebSocket opened"); - if (secondaryTerm.current) { - secondaryTerm.current.focus(); - } + // Clear any reconnect prompt + setSecondaryShowManualReconnectPrompt(false); + secondaryShowManualReconnectPromptReference.current = false; - // Send auth payload with signed config - const payload = createAuthPayload( - secondarySessionId, - signedConfig, - signature, - ); + if (secondaryTerm.current) { + secondaryTerm.current.focus(); + } - if (newSocket.readyState === WebSocket.OPEN) { - newSocket.send(JSON.stringify(payload)); - } + // Refresh credentials before authenticating (see primary handler). + await refreshAuth(); - // Set up initial command to be sent when prompt is detected - // Skip initial command if we're resuming an existing session - if (initialCommand && !secondarySessionId) { - debugLog( - `[AblyCLITerminal] [Secondary] Initial command present: "${initialCommand}" - will be sent when prompt is detected`, - ); - pendingSecondaryInitialCommandReference.current = initialCommand; - } else if (initialCommand && secondarySessionId) { - debugLog( - `[AblyCLITerminal] [Secondary] Skipping initial command for resumed session ${secondarySessionId}`, + // Send auth payload with signed config + const payload = createAuthPayload( + secondarySessionId, + effectiveSignedConfigReference.current, + effectiveSignatureReference.current, ); - } + + if (newSocket.readyState === WebSocket.OPEN) { + newSocket.send(JSON.stringify(payload)); + } + + // Set up initial command to be sent when prompt is detected + // Skip initial command if we're resuming an existing session + if (initialCommand && !secondarySessionId) { + debugLog( + `[AblyCLITerminal] [Secondary] Initial command present: "${initialCommand}" - will be sent when prompt is detected`, + ); + pendingSecondaryInitialCommandReference.current = initialCommand; + } else if (initialCommand && secondarySessionId) { + debugLog( + `[AblyCLITerminal] [Secondary] Skipping initial command for resumed session ${secondarySessionId}`, + ); + } + })(); }); // WebSocket message handler with binary framing support @@ -3348,13 +3413,7 @@ const AblyCliTerminalInner = ( return newSocket; // eslint-disable-next-line react-hooks/exhaustive-deps -- callback is accessed via connectSecondaryWebSocketReference ref; missing deps include hoisted callbacks and state that would cause cascading recreations - }, [ - websocketUrl, - signedConfig, - signature, - resumeOnReload, - secondarySessionId, - ]); + }, [websocketUrl, refreshAuth, resumeOnReload, secondarySessionId]); // Keep the ref updated with the latest callback connectSecondaryWebSocketReference.current = connectSecondaryWebSocket; From 9199a644ba211f5295d994b1003877ae5ec4842e Mon Sep 17 00:00:00 2001 From: matt423 Date: Fri, 5 Jun 2026 12:56:54 +0100 Subject: [PATCH 3/3] Bound the post-open handshake wait so a silent server can't hang the spinner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the terminal server accepted the WebSocket but never sent its hello / status:connected, the UI hung forever on the yellow "Connecting" (initial / resume) or "Reconnecting" (auto-reconnect) box. The 30s connection timeout only covers the pre-open CONNECTING phase, so once a socket opened there was nothing to bound the wait — reproduced both in the unit harness and the live dashboard. Arm an "awaiting hello" timeout (12s) when a socket opens; if it elapses before we reach `connected`, close the socket with a private recoverable code (4901) so the reconnect scheduler retries — with fresh credentials — and ultimately lands on the recoverable "Press Enter" prompt instead of a frozen spinner. The timer is cleared on connected and on close. DX-1379 Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/AblyCliTerminal.inactivity.test.tsx | 148 ++++++++++++++++++ .../react-web-cli/src/AblyCliTerminal.tsx | 83 +++++++++- 2 files changed, 230 insertions(+), 1 deletion(-) diff --git a/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx b/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx index d7399a68f..6629bcfe8 100644 --- a/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx +++ b/packages/react-web-cli/src/AblyCliTerminal.inactivity.test.tsx @@ -77,6 +77,7 @@ vi.mock("./utils/crypto", () => ({ // visibility changes trigger real re-renders. import { AblyCliTerminal } from "./AblyCliTerminal"; import { CONTROL_MESSAGE_PREFIX } from "./terminal-shared"; +import * as GlobalReconnect from "./global-reconnect"; const createControlMessage = (payload: unknown) => CONTROL_MESSAGE_PREFIX + JSON.stringify(payload); @@ -582,4 +583,151 @@ describe("DX-1379: inactivity pause & resume-on-return", () => { expect(authSend?.signature).toBe(SIGNATURE); }, ); + + // The reporter's "stuck on Connecting/Reconnecting to Ably" symptom: a socket + // that opens but never receives the server's hello. Before the await-hello + // timeout this hung forever (the 30s connection timeout only covers the + // pre-open CONNECTING phase). It must now be force-closed for retry. (DX-1379) + const AWAIT_HELLO_MS = 12_000; + + test("a socket that opens but never receives hello is force-closed for retry (not left hanging)", async () => { + await act(async () => { + render( + , + ); + }); + await flush(); + const ws = sockets[0]; + await act(async () => { + ws.fireOpen(); // opens, but the server stays silent (no hello) + }); + await flush(); + + // Just before the threshold: still waiting, not force-closed. + await act(async () => { + vi.advanceTimersByTime(AWAIT_HELLO_MS - 1000); + }); + expect( + ws.close.mock.calls.some( + ([, reason]) => reason === "awaiting-hello-timeout", + ), + ).toBe(false); + + // Cross the await-hello threshold -> the silent socket is closed for retry. + await act(async () => { + vi.advanceTimersByTime(1500); + }); + await flush(); + const helloTimeoutClose = ws.close.mock.calls.find( + ([code, reason]) => code === 4901 && reason === "awaiting-hello-timeout", + ); + expect(helloTimeoutClose).toBeTruthy(); + }); + + test("a resumed socket that never receives hello is closed for retry (the stuck-reconnecting repro)", async () => { + await renderConnected("sess-hang"); + + await act(async () => { + setVisibility("hidden"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(INACTIVITY_MS + 10); + }); + await flush(); + + const afterPause = sockets.length; + await act(async () => { + setVisibility("visible"); + }); + await flush(); + await act(async () => { + vi.advanceTimersByTime(50); + }); + await flush(); + + const resumeSocket = sockets[afterPause]; + await act(async () => { + resumeSocket.fireOpen(); // resume socket opens; server never sends hello + }); + await flush(); + + // It must not hang: after the await-hello timeout the resume socket is closed. + await act(async () => { + vi.advanceTimersByTime(AWAIT_HELLO_MS + 500); + }); + await flush(); + const helloTimeoutClose = resumeSocket.close.mock.calls.find( + ([code, reason]) => code === 4901 && reason === "awaiting-hello-timeout", + ); + expect(helloTimeoutClose).toBeTruthy(); + }); + + test("a hello within the await-hello window keeps the connection (no spurious close)", async () => { + await act(async () => { + render( + , + ); + }); + await flush(); + const ws = sockets[0]; + await act(async () => { + ws.fireOpen(); + }); + await flush(); + // Server replies in time. + await act(async () => { + ws.fireMessage( + createControlMessage({ type: "hello", sessionId: "sess-ok" }), + ); + }); + await flush(); + + // Advancing past the await-hello window must NOT close the live socket. + await act(async () => { + vi.advanceTimersByTime(AWAIT_HELLO_MS + 2000); + }); + await flush(); + // The live socket is not closed and stays usable. + expect( + ws.close.mock.calls.some( + ([, reason]) => reason === "awaiting-hello-timeout", + ), + ).toBe(false); + expect(ws.readyState).toBe(FakeWebSocket.OPEN); + }); + + test("the await-hello close code (4901) routes to a reconnect, not a terminal disconnect", async () => { + const ws = await renderConnected("sess-route"); + // Isolate the effect of the 4901 close from the initial-connect bookkeeping. + vi.mocked(GlobalReconnect.increment).mockClear(); + vi.mocked(GlobalReconnect.scheduleReconnect).mockClear(); + + // A 4901 close (what the await-hello timeout emits) must be treated as + // recoverable: schedule a reconnect rather than purging the session. + await act(async () => { + ws.fireClose(4901, "awaiting-hello-timeout"); + }); + await flush(); + + expect(GlobalReconnect.increment).toHaveBeenCalled(); + expect(GlobalReconnect.scheduleReconnect).toHaveBeenCalled(); + // Recoverable => the session is NOT purged. + expect(sessionStorage.getItem(`ably.cli.sessionId.${URL_HOST}`)).toBe( + "sess-route", + ); + }); }); diff --git a/packages/react-web-cli/src/AblyCliTerminal.tsx b/packages/react-web-cli/src/AblyCliTerminal.tsx index 55318f639..868a59c9d 100644 --- a/packages/react-web-cli/src/AblyCliTerminal.tsx +++ b/packages/react-web-cli/src/AblyCliTerminal.tsx @@ -206,6 +206,18 @@ const BASE_NON_RECOVERABLE_CLOSE_CODES = [ // never confused. const INACTIVITY_PAUSE_CLOSE_CODE = 4900; +// Private recoverable close code used when a socket opens but the server never +// sends its `hello`/`connected` within AWAIT_HELLO_TIMEOUT_MS. Closing it as a +// (recoverable, not in NON_RECOVERABLE) failure lets the reconnect scheduler +// retry rather than leaving the UI stuck on "Connecting"/"Reconnecting" forever. +const AWAIT_HELLO_CLOSE_CODE = 4901; + +// How long to wait, after a socket has *opened*, for the server's first +// `hello`/`status: connected` before treating the attempt as failed. The 30s +// connection timeout only covers the pre-open CONNECTING phase, so without this +// a silent-but-accepting server hangs the spinner indefinitely (DX-1379). +const AWAIT_HELLO_TIMEOUT_MS = 12_000; + const AblyCliTerminalInner = ( { websocketUrl, @@ -685,9 +697,14 @@ const AblyCliTerminalInner = ( (status: ConnectionStatus) => { // updateConnectionStatusAndExpose debug removed // A successful connection clears any in-flight inactivity-resume guard so - // a later unrelated disconnect isn't misread as a failed resume. + // a later unrelated disconnect isn't misread as a failed resume, and the + // "awaiting hello" timeout (the handshake completed). if (status === "connected") { resumeAttemptReference.current = false; + if (awaitHelloTimerReference.current) { + clearTimeout(awaitHelloTimerReference.current); + awaitHelloTimerReference.current = null; + } } setComponentConnectionStatusState(status); // (window as any).componentConnectionStatusForTest = status; // Keep for direct inspection if needed, but primary is below @@ -761,6 +778,26 @@ const AblyCliTerminalInner = ( } }, []); + // "Awaiting hello" timeout: armed once a socket opens, cleared on + // connected/close. Guards against a server that accepts the socket but never + // completes the handshake. + const awaitHelloTimerReference = useRef(null); + const clearAwaitHelloTimer = useCallback(() => { + if (awaitHelloTimerReference.current) { + clearTimeout(awaitHelloTimerReference.current); + awaitHelloTimerReference.current = null; + } + }, []); + + // Same guard for the split-screen secondary socket (its own timer). + const secondaryAwaitHelloTimerReference = useRef(null); + const clearSecondaryAwaitHelloTimer = useCallback(() => { + if (secondaryAwaitHelloTimerReference.current) { + clearTimeout(secondaryAwaitHelloTimerReference.current); + secondaryAwaitHelloTimerReference.current = null; + } + }, []); + const clearInstallInstructionsTimer = useCallback(() => { if (installInstructionsTimerReference.current) { clearTimeout(installInstructionsTimerReference.current); @@ -1577,6 +1614,24 @@ const AblyCliTerminalInner = ( sock.send(JSON.stringify(payload)); } + // Arm the "awaiting hello" timeout: the socket is open, but until the + // server sends its hello/connected we have no session. If it never + // arrives, force-close this socket as a recoverable failure so the + // reconnect scheduler retries instead of the UI hanging on the spinner. + clearAwaitHelloTimer(); + awaitHelloTimerReference.current = setTimeout(() => { + if (connectionStatusReference.current !== "connected") { + debugLog( + `⚠️ DIAGNOSTIC: No hello within ${AWAIT_HELLO_TIMEOUT_MS}ms of open - closing socket to retry`, + ); + try { + sock.close(AWAIT_HELLO_CLOSE_CODE, "awaiting-hello-timeout"); + } catch { + /* ignore */ + } + } + }, AWAIT_HELLO_TIMEOUT_MS); + // Set up initial command to be sent when prompt is detected // Skip initial command if we're resuming an existing session if (initialCommand && !sessionId) { @@ -1600,6 +1655,7 @@ const AblyCliTerminalInner = ( clearPtyBuffer, sessionId, clearConnectionTimeout, + clearAwaitHelloTimer, refreshAuth, ], ); @@ -1844,6 +1900,7 @@ const AblyCliTerminalInner = ( `[AblyCLITerminal] WebSocket closed. Code: ${event.code}, Reason: ${event.reason}, Clean: ${event.wasClean}`, ); clearConnectionTimeout(); // Clear timeout on close + clearAwaitHelloTimer(); clearTerminalBoxOnly(); updateSessionActive(false); @@ -2070,6 +2127,7 @@ const AblyCliTerminalInner = ( resumeOnReload, sessionId, clearConnectionTimeout, + clearAwaitHelloTimer, clearPtyBuffer, ], ); @@ -2796,6 +2854,11 @@ const AblyCliTerminalInner = ( ]); useEffect(() => () => clearInactivityTimer(), [clearInactivityTimer]); + useEffect(() => () => clearAwaitHelloTimer(), [clearAwaitHelloTimer]); + useEffect( + () => () => clearSecondaryAwaitHelloTimer(), + [clearSecondaryAwaitHelloTimer], + ); // Cleanup install instructions timer on unmount useEffect( @@ -3118,6 +3181,19 @@ const AblyCliTerminalInner = ( newSocket.send(JSON.stringify(payload)); } + // Bound the wait for the server's hello on the secondary socket too, so + // a silent-but-accepting server can't hang the secondary pane's spinner. + clearSecondaryAwaitHelloTimer(); + secondaryAwaitHelloTimerReference.current = setTimeout(() => { + if (secondaryConnectionStatusReference.current !== "connected") { + try { + newSocket.close(AWAIT_HELLO_CLOSE_CODE, "awaiting-hello-timeout"); + } catch { + /* ignore */ + } + } + }, AWAIT_HELLO_TIMEOUT_MS); + // Set up initial command to be sent when prompt is detected // Skip initial command if we're resuming an existing session if (initialCommand && !secondarySessionId) { @@ -3346,6 +3422,7 @@ const AblyCliTerminalInner = ( debugLog( `[AblyCLITerminal] [Secondary] WebSocket closed. Code: ${event.code}, Reason: ${event.reason}`, ); + clearSecondaryAwaitHelloTimer(); setIsSecondarySessionActive(false); updateSecondaryConnectionStatus("disconnected"); @@ -3740,6 +3817,10 @@ const AblyCliTerminalInner = ( // Update internal state for the secondary terminal setSecondaryConnectionStatus(status); secondaryConnectionStatusReference.current = status; + if (status === "connected" && secondaryAwaitHelloTimerReference.current) { + clearTimeout(secondaryAwaitHelloTimerReference.current); + secondaryAwaitHelloTimerReference.current = null; + } // We intentionally don't call onConnectionStatusChange here // as per requirements - only the primary terminal status should be reported