From ca6cd9a40b25d5b36d4ad81e71e082b511ee71cd Mon Sep 17 00:00:00 2001 From: David Zhao Date: Sat, 27 Jun 2026 23:40:59 +0200 Subject: [PATCH 1/5] feat: auto failover APIs with LK Cloud retries in alternative datacenters on 5xx and transport failures --- .github/workflows/test-api.yml | 51 ++++++ packages/livekit-server-sdk/package.json | 1 - .../src/AgentDispatchClient.ts | 8 +- .../livekit-server-sdk/src/ClientOptions.ts | 7 + .../livekit-server-sdk/src/ConnectorClient.ts | 8 +- .../livekit-server-sdk/src/EgressClient.ts | 8 +- .../livekit-server-sdk/src/IngressClient.ts | 8 +- .../src/RoomServiceClient.ts | 8 +- packages/livekit-server-sdk/src/SipClient.ts | 8 +- packages/livekit-server-sdk/src/TwirpRPC.ts | 139 +++++++++++----- packages/livekit-server-sdk/src/failover.ts | 155 ++++++++++++++++++ packages/livekit-server-sdk/src/index.ts | 2 + .../test/api/failover.test.ts | 78 +++++++++ pnpm-lock.yaml | 38 ----- 14 files changed, 418 insertions(+), 101 deletions(-) create mode 100644 .github/workflows/test-api.yml create mode 100644 packages/livekit-server-sdk/src/failover.ts create mode 100644 packages/livekit-server-sdk/test/api/failover.test.ts diff --git a/.github/workflows/test-api.yml b/.github/workflows/test-api.yml new file mode 100644 index 00000000..a5f456ba --- /dev/null +++ b/.github/workflows/test-api.yml @@ -0,0 +1,51 @@ +# SPDX-FileCopyrightText: 2026 LiveKit, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 + +name: Test API + +permissions: + contents: read + +on: + workflow_dispatch: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + failover: + runs-on: ubuntu-latest + services: + mock-server: + image: livekit/test-server:latest + ports: + - 9999:9999 + - 10000:10000 + - 10001:10001 + - 10002:10002 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - uses: pnpm/action-setup@b906affcce14559ad1aafd4ab0e942779e9f58b1 # v4.3.0 + + - name: Setup Node.js + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: 24 + cache: pnpm + + - name: Install dependencies + run: pnpm install + + - name: Wait for mock server + run: | + for i in $(seq 1 30); do + curl -sf http://127.0.0.1:9999/settings/regions >/dev/null && exit 0 + sleep 1 + done + echo "mock server did not become ready" && exit 1 + + - name: Run API tests + run: pnpm --filter="livekit-server-sdk" exec vitest --environment node run test/api diff --git a/packages/livekit-server-sdk/package.json b/packages/livekit-server-sdk/package.json index c8b815ab..567d9702 100644 --- a/packages/livekit-server-sdk/package.json +++ b/packages/livekit-server-sdk/package.json @@ -45,7 +45,6 @@ "dependencies": { "@bufbuild/protobuf": "^1.10.1", "@livekit/protocol": "^1.46.6", - "camelcase-keys": "^9.0.0", "jose": "^5.1.2" }, "devDependencies": { diff --git a/packages/livekit-server-sdk/src/AgentDispatchClient.ts b/packages/livekit-server-sdk/src/AgentDispatchClient.ts index d97bc486..1df7c409 100644 --- a/packages/livekit-server-sdk/src/AgentDispatchClient.ts +++ b/packages/livekit-server-sdk/src/AgentDispatchClient.ts @@ -40,10 +40,10 @@ export class AgentDispatchClient extends ServiceBase { */ constructor(host: string, apiKey?: string, secret?: string, options?: ClientOptions) { super(apiKey, secret); - const rpcOptions = options?.requestTimeout - ? 
{ requestTimeout: options.requestTimeout } - : undefined; - this.rpc = new TwirpRpc(host, livekitPackage, rpcOptions); + this.rpc = new TwirpRpc(host, livekitPackage, { + requestTimeout: options?.requestTimeout, + failover: options?.failover, + }); } /** diff --git a/packages/livekit-server-sdk/src/ClientOptions.ts b/packages/livekit-server-sdk/src/ClientOptions.ts index b2ef093f..757473ed 100644 --- a/packages/livekit-server-sdk/src/ClientOptions.ts +++ b/packages/livekit-server-sdk/src/ClientOptions.ts @@ -2,6 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 +import type { FailoverConfig } from './failover.js'; + /** * Options common to all clients */ @@ -10,4 +12,9 @@ export type ClientOptions = { * Optional timeout, in seconds, for all server requests */ requestTimeout?: number; + /** + * Region-failover behavior for API requests. Defaults to auto, which fails + * over to alternative regions for LiveKit Cloud hosts on retryable errors. + */ + failover?: FailoverConfig; }; diff --git a/packages/livekit-server-sdk/src/ConnectorClient.ts b/packages/livekit-server-sdk/src/ConnectorClient.ts index 8cf055e9..5a1be108 100644 --- a/packages/livekit-server-sdk/src/ConnectorClient.ts +++ b/packages/livekit-server-sdk/src/ConnectorClient.ts @@ -123,10 +123,10 @@ export class ConnectorClient extends ServiceBase { */ constructor(host: string, apiKey?: string, secret?: string, options?: ClientOptions) { super(apiKey, secret); - const rpcOptions = options?.requestTimeout - ? 
{ requestTimeout: options.requestTimeout } - : undefined; - this.rpc = new TwirpRpc(host, livekitPackage, rpcOptions); + this.rpc = new TwirpRpc(host, livekitPackage, { + requestTimeout: options?.requestTimeout, + failover: options?.failover, + }); } /** diff --git a/packages/livekit-server-sdk/src/EgressClient.ts b/packages/livekit-server-sdk/src/EgressClient.ts index 5d6f97aa..69dec99c 100644 --- a/packages/livekit-server-sdk/src/EgressClient.ts +++ b/packages/livekit-server-sdk/src/EgressClient.ts @@ -142,10 +142,10 @@ export class EgressClient extends ServiceBase { */ constructor(host: string, apiKey?: string, secret?: string, options?: ClientOptions) { super(apiKey, secret); - const rpcOptions = options?.requestTimeout - ? { requestTimeout: options.requestTimeout } - : undefined; - this.rpc = new TwirpRpc(host, livekitPackage, rpcOptions); + this.rpc = new TwirpRpc(host, livekitPackage, { + requestTimeout: options?.requestTimeout, + failover: options?.failover, + }); } /** diff --git a/packages/livekit-server-sdk/src/IngressClient.ts b/packages/livekit-server-sdk/src/IngressClient.ts index f0198720..75934ee9 100644 --- a/packages/livekit-server-sdk/src/IngressClient.ts +++ b/packages/livekit-server-sdk/src/IngressClient.ts @@ -129,10 +129,10 @@ export class IngressClient extends ServiceBase { */ constructor(host: string, apiKey?: string, secret?: string, options?: ClientOptions) { super(apiKey, secret); - const rpcOptions = options?.requestTimeout - ? 
{ requestTimeout: options.requestTimeout } - : undefined; - this.rpc = new TwirpRpc(host, livekitPackage, rpcOptions); + this.rpc = new TwirpRpc(host, livekitPackage, { + requestTimeout: options?.requestTimeout, + failover: options?.failover, + }); } /** diff --git a/packages/livekit-server-sdk/src/RoomServiceClient.ts b/packages/livekit-server-sdk/src/RoomServiceClient.ts index 8bdff547..59202dca 100644 --- a/packages/livekit-server-sdk/src/RoomServiceClient.ts +++ b/packages/livekit-server-sdk/src/RoomServiceClient.ts @@ -132,10 +132,10 @@ export class RoomServiceClient extends ServiceBase { */ constructor(host: string, apiKey?: string, secret?: string, options?: ClientOptions) { super(apiKey, secret); - const rpcOptions = options?.requestTimeout - ? { requestTimeout: options.requestTimeout } - : undefined; - this.rpc = new TwirpRpc(host, livekitPackage, rpcOptions); + this.rpc = new TwirpRpc(host, livekitPackage, { + requestTimeout: options?.requestTimeout, + failover: options?.failover, + }); } /** diff --git a/packages/livekit-server-sdk/src/SipClient.ts b/packages/livekit-server-sdk/src/SipClient.ts index 526e7d92..6fd50b2a 100644 --- a/packages/livekit-server-sdk/src/SipClient.ts +++ b/packages/livekit-server-sdk/src/SipClient.ts @@ -239,10 +239,10 @@ export class SipClient extends ServiceBase { */ constructor(host: string, apiKey?: string, secret?: string, options?: ClientOptions) { super(apiKey, secret); - const rpcOptions = options?.requestTimeout - ? 
{ requestTimeout: options.requestTimeout } - : undefined; - this.rpc = new TwirpRpc(host, livekitPackage, rpcOptions); + this.rpc = new TwirpRpc(host, livekitPackage, { + requestTimeout: options?.requestTimeout, + failover: options?.failover, + }); } /** diff --git a/packages/livekit-server-sdk/src/TwirpRPC.ts b/packages/livekit-server-sdk/src/TwirpRPC.ts index f7f63d30..e55d9c3d 100644 --- a/packages/livekit-server-sdk/src/TwirpRPC.ts +++ b/packages/livekit-server-sdk/src/TwirpRPC.ts @@ -2,6 +2,15 @@ // // SPDX-License-Identifier: Apache-2.0 import type { JsonValue } from '@bufbuild/protobuf'; +import type { FailoverConfig, ResolvedFailover } from './failover.js'; +import { + failoverEnabled, + hostKey, + pickNext, + regionOrigins, + resolveFailover, + sleep, +} from './failover.js'; // twirp RPC adapter for client implementation @@ -10,6 +19,8 @@ type Options = { prefix?: string; /** Timeout for fetch requests, in seconds. Must be within the valid range for abort signal timeouts. */ requestTimeout?: number; + /** Region-failover behavior. Defaults to auto (enabled for LiveKit Cloud hosts). */ + failover?: FailoverConfig; }; const defaultPrefix = '/twirp'; @@ -58,6 +69,8 @@ export class TwirpRpc { requestTimeout: number; + failover: ResolvedFailover; + constructor(host: string, pkg: string, options?: Options) { if (host.startsWith('ws')) { host = host.replace('ws', 'http'); @@ -66,8 +79,16 @@ export class TwirpRpc { this.pkg = pkg; this.requestTimeout = options?.requestTimeout ?? defaultTimeoutSeconds; this.prefix = options?.prefix || defaultPrefix; + this.failover = resolveFailover(options?.failover); } + /** + * Issues a Twirp request, failing over to alternative regions on retryable + * errors. On any transport error or HTTP 5xx it discovers regions via + * /settings/regions and replays the request — body and headers intact — + * against the next untried region, with exponential backoff. A 4xx is + * returned immediately. 
+ */ async request( service: string, method: string, @@ -77,52 +98,94 @@ export class TwirpRpc { // eslint-disable-next-line @typescript-eslint/no-explicit-any ): Promise { const path = `${this.prefix}/${this.pkg}.${service}/${method}`; - const url = new URL(path, this.host); - const init: RequestInit = { - method: 'POST', - headers: { - 'Content-Type': 'application/json;charset=UTF-8', - ...headers, - }, - body: JSON.stringify(data), + const body = JSON.stringify(data); + const requestHeaders = { + 'Content-Type': 'application/json;charset=UTF-8', + ...headers, }; - if (timeout) { - init.signal = AbortSignal.timeout(timeout * 1000); - } - - const response = await fetch(url, init); + const origin = new URL(this.host); + const enabled = failoverEnabled(this.failover, origin.hostname); + const maxAttempts = enabled ? Math.max(1, this.failover.maxAttempts) : 1; + const attempted = new Set([hostKey(origin)]); + let regions: string[] | undefined; + let current = this.host; + + for (let attempt = 0; attempt < maxAttempts; attempt += 1) { + const isLast = attempt + 1 >= maxAttempts; + const init: RequestInit = { method: 'POST', headers: requestHeaders, body }; + if (timeout) { + init.signal = AbortSignal.timeout(timeout * 1000); + } - if (!response.ok) { - const isJson = response.headers.get('content-type') === 'application/json'; - let errorMessage = 'Unknown internal error'; - let errorCode: string | undefined = undefined; - let metadata: Record | undefined = undefined; + let response: Response | undefined; + let transportError: unknown; try { - if (isJson) { - const parsedError = (await response.json()) as Record; - if ('msg' in parsedError) { - errorMessage = parsedError.msg; - } - if ('code' in parsedError) { - errorCode = parsedError.code; - } - if ('meta' in parsedError) { - metadata = >parsedError.meta; - } - } else { - errorMessage = await response.text(); - } + response = await fetch(new URL(path, current), init); } catch (e) { - // parsing went wrong, no op and 
we keep default error message - console.debug(`Error when trying to parse error message, using defaults`, e); + transportError = e; + } + + if (response?.ok) { + // Return the raw JSON. Every caller parses it with protobuf-es + // fromJson(), which per the proto3 JSON spec accepts both the proto + // field names (snake_case) and their json_name (camelCase), so no key + // conversion is needed. Converting keys would also corrupt map + // entries (e.g. participant attributes), whose keys are user data. + return (await response.json()) as Record; + } + + // Only retryable failures (a transport error or HTTP 5xx) continue; + // a 4xx is terminal. + const retryable = transportError !== undefined || (!!response && response.status >= 500); + let next: string | undefined; + if (retryable && !isLast) { + if (!regions) { + regions = await regionOrigins(origin, headers); + } + next = pickNext(regions, attempted); } - throw new TwirpError(response.statusText, errorMessage, response.status, errorCode, metadata); + if (!retryable || next === undefined) { + if (response) { + throw await toTwirpError(response); + } + throw transportError; + } + + await sleep(this.failover.backoffBase * 2 ** attempt); + attempted.add(hostKey(new URL(next))); + current = next; } - const parsedResp = (await response.json()) as Record; - const camelcaseKeys = await import('camelcase-keys').then((mod) => mod.default); - return camelcaseKeys(parsedResp, { deep: true }); + throw new Error('failover loop exited without returning'); // unreachable + } +} + +/** Builds a TwirpError from a non-2xx response, mirroring Twirp's JSON error shape. 
*/ +async function toTwirpError(response: Response): Promise { + const isJson = response.headers.get('content-type') === 'application/json'; + let errorMessage = 'Unknown internal error'; + let errorCode: string | undefined = undefined; + let metadata: Record | undefined = undefined; + try { + if (isJson) { + const parsedError = (await response.json()) as Record; + if ('msg' in parsedError) { + errorMessage = parsedError.msg; + } + if ('code' in parsedError) { + errorCode = parsedError.code; + } + if ('meta' in parsedError) { + metadata = >parsedError.meta; + } + } else { + errorMessage = await response.text(); + } + } catch (e) { + // parsing went wrong, no op and we keep default error message + console.debug(`Error when trying to parse error message, using defaults`, e); } + return new TwirpError(response.statusText, errorMessage, response.status, errorCode, metadata); } diff --git a/packages/livekit-server-sdk/src/failover.ts b/packages/livekit-server-sdk/src/failover.ts new file mode 100644 index 00000000..4a63cc5d --- /dev/null +++ b/packages/livekit-server-sdk/src/failover.ts @@ -0,0 +1,155 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +// Region failover for the Twirp API clients. +// +// On a retryable failure (any transport error or HTTP 5xx) the client discovers +// alternative LiveKit Cloud regions via /settings/regions and replays the +// request against the next region, with exponential backoff. 4xx responses are +// returned immediately. + +/** Controls when API requests fail over to alternative regions. */ +export type FailoverMode = + /** Fail over only for LiveKit Cloud hosts. The default. */ + | 'auto' + /** Always fail over, regardless of host (primarily for tests). */ + | 'on' + /** Never fail over. */ + | 'off'; + +/** Tunes the region-failover retry loop. */ +export type FailoverConfig = { + /** Defaults to 'auto'. 
*/ + mode?: FailoverMode; + /** Total attempts including the first — the original host plus `maxAttempts - 1` fallback regions. Defaults to 3. */ + maxAttempts?: number; + /** Milliseconds before the first retry; each subsequent retry doubles it. Defaults to 200. */ + backoffBase?: number; +}; + +export type ResolvedFailover = Required; + +export function resolveFailover(config?: FailoverConfig): ResolvedFailover { + return { + mode: config?.mode ?? 'auto', + maxAttempts: config?.maxAttempts ?? 3, + backoffBase: config?.backoffBase ?? 200, + }; +} + +export function failoverEnabled(config: ResolvedFailover, hostname: string): boolean { + switch (config.mode) { + case 'off': + return false; + case 'on': + return true; + default: + return isCloud(hostname); + } +} + +// Auto mode only enables failover for LiveKit Cloud project domains. +function isCloud(hostname: string): boolean { + return hostname.endsWith('.livekit.cloud'); +} + +/** Normalizes a region URL to an http(s) scheme (ws -> http, wss -> https). */ +function toHttp(url: string): string { + return url.startsWith('ws') ? `http${url.slice(2)}` : url; +} + +/** A stable key identifying a host (including port) for dedup across attempts. */ +export function hostKey(url: URL): string { + return url.host.toLowerCase(); +} + +/** Returns the first region origin whose host has not yet been attempted. */ +export function pickNext(regionOrigins: string[], attempted: Set): string | undefined { + for (const origin of regionOrigins) { + try { + if (!attempted.has(hostKey(new URL(origin)))) { + return origin; + } + } catch { + // skip malformed URLs + } + } + return undefined; +} + +export function sleep(ms: number): Promise { + return ms > 0 ? new Promise((resolve) => setTimeout(resolve, ms)) : Promise.resolve(); +} + +type CacheEntry = { + origins: string[]; + fetchedAt: number; + ttl: number; // ms +}; + +// Shared across all clients in the process so the region list is fetched once. 
+const regionCache = new Map(); + +/** + * Returns alternative region origins for `origin`, fetching /settings/regions + * if the cache is stale. Best-effort: on a fetch failure it serves a stale + * cached list when available, otherwise an empty list. Forwards `headers` so a + * valid token — and any test directives — reach the discovery endpoint. + */ +export async function regionOrigins(origin: URL, headers: unknown): Promise { + const key = hostKey(origin); + const cached = regionCache.get(key); + if (cached && Date.now() - cached.fetchedAt < cached.ttl) { + return cached.origins; + } + + try { + const { origins, ttl } = await fetchRegions(origin, headers); + // A zero TTL (e.g. Cache-Control: max-age=0) means "do not cache". + if (ttl > 0) { + regionCache.set(key, { origins, fetchedAt: Date.now(), ttl }); + } + return origins; + } catch { + return cached?.origins ?? []; + } +} + +async function fetchRegions( + origin: URL, + headers: unknown, +): Promise<{ origins: string[]; ttl: number }> { + // Forward the caller's headers (auth + any custom), minus body-specific ones. + const fetchHeaders: Record = {}; + for (const [k, v] of Object.entries((headers as Record) ?? {})) { + if (k.toLowerCase() === 'content-type' || k.toLowerCase() === 'content-length') continue; + fetchHeaders[k] = v; + } + + const response = await fetch(new URL('/settings/regions', origin.origin), { + method: 'GET', + headers: fetchHeaders, + }); + if (!response.ok) { + throw new Error(`region discovery failed: ${response.status}`); + } + const ttl = parseMaxAge(response.headers.get('cache-control')); + const body = (await response.json()) as { regions?: Array<{ url?: string }> }; + const origins = (body.regions ?? 
[]) + .filter((r) => !!r.url) + .map((r) => new URL(toHttp(r.url!)).origin); + return { origins, ttl }; +} + +function parseMaxAge(cacheControl: string | null): number { + if (!cacheControl) return 0; + for (const directive of cacheControl.split(',')) { + const trimmed = directive.trim().toLowerCase(); + if (trimmed.startsWith('max-age=')) { + const secs = parseInt(trimmed.slice('max-age='.length), 10); + return Number.isFinite(secs) && secs > 0 ? secs * 1000 : 0; + } + } + return 0; +} diff --git a/packages/livekit-server-sdk/src/index.ts b/packages/livekit-server-sdk/src/index.ts index ef17e951..19ffcdfa 100644 --- a/packages/livekit-server-sdk/src/index.ts +++ b/packages/livekit-server-sdk/src/index.ts @@ -81,4 +81,6 @@ export * from './IngressClient.js'; export * from './RoomServiceClient.js'; export * from './SipClient.js'; export { TwirpError } from './TwirpRPC.js'; +export type { ClientOptions } from './ClientOptions.js'; +export type { FailoverConfig, FailoverMode } from './failover.js'; export * from './WebhookReceiver.js'; diff --git a/packages/livekit-server-sdk/test/api/failover.test.ts b/packages/livekit-server-sdk/test/api/failover.test.ts new file mode 100644 index 00000000..6be5a005 --- /dev/null +++ b/packages/livekit-server-sdk/test/api/failover.test.ts @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: 2026 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +// API tests against the shared mock LiveKit API server (livekit/livekit +// cmd/test-server). Point them at a running instance with LK_TEST_SERVER_URL +// (default http://127.0.0.1:9999); they skip when no server is reachable. In CI +// the server is booted as a Docker container. +// +// See cmd/test-server/README.md for the X-Lk-Mock-* control protocol. These +// tests drive TwirpRpc.request() directly because the public service methods do +// not expose per-call headers. 
+import { describe, expect, it } from 'vitest'; +import type { FailoverConfig } from '../../src/failover.js'; +import { TwirpError, TwirpRpc, livekitPackage } from '../../src/TwirpRPC.js'; + +const BASE = process.env.LK_TEST_SERVER_URL ?? 'http://127.0.0.1:9999'; + +let reachable = false; +try { + reachable = (await fetch(`${BASE}/settings/regions`)).ok; +} catch { + reachable = false; +} + +const cfg = (mode: FailoverConfig['mode'] = 'on'): FailoverConfig => ({ + mode, + maxAttempts: 3, + backoffBase: 1, +}); + +const call = (directives: Record, mode: FailoverConfig['mode'] = 'on') => { + const rpc = new TwirpRpc(BASE, livekitPackage, { failover: cfg(mode) }); + return rpc.request('RoomService', 'CreateRoom', {}, { + authorization: 'Bearer test-token', + ...directives, + }); +}; + +(reachable ? describe : describe.skip)('region failover', () => { + it('succeeds on the primary when healthy', async () => { + await expect(call({})).resolves.toBeDefined(); + }); + + it('fails over to a healthy region when the primary is down', async () => { + await expect(call({ 'x-lk-mock-fail-regions': '0' })).resolves.toBeDefined(); + }); + + it('fails over to region 2 on the third attempt', async () => { + await expect(call({ 'x-lk-mock-fail-regions': '0,1' })).resolves.toBeDefined(); + }); + + it('surfaces an error when all regions are down', async () => { + await expect(call({ 'x-lk-mock-fail-regions': '0,1,2,3' })).rejects.toThrow(TwirpError); + }); + + it('does not retry a 4xx', async () => { + await expect( + call({ 'x-lk-mock-fail-regions': '0', 'x-lk-mock-fail-status': '400' }), + ).rejects.toMatchObject({ code: 'invalid_argument' }); + }); + + it('fails over on a transport error', async () => { + await expect( + call({ 'x-lk-mock-fail-regions': '0', 'x-lk-mock-fail-mode': 'drop' }), + ).resolves.toBeDefined(); + }); + + it('surfaces the original error when region discovery is unreachable', async () => { + await expect( + call({ 'x-lk-mock-fail-regions': '0', 
'x-lk-mock-regions-status': '500' }), + ).rejects.toThrow(TwirpError); + }); + + it('does not fail over for a non-cloud host in auto mode', async () => { + await expect(call({ 'x-lk-mock-fail-regions': '0' }, 'auto')).rejects.toThrow(TwirpError); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2d73d82a..6ac6320d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -249,9 +249,6 @@ importers: '@livekit/protocol': specifier: ^1.46.6 version: 1.46.6 - camelcase-keys: - specifier: ^9.0.0 - version: 9.1.3 jose: specifier: ^5.1.2 version: 5.9.6 @@ -1840,14 +1837,6 @@ packages: resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} engines: {node: '>=6'} - camelcase-keys@9.1.3: - resolution: {integrity: sha512-Rircqi9ch8AnZscQcsA1C47NFdaO3wukpmIRzYcDOrmvgt78hM/sj5pZhZNec2NM12uk5vTwRHZ4anGcrC4ZTg==} - engines: {node: '>=16'} - - camelcase@8.0.0: - resolution: {integrity: sha512-8WB3Jcas3swSvjIeA2yvCJ+Miyz5l1ZmB6HFb9R1317dt9LCQoswg/BGrmAmkWVEszSrrg4RwmO46qIm2OEnSA==} - engines: {node: '>=16'} - caniuse-lite@1.0.30001629: resolution: {integrity: sha512-c3dl911slnQhmxUIT4HhYzT7wnBK/XYpGnYLOj4nJBaRiw52Ibe7YxlDaAeRECvA786zCuExhxIUJ2K7nHMrBw==} @@ -2796,10 +2785,6 @@ packages: magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} - map-obj@5.0.0: - resolution: {integrity: sha512-2L3MIgJynYrZ3TYMriLDLWocz15okFakV6J12HXvMXDHui2x/zgChzg1u9mFFGbbGWE+GsLpQByt4POb9Or+uA==} - engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} - markdown-it@14.1.0: resolution: {integrity: sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==} hasBin: true @@ -3140,10 +3125,6 @@ packages: quick-format-unescaped@4.0.4: resolution: {integrity: sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==} - quick-lru@6.1.2: - resolution: {integrity: 
sha512-AAFUA5O1d83pIHEhJwWCq/RQcRukCkn/NSm2QsTEMle5f2hP0ChI2+3Xb051PZCkLryI/Ir1MVKviT2FIloaTQ==} - engines: {node: '>=12'} - react-dom@19.2.7: resolution: {integrity: sha512-t0BRVXvbiE/o20Hfw669rLbMCDWtYZLvmJigy2f0MxsXF+71pxhR3xOkspmsO8h3ZlNzyibAmtCa3l4lYKk6gQ==} peerDependencies: @@ -3541,10 +3522,6 @@ packages: resolution: {integrity: sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==} engines: {node: '>=10'} - type-fest@4.20.0: - resolution: {integrity: sha512-MBh+PHUHHisjXf4tlx0CFWoMdjx8zCMLJHOjnV1prABYZFHqtFOyauCIK2/7w4oIfwkF8iNhLtnJEfVY2vn3iw==} - engines: {node: '>=16'} - typed-array-buffer@1.0.3: resolution: {integrity: sha512-nAYYwfY3qnzX30IkA6AQZjVbtK6duGontcQm1WSG1MD94YLqK0515GNApXkoxKOWMusVssAHWLh9SeaoefYFGw==} engines: {node: '>= 0.4'} @@ -5230,15 +5207,6 @@ snapshots: callsites@3.1.0: {} - camelcase-keys@9.1.3: - dependencies: - camelcase: 8.0.0 - map-obj: 5.0.0 - quick-lru: 6.1.2 - type-fest: 4.20.0 - - camelcase@8.0.0: {} - caniuse-lite@1.0.30001629: {} chai@6.2.2: {} @@ -6362,8 +6330,6 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 - map-obj@5.0.0: {} - markdown-it@14.1.0: dependencies: argparse: 2.0.1 @@ -6695,8 +6661,6 @@ snapshots: quick-format-unescaped@4.0.4: {} - quick-lru@6.1.2: {} - react-dom@19.2.7(react@19.2.7): dependencies: react: 19.2.7 @@ -7182,8 +7146,6 @@ snapshots: type-fest@0.20.2: {} - type-fest@4.20.0: {} - typed-array-buffer@1.0.3: dependencies: call-bound: 1.0.4 From 6480f0afcc62bca931fdf45a3f94559769b77ee3 Mon Sep 17 00:00:00 2001 From: David Zhao Date: Sat, 27 Jun 2026 14:42:35 -0700 Subject: [PATCH 2/5] Add auto failover APIs for LK Cloud Add auto failover APIs for LK Cloud in livekit-server-sdk. 
--- .changeset/wet-dryers-matter.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/wet-dryers-matter.md diff --git a/.changeset/wet-dryers-matter.md b/.changeset/wet-dryers-matter.md new file mode 100644 index 00000000..f888d3ce --- /dev/null +++ b/.changeset/wet-dryers-matter.md @@ -0,0 +1,5 @@ +--- +"livekit-server-sdk": patch +--- + +feat: auto failover APIs with LK Cloud From b7e7dbeedb2734604a852a81be0d2bf3cec3d361 Mon Sep 17 00:00:00 2001 From: David Zhao Date: Sat, 27 Jun 2026 23:43:34 +0200 Subject: [PATCH 3/5] fixed formatting --- packages/livekit-server-sdk/src/ClientOptions.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/livekit-server-sdk/src/ClientOptions.ts b/packages/livekit-server-sdk/src/ClientOptions.ts index 757473ed..1ff117ae 100644 --- a/packages/livekit-server-sdk/src/ClientOptions.ts +++ b/packages/livekit-server-sdk/src/ClientOptions.ts @@ -1,7 +1,6 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc. // // SPDX-License-Identifier: Apache-2.0 - import type { FailoverConfig } from './failover.js'; /** From 4f2925cac7e5af48b7b50310f952424101dfcf12 Mon Sep 17 00:00:00 2001 From: David Zhao Date: Sun, 28 Jun 2026 00:25:47 +0200 Subject: [PATCH 4/5] simplified API --- .../livekit-server-sdk/src/ClientOptions.ts | 5 +- packages/livekit-server-sdk/src/TwirpRPC.ts | 15 +++-- packages/livekit-server-sdk/src/failover.ts | 58 +++++++++---------- packages/livekit-server-sdk/src/index.ts | 2 +- .../test/api/failover.test.ts | 32 +++++++--- 5 files changed, 62 insertions(+), 50 deletions(-) diff --git a/packages/livekit-server-sdk/src/ClientOptions.ts b/packages/livekit-server-sdk/src/ClientOptions.ts index 1ff117ae..0d338e08 100644 --- a/packages/livekit-server-sdk/src/ClientOptions.ts +++ b/packages/livekit-server-sdk/src/ClientOptions.ts @@ -12,8 +12,9 @@ export type ClientOptions = { */ requestTimeout?: number; /** - * Region-failover behavior for API requests. 
Defaults to auto, which fails - * over to alternative regions for LiveKit Cloud hosts on retryable errors. + * Region-failover tuning for API requests. Omit it (the default) to enable + * failover for LiveKit Cloud hosts only; pass a config to enable it for any + * host. Set `maxAttempts: 1` to disable. */ failover?: FailoverConfig; }; diff --git a/packages/livekit-server-sdk/src/TwirpRPC.ts b/packages/livekit-server-sdk/src/TwirpRPC.ts index e55d9c3d..9713fe76 100644 --- a/packages/livekit-server-sdk/src/TwirpRPC.ts +++ b/packages/livekit-server-sdk/src/TwirpRPC.ts @@ -2,13 +2,13 @@ // // SPDX-License-Identifier: Apache-2.0 import type { JsonValue } from '@bufbuild/protobuf'; -import type { FailoverConfig, ResolvedFailover } from './failover.js'; +import type { FailoverConfig } from './failover.js'; import { - failoverEnabled, + failoverBackoffBase, + failoverMaxAttempts, hostKey, pickNext, regionOrigins, - resolveFailover, sleep, } from './failover.js'; @@ -69,7 +69,7 @@ export class TwirpRpc { requestTimeout: number; - failover: ResolvedFailover; + failover: FailoverConfig | undefined; constructor(host: string, pkg: string, options?: Options) { if (host.startsWith('ws')) { @@ -79,7 +79,7 @@ export class TwirpRpc { this.pkg = pkg; this.requestTimeout = options?.requestTimeout ?? defaultTimeoutSeconds; this.prefix = options?.prefix || defaultPrefix; - this.failover = resolveFailover(options?.failover); + this.failover = options?.failover; } /** @@ -105,8 +105,7 @@ export class TwirpRpc { }; const origin = new URL(this.host); - const enabled = failoverEnabled(this.failover, origin.hostname); - const maxAttempts = enabled ? 
Math.max(1, this.failover.maxAttempts) : 1; + const maxAttempts = failoverMaxAttempts(this.failover, origin.hostname); const attempted = new Set([hostKey(origin)]); let regions: string[] | undefined; let current = this.host; @@ -153,7 +152,7 @@ export class TwirpRpc { throw transportError; } - await sleep(this.failover.backoffBase * 2 ** attempt); + await sleep(failoverBackoffBase(this.failover) * 2 ** attempt); attempted.add(hostKey(new URL(next))); current = next; } diff --git a/packages/livekit-server-sdk/src/failover.ts b/packages/livekit-server-sdk/src/failover.ts index 4a63cc5d..b9bd2c41 100644 --- a/packages/livekit-server-sdk/src/failover.ts +++ b/packages/livekit-server-sdk/src/failover.ts @@ -9,47 +9,45 @@ // request against the next region, with exponential backoff. 4xx responses are // returned immediately. -/** Controls when API requests fail over to alternative regions. */ -export type FailoverMode = - /** Fail over only for LiveKit Cloud hosts. The default. */ - | 'auto' - /** Always fail over, regardless of host (primarily for tests). */ - | 'on' - /** Never fail over. */ - | 'off'; - -/** Tunes the region-failover retry loop. */ +/** + * Region-failover tuning, passed as the `failover` option. Omit it (default) to + * enable failover for LiveKit Cloud hosts only; pass a config to enable it for + * any host. + */ export type FailoverConfig = { - /** Defaults to 'auto'. */ - mode?: FailoverMode; - /** Total attempts including the first — the original host plus `maxAttempts - 1` fallback regions. Defaults to 3. */ + /** + * Total number of attempts including the initial request — the original host + * plus up to `maxAttempts - 1` fallback regions. Defaults to 3. Set to 1 to + * disable failover (a single attempt). + */ maxAttempts?: number; /** Milliseconds before the first retry; each subsequent retry doubles it. Defaults to 200. 
*/ backoffBase?: number; }; -export type ResolvedFailover = Required; +const DEFAULT_MAX_ATTEMPTS = 3; +const DEFAULT_BACKOFF_BASE = 200; -export function resolveFailover(config?: FailoverConfig): ResolvedFailover { - return { - mode: config?.mode ?? 'auto', - maxAttempts: config?.maxAttempts ?? 3, - backoffBase: config?.backoffBase ?? 200, - }; +/** + * Total request attempts including the initial one; 1 means no failover. With no + * config (`undefined`) failover is enabled only for LiveKit Cloud hosts; an + * explicit config enables it for any host (`maxAttempts: 1` disables it). + */ +export function failoverMaxAttempts( + config: FailoverConfig | undefined, + hostname: string, +): number { + if (config === undefined) { + return isCloud(hostname) ? DEFAULT_MAX_ATTEMPTS : 1; + } + return Math.max(1, config.maxAttempts ?? DEFAULT_MAX_ATTEMPTS); } -export function failoverEnabled(config: ResolvedFailover, hostname: string): boolean { - switch (config.mode) { - case 'off': - return false; - case 'on': - return true; - default: - return isCloud(hostname); - } +export function failoverBackoffBase(config: FailoverConfig | undefined): number { + return config?.backoffBase ?? DEFAULT_BACKOFF_BASE; } -// Auto mode only enables failover for LiveKit Cloud project domains. +// The default (no config) only enables failover for LiveKit Cloud project domains. 
function isCloud(hostname: string): boolean { return hostname.endsWith('.livekit.cloud'); } diff --git a/packages/livekit-server-sdk/src/index.ts b/packages/livekit-server-sdk/src/index.ts index 19ffcdfa..db437402 100644 --- a/packages/livekit-server-sdk/src/index.ts +++ b/packages/livekit-server-sdk/src/index.ts @@ -82,5 +82,5 @@ export * from './RoomServiceClient.js'; export * from './SipClient.js'; export { TwirpError } from './TwirpRPC.js'; export type { ClientOptions } from './ClientOptions.js'; -export type { FailoverConfig, FailoverMode } from './failover.js'; +export type { FailoverConfig } from './failover.js'; export * from './WebhookReceiver.js'; diff --git a/packages/livekit-server-sdk/test/api/failover.test.ts b/packages/livekit-server-sdk/test/api/failover.test.ts index 6be5a005..2fa301bc 100644 --- a/packages/livekit-server-sdk/test/api/failover.test.ts +++ b/packages/livekit-server-sdk/test/api/failover.test.ts @@ -23,14 +23,15 @@ try { reachable = false; } -const cfg = (mode: FailoverConfig['mode'] = 'on'): FailoverConfig => ({ - mode, - maxAttempts: 3, - backoffBase: 1, -}); +// An explicit config enables failover on any host (the non-cloud mock) with a +// tiny backoff so the tests run fast. 
+const FORCED: FailoverConfig = { maxAttempts: 3, backoffBase: 1 }; -const call = (directives: Record<string, string>, mode: FailoverConfig['mode'] = 'on') => { - const rpc = new TwirpRpc(BASE, livekitPackage, { failover: cfg(mode) }); +const call = ( + directives: Record<string, string>, + failover: FailoverConfig | undefined = FORCED, +) => { + const rpc = new TwirpRpc(BASE, livekitPackage, { failover }); return rpc.request('RoomService', 'CreateRoom', {}, { authorization: 'Bearer test-token', ...directives, @@ -72,7 +73,20 @@ const call = (directives: Record<string, string>, mode: FailoverConfig['mode'] = ).rejects.toThrow(TwirpError); }); - it('does not fail over for a non-cloud host in auto mode', async () => { - await expect(call({ 'x-lk-mock-fail-regions': '0' }, 'auto')).rejects.toThrow(TwirpError); + it('does not fail over for a non-cloud host by default', async () => { + // No failover option => undefined => cloud-only default; 127.0.0.1 is not a cloud host. + const rpc = new TwirpRpc(BASE, livekitPackage); + await expect( + rpc.request('RoomService', 'CreateRoom', {}, { + authorization: 'Bearer test-token', + 'x-lk-mock-fail-regions': '0', + }), + ).rejects.toThrow(TwirpError); + }); + + it('does not fail over when disabled with maxAttempts 1', async () => { + await expect(call({ 'x-lk-mock-fail-regions': '0' }, { maxAttempts: 1 })).rejects.toThrow( + TwirpError, + ); }); }); From 0619fe113a92c31e5415905f2dc2c998d6ea34fb Mon Sep 17 00:00:00 2001 From: David Zhao Date: Sun, 28 Jun 2026 00:26:03 +0200 Subject: [PATCH 5/5] fmt --- packages/livekit-server-sdk/src/failover.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/livekit-server-sdk/src/failover.ts b/packages/livekit-server-sdk/src/failover.ts index b9bd2c41..8b94f2e2 100644 --- a/packages/livekit-server-sdk/src/failover.ts +++ b/packages/livekit-server-sdk/src/failover.ts @@ -33,10 +33,7 @@ const DEFAULT_BACKOFF_BASE = 200; * config (`undefined`) failover is enabled only for LiveKit Cloud hosts; an * explicit config enables 
it for any host (`maxAttempts: 1` disables it). */ -export function failoverMaxAttempts( - config: FailoverConfig | undefined, - hostname: string, -): number { +export function failoverMaxAttempts(config: FailoverConfig | undefined, hostname: string): number { if (config === undefined) { return isCloud(hostname) ? DEFAULT_MAX_ATTEMPTS : 1; }