From 47e27386dc6e8c183e7c6210bb0e5d108ffa2bb4 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Mon, 16 Mar 2026 17:23:41 +0000 Subject: [PATCH 01/49] docs: ai chat.task --- docs/ai-chat/backend.mdx | 856 +++++++++++++++++++++++++++++ docs/ai-chat/features.mdx | 421 ++++++++++++++ docs/ai-chat/frontend.mdx | 234 ++++++++ docs/ai-chat/overview.mdx | 161 ++++++ docs/ai-chat/quick-start.mdx | 108 ++++ docs/ai-chat/reference.mdx | 257 +++++++++ docs/docs.json | 15 + references/ai-chat/ARCHITECTURE.md | 311 +++++++++++ 8 files changed, 2363 insertions(+) create mode 100644 docs/ai-chat/backend.mdx create mode 100644 docs/ai-chat/features.mdx create mode 100644 docs/ai-chat/frontend.mdx create mode 100644 docs/ai-chat/overview.mdx create mode 100644 docs/ai-chat/quick-start.mdx create mode 100644 docs/ai-chat/reference.mdx create mode 100644 references/ai-chat/ARCHITECTURE.md diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx new file mode 100644 index 00000000000..5c21e88ee65 --- /dev/null +++ b/docs/ai-chat/backend.mdx @@ -0,0 +1,856 @@ +--- +title: "Backend" +sidebarTitle: "Backend" +description: "Three approaches to building your chat backend — chat.task(), session iterator, or raw task primitives." +--- + +## chat.task() + +The highest-level approach. Handles message accumulation, stop signals, turn lifecycle, and auto-piping automatically. 
+ +### Simple: return a StreamTextResult + +Return the `streamText` result from `run` and it's automatically piped to the frontend: + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +export const simpleChat = chat.task({ + id: "simple-chat", + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + system: "You are a helpful assistant.", + messages, + abortSignal: signal, + }); + }, +}); +``` + +### Using chat.pipe() for complex flows + +For complex agent flows where `streamText` is called deep inside your code, use `chat.pipe()`. It works from **anywhere inside a task** — even nested function calls. + +```ts trigger/agent-chat.ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; +import type { ModelMessage } from "ai"; + +export const agentChat = chat.task({ + id: "agent-chat", + run: async ({ messages }) => { + // Don't return anything — chat.pipe is called inside + await runAgentLoop(messages); + }, +}); + +async function runAgentLoop(messages: ModelMessage[]) { + // ... agent logic, tool calls, etc. + + const result = streamText({ + model: openai("gpt-4o"), + messages, + }); + + // Pipe from anywhere — no need to return it + await chat.pipe(result); +} +``` + +### Lifecycle hooks + +#### onPreload + +Fires when a preloaded run starts — before any messages arrive. Use it to eagerly initialize state (DB records, user context) while the user is still typing. + +Preloaded runs are triggered by calling `transport.preload(chatId)` on the frontend. See [Preload](/ai-chat/features#preload) for details. 
+ +```ts +export const myChat = chat.task({ + id: "my-chat", + clientDataSchema: z.object({ userId: z.string() }), + onPreload: async ({ chatId, clientData, runId, chatAccessToken }) => { + // Initialize early — before the first message arrives + const user = await db.user.findUnique({ where: { id: clientData.userId } }); + userContext.init({ name: user.name, plan: user.plan }); + + await db.chat.create({ data: { id: chatId, userId: clientData.userId } }); + await db.chatSession.upsert({ + where: { id: chatId }, + create: { id: chatId, runId, publicAccessToken: chatAccessToken }, + update: { runId, publicAccessToken: chatAccessToken }, + }); + }, + onChatStart: async ({ preloaded }) => { + if (preloaded) return; // Already initialized in onPreload + // ... non-preloaded initialization + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +| Field | Type | Description | +|-------|------|-------------| +| `chatId` | `string` | Chat session ID | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | + +#### onChatStart + +Fires once on the first turn (turn 0) before `run()` executes. Use it to create a chat record in your database. + +The `continuation` field tells you whether this is a brand new chat or a continuation of an existing one (where the previous run timed out or was cancelled). The `preloaded` field tells you whether `onPreload` already ran. 
+ +```ts +export const myChat = chat.task({ + id: "my-chat", + onChatStart: async ({ chatId, clientData, continuation, preloaded }) => { + if (preloaded) return; // Already set up in onPreload + if (continuation) return; // Chat record already exists + + const { userId } = clientData as { userId: string }; + await db.chat.create({ + data: { id: chatId, userId, title: "New chat" }, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + + + `clientData` contains custom data from the frontend — either the `clientData` option on the transport constructor (sent with every message) or the `metadata` option on `sendMessage()` (per-message). See [Client data and metadata](/ai-chat/frontend#client-data-and-metadata). + + +#### onTurnStart + +Fires at the start of every turn, after message accumulation and `onChatStart` (turn 0), but **before** `run()` executes. Use it to persist messages before streaming begins — so a mid-stream page refresh still shows the user's message. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | +| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | +| `turn` | `number` | Turn number (0-indexed) | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `preloaded` | `boolean` | Whether this run was preloaded | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | + +```ts +export const myChat = chat.task({ + id: "my-chat", + onTurnStart: async ({ chatId, uiMessages, runId, chatAccessToken }) => { + await db.chat.update({ + where: { id: chatId }, + data: { messages: uiMessages }, + }); + await db.chatSession.upsert({ + where: { id: chatId }, + create: { id: chatId, runId, publicAccessToken: chatAccessToken }, + update: { runId, publicAccessToken: chatAccessToken }, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + + + By persisting in `onTurnStart`, the user's message is saved to your database before the AI starts streaming. If the user refreshes mid-stream, the message is already there. + + +#### onTurnComplete + +Fires after each turn completes — after the response is captured, before waiting for the next message. This is the primary hook for persisting the assistant's response. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | +| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | +| `newMessages` | `ModelMessage[]` | Only this turn's messages (model format) | +| `newUIMessages` | `UIMessage[]` | Only this turn's messages (UI format) | +| `responseMessage` | `UIMessage \| undefined` | The assistant's response for this turn | +| `turn` | `number` | Turn number (0-indexed) | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `lastEventId` | `string \| undefined` | Stream position for resumption. Persist this with the session. | +| `stopped` | `boolean` | Whether the user stopped generation during this turn | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `rawResponseMessage` | `UIMessage \| undefined` | The raw assistant response before abort cleanup (same as `responseMessage` when not stopped) | + +```ts +export const myChat = chat.task({ + id: "my-chat", + onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => { + await db.chat.update({ + where: { id: chatId }, + data: { messages: uiMessages }, + }); + await db.chatSession.upsert({ + where: { id: chatId }, + create: { id: chatId, runId, publicAccessToken: chatAccessToken, lastEventId }, + update: { runId, publicAccessToken: chatAccessToken, lastEventId }, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + + + Use `uiMessages` to overwrite the full conversation each turn (simplest). Use `newUIMessages` if you prefer to store messages individually — for example, one database row per message. + + + + Persist `lastEventId` alongside the session. 
When the transport reconnects after a page refresh, it uses this to skip past already-seen events — preventing duplicate messages. + + +### Stop generation + +#### How stop works + +Calling `stop()` from `useChat` sends a stop signal to the running task via input streams. The task's `streamText` call aborts (if you passed `signal` or `stopSignal`), but the **run stays alive** and waits for the next message. The partial response is captured and accumulated normally. + +#### Abort signals + +The `run` function receives three abort signals: + +| Signal | Fires when | Use for | +|--------|-----------|---------| +| `signal` | Stop **or** cancel | Pass to `streamText` — handles both cases. **Use this in most cases.** | +| `stopSignal` | Stop only (per-turn, reset each turn) | Custom logic that should only run on user stop, not cancellation | +| `cancelSignal` | Run cancel, expire, or maxDuration exceeded | Cleanup that should only happen on full cancellation | + +```ts +export const myChat = chat.task({ + id: "my-chat", + run: async ({ messages, signal, stopSignal, cancelSignal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, // Handles both stop and cancel + }); + }, +}); +``` + + + Use `signal` (the combined signal) in most cases. The separate `stopSignal` and `cancelSignal` are only needed if you want different behavior for stop vs cancel. + + +#### Detecting stop in callbacks + +The `onTurnComplete` event includes a `stopped` boolean that indicates whether the user stopped generation during that turn: + +```ts +export const myChat = chat.task({ + id: "my-chat", + onTurnComplete: async ({ chatId, uiMessages, stopped }) => { + await db.chat.update({ + where: { id: chatId }, + data: { messages: uiMessages, lastStoppedAt: stopped ? 
new Date() : undefined }, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +You can also check stop status from **anywhere** during a turn using `chat.isStopped()`. This is useful inside `streamText`'s `onFinish` callback where the AI SDK's `isAborted` flag can be unreliable (e.g. when using `createUIMessageStream` + `writer.merge()`): + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; + +export const myChat = chat.task({ + id: "my-chat", + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, + onFinish: ({ isAborted }) => { + // isAborted may be false even after stop when using createUIMessageStream + const wasStopped = isAborted || chat.isStopped(); + if (wasStopped) { + // handle stop — e.g. log analytics + } + }, + }); + }, +}); +``` + +#### Cleaning up aborted messages + +When stop happens mid-stream, the captured response message can contain parts in an incomplete state — tool calls stuck in `partial-call`, reasoning blocks still marked as `streaming`, etc. These can cause UI issues like permanent spinners. + +`chat.task` automatically cleans up the `responseMessage` when stop is detected before passing it to `onTurnComplete`. If you use `chat.pipe()` manually and capture response messages yourself, use `chat.cleanupAbortedParts()`: + +```ts +const cleaned = chat.cleanupAbortedParts(rawResponseMessage); +``` + +This removes tool invocation parts stuck in `partial-call` state and marks any `streaming` text or reasoning parts as `done`. + + + Stop signal delivery is best-effort. There is a small race window where the model may finish before the stop signal arrives, in which case the turn completes normally with `stopped: false`. This is expected and does not require special handling. 
+ + +### Persistence + +#### What needs to be persisted + +To build a chat app that survives page refreshes, you need to persist two things: + +1. **Messages** — The conversation history. Persisted **server-side** in the task via `onTurnStart` and `onTurnComplete`. +2. **Sessions** — The transport's connection state (`runId`, `publicAccessToken`, `lastEventId`). Persisted **server-side** via `onTurnStart` and `onTurnComplete`. + + + Sessions let the transport reconnect to an existing run after a page refresh. Without them, every page load would start a new run — losing the conversation context that was accumulated in the previous run. + + +#### Full persistence example + + +```ts trigger/chat.ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; +import { db } from "@/lib/db"; + +export const myChat = chat.task({ + id: "my-chat", + clientDataSchema: z.object({ + userId: z.string(), + }), + onChatStart: async ({ chatId, clientData }) => { + await db.chat.create({ + data: { id: chatId, userId: clientData.userId, title: "New chat", messages: [] }, + }); + }, + onTurnStart: async ({ chatId, uiMessages, runId, chatAccessToken }) => { + // Persist messages + session before streaming + await db.chat.update({ + where: { id: chatId }, + data: { messages: uiMessages }, + }); + await db.chatSession.upsert({ + where: { id: chatId }, + create: { id: chatId, runId, publicAccessToken: chatAccessToken }, + update: { runId, publicAccessToken: chatAccessToken }, + }); + }, + onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => { + // Persist assistant response + stream position + await db.chat.update({ + where: { id: chatId }, + data: { messages: uiMessages }, + }); + await db.chatSession.upsert({ + where: { id: chatId }, + create: { id: chatId, runId, publicAccessToken: chatAccessToken, lastEventId }, + update: { runId, publicAccessToken: 
chatAccessToken, lastEventId }, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, + }); + }, +}); +``` + +```ts app/actions.ts +"use server"; + +import { chat } from "@trigger.dev/sdk/ai"; +import type { myChat } from "@/trigger/chat"; +import { db } from "@/lib/db"; + +export const getChatToken = () => + chat.createAccessToken("my-chat"); + +export async function getChatMessages(chatId: string) { + const found = await db.chat.findUnique({ where: { id: chatId } }); + return found?.messages ?? []; +} + +export async function getAllSessions() { + const sessions = await db.chatSession.findMany(); + const result: Record = {}; + for (const s of sessions) { + result[s.id] = { + runId: s.runId, + publicAccessToken: s.publicAccessToken, + lastEventId: s.lastEventId ?? undefined, + }; + } + return result; +} + +export async function deleteSession(chatId: string) { + await db.chatSession.delete({ where: { id: chatId } }).catch(() => {}); +} +``` + +```tsx app/components/chat.tsx +"use client"; + +import { useChat } from "@ai-sdk/react"; +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; +import type { myChat } from "@/trigger/chat"; +import { getChatToken, deleteSession } from "@/app/actions"; + +export function Chat({ chatId, initialMessages, initialSessions }) { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + clientData: { userId: currentUser.id }, // Type-checked against clientDataSchema + sessions: initialSessions, + onSessionChange: (id, session) => { + if (!session) deleteSession(id); + }, + }); + + const { messages, sendMessage, stop, status } = useChat({ + id: chatId, + messages: initialMessages, + transport, + resume: initialMessages.length > 0, + }); + + return ( +
    <div>
      {messages.map((m) => (
        <div key={m.id}>
          <strong>{m.role}:</strong>
          {m.parts.map((part, i) =>
            part.type === "text" ? <span key={i}>{part.text}</span> : null
          )}
        </div>
      ))}

      <form
        onSubmit={(e) => {
          e.preventDefault();
          const input = e.currentTarget.querySelector("input");
          if (input?.value) {
            sendMessage({ text: input.value });
            input.value = "";
          }
        }}
      >
        <input placeholder="Type a message..." />
        <button type="submit">Send</button>
        {status === "streaming" && (
          <button type="button" onClick={stop}>
            Stop
          </button>
        )}
      </form>
    </div>
  );
}
```
+ +### Runtime configuration + +#### chat.setTurnTimeout() + +Override how long the run stays suspended waiting for the next message. Call from inside `run()`: + +```ts +run: async ({ messages, signal }) => { + chat.setTurnTimeout("2h"); // Wait longer for this conversation + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); +}, +``` + +#### chat.setWarmTimeoutInSeconds() + +Override how long the run stays warm (active, using compute) after each turn: + +```ts +run: async ({ messages, signal }) => { + chat.setWarmTimeoutInSeconds(60); // Stay warm for 1 minute + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); +}, +``` + + + Longer warm timeout means faster responses but more compute usage. Set to `0` to suspend immediately after each turn (minimum latency cost, slight delay on next message). + + +#### Stream options + +Control how `streamText` results are converted to the frontend stream via `toUIMessageStream()`. Set static defaults on the task, or override per-turn. + +##### Error handling with onError + +When `streamText` encounters an error mid-stream (rate limits, API failures, network errors), the `onError` callback converts it to a string that's sent to the frontend as an `{ type: "error", errorText }` chunk. The AI SDK's `useChat` receives this via its `onError` callback. + +By default, the raw error message is sent to the frontend. Use `onError` to sanitize errors and avoid leaking internal details: + +```ts +export const myChat = chat.task({ + id: "my-chat", + uiMessageStreamOptions: { + onError: (error) => { + // Log the full error server-side for debugging + console.error("Stream error:", error); + // Return a sanitized message — this is what the frontend sees + if (error instanceof Error && error.message.includes("rate limit")) { + return "Rate limited — please wait a moment and try again."; + } + return "Something went wrong. 
Please try again."; + }, + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +`onError` is also called for tool execution errors, so a single handler covers both LLM errors and tool failures. + +On the frontend, handle the error in `useChat`: + +```tsx +const { messages, sendMessage } = useChat({ + transport, + onError: (error) => { + // error.message contains the string returned by your onError handler + toast.error(error.message); + }, +}); +``` + +##### Reasoning and sources + +Control which AI SDK features are forwarded to the frontend: + +```ts +export const myChat = chat.task({ + id: "my-chat", + uiMessageStreamOptions: { + sendReasoning: true, // Forward model reasoning (default: true) + sendSources: true, // Forward source citations (default: false) + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +##### Per-turn overrides + +Override per-turn with `chat.setUIMessageStreamOptions()` — per-turn values merge with the static config (per-turn wins on conflicts). The override is cleared automatically after each turn. + +```ts +run: async ({ messages, clientData, signal }) => { + // Enable reasoning only for certain models + if (clientData.model?.includes("claude")) { + chat.setUIMessageStreamOptions({ sendReasoning: true }); + } + return streamText({ model: openai(clientData.model ?? "gpt-4o"), messages, abortSignal: signal }); +}, +``` + +`chat.setUIMessageStreamOptions()` works across all abstraction levels — `chat.task()`, `chat.createSession()` / `turn.complete()`, and `chat.pipeAndCapture()`. + +See [ChatUIMessageStreamOptions](/ai-chat/reference#chatuimessagestreamoptions) for the full reference. + + + `onFinish` is managed internally for response capture and cannot be overridden here. 
Use `streamText`'s `onFinish` callback for custom finish handling, or use [raw task mode](#raw-task-with-primitives) for full control over `toUIMessageStream()`. + + +### Manual mode with task() + +If you need full control over task options, use the standard `task()` with `ChatTaskPayload` and `chat.pipe()`: + +```ts +import { task } from "@trigger.dev/sdk"; +import { chat, type ChatTaskPayload } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +export const manualChat = task({ + id: "manual-chat", + retry: { maxAttempts: 3 }, + queue: { concurrencyLimit: 10 }, + run: async (payload: ChatTaskPayload) => { + const result = streamText({ + model: openai("gpt-4o"), + messages: payload.messages, + }); + + await chat.pipe(result); + }, +}); +``` + + + Manual mode does not get automatic message accumulation or the `onTurnComplete`/`onChatStart` lifecycle hooks. The `responseMessage` field in `onTurnComplete` will be `undefined` when using `chat.pipe()` directly. Use `chat.task()` for the full multi-turn experience. + + +--- + +## chat.createSession() + +A middle ground between `chat.task()` and raw primitives. You get an async iterator that yields `ChatTurn` objects — each turn handles stop signals, message accumulation, and turn-complete signaling automatically. You control initialization, model/tool selection, persistence, and any custom per-turn logic. 
+ +Use `chat.createSession()` inside a standard `task()`: + +```ts +import { task } from "@trigger.dev/sdk"; +import { chat, type ChatTaskWirePayload } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +export const myChat = task({ + id: "my-chat", + run: async (payload: ChatTaskWirePayload, { signal }) => { + // One-time initialization — just code, no hooks + const clientData = payload.metadata as { userId: string }; + await db.chat.create({ data: { id: payload.chatId, userId: clientData.userId } }); + + const session = chat.createSession(payload, { + signal, + warmTimeoutInSeconds: 60, + timeout: "1h", + }); + + for await (const turn of session) { + const result = streamText({ + model: openai("gpt-4o"), + messages: turn.messages, + abortSignal: turn.signal, + }); + + // Pipe, capture, accumulate, and signal turn-complete — all in one call + await turn.complete(result); + + // Persist after each turn + await db.chat.update({ + where: { id: turn.chatId }, + data: { messages: turn.uiMessages }, + }); + } + }, +}); +``` + +### ChatSessionOptions + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `signal` | `AbortSignal` | required | Run-level cancel signal (from task context) | +| `warmTimeoutInSeconds` | `number` | `30` | Seconds to stay warm between turns | +| `timeout` | `string` | `"1h"` | Duration string for suspend timeout | +| `maxTurns` | `number` | `100` | Max turns before ending | + +### ChatTurn + +Each turn yielded by the iterator provides: + +| Field | Type | Description | +|-------|------|-------------| +| `number` | `number` | Turn number (0-indexed) | +| `chatId` | `string` | Chat session ID | +| `trigger` | `string` | What triggered this turn | +| `clientData` | `unknown` | Client data from the transport | +| `messages` | `ModelMessage[]` | Full accumulated model messages — pass to `streamText` | +| `uiMessages` | `UIMessage[]` | Full accumulated UI 
messages — use for persistence | +| `signal` | `AbortSignal` | Combined stop+cancel signal (fresh each turn) | +| `stopped` | `boolean` | Whether the user stopped generation this turn | +| `continuation` | `boolean` | Whether this is a continuation run | + +| Method | Description | +|--------|-------------| +| `turn.complete(source)` | Pipe stream, capture response, accumulate, and signal turn-complete | +| `turn.done()` | Just signal turn-complete (when you've piped manually) | +| `turn.addResponse(response)` | Add a response to the accumulator manually | + +### turn.complete() vs manual control + +`turn.complete(result)` is the easy path — it handles piping, capturing the response, accumulating messages, cleaning up aborted parts, and writing the turn-complete chunk. + +For more control, you can do each step manually: + +```ts +for await (const turn of session) { + const result = streamText({ + model: openai("gpt-4o"), + messages: turn.messages, + abortSignal: turn.signal, + }); + + // Manual: pipe and capture separately + const response = await chat.pipeAndCapture(result, { signal: turn.signal }); + + if (response) { + // Custom processing before accumulating + await turn.addResponse(response); + } + + // Custom persistence, analytics, etc. + await db.chat.update({ ... }); + + // Must call done() when not using complete() + await turn.done(); +} +``` + +--- + +## Raw task with primitives + +For full control, use a standard `task()` with the composable primitives from the `chat` namespace. You manage everything: the turn loop, stop signals, message accumulation, and turn-complete signaling. + +Raw task mode also lets you call `.toUIMessageStream()` yourself with any options — including `onFinish` and `originalMessages`. This is the right choice when you need complete control over the stream conversion beyond what `chat.setUIMessageStreamOptions()` provides. 
+ +### Primitives + +| Primitive | Description | +|-----------|-------------| +| `chat.messages` | Input stream for incoming messages — use `.waitWithWarmup()` to wait for the next turn | +| `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | +| `chat.pipeAndCapture(result)` | Pipe a `StreamTextResult` to the chat stream and capture the response | +| `chat.writeTurnComplete()` | Signal the frontend that the current turn is complete | +| `chat.MessageAccumulator` | Accumulates conversation messages across turns | +| `chat.pipe(stream)` | Pipe a stream to the frontend (no response capture) | +| `chat.cleanupAbortedParts(msg)` | Clean up incomplete parts from a stopped response | + +### Example + +```ts +import { task } from "@trigger.dev/sdk"; +import { chat, type ChatTaskWirePayload } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +export const myChat = task({ + id: "my-chat-raw", + run: async (payload: ChatTaskWirePayload, { signal: runSignal }) => { + let currentPayload = payload; + + // Handle preload — wait for the first real message + if (currentPayload.trigger === "preload") { + const result = await chat.messages.waitWithWarmup({ + warmTimeoutInSeconds: 60, + timeout: "1h", + spanName: "waiting for first message", + }); + if (!result.ok) return; + currentPayload = result.output; + } + + const stop = chat.createStopSignal(); + const conversation = new chat.MessageAccumulator(); + + for (let turn = 0; turn < 100; turn++) { + stop.reset(); + + const messages = await conversation.addIncoming( + currentPayload.messages, + currentPayload.trigger, + turn + ); + + const combinedSignal = AbortSignal.any([runSignal, stop.signal]); + + const result = streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: combinedSignal, + }); + + let response; + try { + response = await chat.pipeAndCapture(result, { signal: combinedSignal }); + } catch (error) { + if (error 
instanceof Error && error.name === "AbortError") { + if (runSignal.aborted) break; + // Stop — fall through to accumulate partial + } else { + throw error; + } + } + + if (response) { + const cleaned = stop.signal.aborted && !runSignal.aborted + ? chat.cleanupAbortedParts(response) + : response; + await conversation.addResponse(cleaned); + } + + if (runSignal.aborted) break; + + // Persist, analytics, etc. + await db.chat.update({ + where: { id: currentPayload.chatId }, + data: { messages: conversation.uiMessages }, + }); + + await chat.writeTurnComplete(); + + // Wait for the next message + const next = await chat.messages.waitWithWarmup({ + warmTimeoutInSeconds: 60, + timeout: "1h", + spanName: "waiting for next message", + }); + if (!next.ok) break; + currentPayload = next.output; + } + + stop.cleanup(); + }, +}); +``` + +### MessageAccumulator + +The `MessageAccumulator` handles the transport protocol automatically: + +- Turn 0: replaces messages (full history from frontend) +- Subsequent turns: appends new messages (frontend only sends the new user message) +- Regenerate: replaces messages (full history minus last assistant message) + +```ts +const conversation = new chat.MessageAccumulator(); + +// Returns full accumulated ModelMessage[] for streamText +const messages = await conversation.addIncoming(payload.messages, payload.trigger, turn); + +// After piping, add the response +const response = await chat.pipeAndCapture(result); +if (response) await conversation.addResponse(response); + +// Access accumulated messages for persistence +conversation.uiMessages; // UIMessage[] +conversation.modelMessages; // ModelMessage[] +``` diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx new file mode 100644 index 00000000000..fd4b63789a1 --- /dev/null +++ b/docs/ai-chat/features.mdx @@ -0,0 +1,421 @@ +--- +title: "Features" +sidebarTitle: "Features" +description: "Per-run data, deferred work, custom streaming, subtask integration, and preload." 
+--- + +## Per-run data with chat.local + +Use `chat.local` to create typed, run-scoped data that persists across turns and is accessible from anywhere — the run function, tools, nested helpers. Each run gets its own isolated copy, and locals are automatically cleared between runs. + +When a subtask is invoked via `ai.tool()`, initialized locals are automatically serialized into the subtask's metadata and hydrated on first access — no extra code needed. Subtask changes to hydrated locals are local to the subtask and don't propagate back to the parent. + +### Declaring and initializing + +Declare locals at module level with a unique `id`, then initialize them inside a lifecycle hook where you have context (chatId, clientData, etc.): + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText, tool } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; +import { db } from "@/lib/db"; + +// Declare at module level — each local needs a unique id +const userContext = chat.local<{ + name: string; + plan: "free" | "pro"; + messageCount: number; +}>({ id: "userContext" }); + +export const myChat = chat.task({ + id: "my-chat", + clientDataSchema: z.object({ userId: z.string() }), + onChatStart: async ({ clientData }) => { + // Initialize with real data from your database + const user = await db.user.findUnique({ + where: { id: clientData.userId }, + }); + userContext.init({ + name: user.name, + plan: user.plan, + messageCount: user.messageCount, + }); + }, + run: async ({ messages, signal }) => { + userContext.messageCount++; + + return streamText({ + model: openai("gpt-4o"), + system: `Helping ${userContext.name} (${userContext.plan} plan).`, + messages, + abortSignal: signal, + }); + }, +}); +``` + +### Accessing from tools + +Locals are accessible from anywhere during task execution — including AI SDK tools: + +```ts +const userContext = chat.local<{ plan: "free" | "pro" }>({ id: "userContext" }); + +const premiumTool = tool({ + 
description: "Access premium features", + inputSchema: z.object({ feature: z.string() }), + execute: async ({ feature }) => { + if (userContext.plan !== "pro") { + return { error: "This feature requires a Pro plan." }; + } + // ... premium logic + }, +}); +``` + +### Accessing from subtasks + +When you use `ai.tool()` to expose a subtask, chat locals are automatically available read-only: + +```ts +import { chat, ai } from "@trigger.dev/sdk/ai"; +import { schemaTask } from "@trigger.dev/sdk"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +const userContext = chat.local<{ name: string; plan: "free" | "pro" }>({ id: "userContext" }); + +export const analyzeData = schemaTask({ + id: "analyze-data", + schema: z.object({ query: z.string() }), + run: async ({ query }) => { + // userContext.name just works — auto-hydrated from parent metadata + console.log(`Analyzing for ${userContext.name}`); + // Changes here are local to this subtask and don't propagate back + }, +}); + +export const myChat = chat.task({ + id: "my-chat", + onChatStart: async ({ clientData }) => { + userContext.init({ name: "Alice", plan: "pro" }); + }, + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + tools: { analyzeData: ai.tool(analyzeData) }, + abortSignal: signal, + }); + }, +}); +``` + + + Values must be JSON-serializable for subtask access. Non-serializable values (functions, class instances, etc.) will be lost during transfer. + + +### Dirty tracking and persistence + +The `hasChanged()` method returns `true` if any property was set since the last check, then resets the flag. 
Use it in lifecycle hooks to only persist when data actually changed: + +```ts +onTurnComplete: async ({ chatId }) => { + if (userContext.hasChanged()) { + await db.user.update({ + where: { id: userContext.get().userId }, + data: { + messageCount: userContext.messageCount, + }, + }); + } +}, +``` + +### chat.local API + +| Method | Description | +|--------|-------------| +| `chat.local({ id })` | Create a typed local with a unique id (declare at module level) | +| `local.init(value)` | Initialize with a value (call in hooks or `run`) | +| `local.hasChanged()` | Returns `true` if modified since last check, resets flag | +| `local.get()` | Returns a plain object copy (for serialization) | +| `local.property` | Direct property access (read/write via Proxy) | + + + Locals use shallow proxying. Nested object mutations like `local.prefs.theme = "dark"` won't trigger the dirty flag. Instead, replace the whole property: `local.prefs = { ...local.prefs, theme: "dark" }`. + + +--- + +## chat.defer() + +Use `chat.defer()` to run background work in parallel with streaming. The deferred promise runs alongside the LLM response and is awaited (with a 5s timeout) before `onTurnComplete` fires. + +This moves non-blocking work (DB writes, analytics, etc.) out of the critical path: + +```ts +export const myChat = chat.task({ + id: "my-chat", + onTurnStart: async ({ chatId, uiMessages }) => { + // Persist messages without blocking the LLM call + chat.defer(db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } })); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +`chat.defer()` can be called from anywhere during a turn — hooks, `run()`, or nested helpers. All deferred promises are collected and awaited together before `onTurnComplete`. + +--- + +## Custom streaming with chat.stream + +`chat.stream` is a typed stream bound to the chat output. 
Use it to write custom `UIMessageChunk` data alongside the AI-generated response — for example, status updates or progress indicators. + +```ts +import { chat } from "@trigger.dev/sdk/ai"; + +export const myChat = chat.task({ + id: "my-chat", + run: async ({ messages, signal }) => { + // Write a custom data part to the chat stream. + // The AI SDK's data-* chunk protocol adds this to message.parts + // on the frontend, where you can render it however you like. + const { waitUntilComplete } = chat.stream.writer({ + execute: ({ write }) => { + write({ + type: "data-status", + id: "search-progress", + data: { message: "Searching the web...", progress: 0.5 }, + }); + }, + }); + await waitUntilComplete(); + + // Then stream the AI response + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + + + Use `data-*` chunk types (e.g. `data-status`, `data-progress`) for custom data. The AI SDK processes these into `DataUIPart` objects in `message.parts` on the frontend. Writing the same `type` + `id` again updates the existing part instead of creating a new one — useful for live progress. 
+ + +`chat.stream` exposes the full stream API: + +| Method | Description | +|--------|-------------| +| `chat.stream.writer(options)` | Write individual chunks via a callback | +| `chat.stream.pipe(stream, options?)` | Pipe a `ReadableStream` or `AsyncIterable` | +| `chat.stream.append(value, options?)` | Append raw data | +| `chat.stream.read(runId, options?)` | Read the stream by run ID | + +### Streaming from subtasks + +When a tool invokes a subtask via `triggerAndWait`, the subtask can stream directly to the parent chat using `target: "root"`: + +```ts +import { chat, ai } from "@trigger.dev/sdk/ai"; +import { schemaTask } from "@trigger.dev/sdk"; +import { streamText, generateId } from "ai"; +import { z } from "zod"; + +// A subtask that streams progress back to the parent chat +export const researchTask = schemaTask({ + id: "research", + schema: z.object({ query: z.string() }), + run: async ({ query }) => { + const partId = generateId(); + + // Write a data-* chunk to the root run's chat stream. + // The frontend receives this as a DataUIPart in message.parts. + const { waitUntilComplete } = chat.stream.writer({ + target: "root", + execute: ({ write }) => { + write({ + type: "data-research-status", + id: partId, + data: { query, status: "in-progress" }, + }); + }, + }); + await waitUntilComplete(); + + // Do the work... 
+ const result = await doResearch(query); + + // Update the same part with the final status + const { waitUntilComplete: waitDone } = chat.stream.writer({ + target: "root", + execute: ({ write }) => { + write({ + type: "data-research-status", + id: partId, + data: { query, status: "done", resultCount: result.length }, + }); + }, + }); + await waitDone(); + + return result; + }, +}); + +// The chat task uses it as a tool via ai.tool() +export const myChat = chat.task({ + id: "my-chat", + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, + tools: { + research: ai.tool(researchTask), + }, + }); + }, +}); +``` + +On the frontend, render the custom data part: + +```tsx +{message.parts.map((part, i) => { + if (part.type === "data-research-status") { + const { query, status, resultCount } = part.data; + return ( +
+      <div key={i}>
+        {status === "done" ? `Found ${resultCount} results` : `Researching "${query}"...`}
+      </div>
+ ); + } + // ...other part types +})} +``` + +The `target` option accepts: +- `"self"` — current run (default) +- `"parent"` — parent task's run +- `"root"` — root task's run (the chat task) +- A specific run ID string + +--- + +## ai.tool() — subtask integration + +When a subtask runs via `ai.tool()`, it can access the tool call context and chat context from the parent: + +```ts +import { ai, chat } from "@trigger.dev/sdk/ai"; +import type { myChat } from "./chat"; + +export const mySubtask = schemaTask({ + id: "my-subtask", + schema: z.object({ query: z.string() }), + run: async ({ query }) => { + // Get the AI SDK's tool call ID (useful for data-* chunk IDs) + const toolCallId = ai.toolCallId(); + + // Get typed chat context — pass typeof yourChatTask for typed clientData + const { chatId, clientData } = ai.chatContextOrThrow(); + // clientData is typed based on myChat's clientDataSchema + + // Write a data chunk using the tool call ID + const { waitUntilComplete } = chat.stream.writer({ + target: "root", + execute: ({ write }) => { + write({ + type: "data-progress", + id: toolCallId, + data: { status: "working", query, userId: clientData?.userId }, + }); + }, + }); + await waitUntilComplete(); + + return { result: "done" }; + }, +}); +``` + +| Helper | Returns | Description | +|--------|---------|-------------| +| `ai.toolCallId()` | `string \| undefined` | The AI SDK tool call ID | +| `ai.chatContext()` | `{ chatId, turn, continuation, clientData } \| undefined` | Chat context with typed `clientData`. Returns `undefined` if not in a chat context. | +| `ai.chatContextOrThrow()` | `{ chatId, turn, continuation, clientData }` | Same as above but throws if not in a chat context | +| `ai.currentToolOptions()` | `ToolCallExecutionOptions \| undefined` | Full tool execution options | + +--- + +## Preload + +Preload eagerly triggers a run for a chat before the first message is sent. 
This allows initialization (DB setup, context loading) to happen while the user is still typing, reducing first-response latency. + +### Frontend + +Call `transport.preload(chatId)` to start a run early: + +```tsx +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; +import { useChat } from "@ai-sdk/react"; + +export function Chat({ chatId }) { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + clientData: { userId: currentUser.id }, + }); + + // Preload on mount — run starts before the user types anything + useEffect(() => { + transport.preload(chatId, { warmTimeoutInSeconds: 60 }); + }, [chatId]); + + const { messages, sendMessage } = useChat({ id: chatId, transport }); + // ... +} +``` + +Preload is a no-op if a session already exists for this chatId. + +### Backend + +On the backend, the `onPreload` hook fires immediately. The run then waits for the first message. When the user sends a message, `onChatStart` fires with `preloaded: true` — you can skip initialization that was already done in `onPreload`: + +```ts +export const myChat = chat.task({ + id: "my-chat", + onPreload: async ({ chatId, clientData }) => { + // Eagerly initialize — runs before the first message + userContext.init(await loadUser(clientData.userId)); + await db.chat.create({ data: { id: chatId } }); + }, + onChatStart: async ({ preloaded }) => { + if (preloaded) return; // Already initialized in onPreload + // ... fallback initialization for non-preloaded runs + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +With `chat.createSession()` or raw tasks, check `payload.trigger === "preload"` and wait for the first message: + +```ts +if (payload.trigger === "preload") { + // Initialize early... 
+ const result = await chat.messages.waitWithWarmup({ + warmTimeoutInSeconds: 60, + timeout: "1h", + }); + if (!result.ok) return; + currentPayload = result.output; +} +``` diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx new file mode 100644 index 00000000000..0e7854e4d5d --- /dev/null +++ b/docs/ai-chat/frontend.mdx @@ -0,0 +1,234 @@ +--- +title: "Frontend" +sidebarTitle: "Frontend" +description: "Transport setup, session management, client data, and frontend patterns for AI Chat." +--- + +## Transport setup + +Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to create a memoized transport instance, then pass it to `useChat`: + +```tsx +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; +import { useChat } from "@ai-sdk/react"; +import type { myChat } from "@/trigger/chat"; +import { getChatToken } from "@/app/actions"; + +export function Chat() { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + }); + + const { messages, sendMessage, stop, status } = useChat({ transport }); + // ... render UI +} +``` + +The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID. + + + The hook keeps `onSessionChange` up to date via a ref internally, so you don't need to memoize the callback or worry about stale closures. + + +### Dynamic access tokens + +For token refresh, pass a function instead of a string. 
It's called on each `sendMessage`: + +```ts +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: async () => { + const res = await fetch("/api/chat-token"); + return res.text(); + }, +}); +``` + +## Session management + +### Session cleanup (frontend) + +Since session creation and updates are handled server-side, the frontend only needs to handle session deletion when a run ends: + +```tsx +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + sessions: loadedSessions, // Restored from DB on page load + onSessionChange: (chatId, session) => { + if (!session) { + deleteSession(chatId); // Server action — run ended + } + }, +}); +``` + +### Restoring on page load + +On page load, fetch both the messages and the session from your database, then pass them to `useChat` and the transport. Pass `resume: true` to `useChat` when there's an existing conversation — this tells the AI SDK to reconnect to the stream via the transport. + +```tsx app/page.tsx +"use client"; + +import { useEffect, useState } from "react"; +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; +import { useChat } from "@ai-sdk/react"; +import { getChatToken, getChatMessages, getSession, deleteSession } from "@/app/actions"; + +export default function ChatPage({ chatId }: { chatId: string }) { + const [initialMessages, setInitialMessages] = useState([]); + const [initialSession, setInitialSession] = useState(undefined); + const [loaded, setLoaded] = useState(false); + + useEffect(() => { + async function load() { + const [messages, session] = await Promise.all([ + getChatMessages(chatId), + getSession(chatId), + ]); + setInitialMessages(messages); + setInitialSession(session ? 
{ [chatId]: session } : undefined); + setLoaded(true); + } + load(); + }, [chatId]); + + if (!loaded) return null; + + return ( + + ); +} + +function ChatClient({ chatId, initialMessages, initialSessions }) { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + sessions: initialSessions, + onSessionChange: (id, session) => { + if (!session) deleteSession(id); + }, + }); + + const { messages, sendMessage, stop, status } = useChat({ + id: chatId, + messages: initialMessages, + transport, + resume: initialMessages.length > 0, // Resume if there's an existing conversation + }); + + // ... render UI +} +``` + + + `resume: true` causes `useChat` to call `reconnectToStream` on the transport when the component mounts. The transport uses the session's `lastEventId` to skip past already-seen stream events, so the frontend only receives new data. Only enable `resume` when there are existing messages — for brand new chats, there's nothing to reconnect to. + + + + In React strict mode (enabled by default in Next.js dev), you may see a `TypeError: Cannot read properties of undefined (reading 'state')` in the console when using `resume`. This is a [known bug in the AI SDK](https://github.com/vercel/ai/issues/8477) caused by React strict mode double-firing the resume effect. The error is caught internally and **does not affect functionality** — streaming and message display work correctly. It only appears in development and will not occur in production builds. + + +## Client data and metadata + +### Transport-level client data + +Set default client data on the transport that's included in every request. When the task uses `clientDataSchema`, this is type-checked to match: + +```ts +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + clientData: { userId: currentUser.id }, +}); +``` + +### Per-message metadata + +Pass metadata with individual messages via `sendMessage`. 
Per-message values are merged with transport-level client data (per-message wins on conflicts): + +```ts +sendMessage( + { text: "Hello" }, + { metadata: { model: "gpt-4o", priority: "high" } } +); +``` + +### Typed client data with clientDataSchema + +Instead of manually parsing `clientData` with Zod in every hook, pass a `clientDataSchema` to `chat.task`. The schema validates the data once per turn, and `clientData` is typed in all hooks and `run`: + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +export const myChat = chat.task({ + id: "my-chat", + clientDataSchema: z.object({ + model: z.string().optional(), + userId: z.string(), + }), + onChatStart: async ({ chatId, clientData }) => { + // clientData is typed as { model?: string; userId: string } + await db.chat.create({ + data: { id: chatId, userId: clientData.userId }, + }); + }, + run: async ({ messages, clientData, signal }) => { + // Same typed clientData — no manual parsing needed + return streamText({ + model: openai(clientData?.model ?? "gpt-4o"), + messages, + abortSignal: signal, + }); + }, +}); +``` + +The schema also types the `clientData` option on the frontend transport: + +```ts +// TypeScript enforces that clientData matches the schema +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + clientData: { userId: currentUser.id }, +}); +``` + +Supports Zod, ArkType, Valibot, and other schema libraries supported by the SDK. + +## Stop generation + +Calling `stop()` from `useChat` sends a stop signal to the running task via input streams. 
The task aborts the current `streamText` call, but the run stays alive for the next message: + +```tsx +const { messages, sendMessage, stop, status } = useChat({ transport }); + +{status === "streaming" && ( + +)} +``` + +See [Stop generation](/ai-chat/backend#stop-generation) in the backend docs for how to handle stop signals in your task. + +## Self-hosting + +If you're self-hosting Trigger.dev, pass the `baseURL` option: + +```ts +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken, + baseURL: "https://your-trigger-instance.com", +}); +``` diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx new file mode 100644 index 00000000000..eb3d1ab23df --- /dev/null +++ b/docs/ai-chat/overview.mdx @@ -0,0 +1,161 @@ +--- +title: "AI Chat" +sidebarTitle: "Overview" +description: "Run AI SDK chat completions as durable Trigger.dev tasks with built-in realtime streaming, multi-turn conversations, and message persistence." +--- + +## Overview + +The `@trigger.dev/sdk` provides a custom [ChatTransport](https://sdk.vercel.ai/docs/ai-sdk-ui/transport) for the Vercel AI SDK's `useChat` hook. This lets you run chat completions as **durable Trigger.dev tasks** instead of fragile API routes — with automatic retries, observability, and realtime streaming built in. + +**How it works:** +1. The frontend sends messages via `useChat` through `TriggerChatTransport` +2. The first message triggers a Trigger.dev task; subsequent messages resume the **same run** via input streams +3. The task streams `UIMessageChunk` events back via Trigger.dev's realtime streams +4. The AI SDK's `useChat` processes the stream natively — text, tool calls, reasoning, etc. +5. Between turns, the run stays warm briefly then suspends (freeing compute) until the next message + +No custom API routes needed. Your chat backend is a Trigger.dev task. 
+ + + +### First message flow + +```mermaid +sequenceDiagram + participant User + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Task as chat.task Worker + participant LLM as LLM Provider + + User->>useChat: sendMessage("Hello") + useChat->>useChat: No session for chatId → trigger new run + useChat->>API: triggerTask(payload, tags: [chat:id]) + API-->>useChat: { runId, publicAccessToken } + useChat->>useChat: Store session, subscribe to SSE + + API->>Task: Start run with ChatTaskWirePayload + Task->>Task: onChatStart({ chatId, messages, clientData }) + Task->>Task: onTurnStart({ chatId, messages }) + Task->>LLM: streamText({ model, messages, abortSignal }) + LLM-->>Task: Stream response chunks + Task->>API: streams.pipe("chat", uiStream) + API-->>useChat: SSE: UIMessageChunks + useChat-->>User: Render streaming text + Task->>API: Write __trigger_turn_complete + API-->>useChat: SSE: turn complete + refreshed token + useChat->>useChat: Close stream, update session + Task->>Task: onTurnComplete({ messages, stopped: false }) + Task->>Task: Wait for next message (warm → suspend) +``` + +### Multi-turn flow + +```mermaid +sequenceDiagram + participant User + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Task as chat.task Worker + participant LLM as LLM Provider + + Note over Task: Suspended, waiting for message + + User->>useChat: sendMessage("Tell me more") + useChat->>useChat: Session exists → send via input stream + useChat->>API: sendInputStream(runId, "chat-messages", payload) + Note right of useChat: Only sends new message (not full history) + + API->>Task: Deliver to messagesInput + Task->>Task: Wake from suspend + Task->>Task: Append to accumulated messages + Task->>Task: onTurnStart({ turn: 1 }) + Task->>LLM: streamText({ messages: [all accumulated] }) + LLM-->>Task: Stream response + Task->>API: streams.pipe("chat", uiStream) + API-->>useChat: SSE: UIMessageChunks + 
useChat-->>User: Render streaming text + Task->>API: Write __trigger_turn_complete + Task->>Task: onTurnComplete({ turn: 1 }) + Task->>Task: Wait for next message (warm → suspend) +``` + +### Stop signal flow + +```mermaid +sequenceDiagram + participant User + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Task as chat.task Worker + participant LLM as LLM Provider + + Note over Task: Streaming response... + + User->>useChat: Click "Stop" + useChat->>API: sendInputStream(runId, "chat-stop", { stop: true }) + API->>Task: Deliver to stopInput + Task->>Task: stopController.abort() + LLM-->>Task: Stream ends (AbortError) + Task->>Task: cleanupAbortedParts(responseMessage) + Note right of Task: Remove partial tool calls,
mark streaming parts as done + Task->>API: Write __trigger_turn_complete + API-->>useChat: SSE: turn complete + Task->>Task: onTurnComplete({ stopped: true }) + Task->>Task: Wait for next message +``` + +
+ + + Requires `@trigger.dev/sdk` version **4.4.0 or later** and the `ai` package **v5.0.0 or later**. + + +## How multi-turn works + +### One run, many turns + +The entire conversation lives in a **single Trigger.dev run**. After each AI response, the run waits for the next message via input streams. The frontend transport handles this automatically — it triggers a new run for the first message, and sends subsequent messages to the existing run. + +This means your conversation has full observability in the Trigger.dev dashboard: every turn is a span inside the same run. + +### Warm and suspended states + +After each turn, the run goes through two phases of waiting: + +1. **Warm phase** (default 30s) — The run stays active and responds instantly to the next message. Uses compute. +2. **Suspended phase** (default up to 1h) — The run suspends, freeing compute. It wakes when the next message arrives. There's a brief delay as the run resumes. + +If no message arrives within the turn timeout, the run ends gracefully. The next message from the frontend will automatically start a fresh run. + + + You are not charged for compute during the suspended phase. Only the warm phase uses compute resources. + + +### What the backend accumulates + +The backend automatically accumulates the full conversation history across turns. After the first turn, the frontend transport only sends the new user message — not the entire history. This is handled transparently by the transport and task. 
+ +The accumulated messages are available in: +- `run()` as `messages` (`ModelMessage[]`) — for passing to `streamText` +- `onTurnStart()` as `uiMessages` (`UIMessage[]`) — for persisting before streaming +- `onTurnComplete()` as `uiMessages` (`UIMessage[]`) — for persisting after the response + +## Three approaches + +There are three ways to build the backend, from most opinionated to most flexible: + +| Approach | Use when | What you get | +|----------|----------|--------------| +| [chat.task()](/ai-chat/backend#chattask) | Most apps | Auto-piping, lifecycle hooks, message accumulation, stop handling | +| [chat.createSession()](/ai-chat/backend#chatcreatesession) | Need a loop but not hooks | Async iterator with per-turn helpers, message accumulation, stop handling | +| [Raw task + primitives](/ai-chat/backend#raw-task-with-primitives) | Full control | Manual control of every step — use `chat.messages`, `chat.createStopSignal()`, etc. | + +## Related + +- [Quick Start](/ai-chat/quick-start) — Get a working chat in 3 steps +- [Backend](/ai-chat/backend) — Backend approaches in detail +- [Frontend](/ai-chat/frontend) — Transport setup, sessions, client data +- [Features](/ai-chat/features) — Per-run data, deferred work, streaming, subtasks +- [API Reference](/ai-chat/reference) — Complete reference tables diff --git a/docs/ai-chat/quick-start.mdx b/docs/ai-chat/quick-start.mdx new file mode 100644 index 00000000000..b8245d92372 --- /dev/null +++ b/docs/ai-chat/quick-start.mdx @@ -0,0 +1,108 @@ +--- +title: "Quick Start" +sidebarTitle: "Quick Start" +description: "Get a working AI chat in 3 steps — define a task, generate a token, and wire up the frontend." +--- + + + + Use `chat.task` from `@trigger.dev/sdk/ai` to define a task that handles chat messages. The `run` function receives `ModelMessage[]` (already converted from the frontend's `UIMessage[]`) — pass them directly to `streamText`. 
+ + If you return a `StreamTextResult`, it's **automatically piped** to the frontend. + + ```ts trigger/chat.ts + import { chat } from "@trigger.dev/sdk/ai"; + import { streamText } from "ai"; + import { openai } from "@ai-sdk/openai"; + + export const myChat = chat.task({ + id: "my-chat", + run: async ({ messages, signal }) => { + // messages is ModelMessage[] — pass directly to streamText + // signal fires on stop or run cancel + return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, + }); + }, + }); + ``` + + + + On your server (e.g. a Next.js server action), create a trigger public token scoped to your chat task: + + ```ts app/actions.ts + "use server"; + + import { chat } from "@trigger.dev/sdk/ai"; + import type { myChat } from "@/trigger/chat"; + + export const getChatToken = () => + chat.createAccessToken("my-chat"); + ``` + + + + Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to create a memoized transport instance, then pass it to `useChat`: + + ```tsx app/components/chat.tsx + "use client"; + + import { useChat } from "@ai-sdk/react"; + import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; + import type { myChat } from "@/trigger/chat"; + import { getChatToken } from "@/app/actions"; + + export function Chat() { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + }); + + const { messages, sendMessage, stop, status } = useChat({ transport }); + + return ( +
+      <div>
+        {messages.map((m) => (
+          <div key={m.id}>
+            <strong>{m.role}:</strong>
+            {m.parts.map((part, i) =>
+              part.type === "text" ? <span key={i}>{part.text}</span> : null
+            )}
+          </div>
+        ))}
+
+        <form
+          onSubmit={(e) => {
+            e.preventDefault();
+            const input = e.currentTarget.querySelector("input");
+            if (input?.value) {
+              sendMessage({ text: input.value });
+              input.value = "";
+            }
+          }}
+        >
+          <input placeholder="Say something..." />
+          <button type="submit">Send</button>
+          {status === "streaming" && (
+            <button type="button" onClick={() => stop()}>
+              Stop
+            </button>
+          )}
+        </form>
+      </div>
+ ); + } + ``` +
+
+ +## Next steps + +- [Backend](/ai-chat/backend) — Lifecycle hooks, persistence, session iterator, raw task primitives +- [Frontend](/ai-chat/frontend) — Session management, client data, reconnection +- [Features](/ai-chat/features) — Per-run data, deferred work, streaming, subtasks diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx new file mode 100644 index 00000000000..420decee98b --- /dev/null +++ b/docs/ai-chat/reference.mdx @@ -0,0 +1,257 @@ +--- +title: "API Reference" +sidebarTitle: "API Reference" +description: "Complete API reference for the AI Chat SDK — backend options, events, frontend transport, and hooks." +--- + +## ChatTaskOptions + +Options for `chat.task()`. + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `id` | `string` | required | Task identifier | +| `run` | `(payload: ChatTaskRunPayload) => Promise` | required | Handler for each turn | +| `clientDataSchema` | `TaskSchema` | — | Schema for validating and typing `clientData` | +| `onPreload` | `(event: PreloadEvent) => Promise \| void` | — | Fires on preloaded runs before the first message | +| `onChatStart` | `(event: ChatStartEvent) => Promise \| void` | — | Fires on turn 0 before `run()` | +| `onTurnStart` | `(event: TurnStartEvent) => Promise \| void` | — | Fires every turn before `run()` | +| `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes | +| `maxTurns` | `number` | `100` | Max conversational turns per run | +| `turnTimeout` | `string` | `"1h"` | How long to wait for next message | +| `warmTimeoutInSeconds` | `number` | `30` | Seconds to stay warm before suspending | +| `chatAccessTokenTTL` | `string` | `"1h"` | How long the scoped access token remains valid | +| `preloadWarmTimeoutInSeconds` | `number` | Same as `warmTimeoutInSeconds` | Warm timeout after `onPreload` fires | +| `preloadTimeout` | `string` | Same as `turnTimeout` | Suspend timeout for preloaded runs | 
+| `uiMessageStreamOptions` | `ChatUIMessageStreamOptions` | — | Default options for `toUIMessageStream()`. Per-turn override via `chat.setUIMessageStreamOptions()` | + +Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine`, `maxDuration`, etc. + +## ChatTaskRunPayload + +The payload passed to the `run` function. + +| Field | Type | Description | +|-------|------|-------------| +| `messages` | `ModelMessage[]` | Model-ready messages — pass directly to `streamText` | +| `chatId` | `string` | Unique chat session ID | +| `trigger` | `"submit-message" \| "regenerate-message"` | What triggered the request | +| `messageId` | `string \| undefined` | Message ID (for regenerate) | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend (typed when schema is provided) | +| `continuation` | `boolean` | Whether this run is continuing an existing chat (previous run ended) | +| `signal` | `AbortSignal` | Combined stop + cancel signal | +| `cancelSignal` | `AbortSignal` | Cancel-only signal | +| `stopSignal` | `AbortSignal` | Stop-only signal (per-turn) | + +## PreloadEvent + +Passed to the `onPreload` callback. + +| Field | Type | Description | +|-------|------|-------------| +| `chatId` | `string` | Chat session ID | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | + +## ChatStartEvent + +Passed to the `onChatStart` callback. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Initial model-ready messages | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | +| `preloaded` | `boolean` | Whether this run was preloaded before the first message | + +## TurnStartEvent + +Passed to the `onTurnStart` callback. + +| Field | Type | Description | +|-------|------|-------------| +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | +| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | +| `turn` | `number` | Turn number (0-indexed) | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | +| `preloaded` | `boolean` | Whether this run was preloaded | + +## TurnCompleteEvent + +Passed to the `onTurnComplete` callback. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | +| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | +| `newMessages` | `ModelMessage[]` | Only this turn's messages (model format) | +| `newUIMessages` | `UIMessage[]` | Only this turn's messages (UI format) | +| `responseMessage` | `UIMessage \| undefined` | The assistant's response for this turn | +| `rawResponseMessage` | `UIMessage \| undefined` | Raw response before abort cleanup | +| `turn` | `number` | Turn number (0-indexed) | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `lastEventId` | `string \| undefined` | Stream position for resumption | +| `stopped` | `boolean` | Whether the user stopped generation during this turn | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | + +## ChatSessionOptions + +Options for `chat.createSession()`. + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `signal` | `AbortSignal` | required | Run-level cancel signal | +| `warmTimeoutInSeconds` | `number` | `30` | Seconds to stay warm between turns | +| `timeout` | `string` | `"1h"` | Duration string for suspend timeout | +| `maxTurns` | `number` | `100` | Max turns before ending | + +## ChatTurn + +Each turn yielded by `chat.createSession()`. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `number` | `number` | Turn number (0-indexed) | +| `chatId` | `string` | Chat session ID | +| `trigger` | `string` | What triggered this turn | +| `clientData` | `unknown` | Client data from the transport | +| `messages` | `ModelMessage[]` | Full accumulated model messages | +| `uiMessages` | `UIMessage[]` | Full accumulated UI messages | +| `signal` | `AbortSignal` | Combined stop+cancel signal (fresh each turn) | +| `stopped` | `boolean` | Whether the user stopped generation this turn | +| `continuation` | `boolean` | Whether this is a continuation run | + +| Method | Returns | Description | +|--------|---------|-------------| +| `complete(source)` | `Promise` | Pipe, capture, accumulate, cleanup, and signal turn-complete | +| `done()` | `Promise` | Signal turn-complete (when you've piped manually) | +| `addResponse(response)` | `Promise` | Add response to accumulator manually | + +## chat namespace + +All methods available on the `chat` object from `@trigger.dev/sdk/ai`. + +| Method | Description | +|--------|-------------| +| `chat.task(options)` | Create a chat task | +| `chat.createSession(payload, options)` | Create an async iterator for chat turns | +| `chat.pipe(source, options?)` | Pipe a stream to the frontend (from anywhere inside a task) | +| `chat.pipeAndCapture(source, options?)` | Pipe and capture the response `UIMessage` | +| `chat.writeTurnComplete(options?)` | Signal the frontend that the current turn is complete | +| `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | +| `chat.messages` | Input stream for incoming messages — use `.waitWithWarmup()` | +| `chat.local({ id })` | Create a per-run typed local (see [Per-run data](/ai-chat/features#per-run-data-with-chatlocal)) | +| `chat.createAccessToken(taskId)` | Create a public access token for a chat task | +| `chat.setTurnTimeout(duration)` | Override turn timeout at runtime (e.g. 
`"2h"`) | +| `chat.setTurnTimeoutInSeconds(seconds)` | Override turn timeout at runtime (in seconds) | +| `chat.setWarmTimeoutInSeconds(seconds)` | Override warm timeout at runtime | +| `chat.setUIMessageStreamOptions(options)` | Override `toUIMessageStream()` options for the current turn | +| `chat.defer(promise)` | Run background work in parallel with streaming, awaited before `onTurnComplete` | +| `chat.isStopped()` | Check if the current turn was stopped by the user | +| `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message | +| `chat.stream` | Typed chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()` | +| `chat.MessageAccumulator` | Class that accumulates conversation messages across turns | + +## ChatUIMessageStreamOptions + +Options for customizing `toUIMessageStream()`. Set as static defaults via `uiMessageStreamOptions` on `chat.task()`, or override per-turn via `chat.setUIMessageStreamOptions()`. See [Stream options](/ai-chat/backend#stream-options) for usage examples. + +Derived from the AI SDK's `UIMessageStreamOptions` with `onFinish`, `originalMessages`, and `generateMessageId` omitted (managed internally). + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `onError` | `(error: unknown) => string` | Raw error message | Called on LLM errors and tool execution errors. Return a sanitized string — sent as `{ type: "error", errorText }` to the frontend. | +| `sendReasoning` | `boolean` | `true` | Send reasoning parts to the client | +| `sendSources` | `boolean` | `false` | Send source parts to the client | +| `sendFinish` | `boolean` | `true` | Send the finish event. Set to `false` when chaining multiple `streamText` calls. | +| `sendStart` | `boolean` | `true` | Send the message start event. Set to `false` when chaining. | +| `messageMetadata` | `(options: { part }) => metadata` | — | Extract message metadata to send to the client. 
Called on `start` and `finish` events. |
+
+## TriggerChatTransport options
+
+Options for the frontend transport constructor and `useTriggerChatTransport` hook.
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `task` | `string` | required | Task ID to trigger |
+| `accessToken` | `string \| () => string \| Promise<string>` | required | Auth token or function that returns one |
+| `baseURL` | `string` | `"https://api.trigger.dev"` | API base URL (for self-hosted) |
+| `streamKey` | `string` | `"chat"` | Stream key (only change if using custom key) |
+| `headers` | `Record<string, string>` | — | Extra headers for API requests |
+| `streamTimeoutSeconds` | `number` | `120` | How long to wait for stream data |
+| `clientData` | Typed by `clientDataSchema` | — | Default client data for every request |
+| `sessions` | `Record<string, ChatSession>` | — | Restore sessions from storage |
+| `onSessionChange` | `(chatId, session \| null) => void` | — | Fires when session state changes |
+| `triggerOptions` | `{...}` | — | Options for the initial task trigger (see below) |
+
+### triggerOptions
+
+Options forwarded to the Trigger.dev API when starting a new run. Only applies to the first message — subsequent messages reuse the same run.
+
+A `chat:{chatId}` tag is automatically added to every run.
+
+| Option | Type | Description |
+|--------|------|-------------|
+| `tags` | `string[]` | Additional tags for the run (merged with auto-tags, max 5 total) |
+| `queue` | `string` | Queue name for the run |
+| `maxAttempts` | `number` | Maximum retry attempts |
+| `machine` | `"micro" \| "small-1x" \| ...` | Machine preset for the run |
+| `priority` | `number` | Priority (lower = higher priority) |
+
+```ts
+const transport = useTriggerChatTransport({
+  task: "my-chat",
+  accessToken: getChatToken,
+  triggerOptions: {
+    tags: ["user:123"],
+    queue: "chat-queue",
+  },
+});
+```
+
+### transport.preload()
+
+Eagerly trigger a run before the first message. 
+ +```ts +transport.preload(chatId, { warmTimeoutInSeconds?: number }): Promise +``` + +No-op if a session already exists for this chatId. See [Preload](/ai-chat/features#preload) for full details. + +## useTriggerChatTransport + +React hook that creates and memoizes a `TriggerChatTransport` instance. Import from `@trigger.dev/sdk/chat/react`. + +```tsx +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; +import type { myChat } from "@/trigger/chat"; + +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: () => getChatToken(), + sessions: savedSessions, + onSessionChange: handleSessionChange, +}); +``` + +The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID. + +## Related + +- [Realtime Streams](/tasks/streams) — How streams work under the hood +- [Using the Vercel AI SDK](/guides/examples/vercel-ai-sdk) — Basic AI SDK usage with Trigger.dev +- [Realtime React Hooks](/realtime/react-hooks/overview) — Lower-level realtime hooks +- [Authentication](/realtime/auth) — Public access tokens and trigger tokens diff --git a/docs/docs.json b/docs/docs.json index 7263daee638..47ad59ca96b 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -80,6 +80,17 @@ "hidden-tasks" ] }, + { + "group": "AI Chat", + "pages": [ + "ai-chat/overview", + "ai-chat/quick-start", + "ai-chat/backend", + "ai-chat/frontend", + "ai-chat/features", + "ai-chat/reference" + ] + }, { "group": "Configuration", "pages": [ @@ -743,6 +754,10 @@ { "source": "/insights/metrics", "destination": "/observability/dashboards" + }, + { + "source": "/guides/ai-chat", + "destination": "/ai-chat/overview" } ] } diff --git a/references/ai-chat/ARCHITECTURE.md b/references/ai-chat/ARCHITECTURE.md new file mode 100644 index 00000000000..8adbc0c4a1a --- /dev/null +++ b/references/ai-chat/ARCHITECTURE.md @@ -0,0 +1,311 @@ +# AI Chat Architecture + +## System Overview + +```mermaid +graph TB + 
subgraph Frontend["Frontend (Browser)"] + UC[useChat Hook] + TCT[TriggerChatTransport] + UI[Chat UI Components] + end + + subgraph Platform["Trigger.dev Platform"] + API[REST API] + RS[Realtime Streams] + RE[Run Engine] + end + + subgraph Worker["Task Worker"] + CT[chat.task Turn Loop] + ST[streamText / AI SDK] + LLM[LLM Provider] + SUB[Subtasks via ai.tool] + end + + UI -->|user types| UC + UC -->|sendMessages| TCT + TCT -->|triggerTask / sendInputStream| API + API -->|queue run / deliver input| RE + RE -->|execute| CT + CT -->|call| ST + ST -->|API call| LLM + LLM -->|stream chunks| ST + ST -->|UIMessageChunks| RS + RS -->|SSE| TCT + TCT -->|ReadableStream| UC + UC -->|update| UI + CT -->|triggerAndWait| SUB + SUB -->|chat.stream target:root| RS +``` + +## Detailed Flow: New Chat (First Message) + +```mermaid +sequenceDiagram + participant User + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Task as chat.task Worker + participant LLM as LLM Provider + + User->>useChat: sendMessage("Hello") + useChat->>useChat: No session for chatId → trigger new run + + useChat->>API: triggerTask(payload, tags: [chat:id]) + API-->>useChat: { runId, publicAccessToken } + useChat->>useChat: Store session, subscribe to SSE + + API->>Task: Start run with ChatTaskWirePayload + + Note over Task: Preload phase skipped (trigger ≠ "preload") + + rect rgb(240, 248, 255) + Note over Task: Turn 0 + Task->>Task: convertToModelMessages(uiMessages) + Task->>Task: Mint access token + Task->>Task: onChatStart({ chatId, messages, clientData }) + Task->>Task: onTurnStart({ chatId, messages, uiMessages }) + Task->>LLM: streamText({ model, messages, abortSignal }) + LLM-->>Task: Stream response chunks + Task->>API: streams.pipe("chat", uiStream) + API-->>useChat: SSE: UIMessageChunks + useChat-->>User: Render streaming text + Task->>Task: onFinish → capturedResponseMessage + Task->>Task: Accumulate response in messages + Task->>API: Write 
__trigger_turn_complete chunk + API-->>useChat: SSE: { type: __trigger_turn_complete, publicAccessToken } + useChat->>useChat: Close stream, update session + Task->>Task: onTurnComplete({ messages, uiMessages, stopped }) + end + + rect rgb(255, 248, 240) + Note over Task: Wait for next message + Task->>Task: messagesInput.once() [warm, 30s] + Note over Task: No message → suspend + Task->>Task: messagesInput.wait() [suspended, 1h] + end +``` + +## Detailed Flow: Multi-Turn (Subsequent Messages) + +```mermaid +sequenceDiagram + participant User + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Task as chat.task Worker + participant LLM as LLM Provider + + Note over Task: Suspended, waiting for message + + User->>useChat: sendMessage("Tell me more") + useChat->>useChat: Session exists → send via input stream + useChat->>API: sendInputStream(runId, "chat-messages", payload) + Note right of useChat: Only sends new message
(not full history) + + API->>Task: Deliver to messagesInput + Task->>Task: Wake from suspend + + rect rgb(240, 248, 255) + Note over Task: Turn 1 + Task->>Task: Append new message to accumulators + Task->>Task: Mint fresh access token + Task->>Task: onTurnStart({ turn: 1, messages }) + Task->>LLM: streamText({ messages: [all accumulated] }) + LLM-->>Task: Stream response + Task->>API: streams.pipe("chat", uiStream) + API-->>useChat: SSE: UIMessageChunks + useChat-->>User: Render streaming text + Task->>API: Write __trigger_turn_complete + Task->>Task: onTurnComplete({ turn: 1 }) + end + + Task->>Task: Wait for next message (warm → suspend) +``` + +## Stop Signal Flow + +```mermaid +sequenceDiagram + participant User + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Task as chat.task Worker + participant LLM as LLM Provider + + Note over Task: Streaming response... + + User->>useChat: Click "Stop" + useChat->>API: sendInputStream(runId, "chat-stop", { stop: true }) + useChat->>useChat: Set skipToTurnComplete = true + + API->>Task: Deliver to stopInput + Task->>Task: stopController.abort() + Task->>LLM: AbortSignal fires + LLM-->>Task: Stream ends (AbortError) + Task->>Task: Catch AbortError, fall through + Task->>Task: await onFinishPromise (race condition fix) + Task->>Task: cleanupAbortedParts(responseMessage) + Note right of Task: Remove partial tool calls
Mark streaming parts as done + + Task->>API: Write __trigger_turn_complete + API-->>useChat: SSE: __trigger_turn_complete + useChat->>useChat: skipToTurnComplete = false, close stream + + Task->>Task: onTurnComplete({ stopped: true, responseMessage: cleaned }) + Task->>Task: Wait for next message +``` + +## Preload Flow + +```mermaid +sequenceDiagram + participant User + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Task as chat.task Worker + + User->>useChat: Click "New Chat" + useChat->>API: transport.preload(chatId) + Note right of useChat: payload: { messages: [], trigger: "preload" }
tags: [chat:id, preload:true] + API-->>useChat: { runId, publicAccessToken } + useChat->>useChat: Store session + + API->>Task: Start run (trigger = "preload") + + rect rgb(240, 255, 240) + Note over Task: Preload Phase + Task->>Task: Mint access token + Task->>Task: onPreload({ chatId, clientData }) + Note right of Task: DB setup, load user context,
load dynamic tools + Task->>Task: messagesInput.once() [warm] + Note over Task: Waiting for first message... + end + + Note over User: User is typing... + + User->>useChat: sendMessage("Hello") + useChat->>useChat: Session exists → send via input stream + useChat->>API: sendInputStream(runId, "chat-messages", payload) + API->>Task: Deliver message + + rect rgb(240, 248, 255) + Note over Task: Turn 0 (preloaded = true) + Task->>Task: onChatStart({ preloaded: true }) + Task->>Task: onTurnStart({ preloaded: true }) + Task->>Task: run() with preloaded dynamic tools ready + end +``` + +## Subtask Streaming (Tool as Task) + +```mermaid +sequenceDiagram + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Chat as chat.task + participant LLM as LLM Provider + participant Sub as Subtask (ai.tool) + + Chat->>LLM: streamText({ tools: { research: ai.tool(task) } }) + LLM-->>Chat: Tool call: research({ query, urls }) + + Chat->>API: triggerAndWait(subtask, input) + Note right of Chat: Passes toolCallId, chatId,
clientData via metadata + + API->>Sub: Start subtask + + Sub->>Sub: ai.chatContextOrThrow() → { chatId, clientData } + Sub->>API: chat.stream.writer({ target: "root" }) + Note right of Sub: Write data-research-progress
chunks to parent's stream + API-->>useChat: SSE: data-* chunks + useChat-->>useChat: Render progress UI + + Sub-->>Chat: Return result + Chat->>LLM: Tool result + LLM-->>Chat: Continue response +``` + +## Continuation Flow (Run Timeout / Cancel) + +```mermaid +sequenceDiagram + participant User + participant useChat as useChat + Transport + participant API as Trigger.dev API + participant Task as chat.task Worker + + Note over Task: Previous run timed out / was cancelled + + User->>useChat: sendMessage("Continue") + useChat->>API: sendInputStream(runId, payload) + API-->>useChat: Error (run dead) + + useChat->>useChat: Delete session, set isContinuation = true + useChat->>API: triggerTask(payload, continuation: true, previousRunId) + API-->>useChat: New { runId, publicAccessToken } + + API->>Task: Start new run + + rect rgb(255, 245, 238) + Note over Task: Turn 0 (continuation = true) + Task->>Task: cleanupAbortedParts(incoming messages) + Note right of Task: Strip incomplete tool calls
from previous run's response + Task->>Task: onChatStart({ continuation: true, previousRunId }) + Task->>Task: Normal turn flow... + end +``` + +## Hook Lifecycle + +```mermaid +graph TD + START([Run Starts]) --> IS_PRELOAD{trigger = preload?} + + IS_PRELOAD -->|Yes| PRELOAD[onPreload] + PRELOAD --> WAIT_MSG[Wait for first message
warm → suspend] + WAIT_MSG --> TURN0 + + IS_PRELOAD -->|No| TURN0 + + TURN0[Turn 0] --> CHAT_START[onChatStart
continuation, preloaded] + CHAT_START --> TURN_START_0[onTurnStart] + TURN_START_0 --> RUN_0[run → streamText] + RUN_0 --> TURN_COMPLETE_0[onTurnComplete
stopped, responseMessage] + + TURN_COMPLETE_0 --> WAIT{Wait for
next message} + WAIT -->|Message arrives| TURN_N[Turn N] + WAIT -->|Timeout| END_RUN([Run Ends]) + + TURN_N --> TURN_START_N[onTurnStart] + TURN_START_N --> RUN_N[run → streamText] + RUN_N --> TURN_COMPLETE_N[onTurnComplete] + TURN_COMPLETE_N --> WAIT +``` + +## Stream Architecture + +```mermaid +graph LR + subgraph Output["Output Stream (chat)"] + direction TB + O1[UIMessageChunks
text, reasoning, tools] + O2[data-* custom chunks] + O3[__trigger_turn_complete
control chunk] + end + + subgraph Input["Input Streams"] + direction TB + I1[chat-messages
User messages] + I2[chat-stop
Stop signal] + end + + Frontend -->|sendInputStream| I1 + Frontend -->|sendInputStream| I2 + I1 -->|messagesInput.once/wait| Worker + I2 -->|stopInput.on| Worker + Worker -->|streams.pipe / chat.stream| Output + Subtask -->|chat.stream target:root| Output + Output -->|SSE /realtime/v1/streams| Frontend +``` From 82f5d0dd3f2c780f48289d2b9f24a13c6aebe2b7 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Mon, 23 Mar 2026 10:27:17 +0000 Subject: [PATCH 02/49] docs: rename warmTimeout to idleTimeout in ai-chat docs --- docs/ai-chat/backend.mdx | 22 +++++++++++----------- docs/ai-chat/features.mdx | 6 +++--- docs/ai-chat/overview.mdx | 8 ++++---- docs/ai-chat/reference.mdx | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 5c21e88ee65..01b97f41b85 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -489,19 +489,19 @@ run: async ({ messages, signal }) => { }, ``` -#### chat.setWarmTimeoutInSeconds() +#### chat.setIdleTimeoutInSeconds() -Override how long the run stays warm (active, using compute) after each turn: +Override how long the run stays idle (active, using compute) after each turn: ```ts run: async ({ messages, signal }) => { - chat.setWarmTimeoutInSeconds(60); // Stay warm for 1 minute + chat.setIdleTimeoutInSeconds(60); // Stay idle for 1 minute return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); }, ``` - Longer warm timeout means faster responses but more compute usage. Set to `0` to suspend immediately after each turn (minimum latency cost, slight delay on next message). + Longer idle timeout means faster responses but more compute usage. Set to `0` to suspend immediately after each turn (minimum latency cost, slight delay on next message). 
#### Stream options @@ -639,7 +639,7 @@ export const myChat = task({ const session = chat.createSession(payload, { signal, - warmTimeoutInSeconds: 60, + idleTimeoutInSeconds: 60, timeout: "1h", }); @@ -668,7 +668,7 @@ export const myChat = task({ | Option | Type | Default | Description | |--------|------|---------|-------------| | `signal` | `AbortSignal` | required | Run-level cancel signal (from task context) | -| `warmTimeoutInSeconds` | `number` | `30` | Seconds to stay warm between turns | +| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle between turns | | `timeout` | `string` | `"1h"` | Duration string for suspend timeout | | `maxTurns` | `number` | `100` | Max turns before ending | @@ -736,7 +736,7 @@ Raw task mode also lets you call `.toUIMessageStream()` yourself with any option | Primitive | Description | |-----------|-------------| -| `chat.messages` | Input stream for incoming messages — use `.waitWithWarmup()` to wait for the next turn | +| `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` to wait for the next turn | | `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | | `chat.pipeAndCapture(result)` | Pipe a `StreamTextResult` to the chat stream and capture the response | | `chat.writeTurnComplete()` | Signal the frontend that the current turn is complete | @@ -759,8 +759,8 @@ export const myChat = task({ // Handle preload — wait for the first real message if (currentPayload.trigger === "preload") { - const result = await chat.messages.waitWithWarmup({ - warmTimeoutInSeconds: 60, + const result = await chat.messages.waitWithIdleTimeout({ + idleTimeoutInSeconds: 60, timeout: "1h", spanName: "waiting for first message", }); @@ -818,8 +818,8 @@ export const myChat = task({ await chat.writeTurnComplete(); // Wait for the next message - const next = await chat.messages.waitWithWarmup({ - warmTimeoutInSeconds: 60, + const next = await 
chat.messages.waitWithIdleTimeout({ + idleTimeoutInSeconds: 60, timeout: "1h", spanName: "waiting for next message", }); diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index fd4b63789a1..9de3d13bf7e 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -374,7 +374,7 @@ export function Chat({ chatId }) { // Preload on mount — run starts before the user types anything useEffect(() => { - transport.preload(chatId, { warmTimeoutInSeconds: 60 }); + transport.preload(chatId, { idleTimeoutInSeconds: 60 }); }, [chatId]); const { messages, sendMessage } = useChat({ id: chatId, transport }); @@ -411,8 +411,8 @@ With `chat.createSession()` or raw tasks, check `payload.trigger === "preload"` ```ts if (payload.trigger === "preload") { // Initialize early... - const result = await chat.messages.waitWithWarmup({ - warmTimeoutInSeconds: 60, + const result = await chat.messages.waitWithIdleTimeout({ + idleTimeoutInSeconds: 60, timeout: "1h", }); if (!result.ok) return; diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx index eb3d1ab23df..a1d207c7993 100644 --- a/docs/ai-chat/overview.mdx +++ b/docs/ai-chat/overview.mdx @@ -13,7 +13,7 @@ The `@trigger.dev/sdk` provides a custom [ChatTransport](https://sdk.vercel.ai/d 2. The first message triggers a Trigger.dev task; subsequent messages resume the **same run** via input streams 3. The task streams `UIMessageChunk` events back via Trigger.dev's realtime streams 4. The AI SDK's `useChat` processes the stream natively — text, tool calls, reasoning, etc. -5. Between turns, the run stays warm briefly then suspends (freeing compute) until the next message +5. Between turns, the run stays idle briefly then suspends (freeing compute) until the next message No custom API routes needed. Your chat backend is a Trigger.dev task. 
@@ -47,7 +47,7 @@ sequenceDiagram API-->>useChat: SSE: turn complete + refreshed token useChat->>useChat: Close stream, update session Task->>Task: onTurnComplete({ messages, stopped: false }) - Task->>Task: Wait for next message (warm → suspend) + Task->>Task: Wait for next message (idle → suspend) ``` ### Multi-turn flow @@ -78,7 +78,7 @@ sequenceDiagram useChat-->>User: Render streaming text Task->>API: Write __trigger_turn_complete Task->>Task: onTurnComplete({ turn: 1 }) - Task->>Task: Wait for next message (warm → suspend) + Task->>Task: Wait for next message (idle → suspend) ``` ### Stop signal flow @@ -130,7 +130,7 @@ After each turn, the run goes through two phases of waiting: If no message arrives within the turn timeout, the run ends gracefully. The next message from the frontend will automatically start a fresh run. - You are not charged for compute during the suspended phase. Only the warm phase uses compute resources. + You are not charged for compute during the suspended phase. Only the idle phase uses compute resources. ### What the backend accumulates diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index 420decee98b..f2a5fb00d07 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -19,9 +19,9 @@ Options for `chat.task()`. 
| `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes | | `maxTurns` | `number` | `100` | Max conversational turns per run | | `turnTimeout` | `string` | `"1h"` | How long to wait for next message | -| `warmTimeoutInSeconds` | `number` | `30` | Seconds to stay warm before suspending | +| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle before suspending | | `chatAccessTokenTTL` | `string` | `"1h"` | How long the scoped access token remains valid | -| `preloadWarmTimeoutInSeconds` | `number` | Same as `warmTimeoutInSeconds` | Warm timeout after `onPreload` fires | +| `preloadIdleTimeoutInSeconds` | `number` | Same as `idleTimeoutInSeconds` | Idle timeout after `onPreload` fires | | `preloadTimeout` | `string` | Same as `turnTimeout` | Suspend timeout for preloaded runs | | `uiMessageStreamOptions` | `ChatUIMessageStreamOptions` | — | Default options for `toUIMessageStream()`. Per-turn override via `chat.setUIMessageStreamOptions()` | @@ -113,7 +113,7 @@ Options for `chat.createSession()`. | Option | Type | Default | Description | |--------|------|---------|-------------| | `signal` | `AbortSignal` | required | Run-level cancel signal | -| `warmTimeoutInSeconds` | `number` | `30` | Seconds to stay warm between turns | +| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle between turns | | `timeout` | `string` | `"1h"` | Duration string for suspend timeout | | `maxTurns` | `number` | `100` | Max turns before ending | @@ -151,12 +151,12 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. 
| `chat.pipeAndCapture(source, options?)` | Pipe and capture the response `UIMessage` | | `chat.writeTurnComplete(options?)` | Signal the frontend that the current turn is complete | | `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | -| `chat.messages` | Input stream for incoming messages — use `.waitWithWarmup()` | +| `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` | | `chat.local({ id })` | Create a per-run typed local (see [Per-run data](/ai-chat/features#per-run-data-with-chatlocal)) | | `chat.createAccessToken(taskId)` | Create a public access token for a chat task | | `chat.setTurnTimeout(duration)` | Override turn timeout at runtime (e.g. `"2h"`) | | `chat.setTurnTimeoutInSeconds(seconds)` | Override turn timeout at runtime (in seconds) | -| `chat.setWarmTimeoutInSeconds(seconds)` | Override warm timeout at runtime | +| `chat.setIdleTimeoutInSeconds(seconds)` | Override idle timeout at runtime | | `chat.setUIMessageStreamOptions(options)` | Override `toUIMessageStream()` options for the current turn | | `chat.defer(promise)` | Run background work in parallel with streaming, awaited before `onTurnComplete` | | `chat.isStopped()` | Check if the current turn was stopped by the user | @@ -226,7 +226,7 @@ const transport = useTriggerChatTransport({ Eagerly trigger a run before the first message. ```ts -transport.preload(chatId, { warmTimeoutInSeconds?: number }): Promise +transport.preload(chatId, { idleTimeoutInSeconds?: number }): Promise ``` No-op if a session already exists for this chatId. See [Preload](/ai-chat/features#preload) for full details. 
From f2f89982025d7d6aa86ce9ab972059938ff85a21 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 24 Mar 2026 14:28:45 +0000 Subject: [PATCH 03/49] add docs for prompts --- docs/ai-chat/backend.mdx | 83 +++++++ docs/ai-chat/compaction.mdx | 228 +++++++++++++++++++ docs/ai-chat/reference.mdx | 47 ++++ docs/ai/prompts.mdx | 424 ++++++++++++++++++++++++++++++++++++ docs/docs.json | 21 +- 5 files changed, 796 insertions(+), 7 deletions(-) create mode 100644 docs/ai-chat/compaction.mdx create mode 100644 docs/ai/prompts.mdx diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 01b97f41b85..6d80f0fec78 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -218,6 +218,51 @@ export const myChat = chat.task({ Persist `lastEventId` alongside the session. When the transport reconnects after a page refresh, it uses this to skip past already-seen events — preventing duplicate messages. +### Using prompts + +Use [AI Prompts](/ai/prompts) to manage your system prompt as versioned, overridable config. Store the resolved prompt in a lifecycle hook with `chat.prompt.set()`, then spread `chat.toStreamTextOptions()` into `streamText` — it includes the system prompt, model, config, and telemetry automatically. 
+ +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { prompts } from "@trigger.dev/sdk"; +import { streamText, createProviderRegistry } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +const registry = createProviderRegistry({ openai }); + +const systemPrompt = prompts.define({ + id: "my-chat-system", + model: "openai:gpt-4o", + config: { temperature: 0.7 }, + variables: z.object({ name: z.string() }), + content: `You are a helpful assistant for {{name}}.`, +}); + +export const myChat = chat.task({ + id: "my-chat", + clientDataSchema: z.object({ userId: z.string() }), + onChatStart: async ({ clientData }) => { + const user = await db.user.findUnique({ where: { id: clientData.userId } }); + const resolved = await systemPrompt.resolve({ name: user.name }); + chat.prompt.set(resolved); + }, + run: async ({ messages, signal }) => { + return streamText({ + ...chat.toStreamTextOptions({ registry }), // system, model, config, telemetry + messages, + abortSignal: signal, + }); + }, +}); +``` + +`chat.toStreamTextOptions()` returns an object with `system`, `model` (resolved via the registry), `temperature`, and `experimental_telemetry` — all from the stored prompt. Properties you set after the spread (like a client-selected model) take precedence. + + + See [Prompts](/ai/prompts) for the full guide — defining templates, variable schemas, dashboard overrides, and the management SDK. + + ### Stop generation #### How stop works @@ -476,6 +521,44 @@ export function Chat({ chatId, initialMessages, initialSessions }) { ``` +### prepareMessages + +Transform model messages before they're used anywhere — in `run()`, in compaction rebuilds, and in compaction results. Define once, applied everywhere. + +Use this for Anthropic cache breaks, injecting system context, stripping PII, etc. 
+ +```ts +export const myChat = chat.task({ + id: "my-chat", + prepareMessages: ({ messages, reason }) => { + // Add Anthropic cache breaks to the last message + if (messages.length === 0) return messages; + const last = messages[messages.length - 1]; + return [ + ...messages.slice(0, -1), + { + ...last, + providerOptions: { + ...last.providerOptions, + anthropic: { cacheControl: { type: "ephemeral" } }, + }, + }, + ]; + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +The `reason` field tells you why messages are being prepared: + +| Reason | Description | +|--------|-------------| +| `"run"` | Messages being passed to `run()` for `streamText` | +| `"compaction-rebuild"` | Rebuilding from a previous compaction summary | +| `"compaction-result"` | Fresh compaction just produced these messages | + ### Runtime configuration #### chat.setTurnTimeout() diff --git a/docs/ai-chat/compaction.mdx b/docs/ai-chat/compaction.mdx new file mode 100644 index 00000000000..fa78ffc483d --- /dev/null +++ b/docs/ai-chat/compaction.mdx @@ -0,0 +1,228 @@ +--- +title: "Compaction" +sidebarTitle: "Compaction" +description: "Automatic context compaction to keep long conversations within token limits." +--- + +## Overview + +Long conversations accumulate tokens across turns. Eventually the context window fills up, causing errors or degraded responses. Compaction solves this by automatically summarizing the conversation when token usage exceeds a threshold, then using that summary as the context for future turns. 
+
+The `compaction` option on `chat.task()` handles this in both paths:
+
+- **Between tool-call steps** (inner loop) — via the AI SDK's `prepareStep`, compaction runs between tool calls within a single turn
+- **Between turns** (outer loop) — for single-step responses with no tool calls, where `prepareStep` never fires
+
+## Basic usage
+
+Provide `shouldCompact` to decide when to compact and `summarize` to generate the summary:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, generateText, createProviderRegistry } from "ai";
+import { openai } from "@ai-sdk/openai";
+
+const registry = createProviderRegistry({ openai });
+
+export const myChat = chat.task({
+  id: "my-chat",
+  compaction: {
+    shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000,
+    summarize: async ({ messages }) => {
+      const result = await generateText({
+        model: openai("gpt-4o-mini"),
+        messages: [...messages, { role: "user", content: "Summarize this conversation concisely." }],
+      });
+      return result.text;
+    },
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      ...chat.toStreamTextOptions({ registry }),
+      messages,
+      abortSignal: signal,
+    });
+  },
+});
+```
+
+<Note>
+  The `prepareStep` for inner-loop compaction is automatically injected when you spread `chat.toStreamTextOptions()` into your `streamText` call. If you provide your own `prepareStep` after the spread, it overrides the auto-injected one.
+</Note>
+
+## How it works
+
+After each turn completes:
+
+1. `shouldCompact` is called with the current token usage
+2. If it returns `true`, `summarize` generates a summary from the model messages
+3. The **model messages** (sent to the LLM) are replaced with the summary
+4. The **UI messages** (persisted and displayed) are preserved by default
+5. The `onCompacted` hook fires if configured
+
+On the next turn, the LLM receives the compact summary instead of the full history — dramatically reducing token usage while preserving context.
+ +## Customizing what gets persisted + +By default, compaction only affects model messages — UI messages stay intact so users see the full conversation after a page refresh. You can customize this with `compactUIMessages`: + +### Summary + recent messages + +Replace older messages with a summary but keep the last few exchanges visible: + +```ts +import { generateId } from "ai"; + +export const myChat = chat.task({ + id: "my-chat", + compaction: { + shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000, + summarize: async ({ messages }) => { + return generateText({ + model: openai("gpt-4o-mini"), + messages: [...messages, { role: "user", content: "Summarize." }], + }).then((r) => r.text); + }, + compactUIMessages: ({ uiMessages, summary }) => [ + { + id: generateId(), + role: "assistant", + parts: [{ type: "text", text: `[Conversation summary]\n\n${summary}` }], + }, + ...uiMessages.slice(-4), // Keep the last 4 messages + ], + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +### Flatten to summary only + +Replace all messages with just the summary (like the LLM sees): + +```ts +compactUIMessages: ({ summary }) => [ + { + id: generateId(), + role: "assistant", + parts: [{ type: "text", text: `[Conversation summary]\n\n${summary}` }], + }, +], +``` + +## Customizing model messages + +By default, model messages are replaced with a single summary message. 
Use `compactModelMessages` to customize what the LLM sees after compaction: + +### Summary + recent context + +Keep the last few model messages so the LLM has recent detail alongside the summary: + +```ts +compactModelMessages: ({ modelMessages, summary }) => [ + { role: "user", content: summary }, + ...modelMessages.slice(-2), // Keep last exchange for detail +], +``` + +### Keep tool results + +Preserve tool-call results so the LLM remembers what tools returned: + +```ts +compactModelMessages: ({ modelMessages, summary }) => [ + { role: "user", content: summary }, + ...modelMessages.filter((m) => m.role === "tool"), +], +``` + +## shouldCompact event + +The `shouldCompact` callback receives context about the current state: + +| Field | Type | Description | +|-------|------|-------------| +| `messages` | `ModelMessage[]` | Current model messages | +| `totalTokens` | `number \| undefined` | Total tokens from the triggering step/turn | +| `inputTokens` | `number \| undefined` | Input tokens | +| `outputTokens` | `number \| undefined` | Output tokens | +| `usage` | `LanguageModelUsage` | Full usage object | +| `totalUsage` | `LanguageModelUsage` | Cumulative usage across all turns | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Current turn (0-indexed) | +| `clientData` | `unknown` | Custom data from the frontend | +| `source` | `"inner" \| "outer"` | Whether this is between steps or between turns | +| `steps` | `CompactionStep[]` | Steps array (inner loop only) | +| `stepNumber` | `number` | Step index (inner loop only) | + +## summarize event + +The `summarize` callback receives similar context: + +| Field | Type | Description | +|-------|------|-------------| +| `messages` | `ModelMessage[]` | Messages to summarize | +| `usage` | `LanguageModelUsage` | Usage from the triggering step/turn | +| `totalUsage` | `LanguageModelUsage` | Cumulative usage | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Current turn | +| `clientData` 
| `unknown` | Custom data from the frontend | +| `source` | `"inner" \| "outer"` | Where compaction is running | +| `stepNumber` | `number` | Step index (inner loop only) | + +## onCompacted hook + +Track compaction events for logging, billing, or analytics: + +```ts +export const myChat = chat.task({ + id: "my-chat", + compaction: { ... }, + onCompacted: async ({ summary, totalTokens, messageCount, chatId, turn }) => { + logger.info("Compacted", { chatId, turn, totalTokens, messageCount }); + await db.compactionLog.create({ + data: { chatId, summary, totalTokens, messageCount }, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +## Low-level compaction + +For `chat.createSession()` or raw task mode, use `chat.compact()` and `chat.compactionStep()` directly inside a custom `prepareStep`: + +```ts +const result = streamText({ + model: openai("gpt-4o"), + messages, + prepareStep: async ({ messages: stepMessages, steps }) => { + const result = await chat.compact(stepMessages, steps, { + threshold: 80_000, + summarize: async (msgs) => + generateText({ model: openai("gpt-4o-mini"), messages: msgs }).then((r) => r.text), + }); + return result.type === "skipped" ? undefined : result; + }, +}); +``` + +Or use the higher-level `chat.compactionStep()` factory: + +```ts +const result = streamText({ + model: openai("gpt-4o"), + messages, + prepareStep: chat.compactionStep({ + threshold: 80_000, + summarize: async (msgs) => + generateText({ model: openai("gpt-4o-mini"), messages: msgs }).then((r) => r.text), + }), +}); +``` + + + The low-level APIs only handle inner-loop compaction (between tool-call steps). For full coverage including single-step turns, use the `compaction` option on `chat.task()`. 
+ diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index f2a5fb00d07..ad738d305a4 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -17,6 +17,9 @@ Options for `chat.task()`. | `onChatStart` | `(event: ChatStartEvent) => Promise \| void` | — | Fires on turn 0 before `run()` | | `onTurnStart` | `(event: TurnStartEvent) => Promise \| void` | — | Fires every turn before `run()` | | `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes | +| `onCompacted` | `(event: CompactedEvent) => Promise \| void` | — | Fires when compaction occurs. See [Compaction](/ai-chat/compaction) | +| `compaction` | `ChatTaskCompactionOptions` | — | Automatic context compaction. See [Compaction](/ai-chat/compaction) | +| `prepareMessages` | `(event: PrepareMessagesEvent) => ModelMessage[]` | — | Transform model messages before use (cache breaks, context injection, etc.) | | `maxTurns` | `number` | `100` | Max conversational turns per run | | `turnTimeout` | `string` | `"1h"` | How long to wait for next message | | `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle before suspending | @@ -105,6 +108,50 @@ Passed to the `onTurnComplete` callback. | `lastEventId` | `string \| undefined` | Stream position for resumption | | `stopped` | `boolean` | Whether the user stopped generation during this turn | | `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `usage` | `LanguageModelUsage \| undefined` | Token usage for this turn | +| `totalUsage` | `LanguageModelUsage` | Cumulative token usage across all turns | + +## ChatTaskCompactionOptions + +Options for the `compaction` field on `chat.task()`. See [Compaction](/ai-chat/compaction) for usage guide. + +| Option | Type | Required | Description | +|--------|------|----------|-------------| +| `shouldCompact` | `(event: ShouldCompactEvent) => boolean \| Promise` | Yes | Decide whether to compact. 
Return `true` to trigger |
+| `summarize` | `(event: SummarizeEvent) => Promise<string>` | Yes | Generate a summary from the current messages |
+| `compactUIMessages` | `(event: CompactMessagesEvent) => UIMessage[] \| Promise<UIMessage[]>` | No | Transform UI messages after compaction. Default: preserve all |
+| `compactModelMessages` | `(event: CompactMessagesEvent) => ModelMessage[] \| Promise<ModelMessage[]>` | No | Transform model messages after compaction. Default: replace all with summary |
+
+## CompactMessagesEvent
+
+Passed to `compactUIMessages` and `compactModelMessages` callbacks.
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `summary` | `string` | The generated summary text |
+| `uiMessages` | `UIMessage[]` | Current UI messages (full conversation) |
+| `modelMessages` | `ModelMessage[]` | Current model messages (full conversation) |
+| `chatId` | `string` | Chat session ID |
+| `turn` | `number` | Current turn (0-indexed) |
+| `clientData` | `unknown` | Custom data from the frontend |
+| `source` | `"inner" \| "outer"` | Whether compaction is between steps or between turns |
+
+## CompactedEvent
+
+Passed to the `onCompacted` callback.
+ +| Field | Type | Description | +|-------|------|-------------| +| `summary` | `string` | The generated summary text | +| `messages` | `ModelMessage[]` | Messages that were compacted (pre-compaction) | +| `messageCount` | `number` | Number of messages before compaction | +| `usage` | `LanguageModelUsage` | Token usage from the triggering step/turn | +| `totalTokens` | `number \| undefined` | Total token count that triggered compaction | +| `inputTokens` | `number \| undefined` | Input token count | +| `outputTokens` | `number \| undefined` | Output token count | +| `stepNumber` | `number` | Step number (-1 for outer loop) | +| `chatId` | `string \| undefined` | Chat session ID | +| `turn` | `number \| undefined` | Current turn | ## ChatSessionOptions diff --git a/docs/ai/prompts.mdx b/docs/ai/prompts.mdx new file mode 100644 index 00000000000..4ac324ffff9 --- /dev/null +++ b/docs/ai/prompts.mdx @@ -0,0 +1,424 @@ +--- +title: "Prompts" +sidebarTitle: "Prompts" +description: "Define prompt templates as code, version them on deploy, and override from the dashboard without redeploying." +--- + +## Overview + +AI Prompts let you define prompt templates in your codebase alongside your tasks. When you deploy, Trigger.dev automatically versions your prompts. 
You can then: + +- View all prompt versions in the dashboard +- Create **overrides** to change the prompt text or model without redeploying +- Track every generation that used each prompt version +- See token usage, cost, and latency metrics per prompt +- Manage prompts programmatically via SDK methods + +## Defining a prompt + +Use `prompts.define()` to create a prompt with typed variables: + +```ts +import { prompts } from "@trigger.dev/sdk"; +import { z } from "zod"; + +export const supportPrompt = prompts.define({ + id: "customer-support", + description: "System prompt for customer support interactions", + model: "gpt-4o", + config: { temperature: 0.7 }, + variables: z.object({ + customerName: z.string(), + plan: z.string(), + issue: z.string(), + }), + content: `You are a support agent for Acme SaaS. + +## Customer context + +- **Name:** {{customerName}} +- **Plan:** {{plan}} +- **Issue:** {{issue}} + +Respond to the customer's issue. Be concise and helpful.`, +}); +``` + +### Options + +| Option | Type | Required | Description | +|--------|------|----------|-------------| +| `id` | `string` | Yes | Unique identifier (becomes the prompt slug) | +| `description` | `string` | No | Shown in the dashboard | +| `model` | `string` | No | Default model (e.g. `"gpt-4o"`, `"claude-sonnet-4-6"`) | +| `config` | `object` | No | Default config (temperature, maxTokens, etc.) 
 |
+| `variables` | Zod/ArkType schema | No | Schema for template variables (enables validation and dashboard UI) |
+| `content` | `string` | Yes | The prompt template with `{{variable}}` placeholders |
+
+### Template syntax
+
+Templates use Mustache-style placeholders:
+
+- `{{variableName}}` — replaced with the variable value
+- `{{#conditionalVar}}...{{/conditionalVar}}` — content only included if the variable is truthy
+
+```ts
+export const prompt = prompts.define({
+  id: "summarizer",
+  model: "gpt-4o-mini",
+  variables: z.object({
+    text: z.string(),
+    maxSentences: z.string().optional(),
+  }),
+  content: `Summarize the following text{{#maxSentences}} in {{maxSentences}} sentences or fewer{{/maxSentences}}:
+
+{{text}}`,
+});
+```
+
+## Resolving a prompt
+
+### Via prompt handle
+
+Call `.resolve()` on the handle returned by `define()`:
+
+```ts
+const resolved = await supportPrompt.resolve({
+  customerName: "Alice",
+  plan: "Pro",
+  issue: "Cannot access billing dashboard",
+});
+
+console.log(resolved.text); // The compiled prompt with variables filled in
+console.log(resolved.version); // e.g. 3
+console.log(resolved.model); // "gpt-4o"
+console.log(resolved.labels); // ["current"] or ["override"]
+```
+
+### Via standalone prompts.resolve()
+
+Resolve any prompt by slug without needing a handle. Pass the prompt handle as a type parameter for full type safety:
+
+```ts
+import { prompts } from "@trigger.dev/sdk";
+import type { supportPrompt } from "./prompts";
+
+// Fully typesafe — ID and variables are checked at compile time
+const resolved = await prompts.resolve<typeof supportPrompt>("customer-support", {
+  customerName: "Alice",
+  plan: "Pro",
+  issue: "Cannot access billing dashboard",
+});
+```
+
+Without the generic, the function still works but accepts any string slug and `Record<string, unknown>` variables.
+ +### Resolve options + +You can resolve a specific version or label: + +```ts +// Resolve a specific version +const v2 = await supportPrompt.resolve(variables, { version: 2 }); + +// Resolve by label +const current = await supportPrompt.resolve(variables, { label: "current" }); +``` + +By default, `resolve()` returns the **override** version if one is active, otherwise the **current** (latest deployed) version. + + + Both `promptHandle.resolve()` and `prompts.resolve()` call the Trigger.dev API when a client is configured. During local dev with `trigger dev`, this means you'll always get the server version (including overrides). + + +## Using with the AI SDK + +The resolved prompt integrates with the [Vercel AI SDK](https://ai-sdk.dev) via `toAISDKTelemetry()`. This links AI generation spans to the prompt in the dashboard. + +### generateText + +```ts +import { task } from "@trigger.dev/sdk"; +import { generateText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +export const supportTask = task({ + id: "handle-support", + run: async (payload) => { + const resolved = await supportPrompt.resolve({ + customerName: payload.name, + plan: payload.plan, + issue: payload.issue, + }); + + const result = await generateText({ + model: openai(resolved.model ?? "gpt-4o"), + system: resolved.text, + prompt: payload.issue, + ...resolved.toAISDKTelemetry(), + }); + + return { response: result.text }; + }, +}); +``` + +### streamText + +```ts +import { streamText } from "ai"; + +export const streamTask = task({ + id: "stream-support", + run: async (payload) => { + const resolved = await supportPrompt.resolve({ + customerName: payload.name, + plan: payload.plan, + issue: payload.issue, + }); + + const result = streamText({ + model: openai(resolved.model ?? 
"gpt-4o"), + system: resolved.text, + prompt: payload.issue, + ...resolved.toAISDKTelemetry(), + }); + + let fullText = ""; + for await (const chunk of result.textStream) { + fullText += chunk; + } + + return { response: fullText }; + }, +}); +``` + +### Custom telemetry metadata + +Pass additional metadata to `toAISDKTelemetry()` that will appear on the generation span: + +```ts +const result = await generateText({ + model: openai("gpt-4o"), + prompt: resolved.text, + ...resolved.toAISDKTelemetry({ + "task.type": "summarization", + "customer.tier": "enterprise", + }), +}); +``` + +## Using with chat.task() + +Prompts integrate with `chat.task()` via `chat.prompt` — a run-scoped store for the resolved prompt. Store a prompt once in a lifecycle hook, then access it anywhere during the run. + +### chat.prompt.set() and chat.prompt() + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { prompts } from "@trigger.dev/sdk"; +import { streamText, createProviderRegistry } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { anthropic } from "@ai-sdk/anthropic"; + +const registry = createProviderRegistry({ openai, anthropic }); + +const systemPrompt = prompts.define({ + id: "my-chat-system", + model: "openai:gpt-4o", + config: { temperature: 0.7 }, + variables: z.object({ name: z.string() }), + content: `You are a helpful assistant for {{name}}.`, +}); + +export const myChat = chat.task({ + id: "my-chat", + onChatStart: async ({ clientData }) => { + const resolved = await systemPrompt.resolve({ name: clientData.name }); + chat.prompt.set(resolved); + }, + run: async ({ messages, signal }) => { + return streamText({ + ...chat.toStreamTextOptions({ registry }), + messages, + abortSignal: signal, + }); + }, +}); +``` + +### chat.toStreamTextOptions() + +Returns an options object ready to spread into `streamText()`. 
When a prompt is stored via `chat.prompt.set()`, it includes: + +- `system` — the compiled prompt text +- `model` — resolved via the `registry` when provided +- `temperature`, `maxTokens`, etc. — from the prompt's `config` +- `experimental_telemetry` — links generations to the prompt in the dashboard + +```ts +// With registry — model is resolved automatically +const options = chat.toStreamTextOptions({ registry }); +// { system: "...", model: LanguageModel, temperature: 0.7, experimental_telemetry: { ... } } + +// Without registry — model is not included +const options = chat.toStreamTextOptions(); +// { system: "...", temperature: 0.7, experimental_telemetry: { ... } } +``` + + + When the user provides a `registry` and the prompt has a `model` string (e.g. `"openai:gpt-4o"`), the model is resolved via `registry.languageModel()` and included in the returned options. This means `streamText` uses the prompt's model by default — no manual model selection needed. + + +### Reading the prompt + +Access the stored prompt from anywhere in the run: + +```ts +run: async ({ messages, signal }) => { + const prompt = chat.prompt(); // Throws if not set + console.log(prompt.text); // The compiled prompt + console.log(prompt.model); // "openai:gpt-4o" + console.log(prompt.version); // 3 + + return streamText({ + ...chat.toStreamTextOptions({ registry }), + messages, + abortSignal: signal, + }); +}, +``` + +You can also set a plain string if you don't need the full prompt system: + +```ts +chat.prompt.set("You are a helpful assistant."); +``` + +## Prompt management SDK + +The `prompts` namespace includes methods for managing prompts programmatically. These work both inside tasks and outside (e.g. scripts, API handlers) as long as an API client is configured. 
+ +### List prompts + +```ts +const allPrompts = await prompts.list(); +``` + +### List versions + +```ts +const versions = await prompts.versions("customer-support"); +``` + +### Create an override + +Create a new override that takes priority over the deployed version: + +```ts +const result = await prompts.createOverride("customer-support", { + textContent: "New prompt template: Hello {{customerName}}!", + model: "gpt-4o-mini", + commitMessage: "Shorter prompt", +}); +``` + +### Update an override + +```ts +await prompts.updateOverride("customer-support", { + textContent: "Updated template: Hi {{customerName}}!", + model: "gpt-4o", +}); +``` + +### Remove an override + +Remove the active override, reverting to the deployed version: + +```ts +await prompts.removeOverride("customer-support"); +``` + +### Promote a version + +```ts +await prompts.promote("customer-support", 2); +``` + +### All management methods + +| Method | Description | +|--------|-------------| +| `prompts.list()` | List all prompts in the current environment | +| `prompts.versions(slug)` | List all versions for a prompt | +| `prompts.resolve(slug, variables?, options?)` | Resolve a prompt by slug | +| `prompts.promote(slug, version)` | Promote a version to current | +| `prompts.createOverride(slug, body)` | Create an override | +| `prompts.updateOverride(slug, body)` | Update the active override | +| `prompts.removeOverride(slug)` | Remove the active override | +| `prompts.reactivateOverride(slug, version)` | Reactivate a removed override | + +## Overrides + +Overrides let you change a prompt's template or model from the dashboard or SDK without redeploying your code. When an override is active, `resolve()` returns the override version instead of the deployed version. 
+ +### How overrides work + +- Overrides take priority over the deployed ("current") version +- Only one override can be active at a time +- Creating a new override replaces the previous one +- Removing an override reverts to the deployed version +- Overrides are environment-scoped (dev, staging, production are independent) + +### Creating an override (dashboard) + +1. Go to the prompt detail page +2. Click **Create Override** +3. Edit the template text and/or model +4. Add an optional commit message +5. Click **Create override** + +### Version resolution order + +When `resolve()` is called, versions are resolved in this order: + +1. **Specific version** — if `{ version: N }` is passed +2. **Override** — if an override is active in this environment +3. **Label** — if `{ label: "..." }` is passed (defaults to `"current"`) +4. **Current** — the latest deployed version with the "current" label + +## Dashboard + +### Prompts list + +The prompts list page shows all prompts in the current environment with the current or override version, default model, and a usage sparkline. 
+ +### Prompt detail + +Click a prompt to see: + +- **Template panel** — the prompt template for the selected version +- **Details tab** — slug, description, model, config, source file, and variable schema +- **Versions tab** — all versions with labels, source, and commit messages +- **Generations tab** — every AI generation that used this prompt, with live polling +- **Metrics tab** — token usage, cost, and latency charts + +### AI span inspectors + +When you use `toAISDKTelemetry()`, AI generation spans in the run trace get a custom inspector showing: + +- **Overview** — model, provider, token usage, cost, input/output preview +- **Messages** — the full message thread +- **Tools** — tool definitions and tool call details +- **Prompt** — the linked prompt's metadata, input variables, and template content + +## Type utilities + +```ts +import type { PromptHandle, PromptIdentifier, PromptVariables } from "@trigger.dev/sdk"; + +type Id = PromptIdentifier; // "customer-support" +type Vars = PromptVariables; // { customerName: string; plan: string; issue: string } +``` diff --git a/docs/docs.json b/docs/docs.json index 47ad59ca96b..a5f524fb1af 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -81,14 +81,21 @@ ] }, { - "group": "AI Chat", + "group": "AI", "pages": [ - "ai-chat/overview", - "ai-chat/quick-start", - "ai-chat/backend", - "ai-chat/frontend", - "ai-chat/features", - "ai-chat/reference" + "ai/prompts", + { + "group": "Chat", + "pages": [ + "ai-chat/overview", + "ai-chat/quick-start", + "ai-chat/backend", + "ai-chat/frontend", + "ai-chat/features", + "ai-chat/compaction", + "ai-chat/reference" + ] + } ] }, { From 7d794d8cf8863e6722ecb55c76f863faa3d5c4db Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 24 Mar 2026 14:47:40 +0000 Subject: [PATCH 04/49] better compaction support in createSession and manual tasks --- docs/ai-chat/compaction.mdx | 108 +++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 20 deletions(-) diff --git 
a/docs/ai-chat/compaction.mdx b/docs/ai-chat/compaction.mdx index fa78ffc483d..5f2c61245e9 100644 --- a/docs/ai-chat/compaction.mdx +++ b/docs/ai-chat/compaction.mdx @@ -190,39 +190,107 @@ export const myChat = chat.task({ }); ``` -## Low-level compaction +## Using with chat.createSession() -For `chat.createSession()` or raw task mode, use `chat.compact()` and `chat.compactionStep()` directly inside a custom `prepareStep`: +Pass the same `compaction` config to `chat.createSession()`. The session handles outer-loop compaction automatically inside `turn.complete()`: ```ts -const result = streamText({ - model: openai("gpt-4o"), - messages, - prepareStep: async ({ messages: stepMessages, steps }) => { - const result = await chat.compact(stepMessages, steps, { - threshold: 80_000, - summarize: async (msgs) => - generateText({ model: openai("gpt-4o-mini"), messages: msgs }).then((r) => r.text), - }); - return result.type === "skipped" ? undefined : result; +const session = chat.createSession(payload, { + signal, + idleTimeoutInSeconds: 60, + timeout: "1h", + compaction: { + shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000, + summarize: async ({ messages }) => + generateText({ model: openai("gpt-4o-mini"), messages }).then((r) => r.text), + compactUIMessages: ({ uiMessages, summary }) => [ + { id: generateId(), role: "assistant", + parts: [{ type: "text", text: `[Summary]\n\n${summary}` }] }, + ...uiMessages.slice(-4), + ], }, }); + +for await (const turn of session) { + const result = streamText({ + model: openai("gpt-4o"), + messages: turn.messages, + abortSignal: turn.signal, + }); + + await turn.complete(result); + // Outer-loop compaction runs automatically after complete() + + await db.chat.update({ + where: { id: turn.chatId }, + data: { messages: turn.uiMessages }, + }); +} ``` -Or use the higher-level `chat.compactionStep()` factory: +## Using with raw tasks (MessageAccumulator) + +Pass `compaction` to the `MessageAccumulator` constructor. 
Use `prepareStep()` for inner-loop compaction and `compactIfNeeded()` for the outer loop: ```ts -const result = streamText({ - model: openai("gpt-4o"), - messages, - prepareStep: chat.compactionStep({ +const conversation = new chat.MessageAccumulator({ + compaction: { + shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000, + summarize: async ({ messages }) => + generateText({ model: openai("gpt-4o-mini"), messages }).then((r) => r.text), + compactUIMessages: ({ summary }) => [ + { id: generateId(), role: "assistant", + parts: [{ type: "text", text: `[Summary]\n\n${summary}` }] }, + ], + }, +}); + +for (let turn = 0; turn < 100; turn++) { + const messages = await conversation.addIncoming(payload.messages, payload.trigger, turn); + + const result = streamText({ + model: openai("gpt-4o"), + messages, + prepareStep: conversation.prepareStep(), // Inner-loop compaction + }); + + const response = await chat.pipeAndCapture(result); + if (response) await conversation.addResponse(response); + + // Outer-loop compaction + const usage = await result.totalUsage; + await conversation.compactIfNeeded(usage, { chatId: payload.chatId, turn }); + + await db.chat.update({ data: { messages: conversation.uiMessages } }); + await chat.writeTurnComplete(); +} +``` + +## Fully manual compaction + +For maximum control, use `chat.compact()` directly inside a custom `prepareStep`: + +```ts +prepareStep: async ({ messages: stepMessages, steps }) => { + const result = await chat.compact(stepMessages, steps, { threshold: 80_000, summarize: async (msgs) => generateText({ model: openai("gpt-4o-mini"), messages: msgs }).then((r) => r.text), - }), -}); + }); + return result.type === "skipped" ? 
undefined : result; +}, +``` + +Or use the `chat.compactionStep()` factory: + +```ts +prepareStep: chat.compactionStep({ + threshold: 80_000, + summarize: async (msgs) => + generateText({ model: openai("gpt-4o-mini"), messages: msgs }).then((r) => r.text), +}), ``` - The low-level APIs only handle inner-loop compaction (between tool-call steps). For full coverage including single-step turns, use the `compaction` option on `chat.task()`. + The fully manual APIs only handle inner-loop compaction (between tool-call steps). For outer-loop coverage, use the `compaction` option on `chat.task()`, `chat.createSession()`, or `MessageAccumulator`. From 1dd44cdb9c959f73a7fe6474c628adc18e76d6c9 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 25 Mar 2026 14:25:39 +0000 Subject: [PATCH 05/49] docs: add prompts, compaction, and pending messages docs --- docs/ai-chat/backend.mdx | 27 +++ docs/ai-chat/pending-messages.mdx | 327 ++++++++++++++++++++++++++++++ docs/ai-chat/reference.mdx | 52 +++++ docs/docs.json | 1 + 4 files changed, 407 insertions(+) create mode 100644 docs/ai-chat/pending-messages.mdx diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 6d80f0fec78..8a52e48e45d 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -521,6 +521,33 @@ export function Chat({ chatId, initialMessages, initialSessions }) { ``` +### Pending messages (steering) + +Users can send messages while the agent is executing tool calls. With `pendingMessages`, these messages are injected between tool-call steps, steering the agent mid-execution: + +```ts +export const myChat = chat.task({ + id: "my-chat", + pendingMessages: { + shouldInject: ({ steps }) => steps.length > 0, + }, + run: async ({ messages, signal }) => { + return streamText({ + ...chat.toStreamTextOptions({ registry }), + messages, + tools: { /* ... 
*/ }, + abortSignal: signal, + }); + }, +}); +``` + +On the frontend, the `usePendingMessages` hook handles sending, tracking, and rendering injection points. + + + See [Pending Messages](/ai-chat/pending-messages) for the full guide — backend configuration, frontend hook, queuing vs steering, and how injection works with all three chat variants. + + ### prepareMessages Transform model messages before they're used anywhere — in `run()`, in compaction rebuilds, and in compaction results. Define once, applied everywhere. diff --git a/docs/ai-chat/pending-messages.mdx b/docs/ai-chat/pending-messages.mdx new file mode 100644 index 00000000000..3f0e9ecefda --- /dev/null +++ b/docs/ai-chat/pending-messages.mdx @@ -0,0 +1,327 @@ +--- +title: "Pending Messages" +sidebarTitle: "Pending Messages" +description: "Inject user messages mid-execution to steer agents between tool-call steps." +--- + +## Overview + +When an AI agent is executing tool calls, users may want to send a message that **steers the agent mid-execution** — adding context, correcting course, or refining the request without waiting for the response to finish. + +The `pendingMessages` option enables this by injecting user messages between tool-call steps via the AI SDK's `prepareStep`. Messages that arrive during streaming are queued and injected at the next step boundary. If there are no more step boundaries (single-step response or final text generation), the message becomes the next turn automatically. + +## How it works + +1. User sends a message while the agent is streaming +2. The message is sent to the backend via input stream (`transport.sendPendingMessage`) +3. The backend queues it in the steering queue +4. At the next `prepareStep` boundary (between tool-call steps), `shouldInject` is called +5. If it returns `true`, the message is injected into the LLM's context +6. A `data-pending-message-injected` stream chunk confirms injection to the frontend +7. 
If `prepareStep` never fires (no tool calls), the message becomes the next turn + +## Backend: chat.task + +Add `pendingMessages` to your `chat.task` configuration: + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +export const myChat = chat.task({ + id: "my-chat", + pendingMessages: { + // Only inject when there are completed steps (tool calls happened) + shouldInject: ({ steps }) => steps.length > 0, + }, + run: async ({ messages, signal }) => { + return streamText({ + ...chat.toStreamTextOptions({ registry }), + messages, + tools: { /* ... */ }, + abortSignal: signal, + }); + }, +}); +``` + +The `prepareStep` for injection is automatically included when you spread `chat.toStreamTextOptions()`. If you provide your own `prepareStep` after the spread, it overrides the auto-injected one. + +### Options + +| Option | Type | Description | +|--------|------|-------------| +| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean` | Decide whether to inject the batch. Called once per step boundary. If absent, no injection happens. | +| `prepare` | `(event: PendingMessagesBatchEvent) => ModelMessage[]` | Transform the batch before injection. Default: convert each message via `convertToModelMessages`. | +| `onReceived` | `(event) => void` | Called when a message arrives during streaming (per-message). | +| `onInjected` | `(event) => void` | Called after a batch is injected. | + +### shouldInject + +Called once per step boundary with the full batch of pending messages. Return `true` to inject all of them, `false` to skip (they'll be available at the next boundary or become the next turn). 
+ +```ts +pendingMessages: { + // Always inject + shouldInject: () => true, + + // Only inject after tool calls + shouldInject: ({ steps }) => steps.length > 0, + + // Only inject if there's one message + shouldInject: ({ messages }) => messages.length === 1, +}, +``` + +The event includes: + +| Field | Type | Description | +|-------|------|-------------| +| `messages` | `UIMessage[]` | All pending messages (batch) | +| `modelMessages` | `ModelMessage[]` | Current conversation | +| `steps` | `CompactionStep[]` | Completed steps | +| `stepNumber` | `number` | Current step (0-indexed) | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Current turn | +| `clientData` | `unknown` | Frontend metadata | + +### prepare + +Transform the batch of pending messages before they're injected into the LLM's context. By default, each UIMessage is converted to ModelMessages individually. Use `prepare` to combine multiple messages or add context: + +```ts +pendingMessages: { + shouldInject: ({ steps }) => steps.length > 0, + prepare: ({ messages }) => [{ + role: "user", + content: messages.length === 1 + ? messages[0].parts[0]?.text ?? "" + : `The user sent ${messages.length} messages:\n${ + messages.map((m, i) => `${i + 1}. ${m.parts[0]?.text}`).join("\n") + }`, + }], +}, +``` + +### Stream chunk + +When messages are injected, the SDK automatically writes a `data-pending-message-injected` stream chunk containing the message IDs and text. The frontend uses this to: +- Confirm which messages were injected +- Remove them from the pending overlay +- Render them inline at the injection point in the assistant response + +A "pending message injected" span also appears in the run trace. 
+ +## Backend: chat.createSession + +Pass `pendingMessages` to the session options: + +```ts +const session = chat.createSession(payload, { + signal, + idleTimeoutInSeconds: 60, + pendingMessages: { + shouldInject: () => true, + }, +}); + +for await (const turn of session) { + const result = streamText({ + model: openai("gpt-4o"), + messages: turn.messages, + abortSignal: turn.signal, + prepareStep: turn.prepareStep(), // Handles injection + compaction + }); + + await turn.complete(result); +} +``` + +Use `turn.prepareStep()` to get a prepareStep function that handles both injection and compaction. Users who spread `chat.toStreamTextOptions()` get it automatically. + +## Backend: MessageAccumulator (raw task) + +Pass `pendingMessages` to the constructor and wire up the message listener manually: + +```ts +const conversation = new chat.MessageAccumulator({ + pendingMessages: { + shouldInject: () => true, + prepare: ({ messages }) => [{ + role: "user", + content: `[Steering]: ${messages.map(m => m.parts[0]?.text).join(", ")}`, + }], + }, +}); + +for (let turn = 0; turn < 100; turn++) { + const messages = await conversation.addIncoming(payload.messages, payload.trigger, turn); + + // Listen for steering messages during streaming + const sub = chat.messages.on(async (msg) => { + const lastMsg = msg.messages?.[msg.messages.length - 1]; + if (lastMsg) await conversation.steerAsync(lastMsg); + }); + + const result = streamText({ + model: openai("gpt-4o"), + messages, + prepareStep: conversation.prepareStep(), // Handles injection + compaction + }); + + const response = await chat.pipeAndCapture(result); + sub.off(); + + if (response) await conversation.addResponse(response); + await chat.writeTurnComplete(); +} +``` + +### MessageAccumulator methods + +| Method | Description | +|--------|-------------| +| `steer(message, modelMessages?)` | Queue a UIMessage for injection (sync) | +| `steerAsync(message)` | Queue a UIMessage, converting to model messages automatically | +| 
`drainSteering()` | Get and clear unconsumed steering messages | +| `prepareStep()` | Returns a prepareStep function handling injection + compaction | + +## Frontend: usePendingMessages hook + +The `usePendingMessages` hook manages all the frontend complexity — tracking pending messages, detecting injections, and handling the turn lifecycle. + +```tsx +import { useChat } from "@ai-sdk/react"; +import { useTriggerChatTransport, usePendingMessages } from "@trigger.dev/sdk/chat/react"; + +function Chat({ chatId }) { + const transport = useTriggerChatTransport({ task: "my-chat", accessToken }); + + const { messages, setMessages, sendMessage, stop, status } = useChat({ + id: chatId, + transport, + }); + + const pending = usePendingMessages({ + transport, + chatId, + status, + messages, + setMessages, + sendMessage, + metadata: { model: "gpt-4o" }, + }); + + return ( +
+ {/* Render messages */} + {messages.map((msg) => ( +
+ {msg.role === "assistant" ? ( + msg.parts.map((part, i) => + pending.isInjectionPoint(part) ? ( + // Render injected messages inline at the injection point +
+ {pending.getInjectedMessages(part).map((m) => ( +
{m.text}
+ ))} +
+ ) : ( + + ) + ) + ) : ( + + )} +
+ ))} + + {/* Render pending messages */} + {pending.pending.map((msg) => ( +
+ {msg.text} + {msg.mode === "steering" ? "Steering" : "Queued"} + {msg.mode === "queued" && status === "streaming" && ( + + )} +
+ ))} + + {/* Send form */} +
{ + e.preventDefault(); + pending.steer(input); // Steers during streaming, sends normally when ready + setInput(""); + }}> + setInput(e.target.value)} /> + + {status === "streaming" && ( + + )} +
+
+ ); +} +``` + +### Hook API + +| Property/Method | Type | Description | +|----------------|------|-------------| +| `pending` | `PendingMessage[]` | Current pending messages with `id`, `text`, `mode`, and `injected` status | +| `steer(text)` | `(text: string) => void` | Send a steering message during streaming, or normal message when ready | +| `queue(text)` | `(text: string) => void` | Queue for next turn during streaming, or send normally when ready | +| `promoteToSteering(id)` | `(id: string) => void` | Convert a queued message to steering (sends via input stream immediately) | +| `isInjectionPoint(part)` | `(part: unknown) => boolean` | Check if an assistant message part is an injection confirmation | +| `getInjectedMessageIds(part)` | `(part: unknown) => string[]` | Get message IDs from an injection point | +| `getInjectedMessages(part)` | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point | + +### PendingMessage + +| Field | Type | Description | +|-------|------|-------------| +| `id` | `string` | Unique message ID | +| `text` | `string` | Message text | +| `mode` | `"steering" \| "queued"` | How the message is being handled | +| `injected` | `boolean` | Whether the backend confirmed injection | + +### Message lifecycle + +- **Steering messages** are sent via `transport.sendPendingMessage()` immediately. They appear as purple pending bubbles. If injected, they disappear from the overlay and render inline at the injection point. If not injected (no more step boundaries), they auto-send as the next turn when the response finishes. + +- **Queued messages** stay client-side until the turn completes, then auto-send as the next turn via `sendMessage()`. They can be promoted to steering mid-stream by clicking "Steer instead". + +- **Promoted messages** are queued messages that were converted to steering. They get sent via input stream immediately and follow the steering lifecycle from that point. 
+ +## Transport: sendPendingMessage + +The `TriggerChatTransport` exposes a `sendPendingMessage` method for sending messages via input stream without disrupting the active stream subscription: + +```ts +const sent = await transport.sendPendingMessage(chatId, { + id: crypto.randomUUID(), + role: "user", + parts: [{ type: "text", text: "and compare to vercel" }], +}, { model: "gpt-4o" }); +``` + +Unlike `sendMessage()` from useChat, this does NOT: +- Add the message to useChat's local state +- Cancel the active stream subscription +- Start a new response stream + +The `usePendingMessages` hook calls this internally — you typically don't need to use it directly. + +## Coexistence with compaction + +Pending message injection and compaction both use `prepareStep`. When both are configured, the auto-injected `prepareStep` handles them in order: + +1. **Compaction** runs first — checks threshold, generates summary if needed +2. **Injection** runs second — pending messages are appended to either the compacted or original messages + +This means injected messages are always included after compaction, ensuring the LLM sees both the compressed history and the new steering input. diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index ad738d305a4..c3bc8811614 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -19,6 +19,7 @@ Options for `chat.task()`. | `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes | | `onCompacted` | `(event: CompactedEvent) => Promise \| void` | — | Fires when compaction occurs. See [Compaction](/ai-chat/compaction) | | `compaction` | `ChatTaskCompactionOptions` | — | Automatic context compaction. See [Compaction](/ai-chat/compaction) | +| `pendingMessages` | `PendingMessagesOptions` | — | Mid-execution message injection. 
See [Pending Messages](/ai-chat/pending-messages) | | `prepareMessages` | `(event: PrepareMessagesEvent) => ModelMessage[]` | — | Transform model messages before use (cache breaks, context injection, etc.) | | `maxTurns` | `number` | `100` | Max conversational turns per run | | `turnTimeout` | `string` | `"1h"` | How long to wait for next message | @@ -153,6 +154,57 @@ Passed to the `onCompacted` callback. | `chatId` | `string \| undefined` | Chat session ID | | `turn` | `number \| undefined` | Current turn | +## PendingMessagesOptions + +Options for the `pendingMessages` field. See [Pending Messages](/ai-chat/pending-messages) for usage guide. + +| Option | Type | Required | Description | +|--------|------|----------|-------------| +| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean \| Promise` | No | Decide whether to inject the batch between tool-call steps. If absent, no injection. | +| `prepare` | `(event: PendingMessagesBatchEvent) => ModelMessage[] \| Promise` | No | Transform the batch before injection. Default: convert each via `convertToModelMessages`. | +| `onReceived` | `(event: PendingMessageReceivedEvent) => void \| Promise` | No | Called when a message arrives during streaming (per-message). | +| `onInjected` | `(event: PendingMessagesInjectedEvent) => void \| Promise` | No | Called after a batch is injected via prepareStep. | + +## PendingMessagesBatchEvent + +Passed to `shouldInject` and `prepare` callbacks. + +| Field | Type | Description | +|-------|------|-------------| +| `messages` | `UIMessage[]` | All pending messages (batch) | +| `modelMessages` | `ModelMessage[]` | Current conversation | +| `steps` | `CompactionStep[]` | Completed steps so far | +| `stepNumber` | `number` | Current step (0-indexed) | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Current turn (0-indexed) | +| `clientData` | `unknown` | Custom data from the frontend | + +## PendingMessagesInjectedEvent + +Passed to `onInjected` callback. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `messages` | `UIMessage[]` | All injected UI messages | +| `injectedModelMessages` | `ModelMessage[]` | The model messages that were injected | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Current turn | +| `stepNumber` | `number` | Step where injection occurred | + +## UsePendingMessagesReturn + +Return value of `usePendingMessages` hook. See [Pending Messages — Frontend](/ai-chat/pending-messages#frontend-usependingmessages-hook). + +| Property/Method | Type | Description | +|-----------------|------|-------------| +| `pending` | `PendingMessage[]` | Current pending messages with mode and injection status | +| `steer` | `(text: string) => void` | Send a steering message (or normal message when not streaming) | +| `queue` | `(text: string) => void` | Queue for next turn (or send normally when not streaming) | +| `promoteToSteering` | `(id: string) => void` | Convert a queued message to steering | +| `isInjectionPoint` | `(part: unknown) => boolean` | Check if an assistant message part is an injection confirmation | +| `getInjectedMessageIds` | `(part: unknown) => string[]` | Get message IDs from an injection point | +| `getInjectedMessages` | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point | + ## ChatSessionOptions Options for `chat.createSession()`. 
diff --git a/docs/docs.json b/docs/docs.json index a5f524fb1af..ae0f495281e 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -93,6 +93,7 @@ "ai-chat/frontend", "ai-chat/features", "ai-chat/compaction", + "ai-chat/pending-messages", "ai-chat/reference" ] } From 364787e361ce897c348453c67c250b80ebe692e0 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 26 Mar 2026 08:20:57 +0000 Subject: [PATCH 06/49] document the writer stuff --- docs/ai-chat/backend.mdx | 38 +++++++++++++++++++++++++++++++++++- docs/ai-chat/features.mdx | 4 ++++ docs/ai-chat/reference.mdx | 40 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 8a52e48e45d..81f42c1023d 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -101,6 +101,9 @@ export const myChat = chat.task({ | `runId` | `string` | The Trigger.dev run ID | | `chatAccessToken` | `string` | Scoped access token for this run | | `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks | + +Every lifecycle callback receives a `writer` — a lazy stream writer that lets you send custom `UIMessageChunk` parts (like `data-*` parts) to the frontend without the ceremony of `chat.stream.writer()`. See [ChatWriter](/ai-chat/reference#chatwriter). 
#### onChatStart @@ -145,6 +148,7 @@ Fires at the start of every turn, after message accumulation and `onChatStart` ( | `continuation` | `boolean` | Whether this run is continuing an existing chat | | `preloaded` | `boolean` | Whether this run was preloaded | | `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks | ```ts export const myChat = chat.task({ @@ -170,9 +174,41 @@ export const myChat = chat.task({ By persisting in `onTurnStart`, the user's message is saved to your database before the AI starts streaming. If the user refreshes mid-stream, the message is already there. +#### onBeforeTurnComplete + +Fires after the response is captured but **before** the stream closes. The `writer` can send custom chunks that appear in the current turn — use this for post-processing indicators, compaction progress, or any data the user should see before the turn ends. + +```ts +export const myChat = chat.task({ + id: "my-chat", + onBeforeTurnComplete: async ({ writer, usage, uiMessages }) => { + // Write a custom data part while the stream is still open + writer.write({ + type: "data-usage-summary", + data: { + tokens: usage?.totalTokens, + messageCount: uiMessages.length, + }, + }); + + // You can also compact messages here and write progress + if (usage?.totalTokens && usage.totalTokens > 50_000) { + writer.write({ type: "data-compaction", data: { status: "compacting" } }); + chat.setMessages(compactedMessages); + writer.write({ type: "data-compaction", data: { status: "complete" } }); + } + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +Receives the same fields as [`TurnCompleteEvent`](/ai-chat/reference#turncompleteevent), plus a [`writer`](/ai-chat/reference#chatwriter). 
+ #### onTurnComplete -Fires after each turn completes — after the response is captured, before waiting for the next message. This is the primary hook for persisting the assistant's response. +Fires after each turn completes — after the response is captured and the stream is closed. This is the primary hook for persisting the assistant's response. Does not include a `writer` since the stream is already closed. | Field | Type | Description | |-------|------|-------------| diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index 9de3d13bf7e..789c17531ec 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -207,6 +207,10 @@ export const myChat = chat.task({ Use `data-*` chunk types (e.g. `data-status`, `data-progress`) for custom data. The AI SDK processes these into `DataUIPart` objects in `message.parts` on the frontend. Writing the same `type` + `id` again updates the existing part instead of creating a new one — useful for live progress. + + Inside lifecycle callbacks (`onPreload`, `onChatStart`, `onTurnStart`, `onBeforeTurnComplete`, `onCompacted`), you can use the `writer` parameter instead of `chat.stream.writer()` — it's simpler and avoids the `execute` + `waitUntilComplete` boilerplate. See [ChatWriter](/ai-chat/reference#chatwriter). + + `chat.stream` exposes the full stream API: | Method | Description | diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index c3bc8811614..e7b59187233 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -16,8 +16,9 @@ Options for `chat.task()`. 
| `onPreload` | `(event: PreloadEvent) => Promise \| void` | — | Fires on preloaded runs before the first message | | `onChatStart` | `(event: ChatStartEvent) => Promise \| void` | — | Fires on turn 0 before `run()` | | `onTurnStart` | `(event: TurnStartEvent) => Promise \| void` | — | Fires every turn before `run()` | -| `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes | -| `onCompacted` | `(event: CompactedEvent) => Promise \| void` | — | Fires when compaction occurs. See [Compaction](/ai-chat/compaction) | +| `onBeforeTurnComplete` | `(event: BeforeTurnCompleteEvent) => Promise \| void` | — | Fires after response but before stream closes. Includes `writer`. | +| `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes (stream closed) | +| `onCompacted` | `(event: CompactedEvent) => Promise \| void` | — | Fires when compaction occurs. Includes `writer`. See [Compaction](/ai-chat/compaction) | | `compaction` | `ChatTaskCompactionOptions` | — | Automatic context compaction. See [Compaction](/ai-chat/compaction) | | `pendingMessages` | `PendingMessagesOptions` | — | Mid-execution message injection. See [Pending Messages](/ai-chat/pending-messages) | | `prepareMessages` | `(event: PrepareMessagesEvent) => ModelMessage[]` | — | Transform model messages before use (cache breaks, context injection, etc.) | @@ -57,6 +58,7 @@ Passed to the `onPreload` callback. | `runId` | `string` | The Trigger.dev run ID | | `chatAccessToken` | `string` | Scoped access token for this run | | `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. | ## ChatStartEvent @@ -72,6 +74,7 @@ Passed to the `onChatStart` callback. 
| `continuation` | `boolean` | Whether this run is continuing an existing chat | | `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | | `preloaded` | `boolean` | Whether this run was preloaded before the first message | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. | ## TurnStartEvent @@ -89,6 +92,7 @@ Passed to the `onTurnStart` callback. | `continuation` | `boolean` | Whether this run is continuing an existing chat | | `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | | `preloaded` | `boolean` | Whether this run was preloaded | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. | ## TurnCompleteEvent @@ -112,6 +116,37 @@ Passed to the `onTurnComplete` callback. | `usage` | `LanguageModelUsage \| undefined` | Token usage for this turn | | `totalUsage` | `LanguageModelUsage` | Cumulative token usage across all turns | +## BeforeTurnCompleteEvent + +Passed to the `onBeforeTurnComplete` callback. Same fields as `TurnCompleteEvent` plus a `writer`. + +| Field | Type | Description | +|-------|------|-------------| +| _(all TurnCompleteEvent fields)_ | | See [TurnCompleteEvent](#turncompleteevent) | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer — the stream is still open so chunks appear in the current turn | + +## ChatWriter + +A stream writer passed to lifecycle callbacks. Write custom `UIMessageChunk` parts (e.g. `data-*` parts) to the chat stream. + +The writer is lazy — no stream is opened unless you call `write()` or `merge()`, so there's zero overhead for callbacks that don't use it. 
+ +| Method | Type | Description | +|--------|------|-------------| +| `write(part)` | `(part: UIMessageChunk) => void` | Write a single chunk to the chat stream | +| `merge(stream)` | `(stream: ReadableStream) => void` | Merge another stream's chunks into the chat stream | + +```ts +onTurnStart: async ({ writer }) => { + // Write a custom data part — render it on the frontend + writer.write({ type: "data-status", data: { loading: true } }); +}, +onBeforeTurnComplete: async ({ writer, usage }) => { + // Stream is still open — these chunks arrive before the turn ends + writer.write({ type: "data-usage", data: { tokens: usage?.totalTokens } }); +}, +``` + ## ChatTaskCompactionOptions Options for the `compaction` field on `chat.task()`. See [Compaction](/ai-chat/compaction) for usage guide. @@ -153,6 +188,7 @@ Passed to the `onCompacted` callback. | `stepNumber` | `number` | Step number (-1 for outer loop) | | `chatId` | `string \| undefined` | Chat session ID | | `turn` | `number \| undefined` | Current turn | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks during compaction | ## PendingMessagesOptions From 1ce0911097e4751169ef6988a04e77b28cfa44f7 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 26 Mar 2026 15:32:22 +0000 Subject: [PATCH 07/49] Add background injection docs --- docs/ai-chat/backend.mdx | 28 ++++ docs/ai-chat/background-injection.mdx | 192 ++++++++++++++++++++++++++ docs/docs.json | 1 + 3 files changed, 221 insertions(+) create mode 100644 docs/ai-chat/background-injection.mdx diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 81f42c1023d..f2bc0560c6a 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -584,6 +584,34 @@ On the frontend, the `usePendingMessages` hook handles sending, tracking, and re See [Pending Messages](/ai-chat/pending-messages) for the full guide — backend configuration, frontend hook, queuing vs steering, and how injection works with all three chat variants. 
+### Background injection + +Inject context from background work into the conversation using `chat.inject()`. Combine with `chat.defer()` to run analysis between turns and inject results before the next response — self-review, RAG augmentation, safety checks, etc. + +```ts +export const myChat = chat.task({ + id: "my-chat", + onTurnComplete: async ({ messages }) => { + chat.defer((async () => { + const review = await generateObject({ /* ... */ }); + if (review.object.needsImprovement) { + chat.inject([{ + role: "system", + content: `[Self-review]\n${review.object.suggestions.join("\n")}`, + }]); + } + })()); + }, + run: async ({ messages, signal }) => { + return streamText({ ...chat.toStreamTextOptions({ registry }), messages, abortSignal: signal }); + }, +}); +``` + + + See [Background Injection](/ai-chat/background-injection) for the full guide — timing, self-review example, and how it differs from pending messages. + + ### prepareMessages Transform model messages before they're used anywhere — in `run()`, in compaction rebuilds, and in compaction results. Define once, applied everywhere. diff --git a/docs/ai-chat/background-injection.mdx b/docs/ai-chat/background-injection.mdx new file mode 100644 index 00000000000..b50c86329f6 --- /dev/null +++ b/docs/ai-chat/background-injection.mdx @@ -0,0 +1,192 @@ +--- +title: "Background injection" +sidebarTitle: "Background injection" +description: "Inject context from background work into the agent's conversation — self-review, RAG augmentation, or any async analysis." +--- + +## Overview + +`chat.inject()` queues model messages for injection into the conversation. Messages are picked up at the start of the next turn or at the next `prepareStep` boundary (between tool-call steps). + +This is the backend counterpart to [pending messages](/ai-chat/pending-messages) — pending messages come from the user via the frontend, while `chat.inject()` comes from your task code. 
+ +## Basic usage + +```ts +import { chat } from "@trigger.dev/sdk/ai"; + +// Queue a system message for injection +chat.inject([ + { + role: "system", + content: "The user's account was just upgraded to Pro.", + }, +]); +``` + +Messages are appended to the model messages before the next LLM inference call. The LLM sees them as part of the conversation context. + +## Common pattern: defer + inject + +The most powerful pattern combines `chat.defer()` (background work) with `chat.inject()` (inject results). Background work runs in parallel with the idle wait between turns, and results are injected before the next response. + +```ts +export const myChat = chat.task({ + id: "my-chat", + onTurnComplete: async ({ messages }) => { + // Kick off background analysis — doesn't block the turn + chat.defer( + (async () => { + const analysis = await analyzeConversation(messages); + chat.inject([ + { + role: "system", + content: `[Analysis of conversation so far]\n\n${analysis}`, + }, + ]); + })() + ); + }, + run: async ({ messages, signal }) => { + return streamText({ + ...chat.toStreamTextOptions({ registry }), + messages, + abortSignal: signal, + }); + }, +}); +``` + +### Timing + +1. Turn completes, `onTurnComplete` fires +2. `chat.defer()` registers the background work +3. The run immediately starts waiting for the next message (no blocking) +4. Background work completes, `chat.inject()` queues the messages +5. User sends next message, turn starts +6. Injected messages are appended before `run()` executes +7. The LLM sees the injected context alongside the new user message + +If the background work finishes *during* a tool-call loop (not between turns), the messages are picked up at the next `prepareStep` boundary instead. + +## Example: self-review + +A cheap model reviews the agent's response after each turn and injects coaching for the next one. Uses [Prompts](/ai/prompts) for the review prompt and `generateObject` for structured output. 
+ +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { prompts } from "@trigger.dev/sdk"; +import { streamText, generateObject, createProviderRegistry } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +const registry = createProviderRegistry({ openai }); + +const selfReviewPrompt = prompts.define({ + id: "self-review", + model: "openai:gpt-4o-mini", + content: `You are a conversation quality reviewer. Analyze the assistant's most recent response. + +Focus on: +- Whether the response answered the user's question +- Missed opportunities to use tools or provide more detail +- Tone mismatches + +Be concise. Only flag issues worth fixing.`, +}); + +export const myChat = chat.task({ + id: "my-chat", + onTurnComplete: async ({ messages }) => { + chat.defer( + (async () => { + const resolved = await selfReviewPrompt.resolve({}); + + const review = await generateObject({ + model: registry.languageModel(resolved.model ?? "openai:gpt-4o-mini"), + ...resolved.toAISDKTelemetry(), + system: resolved.text, + prompt: messages + .filter((m) => m.role === "user" || m.role === "assistant") + .map((m) => { + const text = + typeof m.content === "string" + ? m.content + : Array.isArray(m.content) + ? m.content + .filter((p: any) => p.type === "text") + .map((p: any) => p.text) + .join("") + : ""; + return `${m.role}: ${text}`; + }) + .join("\n\n"), + schema: z.object({ + needsImprovement: z.boolean(), + suggestions: z.array(z.string()), + }), + }); + + if (review.object.needsImprovement) { + chat.inject([ + { + role: "system", + content: `[Self-review]\n\n${review.object.suggestions.map((s) => `- ${s}`).join("\n")}\n\nApply these naturally.`, + }, + ]); + } + })() + ); + }, + run: async ({ messages, signal }) => { + return streamText({ + ...chat.toStreamTextOptions({ registry }), + messages, + abortSignal: signal, + }); + }, +}); +``` + +The self-review runs on `gpt-4o-mini` (fast, cheap) in the background. 
If the user sends another message before it completes, the coaching is still injected — `chat.inject()` persists across the idle wait. + +## Other use cases + +- **RAG augmentation**: After each turn, fetch relevant documents and inject them as context for the next response +- **Safety checks**: Run a moderation model on the response, inject warnings if issues are detected +- **Fact-checking**: Verify claims in the response using search tools, inject corrections +- **Context enrichment**: Look up user/account data based on what was discussed, inject it as system context + +## How it differs from pending messages + +| | `chat.inject()` | [Pending messages](/ai-chat/pending-messages) | +|---|---|---| +| **Source** | Backend task code | Frontend user input | +| **Triggered by** | Your code (e.g. `onTurnComplete` + `chat.defer()`) | User sending a message during streaming | +| **Injection point** | Start of next turn, or next `prepareStep` boundary | Next `prepareStep` boundary only | +| **Message role** | Any (`system`, `user`, `assistant`) | Typically `user` | +| **Frontend visibility** | Not visible unless you write custom `data-*` chunks | Visible via `usePendingMessages` hook | + +## API reference + +### chat.inject() + +```ts +chat.inject(messages: ModelMessage[]): void +``` + +Queue model messages for injection at the next opportunity. Messages persist across the idle wait between turns — they are not reset when a new turn starts. + +**Parameters:** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `messages` | `ModelMessage[]` | Model messages to inject (from the `ai` package) | + +Messages are drained (consumed) when: +1. A new turn starts — before `run()` executes +2. A `prepareStep` boundary is reached — between tool-call steps during streaming + + + `chat.inject()` writes to an in-memory queue in the current process. It works from any code running in the same task — lifecycle hooks, deferred work, tool execute functions, etc. 
It does not work from subtasks or other runs. + diff --git a/docs/docs.json b/docs/docs.json index ae0f495281e..edc7e5d477d 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -94,6 +94,7 @@ "ai-chat/features", "ai-chat/compaction", "ai-chat/pending-messages", + "ai-chat/background-injection", "ai-chat/reference" ] } From 708926ff321a1d9faa8e98d43bac30c0c056db3e Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 27 Mar 2026 14:29:17 +0000 Subject: [PATCH 08/49] docs(ai-chat): add Types page, link toolExecute and withUIMessage, fix MDX headings --- docs/ai-chat/backend.mdx | 4 + docs/ai-chat/features.mdx | 32 ++++-- docs/ai-chat/frontend.mdx | 17 ++++ docs/ai-chat/overview.mdx | 1 + docs/ai-chat/quick-start.mdx | 5 + docs/ai-chat/reference.mdx | 44 +++++++++ docs/ai-chat/types.mdx | 137 ++++++++++++++++++++++++++ docs/docs.json | 1 + docs/migrating-from-v3.mdx | 4 +- docs/snippets/migrate-v4-using-ai.mdx | 17 ++-- docs/tasks/schemaTask.mdx | 112 ++++++++++++--------- 11 files changed, 310 insertions(+), 64 deletions(-) create mode 100644 docs/ai-chat/types.mdx diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index f2bc0560c6a..2e48a6b2b57 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -8,6 +8,10 @@ description: "Three approaches to building your chat backend — chat.task(), se The highest-level approach. Handles message accumulation, stop signals, turn lifecycle, and auto-piping automatically. + + To fix a **custom** `UIMessage` subtype (typed custom data parts, tool map, etc.), use [`chat.withUIMessage<...>().task({...})`](/ai-chat/types) instead of `chat.task({...})`. Options are the same; defaults for `toUIMessageStream()` can be set on `withUIMessage`. 
+ + ### Simple: return a StreamTextResult Return the `streamText` result from `run` and it's automatically piped to the frontend: diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index 789c17531ec..ccceebedeea 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -8,7 +8,7 @@ description: "Per-run data, deferred work, custom streaming, subtask integration Use `chat.local` to create typed, run-scoped data that persists across turns and is accessible from anywhere — the run function, tools, nested helpers. Each run gets its own isolated copy, and locals are automatically cleared between runs. -When a subtask is invoked via `ai.tool()`, initialized locals are automatically serialized into the subtask's metadata and hydrated on first access — no extra code needed. Subtask changes to hydrated locals are local to the subtask and don't propagate back to the parent. +When a subtask is invoked via `ai.toolExecute()` (or the deprecated `ai.tool()`), initialized locals are automatically serialized into the subtask's metadata and hydrated on first access — no extra code needed. Subtask changes to hydrated locals are local to the subtask and don't propagate back to the parent. 
### Declaring and initializing @@ -76,18 +76,18 @@ const premiumTool = tool({ ### Accessing from subtasks -When you use `ai.tool()` to expose a subtask, chat locals are automatically available read-only: +When you use `ai.toolExecute()` inside AI SDK `tool()` to expose a subtask, chat locals are automatically available read-only: ```ts import { chat, ai } from "@trigger.dev/sdk/ai"; import { schemaTask } from "@trigger.dev/sdk"; -import { streamText } from "ai"; +import { streamText, tool } from "ai"; import { openai } from "@ai-sdk/openai"; import { z } from "zod"; const userContext = chat.local<{ name: string; plan: "free" | "pro" }>({ id: "userContext" }); -export const analyzeData = schemaTask({ +export const analyzeDataTask = schemaTask({ id: "analyze-data", schema: z.object({ query: z.string() }), run: async ({ query }) => { @@ -97,6 +97,12 @@ export const analyzeData = schemaTask({ }, }); +const analyzeData = tool({ + description: analyzeDataTask.description ?? "", + inputSchema: analyzeDataTask.schema!, + execute: ai.toolExecute(analyzeDataTask), +}); + export const myChat = chat.task({ id: "my-chat", onChatStart: async ({ clientData }) => { @@ -106,7 +112,7 @@ export const myChat = chat.task({ return streamText({ model: openai("gpt-4o"), messages, - tools: { analyzeData: ai.tool(analyzeData) }, + tools: { analyzeData }, abortSignal: signal, }); }, @@ -227,7 +233,8 @@ When a tool invokes a subtask via `triggerAndWait`, the subtask can stream direc ```ts import { chat, ai } from "@trigger.dev/sdk/ai"; import { schemaTask } from "@trigger.dev/sdk"; -import { streamText, generateId } from "ai"; +import { streamText, tool, generateId } from "ai"; +import { openai } from "@ai-sdk/openai"; import { z } from "zod"; // A subtask that streams progress back to the parent chat @@ -271,7 +278,12 @@ export const researchTask = schemaTask({ }, }); -// The chat task uses it as a tool via ai.tool() +const research = tool({ + description: researchTask.description ?? 
"", + inputSchema: researchTask.schema!, + execute: ai.toolExecute(researchTask), +}); + export const myChat = chat.task({ id: "my-chat", run: async ({ messages, signal }) => { @@ -280,7 +292,7 @@ export const myChat = chat.task({ messages, abortSignal: signal, tools: { - research: ai.tool(researchTask), + research, }, }); }, @@ -311,9 +323,9 @@ The `target` option accepts: --- -## ai.tool() — subtask integration +## Task tool subtasks (`ai.toolExecute`) -When a subtask runs via `ai.tool()`, it can access the tool call context and chat context from the parent: +When a subtask runs through **`execute: ai.toolExecute(task)`** (or the deprecated `ai.tool()`), it can access the tool call context and chat context from the parent: ```ts import { ai, chat } from "@trigger.dev/sdk/ai"; diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 0e7854e4d5d..8c0d8cff9da 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -31,6 +31,23 @@ The transport is created once on first render and reused across re-renders. Pass The hook keeps `onSessionChange` up to date via a ref internally, so you don't need to memoize the callback or worry about stale closures. +## Typed messages (`chat.withUIMessage`) + +If your chat task is defined with [`chat.withUIMessage()`](/ai-chat/types) (custom `data-*` parts, typed tools, etc.), pass the same message type through `useChat` so `messages` and `message.parts` are narrowed on the client: + +```tsx +import { useChat } from "@ai-sdk/react"; +import { useTriggerChatTransport, type InferChatUIMessage } from "@trigger.dev/sdk/chat/react"; +import type { myChat } from "./myChat"; + +type Msg = InferChatUIMessage; + +const transport = useTriggerChatTransport({ task: "my-chat", accessToken: getChatToken }); +const { messages } = useChat({ transport }); +``` + +See the [Types](/ai-chat/types) guide for defining `YourUIMessage`, default stream options, and backend examples. 
+ ### Dynamic access tokens For token refresh, pass a function instead of a string. It's called on each `sendMessage`: diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx index a1d207c7993..3fe6d0f3ec2 100644 --- a/docs/ai-chat/overview.mdx +++ b/docs/ai-chat/overview.mdx @@ -157,5 +157,6 @@ There are three ways to build the backend, from most opinionated to most flexibl - [Quick Start](/ai-chat/quick-start) — Get a working chat in 3 steps - [Backend](/ai-chat/backend) — Backend approaches in detail - [Frontend](/ai-chat/frontend) — Transport setup, sessions, client data +- [Types](/ai-chat/types) — TypeScript patterns, including custom `UIMessage` with `chat.withUIMessage` - [Features](/ai-chat/features) — Per-run data, deferred work, streaming, subtasks - [API Reference](/ai-chat/reference) — Complete reference tables diff --git a/docs/ai-chat/quick-start.mdx b/docs/ai-chat/quick-start.mdx index b8245d92372..cfffcc828b9 100644 --- a/docs/ai-chat/quick-start.mdx +++ b/docs/ai-chat/quick-start.mdx @@ -28,6 +28,10 @@ description: "Get a working AI chat in 3 steps — define a task, generate a tok }, }); ``` + + + For a **custom** [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype (typed `data-*` parts, tool map, etc.), define the task with [`chat.withUIMessage<...>().task({...})`](/ai-chat/types) instead of `chat.task`. 
+ @@ -105,4 +109,5 @@ description: "Get a working AI chat in 3 steps — define a task, generate a tok - [Backend](/ai-chat/backend) — Lifecycle hooks, persistence, session iterator, raw task primitives - [Frontend](/ai-chat/frontend) — Session management, client data, reconnection +- [Types](/ai-chat/types) — `chat.withUIMessage`, `InferChatUIMessage`, and related typing - [Features](/ai-chat/features) — Per-run data, deferred work, streaming, subtasks diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index e7b59187233..ff95019be05 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -298,6 +298,50 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. | `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message | | `chat.stream` | Typed chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()` | | `chat.MessageAccumulator` | Class that accumulates conversation messages across turns | +| `chat.withUIMessage(config?).task(options)` | Same as `chat.task`, but fixes a custom `UIMessage` subtype and optional default stream options. See [Types](/ai-chat/types) | + +## `chat.withUIMessage` + +Returns `{ task }`, where `task` is like [`chat.task`](#chat-namespace) but parameterized on a UI message type `TUIM`. + +```ts +chat.withUIMessage(config?: ChatWithUIMessageConfig): { + task: (options: ChatTaskOptions<..., ..., TUIM>) => Task<...>; +}; +``` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `config.streamOptions` | `ChatUIMessageStreamOptions` | Optional defaults for `toUIMessageStream()`. Shallow-merged with `uiMessageStreamOptions` on the inner `.task({ ... })` (task wins on key conflicts). | + +Use this when you need [`InferChatUIMessage`](#inferchatuimessage) / typed `data-*` parts / `InferUITools` to line up across backend hooks and `useChat`. Full guide: [Types](/ai-chat/types). 
+
+## `ChatWithUIMessageConfig`
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `streamOptions` | `ChatUIMessageStreamOptions` | Default `toUIMessageStream()` options for tasks created via `.task()` |
+
+## `InferChatUIMessage`
+
+Type helper: extracts the `UIMessage` subtype from a chat task’s wire payload.
+
+```ts
+import type { InferChatUIMessage } from "@trigger.dev/sdk/ai";
+// or from "@trigger.dev/sdk/chat/react"
+
+type Msg = InferChatUIMessage<typeof myChat>;
+```
+
+Use with `useChat({ transport })` when using [`chat.withUIMessage`](/ai-chat/types). For tasks defined with plain `chat.task()` (no custom generic), this resolves to the base `UIMessage`.
+
+## AI helpers (`ai` from `@trigger.dev/sdk/ai`)
+
+| Export | Status | Description |
+|--------|--------|-------------|
+| `ai.toolExecute(task)` | **Preferred** | Returns the `execute` function for AI SDK `tool()`. Runs the task via `triggerAndSubscribe` and attaches tool/chat metadata (same behavior the deprecated wrapper used internally). |
+| `ai.tool(task, options?)` | **Deprecated** | Wraps `tool()` / `dynamicTool()` and the same execute path. Migrate to `tool({ ..., execute: ai.toolExecute(task) })`. See [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools). |
+| `ai.toolCallId`, `ai.chatContext`, `ai.chatContextOrThrow`, `ai.currentToolOptions` | Supported | Work for any task-backed tool execute path, including `ai.toolExecute`. |
 
 ## ChatUIMessageStreamOptions
 
diff --git a/docs/ai-chat/types.mdx b/docs/ai-chat/types.mdx
new file mode 100644
index 00000000000..8ddfff063f0
--- /dev/null
+++ b/docs/ai-chat/types.mdx
@@ -0,0 +1,137 @@
+---
+title: "Types"
+sidebarTitle: "Types"
+description: "TypeScript types for AI Chat tasks, UI messages, and the frontend transport."
+---
+
+TypeScript patterns for [AI Chat](/ai-chat/overview). 
This page will expand over time; it currently documents how to pin a custom AI SDK [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype with `chat.withUIMessage` and align types on the client.
+
+## Custom `UIMessage` with `chat.withUIMessage`
+
+`chat.task()` types the wire payload with the base AI SDK `UIMessage`. That is enough for many apps.
+
+When you add **custom `data-*` parts** (via `chat.stream` / `writer`) or a **typed tool map** (e.g. `InferUITools`), you want a **narrower** `UIMessage` generic so that:
+
+- `onTurnStart`, `onTurnComplete`, and similar hooks expose correctly typed `uiMessages`
+- Stream options like `sendReasoning` align with your message shape
+- The frontend can treat `useChat` messages as the same subtype end-to-end
+
+`chat.withUIMessage<YourUIMessage>(config?)` returns `{ task }`, where `task(...)` accepts the **same options as** [`chat.task()`](/ai-chat/backend#chat-task) but fixes `YourUIMessage` as the UI message type for that chat task.
+
+### Defining a `UIMessage` subtype
+
+Build the type from AI SDK helpers and your tools object:
+
+```ts
+import type { InferUITools, UIDataTypes, UIMessage } from "ai";
+import { tool } from "ai";
+import { z } from "zod";
+
+const myTools = {
+  lookup: tool({
+    description: "Look up a record",
+    inputSchema: z.object({ id: z.string() }),
+    execute: async ({ id }) => ({ id, label: "example" }),
+  }),
+};
+
+type MyChatTools = InferUITools<typeof myTools>;
+
+type MyChatDataTypes = UIDataTypes & {
+  "turn-status": { status: "preparing" | "streaming" | "done" };
+};
+
+export type MyChatUIMessage = UIMessage<unknown, MyChatDataTypes, MyChatTools>;
+```
+
+Task-backed tools should use AI SDK [`tool()`](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling) with `execute: ai.toolExecute(schemaTask)` where needed — see [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools).
+
+### Backend: `chat.withUIMessage(...).task(...)`
+
+Call `withUIMessage` **once**, then chain `.task({ ... })` instead of `chat.task({ ... 
})`:
+
+```ts
+import { chat } from "@trigger.dev/sdk/ai";
+import { streamText, tool } from "ai";
+import { openai } from "@ai-sdk/openai";
+import { z } from "zod";
+import type { MyChatUIMessage } from "./my-chat-types";
+
+const myTools = {
+  lookup: tool({
+    description: "Look up a record",
+    inputSchema: z.object({ id: z.string() }),
+    execute: async ({ id }) => ({ id, label: "example" }),
+  }),
+};
+
+export const myChat = chat.withUIMessage<MyChatUIMessage>({
+  streamOptions: {
+    sendReasoning: true,
+    onError: (error) =>
+      error instanceof Error ? error.message : "Something went wrong.",
+  },
+}).task({
+  id: "my-chat",
+  clientDataSchema: z.object({ userId: z.string() }),
+  onTurnStart: async ({ uiMessages, writer }) => {
+    // uiMessages is MyChatUIMessage[] — custom data parts are typed
+    writer.write({
+      type: "data-turn-status",
+      data: { status: "preparing" },
+    });
+  },
+  run: async ({ messages, signal }) => {
+    return streamText({
+      model: openai("gpt-4o"),
+      messages,
+      tools: myTools,
+      abortSignal: signal,
+    });
+  },
+});
+```
+
+### Default stream options
+
+The optional `streamOptions` object becomes the **default** [`uiMessageStreamOptions`](/ai-chat/reference#chat-task-options) for `toUIMessageStream()`.
+
+If you also set `uiMessageStreamOptions` on the inner `.task({ ... })`, the two objects are **shallow-merged** — keys on the **task** win on conflicts. Per-turn overrides via [`chat.setUIMessageStreamOptions()`](/ai-chat/backend#stream-options) still apply on top. 
+
+### Frontend: `InferChatUIMessage`
+
+Import the helper type and pass it to `useChat` so `messages` and render logic match the backend:
+
+```tsx
+import { useChat } from "@ai-sdk/react";
+import { useTriggerChatTransport, type InferChatUIMessage } from "@trigger.dev/sdk/chat/react";
+import type { myChat } from "./myChat";
+
+type Msg = InferChatUIMessage<typeof myChat>;
+
+export function Chat() {
+  const transport = useTriggerChatTransport({
+    task: "my-chat",
+    accessToken: getChatToken,
+  });
+
+  const { messages } = useChat<Msg>({ transport });
+
+  return messages.map((m) => (
+    <div key={m.id}>
+      {/* m.parts narrowed for your UIMessage subtype */}
+    </div>
+  ));
+}
+```
+
+You can also import `InferChatUIMessage` from `@trigger.dev/sdk/ai` in non-React modules.
+
+### When plain `chat.task()` is enough
+
+If you do not rely on custom `UIMessage` generics (only default text, reasoning, and built-in tool UI types), **`chat.task()` alone is fine** — no need for `withUIMessage`.
+
+## See also
+
+- [Backend — `chat.task()`](/ai-chat/backend#chat-task)
+- [Frontend — transport & `useChat`](/ai-chat/frontend)
+- [API reference — `chat.withUIMessage`](/ai-chat/reference#chat-withuimessage)
+- [Task-backed AI tools — `ai.toolExecute`](/tasks/schemaTask#task-backed-ai-tools)
diff --git a/docs/docs.json b/docs/docs.json
index edc7e5d477d..d3a58d42350 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -91,6 +91,7 @@
       "ai-chat/quick-start",
       "ai-chat/backend",
       "ai-chat/frontend",
+      "ai-chat/types",
       "ai-chat/features",
       "ai-chat/compaction",
       "ai-chat/pending-messages",
diff --git a/docs/migrating-from-v3.mdx b/docs/migrating-from-v3.mdx
index 5530d66b62d..c820b25a1de 100644
--- a/docs/migrating-from-v3.mdx
+++ b/docs/migrating-from-v3.mdx
@@ -34,7 +34,7 @@ We're retiring Trigger.dev v3. **New v3 deploys will stop working from 1 April 2
 | [Hidden tasks](/hidden-tasks) | Create tasks that are not exported from your trigger files but can still be executed. |
 | [Middleware & locals](#middleware-and-locals) | The middleware system runs at the top level, executing before and after all lifecycle hooks. The locals API allows sharing data between middleware and hooks. |
 | [useWaitToken](/realtime/react-hooks/use-wait-token) | Use the useWaitToken hook to complete a wait token from a React component. |
-| [ai.tool](/tasks/schemaTask#ai-tool) | Create an AI tool from an existing `schemaTask` to use with the Vercel [AI SDK](https://vercel.com/docs/ai-sdk). |
+| [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools) | Use `schemaTask` with AI SDK `tool()` and `ai.toolExecute()` (legacy `ai.tool` is deprecated). 
| ## Node.js support @@ -165,7 +165,7 @@ export const myAiTask = schemaTask({ }); ``` -We've replaced the `toolTask` function with the `ai.tool` function, which creates an AI tool from an existing `schemaTask`. See the [ai.tool](/tasks/schemaTask#ai-tool) page for more details. +We've replaced the `toolTask` function with `schemaTask` plus AI SDK `tool()` and `ai.toolExecute()` (the older `ai.tool()` wrapper is deprecated). See [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools). ## Breaking changes diff --git a/docs/snippets/migrate-v4-using-ai.mdx b/docs/snippets/migrate-v4-using-ai.mdx index fa749ed7231..aa5393c158d 100644 --- a/docs/snippets/migrate-v4-using-ai.mdx +++ b/docs/snippets/migrate-v4-using-ai.mdx @@ -56,7 +56,7 @@ const myTask = task({ }, }); -We’ve deprecated the `toolTask` function and replaced it with the `ai.tool` function, which creates an AI tool from an existing `schemaTask`. This is the old version: +We’ve deprecated the `toolTask` function. Use `schemaTask` plus AI SDK `tool()` with `execute: ai.toolExecute(task)` (the `ai.tool()` wrapper is deprecated). This is the old version: import { toolTask, schemaTask } from "@trigger.dev/sdk"; import { z } from "zod"; @@ -85,9 +85,11 @@ export const myAiTask = schemaTask({ This is the new version: -import { schemaTask, ai } from "@trigger.dev/sdk"; +import { schemaTask } from "@trigger.dev/sdk"; +import { ai } from "@trigger.dev/sdk/ai"; import { z } from "zod"; -import { generateText } from "ai"; +import { generateText, tool } from "ai"; +import { openai } from "@ai-sdk/openai"; // Convert toolTask to schemaTask with a schema const myToolTask = schemaTask({ @@ -99,8 +101,11 @@ const myToolTask = schemaTask({ run: async (payload, { ctx }) => {}, }); -// Create an AI tool from the schemaTask -const myTool = ai.tool(myToolTask); +const myTool = tool({ + description: myToolTask.description ?? 
"", + inputSchema: myToolTask.schema!, + execute: ai.toolExecute(myToolTask), +}); export const myAiTask = schemaTask({ id: "my-ai-task", @@ -112,7 +117,7 @@ export const myAiTask = schemaTask({ prompt: payload.text, model: openai("gpt-4o"), tools: { - myTool, // Use the ai.tool created from schemaTask + myTool, }, }); }, diff --git a/docs/tasks/schemaTask.mdx b/docs/tasks/schemaTask.mdx index 3692d1d7035..82ba4aa5679 100644 --- a/docs/tasks/schemaTask.mdx +++ b/docs/tasks/schemaTask.mdx @@ -76,51 +76,63 @@ await myTask.trigger({ age: 30, dob: "2020-01-01" }); // this is valid await myTask.trigger({ name: "Alice", age: 30, dob: "2020-01-01" }); // this is also valid ``` -## `ai.tool` +## Task-backed AI tools -The `ai.tool` function allows you to create an AI tool from an existing `schemaTask` to use with the Vercel [AI SDK](https://vercel.com/docs/ai-sdk): +Use a `schemaTask` as the implementation of a Vercel [AI SDK](https://vercel.com/docs/ai-sdk) tool: the model calls the tool, and Trigger runs your task as a **subtask** with tool-call metadata, optional [chat context](/ai-chat/features#task-tool-subtasks), and the same payload validation as a normal trigger. + +### Recommended: `ai.toolExecute` with `tool()` + +Prefer building the tool with the AI SDK’s [`tool()`](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling) and passing **`execute: ai.toolExecute(yourTask)`**. You keep full control of `description`, `inputSchema`, and AI-SDK-only options (for example `experimental_toToolResultContent`), and your types follow the `ai` version installed in **your** app. 
```ts import { ai } from "@trigger.dev/sdk/ai"; import { schemaTask } from "@trigger.dev/sdk"; +import { tool, generateText } from "ai"; +import { openai } from "@ai-sdk/openai"; import { z } from "zod"; -import { generateText } from "ai"; const myToolTask = schemaTask({ id: "my-tool-task", schema: z.object({ foo: z.string(), }), - run: async (payload: any, { ctx }) => {}, + run: async ({ foo }) => { + return { bar: foo.toUpperCase() }; + }, }); -const myTool = ai.tool(myToolTask); +const myTool = tool({ + description: myToolTask.description ?? "", + inputSchema: myToolTask.schema!, + execute: ai.toolExecute(myToolTask), +}); export const myAiTask = schemaTask({ id: "my-ai-task", schema: z.object({ text: z.string(), }), - run: async (payload, { ctx }) => { - const { text } = await generateText({ - prompt: payload.text, + run: async ({ text }) => { + const { text: reply } = await generateText({ + prompt: text, model: openai("gpt-4o"), tools: { myTool, }, }); + return reply; }, }); ``` -You can also pass the `experimental_toToolResultContent` option to the `ai.tool` function to customize the content of the tool result: +`experimental_toToolResultContent` and other tool-level options belong on **`tool({ ... })`**, not on `ai.toolExecute`: ```ts import { openai } from "@ai-sdk/openai"; import { Sandbox } from "@e2b/code-interpreter"; import { ai } from "@trigger.dev/sdk/ai"; import { schemaTask } from "@trigger.dev/sdk"; -import { generateObject } from "ai"; +import { generateObject, tool } from "ai"; import { z } from "zod"; const chartTask = schemaTask({ @@ -135,56 +147,37 @@ const chartTask = schemaTask({ schema: z.object({ code: z.string().describe("The Python code to execute"), }), - system: ` - You are a helpful assistant that can generate Python code to be executed in a sandbox, using matplotlib.pyplot. 
- - For example: - - import matplotlib.pyplot as plt - plt.plot([1, 2, 3, 4]) - plt.ylabel('some numbers') - plt.show() - - Make sure the code ends with plt.show() - `, + system: `You are a helpful assistant that generates matplotlib code. End with plt.show().`, prompt: input, }); const sandbox = await Sandbox.create(); - const execution = await sandbox.runCode(code.object.code); - const firstResult = execution.results[0]; if (firstResult.png) { - return { - chart: firstResult.png, - }; - } else { - throw new Error("No chart generated"); + return { chart: firstResult.png }; } + throw new Error("No chart generated"); }, }); -// This is useful if you want to return an image from the tool -export const chartTool = ai.tool(chartTask, { - experimental_toToolResultContent: (result) => { - return [ - { - type: "image", - data: result.chart, - mimeType: "image/png", - }, - ]; - }, +export const chartTool = tool({ + description: chartTask.description ?? "", + inputSchema: chartTask.schema!, + execute: ai.toolExecute(chartTask), + experimental_toToolResultContent: (result) => [ + { type: "image", data: result.chart, mimeType: "image/png" }, + ], }); ``` -You can access the current tool execution options inside the task run function using the `ai.currentToolOptions()` function: +Inside the task run, you can read tool execution context with **`ai.currentToolOptions()`** (and helpers like `ai.toolCallId()`, `ai.chatContext()` when running inside a [`chat.task`](/ai-chat/overview)): ```ts import { ai } from "@trigger.dev/sdk/ai"; import { schemaTask } from "@trigger.dev/sdk"; +import { tool } from "ai"; import { z } from "zod"; const myToolTask = schemaTask({ @@ -192,22 +185,49 @@ const myToolTask = schemaTask({ schema: z.object({ foo: z.string(), }), - run: async (payload, { ctx }) => { + run: async ({ foo }) => { const toolOptions = ai.currentToolOptions(); console.log(toolOptions); + return { foo }; }, }); -export const myAiTask = ai.tool(myToolTask); +export const myTool = 
tool({ + description: myToolTask.description ?? "", + inputSchema: myToolTask.schema!, + execute: ai.toolExecute(myToolTask), +}); ``` -See the [AI SDK tool execution options docs](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling#tool-execution-options) for more details on the tool execution options. +See the [AI SDK tool execution options](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling#tool-execution-options) for fields passed through the runtime. - `ai.tool` is compatible with `schemaTask`'s defined with Zod and ArkType schemas, or any schemas - that implement a `.toJsonSchema()` function. + `ai.toolExecute` works with `schemaTask` definitions that use Zod, ArkType, or any schema that provides a JSON schema via `.toJsonSchema()` (same coverage as the legacy `ai.tool` wrapper). +### Deprecated: `ai.tool` + +The **`ai.tool(task, options?)`** helper is **deprecated**. It constructs an AI SDK `Tool` for you (using `tool()` for Zod-like schemas and `dynamicTool()` otherwise) and may be removed in a future major version. New code should use **`tool({ ..., execute: ai.toolExecute(task) })`** as shown above. + +### Legacy `ai.tool` example (deprecated) + +```ts +import { ai } from "@trigger.dev/sdk/ai"; +import { schemaTask } from "@trigger.dev/sdk"; +import { z } from "zod"; +import { generateText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +const myToolTask = schemaTask({ + id: "my-tool-task", + schema: z.object({ foo: z.string() }), + run: async ({ foo }) => ({ foo }), +}); + +// Deprecated — prefer tool({ execute: ai.toolExecute(myToolTask), ... 
}) +const myTool = ai.tool(myToolTask); +``` + ## Supported schema types ### Zod From 6fefbb7191bea20f06d19537acee5d87c7bd935c Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 27 Mar 2026 16:07:53 +0000 Subject: [PATCH 09/49] Add run-scoped PAT renewal for chat transport --- docs/ai-chat/backend.mdx | 254 ++++++++++-------- docs/ai-chat/features.mdx | 2 + docs/ai-chat/frontend.mdx | 53 ++-- docs/ai-chat/quick-start.mdx | 11 +- docs/ai-chat/reference.mdx | 484 +++++++++++++++++++---------------- 5 files changed, 444 insertions(+), 360 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 2e48a6b2b57..50a338cd2c1 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -99,13 +99,13 @@ export const myChat = chat.task({ }); ``` -| Field | Type | Description | -|-------|------|-------------| -| `chatId` | `string` | Chat session ID | -| `runId` | `string` | The Trigger.dev run ID | -| `chatAccessToken` | `string` | Scoped access token for this run | -| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | -| `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks | +| Field | Type | Description | +| ----------------- | --------------------------------------------- | -------------------------------- | +| `chatId` | `string` | Chat session ID | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks | Every lifecycle callback receives a `writer` — a lazy stream writer that lets you send custom `UIMessageChunk` parts (like `data-*` parts) to the frontend without the ceremony of `chat.stream.writer()`. See [ChatWriter](/ai-chat/reference#chatwriter). 
@@ -134,25 +134,27 @@ export const myChat = chat.task({ ``` - `clientData` contains custom data from the frontend — either the `clientData` option on the transport constructor (sent with every message) or the `metadata` option on `sendMessage()` (per-message). See [Client data and metadata](/ai-chat/frontend#client-data-and-metadata). + `clientData` contains custom data from the frontend — either the `clientData` option on the + transport constructor (sent with every message) or the `metadata` option on `sendMessage()` + (per-message). See [Client data and metadata](/ai-chat/frontend#client-data-and-metadata). #### onTurnStart Fires at the start of every turn, after message accumulation and `onChatStart` (turn 0), but **before** `run()` executes. Use it to persist messages before streaming begins — so a mid-stream page refresh still shows the user's message. -| Field | Type | Description | -|-------|------|-------------| -| `chatId` | `string` | Chat session ID | -| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | -| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | -| `turn` | `number` | Turn number (0-indexed) | -| `runId` | `string` | The Trigger.dev run ID | -| `chatAccessToken` | `string` | Scoped access token for this run | -| `continuation` | `boolean` | Whether this run is continuing an existing chat | -| `preloaded` | `boolean` | Whether this run was preloaded | -| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | -| `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks | +| Field | Type | Description | +| ----------------- | --------------------------------------------- | ----------------------------------------------- | +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | +| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | +| `turn` | 
`number` | Turn number (0-indexed) | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `preloaded` | `boolean` | Whether this run was preloaded | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks | ```ts export const myChat = chat.task({ @@ -175,7 +177,8 @@ export const myChat = chat.task({ ``` - By persisting in `onTurnStart`, the user's message is saved to your database before the AI starts streaming. If the user refreshes mid-stream, the message is already there. + By persisting in `onTurnStart`, the user's message is saved to your database before the AI starts + streaming. If the user refreshes mid-stream, the message is already there. #### onBeforeTurnComplete @@ -214,20 +217,20 @@ Receives the same fields as [`TurnCompleteEvent`](/ai-chat/reference#turncomplet Fires after each turn completes — after the response is captured and the stream is closed. This is the primary hook for persisting the assistant's response. Does not include a `writer` since the stream is already closed. 
-| Field | Type | Description | -|-------|------|-------------| -| `chatId` | `string` | Chat session ID | -| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | -| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | -| `newMessages` | `ModelMessage[]` | Only this turn's messages (model format) | -| `newUIMessages` | `UIMessage[]` | Only this turn's messages (UI format) | -| `responseMessage` | `UIMessage \| undefined` | The assistant's response for this turn | -| `turn` | `number` | Turn number (0-indexed) | -| `runId` | `string` | The Trigger.dev run ID | -| `chatAccessToken` | `string` | Scoped access token for this run | -| `lastEventId` | `string \| undefined` | Stream position for resumption. Persist this with the session. | -| `stopped` | `boolean` | Whether the user stopped generation during this turn | -| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| Field | Type | Description | +| -------------------- | ------------------------ | -------------------------------------------------------------------------------------------- | +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | +| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | +| `newMessages` | `ModelMessage[]` | Only this turn's messages (model format) | +| `newUIMessages` | `UIMessage[]` | Only this turn's messages (UI format) | +| `responseMessage` | `UIMessage \| undefined` | The assistant's response for this turn | +| `turn` | `number` | Turn number (0-indexed) | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `lastEventId` | `string \| undefined` | Stream position for resumption. Persist this with the session. 
| +| `stopped` | `boolean` | Whether the user stopped generation during this turn | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | | `rawResponseMessage` | `UIMessage \| undefined` | The raw assistant response before abort cleanup (same as `responseMessage` when not stopped) | ```ts @@ -251,11 +254,13 @@ export const myChat = chat.task({ ``` - Use `uiMessages` to overwrite the full conversation each turn (simplest). Use `newUIMessages` if you prefer to store messages individually — for example, one database row per message. + Use `uiMessages` to overwrite the full conversation each turn (simplest). Use `newUIMessages` if + you prefer to store messages individually — for example, one database row per message. - Persist `lastEventId` alongside the session. When the transport reconnects after a page refresh, it uses this to skip past already-seen events — preventing duplicate messages. + Persist `lastEventId` alongside the session. When the transport reconnects after a page refresh, + it uses this to skip past already-seen events — preventing duplicate messages. ### Using prompts @@ -300,7 +305,8 @@ export const myChat = chat.task({ `chat.toStreamTextOptions()` returns an object with `system`, `model` (resolved via the registry), `temperature`, and `experimental_telemetry` — all from the stored prompt. Properties you set after the spread (like a client-selected model) take precedence. - See [Prompts](/ai/prompts) for the full guide — defining templates, variable schemas, dashboard overrides, and the management SDK. + See [Prompts](/ai/prompts) for the full guide — defining templates, variable schemas, dashboard + overrides, and the management SDK. 
### Stop generation @@ -313,11 +319,11 @@ Calling `stop()` from `useChat` sends a stop signal to the running task via inpu The `run` function receives three abort signals: -| Signal | Fires when | Use for | -|--------|-----------|---------| -| `signal` | Stop **or** cancel | Pass to `streamText` — handles both cases. **Use this in most cases.** | -| `stopSignal` | Stop only (per-turn, reset each turn) | Custom logic that should only run on user stop, not cancellation | -| `cancelSignal` | Run cancel, expire, or maxDuration exceeded | Cleanup that should only happen on full cancellation | +| Signal | Fires when | Use for | +| -------------- | ------------------------------------------- | ---------------------------------------------------------------------- | +| `signal` | Stop **or** cancel | Pass to `streamText` — handles both cases. **Use this in most cases.** | +| `stopSignal` | Stop only (per-turn, reset each turn) | Custom logic that should only run on user stop, not cancellation | +| `cancelSignal` | Run cancel, expire, or maxDuration exceeded | Cleanup that should only happen on full cancellation | ```ts export const myChat = chat.task({ @@ -333,7 +339,8 @@ export const myChat = chat.task({ ``` - Use `signal` (the combined signal) in most cases. The separate `stopSignal` and `cancelSignal` are only needed if you want different behavior for stop vs cancel. + Use `signal` (the combined signal) in most cases. The separate `stopSignal` and `cancelSignal` are + only needed if you want different behavior for stop vs cancel. #### Detecting stop in callbacks @@ -393,7 +400,9 @@ const cleaned = chat.cleanupAbortedParts(rawResponseMessage); This removes tool invocation parts stuck in `partial-call` state and marks any `streaming` text or reasoning parts as `done`. - Stop signal delivery is best-effort. There is a small race window where the model may finish before the stop signal arrives, in which case the turn completes normally with `stopped: false`. 
This is expected and does not require special handling. + Stop signal delivery is best-effort. There is a small race window where the model may finish + before the stop signal arrives, in which case the turn completes normally with `stopped: false`. + This is expected and does not require special handling. ### Persistence @@ -406,7 +415,9 @@ To build a chat app that survives page refreshes, you need to persist two things 2. **Sessions** — The transport's connection state (`runId`, `publicAccessToken`, `lastEventId`). Persisted **server-side** via `onTurnStart` and `onTurnComplete`. - Sessions let the transport reconnect to an existing run after a page refresh. Without them, every page load would start a new run — losing the conversation context that was accumulated in the previous run. + Sessions let the transport reconnect to an existing run after a page refresh. Without them, every + page load would start a new run — losing the conversation context that was accumulated in the + previous run. 
#### Full persistence example @@ -470,8 +481,7 @@ import { chat } from "@trigger.dev/sdk/ai"; import type { myChat } from "@/trigger/chat"; import { db } from "@/lib/db"; -export const getChatToken = () => - chat.createAccessToken("my-chat"); +export const getChatToken = () => chat.createAccessToken("my-chat"); export async function getChatMessages(chatId: string) { const found = await db.chat.findUnique({ where: { id: chatId } }); @@ -480,11 +490,14 @@ export async function getChatMessages(chatId: string) { export async function getAllSessions() { const sessions = await db.chatSession.findMany(); - const result: Record = {}; + const result: Record< + string, + { + runId: string; + publicAccessToken: string; + lastEventId?: string; + } + > = {}; for (const s of sessions) { result[s.id] = { runId: s.runId, @@ -552,13 +565,16 @@ export function Chat({ chatId, initialMessages, initialSessions }) { Send {status === "streaming" && ( - + )} ); } ``` + ### Pending messages (steering) @@ -575,7 +591,9 @@ export const myChat = chat.task({ return streamText({ ...chat.toStreamTextOptions({ registry }), messages, - tools: { /* ... */ }, + tools: { + /* ... */ + }, abortSignal: signal, }); }, @@ -585,7 +603,8 @@ export const myChat = chat.task({ On the frontend, the `usePendingMessages` hook handles sending, tracking, and rendering injection points. - See [Pending Messages](/ai-chat/pending-messages) for the full guide — backend configuration, frontend hook, queuing vs steering, and how injection works with all three chat variants. + See [Pending Messages](/ai-chat/pending-messages) for the full guide — backend configuration, + frontend hook, queuing vs steering, and how injection works with all three chat variants. ### Background injection @@ -596,15 +615,21 @@ Inject context from background work into the conversation using `chat.inject()`. 
export const myChat = chat.task({ id: "my-chat", onTurnComplete: async ({ messages }) => { - chat.defer((async () => { - const review = await generateObject({ /* ... */ }); - if (review.object.needsImprovement) { - chat.inject([{ - role: "system", - content: `[Self-review]\n${review.object.suggestions.join("\n")}`, - }]); - } - })()); + chat.defer( + (async () => { + const review = await generateObject({ + /* ... */ + }); + if (review.object.needsImprovement) { + chat.inject([ + { + role: "system", + content: `[Self-review]\n${review.object.suggestions.join("\n")}`, + }, + ]); + } + })() + ); }, run: async ({ messages, signal }) => { return streamText({ ...chat.toStreamTextOptions({ registry }), messages, abortSignal: signal }); @@ -613,7 +638,8 @@ export const myChat = chat.task({ ``` - See [Background Injection](/ai-chat/background-injection) for the full guide — timing, self-review example, and how it differs from pending messages. + See [Background Injection](/ai-chat/background-injection) for the full guide — timing, self-review + example, and how it differs from pending messages. ### prepareMessages @@ -648,11 +674,11 @@ export const myChat = chat.task({ The `reason` field tells you why messages are being prepared: -| Reason | Description | -|--------|-------------| -| `"run"` | Messages being passed to `run()` for `streamText` | -| `"compaction-rebuild"` | Rebuilding from a previous compaction summary | -| `"compaction-result"` | Fresh compaction just produced these messages | +| Reason | Description | +| ---------------------- | ------------------------------------------------- | +| `"run"` | Messages being passed to `run()` for `streamText` | +| `"compaction-rebuild"` | Rebuilding from a previous compaction summary | +| `"compaction-result"` | Fresh compaction just produced these messages | ### Runtime configuration @@ -679,7 +705,8 @@ run: async ({ messages, signal }) => { ``` - Longer idle timeout means faster responses but more compute usage. 
Set to `0` to suspend immediately after each turn (minimum latency cost, slight delay on next message). + Longer idle timeout means faster responses but more compute usage. Set to `0` to suspend + immediately after each turn (minimum latency cost, slight delay on next message). #### Stream options @@ -734,8 +761,8 @@ Control which AI SDK features are forwarded to the frontend: export const myChat = chat.task({ id: "my-chat", uiMessageStreamOptions: { - sendReasoning: true, // Forward model reasoning (default: true) - sendSources: true, // Forward source citations (default: false) + sendReasoning: true, // Forward model reasoning (default: true) + sendSources: true, // Forward source citations (default: false) }, run: async ({ messages, signal }) => { return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); @@ -762,7 +789,9 @@ run: async ({ messages, clientData, signal }) => { See [ChatUIMessageStreamOptions](/ai-chat/reference#chatuimessagestreamoptions) for the full reference. - `onFinish` is managed internally for response capture and cannot be overridden here. Use `streamText`'s `onFinish` callback for custom finish handling, or use [raw task mode](#raw-task-with-primitives) for full control over `toUIMessageStream()`. + `onFinish` is managed internally for response capture and cannot be overridden here. Use + `streamText`'s `onFinish` callback for custom finish handling, or use [raw task + mode](#raw-task-with-primitives) for full control over `toUIMessageStream()`. ### Manual mode with task() @@ -791,7 +820,9 @@ export const manualChat = task({ ``` - Manual mode does not get automatic message accumulation or the `onTurnComplete`/`onChatStart` lifecycle hooks. The `responseMessage` field in `onTurnComplete` will be `undefined` when using `chat.pipe()` directly. Use `chat.task()` for the full multi-turn experience. + Manual mode does not get automatic message accumulation or the `onTurnComplete`/`onChatStart` + lifecycle hooks. 
The `responseMessage` field in `onTurnComplete` will be `undefined` when using + `chat.pipe()` directly. Use `chat.task()` for the full multi-turn experience. --- @@ -843,34 +874,34 @@ export const myChat = task({ ### ChatSessionOptions -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `signal` | `AbortSignal` | required | Run-level cancel signal (from task context) | -| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle between turns | -| `timeout` | `string` | `"1h"` | Duration string for suspend timeout | -| `maxTurns` | `number` | `100` | Max turns before ending | +| Option | Type | Default | Description | +| ---------------------- | ------------- | -------- | ------------------------------------------- | +| `signal` | `AbortSignal` | required | Run-level cancel signal (from task context) | +| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle between turns | +| `timeout` | `string` | `"1h"` | Duration string for suspend timeout | +| `maxTurns` | `number` | `100` | Max turns before ending | ### ChatTurn Each turn yielded by the iterator provides: -| Field | Type | Description | -|-------|------|-------------| -| `number` | `number` | Turn number (0-indexed) | -| `chatId` | `string` | Chat session ID | -| `trigger` | `string` | What triggered this turn | -| `clientData` | `unknown` | Client data from the transport | -| `messages` | `ModelMessage[]` | Full accumulated model messages — pass to `streamText` | -| `uiMessages` | `UIMessage[]` | Full accumulated UI messages — use for persistence | -| `signal` | `AbortSignal` | Combined stop+cancel signal (fresh each turn) | -| `stopped` | `boolean` | Whether the user stopped generation this turn | -| `continuation` | `boolean` | Whether this is a continuation run | - -| Method | Description | -|--------|-------------| -| `turn.complete(source)` | Pipe stream, capture response, accumulate, and signal turn-complete | -| `turn.done()` | Just signal 
turn-complete (when you've piped manually) | -| `turn.addResponse(response)` | Add a response to the accumulator manually | +| Field | Type | Description | +| -------------- | ---------------- | ------------------------------------------------------ | +| `number` | `number` | Turn number (0-indexed) | +| `chatId` | `string` | Chat session ID | +| `trigger` | `string` | What triggered this turn | +| `clientData` | `unknown` | Client data from the transport | +| `messages` | `ModelMessage[]` | Full accumulated model messages — pass to `streamText` | +| `uiMessages` | `UIMessage[]` | Full accumulated UI messages — use for persistence | +| `signal` | `AbortSignal` | Combined stop+cancel signal (fresh each turn) | +| `stopped` | `boolean` | Whether the user stopped generation this turn | +| `continuation` | `boolean` | Whether this is a continuation run | + +| Method | Description | +| ---------------------------- | ------------------------------------------------------------------- | +| `turn.complete(source)` | Pipe stream, capture response, accumulate, and signal turn-complete | +| `turn.done()` | Just signal turn-complete (when you've piped manually) | +| `turn.addResponse(response)` | Add a response to the accumulator manually | ### turn.complete() vs manual control @@ -912,15 +943,15 @@ Raw task mode also lets you call `.toUIMessageStream()` yourself with any option ### Primitives -| Primitive | Description | -|-----------|-------------| -| `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` to wait for the next turn | -| `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | -| `chat.pipeAndCapture(result)` | Pipe a `StreamTextResult` to the chat stream and capture the response | -| `chat.writeTurnComplete()` | Signal the frontend that the current turn is complete | -| `chat.MessageAccumulator` | Accumulates conversation messages across turns | -| `chat.pipe(stream)` | Pipe a stream to the 
frontend (no response capture) | -| `chat.cleanupAbortedParts(msg)` | Clean up incomplete parts from a stopped response | +| Primitive | Description | +| ------------------------------- | ------------------------------------------------------------------------------------------- | +| `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` to wait for the next turn | +| `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | +| `chat.pipeAndCapture(result)` | Pipe a `StreamTextResult` to the chat stream and capture the response | +| `chat.writeTurnComplete()` | Signal the frontend that the current turn is complete | +| `chat.MessageAccumulator` | Accumulates conversation messages across turns | +| `chat.pipe(stream)` | Pipe a stream to the frontend (no response capture) | +| `chat.cleanupAbortedParts(msg)` | Clean up incomplete parts from a stopped response | ### Example @@ -979,9 +1010,8 @@ export const myChat = task({ } if (response) { - const cleaned = stop.signal.aborted && !runSignal.aborted - ? chat.cleanupAbortedParts(response) - : response; + const cleaned = + stop.signal.aborted && !runSignal.aborted ? chat.cleanupAbortedParts(response) : response; await conversation.addResponse(cleaned); } @@ -1029,6 +1059,6 @@ const response = await chat.pipeAndCapture(result); if (response) await conversation.addResponse(response); // Access accumulated messages for persistence -conversation.uiMessages; // UIMessage[] +conversation.uiMessages; // UIMessage[] conversation.modelMessages; // ModelMessage[] ``` diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index ccceebedeea..4b262e3929c 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -400,6 +400,8 @@ export function Chat({ chatId }) { Preload is a no-op if a session already exists for this chatId. 
+When the transport needs a trigger token for preload, your `accessToken` callback receives `{ chatId, purpose: "preload" }` (same as for a normal trigger, but `purpose` is `"trigger"` when starting a run from `sendMessages`). See [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options). + ### Backend On the backend, the `onPreload` hook fires immediately. The run then waits for the first message. When the user sends a message, `onChatStart` fires with `preloaded: true` — you can skip initialization that was already done in `onPreload`: diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 8c0d8cff9da..c03eb484565 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -28,7 +28,8 @@ export function Chat() { The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID. - The hook keeps `onSessionChange` up to date via a ref internally, so you don't need to memoize the callback or worry about stale closures. + The hook keeps `onSessionChange` up to date via a ref internally, so you don't need to memoize the + callback or worry about stale closures. ## Typed messages (`chat.withUIMessage`) @@ -42,7 +43,10 @@ import type { myChat } from "./myChat"; type Msg = InferChatUIMessage; -const transport = useTriggerChatTransport({ task: "my-chat", accessToken: getChatToken }); +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, +}); const { messages } = useChat({ transport }); ``` @@ -50,13 +54,18 @@ See the [Types](/ai-chat/types) guide for defining `YourUIMessage`, default stre ### Dynamic access tokens -For token refresh, pass a function instead of a string. It's called on each `sendMessage`: +For token refresh, pass a function instead of a string. The transport calls it when it needs a **trigger** token: starting a run from `sendMessages`, or when you call `preload()`. 
The callback receives `chatId` and `purpose` (`"trigger"` | `"preload"`). Import `ResolveChatAccessTokenParams` from `@trigger.dev/sdk/chat` to type your server action or fetch handler (see [reference](/ai-chat/reference#triggerchattransport-options)). ```ts +import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat"; + const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: async () => { - const res = await fetch("/api/chat-token"); + accessToken: async (input: ResolveChatAccessTokenParams) => { + const res = await fetch("/api/chat-token", { + method: "POST", + body: JSON.stringify(input), + }); return res.text(); }, }); @@ -100,10 +109,7 @@ export default function ChatPage({ chatId }: { chatId: string }) { useEffect(() => { async function load() { - const [messages, session] = await Promise.all([ - getChatMessages(chatId), - getSession(chatId), - ]); + const [messages, session] = await Promise.all([getChatMessages(chatId), getSession(chatId)]); setInitialMessages(messages); setInitialSession(session ? { [chatId]: session } : undefined); setLoaded(true); @@ -144,11 +150,19 @@ function ChatClient({ chatId, initialMessages, initialSessions }) { ``` - `resume: true` causes `useChat` to call `reconnectToStream` on the transport when the component mounts. The transport uses the session's `lastEventId` to skip past already-seen stream events, so the frontend only receives new data. Only enable `resume` when there are existing messages — for brand new chats, there's nothing to reconnect to. + `resume: true` causes `useChat` to call `reconnectToStream` on the transport when the component + mounts. The transport uses the session's `lastEventId` to skip past already-seen stream events, so + the frontend only receives new data. Only enable `resume` when there are existing messages — for + brand new chats, there's nothing to reconnect to. 
- In React strict mode (enabled by default in Next.js dev), you may see a `TypeError: Cannot read properties of undefined (reading 'state')` in the console when using `resume`. This is a [known bug in the AI SDK](https://github.com/vercel/ai/issues/8477) caused by React strict mode double-firing the resume effect. The error is caught internally and **does not affect functionality** — streaming and message display work correctly. It only appears in development and will not occur in production builds. + In React strict mode (enabled by default in Next.js dev), you may see a `TypeError: Cannot read + properties of undefined (reading 'state')` in the console when using `resume`. This is a [known + bug in the AI SDK](https://github.com/vercel/ai/issues/8477) caused by React strict mode + double-firing the resume effect. The error is caught internally and **does not affect + functionality** — streaming and message display work correctly. It only appears in development and + will not occur in production builds. ## Client data and metadata @@ -170,10 +184,7 @@ const transport = useTriggerChatTransport({ Pass metadata with individual messages via `sendMessage`. Per-message values are merged with transport-level client data (per-message wins on conflicts): ```ts -sendMessage( - { text: "Hello" }, - { metadata: { model: "gpt-4o", priority: "high" } } -); +sendMessage({ text: "Hello" }, { metadata: { model: "gpt-4o", priority: "high" } }); ``` ### Typed client data with clientDataSchema @@ -229,11 +240,13 @@ Calling `stop()` from `useChat` sends a stop signal to the running task via inpu ```tsx const { messages, sendMessage, stop, status } = useChat({ transport }); -{status === "streaming" && ( - -)} +{ + status === "streaming" && ( + + ); +} ``` See [Stop generation](/ai-chat/backend#stop-generation) in the backend docs for how to handle stop signals in your task. 
diff --git a/docs/ai-chat/quick-start.mdx b/docs/ai-chat/quick-start.mdx index cfffcc828b9..881cc381548 100644 --- a/docs/ai-chat/quick-start.mdx +++ b/docs/ai-chat/quick-start.mdx @@ -32,20 +32,24 @@ description: "Get a working AI chat in 3 steps — define a task, generate a tok For a **custom** [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype (typed `data-*` parts, tool map, etc.), define the task with [`chat.withUIMessage<...>().task({...})`](/ai-chat/types) instead of `chat.task`. +
- On your server (e.g. a Next.js server action), create a trigger public token scoped to your chat task: + On your server (e.g. a Next.js server action), create a trigger public token scoped to your chat task. The transport calls your function with `chatId` and `purpose` (`"trigger"` or `"preload"`). Import `ResolveChatAccessTokenParams` from `@trigger.dev/sdk/chat` so the signature matches — see [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options). ```ts app/actions.ts "use server"; import { chat } from "@trigger.dev/sdk/ai"; + import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat"; import type { myChat } from "@/trigger/chat"; - export const getChatToken = () => - chat.createAccessToken("my-chat"); + export async function getChatToken(_input: ResolveChatAccessTokenParams) { + return chat.createAccessToken("my-chat"); + } ``` + @@ -102,6 +106,7 @@ description: "Get a working AI chat in 3 steps — define a task, generate a tok ); } ``` + diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index ff95019be05..40f6f04daf8 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -8,27 +8,27 @@ description: "Complete API reference for the AI Chat SDK — backend options, ev Options for `chat.task()`. 
-| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `id` | `string` | required | Task identifier | -| `run` | `(payload: ChatTaskRunPayload) => Promise` | required | Handler for each turn | -| `clientDataSchema` | `TaskSchema` | — | Schema for validating and typing `clientData` | -| `onPreload` | `(event: PreloadEvent) => Promise \| void` | — | Fires on preloaded runs before the first message | -| `onChatStart` | `(event: ChatStartEvent) => Promise \| void` | — | Fires on turn 0 before `run()` | -| `onTurnStart` | `(event: TurnStartEvent) => Promise \| void` | — | Fires every turn before `run()` | -| `onBeforeTurnComplete` | `(event: BeforeTurnCompleteEvent) => Promise \| void` | — | Fires after response but before stream closes. Includes `writer`. | -| `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes (stream closed) | -| `onCompacted` | `(event: CompactedEvent) => Promise \| void` | — | Fires when compaction occurs. Includes `writer`. See [Compaction](/ai-chat/compaction) | -| `compaction` | `ChatTaskCompactionOptions` | — | Automatic context compaction. See [Compaction](/ai-chat/compaction) | -| `pendingMessages` | `PendingMessagesOptions` | — | Mid-execution message injection. See [Pending Messages](/ai-chat/pending-messages) | -| `prepareMessages` | `(event: PrepareMessagesEvent) => ModelMessage[]` | — | Transform model messages before use (cache breaks, context injection, etc.) 
| -| `maxTurns` | `number` | `100` | Max conversational turns per run | -| `turnTimeout` | `string` | `"1h"` | How long to wait for next message | -| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle before suspending | -| `chatAccessTokenTTL` | `string` | `"1h"` | How long the scoped access token remains valid | -| `preloadIdleTimeoutInSeconds` | `number` | Same as `idleTimeoutInSeconds` | Idle timeout after `onPreload` fires | -| `preloadTimeout` | `string` | Same as `turnTimeout` | Suspend timeout for preloaded runs | -| `uiMessageStreamOptions` | `ChatUIMessageStreamOptions` | — | Default options for `toUIMessageStream()`. Per-turn override via `chat.setUIMessageStreamOptions()` | +| Option | Type | Default | Description | +| ----------------------------- | ----------------------------------------------------------- | ------------------------------ | --------------------------------------------------------------------------------------------------- | +| `id` | `string` | required | Task identifier | +| `run` | `(payload: ChatTaskRunPayload) => Promise` | required | Handler for each turn | +| `clientDataSchema` | `TaskSchema` | — | Schema for validating and typing `clientData` | +| `onPreload` | `(event: PreloadEvent) => Promise \| void` | — | Fires on preloaded runs before the first message | +| `onChatStart` | `(event: ChatStartEvent) => Promise \| void` | — | Fires on turn 0 before `run()` | +| `onTurnStart` | `(event: TurnStartEvent) => Promise \| void` | — | Fires every turn before `run()` | +| `onBeforeTurnComplete` | `(event: BeforeTurnCompleteEvent) => Promise \| void` | — | Fires after response but before stream closes. Includes `writer`. | +| `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes (stream closed) | +| `onCompacted` | `(event: CompactedEvent) => Promise \| void` | — | Fires when compaction occurs. Includes `writer`. 
See [Compaction](/ai-chat/compaction) | +| `compaction` | `ChatTaskCompactionOptions` | — | Automatic context compaction. See [Compaction](/ai-chat/compaction) | +| `pendingMessages` | `PendingMessagesOptions` | — | Mid-execution message injection. See [Pending Messages](/ai-chat/pending-messages) | +| `prepareMessages` | `(event: PrepareMessagesEvent) => ModelMessage[]` | — | Transform model messages before use (cache breaks, context injection, etc.) | +| `maxTurns` | `number` | `100` | Max conversational turns per run | +| `turnTimeout` | `string` | `"1h"` | How long to wait for next message | +| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle before suspending | +| `chatAccessTokenTTL` | `string` | `"1h"` | How long the scoped access token remains valid | +| `preloadIdleTimeoutInSeconds` | `number` | Same as `idleTimeoutInSeconds` | Idle timeout after `onPreload` fires | +| `preloadTimeout` | `string` | Same as `turnTimeout` | Suspend timeout for preloaded runs | +| `uiMessageStreamOptions` | `ChatUIMessageStreamOptions` | — | Default options for `toUIMessageStream()`. Per-turn override via `chat.setUIMessageStreamOptions()` | Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine`, `maxDuration`, etc. @@ -36,94 +36,94 @@ Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine` The payload passed to the `run` function. 
-| Field | Type | Description | -|-------|------|-------------| -| `messages` | `ModelMessage[]` | Model-ready messages — pass directly to `streamText` | -| `chatId` | `string` | Unique chat session ID | -| `trigger` | `"submit-message" \| "regenerate-message"` | What triggered the request | -| `messageId` | `string \| undefined` | Message ID (for regenerate) | -| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend (typed when schema is provided) | -| `continuation` | `boolean` | Whether this run is continuing an existing chat (previous run ended) | -| `signal` | `AbortSignal` | Combined stop + cancel signal | -| `cancelSignal` | `AbortSignal` | Cancel-only signal | -| `stopSignal` | `AbortSignal` | Stop-only signal (per-turn) | +| Field | Type | Description | +| -------------- | ------------------------------------------ | -------------------------------------------------------------------- | +| `messages` | `ModelMessage[]` | Model-ready messages — pass directly to `streamText` | +| `chatId` | `string` | Unique chat session ID | +| `trigger` | `"submit-message" \| "regenerate-message"` | What triggered the request | +| `messageId` | `string \| undefined` | Message ID (for regenerate) | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend (typed when schema is provided) | +| `continuation` | `boolean` | Whether this run is continuing an existing chat (previous run ended) | +| `signal` | `AbortSignal` | Combined stop + cancel signal | +| `cancelSignal` | `AbortSignal` | Cancel-only signal | +| `stopSignal` | `AbortSignal` | Stop-only signal (per-turn) | ## PreloadEvent Passed to the `onPreload` callback. 
-| Field | Type | Description | -|-------|------|-------------| -| `chatId` | `string` | Chat session ID | -| `runId` | `string` | The Trigger.dev run ID | -| `chatAccessToken` | `string` | Scoped access token for this run | -| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | -| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. | +| Field | Type | Description | +| ----------------- | --------------------------- | -------------------------------------------------------------- | +| `chatId` | `string` | Chat session ID | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. | ## ChatStartEvent Passed to the `onChatStart` callback. -| Field | Type | Description | -|-------|------|-------------| -| `chatId` | `string` | Chat session ID | -| `messages` | `ModelMessage[]` | Initial model-ready messages | -| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | -| `runId` | `string` | The Trigger.dev run ID | -| `chatAccessToken` | `string` | Scoped access token for this run | -| `continuation` | `boolean` | Whether this run is continuing an existing chat | -| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | -| `preloaded` | `boolean` | Whether this run was preloaded before the first message | -| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. 
| +| Field | Type | Description | +| ----------------- | --------------------------- | -------------------------------------------------------------- | +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Initial model-ready messages | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | +| `preloaded` | `boolean` | Whether this run was preloaded before the first message | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. | ## TurnStartEvent Passed to the `onTurnStart` callback. -| Field | Type | Description | -|-------|------|-------------| -| `chatId` | `string` | Chat session ID | -| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | -| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | -| `turn` | `number` | Turn number (0-indexed) | -| `runId` | `string` | The Trigger.dev run ID | -| `chatAccessToken` | `string` | Scoped access token for this run | -| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | -| `continuation` | `boolean` | Whether this run is continuing an existing chat | -| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | -| `preloaded` | `boolean` | Whether this run was preloaded | -| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. 
| +| Field | Type | Description | +| ----------------- | --------------------------- | -------------------------------------------------------------- | +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | +| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | +| `turn` | `number` | Turn number (0-indexed) | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | Scoped access token for this run | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | +| `preloaded` | `boolean` | Whether this run was preloaded | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. | ## TurnCompleteEvent Passed to the `onTurnComplete` callback. 
-| Field | Type | Description | -|-------|------|-------------| -| `chatId` | `string` | Chat session ID | -| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | -| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | -| `newMessages` | `ModelMessage[]` | Only this turn's messages (model format) | -| `newUIMessages` | `UIMessage[]` | Only this turn's messages (UI format) | -| `responseMessage` | `UIMessage \| undefined` | The assistant's response for this turn | -| `rawResponseMessage` | `UIMessage \| undefined` | Raw response before abort cleanup | -| `turn` | `number` | Turn number (0-indexed) | -| `runId` | `string` | The Trigger.dev run ID | -| `chatAccessToken` | `string` | Scoped access token for this run | -| `lastEventId` | `string \| undefined` | Stream position for resumption | -| `stopped` | `boolean` | Whether the user stopped generation during this turn | -| `continuation` | `boolean` | Whether this run is continuing an existing chat | -| `usage` | `LanguageModelUsage \| undefined` | Token usage for this turn | -| `totalUsage` | `LanguageModelUsage` | Cumulative token usage across all turns | +| Field | Type | Description | +| -------------------- | --------------------------------- | ---------------------------------------------------- | +| `chatId` | `string` | Chat session ID | +| `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | +| `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | +| `newMessages` | `ModelMessage[]` | Only this turn's messages (model format) | +| `newUIMessages` | `UIMessage[]` | Only this turn's messages (UI format) | +| `responseMessage` | `UIMessage \| undefined` | The assistant's response for this turn | +| `rawResponseMessage` | `UIMessage \| undefined` | Raw response before abort cleanup | +| `turn` | `number` | Turn number (0-indexed) | +| `runId` | `string` | The Trigger.dev run ID | +| `chatAccessToken` | `string` | 
Scoped access token for this run | +| `lastEventId` | `string \| undefined` | Stream position for resumption | +| `stopped` | `boolean` | Whether the user stopped generation during this turn | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `usage` | `LanguageModelUsage \| undefined` | Token usage for this turn | +| `totalUsage` | `LanguageModelUsage` | Cumulative token usage across all turns | ## BeforeTurnCompleteEvent Passed to the `onBeforeTurnComplete` callback. Same fields as `TurnCompleteEvent` plus a `writer`. -| Field | Type | Description | -|-------|------|-------------| -| _(all TurnCompleteEvent fields)_ | | See [TurnCompleteEvent](#turncompleteevent) | -| `writer` | [`ChatWriter`](#chatwriter) | Stream writer — the stream is still open so chunks appear in the current turn | +| Field | Type | Description | +| -------------------------------- | --------------------------- | ----------------------------------------------------------------------------- | +| _(all TurnCompleteEvent fields)_ | | See [TurnCompleteEvent](#turncompleteevent) | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer — the stream is still open so chunks appear in the current turn | ## ChatWriter @@ -131,9 +131,9 @@ A stream writer passed to lifecycle callbacks. Write custom `UIMessageChunk` par The writer is lazy — no stream is opened unless you call `write()` or `merge()`, so there's zero overhead for callbacks that don't use it. 
-| Method | Type | Description | -|--------|------|-------------| -| `write(part)` | `(part: UIMessageChunk) => void` | Write a single chunk to the chat stream | +| Method | Type | Description | +| --------------- | -------------------------------------------------- | -------------------------------------------------- | +| `write(part)` | `(part: UIMessageChunk) => void` | Write a single chunk to the chat stream | | `merge(stream)` | `(stream: ReadableStream) => void` | Merge another stream's chunks into the chat stream | ```ts @@ -151,153 +151,153 @@ onBeforeTurnComplete: async ({ writer, usage }) => { Options for the `compaction` field on `chat.task()`. See [Compaction](/ai-chat/compaction) for usage guide. -| Option | Type | Required | Description | -|--------|------|----------|-------------| -| `shouldCompact` | `(event: ShouldCompactEvent) => boolean \| Promise` | Yes | Decide whether to compact. Return `true` to trigger | -| `summarize` | `(event: SummarizeEvent) => Promise` | Yes | Generate a summary from the current messages | -| `compactUIMessages` | `(event: CompactMessagesEvent) => UIMessage[] \| Promise` | No | Transform UI messages after compaction. Default: preserve all | -| `compactModelMessages` | `(event: CompactMessagesEvent) => ModelMessage[] \| Promise` | No | Transform model messages after compaction. Default: replace all with summary | +| Option | Type | Required | Description | +| ---------------------- | ---------------------------------------------------------------------------- | -------- | ---------------------------------------------------------------------------- | +| `shouldCompact` | `(event: ShouldCompactEvent) => boolean \| Promise` | Yes | Decide whether to compact. Return `true` to trigger | +| `summarize` | `(event: SummarizeEvent) => Promise` | Yes | Generate a summary from the current messages | +| `compactUIMessages` | `(event: CompactMessagesEvent) => UIMessage[] \| Promise` | No | Transform UI messages after compaction. 
Default: preserve all | +| `compactModelMessages` | `(event: CompactMessagesEvent) => ModelMessage[] \| Promise` | No | Transform model messages after compaction. Default: replace all with summary | ## CompactMessagesEvent Passed to `compactUIMessages` and `compactModelMessages` callbacks. -| Field | Type | Description | -|-------|------|-------------| -| `summary` | `string` | The generated summary text | -| `uiMessages` | `UIMessage[]` | Current UI messages (full conversation) | -| `modelMessages` | `ModelMessage[]` | Current model messages (full conversation) | -| `chatId` | `string` | Chat session ID | -| `turn` | `number` | Current turn (0-indexed) | -| `clientData` | `unknown` | Custom data from the frontend | -| `source` | `"inner" \| "outer"` | Whether compaction is between steps or between turns | +| Field | Type | Description | +| --------------- | -------------------- | ---------------------------------------------------- | +| `summary` | `string` | The generated summary text | +| `uiMessages` | `UIMessage[]` | Current UI messages (full conversation) | +| `modelMessages` | `ModelMessage[]` | Current model messages (full conversation) | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Current turn (0-indexed) | +| `clientData` | `unknown` | Custom data from the frontend | +| `source` | `"inner" \| "outer"` | Whether compaction is between steps or between turns | ## CompactedEvent Passed to the `onCompacted` callback. 
-| Field | Type | Description | -|-------|------|-------------| -| `summary` | `string` | The generated summary text | -| `messages` | `ModelMessage[]` | Messages that were compacted (pre-compaction) | -| `messageCount` | `number` | Number of messages before compaction | -| `usage` | `LanguageModelUsage` | Token usage from the triggering step/turn | -| `totalTokens` | `number \| undefined` | Total token count that triggered compaction | -| `inputTokens` | `number \| undefined` | Input token count | -| `outputTokens` | `number \| undefined` | Output token count | -| `stepNumber` | `number` | Step number (-1 for outer loop) | -| `chatId` | `string \| undefined` | Chat session ID | -| `turn` | `number \| undefined` | Current turn | -| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks during compaction | +| Field | Type | Description | +| -------------- | --------------------------- | ------------------------------------------------- | +| `summary` | `string` | The generated summary text | +| `messages` | `ModelMessage[]` | Messages that were compacted (pre-compaction) | +| `messageCount` | `number` | Number of messages before compaction | +| `usage` | `LanguageModelUsage` | Token usage from the triggering step/turn | +| `totalTokens` | `number \| undefined` | Total token count that triggered compaction | +| `inputTokens` | `number \| undefined` | Input token count | +| `outputTokens` | `number \| undefined` | Output token count | +| `stepNumber` | `number` | Step number (-1 for outer loop) | +| `chatId` | `string \| undefined` | Chat session ID | +| `turn` | `number \| undefined` | Current turn | +| `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks during compaction | ## PendingMessagesOptions Options for the `pendingMessages` field. See [Pending Messages](/ai-chat/pending-messages) for usage guide. 
-| Option | Type | Required | Description | -|--------|------|----------|-------------| -| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean \| Promise` | No | Decide whether to inject the batch between tool-call steps. If absent, no injection. | -| `prepare` | `(event: PendingMessagesBatchEvent) => ModelMessage[] \| Promise` | No | Transform the batch before injection. Default: convert each via `convertToModelMessages`. | -| `onReceived` | `(event: PendingMessageReceivedEvent) => void \| Promise` | No | Called when a message arrives during streaming (per-message). | -| `onInjected` | `(event: PendingMessagesInjectedEvent) => void \| Promise` | No | Called after a batch is injected via prepareStep. | +| Option | Type | Required | Description | +| -------------- | --------------------------------------------------------------------------------- | -------- | ----------------------------------------------------------------------------------------- | +| `shouldInject` | `(event: PendingMessagesBatchEvent) => boolean \| Promise` | No | Decide whether to inject the batch between tool-call steps. If absent, no injection. | +| `prepare` | `(event: PendingMessagesBatchEvent) => ModelMessage[] \| Promise` | No | Transform the batch before injection. Default: convert each via `convertToModelMessages`. | +| `onReceived` | `(event: PendingMessageReceivedEvent) => void \| Promise` | No | Called when a message arrives during streaming (per-message). | +| `onInjected` | `(event: PendingMessagesInjectedEvent) => void \| Promise` | No | Called after a batch is injected via prepareStep. | ## PendingMessagesBatchEvent Passed to `shouldInject` and `prepare` callbacks. 
-| Field | Type | Description | -|-------|------|-------------| -| `messages` | `UIMessage[]` | All pending messages (batch) | -| `modelMessages` | `ModelMessage[]` | Current conversation | -| `steps` | `CompactionStep[]` | Completed steps so far | -| `stepNumber` | `number` | Current step (0-indexed) | -| `chatId` | `string` | Chat session ID | -| `turn` | `number` | Current turn (0-indexed) | -| `clientData` | `unknown` | Custom data from the frontend | +| Field | Type | Description | +| --------------- | ------------------ | ----------------------------- | +| `messages` | `UIMessage[]` | All pending messages (batch) | +| `modelMessages` | `ModelMessage[]` | Current conversation | +| `steps` | `CompactionStep[]` | Completed steps so far | +| `stepNumber` | `number` | Current step (0-indexed) | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Current turn (0-indexed) | +| `clientData` | `unknown` | Custom data from the frontend | ## PendingMessagesInjectedEvent Passed to `onInjected` callback. -| Field | Type | Description | -|-------|------|-------------| -| `messages` | `UIMessage[]` | All injected UI messages | +| Field | Type | Description | +| ----------------------- | ---------------- | ------------------------------------- | +| `messages` | `UIMessage[]` | All injected UI messages | | `injectedModelMessages` | `ModelMessage[]` | The model messages that were injected | -| `chatId` | `string` | Chat session ID | -| `turn` | `number` | Current turn | -| `stepNumber` | `number` | Step where injection occurred | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Current turn | +| `stepNumber` | `number` | Step where injection occurred | ## UsePendingMessagesReturn Return value of `usePendingMessages` hook. See [Pending Messages — Frontend](/ai-chat/pending-messages#frontend-usependingmessages-hook). 
-| Property/Method | Type | Description | -|-----------------|------|-------------| -| `pending` | `PendingMessage[]` | Current pending messages with mode and injection status | -| `steer` | `(text: string) => void` | Send a steering message (or normal message when not streaming) | -| `queue` | `(text: string) => void` | Queue for next turn (or send normally when not streaming) | -| `promoteToSteering` | `(id: string) => void` | Convert a queued message to steering | -| `isInjectionPoint` | `(part: unknown) => boolean` | Check if an assistant message part is an injection confirmation | -| `getInjectedMessageIds` | `(part: unknown) => string[]` | Get message IDs from an injection point | -| `getInjectedMessages` | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point | +| Property/Method | Type | Description | +| ----------------------- | -------------------------------------- | --------------------------------------------------------------- | +| `pending` | `PendingMessage[]` | Current pending messages with mode and injection status | +| `steer` | `(text: string) => void` | Send a steering message (or normal message when not streaming) | +| `queue` | `(text: string) => void` | Queue for next turn (or send normally when not streaming) | +| `promoteToSteering` | `(id: string) => void` | Convert a queued message to steering | +| `isInjectionPoint` | `(part: unknown) => boolean` | Check if an assistant message part is an injection confirmation | +| `getInjectedMessageIds` | `(part: unknown) => string[]` | Get message IDs from an injection point | +| `getInjectedMessages` | `(part: unknown) => InjectedMessage[]` | Get messages (id + text) from an injection point | ## ChatSessionOptions Options for `chat.createSession()`. 
-| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `signal` | `AbortSignal` | required | Run-level cancel signal | -| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle between turns | -| `timeout` | `string` | `"1h"` | Duration string for suspend timeout | -| `maxTurns` | `number` | `100` | Max turns before ending | +| Option | Type | Default | Description | +| ---------------------- | ------------- | -------- | ----------------------------------- | +| `signal` | `AbortSignal` | required | Run-level cancel signal | +| `idleTimeoutInSeconds` | `number` | `30` | Seconds to stay idle between turns | +| `timeout` | `string` | `"1h"` | Duration string for suspend timeout | +| `maxTurns` | `number` | `100` | Max turns before ending | ## ChatTurn Each turn yielded by `chat.createSession()`. -| Field | Type | Description | -|-------|------|-------------| -| `number` | `number` | Turn number (0-indexed) | -| `chatId` | `string` | Chat session ID | -| `trigger` | `string` | What triggered this turn | -| `clientData` | `unknown` | Client data from the transport | -| `messages` | `ModelMessage[]` | Full accumulated model messages | -| `uiMessages` | `UIMessage[]` | Full accumulated UI messages | -| `signal` | `AbortSignal` | Combined stop+cancel signal (fresh each turn) | -| `stopped` | `boolean` | Whether the user stopped generation this turn | -| `continuation` | `boolean` | Whether this is a continuation run | - -| Method | Returns | Description | -|--------|---------|-------------| -| `complete(source)` | `Promise` | Pipe, capture, accumulate, cleanup, and signal turn-complete | -| `done()` | `Promise` | Signal turn-complete (when you've piped manually) | -| `addResponse(response)` | `Promise` | Add response to accumulator manually | +| Field | Type | Description | +| -------------- | ---------------- | --------------------------------------------- | +| `number` | `number` | Turn number (0-indexed) | +| `chatId` | 
`string` | Chat session ID | +| `trigger` | `string` | What triggered this turn | +| `clientData` | `unknown` | Client data from the transport | +| `messages` | `ModelMessage[]` | Full accumulated model messages | +| `uiMessages` | `UIMessage[]` | Full accumulated UI messages | +| `signal` | `AbortSignal` | Combined stop+cancel signal (fresh each turn) | +| `stopped` | `boolean` | Whether the user stopped generation this turn | +| `continuation` | `boolean` | Whether this is a continuation run | + +| Method | Returns | Description | +| ----------------------- | --------------------------------- | ------------------------------------------------------------ | +| `complete(source)` | `Promise` | Pipe, capture, accumulate, cleanup, and signal turn-complete | +| `done()` | `Promise` | Signal turn-complete (when you've piped manually) | +| `addResponse(response)` | `Promise` | Add response to accumulator manually | ## chat namespace All methods available on the `chat` object from `@trigger.dev/sdk/ai`. -| Method | Description | -|--------|-------------| -| `chat.task(options)` | Create a chat task | -| `chat.createSession(payload, options)` | Create an async iterator for chat turns | -| `chat.pipe(source, options?)` | Pipe a stream to the frontend (from anywhere inside a task) | -| `chat.pipeAndCapture(source, options?)` | Pipe and capture the response `UIMessage` | -| `chat.writeTurnComplete(options?)` | Signal the frontend that the current turn is complete | -| `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | -| `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` | -| `chat.local({ id })` | Create a per-run typed local (see [Per-run data](/ai-chat/features#per-run-data-with-chatlocal)) | -| `chat.createAccessToken(taskId)` | Create a public access token for a chat task | -| `chat.setTurnTimeout(duration)` | Override turn timeout at runtime (e.g. 
`"2h"`) | -| `chat.setTurnTimeoutInSeconds(seconds)` | Override turn timeout at runtime (in seconds) | -| `chat.setIdleTimeoutInSeconds(seconds)` | Override idle timeout at runtime | -| `chat.setUIMessageStreamOptions(options)` | Override `toUIMessageStream()` options for the current turn | -| `chat.defer(promise)` | Run background work in parallel with streaming, awaited before `onTurnComplete` | -| `chat.isStopped()` | Check if the current turn was stopped by the user | -| `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message | -| `chat.stream` | Typed chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()` | -| `chat.MessageAccumulator` | Class that accumulates conversation messages across turns | +| Method | Description | +| ------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | +| `chat.task(options)` | Create a chat task | +| `chat.createSession(payload, options)` | Create an async iterator for chat turns | +| `chat.pipe(source, options?)` | Pipe a stream to the frontend (from anywhere inside a task) | +| `chat.pipeAndCapture(source, options?)` | Pipe and capture the response `UIMessage` | +| `chat.writeTurnComplete(options?)` | Signal the frontend that the current turn is complete | +| `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | +| `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` | +| `chat.local({ id })` | Create a per-run typed local (see [Per-run data](/ai-chat/features#per-run-data-with-chatlocal)) | +| `chat.createAccessToken(taskId)` | Create a public access token for a chat task | +| `chat.setTurnTimeout(duration)` | Override turn timeout at runtime (e.g. 
`"2h"`) | +| `chat.setTurnTimeoutInSeconds(seconds)` | Override turn timeout at runtime (in seconds) | +| `chat.setIdleTimeoutInSeconds(seconds)` | Override idle timeout at runtime | +| `chat.setUIMessageStreamOptions(options)` | Override `toUIMessageStream()` options for the current turn | +| `chat.defer(promise)` | Run background work in parallel with streaming, awaited before `onTurnComplete` | +| `chat.isStopped()` | Check if the current turn was stopped by the user | +| `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message | +| `chat.stream` | Typed chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()` | +| `chat.MessageAccumulator` | Class that accumulates conversation messages across turns | | `chat.withUIMessage(config?).task(options)` | Same as `chat.task`, but fixes a custom `UIMessage` subtype and optional default stream options. See [Types](/ai-chat/types) | ## `chat.withUIMessage` @@ -310,16 +310,16 @@ chat.withUIMessage(config?: ChatWithUIMessageConfig): { }; ``` -| Parameter | Type | Description | -|-----------|------|-------------| +| Parameter | Type | Description | +| ---------------------- | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | | `config.streamOptions` | `ChatUIMessageStreamOptions` | Optional defaults for `toUIMessageStream()`. Shallow-merged with `uiMessageStreamOptions` on the inner `.task({ ... })` (task wins on key conflicts). | Use this when you need [`InferChatUIMessage`](#inferchatuimessage) / typed `data-*` parts / `InferUITools` to line up across backend hooks and `useChat`. Full guide: [Types](/ai-chat/types). 
## `ChatWithUIMessageConfig` -| Field | Type | Description | -|-------|------|-------------| +| Field | Type | Description | +| --------------- | ---------------------------------- | --------------------------------------------------------------------- | | `streamOptions` | `ChatUIMessageStreamOptions` | Default `toUIMessageStream()` options for tasks created via `.task()` | ## `InferChatUIMessage` @@ -337,11 +337,11 @@ Use with `useChat({ transport })` when using [`chat.withUIMessage`](/ai-cha ## AI helpers (`ai` from `@trigger.dev/sdk/ai`) -| Export | Status | Description | -|--------|--------|-------------| -| `ai.toolExecute(task)` | **Preferred** | Returns the `execute` function for AI SDK `tool()`. Runs the task via `triggerAndSubscribe` and attaches tool/chat metadata (same behavior the deprecated wrapper used internally). | -| `ai.tool(task, options?)` | **Deprecated** | Wraps `tool()` / `dynamicTool()` and the same execute path. Migrate to `tool({ ..., execute: ai.toolExecute(task) })`. See [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools). | -| `ai.toolCallId`, `ai.chatContext`, `ai.chatContextOrThrow`, `ai.currentToolOptions` | Supported | Work for any task-backed tool execute path, including `ai.toolExecute`. | +| Export | Status | Description | +| ----------------------------------------------------------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `ai.toolExecute(task)` | **Preferred** | Returns the `execute` function for AI SDK `tool()`. Runs the task via `triggerAndSubscribe` and attaches tool/chat metadata (same behavior the deprecated wrapper used internally). | +| `ai.tool(task, options?)` | **Deprecated** | Wraps `tool()` / `dynamicTool()` and the same execute path. Migrate to `tool({ ..., execute: ai.toolExecute(task) })`. 
See [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools). | +| `ai.toolCallId`, `ai.chatContext`, `ai.chatContextOrThrow`, `ai.currentToolOptions` | Supported | Work for any task-backed tool execute path, including `ai.toolExecute`. | ## ChatUIMessageStreamOptions @@ -349,31 +349,65 @@ Options for customizing `toUIMessageStream()`. Set as static defaults via `uiMes Derived from the AI SDK's `UIMessageStreamOptions` with `onFinish`, `originalMessages`, and `generateMessageId` omitted (managed internally). -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `onError` | `(error: unknown) => string` | Raw error message | Called on LLM errors and tool execution errors. Return a sanitized string — sent as `{ type: "error", errorText }` to the frontend. | -| `sendReasoning` | `boolean` | `true` | Send reasoning parts to the client | -| `sendSources` | `boolean` | `false` | Send source parts to the client | -| `sendFinish` | `boolean` | `true` | Send the finish event. Set to `false` when chaining multiple `streamText` calls. | -| `sendStart` | `boolean` | `true` | Send the message start event. Set to `false` when chaining. | -| `messageMetadata` | `(options: { part }) => metadata` | — | Extract message metadata to send to the client. Called on `start` and `finish` events. | +| Option | Type | Default | Description | +| ----------------- | --------------------------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------- | +| `onError` | `(error: unknown) => string` | Raw error message | Called on LLM errors and tool execution errors. Return a sanitized string — sent as `{ type: "error", errorText }` to the frontend. 
| +| `sendReasoning` | `boolean` | `true` | Send reasoning parts to the client | +| `sendSources` | `boolean` | `false` | Send source parts to the client | +| `sendFinish` | `boolean` | `true` | Send the finish event. Set to `false` when chaining multiple `streamText` calls. | +| `sendStart` | `boolean` | `true` | Send the message start event. Set to `false` when chaining. | +| `messageMetadata` | `(options: { part }) => metadata` | — | Extract message metadata to send to the client. Called on `start` and `finish` events. | ## TriggerChatTransport options Options for the frontend transport constructor and `useTriggerChatTransport` hook. -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `task` | `string` | required | Task ID to trigger | -| `accessToken` | `string \| () => string \| Promise` | required | Auth token or function that returns one | -| `baseURL` | `string` | `"https://api.trigger.dev"` | API base URL (for self-hosted) | -| `streamKey` | `string` | `"chat"` | Stream key (only change if using custom key) | -| `headers` | `Record` | — | Extra headers for API requests | -| `streamTimeoutSeconds` | `number` | `120` | How long to wait for stream data | -| `clientData` | Typed by `clientDataSchema` | — | Default client data for every request | -| `sessions` | `Record` | — | Restore sessions from storage | -| `onSessionChange` | `(chatId, session \| null) => void` | — | Fires when session state changes | -| `triggerOptions` | `{...}` | — | Options for the initial task trigger (see below) | +| Option | Type | Default | Description | +| ---------------------- | -------------------------------------------------------------------- | --------------------------- | --------------------------------------------------------------------------- | +| `task` | `string` | required | Task ID to trigger | +| `accessToken` | `string \| (params: ResolveChatAccessTokenParams) => string \| Promise` | required | Trigger / API auth token, or a 
function that returns one (see below) | +| `baseURL` | `string` | `"https://api.trigger.dev"` | API base URL (for self-hosted) | +| `streamKey` | `string` | `"chat"` | Stream key (only change if using custom key) | +| `headers` | `Record` | — | Extra headers for API requests | +| `streamTimeoutSeconds` | `number` | `120` | How long to wait for stream data | +| `clientData` | Typed by `clientDataSchema` | — | Default client data for every request | +| `sessions` | `Record` | — | Restore sessions from storage | +| `onSessionChange` | `(chatId, session \| null) => void` | — | Fires when session state changes | +| `renewRunAccessToken` | `(params: RenewRunAccessTokenParams) => string \| ... \| Promise<...>` | — | Mint a new run-scoped PAT when the run PAT returns 401 (realtime / input stream). Retries once. | +| `triggerOptions` | `{...}` | — | Options for the initial task trigger (see below) | + +### `accessToken` callback + +When `accessToken` is a function, the transport calls it with **`ResolveChatAccessTokenParams`** (exported from `@trigger.dev/sdk/chat`): + +- `chatId` — the conversation id (`useChat` id / `sendMessages` chat id). +- `purpose` — `"trigger"` when calling `triggerTask` from `sendMessages` (new run or after the session ended), or `"preload"` when calling `preload()`. + +Use this to mint or log per-chat trigger tokens. A plain **`string`** is still supported and skips the callback. + +### `renewRunAccessToken` callback + +Optional. When the **run** public access token used for realtime SSE or input streams expires, the transport calls this once with **`RenewRunAccessTokenParams`** (`chatId`, `runId`), then retries the failing request. Implement it with your server `auth.createPublicToken` (scopes `read:runs:` and `write:inputStreams:`). See [Authentication](/realtime/auth). 
+ +```ts +import { auth } from "@trigger.dev/sdk"; +import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat"; + +async function getChatToken(input: ResolveChatAccessTokenParams) { + return auth.createTriggerPublicToken("my-chat", { expirationTime: "1h" }); +} + +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + renewRunAccessToken: async ({ chatId, runId }) => { + return auth.createPublicToken({ + scopes: { read: { runs: runId }, write: { inputStreams: runId } }, + expirationTime: "1h", + }); + }, +}); +``` ### triggerOptions @@ -381,13 +415,13 @@ Options forwarded to the Trigger.dev API when starting a new run. Only applies t A `chat:{chatId}` tag is automatically added to every run. -| Option | Type | Description | -|--------|------|-------------| -| `tags` | `string[]` | Additional tags for the run (merged with auto-tags, max 5 total) | -| `queue` | `string` | Queue name for the run | -| `maxAttempts` | `number` | Maximum retry attempts | -| `machine` | `"micro" \| "small-1x" \| ...` | Machine preset for the run | -| `priority` | `number` | Priority (lower = higher priority) | +| Option | Type | Description | +| ------------- | ------------------------------ | ---------------------------------------------------------------- | +| `tags` | `string[]` | Additional tags for the run (merged with auto-tags, max 5 total) | +| `queue` | `string` | Queue name for the run | +| `maxAttempts` | `number` | Maximum retry attempts | +| `machine` | `"micro" \| "small-1x" \| ...` | Machine preset for the run | +| `priority` | `number` | Priority (lower = higher priority) | ```ts const transport = useTriggerChatTransport({ @@ -420,7 +454,7 @@ import type { myChat } from "@/trigger/chat"; const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: () => getChatToken(), + accessToken: getChatToken, // (params) => … — same shape as ResolveChatAccessTokenParams sessions: savedSessions, onSessionChange: 
handleSessionChange, }); From 360c8ff3e470a05c23e948c6d0252d31fcd8e561 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 27 Mar 2026 17:09:27 +0000 Subject: [PATCH 10/49] patterns and the ctx thing --- docs/ai-chat/backend.mdx | 15 ++++++++++++++- docs/ai-chat/features.mdx | 4 +++- docs/ai-chat/overview.mdx | 2 ++ docs/ai-chat/reference.mdx | 30 +++++++++++++++++++++++++++--- docs/docs.json | 7 +++++++ 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 50a338cd2c1..c7b6e051c0b 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -67,6 +67,12 @@ async function runAgentLoop(messages: ModelMessage[]) { ### Lifecycle hooks +#### Task context (`ctx`) + +Every chat lifecycle callback and the **`run`** payload include **`ctx`**: the same run context object as `task({ run: (payload, { ctx }) => ... })`. Import the type with **`import type { TaskRunContext } from "@trigger.dev/sdk"`** (the **`Context`** export is the same type). Use **`ctx`** for tags, metadata, or any API that needs the full run record. The string **`runId`** on chat events is always **`ctx.run.id`** (both are provided for convenience). See [Task context (`ctx`)](/ai-chat/reference#task-context-ctx) in the API reference. + +Standard **[task lifecycle hooks](/tasks/overview)** — **`onWait`**, **`onResume`**, **`onComplete`**, **`onFailure`**, etc. — are also available on **`chat.task()`** with the same shapes as on a normal `task()`. For example, tear down an external sandbox **right before the run suspends** waiting for the next message using **`onWait`** when **`wait.type === "token"`**. See the [Code execution sandbox](/ai-chat/patterns/code-sandbox) pattern. + #### onPreload Fires when a preloaded run starts — before any messages arrive. Use it to eagerly initialize state (DB records, user context) while the user is still typing. 
@@ -77,7 +83,7 @@ Preloaded runs are triggered by calling `transport.preload(chatId)` on the front export const myChat = chat.task({ id: "my-chat", clientDataSchema: z.object({ userId: z.string() }), - onPreload: async ({ chatId, clientData, runId, chatAccessToken }) => { + onPreload: async ({ ctx, chatId, clientData, runId, chatAccessToken }) => { // Initialize early — before the first message arrives const user = await db.user.findUnique({ where: { id: clientData.userId } }); userContext.init({ name: user.name, plan: user.plan }); @@ -101,6 +107,7 @@ export const myChat = chat.task({ | Field | Type | Description | | ----------------- | --------------------------------------------- | -------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — [reference](/ai-chat/reference#task-context-ctx) | | `chatId` | `string` | Chat session ID | | `runId` | `string` | The Trigger.dev run ID | | `chatAccessToken` | `string` | Scoped access token for this run | @@ -145,6 +152,7 @@ Fires at the start of every turn, after message accumulation and `onChatStart` ( | Field | Type | Description | | ----------------- | --------------------------------------------- | ----------------------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — [reference](/ai-chat/reference#task-context-ctx) | | `chatId` | `string` | Chat session ID | | `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | | `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | @@ -219,6 +227,7 @@ Fires after each turn completes — after the response is captured and the strea | Field | Type | Description | | -------------------- | ------------------------ | -------------------------------------------------------------------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — [reference](/ai-chat/reference#task-context-ctx) | | `chatId` | `string` | Chat session ID | | `messages` | 
`ModelMessage[]` | Full accumulated conversation (model format) | | `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | @@ -263,6 +272,10 @@ export const myChat = chat.task({ it uses this to skip past already-seen events — preventing duplicate messages. + + For a full **conversation + session** persistence pattern (including preload, continuation, and token renewal), see [Database persistence](/ai-chat/patterns/database-persistence). + + ### Using prompts Use [AI Prompts](/ai/prompts) to manage your system prompt as versioned, overridable config. Store the resolved prompt in a lifecycle hook with `chat.prompt.set()`, then spread `chat.toStreamTextOptions()` into `streamText` — it includes the system prompt, model, config, and telemetry automatically. diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index 4b262e3929c..efb0cad3692 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -8,6 +8,8 @@ description: "Per-run data, deferred work, custom streaming, subtask integration Use `chat.local` to create typed, run-scoped data that persists across turns and is accessible from anywhere — the run function, tools, nested helpers. Each run gets its own isolated copy, and locals are automatically cleared between runs. +Lifecycle hooks and **`run`** also receive **`ctx`** ([`TaskRunContext`](/ai-chat/reference#task-context-ctx)) — the same object as on a standard `task()` — for tags, metadata, and cleanup that needs the full run record. + When a subtask is invoked via `ai.toolExecute()` (or the deprecated `ai.tool()`), initialized locals are automatically serialized into the subtask's metadata and hydrated on first access — no extra code needed. Subtask changes to hydrated locals are local to the subtask and don't propagate back to the parent. 
### Declaring and initializing @@ -156,7 +158,7 @@ onTurnComplete: async ({ chatId }) => { --- -## chat.defer() +## chat.defer() {#chat-defer} Use `chat.defer()` to run background work in parallel with streaming. The deferred promise runs alongside the LLM response and is awaited (with a 5s timeout) before `onTurnComplete` fires. diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx index 3fe6d0f3ec2..8a339d5be0f 100644 --- a/docs/ai-chat/overview.mdx +++ b/docs/ai-chat/overview.mdx @@ -155,6 +155,8 @@ There are three ways to build the backend, from most opinionated to most flexibl ## Related - [Quick Start](/ai-chat/quick-start) — Get a working chat in 3 steps +- [Database persistence](/ai-chat/patterns/database-persistence) — Conversation + session state across hooks (ORM-agnostic) +- [Code execution sandbox](/ai-chat/patterns/code-sandbox) — Warm/teardown pattern for E2B (or similar) with `onWait` / `chat.local` - [Backend](/ai-chat/backend) — Backend approaches in detail - [Frontend](/ai-chat/frontend) — Transport setup, sessions, client data - [Types](/ai-chat/types) — TypeScript patterns, including custom `UIMessage` with `chat.withUIMessage` diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index 40f6f04daf8..6cf329acb8e 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -30,7 +30,23 @@ Options for `chat.task()`. | `preloadTimeout` | `string` | Same as `turnTimeout` | Suspend timeout for preloaded runs | | `uiMessageStreamOptions` | `ChatUIMessageStreamOptions` | — | Default options for `toUIMessageStream()`. Per-turn override via `chat.setUIMessageStreamOptions()` | -Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine`, `maxDuration`, etc. +Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine`, `maxDuration`, **`onWait`**, **`onResume`**, **`onComplete`**, and other lifecycle hooks. 
Those hooks use the same parameter shapes as on a normal `task()` (including `ctx`). + +## Task context (`ctx`) + +All **`chat.task`** lifecycle events (**`onPreload`**, **`onChatStart`**, **`onTurnStart`**, **`onBeforeTurnComplete`**, **`onTurnComplete`**, **`onCompacted`**) and the object passed to **`run`** include **`ctx`**: the same **`TaskRunContext`** shape as the `ctx` in `task({ run: (payload, { ctx }) => ... })`. + +Use **`ctx`** for run metadata, tags, parent links, or any API that needs the full run record. The chat-specific string **`runId`** on events is always **`ctx.run.id`**; both are provided for convenience. + +```ts +import type { TaskRunContext } from "@trigger.dev/sdk"; +// Equivalent alias (same type): +import type { Context } from "@trigger.dev/sdk"; +``` + + + Prefer `import type { TaskRunContext } from "@trigger.dev/sdk"` in application code. Do not depend on `@trigger.dev/core` directly. + ## ChatTaskRunPayload @@ -38,6 +54,7 @@ The payload passed to the `run` function. | Field | Type | Description | | -------------- | ------------------------------------------ | -------------------------------------------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — same as `task` `run`’s `{ ctx }` | | `messages` | `ModelMessage[]` | Model-ready messages — pass directly to `streamText` | | `chatId` | `string` | Unique chat session ID | | `trigger` | `"submit-message" \| "regenerate-message"` | What triggered the request | @@ -47,6 +64,8 @@ The payload passed to the `run` function. 
| `signal` | `AbortSignal` | Combined stop + cancel signal | | `cancelSignal` | `AbortSignal` | Cancel-only signal | | `stopSignal` | `AbortSignal` | Stop-only signal (per-turn) | +| `previousTurnUsage` | `LanguageModelUsage \| undefined` | Token usage from the previous turn (undefined on turn 0) | +| `totalUsage` | `LanguageModelUsage` | Cumulative token usage across completed turns so far | ## PreloadEvent @@ -54,6 +73,7 @@ Passed to the `onPreload` callback. | Field | Type | Description | | ----------------- | --------------------------- | -------------------------------------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — see [Task context](#task-context-ctx) | | `chatId` | `string` | Chat session ID | | `runId` | `string` | The Trigger.dev run ID | | `chatAccessToken` | `string` | Scoped access token for this run | @@ -66,6 +86,7 @@ Passed to the `onChatStart` callback. | Field | Type | Description | | ----------------- | --------------------------- | -------------------------------------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — see [Task context](#task-context-ctx) | | `chatId` | `string` | Chat session ID | | `messages` | `ModelMessage[]` | Initial model-ready messages | | `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | @@ -82,6 +103,7 @@ Passed to the `onTurnStart` callback. | Field | Type | Description | | ----------------- | --------------------------- | -------------------------------------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — see [Task context](#task-context-ctx) | | `chatId` | `string` | Chat session ID | | `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | | `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | @@ -100,6 +122,7 @@ Passed to the `onTurnComplete` callback. 
| Field | Type | Description | | -------------------- | --------------------------------- | ---------------------------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — see [Task context](#task-context-ctx) | | `chatId` | `string` | Chat session ID | | `messages` | `ModelMessage[]` | Full accumulated conversation (model format) | | `uiMessages` | `UIMessage[]` | Full accumulated conversation (UI format) | @@ -118,11 +141,11 @@ Passed to the `onTurnComplete` callback. ## BeforeTurnCompleteEvent -Passed to the `onBeforeTurnComplete` callback. Same fields as `TurnCompleteEvent` plus a `writer`. +Passed to the `onBeforeTurnComplete` callback. Same fields as `TurnCompleteEvent` (including **`ctx`**) plus a `writer`. | Field | Type | Description | | -------------------------------- | --------------------------- | ----------------------------------------------------------------------------- | -| _(all TurnCompleteEvent fields)_ | | See [TurnCompleteEvent](#turncompleteevent) | +| _(all TurnCompleteEvent fields)_ | | See [TurnCompleteEvent](#turncompleteevent) (includes `ctx`) | | `writer` | [`ChatWriter`](#chatwriter) | Stream writer — the stream is still open so chunks appear in the current turn | ## ChatWriter @@ -178,6 +201,7 @@ Passed to the `onCompacted` callback. 
| Field | Type | Description | | -------------- | --------------------------- | ------------------------------------------------- | +| `ctx` | `TaskRunContext` | Full task run context — see [Task context](#task-context-ctx) | | `summary` | `string` | The generated summary text | | `messages` | `ModelMessage[]` | Messages that were compacted (pre-compaction) | | `messageCount` | `number` | Number of messages before compaction | diff --git a/docs/docs.json b/docs/docs.json index d3a58d42350..5a924c6870c 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -96,6 +96,13 @@ "ai-chat/compaction", "ai-chat/pending-messages", "ai-chat/background-injection", + { + "group": "Patterns", + "pages": [ + "ai-chat/patterns/database-persistence", + "ai-chat/patterns/code-sandbox" + ] + }, "ai-chat/reference" ] } From 9b261b78fef2ff47070b573ee96b71812bd4d704 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 28 Mar 2026 09:10:49 +0000 Subject: [PATCH 11/49] docs: add onChatSuspend/onChatResume, exitAfterPreloadIdle, withClientData, ChatBuilder docs --- docs/ai-chat/backend.mdx | 69 ++++++++++++++++- docs/ai-chat/reference.mdx | 56 ++++++++++++-- docs/ai-chat/types.mdx | 147 ++++++++++++++++++++++++++++++------- 3 files changed, 237 insertions(+), 35 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index c7b6e051c0b..9842f9cfa5b 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -9,7 +9,7 @@ description: "Three approaches to building your chat backend — chat.task(), se The highest-level approach. Handles message accumulation, stop signals, turn lifecycle, and auto-piping automatically. - To fix a **custom** `UIMessage` subtype (typed custom data parts, tool map, etc.), use [`chat.withUIMessage<...>().task({...})`](/ai-chat/types) instead of `chat.task({...})`. Options are the same; defaults for `toUIMessageStream()` can be set on `withUIMessage`. 
+ To fix a **custom** `UIMessage` subtype or typed client data schema, use the [ChatBuilder](/ai-chat/types#chatbuilder) via `chat.withUIMessage<...>()` and/or `chat.withClientData({ schema })`. Builder-level hooks can also be chained before `.task()`. See [Types](/ai-chat/types). ### Simple: return a StreamTextResult @@ -71,7 +71,9 @@ async function runAgentLoop(messages: ModelMessage[]) { Every chat lifecycle callback and the **`run`** payload include **`ctx`**: the same run context object as `task({ run: (payload, { ctx }) => ... })`. Import the type with **`import type { TaskRunContext } from "@trigger.dev/sdk"`** (the **`Context`** export is the same type). Use **`ctx`** for tags, metadata, or any API that needs the full run record. The string **`runId`** on chat events is always **`ctx.run.id`** (both are provided for convenience). See [Task context (`ctx`)](/ai-chat/reference#task-context-ctx) in the API reference. -Standard **[task lifecycle hooks](/tasks/overview)** — **`onWait`**, **`onResume`**, **`onComplete`**, **`onFailure`**, etc. — are also available on **`chat.task()`** with the same shapes as on a normal `task()`. For example, tear down an external sandbox **right before the run suspends** waiting for the next message using **`onWait`** when **`wait.type === "token"`**. See the [Code execution sandbox](/ai-chat/patterns/code-sandbox) pattern. +Standard **[task lifecycle hooks](/tasks/overview)** — **`onWait`**, **`onResume`**, **`onComplete`**, **`onFailure`**, etc. — are also available on **`chat.task()`** with the same shapes as on a normal `task()`. + +Chat tasks also have two dedicated suspension hooks — **`onChatSuspend`** and **`onChatResume`** — that fire at the idle-to-suspended transition with full chat context. Use them for resource cleanup (e.g. tearing down sandboxes) and re-initialization. See [onChatSuspend / onChatResume](#onchatsuspend--onchatresume) and the [Code execution sandbox](/ai-chat/patterns/code-sandbox) pattern. 
#### onPreload @@ -276,6 +278,69 @@ export const myChat = chat.task({ For a full **conversation + session** persistence pattern (including preload, continuation, and token renewal), see [Database persistence](/ai-chat/patterns/database-persistence). +#### onChatSuspend / onChatResume + +Chat-specific hooks that fire at the **idle-to-suspended** transition — the moment the run stops using compute and waits for the next message. These replace the need for the generic `onWait` / `onResume` task hooks for chat-specific work. + +The `phase` discriminator tells you **when** the suspend/resume happened: + +- `"preload"` — after `onPreload`, waiting for the first message +- `"turn"` — after `onTurnComplete`, waiting for the next message + +```ts +export const myChat = chat.task({ + id: "my-chat", + onChatSuspend: async (event) => { + // Tear down expensive resources before suspending + await disposeCodeSandbox(event.ctx.run.id); + if (event.phase === "turn") { + logger.info("Suspending after turn", { turn: event.turn }); + } + }, + onChatResume: async (event) => { + // Re-initialize after waking up + logger.info("Resumed", { phase: event.phase }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +| Field | Type | Description | +| ------------ | ---------------- | ------------------------------------------------------------ | +| `phase` | `"preload" \| "turn"` | Whether this is a preload or post-turn suspension | +| `ctx` | `TaskRunContext` | Full task run context | +| `chatId` | `string` | Chat session ID | +| `runId` | `string` | The Trigger.dev run ID | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `turn` | `number` | Turn number (**`"turn"` phase only**) | +| `messages` | `ModelMessage[]` | Accumulated model messages (**`"turn"` phase only**) | +| `uiMessages` | `UIMessage[]` | Accumulated UI messages (**`"turn"` phase only**) | + + + 
Unlike `onWait` (which fires for all wait types — duration, task, batch, token), `onChatSuspend` fires only at chat suspension points with full chat context. No need to filter on `wait.type`. + + +#### exitAfterPreloadIdle + +When set to `true`, a preloaded run completes successfully after the idle timeout elapses instead of suspending. Use this for "fire and forget" preloads — if the user doesn't send a message during the idle window, the run ends cleanly. + +```ts +export const myChat = chat.task({ + id: "my-chat", + preloadIdleTimeoutInSeconds: 10, + exitAfterPreloadIdle: true, + onPreload: async ({ chatId, clientData }) => { + // Eagerly set up state — if no message comes, the run just ends + await initializeChat(chatId, clientData); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + ### Using prompts Use [AI Prompts](/ai/prompts) to manage your system prompt as versioned, overridable config. Store the resolved prompt in a lifecycle hook with `chat.prompt.set()`, then spread `chat.toStreamTextOptions()` into `streamText` — it includes the system prompt, model, config, and telemetry automatically. diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index 6cf329acb8e..6d959171622 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -29,6 +29,9 @@ Options for `chat.task()`. | `preloadIdleTimeoutInSeconds` | `number` | Same as `idleTimeoutInSeconds` | Idle timeout after `onPreload` fires | | `preloadTimeout` | `string` | Same as `turnTimeout` | Suspend timeout for preloaded runs | | `uiMessageStreamOptions` | `ChatUIMessageStreamOptions` | — | Default options for `toUIMessageStream()`. Per-turn override via `chat.setUIMessageStreamOptions()` | +| `onChatSuspend` | `(event: ChatSuspendEvent) => Promise \| void` | — | Fires right before the run suspends. 
See [onChatSuspend](/ai-chat/backend#onchatsuspend--onchatresume) | +| `onChatResume` | `(event: ChatResumeEvent) => Promise \| void` | — | Fires right after the run resumes from suspension | +| `exitAfterPreloadIdle` | `boolean` | `false` | Exit run after preload idle timeout instead of suspending. See [exitAfterPreloadIdle](/ai-chat/backend#exitafterpreloadidle) | Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine`, `maxDuration`, **`onWait`**, **`onResume`**, **`onComplete`**, and other lifecycle hooks. Those hooks use the same parameter shapes as on a normal `task()` (including `ctx`). @@ -148,6 +151,36 @@ Passed to the `onBeforeTurnComplete` callback. Same fields as `TurnCompleteEvent | _(all TurnCompleteEvent fields)_ | | See [TurnCompleteEvent](#turncompleteevent) (includes `ctx`) | | `writer` | [`ChatWriter`](#chatwriter) | Stream writer — the stream is still open so chunks appear in the current turn | +## ChatSuspendEvent + +Passed to the `onChatSuspend` callback. A discriminated union on `phase`. + +| Field | Type | Description | +| ------------ | --------------------------- | -------------------------------------------------------- | +| `phase` | `"preload" \| "turn"` | Whether this is a preload or post-turn suspension | +| `ctx` | `TaskRunContext` | Full task run context | +| `chatId` | `string` | Chat session ID | +| `runId` | `string` | The Trigger.dev run ID | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `turn` | `number` | Turn number (**`"turn"` phase only**) | +| `messages` | `ModelMessage[]` | Accumulated model messages (**`"turn"` phase only**) | +| `uiMessages` | `UIMessage[]` | Accumulated UI messages (**`"turn"` phase only**) | + +## ChatResumeEvent + +Passed to the `onChatResume` callback. Same discriminated union shape as `ChatSuspendEvent`. 
+ +| Field | Type | Description | +| ------------ | --------------------------- | -------------------------------------------------------- | +| `phase` | `"preload" \| "turn"` | Whether this is a preload or post-turn resumption | +| `ctx` | `TaskRunContext` | Full task run context | +| `chatId` | `string` | Chat session ID | +| `runId` | `string` | The Trigger.dev run ID | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `turn` | `number` | Turn number (**`"turn"` phase only**) | +| `messages` | `ModelMessage[]` | Accumulated model messages (**`"turn"` phase only**) | +| `uiMessages` | `UIMessage[]` | Accumulated UI messages (**`"turn"` phase only**) | + ## ChatWriter A stream writer passed to lifecycle callbacks. Write custom `UIMessageChunk` parts (e.g. `data-*` parts) to the chat stream. @@ -322,16 +355,15 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. | `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message | | `chat.stream` | Typed chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()` | | `chat.MessageAccumulator` | Class that accumulates conversation messages across turns | -| `chat.withUIMessage(config?).task(options)` | Same as `chat.task`, but fixes a custom `UIMessage` subtype and optional default stream options. See [Types](/ai-chat/types) | +| `chat.withUIMessage(config?)` | Returns a [ChatBuilder](/ai-chat/types#chatbuilder) with a fixed `UIMessage` subtype. See [Types](/ai-chat/types) | +| `chat.withClientData({ schema })` | Returns a [ChatBuilder](/ai-chat/types#chatbuilder) with a fixed client data schema. See [Types](/ai-chat/types#typed-client-data-with-chatwithclientdata) | ## `chat.withUIMessage` -Returns `{ task }`, where `task` is like [`chat.task`](#chat-namespace) but parameterized on a UI message type `TUIM`. +Returns a [`ChatBuilder`](/ai-chat/types#chatbuilder) with a fixed `UIMessage` subtype. 
Chain `.withClientData()`, hook methods, and `.task()`. ```ts -chat.withUIMessage(config?: ChatWithUIMessageConfig): { - task: (options: ChatTaskOptions<..., ..., TUIM>) => Task<...>; -}; +chat.withUIMessage(config?: ChatWithUIMessageConfig): ChatBuilder; ``` | Parameter | Type | Description | @@ -340,6 +372,20 @@ chat.withUIMessage(config?: ChatWithUIMessageConfig): { Use this when you need [`InferChatUIMessage`](#inferchatuimessage) / typed `data-*` parts / `InferUITools` to line up across backend hooks and `useChat`. Full guide: [Types](/ai-chat/types). +## `chat.withClientData` + +Returns a [`ChatBuilder`](/ai-chat/types#chatbuilder) with a fixed client data schema. All hooks and `run` get typed `clientData` without passing `clientDataSchema` in `.task()` options. + +```ts +chat.withClientData({ schema: TSchema }): ChatBuilder; +``` + +| Parameter | Type | Description | +| --------- | ------------ | -------------------------------------------------- | +| `schema` | `TaskSchema` | Zod, ArkType, Valibot, or any supported schema lib | + +Full guide: [Typed client data](/ai-chat/types#typed-client-data-with-chatwithclientdata). + ## `ChatWithUIMessageConfig` | Field | Type | Description | diff --git a/docs/ai-chat/types.mdx b/docs/ai-chat/types.mdx index 8ddfff063f0..1350a2f259e 100644 --- a/docs/ai-chat/types.mdx +++ b/docs/ai-chat/types.mdx @@ -4,7 +4,7 @@ sidebarTitle: "Types" description: "TypeScript types for AI Chat tasks, UI messages, and the frontend transport." --- -TypeScript patterns for [AI Chat](/ai-chat/overview). This page will expand over time; it currently documents how to pin a custom AI SDK [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype with `chat.withUIMessage` and align types on the client. +TypeScript patterns for [AI Chat](/ai-chat/overview). 
This page covers how to pin a custom AI SDK [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype with `chat.withUIMessage`, fix a typed `clientData` schema with `chat.withClientData`, chain builder-level hooks, and align types on the client. ## Custom `UIMessage` with `chat.withUIMessage` @@ -16,7 +16,7 @@ When you add **custom `data-*` parts** (via `chat.stream` / `writer`) or a **typ - Stream options like `sendReasoning` align with your message shape - The frontend can treat `useChat` messages as the same subtype end-to-end -`chat.withUIMessage(config?)` returns `{ task }`, where `task(...)` accepts the **same options as** [`chat.task()`](/ai-chat/backend#chat-task) but fixes `YourUIMessage` as the UI message type for that chat task. +`chat.withUIMessage(config?)` returns a [ChatBuilder](#chatbuilder) where `.task(...)` accepts the **same options as** [`chat.task()`](/ai-chat/backend#chat-task) but fixes `YourUIMessage` as the UI message type for that chat task. ### Defining a `UIMessage` subtype @@ -48,7 +48,7 @@ Task-backed tools should use AI SDK [`tool()`](https://sdk.vercel.ai/docs/ai-sdk ### Backend: `chat.withUIMessage(...).task(...)` -Call `withUIMessage` **once**, then chain `.task({ ... })` instead of `chat.task({ ... })`: +Call `withUIMessage` **once**, then chain `.task({ ... })` instead of `chat.task({ ... })`. You can also chain `.withClientData()` and hook methods before `.task()`: ```ts import { chat } from "@trigger.dev/sdk/ai"; @@ -65,31 +65,35 @@ const myTools = { }), }; -export const myChat = chat.withUIMessage({ - streamOptions: { - sendReasoning: true, - onError: (error) => - error instanceof Error ? 
error.message : "Something went wrong.", - }, -}).task({ - id: "my-chat", - clientDataSchema: z.object({ userId: z.string() }), - onTurnStart: async ({ uiMessages, writer }) => { - // uiMessages is MyChatUIMessage[] — custom data parts are typed - writer.write({ - type: "data-turn-status", - data: { status: "preparing" }, - }); - }, - run: async ({ messages, signal }) => { - return streamText({ - model: openai("gpt-4o"), - messages, - tools: myTools, - abortSignal: signal, - }); - }, -}); +export const myChat = chat + .withUIMessage({ + streamOptions: { + sendReasoning: true, + onError: (error) => + error instanceof Error ? error.message : "Something went wrong.", + }, + }) + .withClientData({ + schema: z.object({ userId: z.string() }), + }) + .task({ + id: "my-chat", + onTurnStart: async ({ uiMessages, writer }) => { + // uiMessages is MyChatUIMessage[] — custom data parts are typed + writer.write({ + type: "data-turn-status", + data: { status: "preparing" }, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + tools: myTools, + abortSignal: signal, + }); + }, + }); ``` ### Default stream options @@ -125,6 +129,91 @@ export function Chat() { You can also import `InferChatUIMessage` from `@trigger.dev/sdk/ai` in non-React modules. +## Typed client data with `chat.withClientData` + +`chat.withClientData({ schema })` returns a [ChatBuilder](#chatbuilder) that fixes the client data schema. All hooks and `run` receive typed `clientData` without needing `clientDataSchema` in `.task()` options. 
+ +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { z } from "zod"; + +export const myChat = chat + .withClientData({ + schema: z.object({ userId: z.string(), model: z.string().optional() }), + }) + .task({ + id: "my-chat", + onPreload: async ({ clientData }) => { + // clientData is typed as { userId: string; model?: string } + await initUser(clientData.userId); + }, + run: async ({ messages, clientData, signal }) => { + return streamText({ + model: getModel(clientData.model), + messages, + abortSignal: signal, + }); + }, + }); +``` + +## ChatBuilder + +Both `chat.withUIMessage()` and `chat.withClientData()` return a **ChatBuilder** — a chainable object that accumulates configuration before creating the task with `.task()`. + +Builder methods can be chained in any order: + +```ts +export const myChat = chat + .withUIMessage({ + streamOptions: { sendReasoning: true }, + }) + .withClientData({ + schema: z.object({ userId: z.string() }), + }) + .onChatSuspend(async ({ ctx }) => { + await disposeCodeSandbox(ctx.run.id); + }) + .onChatResume(async ({ ctx }) => { + warmCache(ctx.run.id); + }) + .task({ + id: "my-chat", + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, + }); +``` + +### Builder-level hooks + +All [lifecycle hooks](/ai-chat/backend#lifecycle-hooks) can be set on the builder: `onPreload`, `onChatStart`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`, `onCompacted`, `onChatSuspend`, `onChatResume`. + +Builder hooks and task-level hooks **coexist**. 
When both are defined for the same event, the builder hook runs first, then the task hook: + +```ts +chat + .withUIMessage() + .onPreload(async (event) => { + // Runs first — shared setup across tasks using this builder + await initializeSharedState(event.chatId); + }) + .task({ + id: "my-chat", + onPreload: async (event) => { + // Runs second — task-specific logic + await createChatRecord(event.chatId); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, + }); +``` + + + Set types first (`.withUIMessage()`, `.withClientData()`), then hooks. Hook parameters are typed based on the builder's current generics — so hooks registered after `.withClientData()` get typed `clientData`. + + ### When plain `chat.task()` is enough If you do not rely on custom `UIMessage` generics (only default text, reasoning, and built-in tool UI types), **`chat.task()` alone is fine** — no need for `withUIMessage`. @@ -132,6 +221,8 @@ If you do not rely on custom `UIMessage` generics (only default text, reasoning, ## See also - [Backend — `chat.task()`](/ai-chat/backend#chat-task) +- [Backend — Lifecycle hooks](/ai-chat/backend#lifecycle-hooks) - [Frontend — transport & `useChat`](/ai-chat/frontend) - [API reference — `chat.withUIMessage`](/ai-chat/reference#chat-withuimessage) +- [API reference — `chat.withClientData`](/ai-chat/reference#chat-withclientdata) - [Task-backed AI tools — `ai.toolExecute`](/tasks/schemaTask#task-backed-ai-tools) From d8488c76de698462c0d46ee463aefcfb72fa3503 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 28 Mar 2026 09:11:35 +0000 Subject: [PATCH 12/49] code sandbox and database patterns --- docs/ai-chat/patterns/code-sandbox.mdx | 125 +++++++++++++++++ .../ai-chat/patterns/database-persistence.mdx | 127 ++++++++++++++++++ 2 files changed, 252 insertions(+) create mode 100644 docs/ai-chat/patterns/code-sandbox.mdx create mode 100644 docs/ai-chat/patterns/database-persistence.mdx 
diff --git a/docs/ai-chat/patterns/code-sandbox.mdx b/docs/ai-chat/patterns/code-sandbox.mdx new file mode 100644 index 00000000000..bf35da3dea8 --- /dev/null +++ b/docs/ai-chat/patterns/code-sandbox.mdx @@ -0,0 +1,125 @@ +--- +title: "Code execution sandbox" +sidebarTitle: "Code sandbox" +description: "Warm an isolated sandbox on each chat turn, run an AI SDK executeCode tool, and tear down right before the run suspends — using chat.task hooks and chat.local." +--- + +Use a **hosted code sandbox** (for example [E2B](https://e2b.dev)) when the model should run short scripts to analyze tool output (PostHog queries, CSV-like data, math) without executing arbitrary code on the Trigger worker host. + +This page describes a **durable chat** pattern that fits `chat.task()`: + +- **Warm** the sandbox at the start of each turn (**non-blocking**). +- **Reuse** it for every `executeCode` tool call during that turn (and across turns in the same run if you keep the handle). +- **Dispose** it **right before the run suspends** waiting for the next user message — using the **`onChatSuspend`** hook, not `onTurnComplete`. + + + The reference implementation lives in the monorepo at [`references/ai-chat`](https://github.com/triggerdotdev/trigger.dev/tree/main/references/ai-chat) (`code-sandbox.ts`, `chat-tools.ts`, `trigger/chat.ts`). + + +## Why not tear down in `onTurnComplete`? + +After a turn finishes, the chat runtime still goes through an **idle** window and only then suspends. During that window the run is still executing — useful for `chat.defer()` work — and the run hasn't suspended yet. + +The boundary you want for “turn done, about to sleep” is **`onChatSuspend`**, which fires right before the run transitions from idle to suspended. It provides the `phase` (`”preload”` or `”turn”`) and full chat context. See [onChatSuspend / onChatResume](/ai-chat/backend#onchatsuspend--onchatresume). 
+ +```mermaid +sequenceDiagram + participant TurnStart as onTurnStart + participant Run as run / streamText + participant TurnDone as onTurnComplete + participant Idle as Idle window + participant Suspend as onChatSuspend + participant Sleep as suspended + + TurnStart->>Run: warm sandbox (async) + Run->>TurnDone: persist / inject / etc. + TurnDone->>Idle: still running + Idle->>Suspend: dispose sandbox + Suspend->>Sleep: waiting for next message +``` + +## Recommended provider: E2B + +- **API key** auth — works from any Trigger.dev worker; no Vercel-only OIDC. +- **Code Interpreter** SDK (`@e2b/code-interpreter`): long-lived sandbox, `runCode()`, `kill()`. + +Alternatives (Modal, Daytona, raw Docker) are fine but more DIY. Vercel’s sandbox + AI SDK helpers are a better fit when execution stays **on Vercel**, not on the Trigger worker. + +## Implementation sketch + +### 1. Run-scoped sandbox map + +Keep a `Map>` (or similar) in a **task-only module** so your Next.js app never imports it. + +### 2. `onTurnStart` — warm without blocking + +```ts +onTurnStart: async ({ runId, ctx, ...rest }) => { + warmCodeSandbox(runId); // fire-and-forget Sandbox.create() + // ...persist messages, writer, etc. +}, +``` + +### 3. `chat.local` — run id for tools + +Tool `execute` functions do not receive hook payloads. Use [`chat.local()`](/ai-chat/features#per-run-data-with-chatlocal) to store the current run id for the sandbox key, **initialized from `onTurnStart`** (same `runId` as the map): + +```ts +// In the same task module as your tools +import { chat } from "@trigger.dev/sdk/ai"; + +export const codeSandboxRun = chat.local<{ runId: string }>({ id: "codeSandboxRun" }); + +export function warmCodeSandbox(runId: string) { + codeSandboxRun.init({ runId }); + // ...start Sandbox.create(), store promise in Map by runId +} +``` + +The **`executeCode`** tool reads `codeSandboxRun.runId` and awaits the sandbox promise before `runCode`. + +### 4. 
`onChatSuspend` / `onComplete` — teardown + +Use **`onChatSuspend`** to dispose the sandbox right before the run suspends, and **`onComplete`** as a safety net when the run ends entirely. + +```ts +export const aiChat = chat.task({ + id: "ai-chat", + // ... + onChatSuspend: async ({ phase, ctx }) => { + await disposeCodeSandboxForRun(ctx.run.id); + }, + onComplete: async ({ ctx }) => { + await disposeCodeSandboxForRun(ctx.run.id); + }, +}); +``` + +Unlike `onWait` (which fires for all wait types), `onChatSuspend` only fires at chat suspension points — no need to filter on `wait.type`. The `phase` discriminator tells you if this is a preload or post-turn suspension. + +Optional **`onChatResume`**: log or reset flags; a fresh sandbox can be warmed again on the next **`onTurnStart`**. + +### 5. AI SDK tool + +Wrap the provider in a normal AI SDK `tool({ inputSchema, execute })` (same pattern as `webFetch`). Keep tool definitions in **task code**, not in the Next.js server bundle. + +### 6. Environment + +Set **`E2B_API_KEY`** (or your provider’s secret) on the **Trigger environment** for the worker — not in public client env. + +## Typing `ctx` + +Every `chat.task` lifecycle event and the `run` payload include **`ctx`**: the same **[`TaskRunContext`](/ai-chat/reference#task-context-ctx)** shape as `task({ run: (payload, { ctx }) => ... })`. + +```ts +import type { TaskRunContext } from "@trigger.dev/sdk"; +``` + +The alias **`Context`** is also exported from `@trigger.dev/sdk` and is the same type. 
+ +## See also + +- [Database persistence for chat](/ai-chat/patterns/database-persistence) — conversation + session rows, hooks, token renewal +- [Backend — Lifecycle hooks](/ai-chat/backend#lifecycle-hooks) +- [API Reference — `ctx` on events](/ai-chat/reference#task-context-ctx) +- [Per-run data with `chat.local`](/ai-chat/features#per-run-data-with-chatlocal) diff --git a/docs/ai-chat/patterns/database-persistence.mdx b/docs/ai-chat/patterns/database-persistence.mdx new file mode 100644 index 00000000000..4e1126a8931 --- /dev/null +++ b/docs/ai-chat/patterns/database-persistence.mdx @@ -0,0 +1,127 @@ +--- +title: "Database persistence for chat" +sidebarTitle: "Database persistence" +description: "Split conversation state and live session metadata across hooks — preload, turn start, turn complete — without tying the pattern to a specific ORM or schema." +--- + +Durable chat runs can span **hours** and **many turns**. You usually want: + +1. **Conversation state** — full **`UIMessage[]`** (or equivalent) keyed by **`chatId`**, so reloads and history views work. +2. **Live session state** — the **current Trigger `runId`**, a **scoped access token** for realtime + input streams, and optionally **`lastEventId`** for stream resume. + +This page describes a **hook mapping** that works with any database. The [ai-chat reference app](https://github.com/triggerdotdev/trigger.dev/tree/main/references/ai-chat) implements the same idea with a SQL database and an ORM; adapt table and column names to your stack. 
+ +## Conceptual data model + +You can use one table or two; the important split is **semantic**: + +| Concept | Purpose | Typical fields | +| ------- | ------- | -------------- | +| **Conversation** | Durable transcript + display metadata | Stable id (same as **`chatId`**), serialized **`uiMessages`**, title, model choice, owner/user id, timestamps | +| **Active session** | Reconnect + resume the **same** run | Same **`chatId`** as key (or FK), **current `runId`**, **`publicAccessToken`** (or your stored PAT), optional **`lastEventId`** | + +The **conversation** row is what your UI lists as “chats.” The **session** row is what the **transport** needs after a refresh or token expiry: *which run is live* and *how to authenticate* to it. + + + Store **`UIMessage[]`** in a JSON-compatible column, or normalize to a messages table — the pattern is *when* you read/write, not *how* you encode rows. + + +## Where each hook writes + +### `onPreload` (optional) + +When the user triggers [preload](/ai-chat/features#preload), the run starts **before** the first user message. + +- Ensure the **conversation** row exists (create or no-op). +- **Upsert session**: **`runId`**, **`chatAccessToken`** from the event (this is the turn-scoped token for that run). +- Load any **user / tenant context** you need for prompts (`clientData`). + +If you skip preload, do the equivalent in **`onChatStart`** when **`preloaded`** is false. + +### `onChatStart` (turn 0, non-preloaded path) + +- If **`preloaded`** is true, return early — **`onPreload`** already ran. +- Otherwise mirror preload: user/context, conversation create, session upsert. +- If **`continuation`** is true, the conversation row usually **already exists** (previous run ended or timed out); only update **session** fields so the **new** run id and token are stored. 
+ +### `onTurnStart` + +- Persist **`uiMessages`** (full accumulated history including the new user turn) **before** streaming starts — so a mid-stream refresh still shows the user’s message. +- Optionally use [`chat.defer()`](/ai-chat/features#chat-defer) so the write does not block the model if your driver is slow. + +### `onTurnComplete` + +- Persist **`uiMessages`** again with the **assistant** reply finalized. +- **Upsert session** with **`runId`**, fresh **`chatAccessToken`**, and **`lastEventId`** from the event. + +**`lastEventId`** lets the frontend [resume](/ai-chat/frontend) without replaying SSE events it already applied. Treat it as part of session state, not optional polish, if you care about duplicate chunks after refresh. + +## Token renewal (app server) + +Turn tokens expire (see **`chatAccessTokenTTL`** on **`chat.task`**). When the transport gets **401** on realtime or input streams, mint a **new** public access token with the **same** scopes the task uses — typically **read** for that **`runId`** and **write** for **input streams** on that run — then **persist** it on your **session** row. + +Your **Next.js server action**, **Remix action**, or **API route** should: + +1. Load **session** by **`chatId`** → **`runId`**. +2. Call **`auth.createPublicToken`** (or your platform’s equivalent) with those scopes. +3. Save the new token (and confirm **`runId`** is unchanged unless you started a new run). + +No Trigger task code needs to run for renewal. + +## Minimal pseudocode + +```typescript +// Pseudocode — replace saveConversation / saveSession with your DB layer. + +chat.task({ + id: "my-chat", + clientDataSchema: z.object({ userId: z.string() }), + + onPreload: async ({ chatId, runId, chatAccessToken, clientData }) => { + if (!clientData) return; + await ensureUser(clientData.userId); + await upsertConversation({ id: chatId, userId: clientData.userId /* ... 
*/ }); + await upsertSession({ chatId, runId, publicAccessToken: chatAccessToken }); + }, + + onChatStart: async ({ chatId, runId, chatAccessToken, clientData, continuation, preloaded }) => { + if (preloaded) return; + await ensureUser(clientData.userId); + if (!continuation) { + await upsertConversation({ id: chatId, userId: clientData.userId /* ... */ }); + } + await upsertSession({ chatId, runId, publicAccessToken: chatAccessToken }); + }, + + onTurnStart: async ({ chatId, uiMessages }) => { + chat.defer(saveConversationMessages(chatId, uiMessages)); + }, + + onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => { + await saveConversationMessages(chatId, uiMessages); + await upsertSession({ + chatId, + runId, + publicAccessToken: chatAccessToken, + lastEventId, + }); + }, + + run: async ({ messages, signal }) => { + /* streamText, etc. */ + }, +}); +``` + +## Design notes + +- **`chatId`** is stable for the life of a thread; **`runId`** changes when the user starts a **new** run (timeout, cancel, explicit new chat). Session rows must always reflect the **current** run. +- **`continuation: true`** means “same logical chat, new run” — update session, don’t assume an empty conversation. +- Keep **task modules** that perform writes **out of** browser bundles; the pattern assumes persistence runs **in the worker** (or your BFF that the task calls). 
+ +## See also + +- [Backend — Lifecycle hooks](/ai-chat/backend#lifecycle-hooks) +- [Session management](/ai-chat/frontend#session-management) — `resume`, `lastEventId`, transport +- [`chat.defer()`](/ai-chat/features#chat-defer) — non-blocking writes during a turn +- [Code execution sandbox](/ai-chat/patterns/code-sandbox) — combines **`onWait`** / **`onComplete`** with this persistence model From fbf72d1ef7be7160f64b6cf61519200467130090 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Mon, 30 Mar 2026 21:55:32 +0100 Subject: [PATCH 13/49] docs: rename chat.task to chat.agent across all AI docs --- docs/ai-chat/backend.mdx | 56 +++++++++---------- docs/ai-chat/background-injection.mdx | 4 +- docs/ai-chat/compaction.mdx | 10 ++-- docs/ai-chat/features.mdx | 14 ++--- docs/ai-chat/frontend.mdx | 6 +- docs/ai-chat/overview.mdx | 26 +++++---- docs/ai-chat/patterns/code-sandbox.mdx | 8 +-- .../ai-chat/patterns/database-persistence.mdx | 4 +- docs/ai-chat/pending-messages.mdx | 6 +- docs/ai-chat/quick-start.mdx | 12 ++-- docs/ai-chat/reference.mdx | 32 +++++------ docs/ai-chat/types.mdx | 32 +++++------ docs/ai/prompts.mdx | 6 +- docs/docs.json | 2 +- docs/tasks/schemaTask.mdx | 2 +- 15 files changed, 111 insertions(+), 109 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 9842f9cfa5b..ff1f7686e77 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -1,15 +1,15 @@ --- title: "Backend" sidebarTitle: "Backend" -description: "Three approaches to building your chat backend — chat.task(), session iterator, or raw task primitives." +description: "Three approaches to building your chat backend — chat.agent(), session iterator, or raw task primitives." --- -## chat.task() +## chat.agent() The highest-level approach. Handles message accumulation, stop signals, turn lifecycle, and auto-piping automatically. 
- To fix a **custom** `UIMessage` subtype or typed client data schema, use the [ChatBuilder](/ai-chat/types#chatbuilder) via `chat.withUIMessage<...>()` and/or `chat.withClientData({ schema })`. Builder-level hooks can also be chained before `.task()`. See [Types](/ai-chat/types). + To fix a **custom** `UIMessage` subtype or typed client data schema, use the [ChatBuilder](/ai-chat/types#chatbuilder) via `chat.withUIMessage<...>()` and/or `chat.withClientData({ schema })`. Builder-level hooks can also be chained before `.agent()`. See [Types](/ai-chat/types). ### Simple: return a StreamTextResult @@ -21,7 +21,7 @@ import { chat } from "@trigger.dev/sdk/ai"; import { streamText } from "ai"; import { openai } from "@ai-sdk/openai"; -export const simpleChat = chat.task({ +export const simpleChat = chat.agent({ id: "simple-chat", run: async ({ messages, signal }) => { return streamText({ @@ -44,7 +44,7 @@ import { streamText } from "ai"; import { openai } from "@ai-sdk/openai"; import type { ModelMessage } from "ai"; -export const agentChat = chat.task({ +export const agentChat = chat.agent({ id: "agent-chat", run: async ({ messages }) => { // Don't return anything — chat.pipe is called inside @@ -71,9 +71,9 @@ async function runAgentLoop(messages: ModelMessage[]) { Every chat lifecycle callback and the **`run`** payload include **`ctx`**: the same run context object as `task({ run: (payload, { ctx }) => ... })`. Import the type with **`import type { TaskRunContext } from "@trigger.dev/sdk"`** (the **`Context`** export is the same type). Use **`ctx`** for tags, metadata, or any API that needs the full run record. The string **`runId`** on chat events is always **`ctx.run.id`** (both are provided for convenience). See [Task context (`ctx`)](/ai-chat/reference#task-context-ctx) in the API reference. -Standard **[task lifecycle hooks](/tasks/overview)** — **`onWait`**, **`onResume`**, **`onComplete`**, **`onFailure`**, etc. 
— are also available on **`chat.task()`** with the same shapes as on a normal `task()`. +Standard **[task lifecycle hooks](/tasks/overview)** — **`onWait`**, **`onResume`**, **`onComplete`**, **`onFailure`**, etc. — are also available on **`chat.agent()`** with the same shapes as on a normal `task()`. -Chat tasks also have two dedicated suspension hooks — **`onChatSuspend`** and **`onChatResume`** — that fire at the idle-to-suspended transition with full chat context. Use them for resource cleanup (e.g. tearing down sandboxes) and re-initialization. See [onChatSuspend / onChatResume](#onchatsuspend--onchatresume) and the [Code execution sandbox](/ai-chat/patterns/code-sandbox) pattern. +Chat agents also have two dedicated suspension hooks — **`onChatSuspend`** and **`onChatResume`** — that fire at the idle-to-suspended transition with full chat context. Use them for resource cleanup (e.g. tearing down sandboxes) and re-initialization. See [onChatSuspend / onChatResume](#onchatsuspend--onchatresume) and the [Code execution sandbox](/ai-chat/patterns/code-sandbox) pattern. #### onPreload @@ -82,7 +82,7 @@ Fires when a preloaded run starts — before any messages arrive. Use it to eage Preloaded runs are triggered by calling `transport.preload(chatId)` on the frontend. See [Preload](/ai-chat/features#preload) for details. ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", clientDataSchema: z.object({ userId: z.string() }), onPreload: async ({ ctx, chatId, clientData, runId, chatAccessToken }) => { @@ -125,7 +125,7 @@ Fires once on the first turn (turn 0) before `run()` executes. Use it to create The `continuation` field tells you whether this is a brand new chat or a continuation of an existing one (where the previous run timed out or was cancelled). The `preloaded` field tells you whether `onPreload` already ran. 
```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onChatStart: async ({ chatId, clientData, continuation, preloaded }) => { if (preloaded) return; // Already set up in onPreload @@ -167,7 +167,7 @@ Fires at the start of every turn, after message accumulation and `onChatStart` ( | `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks | ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onTurnStart: async ({ chatId, uiMessages, runId, chatAccessToken }) => { await db.chat.update({ @@ -196,7 +196,7 @@ export const myChat = chat.task({ Fires after the response is captured but **before** the stream closes. The `writer` can send custom chunks that appear in the current turn — use this for post-processing indicators, compaction progress, or any data the user should see before the turn ends. ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onBeforeTurnComplete: async ({ writer, usage, uiMessages }) => { // Write a custom data part while the stream is still open @@ -245,7 +245,7 @@ Fires after each turn completes — after the response is captured and the strea | `rawResponseMessage` | `UIMessage \| undefined` | The raw assistant response before abort cleanup (same as `responseMessage` when not stopped) | ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => { await db.chat.update({ @@ -288,7 +288,7 @@ The `phase` discriminator tells you **when** the suspend/resume happened: - `"turn"` — after `onTurnComplete`, waiting for the next message ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onChatSuspend: async (event) => { // Tear down expensive resources before suspending @@ -327,7 +327,7 @@ export const myChat = chat.task({ When set to `true`, a 
preloaded run completes successfully after the idle timeout elapses instead of suspending. Use this for "fire and forget" preloads — if the user doesn't send a message during the idle window, the run ends cleanly. ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", preloadIdleTimeoutInSeconds: 10, exitAfterPreloadIdle: true, @@ -362,7 +362,7 @@ const systemPrompt = prompts.define({ content: `You are a helpful assistant for {{name}}.`, }); -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", clientDataSchema: z.object({ userId: z.string() }), onChatStart: async ({ clientData }) => { @@ -404,7 +404,7 @@ The `run` function receives three abort signals: | `cancelSignal` | Run cancel, expire, or maxDuration exceeded | Cleanup that should only happen on full cancellation | ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", run: async ({ messages, signal, stopSignal, cancelSignal }) => { return streamText({ @@ -426,7 +426,7 @@ export const myChat = chat.task({ The `onTurnComplete` event includes a `stopped` boolean that indicates whether the user stopped generation during that turn: ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onTurnComplete: async ({ chatId, uiMessages, stopped }) => { await db.chat.update({ @@ -446,7 +446,7 @@ You can also check stop status from **anywhere** during a turn using `chat.isSto import { chat } from "@trigger.dev/sdk/ai"; import { streamText } from "ai"; -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", run: async ({ messages, signal }) => { return streamText({ @@ -469,7 +469,7 @@ export const myChat = chat.task({ When stop happens mid-stream, the captured response message can contain parts in an incomplete state — tool calls stuck in `partial-call`, reasoning blocks still marked as `streaming`, etc. 
These can cause UI issues like permanent spinners. -`chat.task` automatically cleans up the `responseMessage` when stop is detected before passing it to `onTurnComplete`. If you use `chat.pipe()` manually and capture response messages yourself, use `chat.cleanupAbortedParts()`: +`chat.agent` automatically cleans up the `responseMessage` when stop is detected before passing it to `onTurnComplete`. If you use `chat.pipe()` manually and capture response messages yourself, use `chat.cleanupAbortedParts()`: ```ts const cleaned = chat.cleanupAbortedParts(rawResponseMessage); @@ -508,7 +508,7 @@ import { openai } from "@ai-sdk/openai"; import { z } from "zod"; import { db } from "@/lib/db"; -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", clientDataSchema: z.object({ userId: z.string(), @@ -660,7 +660,7 @@ export function Chat({ chatId, initialMessages, initialSessions }) { Users can send messages while the agent is executing tool calls. With `pendingMessages`, these messages are injected between tool-call steps, steering the agent mid-execution: ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", pendingMessages: { shouldInject: ({ steps }) => steps.length > 0, @@ -690,7 +690,7 @@ On the frontend, the `usePendingMessages` hook handles sending, tracking, and re Inject context from background work into the conversation using `chat.inject()`. Combine with `chat.defer()` to run analysis between turns and inject results before the next response — self-review, RAG augmentation, safety checks, etc. ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onTurnComplete: async ({ messages }) => { chat.defer( @@ -727,7 +727,7 @@ Transform model messages before they're used anywhere — in `run()`, in compact Use this for Anthropic cache breaks, injecting system context, stripping PII, etc. 
```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", prepareMessages: ({ messages, reason }) => { // Add Anthropic cache breaks to the last message @@ -798,7 +798,7 @@ When `streamText` encounters an error mid-stream (rate limits, API failures, net By default, the raw error message is sent to the frontend. Use `onError` to sanitize errors and avoid leaking internal details: ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", uiMessageStreamOptions: { onError: (error) => { @@ -836,7 +836,7 @@ const { messages, sendMessage } = useChat({ Control which AI SDK features are forwarded to the frontend: ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", uiMessageStreamOptions: { sendReasoning: true, // Forward model reasoning (default: true) @@ -862,7 +862,7 @@ run: async ({ messages, clientData, signal }) => { }, ``` -`chat.setUIMessageStreamOptions()` works across all abstraction levels — `chat.task()`, `chat.createSession()` / `turn.complete()`, and `chat.pipeAndCapture()`. +`chat.setUIMessageStreamOptions()` works across all abstraction levels — `chat.agent()`, `chat.createSession()` / `turn.complete()`, and `chat.pipeAndCapture()`. See [ChatUIMessageStreamOptions](/ai-chat/reference#chatuimessagestreamoptions) for the full reference. @@ -900,14 +900,14 @@ export const manualChat = task({ Manual mode does not get automatic message accumulation or the `onTurnComplete`/`onChatStart` lifecycle hooks. The `responseMessage` field in `onTurnComplete` will be `undefined` when using - `chat.pipe()` directly. Use `chat.task()` for the full multi-turn experience. + `chat.pipe()` directly. Use `chat.agent()` for the full multi-turn experience. --- ## chat.createSession() -A middle ground between `chat.task()` and raw primitives. 
You get an async iterator that yields `ChatTurn` objects — each turn handles stop signals, message accumulation, and turn-complete signaling automatically. You control initialization, model/tool selection, persistence, and any custom per-turn logic. +A middle ground between `chat.agent()` and raw primitives. You get an async iterator that yields `ChatTurn` objects — each turn handles stop signals, message accumulation, and turn-complete signaling automatically. You control initialization, model/tool selection, persistence, and any custom per-turn logic. Use `chat.createSession()` inside a standard `task()`: diff --git a/docs/ai-chat/background-injection.mdx b/docs/ai-chat/background-injection.mdx index b50c86329f6..8f1942398ba 100644 --- a/docs/ai-chat/background-injection.mdx +++ b/docs/ai-chat/background-injection.mdx @@ -31,7 +31,7 @@ Messages are appended to the model messages before the next LLM inference call. The most powerful pattern combines `chat.defer()` (background work) with `chat.inject()` (inject results). Background work runs in parallel with the idle wait between turns, and results are injected before the next response. ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onTurnComplete: async ({ messages }) => { // Kick off background analysis — doesn't block the turn @@ -95,7 +95,7 @@ Focus on: Be concise. Only flag issues worth fixing.`, }); -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onTurnComplete: async ({ messages }) => { chat.defer( diff --git a/docs/ai-chat/compaction.mdx b/docs/ai-chat/compaction.mdx index 5f2c61245e9..9039084173a 100644 --- a/docs/ai-chat/compaction.mdx +++ b/docs/ai-chat/compaction.mdx @@ -8,7 +8,7 @@ description: "Automatic context compaction to keep long conversations within tok Long conversations accumulate tokens across turns. Eventually the context window fills up, causing errors or degraded responses. 
Compaction solves this by automatically summarizing the conversation when token usage exceeds a threshold, then using that summary as the context for future turns. -The `compaction` option on `chat.task()` handles this in both paths: +The `compaction` option on `chat.agent()` handles this in both paths: - **Between tool-call steps** (inner loop) — via the AI SDK's `prepareStep`, compaction runs between tool calls within a single turn - **Between turns** (outer loop) — for single-step responses with no tool calls, where `prepareStep` never fires @@ -22,7 +22,7 @@ import { chat } from "@trigger.dev/sdk/ai"; import { streamText, generateText } from "ai"; import { openai } from "@ai-sdk/openai"; -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", compaction: { shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000, @@ -71,7 +71,7 @@ Replace older messages with a summary but keep the last few exchanges visible: ```ts import { generateId } from "ai"; -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", compaction: { shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000, @@ -175,7 +175,7 @@ The `summarize` callback receives similar context: Track compaction events for logging, billing, or analytics: ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", compaction: { ... }, onCompacted: async ({ summary, totalTokens, messageCount, chatId, turn }) => { @@ -292,5 +292,5 @@ prepareStep: chat.compactionStep({ ``` - The fully manual APIs only handle inner-loop compaction (between tool-call steps). For outer-loop coverage, use the `compaction` option on `chat.task()`, `chat.createSession()`, or `MessageAccumulator`. + The fully manual APIs only handle inner-loop compaction (between tool-call steps). For outer-loop coverage, use the `compaction` option on `chat.agent()`, `chat.createSession()`, or `MessageAccumulator`. 
diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index efb0cad3692..a91b61ff03d 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -30,7 +30,7 @@ const userContext = chat.local<{ messageCount: number; }>({ id: "userContext" }); -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", clientDataSchema: z.object({ userId: z.string() }), onChatStart: async ({ clientData }) => { @@ -105,7 +105,7 @@ const analyzeData = tool({ execute: ai.toolExecute(analyzeDataTask), }); -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onChatStart: async ({ clientData }) => { userContext.init({ name: "Alice", plan: "pro" }); @@ -165,7 +165,7 @@ Use `chat.defer()` to run background work in parallel with streaming. The deferr This moves non-blocking work (DB writes, analytics, etc.) out of the critical path: ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onTurnStart: async ({ chatId, uiMessages }) => { // Persist messages without blocking the LLM call @@ -188,7 +188,7 @@ export const myChat = chat.task({ ```ts import { chat } from "@trigger.dev/sdk/ai"; -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", run: async ({ messages, signal }) => { // Write a custom data part to the chat stream. 
@@ -286,7 +286,7 @@ const research = tool({ execute: ai.toolExecute(researchTask), }); -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", run: async ({ messages, signal }) => { return streamText({ @@ -320,7 +320,7 @@ On the frontend, render the custom data part: The `target` option accepts: - `"self"` — current run (default) - `"parent"` — parent task's run -- `"root"` — root task's run (the chat task) +- `"root"` — root task's run (the chat agent) - A specific run ID string --- @@ -409,7 +409,7 @@ When the transport needs a trigger token for preload, your `accessToken` callbac On the backend, the `onPreload` hook fires immediately. The run then waits for the first message. When the user sends a message, `onChatStart` fires with `preloaded: true` — you can skip initialization that was already done in `onPreload`: ```ts -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onPreload: async ({ chatId, clientData }) => { // Eagerly initialize — runs before the first message diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index c03eb484565..197e738baf4 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -34,7 +34,7 @@ The transport is created once on first render and reused across re-renders. 
Pass ## Typed messages (`chat.withUIMessage`) -If your chat task is defined with [`chat.withUIMessage()`](/ai-chat/types) (custom `data-*` parts, typed tools, etc.), pass the same message type through `useChat` so `messages` and `message.parts` are narrowed on the client: +If your chat agent is defined with [`chat.withUIMessage()`](/ai-chat/types) (custom `data-*` parts, typed tools, etc.), pass the same message type through `useChat` so `messages` and `message.parts` are narrowed on the client: ```tsx import { useChat } from "@ai-sdk/react"; @@ -189,7 +189,7 @@ sendMessage({ text: "Hello" }, { metadata: { model: "gpt-4o", priority: "high" } ### Typed client data with clientDataSchema -Instead of manually parsing `clientData` with Zod in every hook, pass a `clientDataSchema` to `chat.task`. The schema validates the data once per turn, and `clientData` is typed in all hooks and `run`: +Instead of manually parsing `clientData` with Zod in every hook, pass a `clientDataSchema` to `chat.agent`. The schema validates the data once per turn, and `clientData` is typed in all hooks and `run`: ```ts import { chat } from "@trigger.dev/sdk/ai"; @@ -197,7 +197,7 @@ import { streamText } from "ai"; import { openai } from "@ai-sdk/openai"; import { z } from "zod"; -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", clientDataSchema: z.object({ model: z.string().optional(), diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx index 8a339d5be0f..eaab0db43cd 100644 --- a/docs/ai-chat/overview.mdx +++ b/docs/ai-chat/overview.mdx @@ -1,21 +1,21 @@ --- -title: "AI Chat" +title: "AI Agents" sidebarTitle: "Overview" -description: "Run AI SDK chat completions as durable Trigger.dev tasks with built-in realtime streaming, multi-turn conversations, and message persistence." +description: "Run AI SDK chat completions as durable Trigger.dev agents with built-in realtime streaming, multi-turn conversations, and message persistence." 
--- ## Overview -The `@trigger.dev/sdk` provides a custom [ChatTransport](https://sdk.vercel.ai/docs/ai-sdk-ui/transport) for the Vercel AI SDK's `useChat` hook. This lets you run chat completions as **durable Trigger.dev tasks** instead of fragile API routes — with automatic retries, observability, and realtime streaming built in. +The `@trigger.dev/sdk` provides a custom [ChatTransport](https://sdk.vercel.ai/docs/ai-sdk-ui/transport) for the Vercel AI SDK's `useChat` hook. This lets you run chat completions as **durable Trigger.dev agents** instead of fragile API routes — with automatic retries, observability, and realtime streaming built in. **How it works:** 1. The frontend sends messages via `useChat` through `TriggerChatTransport` -2. The first message triggers a Trigger.dev task; subsequent messages resume the **same run** via input streams -3. The task streams `UIMessageChunk` events back via Trigger.dev's realtime streams +2. The first message triggers a Trigger.dev agent; subsequent messages resume the **same run** via input streams +3. The agent streams `UIMessageChunk` events back via Trigger.dev's realtime streams 4. The AI SDK's `useChat` processes the stream natively — text, tool calls, reasoning, etc. 5. Between turns, the run stays idle briefly then suspends (freeing compute) until the next message -No custom API routes needed. Your chat backend is a Trigger.dev task. +No custom API routes needed. Your chat backend is a Trigger.dev agent. 
@@ -26,7 +26,7 @@ sequenceDiagram participant User participant useChat as useChat + Transport participant API as Trigger.dev API - participant Task as chat.task Worker + participant Task as chat.agent Worker participant LLM as LLM Provider User->>useChat: sendMessage("Hello") @@ -57,7 +57,7 @@ sequenceDiagram participant User participant useChat as useChat + Transport participant API as Trigger.dev API - participant Task as chat.task Worker + participant Task as chat.agent Worker participant LLM as LLM Provider Note over Task: Suspended, waiting for message @@ -88,7 +88,7 @@ sequenceDiagram participant User participant useChat as useChat + Transport participant API as Trigger.dev API - participant Task as chat.task Worker + participant Task as chat.agent Worker participant LLM as LLM Provider Note over Task: Streaming response... @@ -116,7 +116,7 @@ sequenceDiagram ### One run, many turns -The entire conversation lives in a **single Trigger.dev run**. After each AI response, the run waits for the next message via input streams. The frontend transport handles this automatically — it triggers a new run for the first message, and sends subsequent messages to the existing run. +The entire conversation lives in a **single Trigger.dev run**. After each AI response, the run waits for the next message via input streams. The frontend transport handles this automatically — it triggers a new run for the first message and sends subsequent messages to the existing run. This means your conversation has full observability in the Trigger.dev dashboard: every turn is a span inside the same run. @@ -135,20 +135,22 @@ If no message arrives within the turn timeout, the run ends gracefully. The next ### What the backend accumulates -The backend automatically accumulates the full conversation history across turns. After the first turn, the frontend transport only sends the new user message — not the entire history. This is handled transparently by the transport and task. 
+The backend automatically accumulates the full conversation history across turns. After the first turn, the frontend transport only sends the new user message — not the entire history. This is handled transparently by the transport and agent. The accumulated messages are available in: - `run()` as `messages` (`ModelMessage[]`) — for passing to `streamText` - `onTurnStart()` as `uiMessages` (`UIMessage[]`) — for persisting before streaming - `onTurnComplete()` as `uiMessages` (`UIMessage[]`) — for persisting after the response +Agents appear in the **Agents** section of the dashboard (not Tasks) and can be tested via the **Playground**. + ## Three approaches There are three ways to build the backend, from most opinionated to most flexible: | Approach | Use when | What you get | |----------|----------|--------------| -| [chat.task()](/ai-chat/backend#chattask) | Most apps | Auto-piping, lifecycle hooks, message accumulation, stop handling | +| [chat.agent()](/ai-chat/backend#chatagent) | Most apps | Auto-piping, lifecycle hooks, message accumulation, stop handling | | [chat.createSession()](/ai-chat/backend#chatcreatesession) | Need a loop but not hooks | Async iterator with per-turn helpers, message accumulation, stop handling | | [Raw task + primitives](/ai-chat/backend#raw-task-with-primitives) | Full control | Manual control of every step — use `chat.messages`, `chat.createStopSignal()`, etc. | diff --git a/docs/ai-chat/patterns/code-sandbox.mdx b/docs/ai-chat/patterns/code-sandbox.mdx index bf35da3dea8..eb76d8fcbf4 100644 --- a/docs/ai-chat/patterns/code-sandbox.mdx +++ b/docs/ai-chat/patterns/code-sandbox.mdx @@ -1,12 +1,12 @@ --- title: "Code execution sandbox" sidebarTitle: "Code sandbox" -description: "Warm an isolated sandbox on each chat turn, run an AI SDK executeCode tool, and tear down right before the run suspends — using chat.task hooks and chat.local." 
+description: "Warm an isolated sandbox on each chat turn, run an AI SDK executeCode tool, and tear down right before the run suspends — using chat.agent hooks and chat.local." --- Use a **hosted code sandbox** (for example [E2B](https://e2b.dev)) when the model should run short scripts to analyze tool output (PostHog queries, CSV-like data, math) without executing arbitrary code on the Trigger worker host. -This page describes a **durable chat** pattern that fits `chat.task()`: +This page describes a **durable chat** pattern that fits `chat.agent()`: - **Warm** the sandbox at the start of each turn (**non-blocking**). - **Reuse** it for every `executeCode` tool call during that turn (and across turns in the same run if you keep the handle). @@ -83,7 +83,7 @@ The **`executeCode`** tool reads `codeSandboxRun.runId` and awaits the sandbox p Use **`onChatSuspend`** to dispose the sandbox right before the run suspends, and **`onComplete`** as a safety net when the run ends entirely. ```ts -export const aiChat = chat.task({ +export const aiChat = chat.agent({ id: "ai-chat", // ... onChatSuspend: async ({ phase, ctx }) => { @@ -109,7 +109,7 @@ Set **`E2B_API_KEY`** (or your provider’s secret) on the **Trigger environment ## Typing `ctx` -Every `chat.task` lifecycle event and the `run` payload include **`ctx`**: the same **[`TaskRunContext`](/ai-chat/reference#task-context-ctx)** shape as `task({ run: (payload, { ctx }) => ... })`. +Every `chat.agent` lifecycle event and the `run` payload include **`ctx`**: the same **[`TaskRunContext`](/ai-chat/reference#task-context-ctx)** shape as `task({ run: (payload, { ctx }) => ... })`. 
```ts import type { TaskRunContext } from "@trigger.dev/sdk"; diff --git a/docs/ai-chat/patterns/database-persistence.mdx b/docs/ai-chat/patterns/database-persistence.mdx index 4e1126a8931..77bd45ee3e9 100644 --- a/docs/ai-chat/patterns/database-persistence.mdx +++ b/docs/ai-chat/patterns/database-persistence.mdx @@ -58,7 +58,7 @@ If you skip preload, do the equivalent in **`onChatStart`** when **`preloaded`** ## Token renewal (app server) -Turn tokens expire (see **`chatAccessTokenTTL`** on **`chat.task`**). When the transport gets **401** on realtime or input streams, mint a **new** public access token with the **same** scopes the task uses — typically **read** for that **`runId`** and **write** for **input streams** on that run — then **persist** it on your **session** row. +Turn tokens expire (see **`chatAccessTokenTTL`** on **`chat.agent`**). When the transport gets **401** on realtime or input streams, mint a **new** public access token with the **same** scopes the task uses — typically **read** for that **`runId`** and **write** for **input streams** on that run — then **persist** it on your **session** row. Your **Next.js server action**, **Remix action**, or **API route** should: @@ -73,7 +73,7 @@ No Trigger task code needs to run for renewal. ```typescript // Pseudocode — replace saveConversation / saveSession with your DB layer. -chat.task({ +chat.agent({ id: "my-chat", clientDataSchema: z.object({ userId: z.string() }), diff --git a/docs/ai-chat/pending-messages.mdx b/docs/ai-chat/pending-messages.mdx index 3f0e9ecefda..b367b1c3a12 100644 --- a/docs/ai-chat/pending-messages.mdx +++ b/docs/ai-chat/pending-messages.mdx @@ -20,16 +20,16 @@ The `pendingMessages` option enables this by injecting user messages between too 6. A `data-pending-message-injected` stream chunk confirms injection to the frontend 7. 
If `prepareStep` never fires (no tool calls), the message becomes the next turn -## Backend: chat.task +## Backend: chat.agent -Add `pendingMessages` to your `chat.task` configuration: +Add `pendingMessages` to your `chat.agent` configuration: ```ts import { chat } from "@trigger.dev/sdk/ai"; import { streamText } from "ai"; import { openai } from "@ai-sdk/openai"; -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", pendingMessages: { // Only inject when there are completed steps (tool calls happened) diff --git a/docs/ai-chat/quick-start.mdx b/docs/ai-chat/quick-start.mdx index 881cc381548..cf066f090a3 100644 --- a/docs/ai-chat/quick-start.mdx +++ b/docs/ai-chat/quick-start.mdx @@ -1,12 +1,12 @@ --- title: "Quick Start" sidebarTitle: "Quick Start" -description: "Get a working AI chat in 3 steps — define a task, generate a token, and wire up the frontend." +description: "Get a working AI agent in 3 steps — define an agent, generate a token, and wire up the frontend." --- - - Use `chat.task` from `@trigger.dev/sdk/ai` to define a task that handles chat messages. The `run` function receives `ModelMessage[]` (already converted from the frontend's `UIMessage[]`) — pass them directly to `streamText`. + + Use `chat.agent` from `@trigger.dev/sdk/ai` to define an agent that handles chat messages. The `run` function receives `ModelMessage[]` (already converted from the frontend's `UIMessage[]`) — pass them directly to `streamText`. If you return a `StreamTextResult`, it's **automatically piped** to the frontend. 
@@ -15,7 +15,7 @@ description: "Get a working AI chat in 3 steps — define a task, generate a tok import { streamText } from "ai"; import { openai } from "@ai-sdk/openai"; - export const myChat = chat.task({ + export const myChat = chat.agent({ id: "my-chat", run: async ({ messages, signal }) => { // messages is ModelMessage[] — pass directly to streamText @@ -30,13 +30,13 @@ description: "Get a working AI chat in 3 steps — define a task, generate a tok ``` - For a **custom** [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype (typed `data-*` parts, tool map, etc.), define the task with [`chat.withUIMessage<...>().task({...})`](/ai-chat/types) instead of `chat.task`. + For a **custom** [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype (typed `data-*` parts, tool map, etc.), define the agent with [`chat.withUIMessage<...>().agent({...})`](/ai-chat/types) instead of `chat.agent`. - On your server (e.g. a Next.js server action), create a trigger public token scoped to your chat task. The transport calls your function with `chatId` and `purpose` (`"trigger"` or `"preload"`). Import `ResolveChatAccessTokenParams` from `@trigger.dev/sdk/chat` so the signature matches — see [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options). + On your server (e.g. a Next.js server action), create a trigger public token scoped to your chat agent. The transport calls your function with `chatId` and `purpose` (`"trigger"` or `"preload"`). Import `ResolveChatAccessTokenParams` from `@trigger.dev/sdk/chat` so the signature matches — see [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options). 
```ts app/actions.ts "use server"; diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index 6d959171622..6eb024a84bb 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -1,12 +1,12 @@ --- title: "API Reference" sidebarTitle: "API Reference" -description: "Complete API reference for the AI Chat SDK — backend options, events, frontend transport, and hooks." +description: "Complete API reference for the AI Agents SDK — backend options, events, frontend transport, and hooks." --- -## ChatTaskOptions +## ChatAgentOptions -Options for `chat.task()`. +Options for `chat.agent()`. | Option | Type | Default | Description | | ----------------------------- | ----------------------------------------------------------- | ------------------------------ | --------------------------------------------------------------------------------------------------- | @@ -19,7 +19,7 @@ Options for `chat.task()`. | `onBeforeTurnComplete` | `(event: BeforeTurnCompleteEvent) => Promise \| void` | — | Fires after response but before stream closes. Includes `writer`. | | `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes (stream closed) | | `onCompacted` | `(event: CompactedEvent) => Promise \| void` | — | Fires when compaction occurs. Includes `writer`. See [Compaction](/ai-chat/compaction) | -| `compaction` | `ChatTaskCompactionOptions` | — | Automatic context compaction. See [Compaction](/ai-chat/compaction) | +| `compaction` | `ChatAgentCompactionOptions` | — | Automatic context compaction. See [Compaction](/ai-chat/compaction) | | `pendingMessages` | `PendingMessagesOptions` | — | Mid-execution message injection. See [Pending Messages](/ai-chat/pending-messages) | | `prepareMessages` | `(event: PrepareMessagesEvent) => ModelMessage[]` | — | Transform model messages before use (cache breaks, context injection, etc.) 
| | `maxTurns` | `number` | `100` | Max conversational turns per run | @@ -37,7 +37,7 @@ Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine` ## Task context (`ctx`) -All **`chat.task`** lifecycle events (**`onPreload`**, **`onChatStart`**, **`onTurnStart`**, **`onBeforeTurnComplete`**, **`onTurnComplete`**, **`onCompacted`**) and the object passed to **`run`** include **`ctx`**: the same **`TaskRunContext`** shape as the `ctx` in `task({ run: (payload, { ctx }) => ... })`. +All **`chat.agent`** lifecycle events (**`onPreload`**, **`onChatStart`**, **`onTurnStart`**, **`onBeforeTurnComplete`**, **`onTurnComplete`**, **`onCompacted`**) and the object passed to **`run`** include **`ctx`**: the same **`TaskRunContext`** shape as the `ctx` in `task({ run: (payload, { ctx }) => ... })`. Use **`ctx`** for run metadata, tags, parent links, or any API that needs the full run record. The chat-specific string **`runId`** on events is always **`ctx.run.id`**; both are provided for convenience. @@ -203,9 +203,9 @@ onBeforeTurnComplete: async ({ writer, usage }) => { }, ``` -## ChatTaskCompactionOptions +## ChatAgentCompactionOptions -Options for the `compaction` field on `chat.task()`. See [Compaction](/ai-chat/compaction) for usage guide. +Options for the `compaction` field on `chat.agent()`. See [Compaction](/ai-chat/compaction) for usage guide. | Option | Type | Required | Description | | ---------------------- | ---------------------------------------------------------------------------- | -------- | ---------------------------------------------------------------------------- | @@ -337,7 +337,7 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. 
| Method | Description | | ------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | -| `chat.task(options)` | Create a chat task | +| `chat.agent(options)` | Create a chat agent | | `chat.createSession(payload, options)` | Create an async iterator for chat turns | | `chat.pipe(source, options?)` | Pipe a stream to the frontend (from anywhere inside a task) | | `chat.pipeAndCapture(source, options?)` | Pipe and capture the response `UIMessage` | @@ -345,7 +345,7 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. | `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | | `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` | | `chat.local({ id })` | Create a per-run typed local (see [Per-run data](/ai-chat/features#per-run-data-with-chatlocal)) | -| `chat.createAccessToken(taskId)` | Create a public access token for a chat task | +| `chat.createAccessToken(taskId)` | Create a public access token for a chat agent | | `chat.setTurnTimeout(duration)` | Override turn timeout at runtime (e.g. `"2h"`) | | `chat.setTurnTimeoutInSeconds(seconds)` | Override turn timeout at runtime (in seconds) | | `chat.setIdleTimeoutInSeconds(seconds)` | Override idle timeout at runtime | @@ -360,7 +360,7 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. ## `chat.withUIMessage` -Returns a [`ChatBuilder`](/ai-chat/types#chatbuilder) with a fixed `UIMessage` subtype. Chain `.withClientData()`, hook methods, and `.task()`. +Returns a [`ChatBuilder`](/ai-chat/types#chatbuilder) with a fixed `UIMessage` subtype. Chain `.withClientData()`, hook methods, and `.agent()`. 
```ts chat.withUIMessage(config?: ChatWithUIMessageConfig): ChatBuilder; @@ -368,13 +368,13 @@ chat.withUIMessage(config?: ChatWithUIMessageConfig): ChatBuilder` | Optional defaults for `toUIMessageStream()`. Shallow-merged with `uiMessageStreamOptions` on the inner `.task({ ... })` (task wins on key conflicts). | +| `config.streamOptions` | `ChatUIMessageStreamOptions` | Optional defaults for `toUIMessageStream()`. Shallow-merged with `uiMessageStreamOptions` on the inner `.agent({ ... })` (agent wins on key conflicts). | Use this when you need [`InferChatUIMessage`](#inferchatuimessage) / typed `data-*` parts / `InferUITools` to line up across backend hooks and `useChat`. Full guide: [Types](/ai-chat/types). ## `chat.withClientData` -Returns a [`ChatBuilder`](/ai-chat/types#chatbuilder) with a fixed client data schema. All hooks and `run` get typed `clientData` without passing `clientDataSchema` in `.task()` options. +Returns a [`ChatBuilder`](/ai-chat/types#chatbuilder) with a fixed client data schema. All hooks and `run` get typed `clientData` without passing `clientDataSchema` in `.agent()` options. ```ts chat.withClientData({ schema: TSchema }): ChatBuilder; @@ -390,11 +390,11 @@ Full guide: [Typed client data](/ai-chat/types#typed-client-data-with-chatwithcl | Field | Type | Description | | --------------- | ---------------------------------- | --------------------------------------------------------------------- | -| `streamOptions` | `ChatUIMessageStreamOptions` | Default `toUIMessageStream()` options for tasks created via `.task()` | +| `streamOptions` | `ChatUIMessageStreamOptions` | Default `toUIMessageStream()` options for agents created via `.agent()` | ## `InferChatUIMessage` -Type helper: extracts the `UIMessage` subtype from a chat task’s wire payload. +Type helper: extracts the `UIMessage` subtype from a chat agent’s wire payload. 
```ts import type { InferChatUIMessage } from "@trigger.dev/sdk/ai"; @@ -403,7 +403,7 @@ import type { InferChatUIMessage } from "@trigger.dev/sdk/ai"; type Msg = InferChatUIMessage; ``` -Use with `useChat({ transport })` when using [`chat.withUIMessage`](/ai-chat/types). For tasks defined with plain `chat.task()` (no custom generic), this resolves to the base `UIMessage`. +Use with `useChat({ transport })` when using [`chat.withUIMessage`](/ai-chat/types). For agents defined with plain `chat.agent()` (no custom generic), this resolves to the base `UIMessage`. ## AI helpers (`ai` from `@trigger.dev/sdk/ai`) @@ -415,7 +415,7 @@ Use with `useChat({ transport })` when using [`chat.withUIMessage`](/ai-cha ## ChatUIMessageStreamOptions -Options for customizing `toUIMessageStream()`. Set as static defaults via `uiMessageStreamOptions` on `chat.task()`, or override per-turn via `chat.setUIMessageStreamOptions()`. See [Stream options](/ai-chat/backend#stream-options) for usage examples. +Options for customizing `toUIMessageStream()`. Set as static defaults via `uiMessageStreamOptions` on `chat.agent()`, or override per-turn via `chat.setUIMessageStreamOptions()`. See [Stream options](/ai-chat/backend#stream-options) for usage examples. Derived from the AI SDK's `UIMessageStreamOptions` with `onFinish`, `originalMessages`, and `generateMessageId` omitted (managed internally). diff --git a/docs/ai-chat/types.mdx b/docs/ai-chat/types.mdx index 1350a2f259e..6f40f4a9a5e 100644 --- a/docs/ai-chat/types.mdx +++ b/docs/ai-chat/types.mdx @@ -1,14 +1,14 @@ --- title: "Types" sidebarTitle: "Types" -description: "TypeScript types for AI Chat tasks, UI messages, and the frontend transport." +description: "TypeScript types for AI Agents, UI messages, and the frontend transport." --- TypeScript patterns for [AI Chat](/ai-chat/overview). 
This page covers how to pin a custom AI SDK [`UIMessage`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/ui-message) subtype with `chat.withUIMessage`, fix a typed `clientData` schema with `chat.withClientData`, chain builder-level hooks, and align types on the client. ## Custom `UIMessage` with `chat.withUIMessage` -`chat.task()` types the wire payload with the base AI SDK `UIMessage`. That is enough for many apps. +`chat.agent()` types the wire payload with the base AI SDK `UIMessage`. That is enough for many apps. When you add **custom `data-*` parts** (via `chat.stream` / `writer`) or a **typed tool map** (e.g. `InferUITools`), you want a **narrower** `UIMessage` generic so that: @@ -16,7 +16,7 @@ When you add **custom `data-*` parts** (via `chat.stream` / `writer`) or a **typ - Stream options like `sendReasoning` align with your message shape - The frontend can treat `useChat` messages as the same subtype end-to-end -`chat.withUIMessage(config?)` returns a [ChatBuilder](#chatbuilder) where `.task(...)` accepts the **same options as** [`chat.task()`](/ai-chat/backend#chat-task) but fixes `YourUIMessage` as the UI message type for that chat task. +`chat.withUIMessage(config?)` returns a [ChatBuilder](#chatbuilder) where `.agent(...)` accepts the **same options as** [`chat.agent()`](/ai-chat/backend#chat-agent) but fixes `YourUIMessage` as the UI message type for that chat agent. ### Defining a `UIMessage` subtype @@ -46,9 +46,9 @@ export type MyChatUIMessage = UIMessage; Task-backed tools should use AI SDK [`tool()`](https://sdk.vercel.ai/docs/ai-sdk-core/tools-and-tool-calling) with `execute: ai.toolExecute(schemaTask)` where needed — see [Task-backed AI tools](/tasks/schemaTask#task-backed-ai-tools). -### Backend: `chat.withUIMessage(...).task(...)` +### Backend: `chat.withUIMessage(...).agent(...)` -Call `withUIMessage` **once**, then chain `.task({ ... })` instead of `chat.task({ ... })`. 
You can also chain `.withClientData()` and hook methods before `.task()`: +Call `withUIMessage` **once**, then chain `.agent({ ... })` instead of `chat.agent({ ... })`. You can also chain `.withClientData()` and hook methods before `.agent()`: ```ts import { chat } from "@trigger.dev/sdk/ai"; @@ -76,7 +76,7 @@ export const myChat = chat .withClientData({ schema: z.object({ userId: z.string() }), }) - .task({ + .agent({ id: "my-chat", onTurnStart: async ({ uiMessages, writer }) => { // uiMessages is MyChatUIMessage[] — custom data parts are typed @@ -98,9 +98,9 @@ export const myChat = chat ### Default stream options -The optional `streamOptions` object becomes the **default** [`uiMessageStreamOptions`](/ai-chat/reference#chat-task-options) for `toUIMessageStream()`. +The optional `streamOptions` object becomes the **default** [`uiMessageStreamOptions`](/ai-chat/reference#chat-agent-options) for `toUIMessageStream()`. -If you also set `uiMessageStreamOptions` on the inner `.task({ ... })`, the two objects are **shallow-merged** — keys on the **task** win on conflicts. Per-turn overrides via [`chat.setUIMessageStreamOptions()`](/ai-chat/backend#stream-options) still apply on top. +If you also set `uiMessageStreamOptions` on the inner `.agent({ ... })`, the two objects are **shallow-merged** — keys on the **agent** win on conflicts. Per-turn overrides via [`chat.setUIMessageStreamOptions()`](/ai-chat/backend#stream-options) still apply on top. ### Frontend: `InferChatUIMessage` @@ -131,7 +131,7 @@ You can also import `InferChatUIMessage` from `@trigger.dev/sdk/ai` in non-React ## Typed client data with `chat.withClientData` -`chat.withClientData({ schema })` returns a [ChatBuilder](#chatbuilder) that fixes the client data schema. All hooks and `run` receive typed `clientData` without needing `clientDataSchema` in `.task()` options. +`chat.withClientData({ schema })` returns a [ChatBuilder](#chatbuilder) that fixes the client data schema. 
All hooks and `run` receive typed `clientData` without needing `clientDataSchema` in `.agent()` options. ```ts import { chat } from "@trigger.dev/sdk/ai"; @@ -141,7 +141,7 @@ export const myChat = chat .withClientData({ schema: z.object({ userId: z.string(), model: z.string().optional() }), }) - .task({ + .agent({ id: "my-chat", onPreload: async ({ clientData }) => { // clientData is typed as { userId: string; model?: string } @@ -159,7 +159,7 @@ export const myChat = chat ## ChatBuilder -Both `chat.withUIMessage()` and `chat.withClientData()` return a **ChatBuilder** — a chainable object that accumulates configuration before creating the task with `.task()`. +Both `chat.withUIMessage()` and `chat.withClientData()` return a **ChatBuilder** — a chainable object that accumulates configuration before creating the agent with `.agent()`. Builder methods can be chained in any order: @@ -177,7 +177,7 @@ export const myChat = chat .onChatResume(async ({ ctx }) => { warmCache(ctx.run.id); }) - .task({ + .agent({ id: "my-chat", run: async ({ messages, signal }) => { return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); @@ -198,7 +198,7 @@ chat // Runs first — shared setup across tasks using this builder await initializeSharedState(event.chatId); }) - .task({ + .agent({ id: "my-chat", onPreload: async (event) => { // Runs second — task-specific logic @@ -214,13 +214,13 @@ chat Set types first (`.withUIMessage()`, `.withClientData()`), then hooks. Hook parameters are typed based on the builder's current generics — so hooks registered after `.withClientData()` get typed `clientData`. -### When plain `chat.task()` is enough +### When plain `chat.agent()` is enough -If you do not rely on custom `UIMessage` generics (only default text, reasoning, and built-in tool UI types), **`chat.task()` alone is fine** — no need for `withUIMessage`. 
+If you do not rely on custom `UIMessage` generics (only default text, reasoning, and built-in tool UI types), **`chat.agent()` alone is fine** — no need for `withUIMessage`. ## See also -- [Backend — `chat.task()`](/ai-chat/backend#chat-task) +- [Backend — `chat.agent()`](/ai-chat/backend#chat-agent) - [Backend — Lifecycle hooks](/ai-chat/backend#lifecycle-hooks) - [Frontend — transport & `useChat`](/ai-chat/frontend) - [API reference — `chat.withUIMessage`](/ai-chat/reference#chat-withuimessage) diff --git a/docs/ai/prompts.mdx b/docs/ai/prompts.mdx index 4ac324ffff9..e3a7d395a3d 100644 --- a/docs/ai/prompts.mdx +++ b/docs/ai/prompts.mdx @@ -209,9 +209,9 @@ const result = await generateText({ }); ``` -## Using with chat.task() +## Using with chat.agent() -Prompts integrate with `chat.task()` via `chat.prompt` — a run-scoped store for the resolved prompt. Store a prompt once in a lifecycle hook, then access it anywhere during the run. +Prompts integrate with `chat.agent()` via `chat.prompt` — a run-scoped store for the resolved prompt. Store a prompt once in a lifecycle hook, then access it anywhere during the run. 
### chat.prompt.set() and chat.prompt() @@ -232,7 +232,7 @@ const systemPrompt = prompts.define({ content: `You are a helpful assistant for {{name}}.`, }); -export const myChat = chat.task({ +export const myChat = chat.agent({ id: "my-chat", onChatStart: async ({ clientData }) => { const resolved = await systemPrompt.resolve({ name: clientData.name }); diff --git a/docs/docs.json b/docs/docs.json index 5a924c6870c..3ccbe65157f 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -85,7 +85,7 @@ "pages": [ "ai/prompts", { - "group": "Chat", + "group": "Agents", "pages": [ "ai-chat/overview", "ai-chat/quick-start", diff --git a/docs/tasks/schemaTask.mdx b/docs/tasks/schemaTask.mdx index 82ba4aa5679..71eb6720db9 100644 --- a/docs/tasks/schemaTask.mdx +++ b/docs/tasks/schemaTask.mdx @@ -172,7 +172,7 @@ export const chartTool = tool({ }); ``` -Inside the task run, you can read tool execution context with **`ai.currentToolOptions()`** (and helpers like `ai.toolCallId()`, `ai.chatContext()` when running inside a [`chat.task`](/ai-chat/overview)): +Inside the task run, you can read tool execution context with **`ai.currentToolOptions()`** (and helpers like `ai.toolCallId()`, `ai.chatContext()` when running inside a [`chat.agent`](/ai-chat/overview)): ```ts import { ai } from "@trigger.dev/sdk/ai"; From eb5d516ac986e56a0a17afe0052d14121b7c69c3 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 2 Apr 2026 13:07:04 +0100 Subject: [PATCH 14/49] subagents and AgentChat docs --- docs/ai-chat/server-chat.mdx | 245 ++++++++++++++++++++++++++++++++++ docs/ai-chat/sub-agents.mdx | 248 +++++++++++++++++++++++++++++++++++ docs/docs.json | 2 + 3 files changed, 495 insertions(+) create mode 100644 docs/ai-chat/server-chat.mdx create mode 100644 docs/ai-chat/sub-agents.mdx diff --git a/docs/ai-chat/server-chat.mdx b/docs/ai-chat/server-chat.mdx new file mode 100644 index 00000000000..22cd7573305 --- /dev/null +++ b/docs/ai-chat/server-chat.mdx @@ -0,0 +1,245 @@ +--- +title: "Server-Side 
Chat" +sidebarTitle: "Server-Side Chat" +description: "Use AgentChat to interact with chat agents from server-side code — tasks, webhooks, scripts, or other agents." +--- + +`AgentChat` lets you chat with agents from server-side code. It works inside tasks (agent-to-agent), request handlers, webhook processors, and scripts. + +```ts +import { AgentChat } from "@trigger.dev/sdk/chat"; + +const chat = new AgentChat({ agent: "my-agent" }); +const stream = await chat.sendMessage("Hello!"); +const text = await stream.text(); +await chat.close(); +``` + +## Type-safe client data + +Pass `typeof yourAgent` as a type parameter and `clientData` is automatically typed from the agent's `withClientData` schema: + +```ts +import { AgentChat } from "@trigger.dev/sdk/chat"; +import type { myAgent } from "./trigger/my-agent"; + +const chat = new AgentChat({ + agent: "my-agent", + clientData: { userId: "user_123" }, // ← typed from agent definition +}); +``` + +## Conversation lifecycle + +Each `AgentChat` instance represents one conversation. The conversation ID is auto-generated or can be set explicitly: + +```ts +// Auto-generated ID +const chat = new AgentChat({ agent: "my-agent" }); + +// Explicit ID — useful for persistence or finding the run later +const chat = new AgentChat({ agent: "my-agent", id: `review-${prNumber}` }); +``` + +### Sending messages + +`sendMessage()` triggers a new run on the first call, then reuses the same run for subsequent messages via input streams: + +```ts +// First message — triggers a new run +const stream1 = await chat.sendMessage("Review PR #42"); +const review = await stream1.text(); + +// Follow-up — same run, agent has full context +const stream2 = await chat.sendMessage("Can you fix the main bug?"); +const fix = await stream2.text(); +``` + +### Preloading (optional) + +If you want the agent to initialize before the first message (e.g., load data, authenticate), call `preload()`. 
This is optional — `sendMessage()` triggers the run automatically if needed. + +```ts +await chat.preload(); +// Agent's onPreload hook fires now, before user types anything +const stream = await chat.sendMessage("Hello"); +``` + +### Closing + +Signal the agent to exit its loop gracefully: + +```ts +await chat.close(); +``` + +Without `close()`, the agent exits on its own when its idle/suspend timeout expires. + +## Reading responses + +`sendMessage()` returns a `ChatStream` — a typed wrapper around the response. + +### Get the full text + +```ts +const stream = await chat.sendMessage("What is Trigger.dev?"); +const text = await stream.text(); +``` + +### Get structured results + +```ts +const stream = await chat.sendMessage("Research this topic"); +const { text, toolCalls, toolResults } = await stream.result(); + +for (const tc of toolCalls) { + console.log(`Tool: ${tc.toolName}, Input: ${JSON.stringify(tc.input)}`); +} +``` + +### Stream chunks in real-time + +```ts +const stream = await chat.sendMessage("Write a report"); + +for await (const chunk of stream) { + if (chunk.type === "text-delta") { + process.stdout.write(chunk.delta); + } + if (chunk.type === "tool-input-available") { + console.log(`Using tool: ${chunk.toolName}`); + } +} +``` + +## Stateless request handlers + +In a stateless environment (HTTP handler, serverless function), you need to persist and restore the session across requests. + +`AgentChat` provides a `session` option and two callbacks for this: + +```ts +import { AgentChat } from "@trigger.dev/sdk/chat"; + +export async function POST(req: Request) { + const { chatId, message, runId, lastEventId } = await req.json(); + + const chat = new AgentChat({ + agent: "my-agent", + id: chatId, + // Restore from previous request + session: runId ? 
{ runId, lastEventId } : undefined, + // Persist when a new run starts + onTriggered: async ({ runId, chatId }) => { + await db.sessions.upsert({ chatId, runId }); + }, + // Persist after each turn for stream resumption + onTurnComplete: async ({ lastEventId, chatId }) => { + await db.sessions.update({ chatId, lastEventId }); + }, + }); + + const stream = await chat.sendMessage(message); + const text = await stream.text(); + + return Response.json({ text, runId: chat.run?.runId }); +} +``` + +## Sub-agent tool pattern + +`AgentChat` can be used inside an AI SDK tool to delegate work to a durable sub-agent. The sub-agent's response streams as preliminary tool results: + +```ts +import { tool } from "ai"; +import { AgentChat } from "@trigger.dev/sdk/chat"; +import { z } from "zod"; + +const researchTool = tool({ + description: "Delegate research to a specialist agent.", + inputSchema: z.object({ topic: z.string() }), + execute: async function* ({ topic }, { abortSignal }) { + const chat = new AgentChat({ agent: "research-agent" }); + const stream = await chat.sendMessage(topic, { abortSignal }); + yield* stream.messages(); + await chat.close(); + }, + toModelOutput: ({ output: message }) => { + const lastText = message?.parts?.findLast( + (p: { type: string }) => p.type === "text" + ) as { text?: string } | undefined; + return { type: "text", value: lastText?.text ?? "Done." }; + }, +}); +``` + +This supports single-turn delegation, multi-turn LLM-driven conversations with persistent sub-agents, and cross-turn state that survives snapshot/restore. + +See the [Sub-Agents guide](/ai-chat/sub-agents) for the full pattern including multi-turn conversations, cleanup, and what the frontend sees. 
+ +## Additional methods + +### Steering + +Send a message during an active stream without interrupting it: + +```ts +await chat.steer("Focus on security issues specifically"); +``` + +### Stop generation + +Abort the current `streamText` call without ending the run: + +```ts +await chat.stop(); +``` + +### Raw messages + +For full control over the UIMessage shape: + +```ts +const rawStream = await chat.sendRaw([ + { + id: "msg-1", + role: "user", + parts: [ + { type: "text", text: "Hello" }, + { type: "file", url: "https://...", mediaType: "image/png" }, + ], + }, +]); +``` + +### Reconnect + +Resume a stream subscription after a disconnect: + +```ts +const stream = await chat.reconnect(); +``` + +## AgentChat options + +| Option | Type | Default | Description | +|---|---|---|---| +| `agent` | `string` | required | The agent task ID to trigger | +| `id` | `string` | `crypto.randomUUID()` | Conversation ID for tagging and correlation | +| `clientData` | typed from agent | `undefined` | Client data included in every request | +| `session` | `{ runId: string; lastEventId?: string }` | `undefined` | Restore a previous session | +| `onTriggered` | `(event) => void` | `undefined` | Called when a new run is created | +| `onTurnComplete` | `(event) => void` | `undefined` | Called when a turn's stream ends | +| `streamKey` | `string` | `"chat"` | Output stream key | +| `streamTimeoutSeconds` | `number` | `120` | SSE timeout in seconds | +| `triggerOptions` | `object` | `undefined` | Tags, queue, machine, priority | + +## ChatStream methods + +| Method | Returns | Description | +|---|---|---| +| `text()` | `Promise` | Consume stream, return accumulated text | +| `result()` | `Promise` | Consume stream, return `{ text, toolCalls, toolResults }` | +| `messages()` | `AsyncGenerator` | Yield accumulated UIMessage snapshots (sub-agent pattern) | +| `[Symbol.asyncIterator]` | `UIMessageChunk` | Iterate over typed stream chunks | +| `.stream` | `ReadableStream` | Raw stream for AI 
SDK utilities | diff --git a/docs/ai-chat/sub-agents.mdx b/docs/ai-chat/sub-agents.mdx new file mode 100644 index 00000000000..c0bd4c064af --- /dev/null +++ b/docs/ai-chat/sub-agents.mdx @@ -0,0 +1,248 @@ +--- +title: "Sub-Agents" +sidebarTitle: "Sub-Agents" +description: "Delegate work to durable sub-agents from within a parent agent's tool calls, with streaming preliminary results." +--- + +Sub-agents let a parent agent delegate work to other agents running as durable Trigger.dev tasks. The sub-agent's response streams back through the parent as preliminary tool results, so the frontend sees the sub-agent working inside the parent's tool call card. + +This builds on the AI SDK's [async generator tool pattern](https://ai-sdk.dev/docs/agents/subagents) and Trigger.dev's [AgentChat](/ai-chat/server-chat) for server-side agent interaction. + +## How it works + +1. The parent LLM calls a tool (e.g., `researchAgent`) +2. The tool's `execute` is an `async function*` (async generator) +3. Inside, it creates an `AgentChat` and sends a message to the sub-agent +4. `yield* stream.messages()` streams each accumulated `UIMessage` snapshot as a preliminary tool result +5. The frontend renders the sub-agent's response building up inside the parent's tool card +6. `toModelOutput` compresses the full output into a summary for the parent LLM + +``` +Parent LLM + │ + ├─ calls researchAgent tool + │ │ + │ ├─ AgentChat triggers sub-agent run + │ ├─ sub-agent streams response (text, tool calls, etc.) + │ ├─ yield* sends UIMessage snapshots as preliminary results + │ └─ toModelOutput compresses for parent LLM + │ + └─ parent LLM reads compressed summary, continues reasoning +``` + +## Single-turn sub-agent + +The simplest pattern: one tool call, one sub-agent turn, conversation closes. 
+ +```ts +import { tool } from "ai"; +import { AgentChat } from "@trigger.dev/sdk/chat"; +import { z } from "zod"; +import type { prReviewAgent } from "./trigger/pr-review"; + +const prReviewTool = tool({ + description: "Delegate a PR review to the PR review agent.", + inputSchema: z.object({ + prNumber: z.number().describe("The PR number to review"), + repo: z.string().describe("The GitHub repo URL"), + }), + execute: async function* ({ prNumber, repo }, { abortSignal }) { + const chat = new AgentChat({ + agent: "pr-review", + id: `review-${prNumber}`, + clientData: { userId: "parent-agent", githubUrl: repo }, + }); + + const stream = await chat.sendMessage(`Review PR #${prNumber}`, { abortSignal }); + + // Each yield sends a UIMessage snapshot to the frontend + yield* stream.messages(); + + await chat.close(); + }, + // The parent LLM only sees this compressed summary + toModelOutput: ({ output: message }) => { + const lastText = message?.parts?.findLast( + (p: { type: string }) => p.type === "text" + ) as { text?: string } | undefined; + return { type: "text", value: lastText?.text ?? "Review complete." }; + }, +}); +``` + +Use this tool in a parent agent's `streamText` call: + +```ts +import { streamText } from "ai"; +import { anthropic } from "@ai-sdk/anthropic"; + +const result = streamText({ + model: anthropic("claude-sonnet-4-6"), + tools: { prReview: prReviewTool }, + prompt: "Review PR #42 on triggerdotdev/trigger.dev", +}); +``` + +## Multi-turn sub-agent (LLM-driven) + +The parent LLM drives a persistent conversation with a sub-agent across multiple tool calls. Each call with the same `conversationId` hits the same durable agent run. + +```ts +import { tool } from "ai"; +import { AgentChat } from "@trigger.dev/sdk/chat"; +import { z } from "zod"; + +// Track active sub-agent conversations +const subAgents = new Map(); + +const researchTool = tool({ + description: + "Talk to a research agent. 
Use the same conversationId to continue " + + "an existing conversation — the agent remembers full context.", + inputSchema: z.object({ + conversationId: z + .string() + .describe("Unique ID for this research thread. Reuse to continue."), + message: z.string().describe("Your message to the research agent"), + }), + execute: async function* ({ conversationId, message }, { abortSignal }) { + let agent = subAgents.get(conversationId); + if (!agent) { + agent = new AgentChat({ + agent: "research-agent", + id: conversationId, + }); + subAgents.set(conversationId, agent); + } + + const stream = await agent.sendMessage(message, { abortSignal }); + yield* stream.messages(); + }, + toModelOutput: ({ output: message }) => { + const lastText = message?.parts?.findLast( + (p: { type: string }) => p.type === "text" + ) as { text?: string } | undefined; + return { type: "text", value: lastText?.text ?? "Done." }; + }, +}); +``` + +The parent LLM naturally calls this tool multiple times: + +1. `researchAgent({ conversationId: "competitors", message: "Research competitors in AI agents" })` — first call triggers a new sub-agent run +2. `researchAgent({ conversationId: "competitors", message: "Go deeper on pricing" })` — same run, sub-agent has full context +3. `researchAgent({ conversationId: "new-topic", message: "..." })` — different ID = different sub-agent + +### Cross-turn persistence + +Sub-agent conversations persist across **parent turns** because the `Map` lives in the parent's process heap. When the parent suspends and restores via snapshot, the heap is preserved — the Map still has the conversations, the sessions still have the run IDs. + +```ts +export const orchestrator = chat + .withClientData({ schema: z.object({ userId: z.string() }) }) + .customAgent({ + id: "orchestrator", + run: async (payload, { signal: runSignal }) => { + // These survive across parent turns via snapshot/restore + const subAgents = new Map(); + + const researchTool = tool({ + // ... 
closes over subAgents Map
      });

      // Turn loop — subAgents persist across all turns
      for (let turn = 0; turn < 50; turn++) {
        // ... streamText with researchTool
      }

      // Cleanup when parent exits
      await Promise.all(
        Array.from(subAgents.values()).map((a) => a.close().catch(() => {}))
      );
    },
  });
```

## How sub-agents clean up

Sub-agents clean up through three mechanisms:

1. **Explicit close**: Call `chat.close()` or `agent.close()` when done
2. **Idle timeout**: The sub-agent's idle timeout expires and it suspends
3. **Suspend timeout**: The sub-agent's suspend timeout expires and the run ends

For the multi-turn pattern, the parent should clean up sub-agents when it exits (in `onComplete` for managed agents, or at the end of the loop for custom agents). Without explicit cleanup, sub-agents close on their own via timeouts — no leaked resources or cost while suspended.

## What the frontend sees

Each `yield` from `stream.messages()` sends a complete `UIMessage` containing all the sub-agent's parts accumulated so far. The AI SDK delivers these as `tool-output-available` chunks with `preliminary: true`.

The frontend renders the tool part with:
- `state: "output-available"` and `preliminary: true` while streaming
- `state: "output-available"` and `preliminary: false` (or absent) when done

The tool output contains the full `UIMessage` with nested parts — text, the sub-agent's own tool calls and results, reasoning, etc.

### Controlling what the parent LLM sees

`toModelOutput` transforms the tool's output before it enters the parent LLM's context. 
The full UIMessage streams to the frontend, but the model only sees the compressed version: + +```ts +toModelOutput: ({ output: message }) => { + // Extract just the final text — the model doesn't need + // to see all the sub-agent's tool calls and intermediate work + const lastText = message?.parts?.findLast( + (p: { type: string }) => p.type === "text" + ) as { text?: string } | undefined; + return { type: "text", value: lastText?.text ?? "Done." }; +}, +``` + +This is important for token efficiency: the sub-agent might use 100K tokens exploring and reasoning, but the parent LLM only consumes the summary. + +## ChatStream.messages() + +The `messages()` method on `ChatStream` wraps the AI SDK's `readUIMessageStream`. It reads the raw `UIMessageChunk` stream and yields complete `UIMessage` snapshots — each containing all parts received so far. + +```ts +const stream = await chat.sendMessage("Research this topic"); + +// Each yield is a complete UIMessage with all accumulated parts +for await (const message of stream.messages()) { + console.log(message.parts.length, "parts so far"); +} +``` + +For the sub-agent pattern, use `yield*` to delegate all yields to the parent tool's generator: + +```ts +execute: async function* ({ topic }, { abortSignal }) { + const stream = await chat.sendMessage(topic, { abortSignal }); + yield* stream.messages(); +}, +``` + + + `stream.messages()` consumes the stream. You can't also call `stream.text()` or iterate over chunks on the same stream. Pick one consumption mode. 
+ + +## Combining with chat.agent() + +Sub-agent tools work inside both `chat.agent()` (managed) and `chat.customAgent()` (manual lifecycle): + +```ts +// Managed agent with sub-agent tool +export const myAgent = chat.agent({ + id: "orchestrator", + run: async ({ messages, stopSignal }) => { + return streamText({ + model: anthropic("claude-sonnet-4-6"), + messages, + tools: { research: researchTool }, + abortSignal: stopSignal, + }); + }, +}); +``` + +For `chat.customAgent()`, define the tool and sub-agent Map inside the `run` closure so they survive across turns. See [Example 7 in the ai-chat reference project](https://github.com/triggerdotdev/trigger.dev/blob/main/references/ai-chat/src/trigger/chat-client-test.ts) for a complete working example. diff --git a/docs/docs.json b/docs/docs.json index 3ccbe65157f..099096fb529 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -91,6 +91,8 @@ "ai-chat/quick-start", "ai-chat/backend", "ai-chat/frontend", + "ai-chat/server-chat", + "ai-chat/sub-agents", "ai-chat/types", "ai-chat/features", "ai-chat/compaction", From ae2923107c47d4a021cafc4292b5159de2482854 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 2 Apr 2026 13:35:09 +0100 Subject: [PATCH 15/49] remove references to the ai chat reference project from the docs --- docs/ai-chat/patterns/code-sandbox.mdx | 3 --- docs/ai-chat/patterns/database-persistence.mdx | 2 +- docs/ai-chat/sub-agents.mdx | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/ai-chat/patterns/code-sandbox.mdx b/docs/ai-chat/patterns/code-sandbox.mdx index eb76d8fcbf4..901dfac68dd 100644 --- a/docs/ai-chat/patterns/code-sandbox.mdx +++ b/docs/ai-chat/patterns/code-sandbox.mdx @@ -12,9 +12,6 @@ This page describes a **durable chat** pattern that fits `chat.agent()`: - **Reuse** it for every `executeCode` tool call during that turn (and across turns in the same run if you keep the handle). 
- **Dispose** it **right before the run suspends** waiting for the next user message — using the **`onChatSuspend`** hook, not `onTurnComplete`. - - The reference implementation lives in the monorepo at [`references/ai-chat`](https://github.com/triggerdotdev/trigger.dev/tree/main/references/ai-chat) (`code-sandbox.ts`, `chat-tools.ts`, `trigger/chat.ts`). - ## Why not tear down in `onTurnComplete`? diff --git a/docs/ai-chat/patterns/database-persistence.mdx b/docs/ai-chat/patterns/database-persistence.mdx index 77bd45ee3e9..e2732aea7be 100644 --- a/docs/ai-chat/patterns/database-persistence.mdx +++ b/docs/ai-chat/patterns/database-persistence.mdx @@ -9,7 +9,7 @@ Durable chat runs can span **hours** and **many turns**. You usually want: 1. **Conversation state** — full **`UIMessage[]`** (or equivalent) keyed by **`chatId`**, so reloads and history views work. 2. **Live session state** — the **current Trigger `runId`**, a **scoped access token** for realtime + input streams, and optionally **`lastEventId`** for stream resume. -This page describes a **hook mapping** that works with any database. The [ai-chat reference app](https://github.com/triggerdotdev/trigger.dev/tree/main/references/ai-chat) implements the same idea with a SQL database and an ORM; adapt table and column names to your stack. +This page describes a **hook mapping** that works with any database. Adapt table and column names to your stack. ## Conceptual data model diff --git a/docs/ai-chat/sub-agents.mdx b/docs/ai-chat/sub-agents.mdx index c0bd4c064af..db9a7ff4b48 100644 --- a/docs/ai-chat/sub-agents.mdx +++ b/docs/ai-chat/sub-agents.mdx @@ -245,4 +245,4 @@ export const myAgent = chat.agent({ }); ``` -For `chat.customAgent()`, define the tool and sub-agent Map inside the `run` closure so they survive across turns. See [Example 7 in the ai-chat reference project](https://github.com/triggerdotdev/trigger.dev/blob/main/references/ai-chat/src/trigger/chat-client-test.ts) for a complete working example. 
+For `chat.customAgent()`, define the tool and sub-agent Map inside the `run` closure so they survive across turns. From 2e979f725b4bec5262b28fdae48444dfd687ca12 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 2 Apr 2026 16:13:19 +0100 Subject: [PATCH 16/49] agent mcp tools docs --- docs/ai-chat/mcp.mdx | 97 ++++++++++++++++++++++ docs/ai-chat/{ => patterns}/sub-agents.mdx | 0 docs/ai-chat/server-chat.mdx | 2 +- docs/docs.json | 3 +- docs/mcp-tools.mdx | 53 ++++++++++++ 5 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 docs/ai-chat/mcp.mdx rename docs/ai-chat/{ => patterns}/sub-agents.mdx (100%) diff --git a/docs/ai-chat/mcp.mdx b/docs/ai-chat/mcp.mdx new file mode 100644 index 00000000000..076b25a3240 --- /dev/null +++ b/docs/ai-chat/mcp.mdx @@ -0,0 +1,97 @@ +--- +title: "MCP Server" +sidebarTitle: "MCP Server" +description: "Chat with your agents from any AI coding tool using the Trigger.dev MCP server." +--- + +The Trigger.dev MCP server includes tools for having conversations with your chat agents directly from AI coding tools like Claude Code, Cursor, Windsurf, and others. This lets your AI assistant interact with your agents without writing any code. + +## Available tools + +| Tool | Description | +| --- | --- | +| `list_agents` | List all agents in the current worker | +| `start_agent_chat` | Start a conversation with an agent | +| `send_agent_message` | Send a message and get the response | +| `close_agent_chat` | Close a conversation | + +See the [MCP Tools Reference](/mcp-tools#agent-chat-tools) for full details on each tool. + +## Typical workflow + + + + Ask your AI assistant to list agents in your project. This calls `list_agents` which returns all tasks created with `chat.agent()` or `chat.customAgent()`. + + + Start a conversation with an agent using `start_agent_chat`. This triggers a run and optionally preloads the agent so it's ready to respond immediately. 
+ + If the agent uses `withClientData`, pass the required fields via `clientData`. + + + Send messages with `send_agent_message`. The agent processes each message and returns the full response text, including which tools were used. The agent retains full context across messages in the same chat. + + + When done, close the conversation with `close_agent_chat`. This signals the agent to exit gracefully. If you skip this step, the agent will close on its own when its idle timeout expires. + + + +## Example conversation + +Here's what a typical MCP conversation with an agent looks like: + +``` +You: List my agents + +AI: Found 3 agents in worker 20250402.12 (dev): + - support-agent (src/trigger/support.ts) + - pr-review (src/trigger/pr-review.ts) + - research (src/trigger/research.ts) + +You: Start a chat with the support agent + +AI: Agent chat started and preloaded. + - Chat ID: abc-123 + - Agent: support-agent + - Run ID: run_xyz + +You: Ask it to look up order #4821 + +AI: I found order #4821. It was placed on March 15 and shipped via + FedEx (tracking: 7891234). Current status: In Transit, estimated + delivery March 19. + + Tools used: lookupOrder, getShippingStatus + +You: Close the chat + +AI: Chat abc-123 closed. +``` + +## Setup + +If you haven't set up the MCP server yet, see the [MCP Server introduction](/mcp-introduction) for installation and client configuration. + +Agent chat tools require: +- A running dev server (`trigger dev`) or a deployed worker +- At least one agent defined with `chat.agent()` or `chat.customAgent()` + +## How it works + +Under the hood, the MCP tools use the same protocol as the [frontend transport](/ai-chat/frontend) and [AgentChat SDK](/ai-chat/server-chat): + +1. **`start_agent_chat`** triggers a task run with the `preload` trigger and stores the session (run ID, chat ID) in memory. +2. 
**`send_agent_message`** sends the message via the run's input stream and subscribes to the output SSE stream to collect the agent's full response. +3. **`close_agent_chat`** sends a close signal via the input stream and removes the session. + +Sessions are held in-memory within the MCP server process. If the MCP server restarts, active sessions are lost — but the underlying agent runs continue until their idle timeout. + + + The `get_current_worker` tool also labels agents with `[agent]` in its output, making it easy to identify which tasks are agents even when listing all tasks. + + +## See also + +- [AgentChat SDK](/ai-chat/server-chat) — programmatic server-side access to agents +- [Sub-Agents](/ai-chat/patterns/sub-agents) — agents calling other agents +- [MCP Tools Reference](/mcp-tools#agent-chat-tools) — full tool parameter reference diff --git a/docs/ai-chat/sub-agents.mdx b/docs/ai-chat/patterns/sub-agents.mdx similarity index 100% rename from docs/ai-chat/sub-agents.mdx rename to docs/ai-chat/patterns/sub-agents.mdx diff --git a/docs/ai-chat/server-chat.mdx b/docs/ai-chat/server-chat.mdx index 22cd7573305..eef67b7331c 100644 --- a/docs/ai-chat/server-chat.mdx +++ b/docs/ai-chat/server-chat.mdx @@ -175,7 +175,7 @@ const researchTool = tool({ This supports single-turn delegation, multi-turn LLM-driven conversations with persistent sub-agents, and cross-turn state that survives snapshot/restore. -See the [Sub-Agents guide](/ai-chat/sub-agents) for the full pattern including multi-turn conversations, cleanup, and what the frontend sees. +See the [Sub-Agents guide](/ai-chat/patterns/sub-agents) for the full pattern including multi-turn conversations, cleanup, and what the frontend sees. 
## Additional methods diff --git a/docs/docs.json b/docs/docs.json index 099096fb529..d58e420b163 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -92,15 +92,16 @@ "ai-chat/backend", "ai-chat/frontend", "ai-chat/server-chat", - "ai-chat/sub-agents", "ai-chat/types", "ai-chat/features", "ai-chat/compaction", "ai-chat/pending-messages", "ai-chat/background-injection", + "ai-chat/mcp", { "group": "Patterns", "pages": [ + "ai-chat/patterns/sub-agents", "ai-chat/patterns/database-persistence", "ai-chat/patterns/code-sandbox" ] diff --git a/docs/mcp-tools.mdx b/docs/mcp-tools.mdx index 037d7e887ef..9a920274fba 100644 --- a/docs/mcp-tools.mdx +++ b/docs/mcp-tools.mdx @@ -218,3 +218,56 @@ Check the status of the dev server and view recent output. Shows whether it is s The deploy and list_preview_branches tools are not available when the MCP server is running with the `--dev-only` flag. The `--readonly` flag hides deploy, trigger_task, and cancel_run. + +## Agent Chat Tools + +These tools let you have conversations with [chat agents](/ai-chat/overview) directly from your AI coding tool. See the [Agent MCP guide](/ai-chat/mcp) for a walkthrough. + +### list_agents + +List all chat agents registered in the current worker. Agents are tasks created with `chat.agent()` or `chat.customAgent()`. + +**Example usage:** +- `"What agents are available?"` +- `"List my chat agents"` + +### start_agent_chat + +Start a conversation with a chat agent. Returns a chat ID for use with `send_agent_message`. Optionally preloads the agent so it initializes before the first message. + +**Parameters:** +- `agentId` (required) — The agent task slug (e.g., `"support-agent"`) +- `chatId` (optional) — A custom conversation ID. Auto-generated if omitted +- `clientData` (optional) — Client data to include with every message (e.g., `{ userId: "user_123" }`). 
Must match the agent's `clientDataSchema` if one is defined +- `preload` (optional, default: `true`) — Whether to preload the agent before the first message + +**Example usage:** +- `"Start a chat with the support agent"` +- `"Talk to the pr-review agent with userId abc"` + +### send_agent_message + +Send a message to an active agent chat and get the full response back. The agent remembers full context from previous messages in the same chat. + +**Parameters:** +- `chatId` (required) — The chat ID from `start_agent_chat` +- `message` (required) — The message text to send + +**Example usage:** +- `"Tell the agent to review the latest PR"` +- `"Ask it what tools it has available"` + +### close_agent_chat + +Close an agent chat conversation. The agent exits its loop gracefully. Without this, the agent will close on its own when its idle timeout expires. + +**Parameters:** +- `chatId` (required) — The chat ID to close + +**Example usage:** +- `"Close the chat"` +- `"End the conversation"` + + + The `start_agent_chat`, `send_agent_message`, and `close_agent_chat` tools are write operations and are not available in readonly mode. + From fef5efcf362539016217fc851be3f7b424bb520f Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 2 Apr 2026 18:02:18 +0100 Subject: [PATCH 17/49] docs for validating ui messages --- docs/ai-chat/backend.mdx | 33 +++++++++++++++++++++++++++++++++ docs/ai-chat/reference.mdx | 16 ++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index ff1f7686e77..1e05c073915 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -148,6 +148,39 @@ export const myChat = chat.agent({ (per-message). See [Client data and metadata](/ai-chat/frontend#client-data-and-metadata). +#### onValidateMessages + +Validate or transform incoming `UIMessage[]` before they are converted to model messages. 
Fires once per turn with the raw messages from the wire payload (after cleanup of aborted tool parts), **before** accumulation and `toModelMessages()`. + +Return the validated messages array. Throw to abort the turn with an error. + +This is the right place to call the AI SDK's [`validateUIMessages`](https://ai-sdk.dev/docs/ai-sdk-ui/chatbot-message-persistence#validating-messages-on-the-server) to catch malformed messages from storage or untrusted input before they reach the model — especially useful when persisting conversations to a database, where tool schemas may drift between deploys. + +| Field | Type | Description | +| --------- | --------------------------------------------------------------- | ---------------------------------------- | +| `messages` | `UIMessage[]` | Incoming UI messages for this turn | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Turn number (0-indexed) | +| `trigger` | `"submit-message" \| "regenerate-message" \| "preload" \| "close"` | The trigger type for this turn | + +```ts +import { validateUIMessages } from "ai"; + +export const myChat = chat.agent({ + id: "my-chat", + onValidateMessages: async ({ messages }) => { + return validateUIMessages({ messages, tools: chatTools }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, tools: chatTools, abortSignal: signal }); + }, +}); +``` + + + `onValidateMessages` fires **before** `onTurnStart` and message accumulation. If you need to validate messages loaded from a database, do the loading in `onChatStart` or `onPreload` and let `onValidateMessages` validate the full incoming set each turn. + + #### onTurnStart Fires at the start of every turn, after message accumulation and `onChatStart` (turn 0), but **before** `run()` executes. Use it to persist messages before streaming begins — so a mid-stream page refresh still shows the user's message. 
diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index 6eb024a84bb..551227dfe58 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -15,6 +15,7 @@ Options for `chat.agent()`. | `clientDataSchema` | `TaskSchema` | — | Schema for validating and typing `clientData` | | `onPreload` | `(event: PreloadEvent) => Promise \| void` | — | Fires on preloaded runs before the first message | | `onChatStart` | `(event: ChatStartEvent) => Promise \| void` | — | Fires on turn 0 before `run()` | +| `onValidateMessages` | `(event: ValidateMessagesEvent) => UIMessage[] \| Promise` | — | Validate/transform UIMessages before model conversion. See [onValidateMessages](/ai-chat/backend#onvalidatemessages) | | `onTurnStart` | `(event: TurnStartEvent) => Promise \| void` | — | Fires every turn before `run()` | | `onBeforeTurnComplete` | `(event: BeforeTurnCompleteEvent) => Promise \| void` | — | Fires after response but before stream closes. Includes `writer`. | | `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes (stream closed) | @@ -39,6 +40,10 @@ Plus all standard [TaskOptions](/tasks/overview) — `retry`, `queue`, `machine` All **`chat.agent`** lifecycle events (**`onPreload`**, **`onChatStart`**, **`onTurnStart`**, **`onBeforeTurnComplete`**, **`onTurnComplete`**, **`onCompacted`**) and the object passed to **`run`** include **`ctx`**: the same **`TaskRunContext`** shape as the `ctx` in `task({ run: (payload, { ctx }) => ... })`. + + **`onValidateMessages`** does not include `ctx` — it fires before message accumulation and is designed for pure validation/transformation of incoming messages. + + Use **`ctx`** for run metadata, tags, parent links, or any API that needs the full run record. The chat-specific string **`runId`** on events is always **`ctx.run.id`**; both are provided for convenience. ```ts @@ -100,6 +105,17 @@ Passed to the `onChatStart` callback. 
| `preloaded` | `boolean` | Whether this run was preloaded before the first message | | `writer` | [`ChatWriter`](#chatwriter) | Stream writer for custom chunks. Lazy — no overhead if unused. | +## ValidateMessagesEvent + +Passed to the `onValidateMessages` callback. + +| Field | Type | Description | +| --------- | --------------------------------------------------------------- | ---------------------------------------- | +| `messages` | `UIMessage[]` | Incoming UI messages for this turn | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Turn number (0-indexed) | +| `trigger` | `"submit-message" \| "regenerate-message" \| "preload" \| "close"` | The trigger type for this turn | + ## TurnStartEvent Passed to the `onTurnStart` callback. From f514a24ba134a40c56ccea61c37547f583ae5560 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 3 Apr 2026 15:55:28 +0100 Subject: [PATCH 18/49] version upgrades --- docs/ai-chat/backend.mdx | 4 + docs/ai-chat/client-protocol.mdx | 383 +++++++++++++++++++++ docs/ai-chat/overview.mdx | 6 +- docs/ai-chat/patterns/version-upgrades.mdx | 157 +++++++++ docs/docs.json | 2 + 5 files changed, 549 insertions(+), 3 deletions(-) create mode 100644 docs/ai-chat/client-protocol.mdx create mode 100644 docs/ai-chat/patterns/version-upgrades.mdx diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 1e05c073915..880b59da7a5 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -791,6 +791,10 @@ The `reason` field tells you why messages are being prepared: | `"compaction-rebuild"` | Rebuilding from a previous compaction summary | | `"compaction-result"` | Fresh compaction just produced these messages | +### Version upgrades + +Chat agent runs are pinned to the worker version they started on. When you deploy a new version, suspended runs resume on the old code. 
Call `chat.requestUpgrade()` in `onTurnStart` to skip `run()` and exit immediately — the transport re-triggers the same message on the latest version. See the [Version Upgrades pattern](/ai-chat/patterns/version-upgrades) for the full guide. + ### Runtime configuration #### chat.setTurnTimeout() diff --git a/docs/ai-chat/client-protocol.mdx b/docs/ai-chat/client-protocol.mdx new file mode 100644 index 00000000000..64f2117fd5c --- /dev/null +++ b/docs/ai-chat/client-protocol.mdx @@ -0,0 +1,383 @@ +--- +title: "Client Protocol" +sidebarTitle: "Client Protocol" +description: "The wire protocol for building custom chat transports — how clients communicate with chat agents over input streams and SSE." +--- + +This page documents the protocol that chat clients use to communicate with `chat.agent()` tasks. Use this if you're building a custom transport (e.g., for a Slack bot, CLI tool, or native app) instead of using the built-in `TriggerChatTransport` or `AgentChat`. + + + Most users don't need this. Use [`TriggerChatTransport`](/ai-chat/frontend) for browser apps or [`AgentChat`](/ai-chat/server-chat) for server-side code. This page is for building your own from scratch. + + +## Overview + +The protocol has three parts: + +1. **Trigger** — start a new run by calling the task trigger API +2. **Input streams** — send messages and signals to a running agent +3. **Output stream** — subscribe to the agent's response via SSE + +```mermaid +sequenceDiagram + participant Client + participant API as Trigger.dev API + participant Agent as Chat Agent Run + + Client->>API: POST /api/v1/tasks/{taskId}/trigger (first message) + API-->>Client: { id: runId, publicAccessToken } + Client->>API: GET /realtime/v1/streams/{runId}/chat (SSE subscribe) + Agent-->>Client: UIMessageChunk stream... + Agent-->>Client: { type: "trigger:turn-complete" } + Client->>API: POST /realtime/v1/streams/{runId}/input/chat-messages (next message) + Agent-->>Client: UIMessageChunk stream... 
+ Agent-->>Client: { type: "trigger:turn-complete" } +``` + +## Step 1: Trigger the first run + +Start a conversation by triggering the agent task. The payload follows the `ChatTaskWirePayload` shape: + +```bash +POST /api/v1/tasks/{taskId}/trigger +Authorization: Bearer +Content-Type: application/json + +{ + "payload": { + "messages": [ + { + "id": "msg-1", + "role": "user", + "parts": [{ "type": "text", "text": "Hello!" }] + } + ], + "chatId": "conversation-123", + "trigger": "submit-message", + "metadata": { "userId": "user-456" } + }, + "options": { + "tags": ["chat:conversation-123"] + } +} +``` + +The response body contains the `runId`: + +```json +{ + "id": "run_abc123" +} +``` + +The **response headers** contain the public access token (a JWT scoped to this run): + +The `x-trigger-jwt` header contains a JWT with `read:runs:{runId}` and `write:inputStreams:{runId}` scopes. Use this for all stream operations. + +Store the `runId` and the `x-trigger-jwt` value — you need both for input streams and SSE. + + + The built-in SDK clients (`TriggerChatTransport`, `AgentChat`) extract the JWT from the response header automatically. If you're using the `ApiClient` from `@trigger.dev/core/v3`, `triggerTask()` returns `{ id, publicAccessToken }` with the header already extracted. + + +### Preloading (optional) + +To preload an agent before the first message, trigger with `"trigger": "preload"` and an empty `messages` array: + +```json +{ + "payload": { + "messages": [], + "chatId": "conversation-123", + "trigger": "preload", + "metadata": { "userId": "user-456" } + } +} +``` + +The agent starts, runs `onPreload`, and waits for the first real message via the input stream. 
+ +## Step 2: Subscribe to the output stream + +Subscribe to the agent's response via SSE: + +``` +GET /realtime/v1/streams/{runId}/chat +Authorization: Bearer +Accept: text/event-stream +``` + +### Stream format (S2) + +The output stream uses [S2](https://s2.dev) (a durable streaming service) under the hood. SSE events arrive as **batches** — each event has `event: batch` and a `data` field containing an array of records: + +```json +event: batch +data: { + "records": [ + { + "body": "{\"data\": {\"type\": \"text-delta\", \"delta\": \"Hello\"}, \"id\": \"abc123\"}", + "seq_num": 1, + "timestamp": 1712150400000 + }, + { + "body": "{\"data\": {\"type\": \"text-delta\", \"delta\": \" world\"}, \"id\": \"def456\"}", + "seq_num": 2, + "timestamp": 1712150400001 + } + ] +} +``` + +Each record's `body` is a JSON string containing `{ data, id }`. The `data` field is the actual `UIMessageChunk`. The `seq_num` is used for stream resumption. + +**Recommended:** Use `SSEStreamSubscription` from `@trigger.dev/core/v3` to handle parsing automatically — it takes care of batch decoding, deduplication, and resume tracking: + +```ts +import { SSEStreamSubscription } from "@trigger.dev/core/v3"; + +const subscription = new SSEStreamSubscription( + `${baseUrl}/realtime/v1/streams/${runId}/chat`, + { + headers: { Authorization: `Bearer ${publicAccessToken}` }, + timeoutInSeconds: 120, + } +); + +const stream = await subscription.subscribe(); +const reader = stream.getReader(); + +while (true) { + const { done, value } = await reader.read(); + if (done) break; + + // value is { id: string, chunk: UIMessageChunk, timestamp: number } + const chunk = value.chunk; + + if (chunk.type === "trigger:turn-complete") break; + if (chunk.type === "text-delta") process.stdout.write(chunk.delta); +} +``` + +If you prefer to parse the S2 protocol yourself, see the [S2 documentation](https://s2.dev/docs) for the full SSE batch protocol reference. 
+ +### Chunk types + +Each chunk's `data` field is a `UIMessageChunk` from the [AI SDK](https://ai-sdk.dev/docs/ai-sdk-ui/ui-message-stream). The stream contains standard AI SDK chunk types (`text-delta`, `reasoning-delta`, `tool-input-available`, `tool-output-available`, `error`, etc.) plus two Trigger.dev-specific control chunks. + +See the [AI SDK UIMessageStream documentation](https://ai-sdk.dev/docs/ai-sdk-ui/ui-message-stream) for the full list of chunk types and their shapes. + +### `trigger:turn-complete` + +Signals that the agent's turn is finished — stop reading and wait for user input. + +```json +{ + "type": "trigger:turn-complete", + "publicAccessToken": "eyJ..." +} +``` + +| Field | Type | Description | +| --- | --- | --- | +| `type` | `"trigger:turn-complete"` | Always this string | +| `publicAccessToken` | `string` (optional) | A refreshed JWT for this run. If present, replace your stored token with this one — the previous token may be close to expiry. | + +When you receive this chunk: +1. Update `publicAccessToken` if one is included +2. Close the stream reader +3. Wait for the next user message before subscribing again + +### `trigger:upgrade-required` + +Signals that the agent cannot handle this message on its current version and the client should retry on a new run. This is emitted when the agent calls [`chat.requestUpgrade()`](/ai-chat/patterns/version-upgrades) before processing the turn. + +```json +{ + "type": "trigger:upgrade-required" +} +``` + +When you receive this chunk: +1. Close the stream reader +2. Clear the current session +3. Immediately trigger a **new run** with the full message history and `continuation: true` (same as [Step 4: Handle continuations](#step-4-handle-continuations)) +4. Subscribe to the new run's stream and pipe it through to the consumer + +The user's message is **not lost** — it gets replayed on the new version. The built-in clients (`TriggerChatTransport`, `AgentChat`) handle this transparently. 
The consumer sees a seamless response from the upgraded agent. + +### Resuming a stream + +If the SSE connection drops, reconnect with the `Last-Event-ID` header set to the last `seq_num` you received: + +``` +GET /realtime/v1/streams/{runId}/chat +Authorization: Bearer +Last-Event-ID: 42 +``` + +`SSEStreamSubscription` tracks this automatically via its `lastEventId` option. + +## Step 3: Send subsequent messages + +After the first turn, send messages via the run's input stream instead of triggering a new run: + +```bash +POST /realtime/v1/streams/{runId}/input/chat-messages +Authorization: Bearer +Content-Type: application/json + +{ + "data": { + "messages": [ + { + "id": "msg-2", + "role": "user", + "parts": [{ "type": "text", "text": "Tell me more" }] + } + ], + "chatId": "conversation-123", + "trigger": "submit-message", + "metadata": { "userId": "user-456" } + } +} +``` + +Note the `{ "data": ... }` wrapper — the input stream API wraps the payload in a `data` field. + +After sending, subscribe to the output stream again (same URL, same auth) to receive the response. + + + On turn 2+, only send the **new** message(s) in the `messages` array — not the full history. The agent accumulates the conversation internally. On turn 1 (or after a continuation), send the **full** message history. + + +## Pending and steering messages + +You can send messages to the agent **while it's still streaming a response**. These are called pending messages — the agent receives them mid-turn and can inject them between tool-call steps. 
+ +Send a pending message to the same `chat-messages` input stream: + +```bash +POST /realtime/v1/streams/{runId}/input/chat-messages +Authorization: Bearer +Content-Type: application/json + +{ + "data": { + "messages": [ + { + "id": "msg-steering-1", + "role": "user", + "parts": [{ "type": "text", "text": "Actually, focus on the security issues first" }] + } + ], + "chatId": "conversation-123", + "trigger": "submit-message", + "metadata": { "userId": "user-456" } + } +} +``` + +This is the same endpoint and format as a normal message. The difference is timing — the agent is already streaming. What happens to the message depends on the agent's `pendingMessages` configuration: + +- **With `pendingMessages.shouldInject`**: The message is injected into the model's context at the next `prepareStep` boundary (between tool-call steps). The agent sees it and can adjust its behavior mid-response. +- **Without `pendingMessages` config**: The message queues for the next turn. It becomes the `currentWirePayload` for the following turn, skipping the wait-for-message phase. + +See [Pending Messages](/ai-chat/pending-messages) for how to configure the agent side. + + + Unlike a normal `sendMessage`, pending messages should **not** cancel the active stream subscription. Keep reading the current response stream — the agent incorporates the pending message into the same turn or queues it for the next one. + + +## Step 4: Handle continuations + +A run can end for several reasons: idle timeout, max turns reached, `chat.requestUpgrade()`, or cancellation. When this happens, the input stream POST will fail (400 "Cannot send to input stream on a completed run"). 
+ +When this error occurs, trigger a **new run** with the full message history and `continuation: true`: + +```json +{ + "payload": { + "messages": [/* full UIMessage history */], + "chatId": "conversation-123", + "trigger": "submit-message", + "metadata": { "userId": "user-456" }, + "continuation": true, + "previousRunId": "run_abc123" + } +} +``` + +The new run picks up the latest deployed version automatically. The agent's `onChatStart` hook receives `continuation: true` and `previousRunId` so it can distinguish from a brand new conversation. + + + This is how [version upgrades](/ai-chat/patterns/version-upgrades) work transparently — the agent calls `chat.requestUpgrade()`, the run exits, and the client's next message triggers a continuation on the new version. No special handling needed beyond the standard continuation flow. + + +## Stopping and closing + +### Stop the current turn + +Send a stop signal to interrupt the agent mid-response: + +```bash +POST /realtime/v1/streams/{runId}/input/chat-stop +Authorization: Bearer +Content-Type: application/json + +{ + "data": { "stop": true } +} +``` + +The agent's stop signal fires, `streamText` aborts, and a `trigger:turn-complete` chunk is emitted. + +### Close the conversation + +Send a close signal to end the conversation gracefully: + +```bash +POST /realtime/v1/streams/{runId}/input/chat-messages +Authorization: Bearer +Content-Type: application/json + +{ + "data": { + "messages": [], + "chatId": "conversation-123", + "trigger": "close" + } +} +``` + +The agent exits its loop and the run completes. If you skip this, the agent closes on its own when the idle/turn timeout expires. 
+ +## Session state + +A client needs to track per-conversation: + +| Field | Description | +| --- | --- | +| `chatId` | Stable conversation ID (survives continuations) | +| `runId` | Current run ID (changes on continuation) | +| `publicAccessToken` | JWT for stream auth (refreshed on each turn-complete) | +| `lastEventId` | Last SSE event ID (for stream resumption) | + +On continuation, `runId` and `publicAccessToken` change. `chatId` stays the same. + +## Authentication + +| Operation | Auth | +| --- | --- | +| Trigger task | Secret API key or scoped JWT with `write:tasks` | +| Input stream POST | JWT with `write:inputStreams` scope for the run | +| Output stream GET | JWT with `read:runs` scope for the run | + +The `publicAccessToken` returned from the trigger response has both `read:runs` and `write:inputStreams` scopes for the run. Use it for all stream operations. + +## See also + +- [`TriggerChatTransport`](/ai-chat/frontend) — built-in frontend transport (implements this protocol) +- [`AgentChat`](/ai-chat/server-chat) — built-in server-side client (implements this protocol) +- [Backend lifecycle](/ai-chat/backend#lifecycle-hooks) — what the agent does on each event +- [Version upgrades](/ai-chat/patterns/version-upgrades) — how `chat.requestUpgrade()` uses continuations diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx index eaab0db43cd..0e58bc997e8 100644 --- a/docs/ai-chat/overview.mdx +++ b/docs/ai-chat/overview.mdx @@ -43,7 +43,7 @@ sequenceDiagram Task->>API: streams.pipe("chat", uiStream) API-->>useChat: SSE: UIMessageChunks useChat-->>User: Render streaming text - Task->>API: Write __trigger_turn_complete + Task->>API: Write trigger:turn-complete API-->>useChat: SSE: turn complete + refreshed token useChat->>useChat: Close stream, update session Task->>Task: onTurnComplete({ messages, stopped: false }) @@ -76,7 +76,7 @@ sequenceDiagram Task->>API: streams.pipe("chat", uiStream) API-->>useChat: SSE: UIMessageChunks useChat-->>User: 
Render streaming text - Task->>API: Write __trigger_turn_complete + Task->>API: Write trigger:turn-complete Task->>Task: onTurnComplete({ turn: 1 }) Task->>Task: Wait for next message (idle → suspend) ``` @@ -100,7 +100,7 @@ sequenceDiagram LLM-->>Task: Stream ends (AbortError) Task->>Task: cleanupAbortedParts(responseMessage) Note right of Task: Remove partial tool calls,
mark streaming parts as done - Task->>API: Write __trigger_turn_complete + Task->>API: Write trigger:turn-complete API-->>useChat: SSE: turn complete Task->>Task: onTurnComplete({ stopped: true }) Task->>Task: Wait for next message diff --git a/docs/ai-chat/patterns/version-upgrades.mdx b/docs/ai-chat/patterns/version-upgrades.mdx new file mode 100644 index 00000000000..8679b664f0b --- /dev/null +++ b/docs/ai-chat/patterns/version-upgrades.mdx @@ -0,0 +1,157 @@ +--- +title: "Version upgrades" +sidebarTitle: "Version upgrades" +description: "Gracefully migrate suspended chat agents to a new deployment using chat.requestUpgrade() and the continuation mechanism." +--- + +Chat agent runs are pinned to the worker version they started on. When you deploy a new version, suspended runs resume on the **old** code. If your deploy includes breaking changes (new tools, changed schemas, updated API contracts), this can cause issues. + +`chat.requestUpgrade()` lets the agent opt out of the current run so the transport triggers a new one on the latest version. + +## How it works + +When `chat.requestUpgrade()` is called in `onTurnStart` or `onValidateMessages`: + +1. `run()` is **skipped** — no response is generated on old code +2. The agent writes a `trigger:upgrade-required` control chunk to the stream +3. The transport receives the chunk and immediately triggers a **new run** on the currently promoted deployment with the same message (as a continuation) +4. The new run's response is piped through transparently — the user sees a single seamless response from the upgraded agent + +When called from inside `run()` or `chat.defer()`, the current turn completes normally first and the run exits afterward. The next message triggers the continuation. 
+ +```mermaid +sequenceDiagram + participant User + participant Transport + participant RunV1 as Run (v1) + participant RunV2 as Run (v2) + + User->>Transport: send message + Transport->>RunV1: input stream + RunV1->>RunV1: onTurnStart → requestUpgrade() + RunV1-->>Transport: trigger:upgrade-required + RunV1->>RunV1: exit (run() never called) + Transport->>RunV2: trigger new run (continuation, same message) + RunV2-->>Transport: response stream + Transport-->>User: response (seamless) +``` + +## Contract versioning + +Define an explicit version for the contract between your frontend and agent. The frontend sends a `protocolVersion` via `clientData`, and the agent declares which versions it supports. When a breaking change ships (new tools, changed data parts, updated response format), bump the version. + +This gives you full control — the frontend can be backwards-compatible across multiple agent versions, and the agent only upgrades when it sees a version it doesn't support. + +```tsx title="app/components/Chat.tsx" +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; +import { useChat } from "@ai-sdk/react"; + +export function Chat() { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + // Bump this when you ship a breaking change to the chat UI or tools + clientData: { userId: user.id, protocolVersion: "v2" }, + }); + + const { messages, sendMessage } = useChat({ transport }); + // ... +} +``` + +On the agent side, declare which versions the current code supports: + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +// The set of frontend protocol versions this agent code supports. +// When you deploy a breaking change, remove old versions from this set. 
+const SUPPORTED_VERSIONS = new Set(["v2", "v3"]); + +export const myChat = chat + .withClientData({ + schema: z.object({ + userId: z.string(), + protocolVersion: z.string(), + }), + }) + .agent({ + id: "my-chat", + onTurnStart: async ({ clientData }) => { + if (clientData?.protocolVersion && !SUPPORTED_VERSIONS.has(clientData.protocolVersion)) { + chat.requestUpgrade(); + } + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, + }); +``` + +The transport includes `clientData` in every payload — both the initial trigger and subsequent input stream messages — so the agent always has the current value. + +This pattern is useful when: +- Your frontend is backwards-compatible across several agent versions, but occasionally ships breaking changes +- You want explicit control over when upgrades happen rather than upgrading on every deploy +- Multiple frontend versions may be active at the same time (e.g., users with cached tabs) + +## Auto-detect from build ID (Next.js / Vercel) + +For automatic upgrade on every deploy, pass your platform's build ID via `clientData` instead of a manual version. The agent stores the ID from the first message and upgrades when it changes: + +```tsx title="app/components/Chat.tsx" +// Vercel sets this at build time, or use your own build ID +const APP_VERSION = process.env.NEXT_PUBLIC_VERCEL_DEPLOYMENT_ID + ?? process.env.NEXT_PUBLIC_BUILD_ID + ?? "dev"; + +export function Chat() { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + clientData: { userId: user.id, appVersion: APP_VERSION }, + }); + // ... 
+} +``` + +```ts title="trigger/chat.ts" +const initialAppVersion = chat.local({ id: "appVersion" }); + +export const myChat = chat + .withClientData({ + schema: z.object({ + userId: z.string(), + appVersion: z.string(), + }), + }) + .agent({ + id: "my-chat", + onChatStart: async ({ clientData }) => { + initialAppVersion.init(clientData.appVersion); + }, + onTurnStart: async ({ clientData }) => { + if (clientData?.appVersion && clientData.appVersion !== initialAppVersion.value) { + chat.requestUpgrade(); + } + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, + }); +``` + +This upgrades on **every** deploy, not just breaking changes. Good for fast-moving projects where you always want the latest code. + +## Other agent types + +- **`chat.agent()`** and **`chat.createSession()`** — use `chat.requestUpgrade()` as shown above +- **`chat.customAgent()`** — you control the turn loop, so just `return` from `run()` when you want to exit + +## See also + +- [Lifecycle hooks](/ai-chat/backend#lifecycle-hooks) — where `onTurnStart` and `onChatResume` fit in the turn cycle +- [Database persistence](/ai-chat/patterns/database-persistence) — how continuations interact with session state +- [Client Protocol](/ai-chat/client-protocol#step-4-handle-continuations) — how clients handle continuations at the wire level diff --git a/docs/docs.json b/docs/docs.json index d58e420b163..0a16aaad980 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -102,10 +102,12 @@ "group": "Patterns", "pages": [ "ai-chat/patterns/sub-agents", + "ai-chat/patterns/version-upgrades", "ai-chat/patterns/database-persistence", "ai-chat/patterns/code-sandbox" ] }, + "ai-chat/client-protocol", "ai-chat/reference" ] } From 27da46ba8424b2f9e90e02f425e9f7cce72e43cc Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 11 Apr 2026 10:20:39 +0100 Subject: [PATCH 19/49] docs for stopping chats after resume --- docs/ai-chat/frontend.mdx | 
33 +++++++++++++++++++++++++++++++-- docs/ai-chat/reference.mdx | 23 +++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 197e738baf4..1c060a3d055 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -156,6 +156,13 @@ function ChatClient({ chatId, initialMessages, initialSessions }) { brand new chats, there's nothing to reconnect to. + + After resuming, `useChat`'s built-in `stop()` won't send the stop signal to the backend because + the AI SDK doesn't pass its abort signal through `reconnectToStream`. Use + `transport.stopGeneration(chatId)` for reliable stop behavior after resume — see + [Stop generation](#stop-generation) for the recommended pattern. + + In React strict mode (enabled by default in Next.js dev), you may see a `TypeError: Cannot read properties of undefined (reading 'state')` in the console when using `resume`. This is a [known @@ -235,10 +242,18 @@ Supports Zod, ArkType, Valibot, and other schema libraries supported by the SDK. ## Stop generation -Calling `stop()` from `useChat` sends a stop signal to the running task via input streams. The task aborts the current `streamText` call, but the run stays alive for the next message: +Use `transport.stopGeneration(chatId)` to stop the current generation. This sends a stop signal to the running task via input streams, aborting the current `streamText` call while keeping the run alive for the next message. + +`stopGeneration` works in all scenarios — including after a page refresh when the stream was reconnected via `resume`. 
Call it alongside `useChat`'s `stop()` to also update the frontend state: ```tsx -const { messages, sendMessage, stop, status } = useChat({ transport }); +const { messages, sendMessage, stop: aiStop, status } = useChat({ transport }); + +// Wrap both calls in a single stop handler +const stop = useCallback(() => { + transport.stopGeneration(chatId); + aiStop(); +}, [transport, chatId, aiStop]); { status === "streaming" && ( @@ -249,6 +264,20 @@ const { messages, sendMessage, stop, status } = useChat({ transport }); } ``` + + `transport.stopGeneration(chatId)` handles the backend stop signal and closes + the SSE connection, while `aiStop()` (from `useChat`) updates the frontend + status to `"ready"` and fires the `onFinish` callback. + + + + A [PR to the AI SDK](https://github.com/vercel/ai/pull/14350) has been + submitted to pass `abortSignal` through `reconnectToStream`, which would make + `useChat`'s built-in `stop()` work after resume without needing + `stopGeneration`. Until that lands, use the pattern above for reliable stop + behavior after page refresh. + + See [Stop generation](/ai-chat/backend#stop-generation) in the backend docs for how to handle stop signals in your task. ## Self-hosting diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index 551227dfe58..3a66eb9327e 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -520,6 +520,29 @@ const transport = useTriggerChatTransport({ }); ``` +### transport.stopGeneration() + +Stop the current generation for a chat session. Sends a stop signal to the backend task and closes the active SSE connection. + +```ts +transport.stopGeneration(chatId: string): Promise +``` + +Returns `true` if the stop signal was sent, `false` if there's no active session. Works for both initial connections and reconnected streams (after page refresh with `resume: true`). 
+ +Use alongside `useChat`'s `stop()` for a complete stop experience: + +```tsx +const { stop: aiStop } = useChat({ transport }); + +const stop = useCallback(() => { + transport.stopGeneration(chatId); + aiStop(); +}, [transport, chatId, aiStop]); +``` + +See [Stop generation](/ai-chat/frontend#stop-generation) for full details. + ### transport.preload() Eagerly trigger a run before the first message. From d7f72abfe4949cd194f0bcbd85704843a378eb67 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 14 Apr 2026 13:08:52 +0100 Subject: [PATCH 20/49] docs: add tool approvals and stop-after-resume documentation --- docs/ai-chat/backend.mdx | 32 ++++++++++++++ docs/ai-chat/frontend.mdx | 90 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 880b59da7a5..ac04bbc3415 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -516,6 +516,38 @@ This removes tool invocation parts stuck in `partial-call` state and marks any ` This is expected and does not require special handling. +### Tool approvals + +Tools with `needsApproval: true` pause execution until the user approves or denies via the frontend. Define the tool as normal and pass it to `streamText` — `chat.agent` handles the rest: + +```ts +const sendEmail = tool({ + description: "Send an email. Requires human approval.", + inputSchema: z.object({ to: z.string(), subject: z.string(), body: z.string() }), + needsApproval: true, + execute: async ({ to, subject, body }) => { + await emailService.send({ to, subject, body }); + return { sent: true }; + }, +}); + +export const myChat = chat.agent({ + id: "my-chat", + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + tools: { sendEmail }, + abortSignal: signal, + }); + }, +}); +``` + +When the model calls an approval-required tool, the turn completes with the tool in `approval-requested` state. 
After the user approves on the frontend, the updated message is sent back and `chat.agent` replaces it in the conversation accumulator by matching the message ID. `streamText` then executes the approved tool and continues. + +See [Tool approvals](/ai-chat/frontend#tool-approvals) in the frontend docs for the UI setup. + ### Persistence #### What needs to be persisted diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 1c060a3d055..874778eb129 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -280,6 +280,96 @@ const stop = useCallback(() => { See [Stop generation](/ai-chat/backend#stop-generation) in the backend docs for how to handle stop signals in your task. +## Tool approvals + +The AI SDK supports tools that require human approval before execution. To use this with `chat.agent`, define a tool with `needsApproval: true` on the backend, then handle the approval UI and configure `sendAutomaticallyWhen` on the frontend. + +### Backend: define an approval-required tool + +```ts +import { tool } from "ai"; +import { z } from "zod"; + +const sendEmail = tool({ + description: "Send an email. Requires human approval before sending.", + inputSchema: z.object({ + to: z.string(), + subject: z.string(), + body: z.string(), + }), + needsApproval: true, + execute: async ({ to, subject, body }) => { + await emailService.send({ to, subject, body }); + return { sent: true, to, subject }; + }, +}); +``` + +Pass the tool to `streamText` in your `run` function as usual. When the model calls the tool, `chat.agent` streams a `tool-approval-request` chunk. The turn completes and the run waits for the next message. + +### Frontend: approval UI + +Import `lastAssistantMessageIsCompleteWithApprovalResponses` from the AI SDK and pass it to `sendAutomaticallyWhen`. This tells `useChat` to automatically re-send messages once all approvals have been responded to. 
+ +Destructure `addToolApprovalResponse` from `useChat` and wire it to your approval buttons: + +```tsx +import { useChat } from "@ai-sdk/react"; +import { lastAssistantMessageIsCompleteWithApprovalResponses } from "ai"; + +function Chat({ chatId, transport }) { + const { messages, sendMessage, addToolApprovalResponse, status } = useChat({ + id: chatId, + transport, + sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses, + }); + + const handleApprove = (approvalId: string) => { + addToolApprovalResponse({ id: approvalId, approved: true }); + }; + + const handleDeny = (approvalId: string) => { + addToolApprovalResponse({ id: approvalId, approved: false, reason: "User denied" }); + }; + + return ( +
+      <div>
+        {messages.map((msg) =>
+          msg.parts.map((part, i) => {
+            if (part.state === "approval-requested") {
+              return (
+                <div key={i}>
+                  <p>Tool "{part.type}" wants to run with input:</p>
+                  <pre>{JSON.stringify(part.input, null, 2)}</pre>
+                  <button onClick={() => handleApprove(part.approval.id)}>Approve</button>
+                  <button onClick={() => handleDeny(part.approval.id)}>Deny</button>
+                </div>
+              );
+            }
+            // ... render other parts
+          })
+        )}
+      </div>
+ ); +} +``` + +### How it works + +1. Model calls a tool with `needsApproval: true` — the turn completes with the tool in `approval-requested` state +2. Frontend shows Approve/Deny buttons +3. User clicks Approve — `addToolApprovalResponse` updates the tool part to `approval-responded` +4. `sendAutomaticallyWhen` returns `true` — `useChat` re-sends the updated assistant message +5. The transport sends the message via input streams — the backend matches it by ID and replaces the existing assistant message in the accumulator +6. `streamText` sees the approved tool, executes it, and streams the result + + + Message IDs are kept in sync between frontend and backend automatically. The backend always + includes a `generateMessageId` function when streaming responses, ensuring the `start` chunk + carries a `messageId` that the frontend uses. This makes the ID-based matching reliable + for tool approval updates. + + ## Self-hosting If you're self-hosting Trigger.dev, pass the `baseURL` option: From fe242b0e2573d1938a42ee2823bbd194146a0084 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 14 Apr 2026 15:03:05 +0100 Subject: [PATCH 21/49] cover tool approvals in the client protocol --- docs/ai-chat/client-protocol.mdx | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/ai-chat/client-protocol.mdx b/docs/ai-chat/client-protocol.mdx index 64f2117fd5c..7ac159f6d93 100644 --- a/docs/ai-chat/client-protocol.mdx +++ b/docs/ai-chat/client-protocol.mdx @@ -251,6 +251,45 @@ After sending, subscribe to the output stream again (same URL, same auth) to rec On turn 2+, only send the **new** message(s) in the `messages` array — not the full history. The agent accumulates the conversation internally. On turn 1 (or after a continuation), send the **full** message history.
+### Tool approval responses + +When a tool requires approval (`needsApproval: true`), the agent streams the tool call with an `approval-requested` state and completes the turn. After the user approves or denies, send the **updated assistant message** (with `approval-responded` tool parts) back via the same input stream: + +```bash +POST /realtime/v1/streams/{runId}/input/chat-messages +Authorization: Bearer +Content-Type: application/json + +{ + "data": { + "messages": [ + { + "id": "asst-msg-1", + "role": "assistant", + "parts": [ + { "type": "text", "text": "I'll send that email for you." }, + { + "type": "tool-sendEmail", + "toolCallId": "call-1", + "state": "approval-responded", + "input": { "to": "user@example.com", "subject": "Hello" }, + "approval": { "id": "approval-1", "approved": true } + } + ] + } + ], + "chatId": "conversation-123", + "trigger": "submit-message" + } +} +``` + +The agent matches the incoming message by its `id` against the accumulated conversation. If a match is found, it **replaces** the existing message (instead of appending). This updates the tool approval state, and `streamText` executes the approved tool on the next step. + + + The message `id` must match the one the agent assigned during streaming. If you're using `TriggerChatTransport`, IDs are kept in sync automatically. Custom transports should use the `messageId` from the stream's `start` chunk. + + ## Pending and steering messages You can send messages to the agent **while it's still streaming a response**. These are called pending messages — the agent receives them mid-turn and can inject them between tool-call steps. 
From d271f01c7d1443f27a24dcc10b89d7998125e57d Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 14 Apr 2026 15:31:03 +0100 Subject: [PATCH 22/49] Cover passing a custom message ID generator --- docs/ai-chat/backend.mdx | 45 ++++++++++++++++++++++++++++++++++++++ docs/ai-chat/reference.mdx | 21 +++++++++--------- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index ac04bbc3415..f9e6e5588cd 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -917,6 +917,51 @@ export const myChat = chat.agent({ }); ``` +##### Custom message IDs + +By default, response message IDs are generated using the AI SDK's built-in `generateId`. Pass a custom `generateMessageId` function to use your own ID format (e.g. UUID-v7): + +```ts +import { v7 as uuidv7 } from "uuid"; + +export const myChat = chat.agent({ + id: "my-chat", + uiMessageStreamOptions: { + generateMessageId: () => uuidv7(), + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +With the `.withUIMessage()` builder, set it under `streamOptions`: + +```ts +import { v7 as uuidv7 } from "uuid"; + +export const myChat = chat + .withUIMessage({ + streamOptions: { + generateMessageId: () => uuidv7(), + sendReasoning: true, + }, + }) + .agent({ + id: "my-chat", + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, + }); +``` + + + The generated ID is sent to the frontend in the stream's `start` chunk, so frontend and backend + always reference the same ID for each message. This is important for features like tool + approvals, where the frontend resends an assistant message and the backend needs to match it + by ID in the conversation accumulator. 
+ + ##### Per-turn overrides Override per-turn with `chat.setUIMessageStreamOptions()` — per-turn values merge with the static config (per-turn wins on conflicts). The override is cleared automatically after each turn. diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index 3a66eb9327e..da7a5479e3a 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -433,16 +433,17 @@ Use with `useChat({ transport })` when using [`chat.withUIMessage`](/ai-cha Options for customizing `toUIMessageStream()`. Set as static defaults via `uiMessageStreamOptions` on `chat.agent()`, or override per-turn via `chat.setUIMessageStreamOptions()`. See [Stream options](/ai-chat/backend#stream-options) for usage examples. -Derived from the AI SDK's `UIMessageStreamOptions` with `onFinish`, `originalMessages`, and `generateMessageId` omitted (managed internally). - -| Option | Type | Default | Description | -| ----------------- | --------------------------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------- | -| `onError` | `(error: unknown) => string` | Raw error message | Called on LLM errors and tool execution errors. Return a sanitized string — sent as `{ type: "error", errorText }` to the frontend. | -| `sendReasoning` | `boolean` | `true` | Send reasoning parts to the client | -| `sendSources` | `boolean` | `false` | Send source parts to the client | -| `sendFinish` | `boolean` | `true` | Send the finish event. Set to `false` when chaining multiple `streamText` calls. | -| `sendStart` | `boolean` | `true` | Send the message start event. Set to `false` when chaining. | -| `messageMetadata` | `(options: { part }) => metadata` | — | Extract message metadata to send to the client. Called on `start` and `finish` events. 
| +Derived from the AI SDK's `UIMessageStreamOptions` with `onFinish` and `originalMessages` omitted (managed internally — `onFinish` for response capture, `originalMessages` for cross-turn message ID reuse). + +| Option | Type | Default | Description | +| ------------------- | --------------------------------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | +| `onError` | `(error: unknown) => string` | Raw error message | Called on LLM errors and tool execution errors. Return a sanitized string — sent as `{ type: "error", errorText }` to the frontend. | +| `sendReasoning` | `boolean` | `true` | Send reasoning parts to the client | +| `sendSources` | `boolean` | `false` | Send source parts to the client | +| `sendFinish` | `boolean` | `true` | Send the finish event. Set to `false` when chaining multiple `streamText` calls. | +| `sendStart` | `boolean` | `true` | Send the message start event. Set to `false` when chaining. | +| `messageMetadata` | `(options: { part }) => metadata` | — | Extract message metadata to send to the client. Called on `start` and `finish` events. | +| `generateMessageId` | `() => string` | AI SDK's `generateId` | Custom message ID generator for response messages (e.g. UUID-v7). IDs are shared between frontend and backend via the stream's `start` chunk. 
| ## TriggerChatTransport options From 319a9ce52f5b2f65c7c4b37cf2b0f44fa2b0597a Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 14 Apr 2026 19:07:27 +0100 Subject: [PATCH 23/49] docs: add chat.response API, persistent data parts, transient flag, tool approvals wire protocol --- docs/ai-chat/backend.mdx | 2 +- docs/ai-chat/features.mdx | 80 +++++++++++++++++++++++++++++--------- docs/ai-chat/reference.mdx | 3 +- 3 files changed, 64 insertions(+), 21 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index f9e6e5588cd..e75f34a502b 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -116,7 +116,7 @@ export const myChat = chat.agent({ | `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | | `writer` | [`ChatWriter`](/ai-chat/reference#chatwriter) | Stream writer for custom chunks | -Every lifecycle callback receives a `writer` — a lazy stream writer that lets you send custom `UIMessageChunk` parts (like `data-*` parts) to the frontend without the ceremony of `chat.stream.writer()`. See [ChatWriter](/ai-chat/reference#chatwriter). +Every lifecycle callback receives a `writer` — a lazy stream writer that lets you send custom `UIMessageChunk` parts (like `data-*` parts) to the frontend. Non-transient `data-*` chunks written via the `writer` are automatically added to the response message and available in `onTurnComplete`. Add `transient: true` for ephemeral chunks (progress indicators, etc.) that should not persist. See [Custom data parts](/ai-chat/features#custom-data-parts). #### onChatStart diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index a91b61ff03d..490b0199b86 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -181,42 +181,84 @@ export const myChat = chat.agent({ --- -## Custom streaming with chat.stream +## Custom data parts -`chat.stream` is a typed stream bound to the chat output. 
Use it to write custom `UIMessageChunk` data alongside the AI-generated response — for example, status updates or progress indicators. +You can add custom data parts to the assistant's response message. These appear on the frontend in `message.parts` and are included in `onTurnComplete`'s `responseMessage` and `uiMessages` for persistence. + +### Writing persistent data parts + +Use `chat.response.write()` or the `writer` in lifecycle hooks. Non-transient `data-*` chunks are automatically added to the response message: ```ts import { chat } from "@trigger.dev/sdk/ai"; export const myChat = chat.agent({ id: "my-chat", + onBeforeTurnComplete: async ({ writer, turn }) => { + // This data part will be in responseMessage.parts in onTurnComplete + writer.write({ + type: "data-metadata", + data: { turn, model: "gpt-4o", timestamp: Date.now() }, + }); + }, + onTurnComplete: async ({ responseMessage }) => { + // responseMessage.parts includes the data-metadata part + await db.messages.save(responseMessage); + }, run: async ({ messages, signal }) => { - // Write a custom data part to the chat stream. - // The AI SDK's data-* chunk protocol adds this to message.parts - // on the frontend, where you can render it however you like. - const { waitUntilComplete } = chat.stream.writer({ - execute: ({ write }) => { - write({ - type: "data-status", - id: "search-progress", - data: { message: "Searching the web...", progress: 0.5 }, - }); - }, + // Also works from run() via chat.response + chat.response.write({ + type: "data-context", + data: { searchResults: results }, }); - await waitUntilComplete(); - // Then stream the AI response return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); }, }); ``` - - Use `data-*` chunk types (e.g. `data-status`, `data-progress`) for custom data. The AI SDK processes these into `DataUIPart` objects in `message.parts` on the frontend. 
Writing the same `type` + `id` again updates the existing part instead of creating a new one — useful for live progress. - +### Transient data parts (ephemeral) + +Add `transient: true` to data chunks that should stream to the frontend but NOT persist in the response message. Use this for progress indicators, loading states, and other temporary UI: + +```ts +// Transient — frontend sees it, but NOT in onTurnComplete's responseMessage +writer.write({ + type: "data-progress", + id: "search", + data: { percent: 50 }, + transient: true, +}); +``` + + + This matches the AI SDK's semantics: `data-*` chunks persist to `message.parts` by default. + Only `transient: true` chunks are ephemeral. Non-data chunks (`text-delta`, `tool-*`, etc.) + are handled by `streamText` and captured via `onFinish` — they don't need `chat.response`. + + + + `chat.response` and the `writer` accumulation behavior work with `chat.agent` and + `chat.createSession`. If you're using `chat.customAgent`, manage data part accumulation + manually via your own message accumulator. + + +### Raw streaming with chat.stream + +For low-level stream access (piping from subtasks, reading streams by run ID), use `chat.stream`. Chunks written via `chat.stream` go directly to the realtime output — they are NOT accumulated into the response message regardless of the `transient` flag. + +```ts +// Raw stream — always ephemeral, never in responseMessage +const { waitUntilComplete } = chat.stream.writer({ + execute: ({ write }) => { + write({ type: "data-status", data: { message: "Processing..." } }); + }, +}); +await waitUntilComplete(); +``` - Inside lifecycle callbacks (`onPreload`, `onChatStart`, `onTurnStart`, `onBeforeTurnComplete`, `onCompacted`), you can use the `writer` parameter instead of `chat.stream.writer()` — it's simpler and avoids the `execute` + `waitUntilComplete` boilerplate. See [ChatWriter](/ai-chat/reference#chatwriter). + Use `data-*` chunk types (e.g. 
`data-status`, `data-progress`) for custom data. The AI SDK processes these into `DataUIPart` objects in `message.parts` on the frontend. Writing the same `type` + `id` again updates the existing part instead of creating a new one — useful for live progress. `chat.stream` exposes the full stream API: diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index da7a5479e3a..e76ca25e3e5 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -369,7 +369,8 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. | `chat.defer(promise)` | Run background work in parallel with streaming, awaited before `onTurnComplete` | | `chat.isStopped()` | Check if the current turn was stopped by the user | | `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message | -| `chat.stream` | Typed chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()` | +| `chat.response.write(chunk)` | Write a data part that streams to the frontend AND persists in `onTurnComplete`'s `responseMessage` | +| `chat.stream` | Raw chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()`. Chunks are NOT accumulated into the response. | | `chat.MessageAccumulator` | Class that accumulates conversation messages across turns | | `chat.withUIMessage(config?)` | Returns a [ChatBuilder](/ai-chat/types#chatbuilder) with a fixed `UIMessage` subtype. See [Types](/ai-chat/types) | | `chat.withClientData({ schema })` | Returns a [ChatBuilder](/ai-chat/types#chatbuilder) with a fixed client data schema. 
See [Types](/ai-chat/types#typed-client-data-with-chatwithclientdata) | From 78f0f76abc3e98c23d489cdbc94633ecfd614717 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 14 Apr 2026 19:15:09 +0100 Subject: [PATCH 24/49] add agent prerelease changelog --- docs/ai-chat/changelog.mdx | 64 ++++++++++++++++++++++++++++++++++++++ docs/docs.json | 1 + 2 files changed, 65 insertions(+) create mode 100644 docs/ai-chat/changelog.mdx diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx new file mode 100644 index 00000000000..21b899dfeb4 --- /dev/null +++ b/docs/ai-chat/changelog.mdx @@ -0,0 +1,64 @@ +--- +title: "Changelog" +sidebarTitle: "Changelog" +description: "Pre-release updates for AI chat agents." +--- + + + +## `chat.response` — persistent data parts + +Added `chat.response.write()` for writing data parts that both stream to the frontend AND persist in `onTurnComplete`'s `responseMessage` and `uiMessages`. + +```ts +// Persists to responseMessage.parts — available in onTurnComplete +chat.response.write({ type: "data-handover", data: { context: summary } }); + +// Transient — streams to frontend only, not in responseMessage +writer.write({ type: "data-progress", data: { percent: 50 }, transient: true }); +``` + +Non-transient `data-*` chunks written via lifecycle hook `writer.write()` now automatically persist to the response message, matching the AI SDK's default semantics. Add `transient: true` for ephemeral chunks (progress indicators, status updates). + +See [Custom data parts](/ai-chat/features#custom-data-parts). + +## Tool approvals + +Added support for AI SDK tool approvals (`needsApproval: true`). When the model calls a tool that needs approval, the turn completes and the frontend shows approve/deny buttons. After approval, the updated assistant message is sent back and matched by ID in the accumulator. + +```ts +const sendEmail = tool({ + description: "Send an email. 
Requires human approval.", + inputSchema: z.object({ to: z.string(), subject: z.string(), body: z.string() }), + needsApproval: true, + execute: async ({ to, subject, body }) => { /* ... */ }, +}); +``` + +Frontend setup requires `sendAutomaticallyWhen` and `addToolApprovalResponse` from `useChat`. See [Tool approvals](/ai-chat/frontend#tool-approvals). + +## `transport.stopGeneration(chatId)` + +Added `stopGeneration` method to `TriggerChatTransport` for reliable stop after page refresh / stream reconnect. Works regardless of whether the AI SDK passes `abortSignal` through `reconnectToStream`. + +```tsx +const stop = useCallback(() => { + transport.stopGeneration(chatId); + aiStop(); // also update useChat state +}, [transport, chatId, aiStop]); +``` + +See [Stop generation](/ai-chat/frontend#stop-generation). + +## `generateMessageId` support + +`generateMessageId` can now be passed via `uiMessageStreamOptions` to control response message ID generation (e.g. UUID-v7). The backend automatically passes `originalMessages` to `toUIMessageStream` so message IDs are consistent between frontend and backend. + +## Bug fixes + +- **`onTurnComplete` not called**: Fixed `turnCompleteResult?.lastEventId` TypeError that silently skipped `onTurnComplete` when `writeTurnCompleteChunk` returned undefined in dev. +- **Stop during streaming**: Added 2s timeout on `onFinishPromise` so `onBeforeTurnComplete` and `onTurnComplete` fire even when the AI SDK's `onFinish` doesn't fire after abort. +- **`toStreamTextOptions` without `chat.prompt.set()`**: `prepareStep` injection (compaction, steering, background context) now works even when the user passes `system` directly to `streamText` instead of using `chat.prompt.set()`. +- **Background queue vs tool approvals**: Background context injection is now skipped when the last accumulated message is a `tool` message, preventing it from breaking `streamText`'s `collectToolApprovals`. 
+ + diff --git a/docs/docs.json b/docs/docs.json index 0a16aaad980..8bcade8be94 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -88,6 +88,7 @@ "group": "Agents", "pages": [ "ai-chat/overview", + "ai-chat/changelog", "ai-chat/quick-start", "ai-chat/backend", "ai-chat/frontend", From 94c28cd21b6656c81631233389e8c044ca137ec9 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 15 Apr 2026 16:24:47 +0100 Subject: [PATCH 25/49] docs: add hydrateMessages, chat.history, and actions documentation --- docs/ai-chat/backend.mdx | 140 ++++++++++++++++++ docs/ai-chat/changelog.mdx | 63 ++++++++ docs/ai-chat/client-protocol.mdx | 28 ++++ docs/ai-chat/frontend.mdx | 36 +++++ .../ai-chat/patterns/database-persistence.mdx | 32 ++++ docs/ai-chat/reference.mdx | 56 +++++++ 6 files changed, 355 insertions(+) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index e75f34a502b..b5c6726dd85 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -181,6 +181,56 @@ export const myChat = chat.agent({ `onValidateMessages` fires **before** `onTurnStart` and message accumulation. If you need to validate messages loaded from a database, do the loading in `onChatStart` or `onPreload` and let `onValidateMessages` validate the full incoming set each turn. +#### hydrateMessages + +Load the full message history from your backend on every turn, replacing the built-in linear accumulator. When set, the hook's return value becomes the accumulated state — the normal accumulation logic (append for submit, replace for regenerate) is skipped entirely. + +Use this when the backend should be the source of truth for message history — abuse prevention, branching conversations (DAGs), or rollback/undo support. 
+ +| Field | Type | Description | +| ------------------ | ----------------------------------------------------- | --------------------------------------------------------- | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Turn number (0-indexed) | +| `trigger` | `"submit-message" \| "regenerate-message" \| "action"` | The trigger type for this turn | +| `incomingMessages` | `UIMessage[]` | Validated wire messages from the frontend (empty for actions) | +| `previousMessages` | `UIMessage[]` | Accumulated UI messages before this turn (`[]` on turn 0) | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `previousRunId` | `string \| undefined` | The previous run ID (if continuation) | + +```ts +export const myChat = chat.agent({ + id: "my-chat", + hydrateMessages: async ({ chatId, trigger, incomingMessages }) => { + const record = await db.chat.findUnique({ where: { id: chatId } }); + const stored = record?.messages ?? []; + + // Append the new user message and persist + if (trigger === "submit-message" && incomingMessages.length > 0) { + const newMsg = incomingMessages[incomingMessages.length - 1]!; + stored.push(newMsg); + await db.chat.update({ + where: { id: chatId }, + data: { messages: stored }, + }); + } + + return stored; + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +**Lifecycle position:** `onValidateMessages` → **`hydrateMessages`** → `onChatStart` (turn 0) → `onTurnStart` → `run()` + +After the hook returns, any incoming wire message whose ID matches a hydrated message is auto-merged — this makes [tool approvals](/ai-chat/frontend#tool-approvals) work transparently with hydration. + + + `hydrateMessages` also fires for [action](#actions) turns (`trigger: "action"`) with empty `incomingMessages`. 
This lets the action handler work with the latest DB state. + + #### onTurnStart Fires at the start of every turn, after message accumulation and `onChatStart` (turn 0), but **before** `run()` executes. Use it to persist messages before streaming begins — so a mid-stream page refresh still shows the user's message. @@ -785,6 +835,96 @@ export const myChat = chat.agent({ example, and how it differs from pending messages. +### Actions + +Custom actions let the frontend send structured commands (undo, rollback, edit) that modify the conversation state before the LLM responds. Actions use the same input stream as messages, so they wake the agent from suspension and trigger a full turn. + +Define an `actionSchema` for validation and an `onAction` handler that uses `chat.history` to modify state: + +```ts +import { z } from "zod"; + +export const myChat = chat.agent({ + id: "my-chat", + actionSchema: z.discriminatedUnion("type", [ + z.object({ type: z.literal("undo") }), + z.object({ type: z.literal("rollback"), targetMessageId: z.string() }), + z.object({ type: z.literal("edit"), messageId: z.string(), text: z.string() }), + ]), + + onAction: async ({ action }) => { + switch (action.type) { + case "undo": + chat.history.slice(0, -2); // Remove last user + assistant exchange + break; + case "rollback": + chat.history.rollbackTo(action.targetMessageId); + break; + case "edit": + chat.history.replace(action.messageId, { + id: action.messageId, + role: "user", + parts: [{ type: "text", text: action.text }], + }); + break; + } + }, + + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +**Lifecycle flow:** Wake → parse action against `actionSchema` → `hydrateMessages` (if set) → **`onAction`** → apply `chat.history` mutations → `onTurnStart` → `run()` → `onTurnComplete` + +On the frontend, send actions via the transport: + +```ts +// Browser — TriggerChatTransport +const stream = await 
transport.sendAction(chatId, { type: "undo" }); + +// Server — AgentChat +const stream = await agentChat.sendAction({ type: "rollback", targetMessageId: "msg-3" }); +``` + +The action payload is validated against `actionSchema` on the backend — invalid actions throw and abort the turn. The `action` parameter in `onAction` is fully typed from the schema. + + + Actions always trigger `run()` — the LLM responds to the modified state. For silent state changes that don't need a response (e.g. injecting background context), use [`chat.inject()`](/ai-chat/background-injection) instead. + + +### chat.history {#chat-history} + +Imperative API for modifying the accumulated message history. Works from any hook (`onAction`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`) or from `run()` and AI SDK tools. + +| Method | Description | +|--------|-------------| +| `chat.history.all()` | Read the current accumulated UI messages (returns a copy) | +| `chat.history.set(messages)` | Replace all messages (same as `chat.setMessages()`) | +| `chat.history.remove(messageId)` | Remove a specific message by ID | +| `chat.history.rollbackTo(messageId)` | Keep messages up to and including the given ID (undo) | +| `chat.history.replace(messageId, message)` | Replace a specific message by ID (edit) | +| `chat.history.slice(start, end?)` | Keep only messages in the given range | + +```ts +// Undo the last exchange in onAction +onAction: async ({ action }) => { + if (action.type === "undo") { + chat.history.slice(0, -2); + } +}, + +// Trim history in onTurnComplete +onTurnComplete: async ({ uiMessages }) => { + if (uiMessages.length > 50) { + chat.history.slice(-20); + } +}, +``` + +Mutations use the same deferred mechanism as `chat.setMessages()` — they are applied at lifecycle checkpoints (after hooks return). Multiple mutations in the same hook compose correctly. 
+ ### prepareMessages Transform model messages before they're used anywhere — in `run()`, in compaction rebuilds, and in compaction results. Define once, applied everywhere. diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index 21b899dfeb4..668f19ca173 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,6 +4,69 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- + + +## `hydrateMessages` — backend-controlled message history + +Load message history from your database on every turn instead of trusting the frontend accumulator. The hook replaces the built-in linear accumulation entirely — the backend is the source of truth. + +```ts +chat.agent({ + id: "my-chat", + hydrateMessages: async ({ chatId, trigger, incomingMessages }) => { + const stored = await db.getMessages(chatId); + if (trigger === "submit-message" && incomingMessages.length > 0) { + stored.push(incomingMessages[incomingMessages.length - 1]!); + await db.persistMessages(chatId, stored); + } + return stored; + }, +}); +``` + +Tool approval updates are auto-merged after hydration — no extra handling needed. + +See [hydrateMessages](/ai-chat/backend#hydratemessages). + +## `chat.history` — imperative message mutations + +Modify the accumulated message history from any hook or `run()`: + +```ts +chat.history.rollbackTo(messageId); // Undo — keep up to this message +chat.history.remove(messageId); // Remove one message +chat.history.replace(id, newMsg); // Edit a message +chat.history.slice(0, -2); // Remove last 2 messages +chat.history.all(); // Read current state +``` + +See [chat.history](/ai-chat/backend#chat-history). + +## Custom actions — `actionSchema` + `onAction` + +Send typed actions (undo, rollback, edit) from the frontend via `transport.sendAction()`. Actions wake the agent, fire `onAction`, then trigger a normal `run()` turn. 
+ +```ts +chat.agent({ + id: "my-chat", + actionSchema: z.discriminatedUnion("type", [ + z.object({ type: z.literal("undo") }), + z.object({ type: z.literal("rollback"), targetMessageId: z.string() }), + ]), + onAction: async ({ action }) => { + if (action.type === "undo") chat.history.slice(0, -2); + if (action.type === "rollback") chat.history.rollbackTo(action.targetMessageId); + }, +}); +``` + +Frontend: `transport.sendAction(chatId, { type: "undo" })` +Server: `agentChat.sendAction({ type: "undo" })` + +See [Actions](/ai-chat/backend#actions) and [Sending actions](/ai-chat/frontend#sending-actions). + + + ## `chat.response` — persistent data parts diff --git a/docs/ai-chat/client-protocol.mdx b/docs/ai-chat/client-protocol.mdx index 7ac159f6d93..d20cd79ad76 100644 --- a/docs/ai-chat/client-protocol.mdx +++ b/docs/ai-chat/client-protocol.mdx @@ -290,6 +290,34 @@ The agent matches the incoming message by its `id` against the accumulated conve The message `id` must match the one the agent assigned during streaming. If you're using `TriggerChatTransport`, IDs are kept in sync automatically. Custom transports should use the `messageId` from the stream's `start` chunk. +## Custom actions + +Send a custom action (undo, rollback, edit) to the agent using the same `chat-messages` input stream. Actions use `trigger: "action"` and carry a custom payload in the `action` field: + +```bash +POST /realtime/v1/streams/{runId}/input/chat-messages +Authorization: Bearer +Content-Type: application/json + +{ + "data": { + "messages": [], + "chatId": "conversation-123", + "trigger": "action", + "action": { "type": "undo" }, + "metadata": { "userId": "user-456" } + } +} +``` + +Actions wake the agent from suspension (same as messages), fire the `onAction` hook, then trigger a normal `run()` turn. The `action` payload is validated against the agent's `actionSchema`. 
+ +After sending, subscribe to the output stream to receive the agent's response — the same flow as [Step 2](#step-2-subscribe-to-the-output-stream). + + + `messages` is empty for actions. The agent's `onAction` handler modifies the conversation state via `chat.history.*`, and the LLM responds to the updated state. See [Actions](/ai-chat/backend#actions) for backend setup. + + ## Pending and steering messages You can send messages to the agent **while it's still streaming a response**. These are called pending messages — the agent receives them mid-turn and can inject them between tool-call steps. diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 874778eb129..515effdda8e 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -370,6 +370,42 @@ function Chat({ chatId, transport }) { for tool approval updates. +## Sending actions + +Send custom actions (undo, rollback, edit) to the agent via `transport.sendAction()`. Actions wake the agent, fire the `onAction` hook, and trigger a normal response — the LLM responds to the modified state. + +```tsx +function ChatControls({ chatId }: { chatId: string }) { + const transport = useTriggerChatTransport({ task: "my-chat", accessToken }); + + return ( +
+ + +
+ ); +} +``` + +The action payload is validated against the agent's `actionSchema` on the backend — invalid actions are rejected. See [Actions](/ai-chat/backend#actions) for the backend setup. + + + `sendAction` returns a `ReadableStream` — the agent's response to the modified state. If you're using `useChat`, the response is handled automatically through the transport. + + +For server-to-server usage, `AgentChat` has the same method: + +```ts +const stream = await agentChat.sendAction({ type: "undo" }); +for await (const chunk of stream) { + if (chunk.type === "text-delta") process.stdout.write(chunk.delta); +} +``` + ## Self-hosting If you're self-hosting Trigger.dev, pass the `baseURL` option: diff --git a/docs/ai-chat/patterns/database-persistence.mdx b/docs/ai-chat/patterns/database-persistence.mdx index e2732aea7be..f7b9e22ac94 100644 --- a/docs/ai-chat/patterns/database-persistence.mdx +++ b/docs/ai-chat/patterns/database-persistence.mdx @@ -113,6 +113,38 @@ chat.agent({ }); ``` +## Alternative: `hydrateMessages` + +For apps that need the backend to be the single source of truth for message history — abuse prevention, branching conversations, or rollback support — use [`hydrateMessages`](/ai-chat/backend#hydratemessages) instead of relying on the frontend's accumulated state. + +With hydration, the hook loads messages from your database on every turn. The frontend's messages are ignored (except for the new user message, which arrives in `incomingMessages`): + +```ts +export const myChat = chat.agent({ + id: "my-chat", + hydrateMessages: async ({ chatId, trigger, incomingMessages }) => { + const record = await db.chat.findUnique({ where: { id: chatId } }); + const stored = record?.messages ?? 
[]; + + if (trigger === "submit-message" && incomingMessages.length > 0) { + stored.push(incomingMessages[incomingMessages.length - 1]!); + await db.chat.update({ where: { id: chatId }, data: { messages: stored } }); + } + + return stored; + }, + onTurnComplete: async ({ chatId, uiMessages }) => { + // Persist the response + await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +This replaces the `onTurnStart` persistence pattern — the hook handles both loading and persisting the new message in one place. + ## Design notes - **`chatId`** is stable for the life of a thread; **`runId`** changes when the user starts a **new** run (timeout, cancel, explicit new chat). Session rows must always reflect the **current** run. diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index e76ca25e3e5..b4ebe291605 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -16,6 +16,9 @@ Options for `chat.agent()`. | `onPreload` | `(event: PreloadEvent) => Promise \| void` | — | Fires on preloaded runs before the first message | | `onChatStart` | `(event: ChatStartEvent) => Promise \| void` | — | Fires on turn 0 before `run()` | | `onValidateMessages` | `(event: ValidateMessagesEvent) => UIMessage[] \| Promise` | — | Validate/transform UIMessages before model conversion. See [onValidateMessages](/ai-chat/backend#onvalidatemessages) | +| `hydrateMessages` | `(event: HydrateMessagesEvent) => UIMessage[] \| Promise` | — | Load message history from backend, replacing the linear accumulator. See [hydrateMessages](/ai-chat/backend#hydratemessages) | +| `actionSchema` | `TaskSchema` | — | Schema for validating custom actions sent via `transport.sendAction()`. See [Actions](/ai-chat/backend#actions) | +| `onAction` | `(event: ActionEvent) => Promise \| void` | — | Handle custom actions. 
Fires after hydration, before `onTurnStart`. See [Actions](/ai-chat/backend#actions) | | `onTurnStart` | `(event: TurnStartEvent) => Promise \| void` | — | Fires every turn before `run()` | | `onBeforeTurnComplete` | `(event: BeforeTurnCompleteEvent) => Promise \| void` | — | Fires after response but before stream closes. Includes `writer`. | | `onTurnComplete` | `(event: TurnCompleteEvent) => Promise \| void` | — | Fires after each turn completes (stream closed) | @@ -116,6 +119,34 @@ Passed to the `onValidateMessages` callback. | `turn` | `number` | Turn number (0-indexed) | | `trigger` | `"submit-message" \| "regenerate-message" \| "preload" \| "close"` | The trigger type for this turn | +## HydrateMessagesEvent + +Passed to the `hydrateMessages` callback. See [hydrateMessages](/ai-chat/backend#hydratemessages). + +| Field | Type | Description | +| ------------------ | ----------------------------------------------------- | --------------------------------------------------------- | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Turn number (0-indexed) | +| `trigger` | `"submit-message" \| "regenerate-message" \| "action"` | The trigger type for this turn | +| `incomingMessages` | `UIMessage[]` | Validated wire messages from the frontend (empty for actions) | +| `previousMessages` | `UIMessage[]` | Accumulated UI messages before this turn (`[]` on turn 0) | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `continuation` | `boolean` | Whether this run is continuing an existing chat | +| `previousRunId` | `string \| undefined` | Previous run ID (only when `continuation` is true) | + +## ActionEvent + +Passed to the `onAction` callback. See [Actions](/ai-chat/backend#actions). 
+ +| Field | Type | Description | +| ------------ | --------------------------- | -------------------------------------------------------- | +| `action` | Typed by `actionSchema` | The parsed and validated action payload | +| `chatId` | `string` | Chat session ID | +| `turn` | `number` | Turn number (0-indexed) | +| `clientData` | Typed by `clientDataSchema` | Custom data from the frontend | +| `uiMessages` | `UIMessage[]` | Accumulated UI messages (after hydration, if set) | +| `messages` | `ModelMessage[]` | Accumulated model messages (after hydration, if set) | + ## TurnStartEvent Passed to the `onTurnStart` callback. @@ -371,6 +402,12 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. | `chat.cleanupAbortedParts(message)` | Remove incomplete parts from a stopped response message | | `chat.response.write(chunk)` | Write a data part that streams to the frontend AND persists in `onTurnComplete`'s `responseMessage` | | `chat.stream` | Raw chat output stream — use `.writer()`, `.pipe()`, `.append()`, `.read()`. Chunks are NOT accumulated into the response. | +| `chat.history.all()` | Read the current accumulated UI messages (returns a copy). See [chat.history](/ai-chat/backend#chat-history) | +| `chat.history.set(messages)` | Replace all accumulated messages (same as `chat.setMessages()`) | +| `chat.history.remove(messageId)` | Remove a specific message by ID | +| `chat.history.rollbackTo(messageId)` | Keep messages up to and including the given ID (undo/rollback) | +| `chat.history.replace(messageId, message)` | Replace a specific message by ID (edit) | +| `chat.history.slice(start, end?)` | Keep only messages in the given range | | `chat.MessageAccumulator` | Class that accumulates conversation messages across turns | | `chat.withUIMessage(config?)` | Returns a [ChatBuilder](/ai-chat/types#chatbuilder) with a fixed `UIMessage` subtype. 
See [Types](/ai-chat/types) | | `chat.withClientData({ schema })` | Returns a [ChatBuilder](/ai-chat/types#chatbuilder) with a fixed client data schema. See [Types](/ai-chat/types#typed-client-data-with-chatwithclientdata) | @@ -545,6 +582,25 @@ const stop = useCallback(() => { See [Stop generation](/ai-chat/frontend#stop-generation) for full details. +### transport.sendAction() + +Send a custom action to the agent. Actions wake the agent from suspension, fire `onAction`, then trigger a normal `run()` turn. + +```ts +transport.sendAction(chatId: string, action: unknown): Promise> +``` + +The action payload is validated against the agent's `actionSchema` on the backend. + +```tsx +// Undo button + +``` + +See [Actions](/ai-chat/backend#actions) for backend setup and [Sending actions](/ai-chat/frontend#sending-actions) for frontend usage. + ### transport.preload() Eagerly trigger a run before the first message. From e4dfabc84da6395a668f48294dd46f6fe5867a83 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 15 Apr 2026 17:02:05 +0100 Subject: [PATCH 26/49] Add branching pattern --- .../patterns/branching-conversations.mdx | 279 ++++++++++++++++++ docs/docs.json | 1 + 2 files changed, 280 insertions(+) create mode 100644 docs/ai-chat/patterns/branching-conversations.mdx diff --git a/docs/ai-chat/patterns/branching-conversations.mdx b/docs/ai-chat/patterns/branching-conversations.mdx new file mode 100644 index 00000000000..b4a793ba763 --- /dev/null +++ b/docs/ai-chat/patterns/branching-conversations.mdx @@ -0,0 +1,279 @@ +--- +title: "Branching conversations" +sidebarTitle: "Branching conversations" +description: "Build ChatGPT-style conversation trees with edit, regenerate, undo, and branch switching using hydrateMessages, chat.history, and actions." +--- + +Most chat UIs treat conversations as linear sequences. But real conversations branch — users edit previous messages, regenerate responses, undo exchanges, and explore alternative paths. 
This pattern shows how to build a branching conversation system using `hydrateMessages`, `chat.history`, and custom actions. + +## Data model + +The standard approach (used by ChatGPT, Open WebUI, LibreChat, and others) stores messages as a tree with parent pointers: + +```ts +// Each message is a node in the tree +type ChatNode = { + id: string; + chatId: string; + parentId: string | null; // null for root + role: "user" | "assistant"; + message: UIMessage; // the full AI SDK message + createdAt: Date; +}; +``` + +A conversation is a tree of nodes. The **active branch** is resolved by walking from a leaf node up through `parentId` pointers to the root, then reversing: + +``` +root +├── user: "Hello" +│ └── assistant: "Hi there!" +│ ├── user: "What's the weather?" ← branch A +│ │ └── assistant: "It's sunny!" +│ └── user: "Tell me a joke" ← branch B (active) +│ └── assistant: "Why did the..." +``` + +Switching branches means changing which leaf is "active" — the same tree, different path. + +## Backend setup + +### Store: tree operations + +Define helpers that read and write the node tree. Adapt to your database: + +```ts +// Resolve the active path: walk from leaf to root, reverse +async function getActiveBranch(chatId: string): Promise { + const nodes = await db.chatNode.findMany({ where: { chatId } }); + const byId = new Map(nodes.map((n) => [n.id, n])); + + // Find active leaf (most recently created leaf node) + const childIds = new Set(nodes.map((n) => n.parentId).filter(Boolean)); + const leaves = nodes.filter((n) => !childIds.has(n.id)); + const activeLeaf = leaves.sort((a, b) => b.createdAt - a.createdAt)[0]; + if (!activeLeaf) return []; + + // Walk to root + const path: UIMessage[] = []; + let current: ChatNode | undefined = activeLeaf; + while (current) { + path.unshift(current.message); + current = current.parentId ? 
byId.get(current.parentId) : undefined; + } + return path; +} + +// Append a message as a child of the current leaf +async function appendMessage(chatId: string, message: UIMessage): Promise { + const branch = await getActiveBranch(chatId); + const parentId = branch.length > 0 ? branch[branch.length - 1]!.id : null; + + await db.chatNode.create({ + data: { id: message.id, chatId, parentId, role: message.role, message, createdAt: new Date() }, + }); +} +``` + +### Agent: hydration + actions + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +export const myChat = chat.agent({ + id: "branching-chat", + + // Load the active branch from the DB on every turn. + // The frontend's message array is ignored — the tree is the source of truth. + hydrateMessages: async ({ chatId, trigger, incomingMessages }) => { + if (trigger === "submit-message" && incomingMessages.length > 0) { + await appendMessage(chatId, incomingMessages[incomingMessages.length - 1]!); + } + return getActiveBranch(chatId); + }, + + actionSchema: z.discriminatedUnion("type", [ + // Edit a previous user message — creates a sibling node in the tree + z.object({ type: z.literal("edit"), messageId: z.string(), text: z.string() }), + // Switch to a different branch by selecting a leaf node + z.object({ type: z.literal("switch-branch"), leafId: z.string() }), + // Undo the last user + assistant exchange + z.object({ type: z.literal("undo") }), + ]), + + onAction: async ({ action, chatId }) => { + switch (action.type) { + case "edit": { + // Find the original message's parent, create a sibling with new content + const original = await db.chatNode.findUnique({ where: { id: action.messageId } }); + if (!original) break; + + const newId = generateId(); + await db.chatNode.create({ + data: { + id: newId, + chatId, + parentId: original.parentId, // same parent = sibling + role: "user", + message: { id: newId, 
role: "user", parts: [{ type: "text", text: action.text }] }, + createdAt: new Date(), + }, + }); + // Active branch now resolves through the new sibling (most recent leaf) + break; + } + + case "switch-branch": { + // Mark this leaf as the most recently accessed so getActiveBranch picks it + await db.chatNode.update({ + where: { id: action.leafId }, + data: { createdAt: new Date() }, + }); + break; + } + + case "undo": { + // Remove the last two nodes (user + assistant) from the active branch + const branch = await getActiveBranch(chatId); + if (branch.length >= 2) { + const lastTwo = branch.slice(-2); + await db.chatNode.deleteMany({ + where: { id: { in: lastTwo.map((m) => m.id) } }, + }); + } + break; + } + } + + // Reload the (now modified) active branch into the accumulator + const updated = await getActiveBranch(chatId); + chat.history.set(updated); + }, + + onTurnComplete: async ({ chatId, responseMessage }) => { + // Persist the assistant's response as a new node + if (responseMessage) { + await appendMessage(chatId, responseMessage); + } + }, + + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, + }); + }, +}); +``` + +## Frontend + +### Sending actions + +Wire up edit, undo, and branch switching to the transport: + +```tsx +function MessageActions({ message, chatId }: { message: UIMessage; chatId: string }) { + const transport = useTransport(); + const [editing, setEditing] = useState(false); + const [editText, setEditText] = useState(""); + + if (message.role !== "user") return null; + + return ( +
+ {editing ? ( +
{ + transport.sendAction(chatId, { type: "edit", messageId: message.id, text: editText }); + setEditing(false); + }}> + setEditText(e.target.value)} /> + +
+ ) : ( + + )} +
+ ); +} +``` + +### Branch navigation + +To show the `< 2/3 >` sibling switcher, query the tree for siblings at each fork point. This is a frontend concern — the backend exposes the data, the UI navigates it. + +```tsx +function BranchSwitcher({ message, chatId, siblings }: { + message: UIMessage; + chatId: string; + siblings: { id: string; createdAt: string }[]; +}) { + const transport = useTransport(); + if (siblings.length <= 1) return null; + + const currentIndex = siblings.findIndex((s) => s.id === message.id); + + return ( +
+ + {currentIndex + 1}/{siblings.length} + +
+ ); +} +``` + + + The sibling data (which messages share the same parent) needs to come from your database — query it when loading the chat or include it as client data. The agent only returns the active branch via `hydrateMessages`. + + +## How it works + +| Operation | What happens | +|-----------|-------------| +| **Send message** | `hydrateMessages` appends the new message as a child of the current leaf, returns the active path | +| **Edit message** | `onAction` creates a sibling node with the same parent. The new node becomes the latest leaf, so `hydrateMessages` resolves through it. LLM responds to the edited history | +| **Regenerate** | Same as edit — create a new assistant sibling. The AI SDK's `regenerate()` handles this via `trigger: "regenerate-message"` | +| **Undo** | `onAction` removes the last two nodes. `chat.history.set()` updates the accumulator. LLM responds to the earlier state | +| **Switch branch** | `onAction` updates which leaf is "active". `hydrateMessages` loads the new path. LLM responds to the switched context | + +## Design notes + +- **Messages are immutable** — edits create siblings, not mutations. This preserves full history for analytics and auditing. +- **The tree lives in your database** — the agent loads a linear path from it via `hydrateMessages`. The agent itself doesn't know about the tree structure. +- **`hydrateMessages` + `onAction` + `chat.history`** are the three primitives. Hydration loads the active path, actions modify the tree, and `chat.history.set()` syncs the accumulator after tree modifications. +- **Frontend owns navigation** — the `< 2/3 >` UI, sibling queries, and branch switching triggers are client-side concerns. The backend just processes actions and returns responses. 
+ +## See also + +- [`hydrateMessages`](/ai-chat/backend#hydratemessages) — backend-controlled message history +- [Actions](/ai-chat/backend#actions) — custom actions with `actionSchema` and `onAction` +- [`chat.history`](/ai-chat/backend#chat-history) — imperative history mutations +- [Database persistence](/ai-chat/patterns/database-persistence) — basic persistence pattern (linear) diff --git a/docs/docs.json b/docs/docs.json index 8bcade8be94..b5fcf857d54 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -105,6 +105,7 @@ "ai-chat/patterns/sub-agents", "ai-chat/patterns/version-upgrades", "ai-chat/patterns/database-persistence", + "ai-chat/patterns/branching-conversations", "ai-chat/patterns/code-sandbox" ] }, From e3806255b685484340b2eaf71e675494dc5fe716 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 15 Apr 2026 17:46:23 +0100 Subject: [PATCH 27/49] new changelog for 0.0.0-chat-prerelease-20260415164455 --- docs/ai-chat/changelog.mdx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index 668f19ca173..463531f4344 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,6 +4,18 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- + + +## Fix: `resume: true` hangs on completed turns + +When refreshing a page after a turn completed, `useChat` with `resume: true` would hang indefinitely — `reconnectToStream` opened an SSE connection that never received data. + +Added `isStreaming` to session state. The transport sets it to `true` when streaming starts and `false` on `trigger:turn-complete`. `reconnectToStream` returns `null` immediately when `isStreaming` is false, so `resume: initialMessages.length > 0` is now safe to pass unconditionally. + +The flag flows through `onSessionChange` and is restored from `sessions` — no extra persistence code needed. 
+ + + ## `hydrateMessages` — backend-controlled message history From 76ec213fda939e6b1d77235ccb729c8ff5ff029e Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 17 Apr 2026 16:18:57 +0100 Subject: [PATCH 28/49] docs: multi-tab coordination, error stack truncation changelog --- docs/ai-chat/changelog.mdx | 22 ++++++++++++++ docs/ai-chat/frontend.mdx | 59 ++++++++++++++++++++++++++++++++++++++ docs/ai-chat/reference.mdx | 40 ++++++++++++++++++++++++++ 3 files changed, 121 insertions(+) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index 463531f4344..a3e0d76c79e 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,6 +4,28 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- + + +## Multi-tab coordination + +Prevent duplicate messages when the same chat is open in multiple browser tabs. Enable with `multiTab: true` on the transport. + +```tsx +const transport = useTriggerChatTransport({ task: "my-chat", multiTab: true, accessToken }); +const { messages, setMessages } = useChat({ id: chatId, transport }); +const { isReadOnly } = useMultiTabChat(transport, chatId, messages, setMessages); +``` + +Only one tab can send at a time. Other tabs enter read-only mode with real-time message updates via `BroadcastChannel`. When the active tab's turn completes, any tab can send next. Crashed tabs are detected via heartbeat timeout (10s). + +See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination) and [`useMultiTabChat`](/ai-chat/reference#usemultitabchat). + +## Error stack truncation + +Large error stacks no longer OOM the worker process. Stacks are capped at 50 frames (top 5 + bottom 45), individual lines at 1024 chars, messages at 1000 chars. Applied in `parseError`, `sanitizeError`, and OTel span recording. 
+ + + ## Fix: `resume: true` hangs on completed turns diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 515effdda8e..32a24705af2 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -406,6 +406,65 @@ for await (const chunk of stream) { } ``` +## Multi-tab coordination + +When the same chat is open in multiple browser tabs, `multiTab: true` prevents duplicate messages and syncs conversation state across tabs. Only one tab can send at a time. Other tabs enter read-only mode with real-time message updates. + +```tsx +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; +import { useMultiTabChat } from "@trigger.dev/sdk/chat/react"; +import { useChat } from "@ai-sdk/react"; + +function Chat({ chatId }: { chatId: string }) { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken, + multiTab: true, + }); + + const { messages, setMessages, sendMessage } = useChat({ + id: chatId, + transport, + }); + + const { isReadOnly } = useMultiTabChat(transport, chatId, messages, setMessages); + + return ( +
+ {isReadOnly && ( +
+ This chat is active in another tab. Messages are read-only. +
+ )} + {/* message list */} + +
+ ); +} +``` + +### How it works + +1. When a tab sends a message, the transport "claims" the chatId via `BroadcastChannel` +2. Other tabs detect the claim and enter read-only mode (`isReadOnly: true`) +3. The active tab broadcasts its messages so read-only tabs see updates in real-time +4. When the turn completes, the claim is released. Any tab can send next. +5. Heartbeats detect crashed tabs (10s timeout clears stale claims) + +### What `useMultiTabChat` does + +- Returns `{ isReadOnly }` for disabling the input UI +- Broadcasts `messages` from the active tab to other tabs +- Calls `setMessages` on read-only tabs when messages arrive from the active tab +- Tracks read-only state via the transport's `BroadcastChannel` coordinator + + + Multi-tab coordination is same-browser only (`BroadcastChannel` is a browser API). It gracefully degrades to a no-op in Node.js, SSR, or browsers without `BroadcastChannel` support. Cross-device coordination requires server-side involvement. + + ## Self-hosting If you're self-hosting Trigger.dev, pass the `baseURL` option: diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index b4ebe291605..db16638b5ec 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -534,6 +534,20 @@ const transport = useTriggerChatTransport({ }); ``` +### multiTab + +Enable multi-tab coordination. When `true`, only one browser tab can send messages to a given chatId at a time. Other tabs enter read-only mode with real-time message updates via `BroadcastChannel`. + +```ts +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken, + multiTab: true, +}); +``` + +No-op when `BroadcastChannel` is unavailable (SSR, Node.js). See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination). + ### triggerOptions Options forwarded to the Trigger.dev API when starting a new run. Only applies to the first message — subsequent messages reuse the same run. 
@@ -629,6 +643,32 @@ const transport = useTriggerChatTransport({ The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID. +## useMultiTabChat + +React hook for multi-tab message coordination. Import from `@trigger.dev/sdk/chat/react`. + +```tsx +import { useMultiTabChat } from "@trigger.dev/sdk/chat/react"; + +const { isReadOnly } = useMultiTabChat(transport, chatId, messages, setMessages); +``` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `transport` | `TriggerChatTransport` | Transport instance with `multiTab: true` | +| `chatId` | `string` | The chat session ID | +| `messages` | `UIMessage[]` | Current messages from `useChat` | +| `setMessages` | `(messages) => void` | Message setter from `useChat` | + +**Returns:** `{ isReadOnly: boolean }` — `true` when another tab is actively sending to this chatId. + +The hook handles: +- Tracking read-only state from the transport's `BroadcastChannel` coordinator +- Broadcasting messages when this tab is the active sender +- Receiving messages from other tabs and updating via `setMessages` + +See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination). + ## Related - [Realtime Streams](/tasks/streams) — How streams work under the hood From 4bf2f416294e832069279f5c824ee3de8d611ee5 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 18 Apr 2026 09:32:31 +0100 Subject: [PATCH 29/49] docs(ai-chat): add testing page for chat.agent Covers the mockChatAgent harness end-to-end: setup, driver methods, turn assertions, hook ordering, and the locals-based dependency injection pattern for testing against a real database (testcontainers, PGlite, or in-memory fakes). 
--- docs/ai-chat/testing.mdx | 540 +++++++++++++++++++++++++++++++++++++++ docs/docs.json | 1 + 2 files changed, 541 insertions(+) create mode 100644 docs/ai-chat/testing.mdx diff --git a/docs/ai-chat/testing.mdx b/docs/ai-chat/testing.mdx new file mode 100644 index 00000000000..b59b37c61d1 --- /dev/null +++ b/docs/ai-chat/testing.mdx @@ -0,0 +1,540 @@ +--- +title: "Testing" +sidebarTitle: "Testing" +description: "Drive a chat.agent through real turns in unit tests — no network, no task runtime, no mocking the SDK." +--- + +## Overview + +`@trigger.dev/sdk/ai/test` exports `mockChatAgent`, an offline harness that runs your `chat.agent` definition's `run()` function inside an in-memory task runtime. You send messages, actions, and stop signals through driver methods and assert against the chunks the agent emits. + +The harness exercises the real turn loop, lifecycle hooks, validation, hydration, and action routing — only the language model and the surrounding Trigger.dev runtime are replaced. Pair it with [`MockLanguageModelV3`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/mock-language-model-v3) and `simulateReadableStream` from `ai` to control LLM responses. + + + Import `@trigger.dev/sdk/ai/test` **before** your agent module. It installs the resource catalog so `chat.agent({ id, ... })` can register tasks during testing. 
+ + +## Quick start + +```ts trigger/my-chat.test.ts +import { mockChatAgent } from "@trigger.dev/sdk/ai/test"; + +import { describe, expect, it } from "vitest"; +import { simulateReadableStream } from "ai"; +import { MockLanguageModelV3 } from "ai/test"; +import type { LanguageModelV3StreamPart } from "@ai-sdk/provider"; +import { myChatAgent } from "./my-chat.js"; + +function modelWithText(text: string) { + const chunks: LanguageModelV3StreamPart[] = [ + { type: "text-start", id: "t1" }, + { type: "text-delta", id: "t1", delta: text }, + { type: "text-end", id: "t1" }, + { + type: "finish", + finishReason: { unified: "stop", raw: "stop" }, + usage: { + inputTokens: { total: 10, noCache: 10, cacheRead: undefined, cacheWrite: undefined }, + outputTokens: { total: 10, text: 10, reasoning: undefined }, + }, + }, + ]; + return new MockLanguageModelV3({ + doStream: async () => ({ stream: simulateReadableStream({ chunks }) }), + }); +} + +describe("myChatAgent", () => { + it("streams the model's response", async () => { + const model = modelWithText("hello world"); + const harness = mockChatAgent(myChatAgent, { + chatId: "test-1", + clientData: { model }, + }); + + try { + const turn = await harness.sendMessage({ + id: "u1", + role: "user", + parts: [{ type: "text", text: "hi" }], + }); + + const text = turn.chunks + .filter((c) => c.type === "text-delta") + .map((c) => (c as { delta: string }).delta) + .join(""); + expect(text).toBe("hello world"); + } finally { + await harness.close(); + } + }); +}); +``` + +The agent reads the mock model from `clientData`: + +```ts trigger/my-chat.ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText, type LanguageModel } from "ai"; +import { z } from "zod"; + +type ClientData = { model: LanguageModel }; + +export const myChatAgent = chat + .withClientData({ + schema: z.custom( + (v) => !!v && typeof v === "object" && "model" in (v as object) + ), + }) + .agent({ + id: "my-chat", + run: async ({ messages, clientData, 
signal }) => { + return streamText({ + model: clientData?.model ?? "openai/gpt-4o-mini", + messages, + abortSignal: signal, + }); + }, + }); +``` + +## Setup + +### Install dev dependencies + +The harness itself ships with `@trigger.dev/sdk`. You need a test runner and the AI SDK's mock model utilities: + +```bash +pnpm add -D vitest ai @ai-sdk/provider +``` + +`@ai-sdk/provider` is only needed to type the chunk array as `LanguageModelV3StreamPart[]` — drop it if you cast inline. + +### Vitest config + +A minimal `vitest.config.ts` for a Trigger.dev project: + +```ts +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + include: ["src/**/*.test.ts"], + environment: "node", + }, +}); +``` + +### Import order + +`mockChatAgent` must be imported **first** so the resource catalog is installed before any `chat.agent({ id, ... })` registration runs: + +```ts +// ✅ Correct +import { mockChatAgent } from "@trigger.dev/sdk/ai/test"; +import { myAgent } from "./my-agent.js"; + +// ❌ Wrong — agent loads before the catalog exists +import { myAgent } from "./my-agent.js"; +import { mockChatAgent } from "@trigger.dev/sdk/ai/test"; +``` + +If the agent isn't registered when `mockChatAgent` runs, you'll get: + +``` +mockChatAgent: no task registered with id "my-chat". +``` + +## Inject the model via clientData + +`MockLanguageModelV3` lives in test code and shouldn't leak into your agent module. Pass it through `clientData` so the agent picks it up at runtime in tests, and falls back to a real model in production: + +```ts trigger/agent.ts +type ClientData = { model?: LanguageModel }; + +export const agent = chat + .withClientData({ schema: z.custom() }) + .agent({ + id: "agent", + run: async ({ messages, clientData, signal }) => { + return streamText({ + model: clientData?.model ?? 
openai("gpt-4o-mini"), + messages, + abortSignal: signal, + }); + }, + }); +``` + +```ts agent.test.ts +const harness = mockChatAgent(agent, { + chatId: "test", + clientData: { model: mockModel }, +}); +``` + +## Driving turns + +The harness exposes one method per chat trigger. Each waits for the next `trigger:turn-complete` chunk before resolving. + +### sendMessage + +```ts +const turn = await harness.sendMessage({ + id: "u1", + role: "user", + parts: [{ type: "text", text: "hi" }], +}); +``` + +Pass an array to send multiple messages at once. + +### sendRegenerate + +```ts +const turn = await harness.sendRegenerate(messages); +``` + +Equivalent to the frontend's `useChat().regenerate()` — replays a turn with the given message history. + +### sendAction + +Routes a payload through `actionSchema` + `onAction`: + +```ts +const turn = await harness.sendAction({ type: "undo" }); +``` + +If the action fails schema validation, an `error` chunk appears in `turn.rawChunks`. + +### sendStop + +Fires a stop signal. Does **not** wait for a turn — the agent's `signal.aborted` becomes `true` and the current turn unwinds: + +```ts +await harness.sendStop("user requested stop"); +``` + +### close + +Sends a `close` trigger, closes the input stream, and aborts the run signal so the task exits cleanly. Always call this at the end of every test: + +```ts +afterEach(() => harness.close()); +// or with a try/finally +try { + await harness.sendMessage(...); +} finally { + await harness.close(); +} +``` + +## Inspecting output + +Each turn returns: + +```ts +type MockChatAgentTurn = { + chunks: UIMessageChunk[]; // text-delta, tool-call, etc. 
+ rawChunks: unknown[]; // includes control chunks (turn-complete, errors) +}; +``` + +The harness also exposes accumulators across all turns: + +```ts +harness.allChunks; // every UIMessageChunk since creation +harness.allRawChunks; // every raw chunk including control frames +``` + +A small helper to assemble streamed text: + +```ts +function collectText(chunks: UIMessageChunk[]): string { + return chunks + .filter((c) => c.type === "text-delta") + .map((c) => (c as { delta: string }).delta) + .join(""); +} +``` + +## Common patterns + +### Asserting hook order + +```ts +const events: string[] = []; +const agent = chat.agent({ + id: "hook-order", + onChatStart: async () => { events.push("onChatStart"); }, + onTurnStart: async () => { events.push("onTurnStart"); }, + onBeforeTurnComplete: async () => { events.push("onBeforeTurnComplete"); }, + onTurnComplete: async () => { events.push("onTurnComplete"); }, + run: async ({ messages, signal }) => { + events.push("run"); + return streamText({ model, messages, abortSignal: signal }); + }, +}); + +const harness = mockChatAgent(agent, { chatId: "t" }); +await harness.sendMessage(userMessage("hi")); + +// onTurnComplete fires after the turn-complete chunk is written — +// give it a tick before asserting. 
+await new Promise((r) => setTimeout(r, 20)); +expect(events).toEqual([ + "onChatStart", + "onTurnStart", + "run", + "onBeforeTurnComplete", + "onTurnComplete", +]); +await harness.close(); +``` + +### Testing onValidateMessages + +```ts +const turn = await harness.sendMessage(userMessage("hello blocked-word")); + +// The turn completes with an error chunk, not text +expect(collectText(turn.chunks)).toBe(""); +expect(turn.rawChunks.some((c) => + typeof c === "object" && c !== null && + (c as { type?: string }).type === "trigger:turn-complete" +)).toBe(true); +``` + +### Testing actions and rejection + +```ts +// Valid action +await harness.sendAction({ type: "undo" }); + +// Invalid action — schema validation fails, error chunk emitted +const turn = await harness.sendAction({ type: "not-a-real-action" }); +const errors = turn.rawChunks.filter((c) => + typeof c === "object" && c !== null && + (c as { type?: string }).type === "error" +); +expect(errors.length).toBeGreaterThan(0); +``` + +### Multi-turn accumulation + +The harness preserves chat history across turns, just like the real runtime: + +```ts +const seenLengths: number[] = []; +const agent = chat.agent({ + id: "multi-turn", + run: async ({ messages, signal }) => { + seenLengths.push(messages.length); + return streamText({ model, messages, abortSignal: signal }); + }, +}); + +const harness = mockChatAgent(agent, { chatId: "t" }); +await harness.sendMessage(userMessage("first")); +await harness.sendMessage(userMessage("second")); +await harness.sendMessage(userMessage("third")); + +// Turn 1: 1 message; turn 2: user + assistant + user = 3; turn 3: 5 +expect(seenLengths).toEqual([1, 3, 5]); +``` + +### Hydrating from a "database" + +Use `clientData` to seed a synthetic prior context for `hydrateMessages`: + +```ts +const hydrated = [ + { id: "h1", role: "user", parts: [{ type: "text", text: "prior question" }] }, + { id: "h2", role: "assistant", parts: [{ type: "text", text: "prior answer" }] }, +]; + +const 
harness = mockChatAgent(agent, { + chatId: "test-hydrate", + clientData: { model, hydrated: [...hydrated, userMessage("follow up")] }, +}); + +await harness.sendMessage(userMessage("follow up")); + +// Model should have been called with the hydrated context +expect(model.doStreamCalls[0]!.prompt.length).toBeGreaterThanOrEqual(3); +``` + +The agent reads `clientData.hydrated` inside its `hydrateMessages` hook: + +```ts +hydrateMessages: async ({ clientData, incomingMessages }) => { + return clientData?.hydrated ?? incomingMessages; +}, +``` + +## Testing against a database + +Most agents call into a database from `hydrateMessages` or `onTurnComplete` to load history and persist replies. You shouldn't pass database clients through `clientData` — that's wire-data from the browser. Use **`locals` for dependency injection** instead. + +`locals` are task-scoped, server-side only, and untyped to the wire format. The mock harness exposes a `setupLocals` callback that pre-seeds them before the agent's `run()` starts. + +### Define a locals key for the dependency + +Create a single key per dependency, exported from your project: + +```ts db.ts +import { locals } from "@trigger.dev/sdk"; +import { PrismaClient } from "@prisma/client"; + +export type Db = PrismaClient; +export const dbKey = locals.create("db"); + +export function getDb(): Db { + // Returns the seeded test instance if present, otherwise lazy-creates prod. + return locals.get(dbKey) ?? locals.set(dbKey, new PrismaClient()); +} +``` + +### Use the dependency from agent hooks + +Hooks read from `locals` instead of constructing clients themselves: + +```ts trigger/agent.ts +import { chat } from "@trigger.dev/sdk/ai"; +import { getDb } from "../db"; + +export const agent = chat.agent({ + id: "agent", + hydrateMessages: async ({ chatId }) => { + const db = getDb(); + const row = await db.chat.findUnique({ where: { id: chatId } }); + return (row?.messages as UIMessage[]) ?? 
[]; + }, + onTurnComplete: async ({ chatId, messages }) => { + const db = getDb(); + await db.chat.upsert({ + where: { id: chatId }, + create: { id: chatId, messages }, + update: { messages }, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +### Inject a test database in the harness + +`setupLocals` runs *before* the agent starts, so `getDb()` returns the test instance for every hook: + +```ts agent.test.ts +import { mockChatAgent } from "@trigger.dev/sdk/ai/test"; +import { dbKey } from "./db"; +import { agent } from "./trigger/agent"; + +const harness = mockChatAgent(agent, { + chatId: "test-1", + setupLocals: ({ set }) => { + set(dbKey, testDb); // testDb = your testcontainers Prisma client, sqlite stub, etc. + }, +}); +``` + +### Pick a backing database + +You still need to decide what `testDb` actually is: + +- **Testcontainers (recommended).** Spin up Postgres in Docker via `@internal/testcontainers` (or `testcontainers` directly), run migrations, hand the resulting `PrismaClient` to `set(dbKey, ...)`. Highest fidelity — catches schema drift, migration bugs, transaction issues. +- **Embedded SQLite / PGlite.** Fast and no Docker, but a different SQL dialect from production. Fine for hooks that only do simple CRUD; risky for raw SQL or Postgres-specific features. +- **In-memory fake.** Hand-rolled object with the same interface as your DB module. Fastest, lowest fidelity — works when you only care about whether the agent *called* the right method, not what the DB *did* with it. + +### Drizzle, Kysely, etc. + +The pattern is the same — replace `PrismaClient` with your client class: + +```ts db.ts +import { drizzle } from "drizzle-orm/node-postgres"; +import { Pool } from "pg"; + +export type Db = ReturnType; +export const dbKey = locals.create("db"); + +export function getDb(): Db { + return locals.get(dbKey) ?? 
locals.set( + dbKey, + drizzle(new Pool({ connectionString: process.env.DATABASE_URL })), + ); +} +``` + + + The same `setupLocals` pattern works for any server-side dependency: feature flag clients, Stripe SDK, internal HTTP clients, Sentry. Anything you'd normally inject via constructor parameters in a class-based design. + + +## API reference + +### mockChatAgent(agent, options?) + +```ts +function mockChatAgent( + agent: { id: string }, + options?: MockChatAgentOptions, +): MockChatAgentHarness; +``` + +#### MockChatAgentOptions + +| Option | Type | Default | Description | +| -------------- | -------------------------- | ------------- | ------------------------------------------------------------------------------------------------------ | +| `chatId` | `string` | `"test-chat"` | Chat session id passed in every wire payload. | +| `clientData` | `unknown` | `undefined` | Client-provided data forwarded to `run()` and every hook. | +| `taskContext` | `MockTaskContextOptions` | `{}` | Overrides for the mock `TaskRunContext` (run id, environment, organization, etc.). | +| `preload` | `boolean` | `true` | Start in preload mode. When `false`, the first `sendMessage()` starts turn 0 directly without preload. | +| `setupLocals` | `({ set }) => void \| Promise` | `undefined` | Callback invoked before `run()` starts. Use `set(key, value)` to inject server-side dependencies (DB clients, service stubs) that the agent reads via `locals.get()`. | + +#### MockChatAgentHarness + +| Member | Description | +| ------------------------------------- | ------------------------------------------------------------------------------------------------------ | +| `chatId` | The chat session id used by this harness. | +| `sendMessage(message \| messages)` | Send a user message (or array). Returns the chunks produced during the resulting turn. | +| `sendRegenerate(messages)` | Send a regenerate trigger with a message history. 
| +| `sendAction(action)` | Route a custom action through `actionSchema` + `onAction`. | +| `sendStop(message?)` | Fire a stop signal. Does not wait for the turn — the run's `signal.aborted` becomes `true`. | +| `close()` | Send a `close` trigger, abort the signal, wait for `run()` to return. Always call at end of test. | +| `allChunks` | Every `UIMessageChunk` emitted since the harness was created. | +| `allRawChunks` | Every raw chunk emitted since creation, including control chunks (`trigger:turn-complete`, errors). | + +### runInMockTaskContext + +`mockChatAgent` is a higher-level wrapper around `runInMockTaskContext`, exported from `@trigger.dev/core/v3/test`. Use it directly when you need to drive a non-chat task offline: + +```ts +import { runInMockTaskContext } from "@trigger.dev/core/v3/test"; + +await runInMockTaskContext( + async ({ inputs, outputs, ctx }) => { + setTimeout(() => { + inputs.send("chat-messages", { messages: [], chatId: "c1" }); + }, 0); + + await myTask.fns.run(payload, { + ctx, + signal: new AbortController().signal, + }); + + expect(outputs.chunks("chat")).toContainEqual( + expect.objectContaining({ type: "text-delta", delta: "hi" }), + ); + }, + { ctx: { run: { id: "run_abc" } } }, +); +``` + +## Limitations + +- **No network.** The mock task context replaces realtime streams, run metadata, lifecycle managers, and the runtime. Anything that bypasses these (raw `fetch`, direct DB clients) runs against the real network. +- **Single agent per process.** The resource catalog is process-global; tests within a file are sequential by default. If you parallelize across files, vitest runs each file in its own worker, which avoids registry collisions. +- **Time-sensitive hooks.** `onTurnComplete` runs *after* the `turn-complete` chunk is written, so `sendMessage()` resolves before that hook finishes. Add a brief `await new Promise((r) => setTimeout(r, 20))` if you need to assert on hook side-effects. 
+- **No real LLM.** The harness does not call providers — you must inject `MockLanguageModelV3` (or another mock) yourself. diff --git a/docs/docs.json b/docs/docs.json index b5fcf857d54..f0e9498c898 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -99,6 +99,7 @@ "ai-chat/pending-messages", "ai-chat/background-injection", "ai-chat/mcp", + "ai-chat/testing", { "group": "Patterns", "pages": [ From 4fc69ff9e577393801aeceae68462e7f080d72de Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 18 Apr 2026 09:39:21 +0100 Subject: [PATCH 30/49] docs(ai-chat): changelog entry for 0.0.0-chat-prerelease-20260418083610 Documents the new mockChatAgent test harness, setupLocals DI pattern, and the lower-level runInMockTaskContext utility. --- docs/ai-chat/changelog.mdx | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index a3e0d76c79e..bda2f1ae2f4 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,6 +4,58 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- + + +## Offline test harness for `chat.agent` + +`@trigger.dev/sdk/ai/test` now ships `mockChatAgent`, a harness that drives a `chat.agent` definition through real turns without network or task runtime. Send messages, actions, and stop signals; inspect emitted chunks; assert on hook order. + +```ts +import { mockChatAgent } from "@trigger.dev/sdk/ai/test"; +import { MockLanguageModelV3 } from "ai/test"; +import { myAgent } from "./my-agent"; + +const harness = mockChatAgent(myAgent, { + chatId: "test-1", + clientData: { model: new MockLanguageModelV3({ /* ... 
*/ }) }, +}); + +const turn = await harness.sendMessage({ + id: "u1", + role: "user", + parts: [{ type: "text", text: "hi" }], +}); +expect(turn.chunks).toContainEqual( + expect.objectContaining({ type: "text-delta", delta: "hello" }), +); +await harness.close(); +``` + +### Dependency injection via locals + +`setupLocals` pre-seeds `locals` before `run()` starts — the pattern for injecting database clients, service stubs, and other server-side dependencies that shouldn't leak through untrusted `clientData`: + +```ts +import { dbKey } from "./db"; + +const harness = mockChatAgent(agent, { + chatId: "test-1", + setupLocals: ({ set }) => { + set(dbKey, testDb); + }, +}); +``` + +Hooks then read the seeded value with `locals.get(dbKey)`. Falls through to the production client in real runs. + +See [Testing](/ai-chat/testing). + +## `runInMockTaskContext` — lower-level test harness + +`@trigger.dev/core/v3/test` now exports `runInMockTaskContext` for unit-testing any task code offline (not just chat agents). Installs in-memory managers for `locals`, `lifecycleHooks`, `runtime`, `inputStreams`, and `realtimeStreams`, plus a mock `TaskContext`. Drivers let you push data into input streams and inspect chunks written to output streams. 
+ + + ## Multi-tab coordination From ae880dcfe1d23abdccfc601963e09632cf26f520 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 18 Apr 2026 09:50:31 +0100 Subject: [PATCH 31/49] docs(ai-chat): comprehensive error handling guide --- docs/ai-chat/error-handling.mdx | 373 ++++++++++++++++++++++++++++++++ docs/docs.json | 1 + 2 files changed, 374 insertions(+) create mode 100644 docs/ai-chat/error-handling.mdx diff --git a/docs/ai-chat/error-handling.mdx b/docs/ai-chat/error-handling.mdx new file mode 100644 index 00000000000..2556c40751a --- /dev/null +++ b/docs/ai-chat/error-handling.mdx @@ -0,0 +1,373 @@ +--- +title: "Error handling" +sidebarTitle: "Error handling" +description: "How errors flow through chat.agent — stream errors, hook errors, run failures — and how to recover." +--- + +`chat.agent` errors fall into four layers, each with different recovery semantics. The default behavior is **conversation-preserving**: a thrown error in a hook or `run()` does not kill the chat. The current turn ends with an error chunk, and the agent waits for the user's next message. + +## Error layers at a glance + +| Layer | Source | Default behavior | Recovery | +|-------|--------|------------------|----------| +| **Stream** | `streamText` errors mid-response (rate limits, model API failures) | `onError` callback converts to error chunk | Sanitize message via `uiMessageStreamOptions.onError` | +| **Hook / turn** | Throws in `onValidateMessages`, `onTurnStart`, `run`, etc. | Error chunk + turn-complete written to stream; conversation continues | Catch in your hook, or rely on default | +| **Run** | Unhandled exception escapes the run | Run fails. No retry by default. Standard task `onFailure` fires. | `onFailure` task hook | +| **Frontend** | Stream delivers `{ type: "error", errorText }` | `useChat` exposes via `error` field and `onError` callback | Show toast, retry button, etc. 
| + +## Stream errors mid-turn + +When the model API errors mid-response (rate limits, network failures, malformed output), the AI SDK's `streamText` calls the `onError` callback. Use `uiMessageStreamOptions.onError` to convert the error to a user-friendly string. The string is sent to the frontend as an error chunk. + +```ts +import { chat } from "@trigger.dev/sdk/ai"; + +export const myChat = chat.agent({ + id: "my-chat", + uiMessageStreamOptions: { + onError: (error) => { + console.error("Stream error:", error); + if (error instanceof Error && error.message.includes("rate limit")) { + return "Rate limited. Please wait a moment and try again."; + } + if (error instanceof Error && error.message.includes("context_length")) { + return "This conversation is too long. Please start a new chat."; + } + return "Something went wrong while generating a response. Please try again."; + }, + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + + + Returning a string from `onError` is what gets shown to the user. Do not return raw error messages — they may leak internal details (API keys, stack traces, etc.). + + +The frontend receives this as an error chunk that `useChat` exposes via its `error` field: + +```tsx +const { messages, error } = useChat({ transport }); + +{error &&
<div className="error">{error.message}</div>
} +``` + +## Hook and turn errors + +If any lifecycle hook (`onValidateMessages`, `onChatStart`, `onTurnStart`, `hydrateMessages`, `onAction`, `prepareMessages`, `onBeforeTurnComplete`, `onTurnComplete`) or `run()` throws an unhandled exception, the turn loop catches it: + +1. Writes `{ type: "error", errorText: error.message }` to the stream +2. Writes a turn-complete chunk to close the turn +3. Waits for the next user message + +The conversation stays alive. The user can send another message and continue. + +```ts +export const myChat = chat.agent({ + id: "my-chat", + onTurnStart: async ({ chatId, uiMessages }) => { + // If this throws, the turn ends with an error chunk + // and the agent waits for the next message + await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }); + }, + run: async ({ messages, signal }) => { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +### Catching errors in your own hooks + +For granular control, wrap your hook code in try/catch and decide what to do. Common patterns: + +```ts +onValidateMessages: async ({ messages }) => { + try { + return await validateUIMessages({ messages, tools: chatTools }); + } catch (err) { + // Log to your error tracking service + Sentry.captureException(err); + // Throw a user-facing error message — this becomes the error chunk + throw new Error("Your message contains invalid data and could not be sent."); + } +}, +``` + + + The `Error.message` you throw is sent verbatim to the frontend as the error chunk's `errorText`. Use messages safe for end users. + + +### Catching errors inside `run()` + +`run()` is your code — wrap it in try/catch for full control. 
This is the right place to save partial state to your DB before the error chunk goes out: + +```ts +run: async ({ messages, chatId, signal }) => { + try { + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + } catch (err) { + // Save the failed turn for debugging / undo + await db.failedTurn.create({ + data: { + chatId, + error: err instanceof Error ? err.message : String(err), + messages, + }, + }); + throw err; // Re-throw to trigger the error chunk + } +}, +``` + +## Saving error state to your DB + +To persist errors for debugging or undo, use `onTurnComplete` (which fires even after errors) or the standard task `onComplete` hook. + +### Using `onTurnComplete` + +`onTurnComplete` fires after every turn — successful **or** errored. The `responseMessage` will be undefined or partial on errors. Use this to mark the turn as failed: + +```ts +onTurnComplete: async ({ chatId, uiMessages, responseMessage, stopped }) => { + // Persist the messages regardless of error state + await db.chat.update({ + where: { id: chatId }, + data: { + messages: uiMessages, + // Mark the chat as errored if no response message + lastTurnStatus: responseMessage ? "ok" : stopped ? "stopped" : "errored", + }, + }); +}, +``` + +### Using the standard `onFailure` task hook + +For run-level failures (the entire run dies), use the standard task `onFailure` hook. This fires when the run terminates with an unhandled exception: + +```ts +chat.agent({ + id: "my-chat", + onFailure: async ({ error, ctx }) => { + // Log run-level failure to your monitoring service + await monitoring.recordRunFailure({ + runId: ctx.run.id, + chatId: ctx.run.tags.find(t => t.startsWith("chat:"))?.slice(5), + error: error.message, + }); + }, + run: async ({ messages, signal }) => { + return streamText({ ... }); + }, +}); +``` + + + `chat.agent` uses `retry: { maxAttempts: 1 }` internally, so the run never retries on failure. 
To add run-level retries, wrap the agent in a parent task or implement your own retry logic in the frontend (re-send the message). + + +## Recovery patterns + +### Pattern 1: Undo to last successful response + +A common pattern is to let the user "undo" the failed turn and try again. Combine `chat.history.rollbackTo` with a custom action: + +```ts +chat.agent({ + id: "my-chat", + actionSchema: z.discriminatedUnion("type", [ + z.object({ type: z.literal("undo") }), + ]), + onAction: async ({ action, uiMessages }) => { + if (action.type === "undo") { + // Find the last user message and roll back to it + const lastUserIdx = [...uiMessages].reverse().findIndex(m => m.role === "user"); + if (lastUserIdx !== -1) { + const targetIdx = uiMessages.length - 1 - lastUserIdx - 1; + const target = uiMessages[targetIdx]; + if (target) chat.history.rollbackTo(target.id); + } + } + }, + run: async ({ messages, signal }) => { + return streamText({ ... }); + }, +}); +``` + +On the frontend, show an "Undo" button when an error occurs: + +```tsx +{error && ( + +)} +``` + +### Pattern 2: Retry the last message + +For transient errors (network blips, rate limits), the simplest recovery is to re-send the last user message. The AI SDK's `useChat` provides `regenerate()`: + +```tsx +const { messages, error, regenerate } = useChat({ transport }); + +{error && ( + +)} +``` + +`regenerate()` removes the last assistant response and re-sends. Combined with `onValidateMessages` or `hydrateMessages`, you can reload the canonical state from your DB before retrying. + +### Pattern 3: Save partial responses + +When a stream errors mid-response, the `responseMessage` in `onBeforeTurnComplete` and `onTurnComplete` contains the partial output. 
Save it as a "draft" so the user can see what was generated before the error: + +```ts +onBeforeTurnComplete: async ({ chatId, responseMessage, stopped }) => { + if (responseMessage && responseMessage.parts.length > 0) { + // Save partial response — user can manually accept or discard + await db.partialResponse.create({ + data: { + chatId, + message: responseMessage, + reason: stopped ? "stopped" : "errored", + }, + }); + } +}, +``` + +### Pattern 4: Fall back to a different model + +If the primary model errors, try a fallback model in the same turn: + +```ts +run: async ({ messages, signal }) => { + try { + return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, + }); + } catch (err) { + console.warn("Primary model failed, falling back:", err); + return streamText({ + model: anthropic("claude-sonnet-4-6"), + messages, + abortSignal: signal, + }); + } +}, +``` + + + This only catches errors thrown synchronously by `streamText` setup. Errors that happen mid-stream go through `uiMessageStreamOptions.onError`, not your try/catch. + + +## What gets written to the stream on error + +When an error occurs at any layer, the frontend receives an error chunk in the SSE stream: + +``` +event: data +data: {"type":"error","errorText":"Rate limited. Please wait a moment and try again."} + +event: data +data: {"type":"trigger:turn-complete",...} +``` + +The AI SDK's `useChat` processes this and: + +1. Sets `useChat`'s `error` field to an `Error` with `message = errorText` +2. Calls the user's `onError` callback (if set) +3. 
Marks the turn as complete (`status` returns to `"ready"`) + +```tsx +const { messages, error, status } = useChat({ + transport, + onError: (err) => { + toast.error(err.message); + }, +}); +``` + +## Frontend error handling + +### Showing the error to the user + +```tsx +function Chat() { + const transport = useTriggerChatTransport({ task: "my-chat", accessToken }); + const { messages, error, sendMessage } = useChat({ transport }); + + return ( +
+    <div>
+      {messages.map(m => /* ... */)}
+      {error && (
+        <div className="error">
+          <p>{error.message}</p>
+        </div>
+      )}
+      <form onSubmit={(e) => { e.preventDefault(); sendMessage(/* ... */); }}>
+        {/* ... */}
+      </form>
+    </div>
+ ); +} +``` + +### Distinguishing error types + +The `errorText` is just a string, so distinguish error types via prefixes or codes: + +```ts +// Backend +uiMessageStreamOptions: { + onError: (error) => { + if (error.message.includes("rate limit")) return "RATE_LIMIT: Please wait and try again."; + if (error.message.includes("context_length")) return "CONTEXT_TOO_LONG: Start a new chat."; + return "UNKNOWN: Something went wrong."; + }, +}, +``` + +```tsx +// Frontend +{error?.message.startsWith("RATE_LIMIT") && } +{error?.message.startsWith("CONTEXT_TOO_LONG") && } +``` + + + For richer error structures, use [`chat.response.write()`](/ai-chat/features#custom-data-parts) with a custom `data-error` part type. This lets you ship structured error metadata (codes, retry hints, etc.) instead of stringly-typed messages. + + +## Run-level retries + +`chat.agent` uses `retry: { maxAttempts: 1 }` — the run **never retries** on unhandled failure. This is intentional: each turn is conversation-preserving, so a true run failure is severe and shouldn't silently retry (which could send duplicate API calls or mutate state twice). + +To add retry-like behavior: + +- **Per-turn retries**: handle inside `run()` with try/catch and a fallback model +- **Per-message retries**: re-send from the frontend (call `sendMessage` or `regenerate` again) +- **Whole-run retries**: wrap `chat.agent` with a parent task that has `retry` configured, and call the agent's task internally + +## Best practices + +1. **Always set `uiMessageStreamOptions.onError`** to sanitize stream errors before they reach the user. +2. **Persist messages in `onTurnStart`** so a mid-stream failure still leaves the user's message visible. +3. **Use `onTurnComplete` to mark turn status** in your DB (`ok` / `errored` / `stopped`). +4. **Don't throw raw errors with internal details** in hooks — catch, log, then throw a sanitized user-facing message. +5. **Provide an undo or retry affordance** in the UI when errors occur. +6. 
**Use `onFailure` for run-level monitoring** (Sentry, monitoring dashboards). +7. **For known transient errors (rate limits, network)**, consider a fallback model inside `run()` instead of failing the turn. + +## See also + +- [`uiMessageStreamOptions.onError`](/ai-chat/backend#error-handling-with-onerror) — stream error handler details +- [Custom actions](/ai-chat/backend#actions) — implement undo/retry actions +- [`chat.history`](/ai-chat/backend#chat-history) — rollback to a previous message +- [Database persistence](/ai-chat/patterns/database-persistence) — saving conversation state +- [Standard task hooks](/tasks/overview) — `onFailure`, `onComplete`, `onWait`, etc. diff --git a/docs/docs.json b/docs/docs.json index f0e9498c898..d1bfafb6f92 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -98,6 +98,7 @@ "ai-chat/compaction", "ai-chat/pending-messages", "ai-chat/background-injection", + "ai-chat/error-handling", "ai-chat/mcp", "ai-chat/testing", { From 2ed0f64685b89315326056589d056198fbe9905b Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 18 Apr 2026 15:53:39 +0100 Subject: [PATCH 32/49] docs(ai-chat): add human-in-the-loop patterns page Covers askUser-style mid-turn user input end-to-end: defining a no-execute tool, rendering pending tool calls on the frontend with addToolOutput + sendAutomaticallyWhen, detecting paused turns via finishReason, and persistence patterns (overwrite vs checkpoint nodes) for apps that need an immutable audit trail. Resolves TRI-8404. 
--- docs/ai-chat/patterns/human-in-the-loop.mdx | 240 ++++++++++++++++++++ docs/docs.json | 3 +- 2 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 docs/ai-chat/patterns/human-in-the-loop.mdx diff --git a/docs/ai-chat/patterns/human-in-the-loop.mdx b/docs/ai-chat/patterns/human-in-the-loop.mdx new file mode 100644 index 00000000000..5e3a44a9918 --- /dev/null +++ b/docs/ai-chat/patterns/human-in-the-loop.mdx @@ -0,0 +1,240 @@ +--- +title: "Human-in-the-loop" +sidebarTitle: "Human-in-the-loop" +description: "Pause the agent mid-response to ask the user a clarifying question, then resume with their answer." +--- + +Some turns need to stop and ask the user something before they can finish — picking between options, confirming a destructive action, or clarifying an ambiguous request. The AI SDK calls this **human-in-the-loop** (HITL), and the building block is a tool with no `execute` function. + +When the LLM calls a tool that has no `execute`, `streamText` ends with the tool call still pending. The turn completes cleanly, the frontend renders UI to collect the answer, and when the user responds, a new turn resumes with the answer merged into the same assistant message. 
+ +## How it works + +``` +Turn N: + User message → run() + LLM streams text → calls askUser tool (no execute) + streamText ends with tool-call in `input-available` state + onTurnComplete fires (finishReason = "tool-calls") + Agent idle + +Frontend: + Renders question + option buttons from tool input + User clicks → addToolOutput({ tool, toolCallId, output }) + sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls + → sendMessage() fires next turn + +Turn N+1: + hydrateMessages / accumulator sees the updated assistant message + run() is called, LLM continues from the tool result + onTurnComplete fires (finishReason = "stop", responseMessage is the FULL merged message) +``` + +The AI SDK's `toUIMessageStream` automatically reuses the assistant message ID across the pause (we pass `originalMessages` internally), so `responseMessage` in the post-resume `onTurnComplete` is the **full merged message** — the original text, the completed tool call, and any follow-up content — not just the new parts. + +## Backend: define the tool + +A HITL tool has an `inputSchema` describing what the model can ask, but **no `execute` function**. When the LLM calls it, `streamText` returns control to your agent. + +```ts trigger/my-chat.ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText, tool } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +const askUser = tool({ + description: + "Ask the user a clarifying question when you need their input. " + + "Present 2-4 options for them to pick from.", + inputSchema: z.object({ + question: z.string(), + options: z + .array( + z.object({ + id: z.string(), + label: z.string(), + description: z.string().optional(), + }) + ) + .min(2) + .max(4), + }), + // No execute function — streamText ends, the frontend supplies the output + // via addToolOutput, and the next turn continues from the result. 
+}); + +export const myChat = chat.agent({ + id: "my-chat", + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + tools: { askUser }, + abortSignal: signal, + }); + }, +}); +``` + +## Frontend: render the question and collect the answer + +Two pieces on the client: + +1. **UI for the pending tool call** — render when the tool part is in `input-available` state, i.e. the LLM has called the tool but there's no output yet. +2. **Auto-send on resolution** — use `sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls` so answering kicks off the next turn without the user having to hit "send." + +```tsx +import { useChat, lastAssistantMessageIsCompleteWithToolCalls } from "@ai-sdk/react"; +import { useTriggerChatTransport } from "@trigger.dev/react-hooks"; + +function ChatView({ chatId, accessToken }: { chatId: string; accessToken: string }) { + const transport = useTriggerChatTransport({ task: "my-chat", accessToken }); + const { messages, sendMessage, addToolOutput } = useChat({ + id: chatId, + transport, + sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls, + }); + + return ( + <> + {messages.map((m) => + m.parts.map((part, i) => { + if (part.type === "tool-askUser" && part.state === "input-available") { + return ( + + addToolOutput({ + tool: "askUser", + toolCallId: part.toolCallId, + output: { optionId: opt.id, label: opt.label }, + }) + } + /> + ); + } + if (part.type === "text") return {part.text}; + return null; + }) + )} + + ); +} +``` + +`addToolOutput` patches the assistant message locally with `state: "output-available"` and fills in `output`. `lastAssistantMessageIsCompleteWithToolCalls` detects that every pending tool call now has a result, and `useChat` fires a new `sendMessage` — the backend picks it up as the next turn. 
+

## Detecting a paused turn in `onTurnComplete`

Two ways to detect "this turn paused for user input" vs "this turn finished normally":

### Via `finishReason` (recommended)

The AI SDK's finish reason is surfaced on every `onTurnComplete` event. If the model stopped on tool calls, it's `"tool-calls"`:

```ts
onTurnComplete: async ({ finishReason, responseMessage }) => {
  if (finishReason === "tool-calls") {
    // Turn paused — assistant message has pending tool call(s)
    const pending = responseMessage?.parts.filter(
      (p) => p.type.startsWith("tool-") && p.state === "input-available"
    );
    // Persist as a checkpoint / partial turn
  } else {
    // finishReason === "stop" — normal completion
    // Persist as a completed turn
  }
};
```

<Note>
  `finishReason` is only undefined for manual `chat.pipe()` flows or aborted streams. For the common `run() → return streamText(...)` pattern it's always populated.
</Note>

### Via response parts

If you need more nuance (e.g. which specific tool is pending), inspect the parts directly:

```ts
function pendingToolCalls(message: UIMessage): string[] {
  return message.parts
    .filter((p) => p.type.startsWith("tool-") && p.state === "input-available")
    .map((p) => p.toolCallId);
}
```

Both `finishReason === "tool-calls"` and `pendingToolCalls(responseMessage).length > 0` are equivalent in practice. Use `finishReason` for dispatch, parts for detail.

## Persistence: one message vs one record per pause

Because the AI SDK reuses the assistant message ID across the pause, the "same turn" from the user's perspective maps to **two `onTurnComplete` firings** on the server — but both receive a `responseMessage` with the **same `id`**, and the second firing's `responseMessage` contains the fully merged content.

Two common persistence patterns:

### Overwrite on every turn (simplest)

Just store the latest `uiMessages` array on every `onTurnComplete`.
The paused-turn write is overwritten by the resume-turn write; the final DB state has the full merged message. + +```ts +onTurnComplete: async ({ chatId, uiMessages }) => { + await db.chat.update({ + where: { id: chatId }, + data: { messages: uiMessages }, + }); +}, +``` + +Use this unless you specifically need an audit trail. + +### Checkpoint nodes (immutable history) + +For apps that want every pause point recorded as its own immutable snapshot (branching, replay, diff review), save a checkpoint when paused and a sibling when complete: + +```ts +onTurnComplete: async ({ chatId, responseMessage, finishReason, uiMessages }) => { + if (!responseMessage) return; + + if (finishReason === "tool-calls") { + // Paused — save a checkpoint + await db.turnCheckpoint.create({ + data: { + chatId, + messageId: responseMessage.id, + parts: responseMessage.parts, + kind: "partial", + }, + }); + } else { + // Completed — save a sibling with the merged full message + await db.turnCheckpoint.create({ + data: { + chatId, + messageId: responseMessage.id, + parts: responseMessage.parts, + kind: "final", + }, + }); + } + + // Always update the canonical chat record for `hydrateMessages` to load + await db.chat.update({ + where: { id: chatId }, + data: { messages: uiMessages }, + }); +}; +``` + +Both writes see `responseMessage.id` as the same value — they're checkpoints of the same logical message. Grouping by `messageId` + ordering by `createdAt` gives you the progression. + +## Multi-pause turns + +A single logical turn can pause more than once — the LLM asks question A, gets the answer, thinks, then asks question B before finishing. Each pause fires its own `onTurnComplete` with `finishReason === "tool-calls"`; only the last firing has `finishReason === "stop"`. The checkpoint pattern above handles this naturally — each pause adds a new checkpoint sharing the same `responseMessage.id`. 
+ +## Gotchas + +- **Don't set an `execute` function on the HITL tool.** If it has one, `streamText` will call it immediately instead of handing control back. +- **The frontend must use `sendAutomaticallyWhen`.** Without it, the user has to press Enter after answering — `addToolOutput` updates local state but doesn't fire a new turn by itself. +- **Don't mutate `responseMessage` in `onTurnComplete`.** It's the captured snapshot. To add custom parts, use `chat.response.append()` in `onBeforeTurnComplete` (while the stream is open). +- **Stop handling.** If the user stops the run while a pause is active (`chat.stop()` on the transport), `onTurnComplete` fires with `stopped: true` and `finishReason` reflecting the last successful step. Treat stopped paused turns the same as stopped normal turns. diff --git a/docs/docs.json b/docs/docs.json index d1bfafb6f92..2a8645f4932 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -108,7 +108,8 @@ "ai-chat/patterns/version-upgrades", "ai-chat/patterns/database-persistence", "ai-chat/patterns/branching-conversations", - "ai-chat/patterns/code-sandbox" + "ai-chat/patterns/code-sandbox", + "ai-chat/patterns/human-in-the-loop" ] }, "ai-chat/client-protocol", From fdf6d693543c76cbf265c1f619559094ee213303 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 18 Apr 2026 16:02:22 +0100 Subject: [PATCH 33/49] docs(ai-chat): document chat.endRun() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an "Ending a run on your terms" section to backend.mdx covering chat.endRun() — exit after the current turn without the upgrade-required signal, for one-shot or self-terminating agents. 
--- docs/ai-chat/backend.mdx | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index b5c6726dd85..df0c11f8d17 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -967,6 +967,25 @@ The `reason` field tells you why messages are being prepared: Chat agent runs are pinned to the worker version they started on. When you deploy a new version, suspended runs resume on the old code. Call `chat.requestUpgrade()` in `onTurnStart` to skip `run()` and exit immediately — the transport re-triggers the same message on the latest version. See the [Version Upgrades pattern](/ai-chat/patterns/version-upgrades) for the full guide. +### Ending a run on your terms + +By default, a chat agent stays idle after each turn waiting for the next user message. Call `chat.endRun()` from `run()`, `chat.defer()`, `onBeforeTurnComplete`, or `onTurnComplete` to exit the loop once the current turn finishes — no upgrade signal, no idle wait. + +```ts +chat.agent({ + id: "one-shot", + run: async ({ messages, signal }) => { + // Single-response agent — exit after this turn. + chat.endRun(); + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +The current turn streams through normally, `onBeforeTurnComplete` / `onTurnComplete` fire, the turn-complete chunk is written, and the run exits instead of suspending. The next user message on the same `chatId` starts a fresh run via the standard continuation flow. + +Use this when the agent knows its work is done (budget exhausted, goal achieved, one-shot response) rather than relying on the idle timeout. Unlike `chat.requestUpgrade()`, no `upgrade-required` signal is sent to the client, so there's no version-migration semantics. 
+ ### Runtime configuration #### chat.setTurnTimeout() From 029de087ba42345154689753fc95c7c21ab5dba9 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 18 Apr 2026 16:15:19 +0100 Subject: [PATCH 34/49] docs(ai-chat): add user-initiated compaction pattern Show how to wire a "Summarize conversation" button or slash command via actionSchema + onAction. The backend summarizes and replaces history with chat.history.set(); run() short-circuits when trigger === "action" so no LLM response is generated. Includes a progress-feedback variant using chat.stream.append(). Resolves TRI-8268. --- docs/ai-chat/compaction.mdx | 106 ++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/docs/ai-chat/compaction.mdx b/docs/ai-chat/compaction.mdx index 9039084173a..04e754a4562 100644 --- a/docs/ai-chat/compaction.mdx +++ b/docs/ai-chat/compaction.mdx @@ -190,6 +190,112 @@ export const myChat = chat.agent({ }); ``` +## User-initiated compaction + +Sometimes you want the user to decide when to compact — a "Summarize conversation" button, a `/compact` slash command, or a settings toggle. Wire this up with [actions](/ai-chat/backend#actions): the frontend sends a typed action, `onAction` runs the summary, and `chat.history.set()` replaces the conversation. + +### Backend + +Define a `compact` action that reuses your existing `summarize` function: + +```ts +import { chat } from "@trigger.dev/sdk/ai"; +import { streamText, generateText, generateId, convertToModelMessages } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +// Reusable summarize fn — also used by the automatic compaction config. +async function summarize(messages: ModelMessage[]) { + const result = await generateText({ + model: openai("gpt-4o-mini"), + messages: [...messages, { role: "user", content: "Summarize this conversation concisely." 
 }],
  });
  return result.text;
}

export const myChat = chat.agent({
  id: "my-chat",

  // Automatic compaction still runs on threshold.
  compaction: {
    shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000,
    summarize: async ({ messages }) => summarize(messages),
  },

  // User-initiated: the frontend sends { type: "compact" }.
  actionSchema: z.discriminatedUnion("type", [
    z.object({ type: z.literal("compact") }),
  ]),

  onAction: async ({ action, uiMessages }) => {
    if (action.type !== "compact") return;

    const summary = await summarize(convertToModelMessages(uiMessages));

    // Replace the full history with a single summary message.
    chat.history.set([
      {
        id: generateId(),
        role: "assistant",
        parts: [{ type: "text", text: `[Conversation summary]\n\n${summary}` }],
      },
    ]);
  },

  run: async ({ messages, trigger, signal }) => {
    // Compact action doesn't need an LLM response — just exit.
    if (trigger === "action") return;

    return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal });
  },
});
```

Actions fire `onAction`, apply any `chat.history.*` mutations, then call `run()`. For compaction there's no new user message to respond to, so `run()` returns early when `trigger === "action"`. `onTurnComplete` still fires with the compacted `uiMessages` — use it to persist the new state.

### Frontend

Call `transport.sendAction()` from a button or slash command:

```tsx
import { useTriggerChatTransport } from "@trigger.dev/react-hooks";
import { useChat } from "@ai-sdk/react";

function ChatView({ chatId, accessToken }: { chatId: string; accessToken: string }) {
  const transport = useTriggerChatTransport({ task: "my-chat", accessToken });
  const { messages } = useChat({ id: chatId, transport });

  return (
    <>
      <button onClick={() => transport.sendAction(chatId, { type: "compact" })}>
        Summarize conversation
      </button>
      {messages.map(/* ... */)}
    </>
  );
}
```

The call returns as soon as the backend accepts the action.
Because `onTurnComplete` replaces the `uiMessages` with the summary, `useChat` receives the new state via the normal turn-complete flow — the UI updates automatically. + +### Indicating compaction in the UI + +For "Compacting..." feedback while the summary generates, append a transient data part from `onAction` via `chat.stream.append()`: + +```ts +onAction: async ({ action, uiMessages }) => { + if (action.type !== "compact") return; + + chat.stream.append({ type: "data-compaction", data: { status: "compacting" } }); + const summary = await summarize(convertToModelMessages(uiMessages)); + chat.stream.append({ type: "data-compaction", data: { status: "complete" } }); + + chat.history.set([ /* ... */ ]); +}, +``` + +See [Raw streaming with chat.stream](/ai-chat/features#raw-streaming-with-chatstream) for the full API. + ## Using with chat.createSession() Pass the same `compaction` config to `chat.createSession()`. The session handles outer-loop compaction automatically inside `turn.complete()`: From 102ad32a5456d83ef103bf6aad2266917b49ca85 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 18 Apr 2026 18:43:05 +0100 Subject: [PATCH 35/49] docs(ai-chat): changelog entry for 0.0.0-chat-prerelease-20260418174118 Covers chat.endRun(), finishReason on turn-complete events, user-initiated compaction pattern, and the new human-in-the-loop patterns page. --- docs/ai-chat/changelog.mdx | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index bda2f1ae2f4..f1732548d98 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,6 +4,53 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- + + +## `chat.endRun()` — exit on your own terms + +New imperative API to exit the loop after the current turn completes, without the upgrade-required signal that `chat.requestUpgrade()` sends. 
Use for one-shot agents, budget-exhausted exits, or goal-reached completions. + +```ts +chat.agent({ + id: "one-shot", + run: async ({ messages, signal }) => { + chat.endRun(); + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); + }, +}); +``` + +The current turn streams normally, `onBeforeTurnComplete` / `onTurnComplete` fire, the turn-complete chunk is written, and the run exits instead of suspending. Callable from `run()`, `chat.defer()`, `onBeforeTurnComplete`, or `onTurnComplete`. See [Ending a run on your terms](/ai-chat/backend#ending-a-run-on-your-terms). + +## `finishReason` on turn-complete events + +`TurnCompleteEvent` and `BeforeTurnCompleteEvent` now include the AI SDK's `finishReason` (`"stop" | "tool-calls" | "length" | "content-filter" | "error" | "other"`). Clean signal for distinguishing a normal turn end from one paused on a pending tool call (HITL flows like `ask_user`): + +```ts +onTurnComplete: async ({ finishReason, responseMessage }) => { + if (finishReason === "tool-calls") { + // Paused — assistant message has a pending tool call waiting for user input + await persistCheckpoint(responseMessage); + } else { + await persistCompleted(responseMessage); + } +}; +``` + +Undefined for manual `chat.pipe()` flows or aborted streams. See the new [Human-in-the-loop pattern](/ai-chat/patterns/human-in-the-loop). + +## User-initiated compaction pattern + +The [Compaction guide](/ai-chat/compaction) now covers how to wire a "Summarize conversation" button or `/compact` slash command via `actionSchema` + `onAction`. The agent summarizes on demand, rewrites history with `chat.history.set()`, and short-circuits the LLM call for action turns. + +Needed a small type fix for this: `ChatTaskPayload.trigger` now correctly includes `"action"`, so `run()` handlers can short-circuit with `if (trigger === "action") return` when an action doesn't need a response. 
+ +## Human-in-the-loop pattern page + +New [Human-in-the-loop](/ai-chat/patterns/human-in-the-loop) page walks through `ask_user`-style mid-turn user input end-to-end: defining a no-execute tool, rendering pending tool calls on the frontend with `addToolOutput` + `sendAutomaticallyWhen`, detecting paused turns via `finishReason`, and two persistence strategies (overwrite vs. checkpoint nodes). + + + ## Offline test harness for `chat.agent` From 95857c209fe49fe5f69b448187d83f859d1d3e24 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 18 Apr 2026 22:00:03 +0100 Subject: [PATCH 36/49] docs(ai-chat): add Agent Skills pattern page New docs/ai-chat/patterns/skills.mdx covering Phase 1 end-to-end: folder layout, SKILL.md format, skills.define + chat.skills.set, auto-wired loadSkill/readFile/bash tools, built-in CLI bundling, path scoping rules, and mixing with custom tools. Links to the AI SDK cookbook pattern we build on. --- docs/ai-chat/patterns/skills.mdx | 213 +++++++++++++++++++++++++++++++ docs/docs.json | 3 +- 2 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 docs/ai-chat/patterns/skills.mdx diff --git a/docs/ai-chat/patterns/skills.mdx b/docs/ai-chat/patterns/skills.mdx new file mode 100644 index 00000000000..97933587fe7 --- /dev/null +++ b/docs/ai-chat/patterns/skills.mdx @@ -0,0 +1,213 @@ +--- +title: "Agent Skills" +sidebarTitle: "Agent Skills" +description: "Ship reusable capabilities (folders with SKILL.md + scripts) that a chat agent discovers and invokes on demand." +--- + +Agent skills are reusable capabilities you ship as folders — a `SKILL.md` describing when and how to use them, plus optional scripts, references, and assets. The chat agent sees a short description of each skill in its system prompt, loads the full instructions on demand via a `loadSkill` tool, and invokes the bundled scripts via `bash` — all without you wiring anything up manually. 
+ +Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills). Works with any provider (OpenAI, Anthropic, Gemini, etc.) — not tied to Anthropic's server-side skills. + +## Why skills? + +Compared to regular AI SDK tools: + +- **Tools** are typed functions you pre-declare. Great when you know up-front exactly what capability the agent needs. +- **Skills** are folders the model discovers and reads on demand. Great when the capability is a bundle of instructions + helper scripts that would be awkward to encode as a single tool. + +PDFs are the canonical example: you don't want to ask the LLM to parse PDF bytes inline. You want it to `bash scripts/extract.py report.pdf` using a bundled `pdfplumber` wrapper. A skill ships the script, the instructions, and any reference notes together. + +Skills are also [dashboard-editable](/ai-chat/skills/overview) in Phase 2 — a platform team can tighten a skill's description or "when to use" text without a redeploy. Phase 1 (today) is SDK-only. + +## Trust model + +Skills are **developer-authored code**, not end-user-supplied. The same developer who writes the `chat.agent()` writes the skill bundle. The trust boundary is identical to any `tool.execute` handler the developer writes — scripts run directly in the Trigger.dev worker container, no sandboxing required. + +This makes skills different from the Claude Code / end-user model where arbitrary user-provided skills need isolation. Don't accept skill paths from untrusted input. + +## Skill folder layout + +A skill is a directory under your project (conventionally `trigger/skills/{id}/`): + +``` +trigger/skills/time-utils/ +├── SKILL.md # Required — frontmatter + instructions +├── scripts/ +│ ├── now.sh +│ └── add.sh +├── references/ +│ └── timezones.txt +└── assets/ # Optional — templates, data files, etc. 
+``` + +### SKILL.md + +Frontmatter is YAML-subset — only `name` and `description` are required: + +```md +--- +name: time-utils +description: Compute and format dates/times in arbitrary timezones. Use when the user asks "what time is it", timezone conversions, or date math. +--- + +# Time utilities + +## When to use + +- The user asks for the current time in a timezone +- The user wants date math ("3 days from now") + +## Scripts + +### `scripts/now.sh [TZ]` +Prints the current time in the given IANA timezone (default `UTC`). + +### `scripts/add.sh DAYS [TZ]` +Prints a date `DAYS` days from now. + +## Tips +- IANA timezone names only (`America/New_York`, not `EST`). +- See `references/timezones.txt` for a cheat-sheet. +``` + +The **description** is what the model sees in its system prompt — write it like you're explaining to the agent when to reach for the skill. + +The **body** is loaded on demand via the `loadSkill` tool when the agent decides to use the skill. Write it like documentation for the agent. + +## Defining and using a skill + +```ts trigger/chat.ts +import { chat } from "@trigger.dev/sdk/ai"; +import { skills } from "@trigger.dev/sdk"; +import { streamText } from "ai"; +import { openai } from "@ai-sdk/openai"; + +const timeUtilsSkill = skills.define({ + id: "time-utils", + path: "./skills/time-utils", +}); + +export const agent = chat.agent({ + id: "docs-chat", + onChatStart: async () => { + chat.skills.set([await timeUtilsSkill.local()]); + }, + run: async ({ messages, signal }) => { + return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, + ...chat.toStreamTextOptions(), + }); + }, +}); +``` + +`skills.define({ id, path })` does two things: + +1. Registers the skill with the Trigger.dev build system so the CLI **automatically bundles the folder** into your deploy image at `/app/.trigger/skills/{id}/`. No `trigger.config.ts` changes, no build extension — it just works. +2. Returns a `SkillHandle` you use at runtime. 
+ +`skill.local()` reads the bundled `SKILL.md` from disk and returns a `ResolvedSkill` with the parsed frontmatter + body + on-disk path. + +`chat.skills.set([...])` stores the resolved skills for the current run. `chat.toStreamTextOptions()` spreads them into `streamText` automatically: + +- The frontmatter `description` lands in the system prompt under "Available skills:". +- Three tools are added: `loadSkill`, `readFile`, `bash` — scoped per skill. + +## What gets auto-injected + +When you spread `chat.toStreamTextOptions()` with skills set, the AI SDK call receives three tools: + +### `loadSkill({ name })` + +Returns the full `SKILL.md` body for the named skill. The model calls this first when it decides a skill is relevant, to load the full instructions. + +### `readFile({ skill, path })` + +Reads a file inside the skill's bundled folder. Paths are relative to the skill's root and are rejected if they attempt to escape via `..` or absolute paths. Output is capped at 1 MB per call. + +Use for reference files and templates that the model should read literally: + +``` +readFile({ skill: "time-utils", path: "references/timezones.txt" }) +``` + +### `bash({ skill, command })` + +Runs a bash command with `cwd` set to the skill's root. Stdout and stderr are captured and returned (each capped at 64 KB per call, with tail truncation). The turn's abort signal propagates — cancelling the run kills the child process. + +Use to invoke the skill's bundled scripts: + +``` +bash({ skill: "time-utils", command: "bash scripts/now.sh America/Los_Angeles" }) +``` + +Script runtime expectations are yours to manage. If your skill uses `extract.py`, your deploy image needs Python — add it via your build config the same way you would for any other task dependency. 
+ +## How discovery works in the model + +The model sees a short preamble appended to your system prompt: + +``` +Available skills (call `loadSkill` to read the full instructions before using one): +- time-utils: Compute and format dates/times in arbitrary timezones... +- pdf-processing: Extract text from PDFs, fill forms... +``` + +When the user asks something that matches a description, the model calls `loadSkill({ name: "time-utils" })` to load the body, then follows the body's instructions — typically by calling `bash` or `readFile` on the bundled scripts. + +This is **progressive disclosure**: each skill costs ~100 tokens up front (its one-line description), and only the ones the model actually uses pay the full context cost. + +## Mixing skills with custom tools + +If you also define your own AI SDK tools, pass them through `chat.toStreamTextOptions()` so the merge is explicit: + +```ts +return streamText({ + model: openai("gpt-4o"), + messages, + abortSignal: signal, + ...chat.toStreamTextOptions({ + tools: { + webFetch, // your tool + deepResearch, // your tool + }, + }), +}); +``` + +Your tools win on name conflicts. (Pick names that don't collide with `loadSkill` / `readFile` / `bash` to keep things predictable.) + +## Bundling + +Bundling is **built-in to the CLI** — there's no extension to import. When you run `trigger deploy` or `trigger dev`: + +1. esbuild bundles your task code as usual. +2. The CLI forks the indexer locally against the bundled output, collects every `skills.define({ path })` registration. +3. Each skill's folder is copied to `{outputPath}/.trigger/skills/{id}/` via a recursive copy. +4. The existing Dockerfile `COPY` picks up `.trigger/skills/` along with the rest of the bundle — no Dockerfile changes. + +If you're running `trigger dev`, the same layout appears in the local dev output directory, so `skill.local()` works the same way. 
+ +## Path scoping rules + +- `skill.path` always resolves to `${process.cwd()}/.trigger/skills/{id}/` at runtime. Don't hardcode paths elsewhere. +- `readFile` rejects `..` segments and absolute paths — the tool only exposes files inside the skill's own directory. +- `bash` runs with `cwd` set to the skill's root. Inside the script, relative paths resolve against the skill directory. +- Cross-skill access isn't provided — each skill is isolated by design. If two skills need to share data, either duplicate the shared file or consolidate the skills. + +## Limitations in Phase 1 + +- `skill.resolve()` (backend-managed overrides) is not available yet. It throws a "not available in Phase 1, use `.local()`" error. Phase 2 ships dashboard-editable `SKILL.md` text. +- No per-skill metrics in the dashboard yet. +- No Anthropic `/v1/skills` integration — use the portable path today; the Anthropic optimization comes in Phase 4. + +## Full example + +See `references/ai-chat/src/trigger/skills/time-utils/` in the Trigger.dev monorepo for a working skill that bundles two bash scripts and a reference cheat-sheet, wired into a `chat.agent` that answers timezone questions. 
+ +## Related + +- [AI SDK cookbook — Agent Skills](https://ai-sdk.dev/cookbook/guides/agent-skills) — the userland pattern we build on +- [Anthropic Agent Skills](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview) — Anthropic's codified version (server-side, optional future integration) diff --git a/docs/docs.json b/docs/docs.json index 2a8645f4932..21d3edc5987 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -109,7 +109,8 @@ "ai-chat/patterns/database-persistence", "ai-chat/patterns/branching-conversations", "ai-chat/patterns/code-sandbox", - "ai-chat/patterns/human-in-the-loop" + "ai-chat/patterns/human-in-the-loop", + "ai-chat/patterns/skills" ] }, "ai-chat/client-protocol", From 9e69c2c3017db0bee475ddc4e74149c50a2112fe Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sun, 19 Apr 2026 18:39:11 +0100 Subject: [PATCH 37/49] docs(ai-chat): changelog entry for 0.0.0-chat-prerelease-20260419173457 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Covers Agent Skills Phase 1 — skills.define, chat.skills.set, auto-wired loadSkill/readFile/bash tools, and CLI bundling into /app/.trigger/skills/ at deploy time. --- docs/ai-chat/changelog.mdx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index f1732548d98..6fa7d7177b5 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,6 +4,18 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- + + +## Agent Skills (Phase 1) + +Ship reusable capabilities as folders — a `SKILL.md` plus optional scripts, references, and assets. The agent sees short descriptions in its system prompt, loads full instructions on demand via `loadSkill`, and invokes bundled scripts via `bash` — no manual wiring. + +`skills.define({ id, path })` registers the skill; the CLI bundles the folder into the deploy image. 
`chat.skills.set([...])` activates skills for the run; `chat.toStreamTextOptions()` auto-injects the preamble and tools. + +See the new [Agent Skills guide](/ai-chat/patterns/skills). + + + ## `chat.endRun()` — exit on your own terms From ed8f51f895332c3711fa698ce6d1d49183bc5d4f Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 24 Apr 2026 13:56:36 +0100 Subject: [PATCH 38/49] docs: Sessions primitive + chat.agent on Sessions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Net-new Sessions section (4 pages under docs/sessions/) plus updates to 10 ai-chat pages and cross-reference callouts on two realtime backend pages — covers the chat.agent migration to the Session primitive and the X-Session-Settled fast-close work. Phase 1 — Sessions primitive (net-new) docs/sessions/overview.mdx — what a Session is, identity (sessionId + externalId, idempotent create on externalId), the .in / .out asymmetry, durability across runs, when to use Sessions vs run-scoped streams, relationship to chat.agent. docs/sessions/quick-start.mdx — four-step walkthrough (create → open from a task → subscribe from a client → close), plus a brief .in example. docs/sessions/channels.mdx — deep dive. .out producer methods (append / pipe / writer) and consumer method (read). .in consumer methods (on / once / peek / wait / waitWithIdleTimeout) and producer method (send). Suspend-while-idle via session-stream waitpoints. Uniform serialization. Buffering and attachment semantics. docs/sessions/reference.mdx — full API reference tables. `sessions.*` methods with signatures. CreateSessionRequestBody / SessionItem / UpdateSessionRequestBody / CloseSessionRequestBody / ListSessionsOptions shapes. SessionHandle / SessionOutputChannel / SessionInputChannel method tables. Option types (WriterStreamOptions, SessionSubscribeOptions, InputStreamOnceOptions / InputStreamWaitOptions / InputStreamWaitWithIdleTimeoutOptions). Token scopes. 
Wire endpoints including X-Session-Settled. docs/docs.json — added Sessions subgroup under the AI dropdown, placed right after Agents. Phase 2 — chat.agent updates docs/ai-chat/client-protocol.mdx — full rewrite. Old run-scoped endpoints (`/realtime/v1/streams/{runId}/chat`, `/realtime/v1/streams/{runId}/input/chat-messages`) replaced with session endpoints (`POST /api/v1/sessions`, `POST /realtime/v1/sessions/{sessionId}/in/append`, `GET /realtime/v1/sessions/{sessionId}/out`). Documents ChatInputChunk tagged union, Last-Event-ID resume, X-Session-Settled header. Keeps sequence diagrams, upgrade-required flow, tool approvals, actions, pending / steering messages, and continuations — all re-framed around sessions. docs/ai-chat/frontend.mdx — new "What the transport persists per chat" table under Session management: sessionId durable, publicAccessToken refreshed, lastEventId for resume, runId optional, isStreaming optional (server decides via X-Session-Settled now). Short paragraph under Restoring on page load explaining cross-run resume. docs/ai-chat/server-chat.mdx — stateless handler example rewritten around sessionId as the durable key (runId + lastEventId are live-run hints). Info callout on cross-run durability. New inbox example via sessions.list({type: "chat.agent"}). Updated session option row in the AgentChat options table to reflect the new ChatSession shape. docs/ai-chat/backend.mdx — Info callout at the top of chat.agent() section: every conversation is a Session, externalId = chatId, type = "chat.agent". Rarely need to touch directly, but `payload.sessionId` + `sessions.open()` is available. docs/ai-chat/reference.mdx — ChatTaskRunPayload gained sessionId row. TriggerChatTransport options updated for ChatSession shape. RenewRunAccessTokenParams documented with sessionId; renew callback example mints both run + session scopes. New ChatSession type section. New ChatInputChunk type section. Session scopes table with links to /sessions/reference. 
docs/ai-chat/overview.mdx — recast "How multi-turn works": one conversation, many runs (sessionId is durable). New "Resume and inbox" section covering cross-run resume on page load and sessions.list for inbox views. Link to /sessions/overview in Related. docs/ai-chat/quick-start.mdx — Sessions link added to Next steps. docs/ai-chat/changelog.mdx — new dated entry covering the session migration: externalId = chatId, public surface unchanged, cross-run resume is free, inbox via sessions.list, X-Session-Settled fast-close improvement, migration notes for custom-transport authors. docs/ai-chat/testing.mdx — note that mockChatAgent drives the agent's backing Session channels under the hood (API unchanged). "close" wording updated from "input stream" to ".in channel". docs/ai-chat/patterns/version-upgrades.mdx — clarified that chat.requestUpgrade() re-triggers on the same session; only runId + publicAccessToken refresh. Subsequent-messages terminology updated from "input stream" to "session's .in channel" for consistency. Phase 3 — Cross-references docs/realtime/backend/streams.mdx — Tip callout pointing to Sessions for durable long-lived channels. Run-scoped streams explicitly not deprecated. docs/realtime/backend/input-streams.mdx — same callout, framed around runId vs sessionId addressing. All 16 affected pages verified serving 200 on the local Mintlify dev server. Plan lives at .claude/docs-plans/sessions-chat-agent.md for anyone picking up the work later. 
--- .claude/docs-plans/sessions-chat-agent.md | 98 ++++++ docs/ai-chat/backend.mdx | 4 + docs/ai-chat/changelog.mdx | 33 ++ docs/ai-chat/client-protocol.mdx | 377 +++++++++++---------- docs/ai-chat/frontend.mdx | 14 + docs/ai-chat/overview.mdx | 27 +- docs/ai-chat/patterns/version-upgrades.mdx | 6 +- docs/ai-chat/quick-start.mdx | 1 + docs/ai-chat/reference.mdx | 79 ++++- docs/ai-chat/server-chat.mdx | 48 ++- docs/ai-chat/testing.mdx | 4 +- docs/docs.json | 93 ++++- docs/realtime/backend/input-streams.mdx | 4 + docs/realtime/backend/streams.mdx | 4 + docs/sessions/channels.mdx | 214 ++++++++++++ docs/sessions/overview.mdx | 169 +++++++++ docs/sessions/quick-start.mdx | 128 +++++++ docs/sessions/reference.mdx | 222 ++++++++++++ 18 files changed, 1319 insertions(+), 206 deletions(-) create mode 100644 .claude/docs-plans/sessions-chat-agent.md create mode 100644 docs/sessions/channels.mdx create mode 100644 docs/sessions/overview.mdx create mode 100644 docs/sessions/quick-start.mdx create mode 100644 docs/sessions/reference.mdx diff --git a/.claude/docs-plans/sessions-chat-agent.md b/.claude/docs-plans/sessions-chat-agent.md new file mode 100644 index 00000000000..7d3d22a0b99 --- /dev/null +++ b/.claude/docs-plans/sessions-chat-agent.md @@ -0,0 +1,98 @@ +# Docs plan — Sessions primitive + chat.agent migration + +Plan for updating Mintlify docs to cover: + +1. **Sessions** — net-new public primitive (`sessions.create/open/list/close`, `SessionHandle`, `.in`/`.out`) that doesn't exist in docs yet. +2. **chat.agent on Sessions** — 14 ai-chat pages reference the old run-scoped wire protocol. Public `chat.agent()` surface is unchanged, but the underlying transport, persistence shape, and wire endpoints all moved. +3. **Session-settled signal** — recent improvement (`X-Session-Settled` response header, `wait=0` drain on settled reconnects). Needs mention on frontend/server-chat pages. 
+ +Architecture reference (what the system actually does, for doc writers): +`.claude/architecture/chat-agent-sessions.md`. + +## Relationship to other doc plans + +Coordinate with the hydration/history/actions plan saved in +`project_docs_update_plan.md` (memory). Sessions should land **first** — +it's the foundational primitive the other features reference. + +--- + +## Phase 1 — Sessions primitive docs (net-new) + +New top-level section `docs/sessions/`, added as a dropdown group in +`docs.json`. Should ship as its own PR and merge before Phase 2 so +chat.agent docs can link into it. + +| File | Covers | +|---|---| +| `sessions/overview.mdx` | What a Session is. Identity (`sessionId` + `externalId`, `session_*` friendly format, externalId idempotency on create). `.in` / `.out` channels as a durable typed I/O pair. Durability across runs. When to use Sessions vs. run-scoped streams. That `chat.agent` is built on Sessions. | +| `sessions/quick-start.mdx` | Minimal end-to-end: `sessions.create` → `sessions.open` → `.out.append` + `.in.on` → `sessions.close`. Model on `ai-chat/quick-start.mdx` shape. | +| `sessions/channels.mdx` | Deep dive. `.out` producer API (`append`, `pipe`, `writer({execute})` matching `streams.define`) and external consumer API (`read`). `.in` consumer API (`on`, `once`, `peek`, `wait`, `waitWithIdleTimeout` matching `streams.input`) and external producer API (`send`). Suspend-while-idle via session-stream waitpoints. Uniform serialization on `.out` (subscribers always get parsed objects). | +| `sessions/reference.mdx` | API reference. `sessions.create / retrieve / update / close / list / open`. `SessionHandle`, `SessionInputChannel`, `SessionOutputChannel`. Token scopes: `read:sessions`, `write:sessions`, `admin:sessions`, super-scopes. | +| `sessions/patterns.mdx` *(optional — can defer)* | Cross-run resume. Inbox via `sessions.list({type, tags})`. Multi-agent shared channels (two agents coordinating on one session). 
Custom transports keyed on `externalId`. | + +Navigation: add `Sessions` dropdown to `docs.json`, placed adjacent to +`AI Chat` so readers see the relationship. + +--- + +## Phase 2 — Update chat.agent docs + +Ships after Phase 1 merges. One PR. + +| File | Change | +|---|---| +| `ai-chat/client-protocol.mdx` | **Full rewrite.** Old run-scoped endpoints (`POST /api/v1/tasks/:id/trigger`, `GET /realtime/v1/streams/:runId/chat`, `POST /realtime/v1/streams/:runId/input/chat-messages`) are gone. New surface: `POST /api/v1/sessions` (create, idempotent on externalId), `POST /realtime/v1/sessions/:session/:io/append` (input chunks — note `io="in"` for chat), `GET /realtime/v1/sessions/:session/:io` (SSE subscribe, `io="out"`). Document `ChatInputChunk` tagged union (`{kind: "message", payload}` / `{kind: "stop", message?}`). Document `Last-Event-ID` resume. Document `X-Session-Settled: true` response header and when it fires (server peeks `.out` tail; if last record is `trigger:turn-complete`, SSE uses `wait=0` and closes fast with this header). | +| `ai-chat/frontend.mdx` | Update `TriggerChatTransport`. Persistence shape grew: `{sessionId, publicAccessToken, lastEventId, runId?, isStreaming?}` — `sessionId` is the durable identity now, `runId` is a live-run hint. `isStreaming` is **optional** after the settled-signal work; callers that drop it get server-decided settled behavior with no 60s hang. `onSessionChange` now carries `sessionId`. Note: cross-run resume is free — same chat persists across page reloads, across day boundaries, across process exits. | +| `ai-chat/server-chat.mdx` | Same persistence shape update for `AgentChat`. `ChatSession` type gained `sessionId`. Same cross-run resume story. | +| `ai-chat/backend.mdx` | `ChatTaskWirePayload` / `ChatTaskPayload` / `ChatTaskRunPayload` grew optional `sessionId`. Agent code rarely needs to touch it — `chat.stream`, `chat.messages`, `chat.stopSignal` still work identically. 
Show `sessions.open(payload.sessionId)` as an escape hatch for advanced cases (e.g., writing to the session from a sub-agent or from outside the turn loop). | +| `ai-chat/reference.mdx` | Add `ChatInputChunk` type. Update `ChatSession` shape. Document `TriggerChatTaskResult.sessionId`. Session scopes list. Link to `sessions/reference`. | +| `ai-chat/overview.mdx` | Conceptual: chats now outlive individual runs. Inbox pattern via `sessions.list({type: "chat.agent"})`. Link to `/sessions/overview`. | +| `ai-chat/quick-start.mdx` | Minimal edit. One sentence: sessions power the chat primitive; link out to `/sessions/overview` for the underlying model. | +| `ai-chat/changelog.mdx` | New entry covering (a) the session migration, (b) the settled-signal improvement (optional `isStreaming`). | +| `ai-chat/testing.mdx` | `mockChatAgent` now drives `.in` via `drivers.sessions.in.send(sessionId, {kind, payload})` instead of the old input-stream manager. `TestSessionStreamManager` + `TestSessionOutputChannel` replace the stream-based harness. Update any code examples. | +| `ai-chat/patterns/version-upgrades.mdx` | `trigger:upgrade-required` flow now reuses `sessionId` across runs — a single line clarifying that only `runId` + PAT refresh, `sessionId` stays. | +| `ai-chat/patterns/human-in-the-loop.mdx` | Audit for stale stream-ID references; likely only a small update if any. | + +--- + +## Phase 3 — Cross-references in realtime docs + +Tacked onto the Phase 2 PR. Trivial edits. + +| File | Change | +|---|---| +| `realtime/backend/streams.mdx` | Callout: "For durable, long-lived channels that outlive a single run (e.g. chat agents), see [Sessions](/sessions/overview)." Run-scoped streams are not deprecated — they're still correct for ephemeral run I/O. | +| `realtime/backend/input-streams.mdx` | Same callout. | + +--- + +## Out of scope + +- **Deprecation of run-scoped streams.** They remain the right primitive for ephemeral per-run I/O. 
Sessions is additive, not a replacement. +- **Rewriting pattern pages that happen to work unchanged.** `code-sandbox`, `skills`, `sub-agents`, `branching-conversations`, `database-persistence`, `compaction`, `pending-messages`, `background-injection`, `error-handling`, `mcp` — only touch if there's a concrete stale reference. Audit quickly; don't rewrite prophylactically. +- **Wire-protocol examples for non-chat session uses.** If `sessions/patterns.mdx` gets written, covers this lightly. Otherwise defer — Sessions is general-purpose but chat is the primary motivating use case for v1 docs. +- **Migration guide for external callers of the old wire protocol.** The `chat-constants.ts` commit already documented the mapping in its commit message (`streams.writer(CHAT_STREAM_KEY)` → `sessions.open(sessionId).out.writer(...)`, etc.). If we hear from users building custom non-`TriggerChatTransport` clients, we can write a dedicated migration page then. + +--- + +## Sizing + +Rough effort estimates, in full dedicated doc passes: + +- Phase 1 — ~1 pass. 4 net-new pages, `sessions/overview` and `sessions/channels` are the meaty ones; `sessions/patterns` is optional and can be Phase 1.5. +- Phase 2 — ~1 pass. `client-protocol.mdx` is the single biggest rewrite (~half a pass); the other 10 edits are paragraph-level. +- Phase 3 — rounds to zero; fold into Phase 2 PR. + +Total: ~2 dedicated doc passes, ideally across two PRs. + +--- + +## Sequencing decision + +Phase 1 **before** Phase 2. Two reasons: + +1. Phase 2 pages will link into `/sessions/*`; merging Phase 2 first creates broken links in published docs. +2. Readers encountering `sessionId` in updated chat docs need somewhere to go to learn what a Session is. That page has to exist first. + +Phase 1.5 (the optional `sessions/patterns.mdx` page) can ship either with Phase 1 or as a follow-up — it's not on the critical path for Phase 2. 
diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index df0c11f8d17..23f1598f138 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -12,6 +12,10 @@ The highest-level approach. Handles message accumulation, stop signals, turn lif To fix a **custom** `UIMessage` subtype or typed client data schema, use the [ChatBuilder](/ai-chat/types#chatbuilder) via `chat.withUIMessage<...>()` and/or `chat.withClientData({ schema })`. Builder-level hooks can also be chained before `.agent()`. See [Types](/ai-chat/types). + + Every `chat.agent` conversation is backed by a [Session](/sessions/overview) — `externalId` is your `chatId`, `type` is `"chat.agent"`. The session outlives any single run, which is why chats can resume across days or deploys without losing identity. You rarely need to touch the session directly (`chat.stream`, `chat.messages`, `chat.stopSignal` wrap everything), but `payload.sessionId` is available if you want to reach in — e.g. `sessions.open(payload.sessionId)` to write from a sub-agent or from outside the turn loop. + + ### Simple: return a StreamTextResult Return the `streamText` result from `run` and it's automatically piped to the frontend: diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index 6fa7d7177b5..39f621eee3a 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,6 +4,39 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- + + +## `chat.agent` now runs on Sessions + +Every chat is backed by a [Session](/sessions/overview) — a new public, durable, bidirectional I/O primitive that outlives any single run. `externalId` = your chat ID, `type` = `"chat.agent"`. Under the hood: + +- Output chunks stream on `session.out` (was a run-scoped `streams.writer("chat")`). +- Client messages and stops land on `session.in` as a [`ChatInputChunk`](/ai-chat/reference#chatinputchunk) tagged union (was two run-scoped `streams.input` definitions). 
+- Wire endpoints moved from `/realtime/v1/streams/{runId}/...` to `/realtime/v1/sessions/{sessionId}/...`. See the rewritten [Client Protocol](/ai-chat/client-protocol). + +Public surface (`chat.agent()`, `TriggerChatTransport`, `AgentChat`, `chat.stream` / `chat.messages` / `chat.stopSignal`) is unchanged — existing apps keep working. What's new is: + +- **Cross-run resume is free.** A chat you were in yesterday resumes against the same `sessionId` today, even if the original run long since exited. No more lost conversations when a run idle-times-out. +- **Inbox views via `sessions.list({type: "chat.agent"})`.** Enumerate every chat in your environment, filter by tag or status. +- **`TriggerChatTaskResult.sessionId`** + **`ChatTaskRunPayload.sessionId`** — you can reach into the raw session via `sessions.open(payload.sessionId)` for advanced cases (writing from a sub-agent, custom transport). +- **Dashboard Agent tab** resolves via `sessionId` and stays in sync with the live stream across runs. + +See the new [Sessions docs](/sessions/overview) for the underlying primitive. + +## `X-Session-Settled` — fast reconnect on idle chats + +When a client reconnects to `session.out` and the tail record is a `trigger:turn-complete` marker (agent finished a turn, idle-waiting or exited), the server sets `X-Session-Settled: true` and uses `wait=0` on the underlying S2 read. The SSE drains any remaining records then closes in ~1s instead of long-polling for 60s. + +Practical impact: `TriggerChatTransport.reconnectToStream` no longer needs a client-side `isStreaming` flag. You can drop the field from your persisted `ChatSession` state entirely — the server decides. Existing callers that still persist `isStreaming` are unaffected; `reconnectToStream` keeps the fast-path short-circuit when it's `false`. + +## Migration + +Nothing to do for users of `chat.agent()` + `TriggerChatTransport` + `AgentChat`. 
Your existing code keeps working; you pick up the new durability guarantees for free. + +If you built a custom transport against the old `/realtime/v1/streams/{runId}/...` endpoints, see the rewritten [Client Protocol](/ai-chat/client-protocol) for the new session-based wire format. The old constants (`CHAT_STREAM_KEY`, `CHAT_MESSAGES_STREAM_ID`, `CHAT_STOP_STREAM_ID`) are removed from `@trigger.dev/core/v3/chat-client` — migrate to `sessions.open(sessionId).out` / `.in`. + + + ## Agent Skills (Phase 1) diff --git a/docs/ai-chat/client-protocol.mdx b/docs/ai-chat/client-protocol.mdx index d20cd79ad76..f4381eff86d 100644 --- a/docs/ai-chat/client-protocol.mdx +++ b/docs/ai-chat/client-protocol.mdx @@ -1,7 +1,7 @@ --- title: "Client Protocol" sidebarTitle: "Client Protocol" -description: "The wire protocol for building custom chat transports — how clients communicate with chat agents over input streams and SSE." +description: "The wire protocol for building custom chat transports — how clients communicate with chat agents over Sessions and SSE." --- This page documents the protocol that chat clients use to communicate with `chat.agent()` tasks. Use this if you're building a custom transport (e.g., for a Slack bot, CLI tool, or native app) instead of using the built-in `TriggerChatTransport` or `AgentChat`. @@ -12,11 +12,14 @@ This page documents the protocol that chat clients use to communicate with `chat ## Overview -The protocol has three parts: +`chat.agent` is built on [Sessions](/sessions/overview) — a durable, bidirectional I/O primitive that outlives a single run. A conversation is one session; a session can host many runs over its lifetime. -1. **Trigger** — start a new run by calling the task trigger API -2. **Input streams** — send messages and signals to a running agent -3. **Output stream** — subscribe to the agent's response via SSE +The protocol has four parts: + +1. **Create the session** — idempotent on your chat ID +2. 
**Trigger a run** — start an agent run bound to the session +3. **Subscribe to `.out`** — receive `UIMessageChunk` events via SSE +4. **Append to `.in`** — send messages, stops, or actions ```mermaid sequenceDiagram @@ -24,19 +27,59 @@ sequenceDiagram participant API as Trigger.dev API participant Agent as Chat Agent Run - Client->>API: POST /api/v1/tasks/{taskId}/trigger (first message) + Client->>API: POST /api/v1/sessions { externalId: chatId } + API-->>Client: { id: sessionId } + Client->>API: POST /api/v1/tasks/{taskId}/trigger (payload includes sessionId) API-->>Client: { id: runId, publicAccessToken } - Client->>API: GET /realtime/v1/streams/{runId}/chat (SSE subscribe) + Client->>API: GET /realtime/v1/sessions/{sessionId}/out (SSE subscribe) Agent-->>Client: UIMessageChunk stream... Agent-->>Client: { type: "trigger:turn-complete" } - Client->>API: POST /realtime/v1/streams/{runId}/input/chat-messages (next message) + Client->>API: POST /realtime/v1/sessions/{sessionId}/in/append { kind: "message", payload } Agent-->>Client: UIMessageChunk stream... Agent-->>Client: { type: "trigger:turn-complete" } ``` -## Step 1: Trigger the first run +## Step 1: Create the session -Start a conversation by triggering the agent task. The payload follows the `ChatTaskWirePayload` shape: +Before triggering a run, create a Session. Use your stable chat ID as `externalId` — this makes creation idempotent, so two concurrent clients for the same chat converge on the same session. 
+ +```bash +POST /api/v1/sessions +Authorization: Bearer +Content-Type: application/json + +{ + "type": "chat.agent", + "externalId": "conversation-123", + "tags": ["user:user-456"] +} +``` + +Response: + +```json +{ + "id": "session_cm4z2plfh000abcd1efgh", + "externalId": "conversation-123", + "type": "chat.agent", + "tags": ["user:user-456"], + "metadata": null, + "closedAt": null, + "closedReason": null, + "expiresAt": null, + "createdAt": "2026-04-24T09:00:00.000Z", + "updatedAt": "2026-04-24T09:00:00.000Z", + "isCached": false +} +``` + +`id` is the `session_*` friendly ID — persist it alongside your chat state. `isCached: true` means the server returned an existing session for this `externalId` (safe to ignore). + +See [`POST /api/v1/sessions`](/sessions/reference#create) for the full request / response schema. + +## Step 2: Trigger a run + +Start an agent run bound to the session. The payload follows the `ChatTaskWirePayload` shape plus a `sessionId` field: ```bash POST /api/v1/tasks/{taskId}/trigger @@ -53,6 +96,7 @@ Content-Type: application/json } ], "chatId": "conversation-123", + "sessionId": "session_cm4z2plfh000abcd1efgh", "trigger": "submit-message", "metadata": { "userId": "user-456" } }, @@ -62,7 +106,7 @@ Content-Type: application/json } ``` -The response body contains the `runId`: +Response: ```json { @@ -70,14 +114,16 @@ The response body contains the `runId`: } ``` -The **response headers** contain the public access token (a JWT scoped to this run): +The response headers contain `x-trigger-jwt` — a JWT with the scopes the transport needs to operate against the session: -The `x-trigger-jwt` header contains a JWT with `read:runs:{runId}` and `write:inputStreams:{runId}` scopes. Use this for all stream operations. 
+- `read:runs:{runId}` — read the run +- `read:sessions:{sessionId}` — subscribe to `.out` +- `write:sessions:{sessionId}` — append to `.in`, close the session -Store the `runId` and the `x-trigger-jwt` value — you need both for input streams and SSE. +Persist `runId` + `publicAccessToken` + `sessionId` + `lastEventId` as your client-side chat state. - The built-in SDK clients (`TriggerChatTransport`, `AgentChat`) extract the JWT from the response header automatically. If you're using the `ApiClient` from `@trigger.dev/core/v3`, `triggerTask()` returns `{ id, publicAccessToken }` with the header already extracted. + The built-in SDK clients (`TriggerChatTransport`, `AgentChat`) mint this token with the right scopes automatically. If you're using the `ApiClient` from `@trigger.dev/core/v3`, `triggerTask()` returns `{ id, publicAccessToken }` with the header already extracted. ### Preloading (optional) @@ -89,58 +135,57 @@ To preload an agent before the first message, trigger with `"trigger": "preload" "payload": { "messages": [], "chatId": "conversation-123", + "sessionId": "session_cm4z2plfh000abcd1efgh", "trigger": "preload", "metadata": { "userId": "user-456" } } } ``` -The agent starts, runs `onPreload`, and waits for the first real message via the input stream. +The agent starts, runs `onPreload`, opens the session handle, and waits for the first real message on `.in`. -## Step 2: Subscribe to the output stream +## Step 3: Subscribe to `.out` -Subscribe to the agent's response via SSE: +Subscribe to the agent's response via SSE on the session's `.out` channel: ``` -GET /realtime/v1/streams/{runId}/chat +GET /realtime/v1/sessions/{sessionId}/out Authorization: Bearer Accept: text/event-stream ``` +The URL uses `sessionId` — not `runId`. A session's `.out` stays the same across runs, so the client doesn't need to re-subscribe when a new run starts on the same chat. 
+ ### Stream format (S2) -The output stream uses [S2](https://s2.dev) (a durable streaming service) under the hood. SSE events arrive as **batches** — each event has `event: batch` and a `data` field containing an array of records: +The output stream uses [S2](https://s2.dev) under the hood. SSE events arrive as batches — each event has `event: batch` and a `data` field containing an array of records: -```json +``` event: batch data: { "records": [ { - "body": "{\"data\": {\"type\": \"text-delta\", \"delta\": \"Hello\"}, \"id\": \"abc123\"}", + "body": "{\"data\":{\"type\":\"text-delta\",\"delta\":\"Hello\"},\"id\":\"abc\"}", "seq_num": 1, "timestamp": 1712150400000 - }, - { - "body": "{\"data\": {\"type\": \"text-delta\", \"delta\": \" world\"}, \"id\": \"def456\"}", - "seq_num": 2, - "timestamp": 1712150400001 } ] } ``` -Each record's `body` is a JSON string containing `{ data, id }`. The `data` field is the actual `UIMessageChunk`. The `seq_num` is used for stream resumption. +Each record's `body` is a JSON string containing `{ data, id }`. `data` is the actual `UIMessageChunk` object (not a stringified payload). `seq_num` is the resume cursor. 
-**Recommended:** Use `SSEStreamSubscription` from `@trigger.dev/core/v3` to handle parsing automatically — it takes care of batch decoding, deduplication, and resume tracking: +**Recommended:** use `SSEStreamSubscription` from `@trigger.dev/core/v3` to handle parsing automatically — it takes care of batch decoding, deduplication, and `Last-Event-ID` tracking: ```ts import { SSEStreamSubscription } from "@trigger.dev/core/v3"; const subscription = new SSEStreamSubscription( - `${baseUrl}/realtime/v1/streams/${runId}/chat`, + `${baseUrl}/realtime/v1/sessions/${sessionId}/out`, { headers: { Authorization: `Bearer ${publicAccessToken}` }, timeoutInSeconds: 120, + lastEventId, } ); @@ -159,13 +204,11 @@ while (true) { } ``` -If you prefer to parse the S2 protocol yourself, see the [S2 documentation](https://s2.dev/docs) for the full SSE batch protocol reference. +If you prefer to parse the S2 protocol yourself, see the [S2 documentation](https://s2.dev/docs). ### Chunk types -Each chunk's `data` field is a `UIMessageChunk` from the [AI SDK](https://ai-sdk.dev/docs/ai-sdk-ui/ui-message-stream). The stream contains standard AI SDK chunk types (`text-delta`, `reasoning-delta`, `tool-input-available`, `tool-output-available`, `error`, etc.) plus two Trigger.dev-specific control chunks. - -See the [AI SDK UIMessageStream documentation](https://ai-sdk.dev/docs/ai-sdk-ui/ui-message-stream) for the full list of chunk types and their shapes. +Each chunk's `data` field is a `UIMessageChunk` from the [AI SDK](https://ai-sdk.dev/docs/ai-sdk-ui/ui-message-stream) plus two Trigger.dev-specific control chunks (`trigger:turn-complete`, `trigger:upgrade-required`) covered below. 
### `trigger:turn-complete` @@ -181,54 +224,87 @@ Signals that the agent's turn is finished — stop reading and wait for user inp | Field | Type | Description | | --- | --- | --- | | `type` | `"trigger:turn-complete"` | Always this string | -| `publicAccessToken` | `string` (optional) | A refreshed JWT for this run. If present, replace your stored token with this one — the previous token may be close to expiry. | +| `publicAccessToken` | `string` (optional) | A refreshed JWT with the same session + run scopes. If present, replace your stored token. | When you receive this chunk: -1. Update `publicAccessToken` if one is included -2. Close the stream reader -3. Wait for the next user message before subscribing again +1. Update `publicAccessToken` if one is included. +2. Close the stream reader (unless you want to keep it open across turns — see [Resuming a stream](#resuming-a-stream)). +3. Wait for the next user message before sending on `.in`. ### `trigger:upgrade-required` -Signals that the agent cannot handle this message on its current version and the client should retry on a new run. This is emitted when the agent calls [`chat.requestUpgrade()`](/ai-chat/patterns/version-upgrades) before processing the turn. +Signals that the agent cannot handle this message on its current version and the client should re-trigger on a new run. Emitted when the agent calls [`chat.requestUpgrade()`](/ai-chat/patterns/version-upgrades) before processing the turn. ```json -{ - "type": "trigger:upgrade-required" -} +{ "type": "trigger:upgrade-required" } ``` When you receive this chunk: -1. Close the stream reader -2. Clear the current session -3. Immediately trigger a **new run** with the full message history and `continuation: true` (same as [Step 4: Handle continuations](#step-4-handle-continuations)) -4. Subscribe to the new run's stream and pipe it through to the consumer +1. Close the stream reader. +2. 
Immediately trigger a **new run** on the **same session** — keep `sessionId`, refresh `runId` + `publicAccessToken`. Include `continuation: true` in the payload. +3. Resubscribe to `/realtime/v1/sessions/{sessionId}/out`. -The user's message is **not lost** — it gets replayed on the new version. The built-in clients (`TriggerChatTransport`, `AgentChat`) handle this transparently. The consumer sees a seamless response from the upgraded agent. +The user's message is not lost — it gets replayed on the new version. The built-in clients handle this transparently. ### Resuming a stream If the SSE connection drops, reconnect with the `Last-Event-ID` header set to the last `seq_num` you received: ``` -GET /realtime/v1/streams/{runId}/chat +GET /realtime/v1/sessions/{sessionId}/out Authorization: Bearer Last-Event-ID: 42 ``` `SSEStreamSubscription` tracks this automatically via its `lastEventId` option. -## Step 3: Send subsequent messages +### `X-Session-Settled` — fast close on idle reconnects -After the first turn, send messages via the run's input stream instead of triggering a new run: +When you reconnect to `.out` and the last record on the session is a `trigger:turn-complete` marker (the agent has finished a turn and is either idle-waiting or exited), the server responds with: -```bash -POST /realtime/v1/streams/{runId}/input/chat-messages +- `X-Session-Settled: true` response header +- A fast SSE close (milliseconds instead of the usual 60s long-poll) + +This lets the client distinguish a close that means "nothing is streaming right now" from a normal mid-stream disconnect. Your transport can use this to settle into a "ready" state on page reload without maintaining its own `isStreaming` flag. + +```ts +const response = await fetch(sseUrl, { headers }); +const settled = response.headers.get("X-Session-Settled") === "true"; +// ...subscribe as normal; if settled and nothing arrives, you're done. 
+``` + +## Step 4: Send messages, stops, and actions + +All client-to-agent signals are appended to the session's `.in` channel: + +``` +POST /realtime/v1/sessions/{sessionId}/in/append +Authorization: Bearer +Content-Type: application/json +``` + +The body is a JSON-serialized [`ChatInputChunk`](#chatinputchunk) — a tagged union covering messages, stops, and actions. Send them as raw JSON strings (not wrapped in a `data` field). + +### `ChatInputChunk` + +```ts +type ChatInputChunk = + | { kind: "message"; payload: ChatTaskWirePayload } + | { kind: "stop"; message?: string }; +``` + +The discriminator `kind` drives the agent's dispatch — `"message"` goes to the turn loop, `"stop"` fires the abort controller. + +### Sending a message + +``` +POST /realtime/v1/sessions/{sessionId}/in/append Authorization: Bearer Content-Type: application/json { - "data": { + "kind": "message", + "payload": { "messages": [ { "id": "msg-2", @@ -243,25 +319,53 @@ Content-Type: application/json } ``` -Note the `{ "data": ... }` wrapper — the input stream API wraps the payload in a `data` field. - -After sending, subscribe to the output stream again (same URL, same auth) to receive the response. +After sending, subscribe to `.out` (if you closed the stream after `trigger:turn-complete`) to receive the response. - On turn 2+, only send the **new** message(s) in the `messages` array — not the full history. The agent accumulates the conversation internally. On turn 1 (or after a continuation), send the **full** message history. + On turn 2+ against an existing run, only send the **new** message(s) in `messages` — not the full history. The agent accumulates the conversation internally. On turn 1 (or after a continuation), send the **full** message history. -### Tool approval responses +### Sending a stop + +```json +{ "kind": "stop" } +``` -When a tool requires approval (`needsApproval: true`), the agent streams the tool call with an `approval-requested` state and completes the turn. 
After the user approves or denies, send the **updated assistant message** (with `approval-responded` tool parts) back via the same input stream: +Interrupts the agent's current turn. `streamText` aborts, the agent emits `trigger:turn-complete`, and the run returns to idle. -```bash -POST /realtime/v1/streams/{runId}/input/chat-messages -Authorization: Bearer -Content-Type: application/json +An optional `message` field surfaces in the agent's stop handler: + +```json +{ "kind": "stop", "message": "user cancelled" } +``` + +### Sending an action + +Custom actions (undo, rollback, edit) ride on the same `.in` channel using `kind: "message"` with `trigger: "action"` in the payload: + +```json +{ + "kind": "message", + "payload": { + "messages": [], + "chatId": "conversation-123", + "trigger": "action", + "action": { "type": "undo" }, + "metadata": { "userId": "user-456" } + } +} +``` + +Actions wake the agent from suspension (same as messages), fire the `onAction` hook, then trigger a normal `run()` turn. The `action` payload is validated against the agent's `actionSchema`. See [Actions](/ai-chat/backend#actions). +### Tool approval responses + +When a tool requires approval (`needsApproval: true`), the agent streams the tool call with an `approval-requested` state and completes the turn. After the user approves or denies, send the **updated assistant message** (with `approval-responded` tool parts) back as a `kind: "message"` chunk: + +```json { - "data": { + "kind": "message", + "payload": { "messages": [ { "id": "asst-msg-1", @@ -284,89 +388,39 @@ Content-Type: application/json } ``` -The agent matches the incoming message by its `id` against the accumulated conversation. If a match is found, it **replaces** the existing message (instead of appending). This updates the tool approval state, and `streamText` executes the approved tool on the next step. +The agent matches the incoming message by `id` against the accumulated conversation. 
If a match is found, it **replaces** the existing message instead of appending. - The message `id` must match the one the agent assigned during streaming. If you're using `TriggerChatTransport`, IDs are kept in sync automatically. Custom transports should use the `messageId` from the stream's `start` chunk. - - -## Custom actions - -Send a custom action (undo, rollback, edit) to the agent using the same `chat-messages` input stream. Actions use `trigger: "action"` and carry a custom payload in the `action` field: - -```bash -POST /realtime/v1/streams/{runId}/input/chat-messages -Authorization: Bearer -Content-Type: application/json - -{ - "data": { - "messages": [], - "chatId": "conversation-123", - "trigger": "action", - "action": { "type": "undo" }, - "metadata": { "userId": "user-456" } - } -} -``` - -Actions wake the agent from suspension (same as messages), fire the `onAction` hook, then trigger a normal `run()` turn. The `action` payload is validated against the agent's `actionSchema`. - -After sending, subscribe to the output stream to receive the agent's response — the same flow as [Step 2](#step-2-subscribe-to-the-output-stream). - - - `messages` is empty for actions. The agent's `onAction` handler modifies the conversation state via `chat.history.*`, and the LLM responds to the updated state. See [Actions](/ai-chat/backend#actions) for backend setup. + The message `id` must match the one the agent assigned during streaming. `TriggerChatTransport` keeps IDs in sync automatically. Custom transports should use the `messageId` from the stream's `start` chunk. ## Pending and steering messages -You can send messages to the agent **while it's still streaming a response**. These are called pending messages — the agent receives them mid-turn and can inject them between tool-call steps. +You can send messages while the agent is still streaming a response. These are **pending messages** — the agent receives them mid-turn and can inject them between tool-call steps. 
-Send a pending message to the same `chat-messages` input stream: +The wire format is identical to a normal message — the same `kind: "message"` on `.in`. The difference is timing. What happens depends on the agent's `pendingMessages` configuration: -```bash -POST /realtime/v1/streams/{runId}/input/chat-messages -Authorization: Bearer -Content-Type: application/json - -{ - "data": { - "messages": [ - { - "id": "msg-steering-1", - "role": "user", - "parts": [{ "type": "text", "text": "Actually, focus on the security issues first" }] - } - ], - "chatId": "conversation-123", - "trigger": "submit-message", - "metadata": { "userId": "user-456" } - } -} -``` - -This is the same endpoint and format as a normal message. The difference is timing — the agent is already streaming. What happens to the message depends on the agent's `pendingMessages` configuration: - -- **With `pendingMessages.shouldInject`**: The message is injected into the model's context at the next `prepareStep` boundary (between tool-call steps). The agent sees it and can adjust its behavior mid-response. -- **Without `pendingMessages` config**: The message queues for the next turn. It becomes the `currentWirePayload` for the following turn, skipping the wait-for-message phase. +- **With `pendingMessages.shouldInject`**: the message is injected into the model's context at the next `prepareStep` boundary. The agent sees it and can adjust its behavior mid-response. +- **Without `pendingMessages` config**: the message queues for the next turn. See [Pending Messages](/ai-chat/pending-messages) for how to configure the agent side. - Unlike a normal `sendMessage`, pending messages should **not** cancel the active stream subscription. Keep reading the current response stream — the agent incorporates the pending message into the same turn or queues it for the next one. + Unlike a normal `sendMessage`, pending messages should **not** cancel the active stream subscription. 
Keep reading — the agent incorporates the message into the same turn or queues it for the next one. -## Step 4: Handle continuations +## Continuations -A run can end for several reasons: idle timeout, max turns reached, `chat.requestUpgrade()`, or cancellation. When this happens, the input stream POST will fail (400 "Cannot send to input stream on a completed run"). +A run can end for several reasons: idle timeout, max turns reached, `chat.requestUpgrade()`, crash, or cancellation. When this happens, the append POST to `.in` will deliver the record to the session — but with no live run consuming `.in`, nothing will happen until the next run starts. -When this error occurs, trigger a **new run** with the full message history and `continuation: true`: +The transport's job is to detect "no live run" and trigger a new one on the **same session**. Trigger with `continuation: true` so the agent's `onChatStart` hook can distinguish from a brand-new conversation: ```json { "payload": { "messages": [/* full UIMessage history */], "chatId": "conversation-123", + "sessionId": "session_cm4z2plfh000abcd1efgh", "trigger": "submit-message", "metadata": { "userId": "user-456" }, "continuation": true, @@ -375,49 +429,25 @@ When this error occurs, trigger a **new run** with the full message history and } ``` -The new run picks up the latest deployed version automatically. The agent's `onChatStart` hook receives `continuation: true` and `previousRunId` so it can distinguish from a brand new conversation. +`sessionId` is reused. Only `runId` and `publicAccessToken` change. - This is how [version upgrades](/ai-chat/patterns/version-upgrades) work transparently — the agent calls `chat.requestUpgrade()`, the run exits, and the client's next message triggers a continuation on the new version. No special handling needed beyond the standard continuation flow. 
+ This is how [version upgrades](/ai-chat/patterns/version-upgrades) work transparently — the agent calls `chat.requestUpgrade()`, the run exits, and the client's next message triggers a continuation on the new version. Same session, new run. -## Stopping and closing - -### Stop the current turn - -Send a stop signal to interrupt the agent mid-response: - -```bash -POST /realtime/v1/streams/{runId}/input/chat-stop -Authorization: Bearer -Content-Type: application/json - -{ - "data": { "stop": true } -} -``` - -The agent's stop signal fires, `streamText` aborts, and a `trigger:turn-complete` chunk is emitted. - -### Close the conversation +## Closing the conversation -Send a close signal to end the conversation gracefully: +When the user is done with the conversation, close the session: ```bash -POST /realtime/v1/streams/{runId}/input/chat-messages -Authorization: Bearer +POST /api/v1/sessions/{sessionId}/close +Authorization: Bearer Content-Type: application/json -{ - "data": { - "messages": [], - "chatId": "conversation-123", - "trigger": "close" - } -} +{ "reason": "user-ended" } ``` -The agent exits its loop and the run completes. If you skip this, the agent closes on its own when the idle/turn timeout expires. +Closing is idempotent and optional. A long-running chat that's just between turns is a **live** session, not a closed one — don't close it prematurely. ## Session state @@ -425,26 +455,33 @@ A client needs to track per-conversation: | Field | Description | | --- | --- | -| `chatId` | Stable conversation ID (survives continuations) | -| `runId` | Current run ID (changes on continuation) | -| `publicAccessToken` | JWT for stream auth (refreshed on each turn-complete) | -| `lastEventId` | Last SSE event ID (for stream resumption) | +| `sessionId` | Durable session ID (`session_*`). Stable for the life of the conversation. | +| `chatId` | Your stable conversation ID (passed as `externalId` on create). | +| `runId` | Current run ID. 
Changes when a run ends and a continuation starts. | +| `publicAccessToken` | JWT for session + run access. Refreshed via `trigger:turn-complete` chunks. | +| `lastEventId` | Last SSE event ID received on `.out`. Use to resume mid-stream. | -On continuation, `runId` and `publicAccessToken` change. `chatId` stays the same. +`sessionId` and `chatId` are durable. `runId` and `publicAccessToken` are live-run state that refreshes on each new run. On reload, you only need `sessionId` + `publicAccessToken` + `lastEventId` to resume — `runId` is a live-run hint that can be `null` when no run is active. ## Authentication | Operation | Auth | | --- | --- | -| Trigger task | Secret API key or scoped JWT with `write:tasks` | -| Input stream POST | JWT with `write:inputStreams` scope for the run | -| Output stream GET | JWT with `read:runs` scope for the run | +| Create session (`POST /api/v1/sessions`) | Secret API key or JWT with `write:sessions` | +| Close session (`POST /api/v1/sessions/{id}/close`) | Secret API key or JWT with `admin:sessions:{id}` | +| Trigger task | Secret API key or JWT with `write:tasks` | +| `.in` append | JWT with `write:sessions:{id}` (or `write:sessions:{id}:in`) | +| `.out` subscribe | JWT with `read:sessions:{id}` | + +The transport-facing `publicAccessToken` returned from the trigger response carries both `read:sessions:{id}` and `write:sessions:{id}` for the session, plus `read:runs:{runId}` + `write:inputStreams:{runId}` for the run. Use it for all session operations. -The `publicAccessToken` returned from the trigger response has both `read:runs` and `write:inputStreams` scopes for the run. Use it for all stream operations. +See [Session reference — Token scopes](/sessions/reference#token-scopes) for the full scope list. 
## See also -- [`TriggerChatTransport`](/ai-chat/frontend) — built-in frontend transport (implements this protocol) -- [`AgentChat`](/ai-chat/server-chat) — built-in server-side client (implements this protocol) -- [Backend lifecycle](/ai-chat/backend#lifecycle-hooks) — what the agent does on each event -- [Version upgrades](/ai-chat/patterns/version-upgrades) — how `chat.requestUpgrade()` uses continuations +- [Sessions Overview](/sessions/overview) — The durable primitive chat.agent is built on +- [Sessions Reference](/sessions/reference) — Full `sessions.*` API and wire endpoints +- [`TriggerChatTransport`](/ai-chat/frontend) — Built-in browser transport (implements this protocol) +- [`AgentChat`](/ai-chat/server-chat) — Built-in server-side client +- [Backend lifecycle](/ai-chat/backend#lifecycle-hooks) — What the agent does on each event +- [Version upgrades](/ai-chat/patterns/version-upgrades) — How `chat.requestUpgrade()` uses continuations diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 32a24705af2..e2961935345 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -73,6 +73,18 @@ const transport = useTriggerChatTransport({ ## Session management +Every chat is backed by a [Session](/sessions/overview) — a durable, bidirectional I/O primitive that outlives any single run. The transport manages the session for you; what you persist on your side is a small piece of state per chat. + +### What the transport persists per chat + +| Field | Type | Notes | +| --- | --- | --- | +| `sessionId` | `string` | `session_*` friendly ID. **Durable** — stays the same for the life of the conversation. | +| `publicAccessToken` | `string` | JWT for session + run access. Refreshed via `trigger:turn-complete` chunks. | +| `lastEventId` | `string \| undefined` | Last SSE event received on `.out`. Used to resume mid-stream. | +| `runId` | `string \| undefined` | Current run ID. Changes on continuations. 
`undefined` means no live run right now. | +| `isStreaming` | `boolean \| undefined` | **Optional.** The transport sets it internally, but you don't have to persist it — the server decides "nothing is streaming" via the session's [`X-Session-Settled`](/ai-chat/client-protocol#x-session-settled-fast-close-on-idle-reconnects) signal on reconnect. If you do persist it, the transport keeps the fast-path short-circuit. If you drop it, reconnects open the SSE and close fast on settled sessions. | + ### Session cleanup (frontend) Since session creation and updates are handled server-side, the frontend only needs to handle session deletion when a run ends: @@ -94,6 +106,8 @@ const transport = useTriggerChatTransport({ On page load, fetch both the messages and the session from your database, then pass them to `useChat` and the transport. Pass `resume: true` to `useChat` when there's an existing conversation — this tells the AI SDK to reconnect to the stream via the transport. +Because sessions outlive individual runs, a chat you were in yesterday resumes against the same `sessionId` — even if the original run has long since exited. The transport uses `sessionId` + `lastEventId` to resubscribe, and triggers a fresh run on the same session if the client tries to send a new message. + ```tsx app/page.tsx "use client"; diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx index 0e58bc997e8..04c735f5b1e 100644 --- a/docs/ai-chat/overview.mdx +++ b/docs/ai-chat/overview.mdx @@ -114,11 +114,13 @@ sequenceDiagram ## How multi-turn works -### One run, many turns +### One conversation, many runs -The entire conversation lives in a **single Trigger.dev run**. After each AI response, the run waits for the next message via input streams. The frontend transport handles this automatically — it triggers a new run for the first message and sends subsequent messages to the existing run. 
+Each chat is backed by a [Session](/sessions/overview) — a durable, bidirectional I/O primitive that outlives any single run. The conversation's identity (`sessionId`) stays the same across run boundaries. Messages flow through the session's `.in` channel; responses stream on `.out`. -This means your conversation has full observability in the Trigger.dev dashboard: every turn is a span inside the same run. +Within a session, a single run handles many turns. After each AI response, the run waits for the next message via the session's `.in` channel. The frontend transport handles this automatically — triggers a new run on the session for the first message, and sends subsequent messages into the existing run. + +Every turn is a span inside the same run in the Trigger.dev dashboard. The Agents dashboard view also lets you inspect the session directly — all runs that have ever touched it, filterable and resumable. ### Warm and suspended states @@ -127,12 +129,28 @@ After each turn, the run goes through two phases of waiting: 1. **Warm phase** (default 30s) — The run stays active and responds instantly to the next message. Uses compute. 2. **Suspended phase** (default up to 1h) — The run suspends, freeing compute. It wakes when the next message arrives. There's a brief delay as the run resumes. -If no message arrives within the turn timeout, the run ends gracefully. The next message from the frontend will automatically start a fresh run. +If no message arrives within the turn timeout, the run ends gracefully. The session stays open. The next message from the frontend automatically starts a fresh run **on the same session** — chat history and identity persist across the run boundary. You are not charged for compute during the suspended phase. Only the idle phase uses compute resources. 
+### Resume and inbox + +Because the session outlives the run, a chat you were in yesterday resumes against the same session today — even after the original run has idle-timed out or crashed. Pass `resume: true` to `useChat` on page load and the transport reconnects via `sessionId` + `lastEventId`, kicking off a new run only if the user sends a message. + +You can also enumerate every chat in your environment with `sessions.list`: + +```ts +import { sessions } from "@trigger.dev/sdk"; + +for await (const s of sessions.list({ type: "chat.agent", tag: "user:user-456" })) { + console.log(s.id, s.externalId, s.createdAt, s.closedAt); +} +``` + +This powers inbox-style UIs (your own chat list page) without maintaining a separate index. + ### What the backend accumulates The backend automatically accumulates the full conversation history across turns. After the first turn, the frontend transport only sends the new user message — not the entire history. This is handled transparently by the transport and agent. @@ -157,6 +175,7 @@ There are three ways to build the backend, from most opinionated to most flexibl ## Related - [Quick Start](/ai-chat/quick-start) — Get a working chat in 3 steps +- [Sessions](/sessions/overview) — The durable primitive chat.agent is built on - [Database persistence](/ai-chat/patterns/database-persistence) — Conversation + session state across hooks (ORM-agnostic) - [Code execution sandbox](/ai-chat/patterns/code-sandbox) — Warm/teardown pattern for E2B (or similar) with `onWait` / `chat.local` - [Backend](/ai-chat/backend) — Backend approaches in detail diff --git a/docs/ai-chat/patterns/version-upgrades.mdx b/docs/ai-chat/patterns/version-upgrades.mdx index 8679b664f0b..972750ddb9a 100644 --- a/docs/ai-chat/patterns/version-upgrades.mdx +++ b/docs/ai-chat/patterns/version-upgrades.mdx @@ -17,7 +17,9 @@ When `chat.requestUpgrade()` is called in `onTurnStart` or `onValidateMessages`: 3. 
The transport receives the chunk and immediately triggers a **new run** on the currently promoted deployment with the same message (as a continuation) 4. The new run's response is piped through transparently — the user sees a single seamless response from the upgraded agent -When called from inside `run()` or `chat.defer()`, the current turn completes normally first and the run exits afterward. The next message triggers the continuation. +The new run lives on the **same [Session](/sessions/overview)** as the old run — `sessionId` persists across the upgrade. Only `runId` and `publicAccessToken` refresh. The transport's SSE subscription to `session.out` doesn't even need to re-establish; it just continues receiving chunks from whichever run is currently writing. + +When called from inside `run()` or `chat.defer()`, the current turn completes normally first and the run exits afterward. The next message triggers the continuation on the same session. ```mermaid sequenceDiagram @@ -90,7 +92,7 @@ export const myChat = chat }); ``` -The transport includes `clientData` in every payload — both the initial trigger and subsequent input stream messages — so the agent always has the current value. +The transport includes `clientData` in every payload — both the initial trigger and subsequent records on the session's `.in` channel — so the agent always has the current value. 
This pattern is useful when: - Your frontend is backwards-compatible across several agent versions, but occasionally ships breaking changes diff --git a/docs/ai-chat/quick-start.mdx b/docs/ai-chat/quick-start.mdx index cf066f090a3..a6ce43a15ba 100644 --- a/docs/ai-chat/quick-start.mdx +++ b/docs/ai-chat/quick-start.mdx @@ -114,5 +114,6 @@ description: "Get a working AI agent in 3 steps — define an agent, generate a - [Backend](/ai-chat/backend) — Lifecycle hooks, persistence, session iterator, raw task primitives - [Frontend](/ai-chat/frontend) — Session management, client data, reconnection +- [Sessions](/sessions/overview) — The durable primitive chat.agent is built on (for cross-run resume, inbox views, and custom transports) - [Types](/ai-chat/types) — `chat.withUIMessage`, `InferChatUIMessage`, and related typing - [Features](/ai-chat/features) — Per-run data, deferred work, streaming, subtasks diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index db16638b5ec..f8819629106 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -67,7 +67,8 @@ The payload passed to the `run` function. | -------------- | ------------------------------------------ | -------------------------------------------------------------------- | | `ctx` | `TaskRunContext` | Full task run context — same as `task` `run`’s `{ ctx }` | | `messages` | `ModelMessage[]` | Model-ready messages — pass directly to `streamText` | -| `chatId` | `string` | Unique chat session ID | +| `chatId` | `string` | Your conversation ID (the session's `externalId`) | +| `sessionId` | `string \| undefined` | Friendly ID of the backing [Session](/sessions/overview) (`session_*`). Use with `sessions.open()` for advanced cases. 
| | `trigger` | `"submit-message" \| "regenerate-message"` | What triggered the request | | `messageId` | `string \| undefined` | Message ID (for regenerate) | | `clientData` | Typed by `clientDataSchema` | Custom data from the frontend (typed when schema is provided) | @@ -496,8 +497,8 @@ Options for the frontend transport constructor and `useTriggerChatTransport` hoo | `headers` | `Record` | — | Extra headers for API requests | | `streamTimeoutSeconds` | `number` | `120` | How long to wait for stream data | | `clientData` | Typed by `clientDataSchema` | — | Default client data for every request | -| `sessions` | `Record` | — | Restore sessions from storage | -| `onSessionChange` | `(chatId, session \| null) => void` | — | Fires when session state changes | +| `sessions` | `Record` | — | Restore sessions from storage. See [ChatSession](#chatsession). | +| `onSessionChange` | `(chatId, session \| null) => void` | — | Fires when session state changes. `session` is the full `ChatSession` or `null` when the run ends. | | `renewRunAccessToken` | `(params: RenewRunAccessTokenParams) => string \| ... \| Promise<...>` | — | Mint a new run-scoped PAT when the run PAT returns 401 (realtime / input stream). Retries once. | | `triggerOptions` | `{...}` | — | Options for the initial task trigger (see below) | @@ -512,7 +513,20 @@ Use this to mint or log per-chat trigger tokens. A plain **`string`** is still s ### `renewRunAccessToken` callback -Optional. When the **run** public access token used for realtime SSE or input streams expires, the transport calls this once with **`RenewRunAccessTokenParams`** (`chatId`, `runId`), then retries the failing request. Implement it with your server `auth.createPublicToken` (scopes `read:runs:` and `write:inputStreams:`). See [Authentication](/realtime/auth). +Optional. 
When the public access token the transport holds returns 401 (realtime SSE, `.in` append, etc.), the transport calls this once with **`RenewRunAccessTokenParams`**, then retries the failing request. Implement it with your server `auth.createPublicToken`. See [Authentication](/realtime/auth). + +`RenewRunAccessTokenParams`: + +| Field | Type | Description | +| --- | --- | --- | +| `chatId` | `string` | Your conversation id. | +| `runId` | `string` | The current run backing the chat. | +| `sessionId` | `string \| undefined` | The backing [Session](/sessions/overview) friendlyId. Present after the transport has observed a session for this chat. | + +Minted tokens should carry **both** run and session scopes so the PAT covers the live input stream AND the Session's `.in` / `.out` channels: + +- `read:runs:` + `write:inputStreams:` +- `read:sessions:` + `write:sessions:` (when `sessionId` is present) ```ts import { auth } from "@trigger.dev/sdk"; @@ -525,9 +539,18 @@ async function getChatToken(input: ResolveChatAccessTokenParams) { const transport = useTriggerChatTransport({ task: "my-chat", accessToken: getChatToken, - renewRunAccessToken: async ({ chatId, runId }) => { + renewRunAccessToken: async ({ chatId, runId, sessionId }) => { return auth.createPublicToken({ - scopes: { read: { runs: runId }, write: { inputStreams: runId } }, + scopes: { + read: { + runs: runId, + ...(sessionId ? { sessions: sessionId } : {}), + }, + write: { + inputStreams: runId, + ...(sessionId ? { sessions: sessionId } : {}), + }, + }, expirationTime: "1h", }); }, @@ -669,8 +692,52 @@ The hook handles: See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination). +## ChatSession + +Persistable session state for the frontend `TriggerChatTransport` and the server-side `AgentChat`. `sessionId` is the durable key — it stays the same for the life of the conversation, even across run boundaries. 
+ +| Field | Type | Description | +| --- | --- | --- | +| `sessionId` | `string` | Friendly ID of the backing [Session](/sessions/overview) (`session_*`). Durable across runs. | +| `publicAccessToken` | `string` | JWT covering both run + session scopes for this chat. Refreshed via `trigger:turn-complete` chunks. | +| `lastEventId` | `string \| undefined` | Last SSE event received on `.out`. Used to resume mid-stream after a disconnect. | +| `runId` | `string \| undefined` | Current run ID. Changes on continuations. `undefined` means no live run. | +| `isStreaming` | `boolean \| undefined` | Optional. If persisted, `reconnectToStream` uses it as a fast-path short-circuit. If omitted, the server decides via the session's [`X-Session-Settled`](/ai-chat/client-protocol#x-session-settled-fast-close-on-idle-reconnects) response header. | + +## ChatInputChunk + +The wire shape for records sent on `.in`. Consumed by `chat.agent` internally — you typically don't write these yourself; `transport.sendMessage`, `transport.stopGeneration`, and `transport.sendAction` all serialize into this shape. + +```ts +type ChatInputChunk = + | { kind: "message"; payload: ChatTaskWirePayload } + | { kind: "stop"; message?: string }; +``` + +| Variant | When | Payload | +| --- | --- | --- | +| `kind: "message"` | New message, action, approval response, or close | `payload` is a full `ChatTaskWirePayload` — its `trigger` field (`"submit-message"` / `"action"` / `"close"`) determines the agent's dispatch | +| `kind: "stop"` | Client aborted the active turn | Optional `message` surfaces in the stop handler | + +For the raw wire format, see [Client Protocol — ChatInputChunk](/ai-chat/client-protocol#chatinputchunk). + +## Session scopes + +Tokens minted for `TriggerChatTransport` and `AgentChat` carry both run and session scopes. 
Full surface: + +| Scope | Grants | +| --- | --- | +| `read:sessions:` | Subscribe to `.out`, retrieve the session row | +| `write:sessions:` | Append to `.in`, close the session, update metadata | +| `read:runs:` | Read run state | +| `write:inputStreams:` | Legacy run-scoped input streams (used alongside session scopes during live runs) | + +See [Sessions Reference — Token scopes](/sessions/reference#token-scopes) for the full scope list including wildcards and admin scopes. + ## Related +- [Sessions Overview](/sessions/overview) — The durable primitive chat.agent is built on +- [Sessions Reference](/sessions/reference) — Full `sessions.*` API - [Realtime Streams](/tasks/streams) — How streams work under the hood - [Using the Vercel AI SDK](/guides/examples/vercel-ai-sdk) — Basic AI SDK usage with Trigger.dev - [Realtime React Hooks](/realtime/react-hooks/overview) — Lower-level realtime hooks diff --git a/docs/ai-chat/server-chat.mdx b/docs/ai-chat/server-chat.mdx index eef67b7331c..01a315622a0 100644 --- a/docs/ai-chat/server-chat.mdx +++ b/docs/ai-chat/server-chat.mdx @@ -116,20 +116,26 @@ for await (const chunk of stream) { In a stateless environment (HTTP handler, serverless function), you need to persist and restore the session across requests. -`AgentChat` provides a `session` option and two callbacks for this: +Each chat is backed by a [Session](/sessions/overview), addressed by a durable `sessionId` that outlives any single run. `AgentChat` exposes the session state via `chat.run` (a `ChatSession` object — the name is legacy, the content is the full session state). 
+ +`AgentChat` provides a `session` option and two callbacks for persistence: ```ts import { AgentChat } from "@trigger.dev/sdk/chat"; export async function POST(req: Request) { - const { chatId, message, runId, lastEventId } = await req.json(); + const { chatId, message } = await req.json(); + const saved = await db.sessions.find({ chatId }); const chat = new AgentChat({ agent: "my-agent", id: chatId, - // Restore from previous request - session: runId ? { runId, lastEventId } : undefined, - // Persist when a new run starts + // Restore from previous request. sessionId is the durable key — + // runId + lastEventId are live-run hints that speed up resume. + session: saved + ? { sessionId: saved.sessionId, runId: saved.runId, lastEventId: saved.lastEventId } + : undefined, + // Persist when a new run starts against this session onTriggered: async ({ runId, chatId }) => { await db.sessions.upsert({ chatId, runId }); }, @@ -142,7 +148,35 @@ export async function POST(req: Request) { const stream = await chat.sendMessage(message); const text = await stream.text(); - return Response.json({ text, runId: chat.run?.runId }); + // On first-ever request there was no session in the DB — save the + // sessionId that AgentChat just created server-side. For subsequent + // requests this is a cheap no-op upsert. + if (chat.run?.sessionId) { + await db.sessions.upsert({ + chatId, + sessionId: chat.run.sessionId, + runId: chat.run.runId, + lastEventId: chat.run.lastEventId, + }); + } + + return Response.json({ text }); +} +``` + + + Because `sessionId` is durable, a chat that was active yesterday resumes against the same session today — even if the original run has long since exited. `AgentChat` triggers a fresh run on the same session when needed, carrying the conversation forward without losing history or identity. 
+ + +### Inbox view of all chats + +Use [`sessions.list`](/sessions/reference#list-sessions) to enumerate every chat in your environment, filtered by tag or status: + +```ts +import { sessions } from "@trigger.dev/sdk"; + +for await (const session of sessions.list({ type: "chat.agent", tag: "user:user-456" })) { + console.log(session.id, session.createdAt, session.closedAt); } ``` @@ -227,7 +261,7 @@ const stream = await chat.reconnect(); | `agent` | `string` | required | The agent task ID to trigger | | `id` | `string` | `crypto.randomUUID()` | Conversation ID for tagging and correlation | | `clientData` | typed from agent | `undefined` | Client data included in every request | -| `session` | `{ runId: string; lastEventId?: string }` | `undefined` | Restore a previous session | +| `session` | `ChatSession` (`{ sessionId: string; runId?: string; lastEventId?: string }`) | `undefined` | Restore a previous session. `sessionId` is the durable key; `runId` and `lastEventId` are live-run hints. | | `onTriggered` | `(event) => void` | `undefined` | Called when a new run is created | | `onTurnComplete` | `(event) => void` | `undefined` | Called when a turn's stream ends | | `streamKey` | `string` | `"chat"` | Output stream key | diff --git a/docs/ai-chat/testing.mdx b/docs/ai-chat/testing.mdx index b59b37c61d1..1c3bbb7dd6a 100644 --- a/docs/ai-chat/testing.mdx +++ b/docs/ai-chat/testing.mdx @@ -8,6 +8,8 @@ description: "Drive a chat.agent through real turns in unit tests — no network `@trigger.dev/sdk/ai/test` exports `mockChatAgent`, an offline harness that runs your `chat.agent` definition's `run()` function inside an in-memory task runtime. You send messages, actions, and stop signals through driver methods and assert against the chunks the agent emits. 
+Under the hood the harness drives the agent's backing [Session](/sessions/overview) channels — `.in` receives the records your `sendMessage` / `sendStop` / `sendAction` produce, `.out` captures the chunks the agent emits. The harness API itself is session-agnostic; you don't need to manage `sessionId` in tests. + The harness exercises the real turn loop, lifecycle hooks, validation, hydration, and action routing — only the language model and the surrounding Trigger.dev runtime are replaced. Pair it with [`MockLanguageModelV3`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/mock-language-model-v3) and `simulateReadableStream` from `ai` to control LLM responses. @@ -217,7 +219,7 @@ await harness.sendStop("user requested stop"); ### close -Sends a `close` trigger, closes the input stream, and aborts the run signal so the task exits cleanly. Always call this at the end of every test: +Sends a `close` trigger, closes the session's `.in` channel, and aborts the run signal so the task exits cleanly. 
Always call this at the end of every test: ```ts afterEach(() => harness.close()); diff --git a/docs/docs.json b/docs/docs.json index 21d3edc5987..8a9c901b0c4 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -10,7 +10,11 @@ }, "favicon": "/images/favicon.png", "contextual": { - "options": ["copy", "view", "claude"] + "options": [ + "copy", + "view", + "claude" + ] }, "navigation": { "dropdowns": [ @@ -36,7 +40,11 @@ "pages": [ { "group": "Tasks", - "pages": ["tasks/overview", "tasks/schemaTask", "tasks/scheduled"] + "pages": [ + "tasks/overview", + "tasks/schemaTask", + "tasks/scheduled" + ] }, "triggering", "runs", @@ -49,7 +57,10 @@ "building-with-ai", { "group": "MCP Server", - "pages": ["mcp-introduction", "mcp-tools"] + "pages": [ + "mcp-introduction", + "mcp-tools" + ] }, "skills", "mcp-agent-rules" @@ -63,7 +74,12 @@ "errors-retrying", { "group": "Wait", - "pages": ["wait", "wait-for", "wait-until", "wait-for-token"] + "pages": [ + "wait", + "wait-for", + "wait-until", + "wait-for-token" + ] }, "queue-concurrency", "versioning", @@ -116,6 +132,15 @@ "ai-chat/client-protocol", "ai-chat/reference" ] + }, + { + "group": "Sessions", + "pages": [ + "sessions/overview", + "sessions/quick-start", + "sessions/channels", + "sessions/reference" + ] } ] }, @@ -152,7 +177,9 @@ }, { "group": "Development", - "pages": ["cli-dev"] + "pages": [ + "cli-dev" + ] }, { "group": "Deployment", @@ -164,7 +191,10 @@ "deployment/atomic-deployment", { "group": "Deployment integrations", - "pages": ["github-integration", "vercel-integration"] + "pages": [ + "github-integration", + "vercel-integration" + ] } ] }, @@ -228,11 +258,19 @@ }, { "group": "Observability", - "pages": ["observability/query", "observability/dashboards"] + "pages": [ + "observability/query", + "observability/dashboards" + ] }, { "group": "Using the Dashboard", - "pages": ["run-tests", "troubleshooting-alerts", "replaying", "bulk-actions"] + "pages": [ + "run-tests", + "troubleshooting-alerts", + "replaying", + 
"bulk-actions" + ] }, { "group": "Troubleshooting", @@ -254,18 +292,30 @@ "self-hosting/kubernetes", { "group": "Environment variables", - "pages": ["self-hosting/env/webapp", "self-hosting/env/supervisor"] + "pages": [ + "self-hosting/env/webapp", + "self-hosting/env/supervisor" + ] }, "open-source-self-hosting" ] }, { "group": "Open source", - "pages": ["open-source-contributing", "github-repo", "changelog", "roadmap"] + "pages": [ + "open-source-contributing", + "github-repo", + "changelog", + "roadmap" + ] }, { "group": "Help", - "pages": ["community", "help-slack", "help-email"] + "pages": [ + "community", + "help-slack", + "help-email" + ] } ] }, @@ -371,7 +421,11 @@ }, { "group": "Query API", - "pages": ["management/query/execute", "management/query/schema", "management/query/dashboards"] + "pages": [ + "management/query/execute", + "management/query/schema", + "management/query/dashboards" + ] } ] }, @@ -382,7 +436,9 @@ "groups": [ { "group": "Introduction", - "pages": ["guides/introduction"] + "pages": [ + "guides/introduction" + ] }, { "group": "Frameworks", @@ -405,7 +461,6 @@ }, "pages": [ "guides/ai-agents/overview", - "guides/ai-agents/generate-translate-copy", "guides/ai-agents/route-question", "guides/ai-agents/respond-and-check-content", @@ -449,7 +504,10 @@ }, { "group": "Migration guides", - "pages": ["migration-mergent", "migration-n8n"] + "pages": [ + "migration-mergent", + "migration-n8n" + ] }, { "group": "Use cases", @@ -540,7 +598,10 @@ "href": "https://trigger.dev" }, "api": { - "openapi": ["openapi.yml", "v3-openapi.yaml"], + "openapi": [ + "openapi.yml", + "v3-openapi.yaml" + ], "playground": { "display": "simple" } diff --git a/docs/realtime/backend/input-streams.mdx b/docs/realtime/backend/input-streams.mdx index 1224e24244e..4fb4c6c607a 100644 --- a/docs/realtime/backend/input-streams.mdx +++ b/docs/realtime/backend/input-streams.mdx @@ -11,6 +11,10 @@ The Input Streams API allows you to send data into running Trigger.dev tasks fro 
Streams](/tasks/streams#input-streams) in the Streams doc. + + Input streams are keyed by `runId` — they're correct for sending data to a specific live run. If you need a bidirectional channel addressed by a durable ID that survives run boundaries (e.g. a chat that resumes tomorrow, an agent coordinated across many runs), use [Sessions](/sessions/overview) instead. `session.in` has the same consumer-side API (`on` / `once` / `wait` / `waitWithIdleTimeout`) but is addressed by `sessionId` rather than `runId`. + + ## Sending data to a running task ### Using defined input streams (Recommended) diff --git a/docs/realtime/backend/streams.mdx b/docs/realtime/backend/streams.mdx index 8a273ea5a9f..47f101a0191 100644 --- a/docs/realtime/backend/streams.mdx +++ b/docs/realtime/backend/streams.mdx @@ -10,6 +10,10 @@ description: "Read AI/LLM output, file chunks, and other streaming data from you To emit streams from your tasks, see [Streaming data from tasks](/tasks/streams). For React components, see [Streaming in React](/realtime/react-hooks/streams). + + Run-scoped streams are the right primitive for ephemeral I/O that lives inside a single run's lifetime. For durable, long-lived channels that outlive a run — chat agents, cross-run workflows, inbox-style listings — use [Sessions](/sessions/overview) instead. Both share the same underlying realtime transport; Sessions layer durable identity and bidirectional channels on top. + + ## Reading streams ### Using defined streams (Recommended) diff --git a/docs/sessions/channels.mdx b/docs/sessions/channels.mdx new file mode 100644 index 00000000000..46419590320 --- /dev/null +++ b/docs/sessions/channels.mdx @@ -0,0 +1,214 @@ +--- +title: "Channels" +sidebarTitle: "Channels" +description: "Deep dive on `.in` and `.out` — producer/consumer asymmetry, suspend-while-idle, serialization, and the full method surface." 
+--- + +Every session exposes two channels accessed through a [`SessionHandle`](/sessions/reference#sessionhandle): + +```ts +const handle = sessions.open(sessionIdOrExternalId); + +handle.out; // SessionOutputChannel +handle.in; // SessionInputChannel +``` + +The two channels are **disjoint** — no method name appears on both. Producer/consumer roles are fixed per channel: + +| Channel | Task role | External-client role | +|---|---|---| +| `.out` | Producer (`append`, `pipe`, `writer`) | Consumer (`read`) | +| `.in` | Consumer (`on`, `once`, `peek`, `wait`, `waitWithIdleTimeout`) | Producer (`send`) | + +This asymmetry makes directional intent obvious at every call site. + +## `.out` — task writes, client reads + +The task is the producer; browsers or other tasks consume via SSE. + +### Producing: `append`, `pipe`, `writer` + +All three write methods route through the same underlying S2 direct-write pipeline, so subscribers always receive parsed objects (not raw strings), regardless of which producer you use. + +#### `append` — single record + +```ts +await handle.out.append({ type: "status", message: "Processing..." }); +``` + +Returns once the record has been acknowledged. Use when you're writing a small, bounded number of discrete events. + +#### `pipe` — forward a ReadableStream or AsyncIterable + +```ts +const { stream, waitUntilComplete } = handle.out.pipe( + streamText({ model, messages }).toUIMessageStream() +); + +await waitUntilComplete(); +``` + +`pipe` attaches your source stream to the session channel and returns immediately. It mirrors [`streams.pipe`](/tasks/streams) but session-scoped — there's no `target` option because the session is the target. + +`stream` is a local `ReadableStream` you can `await` / consume if you also want to observe the records from inside the producing task. `waitUntilComplete()` resolves when all source records have been flushed to S2. 
+ +#### `writer({ execute })` — fine-grained control + +Use when you want to produce records imperatively or interleave multiple sources: + +```ts +const { waitUntilComplete } = handle.out.writer({ + execute: ({ write, merge }) => { + write({ type: "start" }); + write({ type: "section", title: "Summary" }); + merge(llmStream.toUIMessageStream()); // splice in another stream + write({ type: "end" }); + }, +}); + +await waitUntilComplete(); +``` + +`write` enqueues a single record. `merge` pulls records from another stream into the same channel in order. `execute` can be async — the writer stays open until your callback returns. + +Mirrors [`streams.writer`](/tasks/streams#writer) but session-scoped. + +### Consuming: `read` + +```ts +const handle = sessions.open(sessionId); +const stream = await handle.out.read({ + lastEventId: "-1", + timeoutInSeconds: 60, +}); + +for await (const chunk of stream) { + console.log(chunk); // MyChunk, already parsed +} +``` + +Options: + +| Option | Type | Notes | +|---|---|---| +| `lastEventId` | `string \| number` | Resume cursor. `-1` (or omit) starts from the beginning. Use the last seen event ID to resume after a disconnect. | +| `timeoutInSeconds` | `number` | Max time SSE will hold the connection open between records. Default 60. | +| `signal` | `AbortSignal` | Cancel the subscription. | +| `onPart` | `(part) => void` | Observe raw SSE parts (id + chunk + timestamp). | +| `onComplete` | `() => void` | Fires when the stream closes. | +| `onError` | `(err) => void` | Fires on subscription errors. | + +`read` returns an async iterable — use `for await` to consume. The underlying SSE transport handles reconnection, `Last-Event-ID` resume, and abort propagation automatically. + +## `.in` — client writes, task reads + +External clients produce via `.send`; the task consumes via `on` / `once` / `peek` / `wait` / `waitWithIdleTimeout`. 
+ +### Producing: `send` + +```ts +await handle.in.send({ kind: "message", text: "Hello" }); +``` + +Any JSON-serializable value. Strings are passed through as-is; objects are `JSON.stringify`-ed. + +### Consuming: non-blocking vs. suspending + +The consuming-side methods fall into two groups: + +- **Non-blocking / keep-running**: `on`, `once`, `peek`. The task stays awake while consuming. +- **Suspending**: `wait`, `waitWithIdleTimeout`. The run suspends via a [session-stream waitpoint](#suspend-while-idle), freeing compute until a record arrives. + +#### `on` — handler fires for every record + +```ts +const { off } = handle.in.on<{ kind: "message" | "stop" }>((msg) => { + if (msg.kind === "stop") controller.abort(); +}); + +// later +off(); +``` + +Handlers are flushed with any buffered records on attach, and cleaned up when the task run completes. Use for long-lived listeners (e.g. a stop-signal listener that lives across turns). + +#### `once` — await the next record + +```ts +const { ok, output, error } = await handle.in.once({ timeout: "30s" }); + +if (ok) { + console.log(output); +} +``` + +Returns on the next arrival, or `{ ok: false, error }` on timeout. Non-suspending — the run stays active while waiting. Chain `.unwrap()` to get the output directly and throw on timeout: + +```ts +const msg = await handle.in.once({ timeout: "30s" }).unwrap(); +``` + +#### `peek` — inspect buffer without consuming + +```ts +const pending = handle.in.peek(); +if (pending) { /* ... */ } +``` + +Returns `undefined` if the buffer is empty. Does not consume the record — next `on` / `once` / `wait` still sees it. + +#### `wait` — suspend until next record + +```ts +const msg = await handle.in.wait({ timeout: "5m" }); +``` + +**Suspends the run** while idle. Unlike `once`, the task's container can be hibernated, freeing compute. When a record lands on `.in`, the run-engine waitpoint fires and the run resumes with the message as the resolved value. 
+ +Only callable from inside `task.run()`. Throws on timeout. + +#### `waitWithIdleTimeout` — suspend with a resetting timeout + +```ts +const msg = await handle.in.waitWithIdleTimeout({ idleTimeout: "5m" }); +``` + +Like `wait`, but the timeout **resets on every message**. Use for conversational patterns: "end the conversation after 5 minutes of silence, but stay alive as long as the user keeps talking." + +### Suspend-while-idle + +`wait` and `waitWithIdleTimeout` use Trigger.dev's run-engine waitpoints — the same primitive behind [`wait-for-token`](/wait-for-token) and [`streams.input.wait`](/tasks/input-streams). When the task calls `wait()`: + +1. The run-engine suspends the run and deallocates its container. +2. You stop paying for compute. +3. When a record arrives on `.in` (via `send` from any client), the waitpoint is fired synchronously by the append handler. +4. The run resumes with the record as the resolved value. + +Multiple concurrent waiters on the same session (e.g. two agents) are all woken by the same append. + +## Serialization + +All `.out` producer methods route through a uniform pipeline, so: + +- **Subscribers always receive parsed objects.** `handle.out.read()` yields `T` values directly — you never need `JSON.parse`. +- **Mixed producers work cleanly.** A record written via `append` looks identical on the wire to one emitted by `writer` or `pipe`. + +On `.in`, `send` accepts any JSON-serializable value. Strings are passed through; objects are serialized. Consumers decide how to type the records (`on`, `once`, `wait`). + +## Buffering and attachment + +Records that arrive on `.in` **before** a consumer attaches are buffered on the task side. The first `on` / `once` / `peek` call to attach sees the buffered records in order, followed by live records as they arrive. This means you don't have to race the task's boot against the client's first `send`. + +On `.out`, records are appended to the underlying append-only log. 
Subscribers read from a cursor (`lastEventId`), so a late-connecting client can start from the beginning or resume mid-stream — nothing is lost. + +## Abort and cleanup + +- Handlers registered via `on` and long-lived subscriptions auto-clean on task-run completion. You don't need to explicitly unsubscribe. +- Passing `signal` to `read` ties the SSE subscription to your `AbortController`. Aborting the signal closes the SSE cleanly. +- `pipe` / `writer` return `waitUntilComplete` so you can serialize subsequent logic after all records are flushed. + +## Related + +- [Overview](/sessions/overview) — Conceptual intro to sessions +- [API Reference](/sessions/reference) — Types, signatures, token scopes +- [Run-scoped streams](/tasks/streams) — The ephemeral counterpart; same underlying transport diff --git a/docs/sessions/overview.mdx b/docs/sessions/overview.mdx new file mode 100644 index 00000000000..334f523bb3a --- /dev/null +++ b/docs/sessions/overview.mdx @@ -0,0 +1,169 @@ +--- +title: "Sessions" +sidebarTitle: "Overview" +description: "A durable, typed, bidirectional I/O primitive that outlives any single run — built for chat agents, long-running workflows, and any conversation you need to resume across runs." +--- + +A **Session** is a durable identity for a bidirectional stream of records. Once you create one, it outlives the run that opened it, survives process crashes, and can be resumed from any client that knows its ID. + +Under the hood, each session exposes two channels: + +- **`.out`** — the task writes, external clients read. +- **`.in`** — external clients write, the task reads. + +Both channels use Trigger.dev's realtime streams, with SSE subscribe, `Last-Event-ID` resume, and direct-to-storage writes. + +## Why sessions + +Run-scoped streams ([streams.pipe](/tasks/streams) / [streams.input](/tasks/input-streams)) are great for ephemeral I/O within a single run: LLM output from a one-shot task, file chunks during processing, etc. 
Once the run ends, the stream is gone. + +Sessions give you a channel that **isn't tied to a single run**. You can: + +- **Resume a conversation across runs.** A chat that ran yesterday, crashed, or hit an idle timeout can pick back up on the same session when the user sends the next message. +- **Share a channel across runs.** A coordinator run writes to `session.out`; a worker run reads from `session.in` — different runs, same channel. +- **Show an inbox view.** `sessions.list({ type: "chat.agent" })` returns every chat in your environment, filterable by tag or status. +- **Key everything on one ID.** `externalId` (your own string — a chat ID, a conversation UUID) is the primary key. Internally, Trigger.dev mints a stable `session_*` friendly ID you can use interchangeably. + +## Identity + +A session has two equivalent identifiers: + +| Field | Who owns it | Example | Notes | +|---|---|---|---| +| `externalId` | You | `"chat-abc-123"` | User-supplied idempotency key. Unique per environment. Optional but recommended. | +| `id` (friendly) | Trigger.dev | `"session_cm4z2plfh000abcd1efgh"` | Stable server-generated identifier. Always present. | + +Both work as lookup keys. The server disambiguates via the `session_` prefix — anything else is treated as `externalId`. + +Creating a session with the same `externalId` twice returns the same session (idempotent). This is intentional: it's safe to call `sessions.create({ externalId: chatId })` from two browser tabs racing for the same chat. + +```ts +const a = await sessions.create({ type: "chat.agent", externalId: "chat-abc" }); +const b = await sessions.create({ type: "chat.agent", externalId: "chat-abc" }); + +a.id === b.id; // true — same session +``` + +## Channels + +``` +SessionHandle +├── .out — producer on the task side, consumer on the client side +└── .in — producer on the client side, consumer on the task side +``` + +The two channels are **disjoint** — no method name appears on both sides. 
Directional intent is always obvious at the call site.
+
+### `.out` — task writes, client reads
+
+Inside a task:
+
+```ts
+const handle = sessions.open(sessionId);
+
+// Single record
+await handle.out.append({ type: "message", text: "Hello" });
+
+// Pipe a ReadableStream or AsyncIterable
+const { waitUntilComplete } = handle.out.pipe(streamText(...).toUIMessageStream());
+await waitUntilComplete();
+
+// Fine-grained control
+const { waitUntilComplete: writerComplete } = handle.out.writer({
+  execute: ({ write }) => {
+    write({ type: "start" });
+    write({ type: "text-delta", delta: "Hi" });
+    write({ type: "finish" });
+  },
+});
+await writerComplete();
+```
+
+Outside a task (browser, server action, another task):
+
+```ts
+const handle = sessions.open(sessionId);
+const stream = await handle.out.read({ lastEventId: "-1" });
+
+for await (const chunk of stream) {
+  console.log(chunk); // parsed object, not a string
+}
+```
+
+### `.in` — client writes, task reads
+
+Inside a task (consume):
+
+```ts
+const handle = sessions.open(sessionId);
+
+// Fire-and-forget handler
+handle.in.on((msg) => console.log("got", msg));
+
+// Await the next message without suspending
+const { ok, output } = await handle.in.once({ timeout: "30s" });
+
+// Suspend while idle — frees compute until a record arrives
+const msg = await handle.in.wait();
+
+// Suspend with an idle timeout that resets on every message
+await handle.in.waitWithIdleTimeout({ idleTimeout: "5m" });
+```
+
+Outside a task (produce):
+
+```ts
+const handle = sessions.open(sessionId);
+await handle.in.send({ kind: "message", text: "Hello" });
+```
+
+See [Channels](/sessions/channels) for the full surface.
+
+## Durability
+
+A session is durable until you explicitly close it:
+
+- Runs come and go. Starting a new run on an existing session is a normal operation — the run opens the handle and picks up from wherever the channels left off.
+- `sessions.close(id, { reason })` marks the session terminal.
Closed sessions still let you subscribe to their `.out` to read history, but no new records can be appended. +- Close is **client-driven only**. The task runtime never auto-closes on your behalf — a chat that's "between turns" is a live session, not a closed one. +- Optional `expiresAt` sets a retention window. Past `expiresAt`, the session is treated as closed for new writes. + +## When to use sessions vs. run-scoped streams + +Use **run-scoped streams** ([`streams.pipe`](/tasks/streams), [`streams.input`](/tasks/input-streams)) when: + +- The data is tied to one run's lifetime. +- You don't need to resume from a different process or user. +- The stream lives inside your task-runtime boundary (one producer, one consumer, both in the same run). + +Use **sessions** when: + +- The channel needs to outlive any single run. +- You want to address it from multiple places (browser + server + another task) using a stable ID. +- You need a typed bidirectional pair rather than a one-way stream. +- You want the data visible across runs for resume, inbox views, or cross-run coordination. + +Both primitives share the same underlying realtime-streams infrastructure (S2, SSE, `Last-Event-ID` resume). Sessions layer durable identity + bidirectional channels on top. + +## Relationship to chat.agent + +`chat.agent()` is built on sessions. Every chat conversation is one session: + +- `externalId` = your chat ID +- `type` = `"chat.agent"` +- `.out` carries `UIMessageChunk` events +- `.in` carries a `ChatInputChunk` tagged union (`{kind: "message", ...}` or `{kind: "stop"}`) + +If you're using `chat.agent` + `TriggerChatTransport` / `AgentChat`, sessions are handled for you. You only need this section directly if you're: + +- Building a custom chat transport (e.g. for Slack or a native app — see [Client Protocol](/ai-chat/client-protocol)). +- Using sessions for a non-chat workload (background agents, cross-run pipelines). +- Reaching into the underlying primitive for advanced cases. 
+ +## Related + +- [Quick Start](/sessions/quick-start) — Create, write, read, close in 4 steps +- [Channels](/sessions/channels) — Full `.in` / `.out` API, waitpoint semantics, serialization +- [API Reference](/sessions/reference) — `sessions.*` methods, types, scopes +- [AI Chat](/ai-chat/overview) — The chat primitive built on sessions +- [Run-scoped streams](/tasks/streams) — The ephemeral counterpart diff --git a/docs/sessions/quick-start.mdx b/docs/sessions/quick-start.mdx new file mode 100644 index 00000000000..68abbf3f393 --- /dev/null +++ b/docs/sessions/quick-start.mdx @@ -0,0 +1,128 @@ +--- +title: "Quick Start" +sidebarTitle: "Quick Start" +description: "Create a session, write to it from a task, subscribe from a client, and close it — in four steps." +--- + +This walkthrough uses the raw `sessions` API directly. If you're building a chat app, start with [`chat.agent`](/ai-chat/quick-start) instead — it wraps all of this for you. + + + + Call `sessions.create` with a `type` discriminator and (optionally) an `externalId` — your own stable ID for the thing this session represents. Creating with the same `externalId` twice returns the same session, so it's safe to call from concurrent requests. + + ```ts app/actions.ts + "use server"; + + import { sessions } from "@trigger.dev/sdk"; + + export async function createConversation(conversationId: string) { + const session = await sessions.create({ + type: "agent.conversation", + externalId: conversationId, + tags: ["user:abc-123"], + }); + + return { sessionId: session.id }; + } + ``` + + + `type` is a free-form string you choose. Use it to distinguish different kinds of sessions in your environment — e.g. `"chat.agent"`, `"agent.conversation"`, `"data-sync"`. It's what you'll filter on later in `sessions.list`. + + + + + Trigger a task with the `sessionId` in the payload. Inside the task, open the session and write records to `.out`. Records can be any JSON-serializable value — strings, objects, streams. 
+ + ```ts trigger/agent.ts + import { sessions, task } from "@trigger.dev/sdk"; + + export const runAgent = task({ + id: "run-agent", + run: async (payload: { sessionId: string; prompt: string }) => { + const handle = sessions.open(payload.sessionId); + + await handle.out.append({ type: "status", message: "Thinking..." }); + + // Simulate streaming output + for (const word of payload.prompt.split(" ")) { + await handle.out.append({ type: "token", text: word }); + } + + await handle.out.append({ type: "done" }); + }, + }); + ``` + + + `sessions.open(id)` does not make a network call — each channel method (`append`, `pipe`, `read`, etc.) hits its own endpoint when invoked. You can open the handle once at the top of `run()` and reuse it. + + + + + Any client with read access to the session can subscribe to `.out` as an SSE stream. This works from a browser, a server action, or another task. + + ```ts app/subscribe.ts + import { sessions } from "@trigger.dev/sdk"; + + async function watch(sessionId: string) { + const handle = sessions.open(sessionId); + const stream = await handle.out.read({ lastEventId: "-1" }); + + for await (const chunk of stream) { + console.log("received:", chunk); + } + } + ``` + + Records are delivered as parsed objects — you do not need to `JSON.parse` them. + + **Resume semantics.** `lastEventId` is the cursor. `"-1"` (or omitting the option on a fresh subscription) starts from the beginning. Pass the last-seen event ID to resume mid-stream after a disconnect. + + + + Closing marks the session terminal. You can still read history from `.out`, but no new records can be appended. + + ```ts app/actions.ts + "use server"; + + import { sessions } from "@trigger.dev/sdk"; + + export async function endConversation(sessionId: string) { + await sessions.close(sessionId, { reason: "user-ended" }); + } + ``` + + + Closing is optional. 
A long-running session that's just between turns is a live session, not a closed one — don't close it just because you don't have anything to write right now. + + + + +## What about `.in`? + +The walkthrough above only uses `.out`. The reverse direction — external clients sending records **into** the task — is handled by `.in`. + +Inside the task: + +```ts +const handle = sessions.open(payload.sessionId); + +// Suspend until a message arrives. Frees compute while waiting. +const message = await handle.in.wait<{ text: string }>(); +``` + +From a client: + +```ts +const handle = sessions.open(sessionId); +await handle.in.send({ text: "Hello" }); +``` + +See [Channels](/sessions/channels) for the full `.in` API — `on`, `once`, `peek`, `wait`, `waitWithIdleTimeout`. + +## Next steps + +- [Channels](/sessions/channels) — The full `.in` / `.out` API, waitpoint semantics, serialization +- [API Reference](/sessions/reference) — All methods and types +- [AI Chat Quick Start](/ai-chat/quick-start) — The higher-level chat primitive built on sessions diff --git a/docs/sessions/reference.mdx b/docs/sessions/reference.mdx new file mode 100644 index 00000000000..9e4edc257e8 --- /dev/null +++ b/docs/sessions/reference.mdx @@ -0,0 +1,222 @@ +--- +title: "API Reference" +sidebarTitle: "API Reference" +description: "Complete reference for the `sessions` namespace — methods, types, channel surface, and token scopes." +--- + +## `sessions` namespace + +Import from `@trigger.dev/sdk`. + +```ts +import { sessions } from "@trigger.dev/sdk"; +``` + +| Method | Signature | Notes | +|---|---|---| +| `sessions.create(body, requestOptions?)` | `(body: CreateSessionRequestBody) => Promise` | Create a session. Idempotent on `externalId`. | +| `sessions.retrieve(idOrExternalId, requestOptions?)` | `(id: string) => Promise` | Fetch a session by `session_*` ID or `externalId`. 
|
+| `sessions.update(idOrExternalId, body, requestOptions?)` | `(id: string, body: UpdateSessionRequestBody) => Promise<SessionItem>` | Update `tags`, `metadata`, or `externalId`. |
+| `sessions.close(idOrExternalId, body?, requestOptions?)` | `(id: string, body?: { reason?: string }) => Promise<SessionItem>` | Mark the session terminal. Idempotent. |
+| `sessions.list(options?, requestOptions?)` | `(options?: ListSessionsOptions) => CursorPagePromise<SessionItem>` | List sessions with filters + cursor pagination. |
+| `sessions.open(idOrExternalId)` | `(id: string) => SessionHandle` | Open a handle for channel I/O. No network call until you hit a channel method. |
+
+## `CreateSessionRequestBody`
+
+| Field | Type | Default | Description |
+|---|---|---|---|
+| `type` | `string` | required | Free-form discriminator (e.g. `"chat.agent"`, `"agent.conversation"`). 1–64 chars. |
+| `externalId` | `string` | — | User-supplied idempotency key. 1–256 chars. Unique per environment. Cannot start with `session_`. |
+| `taskIdentifier` | `string` | — | Optional pointer for task-owned sessions. |
+| `tags` | `string[]` | — | Up to 10 tags for filtering in `sessions.list`. |
+| `metadata` | `Record<string, unknown>` | — | Arbitrary JSON metadata. |
+| `expiresAt` | `Date \| string` | — | Absolute expiry timestamp. Past this, the session is treated as closed for new writes. |
+
+## `SessionItem`
+
+Returned by `create`, `retrieve`, `update`, `close`, and `list`.
+
+| Field | Type | Description |
+|---|---|---|
+| `id` | `string` | `session_*` friendly ID. |
+| `externalId` | `string \| null` | The key you supplied on create. |
+| `type` | `string` | The discriminator. |
+| `taskIdentifier` | `string \| null` | Optional task pointer. |
+| `tags` | `string[]` | Dashboard filter tags. |
+| `metadata` | `Record<string, unknown> \| null` | Arbitrary JSON. |
+| `closedAt` | `Date \| null` | Set when `sessions.close` is called. |
+| `closedReason` | `string \| null` | Value passed to `sessions.close`.
| +| `expiresAt` | `Date \| null` | Retention deadline. | +| `createdAt` | `Date` | When the session was created. | +| `updatedAt` | `Date` | Last write to any mutable field. | + +`CreatedSessionResponseBody` extends `SessionItem` with `isCached: boolean` — `true` when `create` returned an existing session (same `externalId`) instead of minting a new one. + +## `UpdateSessionRequestBody` + +| Field | Type | Notes | +|---|---|---| +| `tags` | `string[]` | Replaces existing tags. | +| `metadata` | `Record \| null` | Replaces existing metadata. Pass `null` to clear. | +| `externalId` | `string \| null` | Change or clear the external key. Pass `null` to clear. | + +## `CloseSessionRequestBody` + +| Field | Type | Notes | +|---|---|---| +| `reason` | `string` | Optional close reason (e.g. `"user-ended"`, `"idle-timeout"`). Up to 256 chars. | + +## `ListSessionsOptions` + +Flattened client shape. `sessions.list` serializes into `filter[*]` / `page[*]` query params internally. + +| Field | Type | Notes | +|---|---|---| +| `limit` | `number` | Page size. 1–100. Default 20. | +| `after` | `string` | Cursor: fetch the page after this session ID. | +| `before` | `string` | Cursor: fetch the page before this session ID. | +| `type` | `string \| string[]` | Filter by discriminator. | +| `tag` | `string \| string[]` | Filter by tag (any-of match). | +| `taskIdentifier` | `string \| string[]` | Filter by task identifier. | +| `externalId` | `string` | Exact-match filter. | +| `status` | `"ACTIVE" \| "CLOSED" \| "EXPIRED" \| array` | Filter by derived status. | +| `period` | `string` | Filter by creation age (e.g. `"7d"`, `"24h"`). | +| `from` | `number \| Date` | Filter by creation timestamp lower bound. | +| `to` | `number \| Date` | Filter by creation timestamp upper bound. 
|
+
+Returns a `CursorPagePromise`:
+
+```ts
+for await (const session of sessions.list({ type: "chat.agent" })) {
+  console.log(session.id);
+}
+
+// or fetch a single page
+const page = await sessions.list({ type: "chat.agent", limit: 50 });
+const { data, pagination } = page;
+```
+
+## `SessionHandle`
+
+Returned by `sessions.open(idOrExternalId)`. Lightweight — no network call until a channel method is invoked.
+
+```ts
+interface SessionHandle {
+  readonly id: string;
+  readonly out: SessionOutputChannel;
+  readonly in: SessionInputChannel;
+}
+```
+
+## `SessionOutputChannel` (`.out`)
+
+| Method | Signature | Notes |
+|---|---|---|
+| `append(value, options?)` | `(value: T) => Promise<void>` | Append a single record. |
+| `pipe(value, options?)` | `(value: AsyncIterable<T> \| ReadableStream<T>) => PipeStreamResult<T>` | Forward a stream. Returns `{ stream, waitUntilComplete }`. |
+| `writer(options)` | `(options: WriterStreamOptions<T>) => PipeStreamResult<T>` | Fine-grained writer with `write` / `merge` callbacks. |
+| `read(options?)` | `(options?: SessionSubscribeOptions) => Promise<AsyncIterableStream<T>>` | SSE subscribe. Returns an async iterable of parsed records. |
+
+### `WriterStreamOptions` / `SessionPipeStreamOptions`
+
+| Field | Type | Notes |
+|---|---|---|
+| `execute` | `({ write, merge }) => void \| Promise<void>` | Writer callback (required for `writer`). |
+| `spanName` | `string` | Override the OpenTelemetry span name for the operation. |
+| `collapsed` | `boolean` | Collapse the span in the run trace UI. |
+| `signal` | `AbortSignal` | Abort the pipe. |
+| `requestOptions` | `ApiRequestOptions` | Pass-through to the API client. |
+
+### `SessionSubscribeOptions`
+
+| Field | Type | Notes |
+|---|---|---|
+| `lastEventId` | `string \| number` | Cursor. `"-1"` or omit to start from the beginning. |
+| `timeoutInSeconds` | `number` | Max SSE hold time between records. Default 60. |
+| `signal` | `AbortSignal` | Abort the subscription.
|
+| `onPart` | `(part: SSEStreamPart) => void` | Observe raw SSE parts. |
+| `onComplete` | `() => void` | Fires on subscription close. |
+| `onError` | `(err: Error) => void` | Fires on subscription errors. |
+
+## `SessionInputChannel` (`.in`)
+
+| Method | Signature | Notes |
+|---|---|---|
+| `send(value, requestOptions?)` | `(value: unknown) => Promise<void>` | Append a record. Strings pass through; objects are `JSON.stringify`-ed. |
+| `on(handler)` | `(handler: (data: T) => void \| Promise<void>) => { off: () => void }` | Long-lived handler. Auto-cleans on run end. |
+| `once(options?)` | `(options?: InputStreamOnceOptions) => InputStreamOncePromise<T>` | Await next record without suspending. |
+| `peek()` | `() => T \| undefined` | Non-consuming head-of-buffer peek. |
+| `wait(options?)` | `(options?: InputStreamWaitOptions) => ManualWaitpointPromise<T>` | Suspend until next record. |
+| `waitWithIdleTimeout(options)` | `(options: InputStreamWaitWithIdleTimeoutOptions) => Promise<{ok: true, output: T} \| {ok: false, error?: any}>` | Suspend with a resetting idle timeout. |
+
+### `InputStreamOnceOptions`
+
+| Field | Type | Notes |
+|---|---|---|
+| `timeoutMs` | `number` | Timeout in ms. |
+| `signal` | `AbortSignal` | Abort the await. |
+| `spanName` | `string` | Span-name override. |
+
+### `InputStreamWaitOptions`
+
+| Field | Type | Notes |
+|---|---|---|
+| `timeout` | `string` | Duration string (e.g. `"30s"`, `"5m"`, `"1h"`). |
+| `idempotencyKey` | `string` | Reuse an existing waitpoint on retries. |
+| `idempotencyKeyTTL` | `string` | TTL for the idempotency key. |
+| `tags` | `string[]` | Tags for the underlying waitpoint token. |
+| `spanName` | `string` | Span-name override. |
+
+### `InputStreamWaitWithIdleTimeoutOptions`
+
+| Field | Type | Notes |
+|---|---|---|
+| `idleTimeoutInSeconds` | `number` | Seconds of idle before suspending (required). |
+| `timeout` | `string` | Max wait after suspending.
|
+| `onSuspend` | `() => void \| Promise<void>` | Called right before suspending. |
+| `onResume` | `() => void \| Promise<void>` | Called right after resuming with data. |
+| `skipSuspend` | `boolean` | Skip the suspend phase; return `{ ok: false }` on idle timeout. |
+| `spanName` | `string` | Span-name override. |
+
+## Token scopes
+
+Sessions are protected by bearer-token scopes. Mint tokens via `auth.createPublicToken` or use the environment secret key server-side.
+
+| Scope | Grants |
+|---|---|
+| `read:sessions` | Read any session, subscribe to any `.out` in the environment. |
+| `read:sessions:<sessionId>` | Read only the specified session. |
+| `write:sessions` | Create/update/close any session, append to any `.in`, direct-write to any `.out`. |
+| `write:sessions:<sessionId>` | Write access scoped to one session. |
+| `write:sessions:<sessionId>:in` | Write access scoped to `.in` on one session. |
+| `admin:sessions` | All of the above. |
+
+Super-scopes `read:all` / `admin:all` / `admin` also grant access.
+
+For chat.agent flows, the transport's public access token carries `read:sessions:<sessionId>` + `write:sessions:<sessionId>` for the session it's talking to — that's enough to subscribe on `.out`, append to `.in`, and close.
+
+## Wire endpoints
+
+For custom transports or direct HTTP use. See [AI Chat — Client Protocol](/ai-chat/client-protocol) for a full example.
+
+| Method | Path | Purpose |
+|---|---|---|
+| `POST` | `/api/v1/sessions` | Create (idempotent on `externalId`). |
+| `GET` | `/api/v1/sessions/:id` | Retrieve. Accepts `session_*` ID or `externalId`. |
+| `PATCH` | `/api/v1/sessions/:id` | Update mutable fields. |
+| `POST` | `/api/v1/sessions/:id/close` | Close. |
+| `GET` | `/api/v1/sessions?filter[type]=…&page[size]=…` | List with filters + cursor pagination. |
+| `GET` | `/realtime/v1/sessions/:session/:io` | SSE subscribe to `.in` or `.out`. Supports `Last-Event-ID`. |
+| `POST` | `/realtime/v1/sessions/:session/:io/append` | Append a record. 
`:io` is `"in"` for clients or `"out"` for direct server writes. | +| `PUT` | `/realtime/v1/sessions/:session/:io` | Initialize an S2 direct-write channel. Returns S2 credentials in response headers. | + +### `X-Session-Settled` response header + +Set on `GET /realtime/v1/sessions/:session/out` when the tail record is a terminal chunk (for chat agents: `trigger:turn-complete`). The server uses a short-wait drain instead of long-polling, so the SSE closes in milliseconds rather than up to 60 seconds. Clients can use this to tell a "nothing is streaming right now" close apart from a normal long-poll close. + +## Related + +- [Overview](/sessions/overview) — Conceptual intro +- [Quick Start](/sessions/quick-start) — Minimal walkthrough +- [Channels](/sessions/channels) — Producer/consumer semantics +- [AI Chat — Client Protocol](/ai-chat/client-protocol) — Session endpoints in the context of chat.agent From 3579d816ef833f38727b555358640b9c67211db9 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Sat, 25 Apr 2026 21:26:42 +0100 Subject: [PATCH 39/49] document x-peek-settled header --- docs/ai-chat/client-protocol.mdx | 27 +++++++++++++++++++++------ docs/sessions/reference.mdx | 9 +++++++-- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/docs/ai-chat/client-protocol.mdx b/docs/ai-chat/client-protocol.mdx index f4381eff86d..df613b4c316 100644 --- a/docs/ai-chat/client-protocol.mdx +++ b/docs/ai-chat/client-protocol.mdx @@ -258,19 +258,34 @@ Last-Event-ID: 42 `SSEStreamSubscription` tracks this automatically via its `lastEventId` option. 
-### `X-Session-Settled` — fast close on idle reconnects +### `X-Peek-Settled` / `X-Session-Settled` — opt-in fast close on idle reconnects -When you reconnect to `.out` and the last record on the session is a `trigger:turn-complete` marker (the agent has finished a turn and is either idle-waiting or exited), the server responds with: +On **reconnect-on-reload** paths (resuming a chat where nothing may be streaming), send `X-Peek-Settled: 1` as a request header when opening the SSE. When present, the server peeks the tail record of `.out`; if it's `trigger:turn-complete` (agent finished a turn and is idle-waiting or exited), the SSE: -- `X-Session-Settled: true` response header -- A fast SSE close (milliseconds instead of the usual 60s long-poll) +- Uses `wait=0` internally — drains any residual records and closes in ~1s instead of long-polling for 60s. +- Sets the `X-Session-Settled: true` response header so the client can tell the close is terminal rather than a mid-stream drop. -This lets the client distinguish a close that means "nothing is streaming right now" from a normal mid-stream disconnect. Your transport can use this to settle into a "ready" state on page reload without maintaining its own `isStreaming` flag. +**Do not send `X-Peek-Settled` on the active-send response-stream path.** The peek would race the newly-triggered turn's first chunk — if the agent hasn't written the new turn's first record yet, the peek sees the prior turn's `trigger:turn-complete` and closes the SSE before the response lands on S2. The built-in `TriggerChatTransport.reconnectToStream` sets the header; `sendMessages → subscribeToStream` does not. 
```ts -const response = await fetch(sseUrl, { headers }); +// Reconnect path (page reload) +const response = await fetch(sseUrl, { + headers: { + Authorization: `Bearer ${publicAccessToken}`, + "X-Peek-Settled": "1", + "Last-Event-ID": lastEventId, + }, +}); const settled = response.headers.get("X-Session-Settled") === "true"; // ...subscribe as normal; if settled and nothing arrives, you're done. + +// Active send path — no X-Peek-Settled, keep long-poll semantics +const liveResponse = await fetch(sseUrl, { + headers: { + Authorization: `Bearer ${publicAccessToken}`, + "Last-Event-ID": lastEventId, + }, +}); ``` ## Step 4: Send messages, stops, and actions diff --git a/docs/sessions/reference.mdx b/docs/sessions/reference.mdx index 9e4edc257e8..bb277ff8312 100644 --- a/docs/sessions/reference.mdx +++ b/docs/sessions/reference.mdx @@ -210,9 +210,14 @@ For custom transports or direct HTTP use. See [AI Chat — Client Protocol](/ai- | `POST` | `/realtime/v1/sessions/:session/:io/append` | Append a record. `:io` is `"in"` for clients or `"out"` for direct server writes. | | `PUT` | `/realtime/v1/sessions/:session/:io` | Initialize an S2 direct-write channel. Returns S2 credentials in response headers. | -### `X-Session-Settled` response header +### `X-Peek-Settled` request header (opt-in) / `X-Session-Settled` response header -Set on `GET /realtime/v1/sessions/:session/out` when the tail record is a terminal chunk (for chat agents: `trigger:turn-complete`). The server uses a short-wait drain instead of long-polling, so the SSE closes in milliseconds rather than up to 60 seconds. Clients can use this to tell a "nothing is streaming right now" close apart from a normal long-poll close. +On `GET /realtime/v1/sessions/:session/out`, the client can send `X-Peek-Settled: 1` to ask the server to peek the tail record before proxying. 
If the last chunk on `.out` is a terminal marker (for chat agents: `trigger:turn-complete`), the server: + +- Uses `wait=0` on the downstream read — drains any residual records and closes in ~1s instead of long-polling for 60s. +- Sets `X-Session-Settled: true` on the response so the client can tell the close is terminal rather than a normal long-poll cycle. + +Without `X-Peek-Settled`, the SSE always long-polls (unconditional `wait` from the caller). Clients should only opt in on **reconnect-on-reload** paths — sending the header while a turn is about to be triggered races the new turn's first chunk and would close the SSE before records land. ## Related From f0a84ae6f9bee83a5f07089322f6697690dee812 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Mon, 27 Apr 2026 17:39:46 +0100 Subject: [PATCH 40/49] docs: chat.agent on Sessions-as-run-manager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bring the AI Chat documentation in line with the Sessions-as-run-manager release. Public surface (chat.agent({...}), useTriggerChatTransport, AgentChat, chat.store / chat.defer / chat.history) is unchanged in shape; the wiring docs around it changed enough that every transport example, every server-action snippet, and the auth/session model needed updating. New: - ai-chat/upgrade-guide.mdx — step-by-step migration for prerelease customers (replace getStartToken / getChatToken with startSession + accessToken, drop runId from ChatSession persistence, etc.). Updated transport-shape group (these all share the same callback shape and ship together): - quick-start, frontend, backend, server-chat, types, reference Updated peripherals: - overview, changelog, client-protocol, features, error-handling, testing, patterns/version-upgrades, patterns/database-persistence Outside ai-chat: - realtime/backend/{streams,input-streams}: callout cross-references no longer point at the deleted Sessions docs. 
Removed: - docs/sessions/{overview,quick-start,channels,reference}.mdx — the standalone-Sessions surface predates the task-bound model and was giving stale guidance. We'll re-introduce Sessions docs once the primitive ships a non-chat.agent customer flow worth documenting. - The "Sessions" group in docs.json's AI nav. --- docs/ai-chat/backend.mdx | 21 +- docs/ai-chat/changelog.mdx | 4 +- docs/ai-chat/client-protocol.mdx | 12 +- docs/ai-chat/error-handling.mdx | 31 +- docs/ai-chat/features.mdx | 10 +- docs/ai-chat/frontend.mdx | 94 +++-- docs/ai-chat/overview.mdx | 3 +- .../ai-chat/patterns/database-persistence.mdx | 50 ++- docs/ai-chat/patterns/version-upgrades.mdx | 16 +- docs/ai-chat/quick-start.mdx | 35 +- docs/ai-chat/reference.mdx | 150 ++++---- docs/ai-chat/server-chat.mdx | 53 +-- docs/ai-chat/testing.mdx | 2 +- docs/ai-chat/types.mdx | 4 +- docs/ai-chat/upgrade-guide.mdx | 351 ++++++++++++++++++ docs/docs.json | 10 +- docs/realtime/backend/input-streams.mdx | 2 +- docs/realtime/backend/streams.mdx | 2 +- docs/sessions/channels.mdx | 214 ----------- docs/sessions/overview.mdx | 169 --------- docs/sessions/quick-start.mdx | 128 ------- docs/sessions/reference.mdx | 227 ----------- 22 files changed, 641 insertions(+), 947 deletions(-) create mode 100644 docs/ai-chat/upgrade-guide.mdx delete mode 100644 docs/sessions/channels.mdx delete mode 100644 docs/sessions/overview.mdx delete mode 100644 docs/sessions/quick-start.mdx delete mode 100644 docs/sessions/reference.mdx diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 23f1598f138..2e1ae5c30a7 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -13,7 +13,7 @@ The highest-level approach. Handles message accumulation, stop signals, turn lif - Every `chat.agent` conversation is backed by a [Session](/sessions/overview) — `externalId` is your `chatId`, `type` is `"chat.agent"`. 
The session outlives any single run, which is why chats can resume across days or deploys without losing identity. You rarely need to touch the session directly (`chat.stream`, `chat.messages`, `chat.stopSignal` wrap everything), but `payload.sessionId` is available if you want to reach in — e.g. `sessions.open(payload.sessionId)` to write from a sub-agent or from outside the turn loop. + Every `chat.agent` conversation is backed by a durable Session — `externalId` is your `chatId`, `type` is `"chat.agent"`, `taskIdentifier` is the agent's task ID. The session is the run manager: it owns the chat's runs, persists across run lifecycles, and orchestrates handoffs (idle continuation, `chat.requestUpgrade`). You rarely need to touch the session directly (`chat.stream`, `chat.messages`, `chat.stopSignal` wrap everything), but `payload.sessionId` is available if you want to reach in — e.g. `sessions.open(payload.sessionId)` to write from a sub-agent or from outside the turn loop. ### Simple: return a StreamTextResult @@ -674,11 +674,18 @@ export const myChat = chat.agent({ ```ts app/actions.ts "use server"; +import { auth } from "@trigger.dev/sdk"; import { chat } from "@trigger.dev/sdk/ai"; -import type { myChat } from "@/trigger/chat"; import { db } from "@/lib/db"; -export const getChatToken = () => chat.createAccessToken("my-chat"); +export const startChatSession = chat.createStartSessionAction("my-chat"); + +export async function mintChatAccessToken(chatId: string) { + return auth.createPublicToken({ + scopes: { read: { sessions: chatId }, write: { sessions: chatId } }, + expirationTime: "1h", + }); +} export async function getChatMessages(chatId: string) { const found = await db.chat.findUnique({ where: { id: chatId } }); @@ -690,14 +697,12 @@ export async function getAllSessions() { const result: Record< string, { - runId: string; publicAccessToken: string; lastEventId?: string; } > = {}; for (const s of sessions) { result[s.id] = { - runId: s.runId, 
publicAccessToken: s.publicAccessToken, lastEventId: s.lastEventId ?? undefined, }; @@ -716,12 +721,14 @@ export async function deleteSession(chatId: string) { import { useChat } from "@ai-sdk/react"; import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; import type { myChat } from "@/trigger/chat"; -import { getChatToken, deleteSession } from "@/app/actions"; +import { mintChatAccessToken, startChatSession, deleteSession } from "@/app/actions"; export function Chat({ chatId, initialMessages, initialSessions }) { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), clientData: { userId: currentUser.id }, // Type-checked against clientDataSchema sessions: initialSessions, onSessionChange: (id, session) => { diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index 39f621eee3a..039995e292a 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -8,7 +8,7 @@ description: "Pre-release updates for AI chat agents." ## `chat.agent` now runs on Sessions -Every chat is backed by a [Session](/sessions/overview) — a new public, durable, bidirectional I/O primitive that outlives any single run. `externalId` = your chat ID, `type` = `"chat.agent"`. Under the hood: +Every chat is backed by a durable Session row that outlives any single run. `externalId` = your chat ID, `type` = `"chat.agent"`. Under the hood: - Output chunks stream on `session.out` (was a run-scoped `streams.writer("chat")`). - Client messages and stops land on `session.in` as a [`ChatInputChunk`](/ai-chat/reference#chatinputchunk) tagged union (was two run-scoped `streams.input` definitions). 
@@ -21,7 +21,7 @@ Public surface (`chat.agent()`, `TriggerChatTransport`, `AgentChat`, `chat.strea - **`TriggerChatTaskResult.sessionId`** + **`ChatTaskRunPayload.sessionId`** — you can reach into the raw session via `sessions.open(payload.sessionId)` for advanced cases (writing from a sub-agent, custom transport). - **Dashboard Agent tab** resolves via `sessionId` and stays in sync with the live stream across runs. -See the new [Sessions docs](/sessions/overview) for the underlying primitive. +The full wire-level protocol (session create, channel routes, JWT scopes) is documented in [Client Protocol](/ai-chat/client-protocol). ## `X-Session-Settled` — fast reconnect on idle chats diff --git a/docs/ai-chat/client-protocol.mdx b/docs/ai-chat/client-protocol.mdx index df613b4c316..4bf5bae695f 100644 --- a/docs/ai-chat/client-protocol.mdx +++ b/docs/ai-chat/client-protocol.mdx @@ -12,7 +12,7 @@ This page documents the protocol that chat clients use to communicate with `chat ## Overview -`chat.agent` is built on [Sessions](/sessions/overview) — a durable, bidirectional I/O primitive that outlives a single run. A conversation is one session; a session can host many runs over its lifetime. +`chat.agent` is built on a durable Session row — the unit of state that owns the chat's runs across their full lifecycle. A conversation is one session; a session can host many runs over its lifetime. The protocol has four parts: @@ -75,7 +75,7 @@ Response: `id` is the `session_*` friendly ID — persist it alongside your chat state. `isCached: true` means the server returned an existing session for this `externalId` (safe to ignore). -See [`POST /api/v1/sessions`](/sessions/reference#create) for the full request / response schema. +`POST /api/v1/sessions` is documented inline in the wire-protocol section below. 
## Step 2: Trigger a run @@ -485,17 +485,13 @@ A client needs to track per-conversation: | Create session (`POST /api/v1/sessions`) | Secret API key or JWT with `write:sessions` | | Close session (`POST /api/v1/sessions/{id}/close`) | Secret API key or JWT with `admin:sessions:{id}` | | Trigger task | Secret API key or JWT with `write:tasks` | -| `.in` append | JWT with `write:sessions:{id}` (or `write:sessions:{id}:in`) | +| `.in` append | JWT with `write:sessions:{id}` | | `.out` subscribe | JWT with `read:sessions:{id}` | -The transport-facing `publicAccessToken` returned from the trigger response carries both `read:sessions:{id}` and `write:sessions:{id}` for the session, plus `read:runs:{runId}` + `write:inputStreams:{runId}` for the run. Use it for all session operations. - -See [Session reference — Token scopes](/sessions/reference#token-scopes) for the full scope list. +The transport-facing `publicAccessToken` returned from `POST /api/v1/sessions` carries both `read:sessions:{id}` and `write:sessions:{id}` for the session — use it for all session operations. A token minted for either the externalId form or the friendlyId form authorizes both URL forms on every read and write route. 
## See also -- [Sessions Overview](/sessions/overview) — The durable primitive chat.agent is built on -- [Sessions Reference](/sessions/reference) — Full `sessions.*` API and wire endpoints - [`TriggerChatTransport`](/ai-chat/frontend) — Built-in browser transport (implements this protocol) - [`AgentChat`](/ai-chat/server-chat) — Built-in server-side client - [Backend lifecycle](/ai-chat/backend#lifecycle-hooks) — What the agent does on each event diff --git a/docs/ai-chat/error-handling.mdx b/docs/ai-chat/error-handling.mdx index 2556c40751a..7544a68a81f 100644 --- a/docs/ai-chat/error-handling.mdx +++ b/docs/ai-chat/error-handling.mdx @@ -300,7 +300,12 @@ const { messages, error, status } = useChat({ ```tsx function Chat() { - const transport = useTriggerChatTransport({ task: "my-chat", accessToken }); + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), + }); const { messages, error, sendMessage } = useChat({ transport }); return ( @@ -344,6 +349,30 @@ uiMessageStreamOptions: { For richer error structures, use [`chat.response.write()`](/ai-chat/features#custom-data-parts) with a custom `data-error` part type. This lets you ship structured error metadata (codes, retry hints, etc.) instead of stringly-typed messages. +### Errors from `accessToken` / `startSession` + +If your `accessToken` or `startSession` callback throws (auth failure, DB write failure, network error), the rejection surfaces through `useChat`'s `error` state — same as a stream error. The transport doesn't retry the callback automatically; the customer is responsible for handling it. + +```tsx +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: async ({ chatId }) => { + try { + return await mintChatAccessToken(chatId); + } catch (err) { + // Customer's server action failed (e.g. user lost auth). 
+ // Re-throw to surface as a useChat error, or return a sentinel + // your UI can detect and prompt re-auth. + throw new Error(`AUTH_REFRESH: ${err.message}`); + } + }, + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), +}); +``` + +`startSession` failures most commonly mean the customer's authorization layer rejected the request (no plan, quota exceeded, user not allowed to chat with this agent). The customer's server should produce a meaningful error message; the transport propagates it verbatim to `useChat`'s `error` state. + ## Run-level retries `chat.agent` uses `retry: { maxAttempts: 1 }` — the run **never retries** on unhandled failure. This is intentional: each turn is conversation-preserving, so a true run failure is severe and shouldn't silently retry (which could send duplicate API calls or mutate state twice). diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index 490b0199b86..fbe12967480 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -428,13 +428,17 @@ import { useChat } from "@ai-sdk/react"; export function Chat({ chatId }) { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), clientData: { userId: currentUser.id }, }); - // Preload on mount — run starts before the user types anything + // Preload on mount — run starts before the user types anything. + // Trigger config (idleTimeoutInSeconds, machine, tags) lives in the + // server action that wraps `chat.createStartSessionAction`. 
useEffect(() => { - transport.preload(chatId, { idleTimeoutInSeconds: 60 }); + transport.preload(chatId); }, [chatId]); const { messages, sendMessage } = useChat({ id: chatId, transport }); diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index e2961935345..24d1693f9ec 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -12,12 +12,14 @@ Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to cre import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; import { useChat } from "@ai-sdk/react"; import type { myChat } from "@/trigger/chat"; -import { getChatToken } from "@/app/actions"; +import { mintChatAccessToken, startChatSession } from "@/app/actions"; export function Chat() { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), }); const { messages, sendMessage, stop, status } = useChat({ transport }); @@ -27,9 +29,16 @@ export function Chat() { The transport is created once on first render and reused across re-renders. Pass a type parameter for compile-time validation of the task ID. +The two callbacks have distinct responsibilities: + +- **`accessToken`** is a *pure* PAT mint — the transport invokes it on a 401/403 to refresh the session-scoped token. Customer wraps `auth.createPublicToken({ scopes: { sessions: chatId } })`. +- **`startSession`** wraps `chat.createStartSessionAction(taskId)` and is called when the transport needs to *create* the session (`transport.preload(chatId)`, or lazily on the first `sendMessage` for a chatId without a cached PAT). The customer's server controls authorization here, alongside any DB writes paired with session creation. + +See [Quick start](/ai-chat/quick-start) for the matching server actions. 
+ - The hook keeps `onSessionChange` up to date via a ref internally, so you don't need to memoize the - callback or worry about stale closures. + The hook keeps `onSessionChange` and `clientData` up to date via internal refs, so you don't need + to memoize callbacks or worry about stale closures when those options change between renders. ## Typed messages (`chat.withUIMessage`) @@ -45,44 +54,50 @@ type Msg = InferChatUIMessage; const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), }); const { messages } = useChat({ transport }); ``` See the [Types](/ai-chat/types) guide for defining `YourUIMessage`, default stream options, and backend examples. -### Dynamic access tokens +### Calling a fetch endpoint instead of a server action -For token refresh, pass a function instead of a string. The transport calls it when it needs a **trigger** token: starting a run from `sendMessages`, or when you call `preload()`. The callback receives `chatId` and `purpose` (`"trigger"` | `"preload"`). Import `ResolveChatAccessTokenParams` from `@trigger.dev/sdk/chat` to type your server action or fetch handler (see [reference](/ai-chat/reference#triggerchattransport-options)). +If you want to mint tokens via a REST endpoint instead of a Next.js server action, the same callbacks accept any async function. Import `AccessTokenParams` and `StartSessionParams` from `@trigger.dev/sdk/chat` to type your fetch handler. 
```ts -import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat"; +import type { AccessTokenParams, StartSessionParams } from "@trigger.dev/sdk/chat"; const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: async (input: ResolveChatAccessTokenParams) => { - const res = await fetch("/api/chat-token", { + accessToken: async ({ chatId }: AccessTokenParams) => { + const res = await fetch(`/api/chat/${chatId}/access-token`, { method: "POST" }); + return res.text(); + }, + startSession: async ({ chatId, taskId, clientData }: StartSessionParams) => { + const res = await fetch(`/api/chat/${chatId}/start`, { method: "POST", - body: JSON.stringify(input), + body: JSON.stringify({ taskId, clientData }), }); - return res.text(); + return res.json(); // { publicAccessToken: string } }, }); ``` +The fetch handlers on the server side wrap the same SDK helpers as the server-action variant: `auth.createPublicToken({ scopes: { sessions: chatId } })` for refresh and `chat.createStartSessionAction(taskId)` for create. + ## Session management -Every chat is backed by a [Session](/sessions/overview) — a durable, bidirectional I/O primitive that outlives any single run. The transport manages the session for you; what you persist on your side is a small piece of state per chat. +Every chat is backed by a durable Session — the row that owns the chat's runs, persists across run lifecycles, and orchestrates handoffs. The transport manages the session for you; what you persist on your side is a small piece of state per chat that lets a fresh tab resume without a round-trip to create a new session. ### What the transport persists per chat | Field | Type | Notes | | --- | --- | --- | -| `sessionId` | `string` | `session_*` friendly ID. **Durable** — stays the same for the life of the conversation. | -| `publicAccessToken` | `string` | JWT for session + run access. Refreshed via `trigger:turn-complete` chunks. 
| -| `lastEventId` | `string \| undefined` | Last SSE event received on `.out`. Used to resume mid-stream. | -| `runId` | `string \| undefined` | Current run ID. Changes on continuations. `undefined` means no live run right now. | +| `publicAccessToken` | `string` | Session-scoped JWT (`read:sessions:{chatId} + write:sessions:{chatId}`). Refreshed automatically on 401/403 via `accessToken`. | +| `lastEventId` | `string \| undefined` | Last SSE event received on `.out`. Used to resume mid-stream after a reload. | | `isStreaming` | `boolean \| undefined` | **Optional.** The transport sets it internally, but you don't have to persist it — the server decides "nothing is streaming" via the session's [`X-Session-Settled`](/ai-chat/client-protocol#x-session-settled-fast-close-on-idle-reconnects) signal on reconnect. If you do persist it, the transport keeps the fast-path short-circuit. If you drop it, reconnects open the SSE and close fast on settled sessions. | ### Session cleanup (frontend) @@ -92,7 +107,9 @@ Since session creation and updates are handled server-side, the frontend only ne ```tsx const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), sessions: loadedSessions, // Restored from DB on page load onSessionChange: (chatId, session) => { if (!session) { @@ -104,9 +121,9 @@ const transport = useTriggerChatTransport({ ### Restoring on page load -On page load, fetch both the messages and the session from your database, then pass them to `useChat` and the transport. Pass `resume: true` to `useChat` when there's an existing conversation — this tells the AI SDK to reconnect to the stream via the transport. +On page load, fetch both the messages and the session state from your database, then pass them to `useChat` and the transport. 
Pass `resume: true` to `useChat` when there's an existing conversation — this tells the AI SDK to reconnect to the stream via the transport. -Because sessions outlive individual runs, a chat you were in yesterday resumes against the same `sessionId` — even if the original run has long since exited. The transport uses `sessionId` + `lastEventId` to resubscribe, and triggers a fresh run on the same session if the client tries to send a new message. +Because the underlying Session row outlives individual runs, a chat you were in yesterday resumes against the same chat — even if the original run has long since exited. The transport hydrates from the persisted state and uses `lastEventId` to resubscribe; if the client tries to send a new message and no run is alive, the server triggers a fresh continuation run on the same session before the message is appended. ```tsx app/page.tsx "use client"; @@ -114,7 +131,13 @@ Because sessions outlive individual runs, a chat you were in yesterday resumes a import { useEffect, useState } from "react"; import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; import { useChat } from "@ai-sdk/react"; -import { getChatToken, getChatMessages, getSession, deleteSession } from "@/app/actions"; +import { + mintChatAccessToken, + startChatSession, + getChatMessages, + getSession, + deleteSession, +} from "@/app/actions"; export default function ChatPage({ chatId }: { chatId: string }) { const [initialMessages, setInitialMessages] = useState([]); @@ -145,7 +168,9 @@ export default function ChatPage({ chatId }: { chatId: string }) { function ChatClient({ chatId, initialMessages, initialSessions }) { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), sessions: initialSessions, onSessionChange: (id, session) => { if (!session) 
deleteSession(id); @@ -195,11 +220,15 @@ Set default client data on the transport that's included in every request. When ```ts const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), clientData: { userId: currentUser.id }, }); ``` +The transport threads `clientData` through three places automatically: into `startSession`'s `params.clientData` for the first run's `payload.metadata`, into per-turn `metadata` on every `.in/append` chunk, and live-updates if the option value changes between renders (so React-driven values like the current user work without reconstructing the transport). + ### Per-message metadata Pass metadata with individual messages via `sendMessage`. Per-message values are merged with transport-level client data (per-message wins on conflicts): @@ -247,7 +276,9 @@ The schema also types the `clientData` option on the frontend transport: // TypeScript enforces that clientData matches the schema const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), clientData: { userId: currentUser.id }, }); ``` @@ -390,7 +421,12 @@ Send custom actions (undo, rollback, edit) to the agent via `transport.sendActio ```tsx function ChatControls({ chatId }: { chatId: string }) { - const transport = useTriggerChatTransport({ task: "my-chat", accessToken }); + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), + }); return (
@@ -432,7 +468,9 @@ import { useChat } from "@ai-sdk/react"; function Chat({ chatId }: { chatId: string }) { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), multiTab: true, }); @@ -486,7 +524,9 @@ If you're self-hosting Trigger.dev, pass the `baseURL` option: ```ts const transport = useTriggerChatTransport({ task: "my-chat", - accessToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), baseURL: "https://your-trigger-instance.com", }); ``` diff --git a/docs/ai-chat/overview.mdx b/docs/ai-chat/overview.mdx index 04c735f5b1e..128d3e47190 100644 --- a/docs/ai-chat/overview.mdx +++ b/docs/ai-chat/overview.mdx @@ -116,7 +116,7 @@ sequenceDiagram ### One conversation, many runs -Each chat is backed by a [Session](/sessions/overview) — a durable, bidirectional I/O primitive that outlives any single run. The conversation's identity (`sessionId`) stays the same across run boundaries. Messages flow through the session's `.in` channel; responses stream on `.out`. +Each chat is backed by a durable Session row — the unit of state that owns the chat's runs across their full lifecycle. The conversation's identity stays keyed on `chatId` across run boundaries; messages flow through the session's `.in` channel; responses stream on `.out`. Within a session, a single run handles many turns. After each AI response, the run waits for the next message via the session's `.in` channel. The frontend transport handles this automatically — triggers a new run on the session for the first message, and sends subsequent messages into the existing run. 
@@ -175,7 +175,6 @@ There are three ways to build the backend, from most opinionated to most flexibl ## Related - [Quick Start](/ai-chat/quick-start) — Get a working chat in 3 steps -- [Sessions](/sessions/overview) — The durable primitive chat.agent is built on - [Database persistence](/ai-chat/patterns/database-persistence) — Conversation + session state across hooks (ORM-agnostic) - [Code execution sandbox](/ai-chat/patterns/code-sandbox) — Warm/teardown pattern for E2B (or similar) with `onWait` / `chat.local` - [Backend](/ai-chat/backend) — Backend approaches in detail diff --git a/docs/ai-chat/patterns/database-persistence.mdx b/docs/ai-chat/patterns/database-persistence.mdx index f7b9e22ac94..7d6f6c82852 100644 --- a/docs/ai-chat/patterns/database-persistence.mdx +++ b/docs/ai-chat/patterns/database-persistence.mdx @@ -7,7 +7,7 @@ description: "Split conversation state and live session metadata across hooks Durable chat runs can span **hours** and **many turns**. You usually want: 1. **Conversation state** — full **`UIMessage[]`** (or equivalent) keyed by **`chatId`**, so reloads and history views work. -2. **Live session state** — the **current Trigger `runId`**, a **scoped access token** for realtime + input streams, and optionally **`lastEventId`** for stream resume. +2. **Live session state** — a **scoped access token** for the session and optionally **`lastEventId`** for stream resume. This page describes a **hook mapping** that works with any database. Adapt table and column names to your stack. 
@@ -18,9 +18,11 @@ You can use one table or two; the important split is **semantic**: | Concept | Purpose | Typical fields | | ------- | ------- | -------------- | | **Conversation** | Durable transcript + display metadata | Stable id (same as **`chatId`**), serialized **`uiMessages`**, title, model choice, owner/user id, timestamps | -| **Active session** | Reconnect + resume the **same** run | Same **`chatId`** as key (or FK), **current `runId`**, **`publicAccessToken`** (or your stored PAT), optional **`lastEventId`** | +| **Active session** | Hydrate the transport on page reload | Same **`chatId`** as key (or FK), **`publicAccessToken`**, optional **`lastEventId`** | -The **conversation** row is what your UI lists as “chats.” The **session** row is what the **transport** needs after a refresh or token expiry: *which run is live* and *how to authenticate* to it. +The **conversation** row is what your UI lists as "chats." The **session** row is what the **transport** needs after a refresh: a session-scoped PAT (so the transport doesn't have to re-mint on first paint) and the SSE resume cursor. + +Storing the current **`runId`** is optional — useful for telemetry / dashboard linking ("View this run") but not required for resume. The Session row owns its current run server-side; the transport reads from `session.out` keyed on `chatId`, so a run swap (continuation, upgrade) is invisible to your DB schema. Store **`UIMessage[]`** in a JSON-compatible column, or normalize to a messages table — the pattern is *when* you read/write, not *how* you encode rows. @@ -33,7 +35,7 @@ The **conversation** row is what your UI lists as “chats.” The **session** r When the user triggers [preload](/ai-chat/features#preload), the run starts **before** the first user message. - Ensure the **conversation** row exists (create or no-op). -- **Upsert session**: **`runId`**, **`chatAccessToken`** from the event (this is the turn-scoped token for that run). 
+- **Upsert session**: **`chatAccessToken`** from the event (a session-scoped PAT covering both `read:sessions:{chatId}` and `write:sessions:{chatId}`). - Load any **user / tenant context** you need for prompts (`clientData`). If you skip preload, do the equivalent in **`onChatStart`** when **`preloaded`** is false. @@ -42,7 +44,7 @@ If you skip preload, do the equivalent in **`onChatStart`** when **`preloaded`** - If **`preloaded`** is true, return early — **`onPreload`** already ran. - Otherwise mirror preload: user/context, conversation create, session upsert. -- If **`continuation`** is true, the conversation row usually **already exists** (previous run ended or timed out); only update **session** fields so the **new** run id and token are stored. +- If **`continuation`** is true, the conversation row usually **already exists** (previous run ended or timed out); only update **session** fields so the **new** PAT and `lastEventId` are stored. ### `onTurnStart` @@ -52,19 +54,29 @@ If you skip preload, do the equivalent in **`onChatStart`** when **`preloaded`** ### `onTurnComplete` - Persist **`uiMessages`** again with the **assistant** reply finalized. -- **Upsert session** with **`runId`**, fresh **`chatAccessToken`**, and **`lastEventId`** from the event. +- **Upsert session** with the fresh **`chatAccessToken`** and **`lastEventId`** from the event. **`lastEventId`** lets the frontend [resume](/ai-chat/frontend) without replaying SSE events it already applied. Treat it as part of session state, not optional polish, if you care about duplicate chunks after refresh. ## Token renewal (app server) -Turn tokens expire (see **`chatAccessTokenTTL`** on **`chat.agent`**). When the transport gets **401** on realtime or input streams, mint a **new** public access token with the **same** scopes the task uses — typically **read** for that **`runId`** and **write** for **input streams** on that run — then **persist** it on your **session** row. 
+The persisted PAT has a TTL (see **`chatAccessTokenTTL`** on **`chat.agent`**, default 1h). When the transport gets a **401** on a session-PAT-authed request, it calls your **`accessToken`** callback to mint a fresh PAT — no DB lookup required, since the session is keyed on `chatId` (which the transport already has). + +Your `accessToken` callback typically just wraps `auth.createPublicToken`: -Your **Next.js server action**, **Remix action**, or **API route** should: +```ts +"use server"; +import { auth } from "@trigger.dev/sdk"; + +export async function mintChatAccessToken(chatId: string) { + return auth.createPublicToken({ + scopes: { read: { sessions: chatId }, write: { sessions: chatId } }, + expirationTime: "1h", + }); +} +``` -1. Load **session** by **`chatId`** → **`runId`**. -2. Call **`auth.createPublicToken`** (or your platform’s equivalent) with those scopes. -3. Save the new token (and confirm **`runId`** is unchanged unless you started a new run). +If you want to keep your DB session row in sync, the transport's **`onSessionChange`** callback fires every time the cached PAT changes — persist the new value there. No Trigger task code needs to run for renewal. @@ -77,31 +89,30 @@ chat.agent({ id: "my-chat", clientDataSchema: z.object({ userId: z.string() }), - onPreload: async ({ chatId, runId, chatAccessToken, clientData }) => { + onPreload: async ({ chatId, chatAccessToken, clientData }) => { if (!clientData) return; await ensureUser(clientData.userId); await upsertConversation({ id: chatId, userId: clientData.userId /* ... 
*/ }); - await upsertSession({ chatId, runId, publicAccessToken: chatAccessToken }); + await upsertSession({ chatId, publicAccessToken: chatAccessToken }); }, - onChatStart: async ({ chatId, runId, chatAccessToken, clientData, continuation, preloaded }) => { + onChatStart: async ({ chatId, chatAccessToken, clientData, continuation, preloaded }) => { if (preloaded) return; await ensureUser(clientData.userId); if (!continuation) { await upsertConversation({ id: chatId, userId: clientData.userId /* ... */ }); } - await upsertSession({ chatId, runId, publicAccessToken: chatAccessToken }); + await upsertSession({ chatId, publicAccessToken: chatAccessToken }); }, onTurnStart: async ({ chatId, uiMessages }) => { chat.defer(saveConversationMessages(chatId, uiMessages)); }, - onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => { + onTurnComplete: async ({ chatId, uiMessages, chatAccessToken, lastEventId }) => { await saveConversationMessages(chatId, uiMessages); await upsertSession({ chatId, - runId, publicAccessToken: chatAccessToken, lastEventId, }); @@ -147,8 +158,9 @@ This replaces the `onTurnStart` persistence pattern — the hook handles both lo ## Design notes -- **`chatId`** is stable for the life of a thread; **`runId`** changes when the user starts a **new** run (timeout, cancel, explicit new chat). Session rows must always reflect the **current** run. -- **`continuation: true`** means “same logical chat, new run” — update session, don’t assume an empty conversation. +- **`chatId`** is stable for the life of a thread and is the only identifier the transport persists. Runs come and go (idle continuation, upgrade, cancel/restart) but the chat keeps its identity. +- **`continuation: true`** means "same logical chat, new run" — refresh the persisted PAT, don't assume an empty conversation. 
+- The current `runId` is available on every hook event for telemetry / dashboard linking ("View this run"), but you don't need to persist it for resume to work — the transport addresses by `chatId`. - Keep **task modules** that perform writes **out of** browser bundles; the pattern assumes persistence runs **in the worker** (or your BFF that the task calls). ## See also diff --git a/docs/ai-chat/patterns/version-upgrades.mdx b/docs/ai-chat/patterns/version-upgrades.mdx index 972750ddb9a..db01bb6aac2 100644 --- a/docs/ai-chat/patterns/version-upgrades.mdx +++ b/docs/ai-chat/patterns/version-upgrades.mdx @@ -13,11 +13,11 @@ Chat agent runs are pinned to the worker version they started on. When you deplo When `chat.requestUpgrade()` is called in `onTurnStart` or `onValidateMessages`: 1. `run()` is **skipped** — no response is generated on old code -2. The agent writes a `trigger:upgrade-required` control chunk to the stream -3. The transport receives the chunk and immediately triggers a **new run** on the currently promoted deployment with the same message (as a continuation) -4. The new run's response is piped through transparently — the user sees a single seamless response from the upgraded agent +2. The agent calls the server-side `endAndContinueSession` endpoint, which atomically swaps the Session's `currentRunId` to a freshly-triggered run on the latest deployment (optimistic-claim against `currentRunVersion`) +3. The new run picks up the conversation and produces the response +4. The transport's existing SSE subscription to `session.out` keeps receiving chunks across the swap — no client-side reconnect -The new run lives on the **same [Session](/sessions/overview)** as the old run — `sessionId` persists across the upgrade. Only `runId` and `publicAccessToken` refresh. The transport's SSE subscription to `session.out` doesn't even need to re-establish; it just continues receiving chunks from whichever run is currently writing. 
+The new run lives on the **same Session** as the old one. `chatId` is the durable identity; only the underlying `currentRunId` rotates. The audit log records the new run with `reason: "upgrade"`. When called from inside `run()` or `chat.defer()`, the current turn completes normally first and the run exits afterward. The next message triggers the continuation on the same session. @@ -51,7 +51,9 @@ import { useChat } from "@ai-sdk/react"; export function Chat() { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), // Bump this when you ship a breaking change to the chat UI or tools clientData: { userId: user.id, protocolVersion: "v2" }, }); @@ -112,7 +114,9 @@ const APP_VERSION = process.env.NEXT_PUBLIC_VERCEL_DEPLOYMENT_ID export function Chat() { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), clientData: { userId: user.id, appVersion: APP_VERSION }, }); // ... diff --git a/docs/ai-chat/quick-start.mdx b/docs/ai-chat/quick-start.mdx index a6ce43a15ba..e4e9fa94cf6 100644 --- a/docs/ai-chat/quick-start.mdx +++ b/docs/ai-chat/quick-start.mdx @@ -35,25 +35,39 @@ description: "Get a working AI agent in 3 steps — define an agent, generate a - - On your server (e.g. a Next.js server action), create a trigger public token scoped to your chat agent. The transport calls your function with `chatId` and `purpose` (`"trigger"` or `"preload"`). Import `ResolveChatAccessTokenParams` from `@trigger.dev/sdk/chat` so the signature matches — see [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options). + + On your server (e.g. 
as Next.js server actions), expose two helpers the transport will call: one that creates the chat session, and one that mints a fresh session-scoped access token for refresh. ```ts app/actions.ts "use server"; + import { auth } from "@trigger.dev/sdk"; import { chat } from "@trigger.dev/sdk/ai"; - import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat"; - import type { myChat } from "@/trigger/chat"; - export async function getChatToken(_input: ResolveChatAccessTokenParams) { - return chat.createAccessToken("my-chat"); + // Creates the Session row + triggers the first run, returns the + // session PAT. Idempotent on (env, chatId) so concurrent calls + // converge to the same session. + export const startChatSession = chat.createStartSessionAction("my-chat"); + + // Pure mint — fresh session-scoped PAT for an existing session. + // The transport calls this on 401/403 to refresh. + export async function mintChatAccessToken(chatId: string) { + return auth.createPublicToken({ + scopes: { + read: { sessions: chatId }, + write: { sessions: chatId }, + }, + expirationTime: "1h", + }); } ``` + The browser never holds your environment's secret key — both helpers run on your server, where customer-side authorization (per-user, per-plan, etc.) lives alongside any DB writes you want to pair with session creation. + - Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to create a memoized transport instance, then pass it to `useChat`: + Use the `useTriggerChatTransport` hook from `@trigger.dev/sdk/chat/react` to create a memoized transport instance, then pass it to `useChat`. 
Wire both server actions into the transport's `accessToken` and `startSession` callbacks: ```tsx app/components/chat.tsx "use client"; @@ -61,12 +75,14 @@ description: "Get a working AI agent in 3 steps — define an agent, generate a import { useChat } from "@ai-sdk/react"; import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; import type { myChat } from "@/trigger/chat"; - import { getChatToken } from "@/app/actions"; + import { mintChatAccessToken, startChatSession } from "@/app/actions"; export function Chat() { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), }); const { messages, sendMessage, stop, status } = useChat({ transport }); @@ -114,6 +130,5 @@ description: "Get a working AI agent in 3 steps — define an agent, generate a - [Backend](/ai-chat/backend) — Lifecycle hooks, persistence, session iterator, raw task primitives - [Frontend](/ai-chat/frontend) — Session management, client data, reconnection -- [Sessions](/sessions/overview) — The durable primitive chat.agent is built on (for cross-run resume, inbox views, and custom transports) - [Types](/ai-chat/types) — `chat.withUIMessage`, `InferChatUIMessage`, and related typing - [Features](/ai-chat/features) — Per-run data, deferred work, streaming, subtasks diff --git a/docs/ai-chat/reference.mdx b/docs/ai-chat/reference.mdx index f8819629106..b688267309e 100644 --- a/docs/ai-chat/reference.mdx +++ b/docs/ai-chat/reference.mdx @@ -68,7 +68,7 @@ The payload passed to the `run` function. 
| `ctx` | `TaskRunContext` | Full task run context — same as `task` `run`’s `{ ctx }` | | `messages` | `ModelMessage[]` | Model-ready messages — pass directly to `streamText` | | `chatId` | `string` | Your conversation ID (the session's `externalId`) | -| `sessionId` | `string \| undefined` | Friendly ID of the backing [Session](/sessions/overview) (`session_*`). Use with `sessions.open()` for advanced cases. | +| `sessionId` | `string` | Friendly ID of the backing Session (`session_*`). Use with `sessions.open()` for advanced cases. Always set — every chat.agent run is bound to a Session. | | `trigger` | `"submit-message" \| "regenerate-message"` | What triggered the request | | `messageId` | `string \| undefined` | Message ID (for regenerate) | | `clientData` | Typed by `clientDataSchema` | Custom data from the frontend (typed when schema is provided) | @@ -393,7 +393,8 @@ All methods available on the `chat` object from `@trigger.dev/sdk/ai`. | `chat.createStopSignal()` | Create a managed stop signal wired to the stop input stream | | `chat.messages` | Input stream for incoming messages — use `.waitWithIdleTimeout()` | | `chat.local({ id })` | Create a per-run typed local (see [Per-run data](/ai-chat/features#per-run-data-with-chatlocal)) | -| `chat.createAccessToken(taskId)` | Create a public access token for a chat agent | +| `chat.createStartSessionAction(taskId, options?)` | Returns a server action that creates a chat Session + triggers the first run + returns a session-scoped PAT. Idempotent on `(env, externalId)`. | +| `chat.requestUpgrade()` | End the current run after this turn so the next message starts on the latest agent version. Server-orchestrated handoff. | | `chat.setTurnTimeout(duration)` | Override turn timeout at runtime (e.g. 
`"2h"`) | | `chat.setTurnTimeoutInSeconds(seconds)` | Override turn timeout at runtime (in seconds) | | `chat.setIdleTimeoutInSeconds(seconds)` | Override idle timeout at runtime | @@ -490,73 +491,80 @@ Options for the frontend transport constructor and `useTriggerChatTransport` hoo | Option | Type | Default | Description | | ---------------------- | -------------------------------------------------------------------- | --------------------------- | --------------------------------------------------------------------------- | -| `task` | `string` | required | Task ID to trigger | -| `accessToken` | `string \| (params: ResolveChatAccessTokenParams) => string \| Promise` | required | Trigger / API auth token, or a function that returns one (see below) | +| `task` | `string` | required | Task ID the transport's session is bound to. Threaded into `startSession`'s params. | +| `accessToken` | `(params: AccessTokenParams) => string \| Promise<string>` | required | Pure refresh — mints a fresh session-scoped PAT. Called on 401/403. See [callback shape](#accesstoken-callback). | +| `startSession` | `(params: StartSessionParams) => Promise<string>` | optional | Creates the chat Session and returns the session-scoped PAT. Called on `transport.preload(chatId)` and lazily on the first `sendMessage` for any chatId without a cached PAT. See [callback shape](#startsession-callback). 
| | `baseURL` | `string` | `"https://api.trigger.dev"` | API base URL (for self-hosted) | -| `streamKey` | `string` | `"chat"` | Stream key (only change if using custom key) | | `headers` | `Record` | — | Extra headers for API requests | | `streamTimeoutSeconds` | `number` | `120` | How long to wait for stream data | -| `clientData` | Typed by `clientDataSchema` | — | Default client data for every request | +| `clientData` | Typed by `clientDataSchema` | — | Default client data merged into per-turn `metadata` and threaded through `startSession`'s params (so the first run's `payload.metadata` matches per-turn `metadata`). Live-updated when the option value changes. | | `sessions` | `Record` | — | Restore sessions from storage. See [ChatSession](#chatsession). | | `onSessionChange` | `(chatId, session \| null) => void` | — | Fires when session state changes. `session` is the full `ChatSession` or `null` when the run ends. | -| `renewRunAccessToken` | `(params: RenewRunAccessTokenParams) => string \| ... \| Promise<...>` | — | Mint a new run-scoped PAT when the run PAT returns 401 (realtime / input stream). Retries once. | -| `triggerOptions` | `{...}` | — | Options for the initial task trigger (see below) | +| `multiTab` | `boolean` | `false` | Enable multi-tab claim coordination via `BroadcastChannel`. See [Frontend → multi-tab](/ai-chat/frontend#multi-tab-coordination). | +| `watch` | `boolean` | `false` | Read-only watcher mode — keep the SSE subscription open across `trigger:turn-complete` so a viewer sees turns 2, 3, … through one long-lived stream. | ### `accessToken` callback -When `accessToken` is a function, the transport calls it with **`ResolveChatAccessTokenParams`** (exported from `@trigger.dev/sdk/chat`): +The transport invokes `accessToken` whenever it needs a *fresh* session-scoped PAT — initial use after no PAT is cached, or after a 401/403 from any session-PAT-authed request. 
The callback's job is to **return a token, not to start a run.** -- `chatId` — the conversation id (`useChat` id / `sendMessages` chat id). -- `purpose` — `"trigger"` when calling `triggerTask` from `sendMessages` (new run or after the session ended), or `"preload"` when calling `preload()`. +`AccessTokenParams`: -Use this to mint or log per-chat trigger tokens. A plain **`string`** is still supported and skips the callback. +| Field | Type | Description | +| --- | --- | --- | +| `chatId` | `string` | The conversation id. | -### `renewRunAccessToken` callback +Customer implementation typically wraps `auth.createPublicToken` server-side: -Optional. When the public access token the transport holds returns 401 (realtime SSE, `.in` append, etc.), the transport calls this once with **`RenewRunAccessTokenParams`**, then retries the failing request. Implement it with your server `auth.createPublicToken`. See [Authentication](/realtime/auth). +```ts +"use server"; +import { auth } from "@trigger.dev/sdk"; -`RenewRunAccessTokenParams`: +export async function mintChatAccessToken(chatId: string) { + return auth.createPublicToken({ + scopes: { read: { sessions: chatId }, write: { sessions: chatId } }, + expirationTime: "1h", + }); +} +``` + +```ts +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: ({ chatId }) => mintChatAccessToken(chatId), +}); +``` + +### `startSession` callback + +The transport invokes `startSession` when it needs to *create* the session — on `transport.preload(chatId)`, and lazily on the first `sendMessage` for any chatId without a cached PAT. Concurrent and repeat calls dedupe via an in-flight promise, and the customer's wrapped helper is idempotent on `(env, externalId)` so two tabs / two `preload` calls converge on the same session. + +`StartSessionParams`: | Field | Type | Description | | --- | --- | --- | -| `chatId` | `string` | Your conversation id. | -| `runId` | `string` | The current run backing the chat. 
| -| `sessionId` | `string \| undefined` | The backing [Session](/sessions/overview) friendlyId. Present after the transport has observed a session for this chat. | +| `taskId` | `string` | The transport's `task` value. | +| `chatId` | `string` | The conversation id (the session's `externalId`). | +| `clientData` | `TClientData` | The transport's current `clientData` option. Pass through to `triggerConfig.basePayload.metadata` so the first run's `payload.metadata` matches per-turn `metadata`. | -Minted tokens should carry **both** run and session scopes so the PAT covers the live input stream AND the Session's `.in` / `.out` channels: - -- `read:runs:` + `write:inputStreams:` -- `read:sessions:` + `write:sessions:` (when `sessionId` is present) +Customer implementation wraps `chat.createStartSessionAction(taskId)`: ```ts -import { auth } from "@trigger.dev/sdk"; -import type { ResolveChatAccessTokenParams } from "@trigger.dev/sdk/chat"; +"use server"; +import { chat } from "@trigger.dev/sdk/ai"; -async function getChatToken(input: ResolveChatAccessTokenParams) { - return auth.createTriggerPublicToken("my-chat", { expirationTime: "1h" }); -} +export const startChatSession = chat.createStartSessionAction("my-chat"); +``` +```ts const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, - renewRunAccessToken: async ({ chatId, runId, sessionId }) => { - return auth.createPublicToken({ - scopes: { - read: { - runs: runId, - ...(sessionId ? { sessions: sessionId } : {}), - }, - write: { - inputStreams: runId, - ...(sessionId ? { sessions: sessionId } : {}), - }, - }, - expirationTime: "1h", - }); - }, + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), }); ``` +`startSession` is optional only when the customer fully manages the session lifecycle externally (e.g. by hydrating `sessions: { [chatId]: ... }` and never calling `preload`). Most customers should provide it. 
+ ### multiTab Enable multi-tab coordination. When `true`, only one browser tab can send messages to a given chatId at a time. Other tabs enter read-only mode with real-time message updates via `BroadcastChannel`. @@ -571,31 +579,29 @@ const transport = useTriggerChatTransport({ No-op when `BroadcastChannel` is unavailable (SSR, Node.js). See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination). -### triggerOptions - -Options forwarded to the Trigger.dev API when starting a new run. Only applies to the first message — subsequent messages reuse the same run. - -A `chat:{chatId}` tag is automatically added to every run. +### Trigger configuration -| Option | Type | Description | -| ------------- | ------------------------------ | ---------------------------------------------------------------- | -| `tags` | `string[]` | Additional tags for the run (merged with auto-tags, max 5 total) | -| `queue` | `string` | Queue name for the run | -| `maxAttempts` | `number` | Maximum retry attempts | -| `machine` | `"micro" \| "small-1x" \| ...` | Machine preset for the run | -| `priority` | `number` | Priority (lower = higher priority) | +Trigger config (machine, queue, tags, maxAttempts, idleTimeoutInSeconds) lives server-side in `chat.createStartSessionAction(taskId, options?)`. The transport doesn't accept these options directly — pass them when wrapping the action: ```ts -const transport = useTriggerChatTransport({ - task: "my-chat", - accessToken: getChatToken, - triggerOptions: { - tags: ["user:123"], +"use server"; +import { chat } from "@trigger.dev/sdk/ai"; + +export const startChatSession = chat.createStartSessionAction("my-chat", { + triggerConfig: { + machine: "small-1x", queue: "chat-queue", + tags: ["user:123"], + maxAttempts: 3, + idleTimeoutInSeconds: 60, }, }); ``` +A `chat:{chatId}` tag is automatically added to every run. + +For per-call values that vary by chatId (e.g. 
plan-tier-driven machine), accept extra params on the customer's server action and pass them into `chat.createStartSessionAction(...)`'s options at call time. + ### transport.stopGeneration() Stop the current generation for a chat session. Sends a stop signal to the backend task and closes the active SSE connection. @@ -658,7 +664,9 @@ import type { myChat } from "@/trigger/chat"; const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, // (params) => … — same shape as ResolveChatAccessTokenParams + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), sessions: savedSessions, onSessionChange: handleSessionChange, }); @@ -694,14 +702,12 @@ See [Multi-tab coordination](/ai-chat/frontend#multi-tab-coordination). ## ChatSession -Persistable session state for the frontend `TriggerChatTransport` and the server-side `AgentChat`. `sessionId` is the durable key — it stays the same for the life of the conversation, even across run boundaries. +Persistable session state for the frontend `TriggerChatTransport` and the server-side `AgentChat`. The underlying Session row is keyed on `chatId` (durable across runs); the persistable shape is just the SSE resume cursor and a refresh token. | Field | Type | Description | | --- | --- | --- | -| `sessionId` | `string` | Friendly ID of the backing [Session](/sessions/overview) (`session_*`). Durable across runs. | -| `publicAccessToken` | `string` | JWT covering both run + session scopes for this chat. Refreshed via `trigger:turn-complete` chunks. | +| `publicAccessToken` | `string` | Session-scoped JWT (`read:sessions:{chatId} + write:sessions:{chatId}`). Refreshed automatically on 401/403 via the transport's `accessToken` callback. | | `lastEventId` | `string \| undefined` | Last SSE event received on `.out`. Used to resume mid-stream after a disconnect. 
| -| `runId` | `string \| undefined` | Current run ID. Changes on continuations. `undefined` means no live run. | | `isStreaming` | `boolean \| undefined` | Optional. If persisted, `reconnectToStream` uses it as a fast-path short-circuit. If omitted, the server decides via the session's [`X-Session-Settled`](/ai-chat/client-protocol#x-session-settled-fast-close-on-idle-reconnects) response header. | ## ChatInputChunk @@ -721,23 +727,19 @@ type ChatInputChunk = For the raw wire format, see [Client Protocol — ChatInputChunk](/ai-chat/client-protocol#chatinputchunk). -## Session scopes +## Session token scopes -Tokens minted for `TriggerChatTransport` and `AgentChat` carry both run and session scopes. Full surface: +Tokens minted for `TriggerChatTransport` and `AgentChat` are session-scoped — keyed on the chat's `externalId` (the `chatId` you assign). | Scope | Grants | | --- | --- | -| `read:sessions:` | Subscribe to `.out`, retrieve the session row | -| `write:sessions:` | Append to `.in`, close the session, update metadata | -| `read:runs:` | Read run state | -| `write:inputStreams:` | Legacy run-scoped input streams (used alongside session scopes during live runs) | +| `read:sessions:` | Subscribe to `.out`, HEAD probe the stream, retrieve the session row | +| `write:sessions:` | Append to `.in`, close the session, end-and-continue, update metadata | -See [Sessions Reference — Token scopes](/sessions/reference#token-scopes) for the full scope list including wildcards and admin scopes. +Tokens are produced by `auth.createPublicToken({ scopes: { read: { sessions: chatId }, write: { sessions: chatId } } })` (used by the customer's `accessToken` server action) or returned automatically from `chat.createStartSessionAction` / `POST /api/v1/sessions`. Either form authorizes both URL forms (`/sessions/{chatId}/...` and `/sessions/session_*/...`) on every read and write route. 
## Related -- [Sessions Overview](/sessions/overview) — The durable primitive chat.agent is built on -- [Sessions Reference](/sessions/reference) — Full `sessions.*` API - [Realtime Streams](/tasks/streams) — How streams work under the hood - [Using the Vercel AI SDK](/guides/examples/vercel-ai-sdk) — Basic AI SDK usage with Trigger.dev - [Realtime React Hooks](/realtime/react-hooks/overview) — Lower-level realtime hooks diff --git a/docs/ai-chat/server-chat.mdx b/docs/ai-chat/server-chat.mdx index 01a315622a0..27688498c92 100644 --- a/docs/ai-chat/server-chat.mdx +++ b/docs/ai-chat/server-chat.mdx @@ -116,9 +116,7 @@ for await (const chunk of stream) { In a stateless environment (HTTP handler, serverless function), you need to persist and restore the session across requests. -Each chat is backed by a [Session](/sessions/overview), addressed by a durable `sessionId` that outlives any single run. `AgentChat` exposes the session state via `chat.run` (a `ChatSession` object — the name is legacy, the content is the full session state). - -`AgentChat` provides a `session` option and two callbacks for persistence: +Each chat is backed by a durable Session row that outlives any single run. `AgentChat` exposes the persistable state via `chat.session` (the SSE resume cursor) and surfaces the current run id via the `onTriggered` callback for telemetry / dashboard linking. ```ts import { AgentChat } from "@trigger.dev/sdk/chat"; @@ -130,17 +128,17 @@ export async function POST(req: Request) { const chat = new AgentChat({ agent: "my-agent", id: chatId, - // Restore from previous request. sessionId is the durable key — - // runId + lastEventId are live-run hints that speed up resume. - session: saved - ? 
{ sessionId: saved.sessionId, runId: saved.runId, lastEventId: saved.lastEventId } - : undefined, - // Persist when a new run starts against this session - onTriggered: async ({ runId, chatId }) => { + // Restore from previous request — `lastEventId` is the SSE resume + // cursor; the underlying Session is keyed on `chatId` so it's + // implicit and durable. + session: saved ? { lastEventId: saved.lastEventId } : undefined, + // Useful for telemetry / dashboard linking. The `runId` is the + // current run, which may change across continuations and upgrades. + onTriggered: async ({ runId }) => { await db.sessions.upsert({ chatId, runId }); }, // Persist after each turn for stream resumption - onTurnComplete: async ({ lastEventId, chatId }) => { + onTurnComplete: async ({ lastEventId }) => { await db.sessions.update({ chatId, lastEventId }); }, }); @@ -148,38 +146,19 @@ export async function POST(req: Request) { const stream = await chat.sendMessage(message); const text = await stream.text(); - // On first-ever request there was no session in the DB — save the - // sessionId that AgentChat just created server-side. For subsequent - // requests this is a cheap no-op upsert. - if (chat.run?.sessionId) { - await db.sessions.upsert({ - chatId, - sessionId: chat.run.sessionId, - runId: chat.run.runId, - lastEventId: chat.run.lastEventId, - }); - } - return Response.json({ text }); } ``` - Because `sessionId` is durable, a chat that was active yesterday resumes against the same session today — even if the original run has long since exited. `AgentChat` triggers a fresh run on the same session when needed, carrying the conversation forward without losing history or identity. + The Session row is the run manager — a chat that was active yesterday + resumes against the same chatId today, even if the original run has + long since exited. 
`AgentChat` (server-side) and `TriggerChatTransport` + (browser) both rely on this: send a new message and the server + triggers a fresh continuation run on the same session, carrying the + conversation forward without losing history or identity. -### Inbox view of all chats - -Use [`sessions.list`](/sessions/reference#list-sessions) to enumerate every chat in your environment, filtered by tag or status: - -```ts -import { sessions } from "@trigger.dev/sdk"; - -for await (const session of sessions.list({ type: "chat.agent", tag: "user:user-456" })) { - console.log(session.id, session.createdAt, session.closedAt); -} -``` - ## Sub-agent tool pattern `AgentChat` can be used inside an AI SDK tool to delegate work to a durable sub-agent. The sub-agent's response streams as preliminary tool results: @@ -261,7 +240,7 @@ const stream = await chat.reconnect(); | `agent` | `string` | required | The agent task ID to trigger | | `id` | `string` | `crypto.randomUUID()` | Conversation ID for tagging and correlation | | `clientData` | typed from agent | `undefined` | Client data included in every request | -| `session` | `ChatSession` (`{ sessionId: string; runId?: string; lastEventId?: string }`) | `undefined` | Restore a previous session. `sessionId` is the durable key; `runId` and `lastEventId` are live-run hints. | +| `session` | `ChatSession` (`{ lastEventId?: string }`) | `undefined` | Restore a previous session's SSE resume cursor. The Session row itself is keyed on `chatId` (durable) — no other state to thread. 
| | `onTriggered` | `(event) => void` | `undefined` | Called when a new run is created | | `onTurnComplete` | `(event) => void` | `undefined` | Called when a turn's stream ends | | `streamKey` | `string` | `"chat"` | Output stream key | diff --git a/docs/ai-chat/testing.mdx b/docs/ai-chat/testing.mdx index 1c3bbb7dd6a..c60aee94c9d 100644 --- a/docs/ai-chat/testing.mdx +++ b/docs/ai-chat/testing.mdx @@ -8,7 +8,7 @@ description: "Drive a chat.agent through real turns in unit tests — no network `@trigger.dev/sdk/ai/test` exports `mockChatAgent`, an offline harness that runs your `chat.agent` definition's `run()` function inside an in-memory task runtime. You send messages, actions, and stop signals through driver methods and assert against the chunks the agent emits. -Under the hood the harness drives the agent's backing [Session](/sessions/overview) channels — `.in` receives the records your `sendMessage` / `sendStop` / `sendAction` produce, `.out` captures the chunks the agent emits. The harness API itself is session-agnostic; you don't need to manage `sessionId` in tests. +Under the hood the harness drives the agent's backing Session channels — `.in` receives the records your `sendMessage` / `sendStop` / `sendAction` produce, `.out` captures the chunks the agent emits. The harness API itself is session-agnostic; you don't need to manage `sessionId` in tests. The harness exercises the real turn loop, lifecycle hooks, validation, hydration, and action routing — only the language model and the surrounding Trigger.dev runtime are replaced. Pair it with [`MockLanguageModelV3`](https://sdk.vercel.ai/docs/reference/ai-sdk-core/mock-language-model-v3) and `simulateReadableStream` from `ai` to control LLM responses. 
diff --git a/docs/ai-chat/types.mdx b/docs/ai-chat/types.mdx index 6f40f4a9a5e..808eb5d6c7f 100644 --- a/docs/ai-chat/types.mdx +++ b/docs/ai-chat/types.mdx @@ -116,7 +116,9 @@ type Msg = InferChatUIMessage; export function Chat() { const transport = useTriggerChatTransport({ task: "my-chat", - accessToken: getChatToken, + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), }); const { messages } = useChat({ transport }); diff --git a/docs/ai-chat/upgrade-guide.mdx b/docs/ai-chat/upgrade-guide.mdx new file mode 100644 index 00000000000..6e7af9426f8 --- /dev/null +++ b/docs/ai-chat/upgrade-guide.mdx @@ -0,0 +1,351 @@ +--- +title: "Upgrade Guide: prerelease → Sessions-as-run-manager" +sidebarTitle: "Upgrade Guide" +description: "Migrating chat.agent code from the prerelease API to the Sessions-as-run-manager release." +--- + +This guide is for customers who tried `chat.agent` during the prerelease period. +The public surface of `chat.agent({...})`, `useTriggerChatTransport`, +`AgentChat`, `chat.store`, `chat.defer`, and `chat.history` is largely +unchanged — but the transport's auth callbacks and the server-side helpers +that feed them were reshaped, so most prerelease apps need a small wiring +update. 
+ +## TL;DR + + + +```ts before.ts +// Single accessToken callback, dispatches on purpose +accessToken: async ({ chatId, purpose }) => { + if (purpose === "trigger") { + return chat.createAccessToken("my-chat"); + } + // purpose === "preload" — same call, same trigger token + return chat.createAccessToken("my-chat"); +}; +``` + +```ts after.ts +// Two callbacks: pure refresh + server action that creates the session +accessToken: ({ chatId }) => mintChatAccessToken(chatId), +startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), +``` + + + +What changed: + +- `accessToken` is now a **pure session-PAT mint** — called only on 401/403 + to refresh. It must return a token scoped to the session, not a + `trigger:tasks` JWT. +- `startSession` is a **new callback** that wraps a server action calling + `chat.createStartSessionAction(taskId)`. The transport invokes it on + `transport.preload(chatId)` and lazily on the first `sendMessage` for + any chatId without a cached PAT. +- `ChatSession` persistable state drops `runId` — store only + `{publicAccessToken, lastEventId?}`. +- Per-call options on `transport.preload(chatId, ...)` are gone. Trigger + config (machine, idleTimeoutInSeconds, tags, queue, maxAttempts) lives + server-side in `chat.createStartSessionAction(taskId, options)`. + + + The architectural shift is that `chat.agent` no longer rolls its own + per-run streams. It runs on top of a durable **Session** row that owns + its current run, persists across run lifecycles, and orchestrates + upgrades server-side. The customer-facing surface is similar; the wire + path beneath it changed completely. 
+ + +## Step 1: Replace your access-token server action with two server actions + +The old pattern was a single helper that minted a trigger token: + +```ts app/actions.ts (before) +"use server"; + +import { chat } from "@trigger.dev/sdk/ai"; +import type { myChat } from "@/trigger/chat"; + +export const getChatToken = () => + chat.createAccessToken("my-chat"); +``` + +Replace with two helpers — one for session creation, one for PAT refresh: + +```ts app/actions.ts (after) +"use server"; + +import { auth } from "@trigger.dev/sdk"; +import { chat } from "@trigger.dev/sdk/ai"; + +// Server-side wrapper for session creation. Idempotent on (env, chatId). +// The customer's server is the only entry point that creates Session rows; +// the browser never holds a `trigger:tasks` JWT. +export const startChatSession = chat.createStartSessionAction("my-chat"); + +// Pure session-PAT mint for the transport's 401/403 retry path. +export async function mintChatAccessToken(chatId: string) { + return auth.createPublicToken({ + scopes: { + read: { sessions: chatId }, + write: { sessions: chatId }, + }, + expirationTime: "1h", + }); +} +``` + +`chat.createStartSessionAction(taskId)` returns a server action that: + +1. Creates the Session row for `chatId` (idempotent on the + `(env, externalId)` unique pair). +2. Triggers the agent task's first run with + `basePayload: {messages: [], trigger: "preload"}` defaults plus any + overrides you pass. +3. Returns `{sessionId, runId, publicAccessToken}` to the browser. 
+ +## Step 2: Update the transport wiring + +The transport now takes two callbacks instead of one: + +```tsx app/components/chat.tsx (after) +"use client"; + +import { useChat } from "@ai-sdk/react"; +import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; +import type { myChat } from "@/trigger/chat"; +import { mintChatAccessToken, startChatSession } from "@/app/actions"; + +export function Chat() { + const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), + }); + + const { messages, sendMessage, status } = useChat({ transport }); + // ... +} +``` + +The transport calls them in two distinct flows: + +| Trigger | Callback fired | +|---|---| +| `transport.preload(chatId)` | `startSession` | +| First `sendMessage` for a chatId with no cached PAT | `startSession` (auto) | +| Any 401/403 from `.in/append`, `.out` SSE, or `end-and-continue` | `accessToken` | +| Page hydrates with `sessions: { [chatId]: ... }` | Neither (uses hydrated PAT) | + +`startSession` is deduped via an in-flight promise — concurrent +`preload` + `sendMessage` calls converge to one server action invocation. + +## Step 3: Drop transport-level trigger config + +The prerelease transport accepted `triggerConfig`, `triggerOptions`, and +per-call options on `preload`. All of that moved server-side: + +```ts before +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: getChatToken, + triggerConfig: { basePayload: { /* ... */ } }, + triggerOptions: { tags: [...], machine: "small-1x", maxAttempts: 3 }, +}); + +transport.preload(chatId, { idleTimeoutInSeconds: 60, metadata: { ... 
} }); +``` + +```ts after +// Trigger config now lives in chat.createStartSessionAction +export const startChatSession = chat.createStartSessionAction("my-chat", { + triggerConfig: { + machine: "small-1x", + maxAttempts: 3, + tags: ["my-tag"], + idleTimeoutInSeconds: 60, + }, +}); + +// Browser side +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), +}); + +transport.preload(chatId); // no second arg +``` + +For metadata that varies per chat, use `clientData` on the transport (see +the next step) — it's typed and threaded through `startSession` automatically. + +## Step 4: Use `clientData` for typed payload metadata + +If your agent uses `withClientData({schema})`, the transport's `clientData` +option is now the canonical place to set it. The same value: + +- Is passed to your `startSession` callback as `params.clientData`, where + you forward it into `chat.createStartSessionAction`'s + `triggerConfig.basePayload.metadata`. The agent's first run sees it in + `payload.metadata` (visible to `onPreload` / `onChatStart`). +- Merges into per-turn `metadata` on every `.in/append` chunk + (visible to `onTurnStart` / inside `run` via `turn.clientData`). + +```tsx +const transport = useTriggerChatTransport({ + task: "my-chat", + accessToken: ({ chatId }) => mintChatAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), + clientData: { + userId: currentUser.id, + plan: currentUser.plan, + }, +}); +``` + +The `clientData` value is live-updated when the option changes (the hook +calls `setClientData` under the hood), so dynamic values work without +reconstructing the transport. 
+ + + Server-side authorization can still override or augment the + browser-claimed `clientData` inside `startSession` — never trust the + browser's identity claim. A typical pattern: the server action looks up + the user from the request session, then merges the trusted server fields + on top of `params.clientData`. + + +## Step 5: Update your `ChatSession` persistence + +If you persist session state across page loads, drop the `runId` field: + +```ts before +type ChatSession = { + runId: string; + publicAccessToken: string; + lastEventId?: string; +}; +``` + +```ts after +type ChatSession = { + publicAccessToken: string; + lastEventId?: string; +}; +``` + +If your DB has a `runId` column, you can drop it (the transport doesn't +read it) or keep it for telemetry. The current run ID lives on the +Session row server-side now. + +Hydration on page reload is unchanged: + +```tsx +const transport = useTriggerChatTransport({ + // ... + sessions: persistedSession + ? { [chatId]: persistedSession } + : {}, +}); +``` + +## `chat.requestUpgrade()`: same call, faster handoff + +Calling `chat.requestUpgrade()` inside `onTurnStart` / +`onValidateMessages` still ends the current run so the next message starts +on the latest version. What changed is the mechanism: + +- **Before:** the agent emitted a `trigger:upgrade-required` chunk on + `.out`; the transport consumed it browser-side and triggered a new run. +- **After:** the agent calls `endAndContinueSession` server-to-server; + the webapp triggers a new run and atomically swaps `Session.currentRunId` + via optimistic locking. The browser's existing SSE subscription keeps + receiving chunks across the swap — no transport-side bookkeeping. + +The new run is recorded in a `SessionRun` audit row with +`reason: "upgrade"` for dashboard provenance. 
+ +## Hitting raw URLs + +If your code talks to the realtime API directly instead of going through +the SDK, the URL shapes changed: + +| Before | After | +|---|---| +| `GET /realtime/v1/streams/{runId}/chat` | `GET /realtime/v1/sessions/{chatId}/out` | +| `POST /realtime/v1/streams/{runId}/{target}/chat-messages/append` | `POST /realtime/v1/sessions/{chatId}/in/append` (body: `{kind: "message", payload}`) | +| `POST /realtime/v1/streams/{runId}/{target}/chat-stop/append` | `POST /realtime/v1/sessions/{chatId}/in/append` (body: `{kind: "stop"}`) | + +The session-scoped PAT +(`read:sessions:{chatId} + write:sessions:{chatId}`) authorizes both the +externalId form (`/sessions/my-chat-id/...`) and the friendlyId form +(`/sessions/session_abc.../...`). The transport always uses the +externalId form; the friendlyId form is available for dashboard tooling +and direct API consumers. + +## What didn't change + +- `chat.agent({...})` definition — `id`, `idleTimeoutInSeconds`, + `clientDataSchema`, `actionSchema`, `hydrateMessages`, `onPreload`, + `onChatStart`, `onValidateMessages`, `onTurnStart`, `onTurnComplete`, + `onChatSuspend`, `onAction`, `run`. All callbacks have the same + signature and fire at the same lifecycle points. +- `chat.customAgent({...})` and the `chat.createSession(payload, ...)` + helper for building a session loop manually inside a custom agent. +- `chat.store` (snapshot store), `chat.defer` (deferred work), and + `chat.history` (imperative history mutations from inside `onAction`). +- `AgentChat` (server-side chat client) — `agent`, `id`, `clientData`, + `session`, `onTriggered`, `onTurnComplete`, `sendMessage`, `text()`. +- `useTriggerChatTransport` React semantics (created once, kept in a + ref, callbacks updated under the hood). +- Multi-tab coordination (`multiTab: true`), + [pending messages / steering](/ai-chat/pending-messages), + [background injection](/ai-chat/background-injection), + [compaction](/ai-chat/compaction). 
- Per-turn `metadata` flowing through + `sendMessage({ text }, { metadata })` to `turn.metadata` server-side. + +## Verifying the migration + +After updating, the smoke check is the same as before: send a message, +confirm the assistant streams a response, reload mid-stream, confirm +resume. + +A few new things worth verifying once you've cut over: + +- **Eager preload.** Call `transport.preload(id)` (e.g. from a button + click handler) — your `startSession` callback should fire and a + Session row + first run should be created before you send a message. +- **Idle-timeout continuation.** Wait past the agent's + `idleTimeoutInSeconds` so the run exits, then send another message — + the transport's `.in/append` should boot a new run on the same + Session, with a `SessionRun` row of `reason: "continuation"`. +- **PAT refresh.** Force a stale PAT in your DB (corrupt the signature) + and reload — the first request should 401, your `accessToken` + callback should fire, and the retry should succeed. + +If any of those misfire, check that: + +- Your `accessToken` callback returns a token minted via + `auth.createPublicToken({scopes: {read: {sessions: chatId}, write: {sessions: chatId}}})`, + **not** `chat.createAccessToken` or `auth.createTriggerPublicToken`. The + transport rejects trigger tokens now. +- Your `startSession` callback returns + `{publicAccessToken: string}` — the result of + `chat.createStartSessionAction(taskId)({chatId, ...})` already has + this shape. +- You haven't left a stale `getStartToken` option on the transport; + it's not part of `TriggerChatTransportOptions` anymore.
+ +## Reference + +- [TriggerChatTransport options](/ai-chat/reference#triggerchattransport-options) +- [`chat.createStartSessionAction`](/ai-chat/reference) +- [Backend setup](/ai-chat/backend) +- [Frontend setup](/ai-chat/frontend) diff --git a/docs/docs.json b/docs/docs.json index 8a9c901b0c4..dede49f08bf 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -105,6 +105,7 @@ "pages": [ "ai-chat/overview", "ai-chat/changelog", + "ai-chat/upgrade-guide", "ai-chat/quick-start", "ai-chat/backend", "ai-chat/frontend", @@ -132,15 +133,6 @@ "ai-chat/client-protocol", "ai-chat/reference" ] - }, - { - "group": "Sessions", - "pages": [ - "sessions/overview", - "sessions/quick-start", - "sessions/channels", - "sessions/reference" - ] } ] }, diff --git a/docs/realtime/backend/input-streams.mdx b/docs/realtime/backend/input-streams.mdx index 4fb4c6c607a..65e3bb494b7 100644 --- a/docs/realtime/backend/input-streams.mdx +++ b/docs/realtime/backend/input-streams.mdx @@ -12,7 +12,7 @@ The Input Streams API allows you to send data into running Trigger.dev tasks fro - Input streams are keyed by `runId` — they're correct for sending data to a specific live run. If you need a bidirectional channel addressed by a durable ID that survives run boundaries (e.g. a chat that resumes tomorrow, an agent coordinated across many runs), use [Sessions](/sessions/overview) instead. `session.in` has the same consumer-side API (`on` / `once` / `wait` / `waitWithIdleTimeout`) but is addressed by `sessionId` rather than `runId`. + Input streams are keyed by `runId` — they're correct for sending data to a specific live run. If you need a bidirectional channel that survives run boundaries (e.g. a chat that resumes tomorrow, an agent coordinated across many runs), look at [`chat.agent`](/ai-chat/overview): it's built on a durable Session row that owns its runs and exposes the same consumer-side API (`on` / `once` / `wait` / `waitWithIdleTimeout`) on its `.in` channel. 
## Sending data to a running task diff --git a/docs/realtime/backend/streams.mdx b/docs/realtime/backend/streams.mdx index 47f101a0191..b644e5dab10 100644 --- a/docs/realtime/backend/streams.mdx +++ b/docs/realtime/backend/streams.mdx @@ -11,7 +11,7 @@ description: "Read AI/LLM output, file chunks, and other streaming data from you - Run-scoped streams are the right primitive for ephemeral I/O that lives inside a single run's lifetime. For durable, long-lived channels that outlive a run — chat agents, cross-run workflows, inbox-style listings — use [Sessions](/sessions/overview) instead. Both share the same underlying realtime transport; Sessions layer durable identity and bidirectional channels on top. + Run-scoped streams are the right primitive for ephemeral I/O that lives inside a single run's lifetime. For durable, long-lived channels that outlive a run, see [`chat.agent`](/ai-chat/overview): it's built on a Session row that owns the chat's runs and exposes bidirectional `.in` / `.out` channels addressed by a durable id. ## Reading streams diff --git a/docs/sessions/channels.mdx b/docs/sessions/channels.mdx deleted file mode 100644 index 46419590320..00000000000 --- a/docs/sessions/channels.mdx +++ /dev/null @@ -1,214 +0,0 @@ ---- -title: "Channels" -sidebarTitle: "Channels" -description: "Deep dive on `.in` and `.out` — producer/consumer asymmetry, suspend-while-idle, serialization, and the full method surface." ---- - -Every session exposes two channels accessed through a [`SessionHandle`](/sessions/reference#sessionhandle): - -```ts -const handle = sessions.open(sessionIdOrExternalId); - -handle.out; // SessionOutputChannel -handle.in; // SessionInputChannel -``` - -The two channels are **disjoint** — no method name appears on both. 
Producer/consumer roles are fixed per channel: - -| Channel | Task role | External-client role | -|---|---|---| -| `.out` | Producer (`append`, `pipe`, `writer`) | Consumer (`read`) | -| `.in` | Consumer (`on`, `once`, `peek`, `wait`, `waitWithIdleTimeout`) | Producer (`send`) | - -This asymmetry makes directional intent obvious at every call site. - -## `.out` — task writes, client reads - -The task is the producer; browsers or other tasks consume via SSE. - -### Producing: `append`, `pipe`, `writer` - -All three write methods route through the same underlying S2 direct-write pipeline, so subscribers always receive parsed objects (not raw strings), regardless of which producer you use. - -#### `append` — single record - -```ts -await handle.out.append({ type: "status", message: "Processing..." }); -``` - -Returns once the record has been acknowledged. Use when you're writing a small, bounded number of discrete events. - -#### `pipe` — forward a ReadableStream or AsyncIterable - -```ts -const { stream, waitUntilComplete } = handle.out.pipe( - streamText({ model, messages }).toUIMessageStream() -); - -await waitUntilComplete(); -``` - -`pipe` attaches your source stream to the session channel and returns immediately. It mirrors [`streams.pipe`](/tasks/streams) but session-scoped — there's no `target` option because the session is the target. - -`stream` is a local `ReadableStream` you can `await` / consume if you also want to observe the records from inside the producing task. `waitUntilComplete()` resolves when all source records have been flushed to S2. 
- -#### `writer({ execute })` — fine-grained control - -Use when you want to produce records imperatively or interleave multiple sources: - -```ts -const { waitUntilComplete } = handle.out.writer({ - execute: ({ write, merge }) => { - write({ type: "start" }); - write({ type: "section", title: "Summary" }); - merge(llmStream.toUIMessageStream()); // splice in another stream - write({ type: "end" }); - }, -}); - -await waitUntilComplete(); -``` - -`write` enqueues a single record. `merge` pulls records from another stream into the same channel in order. `execute` can be async — the writer stays open until your callback returns. - -Mirrors [`streams.writer`](/tasks/streams#writer) but session-scoped. - -### Consuming: `read` - -```ts -const handle = sessions.open(sessionId); -const stream = await handle.out.read({ - lastEventId: "-1", - timeoutInSeconds: 60, -}); - -for await (const chunk of stream) { - console.log(chunk); // MyChunk, already parsed -} -``` - -Options: - -| Option | Type | Notes | -|---|---|---| -| `lastEventId` | `string \| number` | Resume cursor. `-1` (or omit) starts from the beginning. Use the last seen event ID to resume after a disconnect. | -| `timeoutInSeconds` | `number` | Max time SSE will hold the connection open between records. Default 60. | -| `signal` | `AbortSignal` | Cancel the subscription. | -| `onPart` | `(part) => void` | Observe raw SSE parts (id + chunk + timestamp). | -| `onComplete` | `() => void` | Fires when the stream closes. | -| `onError` | `(err) => void` | Fires on subscription errors. | - -`read` returns an async iterable — use `for await` to consume. The underlying SSE transport handles reconnection, `Last-Event-ID` resume, and abort propagation automatically. - -## `.in` — client writes, task reads - -External clients produce via `.send`; the task consumes via `on` / `once` / `peek` / `wait` / `waitWithIdleTimeout`. 
- -### Producing: `send` - -```ts -await handle.in.send({ kind: "message", text: "Hello" }); -``` - -Any JSON-serializable value. Strings are passed through as-is; objects are `JSON.stringify`-ed. - -### Consuming: non-blocking vs. suspending - -The consuming-side methods fall into two groups: - -- **Non-blocking / keep-running**: `on`, `once`, `peek`. The task stays awake while consuming. -- **Suspending**: `wait`, `waitWithIdleTimeout`. The run suspends via a [session-stream waitpoint](#suspend-while-idle), freeing compute until a record arrives. - -#### `on` — handler fires for every record - -```ts -const { off } = handle.in.on<{ kind: "message" | "stop" }>((msg) => { - if (msg.kind === "stop") controller.abort(); -}); - -// later -off(); -``` - -Handlers are flushed with any buffered records on attach, and cleaned up when the task run completes. Use for long-lived listeners (e.g. a stop-signal listener that lives across turns). - -#### `once` — await the next record - -```ts -const { ok, output, error } = await handle.in.once({ timeout: "30s" }); - -if (ok) { - console.log(output); -} -``` - -Returns on the next arrival, or `{ ok: false, error }` on timeout. Non-suspending — the run stays active while waiting. Chain `.unwrap()` to get the output directly and throw on timeout: - -```ts -const msg = await handle.in.once({ timeout: "30s" }).unwrap(); -``` - -#### `peek` — inspect buffer without consuming - -```ts -const pending = handle.in.peek(); -if (pending) { /* ... */ } -``` - -Returns `undefined` if the buffer is empty. Does not consume the record — next `on` / `once` / `wait` still sees it. - -#### `wait` — suspend until next record - -```ts -const msg = await handle.in.wait({ timeout: "5m" }); -``` - -**Suspends the run** while idle. Unlike `once`, the task's container can be hibernated, freeing compute. When a record lands on `.in`, the run-engine waitpoint fires and the run resumes with the message as the resolved value. 
- -Only callable from inside `task.run()`. Throws on timeout. - -#### `waitWithIdleTimeout` — suspend with a resetting timeout - -```ts -const msg = await handle.in.waitWithIdleTimeout({ idleTimeout: "5m" }); -``` - -Like `wait`, but the timeout **resets on every message**. Use for conversational patterns: "end the conversation after 5 minutes of silence, but stay alive as long as the user keeps talking." - -### Suspend-while-idle - -`wait` and `waitWithIdleTimeout` use Trigger.dev's run-engine waitpoints — the same primitive behind [`wait-for-token`](/wait-for-token) and [`streams.input.wait`](/tasks/input-streams). When the task calls `wait()`: - -1. The run-engine suspends the run and deallocates its container. -2. You stop paying for compute. -3. When a record arrives on `.in` (via `send` from any client), the waitpoint is fired synchronously by the append handler. -4. The run resumes with the record as the resolved value. - -Multiple concurrent waiters on the same session (e.g. two agents) are all woken by the same append. - -## Serialization - -All `.out` producer methods route through a uniform pipeline, so: - -- **Subscribers always receive parsed objects.** `handle.out.read()` yields `T` values directly — you never need `JSON.parse`. -- **Mixed producers work cleanly.** A record written via `append` looks identical on the wire to one emitted by `writer` or `pipe`. - -On `.in`, `send` accepts any JSON-serializable value. Strings are passed through; objects are serialized. Consumers decide how to type the records (`on`, `once`, `wait`). - -## Buffering and attachment - -Records that arrive on `.in` **before** a consumer attaches are buffered on the task side. The first `on` / `once` / `peek` call to attach sees the buffered records in order, followed by live records as they arrive. This means you don't have to race the task's boot against the client's first `send`. - -On `.out`, records are appended to the underlying append-only log. 
Subscribers read from a cursor (`lastEventId`), so a late-connecting client can start from the beginning or resume mid-stream — nothing is lost. - -## Abort and cleanup - -- Handlers registered via `on` and long-lived subscriptions auto-clean on task-run completion. You don't need to explicitly unsubscribe. -- Passing `signal` to `read` ties the SSE subscription to your `AbortController`. Aborting the signal closes the SSE cleanly. -- `pipe` / `writer` return `waitUntilComplete` so you can serialize subsequent logic after all records are flushed. - -## Related - -- [Overview](/sessions/overview) — Conceptual intro to sessions -- [API Reference](/sessions/reference) — Types, signatures, token scopes -- [Run-scoped streams](/tasks/streams) — The ephemeral counterpart; same underlying transport diff --git a/docs/sessions/overview.mdx b/docs/sessions/overview.mdx deleted file mode 100644 index 334f523bb3a..00000000000 --- a/docs/sessions/overview.mdx +++ /dev/null @@ -1,169 +0,0 @@ ---- -title: "Sessions" -sidebarTitle: "Overview" -description: "A durable, typed, bidirectional I/O primitive that outlives any single run — built for chat agents, long-running workflows, and any conversation you need to resume across runs." ---- - -A **Session** is a durable identity for a bidirectional stream of records. Once you create one, it outlives the run that opened it, survives process crashes, and can be resumed from any client that knows its ID. - -Under the hood, each session exposes two channels: - -- **`.out`** — the task writes, external clients read. -- **`.in`** — external clients write, the task reads. - -Both channels use Trigger.dev's realtime streams, with SSE subscribe, `Last-Event-ID` resume, and direct-to-storage writes. - -## Why sessions - -Run-scoped streams ([streams.pipe](/tasks/streams) / [streams.input](/tasks/input-streams)) are great for ephemeral I/O within a single run: LLM output from a one-shot task, file chunks during processing, etc. 
Once the run ends, the stream is gone. - -Sessions give you a channel that **isn't tied to a single run**. You can: - -- **Resume a conversation across runs.** A chat that ran yesterday, crashed, or hit an idle timeout can pick back up on the same session when the user sends the next message. -- **Share a channel across runs.** A coordinator run writes to `session.out`; a worker run reads from `session.in` — different runs, same channel. -- **Show an inbox view.** `sessions.list({ type: "chat.agent" })` returns every chat in your environment, filterable by tag or status. -- **Key everything on one ID.** `externalId` (your own string — a chat ID, a conversation UUID) is the primary key. Internally, Trigger.dev mints a stable `session_*` friendly ID you can use interchangeably. - -## Identity - -A session has two equivalent identifiers: - -| Field | Who owns it | Example | Notes | -|---|---|---|---| -| `externalId` | You | `"chat-abc-123"` | User-supplied idempotency key. Unique per environment. Optional but recommended. | -| `id` (friendly) | Trigger.dev | `"session_cm4z2plfh000abcd1efgh"` | Stable server-generated identifier. Always present. | - -Both work as lookup keys. The server disambiguates via the `session_` prefix — anything else is treated as `externalId`. - -Creating a session with the same `externalId` twice returns the same session (idempotent). This is intentional: it's safe to call `sessions.create({ externalId: chatId })` from two browser tabs racing for the same chat. - -```ts -const a = await sessions.create({ type: "chat.agent", externalId: "chat-abc" }); -const b = await sessions.create({ type: "chat.agent", externalId: "chat-abc" }); - -a.id === b.id; // true — same session -``` - -## Channels - -``` -SessionHandle -├── .out — producer on the task side, consumer on the client side -└── .in — producer on the client side, consumer on the task side -``` - -The two channels are **disjoint** — no method name appears on both sides. 
Directional intent is always obvious at the call site. - -### `.out` — task writes, client reads - -Inside a task: - -```ts -const handle = sessions.open(sessionId); - -// Single record -await handle.out.append({ type: "message", text: "Hello" }); - -// Pipe a ReadableStream or AsyncIterable -const { waitUntilComplete } = handle.out.pipe(streamText(...).toUIMessageStream()); -await waitUntilComplete(); - -// Fine-grained control -const { waitUntilComplete } = handle.out.writer({ - execute: ({ write }) => { - write({ type: "start" }); - write({ type: "text-delta", delta: "Hi" }); - write({ type: "finish" }); - }, -}); -await waitUntilComplete(); -``` - -Outside a task (browser, server action, another task): - -```ts -const handle = sessions.open(sessionId); -const stream = await handle.out.read({ lastEventId: "-1" }); - -for await (const chunk of stream) { - console.log(chunk); // parsed object, not a string -} -``` - -### `.in` — client writes, task reads - -Inside a task (consume): - -```ts -const handle = sessions.open(sessionId); - -// Fire-and-forget handler -handle.in.on((msg) => console.log("got", msg)); - -// Await the next message without suspending -const { ok, output } = await handle.in.once({ timeout: "30s" }); - -// Suspend while idle — frees compute until a record arrives -const msg = await handle.in.wait(); - -// Suspend with an idle timeout that resets on every message -await handle.in.waitWithIdleTimeout({ idleTimeout: "5m" }); -``` - -Outside a task (produce): - -```ts -const handle = sessions.open(sessionId); -await handle.in.send({ kind: "message", text: "Hello" }); -``` - -See [Channels](/sessions/channels) for the full surface. - -## Durability - -A session is durable until you explicitly close it: - -- Runs come and go. Starting a new run on an existing session is a normal operation — the run opens the handle and picks up from wherever the channels left off. -- `sessions.close(id, { reason })` marks the session terminal. 
Closed sessions still let you subscribe to their `.out` to read history, but no new records can be appended. -- Close is **client-driven only**. The task runtime never auto-closes on your behalf — a chat that's "between turns" is a live session, not a closed one. -- Optional `expiresAt` sets a retention window. Past `expiresAt`, the session is treated as closed for new writes. - -## When to use sessions vs. run-scoped streams - -Use **run-scoped streams** ([`streams.pipe`](/tasks/streams), [`streams.input`](/tasks/input-streams)) when: - -- The data is tied to one run's lifetime. -- You don't need to resume from a different process or user. -- The stream lives inside your task-runtime boundary (one producer, one consumer, both in the same run). - -Use **sessions** when: - -- The channel needs to outlive any single run. -- You want to address it from multiple places (browser + server + another task) using a stable ID. -- You need a typed bidirectional pair rather than a one-way stream. -- You want the data visible across runs for resume, inbox views, or cross-run coordination. - -Both primitives share the same underlying realtime-streams infrastructure (S2, SSE, `Last-Event-ID` resume). Sessions layer durable identity + bidirectional channels on top. - -## Relationship to chat.agent - -`chat.agent()` is built on sessions. Every chat conversation is one session: - -- `externalId` = your chat ID -- `type` = `"chat.agent"` -- `.out` carries `UIMessageChunk` events -- `.in` carries a `ChatInputChunk` tagged union (`{kind: "message", ...}` or `{kind: "stop"}`) - -If you're using `chat.agent` + `TriggerChatTransport` / `AgentChat`, sessions are handled for you. You only need this section directly if you're: - -- Building a custom chat transport (e.g. for Slack or a native app — see [Client Protocol](/ai-chat/client-protocol)). -- Using sessions for a non-chat workload (background agents, cross-run pipelines). -- Reaching into the underlying primitive for advanced cases. 
- -## Related - -- [Quick Start](/sessions/quick-start) — Create, write, read, close in 4 steps -- [Channels](/sessions/channels) — Full `.in` / `.out` API, waitpoint semantics, serialization -- [API Reference](/sessions/reference) — `sessions.*` methods, types, scopes -- [AI Chat](/ai-chat/overview) — The chat primitive built on sessions -- [Run-scoped streams](/tasks/streams) — The ephemeral counterpart diff --git a/docs/sessions/quick-start.mdx b/docs/sessions/quick-start.mdx deleted file mode 100644 index 68abbf3f393..00000000000 --- a/docs/sessions/quick-start.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: "Quick Start" -sidebarTitle: "Quick Start" -description: "Create a session, write to it from a task, subscribe from a client, and close it — in four steps." ---- - -This walkthrough uses the raw `sessions` API directly. If you're building a chat app, start with [`chat.agent`](/ai-chat/quick-start) instead — it wraps all of this for you. - - - - Call `sessions.create` with a `type` discriminator and (optionally) an `externalId` — your own stable ID for the thing this session represents. Creating with the same `externalId` twice returns the same session, so it's safe to call from concurrent requests. - - ```ts app/actions.ts - "use server"; - - import { sessions } from "@trigger.dev/sdk"; - - export async function createConversation(conversationId: string) { - const session = await sessions.create({ - type: "agent.conversation", - externalId: conversationId, - tags: ["user:abc-123"], - }); - - return { sessionId: session.id }; - } - ``` - - - `type` is a free-form string you choose. Use it to distinguish different kinds of sessions in your environment — e.g. `"chat.agent"`, `"agent.conversation"`, `"data-sync"`. It's what you'll filter on later in `sessions.list`. - - - - - Trigger a task with the `sessionId` in the payload. Inside the task, open the session and write records to `.out`. Records can be any JSON-serializable value — strings, objects, streams. 
- - ```ts trigger/agent.ts - import { sessions, task } from "@trigger.dev/sdk"; - - export const runAgent = task({ - id: "run-agent", - run: async (payload: { sessionId: string; prompt: string }) => { - const handle = sessions.open(payload.sessionId); - - await handle.out.append({ type: "status", message: "Thinking..." }); - - // Simulate streaming output - for (const word of payload.prompt.split(" ")) { - await handle.out.append({ type: "token", text: word }); - } - - await handle.out.append({ type: "done" }); - }, - }); - ``` - - - `sessions.open(id)` does not make a network call — each channel method (`append`, `pipe`, `read`, etc.) hits its own endpoint when invoked. You can open the handle once at the top of `run()` and reuse it. - - - - - Any client with read access to the session can subscribe to `.out` as an SSE stream. This works from a browser, a server action, or another task. - - ```ts app/subscribe.ts - import { sessions } from "@trigger.dev/sdk"; - - async function watch(sessionId: string) { - const handle = sessions.open(sessionId); - const stream = await handle.out.read({ lastEventId: "-1" }); - - for await (const chunk of stream) { - console.log("received:", chunk); - } - } - ``` - - Records are delivered as parsed objects — you do not need to `JSON.parse` them. - - **Resume semantics.** `lastEventId` is the cursor. `"-1"` (or omitting the option on a fresh subscription) starts from the beginning. Pass the last-seen event ID to resume mid-stream after a disconnect. - - - - Closing marks the session terminal. You can still read history from `.out`, but no new records can be appended. - - ```ts app/actions.ts - "use server"; - - import { sessions } from "@trigger.dev/sdk"; - - export async function endConversation(sessionId: string) { - await sessions.close(sessionId, { reason: "user-ended" }); - } - ``` - - - Closing is optional. 
A long-running session that's just between turns is a live session, not a closed one — don't close it just because you don't have anything to write right now. - - - - -## What about `.in`? - -The walkthrough above only uses `.out`. The reverse direction — external clients sending records **into** the task — is handled by `.in`. - -Inside the task: - -```ts -const handle = sessions.open(payload.sessionId); - -// Suspend until a message arrives. Frees compute while waiting. -const message = await handle.in.wait<{ text: string }>(); -``` - -From a client: - -```ts -const handle = sessions.open(sessionId); -await handle.in.send({ text: "Hello" }); -``` - -See [Channels](/sessions/channels) for the full `.in` API — `on`, `once`, `peek`, `wait`, `waitWithIdleTimeout`. - -## Next steps - -- [Channels](/sessions/channels) — The full `.in` / `.out` API, waitpoint semantics, serialization -- [API Reference](/sessions/reference) — All methods and types -- [AI Chat Quick Start](/ai-chat/quick-start) — The higher-level chat primitive built on sessions diff --git a/docs/sessions/reference.mdx b/docs/sessions/reference.mdx deleted file mode 100644 index bb277ff8312..00000000000 --- a/docs/sessions/reference.mdx +++ /dev/null @@ -1,227 +0,0 @@ ---- -title: "API Reference" -sidebarTitle: "API Reference" -description: "Complete reference for the `sessions` namespace — methods, types, channel surface, and token scopes." ---- - -## `sessions` namespace - -Import from `@trigger.dev/sdk`. - -```ts -import { sessions } from "@trigger.dev/sdk"; -``` - -| Method | Signature | Notes | -|---|---|---| -| `sessions.create(body, requestOptions?)` | `(body: CreateSessionRequestBody) => Promise` | Create a session. Idempotent on `externalId`. | -| `sessions.retrieve(idOrExternalId, requestOptions?)` | `(id: string) => Promise` | Fetch a session by `session_*` ID or `externalId`. 
| -| `sessions.update(idOrExternalId, body, requestOptions?)` | `(id: string, body: UpdateSessionRequestBody) => Promise` | Update `tags`, `metadata`, or `externalId`. | -| `sessions.close(idOrExternalId, body?, requestOptions?)` | `(id: string, body?: { reason?: string }) => Promise` | Mark the session terminal. Idempotent. | -| `sessions.list(options?, requestOptions?)` | `(options?: ListSessionsOptions) => CursorPagePromise` | List sessions with filters + cursor pagination. | -| `sessions.open(idOrExternalId)` | `(id: string) => SessionHandle` | Open a handle for channel I/O. No network call until you hit a channel method. | - -## `CreateSessionRequestBody` - -| Field | Type | Default | Description | -|---|---|---|---| -| `type` | `string` | required | Free-form discriminator (e.g. `"chat.agent"`, `"agent.conversation"`). 1–64 chars. | -| `externalId` | `string` | — | User-supplied idempotency key. 1–256 chars. Unique per environment. Cannot start with `session_`. | -| `taskIdentifier` | `string` | — | Optional pointer for task-owned sessions. | -| `tags` | `string[]` | — | Up to 10 tags for filtering in `sessions.list`. | -| `metadata` | `Record` | — | Arbitrary JSON metadata. | -| `expiresAt` | `Date \| string` | — | Absolute expiry timestamp. Past this, the session is treated as closed for new writes. | - -## `SessionItem` - -Returned by `create`, `retrieve`, `update`, `close`, and `list`. - -| Field | Type | Description | -|---|---|---| -| `id` | `string` | `session_*` friendly ID. | -| `externalId` | `string \| null` | The key you supplied on create. | -| `type` | `string` | The discriminator. | -| `taskIdentifier` | `string \| null` | Optional task pointer. | -| `tags` | `string[]` | Dashboard filter tags. | -| `metadata` | `Record \| null` | Arbitrary JSON. | -| `closedAt` | `Date \| null` | Set when `sessions.close` is called. | -| `closedReason` | `string \| null` | Value passed to `sessions.close`. 
| -| `expiresAt` | `Date \| null` | Retention deadline. | -| `createdAt` | `Date` | When the session was created. | -| `updatedAt` | `Date` | Last write to any mutable field. | - -`CreatedSessionResponseBody` extends `SessionItem` with `isCached: boolean` — `true` when `create` returned an existing session (same `externalId`) instead of minting a new one. - -## `UpdateSessionRequestBody` - -| Field | Type | Notes | -|---|---|---| -| `tags` | `string[]` | Replaces existing tags. | -| `metadata` | `Record \| null` | Replaces existing metadata. Pass `null` to clear. | -| `externalId` | `string \| null` | Change or clear the external key. Pass `null` to clear. | - -## `CloseSessionRequestBody` - -| Field | Type | Notes | -|---|---|---| -| `reason` | `string` | Optional close reason (e.g. `"user-ended"`, `"idle-timeout"`). Up to 256 chars. | - -## `ListSessionsOptions` - -Flattened client shape. `sessions.list` serializes into `filter[*]` / `page[*]` query params internally. - -| Field | Type | Notes | -|---|---|---| -| `limit` | `number` | Page size. 1–100. Default 20. | -| `after` | `string` | Cursor: fetch the page after this session ID. | -| `before` | `string` | Cursor: fetch the page before this session ID. | -| `type` | `string \| string[]` | Filter by discriminator. | -| `tag` | `string \| string[]` | Filter by tag (any-of match). | -| `taskIdentifier` | `string \| string[]` | Filter by task identifier. | -| `externalId` | `string` | Exact-match filter. | -| `status` | `"ACTIVE" \| "CLOSED" \| "EXPIRED" \| array` | Filter by derived status. | -| `period` | `string` | Filter by creation age (e.g. `"7d"`, `"24h"`). | -| `from` | `number \| Date` | Filter by creation timestamp lower bound. | -| `to` | `number \| Date` | Filter by creation timestamp upper bound. 
| - -Returns a `CursorPagePromise`: - -```ts -for await (const session of sessions.list({ type: "chat.agent" })) { - console.log(session.id); -} - -// or fetch a single page -const page = await sessions.list({ type: "chat.agent", limit: 50 }); -const { data, pagination } = page; -``` - -## `SessionHandle` - -Returned by `sessions.open(idOrExternalId)`. Lightweight — no network call until a channel method is invoked. - -```ts -interface SessionHandle { - readonly id: string; - readonly out: SessionOutputChannel; - readonly in: SessionInputChannel; -} -``` - -## `SessionOutputChannel` (`.out`) - -| Method | Signature | Notes | -|---|---|---| -| `append(value, options?)` | `(value: T) => Promise` | Append a single record. | -| `pipe(value, options?)` | `(value: AsyncIterable \| ReadableStream) => PipeStreamResult` | Forward a stream. Returns `{ stream, waitUntilComplete }`. | -| `writer(options)` | `(options: WriterStreamOptions) => PipeStreamResult` | Fine-grained writer with `write` / `merge` callbacks. | -| `read(options?)` | `(options?: SessionSubscribeOptions) => Promise>` | SSE subscribe. Returns an async iterable of parsed records. | - -### `WriterStreamOptions` / `SessionPipeStreamOptions` - -| Field | Type | Notes | -|---|---|---| -| `execute` | `({ write, merge }) => void \| Promise` | Writer callback (required for `writer`). | -| `spanName` | `string` | Override the OpenTelemetry span name for the operation. | -| `collapsed` | `boolean` | Collapse the span in the run trace UI. | -| `signal` | `AbortSignal` | Abort the pipe. | -| `requestOptions` | `ApiRequestOptions` | Pass-through to the API client. | - -### `SessionSubscribeOptions` - -| Field | Type | Notes | -|---|---|---| -| `lastEventId` | `string \| number` | Cursor. `"-1"` or omit to start from the beginning. | -| `timeoutInSeconds` | `number` | Max SSE hold time between records. Default 60. | -| `signal` | `AbortSignal` | Abort the subscription. 
| -| `onPart` | `(part: SSEStreamPart) => void` | Observe raw SSE parts. | -| `onComplete` | `() => void` | Fires on subscription close. | -| `onError` | `(err: Error) => void` | Fires on subscription errors. | - -## `SessionInputChannel` (`.in`) - -| Method | Signature | Notes | -|---|---|---| -| `send(value, requestOptions?)` | `(value: unknown) => Promise` | Append a record. Strings pass through; objects are `JSON.stringify`-ed. | -| `on(handler)` | `(handler: (data: T) => void \| Promise) => { off: () => void }` | Long-lived handler. Auto-cleans on run end. | -| `once(options?)` | `(options?: InputStreamOnceOptions) => InputStreamOncePromise` | Await next record without suspending. | -| `peek()` | `() => T \| undefined` | Non-consuming head-of-buffer peek. | -| `wait(options?)` | `(options?: InputStreamWaitOptions) => ManualWaitpointPromise` | Suspend until next record. | -| `waitWithIdleTimeout(options)` | `(options: InputStreamWaitWithIdleTimeoutOptions) => Promise<{ok: true, output: T} \| {ok: false, error?: any}>` | Suspend with a resetting idle timeout. | - -### `InputStreamOnceOptions` - -| Field | Type | Notes | -|---|---|---| -| `timeoutMs` | `number` | Timeout in ms. | -| `signal` | `AbortSignal` | Abort the await. | -| `spanName` | `string` | Span-name override. | - -### `InputStreamWaitOptions` - -| Field | Type | Notes | -|---|---|---| -| `timeout` | `string` | Duration string (e.g. `"30s"`, `"5m"`, `"1h"`). | -| `idempotencyKey` | `string` | Reuse an existing waitpoint on retries. | -| `idempotencyKeyTTL` | `string` | TTL for the idempotency key. | -| `tags` | `string[]` | Tags for the underlying waitpoint token. | -| `spanName` | `string` | Span-name override. | - -### `InputStreamWaitWithIdleTimeoutOptions` - -| Field | Type | Notes | -|---|---|---| -| `idleTimeoutInSeconds` | `number` | Seconds of idle before suspending (required). | -| `timeout` | `string` | Max wait after suspending. 
| -| `onSuspend` | `() => void \| Promise` | Called right before suspending. | -| `onResume` | `() => void \| Promise` | Called right after resuming with data. | -| `skipSuspend` | `boolean` | Skip the suspend phase; return `{ ok: false }` on idle timeout. | -| `spanName` | `string` | Span-name override. | - -## Token scopes - -Sessions are protected by bearer-token scopes. Mint tokens via `auth.createPublicToken` or use the environment secret key server-side. - -| Scope | Grants | -|---|---| -| `read:sessions` | Read any session, subscribe to any `.out` in the environment. | -| `read:sessions:` | Read only the specified session. | -| `write:sessions` | Create/update/close any session, append to any `.in`, direct-write to any `.out`. | -| `write:sessions:` | Write access scoped to one session. | -| `write:sessions::in` | Write access scoped to `.in` on one session. | -| `admin:sessions` | All of the above. | - -Super-scopes `read:all` / `admin:all` / `admin` also grant access. - -For chat.agent flows, the transport's public access token carries `read:sessions:` + `write:sessions:` for the session it's talking to — that's enough to subscribe on `.out`, append to `.in`, and close. - -## Wire endpoints - -For custom transports or direct HTTP use. See [AI Chat — Client Protocol](/ai-chat/client-protocol) for a full example. - -| Method | Path | Purpose | -|---|---|---| -| `POST` | `/api/v1/sessions` | Create (idempotent on `externalId`). | -| `GET` | `/api/v1/sessions/:id` | Retrieve. Accepts `session_*` ID or `externalId`. | -| `PATCH` | `/api/v1/sessions/:id` | Update mutable fields. | -| `POST` | `/api/v1/sessions/:id/close` | Close. | -| `GET` | `/api/v1/sessions?filter[type]=…&page[size]=…` | List with filters + cursor pagination. | -| `GET` | `/realtime/v1/sessions/:session/:io` | SSE subscribe to `.in` or `.out`. Supports `Last-Event-ID`. | -| `POST` | `/realtime/v1/sessions/:session/:io/append` | Append a record. 
`:io` is `"in"` for clients or `"out"` for direct server writes. | -| `PUT` | `/realtime/v1/sessions/:session/:io` | Initialize an S2 direct-write channel. Returns S2 credentials in response headers. | - -### `X-Peek-Settled` request header (opt-in) / `X-Session-Settled` response header - -On `GET /realtime/v1/sessions/:session/out`, the client can send `X-Peek-Settled: 1` to ask the server to peek the tail record before proxying. If the last chunk on `.out` is a terminal marker (for chat agents: `trigger:turn-complete`), the server: - -- Uses `wait=0` on the downstream read — drains any residual records and closes in ~1s instead of long-polling for 60s. -- Sets `X-Session-Settled: true` on the response so the client can tell the close is terminal rather than a normal long-poll cycle. - -Without `X-Peek-Settled`, the SSE always long-polls (unconditional `wait` from the caller). Clients should only opt in on **reconnect-on-reload** paths — sending the header while a turn is about to be triggered races the new turn's first chunk and would close the SSE before records land. 
- -## Related - -- [Overview](/sessions/overview) — Conceptual intro -- [Quick Start](/sessions/quick-start) — Minimal walkthrough -- [Channels](/sessions/channels) — Producer/consumer semantics -- [AI Chat — Client Protocol](/ai-chat/client-protocol) — Session endpoints in the context of chat.agent From dd2a699baddec542ad6f96afbbbe430eb84fe172 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 28 Apr 2026 11:56:35 +0100 Subject: [PATCH 41/49] docs(ai-chat): add not-released-yet banner to Sessions upgrade guide --- docs/ai-chat/upgrade-guide.mdx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/ai-chat/upgrade-guide.mdx b/docs/ai-chat/upgrade-guide.mdx index 6e7af9426f8..b77464e1ef4 100644 --- a/docs/ai-chat/upgrade-guide.mdx +++ b/docs/ai-chat/upgrade-guide.mdx @@ -4,6 +4,13 @@ sidebarTitle: "Upgrade Guide" description: "Migrating chat.agent code from the prerelease API to the Sessions-as-run-manager release." --- + + **Not released yet.** This guide describes an upcoming release of `chat.agent` + on top of the new Sessions primitive. The packages and server-side support + are still rolling out — we'll remove this banner once the release ships and + publish the matching `@trigger.dev/sdk` prerelease tag. + + This guide is for customers who tried `chat.agent` during the prerelease period. The public surface of `chat.agent({...})`, `useTriggerChatTransport`, `AgentChat`, `chat.store`, `chat.defer`, and `chat.history` is largely From 673fe58f36c49601e3b9d95e030e89d079557473 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Tue, 28 Apr 2026 17:08:56 +0100 Subject: [PATCH 42/49] docs(ai-chat): warn against non-atomic onTurnComplete persistence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page-load reads Chat.messages and ChatSession.lastEventId in parallel. 
A non-atomic onTurnComplete that writes them as two separate awaits has a narrow race window where messages are post-write but lastEventId is still pre-write — the transport then replays this turn's chunks on resume and duplicates the assistant render. Add a Warning callout in the persistence pattern doc with the ✅ atomic and ❌ non-atomic shapes, and update both code examples (basic + hydrateMessages variant) to use prisma.$transaction. --- .../ai-chat/patterns/database-persistence.mdx | 49 +++++++++++++++---- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/docs/ai-chat/patterns/database-persistence.mdx b/docs/ai-chat/patterns/database-persistence.mdx index 7d6f6c82852..80c95e529cc 100644 --- a/docs/ai-chat/patterns/database-persistence.mdx +++ b/docs/ai-chat/patterns/database-persistence.mdx @@ -58,6 +58,28 @@ If you skip preload, do the equivalent in **`onChatStart`** when **`preloaded`** **`lastEventId`** lets the frontend [resume](/ai-chat/frontend) without replaying SSE events it already applied. Treat it as part of session state, not optional polish, if you care about duplicate chunks after refresh. + +**Write the messages and `lastEventId` in a single transaction.** Both values are read in parallel on the next page load (one fetches the conversation, the other fetches the session). If a refresh races between the two writes, the page can see the assistant message persisted (full history) but a stale `lastEventId` from the previous turn. The transport then resumes from that stale cursor and replays this turn's chunks on top of the already-persisted assistant message, producing a duplicated render. + +```ts +// ✅ Atomic — refresh on the next page load reads both writes consistently. 
+await db.$transaction([ + db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }), + db.chatSession.upsert({ + where: { id: chatId }, + create: { id: chatId, publicAccessToken: chatAccessToken, lastEventId }, + update: { publicAccessToken: chatAccessToken, lastEventId }, + }), +]); + +// ❌ Two awaits — narrow race window where messages are post-write but +// lastEventId is still pre-write. A page refresh that lands here will +// duplicate the assistant message on resume. +await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }); +await db.chatSession.upsert({ /* ... */ }); +``` + + ## Token renewal (app server) The persisted PAT has a TTL (see **`chatAccessTokenTTL`** on **`chat.agent`**, default 1h). When the transport gets a **401** on a session-PAT-authed request, it calls your **`accessToken`** callback to mint a fresh PAT — no DB lookup required, since the session is keyed on `chatId` (which the transport already has). @@ -110,12 +132,12 @@ chat.agent({ }, onTurnComplete: async ({ chatId, uiMessages, chatAccessToken, lastEventId }) => { - await saveConversationMessages(chatId, uiMessages); - await upsertSession({ - chatId, - publicAccessToken: chatAccessToken, - lastEventId, - }); + // Atomic: messages + lastEventId must be readable consistently on resume. + // See the warning above for why a non-atomic write causes duplicate renders. 
+ await db.$transaction([ + saveConversationMessagesQuery(chatId, uiMessages), + upsertSessionQuery({ chatId, publicAccessToken: chatAccessToken, lastEventId }), + ]); }, run: async ({ messages, signal }) => { @@ -144,9 +166,18 @@ export const myChat = chat.agent({ return stored; }, - onTurnComplete: async ({ chatId, uiMessages }) => { - // Persist the response - await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }); + onTurnComplete: async ({ chatId, uiMessages, chatAccessToken, lastEventId }) => { + // Persist the response and refresh session state atomically — see the + // warning in the previous section for why these two writes have to be + // in the same transaction. + await db.$transaction([ + db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }), + db.chatSession.upsert({ + where: { id: chatId }, + create: { id: chatId, publicAccessToken: chatAccessToken, lastEventId }, + update: { publicAccessToken: chatAccessToken, lastEventId }, + }), + ]); }, run: async ({ messages, signal }) => { return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); From 202007e6a20b217b60a8cd198b87b791b76b39de Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 29 Apr 2026 10:18:34 +0100 Subject: [PATCH 43/49] docs(ai-chat): tidy Sessions changelog + sidebar - Move Upgrade Guide to the bottom of the Agents sidebar (after API Reference) and rename its sidebarTitle to "Sessions Upgrade Guide" so it reads as a standalone migration doc rather than a regular concept page. - Replace the inaccurate inline Migration section in the 4/24 Sessions changelog entry with a pointer to the upgrade guide. - Add a Docs section to the same entry summarizing the Sessions doc surface that shipped: rewritten Client Protocol, atomic-write warning on the persistence pattern, new reference symbols, and the broader page refreshes. 
--- docs/ai-chat/changelog.mdx | 9 +++++++-- docs/ai-chat/upgrade-guide.mdx | 2 +- docs/docs.json | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index 039995e292a..482731d948b 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -31,9 +31,14 @@ Practical impact: `TriggerChatTransport.reconnectToStream` no longer needs a cli ## Migration -Nothing to do for users of `chat.agent()` + `TriggerChatTransport` + `AgentChat`. Your existing code keeps working; you pick up the new durability guarantees for free. +See the [Sessions Upgrade Guide](/ai-chat/upgrade-guide) for the full step-by-step — auth callback split, persisted `ChatSession` shape, server-side helpers (`chat.createStartSessionAction`, `chat.createAccessToken` for renewal), and the `clientData` validation pivot. -If you built a custom transport against the old `/realtime/v1/streams/{runId}/...` endpoints, see the rewritten [Client Protocol](/ai-chat/client-protocol) for the new session-based wire format. The old constants (`CHAT_STREAM_KEY`, `CHAT_MESSAGES_STREAM_ID`, `CHAT_STOP_STREAM_ID`) are removed from `@trigger.dev/core/v3/chat-client` — migrate to `sessions.open(sessionId).out` / `.in`. +## Docs + +- Rewritten [Client Protocol](/ai-chat/client-protocol) — full wire format for the new `/realtime/v1/sessions/{sessionId}/...` endpoints, JWT scopes, S2 direct-write credentials, and `Last-Event-ID` resume. +- [Database persistence pattern](/ai-chat/patterns/database-persistence) — new `chatId`-keyed `ChatSession` shape (no more `runId`) and a warning on the `onTurnComplete` race that requires a single atomic write of `messages` + `lastEventId`. +- [Reference](/ai-chat/reference) — added `chat.createStartSessionAction`, `chat.createAccessToken`, `ChatInputChunk`, `TriggerChatTaskResult.sessionId`, `ChatTaskRunPayload.sessionId`. The old run-scoped stream-ID constants are gone. 
+- Refreshed [Backend](/ai-chat/backend), [Frontend](/ai-chat/frontend), [Server Chat](/ai-chat/server-chat), [Quick start](/ai-chat/quick-start), [Overview](/ai-chat/overview), [Features](/ai-chat/features), [Types](/ai-chat/types), [Error handling](/ai-chat/error-handling), and [Testing](/ai-chat/testing) for the session-based wiring. diff --git a/docs/ai-chat/upgrade-guide.mdx b/docs/ai-chat/upgrade-guide.mdx index b77464e1ef4..dee74171fab 100644 --- a/docs/ai-chat/upgrade-guide.mdx +++ b/docs/ai-chat/upgrade-guide.mdx @@ -1,6 +1,6 @@ --- title: "Upgrade Guide: prerelease → Sessions-as-run-manager" -sidebarTitle: "Upgrade Guide" +sidebarTitle: "Sessions Upgrade Guide" description: "Migrating chat.agent code from the prerelease API to the Sessions-as-run-manager release." --- diff --git a/docs/docs.json b/docs/docs.json index dede49f08bf..10ebb0fd5fa 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -105,7 +105,6 @@ "pages": [ "ai-chat/overview", "ai-chat/changelog", - "ai-chat/upgrade-guide", "ai-chat/quick-start", "ai-chat/backend", "ai-chat/frontend", @@ -131,7 +130,8 @@ ] }, "ai-chat/client-protocol", - "ai-chat/reference" + "ai-chat/reference", + "ai-chat/upgrade-guide" ] } ] From 63558e5409bc171a6cbf4920b86f5e53d833ed16 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 29 Apr 2026 14:13:26 +0100 Subject: [PATCH 44/49] docs(ai-chat): warn against chat.defer for onTurnStart message persistence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `chat.defer(db.chat.update(...))` in `onTurnStart` is fire-and-forget — the hook resolves and streaming begins before the write lands. A mid-stream page refresh then reads `[]` from the DB, the resumed SSE stream pushes the assistant into an empty array, and the user's message disappears from the rendered conversation. 
- patterns/database-persistence.mdx: replace the misleading "optionally use chat.defer" line with an awaited persistence + a Warning showing wrong/right examples and the failure mode. Update the minimal pseudocode to use await. - features.mdx (chat.defer reference): swap the misleading example (db.chat.update inside onTurnStart) for an analytics-tracking example. Add a Warning cross-linking back to the persistence doc. Reserve chat.defer for writes whose timing has no resume implication. --- docs/ai-chat/features.mdx | 12 ++++++---- .../ai-chat/patterns/database-persistence.mdx | 24 ++++++++++++++++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index fbe12967480..3b355a57866 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -162,14 +162,14 @@ onTurnComplete: async ({ chatId }) => { Use `chat.defer()` to run background work in parallel with streaming. The deferred promise runs alongside the LLM response and is awaited (with a 5s timeout) before `onTurnComplete` fires. -This moves non-blocking work (DB writes, analytics, etc.) out of the critical path: +This moves non-blocking work (analytics, audit logs, search-index writes, cache warming) out of the critical path: ```ts export const myChat = chat.agent({ id: "my-chat", - onTurnStart: async ({ chatId, uiMessages }) => { - // Persist messages without blocking the LLM call - chat.defer(db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } })); + onTurnStart: async ({ chatId, runId }) => { + // Analytics — fire-and-forget, irrelevant to resume. + chat.defer(analytics.track("turn_started", { chatId, runId })); }, run: async ({ messages, signal }) => { return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); @@ -179,6 +179,10 @@ export const myChat = chat.agent({ `chat.defer()` can be called from anywhere during a turn — hooks, `run()`, or nested helpers. 
All deferred promises are collected and awaited together before `onTurnComplete`. + +**Don't use `chat.defer()` for the message-history write in `onTurnStart`.** That write must land *before* the model starts streaming, otherwise a mid-stream page refresh will read `[]` from your DB and lose the user's message from the rendered conversation. See [Database persistence — `onTurnStart`](/ai-chat/patterns/database-persistence#onturnstart). Reserve `chat.defer` for writes whose timing has no resume implication. + + --- ## Custom data parts diff --git a/docs/ai-chat/patterns/database-persistence.mdx b/docs/ai-chat/patterns/database-persistence.mdx index 80c95e529cc..24a84a9a8e7 100644 --- a/docs/ai-chat/patterns/database-persistence.mdx +++ b/docs/ai-chat/patterns/database-persistence.mdx @@ -48,8 +48,25 @@ If you skip preload, do the equivalent in **`onChatStart`** when **`preloaded`** ### `onTurnStart` -- Persist **`uiMessages`** (full accumulated history including the new user turn) **before** streaming starts — so a mid-stream refresh still shows the user’s message. -- Optionally use [`chat.defer()`](/ai-chat/features#chat-defer) so the write does not block the model if your driver is slow. +- **`await`** persist **`uiMessages`** (full accumulated history including the new user turn) **before** the hook returns — `chat.agent` does not begin streaming until `onTurnStart` resolves, so this is what bounds "user message is durable before the stream". + + +**Don't use [`chat.defer()`](/ai-chat/features#chat-defer) for the message write here.** `chat.defer` is fire-and-forget — the hook resolves before the write lands and the stream starts immediately. If the user refreshes mid-stream, the next page load reads `[]` from your DB, the resumed SSE stream pushes the assistant into an empty array, and the user's message disappears from the rendered conversation forever. + +```ts +// ❌ Bad — non-blocking write, mid-stream refresh drops the user message. 
+onTurnStart: async ({ chatId, uiMessages }) => { + chat.defer(db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } })); +}, + +// ✅ Good — awaited, durable before the model starts. +onTurnStart: async ({ chatId, uiMessages }) => { + await db.chat.update({ where: { id: chatId }, data: { messages: uiMessages } }); +}, +``` + +`chat.defer` is for writes whose timing doesn't matter for resume — analytics, audit logs, search-index updates, etc. Anything the next page load reads needs to land before the stream begins. + ### `onTurnComplete` @@ -128,7 +145,8 @@ chat.agent({ }, onTurnStart: async ({ chatId, uiMessages }) => { - chat.defer(saveConversationMessages(chatId, uiMessages)); + // Awaited, not chat.defer — see the warning in `onTurnStart` above. + await saveConversationMessages(chatId, uiMessages); }, onTurnComplete: async ({ chatId, uiMessages, chatAccessToken, lastEventId }) => { From acd3cf52fdf243bb13f00897c4ac11e4735bbdac Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 30 Apr 2026 13:52:11 +0100 Subject: [PATCH 45/49] docs(ai-chat): large-payloads pattern + ChatChunkTooLargeError reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chat output stream caps each record at ~1 MiB, and chat.agent now throws a typed ChatChunkTooLargeError when a chunk overruns. Document both the typed error and the two workaround patterns: - ID-reference: persist large values to your store, emit only an id + preview through the chat stream, fetch the full payload on demand. - Out-of-band streams.writer(): a separate run-scoped channel for transient/per-turn data the chat stream shouldn't carry. Pages - New patterns/large-payloads.mdx covering the cause, the typed error, both patterns, and what doesn't trigger the cap (chat.history, chat.inject, chat.defer). - error-handling.mdx gains a short ChatChunkTooLargeError section that cross-links the new patterns page. 
- docs.json adds the new patterns page to the Agents → Patterns sidebar. --- docs/ai-chat/error-handling.mdx | 9 ++ docs/ai-chat/patterns/large-payloads.mdx | 188 +++++++++++++++++++++++ docs/docs.json | 1 + 3 files changed, 198 insertions(+) create mode 100644 docs/ai-chat/patterns/large-payloads.mdx diff --git a/docs/ai-chat/error-handling.mdx b/docs/ai-chat/error-handling.mdx index 7544a68a81f..8f8ac56ae78 100644 --- a/docs/ai-chat/error-handling.mdx +++ b/docs/ai-chat/error-handling.mdx @@ -393,10 +393,19 @@ To add retry-like behavior: 6. **Use `onFailure` for run-level monitoring** (Sentry, monitoring dashboards). 7. **For known transient errors (rate limits, network)**, consider a fallback model inside `run()` instead of failing the turn. +## `ChatChunkTooLargeError` + +A specific run-failing error worth flagging on its own. Anything written through the chat output is one record on the underlying realtime stream, capped at ~1 MiB per record. A single chunk over the cap throws `ChatChunkTooLargeError` (named export from `@trigger.dev/sdk`). The most common trigger is a tool whose result object is large enough to overflow as one `tool-output-available` chunk. + +The error carries `chunkType`, `chunkSize`, and `maxSize`. Catch with the `isChatChunkTooLargeError` guard and route oversized values out-of-band. + +See [Large payloads in chat.agent](/ai-chat/patterns/large-payloads) for the two patterns that work around the cap (ID-reference + run-scoped `streams.writer()`). 
+ ## See also - [`uiMessageStreamOptions.onError`](/ai-chat/backend#error-handling-with-onerror) — stream error handler details - [Custom actions](/ai-chat/backend#actions) — implement undo/retry actions - [`chat.history`](/ai-chat/backend#chat-history) — rollback to a previous message +- [Large payloads](/ai-chat/patterns/large-payloads) — handling the ~1 MiB per-chunk cap - [Database persistence](/ai-chat/patterns/database-persistence) — saving conversation state - [Standard task hooks](/tasks/overview) — `onFailure`, `onComplete`, `onWait`, etc. diff --git a/docs/ai-chat/patterns/large-payloads.mdx b/docs/ai-chat/patterns/large-payloads.mdx new file mode 100644 index 00000000000..0795d0ea269 --- /dev/null +++ b/docs/ai-chat/patterns/large-payloads.mdx @@ -0,0 +1,188 @@ +--- +title: "Large payloads in chat.agent" +sidebarTitle: "Large payloads" +description: "Why a single chunk on the chat stream is capped at ~1 MiB, what error you'll see, and the two patterns that work around it: ID references and out-of-band run streams." +--- + +The realtime stream that backs `chat.agent` enforces a **per-record cap of ~1 MiB** (`1048576` bytes minus a small envelope reserve). Anything written through the chat output — auto-piped LLM chunks, `chat.response.write`, `chat.store.set`, custom `writer.write` parts — counts as one record per chunk and is rejected if it crosses the cap. + +This is a platform-level limit and cannot be raised per project or per stream. + +## What you'll see + +When a chunk crosses the cap, the run fails with a typed [`ChatChunkTooLargeError`](/ai-chat/error-handling): + +``` +ChatChunkTooLargeError: chat.agent chunk of type "tool-output-available" is 2000126 bytes, +over the realtime stream's per-record cap of 1047552 bytes. For oversized payloads +(e.g. large tool outputs), write the value to your own store and emit only an id/url +through the chat stream — see https://trigger.dev/docs/ai-chat/patterns/large-payloads. 
+``` + +The error includes: + +- `chunkType` — discriminant on the chunk that failed (e.g. `tool-output-available`, `data-handover`, `text-delta`). +- `chunkSize` — UTF-8 byte count of the JSON-serialized record. +- `maxSize` — the effective cap. + +You can catch and re-throw / log it explicitly: + +```ts +import { ChatChunkTooLargeError, isChatChunkTooLargeError } from "@trigger.dev/sdk"; + +try { + await someWrite(); +} catch (err) { + if (isChatChunkTooLargeError(err)) { + logger.error("Oversized chunk", { type: err.chunkType, size: err.chunkSize }); + } + throw err; +} +``` + +## Most common cause: large tool outputs + +If you return a `streamText` result from `run()`, the AI SDK auto-pipes its `UIMessageStream` into the chat output. A tool whose result object is large (a fetched HTML body, a CSV blob, an image as base64, a deep DB row dump) gets emitted as one `tool-output-available` chunk — and that's the chunk that overruns. + +**Diagnose first**: log tool sizes during development. + +```ts +const fetchPage = tool({ + inputSchema: z.object({ url: z.string().url() }), + execute: async ({ url }) => { + const html = await (await fetch(url)).text(); + if (html.length > 500_000) { + logger.warn("Large tool output", { tool: "fetchPage", bytes: html.length }); + } + return { html }; + }, +}); +``` + +If the size is unbounded by input, fix the tool — not the stream. + +## Pattern 1: ID-reference (recommended) + +Store the large value in your own database (or object store) and emit only an identifier through the chat stream. The frontend fetches the full payload separately on demand. + +This keeps the chat stream small, predictable, and resumable, and lets you reuse the value across turns or sessions without re-streaming it. 
+ + + +```ts task.ts +import { chat } from "@trigger.dev/sdk/ai"; +import { tool } from "ai"; +import { z } from "zod"; + +const fetchPage = tool({ + description: "Fetch a URL and store the HTML for later inspection.", + inputSchema: z.object({ url: z.string().url() }), + execute: async ({ url }) => { + const html = await (await fetch(url)).text(); + const docId = await db.documents.create({ + data: { url, html, byteSize: html.length }, + }); + + // Tool result is small — just an id and metadata. + // The model and the UI both work with this lightweight handle. + return { + docId, + url, + byteSize: html.length, + preview: html.slice(0, 500), + }; + }, +}); +``` + +```ts api/document/[id]/route.ts +// Frontend fetches the full document on demand. +import { auth, currentUser } from "@/lib/auth"; + +export async function GET(_req: Request, { params }: { params: { id: string } }) { + const user = await currentUser(); + const doc = await db.documents.findUniqueOrThrow({ + where: { id: params.id, userId: user.id }, + }); + return new Response(doc.html, { headers: { "content-type": "text/html" } }); +} +``` + +```tsx component.tsx +function ToolResultCard({ part }: { part: ToolUIPart<"fetchPage"> }) { + const { docId, url, byteSize, preview } = part.output; + return ( +
+    <div>
+      <p>
+        {url} — {(byteSize / 1024).toFixed(0)} KB
+      </p>
+      <pre>{preview}…</pre>
+      <a href={`/api/document/${docId}`}>Open full HTML</a>
+    </div>
+ ); +} +``` + +
+ +The same pattern works for `chat.response.write` — push the heavy value to your DB, then emit a small data part with the id: + +```ts +const id = await db.attachments.create({ data: { content: hugeReport } }); +chat.response.write({ type: "data-report", data: { id, summary: shortSummary } }); +``` + + + Persist the large value **before** you emit the id chunk. If the chunk reaches the UI before the row is written, the frontend gets a 404 on the follow-up fetch. + + +## Pattern 2: Out-of-band `streams.writer()` + +If the value is **only useful for the lifetime of the run** (a long log tail, a transient progress dump, a per-turn debug trace) and you don't want to persist it, write it to a **separate run-scoped stream** instead. Run-scoped `streams.writer()` is its own channel — chunks go through the same per-record cap, but the chat stream stays untouched, and `useRealtimeRunWithStreams` consumes them independently of the chat UI. + +```ts +import { task, streams } from "@trigger.dev/sdk"; +import { chat } from "@trigger.dev/sdk/ai"; + +const debugLog = streams.define<{ line: string }>("debug-log"); + +export const myChat = chat.agent({ + id: "my-chat", + run: async ({ messages, signal }) => { + // Heavy diagnostic stream lives on its own channel. + const log = debugLog.writer(); + log.write({ line: "starting turn" }); + + return streamText({ /* ... */ }); + }, +}); +``` + +Frontend: + +```tsx +import { useRealtimeRunWithStreams } from "@trigger.dev/react-hooks"; + +function DebugPanel({ runId }: { runId: string }) { + const { streams } = useRealtimeRunWithStreams(runId); + return ( +
+    <pre>{streams?.["debug-log"]?.map((c) => c.line).join("\n")}</pre>
+ ); +} +``` + +Same 1 MiB cap applies per record, so split long content across multiple writes (one record per line, per page, per progress tick) rather than one large blob. + +## What does **not** trigger the cap + +These calls don't go through the realtime stream and have no per-record cap: + +- [`chat.history.set` / `slice` / `replace` / `remove`](/ai-chat/features#chathistory) — locals-only mutations on the in-memory message list. +- [`chat.inject`](/ai-chat/features#chatinject) — appends to the run's pending message queue, not the stream. +- [`chat.defer`](/ai-chat/features#chatdefer) — promise registry; awaited at turn boundaries, never serialized to the stream. + +The control markers `chat.agent` emits internally (`trigger:turn-complete`, `trigger:upgrade-required`) are tiny by construction. + +## See also + +- [Error handling](/ai-chat/error-handling) — how `ChatChunkTooLargeError` flows through the layers. +- [Database persistence](/ai-chat/patterns/database-persistence) — your own store as the durable backing for ID references. +- [Client protocol](/ai-chat/client-protocol) — chunk shapes that travel on the chat stream. diff --git a/docs/docs.json b/docs/docs.json index 10ebb0fd5fa..a30245ffbdb 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -126,6 +126,7 @@ "ai-chat/patterns/branching-conversations", "ai-chat/patterns/code-sandbox", "ai-chat/patterns/human-in-the-loop", + "ai-chat/patterns/large-payloads", "ai-chat/patterns/skills" ] }, From 44d6b1962fe9f5b4e19709d0e4c67c9380ef27ce Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 30 Apr 2026 21:31:09 +0100 Subject: [PATCH 46/49] docs(ai-chat): unblock check-broken-links + address PR #3226 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace `### chat.history {#chat-history}` and `## chat.defer() {#chat-defer}` with sentence-case headings ("Chat history", "Chat defer"). 
Mintlify's MDX parser couldn't parse the `{#...}` explicit-anchor syntax — `{...}` is JSX expression syntax in MDX, and `#chat-history` is not a valid expression. The sentence-case slug matches the existing cross-references on `/ai-chat/backend#chat-history` and `/ai-chat/features#chat-defer`. - Fix the dead link to `/ai-chat/skills/overview` in patterns/skills.mdx by reframing as descriptive text instead. Review feedback (CodeRabbit on PR #3226): - backend.mdx persistence example: add a `` callout reminding readers to scope every server action by the authenticated user (`where: { userId }`) before pasting into a real multi-user app. The example keeps auth out of the way to focus on the persistence shape; the warning makes the gap explicit. - features.mdx `chat.local` example: `userContext.get().userId` was read later but `userId` wasn't part of the type or init — added it to both. - features.mdx preload example: missing `import { useEffect } from "react";`. - frontend.mdx restoring-on-page-load example: relabel `app/page.tsx` to `app/chat/[chatId]/ChatPage.tsx`. The exported `ChatPage` is a client component that takes `chatId` as a normal prop — not an App Router page, which would receive `params`. Added a comment showing the trivial server- component wrapper that awaits `params` and forwards `chatId` into it. Internal "Phase 1/2/3" language stripped from skills.mdx and changelog.mdx — roadmap-internal terminology that doesn't mean anything to readers. 
--- docs/ai-chat/backend.mdx | 6 +++++- docs/ai-chat/changelog.mdx | 2 +- docs/ai-chat/features.mdx | 5 ++++- docs/ai-chat/frontend.mdx | 9 ++++++++- docs/ai-chat/patterns/skills.mdx | 8 ++++---- 5 files changed, 22 insertions(+), 8 deletions(-) diff --git a/docs/ai-chat/backend.mdx b/docs/ai-chat/backend.mdx index 2e1ae5c30a7..0506d0c88fc 100644 --- a/docs/ai-chat/backend.mdx +++ b/docs/ai-chat/backend.mdx @@ -619,6 +619,10 @@ To build a chat app that survives page refreshes, you need to persist two things #### Full persistence example + + The example below trusts raw `chatId` and returns rows without filtering by user. In a real multi-user app, **scope every query by the authenticated user** — read the user from your auth/session in each server action and add `where: { userId }` to all `db.chat.*` and `db.chatSession.*` queries. Without that, one client could read or delete another user's chat state, and `getAllSessions()` would leak other users' `publicAccessToken`s. The snippet keeps auth out of the way to focus on the persistence shape. + + ```ts trigger/chat.ts import { chat } from "@trigger.dev/sdk/ai"; @@ -905,7 +909,7 @@ The action payload is validated against `actionSchema` on the backend — invali Actions always trigger `run()` — the LLM responds to the modified state. For silent state changes that don't need a response (e.g. injecting background context), use [`chat.inject()`](/ai-chat/background-injection) instead. -### chat.history {#chat-history} +### Chat history Imperative API for modifying the accumulated message history. Works from any hook (`onAction`, `onTurnStart`, `onBeforeTurnComplete`, `onTurnComplete`) or from `run()` and AI SDK tools. 
diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index 482731d948b..8263dbe7326 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -44,7 +44,7 @@ See the [Sessions Upgrade Guide](/ai-chat/upgrade-guide) for the full step-by-st -## Agent Skills (Phase 1) +## Agent Skills Ship reusable capabilities as folders — a `SKILL.md` plus optional scripts, references, and assets. The agent sees short descriptions in its system prompt, loads full instructions on demand via `loadSkill`, and invokes bundled scripts via `bash` — no manual wiring. diff --git a/docs/ai-chat/features.mdx b/docs/ai-chat/features.mdx index 3b355a57866..55ee44b2bd6 100644 --- a/docs/ai-chat/features.mdx +++ b/docs/ai-chat/features.mdx @@ -25,6 +25,7 @@ import { db } from "@/lib/db"; // Declare at module level — each local needs a unique id const userContext = chat.local<{ + userId: string; name: string; plan: "free" | "pro"; messageCount: number; @@ -39,6 +40,7 @@ export const myChat = chat.agent({ where: { id: clientData.userId }, }); userContext.init({ + userId: clientData.userId, name: user.name, plan: user.plan, messageCount: user.messageCount, @@ -158,7 +160,7 @@ onTurnComplete: async ({ chatId }) => { --- -## chat.defer() {#chat-defer} +## Chat defer Use `chat.defer()` to run background work in parallel with streaming. The deferred promise runs alongside the LLM response and is awaited (with a 5s timeout) before `onTurnComplete` fires. @@ -426,6 +428,7 @@ Preload eagerly triggers a run for a chat before the first message is sent. 
This Call `transport.preload(chatId)` to start a run early: ```tsx +import { useEffect } from "react"; import { useTriggerChatTransport } from "@trigger.dev/sdk/chat/react"; import { useChat } from "@ai-sdk/react"; diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 24d1693f9ec..4025c68aa34 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -125,7 +125,7 @@ On page load, fetch both the messages and the session state from your database, Because the underlying Session row outlives individual runs, a chat you were in yesterday resumes against the same chat — even if the original run has long since exited. The transport hydrates from the persisted state and uses `lastEventId` to resubscribe; if the client tries to send a new message and no run is alive, the server triggers a fresh continuation run on the same session before the message is appended. -```tsx app/page.tsx +```tsx app/chat/[chatId]/ChatPage.tsx "use client"; import { useEffect, useState } from "react"; @@ -139,6 +139,13 @@ import { deleteSession, } from "@/app/actions"; +// Rendered from `app/chat/[chatId]/page.tsx`, which awaits `params` +// and forwards `chatId` into this client component: +// +// export default async function Page({ params }: { params: Promise<{ chatId: string }> }) { +// const { chatId } = await params; +// return ; +// } export default function ChatPage({ chatId }: { chatId: string }) { const [initialMessages, setInitialMessages] = useState([]); const [initialSession, setInitialSession] = useState(undefined); diff --git a/docs/ai-chat/patterns/skills.mdx b/docs/ai-chat/patterns/skills.mdx index 97933587fe7..bca7e8fef41 100644 --- a/docs/ai-chat/patterns/skills.mdx +++ b/docs/ai-chat/patterns/skills.mdx @@ -17,7 +17,7 @@ Compared to regular AI SDK tools: PDFs are the canonical example: you don't want to ask the LLM to parse PDF bytes inline. You want it to `bash scripts/extract.py report.pdf` using a bundled `pdfplumber` wrapper. 
A skill ships the script, the instructions, and any reference notes together. -Skills are also [dashboard-editable](/ai-chat/skills/overview) in Phase 2 — a platform team can tighten a skill's description or "when to use" text without a redeploy. Phase 1 (today) is SDK-only. +Dashboard-editable `SKILL.md` is on the roadmap so a platform team can tighten a skill's description or "when to use" text without a redeploy. Today, skills are SDK-only — defined in your task code and shipped with each deploy. ## Trust model @@ -197,11 +197,11 @@ If you're running `trigger dev`, the same layout appears in the local dev output - `bash` runs with `cwd` set to the skill's root. Inside the script, relative paths resolve against the skill directory. - Cross-skill access isn't provided — each skill is isolated by design. If two skills need to share data, either duplicate the shared file or consolidate the skills. -## Limitations in Phase 1 +## Current limitations -- `skill.resolve()` (backend-managed overrides) is not available yet. It throws a "not available in Phase 1, use `.local()`" error. Phase 2 ships dashboard-editable `SKILL.md` text. +- `skill.resolve()` (backend-managed overrides) is not available yet — use `.local()` for now. Dashboard-editable `SKILL.md` is on the roadmap. - No per-skill metrics in the dashboard yet. -- No Anthropic `/v1/skills` integration — use the portable path today; the Anthropic optimization comes in Phase 4. +- No Anthropic `/v1/skills` integration — use the portable path today; we're tracking the Anthropic optimization separately. 
## Full example From 4cf26164b47560860def6cd9e806c7dcf5de4eb5 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 1 May 2026 14:26:27 +0100 Subject: [PATCH 47/49] Release 0.0.0-chat-prerelease-20260501122331 --- docs/ai-chat/changelog.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index 8263dbe7326..c82d5ac0110 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,7 +4,7 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- - + ## `chat.agent` now runs on Sessions From a682af0d88a567033d17ce049fcdf2ae248b6683 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 1 May 2026 18:18:17 +0100 Subject: [PATCH 48/49] docs(ai-chat): resilient SSE reconnection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changelog entry for the next prerelease (version placeholder pending publish) covering the indefinite retry, jittered backoff, force- reconnect on online/visibilitychange/pageshow, fetch timeout, stall detector, and 404/410 short-circuit behavior in the chat transport. Adds a Network resilience subsection in frontend.mdx pointing customers at the changelog for details — TL;DR is they don't need to handle network drops, mobile background-kills, or Safari bfcache restores; the transport recovers automatically. Refs TRI-8903. --- docs/ai-chat/changelog.mdx | 14 ++++++++++++++ docs/ai-chat/frontend.mdx | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index c82d5ac0110..b069fa1c96e 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,6 +4,20 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." 
--- + + +## Resilient SSE reconnection + +The chat transport now retries indefinitely on network drops with bounded exponential backoff (100ms initial, 5s cap, 50% jitter) instead of giving up after 5 attempts. Reconnects are immediate on `online`, on tab refocus after a long background, and on Safari bfcache restore (`pageshow` with `event.persisted`). + +A 60s stall detector catches silent-dead-socket cases on mobile where the OS killed the TCP socket without the reader noticing. A 30s per-attempt fetch timeout prevents stuck connections from blocking the retry loop. + +Resume continues to use `Last-Event-ID`, so no chunks are lost when the connection comes back. No public API change — these are defaults on `TriggerChatTransport`. Customers who built `hasActiveStream` / `isStreaming` flag tracking on their side can drop it: the transport handles the silent-but-stale case internally now. + +`SSEStreamSubscription` (used by `TriggerChatTransport` and `AgentChat`) gained `retryNow()` and `forceReconnect()` for callers writing custom transports, plus options to tune `maxRetries` / `retryDelayMs` / `maxRetryDelayMs` / `retryJitter` / `fetchTimeoutMs` / `stallTimeoutMs` / `nonRetryableStatuses`. `404` and `410` short-circuit retry by default (stream gone / session closed). + + + ## `chat.agent` now runs on Sessions diff --git a/docs/ai-chat/frontend.mdx b/docs/ai-chat/frontend.mdx index 4025c68aa34..75a7d97eb88 100644 --- a/docs/ai-chat/frontend.mdx +++ b/docs/ai-chat/frontend.mdx @@ -218,6 +218,10 @@ function ChatClient({ chatId, initialMessages, initialSessions }) { will not occur in production builds. +### Network resilience + +You don't need to handle network drops, mobile background-kills, or Safari bfcache restores. The transport retries indefinitely with bounded backoff, reconnects on `online` / tab refocus / `pageshow` with `event.persisted`, and uses `Last-Event-ID` to resume without dropping chunks. 
See the [changelog entry](/ai-chat/changelog) for the gory details. + ## Client data and metadata ### Transport-level client data From b0988037642acdb43bbb44b3cbf39bd4758f59df Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Fri, 1 May 2026 19:44:50 +0100 Subject: [PATCH 49/49] docs(ai-chat): mark SSE reconnection entry as Upcoming Replace the placeholder with explicit Upcoming / "Pending next chat-prerelease publish" framing so the docs can ship ahead of the prerelease cut. Update at publish time with the actual version label. --- docs/ai-chat/changelog.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ai-chat/changelog.mdx b/docs/ai-chat/changelog.mdx index b069fa1c96e..d4c598b834b 100644 --- a/docs/ai-chat/changelog.mdx +++ b/docs/ai-chat/changelog.mdx @@ -4,7 +4,7 @@ sidebarTitle: "Changelog" description: "Pre-release updates for AI chat agents." --- - + ## Resilient SSE reconnection