CopilotKit · jerelvelarde · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.env.example b/.env.example
@@ -1,8 +1,9 @@
-OPENAI_API_KEY=
+ANTHROPIC_API_KEY=
 
-# LLM model — strong models are required for reliable UI generation
-# Recommended: gpt-5.4, gpt-5.4-pro, claude-opus-4-6, gemini-3.1-pro
-LLM_MODEL=gpt-5.4-2026-03-05
+# Claude model — strong models are required for reliable UI generation
+# Recommended: claude-fable-5 (default), claude-opus-4-6
+# Fallback: gpt-* names route to OpenAI (requires OPENAI_API_KEY)
+LLM_MODEL=claude-fable-5
 
 # Rate limiting (per IP) — disabled by default
 RATE_LIMIT_ENABLED=false

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -130,10 +130,34 @@ jobs:
       - name: Run linting
         run: pnpm lint
 
+      - name: Run tests
+        run: pnpm test
+
+  agent-tests:
+    name: Agent tests (pytest)
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+        with:
+          persist-credentials: false
+
+      - name: Install uv
+        run: pip install uv
+
+      - name: Sync agent dependencies
+        working-directory: apps/agent
+        run: uv sync
+
+      - name: Run pytest
+        working-directory: apps/agent
+        run: uv run pytest -q
+
   notify-slack:
     name: Notify Slack on Failure
     runs-on: ubuntu-latest
-    needs: [smoke, lint]
+    needs: [smoke, lint, agent-tests]
     if: |
       failure() &&
       github.event_name == 'schedule'

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -121,7 +121,7 @@ from copilotkit import CopilotKitMiddleware
 from src.todos import todo_tools, AgentState
 
 agent = create_agent(
-    model="gpt-5.2",
+    model="claude-fable-5",
     tools=[*todo_tools, ...],  # manage_todos, get_todos
     middleware=[CopilotKitMiddleware()],
     state_schema=AgentState,  # Defines state shape
@@ -213,7 +213,7 @@ export function TodoList({ todos, onUpdate, isAgentRunning }: TodoListProps) {
 ## Tech Stack
 
 - **Frontend**: Next.js 16, React 19, TailwindCSS 4
-- **Agent**: LangGraph (Python), OpenAI GPT-5.2
+- **Agent**: LangGraph (Python), Anthropic Claude (Fable 5)
 - **CopilotKit**: React hooks for agent integration (v2)
 - **Monorepo**: Turborepo with pnpm workspaces
 - **Other**: MCP (Model Context Protocol) integration, Recharts for generative UI examples
@@ -244,8 +244,8 @@ pnpm lint
 ### Environment Setup
 
 ```bash
-# Set OpenAI API key for the agent
-echo 'OPENAI_API_KEY=your-key-here' > apps/agent/.env
+# Set Anthropic API key for the agent
+echo 'ANTHROPIC_API_KEY=your-key-here' > apps/agent/.env
 ```
 
 ## Design Principles

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -61,10 +61,10 @@ Or manually:
 
 ```bash
 pnpm install
-echo 'OPENAI_API_KEY=your-key-here' > apps/agent/.env
+echo 'ANTHROPIC_API_KEY=your-key-here' > apps/agent/.env
 ```
 
-Then add your real OpenAI API key to `apps/agent/.env`.
+Then add your real Anthropic API key to `apps/agent/.env`.
 
 ### 3) Run the Project
 

diff --git a/Makefile b/Makefile
@@ -8,7 +8,7 @@ install: ## Install all dependencies (Node + Python)
 
 setup: install ## Full setup: install deps and create .env from template
 	@if [ ! -f apps/agent/.env ]; then \
-		echo "OPENAI_API_KEY=your-key-here" > apps/agent/.env; \
+		echo "ANTHROPIC_API_KEY=your-key-here" > apps/agent/.env; \
-		echo "Created apps/agent/.env — add your OpenAI API key"; \
+		echo "Created apps/agent/.env — add your Anthropic API key"; \
 	else \
 		echo "apps/agent/.env already exists, skipping"; \

diff --git a/README.md b/README.md
@@ -21,19 +21,18 @@ All visuals are rendered in sandboxed iframes with automatic light/dark theming,
 
 ```bash
 make setup    # Install deps + create .env template
-# Edit apps/agent/.env with your real OpenAI API key
+# Edit apps/agent/.env with your real Anthropic API key
 make dev      # Start all services
 ```
 
-> **Strong models required.** Generative UI demands high-capability models that can produce complex, well-structured HTML/SVG in a single pass. Set `LLM_MODEL` in your `.env` to one of:
+> **Strong models required.** Generative UI demands high-capability models that can produce complex, well-structured HTML/SVG in a single pass. The agent runs on Anthropic Claude — `claude-fable-5` by default. Override with `LLM_MODEL` in your `.env`:
 >
-> | Model | Provider |
-> |-------|----------|
-> | `gpt-5.4` / `gpt-5.4-pro` | OpenAI |
-> | `claude-opus-4-6` | Anthropic |
-> | `gemini-3.1-pro` | Google |
+> | Model | Notes |
+> |-------|-------|
+> | `claude-fable-5` | Default |
+> | `claude-opus-4-6` | Strong alternative |
 >
-> Smaller or weaker models will produce broken layouts, missing interactivity, or incomplete visualizations.
+> Setting `LLM_MODEL` to a `gpt-*` name routes to OpenAI instead (requires `OPENAI_API_KEY`). For other providers, swap the chat model in `apps/agent/src/model.py` (see [docs/bring-to-your-app.md](docs/bring-to-your-app.md)). Smaller or weaker models will produce broken layouts, missing interactivity, or incomplete visualizations.
 
 - **App**: http://localhost:3000
 - **Agent**: http://localhost:8123
@@ -127,15 +126,17 @@ Deep agents also provide built-in planning (`write_todos`), filesystem tools, an
 
 1. **User sends a prompt** via the CopilotKit chat UI
 2. **Deep agent decides** whether to respond with text, call a tool, or render a visual component — consulting relevant skills as needed
-3. **`widgetRenderer`** — a frontend `useComponent` hook — receives the agent's HTML and renders it in a sandboxed iframe
-4. **Skeleton loading** shows while the iframe loads, then content fades in smoothly
-5. **ResizeObserver** inside the iframe reports content height back to the parent for seamless auto-sizing
+3. **`generateSandboxedUi`** — the canonical tool the CopilotKit runtime exposes when `openGenerativeUI` is enabled — receives the UI as ordered streaming parameters: `initialHeight` → `placeholderMessages` → `css` → `html` → `jsFunctions` → `jsExpressions`
+4. **`OpenGenerativeUIMiddleware`** in the runtime translates the streaming tool call into `open-generative-ui` activity events the frontend subscribes to
+5. **The demo's activity renderer** (registered via `renderActivityMessages`) shows the HTML streaming in live — morphing each update into a preview iframe with Idiomorph so nothing flickers — then boots the final websandbox iframe with the shared design-system CSS and CDN import map injected
+6. **Sandbox bridge + autosize** — the generated UI calls back into the host through Zod-validated `sendPrompt`/`openLink` sandbox functions, and a ResizeObserver inside the iframe continuously reports content height for seamless auto-sizing
 
 ### Key CopilotKit Patterns
 
-| Pattern | Hook | Example |
-|---------|------|---------|
-| Generative UI | `useComponent` | Pie charts, bar charts, widget renderer |
+| Pattern | Hook / Option | Example |
+|---------|---------------|---------|
+| Open Generative UI | `openGenerativeUI` + `renderActivityMessages` | Streaming sandboxed widgets via `generateSandboxedUi` |
+| Generative UI | `useComponent` | Pie charts, bar charts |
 | Frontend tools | `useFrontendTool` | Theme toggle |
 | Human-in-the-loop | `useHumanInTheLoop` | Meeting scheduler |
 | Default tool render | `useDefaultRenderTool` | Tool execution status |

diff --git a/apps/agent/main.py b/apps/agent/main.py
@@ -12,78 +12,26 @@
 from copilotkit import CopilotKitMiddleware, LangGraphAGUIAgent
 from ag_ui_langgraph import add_langgraph_fastapi_endpoint
 from deepagents import create_deep_agent
-from langchain_openai import ChatOpenAI
 
+from src.anthropic_compat import ConsecutiveSystemMessagesMiddleware
 from src.bounded_memory_saver import BoundedMemorySaver
+from src.model import build_model
 from src.query import query_data
 from src.todos import AgentState, todo_tools
 from src.form import generate_form
 from src.plan import plan_visualization
+from src.prompt import SYSTEM_PROMPT
 
 load_dotenv()
 
 agent = create_deep_agent(
-    model=ChatOpenAI(model=os.environ.get("LLM_MODEL", "gpt-5.4-2026-03-05")),
+    model=build_model(),
     tools=[query_data, plan_visualization, *todo_tools, generate_form],
-    middleware=[CopilotKitMiddleware()],
+    middleware=[CopilotKitMiddleware(), ConsecutiveSystemMessagesMiddleware()],
     context_schema=AgentState,
     skills=[str(Path(__file__).parent / "skills")],
     checkpointer=BoundedMemorySaver(max_threads=200),
-    system_prompt="""
-        You are a helpful assistant that helps users understand CopilotKit and LangGraph used together.
-
-        Be brief in your explanations of CopilotKit and LangGraph, 1 to 2 sentences.
-
-        When demonstrating charts, always call the query_data tool to fetch all data from the database first.
-
-        ## Visual Response Skills
-
-        You have the ability to produce rich, interactive visual responses using the
-        `widgetRenderer` component. When a user asks you to visualize, explain visually,
-        diagram, or illustrate something, you MUST use the `widgetRenderer` component
-        instead of plain text.
-
-        The `widgetRenderer` component accepts three parameters:
-        - title: A short title for the visualization
-        - description: A one-sentence description of what the visualization shows
-        - html: A self-contained HTML fragment with inline <style> and <script> tags
-
-        The HTML you produce will be rendered inside a sandboxed iframe that already has:
-        - CSS variables for light/dark mode theming (use var(--color-text-primary), etc.)
-        - Pre-styled form elements (buttons, inputs, sliders look native automatically)
-        - Pre-built SVG CSS classes for color ramps (.c-purple, .c-teal, .c-blue, etc.)
-
-        ## Visualization Workflow (MANDATORY)
-
-        When producing ANY visual response (widgetRenderer, pieChart, barChart), you MUST
-        follow this exact sequence:
-
-        1. **Acknowledge** — Reply with 1-2 sentences of plain text acknowledging the
-           request and setting context for what the visualization will show.
-        2. **Plan** — Call `plan_visualization` with your approach, technology choice,
-           and 2-4 key elements. Keep it concise.
-        3. **Build** — Call the appropriate visualization tool (widgetRenderer, pieChart,
-           or barChart).
-        4. **Narrate** — After the visualization, add 2-3 sentences walking through
-           what was built and offering to go deeper.
-
-        NEVER skip the plan_visualization step. NEVER call widgetRenderer, pieChart, or
-        barChart without calling plan_visualization first.
-
-        ## Visualization Quality Standards
-
-        The iframe has an import map with these ES module libraries — use `<script type="module">` and bare import specifiers:
-        - `three` — 3D graphics. `import * as THREE from "three"`. Also `three/examples/jsm/controls/OrbitControls.js` for camera controls.
-        - `gsap` — animation. `import gsap from "gsap"`.
-        - `d3` — data visualization and force layouts. `import * as d3 from "d3"`.
-        - `chart.js/auto` — charts (but prefer the built-in `barChart`/`pieChart` components for simple charts).
-
-        **3D content**: ALWAYS use Three.js with proper WebGL rendering. Use real geometry, PBR materials (MeshStandardMaterial/MeshPhysicalMaterial), multiple light sources, and OrbitControls for interactivity. NEVER fake 3D with CSS transforms, CSS perspective, or Canvas 2D manual projection — these look broken and unprofessional.
-
-        **Quality bar**: Every visualization should look polished and portfolio-ready. Use smooth animations, proper lighting (ambient + directional at minimum), responsive canvas sizing (`window.addEventListener('resize', ...)`), and antialiasing (`antialias: true`). No proof-of-concept quality.
-
-        **Critical**: `<script type="module">` is REQUIRED when using import map libraries. Regular `<script>` tags cannot use `import` statements.
-    """,
+    system_prompt=SYSTEM_PROMPT,
 )
 
 app = FastAPI()

diff --git a/apps/agent/pyproject.toml b/apps/agent/pyproject.toml
@@ -7,13 +7,22 @@ dependencies = [
     "langchain==1.2.0",
     "langgraph==1.0.7",  # pinned: BoundedMemorySaver relies on MemorySaver.storage internal
     "langsmith>=0.4.49",
-    "openai>=1.68.2,<2.0.0",
     "fastapi>=0.115.5,<1.0.0",
     "uvicorn>=0.29.0,<1.0.0",
     "python-dotenv>=1.0.0,<2.0.0",
-    "langchain-openai>=1.1.0",
-    "copilotkit>=0.1.78",
-    "ag-ui-langgraph==0.0.25",
+    "copilotkit>=0.1.94",
+    "ag-ui-langgraph>=0.0.38",
     "langchain-mcp-adapters>=0.2.1",
     "deepagents>=0.1.0",
+    "langchain-anthropic>=1.4.0",
+    "langchain-openai>=1.3.0",
 ]
+
+[dependency-groups]
+dev = [
+    "pytest>=9.0.3",
+]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["."]