future-agi · abhijaisrivastava15 · May 6, 2026 · May 6, 2026 · May 7, 2026 · May 7, 2026
diff --git a/public/images/docs/observe/1.png b/public/images/docs/observe/1.png
diff --git a/public/images/docs/observe/2.png b/public/images/docs/observe/2.png
diff --git a/public/images/docs/observe/3.png b/public/images/docs/observe/3.png
diff --git a/public/images/docs/observe/4.png b/public/images/docs/observe/4.png
diff --git a/public/images/docs/observe/5.png b/public/images/docs/observe/5.png
diff --git a/public/images/docs/observe/5.webp b/public/images/docs/observe/5.webp
diff --git a/public/images/docs/observe/alerts-create.png b/public/images/docs/observe/alerts-create.png
diff --git a/public/images/docs/observe/alerts-overview.png b/public/images/docs/observe/alerts-overview.png
diff --git a/public/images/docs/observe/dashboard-add-widget.png b/public/images/docs/observe/dashboard-add-widget.png
diff --git a/public/images/docs/observe/dashboard-overview.png b/public/images/docs/observe/dashboard-overview.png
diff --git a/public/images/docs/observe/dashboard-populated.png b/public/images/docs/observe/dashboard-populated.png
diff --git a/public/images/docs/observe/evals-create.png b/public/images/docs/observe/evals-create.png
diff --git a/public/images/docs/observe/evals-overview.png b/public/images/docs/observe/evals-overview.png
diff --git a/public/images/docs/observe/llm-tracing-agent-graph.png b/public/images/docs/observe/llm-tracing-agent-graph.png
diff --git a/public/images/docs/observe/llm-tracing-agent-path.png b/public/images/docs/observe/llm-tracing-agent-path.png
diff --git a/public/images/docs/observe/llm-tracing-bulk-actions.png b/public/images/docs/observe/llm-tracing-bulk-actions.png
diff --git a/public/images/docs/observe/llm-tracing-date-range.png b/public/images/docs/observe/llm-tracing-date-range.png
diff --git a/public/images/docs/observe/llm-tracing-detail-drawer.png b/public/images/docs/observe/llm-tracing-detail-drawer.png
diff --git a/public/images/docs/observe/llm-tracing-display.png b/public/images/docs/observe/llm-tracing-display.png
diff --git a/public/images/docs/observe/llm-tracing-filter.png b/public/images/docs/observe/llm-tracing-filter.png
diff --git a/public/images/docs/observe/llm-tracing-overview.png b/public/images/docs/observe/llm-tracing-overview.png
diff --git a/public/images/docs/observe/llm-tracing-sessions-tab.png b/public/images/docs/observe/llm-tracing-sessions-tab.png
diff --git a/public/images/docs/observe/llm-tracing-users-tab.png b/public/images/docs/observe/llm-tracing-users-tab.png
diff --git a/public/images/docs/observe/llm-tracing-voice-detail.png b/public/images/docs/observe/llm-tracing-voice-detail.png
diff --git a/public/images/docs/observe/llm-tracing-voice-overview.png b/public/images/docs/observe/llm-tracing-voice-overview.png
diff --git a/public/images/docs/observe/sessions-bulk-actions.png b/public/images/docs/observe/sessions-bulk-actions.png
diff --git a/public/images/docs/observe/sessions-date-range.png b/public/images/docs/observe/sessions-date-range.png
diff --git a/public/images/docs/observe/sessions-detail.png b/public/images/docs/observe/sessions-detail.png
diff --git a/public/images/docs/observe/sessions-display.png b/public/images/docs/observe/sessions-display.png
diff --git a/public/images/docs/observe/sessions-filter.png b/public/images/docs/observe/sessions-filter.png
diff --git a/public/images/docs/observe/sessions-overview.png b/public/images/docs/observe/sessions-overview.png
diff --git a/public/images/docs/observe/sessions-replay-config.png b/public/images/docs/observe/sessions-replay-config.png
diff --git a/public/images/docs/observe/users-date-range.png b/public/images/docs/observe/users-date-range.png
diff --git a/public/images/docs/observe/users-detail.png b/public/images/docs/observe/users-detail.png
diff --git a/public/images/docs/observe/users-display.png b/public/images/docs/observe/users-display.png
diff --git a/public/images/docs/observe/users-filter.png b/public/images/docs/observe/users-filter.png
diff --git a/public/images/docs/observe/users-overview.png b/public/images/docs/observe/users-overview.png
diff --git a/public/images/docs/observe/voice-agent-definitions.png b/public/images/docs/observe/voice-agent-definitions.png
diff --git a/public/images/docs/observe/voice-call-detail.png b/public/images/docs/observe/voice-call-detail.png
diff --git a/public/images/docs/observe/voice-create-form.png b/public/images/docs/observe/voice-create-form.png
diff --git a/public/images/docs/observe/voice-projects-list.png b/public/images/docs/observe/voice-projects-list.png
diff --git a/public/images/docs/observe/voice-tracing-overview.png b/public/images/docs/observe/voice-tracing-overview.png
diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts
@@ -366,12 +366,13 @@ export const tabNavigation: NavTab[] = [
             title: 'Features',
             items: [
               { title: 'Set Up Observability', href: '/docs/observe/features/quickstart' },
-              { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' },
+              { title: 'LLM Tracing', href: '/docs/observe/features/llm-tracing' },
               { title: 'Sessions', href: '/docs/observe/features/session' },
               { title: 'Users', href: '/docs/observe/features/users' },
+              { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' },
+              { title: 'Dashboards', href: '/docs/observe/features/dashboard' },
               { title: 'Alerts & Monitors', href: '/docs/observe/features/alerts' },
               { title: 'Voice Observability', href: '/docs/observe/features/voice' },
-              { title: 'Dashboards', href: '/docs/observe/features/dashboard' },
               {
                 title: 'Manual Tracing',
                 items: [

diff --git a/src/pages/docs/observe/features/llm-tracing.mdx b/src/pages/docs/observe/features/llm-tracing.mdx
@@ -0,0 +1,236 @@
+---
+title: "Tracing"
+description: "See every request your AI app handled — what went in, what came out, how long it took, and where it went wrong."
+---
+
+## About
+
+Every time someone uses your AI app, the platform records that entire request as a **trace**. A trace captures everything: the user's input, every AI call made along the way, the final output, how long each step took, and whether anything failed.
+
+The **Tracing** page is where you come to see all of those recordings in one place. Think of it as a searchable history of every conversation or task your AI has handled.
+
+<img src="/images/docs/observe/llm-tracing-overview.png" alt="Tracing overview" style={{ borderRadius: '5px' }} />
+
+---
+
+## When to use
+
+- **Something went wrong for a user** — Find their exact request and see what your AI said, step by step.
+- **Your app feels slow** — Check the Latency column to see which requests are taking the longest.
+- **You want to see error patterns** — Filter to show only failed requests and spot what they have in common.
+- **You're reviewing how your AI agent thinks** — Open the Agent Graph to see the full decision path it took.
+- **You need to train or test your AI on real data** — Select traces in bulk and add them to a dataset.
+
+---
+
+## Getting around the page
+
+<Steps>
+  <Step title="Open your project">
+    Click **Tracing** in the left sidebar under **Observe**, then click the project you want to look at.
+
+    <img src="/images/docs/observe/llm-tracing-overview.png" alt="Tracing page with project open" style={{ borderRadius: '5px' }} />
+  </Step>
+
+  <Step title="Choose a time window">
+    The date picker in the top-right corner controls how far back you're looking. It defaults to the past 7 days. Click it to change the range.
+
+    <img src="/images/docs/observe/llm-tracing-date-range.png" alt="Date range picker showing Today, Yesterday, Past 7D, Past 30D, Past 3M, Past 6M, Past 12M, Custom range" style={{ borderRadius: '5px' }} />
+
+    Options: Today · Yesterday · Past 7D · Past 30D · Past 3M · Past 6M · Past 12M. You can also pick a custom date range.
+  </Step>
+
+  <Step title="Find the trace you're looking for">
+    Use **Filter** to search by model, user, error status, or any other property. Use **Display** to quickly show only failed requests or requests that haven't been reviewed yet.
+  </Step>
+
+  <Step title="Click into a trace">
+    Click any row to open it. A side panel slides in showing the full breakdown — every step the AI took, the input and output at each step, and the timing. Use the ↑ ↓ arrow buttons to move between traces without closing the panel.
+  </Step>
+</Steps>
+
+---
+
+## The trace list
+
+The **Trace** tab (the default view) shows one row per request. Here's what each column means:
+
+| Column | What it tells you |
+|---|---|
+| **Trace Name** | The name of the top-level task (e.g. `support_agent.run`). |
+| **Input** | A preview of what the user sent. |
+| **Output** | A preview of what your AI replied. |
+| **Timestamp** | When this request happened. |
+| **Status** | **OK** (green — it worked) or **ERROR** (red — something failed). |
+| **Latency** | Total time from request to response. |
+| **Tokens** | Total number of AI tokens used across this entire request. |
+
+<img src="/images/docs/observe/llm-tracing-bulk-actions.png" alt="Trace list showing Trace Name, Input, Output, Timestamp, Status, Latency, Tokens columns" style={{ borderRadius: '5px' }} />
+
+<Note>
+  If you're using a **voice project**, the columns are different — Call Details, Status, Duration, Avg Latency, Turn Count, and Tokens — because voice calls are measured differently from text exchanges.
+</Note>
+
+---
+
+## Opening a trace (the detail panel)
+
+Click any row to open the detail panel. It splits into two sides:
+
+<img src="/images/docs/observe/llm-tracing-detail-drawer.png" alt="Trace detail panel showing span tree on the left and span details on the right" style={{ borderRadius: '5px' }} />
+
+### Left side — the span tree
+
+This shows every step your AI took to answer the request, in order. Each step (called a **span**) is shown as a row with its name, how long it took, and whether it passed or failed.
+
+For example, a support agent might show:
+- `llm.intent_classification` — the AI figures out what the user wants
+- `tool.check_order_status` — it looks up an order
+- `llm.response_generation` — it writes the reply
+
+Click any step to see its full details on the right side.
+
+### Right side — the step details
+
+When you click a step on the left, the right side shows you everything about it:
+
+- **At the top**: Type, Status, when it started, how long it took, total tokens, prompt tokens, completion tokens, and Cost. For LLM spans, you'll also see the Model name (e.g. `gpt-4o`).
+- **Preview tab**: The exact text that went into this step and the exact text that came out. Below that, a full list of technical attributes like model name, provider, and token counts.
+- **Log View tab**: Raw logs for this step.
+- **Evals tab**: Any quality scores attached to this step.
+- **Annotations tab**: Any notes a human reviewer has added.
+- **Events tab**: Any events that fired during this step.
+
+---
+
+## Filtering
+
+Click **Filter** to narrow down which traces you're looking at.
+
+<img src="/images/docs/observe/llm-tracing-filter.png" alt="Filter panel with AI search bar, Basic and Query tabs, and property list" style={{ borderRadius: '5px' }} />
+
+There are three ways to filter:
+
+- **AI search** — Just describe what you want in plain English, e.g. *"show traces with errors on gpt-4"*, and the filter is built for you automatically.
+- **Basic mode** — Pick a property (like Model or Status), pick a condition (like "is" or "contains"), and enter a value. Add as many as you need — they all apply together.
+- **Query mode** — For technical users who want to write a filter expression directly.
+
+Properties you can filter on include: Trace ID, Trace Name, Span Name, Status, Model, Node Type, User ID, Service / Trace Name, Provider, and Span Kind. You can also filter on eval scores and annotation values.
+
+---
+
+## Display settings
+
+Click **Display** to control how the page looks and what the graph at the top shows.
+
+<img src="/images/docs/observe/llm-tracing-display.png" alt="Display panel open with Graph View tab active and sections for Rows, Columns, Metrics, Group, Graph, Settings" style={{ borderRadius: '5px' }} />
+
+**The three graph views at the top:**
+
+| View | What you see |
+|---|---|
+| **Graph View** | A standard chart showing latency and request volume over time. Good for spotting spikes. |
+| **Agent Graph** | A diagram showing how the AI's steps connect to each other — useful for understanding complex agent flows. |
+| **Agent Path** | The same agent flow shown as a path-based visualization instead of a node-and-edge graph. |
+
+<img src="/images/docs/observe/llm-tracing-agent-graph.png" alt="Agent Graph view — nodes and edges showing span relationships" style={{ borderRadius: '5px' }} />
+
+<img src="/images/docs/observe/llm-tracing-agent-path.png" alt="Agent Path view — path-based visualization of agent flow" style={{ borderRadius: '5px' }} />
+
+**Other settings in this panel:**
+
+- **Rows** — Make each row taller or shorter.
+- **Columns** — Choose which columns to show or hide, and add custom ones.
+- **Metrics** — One-click filters: show only traces that have eval scores, show only errors, or show only traces that haven't been annotated yet.
+- **Group** — Group your traces using the **Group by** dropdown.
+- **Compare graph** — Overlay a second time period on the graph to compare before and after a change.
+- **Set default for everyone** — Save your current layout as the default view for the whole team.
+
+---
+
+## Bulk actions
+
+To take action on multiple traces at once, tick the checkbox on the left of each row you want to include. A toolbar appears at the top showing how many you've selected.
+
+<img src="/images/docs/observe/llm-tracing-bulk-actions.png" alt="Bulk action bar showing 5 rows selected and the Actions dropdown with Move to dataset, Add tags, Add to annotation queue" style={{ borderRadius: '5px' }} />
+
+Click **Actions** to see what you can do:
+
+| Action | What it does |
+|---|---|
+| **Move to dataset** | Saves these traces to a dataset — useful for testing or fine-tuning your AI. |
+| **Add tags** | Labels all the selected traces at once. Good for organizing by topic, issue type, etc. |
+| **Add to annotation queue** | Sends them to a queue for a human to review and score. |
+
+---
+
+## Saving a view
+
+If you've set up filters or display settings you want to come back to, click the **+** button in the top-right corner to save them as a named view.
+
+After you save a view, any later changes to it make a **Save view** button appear so you can update it. Saved views are shared across your whole team — anyone on the project can see and use them.
+
+---
+
+## Sessions tab
+
+Click the **Sessions** tab to group traces by conversation. Instead of seeing individual requests, you see complete multi-turn conversations with their total stats.
+
+<img src="/images/docs/observe/llm-tracing-sessions-tab.png" alt="Sessions tab" style={{ borderRadius: '5px' }} />
+
+| Column | What it shows |
+|---|---|
+| **Session Id** | A unique ID for this conversation. |
+| **First Message** | The opening message of the conversation. |
+| **Last Message** | The most recent message. |
+| **Duration** | How long the conversation lasted. |
+| **Total Cost** | Combined cost of all AI calls in this conversation. |
+| **Total Traces** | How many individual requests were part of this session. |
+
+For more on sessions, see [Sessions](/docs/observe/features/session).
+
+---
+
+## Users tab
+
+Click the **Users** tab to see activity grouped by individual end users — useful for understanding how different users interact with your AI.
+
+<img src="/images/docs/observe/llm-tracing-users-tab.png" alt="Users tab" style={{ borderRadius: '5px' }} />
+
+| Column | What it shows |
+|---|---|
+| **User ID** | The user's identifier (set via `user.id` in your code). |
+| **First Active** | When this user's first request arrived. |
+| **Last Active** | When their most recent request arrived. |
+| **No. of Traces** | Total number of requests from this user. |
+| **No. of Sessions** | How many conversations this user has had. |
+| **Actions** | Options to view or manage this user's data. |
+
+For more on user tracking, see [Users](/docs/observe/features/users).
+
+---
+
+## Auto-refresh and export
+
+- **Auto refresh** — Toggle this in the header to automatically check for new traces every 10 seconds. The timestamp next to it shows when it last updated.
+- **Manual refresh** — Click the refresh icon anytime to pull the latest data immediately.
+- **Export** — Click the download icon to save the current view as a file.
+
+---
+
+## Next Steps
+
+<CardGroup cols={2}>
+  <Card title="Sessions" icon="table-rows" href="/docs/observe/features/session">
+    Analyze multi-turn conversations grouped by session.
+  </Card>
+  <Card title="Users" icon="user" href="/docs/observe/features/users">
+    View activity and metrics broken down per end user.
+  </Card>
+  <Card title="Run Evals on Traces" icon="chart-line" href="/docs/observe/features/evals">
+    Run automated quality checks on your production traces.
+  </Card>
+  <Card title="Alerts & Monitors" icon="zap" href="/docs/observe/features/alerts">
+    Get notified when metrics cross a threshold.
+  </Card>
+</CardGroup>
diff --git a/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx b/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx
@@ -3,6 +3,8 @@ title: "In-line Evaluations: Attach Evals to Spans in Future AGI"
 description: "Run evaluations directly inside a traced span so results are automatically attached to that span in the Future AGI dashboard."
 ---
 
+{/* MANUAL REVIEW NEEDED: verify the inline eval API (trace_eval=True, evaluator.evaluate()) against the current SDK version before publishing — the SDK interface may have changed */}
+
 ## About
 
 Evaluation results are most useful when they sit next to the data that produced them. Running evals as a separate step means matching results back to specific spans after the fact. In-line evaluations remove that gap by running `evaluator.evaluate()` with `trace_eval=True` inside an active span. The evaluation result is automatically attached to that span as attributes, so both the trace data and the eval score appear together in the dashboard.

diff --git a/src/pages/index.astro b/src/pages/index.astro
@@ -36,7 +36,7 @@ const sections = [
     color: "blue",
     href: "/docs/observe",
     links: [
-      { title: "Quickstart", href: "/docs/observe/quickstart" },
+      { title: "Quickstart", href: "/docs/observe/features/quickstart" },
       { title: "Tracing", href: "/docs/tracing" },
     ]
   },