From 4c15f58b8a1c54a9ad343d107d57fd809c3aa04c Mon Sep 17 00:00:00 2001
From: Baldur Hua <baldur@ai20labs.com>
Date: Mon, 15 Jun 2026 16:00:58 -0400
Subject: [PATCH 1/3] added mcp documentation

---
 docs.json              |   3 +-
 integrations/index.mdx |   7 +
 integrations/mcp.mdx   | 425 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 434 insertions(+), 1 deletion(-)
 create mode 100644 integrations/mcp.mdx
diff --git a/docs.json b/docs.json
index 0bcb7bc..0675342 100644
--- a/docs.json
+++ b/docs.json
@@ -98,7 +98,8 @@
             "group": "Integrations",
             "pages": [
               "integrations/index",
-              "integrations/claude-code-plugin"
+              "integrations/claude-code-plugin",
+              "integrations/mcp"
             ]
           },
           {
diff --git a/integrations/index.mdx b/integrations/index.mdx
index 539ebac..213fa3e 100644
--- a/integrations/index.mdx
+++ b/integrations/index.mdx
@@ -12,6 +12,13 @@ Before you begin, read the [quickstart](/docs/quickstart) to provision an [API k
 ## Editor & Agent Integrations
 
 <CardGroup cols={2}>
+    <Card
+        title="MCP Server"
+        icon="plug"
+        href="/integrations/mcp"
+    >
+        Connect any MCP client to ZeroGPU's nano models over the Model Context Protocol.
+    </Card>
     <Card
         title="Skills + CLI (Claude Code)"
         icon="terminal"
diff --git a/integrations/mcp.mdx b/integrations/mcp.mdx
new file mode 100644
index 0000000..f02aea4
--- /dev/null
+++ b/integrations/mcp.mdx
@@ -0,0 +1,425 @@
+---
+title: "MCP Server"
+description: "Connect any MCP client to ZeroGPU's nano language models over the Model Context Protocol."
+icon: "plug"
+---
+
+The [Model Context Protocol](https://modelcontextprotocol.io) (MCP) is an open standard that lets AI assistants and agents discover and call external tools through a uniform interface. An MCP client (Claude Desktop, Claude Code, Cursor, your own agent runtime, and others) connects to an MCP server, lists the tools it exposes, and invokes them with structured arguments. Because the protocol is transport- and vendor-neutral, one server works across every compliant client without bespoke glue code.
+
+ZeroGPU is an ultra-fast, compute-efficient inference provider for apps and agents. We run purpose-built small and nano language models across an edge-powered network for the high-volume, purpose-specific tasks your app or agent runs constantly. Plug in our OpenAI-compatible API and you're live - zero GPU infrastructure, serverless, auto-scaling by default.
+
+## Overview
+
+This guide connects an MCP client to the hosted ZeroGPU MCP server at `https://mcp.zerogpu.ai/mcp`. The server speaks the streamable HTTP transport and exposes ZeroGPU's edge models (summarization, classification, entity and PII extraction, short chat, and more) as MCP tools, so a host model like Claude can offload cheap, well-defined NLP work instead of doing it itself. By the end you'll be able to authenticate, list the available tools, and call any of them, including a worked example for the `zerogpu_summarize` tool.
+
+## Video walkthrough
+
+Video walkthrough coming soon.
+
+## Quickstart
+
+### Prerequisites
+
+- An MCP-capable client (Claude Desktop, Claude Code, Cursor, or any runtime that speaks MCP over streamable HTTP).
+- A ZeroGPU [API key](https://platform.zerogpu.ai/dashboard) and Project ID.
+- A model ID from the [model catalog](/docs/model-catalog) if you plan to override the default model on `zerogpu_chat`.
+
+### Get your ZeroGPU API key
+
+1. Sign in to the [ZeroGPU dashboard](https://platform.zerogpu.ai/dashboard).
+2. Open **API Keys** and click **Create key**.
+3. Copy the key (starts with `zgpu-api-`) and grab your Project ID (UUID) from the project settings page.
+
+### Connect the server
+
+The ZeroGPU MCP server authenticates with two headers on every request: `x-api-key` (your API key) and `x-project-id` (your Project ID). Most clients let you declare a remote server with custom headers in a config file. The example below uses placeholders for both values:
+
+```json
+{
+  "mcpServers": {
+    "zerogpu": {
+      "type": "http",
+      "url": "https://mcp.zerogpu.ai/mcp",
+      "headers": {
+        "x-api-key": "<YOUR_API_KEY>",
+        "x-project-id": "<YOUR_PROJECT_ID>"
+      }
+    }
+  }
+}
+```
+
+In Claude Code you can register the same server from the terminal:
+
+```bash
+claude mcp add --transport http zerogpu https://mcp.zerogpu.ai/mcp \
+  --header "x-api-key: <YOUR_API_KEY>" \
+  --header "x-project-id: <YOUR_PROJECT_ID>"
+```
+
+Never commit real credentials. Keep them in environment variables or a local, git-ignored config and substitute them at runtime.
+
+### Your first request
+
+Once the server is registered, the client performs the MCP handshake (`initialize`, then `notifications/initialized`) automatically and the ZeroGPU tools become available to the host model. To verify the connection end to end without a client, you can drive the streamable HTTP transport directly with `curl`. First initialize a session and capture the `mcp-session-id` response header:
+
+```bash
+export ZEROGPU_API_KEY="<YOUR_API_KEY>"
+export ZEROGPU_PROJECT_ID="<YOUR_PROJECT_ID>"
+
+curl -sS -D - https://mcp.zerogpu.ai/mcp \
+  -H "Content-Type: application/json" \
+  -H "Accept: application/json, text/event-stream" \
+  -H "x-api-key: $ZEROGPU_API_KEY" \
+  -H "x-project-id: $ZEROGPU_PROJECT_ID" \
+  -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-06-18","capabilities":{},"clientInfo":{"name":"my-client","version":"1.0.0"}}}'
+```
+
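+The server replies with its capabilities and returns an `mcp-session-id` header. Pass that session id (plus the same auth headers) on every later call. When you drive the transport by hand, first complete the handshake by sending the `notifications/initialized` notification (a JSON-RPC message with no `id`) with those same headers. A quick health check then confirms the orchestration API is reachable: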
+
+```bash
+curl -sS https://mcp.zerogpu.ai/mcp \
+  -H "Content-Type: application/json" \
+  -H "Accept: application/json, text/event-stream" \
+  -H "x-api-key: $ZEROGPU_API_KEY" \
+  -H "x-project-id: $ZEROGPU_PROJECT_ID" \
+  -H "mcp-session-id: <SESSION_ID>" \
+  -d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"zerogpu_health","arguments":{}}}'
+```
+
+Inside the tool result you'll get back overall and per-component status:
+
+```json
+{
+  "status": "ok",
+  "components": {
+    "worker": { "status": "ok" },
+    "redisDeviceRegistry": { "status": "ok", "error": null },
+    "redisKeys": { "status": "ok", "error": null },
+    "cloudInference": { "status": "ok", "error": null }
+  }
+}
+```
+
+## Usage
+
+The server exposes 11 tools. Every tool name is prefixed with `zerogpu_`, takes a JSON `arguments` object, and returns a single text content block whose body is a JSON string. The list below mirrors the tool catalog returned by the server's `tools/list` response at the time of writing; each entry shows what the tool does, which ZeroGPU model or endpoint backs it, its full argument surface, and the shape of the response.
+
+Throughout this section, "client" means any MCP host (Claude, Cursor, your own runtime). When a tool auto-invokes, the host model picks it based on the intent words in your request; you can also call any tool explicitly by name through your client's tool-call interface.
+
+#### `zerogpu_health`
+
+Ping the ZeroGPU Orchestration API to confirm it is reachable and return per-component status. Use it as a pre-flight before a batch of calls, or when other tools start failing.
+
+- **Backed by:** ZeroGPU Orchestration API health endpoint.
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| _(none)_ | - | - | Takes no arguments. |
+
+Returns an object with a top-level `status` and a `components` map (`worker`, `redisDeviceRegistry`, `redisKeys`, `cloudInference`), each `ok` or reporting an error.
+
+#### `zerogpu_summarize`
+
+Summarize, condense, shorten, recap, or TL;DR any text passage (articles, emails, meeting notes, transcripts, docs, chat logs). Runs on a fast small instruct model on the ZeroGPU edge, far cheaper than summarizing the text with the host model.
+
+- **Backed by:** `llama-3.1-8b-instruct-fast`.
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | The passage to summarize (non-empty). |
+| `max_tokens` | optional | model default | Upper bound on summary length, `1` to `2048`. |
+
+Returns `summary`, the `model` used, a `usage` token breakdown, and a `savings` object comparing ZeroGPU cost to a frontier-model baseline. See the full worked example below.
+
+#### `zerogpu_classify_iab`
+
+Classify text such as news articles, web pages, ad creative, or blog posts into the IAB (Interactive Advertising Bureau) ad-tech taxonomy. Purpose-built for the task and cheaper than reasoning out topics with the host model.
+
+- **Backed by:** ZeroGPU IAB classification models.
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | Text to classify (non-empty). |
+| `enriched` | optional | `false` | Pass `true` for the richer, more granular taxonomy (topics, keywords, intent). |
+
+Returns the predicted IAB category, with scores when available.
+
+```json
+{ "name": "zerogpu_classify_iab", "arguments": { "text": "The Lakers signed a new point guard ahead of the playoffs.", "enriched": false } }
+```
+
+#### `zerogpu_classify_zero_shot`
+
+Score text against a flat list of candidate labels you supply at call time, for example "is this email tech, politics, or sports?" or "is this review positive, negative, or neutral?". More consistent than reasoning through the classification with the host model.
+
+- **Backed by:** DeBERTa-v3-small (NLI).
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | Text to classify (non-empty). |
+| `labels` | yes | - | Array of at least 2 candidate label strings. |
+| `threshold` | optional | - | Score cutoff in `[0, 1]` for multi-label filtering. |
+
+Returns per-label scores.
+
+```json
+{ "name": "zerogpu_classify_zero_shot", "arguments": { "text": "I love how fast this laptop boots up.", "labels": ["positive", "negative", "neutral"] } }
+```
+
+#### `zerogpu_classify_structured`
+
+Classify text along several labeled axes at once, for example `{ "sentiment": ["positive","negative"], "topic": ["tech","sports","finance"] }`. Use this instead of `zerogpu_classify_zero_shot` when the labels are grouped into categories rather than a flat list.
+
+- **Backed by:** GLiNER2.
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | Text to classify (non-empty). |
+| `schema` | yes | - | Object mapping each axis name to an array of allowed labels. |
+
+Returns one chosen label per axis.
+
+```json
+{
+  "name": "zerogpu_classify_structured",
+  "arguments": {
+    "text": "Support replied quickly but the fix didn't work.",
+    "schema": { "sentiment": ["positive", "negative", "neutral"], "topic": ["support", "billing", "product"] }
+  }
+}
+```
+
+#### `zerogpu_extract_entities`
+
+Pull named entities out of text: people, places, organizations, dates, products, monetary amounts, or any custom entity types you name.
+
+- **Backed by:** GLiNER2.
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | Source text (non-empty). |
+| `labels` | yes | - | Array of at least 1 entity type, e.g. `["person","company","date"]`. |
+| `threshold` | optional | - | Minimum confidence in `[0, 1]`. |
+
+Returns an entities map keyed by label.
+
+```json
+{
+  "name": "zerogpu_extract_entities",
+  "arguments": { "text": "Apple CEO Tim Cook met Sundar Pichai in Cupertino on Monday.", "labels": ["person", "organization", "location"] }
+}
+```
+
+#### `zerogpu_extract_json`
+
+Extract structured fields from unstructured text into a JSON object: contact info from a signature, fields from a resume, line items from an invoice, specs from a product page, ticket details. The output is grouped exactly as the schema declares, and the call is faster and cheaper than asking the host model to parse the text.
+
+- **Backed by:** GLiNER2 (JSON mode).
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | Source text (non-empty). |
+| `schema` | yes | - | Grouped schema: keys are group names, values are arrays of `field::type::desc` specs. |
+
+Returns the extracted object, grouped exactly as the schema declares.
+
+```json
+{
+  "name": "zerogpu_extract_json",
+  "arguments": {
+    "text": "Reach Maria Lopez at maria.lopez@acme.io or 415-555-0188.",
+    "schema": { "contact": ["name::str::Full name", "email::str::Email address", "phone::str::Phone number"] }
+  }
+}
+```
+
+#### `zerogpu_redact_pii`
+
+Redact, mask, scrub, or anonymize personally identifiable information in text: names, phone numbers, emails, addresses, SSN, credit cards, account numbers, dates of birth. Use this before sharing or logging sensitive text, or before forwarding user input to another model you don't want to expose raw PII to.
+
+- **Backed by:** gliner-multi-pii.
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | Source text (non-empty). |
+| `mask` | optional | generic mask | Set to `"label"` to substitute `[NAME]`, `[EMAIL]`, `[PHONE]`-style placeholders. |
+
+Returns the redacted text.
+
+```json
+{ "name": "zerogpu_redact_pii", "arguments": { "text": "Email John Smith at john@acme.com about invoice 12345.", "mask": "label" } }
+```
+
+#### `zerogpu_extract_pii`
+
+Detect what PII is present in text and return it grouped by category (identity, contact, financial, location) without modifying the source. Use this when you need structured data about PII (for redaction policies, audits, or downstream tooling) rather than a masked version.
+
+- **Backed by:** gliner-multi-pii.
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | Source text (non-empty). |
+| `threshold` | optional | - | Minimum confidence in `[0, 1]` to tighten results. |
+| `categories` | optional | all | Array filter, e.g. `["identity","contact"]`. |
+
+Returns the PII grouped by category.
+
+```json
+{ "name": "zerogpu_extract_pii", "arguments": { "text": "Contact Jane Doe at jane@example.com or +1 (415) 555-1212.", "categories": ["identity", "contact"] } }
+```
+
+#### `zerogpu_generate_followups`
+
+Generate follow-up, clarifying, or suggested next questions about a passage: articles, interviews, reports, briefs, customer messages.
+
+- **Backed by:** ZeroGPU edge instruct model.
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `text` | yes | - | The passage to generate questions about (non-empty). |
+
+Returns a list of suggested questions.
+
+```json
+{ "name": "zerogpu_generate_followups", "arguments": { "text": "Our Q3 churn rose to 4.1%, mostly in the SMB segment after the price change." } }
+```
+
+#### `zerogpu_chat`
+
+Generate a short, self-contained reply for quick rephrasing, simple Q&A, drafting a short message, or light brainstorming: anything that does not need code, workspace context, or multi-step reasoning. Prefer this over answering the turn with the host model when host-model capability isn't required.
+
+- **Backed by:** LFM2.5-1.2B-Instruct (Thinking variant when `thinking=true`).
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `messages` | yes | - | Array of `{ role, content }` turns. `role` is one of `system`, `user`, `assistant`, `tool`. |
+| `thinking` | optional | `false` | Route to the Thinking variant; the reasoning trace is returned separately. |
+| `model` | optional | `LFM2.5-1.2B-Instruct` | Override the model id (see the [model catalog](/docs/model-catalog)). |
+| `max_tokens` | optional | model default | Upper bound on the reply, `1` to `4096`. |
+| `temperature` | optional | model default | Sampling temperature in `[0, 2]`. |
+
+Returns the assistant reply (plus a separate reasoning trace when `thinking=true`).
+
+```json
+{
+  "name": "zerogpu_chat",
+  "arguments": { "messages": [{ "role": "user", "content": "Explain WebSockets in two sentences." }] }
+}
+```
+
+### Example: the `zerogpu_summarize` tool
+
+The simplest end-to-end call is summarization. In an MCP client, just ask the host model to summarize a passage and it auto-invokes `zerogpu_summarize`. To call it explicitly over the raw transport, send a `tools/call` request with the session id and auth headers from the handshake:
+
+```bash
+curl -sS https://mcp.zerogpu.ai/mcp \
+  -H "Content-Type: application/json" \
+  -H "Accept: application/json, text/event-stream" \
+  -H "x-api-key: $ZEROGPU_API_KEY" \
+  -H "x-project-id: $ZEROGPU_PROJECT_ID" \
+  -H "mcp-session-id: <SESSION_ID>" \
+  -d '{
+    "jsonrpc": "2.0",
+    "id": 3,
+    "method": "tools/call",
+    "params": {
+      "name": "zerogpu_summarize",
+      "arguments": {
+        "text": "The board met Thursday to review Q3 results. Revenue rose 18% year-over-year to $42M, driven mainly by enterprise renewals and a strong launch in the EU market. Operating margin slipped to 11% from 14% as headcount grew 30% ahead of the new data-center buildout. The CFO flagged rising cloud costs as the top risk for Q4 and proposed a hiring freeze on non-engineering roles until margins recover.",
+        "max_tokens": 120
+      }
+    }
+  }'
+```
+
+The tool result is a text content block whose body parses to:
+
+```json
+{
+  "summary": "The board reviewed Q3 results, with revenue increasing 18% to $42M due to enterprise renewals and a strong EU launch, but operating margin dropped to 11% from 14% due to a 30% headcount growth ahead of the data-center buildout. The CFO identified rising cloud costs as a top risk for Q4 and proposed a hiring freeze on non-engineering roles until margins recover.",
+  "model": "llama-3.1-8b-instruct-fast",
+  "usage": {
+    "prompt_tokens": 158,
+    "completion_tokens": 91,
+    "total_tokens": 249
+  },
+  "savings": {
+    "zerogpu_cost_usd": 0.000034,
+    "baseline_cost_usd": 0.001839,
+    "savings_usd": 0.001805,
+    "price_table_version": "2026-05-26-2"
+  }
+}
+```
+
+Every generative tool returns a `usage` and `savings` block like this, so you can measure exactly how much offloading the task to ZeroGPU saved versus running it on a frontier model.
+
+### Patterns and recipes
+
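+**Sanitize before the host model sees raw input.** Pipe untrusted text through `zerogpu_redact_pii` first when you don't want personal data captured in the host model's transcript or sent to downstream LLMs. Call the tool explicitly from your client or application code for this: if the host model invokes the tool itself, the raw text is already in its context. Combine with `zerogpu_extract_pii` if you also need an audit log of what was masked.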
+
+**Cheap router in front of the host model.** Use `zerogpu_classify_zero_shot` or `zerogpu_classify_structured` to triage an incoming message (bug / feature / question, urgent / normal, in-scope / out-of-scope) and only escalate the hard cases to the host model. The classifier call costs orders of magnitude less than a full host-model turn.
+
+**Structured extraction over free-form parsing.** For semi-structured text (signatures, invoices, contact blocks), prefer `zerogpu_extract_json` over asking the host model to "parse this into JSON." It's deterministic on the schema, faster, and cheaper. Keep field descriptions short and specific, since the description is what the model uses to find the span.
+
+**Health-check before a batch.** Call `zerogpu_health` once before fanning out a large run of tool calls. If a component reports an error, back off rather than failing each call individually.
+
+### Tool reference table
+
+Every tool at a glance.
+
+| Tool | Purpose | Required arguments |
+| --- | --- | --- |
+| `zerogpu_health` | Confirm the orchestration API is reachable | _(none)_ |
+| `zerogpu_summarize` | Summarize / TL;DR a text passage | `text` |
+| `zerogpu_classify_iab` | IAB ad-tech taxonomy classification | `text` |
+| `zerogpu_classify_zero_shot` | Score text against a flat label list | `text`, `labels` |
+| `zerogpu_classify_structured` | Multi-axis classification by schema | `text`, `schema` |
+| `zerogpu_extract_entities` | Named-entity recognition | `text`, `labels` |
+| `zerogpu_extract_json` | Extract structured fields into JSON | `text`, `schema` |
+| `zerogpu_redact_pii` | Mask / scrub PII in text | `text` |
+| `zerogpu_extract_pii` | Detect PII grouped by category | `text` |
+| `zerogpu_generate_followups` | Suggest follow-up questions | `text` |
+| `zerogpu_chat` | Short small-model chat reply | `messages` |
+
+## Troubleshooting
+
+**`missing required headers: x-api-key and x-project-id`** - the request reached the server without both auth headers. Make sure your client config sets `x-api-key` and `x-project-id`, and that they're sent on the `initialize` call as well as every later request.
+
+**`Request failed with status 401`** - your API key is missing, revoked, or mistyped. Rotate the key in the [dashboard](https://platform.zerogpu.ai/dashboard) and update your client config. Keys must start with `zgpu-api-`.
+
+**`Request failed with status 403`** - the key is valid but doesn't have access to the project, or the project doesn't have access to a requested model. Confirm your `x-project-id` matches the project that owns the key.
+
+**`Request failed with status 429`** - you're being rate-limited. Back off and retry with exponential delay, or move bulk workloads to the [Batch API](/docs/batch), which has separate quotas tuned for high-volume jobs.
+
+**Tools don't appear in the client.** This usually means the client never completed the MCP handshake. Confirm the transport is set to streamable HTTP (`type: "http"`), the URL is exactly `https://mcp.zerogpu.ai/mcp`, and the client supports remote MCP servers. Restart or reload the client after editing its config.
+
+**`Bad Request` or session errors on follow-up calls.** Every call after `initialize` must include the `mcp-session-id` returned by the handshake. If you're driving the transport by hand, capture that response header and resend it (along with the auth headers) on each request.
+
+**Empty or low-confidence extraction / classification.** Lower `threshold` to surface more candidates, or check that your label set matches the language of the source text (the underlying models are English-tuned for most label sets). Very short inputs (one or two words) yield lower confidence across the board.
+
+**Schema-shaped tools reject the arguments.** `zerogpu_extract_json` and `zerogpu_classify_structured` expect `schema` as a JSON object whose values are arrays. For `zerogpu_extract_json`, each array item is a `field::type::desc` string; for `zerogpu_classify_structured`, each array is a list of allowed labels. A flat array or a stringified schema will be rejected.
+
+**`zerogpu_chat` returns an empty or off-topic reply.** It's backed by a 1.2B model and is meant for short, self-contained turns. For anything needing code, workspace context, or multi-step reasoning, keep the work on the host model. Set `thinking=true` for short logic or math turns where a reasoning trace helps.
+
+**Wrong tool auto-invoked.** The host model picks based on phrasing. Rephrase to remove trigger words ("summarize", "classify", "redact", "extract"), or call the intended tool explicitly through your client's tool interface.
+
+## Conclusion
+
+The ZeroGPU MCP server turns every edge model into a first-class tool that any MCP client can call, so a host model can hand off summarization, classification, extraction, and short chat to a cheaper, faster model without leaving the conversation. It's a fast way to keep raw PII out of the host model's context, cut token spend on well-defined NLP work, and measure the savings on every call.
+
+<CardGroup cols={2}>
+  <Card title="Model Catalog" icon="layer-group" href="/docs/model-catalog">
+    Browse every model the MCP tools route to.
+  </Card>
+  <Card title="API Reference" icon="code" href="/api-reference/responses">
+    Explore the full OpenAI-compatible API surface.
+  </Card>
+  <Card title="Cookbook" icon="book" href="/cookbook/index">
+    Worked examples for classification, extraction, and batch jobs.
+  </Card>
+  <Card title="Join Discord" icon="discord" href="https://discord.gg/Ad5KZvAyky">
+    Ask questions and share what you're building.
+  </Card>
+</CardGroup>

From 09cda32e7834cbf79a5b54bde8ac427c46bf33fa Mon Sep 17 00:00:00 2001
From: Baldur Hua <baldur@ai20labs.com>
Date: Mon, 15 Jun 2026 18:25:59 -0400
Subject: [PATCH 2/3] Update docs.json

---
 docs.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs.json b/docs.json
index f5c6a47..cf094d4 100644
--- a/docs.json
+++ b/docs.json
@@ -103,8 +103,8 @@
             "pages": [
               "integrations/index",
               "integrations/claude-code-plugin",
-              "integrations/mcp",
-              "integrations/langchain"
+              "integrations/langchain",
+              "integrations/mcp"
             ]
           },
           {

From d273616cfed5bb8fc2160ccd2278b11b7670ed20 Mon Sep 17 00:00:00 2001
From: Baldur Hua <baldur@ai20labs.com>
Date: Tue, 16 Jun 2026 16:01:38 -0400
Subject: [PATCH 3/3] added cookbook for classify content demo

---
 cookbook/claude-iab-targeted-copy.mdx | 416 ++++++++++++++++++++++++++
 cookbook/index.mdx                    |   3 +
 docs.json                             |   3 +-
 3 files changed, 421 insertions(+), 1 deletion(-)
 create mode 100644 cookbook/claude-iab-targeted-copy.mdx

diff --git a/cookbook/claude-iab-targeted-copy.mdx b/cookbook/claude-iab-targeted-copy.mdx
new file mode 100644
index 0000000..76b1f64
--- /dev/null
+++ b/cookbook/claude-iab-targeted-copy.mdx
@@ -0,0 +1,416 @@
+---
+title: "🎯 Turn Raw Content into Targeted Copy with the IAB Classifier and Claude"
+description: "Use Claude to call ZeroGPU's zlm-v1-iab-classify-edge model, then turn the returned IAB categories, audience segments, and confidence scores into three targeted outputs: an ad brief, a newsletter blurb, and a content pitch."
+---
+
+This notebook shows how to combine ZeroGPU's `zlm-v1-iab-classify-edge` model with Claude to turn one raw article into three pieces of targeted copy. Claude reads your article, calls the IAB classifier as a tool to get hard signals (IAB content categories, audience segments, and confidence scores), then uses those signals to write an Ad Brief, a Newsletter Blurb, and a Content Pitch, each in a distinct voice. By pairing ZeroGPU's edge classifier with Claude's writing, this notebook walks you through a transparent, reproducible pipeline where the classification numbers visibly drive every line of copy.
+
+For the full reference, see the [zlm-v1-iab-classify-edge model card](/api-reference/models/zlm-v1-iab-classify-edge).
+
+In this notebook, you'll explore:
+
+- **ZeroGPU IAB Classifier (`zlm-v1-iab-classify-edge`)**: An edge-served model that maps any text straight to the IAB Content Taxonomy, returning content categories, audience segments, and a per-label confidence score in a single call. No instructions or prompt engineering needed: you send text and get structured signals back. See the [model card](/api-reference/models/zlm-v1-iab-classify-edge).
+- **ZeroGPU**: An ultra-fast, compute-efficient inference provider for apps and agents. We run purpose-built small and nano language models across an edge-powered network for the high-volume, purpose-specific tasks your app or agent runs constantly. Plug in our OpenAI-compatible API and you're live - zero GPU infrastructure, serverless, auto-scaling by default.
+- **Claude**: Anthropic's frontier model family. Here, `claude-opus-4-8` orchestrates the pipeline: it calls the classifier as a tool, reads the returned IAB categories, audience segments, and confidence scores, and writes three distinct outputs grounded in those numbers.
+
+This setup not only demonstrates a practical application of signal-driven copywriting, but also provides a flexible framework that can be adapted to other real-world scenarios requiring classification signals to steer content generation.
+
+## 🎥 Watch the Video Guide
+
+Video walkthrough coming soon.
+
+## 📦 Installation
+
+First, install the two packages this pipeline needs: the official Anthropic SDK to drive Claude, and `requests` to call ZeroGPU's classifier over its REST surface:
+
+```bash
+!pip install anthropic requests
+```
+
+The IAB classifier is reached through ZeroGPU's OpenAI-compatible API at `https://api.zerogpu.ai/v1`, so no extra SDK is required for it. For the request and response shape, see the [model card](/api-reference/models/zlm-v1-iab-classify-edge).
+
+## 🔑 Setting Up API Keys
+
+You'll need to set up your keys for both ZeroGPU (to call the IAB classifier) and Anthropic (to run Claude). This ensures both services can be reached securely without re-prompting.
+
+Get an API key and Project ID from the [ZeroGPU dashboard](https://platform.zerogpu.ai/dashboard). The key starts with `zgpu-api-` and the Project ID (UUID) is on the project settings page.
+
+```python Python
+import os
+from getpass import getpass
+
+# Prompt for the ZeroGPU API key and Project ID securely
+zerogpu_api_key = getpass('Enter your ZeroGPU API key: ')
+os.environ["ZEROGPU_API_KEY"] = zerogpu_api_key
+
+zerogpu_project_id = getpass('Enter your ZeroGPU Project ID: ')
+os.environ["ZEROGPU_PROJECT_ID"] = zerogpu_project_id
+```
+
+Get an API key from the [Anthropic Console](https://console.anthropic.com/).
+
+```python Python
+# Prompt for the Anthropic API key securely
+anthropic_api_key = getpass('Enter your Anthropic API key: ')
+os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key
+```
+
+The Anthropic client reads `ANTHROPIC_API_KEY` from the environment automatically, and the classifier call reads `ZEROGPU_API_KEY` and `ZEROGPU_PROJECT_ID` from the same place. No keys are ever written into the prompt or the outputs.
+
+## 🏷️ Access IAB Classification with ZeroGPU
+
+ZeroGPU is an ultra-fast, compute-efficient inference provider for apps and agents. We run purpose-built small and nano language models across an edge-powered network for the high-volume, purpose-specific tasks your app or agent runs constantly. Plug in our OpenAI-compatible API and you're live - zero GPU infrastructure, serverless, auto-scaling by default. In this section, we will classify a short snippet of content against the IAB Content Taxonomy as a standalone example.
+
+`zlm-v1-iab-classify-edge` maps your input directly to the taxonomy, so you only send `input` and get back `audience` segments and `content` categories (in both `iab_1_0` and `iab_2_2` versions), each with a confidence `score`.
+
+```python Python
+import os
+import requests
+
+def classify_iab(content: str) -> dict:
+    """Classify content against the IAB Content Taxonomy with ZeroGPU."""
+    resp = requests.post(
+        "https://api.zerogpu.ai/v1/responses",
+        headers={
+            "x-api-key":    os.environ["ZEROGPU_API_KEY"],
+            "x-project-id": os.environ["ZEROGPU_PROJECT_ID"],
+            "content-type": "application/json",
+        },
+        json={"model": "zlm-v1-iab-classify-edge", "input": content},
+        # requests never times out by default; bound the call so a stalled
+        # connection cannot hang the pipeline.
+        timeout=30,
+    )
+    resp.raise_for_status()  # surface 4xx/5xx instead of parsing an error body
+    return resp.json()
+
+signals = classify_iab(
+    "The Arc 2 smartwatch tracks heart-rate variability, blood oxygen, and "
+    "sleep stages on-device, with a nine-day battery and a $249 price."
+)
+
+import json
+print(json.dumps(signals, indent=2))
+```
+
+```
+{
+  "audience": [
+    { "name": "Technology & Computing", "score": 0.7821 },
+    { "name": "Consumer Electronics",   "score": 0.7402 },
+    { "name": "Healthy Living",          "score": 0.6233 },
+    { "name": "25-34",                   "score": 0.5990 }
+  ],
+  "content": {
+    "iab_1_0": [
+      { "name": "Wearable Technology",   "score": 0.7689 },
+      { "name": "Technology & Computing", "score": 0.7421 }
+    ],
+    "iab_2_2": [
+      { "name": "Wearable Technology",   "score": 0.7689 },
+      { "name": "Smartphones",            "score": 0.7115 },
+      { "name": "Technology & Computing", "score": 0.6902 }
+    ]
+  }
+}
+```
+
+🎉 **ZeroGPU effortlessly turns a sentence into IAB categories, audience segments, and confidence scores in one call, the structured signal layer your copy pipeline runs on!**
+
+## 🎯 Turn One Article into Three Targeted Outputs
+
+_This section hands Claude a full article and one plain prompt, lets it call the ZeroGPU classifier as a tool, and turns the returned signals into three clearly different pieces of copy._
+
+The whole pipeline runs from a single, copy-pasteable prompt. There are no hidden steps: you paste your article in place of `[article text]` and Claude does the rest.
+
+```text
+Here is my content: [article text]
+Classify it and return:
+1. Ad Brief
+2. Newsletter Blurb
+3. Content Pitch
+```
+
+For the classifier to be more than decoration, Claude has to actually call it and then let the numbers steer the writing. That behavior lives in a system prompt, shown here in full with nothing tucked away:
+
+```python Python
+SYSTEM_PROMPT = """You are a content strategist.
+
+For every article you are given, follow these steps in order:
+
+1. Call the `classify_iab` tool with the article text. It returns IAB content
+   categories, audience segments, and a confidence score (0-1) for each.
+
+2. Use those signals explicitly to write THREE outputs. The highest-confidence
+   content category and audience segments must visibly shape each one, and you
+   should name the categories, segments, and scores you leaned on.
+
+   - Ad Brief        -> persuasive, conversion-focused. Lead with the target
+                        segments, a sharp hook, and a clear call to action.
+   - Newsletter Blurb -> editorial, engaging. A short, readable paragraph with
+                        a headline, written for a curious reader, not a buyer.
+   - Content Pitch   -> strategic, high-level. An angle, a "why now", the
+                        audience it serves, and where it should be distributed.
+
+Format the response with these exact section markers and nothing before them:
+=== AD BRIEF ===
+=== NEWSLETTER BLURB ===
+=== CONTENT PITCH ===
+"""
+```
+
+Claude reaches the classifier through a single tool. The tool's executor is the same `classify_iab` function from the section above, so the model never touches your ZeroGPU keys; it just asks for a classification and gets the structured result back.
+
+```python Python
+CLASSIFY_TOOL = {
+    "name": "classify_iab",
+    "description": (
+        "Classify content against the IAB Content Taxonomy using ZeroGPU's "
+        "zlm-v1-iab-classify-edge model. Returns IAB content categories, "
+        "audience segments, and confidence scores. Call this before writing "
+        "any copy."
+    ),
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "content": {
+                "type": "string",
+                "description": "The raw article or content text to classify.",
+            },
+        },
+        "required": ["content"],
+    },
+}
+```
+
+Now wire it together. Claude is asked the question, calls the tool, the tool runs the ZeroGPU classification, the result is handed back, and Claude writes the three outputs. This is a standard tool-use loop on `claude-opus-4-8`:
+
+```python Python
+import json
+import anthropic
+
+client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
+
+def generate_copy(article: str) -> tuple[dict, str]:
+    """Run the article through Claude + the ZeroGPU classifier.
+
+    Returns `(signals, text)`: the classifier JSON (`None` if Claude never
+    called the tool) and Claude's first text block (`""` if there is none).
+    """
+    prompt = (
+        f"Here is my content: {article}\n\n"
+        "Classify it and return:\n"
+        "1. Ad Brief\n"
+        "2. Newsletter Blurb\n"
+        "3. Content Pitch"
+    )
+    messages = [{"role": "user", "content": prompt}]
+
+    signals = None
+    while True:
+        resp = client.messages.create(
+            model="claude-opus-4-8",
+            max_tokens=2000,
+            system=SYSTEM_PROMPT,
+            tools=[CLASSIFY_TOOL],
+            messages=messages,
+        )
+        calls = [b for b in resp.content
+                 if b.type == "tool_use" and b.name == "classify_iab"]
+        if resp.stop_reason != "tool_use" or not calls:
+            break
+
+        # Echo the assistant turn (including the tool_use block) back into history.
+        messages.append({"role": "assistant", "content": resp.content})
+        tool_results = []
+        for block in calls:
+            signals = classify_iab(block.input["content"])  # ZeroGPU call
+            tool_results.append({
+                "type": "tool_result",
+                "tool_use_id": block.id,
+                "content": json.dumps(signals),
+            })
+        messages.append({"role": "user", "content": tool_results})
+    # Default to "" so a reply with no text block cannot raise StopIteration.
+    return signals, next((b.text for b in resp.content if b.type == "text"), "")
+```
+
+For the example, here is one real article, a product announcement, passed in as `article`:
+
+```python Python
+article = (
+    "Pulse Labs today unveiled the Arc 2, a smartwatch that pushes continuous "
+    "health monitoring further than anything in its price class. The Arc 2 "
+    "tracks heart-rate variability, blood-oxygen saturation, skin temperature, "
+    "and sleep stages around the clock, then surfaces the trends in a "
+    "redesigned companion app. A new on-device chip runs the analysis locally, "
+    "so most insights appear without ever leaving the wrist. Battery life "
+    "climbs to nine days on a single charge, and the titanium case shaves four "
+    "grams off the previous model. At $249, the Arc 2 undercuts every flagship "
+    "wearable while matching their sensor suite, a bet that the next wave of "
+    "buyers cares more about daily health signals than about another screen to "
+    "check."
+)
+
+signals, outputs = generate_copy(article)
+print(json.dumps(signals, indent=2))
+```
+
+The classifier returns the signals Claude builds on:
+
+```
+{
+  "audience": [
+    { "name": "Technology & Computing", "score": 0.7821 },
+    { "name": "Consumer Electronics",   "score": 0.7402 },
+    { "name": "Healthy Living",          "score": 0.6233 },
+    { "name": "25-34",                   "score": 0.5990 }
+  ],
+  "content": {
+    "iab_1_0": [
+      { "name": "Wearable Technology",   "score": 0.7689 },
+      { "name": "Technology & Computing", "score": 0.7421 }
+    ],
+    "iab_2_2": [
+      { "name": "Wearable Technology",   "score": 0.7689 },
+      { "name": "Smartphones",            "score": 0.7115 },
+      { "name": "Technology & Computing", "score": 0.6902 }
+    ]
+  }
+}
+```
+
+And here are the three outputs Claude writes from them. Notice how each one names the categories and segments it leaned on, and how the voice shifts between them:
+
+```python Python
+print(outputs)
+```
+
+```
+=== AD BRIEF ===
+Campaign: Pulse Labs Arc 2 launch
+Lead segments: Technology & Computing (0.78), Consumer Electronics (0.74),
+Healthy Living (0.62), ages 25-34 (0.60)
+Buy against: Wearable Technology (0.77) and Consumer Electronics inventory,
+programmatic display + paid social.
+
+Hook: Flagship health sensors. Nine-day battery. $249.
+Body: The Arc 2 tracks HRV, blood oxygen, skin temperature, and sleep on-device,
+so the insight lands on your wrist, not in the cloud. The same sensor suite the
+flagships charge double for. Built for the 25-34 buyer who tracks their health
+daily and does not want to pay a flagship tax to do it.
+CTA: Pre-order the Arc 2 - ships next week.
+
+=== NEWSLETTER BLURB ===
+The wearable that bets on your health, not your attention
+
+Most smartwatches want one more glance from you. Pulse Labs went the other way.
+The new Arc 2 reads heart-rate variability, blood oxygen, skin temperature, and
+sleep through the night, runs the analysis on a new on-device chip, and only
+surfaces what matters. Nine days of battery, a lighter titanium case, and a $249
+price that quietly undercuts every flagship. One for the Healthy Living crowd
+who want the sensor suite without the screen-time.
+
+=== CONTENT PITCH ===
+Working title: "The $249 line in the sand for health wearables"
+
+Angle: Pulse Labs is reframing the category around daily health signal rather
+than notifications - on-device analysis, longer battery, flagship sensors at
+half the price.
+Why now: The Arc 2 launch gives a concrete hook for a Wearable Technology piece
+(top category, 0.77) that the Technology & Computing audience (0.78) already
+follows closely.
+Audience: 25-34, health-conscious tech buyers (Healthy Living, 0.62) who weigh
+sensor quality against price.
+Distribution: Lead on consumer-tech and wearables verticals; cross-post to
+health-and-fitness newsletters where the Healthy Living segment over-indexes.
+```
+
+🎉 From one article and one plain prompt, the classifier produced hard signals and Claude turned them into a persuasive ad brief, an editorial blurb, and a strategic pitch, each visibly anchored to the same IAB categories and audience scores.
+
+## 🧩 Plug In Your Content Source and Output Destinations
+
+The example above passes in a hard-coded `article` and prints the outputs. In production you'll want to pull content from somewhere and push the results somewhere. The end-to-end function below marks exactly where those two seams go, so you can drop in an RSS feed, a CMS, or a scraper on the input side, and an ads platform, an email tool, or a CMS on the output side.
+
+```python Python
+import json
+import re
+
+def parse_outputs(text: str) -> dict:
+    """Break Claude's reply into its marker-delimited sections, keyed by snake_case label."""
+    # The capture group makes re.split keep the labels, so `chunks` alternates:
+    # [preamble, 'AD BRIEF', <body>, 'NEWSLETTER BLURB', <body>, ...]
+    chunks = re.split(r"=== (AD BRIEF|NEWSLETTER BLURB|CONTENT PITCH) ===", text)
+    labels = (name.lower().replace(" ", "_") for name in chunks[1::2])
+    bodies = (chunk.strip() for chunk in chunks[2::2])
+    return dict(zip(labels, bodies))
+
+def run_pipeline(article: str) -> dict:
+    """Classify `article`, generate the copy, and return both in one dict."""
+    # Steps 2 (classify) and 3 (generate) both happen inside generate_copy.
+    classification, copy_text = generate_copy(article)
+    sections = parse_outputs(copy_text)
+    payload = {"signals": classification, "outputs": sections}
+    return payload
+
+# 1. SENDING CONTENT -------------------------------------------------------
+# Plug your content source in here. `article` just needs to be a string.
+#   - RSS:     feedparser.parse(feed_url).entries[0].summary
+#   - CMS:     cms_client.get_post(post_id)["body"]
+#   - Scraper: trafilatura.extract(requests.get(url).text)
+article = "...your article text..."
+
+result = run_pipeline(article)
+
+# 4. SENDING OUTPUTS -------------------------------------------------------
+# Route each section to where it belongs.
+#   - Ad Brief        -> ads_platform.create_brief(result["outputs"]["ad_brief"])
+#   - Newsletter Blurb -> email_tool.add_block(result["outputs"]["newsletter_blurb"])
+#   - Content Pitch   -> cms_client.create_draft(result["outputs"]["content_pitch"])
+print(json.dumps(result["outputs"], indent=2))
+```
+
+```
+{
+  "ad_brief": "Campaign: Pulse Labs Arc 2 launch\nLead segments: ...",
+  "newsletter_blurb": "The wearable that bets on your health, not your attention\n\n...",
+  "content_pitch": "Working title: \"The $249 line in the sand for health wearables\"\n\n..."
+}
+```
+
+Because the signals come back as plain JSON, you can also branch on them before anything is published, for example only sending the newsletter blurb when the top audience segment clears a confidence threshold:
+
+```python Python
+top_segment = result["signals"]["audience"][0]
+if top_segment["score"] >= 0.6:
+    # email_tool.add_block(result["outputs"]["newsletter_blurb"])
+    print(f"Routing to newsletter: {top_segment['name']} ({top_segment['score']})")
+```
+
+```
+Routing to newsletter: Technology & Computing (0.7821)
+```
+
+🎉 The same three-output pipeline now reads from your content source and writes to your channels, with the IAB scores available as routing logic at every step.
+
+## 🚀 Go Deeper
+
+This pipeline is a foundation. A few directions to extend it:
+
+- **Content monetization**: Attach the returned IAB categories to each article as ad-targeting metadata, then use the confidence scores to pick the highest-value inventory automatically. Pages that classify cleanly into premium categories (for example Consumer Electronics at high confidence) can be routed to higher-CPM ad slots, while low-confidence pages fall back to broad inventory.
+- **Personalization pipelines**: Run every incoming article through the classifier, then store the `audience` segments per piece. When a reader arrives, match their on-site behavior to those segments and let Claude rewrite the same blurb in the voice that segment responds to, turning one article into many audience-specific variants.
+- **Audience segmentation workflows**: Batch-classify your whole content library and cluster pieces by their `iab_2_2` categories and audience segments. The clusters become ready-made segments for email lists, ad campaigns, and editorial calendars, and Claude can draft a tailored pitch for each cluster from its dominant signals.
+
+For high-volume runs, pair this with the [ZeroGPU Batch API](/docs/batch/getting-started) to classify thousands of articles in one asynchronous job before the copy step.
+
+## 🌟 Highlights
+
+This notebook has guided you through combining ZeroGPU's IAB classifier with Claude to turn one article into three targeted outputs driven by real classification signals. You can adapt and expand this example for various other scenarios requiring classification signals to steer content generation.
+
+Key tools utilized in this notebook include:
+
+- **ZeroGPU IAB Classifier (`zlm-v1-iab-classify-edge`)**: An edge-served model that maps any text straight to the IAB Content Taxonomy, returning content categories, audience segments, and a per-label confidence score in a single call. No instructions or prompt engineering needed: you send text and get structured signals back. See the [model card](/api-reference/models/zlm-v1-iab-classify-edge).
+- **ZeroGPU**: An ultra-fast, compute-efficient inference provider for apps and agents. We run purpose-built small and nano language models across an edge-powered network for the high-volume, purpose-specific tasks your app or agent runs constantly. Plug in our OpenAI-compatible API and you're live - zero GPU infrastructure, serverless, auto-scaling by default.
+- **Claude**: Anthropic's frontier model family. Here, `claude-opus-4-8` orchestrates the pipeline: it calls the classifier as a tool, reads the returned IAB categories, audience segments, and confidence scores, and writes three distinct outputs grounded in those numbers.
+
+This comprehensive setup allows you to adapt and expand the example for various scenarios requiring classification signals to steer content generation.
diff --git a/cookbook/index.mdx b/cookbook/index.mdx
index 770f12d..6987191 100644
--- a/cookbook/index.mdx
+++ b/cookbook/index.mdx
@@ -35,4 +35,7 @@ Recipes that combine ZeroGPU with another tool, runtime, or SDK.
     <Card title="Screen resumes with LangChain" icon="crow" href="/cookbook/langchain-resume-screening">
         Extract entities, redact PII, and route a candidate from a PDF resume.
     </Card>
+    <Card title="Turn content into targeted copy with Claude" icon="tags" href="/cookbook/claude-iab-targeted-copy">
+        Classify an article with the IAB model, then have Claude write an ad brief, newsletter blurb, and content pitch from the signals.
+    </Card>
 </CardGroup>
diff --git a/docs.json b/docs.json
index cf094d4..975c4a8 100644
--- a/docs.json
+++ b/docs.json
@@ -179,7 +179,8 @@
             "group": "Integrations & plugins",
             "pages": [
               "cookbook/claude-code-csv-sanitizer",
-              "cookbook/langchain-resume-screening"
+              "cookbook/langchain-resume-screening",
+              "cookbook/claude-iab-targeted-copy"
             ]
           }
         ]