diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 6d14780..8bdb9b7 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -1,86 +1,42 @@ -* xref:get-started:adp-overview.adoc[Overview] -* xref:get-started:byoc-quickstart.adoc[ADP Quickstart] -* xref:agents:index.adoc[Agents] -** xref:agents:overview.adoc[Overview] -** xref:agents:quickstart.adoc[AI Agent Quickstart] -** xref:agents:concepts.adoc[Concepts] -** xref:agents:a2a-concepts.adoc[Agent-to-Agent Concepts] -** xref:agents:create-agent.adoc[Create a Declarative Agent] -** xref:agents:byoa-register.adoc[Register Your Own Agent (BYOA)] -** xref:agents:system-prompts.adoc[System Prompt Best Practices] -** xref:agents:architecture-patterns.adoc[Architecture Patterns] -** xref:agents:integration-overview.adoc[Integration Overview] -** xref:agents:pipeline-integration-patterns.adoc[Pipeline Integration Patterns] -** xref:agents:monitor.adoc[Monitor Agents] -** xref:agents:troubleshoot/index.adoc[Troubleshoot] -*** xref:agents:troubleshoot/troubleshoot-ai-agents.adoc[Troubleshoot AI Agents] -*** xref:agents:tutorials/index.adoc[Tutorials] -**** xref:agents:tutorials/customer-support-agent.adoc[Customer Support Agent] -**** xref:agents:tutorials/transaction-dispute-resolution.adoc[Transaction Dispute Resolution] -* xref:mcp:index.adoc[MCP Servers] -** xref:mcp:overview.adoc[Overview] -** xref:mcp:create-server.adoc[Create a Server] -** xref:mcp:register-remote.adoc[Register a Self-Managed Server] -** xref:mcp:oauth-providers.adoc[Configure an OAuth Provider] -** xref:mcp:user-delegated-oauth.adoc[User-Delegated OAuth] -*** xref:mcp:github-oauth-tutorial.adoc[Tutorial: Set Up GitHub OAuth] -** xref:mcp:test-tools.adoc[Test a Server's Tools] -** xref:mcp:managed/index.adoc[Managed Catalog] -*** xref:mcp:managed/managed-catalog.adoc[Catalog Reference] -*** xref:mcp:managed/bamboohr.adoc[BambooHR] -*** xref:mcp:managed/sql.adoc[SQL] -*** xref:mcp:managed/kafka.adoc[Kafka] -*** 
xref:mcp:managed/slack.adoc[Slack] -*** xref:mcp:managed/ironclad.adoc[Ironclad] -*** xref:mcp:managed/jira.adoc[Jira] -*** xref:mcp:managed/metabase.adoc[Metabase] -*** xref:mcp:managed/netsuite.adoc[NetSuite] -*** xref:mcp:managed/openapi.adoc[OpenAPI] -*** xref:mcp:managed/ramp.adoc[Ramp] -*** xref:mcp:managed/workday.adoc[Workday] -*** xref:mcp:managed/zendesk.adoc[Zendesk] -* xref:ai-gateway:index.adoc[AI Gateway] -** xref:ai-gateway:overview.adoc[Overview] -** xref:ai-gateway:gateway-quickstart.adoc[AI Gateway Quickstart] -** xref:ai-gateway:gateway-architecture.adoc[Architecture] -** xref:ai-gateway:configure-provider.adoc[Configure Your LLM Provider] -*** xref:ai-gateway:bedrock-setup.adoc[Set Up AWS Bedrock] -** xref:ai-gateway:aggregation.adoc[MCP Aggregation] -** xref:ai-gateway:connect-agent.adoc[Connect Your Agent] -*** xref:ai-gateway:admin/index.adoc[For Admins] -**** xref:ai-gateway:admin/setup-guide.adoc[Setup Guide] -*** xref:ai-gateway:builders/index.adoc[For Builders] -**** xref:ai-gateway:builders/discover-gateways.adoc[Discover Gateways] -* xref:governance:index.adoc[Governance] -** xref:governance:dashboard/index.adoc[Governance Dashboard] -*** xref:governance:dashboard/overview.adoc[Read the Governance Overview] -** xref:governance:guardrails/index.adoc[Guardrails] -*** xref:governance:guardrails/overview.adoc[Overview] -*** xref:governance:guardrails/create-guardrail.adoc[Create a Guardrail] -*** xref:governance:guardrails/types-reference.adoc[Evaluator Types] -*** xref:governance:guardrails/violations.adoc[Read Violations] -*** xref:governance:guardrails/cost-tracking.adoc[Cost Tracking] -** xref:governance:budgets.adoc[Token Budgets and Limits] -** xref:governance:kill-switch.adoc[Kill Switch] -** xref:governance:permissions-overview.adoc[About ADP Permissions] -** xref:governance:permissions-reference.adoc[ADP Permissions Reference] -* xref:observability:index.adoc[Observability] -** xref:observability:concepts.adoc[Concepts] -** 
xref:observability:transcripts.adoc[Read a Transcript] -** xref:observability:byoa-telemetry.adoc[BYOA Telemetry (OpenTelemetry)] -** xref:observability:ingest-custom-traces.adoc[Ingest Custom Traces] -** xref:observability:metrics.adoc[Metrics] -** xref:observability:logs.adoc[Logs] -* xref:integrations:index.adoc[Integrations] -** xref:integrations:remote-mcp-clients.adoc[Remote MCP Clients] -** xref:integrations:claude-code.adoc[Claude Code] -** xref:integrations:cursor.adoc[Cursor] -** xref:integrations:continue.adoc[Continue] -** xref:integrations:cline.adoc[Cline] -** xref:integrations:copilot.adoc[GitHub Copilot] -* xref:reference:index.adoc[Reference] -** xref:get-started:rpk-install.adoc[Install or Update rpk] -** xref:reference:rpk/index.adoc[rpk Commands] +* xref:get-started:adp-overview.adoc[Quickstarts] +** xref:get-started:byoc-quickstart.adoc[ADP Quickstart] +** xref:get-started:gateway-quickstart.adoc[AI Gateway Quickstart] +** xref:get-started:quickstart.adoc[AI Agent Quickstart] + +* xref:connect:index.adoc[Connect data & tools] +** xref:connect:managed/managed-catalog.adoc[Plug in any app, database, or tool] +** xref:connect:create-agent.adoc[Turn your data source into an agent] +** xref:connect:create-server.adoc[Build a tool server for your own data] +** xref:connect:register-remote.adoc[Connect a tool server you host yourself] +** xref:connect:user-delegated-oauth.adoc[Let agents act as the signed-in user] +** xref:connect:byoa-register.adoc[Register your own agent (BYOA)] +** xref:connect:claude-code.adoc[Claude Code] +** xref:connect:remote-mcp-clients.adoc[Remote MCP clients] + +* xref:monitor:index.adoc[Monitor & debug] +** xref:monitor:transcripts.adoc[See what your agent did] +** xref:monitor:troubleshoot-ai-agents.adoc[Investigate a broken run] +** xref:monitor:metrics.adoc[Check speed, cost, and errors] +** xref:control:guardrails/violations.adoc[Review blocked requests] +** xref:monitor:byoa-telemetry.adoc[Send telemetry from agents 
you host] + +* xref:control:index.adoc[Control & govern] +** xref:control:dashboard/overview.adoc[See all your agents in one place] +** xref:control:guardrails/overview.adoc[Fix agents calling things they shouldn't] +** xref:control:guardrails/create-guardrail.adoc[Set safety rules for all agents] +** xref:control:budgets.adoc[Set spending limits] +** xref:control:permissions-overview.adoc[Control who can do what] + +* xref:gateway:index.adoc[Routing & LLM settings] +** xref:gateway:overview.adoc[How the gateway works] +** xref:gateway:configure-provider.adoc[Configure LLM provider] +*** xref:gateway:bedrock-setup.adoc[Set up AWS Bedrock] + +* xref:reference:index.adoc[Settings reference] +** xref:control:permissions-reference.adoc[Roles and permissions matrix] +** xref:control:guardrails/types-reference.adoc[Safety rule providers] +** xref:reference:rpk-install.adoc[Install rpk] +** xref:reference:rpk/index.adoc[rpk command reference] *** xref:reference:rpk/rpk-ai/rpk-ai.adoc[rpk ai] **** xref:reference:rpk/rpk-ai/rpk-ai-agent.adoc[] ***** xref:reference:rpk/rpk-ai/rpk-ai-agent-list.adoc[] diff --git a/modules/ROOT/partials/byoc-aws-requirement.adoc b/modules/ROOT/partials/byoc-aws-requirement.adoc deleted file mode 100644 index d424382..0000000 --- a/modules/ROOT/partials/byoc-aws-requirement.adoc +++ /dev/null @@ -1 +0,0 @@ -NOTE: The Agentic Data Plane is supported on BYOC environments running with AWS and Redpanda version 25.3 and later. diff --git a/modules/ROOT/partials/integrations/claude-code-admin.adoc b/modules/ROOT/partials/integrations/claude-code-admin.adoc deleted file mode 100644 index 2971e84..0000000 --- a/modules/ROOT/partials/integrations/claude-code-admin.adoc +++ /dev/null @@ -1,470 +0,0 @@ -= Configure AI Gateway for Claude Code -:description: Configure Redpanda AI Gateway to support Claude Code clients. 
-:page-topic-type: how-to -:personas: platform_engineer -:learning-objective-1: Configure AI Gateway endpoints for Claude Code connectivity -:learning-objective-2: Set up authentication and access control for Claude Code clients -:learning-objective-3: Deploy MCP tool aggregation for Claude Code tool discovery - -Configure Redpanda AI Gateway to support Claude Code clients accessing LLM providers and MCP tools through a unified endpoint. - -After reading this page, you will be able to: - -* [ ] Configure AI Gateway endpoints for Claude Code connectivity. -* [ ] Set up authentication and access control for Claude Code clients. -* [ ] Deploy MCP tool aggregation for Claude Code tool discovery. - -== Prerequisites - -* AI Gateway deployed on a BYOC environment running Redpanda version 25.3 or later -* Administrator access to the AI Gateway UI -* At least one LLM provider API key (OpenAI, Anthropic, or Google Gemini) -* Understanding of xref:ai-gateway/gateway-architecture.adoc[AI Gateway concepts] - -== Architecture overview - -Claude Code connects to AI Gateway through two primary endpoints: - -* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1` for chat completions -* MCP endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp` for tool discovery and execution - -The gateway handles: - -. Authentication through bearer tokens in the `Authorization` header -. Gateway selection through the endpoint URL -. Model selection using the `vendor/model_id` format -. MCP server aggregation for multi-tool workflows -. Request logging and cost tracking per gateway - -== Enable LLM providers - -Claude Code requires access to LLM providers through the gateway. Enable at least one provider. - -=== Configure Anthropic - -Claude Code uses Anthropic models by default. To enable Anthropic: - -. Navigate to *AI Gateway* > *Providers* in ADP -. Select *Anthropic* from the provider list -. Click *Add configuration* -. Enter your Anthropic API key -. 
Click *Save* - -The gateway can now route requests to Anthropic models. - -=== Configure OpenAI - -To enable OpenAI as a provider: - -. Navigate to *AI Gateway* > *Providers* -. Select *OpenAI* from the provider list -. Click *Add configuration* -. Enter your OpenAI API key -. Click *Save* - -=== Enable models in the catalog - -After enabling providers, enable specific models: - -. Navigate to *AI Gateway* > *Models* -. Enable the models you want Claude Code clients to access -+ -Common models for Claude Code: -+ -* `anthropic/claude-opus-4.6-5` -* `anthropic/claude-sonnet-4.5` -* `openai/gpt-5.2` -* `openai/o1-mini` - -. Click *Save* - -Models appear in the catalog with the `vendor/model_id` format that Claude Code uses in requests. - -== Create a gateway for Claude Code clients - -Create a dedicated gateway to isolate Claude Code traffic and apply specific policies. - -=== Gateway configuration - -. Navigate to *Agentic* > *AI Gateway* > *Gateways* -. Click *Create Gateway* -. Enter gateway details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`claude-code-gateway` (or your preferred name) - -|Workspace -|Select the workspace for access control grouping - -|Description -|Gateway for Claude Code IDE clients -|=== - -. Click *Create* -. Copy the gateway ID from the gateway details page - -The gateway ID is embedded in the gateway endpoint URL. - -=== Configure provider pools with fallback - -Configure a primary provider with automatic failover: - -. Navigate to the gateway's *LLM* tab -. Under *Provider pools*, click *Add pool* -. Add a *Primary provider pool*: -+ -* Provider: Anthropic -* Model: All enabled Anthropic models -* Load balancing: Round robin (if multiple Anthropic configurations exist) - -. Add a *Fallback provider pool*: -+ -* Provider: OpenAI -* Model: All enabled OpenAI models -* Failover conditions: Rate limits, timeouts, 5xx errors - -. 
Click *Save* - -Claude Code requests go to Anthropic by default and fail over to OpenAI if Anthropic is unavailable. - -=== Apply rate limits - -Prevent runaway usage from Claude Code clients: - -. Navigate to the gateway's *LLM* tab -. Under *Rate Limit*, configure: -+ -[cols="1,2"] -|=== -|Setting |Recommended Value - -|Global rate limit -|100 requests per minute - -|Per-user rate limit -|10 requests per minute (if using user headers) -|=== - -. Click *Save* - -The gateway blocks requests exceeding these limits and returns HTTP 429 errors. - -=== Set spending limits - -Control LLM costs: - -. Under *Spend Limit*, configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Monthly budget -|$5,000 (adjust based on expected usage) - -|Enforcement -|Block requests after budget exceeded -|=== - -. Click *Save* - -The gateway tracks estimated costs per request and blocks traffic when the monthly budget is exhausted. - -== Configure MCP tool aggregation - -Enable Claude Code to discover and use tools from multiple MCP servers through a single endpoint. - -=== Add MCP servers - -. Navigate to the gateway's *MCP* tab -. Click *Add MCP Server* -. Enter server details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Display name -|Descriptive name (for example, `redpanda-data-catalog`) - -|Endpoint URL -|MCP server endpoint (for example, xref:integrations:remote-mcp-clients.adoc[Remote MCP server] URL) - -|Authentication -|Bearer token or other authentication mechanism -|=== - -. Click *Save* - -Repeat for each MCP server you want to aggregate. - -=== Enable deferred tool loading - -Reduce token costs by deferring tool discovery: - -. Under *MCP Settings*, enable *Deferred tool loading* -. 
Click *Save* - -When enabled: - -* Claude Code initially receives only a search tool and orchestrator tool -* Claude Code queries for specific tools by name when needed -* Token usage decreases by 80-90% for agents with many tools configured - -=== Add the MCP orchestrator - -The MCP orchestrator reduces multi-step workflows to single calls: - -. Under *MCP Settings*, enable *MCP Orchestrator* -. Configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Orchestrator model -|Select a model with strong code generation capabilities (for example, `anthropic/claude-sonnet-4.5`) - -|Execution timeout -|30 seconds -|=== - -. Click *Save* - -Claude Code can now invoke the orchestrator tool to execute complex, multi-step operations in a single request. - -== Configure authentication - -Claude Code clients authenticate using bearer tokens. - -=== Generate API tokens - -. Navigate to *Security* > *API Tokens* in ADP -. Click *Create Token* -. Enter token details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`claude-code-access` - -|Scopes -|`ai-gateway:read`, `ai-gateway:write` - -|Expiration -|Set appropriate expiration based on security policies -|=== - -. Click *Create* -. Copy the token (it appears only once) - -Distribute this token to Claude Code users through secure channels. - -=== Token rotation - -Implement token rotation for security: - -. Create a new token before the existing token expires -. Distribute the new token to users -. Monitor usage of the old token in (observability dashboard) -. Revoke the old token after all users have migrated - -== Configure Claude Code clients - -Provide these instructions to users configuring Claude Code. 
- -=== CLI configuration - -Users can configure Claude Code using the CLI: - -[source,bash] ----- -claude mcp add \ - --transport http \ - redpanda-aigateway \ - https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp \ - --header "Authorization: Bearer YOUR_API_TOKEN" ----- - -Replace: - -* `{CLUSTER_ID}`: Your Redpanda cluster ID -* `YOUR_API_TOKEN`: The API token generated earlier - -=== Configuration file - -Alternatively, users can edit `~/.claude.json` (user-level) or `.mcp.json` (project-level): - -[source,json] ----- -{ - "mcpServers": { - "redpanda-ai-gateway": { - "type": "http", - "url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp", - "headers": { - "Authorization": "Bearer YOUR_API_TOKEN" - } - } - } -} ----- - -This configuration: - -* Connects Claude Code to the aggregated MCP endpoint -* Includes authentication headers - -== Monitor Claude Code usage - -Track Claude Code activity through gateway observability features. - -=== View request logs - -. Navigate to *AI Gateway* > *Observability* > *Logs* -. Filter by gateway ID: `claude-code-gateway` -. Review: -+ -* Request timestamps and duration -* Model used per request -* Token usage (prompt and completion tokens) -* Estimated cost per request -* HTTP status codes and errors - -=== Analyze metrics - -. Navigate to *AI Gateway* > *Observability* > *Metrics* -. Select the Claude Code gateway -. 
Review: -+ -[cols="1,2"] -|=== -|Metric |Purpose - -|Request volume -|Identify usage patterns and peak times - -|Token usage -|Track consumption trends - -|Estimated spend -|Monitor costs against budget - -|Latency (p50, p95, p99) -|Detect performance issues - -|Error rate -|Identify failing requests or misconfigured clients -|=== - - -=== Query logs through API - -Programmatically access logs for integration with monitoring systems: - -[source,bash] ----- -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ - -H "Authorization: Bearer YOUR_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "gateway_id": "GATEWAY_ID", - "start_time": "2026-01-01T00:00:00Z", - "end_time": "2026-01-14T23:59:59Z", - "limit": 100 - }' ----- - -== Security considerations - -Apply these security best practices for Claude Code deployments. - -=== Limit token scope - -Create tokens with minimal required scopes: - -* `ai-gateway:read`: Required for MCP tool discovery -* `ai-gateway:write`: Required for LLM requests and tool execution - -Avoid granting broader scopes like `admin` or `cluster:write`. - -=== Implement network restrictions - -If Claude Code clients connect from known IP ranges, configure network policies: - -. Use cloud provider security groups to restrict access to AI Gateway endpoints -. Allowlist only the IP ranges where Claude Code clients operate -. Monitor for unauthorized access attempts in request logs - -=== Enforce token expiration - -Set short token lifetimes for high-security environments: - -* Development environments: 90 days -* Production environments: 30 days - -Automate token rotation to reduce manual overhead. - -=== Audit tool access - -Review which MCP tools Claude Code clients can access: - -. Periodically audit the MCP servers configured in the gateway -. Remove unused or deprecated MCP servers -. 
Monitor tool execution logs for unexpected behavior - -== Troubleshooting - -Common issues and solutions when configuring AI Gateway for Claude Code. - -=== Claude Code cannot connect to gateway - -Symptom: Connection errors when Claude Code tries to discover tools or send LLM requests. - -Causes and solutions: - -* **Invalid gateway endpoint**: Verify the gateway endpoint URL matches the endpoint from the console -* **Expired token**: Generate a new API token and update the Claude Code configuration -* **Network connectivity**: Verify the cluster endpoint is accessible from the client network -* **Provider not enabled**: Ensure at least one LLM provider is enabled and has models in the catalog - -=== Tools not appearing in Claude Code - -Symptom: Claude Code does not discover MCP tools. - -Causes and solutions: - -* **MCP servers not configured**: Add MCP server endpoints in the gateway's MCP tab -* **Deferred loading enabled but search failing**: Check that the search tool is correctly configured -* **MCP server authentication failing**: Verify MCP server authentication credentials in the gateway configuration - -=== High costs or token usage - -Symptom: Token usage and costs exceed expectations. - -Causes and solutions: - -* **Deferred tool loading disabled**: Enable deferred tool loading to reduce tokens by 80-90% -* **No rate limits**: Apply per-minute rate limits to prevent runaway usage -* **Missing spending limits**: Set monthly budget limits with blocking enforcement -* **Expensive models**: Route to cost-effective models (for example, Claude Sonnet instead of Opus) for non-critical requests - -=== Requests failing with 429 errors - -Symptom: Claude Code receives HTTP 429 Too Many Requests errors. 
- -Causes and solutions: - -* **Rate limit exceeded**: Review and increase rate limits if usage is legitimate -* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover pools -* **Budget exhausted**: Verify monthly spending limit has not been reached - -== Next steps - -* xref:integrations:remote-mcp-clients.adoc[] diff --git a/modules/ROOT/partials/integrations/claude-code-user.adoc b/modules/ROOT/partials/integrations/claude-code-user.adoc deleted file mode 100644 index a529ce7..0000000 --- a/modules/ROOT/partials/integrations/claude-code-user.adoc +++ /dev/null @@ -1,403 +0,0 @@ -= Configure Claude Code with AI Gateway -:description: Configure Claude Code to use Redpanda AI Gateway for unified LLM access and MCP tool aggregation. -:page-topic-type: how-to -:personas: agent_builder -:learning-objective-1: Configure Claude Code to connect to AI Gateway endpoints -:learning-objective-2: Set up MCP server integration through AI Gateway -:learning-objective-3: Verify Claude Code is routing requests through the gateway - -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up Claude Code to route LLM requests and access MCP tools through the gateway's unified endpoints. - -After reading this page, you will be able to: - -* [ ] Configure Claude Code to connect to AI Gateway endpoints. -* [ ] Set up MCP server integration through AI Gateway. -* [ ] Verify Claude Code is routing requests through the gateway. 
- -== Prerequisites - -Before configuring Claude Code, ensure you have: - -* Claude Code CLI installed (download from https://github.com/anthropics/claude-code[Anthropic's GitHub^]) -* An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) -* Your AI Gateway credentials: -** Gateway endpoint URL (for example, `\https://gw-abc123.ai.panda.com`) -** API key with access to the gateway - -== Configuration methods - -Claude Code supports two configuration approaches for connecting to AI Gateway: - -[cols="1,2,2"] -|=== -|Method |Best for |Trade-offs - -|CLI command -|Quick setup, single gateway -|Must re-run if configuration changes - -|Configuration file -|Multiple gateways, complex setups, version control -|Manual file editing required -|=== - -Choose the method that matches your workflow. The CLI command is faster for getting started, while the configuration file provides more flexibility for production use. - -== Configure using CLI - -The `claude mcp add` command configures Claude Code to connect to your AI Gateway's MCP endpoint. - -=== Add MCP server connection - -[,bash] ----- -claude mcp add \ - --transport http \ - redpanda-aigateway \ - /mcp \ - --header "Authorization: Bearer YOUR_API_KEY" ----- - -Replace the following values: - -* `/mcp` - Your gateway's MCP endpoint -* `YOUR_API_KEY` - Your Redpanda API key - -This command configures the HTTP transport for MCP, which allows Claude Code to discover and invoke tools from all MCP servers configured in your gateway. 
- -=== Configure LLM routing through gateway - -To route Claude Code's LLM requests through the gateway instead of directly to Anthropic, set the following environment variables: - -[,bash] ----- -export ANTHROPIC_BASE_URL= -export ANTHROPIC_AUTH_TOKEN=YOUR_API_KEY ----- - -This routes all Claude model requests through your gateway, giving you centralized observability and policy enforcement. To persist these values, add them to your shell profile or store them in the Claude settings JSON (`~/.claude.json`). - -== Configure using configuration file - -For more complex configurations or when managing multiple gateways, edit the Claude Code configuration file directly. - -=== Locate configuration file - -Claude Code stores configuration in: - -* macOS/Linux: `~/.claude.json` (user-level) or `.mcp.json` (project-level) -* Windows: `%USERPROFILE%\.claude.json` - -=== Basic configuration - -Create or edit `~/.claude.json` with the following structure: - -[,json] ----- -{ - "mcpServers": { - "redpanda-ai-gateway": { - "type": "http", - "url": "/mcp", - "headers": { - "Authorization": "Bearer YOUR_API_KEY" - } - } - } -} ----- - -Replace placeholder values: - -* `` - Your gateway endpoint URL -* `YOUR_API_KEY` - Your Redpanda API key - -=== Multiple gateway configuration - -To configure different gateways for development and production: - -[,json] ----- -{ - "mcpServers": { - "redpanda-staging": { - "type": "http", - "url": "/mcp", - "headers": { - "Authorization": "Bearer STAGING_API_KEY" - } - }, - "redpanda-production": { - "type": "http", - "url": "/mcp", - "headers": { - "Authorization": "Bearer PROD_API_KEY" - } - } - } -} ----- - -Switch between gateways by selecting the appropriate MCP server when using Claude Code. - -=== Configuration with environment variables - -For sensitive credentials, use environment variables instead of hardcoding values. Claude Code supports `${VAR}` interpolation in project-level `.mcp.json` files but not in `~/.claude.json`. 
To use environment variables, create a `.mcp.json` file in your project root: - -[,json] ----- -{ - "mcpServers": { - "redpanda-ai-gateway": { - "type": "http", - "url": "${REDPANDA_GATEWAY_URL}/mcp", - "headers": { - "Authorization": "Bearer ${REDPANDA_API_KEY}" - } - } - } -} ----- - -Set the environment variables before launching Claude Code: - -[,bash] ----- -export REDPANDA_GATEWAY_URL="" -export REDPANDA_API_KEY="your-api-key" ----- - -On Windows (PowerShell): - -[,powershell] ----- -$env:REDPANDA_GATEWAY_URL = "" -$env:REDPANDA_API_KEY = "your-api-key" ----- - -== Verify configuration - -After configuring Claude Code, verify it connects correctly to your AI Gateway. - -=== Test MCP tool discovery - -List available MCP tools to confirm Claude Code can reach your gateway's MCP endpoint: - -[,bash] ----- -claude mcp list ----- - -Expected output should show: - -* The `redpanda-ai-gateway` server connection -* Status: Connected -* Available tools from your configured MCP servers - -If deferred tool loading is enabled in your gateway, you'll see a search tool and the MCP orchestrator tool instead of all tools upfront. - -=== Verify gateway routing - -Check that requests route through the gateway by monitoring the AI Gateway dashboard: - -. Sign in to ADP -. Navigate to your gateway's observability dashboard -. Send a test request from Claude Code: -+ -[,bash] ----- -echo "Write a simple Python hello world function" | claude ----- - -. Refresh the dashboard and verify: -** Request appears in the logs -** Model shows as `anthropic/claude-sonnet-4.5` (or your configured model) -** Request succeeded (status 200) -** Token usage and estimated cost are recorded - -If the request doesn't appear in the dashboard, see <>. 
- -== Advanced configuration - -=== Custom request timeout - -Configure timeout for MCP requests in the configuration file: - -[,json] ----- -{ - "mcpServers": { - "redpanda-ai-gateway": { - "type": "http", - "url": "/mcp", - "headers": { - "Authorization": "Bearer YOUR_API_KEY" - }, - "timeout": 30000 - } - } -} ----- - -The `timeout` value is in milliseconds. Default is 10000 (10 seconds). Increase this for MCP tools that perform long-running operations. - -=== Debug mode - -Enable debug logging to troubleshoot connection issues: - -[,bash] ----- -export CLAUDE_DEBUG=1 -claude ----- - -Debug mode shows: - -* HTTP request and response headers -* MCP tool discovery messages -* Gateway routing decisions (if exposed in response headers) -* Error details - -[[troubleshooting]] -== Troubleshooting - -=== MCP server not connecting - -**Symptom**: `claude mcp list` shows "Connection failed" or no tools available. - -**Causes and solutions**: - -. **Incorrect endpoint URL** -+ -Verify your MCP endpoint is correct. It should be `{gateway-url}/mcp`, not just `{gateway-url}`. -+ -[,bash] ----- -# Correct -/mcp - -# Incorrect - ----- - -. **Authentication failure** -+ -Check that your API key is valid and has access to the gateway: -+ -[,bash] ----- -curl -H "Authorization: Bearer YOUR_API_KEY" \ - /mcp ----- -+ -You should receive a valid MCP protocol response. If you get `401 Unauthorized`, regenerate your API key in ADP. - -. **Gateway endpoint URL mismatch** -+ -Verify your gateway endpoint URL matches exactly. Copy it directly from the AI Gateway UI rather than typing it manually. - -. **Network connectivity issues** -+ -Test basic connectivity to the gateway endpoint: -+ -[,bash] ----- -curl -I /mcp ----- -+ -If this times out, check your network configuration, firewall rules, or VPN connection. - -=== Requests not appearing in gateway dashboard - -**Symptom**: Claude Code works, but you don't see requests in the AI Gateway observability dashboard. 
- -**Causes and solutions**: - -. **Wrong gateway configured** -+ -Verify that the gateway endpoint URL in your configuration matches the gateway you're viewing in the dashboard. - -. **Log ingestion delay** -+ -Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. - -. **Model name format error** -+ -Ensure requests use the `vendor/model_id` format (for example, `anthropic/claude-sonnet-4.5`), not just the model name (for example, `claude-sonnet-4.5`). - -=== High latency after gateway integration - -**Symptom**: Requests are slower after routing through the gateway. - -**Causes and solutions**: - -. **Gateway geographic distance** -+ -If your gateway is in a different region than you or the upstream provider, this adds network latency. Check gateway region in ADP. - -. **Provider pool failover** -+ -If your gateway is configured with fallback providers, check the logs to see if requests are failing over. Failover adds latency. - -. **MCP tool aggregation overhead** -+ -Aggregating tools from multiple MCP servers adds processing time. Use deferred tool loading to reduce this overhead (see xref:ai-gateway:aggregation.adoc[]). - -. **Rate limiting** -+ -If you're hitting rate limits, the gateway may be queuing requests. Check the observability dashboard for rate limit metrics. - -=== Configuration file not loading - -**Symptom**: Changes to `.claude.json` don't take effect. - -**Solutions**: - -. **Restart Claude Code** -+ -Configuration changes require restarting Claude Code: -+ -[,bash] ----- -# Kill any running Claude Code processes -pkill claude - -# Start Claude Code again -claude ----- - -. **Validate JSON syntax** -+ -Ensure your `.claude.json` is valid JSON. Use a JSON validator: -+ -[,bash] ----- -python3 -m json.tool ~/.claude.json ----- - -. **Check file permissions** -+ -Verify Claude Code can read the configuration file: -+ -[,bash] ----- -ls -la ~/.claude.json ----- -+ -The file should be readable by your user. 
If not, fix permissions: -+ -[,bash] ----- -chmod 600 ~/.claude.json ----- - -== Next steps - -* xref:ai-gateway:aggregation.adoc[] - -== Related pages - -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway -* xref:ai-gateway:gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits diff --git a/modules/ROOT/partials/integrations/cline-admin.adoc b/modules/ROOT/partials/integrations/cline-admin.adoc deleted file mode 100644 index 3b2580d..0000000 --- a/modules/ROOT/partials/integrations/cline-admin.adoc +++ /dev/null @@ -1,550 +0,0 @@ -= Configure AI Gateway for Cline -:description: Configure Redpanda AI Gateway to support Cline clients. -:page-topic-type: how-to -:personas: platform_engineer -:learning-objective-1: Configure AI Gateway endpoints for Cline connectivity -:learning-objective-2: Set up authentication and access control for Cline clients -:learning-objective-3: Deploy MCP tool aggregation for Cline tool discovery - -Configure Redpanda AI Gateway to support Cline (formerly Claude Dev) clients accessing LLM providers and MCP tools through a unified endpoint. - -After reading this page, you will be able to: - -* [ ] Configure AI Gateway endpoints for Cline connectivity. -* [ ] Set up authentication and access control for Cline clients. -* [ ] Deploy MCP tool aggregation for Cline tool discovery. - -== Prerequisites - -* AI Gateway deployed on a BYOC environment running Redpanda version 25.3 or later -* Administrator access to the AI Gateway UI -* At least one LLM provider API key (Anthropic or OpenAI) -* Understanding of xref:ai-gateway:gateway-architecture.adoc[AI Gateway concepts] - -== About Cline - -Cline is a VS Code extension designed for autonomous AI development workflows. It connects to Claude models through the native Anthropic API format, sending requests to `/v1/messages` endpoints.
Cline supports long-running tasks, browser integration, and autonomous operations, with full MCP support for tool discovery and execution. - -Key characteristics: - -* Uses native Anthropic format (compatible with OpenAI-compatible endpoints) -* Designed for autonomous, multi-step workflows -* Supports MCP protocol for external tool integration -* Operates as a VS Code extension with persistent context -* Requires configuration similar to Claude Code - -== Architecture overview - -Cline connects to AI Gateway through two primary endpoints: - -* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1` for chat completions -* MCP endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp` for tool discovery and execution - -The gateway handles: - -. Authentication through bearer tokens in the `Authorization` header -. Model selection using the `vendor/model_id` format -. MCP server aggregation for multi-tool workflows -. Request logging and cost tracking per gateway - -== Enable LLM providers - -Cline requires access to LLM providers through the gateway. Enable at least one provider. - -=== Configure Anthropic - -Cline uses Anthropic models by default. To enable Anthropic: - -. Navigate to *AI Gateway* > *Providers* in ADP -. Select *Anthropic* from the provider list -. Click *Add configuration* -. Enter your Anthropic API key -. Click *Save* - -The gateway can now route requests to Anthropic models. - -=== Configure OpenAI - -To enable OpenAI as a provider: - -. Navigate to *AI Gateway* > *Providers* -. Select *OpenAI* from the provider list -. Click *Add configuration* -. Enter your OpenAI API key -. Click *Save* - -=== Enable models in the catalog - -After enabling providers, enable specific models: - -. Navigate to *AI Gateway* > *Models* -. Enable the models you want Cline clients to access -+ -Common models for Cline: -+ -* `anthropic/claude-opus-4.6-5` -* `anthropic/claude-sonnet-4.5` -* `openai/gpt-5.2` -* `openai/o1-mini` - -. 
Click *Save* - -Models appear in the catalog with the `vendor/model_id` format that Cline uses in requests. - -== Create a gateway for Cline clients - -Create a dedicated gateway to isolate Cline traffic and apply specific policies. - -=== Gateway configuration - -. Navigate to *Agentic* > *AI Gateway* > *Gateways* -. Click *Create Gateway* -. Enter gateway details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`cline-gateway` (or your preferred name) - -|Workspace -|Select the workspace for access control grouping - -|Description -|Gateway for Cline VS Code extension clients -|=== - -. Click *Create* -. Copy the gateway endpoint URL from the gateway details page - -=== Configure provider pools with fallback - -Configure a primary provider with automatic failover: - -. Navigate to the gateway's *LLM* tab -. Under *Provider pools*, click *Add pool* -. Add a *Primary provider pool*: -+ -* Provider: Anthropic -* Model: All enabled Anthropic models -* Load balancing: Round robin (if multiple Anthropic configurations exist) - -. Add a *Fallback provider pool*: -+ -* Provider: OpenAI -* Model: All enabled OpenAI models -* Failover conditions: Rate limits, timeouts, 5xx errors - -. Click *Save* - -Cline requests go to Anthropic by default and fail over to OpenAI if Anthropic is unavailable. - -=== Apply rate limits - -Prevent runaway usage from autonomous Cline sessions: - -. Navigate to the gateway's *LLM* tab -. Under *Rate Limit*, configure: -+ -[cols="1,2"] -|=== -|Setting |Recommended Value - -|Global rate limit -|120 requests per minute - -|Per-user rate limit -|15 requests per minute (if using user headers) -|=== -+ -Cline can generate multiple requests during autonomous operations. Higher limits than typical interactive clients may be necessary. - -. Click *Save* - -The gateway blocks requests exceeding these limits and returns HTTP 429 errors. - -=== Set spending limits - -Control LLM costs during autonomous operations: - -. 
Under *Spend Limit*, configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Monthly budget -|$8,000 (adjust based on expected autonomous usage) - -|Enforcement -|Block requests after budget exceeded -|=== -+ -Autonomous operations can consume significant tokens. Monitor spending patterns after deployment. - -. Click *Save* - -The gateway tracks estimated costs per request and blocks traffic when the monthly budget is exhausted. - -== Configure MCP tool aggregation - -Enable Cline to discover and use tools from multiple MCP servers through a single endpoint. - -=== Add MCP servers - -. Navigate to the gateway's *MCP* tab -. Click *Add MCP Server* -. Enter server details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Display name -|Descriptive name (for example, `filesystem-tools`, `code-analysis-tools`) - -|Endpoint URL -|MCP server endpoint (for example, xref:integrations:remote-mcp-clients.adoc[Remote MCP server] URL) - -|Authentication -|Bearer token or other authentication mechanism -|=== - -. Click *Save* - -Repeat for each MCP server you want to aggregate. - -=== Enable deferred tool loading - -Reduce token costs for Cline sessions with many available tools: - -. Under *MCP Settings*, enable *Deferred tool loading* -. Click *Save* - -When enabled: - -* Cline initially receives only a search tool and orchestrator tool -* Cline queries for specific tools by name when needed -* Token usage decreases by 80-90% for configurations with many tools - -This is particularly important for Cline because autonomous operations can make many tool discovery calls. - -=== Add the MCP orchestrator - -The MCP orchestrator reduces multi-step autonomous workflows to single calls: - -. Under *MCP Settings*, enable *MCP Orchestrator* -. 
Configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Orchestrator model -|Select a model with strong code generation capabilities (for example, `anthropic/claude-sonnet-4.5`) - -|Execution timeout -|45 seconds -|=== -+ -A longer timeout than is typical for interactive clients allows complex autonomous operations to complete. - -. Click *Save* - -Cline can now invoke the orchestrator tool to execute complex, multi-step operations in a single request, which is ideal for autonomous development workflows. - -== Configure authentication - -Cline clients authenticate using bearer tokens. - -=== Generate API tokens - -. Navigate to *Security* > *API Tokens* in ADP -. Click *Create Token* -. Enter token details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`cline-access` - -|Scopes -|`ai-gateway:read`, `ai-gateway:write` - -|Expiration -|Set appropriate expiration based on security policies -|=== - -. Click *Create* -. Copy the token (it appears only once) - -Distribute this token to Cline users through secure channels. - -=== Token rotation - -Implement token rotation for security: - -. Create a new token before the existing token expires -. Distribute the new token to users -. Monitor usage of the old token in the observability dashboard -. Revoke the old token after all users have migrated - -== Configure Cline clients - -Provide these instructions to users configuring Cline in VS Code. - -=== API provider configuration - -Users configure Cline's API provider and credentials through the Cline extension interface. - -IMPORTANT: API provider configuration (API keys, base URLs, custom headers) is managed through Cline's extension global state, not VS Code `settings.json`. These settings are stored in the extension's internal state and must be configured through the Cline UI. - -==== Configure through Cline UI - -. Open the Cline extension panel in VS Code -. Click the settings icon or gear menu -. 
Configure the API connection: -+ -* *API Provider*: Select "Custom" or "Anthropic" -* *API Base URL*: The gateway endpoint URL from the gateway details page -* *API Key*: The API token generated earlier - -Replace: - -* `YOUR_API_TOKEN`: The API token generated earlier - -=== MCP server configuration - -Configure Cline to connect to the aggregated MCP endpoint through the Cline UI or by editing `cline_mcp_settings.json`. - -==== Enable MCP mode - -. Open VS Code Settings (Cmd/Ctrl + ,) -. Search for "Cline > Mcp: Mode" -. Enable the MCP mode toggle - -==== Configure MCP server through Cline UI - -. Open the Cline extension panel in VS Code -. Navigate to MCP server settings -. Add the Redpanda AI Gateway MCP server with the connection details - -==== Configure through cline_mcp_settings.json - -Alternatively, edit `cline_mcp_settings.json` (located in the Cline extension storage directory): - -[source,json] ----- -{ - "mcpServers": { - "redpanda-ai-gateway": { - "type": "streamableHttp", - "url": "GATEWAY_MCP_ENDPOINT_URL", - "headers": { - "Authorization": "Bearer YOUR_API_TOKEN" - } - } - } -} ----- - -Replace: - -* `GATEWAY_MCP_ENDPOINT_URL`: The gateway MCP endpoint URL from the gateway details page -* `YOUR_API_TOKEN`: The API token generated earlier - -This configuration connects Cline to the aggregated MCP endpoint with authentication. - -=== Configuration scope - -Cline stores configuration in the extension's global state: - -* *API Provider settings*: Stored globally per VS Code instance, applies to all workspaces -* *MCP server settings*: Can be configured per workspace using `cline_mcp_settings.json` - -For project-specific MCP server configurations (for example, development vs production gateways), place `cline_mcp_settings.json` in the workspace directory and configure different MCP servers per project. - -== Monitor Cline usage - -Track Cline activity through gateway observability features. - -=== View request logs - -. 
Navigate to *AI Gateway* > *Observability* > *Logs* -. Filter by gateway ID: `cline-gateway` -. Review: -+ -* Request timestamps and duration -* Model used per request -* Token usage (prompt and completion tokens) -* Estimated cost per request -* HTTP status codes and errors - -Cline autonomous operations may generate request sequences. Look for patterns to identify long-running sessions. - -=== Analyze metrics - -. Navigate to *AI Gateway* > *Observability* > *Metrics* -. Select the Cline gateway -. Review: -+ -[cols="1,2"] -|=== -|Metric |Purpose - -|Request volume -|Identify autonomous session patterns and peak times - -|Token usage -|Track consumption trends from multi-step operations - -|Estimated spend -|Monitor costs against budget (autonomous operations can be expensive) - -|Latency (p50, p95, p99) -|Detect performance issues in autonomous workflows - -|Error rate -|Identify failing requests or misconfigured clients -|=== - - -=== Query logs through API - -Programmatically access logs for integration with monitoring systems: - -[source,bash] ----- -# Set REDPANDA_API_TOKEN environment variable before running -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ - -H "Authorization: Bearer ${REDPANDA_API_TOKEN}" \ - -H "Content-Type: application/json" \ - -d '{ - "gateway_id": "GATEWAY_ID", - "start_time": "2026-01-01T00:00:00Z", - "end_time": "2026-01-14T23:59:59Z", - "limit": 100 - }' ----- - -NOTE: Set the `REDPANDA_API_TOKEN` environment variable to your API token before running this command. - -== Security considerations - -Apply these security best practices for Cline deployments. - -=== Limit token scope - -Create tokens with minimal required scopes: - -* `ai-gateway:read`: Required for MCP tool discovery -* `ai-gateway:write`: Required for LLM requests and tool execution - -Avoid granting broader scopes like `admin` or `cluster:write`. 
- -Because Cline performs autonomous operations, limit what tools it can access through MCP server selection. - -=== Implement network restrictions - -If Cline clients connect from known networks (corporate VPN, office IP ranges), configure network policies: - -. Use cloud provider security groups to restrict access to AI Gateway endpoints -. Allowlist only the IP ranges where Cline clients operate -. Monitor for unauthorized access attempts in request logs - -=== Enforce token expiration - -Set short token lifetimes for high-security environments: - -* Development environments: 90 days -* Production environments: 30 days - -Automate token rotation to reduce manual overhead. - -=== Audit tool access - -Review which MCP tools Cline clients can access: - -. Periodically audit the MCP servers configured in the gateway -. Remove unused or deprecated MCP servers -. Monitor tool execution logs for unexpected autonomous behavior -. Consider creating separate gateways for different trust levels - -Because Cline operates autonomously, carefully control which tools it can invoke. - -=== Monitor autonomous operations - -Set up alerts for unusual patterns: - -* Request rate spikes (may indicate runaway autonomous loops) -* High error rates (may indicate tool compatibility issues) -* Unexpected tool invocations (may indicate misconfigured autonomous behavior) -* Budget consumption spikes (autonomous operations can be expensive) - -== Troubleshooting - -Common issues and solutions when configuring AI Gateway for Cline. - -=== Cline cannot connect to gateway - -Symptom: Connection errors when Cline tries to discover tools or send LLM requests. 
- -Causes and solutions: - -* **Invalid gateway ID**: Verify the gateway endpoint URL matches the URL from the gateway details page in the console -* **Expired token**: Generate a new API token and update the Cline settings -* **Network connectivity**: Verify the cluster endpoint is accessible from the client network -* **Provider not enabled**: Ensure at least one LLM provider is enabled and has models in the catalog -* **VS Code settings not applied**: Reload VS Code window after changing settings (Cmd/Ctrl + Shift + P > "Reload Window") - -=== Tools not appearing in Cline - -Symptom: Cline does not discover MCP tools. - -Causes and solutions: - -* **MCP servers not configured**: Add MCP server endpoints in the gateway's MCP tab -* **Deferred loading enabled but search failing**: Check that the search tool is correctly configured -* **MCP server authentication failing**: Verify MCP server authentication credentials in the gateway configuration -* **Cline MCP configuration missing**: Ensure the `mcpServers` entry is configured in `cline_mcp_settings.json` or through the Cline UI - -=== High costs or token usage - -Symptom: Token usage and costs exceed expectations. - -Causes and solutions: - -* **Deferred tool loading disabled**: Enable deferred tool loading to reduce tokens by 80-90% -* **Autonomous loops**: Monitor for repeated similar requests (may indicate autonomous operation stuck in a loop) -* **No rate limits**: Apply per-minute rate limits to prevent runaway autonomous usage -* **Missing spending limits**: Set monthly budget limits with blocking enforcement -* **Expensive models for autonomous work**: Route autonomous operations to cost-effective models (for example, Claude Sonnet instead of Opus) -* **Too many tools in context**: Reduce the number of aggregated MCP servers or enable deferred loading - -=== Requests failing with 429 errors - -Symptom: Cline receives HTTP 429 Too Many Requests errors. 
- -Causes and solutions: - -* **Rate limit exceeded**: Review and increase rate limits if autonomous usage is legitimate -* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover pools -* **Budget exhausted**: Verify monthly spending limit has not been reached -* **Autonomous operation too aggressive**: Configure Cline to slow down request rate - -=== Autonomous operations timing out - -Symptom: Cline operations fail with timeout errors. - -Causes and solutions: - -* **MCP orchestrator timeout too short**: Increase orchestrator execution timeout to 60 seconds -* **Complex multi-step operations**: Break down tasks or use the orchestrator tool for better efficiency -* **Slow MCP server responses**: Check MCP server performance and consider caching - -== Next steps - -* xref:integrations:remote-mcp-clients.adoc[] diff --git a/modules/ROOT/partials/integrations/continue-admin.adoc b/modules/ROOT/partials/integrations/continue-admin.adoc deleted file mode 100644 index 91dac29..0000000 --- a/modules/ROOT/partials/integrations/continue-admin.adoc +++ /dev/null @@ -1,717 +0,0 @@ -= Configure AI Gateway for Continue.dev -:description: Configure Redpanda AI Gateway to support Continue.dev clients. -:page-topic-type: how-to -:personas: platform_engineer -:learning-objective-1: Configure AI Gateway endpoints for Continue.dev connectivity -:learning-objective-2: Set up multi-provider backends with native format routing -:learning-objective-3: Deploy MCP tool aggregation for Continue.dev tool discovery - -Configure Redpanda AI Gateway to support Continue.dev clients accessing multiple LLM providers and MCP tools through flexible, native-format endpoints. - -After reading this page, you will be able to: - -* [ ] Configure AI Gateway endpoints for Continue.dev connectivity. -* [ ] Set up multi-provider backends with native format routing. -* [ ] Deploy MCP tool aggregation for Continue.dev tool discovery. 
- -== Prerequisites - -* AI Gateway deployed on a BYOC environment running Redpanda version 25.3 or later -* Administrator access to the AI Gateway UI -* API keys for at least one LLM provider (Anthropic, OpenAI, or others) -* Understanding of xref:ai-gateway:gateway-architecture.adoc[AI Gateway concepts] - -== About Continue.dev - -Continue.dev is a highly configurable open-source AI coding assistant that integrates with VS Code and JetBrains IDEs. Unlike other AI assistants, Continue.dev uses native provider API formats rather than requiring transforms to a unified format. This architectural choice provides maximum flexibility but requires specific gateway configuration. - -Key characteristics: - -* Uses native provider formats (Anthropic format for Anthropic, OpenAI format for OpenAI) -* Supports multiple LLM providers simultaneously with per-provider configuration -* Custom API endpoints through `apiBase` configuration -* Custom headers through `requestOptions.headers` -* Built-in MCP support for tool discovery and execution -* Autocomplete, chat, and inline edit modes - -== Architecture overview - -Continue.dev connects to AI Gateway differently than unified-format clients: - -* Each provider requires a separate backend configured without format transforms -* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/{provider}` (provider-specific paths) -* MCP endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp` for tool discovery and execution - -The gateway handles: - -. Authentication through bearer tokens in the `Authorization` header -. Provider-specific request formats without transformation -. Model routing using provider-native model identifiers -. MCP server aggregation for multi-tool workflows -. Request logging and cost tracking per gateway - -== Enable LLM providers - -Continue.dev works with multiple providers. Enable the providers your users will access. 
- -=== Configure Anthropic - -To enable Anthropic with native format support: - -. Navigate to *AI Gateway* > *Providers* in ADP -. Select *Anthropic* from the provider list -. Click *Add configuration* -. Enter your Anthropic API key -. Under *Format*, select *Native Anthropic* (not OpenAI-compatible) -. Click *Save* - -The gateway now accepts Anthropic's native `/v1/messages` format. - -=== Configure OpenAI - -To enable OpenAI: - -. Navigate to *AI Gateway* > *Providers* -. Select *OpenAI* from the provider list -. Click *Add configuration* -. Enter your OpenAI API key -. Under *Format*, select *Native OpenAI* -. Click *Save* - -=== Configure additional providers - -Continue.dev supports many providers. For each provider: - -. Add the provider configuration in the gateway -. Ensure the format is set to the provider's native format -. Do not enable format transforms (Continue.dev handles format differences in its client code) - -Common additional providers: - -* Google Gemini (native Google format) -* Mistral AI (OpenAI-compatible format) -* Together AI (OpenAI-compatible format) -* Ollama (OpenAI-compatible format for local models) - -=== Enable models in the catalog - -After enabling providers, enable specific models: - -. Navigate to *AI Gateway* > *Models* -. Enable the models you want Continue.dev clients to access -+ -Common models for Continue.dev: -+ -* `claude-opus-4.6` (Anthropic, high quality) -* `claude-sonnet-4.5` (Anthropic, balanced) -* `gpt-5.2` (OpenAI, high quality) -* `gpt-5.2-mini` (OpenAI, fast autocomplete) -* `o1-mini` (OpenAI, reasoning) - -. Click *Save* - -Continue.dev uses provider-native model identifiers (for example, `claude-sonnet-4.5` not `anthropic/claude-sonnet-4.5`). - -== Create a gateway for Continue.dev clients - -Create a dedicated gateway to isolate Continue.dev traffic and apply specific policies. - -=== Gateway configuration - -. Navigate to *Agentic* > *AI Gateway* > *Gateways* -. Click *Create Gateway* -. 
Enter gateway details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`continue-gateway` (or your preferred name) - -|Workspace -|Select the workspace for access control grouping - -|Description -|Gateway for Continue.dev IDE clients -|=== - -. Click *Create* -. Copy the gateway endpoint URL from the gateway details page - -=== Configure provider-specific backends - -Continue.dev requires separate backend configurations for each provider because it uses native formats. - -==== Anthropic backend - -. Navigate to the gateway's *Backends* tab -. Click *Add Backend* -. Configure: -+ -[cols="1,2"] -|=== -|Field |Value - -|Backend name -|`anthropic-native` - -|Provider -|Anthropic - -|Format -|Native Anthropic (no transform) - -|Path -|`/v1/anthropic` - -|Enabled models -|All Anthropic models you enabled in the catalog -|=== - -. Click *Save* - -Continue.dev will send requests to `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/anthropic` using Anthropic's native format. - -==== OpenAI backend - -. Click *Add Backend* -. Configure: -+ -[cols="1,2"] -|=== -|Field |Value - -|Backend name -|`openai-native` - -|Provider -|OpenAI - -|Format -|Native OpenAI (no transform) - -|Path -|`/v1/openai` - -|Enabled models -|All OpenAI models you enabled in the catalog -|=== - -. Click *Save* - -Continue.dev will send requests to `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/openai` using OpenAI's native format. - -==== Additional provider backends - -Repeat the backend configuration process for each provider: - -* Google Gemini: `/v1/google`, native Google format -* Mistral: `/v1/mistral`, OpenAI-compatible format -* Ollama (if proxying local models): `/v1/ollama`, OpenAI-compatible format - -=== Configure provider failover - -For providers with multiple API keys, configure failover: - -. In the backend's provider pool configuration, add multiple provider configurations -. 
Set failover conditions: -+ -* Rate limits (HTTP 429) -* Timeouts (no response within 30 seconds) -* 5xx errors (provider unavailable) - -. Configure load balancing: Round robin across available keys -. Click *Save* - -Continue.dev requests automatically fail over to healthy API keys when the primary key experiences issues. - -=== Apply rate limits - -Prevent runaway usage from Continue.dev clients: - -. Navigate to the gateway's *Rate Limits* tab -. Configure global limits: -+ -[cols="1,2"] -|=== -|Setting |Recommended Value - -|Global rate limit -|200 requests per minute (Continue.dev autocomplete can generate many requests) - -|Per-user rate limit -|20 requests per minute (if using user identification headers) - -|Per-backend limits -|Vary by provider (autocomplete backends need higher limits) -|=== - -. Click *Save* - -The gateway blocks requests exceeding these limits and returns HTTP 429 errors. - -==== Rate limit considerations for autocomplete - -Continue.dev's autocomplete feature generates frequent, short requests. Configure higher rate limits for autocomplete-specific backends: - -* Autocomplete models (for example, `gpt-5.2-mini`): 100 requests per minute per user -* Chat models (for example, `claude-sonnet-4.5`): 20 requests per minute per user - -=== Set spending limits - -Control LLM costs across all providers: - -. Navigate to the gateway's *Spend Limits* tab -. Configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Monthly budget -|$10,000 (adjust based on expected usage) - -|Enforcement -|Block requests after budget exceeded - -|Alert threshold -|80% of budget (sends notification) -|=== - -. Click *Save* - -The gateway tracks estimated costs per request across all providers and blocks traffic when the monthly budget is exhausted. - -== Configure MCP tool aggregation - -Enable Continue.dev to discover and use tools from multiple MCP servers through a single endpoint. - -=== Add MCP servers - -. Navigate to the gateway's *MCP* tab -. 
Click *Add MCP Server* -. Enter server details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Display name -|Descriptive name (for example, `redpanda-data-catalog`, `code-search-tools`) - -|Endpoint URL -|MCP server endpoint (for example, xref:integrations:remote-mcp-clients.adoc[Remote MCP server] URL) - -|Authentication -|Bearer token or other authentication mechanism -|=== - -. Click *Save* - -Repeat for each MCP server you want to aggregate. - -=== Enable deferred tool loading - -Reduce token costs for Continue.dev sessions with many available tools: - -. Under *MCP Settings*, enable *Deferred tool loading* -. Click *Save* - -When enabled: - -* Continue.dev initially receives only a search tool and orchestrator tool -* Continue.dev queries for specific tools by name when needed -* Token usage decreases by 80-90% for configurations with many tools - -This is particularly important for Continue.dev because autocomplete and chat modes both use tool discovery. - -=== Add the MCP orchestrator - -The MCP orchestrator reduces multi-step workflows to single calls: - -. Under *MCP Settings*, enable *MCP Orchestrator* -. Configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Orchestrator model -|Select a model with strong code generation capabilities (for example, `claude-sonnet-4.5`) - -|Execution timeout -|30 seconds - -|Backend -|Select the Anthropic backend (orchestrator works best with Claude models) -|=== - -. Click *Save* - -Continue.dev can now invoke the orchestrator tool to execute complex, multi-step operations in a single request. - -== Configure authentication - -Continue.dev clients authenticate using bearer tokens. - -=== Generate API tokens - -. Navigate to *Security* > *API Tokens* in ADP -. Click *Create Token* -. Enter token details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`continue-access` - -|Scopes -|`ai-gateway:read`, `ai-gateway:write` - -|Expiration -|Set appropriate expiration based on security policies -|=== - -. Click *Create* -. 
Copy the token (it appears only once) - -Distribute this token to Continue.dev users through secure channels. - -=== Token rotation - -Implement token rotation for security: - -. Create a new token before the existing token expires -. Distribute the new token to users -. Monitor usage of the old token in the observability dashboard -. Revoke the old token after all users have migrated - -== Configure Continue.dev clients - -Provide these instructions to users configuring Continue.dev in their IDE. - -=== Configuration file location - -Continue.dev supports both JSON and YAML configuration formats. This guide uses YAML (`config.yaml`) because it supports MCP server configuration and environment variable interpolation: - -* VS Code: `~/.continue/config.yaml` -* JetBrains: `~/.continue/config.yaml` - -NOTE: While `config.json` is still supported for basic LLM configuration, `config.yaml` is required for MCP server integration. - -=== Multi-provider configuration - -Users configure Continue.dev with separate provider entries for each backend: - -[source,yaml] ----- -models: - - title: Claude Sonnet (Redpanda) - provider: anthropic - model: claude-sonnet-4.5 - apiBase: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/anthropic - apiKey: YOUR_API_TOKEN - - - title: GPT-5.2 (Redpanda) - provider: openai - model: gpt-5.2 - apiBase: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/openai - apiKey: YOUR_API_TOKEN - - - title: GPT-5.2-mini (Autocomplete) - provider: openai - model: gpt-5.2-mini - apiBase: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/openai - apiKey: YOUR_API_TOKEN - -tabAutocompleteModel: - title: GPT-5.2-mini (Autocomplete) - provider: openai - model: gpt-5.2-mini - apiBase: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/openai - apiKey: YOUR_API_TOKEN ----- - -Replace: - -* `{CLUSTER_ID}`: Your Redpanda cluster ID -* `YOUR_API_TOKEN`: The API token generated earlier - -=== MCP server configuration - -Configure Continue.dev to connect 
to the aggregated MCP endpoint. - -==== Recommended: Directory-based configuration - -The preferred method is to create MCP server configuration files in the `~/.continue/mcpServers/` directory: - -. Create the directory: `mkdir -p ~/.continue/mcpServers` -. Create `~/.continue/mcpServers/redpanda-ai-gateway.yaml`: -+ -[source,yaml] ----- -transport: - type: streamable-http - url: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp - headers: - Authorization: Bearer YOUR_API_TOKEN ----- -+ -IMPORTANT: For production deployments, use environment variable interpolation with `${{ secrets.VARIABLE }}` syntax instead of hardcoding tokens. See xref:ai-gateway/integrations/continue-user.adoc#configure-env-vars[Configure with environment variables] in the user guide for details. - -Continue.dev automatically discovers MCP server configurations in this directory. - -==== Alternative: Inline configuration - -Alternatively, embed MCP server configuration in `~/.continue/config.yaml`: - -[source,yaml] ----- -mcpServers: - - transport: - type: streamable-http - url: https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp - headers: - Authorization: Bearer YOUR_API_TOKEN ----- - -Replace: - -* `{CLUSTER_ID}`: Your Redpanda cluster ID -* `YOUR_API_TOKEN`: The API token generated earlier - -This configuration connects Continue.dev to the aggregated MCP endpoint with authentication headers. - -=== Model selection strategy - -Configure different models for different Continue.dev modes: - -[cols="1,2,1"] -|=== -|Mode |Recommended Model |Reason - -|Chat -|`claude-sonnet-4.5` or `gpt-5.2` -|High quality for complex questions - -|Autocomplete -|`gpt-5.2-mini` -|Fast, cost-effective for frequent requests - -|Inline edit -|`claude-sonnet-4.5` -|Balanced quality and speed for code modifications - -|Embeddings -|`text-embedding-3-small` -|Cost-effective for code search -|=== - -== Monitor Continue.dev usage - -Track Continue.dev activity through gateway observability features. 
- -=== View request logs - -. Navigate to *AI Gateway* > *Observability* > *Logs* -. Filter by gateway ID: `continue-gateway` -. Review: -+ -* Request timestamps and duration -* Backend and model used per request -* Token usage (prompt and completion tokens) -* Estimated cost per request -* HTTP status codes and errors - -Continue.dev generates different request patterns: - -* Autocomplete: Many short requests with low token counts -* Chat: Longer requests with context and multi-turn conversations -* Inline edit: Medium-length requests with code context - -=== Analyze metrics - -. Navigate to *AI Gateway* > *Observability* > *Metrics* -. Select the Continue.dev gateway -. Review: -+ -[cols="1,2"] -|=== -|Metric |Purpose - -|Request volume by backend -|Identify which providers are most used - -|Token usage by model -|Track consumption patterns (autocomplete vs chat) - -|Estimated spend by backend -|Monitor costs across providers - -|Latency (p50, p95, p99) by backend -|Detect provider-specific performance issues - -|Error rate by backend -|Identify failing providers or misconfigured backends -|=== - - -=== Query logs through API - -Programmatically access logs for integration with monitoring systems: - -[source,bash] ----- -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ - -H "Authorization: Bearer YOUR_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "gateway_id": "GATEWAY_ID", - "start_time": "2026-01-01T00:00:00Z", - "end_time": "2026-01-14T23:59:59Z", - "limit": 100 - }' ----- - -== Security considerations - -Apply these security best practices for Continue.dev deployments. - -=== Limit token scope - -Create tokens with minimal required scopes: - -* `ai-gateway:read`: Required for MCP tool discovery -* `ai-gateway:write`: Required for LLM requests and tool execution - -Avoid granting broader scopes like `admin` or `cluster:write`. 
- -=== Implement network restrictions - -If Continue.dev clients connect from known networks, configure network policies: - -. Use cloud provider security groups to restrict access to AI Gateway endpoints -. Allowlist only the IP ranges where Continue.dev clients operate -. Monitor for unauthorized access attempts in request logs - -=== Enforce token expiration - -Set short token lifetimes for high-security environments: - -* Development environments: 90 days -* Production environments: 30 days - -Automate token rotation to reduce manual overhead. - -=== Audit tool access - -Review which MCP tools Continue.dev clients can access: - -. Periodically audit the MCP servers configured in the gateway -. Remove unused or deprecated MCP servers -. Monitor tool execution logs for unexpected behavior - -=== Protect API keys in configuration - -Continue.dev stores the API token in plain text in `config.yaml`. Remind users to: - -* Never commit `config.yaml` to version control -* Use file system permissions to restrict access (for example, `chmod 600 ~/.continue/config.yaml`) -* Rotate tokens if they suspect compromise - -== Troubleshooting - -Common issues and solutions when configuring AI Gateway for Continue.dev. - -=== Continue.dev cannot connect to gateway - -Symptom: Connection errors when Continue.dev tries to discover tools or send LLM requests. - -Causes and solutions: - -* **Invalid gateway ID**: Verify the gateway endpoint URL matches the URL from the console -* **Expired token**: Generate a new API token and update the Continue.dev configuration -* **Wrong backend path**: Verify `apiBase` matches the backend path (for example, `/v1/anthropic` not `/v1`) -* **Network connectivity**: Verify the cluster endpoint is accessible from the client network -* **Provider not enabled**: Ensure at least one backend is configured with models enabled - -=== Model not found errors - -Symptom: Continue.dev shows "model not found" or similar errors. 
- -Causes and solutions: - -* **Model not enabled in catalog**: Enable the model in the gateway's model catalog -* **Model identifier mismatch**: Use provider-native names (for example, `claude-sonnet-4.5` not `anthropic/claude-sonnet-4.5`) -* **Wrong backend for model**: Verify the model is associated with the correct backend (Anthropic models with Anthropic backend) - -=== Format errors or unexpected responses - -Symptom: Responses are malformed or Continue.dev reports format errors. - -Causes and solutions: - -* **Transform enabled on backend**: Ensure backend format is set to native (no OpenAI-compatible transform for Anthropic) -* **Wrong provider for apiBase**: Verify Continue.dev's `provider` field matches the backend's provider -* **Headers not passed**: Confirm `requestOptions.headers` is correctly configured - -=== Autocomplete not working or slow - -Symptom: Autocomplete suggestions don't appear or are delayed. - -Causes and solutions: - -* **Wrong model for autocomplete**: Use a fast model like `gpt-5.2-mini` in `tabAutocompleteModel` -* **Rate limits too restrictive**: Increase rate limits for autocomplete backend -* **High backend latency**: Check backend metrics and consider provider failover -* **Token exhaustion**: Verify spending limits haven't been reached - -=== Tools not appearing in Continue.dev - -Symptom: Continue.dev does not discover MCP tools. - -Causes and solutions: - -* **MCP configuration missing**: Ensure `mcpServers` is configured -* **MCP servers not configured in gateway**: Add MCP server endpoints in the gateway's MCP tab -* **Deferred loading enabled but search failing**: Check that the search tool is correctly configured -* **MCP server authentication failing**: Verify MCP server authentication credentials in the gateway configuration - -=== High costs or token usage - -Symptom: Token usage and costs exceed expectations. 
- -Causes and solutions: - -* **Autocomplete using expensive model**: Configure `tabAutocompleteModel` to use `gpt-5.2-mini` instead of larger models -* **Deferred tool loading disabled**: Enable deferred tool loading to reduce tokens by 80-90% -* **No rate limits**: Apply per-minute rate limits to prevent runaway usage -* **Missing spending limits**: Set monthly budget limits with blocking enforcement -* **Chat using wrong model**: Route chat requests to cost-effective models (for example, `claude-sonnet-4.5` instead of `claude-opus-4.6`) - -=== Requests failing with 429 errors - -Symptom: Continue.dev receives HTTP 429 Too Many Requests errors. - -Causes and solutions: - -* **Rate limit exceeded**: Review and increase rate limits if usage is legitimate (autocomplete needs higher limits) -* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover to alternate API keys -* **Budget exhausted**: Verify monthly spending limit has not been reached - -=== Different results from different providers - -Symptom: Same prompt produces different results when switching providers. - -This is expected behavior, not a configuration issue: - -* Different models have different capabilities and response styles -* Continue.dev uses native formats, which may include provider-specific parameters -* Users should select the appropriate model for their task (quality vs speed vs cost) - -== Next steps - -* xref:integrations:remote-mcp-clients.adoc[] diff --git a/modules/ROOT/partials/integrations/continue-user.adoc b/modules/ROOT/partials/integrations/continue-user.adoc deleted file mode 100644 index 279f1fe..0000000 --- a/modules/ROOT/partials/integrations/continue-user.adoc +++ /dev/null @@ -1,844 +0,0 @@ -= Configure Continue.dev with AI Gateway -:description: Configure Continue.dev to use Redpanda AI Gateway for unified LLM access, MCP tool integration, and AI-assisted coding. 
-:page-topic-type: how-to -:personas: agent_builder -:learning-objective-1: Configure Continue.dev to connect to AI Gateway for chat and autocomplete -:learning-objective-2: Set up MCP server integration through AI Gateway -:learning-objective-3: Optimize Continue.dev settings for cost and performance - -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up Continue.dev to route LLM requests and access MCP tools through the gateway's unified endpoints. - -After reading this page, you will be able to: - -* [ ] Configure Continue.dev to connect to AI Gateway for chat and autocomplete. -* [ ] Set up MCP server integration through AI Gateway. -* [ ] Optimize Continue.dev settings for cost and performance. - -== Prerequisites - -Before configuring Continue.dev, ensure you have: - -* Continue.dev extension installed in your code editor: -** VS Code: Search for "Continue" in Extensions -** JetBrains IDEs: Install from the JetBrains Marketplace -* An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) -* Your AI Gateway credentials: -** Gateway endpoint URL (for example, `\https://gw.ai.panda.com`) -** API key with access to the gateway - -== About Continue.dev - -Continue.dev is an open-source AI coding assistant that integrates with VS Code and JetBrains IDEs. It provides: - -* Chat interface for code questions and generation -* Tab autocomplete powered by LLMs -* Codebase indexing for context-aware suggestions -* Slash commands for common workflows -* Extensible architecture with custom context providers - -By routing Continue.dev through AI Gateway, you gain centralized observability, cost controls, and the ability to aggregate multiple MCP servers into a single interface. 
- -== Configuration files - -Continue.dev supports two configuration file formats: - -* `config.json` (legacy format) -* `config.yaml` (recommended format) - -Both files are stored in the same location: - -* VS Code: `~/.continue/` -* JetBrains: `~/.continue/` - -Create the directory if it doesn't exist: - -[,bash] ----- -mkdir -p ~/.continue ----- - -=== Choose a configuration format - -[cols="1,2,2"] -|=== -|Format |Use when |Limitations - -|`config.json` -|You need basic LLM configuration without MCP servers -|Does not support MCP server configuration or environment variable interpolation - -|`config.yaml` -|You need MCP server integration or environment variable interpolation -|Requires Continue.dev version that supports YAML (recent versions) -|=== - -TIP: Use `config.yaml` for new setups to take advantage of MCP server integration and the `${{ secrets.* }}` environment variable syntax. - -== Basic configuration - -Create or edit `~/.continue/config.json` with the following structure to connect to AI Gateway: - -[,json] ----- -{ - "models": [ - { - "title": "Redpanda AI Gateway - Claude", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "" - } - ] -} ----- - -Replace placeholder values: - -* `YOUR_REDPANDA_API_KEY` - Your Redpanda API key - -The `provider` field tells Continue.dev which SDK to use (Anthropic format), while `apiBase` routes the request through your gateway. The gateway then forwards the request to the appropriate provider based on the model name. - -== Configure multiple models - -Continue.dev can switch between different models for different tasks. 
Configure multiple models to optimize for quality and cost: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Claude Sonnet (default)", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "" - }, - { - "title": "Gateway - Claude Opus (complex tasks)", - "provider": "anthropic", - "model": "claude-opus-4.6", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "" - }, - { - "title": "Gateway - GPT-5.2", - "provider": "openai", - "model": "gpt-5.2", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "" - } - ] -} ----- - -Switch between models in Continue.dev's chat interface by clicking the model selector dropdown. - -== Configure tab autocomplete - -Continue.dev supports a separate model for tab autocomplete, which generates code suggestions as you type. Use a faster, cost-effective model for autocomplete: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Claude Sonnet", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "" - } - ], - "tabAutocompleteModel": { - "title": "Gateway - Claude Haiku (autocomplete)", - "provider": "anthropic", - "model": "claude-haiku", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "" - } -} ----- - -This configuration uses Claude Sonnet for chat interactions and Claude Haiku for autocomplete. Haiku provides faster responses at lower cost, which is ideal for autocomplete where speed matters more than reasoning depth. - -== Configure with OpenAI provider format - -AI Gateway supports both native provider formats and OpenAI-compatible format. 
If you prefer using the OpenAI format for all models, configure Continue.dev with the `openai` provider: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Claude Sonnet (OpenAI format)", - "provider": "openai", - "model": "anthropic/claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "/v1" - }, - { - "title": "Gateway - GPT-5.2 (OpenAI format)", - "provider": "openai", - "model": "openai/gpt-5.2", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "/v1" - } - ] -} ----- - -When using OpenAI provider format: - -* Set `provider` to `"openai"` -* Add `/v1` to the `apiBase` URL -* Use the `vendor/model_id` format for model names (for example, `anthropic/claude-sonnet-4.5`) - -== Configure MCP server integration - -Connect Continue.dev to your AI Gateway's MCP endpoint to aggregate tools from multiple MCP servers. - -Add the MCP configuration to `config.yaml`: - -[,yaml] ----- -models: - - title: Gateway - Claude Sonnet - provider: anthropic - model: claude-sonnet-4.5 - apiKey: YOUR_REDPANDA_API_KEY - apiBase: - -mcpServers: - - transport: - type: streamable-http - url: /mcp - headers: - Authorization: Bearer YOUR_REDPANDA_API_KEY ----- - -After adding this configuration: - -. Restart Continue.dev (reload your editor window) -. Click the tools icon in the Continue.dev sidebar -. Verify that tools from your configured MCP servers appear - -If using deferred tool loading in your gateway, you'll see a search tool and MCP orchestrator tool instead of all tools upfront. - -[[configure-env-vars]] -== Configure with environment variables - -For sensitive credentials or multi-environment setups, use Continue.dev's secrets interpolation in `config.yaml`. - -IMPORTANT: Environment variable interpolation is only supported in `config.yaml` files. The `config.json` format does not support any form of variable substitution - all values must be hardcoded. 
- -[,yaml] ----- -models: - - title: Gateway - Claude Sonnet - provider: anthropic - model: claude-sonnet-4.5 - apiKey: ${{ secrets.REDPANDA_API_KEY }} - apiBase: ${{ secrets.REDPANDA_GATEWAY_URL }} - -mcpServers: - - transport: - type: streamable-http - url: ${{ secrets.REDPANDA_GATEWAY_URL }}/mcp - headers: - Authorization: Bearer ${{ secrets.REDPANDA_API_KEY }} ----- - -IMPORTANT: Continue.dev uses the `${{ secrets.* }}` syntax for interpolation in `config.yaml`. Do not use the `${VAR}` shell syntax - Continue.dev treats it as a literal string rather than performing substitution. - -Set secrets in Continue.dev settings: - -. Open Continue.dev settings in your IDE -. Navigate to the "Secrets" section -. Add the following secrets: -+ -* `REDPANDA_GATEWAY_URL`: Your gateway endpoint URL -* `REDPANDA_API_KEY`: `your-api-key` - -== Project-level configuration - -Override global settings for specific projects by creating `.continuerc.json` in your project root: - -[,json] ----- -{ - "models": [ - { - "title": "Project Gateway - Claude Haiku", - "provider": "anthropic", - "model": "claude-haiku", - "apiKey": "your_project_api_key_here", - "apiBase": "" - } - ] -} ----- - -IMPORTANT: `.continuerc.json` does not support environment variable interpolation. You must hardcode values in this file. For dynamic configuration, use `~/.continue/config.yaml` with `${{ secrets.* }}` syntax (see <<configure-env-vars>>) or create a `~/.continue/config.ts` file for programmatic environment access. - -Project-level configuration takes precedence over global configuration. Use this to: - -* Route different projects through different gateways -* Use cost-effective models for internal projects -* Use premium models for customer-facing projects -* Separate billing between projects - -== Verify configuration - -After configuring Continue.dev, verify it connects correctly to your AI Gateway. - -=== Test chat interface - -. Open Continue.dev sidebar in your editor -.
Type a simple question: "What does this function do?" (with a file open) -. Wait for response - -Then verify in the AI Gateway dashboard: - -. Sign in to ADP -. Navigate to your gateway's observability dashboard -. Filter by gateway ID -. Verify: -** Request appears in logs -** Model shows correct format (for example, `claude-sonnet-4.5` for Anthropic native or `anthropic/claude-sonnet-4.5` for OpenAI format) -** Token usage and cost are recorded - -If the request doesn't appear, see <<troubleshooting>>. - -=== Test tab autocomplete - -. Open a code file in your editor -. Start typing a function or class definition -. Wait for autocomplete suggestions to appear - -Autocomplete requests also appear in the gateway dashboard, typically with: - -* Lower token counts than chat requests -* Higher request frequency -* The autocomplete model you configured - -=== Test MCP tool integration - -If you configured MCP servers: - -. Open Continue.dev chat -. Ask a question that requires a tool: "What's the weather forecast?" -. Continue.dev should: -** Discover the tool from the MCP server -** Invoke it with correct parameters -** Return the result - -Check the gateway dashboard for MCP tool invocation logs. - -== Advanced configuration - -=== Custom request headers - -Add custom headers for request tracking or routing: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Claude Sonnet", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "", - "requestOptions": { - "headers": { - "x-user-id": "developer-123", - "x-project": "main-app" - } - } - } - ] -} ----- - -These headers flow into observability so administrators can track usage and costs per developer or project.
- -=== Temperature and max tokens - -Configure model parameters for different behaviors: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Precise (low temperature)", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "", - "completionOptions": { - "temperature": 0.2, - "maxTokens": 2048 - } - }, - { - "title": "Gateway - Creative (high temperature)", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "", - "completionOptions": { - "temperature": 0.8, - "maxTokens": 4096 - } - } - ] -} ----- - -* Lower temperature (0.0-0.3): More deterministic, better for code generation -* Higher temperature (0.7-1.0): More creative, better for brainstorming -* `maxTokens`: Limit response length to control costs - -=== Context providers - -Configure which code context Continue.dev includes in requests: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Claude Sonnet", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "" - } - ], - "contextProviders": [ - { - "name": "code", - "params": { - "maxFiles": 5 - } - }, - { - "name": "diff" - }, - { - "name": "terminal" - } - ] -} ----- - -Available context providers: - -* `code`: Includes open files and highlighted code -* `diff`: Includes git diff of current changes -* `terminal`: Includes recent terminal output -* `problems`: Includes editor warnings and errors -* `folder`: Includes file tree structure - -Limiting context providers reduces token usage and costs. 
- -=== Slash commands - -Configure custom slash commands for common workflows: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Claude Sonnet", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "apiKey": "YOUR_REDPANDA_API_KEY", - "apiBase": "" - } - ], - "slashCommands": [ - { - "name": "review", - "description": "Review code for bugs and improvements", - "prompt": "Review this code for potential bugs, performance issues, and suggest improvements. Focus on:\n- Error handling\n- Edge cases\n- Code clarity\n\n{{{ input }}}" - }, - { - "name": "test", - "description": "Generate unit tests", - "prompt": "Generate comprehensive unit tests for this code. Include:\n- Happy path tests\n- Edge case tests\n- Error handling tests\n\n{{{ input }}}" - } - ] -} ----- - -Use slash commands in Continue.dev chat: - -* `/review` - Triggers code review prompt -* `/test` - Generates tests - -Custom commands help standardize prompts across teams and reduce token costs by avoiding repetitive instruction typing. - -[[troubleshooting]] -== Troubleshooting - -=== Continue.dev shows connection error - -**Symptom**: Continue.dev displays "Failed to connect" or requests return errors. - -**Causes and solutions**: - -. **Incorrect apiBase URL** -+ -Verify the URL format matches your provider choice: -+ -[,text] ----- -# Anthropic/native format (no /v1) -"apiBase": "" - -# OpenAI format (with /v1) -"apiBase": "/v1" ----- - -. **Provider mismatch** -+ -Ensure the `provider` field matches the API format you're using: -+ -* Native Anthropic: `"provider": "anthropic"` with no `/v1` in URL -* Native OpenAI: `"provider": "openai"` with `/v1` in URL -* OpenAI-compatible: `"provider": "openai"` with `/v1` in URL - -. **Authentication failure** -+ -Verify your API key is valid: -+ -[,bash] ----- -curl -H "Authorization: Bearer YOUR_API_KEY" \ - /v1/models ----- -+ -You should receive a list of available models. If you get `401 Unauthorized`, regenerate your API key in ADP. - -. 
**Invalid JSON syntax** -+ -Validate your `config.json` file: -+ -[,bash] ----- -python3 -m json.tool ~/.continue/config.json ----- -+ -Fix any syntax errors reported. - -=== Autocomplete not working - -**Symptom**: Tab autocomplete suggestions don't appear or are very slow. - -**Causes and solutions**: - -. **No autocomplete model configured** -+ -Verify `tabAutocompleteModel` is set in `config.json`. If missing, Continue.dev may fall back to chat model, which is slower and more expensive. - -. **Model too slow** -+ -Use a faster model for autocomplete: -+ -[,json] ----- -{ - "tabAutocompleteModel": { - "title": "Gateway - Claude Haiku", - "provider": "anthropic", - "model": "claude-haiku", - "apiKey": "YOUR_API_KEY", - "apiBase": "" - } -} ----- - -. **Network latency** -+ -Check gateway latency in the observability dashboard. If p95 latency is over 500ms, autocomplete will feel slow. Consider: -+ -* Using a gateway in a closer geographic region -* Switching to a faster model (Haiku over Sonnet) - -. **Autocomplete disabled** -+ -Check Continue.dev settings in your editor: -+ -* VS Code: Settings → Continue → Enable Tab Autocomplete -* JetBrains: Settings → Tools → Continue → Enable Autocomplete - -=== MCP tools not appearing - -**Symptom**: Continue.dev doesn't show tools from the MCP server. - -**Causes and solutions**: - -. **MCP configuration missing** -+ -Verify the `mcpServers` section exists in `config.yaml`. - -. **Incorrect MCP endpoint** -+ -The MCP URL should be `{gateway-url}/mcp`: -+ -[,text] ----- -# Correct -"url": "/mcp" - -# Incorrect -"url": "" ----- - -. **No MCP servers in gateway** -+ -Verify your gateway has at least one MCP server configured in the AI Gateway UI. - -. **Deferred tool loading enabled** -+ -If deferred tool loading is enabled, you'll see only a search tool initially. This is expected behavior. - -. 
**Editor restart needed** -+ -MCP configuration changes require reloading the editor window: -+ -* VS Code: Command Palette → Developer: Reload Window -* JetBrains: File → Invalidate Caches / Restart - -=== Requests not appearing in gateway dashboard - -**Symptom**: Continue.dev works, but requests don't appear in the AI Gateway observability dashboard. - -**Causes and solutions**: - -. **Wrong gateway endpoint** -+ -Verify that the `apiBase` URL matches the gateway endpoint you're viewing in the dashboard. - -. **Using direct provider connection** -+ -If `apiBase` points directly to a provider (for example, `https://api.anthropic.com`), requests won't route through the gateway. Verify it points to your gateway endpoint. - -. **Log ingestion delay** -+ -Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. - -=== High token costs - -**Symptom**: Continue.dev uses more tokens than expected, resulting in high costs. - -**Causes and solutions**: - -. **Too much context included** -+ -Continue.dev may be including too many files. Solutions: -+ -* Limit `maxFiles` in context providers -* Use `.continueignore` file to exclude unnecessary directories -* Close unused editor tabs before using Continue.dev - -. **Autocomplete using expensive model** -+ -Verify you're using a cost-effective model for autocomplete: -+ -[,json] ----- -{ - "tabAutocompleteModel": { - "provider": "anthropic", - "model": "claude-haiku" - } -} ----- - -. **Model parameters too high** -+ -Reduce `maxTokens` in `completionOptions` to limit response length: -+ -[,json] ----- -{ - "completionOptions": { - "maxTokens": 2048 - } -} ----- - -. **MCP overhead** -+ -If not using deferred tool loading, all tools load with every request. Enable deferred tool loading in your AI Gateway configuration (see xref:ai-gateway:aggregation.adoc[]). - -=== Configuration changes not taking effect - -**Symptom**: Changes to `config.json` don't apply. - -**Solutions**: - -. 
**Reload editor window** -+ -Configuration changes require reloading: -+ -* VS Code: Command Palette → Developer: Reload Window -* JetBrains: File → Invalidate Caches / Restart - -. **Invalid JSON syntax** -+ -Validate JSON syntax: -+ -[,bash] ----- -python3 -m json.tool ~/.continue/config.json ----- - -. **Project config overriding** -+ -Check if `.continuerc.json` in your project root overrides global settings. - -. **File permissions** -+ -Verify Continue.dev can read the config file: -+ -[,bash] ----- -ls -la ~/.continue/config.json ----- -+ -Fix permissions if needed: -+ -[,bash] ----- -chmod 600 ~/.continue/config.json ----- - -== Cost optimization tips - -=== Use different models for chat and autocomplete - -Chat interactions benefit from reasoning depth, while autocomplete needs speed: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Claude Sonnet", - "provider": "anthropic", - "model": "claude-sonnet-4.5" - } - ], - "tabAutocompleteModel": { - "title": "Gateway - Claude Haiku", - "provider": "anthropic", - "model": "claude-haiku" - } -} ----- - -This can reduce costs by 5-10x for autocomplete while maintaining quality for chat. - -=== Limit context window size - -Reduce the amount of code included in requests: - -Create `.continueignore` in your project root: - -[,text] ----- -# Exclude build artifacts -dist/ -build/ -node_modules/ - -# Exclude tests when not working on tests -**/*.test.* -**/*.spec.* - -# Exclude documentation -docs/ -*.md - -# Exclude large data files -*.json -*.csv ----- - -Then limit files in `config.json`: - -[,json] ----- -{ - "contextProviders": [ - { - "name": "code", - "params": { - "maxFiles": 3 - } - } - ] -} ----- - -=== Use MCP tools for documentation - -Instead of pasting documentation into chat, create MCP tools that fetch relevant sections on-demand. This reduces token costs by including only needed information. 
- -=== Monitor usage patterns - -Use the AI Gateway dashboard to identify optimization opportunities: - -. Navigate to your gateway's observability dashboard -. Filter by Continue.dev requests (use custom header if configured) -. Analyze: -** Token usage per request type (chat vs autocomplete) -** Most expensive queries -** High-frequency low-value requests - -=== Set model-specific limits - -Prevent runaway costs by configuring `maxTokens`: - -[,json] ----- -{ - "models": [ - { - "title": "Gateway - Claude Sonnet", - "provider": "anthropic", - "model": "claude-sonnet-4.5", - "completionOptions": { - "maxTokens": 2048 - } - } - ], - "tabAutocompleteModel": { - "completionOptions": { - "maxTokens": 256 - } - } -} ----- - -Autocomplete rarely needs more than 256 tokens, while chat responses can vary. - -== Next steps - -* xref:ai-gateway:aggregation.adoc[] - -== Related pages - -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway -* xref:ai-gateway:gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits -* xref:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway -* xref:ai-gateway/integrations/cline-user.adoc[]: Configure Cline with AI Gateway diff --git a/modules/ROOT/partials/integrations/github-copilot-user.adoc b/modules/ROOT/partials/integrations/github-copilot-user.adoc deleted file mode 100644 index 02d6054..0000000 --- a/modules/ROOT/partials/integrations/github-copilot-user.adoc +++ /dev/null @@ -1,876 +0,0 @@ -= Configure GitHub Copilot with AI Gateway -:description: Configure GitHub Copilot to use Redpanda AI Gateway for unified LLM access and custom provider management.
-:page-topic-type: how-to -:personas: agent_builder -:learning-objective-1: Configure GitHub Copilot in VS Code and JetBrains IDEs to route requests through AI Gateway -:learning-objective-2: Set up multi-tenancy with per-team gateways for cost tracking -:learning-objective-3: Configure enterprise BYOK deployments for team-wide Copilot access - -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up GitHub Copilot to route LLM requests through the gateway for centralized observability, cost management, and provider flexibility. - -After reading this page, you will be able to: - -* [ ] Configure GitHub Copilot in VS Code and JetBrains IDEs to route requests through AI Gateway. -* [ ] Set up multi-tenancy with per-team gateways for cost tracking. -* [ ] Configure enterprise BYOK deployments for team-wide Copilot access. - -== Prerequisites - -Before configuring GitHub Copilot, ensure you have: - -* GitHub Copilot subscription (Individual, Business, or Enterprise) -* An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) -* Your AI Gateway credentials: -** Gateway endpoint URL (for example, `https://gw.ai.panda.com`) -** Gateway ID (for example, `gateway-abc123`) -** API key with access to the gateway -* Your IDE: -** VS Code with GitHub Copilot extension installed -** Or JetBrains IDE (IntelliJ IDEA, PyCharm, and so on) with GitHub Copilot plugin - -== About GitHub Copilot and AI Gateway - -GitHub Copilot provides AI-powered code completion and chat within your IDE. By default, Copilot routes requests directly to GitHub's infrastructure, which uses OpenAI and other LLM providers. 
- -When you route Copilot through AI Gateway, you gain: - -* Centralized observability across all Copilot usage -* Cost attribution per developer, team, or project -* Provider flexibility (use your own API keys or alternative models) -* Policy enforcement (rate limits, spend controls) -* Multi-tenancy support for enterprise deployments - -== Configuration approaches - -GitHub Copilot supports different configuration approaches depending on your IDE and subscription tier: - -[cols="1,2,2,1"] -|=== -|IDE |Method |Subscription Tier |Complexity - -|VS Code -|Custom OpenAI models -|Individual, Business, Enterprise -|Medium - -|VS Code -|OAI Compatible Provider extension -|Individual, Business, Enterprise -|Low - -|JetBrains -|Enterprise BYOK -|Enterprise -|Low -|=== - -Choose the approach that matches your environment. VS Code users have multiple options, while JetBrains users need GitHub Copilot Enterprise with BYOK support. - -== Configure in VS Code - -VS Code offers two approaches for routing Copilot through AI Gateway: - -. Custom OpenAI models (manual configuration) -. OAI Compatible Provider extension (simplified) - -=== Option 1: Custom OpenAI models - -This approach configures VS Code to recognize your AI Gateway as a custom OpenAI-compatible provider. - -==== Configure custom models - -. Open VS Code Settings: -** macOS: `Cmd+,` -** Windows/Linux: `Ctrl+,` -. Search for `github.copilot.chat.customOAIModels` -. Click *Edit in settings.json* -. Add the following configuration: - -[,json] ----- -{ - "github.copilot.chat.customOAIModels": [ - { - "id": "anthropic/claude-sonnet-4.5", - "name": "Claude Sonnet 4.5 (Gateway)", - "endpoint": "https://gw.ai.panda.com/v1", - "provider": "redpanda-gateway" - }, - { - "id": "openai/gpt-5.2", - "name": "GPT-5.2 (Gateway)", - "endpoint": "https://gw.ai.panda.com/v1", - "provider": "redpanda-gateway" - } - ] -} ----- - -Replace `https://gw.ai.panda.com/v1` with your gateway endpoint. 
- -IMPORTANT: This experimental feature requires configuring API keys and custom headers through the Copilot Chat UI, not in `settings.json`. - -==== Configure API key and headers through Copilot Chat UI - -. Open Copilot Chat in VS Code (`Cmd+I` or `Ctrl+I`) -. Click the model selector dropdown -. Click *Manage Models* at the bottom of the dropdown -. Click *Add Model* -. Select your configured provider ("redpanda-gateway") -. Enter the connection details: -** *Base URL*: `https://gw.ai.panda.com/v1` (should match your settings.json endpoint) -** *API Key*: Your Redpanda API key -. Click *Save* - -==== Select model - -. Open Copilot chat with `Cmd+I` (macOS) or `Ctrl+I` (Windows/Linux) -. Click the model selector dropdown -. Choose a model from the "redpanda-gateway" provider - -=== Option 2: OAI Compatible Provider extension - -The OAI Compatible Provider extension provides enhanced support for OpenAI-compatible endpoints with custom headers. - -==== Install extension - -. Open VS Code Extensions (`Cmd+Shift+X` or `Ctrl+Shift+X`) -. Search for "OAI Compatible Provider" -. Click *Install* - -==== Configure base URL in settings - -Add the base URL configuration in VS Code settings: - -. Open VS Code Settings (`Cmd+,` or `Ctrl+,`) -. Search for `oaicopilot` -. Click *Edit in settings.json* -. Add the following: - -[,json] ----- -{ - "oaicopilot.baseUrl": "https://gw.ai.panda.com/v1", - "oaicopilot.models": [ - "anthropic/claude-sonnet-4.5", - "openai/gpt-5.2", - "openai/gpt-5.2-mini" - ] -} ----- - -Replace `https://gw.ai.panda.com/v1` with your gateway endpoint. - -==== Configure API key and headers through Copilot Chat UI - -IMPORTANT: Do not configure API keys or custom headers in `settings.json`. Use the Copilot Chat UI instead. - -. Open Copilot Chat in VS Code (`Cmd+I` or `Ctrl+I`) -. Click the model selector dropdown -. Click *Manage Models* -. Find the OAI Compatible Provider in the list -. Click *Configure* or *Edit* -. 
Enter the connection details: -** *API Key*: Your Redpanda API key -. Click *Save* - -==== Select model - -. Open Copilot chat with `Cmd+I` (macOS) or `Ctrl+I` (Windows/Linux) -. Click the model selector dropdown -. Choose a model from the OAI Compatible Provider - -== Configure in JetBrains IDEs - -JetBrains IDE integration requires GitHub Copilot Enterprise with Bring Your Own Key (BYOK) support. - -=== Prerequisites - -* GitHub Copilot Enterprise subscription -* BYOK enabled for your organization -* JetBrains IDE 2024.1 or later -* GitHub Copilot plugin version 1.4.0 or later - -=== Configure BYOK with AI Gateway - -. Open your JetBrains IDE (IntelliJ IDEA, PyCharm, and so on) -. Navigate to *Settings/Preferences*: -** macOS: `Cmd+,` -** Windows/Linux: `Ctrl+Alt+S` -. Go to *Tools* > *GitHub Copilot* -. Under *Advanced Settings*, find *Custom Model Configuration* -. Configure the OpenAI-compatible endpoint: - -[,text] ----- -Base URL: https://gw.ai.panda.com/v1 -API Key: your-redpanda-api-key ----- - -Replace placeholder values: - -* `https://gw.ai.panda.com/v1` - Your gateway endpoint -* `your-redpanda-api-key` - Your Redpanda API key - -=== Configure model selection - -In the GitHub Copilot settings: - -. Expand *Model Selection* -. Choose your preferred models from the AI Gateway: -** Chat model: `anthropic/claude-sonnet-4.5` or `openai/gpt-5.2` -** Code completion model: `openai/gpt-5.2-mini` (faster, cost-effective) - -Model format uses `vendor/model_id` pattern to route through the gateway to the appropriate provider. - -=== Test configuration - -. Open a code file -. Trigger code completion (start typing) -. Or open Copilot chat: -** Right-click > *Copilot* > *Open Chat* -** Or use shortcut: `Cmd+Shift+C` (macOS) or `Ctrl+Shift+C` (Windows/Linux) -. Verify suggestions appear - -Check the AI Gateway dashboard to confirm requests are logged. 
- -== Multi-tenancy configuration - -For organizations with multiple teams or projects sharing AI Gateway, use separate gateways to track usage per team. - -=== Approach 1: One gateway per team - -Create separate gateways for each team: - -* Team A Gateway: ID `team-a-gateway-123` -* Team B Gateway: ID `team-b-gateway-456` - -Each team configures their IDE with their team's gateway endpoint URL, which includes the gateway ID in the path. - -Benefits: - -* Isolated cost tracking per team -* Team-specific rate limits and budgets -* Separate observability dashboards - -=== Approach 2: Shared gateway with custom headers - -Use a single gateway with custom headers for attribution: - -[,json] ----- -{ - "oai.provider.headers": { - "x-team": "backend-team", - "x-project": "api-service" - } -} ----- - -Benefits: - -* Single gateway to manage -* Flexible cost attribution - -Filter observability dashboard by `x-team` or `x-project` headers to generate team-specific reports. - -=== Approach 3: Environment-based gateways - -Separate development, staging, and production environments: - -[,json] ----- -{ - "oai.provider.headers": { - "x-environment": "${env:ENVIRONMENT}" - } -} ----- - -Set environment variables per workspace: - -[,bash] ----- -# Development workspace -export ENVIRONMENT="development" - -# Production workspace -export ENVIRONMENT="production" ----- - -Benefits: - -* Prevent development usage from affecting production metrics -* Different rate limits and budgets per environment -* Environment-specific model access policies - -== Enterprise BYOK at scale - -For large organizations deploying GitHub Copilot Enterprise with AI Gateway across hundreds or thousands of developers. 
- -=== Centralized configuration management - -Distribute IDE configuration files through: - -* **Git repository**: Store `settings.json` or IDE configuration in a shared repository -* **Configuration management tools**: Puppet, Chef, Ansible -* **Group Policy** (Windows environments) -* **MDM solutions** (macOS environments) - -Example centralized configuration: - -[,json] ----- -{ - "oai.provider.endpoint": "https://gw.company.com/v1", - "oai.provider.apiKey": "${env:COPILOT_GATEWAY_KEY}", - "oai.provider.headers": { - "x-user-email": "${env:USER_EMAIL}", - "x-department": "${env:DEPARTMENT}" - } -} ----- - -Developers set environment variables locally or receive them from identity management systems. - -=== API key management - -**Option 1: Individual API keys** - -Each developer gets their own Redpanda API key: - -* Tied to their identity (email, employee ID) -* Revocable when they leave the organization -* Enables per-developer cost attribution - -**Option 2: Team API keys** - -Teams share API keys: - -* Simpler key management -* Cost attribution by team, not individual -* Use custom headers for finer-grained tracking - -**Option 3: Service account keys** - -Single key for all developers: - -* Simplest to deploy -* No per-developer tracking -* Use custom headers for all attribution - -=== Automated provisioning workflow - -. Developer joins organization -. Identity system (Okta, Azure AD, and so on) triggers provisioning: -.. Create Redpanda API key -.. Assign to appropriate gateway -.. Generate IDE configuration file with embedded keys -.. Distribute to developer workstation -. Developer installs IDE and GitHub Copilot -. Configuration auto-applies (through MDM or configuration management) -. Developer starts using Copilot immediately - -=== Observability and governance - -Track usage across the organization: - -. Navigate to AI Gateway dashboard -. 
Filter by custom headers: -** `x-department`: View costs per department -** `x-user-email`: Track usage per developer -** `x-project`: Attribute costs to specific projects -. Generate reports: -** Top 10 users by token usage -** Departments exceeding budget -** Projects using deprecated models -. Set alerts: -** Individual developer exceeds threshold (potential misuse) -** Department budget approaching limit -** Unusual request patterns (security concern) - -== Verify configuration - -After configuring GitHub Copilot, verify it routes requests through your AI Gateway. - -=== Test code completion - -. Open a code file in your IDE -. Start typing a function definition -. Wait for code completion suggestions to appear - -Completion requests appear in the gateway dashboard with: - -* Low token counts (typically 50-200 tokens) -* High request frequency (as you type) -* The completion model you configured - -=== Test chat interface - -. Open Copilot chat: -** VS Code: `Cmd+I` (macOS) or `Ctrl+I` (Windows/Linux) -** JetBrains: Right-click > *Copilot* > *Open Chat* -. Ask a simple question: "Explain this function" -. Wait for response - -Chat requests appear in the gateway dashboard with: - -* Higher token counts (500-2000 tokens typical) -* The chat model you configured -* Response status (200 for success) - -=== Verify in dashboard - -. Sign in to ADP -. Navigate to your gateway's observability dashboard -. Filter by gateway ID -. Verify: -** Requests appear in logs -** Models show correct format (for example, `anthropic/claude-sonnet-4.5`) -** Token usage and cost are recorded -** Custom headers appear (if configured) - -If requests don't appear, see <<troubleshooting>>.
- -== Advanced configuration - -=== Model-specific settings - -Configure different models for different tasks: - -[,json] ----- -{ - "oai.provider.models": [ - { - "id": "anthropic/claude-sonnet-4.5", - "name": "Claude Sonnet (chat)", - "type": "chat", - "temperature": 0.7, - "maxTokens": 4096 - }, - { - "id": "openai/gpt-5.2-mini", - "name": "GPT-5.2 Mini (completion)", - "type": "completion", - "temperature": 0.2, - "maxTokens": 512 - } - ] -} ----- - -Settings explained: - -* Chat uses Claude Sonnet with higher temperature for creative responses -* Completion uses GPT-5.2 Mini with lower temperature for deterministic code -* Chat allows longer responses, completion limits tokens for speed - -=== Workspace-specific configuration - -Override global settings for specific projects using workspace settings. - -In VS Code, create `.vscode/settings.json` in your project root: - -[,json] ----- -{ - "oai.provider.headers": { - "x-project": "customer-portal" - } -} ----- - -Benefits: - -* Route different projects through different gateways -* Track costs per project -* Use different models per project (cost-effective for internal, premium for customer-facing) - -=== Custom request timeouts - -Configure timeout for AI Gateway requests: - -[,json] ----- -{ - "oai.provider.timeout": 30000 -} ----- - -Timeout is in milliseconds. Default is typically 30000 (30 seconds). 
- -Increase timeouts for: - -* High-latency network environments -* Complex code generation tasks -* Large file context - -=== Debug mode - -Enable debug logging to troubleshoot issues: - -[,json] ----- -{ - "oai.provider.debug": true, - "github.copilot.advanced": { - "debug": true - } -} ----- - -View debug logs: - -* VS Code: Developer Console (`Help` > `Toggle Developer Tools` > `Console` tab) -* JetBrains: `Help` > `Diagnostic Tools` > `Debug Log Settings` > Add `github.copilot` - -Debug mode shows: - -* HTTP request and response headers -* Model selection decisions -* Token usage calculations -* Error details - -[[troubleshooting]] -== Troubleshooting - -=== Copilot shows no suggestions - -**Symptom**: Code completion doesn't work or Copilot shows "No suggestions available". - -**Causes and solutions**: - -. **Configuration not loaded** -+ -Reload your IDE to apply configuration changes: -+ -* VS Code: Command Palette > "Developer: Reload Window" -* JetBrains: File > Invalidate Caches / Restart - -. **Incorrect endpoint URL** -+ -Verify the URL format includes `/v1` at the end: -+ -[,text] ----- -# Correct -https://gw.ai.panda.com/v1 - -# Incorrect -https://gw.ai.panda.com ----- - -. **Authentication failure** -+ -Verify your API key is valid: -+ -[,bash] ----- -curl -H "Authorization: Bearer YOUR_API_KEY" \ - https://gw.ai.panda.com/v1/models ----- -+ -You should receive a list of available models. If you get `401 Unauthorized`, regenerate your API key in ADP. - -. **Extension/plugin disabled** -+ -Verify GitHub Copilot is enabled: -+ -* VS Code: Extensions view > GitHub Copilot > Ensure "Enabled" -* JetBrains: Settings > Plugins > GitHub Copilot > Check "Enabled" - -. **Network connectivity issues** -+ -Test connectivity to the gateway: -+ -[,bash] ----- -curl -I https://gw.ai.panda.com/v1 ----- -+ -If this times out, check your network configuration, firewall rules, or VPN connection. 
- -=== Requests not appearing in gateway dashboard - -**Symptom**: Copilot works, but requests don't appear in the AI Gateway observability dashboard. - -**Causes and solutions**: - -. **Wrong gateway ID** -+ -Verify the gateway ID in your endpoint URL matches the gateway you're viewing in the dashboard (case-sensitive). - -. **Using direct GitHub connection** -+ -If the endpoint configuration is missing or incorrect, Copilot may route directly to GitHub instead of your gateway. Verify endpoint configuration. - -. **Log ingestion delay** -+ -Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. - -. **Environment variable not set** -+ -If using environment variables like `${env:REDPANDA_API_KEY}`, verify they're set before launching the IDE: -+ -[,bash] ----- -echo $REDPANDA_API_KEY # Should print your API key ----- - -=== High latency or slow suggestions - -**Symptom**: Code completion is slow or chat responses take a long time. - -**Causes and solutions**: - -. **Gateway geographic distance** -+ -If your gateway is in a different region than you or the upstream provider, this adds network latency. Check gateway region in ADP. - -. **Slow model for completion** -+ -Use a faster model for code completion: -+ -[,json] ----- -{ - "oai.provider.models": [ - { - "id": "openai/gpt-5.2-mini", - "type": "completion" - } - ] -} ----- -+ -Models like GPT-5.2 Mini or Claude Haiku provide faster responses ideal for code completion. - -. **Provider pool failover** -+ -If your gateway is configured with fallback providers, check the logs to see if requests are failing over. Failover adds latency. - -. **Rate limiting** -+ -If you're hitting rate limits, the gateway may be queuing requests. Check the observability dashboard for rate limit metrics. - -. 
**Token limit too high** -+ -Reduce `maxTokens` for completion models to improve speed: -+ -[,json] ----- -{ - "oai.provider.models": [ - { - "id": "openai/gpt-5.2-mini", - "type": "completion", - "maxTokens": 256 - } - ] -} ----- - -=== Custom headers not being sent - -**Symptom**: Custom headers (like `x-team` or `x-project`) don't appear in gateway logs. - -**Causes and solutions**: - -. **Extension not installed (VS Code)** -+ -Custom headers require the OAI Compatible Provider extension in VS Code. Install it from the Extensions marketplace. - -. **Header configuration location** -+ -Ensure headers are in the correct configuration section: -+ -[,json] ----- -{ - "oai.provider.headers": { - "x-custom": "value" - } -} ----- -+ -Not: -+ -[,json] ----- -{ - "github.copilot.advanced": { - "headers": { // Wrong location - "x-custom": "value" - } - } -} ----- - -. **Environment variable not expanded** -+ -If using `${env:VAR_NAME}` syntax, verify the environment variable is set before launching the IDE. - -=== Model not recognized - -**Symptom**: Error message "Model not found" or "Invalid model ID". - -**Causes and solutions**: - -. **Incorrect model format** -+ -Ensure model names use the `vendor/model_id` format: -+ -[,text] ----- -# Correct -anthropic/claude-sonnet-4.5 -openai/gpt-5.2 - -# Incorrect -claude-sonnet-4.5 -gpt-5.2 ----- - -. **Model not enabled in gateway** -+ -Verify the model is enabled in your AI Gateway configuration: -+ -.. Sign in to ADP -.. Navigate to your gateway -.. Check enabled providers and models - -. **Typo in model ID** -+ -Double-check the model ID matches exactly (case-sensitive). Copy from the AI Gateway UI rather than typing manually. - -=== Configuration changes not taking effect - -**Symptom**: Changes to settings don't apply. - -**Solutions**: - -. **Reload IDE** -+ -Configuration changes require reloading: -+ -* VS Code: Command Palette > "Developer: Reload Window" -* JetBrains: File > Invalidate Caches / Restart - -. 
**Invalid JSON syntax** -+ -Validate your `settings.json` file: -+ -[,bash] ----- -python3 -m json.tool ~/.config/Code/User/settings.json ----- -+ -Fix any syntax errors reported. - -. **Workspace settings override** -+ -Check if `.vscode/settings.json` in your project root overrides global settings. Workspace settings take precedence over global settings. - -. **File permissions** -+ -Verify the IDE can read the configuration file: -+ -[,bash] ----- -ls -la ~/.config/Code/User/settings.json ----- -+ -Fix permissions if needed: -+ -[,bash] ----- -chmod 600 ~/.config/Code/User/settings.json ----- - -== Cost optimization tips - -=== Use different models for chat and completion - -Code completion needs speed, while chat benefits from reasoning depth: - -[,json] ----- -{ - "oai.provider.models": [ - { - "id": "anthropic/claude-sonnet-4.5", - "type": "chat" - }, - { - "id": "openai/gpt-5.2-mini", - "type": "completion" - } - ] -} ----- - -This can reduce costs by 5-10x for code completion while maintaining chat quality. - -=== Limit token usage - -Reduce maximum tokens for completion to prevent runaway costs: - -[,json] ----- -{ - "oai.provider.models": [ - { - "id": "openai/gpt-5.2-mini", - "type": "completion", - "maxTokens": 256 - } - ] -} ----- - -Code completion rarely needs more than 256 tokens. - -=== Monitor usage patterns - -Use the AI Gateway dashboard to identify optimization opportunities: - -. Navigate to your gateway's observability dashboard -. Filter by custom headers (for example, `x-team`, `x-user-email`) -. Analyze: -** Token usage per developer or team -** Most expensive queries -** High-frequency low-value requests - -=== Set team-based budgets - -Use separate gateways per team to track and enforce budgets. Configure alerts in the dashboard when teams approach their limits. 
- -=== Track costs per project - -Use custom headers to attribute costs: - -[,json] ----- -{ - "oai.provider.headers": { - "x-project": "mobile-app" - } -} ----- - -Generate project-specific cost reports from the gateway dashboard. - -== Next steps - -* xref:ai-gateway:aggregation.adoc[] - -== Related pages - -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway -* xref:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits -* xref:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway -* xref:ai-gateway/integrations/continue-user.adoc[]: Configure Continue.dev with AI Gateway -* xref:ai-gateway/integrations/cursor-user.adoc[]: Configure Cursor IDE with AI Gateway diff --git a/modules/ROOT/partials/migration-guide.adoc b/modules/ROOT/partials/migration-guide.adoc index 4719e83..5d6ca1d 100644 --- a/modules/ROOT/partials/migration-guide.adoc +++ b/modules/ROOT/partials/migration-guide.adoc @@ -869,4 +869,4 @@ A/B testing == Next steps -* xref:ai-gateway:aggregation.adoc[] +* xref:gateway:aggregation.adoc[] diff --git a/modules/ROOT/partials/observability-metrics.adoc b/modules/ROOT/partials/observability-metrics.adoc deleted file mode 100644 index 258207c..0000000 --- a/modules/ROOT/partials/observability-metrics.adoc +++ /dev/null @@ -1,858 +0,0 @@ -= Observability: Metrics and Analytics -:description: Guide to AI Gateway metrics and analytics, including where to find metrics, key metrics explained, dashboard views, filtering/grouping, alerting, exporting, common analysis tasks, retention, API access, best practices, and troubleshooting. 
-:page-topic-type: reference -:personas: platform_engineer, pilot_lead -:learning-objective-1: Monitor aggregate metrics to track usage patterns and budget adherence -:learning-objective-2: Compare model and provider performance using latency and cost metrics -:learning-objective-3: Configure alerts for budget thresholds and performance degradation - -AI Gateway provides aggregate metrics and analytics dashboards to help you understand usage patterns, costs, performance, and errors across all your LLM traffic. - -== Before you begin - -* You have an active AI Gateway with at least one request processed. -* You have access to ADP. -* You have the appropriate permissions to view gateway metrics. - -Use metrics for: - -* Cost tracking and budget management -* Usage trends over time -* Performance monitoring (latency, error rates) -* Capacity planning -* Model/provider comparison - -Use logs for: Debugging specific requests, viewing full prompts/responses. See xref:observability:logs.adoc[]. - -== Where to find metrics - -1. Navigate to analytics dashboard: - * In the sidebar, navigate to *Agentic* > *AI Gateway* > *Gateways* > *{gateway-name}*, then select the *Analytics* tab. - * Or: Gateway detail page -> Analytics tab - -2. Select gateway (optional): - * View all gateways (org-wide metrics) - * Or filter to specific gateway - -3. 
Set time range: - * Default: Last 7 days - * Options: Last 24 hours, 7 days, 30 days, 90 days, Custom - * // PLACEHOLDER: screenshot of time range picker - -== Key metrics - -=== Request volume - -What it shows: Total number of requests over time - -// PLACEHOLDER: Screenshot of request volume graph - -Graph type: Time series line chart - -Filters: - -* By gateway -* By model -* By provider -* By status (success/error) - -Use cases: - -* Identify usage patterns (peak hours, days of week) -* Detect traffic spikes or drops -* Capacity planning - -Example insights: - -* "Traffic doubles every Monday morning at 9am" → Scale infrastructure -* "Staging gateway has more traffic than prod" → Investigate runaway testing - -=== Token usage - -What it shows: Prompt, completion, and total tokens consumed - -// PLACEHOLDER: Screenshot of token usage graph - -Graph type: Stacked area chart (prompt vs completion tokens) - -Metrics: - -* Total tokens -* Prompt tokens (input) -* Completion tokens (output) -* Tokens per request (average) - -Breakdowns: - -* By gateway -* By model -* By provider - -Use cases: - -* Understand cost drivers (prompt vs completion tokens) -* Identify verbose prompts or responses -* Optimize token usage - -Example insights: - -* "90% of tokens are completion tokens" → Responses are verbose, optimize max_tokens -* "Staging uses 10x more tokens than prod" → Investigate test suite - -=== Estimated spend - -What it shows: Calculated cost based on token usage and public pricing - -// PLACEHOLDER: Screenshot of cost tracking dashboard - -Graph type: Time series line chart with cost breakdown - -Metrics: - -* Total estimated spend -* Spend by model -* Spend by provider -* Spend by gateway -* Cost per 1K requests -* Cost per 1M tokens - -Breakdowns: - -* By gateway (for chargeback/showback) -* By model (for cost optimization) -* By provider (for negotiation leverage) -* By custom header (if configured, for example, `x-customer-id`) - -Use cases: - -* Budget 
tracking ("Are we staying under $50K/month?") -* Cost attribution ("Which team spent the most?") -* Model comparison ("Is Claude cheaper than GPT-4 for our use case?") -* Forecasting ("At this rate, we'll spend $X next month") - -Important notes: - -* *Estimates based on public pricing* (may differ from your contract) -* *Not a substitute for provider invoices* (use for approximation only) -* Update frequency: // PLACEHOLDER: Real-time? Hourly? Daily? - -Example insights: - -* "Customer A accounts for 60% of spend" → Consider rate limits or tiered pricing -* "GPT-5.2 is 3x more expensive than Claude Sonnet for similar quality" → Optimize routing - -=== Latency - -What it shows: Request duration from gateway to provider and back - -// PLACEHOLDER: Screenshot of latency histogram - -Metrics: - -* p50 (median) latency -* p95 latency -* p99 latency -* Min/max latency -* Average latency - -Breakdowns: - -* By gateway -* By model -* By provider -* By token range (longer responses = higher latency) - -Use cases: - -* Identify slow models or providers -* Set SLO targets (for example, "p95 < 2 seconds") -* Detect performance regressions - -Example insights: - -* "GPT-5.2 p99 latency spiked to 10 seconds yesterday" → Investigate provider issue -* "Claude Sonnet is 30% faster than GPT-5.2 for same prompts" → Optimize for latency - -Latency components (if available): - -// PLACEHOLDER: Does gateway show latency breakdown? 
-* Gateway processing time -* Provider API time -* Network time - -=== Error rate - -What it shows: Percentage of failed requests over time - -// PLACEHOLDER: Screenshot of error rate graph - -Metrics: - -* Total error rate (%) -* Errors by status code (400, 401, 429, 500, and so on) -* Errors by model -* Errors by provider - -Graph type: Time series line chart with error percentage - -Breakdowns: - -* By error type: - * Client errors (4xx) - * Rate limits (429) - * Server errors (5xx) - * Provider errors - * Gateway errors - -Use cases: - -* Detect provider outages -* Identify configuration issues (for example, model not enabled) -* Monitor rate limit breaches - -Example insights: - -* "Error rate spiked to 15% at 2pm" → OpenAI outage, fallback to Anthropic worked -* "10% of requests fail with 'model not found'" → Model not enabled in gateway - -=== Success rate - -What it shows: Percentage of successful (200) requests over time - -Metric: `Success Rate = (Successful Requests / Total Requests) × 100` - -Target: Typically 99%+ for production workloads - -Use cases: - -* Monitor overall health -* Set up alerts (for example, "Alert if success rate < 95%") - -=== Fallback rate - -What it shows: Percentage of requests that used fallback provider - -// PLACEHOLDER: Screenshot of fallback rate graph - -Metric: `Fallback Rate = (Fallback Requests / Total Requests) × 100` - -Breakdowns: - -* By fallback reason: - * Rate limit exceeded - * Timeout - * 5xx error - -Use cases: - -* Monitor primary provider reliability -* Verify fallback is working -* Identify when to renegotiate rate limits - -Example insights: - -* "Fallback rate increased to 20% yesterday" → OpenAI hit rate limits, time to increase quota -* "Zero fallbacks in 30 days" → Fallback config may not be working, or primary provider is very reliable - -== Dashboard views - -=== Overview dashboard - -Shows: High-level metrics across all gateways - -// PLACEHOLDER: Screenshot of overview dashboard - -Widgets: - -* 
Total requests (last 24h, 7d, 30d) -* Total spend (last 24h, 7d, 30d) -* Success rate (current) -* Average latency (current) -* Top 5 models by request volume -* Top 5 gateways by spend - -Use case: Executive view, health at a glance - -=== Gateway dashboard - -Shows: Metrics for a specific gateway - -// PLACEHOLDER: Screenshot of gateway dashboard - -Widgets: - -* Request volume (time series) -* Token usage (time series) -* Estimated spend (time series) -* Latency percentiles (histogram) -* Error rate (time series) -* Model breakdown (pie chart) -* Provider breakdown (pie chart) - -Use case: Team-specific monitoring, gateway optimization - -=== Model comparison dashboard - -Shows: Side-by-side comparison of models - -// PLACEHOLDER: Screenshot of model comparison - -Metrics per model: - -* Request count -* Total tokens -* Estimated cost -* Cost per 1K requests -* Average latency -* Error rate - -Use case: Evaluate whether to switch models (cost vs performance) - -Example: - -[cols="2,1,1,1,1"] -|=== -| Model | Requests | Avg Latency | Cost per 1K | Error Rate - -| openai/gpt-5.2 -| 10,000 -| 1.2s -| $5.00 -| 0.5% - -| anthropic/claude-sonnet-4.5 -| 5,000 -| 0.9s -| $3.50 -| 0.3% - -| openai/gpt-5.2-mini -| 20,000 -| 0.7s -| $0.50 -| 1.0% -|=== - -Insight: Claude Sonnet is 25% faster and 30% cheaper than GPT-5.2 with better reliability - -=== Provider comparison dashboard - -Shows: Side-by-side comparison of providers - -Metrics per provider: - -* Request count -* Total spend -* Average latency -* Error rate -* Fallback trigger rate - -Use case: Evaluate provider reliability, negotiate contracts - -=== Cost breakdown dashboard - -Shows: Detailed cost analysis - -// PLACEHOLDER: Screenshot of cost breakdown - -Widgets: - -* Spend by gateway (stacked bar chart) -* Spend by model (pie chart) -* Spend by provider (pie chart) -* Spend by custom dimension (if configured, for example, customer ID) -* Spend trend (time series with forecast) -* Budget utilization (progress 
bar: $X / $Y monthly limit) - -Use case: FinOps, budget management, chargeback/showback - -== Filter and group - -=== Filter by gateway - -[source,text] ----- -Filter: Gateway = "production-gateway" ----- - - -Shows metrics for specific gateway only. - -Use case: Isolate prod from staging metrics - -=== Filter by model - -[source,text] ----- -Filter: Model = "openai/gpt-5.2" ----- - - -Shows metrics for specific model only. - -Use case: Evaluate model performance in isolation - -=== Filter by provider - -[source,text] ----- -Filter: Provider = "OpenAI" ----- - - -Shows metrics for specific provider only. - -Use case: Evaluate provider reliability - -=== Filter by status - -[source,text] ----- -Filter: Status = "200" // Only successful requests -Filter: Status >= "500" // Only server errors ----- - - -Use case: Focus on errors, or calculate success rate - -=== Filter by custom dimension - -// PLACEHOLDER: Confirm if custom dimensions are supported for filtering - -[source,text] ----- -Filter: request.headers["x-customer-id"] = "customer_abc" ----- - - -Shows metrics for specific customer. 
- -Use case: Customer-specific cost tracking for chargeback - -=== Group by dimension - -Common groupings: - -* Group by Gateway -* Group by Model -* Group by Provider -* Group by Status -* Group by Hour/Day/Week/Month (time aggregation) - -Example: "Show me spend grouped by model, for production gateway, over last 30 days" - -== Alerting - -// PLACEHOLDER: Confirm if alerting is supported - -If alerting is supported: - -=== Alert types - -Budget alerts: - -* Alert when spend exceeds X% of monthly budget -* Alert when spend grows Y% week-over-week - -Performance alerts: - -* Alert when error rate > X% -* Alert when p99 latency > Xms -* Alert when success rate < X% - -Usage alerts: - -* Alert when request volume drops (potential outage) -* Alert when fallback rate > X% (primary provider issue) - -=== Alert channels - -// PLACEHOLDER: Supported notification channels -* Email -* Slack -* PagerDuty -* Webhook -* // PLACEHOLDER: Others? - -=== Example alert configuration - -[source,yaml] ----- -# PLACEHOLDER: Actual alert configuration format -alerts: - - name: "High Error Rate" - condition: error_rate > 5% - duration: 5 minutes - channels: [slack, email] - - - name: "Budget Threshold" - condition: monthly_spend > 80% of budget - channels: [email] - - - name: "Latency Spike" - condition: p99_latency > 5000ms - duration: 10 minutes - channels: [pagerduty] ----- - -== Export metrics - -// PLACEHOLDER: Confirm export capabilities - -=== Export to CSV - -1. Apply filters for desired metrics -2. Click "Export to CSV" -3. 
Download includes time series data - -Use case: Import into spreadsheet for analysis, reporting - -=== Export through API - -// PLACEHOLDER: If API is available for metrics - -[source,bash] ----- -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/metrics \ - -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ - -G \ - --data-urlencode "gateway_id=gw_abc123" \ - --data-urlencode "start_time=2025-01-01T00:00:00Z" \ - --data-urlencode "end_time=2025-01-31T23:59:59Z" \ - --data-urlencode "metric=requests,tokens,cost" ----- - - -Response: - -[source,json] ----- -{ - "gateway_id": "gw_abc123", - "start_time": "2025-01-01T00:00:00Z", - "end_time": "2025-01-31T23:59:59Z", - "metrics": { - "requests": 1000000, - "tokens": 500000000, - "estimated_cost": 2500.00 - } -} ----- - - -=== Integration with observability platforms - -Supported integrations: - -* *Prometheus*: Native metrics endpoint on port 9090 at `/metrics` -* *OpenTelemetry*: Traces exported to Redpanda topics through the OpenTelemetry exporter - -== Common analysis tasks - -=== "Are we staying within budget?" - -1. View cost breakdown dashboard -2. Check budget utilization widget: - * Current spend: $X - * Monthly budget: $Y - * Utilization: X% - * Days remaining in month: Z -3. Forecast: - * At current rate: $X × (30 / days_elapsed) - * On track to exceed budget? Yes/No - -Action: - -* If approaching limit: Adjust rate limits, optimize models, pause non-prod usage -* If well under budget: Opportunity to test more expensive models - -=== "Which team is using the most resources?" - -1. Filter by gateway (assuming one gateway per team) -2. *Sort by Spend* (descending) -3. 
View table: - -[cols="2,1,1,1,1"] -|=== -| Gateway | Requests | Tokens | Spend | % of Total - -| team-ml -| 500K -| 250M -| $1,250 -| 50% - -| team-product -| 300K -| 150M -| $750 -| 30% - -| team-eng -| 200K -| 100M -| $500 -| 20% -|=== - -Action: Chargeback costs to teams, or investigate high-usage teams - -=== "Is this model worth the extra cost?" - -1. *Open Model Comparison Dashboard* -2. Select models to compare: - * Expensive model: `openai/gpt-5.2` - * Cheap model: `openai/gpt-5.2-mini` -3. Compare metrics: - -[cols="2,1,1,2"] -|=== -| Metric | GPT-5.2 | GPT-5.2-mini | Difference - -| Cost per 1K requests -| $5.00 -| $0.50 -| *10x* - -| Avg Latency -| 1.2s -| 0.7s -| 58% *faster* (mini) - -| Error Rate -| 0.5% -| 1.0% -| 2x errors (mini) -|=== - -Decision: If mini's error rate is acceptable, save 10x on costs - -=== "Why did costs spike yesterday?" - -1. View cost trend graph -2. Identify spike (for example, Jan 10th: $500 vs usual $100) -3. Drill down: - * By gateway: Which gateway caused the spike? - * By model: Did someone switch to expensive model? - * By hour: What time did spike occur? -4. Cross-reference with logs: - * Filter logs to spike timeframe - * Check for unusual request patterns - * Identify custom header (user ID, customer ID) if present - -Common causes: - -* Test suite running against prod gateway -* A/B test routing all traffic to expensive model -* User error (wrong model in config) -* Runaway loop in application code - -=== "Is provider X more reliable than provider Y?" - -1. Open provider comparison dashboard -2. Compare error rates: - -[cols="2,1,1,2"] -|=== -| Provider | Requests | Error Rate | Fallback Triggers - -| OpenAI -| 500K -| 0.8% -| 50 (rate limits) - -| Anthropic -| 300K -| 0.3% -| 5 (timeouts) -|=== - -Insight: Anthropic has 62% lower error rate - -3. 
Compare latencies: - -[cols="2,1,1"] -|=== -| Provider | p50 Latency | p99 Latency - -| OpenAI -| 1.0s -| 3.5s - -| Anthropic -| 0.8s -| 2.5s -|=== - -Insight: Anthropic is 20% faster at p50, 28% faster at p99 - -Decision: Prioritize Anthropic in routing pools - -== Metrics retention - -// PLACEHOLDER: Confirm metrics retention policy - -Retention period: - -* *High-resolution* (1-minute granularity): // PLACEHOLDER: for example, 7 days -* *Medium-resolution* (1-hour granularity): // PLACEHOLDER: for example, 30 days -* *Low-resolution* (1-day granularity): // PLACEHOLDER: for example, 1 year - -Note: Aggregate metrics retained longer than individual request logs - -== API access to metrics - -// PLACEHOLDER: Document metrics API if available - -=== List available metrics - -[source,bash] ----- -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/metrics/list \ - -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" ----- - - -Response: - -[source,json] ----- -{ - "metrics": [ - "requests", - "tokens.prompt", - "tokens.completion", - "tokens.total", - "cost.estimated", - "latency.p50", - "latency.p95", - "latency.p99", - "errors.rate", - "success.rate", - "fallback.rate" - ] -} ----- - - -=== Query specific metric - -[source,bash] ----- -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/metrics/query \ - -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ - -H "Content-Type: application/json" \ - -d '{ - "metric": "requests", - "gateway_id": "gw_abc123", - "start_time": "2025-01-01T00:00:00Z", - "end_time": "2025-01-31T23:59:59Z", - "granularity": "1d", - "group_by": ["model"] - }' ----- - - -Response: - -[source,json] ----- -{ - "metric": "requests", - "granularity": "1d", - "data": [ - { - "timestamp": "2025-01-01T00:00:00Z", - "model": "openai/gpt-5.2", - "value": 10000 - }, - { - "timestamp": "2025-01-01T00:00:00Z", - "model": "anthropic/claude-sonnet-4.5", - "value": 5000 - }, - ... 
- ] -} ----- - - -== Best practices - -Set up budget alerts early - -* Don't wait for surprise bills -* Alert at 50%, 80%, 90% of budget -* Include multiple stakeholders (eng, finance) - -Create team dashboards - -* One dashboard per team showing their gateway(s) -* Empowers teams to self-optimize -* Reduces central ops burden - -Monitor fallback rate - -* Low fallback rate (0-5%): Normal, failover working -* High fallback rate (>20%): Investigate primary provider issues -* Zero fallback rate: Verify fallback config is correct - -Compare models regularly - -* Run A/B tests with metrics -* Reassess as pricing and models change -* Don't assume expensive = better quality for your use case - -Track trends, not point-in-time - -* Day-to-day variance is normal -* Look for week-over-week and month-over-month trends -* Seasonal patterns (for example, more usage on weekdays) - -== Troubleshoot metrics issues - -=== Issue: "Metrics don't match my provider invoice" - -Possible causes: - -1. Metrics are estimates based on public pricing -2. Your contract has custom pricing -3. Provider changed pricing mid-month - -Solution: - -* Use metrics for trends and optimization decisions -* Use provider invoices for actual billing -* // PLACEHOLDER: Can users configure custom pricing in gateway? - -=== Issue: "Metrics are delayed or missing" - -Possible causes: - -1. Metrics aggregation has delay (// PLACEHOLDER: typical delay?) -2. Time range outside retention period -3. No requests in selected time range (empty data) - -Solution: - -1. Wait and refresh (// PLACEHOLDER: X minutes typical delay) -2. Check retention policy -3. Verify requests were sent (check logs) - -=== Issue: "Dashboard shows 'no data'" - -Possible causes: - -1. Filters too restrictive (no matching requests) -2. Gateway has no traffic yet -3. Permissions issue (can't access this gateway's metrics) - -Solution: - -1. Remove filters, widen time range -2. 
Send test request (see xref:ai-gateway:gateway-quickstart.adoc[]) -3. Check permissions with admin - -== Next steps - -* xref:observability:logs.adoc[] diff --git a/modules/ROOT/partials/transcripts-ui-guide.adoc b/modules/ROOT/partials/transcripts-ui-guide.adoc deleted file mode 100644 index 02567c0..0000000 --- a/modules/ROOT/partials/transcripts-ui-guide.adoc +++ /dev/null @@ -1,89 +0,0 @@ -// ============================================================================= -// PARTIAL: transcripts-ui-guide.adoc -// ============================================================================= -// -// PURPOSE: -// Documents the Transcripts UI interface for both AI agents and MCP servers. -// Single-sources UI navigation and component descriptions that are identical -// across both contexts. -// -// INCLUDED BY: -// - cloud-docs: modules/ai-agents/pages/agents/monitor-agents.adoc -// - cloud-docs: modules/ai-agents/pages/mcp/remote/monitor-mcp-servers.adoc -// -// INCLUDE SYNTAX: -// :context: agent -// include::partial$transcripts-ui-guide.adoc[] -// -// :context: mcp -// include::partial$transcripts-ui-guide.adoc[] -// -// ATTRIBUTES USED: -// - context: Controls agent-specific vs MCP-specific content -// Valid values: "agent" | "mcp" -// -// DEPENDENCIES: -// - xref:observability:concepts.adoc#agent-trace-hierarchy[] -// - xref:observability:concepts.adoc#mcp-server-trace-hierarchy[] -// -// CONTENT TYPE: -// UI navigation and interface explanation (procedural context for how-to pages) -// -// ============================================================================= - -=== Navigate the transcripts view - -// Navigation is identical for both contexts -. Click *Transcripts*. -ifeval::["{context}" == "agent"] -. Select a recent transcript from your agent executions. -endif::[] -ifeval::["{context}" == "mcp"] -. Select a recent transcript from your MCP server tool invocations. 
-endif::[] - -The transcripts view displays: - -* *Timeline*: Visual history of recent executions with success/error indicators -* *Trace list*: Hierarchical view of traces and spans -* *Summary panel*: Detailed metrics when you select a transcript - -// UI component descriptions -==== Timeline visualization - -The timeline shows execution patterns over time: - -* Green bars: Successful executions -* Red bars: Failed executions with errors -* Gray bars: Incomplete traces or traces still loading -* Time range: Displays the last few hours by default - -Use the timeline to spot patterns like error clusters, performance degradation over time, or gaps indicating downtime. - -==== Trace hierarchy - -The trace list shows nested operations with visual duration bars indicating how long each operation took. Click the expand arrows (▶) to drill into nested spans and see the complete execution flow. - -// Link to appropriate concepts section based on context -ifeval::["{context}" == "agent"] -For details on span types, see xref:observability:concepts.adoc#agent-trace-hierarchy[Agent trace hierarchy]. -endif::[] -ifeval::["{context}" == "mcp"] -For details on span types, see xref:observability:concepts.adoc#mcp-server-trace-hierarchy[MCP server trace hierarchy]. 
-endif::[] - -==== Summary panel - -When you select a transcript, the summary panel shows: - -* Duration: Total execution time for this request -* Total Spans: Number of operations in the trace -ifeval::["{context}" == "agent"] -* Token Usage: Input tokens, output tokens, and total (critical for cost tracking) -* LLM Calls: How many times the agent called the language model -* Service: The agent identifier -* Conversation ID: Links to session data topics -endif::[] -ifeval::["{context}" == "mcp"] -* Service: The MCP server identifier -endif::[] diff --git a/modules/agents/examples/agents/account-agent-prompt.txt b/modules/agents/examples/agents/account-agent-prompt.txt deleted file mode 100644 index 63469b0..0000000 --- a/modules/agents/examples/agents/account-agent-prompt.txt +++ /dev/null @@ -1,62 +0,0 @@ -You are the account agent for ACME Bank's dispute resolution system. You specialize in retrieving customer account information and transaction data. - -## Your Responsibilities - -- Look up customer account details with PII masking -- Retrieve specific transaction information -- Provide transaction pattern analysis -- Return only data available from your tools - -## Available Tools - -1. **get_customer_account**: Returns account data with masked PII - - Input: customer_id - - Returns: Name, masked email, card last 4, account type, location - -2. **get_transaction_details**: Returns detailed transaction information - - Input: transaction_id - - Returns: Amount, merchant, date, location, card used - -3. 
**get_transaction_history**: Returns spending pattern analysis - - Input: customer_id - - Returns: Aggregated spending patterns, categories, locations - -## PII Protection Rules - -Always return masked data: -- Email: First letter + **** + @domain (for example, "s****@example.com") -- Phone: ***-***-XXXX (last 4 digits only) -- Card: Last 4 digits only -- Never return: Full card numbers, SSNs, full account numbers - -## Response Format - -Structure responses clearly: - -"I found the following account information: -- Customer: [Name] -- Account Type: [Type] -- Card ending in: [Last 4] -- Primary Location: [City, State, Country] - -Transaction details: -- Amount: $[Amount] -- Merchant: [Merchant Name] -- Date: [Date] -- Location: [Transaction Location]" - -## Error Handling - -If data not found: -- "I couldn't find an account for customer ID [ID]" -- "No transaction found with ID [ID]" -- Never guess or make up information - -## What You Don't Do - -- Don't calculate fraud scores (that's fraud-agent's job) -- Don't verify merchants (that's merchant-agent's job) -- Don't make recommendations about disputes -- Don't log audit events (that's compliance-agent's job) - -Your job is data retrieval only. Provide accurate, masked data and let the root agent make decisions. diff --git a/modules/agents/examples/agents/compliance-agent-prompt.txt b/modules/agents/examples/agents/compliance-agent-prompt.txt deleted file mode 100644 index 8704ecd..0000000 --- a/modules/agents/examples/agents/compliance-agent-prompt.txt +++ /dev/null @@ -1,120 +0,0 @@ -You are the compliance agent for ACME Bank's dispute resolution system. You specialize in regulatory requirements and audit logging. - -## Your Responsibilities - -- Log all dispute investigation actions for audit trail -- Check regulatory requirements for dispute types -- Verify compliance with banking regulations -- Provide timeline and documentation requirements - -## Available Tools - -1. 
**log_audit_event**: Log investigation actions - - Input: Transaction ID, customer ID, decision, evidence, outcome - - Returns: Audit record confirmation - -2. **check_regulatory_requirements**: Look up compliance rules - - Input: dispute_type (fraud, billing_error, service_not_received) - - Returns: Regulations, timelines, documentation requirements - -## Regulatory Frameworks - -You work with these regulations: - -1. **Regulation E (Electronic Fund Transfer Act)** - - Applies to: Fraud disputes, unauthorized transactions - - Customer liability: $50 if reported within 2 days, $500 if reported within 60 days - - Bank must provide provisional credit within 10 business days - - Investigation deadline: 90 days - -2. **Fair Credit Billing Act** - - Applies to: Billing errors, disputes - - Customer must dispute within 60 days of statement - - Bank must acknowledge within 30 days - - Resolution deadline: 90 days - -3. **Card Network Rules (Visa/Mastercard)** - - Chargeback rights and timelines - - Merchant response requirements - - Evidence requirements - -## Documentation Requirements - -For each dispute type, log: - -**Fraud Disputes:** -- Customer dispute affidavit -- Transaction details -- Fraud indicators identified -- Decision and reasoning -- Customer notification - -**Billing Errors:** -- Billing statement -- Customer dispute letter -- Merchant communication attempts -- Resolution details - -**Service Not Received:** -- Proof of non-delivery -- Merchant communication attempts -- Order/booking confirmation -- Resolution outcome - -## Timeline Tracking - -Monitor key deadlines: - -- Acknowledge dispute: 24-30 days (varies by type) -- Provisional credit: 10 business days (fraud) -- Final decision: 90 days (most disputes) -- Chargeback filing: 120 days (service issues) - -## Response Format - -For regulatory checks: - -"Compliance Requirements: - -Dispute Type: [Type] -Applicable Regulations: -- [Regulation 1] -- [Regulation 2] - -Customer Rights: -- Liability Limit: 
$[Amount] -- Notification Deadline: [Days] days - -Bank Obligations: -- Provisional Credit: [Required/Not Required] -- Investigation Deadline: [Days] days -- Customer Notification: [Required/Not Required] - -Documentation Required: -- [Document 1] -- [Document 2] -- [Document 3] - -Timeline: -- Acknowledge: [Timeframe] -- Decision: [Timeframe]" - -For audit logging: - -"Audit Event Logged: - -Audit ID: [UUID] -Timestamp: [ISO 8601] -Investigation Details: [Summary] -Decision: [Decision] -Evidence: [Evidence Sources] -Status: Recorded" - -## What You Don't Do - -- Don't retrieve transaction or account data -- Don't calculate fraud scores -- Don't verify merchants -- Don't make dispute recommendations - -Your job is compliance and audit only. Ensure all investigations are properly documented and regulatory requirements are met. diff --git a/modules/agents/examples/agents/dispute-root-agent-prompt.txt b/modules/agents/examples/agents/dispute-root-agent-prompt.txt deleted file mode 100644 index 2461e47..0000000 --- a/modules/agents/examples/agents/dispute-root-agent-prompt.txt +++ /dev/null @@ -1,130 +0,0 @@ -You are the root agent for a transaction dispute resolution system at ACME Bank. Your role is to orchestrate sub-agents and make final recommendations to customers about disputed transactions. - -## Your Responsibilities - -- Route customer queries to appropriate sub-agents -- Aggregate results from multiple sub-agents -- Make evidence-based recommendations -- Communicate clearly with customers -- Escalate complex cases to human agents - -## Available Sub-Agents - -You have access to four specialized sub-agents via A2A protocol: - -1. **account-agent**: Retrieves customer account data and transaction history -2. **fraud-agent**: Analyzes fraud risk and calculates risk scores -3. **merchant-agent**: Verifies merchant legitimacy and reputation -4. 
**compliance-agent**: Logs audit events and checks regulatory requirements - -## Decision Framework - -When investigating a dispute, follow this process: - -1. Start with account-agent to get customer and transaction details -2. Route to fraud-agent if fraud is suspected -3. Route to merchant-agent to verify merchant legitimacy -4. Route to compliance-agent to log the investigation and check requirements -5. Aggregate all evidence and make recommendation - -## Risk-Based Recommendations - -Based on aggregated evidence, take these actions: - -- **Fraud score 80-100 + high merchant risk**: Block the transaction immediately, block the card, issue new card -- **Fraud score 60-79**: Hold for specialist review, temporary card block -- **Fraud score 40-59**: Ask customer to verify with merchant first before taking action -- **Fraud score 0-39**: Likely legitimate transaction, help customer recall the purchase - -## Escalation Criteria - -Escalate to human agent when: - -- Fraud score is medium (40-70) and evidence is conflicting -- Customer disputes the recommendation strongly -- Regulatory requirements exceed available tools -- Subscription or recurring billing issues require merchant intervention - -## Compliance Constraints - -Never: - -- Expose full credit card numbers or SSNs (use masked versions only) -- Make guarantees about dispute outcomes (use "likely" or "recommend") -- Process disputes without logging to compliance-agent -- Reveal internal fraud detection logic or merchant scoring details to customers -- Make decisions without sub-agent evidence -- Ask customers for screenshots or additional proof (you have the transaction records) - -## Customer Communication Style - -**Clear, bank-appropriate language:** -- Use "I've reviewed your account" not "I called the account-agent" -- Use "this charge doesn't match your typical spending" not "fraud score is 95/100" -- Use "We're blocking this card" not "I recommend you freeze it" -- Don't reveal merchant reputation 
scores or fraud report counts - -**Proactive protection:** -For likely fraud (score 80+): -- Block the card immediately: "We're blocking your card ending in [XXXX] right now to prevent additional fraudulent charges" -- Issue replacement: "We'll send you a replacement card with a new number" -- Process the claim: "You'll see the credit for this charge within 10 business days" - -For uncertain cases (score 40-79): -- Temporary block: "I'm placing a temporary hold on this card while we investigate" -- Escalate: "A specialist will contact you within 24 hours" - -**Concise responses:** -Keep responses to 3-4 short paragraphs maximum. Customers want action, not detailed analysis. - -## Example Investigation Flow - -Customer: "I see a $1,847.99 charge from 'LUXURY WATCHES INT' in Singapore on transaction TXN-89012. This is fraud. My customer ID is CUST-1001." - -**Your response to customer:** - -"I've reviewed your account and this transaction. This charge doesn't match your typical spending pattern, and you haven't made international purchases in the past 90 days. - -Here's what I'm doing: -- Blocking your card ending in 4532 right now to prevent any additional unauthorized charges -- Approving your dispute for the full $1,847.99 - you'll see the credit within 10 business days -- Sending you a replacement card with a new number within 5-7 business days - -Your dispute has been logged and meets the requirements under Regulation E for unauthorized electronic fund transfers. - -Is there anything else I can help you with today?" - -**What you actually did behind the scenes:** -1. Called account-agent → confirmed US-based customer, no international history -2. Called fraud-agent → received score 95/100 (critical risk) -3. Called merchant-agent → confirmed high fraud indicators -4. Called compliance-agent → logged under Regulation E -5. 
Made decision: transaction is fraudulent, block card immediately - -(Don't share the scores or technical details with the customer) - -**Note:** When talking to customers, use natural banking language like "approving your dispute." But for programmatic JSON responses, "recommendation" describes the TRANSACTION status, not the dispute claim status. - -## Programmatic Invocations - -When invoked from a pipeline or automated system (you'll receive transaction data without conversational context), respond with ONLY valid JSON. No explanatory text, no markdown formatting, no commentary before or after - just the JSON object. - -Required JSON format: -{ - "recommendation": "block_and_investigate" | "hold_for_review" | "approve", - "fraud_score": <number 0-100>, - "confidence": "high" | "medium" | "low", - "reasoning": "<brief explanation of the decision>" -} - -**Recommendation field definitions:** -- **"block_and_investigate"**: Transaction is fraudulent. Block the card immediately and investigate. -- **"hold_for_review"**: Unclear if fraudulent. Place temporary hold and escalate to human specialist. -- **"approve"**: Transaction is legitimate. Customer likely forgot about it or needs clarification. - -**Mapping from conversational actions:** -- If you would block the card → use "block_and_investigate" -- If you would escalate to specialist → use "hold_for_review" -- If transaction seems legitimate → use "approve" - -The pipeline will parse this JSON to make automated decisions. Any non-JSON response will cause processing failures. diff --git a/modules/agents/examples/agents/fraud-agent-prompt.txt b/modules/agents/examples/agents/fraud-agent-prompt.txt deleted file mode 100644 index b2c8a26..0000000 --- a/modules/agents/examples/agents/fraud-agent-prompt.txt +++ /dev/null @@ -1,85 +0,0 @@ -You are the fraud detection agent for ACME Bank's dispute resolution system. You specialize in analyzing transactions for fraud indicators and calculating risk scores. 
- -## Your Responsibilities - -- Calculate fraud risk scores (0-100 scale) -- Identify specific fraud indicators -- Provide risk assessment reasoning -- Return confidence levels with assessments - -## Available Tools - -1. **calculate_fraud_score**: Multi-factor fraud scoring - - Input: transaction_id, customer_id - - Returns: Fraud score (0-100), risk level, breakdown by factor, recommendation - -2. **get_risk_indicators**: Detailed fraud signal detection - - Input: transaction_id - - Returns: Array of risk indicators with severity levels - -## Risk Scoring Factors - -Consider these factors: - -1. **Location Risk** (0-30 points) - - International vs. customer's country - - City mismatch from customer's primary location - - High-risk countries - -2. **Merchant Risk** (0-25 points) - - Merchant reputation score - - Fraud report history - - Business verification status - -3. **Amount Risk** (0-25 points) - - Deviation from customer's average - - Unusually large for merchant category - - Round numbers (potential testing) - -4. **Velocity Risk** (0-10 points) - - Multiple transactions in short timeframe - - Rapid succession of purchases - - Geographic impossibility - -5. 
**Category Risk** (0-10 points) - - Outside customer's typical categories - - High-risk MCC codes - - Mismatch with spending patterns -## Risk Levels - -- **Critical (80-100)**: Almost certainly fraud, immediate action needed -- **High (60-79)**: Strong fraud indicators, hold for review -- **Medium (40-59)**: Some concerning factors, customer verification recommended -- **Low (20-39)**: Minor flags, likely legitimate -- **Minimal (0-19)**: No significant fraud indicators - -## Response Format - -Structure your analysis: - -"Fraud Risk Analysis: - -Fraud Score: [Score]/100 - [Risk Level] - -Risk Breakdown: -- Location Risk: [Score] - [Explanation] -- Merchant Risk: [Score] - [Explanation] -- Amount Risk: [Score] - [Explanation] -- Velocity Risk: [Score] - [Explanation] -- Category Risk: [Score] - [Explanation] - -Key Indicators: -- [Indicator 1] -- [Indicator 2] -- [Indicator 3] - -Recommendation: [block_and_investigate | hold_for_review | monitor_closely | approve]" - -## What You Don't Do - -- Don't retrieve account or transaction data (use what's provided) -- Don't verify merchants (that's merchant-agent's job) -- Don't make final dispute decisions (provide recommendation only) -- Don't log audit events - -Your job is fraud analysis only. Provide objective risk assessment based on available data. diff --git a/modules/agents/examples/agents/merchant-agent-prompt.txt b/modules/agents/examples/agents/merchant-agent-prompt.txt deleted file mode 100644 index bb6ee31..0000000 --- a/modules/agents/examples/agents/merchant-agent-prompt.txt +++ /dev/null @@ -1,87 +0,0 @@ -You are the merchant verification agent for ACME Bank's dispute resolution system. You specialize in verifying merchant legitimacy and reputation. - -## Your Responsibilities - -- Verify merchant reputation and legitimacy -- Look up merchant category codes (MCC) -- Identify known fraud patterns for merchant categories -- Provide merchant-specific insights - -## Available Tools - -1. 
**verify_merchant**: Merchant reputation lookup - - Input: merchant_name - - Returns: Reputation score, fraud reports, business verification, red flags - -2. **get_merchant_category**: MCC code analysis - - Input: mcc (4-digit code) - - Returns: Category details, typical transaction ranges, fraud risk profile - -## Reputation Scoring - -Interpret reputation scores: - -- **90-100**: Excellent, trusted merchant -- **70-89**: Good, established business -- **50-69**: Moderate, some concerns -- **30-49**: Poor, significant red flags -- **0-29**: High risk, strong fraud indicators - -## Red Flags to Report - -Watch for: -- High volume of fraud reports -- Recently established businesses in high-risk categories -- Unverified business registration -- Pattern of chargebacks -- Operates in high-risk jurisdictions -- Billing descriptor mismatches - -## Common Merchant Issues - -Be aware of legitimate merchant problems: - -- **Subscription services**: Known for duplicate billing, difficult cancellation -- **International hotels**: Currency conversion confusion, incidental charges -- **Online marketplaces**: Third-party sellers, billing descriptor confusion -- **Travel booking**: Pre-authorization holds, cancellation fee disputes - -## Response Format - -Structure your verification: - -"Merchant Verification Results: - -Merchant: [Name] -Reputation Score: [Score]/100 - [Level] -Verification Status: [Verified | Unverified | Unknown] - -Business Details: -- Country: [Country] -- Years in Operation: [Years] -- Registration: [Verified/Unverified] - -Fraud Reports: -- Total Reports: [Count] -- Recent (30 days): [Count] -- Confirmed Fraud Cases: [Count] - -Category Analysis (MCC [Code]): -- Category: [Category Name] -- Risk Profile: [High/Medium/Low] -- Typical Transaction Range: $[Min]-$[Max] - -Red Flags: -- [Flag 1] -- [Flag 2] - -Recommendation: [trusted_merchant | verify_subscription_details | manual_review_required | block_merchant]" - -## What You Don't Do - -- Don't calculate 
fraud scores (that's fraud-agent's job) -- Don't retrieve transaction data (that's account-agent's job) -- Don't make final dispute decisions -- Don't log audit events - -Your job is merchant verification only. Provide objective assessment of merchant legitimacy. diff --git a/modules/agents/pages/build-index.adoc b/modules/agents/pages/build-index.adoc deleted file mode 100644 index f367980..0000000 --- a/modules/agents/pages/build-index.adoc +++ /dev/null @@ -1,5 +0,0 @@ -= Build Agents -:page-layout: index -:description: Create production AI agents with effective prompts and scalable architecture. - -Create agents, write effective prompts, and design scalable agent systems. diff --git a/modules/agents/pages/get-started-index.adoc b/modules/agents/pages/get-started-index.adoc deleted file mode 100644 index aa08dab..0000000 --- a/modules/agents/pages/get-started-index.adoc +++ /dev/null @@ -1,5 +0,0 @@ -= Get Started with AI Agents -:page-layout: index -:description: Get started with declarative AI agents in ADP. Connect tools, configure behavior, and deploy without writing agent code. - -Start here to create your first declarative AI agent. Select an LLM, define behavior, and connect tools from built-in connectors. diff --git a/modules/agents/pages/index.adoc b/modules/agents/pages/index.adoc deleted file mode 100644 index 6bb731e..0000000 --- a/modules/agents/pages/index.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= AI Agents -:page-layout: index -:description: Declare agent behavior using built-in connectors. No custom agent code required. - diff --git a/modules/agents/pages/integration-index.adoc b/modules/agents/pages/integration-index.adoc deleted file mode 100644 index 7a4b567..0000000 --- a/modules/agents/pages/integration-index.adoc +++ /dev/null @@ -1,6 +0,0 @@ -= Agent Integration -:page-layout: index -:description: Connect agents to external applications, pipelines, and other systems. - -Choose integration patterns and connect agents to your systems. 
- diff --git a/modules/agents/pages/troubleshoot/index.adoc b/modules/agents/pages/troubleshoot/index.adoc deleted file mode 100644 index d83e3e0..0000000 --- a/modules/agents/pages/troubleshoot/index.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= Troubleshoot -:page-layout: index -:description: Diagnose and fix common issues with AI agents, including deployment failures, runtime behavior, tool execution, and pipeline integration. - diff --git a/modules/agents/pages/tutorials/index.adoc b/modules/agents/pages/tutorials/index.adoc deleted file mode 100644 index 8ec8a2b..0000000 --- a/modules/agents/pages/tutorials/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= Tutorials -:description: End-to-end agent tutorials that walk through customer support, transaction dispute resolution, and other multi-agent workflows. -:page-layout: index diff --git a/modules/ai-gateway/pages/admin/index.adoc b/modules/ai-gateway/pages/admin/index.adoc deleted file mode 100644 index b820f01..0000000 --- a/modules/ai-gateway/pages/admin/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= For Admins -:description: Set up and operate AI Gateway across providers, secrets, authentication, and observability for your team. -:page-layout: index diff --git a/modules/ai-gateway/pages/builders/index.adoc b/modules/ai-gateway/pages/builders/index.adoc deleted file mode 100644 index f66b716..0000000 --- a/modules/ai-gateway/pages/builders/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= For Builders -:description: Discover available gateways and connect your applications and agents to AI Gateway. -:page-layout: index diff --git a/modules/ai-gateway/pages/index.adoc b/modules/ai-gateway/pages/index.adoc deleted file mode 100644 index c23d63d..0000000 --- a/modules/ai-gateway/pages/index.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= AI Gateway -:page-layout: index -:description: Redpanda's managed proxy for LLM APIs. 
Create an LLM provider, and point your applications at a Redpanda-hosted URL with managed secrets, authentication, and observability. - diff --git a/modules/mcp/doc-detective-create-server.json b/modules/connect/doc-detective-create-server.json similarity index 100% rename from modules/mcp/doc-detective-create-server.json rename to modules/connect/doc-detective-create-server.json diff --git a/modules/mcp/doc-detective-test-tools.json b/modules/connect/doc-detective-test-tools.json similarity index 100% rename from modules/mcp/doc-detective-test-tools.json rename to modules/connect/doc-detective-test-tools.json diff --git a/modules/agents/pages/a2a-concepts.adoc b/modules/connect/pages/a2a-concepts.adoc similarity index 93% rename from modules/agents/pages/a2a-concepts.adoc rename to modules/connect/pages/a2a-concepts.adoc index 153ca5b..3eda4f0 100644 --- a/modules/agents/pages/a2a-concepts.adoc +++ b/modules/connect/pages/a2a-concepts.adoc @@ -44,7 +44,7 @@ For example, if your agent URL is `\https://my-agent.ai-agents.abc123.cloud.redp The `.well-known` path follows internet standards for service discovery, making agents discoverable without configuration. -To configure the agent card, see xref:agents:create-agent.adoc#configure-a2a-discovery-metadata-optional[Configure A2A discovery metadata]. +To configure the agent card, see xref:connect:create-agent.adoc#configure-a2a-discovery-metadata-optional[Configure A2A discovery metadata]. == Where A2A is used in ADP @@ -54,7 +54,7 @@ ADP uses the A2A protocol in two contexts: External applications and agents hosted outside ADP use A2A to call ADP agents. This includes backend services, CLI tools, custom UIs, and agents hosted on other platforms. -For integration pattern guidance, see xref:agents:integration-overview.adoc[]. +For integration pattern guidance, see xref:connect:integration-overview.adoc[]. 
=== Internal pipeline-to-agent integration @@ -64,7 +64,7 @@ Redpanda Connect pipelines use the xref:cloud-data-platform:develop:connect/comp * Streaming data enrichment with AI-generated fields. * Event-driven agent invocation for automated processing. -The `a2a_message` processor uses the A2A protocol internally to discover and call agents. For pipeline patterns, see xref:agents:pipeline-integration-patterns.adoc[]. +The `a2a_message` processor uses the A2A protocol internally to discover and call agents. For pipeline patterns, see xref:connect:pipeline-integration-patterns.adoc[]. == How agents discover each other @@ -116,5 +116,5 @@ The A2A protocol uses semantic versioning (major.minor.patch). Agents declare th == Next steps -* xref:agents:integration-overview.adoc[] -* xref:agents:create-agent.adoc[] +* xref:connect:integration-overview.adoc[] +* xref:connect:create-agent.adoc[] diff --git a/modules/agents/pages/overview.adoc b/modules/connect/pages/agents-overview.adoc similarity index 85% rename from modules/agents/pages/overview.adoc rename to modules/connect/pages/agents-overview.adoc index 946148c..6ed6486 100644 --- a/modules/agents/pages/overview.adoc +++ b/modules/connect/pages/agents-overview.adoc @@ -25,18 +25,18 @@ When you create an agent, you configure the components through the Agentic Data * *System prompt*: Defines the agent's role, responsibilities, and constraints * *LLM*: Interprets user intent and decides which tools to invoke -* *Tools*: External capabilities exposed through the xref:mcp:overview.adoc[Model Context Protocol (MCP)] +* *Tools*: External capabilities exposed through the xref:connect:mcp-overview.adoc[Model Context Protocol (MCP)] * *Context*: Conversation history, tool results, and real-time events from Redpanda topics Agents can invoke Redpanda Connect components as tools on-demand. Redpanda Connect pipelines can also invoke agents for event-driven processing. 
This bidirectional integration supports both interactive workflows and automated streaming. When a user makes a request, the LLM receives the system prompt and context, decides which tools to invoke, and processes the results. This cycle repeats until the task completes. -For a deeper understanding of how agents execute, manage context, and maintain state, see xref:agents:concepts.adoc[]. +For a deeper understanding of how agents execute, manage context, and maintain state, see xref:connect:concepts.adoc[]. == Key benefits -A declarative approach means you configure agent behavior instead of coding it, with access to 300+ built-in Redpanda Connect connectors for data sources, APIs, and services. Real-time streaming data ensures agents access live events instead of batch snapshots. xref:mcp:overview.adoc[Remote MCP] support enables standardized tool access. Managed infrastructure handles deployment, scaling, and security for you. Low-latency execution means tools run close to your data. Integrated secrets management securely stores API keys and credentials. +A declarative approach means you configure agent behavior instead of coding it, with access to 300+ built-in Redpanda Connect connectors for data sources, APIs, and services. Real-time streaming data ensures agents access live events instead of batch snapshots. xref:connect:mcp-overview.adoc[Remote MCP] support enables standardized tool access. Managed infrastructure handles deployment, scaling, and security for you. Low-latency execution means tools run close to your data. Integrated secrets management securely stores API keys and credentials. == Use cases @@ -62,8 +62,8 @@ Process every event with AI reasoning at scale. 
Invoke agents automatically from == Next steps -* xref:agents:quickstart.adoc[] -* xref:agents:concepts.adoc[] -* xref:agents:architecture-patterns.adoc[] -* xref:agents:integration-overview.adoc[] -* xref:agents:create-agent.adoc[] +* xref:get-started:quickstart.adoc[] +* xref:connect:concepts.adoc[] +* xref:connect:architecture-patterns.adoc[] +* xref:connect:integration-overview.adoc[] +* xref:connect:create-agent.adoc[] diff --git a/modules/agents/pages/architecture-patterns.adoc b/modules/connect/pages/architecture-patterns.adoc similarity index 95% rename from modules/agents/pages/architecture-patterns.adoc rename to modules/connect/pages/architecture-patterns.adoc index d107fb3..add2c8d 100644 --- a/modules/agents/pages/architecture-patterns.adoc +++ b/modules/connect/pages/architecture-patterns.adoc @@ -59,7 +59,7 @@ Every architecture pattern involves trade-offs. - *Complexity now versus complexity later:* Starting simple means faster initial development but may require refactoring. Starting structured requires more upfront work but makes the system easier to extend. -For foundational concepts on how agents execute and manage complexity, see xref:agents:concepts.adoc[]. +For foundational concepts on how agents execute and manage complexity, see xref:connect:concepts.adoc[]. == Single-agent pattern @@ -117,7 +117,7 @@ Use external glossterm:Agent2Agent (A2A) protocol[] for multi-organization workf === How it works -Agents communicate using the xref:agents:a2a-concepts.adoc[A2A protocol], a standard HTTP-based protocol for discovery and invocation. Each agent manages its own credentials and access control independently, and can deploy, scale, and update without coordinating with other agents. Agent cards define capabilities without exposing implementation details. +Agents communicate using the xref:connect:a2a-concepts.adoc[A2A protocol], a standard HTTP-based protocol for discovery and invocation. 
Each agent manages its own credentials and access control independently, and can deploy, scale, and update without coordinating with other agents. Agent cards define capabilities without exposing implementation details. === Example: Multi-platform customer service @@ -137,7 +137,7 @@ External A2A lets different teams own and deploy their agents independently, wit External A2A adds network latency on every cross-agent call, and authentication complexity multiplies with each agent requiring credential management. Removing capabilities or changing contracts requires coordination across consuming systems, and debugging requires tracing requests across organizational boundaries. -For implementation details on external A2A integration, see xref:agents:integration-overview.adoc[]. +For implementation details on external A2A integration, see xref:connect:integration-overview.adoc[]. == Common anti-patterns @@ -165,7 +165,7 @@ This pattern fails because: * Tool descriptions compete for limited prompt space * The agent invokes wrong tools with similar names, wasting iteration budget on selection mistakes -Limit tools per agent to 10-15 for optimal performance. Agents with more than 20-25 tools often show degraded tool selection accuracy. Use subagents to partition tools by domain. For tool design patterns, see xref:mcp:overview.adoc[]. +Limit tools per agent to 10-15 for optimal performance. Agents with more than 20-25 tools often show degraded tool selection accuracy. Use subagents to partition tools by domain. For tool design patterns, see xref:connect:mcp-overview.adoc[]. === Premature A2A splitting @@ -256,6 +256,6 @@ Provide clear error messages to users. Log errors for debugging. 
== Next steps -* xref:agents:integration-overview.adoc[] -* xref:agents:a2a-concepts.adoc[] -* xref:mcp:overview.adoc[] +* xref:connect:integration-overview.adoc[] +* xref:connect:a2a-concepts.adoc[] +* xref:connect:mcp-overview.adoc[] diff --git a/modules/agents/pages/byoa-register.adoc b/modules/connect/pages/byoa-register.adoc similarity index 90% rename from modules/agents/pages/byoa-register.adoc rename to modules/connect/pages/byoa-register.adoc index a33ab16..81a4dae 100644 --- a/modules/agents/pages/byoa-register.adoc +++ b/modules/connect/pages/byoa-register.adoc @@ -57,7 +57,7 @@ When a BYOA agent is registered: // TODO: cross-agent topology views are roadmap; remove the topology mention once those views ship publicly, or keep it once they exist. * AI Gateway can discover and call its A2A endpoint by name. Agents calling tools or sub-agents can target your BYOA agent the same way they target any other agent. -* Service-account credentials are issued to the agent for authenticated inbound calls and for OTLP telemetry ingestion. (Service-account creation follows the same pattern as managed agents. See xref:agents:concepts.adoc#service-account-authorization[Service account authorization].) +* Service-account credentials are issued to the agent for authenticated inbound calls and for OTLP telemetry ingestion. (Service-account creation follows the same pattern as managed agents. See xref:connect:concepts.adoc#service-account-authorization[Service account authorization].) // TODO: confirm whether service-account creation is automatic on BYOA registration (as it is for managed agents) or whether the BYOA variant requires the operator to bring their own credentials. Open Q with team-ai. @@ -67,8 +67,8 @@ Before you register a BYOA agent, make sure you have: * An agent running in your own infrastructure with a reachable HTTPS endpoint. * The ability to add an HTTPS route at `/.well-known/agent-card.json` (or equivalent) on that endpoint. See <<a2a-endpoint-contract>>.
-* The agent instrumented with OpenTelemetry, emitting the minimum required spans contract. See xref:observability:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)]. -* The `dataplane_adp_agent_create` permission, granted by the Writer built-in role. See xref:governance:permissions-reference.adoc#agent-management-permissions[Agent management permissions]. +* The agent instrumented with OpenTelemetry, emitting the minimum required spans contract. See xref:monitor:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)]. +* The `dataplane_adp_agent_create` permission, granted by the Writer built-in role. See xref:control:permissions-reference.adoc#agent-management-permissions[Agent management permissions]. * A name for the agent that follows DNS-1123 conventions (1–63 characters, lowercase letters / numbers / hyphens, starting with a letter). The name is immutable once the agent is registered. [[a2a-endpoint-contract]] @@ -87,7 +87,7 @@ GET https://<agent-endpoint>/.well-known/agent-card.json The response is a JSON document (the *agent card*) per the https://a2aproject.org/[A2A protocol specification], containing the agent's identity, supported skills, supported message formats, and the message endpoint URL. Per RFC 8615, the `/.well-known/agent-card.json` path is a standard discovery convention. -For the agent-card schema and a sample document, see xref:agents:a2a-concepts.adoc[Agent-to-agent concepts]. +For the agent-card schema and a sample document, see xref:connect:a2a-concepts.adoc[Agent-to-agent concepts]. // TODO: link the canonical A2A spec version Redpanda's runtime is compatible with once team-ai confirms the pin (the spec is iterating; we should peg to a specific version in docs). @@ -100,7 +100,7 @@ The agent card declares one or more endpoints that accept A2A messages: These are the standard A2A endpoints; your agent must expose at least the `:send` endpoint to be invokable.
The streaming endpoint is required for callers that want to surface tool-call and model-call progress in the calling agent's transcript. -For the wire shapes and request/response examples, see xref:agents:a2a-concepts.adoc[Agent-to-agent concepts]. +For the wire shapes and request/response examples, see xref:connect:a2a-concepts.adoc[Agent-to-agent concepts]. === Authentication @@ -149,7 +149,7 @@ After registering, confirm three things end-to-end: . *Discovery*: The agent appears in *AgentRegistryService.ListAgents* and in the governance dashboard's Agents list. *Type* shows as *BYOA*. . *A2A reachability*: A test A2A call to `https://aigw..clusters.rdpa.co/agents//v1/message:send` returns the expected response (or a `FAILED_PRECONDITION` if your agent isn't running, but no `404 Not Found`). -. *Telemetry*: Open the transcripts list, filter by your agent's `service.name`, and confirm a recent execution shows up with non-zero token counts and a non-empty conversation ID. If it doesn't, see xref:observability:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)] troubleshooting. +. *Telemetry*: Open the transcripts list, filter by your agent's `service.name`, and confirm a recent execution shows up with non-zero token counts and a non-empty conversation ID. If it doesn't, see xref:monitor:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)] troubleshooting. == Troubleshooting @@ -168,7 +168,7 @@ The symptom-driven checks in this section cover the three observable parts of th |The agent endpoint URL on the registration record doesn't match where your agent is actually running, or the agent isn't serving `/.well-known/agent-card.json`. Update the registration with the correct URL, or fix the well-known route. |Transcripts list shows the agent column blank for your agent's runs -|Your agent's OTel `service.name` resource attribute doesn't match the registered name (or isn't being emitted at all). See xref:observability:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)]. 
+|Your agent's OTel `service.name` resource attribute doesn't match the registered name (or isn't being emitted at all). See xref:monitor:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)]. |Inbound A2A calls fail with `401 Unauthorized` |Your agent is rejecting the access token AI Gateway presents. Confirm your agent's token validator points at the right issuer and accepts the right audience. @@ -181,12 +181,12 @@ The symptom-driven checks in this section cover the three observable parts of th This page does not cover: * *Building the agent itself.* Bring whatever runtime, framework, and language you want. The contract (A2A endpoint + telemetry attributes) is what makes it visible in ADP. -* *Tool use through MCP.* If your BYOA agent calls MCP servers hosted in AI Gateway, see xref:mcp:overview.adoc[MCP Servers] for the consumer-side flow. Tool calls automatically appear in your agent's transcript when MCP servers emit their own spans. +* *Tool use through MCP.* If your BYOA agent calls MCP servers hosted in AI Gateway, see xref:connect:mcp-overview.adoc[MCP Servers] for the consumer-side flow. Tool calls automatically appear in your agent's transcript when MCP servers emit their own spans. * *Migration from a managed declarative agent to BYOA.* The two coexist; BYOA is for agents that already exist outside the managed runtime, not for re-platforming existing managed agents. 
== Related topics -* xref:agents:a2a-concepts.adoc[Agent-to-agent concepts] -* xref:observability:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)] -* xref:agents:concepts.adoc[Agent concepts] -* xref:agents:create-agent.adoc[Create a declarative agent] +* xref:connect:a2a-concepts.adoc[Agent-to-agent concepts] +* xref:monitor:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)] +* xref:connect:concepts.adoc[Agent concepts] +* xref:connect:create-agent.adoc[Create a declarative agent] diff --git a/modules/integrations/pages/claude-code.adoc b/modules/connect/pages/claude-code.adoc similarity index 76% rename from modules/integrations/pages/claude-code.adoc rename to modules/connect/pages/claude-code.adoc index e86ef63..0b7c51b 100644 --- a/modules/integrations/pages/claude-code.adoc +++ b/modules/connect/pages/claude-code.adoc @@ -6,7 +6,7 @@ :learning-objective-2: Attach ADP-hosted MCP servers to Claude Code so its tools resolve against your managed tool catalog :learning-objective-3: Verify the connection and read usage -Claude Code is Anthropic's command-line coding agent. When you point it at an AI Gateway proxy URL instead of the public Anthropic API, your team's LLM calls flow through ADP: API keys stay in the dataplane secret store, usage rolls up in the *Cost & Usage* tab, and the calls land in xref:observability:transcripts.adoc[the Transcripts view] for investigation. +Claude Code is Anthropic's command-line coding agent. When you point it at an AI Gateway proxy URL instead of the public Anthropic API, your team's LLM calls flow through ADP: API keys stay in the dataplane secret store, usage rolls up in the *Cost & Usage* tab, and the calls land in xref:monitor:transcripts.adoc[the Transcripts view] for investigation. After completing this guide, you will be able to: @@ -23,13 +23,13 @@ Use this integration when you want to: * Apply per-provider transcript logging. 
For example, route a regulated team to a "no-logging" provider while keeping the default provider's full conversation history available for review. * Forward each developer's own Anthropic subscription token through ADP (Anthropic *Auth passthrough*), so the existing Max- or Team-plan entitlement still applies but the call is observed by ADP. -This guide does not cover building agents that *call* Claude Code. For that, see xref:agents:integration-overview.adoc[Integration patterns overview]. +This guide does not cover building agents that *call* Claude Code. For that, see xref:connect:integration-overview.adoc[Integration patterns overview]. == Prerequisites -* An Anthropic LLM provider configured in AI Gateway. If you haven't created one, follow xref:ai-gateway:configure-provider.adoc[Configure an LLM provider] and pick *Anthropic* as the type. Enable at least one Claude model (for example, `claude-sonnet-4-6` or `claude-opus-4-7`) in the model picker. +* An Anthropic LLM provider configured in AI Gateway. If you haven't created one, follow xref:gateway:configure-provider.adoc[Configure an LLM provider] and pick *Anthropic* as the type. Enable at least one Claude model (for example, `claude-sonnet-4-6` or `claude-opus-4-7`) in the model picker. * Claude Code installed on the developer's workstation. See https://docs.anthropic.com/claude-code[Anthropic's Claude Code documentation]. -* A Redpanda Cloud service account with permission to invoke the provider (`dataplane_adp_llmprovider_invoke`). See xref:governance:permissions-reference.adoc#llm-provider-permissions[LLM provider permissions]. Both shared-developer-tooling and per-developer setups use the same OIDC client-credentials grant; the differences are operational. +* A Redpanda Cloud service account with permission to invoke the provider (`dataplane_adp_llmprovider_invoke`). See xref:control:permissions-reference.adoc#llm-provider-permissions[LLM provider permissions]. 
Both shared-developer-tooling and per-developer setups use the same OIDC client-credentials grant; the differences are operational. == Get the proxy URL @@ -53,9 +53,9 @@ Claude Code reads the Anthropic base URL and an authentication token from enviro OIDC service account (default):: + -- -Use the OIDC `client_credentials` grant to mint an access token, then hand the token to Claude Code through `ANTHROPIC_AUTH_TOKEN`. This is the same flow xref:ai-gateway:connect-agent.adoc[Connect your agent] documents for SDK clients; the only thing different here is how Claude Code reads the token. +Use the OIDC `client_credentials` grant to mint an access token, then hand the token to Claude Code through `ANTHROPIC_AUTH_TOKEN`. This is the same flow xref:gateway:connect-agent.adoc[Connect your agent] documents for SDK clients; the only thing different here is how Claude Code reads the token. -. Mint an access token. The full cURL, Python, and Node.js examples live in xref:ai-gateway:connect-agent.adoc#authenticate-with-oidc-client-credentials[Authenticate with OIDC client credentials]. The short version: +. Mint an access token. The full cURL, Python, and Node.js examples live in xref:gateway:connect-agent.adoc#authenticate-with-oidc-client-credentials[Authenticate with OIDC client credentials]. The short version: + [source,bash] ---- @@ -86,7 +86,7 @@ Anthropic Auth passthrough:: -- Use when developers should authenticate to Anthropic with their own subscription (Max plan, Team plan, enterprise) and ADP should only observe the call. -. Confirm the provider has *Auth passthrough* enabled. The *Connection* card on the provider detail page shows the current setting. If it is off, an admin needs to flip it. See xref:ai-gateway:configure-provider.adoc#anthropic-authorization-passthrough[Anthropic: Authorization passthrough]. +. Confirm the provider has *Auth passthrough* enabled. The *Connection* card on the provider detail page shows the current setting. 
If it is off, an admin needs to flip it. See xref:gateway:configure-provider.adoc#anthropic-authorization-passthrough[Anthropic: Authorization passthrough]. . Set the base URL but pass the developer's own Anthropic key as the token: + @@ -114,9 +114,9 @@ Claude Code can call MCP servers for tool access. To use the MCP servers you alr claude mcp add https://aigw..clusters.rdpa.co/mcp/v1/servers/ ---- -For OAuth-protected MCP servers (most managed types), Claude Code prompts the developer to complete the consent flow on first use. ADP stores the resulting token in the per-user xref:mcp:user-delegated-oauth.adoc[token vault], so subsequent invocations reuse it. +For OAuth-protected MCP servers (most managed types), Claude Code prompts the developer to complete the consent flow on first use. ADP stores the resulting token in the per-user xref:connect:user-delegated-oauth.adoc[token vault], so subsequent invocations reuse it. -To front many MCP servers behind a single Claude Code endpoint, use xref:ai-gateway:aggregation.adoc[MCP aggregation] and point Claude Code at the aggregated URL. +To front many MCP servers behind a single Claude Code endpoint, use xref:gateway:aggregation.adoc[MCP aggregation] and point Claude Code at the aggregated URL. // TODO: Confirm the exact MCP URL shape (`/mcp/v1/servers/` vs. another path) against adp-production once the MCP servers detail page surfaces the canonical URL. @@ -130,7 +130,7 @@ claude "say hello" ---- . Open *LLM Providers > > Cost & Usage* in ADP. Within a few seconds the request appears in the *Requests over time* chart. -. Open xref:observability:transcripts.adoc[Transcripts] to read the full turn (if transcript logging is enabled on this provider). +. Open xref:monitor:transcripts.adoc[Transcripts] to read the full turn (if transcript logging is enabled on this provider). == Troubleshooting @@ -142,7 +142,7 @@ claude "say hello" |Token is missing, malformed, or expired. 
Re-mint the OIDC access token (it has a short TTL) and re-export `ANTHROPIC_AUTH_TOKEN`. Confirm the audience is `cloudv2-production.redpanda.cloud` and that Claude Code is sending the token as `Authorization: Bearer `. For Auth passthrough, confirm the upstream Anthropic key is valid. |`403 Forbidden` -|The service account or user lacks `dataplane_adp_llmprovider_invoke` on the provider. See xref:governance:permissions-reference.adoc#llm-provider-permissions[LLM provider permissions] or have an admin assign the `LLMProviderInvoker` built-in role. +|The service account or user lacks `dataplane_adp_llmprovider_invoke` on the provider. See xref:control:permissions-reference.adoc#llm-provider-permissions[LLM provider permissions] or have an admin assign the `LLMProviderInvoker` built-in role. |`404 Not Found` |`ANTHROPIC_BASE_URL` doesn't match the provider's Proxy URL. Copy it again from the *Connection* card on the detail page; the path segment after `/providers/` must be exactly the provider's `Name`. @@ -154,13 +154,13 @@ claude "say hello" |Allow a few seconds for the cost-reporting pipeline to catch up. If the chart still shows zero after a minute, verify the request actually reached the provider (the *Requests over time* chart populates first) and that you're looking at the right date range. |MCP tool calls return `OAuthConnectionRequired` -|The developer hasn't yet completed the consent flow for that MCP server. See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]; Claude Code surfaces the `authorize_url` in the error so the developer can finish the handshake. +|The developer hasn't yet completed the consent flow for that MCP server. See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]; Claude Code surfaces the `authorize_url` in the error so the developer can finish the handshake. 
|=== == Related topics -* xref:ai-gateway:configure-provider.adoc[] -* xref:ai-gateway:connect-agent.adoc[] -* xref:ai-gateway:configure-provider.adoc#anthropic-authorization-passthrough[Anthropic: Authorization passthrough] -* xref:integrations:remote-mcp-clients.adoc[] -* xref:ai-gateway:aggregation.adoc[] +* xref:gateway:configure-provider.adoc[] +* xref:gateway:connect-agent.adoc[] +* xref:gateway:configure-provider.adoc#anthropic-authorization-passthrough[Anthropic: Authorization passthrough] +* xref:connect:remote-mcp-clients.adoc[] +* xref:gateway:aggregation.adoc[] diff --git a/modules/agents/pages/concepts.adoc b/modules/connect/pages/concepts.adoc similarity index 96% rename from modules/agents/pages/concepts.adoc rename to modules/connect/pages/concepts.adoc index 6177648..7ebae5f 100644 --- a/modules/agents/pages/concepts.adoc +++ b/modules/connect/pages/concepts.adoc @@ -130,7 +130,7 @@ Tool design affects agent behavior. Coarse-grained tools that do many things res Choose granularity based on how often you'll reuse tool logic across workflows, whether intermediate results help with debugging, and how much control you want over tool invocation order. -For tool design guidance, see xref:mcp:overview.adoc[]. +For tool design guidance, see xref:connect:mcp-overview.adoc[]. == Context and state management @@ -150,11 +150,11 @@ When context exceeds the limit, the oldest tool results get truncated, the agent Design workflows to complete within context limits. Avoid unbounded tool chaining. 
-include::ROOT:partial$service-account-authorization.adoc[] +include::connect:partial$service-account-authorization.adoc[] == Next steps -* xref:agents:architecture-patterns.adoc[] -* xref:agents:quickstart.adoc[] -* xref:agents:system-prompts.adoc[] -* xref:mcp:overview.adoc[] +* xref:connect:architecture-patterns.adoc[] +* xref:get-started:quickstart.adoc[] +* xref:connect:system-prompts.adoc[] +* xref:connect:mcp-overview.adoc[] diff --git a/modules/agents/pages/create-agent.adoc b/modules/connect/pages/create-agent.adoc similarity index 89% rename from modules/agents/pages/create-agent.adoc rename to modules/connect/pages/create-agent.adoc index c017160..fb7fdea 100644 --- a/modules/agents/pages/create-agent.adoc +++ b/modules/connect/pages/create-agent.adoc @@ -21,9 +21,9 @@ After reading this page, you will be able to: == Prerequisites * An ADP BYOC environment. -* At least one xref:ai-gateway:configure-provider.adoc[LLM provider configured] in AI Gateway. -* Optional: One or more xref:mcp:overview.adoc[MCP servers] registered in ADP if you want the agent to call tools. -* A system prompt prepared in advance (see xref:agents:system-prompts.adoc[System prompt best practices]). +* At least one xref:gateway:configure-provider.adoc[LLM provider configured] in AI Gateway. +* Optional: One or more xref:connect:mcp-overview.adoc[MCP servers] registered in ADP if you want the agent to call tools. +* A system prompt prepared in advance (see xref:connect:system-prompts.adoc[System prompt best practices]). == Open the create form @@ -68,10 +68,10 @@ Start with Medium for production workloads. Monitor CPU and memory usage, then a In the *LLM Provider Configuration* section, select the glossterm:large language model (LLM)[] provider and model the agent uses to interpret user intent and decide which tools to invoke. -. Select a *Provider*. Required. The dropdown lists every LLM provider configured on this cluster (for example, `support-bedrock-v2`). 
To add a new provider, see xref:ai-gateway:configure-provider.adoc[Configure an LLM provider]. +. Select a *Provider*. Required. The dropdown lists every LLM provider configured on this cluster (for example, `support-bedrock-v2`). To add a new provider, see xref:gateway:configure-provider.adoc[Configure an LLM provider]. . Select a *Model*. Required. The model list is populated by the selected provider (for example, `us.anthropic.claude-haiku-4-5-20251001-v1:0`). + -For model selection guidance, see xref:agents:architecture-patterns.adoc#model-selection-guide[Model selection guide]. +For model selection guidance, see xref:connect:architecture-patterns.adoc#model-selection-guide[Model selection guide]. . Set *Max Iterations*. Default is 30. The proto contract allows 0-200. Max iterations is the upper bound on agent reasoning loops per request. Each iteration consumes tokens and adds latency. + @@ -126,7 +126,7 @@ Response format: - [Format guideline] ---- -For full guidance on writing effective prompts, see xref:agents:system-prompts.adoc[System prompt best practices]. +For full guidance on writing effective prompts, see xref:connect:system-prompts.adoc[System prompt best practices]. == Connect MCP tools @@ -143,7 +143,7 @@ In the *MCP Tools* section, select the glossterm:Model Context Protocol (MCP)[] TIP: Apply the principle of least privilege. Connect only the servers the agent needs. -For registering MCP servers, see xref:mcp:create-server.adoc[Create an MCP server]. +For registering MCP servers, see xref:connect:create-server.adoc[Create an MCP server]. == Add subagents @@ -159,7 +159,7 @@ The *Subagents (Optional)* section is for adding internal specialists within the The root agent orchestrates and delegates work to the appropriate subagent based on the request. -For multi-agent design patterns, see xref:agents:architecture-patterns.adoc[Agent architecture patterns]. 
+For multi-agent design patterns, see xref:connect:architecture-patterns.adoc[Agent architecture patterns]. == Configure the service account @@ -167,7 +167,7 @@ In the *Service Account* section, set the name for the service account that auth . Review the auto-generated *Service Account Name*. The default uses the pattern `cluster--agent--sa`. . Optionally, override the name. Up to 128 characters. The name cannot contain `<` or `>`. -. The service account is created automatically when you create the agent. ADP applies default permissions; for details and how to manage service accounts, see xref:agents:concepts.adoc#service-account-authorization[Service account authorization]. +. The service account is created automatically when you create the agent. ADP applies default permissions; for details and how to manage service accounts, see xref:connect:concepts.adoc#service-account-authorization[Service account authorization]. == Create the agent @@ -176,7 +176,7 @@ In the *Service Account* section, set the name for the service account that auth When the agent is running, ADP shows its HTTP endpoint URL on the *Configuration* tab. Copy the value from the endpoint field. -You can use this URL to call the agent programmatically or integrate it with external systems. See xref:agents:integration-overview.adoc[]. +You can use this URL to call the agent programmatically or integrate it with external systems. See xref:connect:integration-overview.adoc[]. == Issue agent credentials @@ -251,7 +251,7 @@ For agent card design guidance, see link:https://agent2agent.info/docs/guides/cr . Save your changes. The updated metadata appears immediately at `\https:///.well-known/agent-card.json`. -For more about agent cards and how they're consumed, see xref:agents:a2a-concepts.adoc#agent-cards[Agent cards]. +For more about agent cards and how they're consumed, see xref:connect:a2a-concepts.adoc#agent-cards[Agent cards]. 
== Test your agent @@ -270,7 +270,7 @@ Two tabs on the agent detail page support testing and observability: . Iterate on the system prompt, MCP tool selection, or model choice as needed. -For detailed observability strategies, see xref:agents:monitor.adoc[]. +For detailed observability strategies, see xref:monitor:monitor-agents.adoc[]. == Example configurations @@ -301,8 +301,8 @@ For detailed observability strategies, see xref:agents:monitor.adoc[]. == Next steps -* xref:agents:integration-overview.adoc[] -* xref:agents:system-prompts.adoc[] -* xref:mcp:create-server.adoc[] -* xref:agents:architecture-patterns.adoc[] -* xref:troubleshoot/troubleshoot-ai-agents.adoc[] +* xref:connect:integration-overview.adoc[] +* xref:connect:system-prompts.adoc[] +* xref:connect:create-server.adoc[] +* xref:connect:architecture-patterns.adoc[] +* xref:monitor:troubleshoot-ai-agents.adoc[] diff --git a/modules/mcp/pages/create-server.adoc b/modules/connect/pages/create-server.adoc similarity index 89% rename from modules/mcp/pages/create-server.adoc rename to modules/connect/pages/create-server.adoc index f72943d..c10e0e3 100644 --- a/modules/mcp/pages/create-server.adoc +++ b/modules/connect/pages/create-server.adoc @@ -22,7 +22,7 @@ After completing this guide, you will be able to: * For any auth mode that uses upstream credentials: the credentials in hand and a secret already created in the ADP secret store. Secret references must be `UPPER_SNAKE_CASE` (proto regex `^[A-Z][A-Z0-9_]*$`). For example: `MCP_API_KEY`, `OPENAI_API_KEY`. + // TODO: xref the ADP secrets-management page once confirmed. -* For user-delegated OAuth: an OAuth Provider resource already configured. See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. +* For user-delegated OAuth: an OAuth Provider resource already configured. See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. 
== Open the MCP Servers page @@ -40,7 +40,7 @@ The marketplace picker lists every managed type as a card and includes a *Remote // TODO: screenshot of the marketplace picker, with both a managed card and the Remote (Proxied) option visible. -For a tour of every managed type and which one fits your use case, see xref:mcp:managed/managed-catalog.adoc[Managed catalog]. To go deep on the self-managed path (transport choices, TLS, multi-server aggregation), see xref:mcp:register-remote.adoc[Register a self-managed server]. +For a tour of every managed type and which one fits your use case, see xref:connect:managed/managed-catalog.adoc[Managed catalog]. To go deep on the self-managed path (transport choices, TLS, multi-server aggregation), see xref:connect:register-remote.adoc[Register a self-managed server]. == Name and basic fields @@ -71,7 +71,7 @@ Each managed type ships its own configuration schema. The form on this page is r // TODO: screenshot of the SQL configuration form as the exemplar (it covers the most common field shapes). -For per-type fields, see the xref:mcp:managed/managed-catalog.adoc[Managed catalog]: a reference of every managed MCP type Redpanda hosts, grouped by category, with a description and a link to its deep-dive page where one exists. +For per-type fields, see the xref:connect:managed/managed-catalog.adoc[Managed catalog]: a reference of every managed MCP type Redpanda hosts, grouped by category, with a description and a link to its deep-dive page where one exists. [NOTE] ==== @@ -94,7 +94,7 @@ Two fields on top of the identity fields: |Transport |Yes -|`SSE` (server-sent events) or `Streamable HTTP` (newer bidirectional protocol). Pick whichever your server speaks. See xref:mcp:register-remote.adoc[Register a self-managed server] for how to test which transport your server uses. +|`SSE` (server-sent events) or `Streamable HTTP` (newer bidirectional protocol). Pick whichever your server speaks. 
See xref:connect:register-remote.adoc[Register a self-managed server] for how to test which transport your server uses. |=== == Configure authentication @@ -118,7 +118,7 @@ Both managed and self-managed servers offer the same five authentication modes. |2-legged OAuth client credentials. One shared upstream identity for every caller. Provide `client_id`, `client_secret_ref`, `token_url`, and any required `scopes`. |*User-delegated OAuth* -|Each end-user authenticates against the upstream system with their own credentials, and Redpanda injects the user's token at call time. Pick the configured *OAuth Provider* and the required scopes. The first time a user calls a tool that needs this server, Redpanda surfaces a consent prompt; Redpanda stores the resulting connection in the token vault, where it shows up under *My Connections*. See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] for the full flow. +|Each end-user authenticates against the upstream system with their own credentials, and Redpanda injects the user's token at call time. Pick the configured *OAuth Provider* and the required scopes. The first time a user calls a tool that needs this server, Redpanda surfaces a consent prompt; Redpanda stores the resulting connection in the token vault, where it shows up under *My Connections*. See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth] for the full flow. |=== NOTE: Choosing between *Service-account OAuth* and *User-delegated OAuth* is the credential-mode decision. Service-account auth gives every caller the same identity at the upstream; user-delegated auth gives each caller their own. @@ -140,7 +140,7 @@ NOTE: Defer advanced code-mode patterns (sandboxing limits, runtime selection, d . Click *Create*. The server appears in the list with a *Type* badge: *Managed* or *Self-managed*. . Open the detail page. The *Overview* tab shows the *API URL*: this is the MCP URL agents connect to. Copy it for use later. . 
Open the *Connection* tab to see ready-to-paste connection snippets for common MCP clients (Claude Code, Claude Desktop, ChatGPT, Cursor) pre-filled with this server's API URL. -. Open the *Inspector* tab. Redpanda performs a live `tools/list` against the server and lists every tool it discovered. See xref:mcp:test-tools.adoc[Test a server's tools] for how to call them. +. Open the *Inspector* tab. Redpanda performs a live `tools/list` against the server and lists every tool it discovered. See xref:connect:test-tools.adoc[Test a server's tools] for how to call them. A populated tools list confirms that the connection works and credentials resolve correctly. If the list is empty or the tab shows an error, see <>. @@ -198,7 +198,7 @@ rpk ai mcp update github-proxy \ |JSON blob carrying the managed type's `_config.proto` shape, including a `@type` URL. |`--user-oauth-provider` -|Self-managed servers only. Name of an OAuth Provider already registered under *OAuth Providers*. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. The principal needs `dataplane_aigateway_oauthprovider_attach` on the named provider (AI-893). Managed servers that support user-delegated OAuth carry their auth shape inside `--managed-config`. +|Self-managed servers only. Name of an OAuth Provider already registered under *OAuth Providers*. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. The principal needs `dataplane_aigateway_oauthprovider_attach` on the named provider (AI-893). Managed servers that support user-delegated OAuth carry their auth shape inside `--managed-config`. |`--user-oauth-scopes` |Self-managed servers only. Comma-separated scopes the server requires. Provide every scope any tool may need; user re-consent is required if scopes change later. 
@@ -240,7 +240,7 @@ The command resolves the gateway URL from your active rpk cloud profile and read == Related topics -* xref:mcp:user-delegated-oauth.adoc[] -* xref:mcp:test-tools.adoc[] -* xref:ai-gateway:aggregation.adoc[] -* xref:mcp:managed/managed-catalog.adoc[] +* xref:connect:user-delegated-oauth.adoc[] +* xref:connect:test-tools.adoc[] +* xref:gateway:aggregation.adoc[] +* xref:connect:managed/managed-catalog.adoc[] diff --git a/modules/mcp/pages/github-oauth-tutorial.adoc b/modules/connect/pages/github-oauth-tutorial.adoc similarity index 98% rename from modules/mcp/pages/github-oauth-tutorial.adoc rename to modules/connect/pages/github-oauth-tutorial.adoc index 77ed5af..1259007 100644 --- a/modules/mcp/pages/github-oauth-tutorial.adoc +++ b/modules/connect/pages/github-oauth-tutorial.adoc @@ -367,9 +367,9 @@ You could next create an agent using this GitHub MCP server. Each user who calls == Related topics -* xref:mcp:oauth-providers.adoc[Configure an OAuth Provider] -* xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] -* xref:mcp:create-server.adoc[Create an MCP Server] -* xref:mcp:managed/managed-catalog.adoc[Managed MCP Catalog] +* xref:connect:oauth-providers.adoc[Configure an OAuth Provider] +* xref:connect:user-delegated-oauth.adoc[User-delegated OAuth] +* xref:connect:create-server.adoc[Create an MCP Server] +* xref:connect:managed/managed-catalog.adoc[Managed MCP Catalog] * https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/authorizing-oauth-apps[GitHub OAuth Apps documentation^] * https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/scopes-for-oauth-apps[GitHub OAuth scopes^] diff --git a/modules/connect/pages/index.adoc b/modules/connect/pages/index.adoc new file mode 100644 index 0000000..8bf7cef --- /dev/null +++ b/modules/connect/pages/index.adoc @@ -0,0 +1,3 @@ += Connect data & tools +:description: Connect agents, MCP servers, and the apps and data sources behind them. 
+:page-layout: index diff --git a/modules/agents/pages/integration-overview.adoc b/modules/connect/pages/integration-overview.adoc similarity index 91% rename from modules/agents/pages/integration-overview.adoc rename to modules/connect/pages/integration-overview.adoc index 18cbadf..6e66e8c 100644 --- a/modules/agents/pages/integration-overview.adoc +++ b/modules/connect/pages/integration-overview.adoc @@ -25,17 +25,17 @@ ADP supports three primary integration scenarios based on who initiates the call | Agent needs capabilities | Your agent invokes MCP tools to fetch data, call APIs, or access external systems on-demand | Agent-initiated, synchronous, interactive workflows -| xref:mcp:overview.adoc[] +| xref:connect:mcp-overview.adoc[] | Pipeline processes events | Your Redpanda Connect pipeline invokes agents for each event in a stream using the `a2a_message` processor | Event-driven, automated, high-volume stream processing -| xref:agents:pipeline-integration-patterns.adoc[] +| xref:connect:pipeline-integration-patterns.adoc[] | External system calls agent | Your application or agent (hosted outside ADP) calls ADP agents using the A2A protocol | Backend services, CLI tools, custom UIs, multi-platform agent workflows -| xref:agents:a2a-concepts.adoc[] +| xref:connect:a2a-concepts.adoc[] |=== == Common use cases by pattern @@ -53,7 +53,7 @@ This pattern works well for interactive workflows: customer support lookups, app Avoid MCP tools for high-volume stream processing or automated workflows without user interaction. Use pipeline-initiated integration instead. -For implementation details, see xref:mcp:overview.adoc[]. +For implementation details, see xref:connect:mcp-overview.adoc[]. [[pipeline-processes-events]] === Pipeline processes events (`a2a_message`) @@ -64,7 +64,7 @@ The pipeline controls when agents execute. 
This pattern is ideal for automated, Common scenarios include real-time fraud detection, sentiment scoring for customer reviews, and content moderation that classifies and routes content. -For implementation details, see xref:agents:pipeline-integration-patterns.adoc[]. +For implementation details, see xref:connect:pipeline-integration-patterns.adoc[]. === External system calls agent @@ -74,7 +74,7 @@ External systems send requests using the A2A protocol and receive responses sync Common scenarios include backend services analyzing data as part of workflows, CLI tools invoking agents for batch tasks, custom UIs displaying agent responses, CRM agents coordinating with Redpanda agents, and multi-platform workflows spanning different infrastructure. -To learn how the A2A protocol enables this integration, see xref:agents:a2a-concepts.adoc[]. +To learn how the A2A protocol enables this integration, see xref:connect:a2a-concepts.adoc[]. == Pattern comparison @@ -123,6 +123,6 @@ Access tokens grant full access to the agent. Anyone with a valid token can send == Next steps -* xref:agents:a2a-concepts.adoc[] -* xref:mcp:overview.adoc[] -* xref:agents:pipeline-integration-patterns.adoc[] +* xref:connect:a2a-concepts.adoc[] +* xref:connect:mcp-overview.adoc[] +* xref:connect:pipeline-integration-patterns.adoc[] diff --git a/modules/mcp/pages/managed/bamboohr.adoc b/modules/connect/pages/managed/bamboohr.adoc similarity index 97% rename from modules/mcp/pages/managed/bamboohr.adoc rename to modules/connect/pages/managed/bamboohr.adoc index 898975e..e079a72 100644 --- a/modules/mcp/pages/managed/bamboohr.adoc +++ b/modules/connect/pages/managed/bamboohr.adoc @@ -103,6 +103,6 @@ Test the server in the Inspector before you connect it to an agent. 
Start with ` == Next steps -* xref:mcp:user-delegated-oauth.adoc[] -* xref:mcp:test-tools.adoc[] -* xref:agents:create-agent.adoc[] +* xref:connect:user-delegated-oauth.adoc[] +* xref:connect:test-tools.adoc[] +* xref:connect:create-agent.adoc[] diff --git a/modules/mcp/pages/managed/ironclad.adoc b/modules/connect/pages/managed/ironclad.adoc similarity index 93% rename from modules/mcp/pages/managed/ironclad.adoc rename to modules/connect/pages/managed/ironclad.adoc index 41eae4b..9ac9a6b 100644 --- a/modules/mcp/pages/managed/ironclad.adoc +++ b/modules/connect/pages/managed/ironclad.adoc @@ -25,8 +25,8 @@ It is *not* a replacement for the Ironclad web UI for complex workflow managemen Before you create the server, make sure you have: * An Ironclad tenant where you can register an OAuth app. -* An OAuth Provider configured in ADP for Ironclad. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. -* Familiarity with xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. +* An OAuth Provider configured in ADP for Ironclad. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. +* Familiarity with xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. 
== Get Ironclad credentials @@ -159,6 +159,6 @@ This page does not cover: == Related topics -* xref:mcp:oauth-providers.adoc[Configure an OAuth Provider] -* xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] -* xref:mcp:create-server.adoc[Create an MCP Server] +* xref:connect:oauth-providers.adoc[Configure an OAuth Provider] +* xref:connect:user-delegated-oauth.adoc[User-delegated OAuth] +* xref:connect:create-server.adoc[Create an MCP Server] diff --git a/modules/mcp/pages/managed/jira.adoc b/modules/connect/pages/managed/jira.adoc similarity index 97% rename from modules/mcp/pages/managed/jira.adoc rename to modules/connect/pages/managed/jira.adoc index b0d4232..1364eee 100644 --- a/modules/mcp/pages/managed/jira.adoc +++ b/modules/connect/pages/managed/jira.adoc @@ -32,7 +32,7 @@ The Jira managed type exposes tools for: + // TODO: confirm whether Redpanda publishes a reference Atlassian app or whether each customer brings their own. * An OAuth Provider in the Agentic Data Plane UI configured for Atlassian's authorize/token URLs and carrying the app's client credentials. -* Familiarity with xref:mcp:user-delegated-oauth.adoc[]. +* Familiarity with xref:connect:user-delegated-oauth.adoc[]. == Atlassian's scope model @@ -112,5 +112,5 @@ For agents that need both read and write capabilities, define the server's `requ == Limitations * *Atlassian app management*: The OAuth app and its callback URLs are managed in `developer.atlassian.com`, not in ADP. -* *Jira Server / Data Center* (self-hosted): This MCP type targets Atlassian Cloud. Self-hosted Jira may need a self-managed MCP server instead. See xref:mcp:register-remote.adoc[Register a self-managed MCP server]. +* *Jira Server / Data Center* (self-hosted): This MCP type targets Atlassian Cloud. Self-hosted Jira may need a self-managed MCP server instead. See xref:connect:register-remote.adoc[Register a self-managed MCP server]. * *Confluence access*: Separate scope namespace; not exposed by this MCP server. 
diff --git a/modules/mcp/pages/managed/kafka.adoc b/modules/connect/pages/managed/kafka.adoc similarity index 98% rename from modules/mcp/pages/managed/kafka.adoc rename to modules/connect/pages/managed/kafka.adoc index 85c139f..8672fbf 100644 --- a/modules/mcp/pages/managed/kafka.adoc +++ b/modules/connect/pages/managed/kafka.adoc @@ -59,7 +59,7 @@ The Kafka managed type proxies a managed Kafka client. It exposes tools for: // TODO: confirm exact tool names for produce/consume/list-topics and capture screenshots. -See xref:mcp:test-tools.adoc[] for general Inspector usage. +See xref:connect:test-tools.adoc[] for general Inspector usage. == Authentication diff --git a/modules/mcp/pages/managed/managed-catalog.adoc b/modules/connect/pages/managed/managed-catalog.adoc similarity index 82% rename from modules/mcp/pages/managed/managed-catalog.adoc rename to modules/connect/pages/managed/managed-catalog.adoc index 2632707..bec721d 100644 --- a/modules/mcp/pages/managed/managed-catalog.adoc +++ b/modules/connect/pages/managed/managed-catalog.adoc @@ -3,7 +3,7 @@ :page-topic-type: reference :personas: agent_builder, platform_engineer -Managed MCP servers are in-process implementations Redpanda hosts for you. Each type has a fixed set of glossterm:tool[,tools] and a type-specific configuration schema. To create one, open *MCP Servers > Create Server* in the Agentic Data Plane UI and pick the type from the marketplace picker. See xref:mcp:create-server.adoc[Create an MCP Server] for the full create flow. +Managed MCP servers are in-process implementations Redpanda hosts for you. Each type has a fixed set of glossterm:tool[,tools] and a type-specific configuration schema. To create one, open *MCP Servers > Create Server* in the Agentic Data Plane UI and pick the type from the marketplace picker. See xref:connect:create-server.adoc[Create an MCP Server] for the full create flow. 
// Source: `cloudv2` `apps/aigw/internal/mcp/managed/defaults.go` and managed type registration files on `origin/main`, verified 2026-05-10. This catalog lists the 36 managed MCP server types Redpanda registers by default, grouped by category. @@ -27,7 +27,7 @@ This catalog lists the 36 managed MCP server types Redpanda registers by default |The managed type supports user-delegated OAuth for that system. |=== -If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-managed server] instead. +If any of these answers are "no," prefer xref:connect:register-remote.adoc[a self-managed server] instead. == AI @@ -79,15 +79,15 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*Jira* |Manage Jira issues, projects, and workflows. -|xref:mcp:managed/jira.adoc[See the deep-dive] +|xref:connect:managed/jira.adoc[See the deep-dive] |*Slack* |Post messages and read channels on Slack. -|xref:mcp:managed/slack.adoc[See the deep-dive] +|xref:connect:managed/slack.adoc[See the deep-dive] |*Zendesk* |Search and manage Zendesk Support tickets, users, and Help Center articles. -|xref:mcp:managed/zendesk.adoc[See the deep-dive] +|xref:connect:managed/zendesk.adoc[See the deep-dive] |=== == Database @@ -102,7 +102,7 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*Metabase* |Search tables and metrics, run queries, and inspect saved questions in Metabase. -|xref:mcp:managed/metabase.adoc[See the deep-dive] +|xref:connect:managed/metabase.adoc[See the deep-dive] |*MongoDB* |Query collections and documents in MongoDB. @@ -118,7 +118,7 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*SQL* |Query SQL databases (Postgres, MySQL, ClickHouse, MSSQL, SQLite) with MCP. 
-|xref:mcp:managed/sql.adoc[See the deep-dive] +|xref:connect:managed/sql.adoc[See the deep-dive] |=== == Google @@ -148,7 +148,7 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*Kafka* |Produce, consume, and inspect topics on Kafka or Redpanda brokers. -|xref:mcp:managed/kafka.adoc[See the deep-dive] +|xref:connect:managed/kafka.adoc[See the deep-dive] |*NATS* |Publish and subscribe on NATS and NATS JetStream. @@ -167,7 +167,7 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*BambooHR* |Access employee directory, time-off, and performance data from BambooHR. -|xref:mcp:managed/bamboohr.adoc[See the deep-dive] +|xref:connect:managed/bamboohr.adoc[See the deep-dive] |*BILL (bill.com)* |Manage accounts-payable bills, vendors, AR invoices, payments, and customers in BILL. @@ -183,7 +183,7 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*Ironclad* |Read and manage contracts in Ironclad CLM. -|xref:mcp:managed/ironclad.adoc[See the deep-dive] +|xref:connect:managed/ironclad.adoc[See the deep-dive] |*Morningstar Portfolio Analytics* |Run X-Ray, performance, risk-score, hypothetical, and ESG analytics on caller-supplied portfolios with Morningstar Direct Web Services. @@ -195,7 +195,7 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*NetSuite* |Query Oracle NetSuite records and run SuiteQL with the SuiteTalk REST API. -|xref:mcp:managed/netsuite.adoc[See the deep-dive] +|xref:connect:managed/netsuite.adoc[See the deep-dive] |*Okta* |Manage Okta users and groups. @@ -203,11 +203,11 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*OpenAPI* |Expose any OpenAPI/Swagger HTTP API as MCP tools. -|xref:mcp:managed/openapi.adoc[See the deep-dive] +|xref:connect:managed/openapi.adoc[See the deep-dive] |*Ramp* |Manage Ramp corporate cards, transactions, spend limits, and reimbursements. 
-|xref:mcp:managed/ramp.adoc[See the deep-dive] +|xref:connect:managed/ramp.adoc[See the deep-dive] |*Salesforce* |Query, create, update, and delete Salesforce CRM records using SOQL and the REST API. @@ -219,11 +219,11 @@ If any of these answers are "no," prefer xref:mcp:register-remote.adoc[a self-ma |*Workday* |Drive Workday Human Resources business processes with SOAP. -|xref:mcp:managed/workday.adoc[See the deep-dive] +|xref:connect:managed/workday.adoc[See the deep-dive] |=== == Next steps -* xref:mcp:create-server.adoc[] -* xref:mcp:test-tools.adoc[] -* xref:mcp:user-delegated-oauth.adoc[] +* xref:connect:create-server.adoc[] +* xref:connect:test-tools.adoc[] +* xref:connect:user-delegated-oauth.adoc[] diff --git a/modules/mcp/pages/managed/metabase.adoc b/modules/connect/pages/managed/metabase.adoc similarity index 98% rename from modules/mcp/pages/managed/metabase.adoc rename to modules/connect/pages/managed/metabase.adoc index dcd304d..5e616ad 100644 --- a/modules/mcp/pages/managed/metabase.adoc +++ b/modules/connect/pages/managed/metabase.adoc @@ -291,7 +291,7 @@ This page does not cover: == Related topics -* xref:mcp:create-server.adoc[Create an MCP Server] -* xref:mcp:test-tools.adoc[Test a server's tools] -* xref:mcp:managed/managed-catalog.adoc[Managed MCP Server Catalog] -* xref:mcp:managed/sql.adoc[SQL managed MCP] +* xref:connect:create-server.adoc[Create an MCP Server] +* xref:connect:test-tools.adoc[Test a server's tools] +* xref:connect:managed/managed-catalog.adoc[Managed MCP Server Catalog] +* xref:connect:managed/sql.adoc[SQL managed MCP] diff --git a/modules/mcp/pages/managed/netsuite.adoc b/modules/connect/pages/managed/netsuite.adoc similarity index 98% rename from modules/mcp/pages/managed/netsuite.adoc rename to modules/connect/pages/managed/netsuite.adoc index e9ea6e8..96d2ca1 100644 --- a/modules/mcp/pages/managed/netsuite.adoc +++ b/modules/connect/pages/managed/netsuite.adoc @@ -140,6 +140,6 @@ NetSuite record filters use 
NetSuite-specific forms: == Next steps -* xref:mcp:user-delegated-oauth.adoc[] -* xref:mcp:test-tools.adoc[] -* xref:agents:create-agent.adoc[] +* xref:connect:user-delegated-oauth.adoc[] +* xref:connect:test-tools.adoc[] +* xref:connect:create-agent.adoc[] diff --git a/modules/mcp/pages/managed/openapi.adoc b/modules/connect/pages/managed/openapi.adoc similarity index 95% rename from modules/mcp/pages/managed/openapi.adoc rename to modules/connect/pages/managed/openapi.adoc index 2c710bf..cb01082 100644 --- a/modules/mcp/pages/managed/openapi.adoc +++ b/modules/connect/pages/managed/openapi.adoc @@ -66,7 +66,7 @@ OpenAPI is the most flexible managed type for auth: the upstream API can need an |API supports OAuth client credentials and you want one shared identity. |*User-delegated OAuth* -|API supports OAuth on behalf of users and you want per-user identities. Requires an OAuth Provider configured for that API. See xref:mcp:user-delegated-oauth.adoc[]. +|API supports OAuth on behalf of users and you want per-user identities. Requires an OAuth Provider configured for that API. See xref:connect:user-delegated-oauth.adoc[]. |=== == Test @@ -77,7 +77,7 @@ OpenAPI is the most flexible managed type for auth: the upstream API can need an // TODO: capture screenshots of a non-trivial OpenAPI spec rendered in the Inspector once we walk a real example on `adp-production`. -See xref:mcp:test-tools.adoc[] for general Inspector usage. +See xref:connect:test-tools.adoc[] for general Inspector usage. == Use with agents @@ -109,6 +109,6 @@ Once tools generate cleanly, point an agent at the *API URL* on the server's det == Limitations -* *Custom tool logic*: The OpenAPI type is purely a spec-to-tools generator. For business logic on top of the API, use a xref:mcp:register-remote.adoc[self-managed MCP server]. +* *Custom tool logic*: The OpenAPI type is purely a spec-to-tools generator. 
For business logic on top of the API, use a xref:connect:register-remote.adoc[self-managed MCP server]. * *GraphQL APIs*: OpenAPI doesn't describe GraphQL. For GraphQL APIs, use a self-managed server. * *gRPC services*: Same as GraphQL: use a self-managed server. diff --git a/modules/mcp/pages/managed/ramp.adoc b/modules/connect/pages/managed/ramp.adoc similarity index 94% rename from modules/mcp/pages/managed/ramp.adoc rename to modules/connect/pages/managed/ramp.adoc index f592365..179176f 100644 --- a/modules/mcp/pages/managed/ramp.adoc +++ b/modules/connect/pages/managed/ramp.adoc @@ -25,8 +25,8 @@ It is suitable for expense analysis, spend-policy enforcement, and corporate car Before you create the server, make sure you have: * A Ramp account with admin access to the Ramp Developer Portal. -* An OAuth Provider configured in ADP for Ramp. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. -* Familiarity with xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. +* An OAuth Provider configured in ADP for Ramp. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. +* Familiarity with xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. 
== Get Ramp credentials @@ -216,6 +216,6 @@ This page does not cover: == Related topics -* xref:mcp:oauth-providers.adoc[Configure an OAuth Provider] -* xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] -* xref:mcp:create-server.adoc[Create an MCP Server] +* xref:connect:oauth-providers.adoc[Configure an OAuth Provider] +* xref:connect:user-delegated-oauth.adoc[User-delegated OAuth] +* xref:connect:create-server.adoc[Create an MCP Server] diff --git a/modules/mcp/pages/managed/slack.adoc b/modules/connect/pages/managed/slack.adoc similarity index 96% rename from modules/mcp/pages/managed/slack.adoc rename to modules/connect/pages/managed/slack.adoc index d41dac0..52599a3 100644 --- a/modules/mcp/pages/managed/slack.adoc +++ b/modules/connect/pages/managed/slack.adoc @@ -32,8 +32,8 @@ Before you create the server, make sure you have: * A Slack OAuth app registered (your own or a Redpanda-published reference app). + // TODO: confirm whether Redpanda ships a reference Slack OAuth app or whether each customer brings their own. Document the path. -* An OAuth Provider configured in the Agentic Data Plane UI under *OAuth Providers*, pointing at Slack's authorize/token URLs and carrying the OAuth app's client credentials. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. -* Familiarity with xref:mcp:user-delegated-oauth.adoc[]. +* An OAuth Provider configured in the Agentic Data Plane UI under *OAuth Providers*, pointing at Slack's authorize/token URLs and carrying the OAuth app's client credentials. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. +* Familiarity with xref:connect:user-delegated-oauth.adoc[]. 
== Configure diff --git a/modules/mcp/pages/managed/sql.adoc b/modules/connect/pages/managed/sql.adoc similarity index 99% rename from modules/mcp/pages/managed/sql.adoc rename to modules/connect/pages/managed/sql.adoc index f5fd10d..dea7c9a 100644 --- a/modules/mcp/pages/managed/sql.adoc +++ b/modules/connect/pages/managed/sql.adoc @@ -55,7 +55,7 @@ It exposes a small set of tools for querying schemas and running parameterized q == Test -After create, exercise the server through the Inspector tab. See xref:mcp:test-tools.adoc[]. +After create, exercise the server through the Inspector tab. See xref:connect:test-tools.adoc[]. A canonical first call: diff --git a/modules/mcp/pages/managed/workday.adoc b/modules/connect/pages/managed/workday.adoc similarity index 98% rename from modules/mcp/pages/managed/workday.adoc rename to modules/connect/pages/managed/workday.adoc index cfb3812..c8027d4 100644 --- a/modules/mcp/pages/managed/workday.adoc +++ b/modules/connect/pages/managed/workday.adoc @@ -197,5 +197,5 @@ This page does not cover: == Related topics -* xref:mcp:create-server.adoc[Create an MCP Server] -* xref:mcp:test-tools.adoc[Test a server's tools] +* xref:connect:create-server.adoc[Create an MCP Server] +* xref:connect:test-tools.adoc[Test a server's tools] diff --git a/modules/mcp/pages/managed/zendesk.adoc b/modules/connect/pages/managed/zendesk.adoc similarity index 94% rename from modules/mcp/pages/managed/zendesk.adoc rename to modules/connect/pages/managed/zendesk.adoc index a9d0f60..feb6f20 100644 --- a/modules/mcp/pages/managed/zendesk.adoc +++ b/modules/connect/pages/managed/zendesk.adoc @@ -31,7 +31,7 @@ Before you create the server, make sure you have: * A Zendesk Support instance. * For *API token* mode: ability to create an API token under *Apps and integrations > APIs > Zendesk API*. -* For *User OAuth* mode: a Zendesk OAuth client and an OAuth Provider configured in ADP. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. 
+* For *User OAuth* mode: a Zendesk OAuth client and an OAuth Provider configured in ADP. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. == Get Zendesk credentials @@ -50,7 +50,7 @@ Before you create the server, make sure you have: For per-user authentication, register an OAuth client on Zendesk and a matching OAuth Provider in ADP: . Configure a Zendesk OAuth client under *Apps and integrations > APIs > OAuth Clients* (Confidential client, Authorization Code grant). -. Register a matching OAuth Provider in ADP. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. Use Zendesk's authorize and token endpoints. +. Register a matching OAuth Provider in ADP. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. Use Zendesk's authorize and token endpoints. . Each end-user authenticates once through the OAuth flow; tokens are stored in the gateway's token vault. *Required scopes*: `read tickets:write hc:read` covers all 12 tools. Drop `tickets:write` if the MCP only needs to read. @@ -255,7 +255,7 @@ Common symptoms and fixes: |The agent role on Zendesk's side is below *Light Agent*. Upgrade the role or use API-token mode with a Light Agent or Admin email. |`OAuthConnectionRequired` (User-OAuth mode) -|First call from a user with no stored token. The user completes Zendesk's OAuth consent flow, the token lands in the vault, and subsequent calls reuse it. See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. +|First call from a user with no stored token. The user completes Zendesk's OAuth consent flow, the token lands in the vault, and subsequent calls reuse it. See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. |`scope_upgrade_required` (User-OAuth mode) |Server's `required_scopes` was extended after users had already consented. Users re-consent with the higher scope. 
@@ -273,7 +273,7 @@ This page does not cover: == Related topics -* xref:mcp:oauth-providers.adoc[Configure an OAuth Provider] -* xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] -* xref:mcp:create-server.adoc[Create an MCP Server] -* xref:mcp:test-tools.adoc[Test a server's tools] +* xref:connect:oauth-providers.adoc[Configure an OAuth Provider] +* xref:connect:user-delegated-oauth.adoc[User-delegated OAuth] +* xref:connect:create-server.adoc[Create an MCP Server] +* xref:connect:test-tools.adoc[Test a server's tools] diff --git a/modules/mcp/pages/overview.adoc b/modules/connect/pages/mcp-overview.adoc similarity index 83% rename from modules/mcp/pages/overview.adoc rename to modules/connect/pages/mcp-overview.adoc index 481b8a4..d283e95 100644 --- a/modules/mcp/pages/overview.adoc +++ b/modules/connect/pages/mcp-overview.adoc @@ -19,7 +19,7 @@ Redpanda offers two kinds of MCP server backends: |=== // Source: `cloudv2` `apps/aigw/internal/mcp/managed/defaults.go` on `origin/main`, verified 2026-05-10. -The default registry contains 36 managed MCP server types. See xref:mcp:managed/managed-catalog.adoc[Managed catalog] for the verified list. +The default registry contains 36 managed MCP server types. See xref:connect:managed/managed-catalog.adoc[Managed catalog] for the verified list. == When to use each @@ -52,10 +52,10 @@ The default registry contains 36 managed MCP server types. See xref:mcp:managed/ The Agentic Data Plane UI has four top-level areas: -* *LLM Providers*: OpenAI, Anthropic, Bedrock, Gemini, OpenAI-compatible endpoints. See xref:ai-gateway:configure-provider.adoc[Configure your LLM provider]. +* *LLM Providers*: OpenAI, Anthropic, Bedrock, Gemini, OpenAI-compatible endpoints. See xref:gateway:configure-provider.adoc[Configure your LLM provider]. * *MCP Servers*: You are here. Both managed and self-managed servers live in this list. -* *OAuth Providers*: Reusable OAuth provider definitions used by user-delegated MCP auth (and elsewhere). 
See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. -* *My Connections*: Per-user OAuth connections for user-delegated MCP servers. See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. +* *OAuth Providers*: Reusable OAuth provider definitions used by user-delegated MCP auth (and elsewhere). See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. +* *My Connections*: Per-user OAuth connections for user-delegated MCP servers. See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. // TODO: screenshot of the four-area sidebar on adp-production once standalone-ADP wording is final. @@ -64,13 +64,13 @@ The Agentic Data Plane UI has four top-level areas: * *Tool discovery*: After you create or register a server, Redpanda performs a live `tools/list` against it and populates the server's detail page so you can see which tools are visible to agents. * *Service-account and user-delegated auth*: Pick a single shared upstream identity for all callers, or have each end-user authenticate against the upstream system with their own credentials. * *Code mode*: Optionally expose `{name}_search` and `{name}_execute` helpers so an agent can discover and orchestrate tools through generated Python or JavaScript instead of calling them one at a time. -* *Inspector*: Test each tool, resource, and prompt directly from the Agentic Data Plane UI before pointing an agent at the server. See xref:mcp:test-tools.adoc[Test a server's tools]. -* *Aggregation*: Connect your agent to a single MCP URL and have Redpanda fan out across multiple registered MCP servers. See xref:ai-gateway:aggregation.adoc[MCP aggregation]. +* *Inspector*: Test each tool, resource, and prompt directly from the Agentic Data Plane UI before pointing an agent at the server. See xref:connect:test-tools.adoc[Test a server's tools]. +* *Aggregation*: Connect your agent to a single MCP URL and have Redpanda fan out across multiple registered MCP servers. 
See xref:gateway:aggregation.adoc[MCP aggregation]. == Next steps -* xref:mcp:create-server.adoc[] -* xref:mcp:test-tools.adoc[] -* xref:mcp:user-delegated-oauth.adoc[] -* xref:mcp:register-remote.adoc[] -* xref:mcp:managed/managed-catalog.adoc[] +* xref:connect:create-server.adoc[] +* xref:connect:test-tools.adoc[] +* xref:connect:user-delegated-oauth.adoc[] +* xref:connect:register-remote.adoc[] +* xref:connect:managed/managed-catalog.adoc[] diff --git a/modules/mcp/pages/oauth-providers.adoc b/modules/connect/pages/oauth-providers.adoc similarity index 93% rename from modules/mcp/pages/oauth-providers.adoc rename to modules/connect/pages/oauth-providers.adoc index ff34305..c36bbb3 100644 --- a/modules/mcp/pages/oauth-providers.adoc +++ b/modules/connect/pages/oauth-providers.adoc @@ -21,7 +21,7 @@ OAuth providers and OAuth clients govern *opposite directions* of authentication * An *OAuth provider* (this page) governs *outbound* authentication: AI Gateway authenticating to an upstream system (GitHub, Slack, Salesforce, and so on) on a user's behalf so MCP servers can call that upstream. * An *OAuth client* governs *inbound* authentication: an external app (Claude Desktop, ChatGPT, Copilot Studio) authenticating to AI Gateway so the app's users can invoke MCP tools. -The two live under separate sidebar entries (*OAuth Providers* and *OAuth Clients*) with separate API definitions, permissions, and lifecycles. To register or manage an OAuth client (including revoking its refresh tokens to force a re-sign-in), see xref:integrations:remote-mcp-clients.adoc[Connect remote MCP clients to AI Gateway]. +The two live under separate sidebar entries (*OAuth Providers* and *OAuth Clients*) with separate API definitions, permissions, and lifecycles. To register or manage an OAuth client (including revoking its refresh tokens to force a re-sign-in), see xref:connect:remote-mcp-clients.adoc[Connect remote MCP clients to AI Gateway]. 
==== == Prerequisites @@ -235,7 +235,7 @@ rpk ai oauth create \ == Attach to an MCP server -To attach an OAuth provider to an MCP server, the principal needs `dataplane_aigateway_oauthprovider_attach` on the named provider plus the usual `mcpserver_create` / `mcpserver_update` permission. See xref:mcp:create-server.adoc[Create an MCP Server] for the full attach flow and xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] for the consent flow that runs on first call. +To attach an OAuth provider to an MCP server, the principal needs `dataplane_aigateway_oauthprovider_attach` on the named provider plus the usual `mcpserver_create` / `mcpserver_update` permission. See xref:connect:create-server.adoc[Create an MCP Server] for the full attach flow and xref:connect:user-delegated-oauth.adoc[User-delegated OAuth] for the consent flow that runs on first call. == Edit and rotate credentials @@ -278,11 +278,11 @@ Common symptoms and fixes: == Related topics -* xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] -* xref:mcp:create-server.adoc[Create an MCP Server] -* xref:mcp:managed/slack.adoc[Slack managed MCP] -* xref:mcp:managed/jira.adoc[Jira managed MCP] -* xref:mcp:managed/zendesk.adoc[Zendesk managed MCP] -* xref:mcp:managed/workday.adoc[Workday managed MCP] -* xref:mcp:managed/ironclad.adoc[Ironclad managed MCP] -* xref:mcp:managed/ramp.adoc[Ramp managed MCP] +* xref:connect:user-delegated-oauth.adoc[User-delegated OAuth] +* xref:connect:create-server.adoc[Create an MCP Server] +* xref:connect:managed/slack.adoc[Slack managed MCP] +* xref:connect:managed/jira.adoc[Jira managed MCP] +* xref:connect:managed/zendesk.adoc[Zendesk managed MCP] +* xref:connect:managed/workday.adoc[Workday managed MCP] +* xref:connect:managed/ironclad.adoc[Ironclad managed MCP] +* xref:connect:managed/ramp.adoc[Ramp managed MCP] diff --git a/modules/agents/pages/pipeline-integration-patterns.adoc b/modules/connect/pages/pipeline-integration-patterns.adoc similarity index 81% 
rename from modules/agents/pages/pipeline-integration-patterns.adoc rename to modules/connect/pages/pipeline-integration-patterns.adoc index 937e42f..dc246b0 100644 --- a/modules/agents/pages/pipeline-integration-patterns.adoc +++ b/modules/connect/pages/pipeline-integration-patterns.adoc @@ -14,11 +14,11 @@ After reading this page, you will be able to: * [ ] {learning-objective-2} * [ ] {learning-objective-3} -This page focuses on pipelines calling agents (pipeline-initiated integration). For agents invoking MCP tools, see xref:agents:integration-overview.adoc#agent-needs-capabilities[Agent needs capabilities]. For external applications calling agents, see xref:agents:integration-overview.adoc#external-system-calls-agent[External system calls agent]. +This page focuses on pipelines calling agents (pipeline-initiated integration). For agents invoking MCP tools, see xref:connect:integration-overview.adoc#agent-needs-capabilities[Agent needs capabilities]. For external applications calling agents, see xref:connect:integration-overview.adoc#external-system-calls-agent[External system calls agent]. == How pipelines invoke agents -Pipelines use the xref:cloud-data-platform:develop:connect/components/processors/a2a_message.adoc[`a2a_message`] processor to invoke agents for each event in a stream. The processor uses the xref:agents:a2a-concepts.adoc[A2A protocol] to discover and communicate with agents. +Pipelines use the xref:cloud-data-platform:develop:connect/components/processors/a2a_message.adoc[`a2a_message`] processor to invoke agents for each event in a stream. The processor uses the xref:connect:a2a-concepts.adoc[A2A protocol] to discover and communicate with agents. When the `a2a_message` processor receives an event, it sends the event data to the specified agent along with any prompt you provide. The agent processes the event using its reasoning capabilities and returns a response. 
The processor then adds the agent's response to the event for further processing or output. @@ -49,7 +49,7 @@ Invoke agents automatically for each event: include::ROOT:example$pipelines/event-driven-invocation.yaml[] ---- -Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:agents:a2a-concepts.adoc#agent-card-location[Agent card location]. +Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:connect:a2a-concepts.adoc#agent-card-location[Agent card location]. **Use case:** Real-time fraud detection on every transaction. @@ -62,7 +62,7 @@ Add AI-generated metadata to events: include::ROOT:example$pipelines/streaming-enrichment.yaml[tag=processors,indent=0] ---- -Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:agents:a2a-concepts.adoc#agent-card-location[Agent card location]. +Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:connect:a2a-concepts.adoc#agent-card-location[Agent card location]. **Use case:** Add sentiment scores to every customer review in real-time. @@ -75,7 +75,7 @@ Process events in the background: include::ROOT:example$pipelines/async-workflows.yaml[tag=pipeline,indent=0] ---- -Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:agents:a2a-concepts.adoc#agent-card-location[Agent card location]. +Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:connect:a2a-concepts.adoc#agent-card-location[Agent card location]. **Use case:** Nightly batch summarization of reports where latency is acceptable. @@ -88,7 +88,7 @@ Chain multiple agents in sequence: include::ROOT:example$pipelines/multi-agent-orchestration.yaml[tag=processors,indent=0] ---- -Replace the agent URL variables with your actual agent card URLs. See xref:agents:a2a-concepts.adoc#agent-card-location[Agent card location]. +Replace the agent URL variables with your actual agent card URLs. See xref:connect:a2a-concepts.adoc#agent-card-location[Agent card location]. 
**Use case:** Translate feedback, analyze sentiment, then route to appropriate team. @@ -101,7 +101,7 @@ Use agent reasoning for complex transformations: include::ROOT:example$pipelines/agent-transformation.yaml[tag=processors,indent=0] ---- -Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:agents:a2a-concepts.adoc#agent-card-location[Agent card location]. +Replace `AGENT_CARD_URL` with your actual agent card URL. See xref:connect:a2a-concepts.adoc#agent-card-location[Agent card location]. **Use case:** Convert natural language queries to SQL for downstream processing. @@ -113,7 +113,7 @@ Do not use the `a2a_message` processor when: * The transformation is simple and does not require AI reasoning. * Agents need to dynamically decide what data to fetch based on context. -For a detailed comparison between pipeline-initiated and agent-initiated integration patterns, see xref:agents:integration-overview.adoc#pattern-comparison[Pattern comparison]. +For a detailed comparison between pipeline-initiated and agent-initiated integration patterns, see xref:connect:integration-overview.adoc#pattern-comparison[Pattern comparison]. == Example: Real-time fraud detection @@ -126,7 +126,7 @@ This example shows a complete pipeline that analyzes every transaction with an a include::ROOT:example$pipelines/fraud-detection-routing.yaml[] ---- -Replace `AGENT_CARD_URL` with your agent card URL. See xref:agents:a2a-concepts.adoc#agent-card-location[Agent card location]. +Replace `AGENT_CARD_URL` with your agent card URL. See xref:connect:a2a-concepts.adoc#agent-card-location[Agent card location]. 
This pipeline: @@ -137,6 +137,6 @@ This pipeline: == Next steps -* xref:mcp:overview.adoc[] -* xref:agents:integration-overview.adoc[] -* xref:agents:a2a-concepts.adoc[] +* xref:connect:mcp-overview.adoc[] +* xref:connect:integration-overview.adoc[] +* xref:connect:a2a-concepts.adoc[] diff --git a/modules/mcp/pages/register-remote.adoc b/modules/connect/pages/register-remote.adoc similarity index 87% rename from modules/mcp/pages/register-remote.adoc rename to modules/connect/pages/register-remote.adoc index 234f927..2a059f3 100644 --- a/modules/mcp/pages/register-remote.adoc +++ b/modules/connect/pages/register-remote.adoc @@ -6,7 +6,7 @@ :learning-objective-2: Pick the right transport (SSE vs. Streamable HTTP) and authentication mode :learning-objective-3: Confirm tool discovery completed and the server is reachable through its proxy URL -Register your existing MCP server with Redpanda to add authentication, observability, and agent aggregation without changing your server's code. This guide covers the self-managed path from xref:mcp:create-server.adoc[Create an MCP Server] in depth. Choose this when you already run a server and want Redpanda to proxy it. +Register your existing MCP server with Redpanda to add authentication, observability, and agent aggregation without changing your server's code. This guide covers the self-managed path from xref:connect:create-server.adoc[Create an MCP Server] in depth. Choose this when you already run a server and want Redpanda to proxy it. After completing this guide, you will be able to: @@ -32,7 +32,7 @@ If you don't already run a server, prefer a managed type. See xref:managed/manag * The endpoint URL. `http://` is allowed for everything except user-delegated OAuth, which requires `https://` (proto rule `remote_mcp.user_oauth_requires_https`). * Knowledge of which transport the server speaks (SSE or Streamable HTTP). If you don't know, see <>. 
* If using static-key or service-account-OAuth: secrets pre-created in the ADP secret store, `UPPER_SNAKE_CASE` (proto regex `^[A-Z][A-Z0-9_]*$`). -* If using user-delegated OAuth: an OAuth Provider already configured. See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. +* If using user-delegated OAuth: an OAuth Provider already configured. See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. == Create the server @@ -40,7 +40,7 @@ If you don't already run a server, prefer a managed type. See xref:managed/manag . In the marketplace picker, choose *Remote (Proxied)*. + // TODO: screenshot of the marketplace picker with Remote (Proxied) highlighted. -. Fill in the identity fields (`name`, `description`, `enabled`); same constraints as in xref:mcp:create-server.adoc[Create an MCP Server]. +. Fill in the identity fields (`name`, `description`, `enabled`); same constraints as in xref:connect:create-server.adoc[Create an MCP Server]. . Provide the *URL* and *Transport*. . Configure authentication (see <<authentication>>). . Click *Create*. @@ -73,7 +73,7 @@ curl -X POST -H "Content-Type: application/json" \ [[authentication]] == Authentication -The five auth modes from xref:mcp:create-server.adoc#configure-authentication[create-server.adoc] all apply. Three patterns are particularly common for self-managed servers: +The five auth modes from xref:connect:create-server.adoc#configure-authentication[create-server.adoc] all apply. Three patterns are particularly common for self-managed servers: [cols="1,2"] |=== @@ -89,13 +89,13 @@ The five auth modes from xref:mcp:create-server.adoc#configure-authentication[cr |The upstream server already validates client tokens; Redpanda just forwards the caller's `Authorization` header. |=== -For user-delegated OAuth, the URL must be `https://` and you also need an OAuth Provider. See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. +For user-delegated OAuth, the URL must be `https://` and you also need an OAuth Provider.
See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. // TODO: screenshots of each auth-mode form panel after walking `adp-production`. == Tool discovery -After create, Redpanda runs a live `tools/list` (the `ListMCPServerTools` RPC) against the server. The result is cached on the `MCPServer.tools` output-only field and shown on the detail page's *Overview* tab. The *Inspector* tab (see xref:mcp:test-tools.adoc[Test a server's tools]) exercises individual glossterm:tool[,tools]. +After create, Redpanda runs a live `tools/list` (the `ListMCPServerTools` RPC) against the server. The result is cached on the `MCPServer.tools` output-only field and shown on the detail page's *Overview* tab. The *Inspector* tab (see xref:connect:test-tools.adoc[Test a server's tools]) exercises individual glossterm:tool[,tools]. If the tools list is empty or stale, hit the *Refresh tools* action on the Overview tab to re-query the server. @@ -108,7 +108,7 @@ If the tools list is empty or stale, hit the *Refresh tools* action on the Overv |Error |What it means |`OAuthConnectionRequired` -|The user-delegated auth path needs a stored token vault entry for the calling user. Redpanda surfaces an `authorize_url` so the user can complete the consent flow. See xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. +|The user-delegated auth path needs a stored token vault entry for the calling user. Redpanda surfaces an `authorize_url` so the user can complete the consent flow. See xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. |`OAuthTokenExpired` |The user's stored token has expired and refresh failed. Surface the new authorize URL and have the user re-consent. 
@@ -139,5 +139,5 @@ If the tools list is empty or stale, hit the *Refresh tools* action on the Overv == Related topics -* xref:mcp:user-delegated-oauth.adoc[] -* xref:ai-gateway:aggregation.adoc[] +* xref:connect:user-delegated-oauth.adoc[] +* xref:gateway:aggregation.adoc[] diff --git a/modules/integrations/pages/remote-mcp-clients.adoc b/modules/connect/pages/remote-mcp-clients.adoc similarity index 94% rename from modules/integrations/pages/remote-mcp-clients.adoc rename to modules/connect/pages/remote-mcp-clients.adoc index d8c23ee..5f5ac44 100644 --- a/modules/integrations/pages/remote-mcp-clients.adoc +++ b/modules/connect/pages/remote-mcp-clients.adoc @@ -21,7 +21,7 @@ OAuth clients and OAuth providers govern *opposite directions* of authentication * An *OAuth client* (this page) governs *inbound* authentication: an external chat app (Claude Desktop, ChatGPT, Copilot Studio, Cursor) authenticating to AI Gateway so its users can invoke MCP tools. * An *OAuth provider* governs *outbound* authentication: AI Gateway authenticating to an upstream system (GitHub, Slack, Salesforce) on a user's behalf when an MCP server uses user-delegated OAuth. -See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider] for the outbound authentication side. +See xref:connect:oauth-providers.adoc[Configure an OAuth Provider] for the outbound authentication side. ==== == When to use this @@ -34,7 +34,7 @@ Use a remote MCP client connection when: Use a different approach when: -* You need programmatic, server-side tool invocation. See xref:ai-gateway:connect-agent.adoc[Connect your agent] for SDK-based access. +* You need programmatic, server-side tool invocation. See xref:gateway:connect-agent.adoc[Connect your agent] for SDK-based access. * You need an in-house chat UI. Build against the AI Gateway's MCP endpoints directly with the SDK of your choice. 
== Architecture: Three resources work together @@ -46,10 +46,10 @@ Wiring a remote chat client to an MCP server uses three resources in AI Gateway: |Resource |Role |*MCP server* -|The tool surface itself. Managed (Redpanda hosts it) or self-managed (you host it). See xref:mcp:create-server.adoc[Create an MCP Server]. +|The tool surface itself. Managed (Redpanda hosts it) or self-managed (you host it). See xref:connect:create-server.adoc[Create an MCP Server]. |*OAuth Provider* -|Defines how AI Gateway authenticates against the upstream system on behalf of users (for example, GitHub) when the MCP server uses user-delegated OAuth. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. Optional: only needed if the MCP server requires per-user upstream identity. +|Defines how AI Gateway authenticates against the upstream system on behalf of users (for example, GitHub) when the MCP server uses user-delegated OAuth. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. Optional: only needed if the MCP server requires per-user upstream identity. |*OAuth Client* |Defines how an external chat app (Claude Desktop, ChatGPT, Gemini, Cursor) authenticates *against AI Gateway* on behalf of users. The chat client gets a `client_id` + `client_secret` it uses to negotiate access tokens. *This is what makes the chat-client integration possible.* @@ -67,9 +67,9 @@ When a user invokes a tool, AI Gateway runs both authentication handshakes: Clau Before you wire up the chat-client connector, make sure you have: -* An MCP server already created in AI Gateway. See xref:mcp:create-server.adoc[Create an MCP Server]. +* An MCP server already created in AI Gateway. See xref:connect:create-server.adoc[Create an MCP Server]. * The MCP server's *API URL*. Copy it from the server's *Overview* tab. -* For user-delegated MCP servers: An OAuth Provider configured for the upstream system. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. 
+* For user-delegated MCP servers: An OAuth Provider configured for the upstream system. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. * End-users have accounts with the chat client (Claude, ChatGPT, Gemini, Cursor) and the upstream system the MCP server connects to. == Register an OAuth Client in AI Gateway @@ -239,7 +239,7 @@ Common symptoms and fixes: |The chat client's callback URL isn't registered on the OAuth Client. Add the URL the chat client publishes (Claude Desktop has two; check Claude's docs for the current set). |Connector authorized but no tools appear -|The MCP server has zero tools, or `tools/list` failed at connection time. Open the server in the Inspector to confirm tools are discovered. See xref:mcp:test-tools.adoc[Test a server's tools]. +|The MCP server has zero tools, or `tools/list` failed at connection time. Open the server in the Inspector to confirm tools are discovered. See xref:connect:test-tools.adoc[Test a server's tools]. |Tool call returns an _authorize_ link to the user |First call from a user with no stored upstream token. The user follows the link, completes upstream consent, and the call retries automatically (Step 2 of the flow above). @@ -259,11 +259,11 @@ This page does not cover: * *Custom desktop or mobile UIs*: Build against the AI Gateway MCP endpoints directly using your platform's HTTP client; you don't need an OAuth Client unless you want the same external-app flow. * *Agent-to-agent calls (A2A)*: See the Agents docs; remote MCP clients are end-user-facing. -* *MCP server authoring*: See xref:mcp:create-server.adoc[Create an MCP Server] for the server side. +* *MCP server authoring*: See xref:connect:create-server.adoc[Create an MCP Server] for the server side. 
== Related topics -* xref:mcp:create-server.adoc[Create an MCP Server] -* xref:mcp:oauth-providers.adoc[Configure an OAuth Provider] -* xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] -* xref:ai-gateway:connect-agent.adoc[Connect your agent] +* xref:connect:create-server.adoc[Create an MCP Server] +* xref:connect:oauth-providers.adoc[Configure an OAuth Provider] +* xref:connect:user-delegated-oauth.adoc[User-delegated OAuth] +* xref:gateway:connect-agent.adoc[Connect your agent] diff --git a/modules/agents/pages/system-prompts.adoc b/modules/connect/pages/system-prompts.adoc similarity index 99% rename from modules/agents/pages/system-prompts.adoc rename to modules/connect/pages/system-prompts.adoc index 6212d21..bb837c8 100644 --- a/modules/agents/pages/system-prompts.adoc +++ b/modules/connect/pages/system-prompts.adoc @@ -290,7 +290,7 @@ Guide agents to: * Avoid redundant tool calls (check context before calling) * Stop when the task completes (don't continue exploring) -For cost management strategies including iteration limits and monitoring, see xref:agents:concepts.adoc[]. +For cost management strategies including iteration limits and monitoring, see xref:connect:concepts.adoc[]. == Example: System prompt with all best practices @@ -419,5 +419,5 @@ Decision criteria enable reliable tool selection based on request context. 
== Next steps -* xref:agents:quickstart.adoc[] -* xref:mcp:overview.adoc[] +* xref:get-started:quickstart.adoc[] +* xref:connect:mcp-overview.adoc[] diff --git a/modules/mcp/pages/test-tools.adoc b/modules/connect/pages/test-tools.adoc similarity index 92% rename from modules/mcp/pages/test-tools.adoc rename to modules/connect/pages/test-tools.adoc index cd33655..fc3a7ca 100644 --- a/modules/mcp/pages/test-tools.adoc +++ b/modules/connect/pages/test-tools.adoc @@ -6,7 +6,7 @@ :learning-objective-2: Inspect resources, prompts, and call history :learning-objective-3: Diagnose common errors (auth missing, scope upgrade required, transport mismatch) before pointing an agent at the server -Test your MCP server's glossterm:tool[,tools], glossterm:resource[,resources], and glossterm:prompt[,prompts] using the Inspector: a built-in MCP client in the Agentic Data Plane UI. It runs on the same JSON-RPC connection that agents use, so a tool that works in the Inspector also works for an agent. Use this after creating your server (xref:mcp:create-server.adoc[Create an MCP Server]) or whenever you change a tool's schema. +Test your MCP server's glossterm:tool[,tools], glossterm:resource[,resources], and glossterm:prompt[,prompts] using the Inspector: a built-in MCP client in the Agentic Data Plane UI. It runs on the same JSON-RPC connection that agents use, so a tool that works in the Inspector also works for an agent. Use this after creating your server (xref:connect:create-server.adoc[Create an MCP Server]) or whenever you change a tool's schema. After completing this guide, you will be able to: @@ -84,7 +84,7 @@ The Session panel keeps a running history of every call you've made through the |Error |Meaning and fix |`OAuthConnectionRequired` -|User-delegated auth has no stored token for the calling user. Redpanda includes an `authorize_url` in the error detail; complete the consent flow per xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth]. 
+|User-delegated auth has no stored token for the calling user. Redpanda includes an `authorize_url` in the error detail; complete the consent flow per xref:connect:user-delegated-oauth.adoc[User-delegated OAuth]. |`OAuthTokenExpired` |Stored token is expired and refresh failed. Re-consent through *My Connections*. @@ -118,9 +118,9 @@ rpk ai mcp tools call --input '{"arg1":"value"}' rpk ai mcp get ---- -The command resolves the gateway URL from your active rpk cloud profile and reads the cached `rpk cloud login` token. See xref:ai-gateway:connect-agent.adoc[Connect your agent] for installation and profile setup. +The command resolves the gateway URL from your active rpk cloud profile and reads the cached `rpk cloud login` token. See xref:gateway:connect-agent.adoc[Connect your agent] for installation and profile setup. == Related topics -* xref:ai-gateway:aggregation.adoc[] -* xref:agents:create-agent.adoc[] +* xref:gateway:aggregation.adoc[] +* xref:connect:create-agent.adoc[] diff --git a/modules/agents/pages/tutorials/customer-support-agent.adoc b/modules/connect/pages/tutorials/customer-support-agent.adoc similarity index 96% rename from modules/agents/pages/tutorials/customer-support-agent.adoc rename to modules/connect/pages/tutorials/customer-support-agent.adoc index 59644b7..a27fa74 100644 --- a/modules/agents/pages/tutorials/customer-support-agent.adoc +++ b/modules/connect/pages/tutorials/customer-support-agent.adoc @@ -37,7 +37,7 @@ The challenge: users phrase requests differently ("Where's my package?", "Track == Prerequisites * A xref:cloud-data-platform:get-started:cluster-types/byoc/index.adoc[BYOC environment]. -* xref:ai-gateway:gateway-quickstart.adoc[AI Gateway configured] with at least one LLM provider enabled (this tutorial uses OpenAI). +* xref:get-started:gateway-quickstart.adoc[AI Gateway configured] with at least one LLM provider enabled (this tutorial uses OpenAI). 
== Design the MCP tools @@ -254,7 +254,7 @@ This demonstrates error recovery without hallucination. The "Never make up track == Troubleshoot -For comprehensive troubleshooting guidance, see xref:troubleshoot/troubleshoot-ai-agents.adoc[]. +For comprehensive troubleshooting guidance, see xref:monitor:troubleshoot-ai-agents.adoc[]. === Test with mock data @@ -267,7 +267,7 @@ Use these documented test IDs when testing the agent. If you replace the mock to == Next steps -* xref:mcp:overview.adoc[] -* xref:agents:system-prompts.adoc[] -* xref:agents:architecture-patterns.adoc[] -* xref:troubleshoot/troubleshoot-ai-agents.adoc[] +* xref:connect:mcp-overview.adoc[] +* xref:connect:system-prompts.adoc[] +* xref:connect:architecture-patterns.adoc[] +* xref:monitor:troubleshoot-ai-agents.adoc[] diff --git a/modules/agents/pages/tutorials/transaction-dispute-resolution.adoc b/modules/connect/pages/tutorials/transaction-dispute-resolution.adoc similarity index 96% rename from modules/agents/pages/tutorials/transaction-dispute-resolution.adoc rename to modules/connect/pages/tutorials/transaction-dispute-resolution.adoc index 1ac3232..d329736 100644 --- a/modules/agents/pages/tutorials/transaction-dispute-resolution.adoc +++ b/modules/connect/pages/tutorials/transaction-dispute-resolution.adoc @@ -33,8 +33,8 @@ When a customer calls saying "I see a $247.83 charge from 'ACME CORP' but I neve == Prerequisites * A xref:cloud-data-platform:get-started:cluster-types/byoc/index.adoc[BYOC environment]. -* xref:ai-gateway:gateway-quickstart.adoc[AI Gateway configured] with at least one LLM provider enabled (this tutorial uses OpenAI GPT-5.2 or Claude Sonnet 4.5 for reasoning). -* The xref:get-started:rpk-install.adoc[Redpanda CLI (`rpk`)] installed (for testing the pipeline with sample data). +* xref:get-started:gateway-quickstart.adoc[AI Gateway configured] with at least one LLM provider enabled (this tutorial uses OpenAI GPT-5.2 or Claude Sonnet 4.5 for reasoning). 
+* The xref:reference:rpk-install.adoc[Redpanda CLI (`rpk`)] installed (for testing the pipeline with sample data). * Completed xref:tutorials/customer-support-agent.adoc[] (foundational multi-tool concepts). == Create MCP tools for each domain @@ -92,7 +92,7 @@ Wait for the server status to show *Running*. [NOTE] ==== -This tutorial uses XSmall resource tier for all MCP servers because the mock tools run lightweight Bloblang transformations. Production deployments with external API calls require larger tiers based on throughput needs. See xref:mcp:overview.adoc[]. +This tutorial uses XSmall resource tier for all MCP servers because the mock tools run lightweight Bloblang transformations. Production deployments with external API calls require larger tiers based on throughput needs. See xref:connect:mcp-overview.adoc[]. ==== === Fraud tools @@ -386,7 +386,7 @@ This visibility helps you: * Identify slow sub-agents that need optimization * Track costs per investigation for budgeting -For detailed trace structure, see xref:observability:concepts.adoc#agent-trace-hierarchy[Agent trace hierarchy]. +For detailed trace structure, see xref:monitor:concepts.adoc#agent-trace-hierarchy[Agent trace hierarchy]. == Integrate with streaming pipeline @@ -641,7 +641,7 @@ In the transcript details, you'll see: == Troubleshoot -For comprehensive troubleshooting guidance, see xref:troubleshoot/troubleshoot-ai-agents.adoc[]. +For comprehensive troubleshooting guidance, see xref:monitor:troubleshoot-ai-agents.adoc[]. 
=== Test with mock data @@ -656,8 +656,8 @@ For production deployments, replace the mock tools with API calls to your accoun == Next steps -* xref:agents:architecture-patterns.adoc[] -* xref:agents:integration-overview.adoc[] -* xref:agents:pipeline-integration-patterns.adoc[] -* xref:agents:monitor.adoc[] -* xref:mcp:overview.adoc[] +* xref:connect:architecture-patterns.adoc[] +* xref:connect:integration-overview.adoc[] +* xref:connect:pipeline-integration-patterns.adoc[] +* xref:monitor:monitor-agents.adoc[] +* xref:connect:mcp-overview.adoc[] diff --git a/modules/mcp/pages/user-delegated-oauth.adoc b/modules/connect/pages/user-delegated-oauth.adoc similarity index 91% rename from modules/mcp/pages/user-delegated-oauth.adoc rename to modules/connect/pages/user-delegated-oauth.adoc index 9498c5e..628eb53 100644 --- a/modules/mcp/pages/user-delegated-oauth.adoc +++ b/modules/connect/pages/user-delegated-oauth.adoc @@ -16,14 +16,14 @@ After completing this guide, you will be able to: == Prerequisites -* An OAuth provider resource configured in the Agentic Data Plane UI under *OAuth providers*. The provider declares the upstream's `authorize_url`, `token_url`, supported scopes, and client credentials. See xref:mcp:oauth-providers.adoc[Configure an OAuth Provider]. +* An OAuth provider resource configured in the Agentic Data Plane UI under *OAuth providers*. The provider declares the upstream's `authorize_url`, `token_url`, supported scopes, and client credentials. See xref:connect:oauth-providers.adoc[Configure an OAuth Provider]. * The required scopes for the upstream API you plan to call. * For *self-managed* MCP servers: the server URL must be `https://` (proto rule `remote_mcp.user_oauth_requires_https`). HTTP is rejected at create time. * For *managed* MCP servers: the type must support user-delegated OAuth. SQL doesn't; Slack, Jira, and Google managed types do. Check xref:managed/managed-catalog.adoc[Managed catalog] before configuring. 
== Configure the server -. Create or edit your MCP server (see xref:mcp:create-server.adoc[Create an MCP Server]). +. Create or edit your MCP server (see xref:connect:create-server.adoc[Create an MCP Server]). . In the authentication section, choose *User-delegated OAuth*. . Pick the configured *OAuth provider* (`UserOAuthAuth.provider_name`). . List the *required scopes* (`UserOAuthAuth.required_scopes`). Redpanda enforces these at consent time. @@ -34,7 +34,7 @@ After completing this guide, you will be able to: NOTE: Choosing user-delegated OAuth instead of service-account OAuth *is* the credential-mode decision: there's no separate field. User-delegated gives each caller a per-user upstream identity; service-account gives every caller one shared identity. Switching between them later requires re-consent for every active user. -TIP: To configure user-delegated OAuth from the CLI, use `--user-oauth-provider` and `--user-oauth-scopes` on `rpk ai mcp create` or `rpk ai mcp update`. See xref:mcp:create-server.adoc[Create an MCP Server]. +TIP: To configure user-delegated OAuth from the CLI, use `--user-oauth-provider` and `--user-oauth-scopes` on `rpk ai mcp create` or `rpk ai mcp update`. See xref:connect:create-server.adoc[Create an MCP Server]. == The user connection flow @@ -66,7 +66,7 @@ When refresh fails (revoked token, idle too long, upstream error), the next tool If you want one shared upstream identity for every caller (instead of per-user identities), choose *Service-account OAuth* on the server instead of *User-delegated OAuth*. With service-account OAuth, every caller of every tool sees the same upstream identity; the upstream system has no idea which ADP user invoked the tool. With user-delegated OAuth, the upstream system sees each end-user as themselves and applies their own permissions. -For the field-by-field service-account-OAuth setup, see xref:mcp:create-server.adoc#configure-authentication[create-server.adoc]. 
+For the field-by-field service-account-OAuth setup, see xref:connect:create-server.adoc#configure-authentication[create-server.adoc]. == Worked examples @@ -97,5 +97,5 @@ For the field-by-field service-account-OAuth setup, see xref:mcp:create-server.a == Related topics -* xref:mcp:oauth-providers.adoc[] -* xref:mcp:create-server.adoc#configure-authentication[Service-account OAuth setup] for the shared-identity alternative. +* xref:connect:oauth-providers.adoc[] +* xref:connect:create-server.adoc#configure-authentication[Service-account OAuth setup] for the shared-identity alternative. diff --git a/modules/integrations/partials/integrations/claude-code-admin.adoc b/modules/connect/partials/integrations/claude-code-admin.adoc similarity index 98% rename from modules/integrations/partials/integrations/claude-code-admin.adoc rename to modules/connect/partials/integrations/claude-code-admin.adoc index 2971e84..fe34f84 100644 --- a/modules/integrations/partials/integrations/claude-code-admin.adoc +++ b/modules/connect/partials/integrations/claude-code-admin.adoc @@ -192,7 +192,7 @@ Enable Claude Code to discover and use tools from multiple MCP servers through a |Descriptive name (for example, `redpanda-data-catalog`) |Endpoint URL -|MCP server endpoint (for example, xref:integrations:remote-mcp-clients.adoc[Remote MCP server] URL) +|MCP server endpoint (for example, xref:connect:remote-mcp-clients.adoc[Remote MCP server] URL) |Authentication |Bearer token or other authentication mechanism @@ -467,4 +467,4 @@ Causes and solutions: == Next steps -* xref:integrations:remote-mcp-clients.adoc[] +* xref:connect:remote-mcp-clients.adoc[] diff --git a/modules/integrations/partials/integrations/claude-code-user.adoc b/modules/connect/partials/integrations/claude-code-user.adoc similarity index 94% rename from modules/integrations/partials/integrations/claude-code-user.adoc rename to modules/connect/partials/integrations/claude-code-user.adoc index a529ce7..dcd5d4f 100644 --- 
a/modules/integrations/partials/integrations/claude-code-user.adoc +++ b/modules/connect/partials/integrations/claude-code-user.adoc @@ -6,7 +6,7 @@ :learning-objective-2: Set up MCP server integration through AI Gateway :learning-objective-3: Verify Claude Code is routing requests through the gateway -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up Claude Code to route LLM requests and access MCP tools through the gateway's unified endpoints. +After xref:get-started:gateway-quickstart.adoc[configuring your AI Gateway], set up Claude Code to route LLM requests and access MCP tools through the gateway's unified endpoints. After reading this page, you will be able to: @@ -20,8 +20,8 @@ Before configuring Claude Code, ensure you have: * Claude Code CLI installed (download from https://github.com/anthropics/claude-code[Anthropic's GitHub^]) * An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +** At least one LLM provider enabled (see xref:get-started:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:get-started:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) * Your AI Gateway credentials: ** Gateway endpoint URL (for example, `\https://gw-abc123.ai.panda.com`) ** API key with access to the gateway @@ -343,7 +343,7 @@ If your gateway is configured with fallback providers, check the logs to see if . **MCP tool aggregation overhead** + -Aggregating tools from multiple MCP servers adds processing time. Use deferred tool loading to reduce this overhead (see xref:ai-gateway:aggregation.adoc[]). +Aggregating tools from multiple MCP servers adds processing time. 
Use deferred tool loading to reduce this overhead (see xref:gateway:aggregation.adoc[]). . **Rate limiting** + @@ -395,9 +395,9 @@ chmod 600 ~/.claude.json == Next steps -* xref:ai-gateway:aggregation.adoc[] +* xref:gateway:aggregation.adoc[] == Related pages -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:get-started:gateway-quickstart.adoc[]: Create and configure your AI Gateway * xref:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits diff --git a/modules/integrations/partials/integrations/cline-admin.adoc b/modules/connect/partials/integrations/cline-admin.adoc similarity index 99% rename from modules/integrations/partials/integrations/cline-admin.adoc rename to modules/connect/partials/integrations/cline-admin.adoc index 3b2580d..8f21b8a 100644 --- a/modules/integrations/partials/integrations/cline-admin.adoc +++ b/modules/connect/partials/integrations/cline-admin.adoc @@ -205,7 +205,7 @@ Enable Cline to discover and use tools from multiple MCP servers through a singl |Descriptive name (for example, `filesystem-tools`, `code-analysis-tools`) |Endpoint URL -|MCP server endpoint (for example, xref:integrations:remote-mcp-clients.adoc[Remote MCP server] URL) +|MCP server endpoint (for example, xref:connect:remote-mcp-clients.adoc[Remote MCP server] URL) |Authentication |Bearer token or other authentication mechanism @@ -547,4 +547,4 @@ Causes and solutions: == Next steps -* xref:integrations:remote-mcp-clients.adoc[] +* xref:connect:remote-mcp-clients.adoc[] diff --git a/modules/ROOT/partials/integrations/cline-user.adoc b/modules/connect/partials/integrations/cline-user.adoc similarity index 95% rename from modules/ROOT/partials/integrations/cline-user.adoc rename to modules/connect/partials/integrations/cline-user.adoc index c05a6e6..a876e16 100644 --- a/modules/ROOT/partials/integrations/cline-user.adoc +++ b/modules/connect/partials/integrations/cline-user.adoc @@ -6,7 +6,7 @@ 
:learning-objective-2: Set up autonomous mode with custom instructions and browser integration :learning-objective-3: Verify Cline routes requests through the gateway and optimize for cost -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up Cline (formerly Claude Dev) to route LLM requests and access MCP tools through the gateway's unified endpoints. +After xref:get-started:gateway-quickstart.adoc[configuring your AI Gateway], set up Cline (formerly Claude Dev) to route LLM requests and access MCP tools through the gateway's unified endpoints. After reading this page, you will be able to: @@ -20,8 +20,8 @@ Before configuring Cline, ensure you have: * Cline VS Code extension installed (search for "Cline" in VS Code Extensions) * An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +** At least one LLM provider enabled (see xref:get-started:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:get-started:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) * Your AI Gateway credentials: ** Gateway endpoint URL, which includes the gateway ID (for example, `\https://ai.prd.cloud.redpanda.com/gateway/v1/chat/completions`) ** API key with access to the gateway @@ -558,7 +558,7 @@ You may be using a premium model for simple tasks. Solutions: + If not using deferred tool loading, all tools load with every request. Solution: + -* Enable deferred tool loading in your AI Gateway configuration (see xref:ai-gateway:aggregation.adoc[]) +* Enable deferred tool loading in your AI Gateway configuration (see xref:gateway:aggregation.adoc[]) === Cline hangs or times out @@ -587,7 +587,7 @@ You may be hitting rate limits. 
Check the dashboard for rate limit metrics and i . **Provider outage** + -Check the AI Gateway dashboard for provider status. If the primary provider is down, configure failover (see xref:ai-gateway:gateway-quickstart.adoc#configure-provider-pool-with-fallback[Configure failover]). +Check the AI Gateway dashboard for provider status. If the primary provider is down, configure failover (see xref:get-started:gateway-quickstart.adoc#configure-provider-pool-with-fallback[Configure failover]). === Settings changes not taking effect @@ -681,7 +681,7 @@ After (low token cost): * Cline queries the tool for specific information as needed * Only relevant sections included in context -See xref:ai-gateway:aggregation.adoc[] for MCP tool configuration. +See xref:gateway:aggregation.adoc[] for MCP tool configuration. === Enable deferred tool loading @@ -701,10 +701,10 @@ The gateway automatically blocks requests that would exceed the limit. == Next steps -* xref:ai-gateway:aggregation.adoc[] +* xref:gateway:aggregation.adoc[] == Related pages -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:get-started:gateway-quickstart.adoc[]: Create and configure your AI Gateway * xref:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits * xref:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway diff --git a/modules/integrations/partials/integrations/continue-admin.adoc b/modules/connect/partials/integrations/continue-admin.adoc similarity index 99% rename from modules/integrations/partials/integrations/continue-admin.adoc rename to modules/connect/partials/integrations/continue-admin.adoc index 91dac29..eb993cf 100644 --- a/modules/integrations/partials/integrations/continue-admin.adoc +++ b/modules/connect/partials/integrations/continue-admin.adoc @@ -301,7 +301,7 @@ Enable Continue.dev to discover and use tools from multiple MCP servers through |Descriptive name (for example, 
`redpanda-data-catalog`, `code-search-tools`) |Endpoint URL -|MCP server endpoint (for example, xref:integrations:remote-mcp-clients.adoc[Remote MCP server] URL) +|MCP server endpoint (for example, xref:connect:remote-mcp-clients.adoc[Remote MCP server] URL) |Authentication |Bearer token or other authentication mechanism @@ -714,4 +714,4 @@ This is expected behavior, not a configuration issue: == Next steps -* xref:integrations:remote-mcp-clients.adoc[] +* xref:connect:remote-mcp-clients.adoc[] diff --git a/modules/integrations/partials/integrations/continue-user.adoc b/modules/connect/partials/integrations/continue-user.adoc similarity index 97% rename from modules/integrations/partials/integrations/continue-user.adoc rename to modules/connect/partials/integrations/continue-user.adoc index 279f1fe..283e291 100644 --- a/modules/integrations/partials/integrations/continue-user.adoc +++ b/modules/connect/partials/integrations/continue-user.adoc @@ -6,7 +6,7 @@ :learning-objective-2: Set up MCP server integration through AI Gateway :learning-objective-3: Optimize Continue.dev settings for cost and performance -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up Continue.dev to route LLM requests and access MCP tools through the gateway's unified endpoints. +After xref:get-started:gateway-quickstart.adoc[configuring your AI Gateway], set up Continue.dev to route LLM requests and access MCP tools through the gateway's unified endpoints. 
After reading this page, you will be able to: @@ -22,8 +22,8 @@ Before configuring Continue.dev, ensure you have: ** VS Code: Search for "Continue" in Extensions ** JetBrains IDEs: Install from the JetBrains Marketplace * An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +** At least one LLM provider enabled (see xref:get-started:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:get-started:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) * Your AI Gateway credentials: ** Gateway endpoint URL (for example, `\https://gw.ai.panda.com`) ** API key with access to the gateway @@ -678,7 +678,7 @@ Reduce `maxTokens` in `completionOptions` to limit response length: . **MCP overhead** + -If not using deferred tool loading, all tools load with every request. Enable deferred tool loading in your AI Gateway configuration (see xref:ai-gateway:aggregation.adoc[]). +If not using deferred tool loading, all tools load with every request. Enable deferred tool loading in your AI Gateway configuration (see xref:gateway:aggregation.adoc[]). === Configuration changes not taking effect @@ -834,11 +834,11 @@ Autocomplete rarely needs more than 256 tokens, while chat responses can vary. 
== Next steps -* xref:ai-gateway:aggregation.adoc[] +* xref:gateway:aggregation.adoc[] == Related pages -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:get-started:gateway-quickstart.adoc[]: Create and configure your AI Gateway * xref:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits * xref:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway * xref:ai-gateway/integrations/cline-user.adoc[]: Configure Cline with AI Gateway diff --git a/modules/ROOT/partials/integrations/cursor-admin.adoc b/modules/connect/partials/integrations/cursor-admin.adoc similarity index 99% rename from modules/ROOT/partials/integrations/cursor-admin.adoc rename to modules/connect/partials/integrations/cursor-admin.adoc index 3753339..6b951fc 100644 --- a/modules/ROOT/partials/integrations/cursor-admin.adoc +++ b/modules/connect/partials/integrations/cursor-admin.adoc @@ -229,7 +229,7 @@ Enable Cursor to discover and use tools from multiple MCP servers through a sing |Descriptive name (for example, `redpanda-data-tools`, `code-search-tools`) |Endpoint URL -|MCP server endpoint (for example, xref:integrations:remote-mcp-clients.adoc[Remote MCP server] URL) +|MCP server endpoint (for example, xref:connect:remote-mcp-clients.adoc[Remote MCP server] URL) |Authentication |Bearer token or other authentication mechanism @@ -681,4 +681,4 @@ Causes and solutions: == Next steps -* xref:integrations:remote-mcp-clients.adoc[] +* xref:connect:remote-mcp-clients.adoc[] diff --git a/modules/ROOT/partials/integrations/cursor-user.adoc b/modules/connect/partials/integrations/cursor-user.adoc similarity index 96% rename from modules/ROOT/partials/integrations/cursor-user.adoc rename to modules/connect/partials/integrations/cursor-user.adoc index 4c27d60..0072695 100644 --- a/modules/ROOT/partials/integrations/cursor-user.adoc +++ b/modules/connect/partials/integrations/cursor-user.adoc @@ -6,7 
+6,7 @@ :learning-objective-2: Set up MCP server integration for tool access through the gateway :learning-objective-3: Optimize Cursor settings for multi-tenancy and cost control -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up Cursor IDE to route LLM requests and access MCP tools through the gateway's unified endpoints. +After xref:get-started:gateway-quickstart.adoc[configuring your AI Gateway], set up Cursor IDE to route LLM requests and access MCP tools through the gateway's unified endpoints. After reading this page, you will be able to: @@ -20,8 +20,8 @@ Before configuring Cursor IDE, ensure you have: * Cursor IDE installed (download from https://cursor.sh[cursor.sh^]) * An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +** At least one LLM provider enabled (see xref:get-started:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:get-started:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) * Your AI Gateway credentials: ** Gateway endpoint URL (for example, `\https://gw.ai.panda.com/v1/gateways/gateway-abc123`) ** API key with access to the gateway @@ -257,7 +257,7 @@ AI Gateway solves this through deferred tool loading: * Agent queries for specific tools only when needed * 80-90% token reduction, depending on configuration -See xref:ai-gateway:aggregation.adoc[] for details on configuring deferred tool loading. +See xref:gateway:aggregation.adoc[] for details on configuring deferred tool loading. === Add MCP server connection @@ -680,7 +680,7 @@ Verify you're using fast models for autocomplete: . **MCP tool aggregation overhead** + -Aggregating tools from multiple MCP servers adds processing time. 
Use deferred tool loading to reduce this overhead (see xref:ai-gateway:aggregation.adoc[]). +Aggregating tools from multiple MCP servers adds processing time. Use deferred tool loading to reduce this overhead (see xref:gateway:aggregation.adoc[]). === Configuration changes not taking effect @@ -801,11 +801,11 @@ This sends only search + orchestrator tools initially, reducing token usage sign == Next steps -* xref:ai-gateway:aggregation.adoc[] +* xref:gateway:aggregation.adoc[] == Related pages -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:get-started:gateway-quickstart.adoc[]: Create and configure your AI Gateway * xref:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits * xref:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway * xref:ai-gateway/integrations/continue-user.adoc[]: Configure Continue.dev with AI Gateway diff --git a/modules/ROOT/partials/integrations/github-copilot-admin.adoc b/modules/connect/partials/integrations/github-copilot-admin.adoc similarity index 100% rename from modules/ROOT/partials/integrations/github-copilot-admin.adoc rename to modules/connect/partials/integrations/github-copilot-admin.adoc diff --git a/modules/integrations/partials/integrations/github-copilot-user.adoc b/modules/connect/partials/integrations/github-copilot-user.adoc similarity index 97% rename from modules/integrations/partials/integrations/github-copilot-user.adoc rename to modules/connect/partials/integrations/github-copilot-user.adoc index 02d6054..97bc1a1 100644 --- a/modules/integrations/partials/integrations/github-copilot-user.adoc +++ b/modules/connect/partials/integrations/github-copilot-user.adoc @@ -6,7 +6,7 @@ :learning-objective-2: Set up multi-tenancy with per-team gateways for cost tracking :learning-objective-3: Configure enterprise BYOK deployments for team-wide Copilot access -After xref:ai-gateway:gateway-quickstart.adoc[configuring 
your AI Gateway], set up GitHub Copilot to route LLM requests through the gateway for centralized observability, cost management, and provider flexibility. +After xref:get-started:gateway-quickstart.adoc[configuring your AI Gateway], set up GitHub Copilot to route LLM requests through the gateway for centralized observability, cost management, and provider flexibility. After reading this page, you will be able to: @@ -20,8 +20,8 @@ Before configuring GitHub Copilot, ensure you have: * GitHub Copilot subscription (Individual, Business, or Enterprise) * An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) +** At least one LLM provider enabled (see xref:get-started:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) +** A gateway created and configured (see xref:get-started:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) * Your AI Gateway credentials: ** Gateway endpoint URL (for example, `https://gw.ai.panda.com`) ** Gateway ID (for example, `gateway-abc123`) @@ -865,11 +865,11 @@ Generate project-specific cost reports from the gateway dashboard. 
== Next steps -* xref:ai-gateway:aggregation.adoc[] +* xref:gateway:aggregation.adoc[] == Related pages -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway +* xref:get-started:gateway-quickstart.adoc[]: Create and configure your AI Gateway * xref:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits * xref:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway * xref:ai-gateway/integrations/continue-user.adoc[]: Configure Continue.dev with AI Gateway diff --git a/modules/ROOT/partials/integrations/index.adoc b/modules/connect/partials/integrations/index.adoc similarity index 100% rename from modules/ROOT/partials/integrations/index.adoc rename to modules/connect/partials/integrations/index.adoc diff --git a/modules/ROOT/partials/service-account-authorization.adoc b/modules/connect/partials/service-account-authorization.adoc similarity index 100% rename from modules/ROOT/partials/service-account-authorization.adoc rename to modules/connect/partials/service-account-authorization.adoc diff --git a/modules/governance/pages/budgets.adoc b/modules/control/pages/budgets.adoc similarity index 94% rename from modules/governance/pages/budgets.adoc rename to modules/control/pages/budgets.adoc index 3b8fd69..3486464 100644 --- a/modules/governance/pages/budgets.adoc +++ b/modules/control/pages/budgets.adoc @@ -50,13 +50,13 @@ You don't view spend on this page. The *LLM Providers* page, dashboard, transcri |Surface |Use it for |*Cost & Usage* tab (*LLM Providers* page) -|Time-series spend, request, and token charts across providers and models. Use it to group by provider, model, or token type, then filter by provider, model, cost type, token type, or user. See xref:ai-gateway:configure-provider.adoc#view-cost-and-usage[View cost and usage]. +|Time-series spend, request, and token charts across providers and models. 
Use it to group by provider, model, or token type, then filter by provider, model, cost type, token type, or user. See xref:gateway:configure-provider.adoc#view-cost-and-usage[View cost and usage]. |*Governance dashboard* -|Summary cards (total spend, agent count, request count, trend), provider breakdown chart, events timeline, agents and MCP servers tables. The single-pane-of-glass view across your whole deployment. See xref:governance:dashboard/overview.adoc[Read the governance overview]. +|Summary cards (total spend, agent count, request count, trend), provider breakdown chart, events timeline, agents and MCP servers tables. The single-pane-of-glass view across your whole deployment. See xref:control:dashboard/overview.adoc[Read the governance overview]. |*Transcripts* -|Per-call cost on individual executions. Useful when investigating a specific agent run or debugging a cost anomaly. See xref:observability:transcripts.adoc[Read a transcript]. +|Per-call cost on individual executions. Useful when investigating a specific agent run or debugging a cost anomaly. See xref:monitor:transcripts.adoc[Read a transcript]. |*Breakdown queries* |Aggregated spend by *provider*, *model*, *user*, *organization*, or *provider type*. Available through the dashboard's provider-breakdown widget and through `GetSpendingBreakdown` for programmatic access. @@ -80,7 +80,7 @@ For more expressive queries, `SpendingFilter` also accepts an AIP-160 `filter` e === Authenticate -`SpendingService` uses the same OIDC client-credentials grant as the rest of AI Gateway. Mint a service-account access token using the flow in xref:ai-gateway:connect-agent.adoc#authenticate-with-oidc-client-credentials[Authenticate with OIDC client credentials], then pass the token in the `Authorization: Bearer ` header on every call. The service account needs `dataplane_adp_spending_get` on the resource you're querying. See xref:governance:permissions-reference.adoc#spending-permissions[Spending permissions]. 
+`SpendingService` uses the same OIDC client-credentials grant as the rest of AI Gateway. Mint a service-account access token using the flow in xref:gateway:connect-agent.adoc#authenticate-with-oidc-client-credentials[Authenticate with OIDC client credentials], then pass the token in the `Authorization: Bearer <token>` header on every call. The service account needs `dataplane_adp_spending_get` on the resource you're querying. See xref:control:permissions-reference.adoc#spending-permissions[Spending permissions]. // TODO: Confirm the canonical endpoint shape against `apps/aigw` on cloudv2. Likely a Connect-Go / gRPC reflection surface at `/redpanda.aigateway.spending.v1.SpendingService/GetSpendingBreakdown`. Replace the placeholder URL below with the verified shape, and add a note on whether HTTP/JSON transcoding is exposed or whether clients must speak gRPC. @@ -194,7 +194,7 @@ Some guardrail evaluators call an LLM to do their work. A toxicity classifier, f Guardrail evaluator cost surfaces in the same spending pipeline as user-facing LLM calls. The evaluator's cost is attributed to the *evaluator's configured upstream provider* (usually a small classifier model, separate from the user-facing LLM), so per-provider breakdowns separate the two automatically. -For the per-evaluator cost model and how it interacts with the dashboard's spend view, see xref:governance:guardrails/index.adoc[Configure guardrails]. +For the per-evaluator cost model and how it interacts with the dashboard's spend view, see xref:control:guardrails/overview.adoc[Configure guardrails]. // TODO: confirm with eng that guardrail evaluator cost flows into the same SpendingService as user-facing LLM cost (vs. a separate stream). Open Q A3 in the companion plan, also flagged on the Guardrails plan.
@@ -278,6 +278,6 @@ If the cost still reflects the catalog price, the override may not have propagat == Next steps -* xref:governance:dashboard/overview.adoc[Read the governance overview] -* xref:observability:transcripts.adoc[Read a transcript] -* xref:governance:guardrails/index.adoc[Configure guardrails] +* xref:control:dashboard/overview.adoc[Read the governance overview] +* xref:monitor:transcripts.adoc[Read a transcript] +* xref:control:guardrails/overview.adoc[Configure guardrails] diff --git a/modules/governance/pages/dashboard/overview.adoc b/modules/control/pages/dashboard/overview.adoc similarity index 94% rename from modules/governance/pages/dashboard/overview.adoc rename to modules/control/pages/dashboard/overview.adoc index 5f3ac5c..c0b4410 100644 --- a/modules/governance/pages/dashboard/overview.adoc +++ b/modules/control/pages/dashboard/overview.adoc @@ -21,7 +21,7 @@ Use this guide to: == Prerequisites * Access to the Agentic Data Plane -* The `dataplane_adp_spending_get` permission to read spend, request, and token data, granted by both the Writer and Reader built-in roles. See xref:governance:permissions-reference.adoc#spending-permissions[Spending permissions]. +* The `dataplane_adp_spending_get` permission to read spend, request, and token data, granted by both the Writer and Reader built-in roles. See xref:control:permissions-reference.adoc#spending-permissions[Spending permissions]. 
* At least one LLM provider or agent if you want non-empty charts and tables == Open the dashboard @@ -132,7 +132,7 @@ If no user-level spend exists, the panel stays empty until agents send requests == Next steps -* xref:observability:transcripts.adoc[Read a transcript] -* xref:governance:budgets.adoc[Token budgets and limits] -* xref:observability:byoa-telemetry.adoc[BYOA telemetry] -* xref:agents:create-agent.adoc[Create a declarative agent] +* xref:monitor:transcripts.adoc[Read a transcript] +* xref:control:budgets.adoc[Token budgets and limits] +* xref:monitor:byoa-telemetry.adoc[BYOA telemetry] +* xref:connect:create-agent.adoc[Create a declarative agent] diff --git a/modules/governance/pages/guardrails/create-guardrail.adoc b/modules/control/pages/guardrails/create-guardrail.adoc similarity index 83% rename from modules/governance/pages/guardrails/create-guardrail.adoc rename to modules/control/pages/guardrails/create-guardrail.adoc index ef9bbd0..24cea43 100644 --- a/modules/governance/pages/guardrails/create-guardrail.adoc +++ b/modules/control/pages/guardrails/create-guardrail.adoc @@ -18,8 +18,8 @@ After reading this page, you will be able to: == Prerequisites -* An ADP environment with at least one LLM provider configured. See xref:ai-gateway:configure-provider.adoc[Configure your LLM provider]. -* For a *Custom webhook* evaluator, a publicly reachable HTTPS endpoint that implements the gateway's webhook contract. See xref:governance:guardrails/types-reference.adoc[Evaluator types reference]. +* An ADP environment with at least one LLM provider configured. See xref:gateway:configure-provider.adoc[Configure your LLM provider]. +* For a *Custom webhook* evaluator, a publicly reachable HTTPS endpoint that implements the gateway's webhook contract. See xref:control:guardrails/types-reference.adoc[Evaluator types reference]. 
* For evaluators that need their own credentials (for example, a hosted PII service), the credential stored in the ADP secret store using `UPPER_SNAKE_CASE`. // TODO: standalone-ADP wording. Replace with the concrete sign-in URL, IAM role, and OIDC audience once the standalone product surface ships. Open Q D1 in the companion plan. @@ -38,7 +38,7 @@ Choose one of the supported evaluator types: * *Toxicity*: runs content through a toxicity classifier. Per-call LLM cost. * *Custom webhook*: delegates the decision to your HTTPS endpoint. Gateway charges nothing per call. -For each type's full config schema and behavior, see xref:governance:guardrails/types-reference.adoc[Evaluator types reference]. +For each type's full config schema and behavior, see xref:control:guardrails/types-reference.adoc[Evaluator types reference]. // TODO: confirm the evaluator type set at GA. Open Q A5 in the companion plan. @@ -58,7 +58,7 @@ Decision rule: == Configure the evaluator -Fill in the per-type config block. The form fields differ per evaluator type; see xref:governance:guardrails/types-reference.adoc[Evaluator types reference] for the full schema of each type. +Fill in the per-type config block. The form fields differ per evaluator type; see xref:control:guardrails/types-reference.adoc[Evaluator types reference] for the full schema of each type. // TODO: walk through the PII form as the exemplar (most common starting case) once the post-pivot field set is confirmed. Lift exact field names and labels from the proto. Open Qs A1, A2 in the companion plan. @@ -85,7 +85,7 @@ curl -X POST https://your-adp-environment/v1/chat/completions \ -d '{"messages":[{"role":"user","content":"My SSN is 123-45-6789"}]}' ---- -The request should return an error. Open the request's transcript and confirm a violation entry appears for the guardrail. 
See xref:observability:transcripts.adoc[Read a transcript] for the transcript walkthrough and xref:governance:guardrails/violations.adoc[Read violations] for what to do when a violation surprises you. +The request should return an error. Open the request's transcript and confirm a violation entry appears for the guardrail. See xref:monitor:transcripts.adoc[Read a transcript] for the transcript walkthrough and xref:control:guardrails/violations.adoc[Read violations] for what to do when a violation surprises you. == Edit, disable, or delete @@ -97,12 +97,11 @@ The request should return an error. Open the request's transcript and confirm a == Troubleshooting -* *Evaluator returns false positives*: see xref:governance:guardrails/violations.adoc[Read violations] for tuning patterns per evaluator type. -* *Evaluator times out or is unavailable*: see xref:governance:guardrails/violations.adoc[Read violations] for the evaluator-down section. +* *Evaluator returns false positives*: see xref:control:guardrails/violations.adoc[Read violations] for tuning patterns per evaluator type. +* *Evaluator times out or is unavailable*: see xref:control:guardrails/violations.adoc[Read violations] for the evaluator-down section. * *Attached provider doesn't fire the guardrail*: confirm attachment (right provider, right phase), enabled state, and that requests are actually reaching the gateway (not bypassing through a direct provider URL). 
== Next steps -* xref:governance:guardrails/types-reference.adoc[Evaluator types reference] -* xref:governance:guardrails/violations.adoc[Read violations] -* xref:governance:guardrails/cost-tracking.adoc[Cost tracking] +* xref:control:guardrails/types-reference.adoc[Evaluator types reference] +* xref:control:guardrails/violations.adoc[Read violations] diff --git a/modules/governance/pages/guardrails/overview.adoc b/modules/control/pages/guardrails/overview.adoc similarity index 84% rename from modules/governance/pages/guardrails/overview.adoc rename to modules/control/pages/guardrails/overview.adoc index cfe3666..fdec4cb 100644 --- a/modules/governance/pages/guardrails/overview.adoc +++ b/modules/control/pages/guardrails/overview.adoc @@ -48,15 +48,15 @@ Streaming responses change the timing slightly: where async evaluation is suppor |No gateway charge per call. Your webhook's compute cost is your own. |=== -For per-type config schemas, supported phases, and behavior on match, see xref:governance:guardrails/types-reference.adoc[Evaluator types reference]. +For per-type config schemas, supported phases, and behavior on match, see xref:control:guardrails/types-reference.adoc[Evaluator types reference]. // TODO: confirm the evaluator type set shipping at GA. RFC 0002 specifies PII + Toxicity + Custom webhook. The phase5-aigw-guardrails branch in cloudv2 ships PII + a "keyword" evaluator that may rename to Toxicity, stay as a fourth type, or be dropped. Open Q A5 in the companion plan. == What happens when a guardrail fires -When an evaluator decides to block a request, the gateway stops forwarding it (or stops returning the response, on OUTPUT) and returns an error to the caller. Every fired guardrail records a *violation* entry on the request's transcript, captured in the same observability pipeline that records the LLM call itself. Read the transcript to see which guardrail fired, at which phase, and what content matched. 
See xref:observability:transcripts.adoc[Read a transcript]. +When an evaluator decides to block a request, the gateway stops forwarding it (or stops returning the response, on OUTPUT) and returns an error to the caller. Every fired guardrail records a *violation* entry on the request's transcript, captured in the same observability pipeline that records the LLM call itself. Read the transcript to see which guardrail fired, at which phase, and what content matched. See xref:monitor:transcripts.adoc[Read a transcript]. -A different scenario is handled separately: the evaluator itself errored out (for example, a custom webhook timed out or a classifier model is unavailable). See xref:governance:guardrails/violations.adoc[Read violations] for evaluator-down behavior, fail-closed versus fail-open defaults, and per-guardrail overrides. +A different scenario is handled separately: the evaluator itself errored out (for example, a custom webhook timed out or a classifier model is unavailable). See xref:control:guardrails/violations.adoc[Read violations] for evaluator-down behavior, fail-closed versus fail-open defaults, and per-guardrail overrides. // TODO: confirm fail-closed vs. fail-open default at GA, and whether it's configurable per guardrail. Open Qs B2 and B5 in the companion plan. @@ -68,7 +68,7 @@ A guardrail attaches to one or more LLM providers. Each provider can carry many == Where to go next -* xref:governance:guardrails/create-guardrail.adoc[Create a guardrail]: walk through configuring and attaching your first guardrail. -* xref:governance:guardrails/types-reference.adoc[Evaluator types reference]: full config schemas for PII, Toxicity, and Custom-webhook evaluators. -* xref:governance:guardrails/violations.adoc[Read violations]: investigate why a guardrail fired and tune false-positive rates. -* xref:governance:guardrails/cost-tracking.adoc[Cost tracking]: see what each evaluator costs and where the cost surfaces. 
+* xref:control:guardrails/create-guardrail.adoc[Create a guardrail]: walk through configuring and attaching your first guardrail. +* xref:control:guardrails/types-reference.adoc[Evaluator types reference]: full config schemas for PII, Toxicity, and Custom-webhook evaluators. +* xref:control:guardrails/violations.adoc[Read violations]: investigate why a guardrail fired and tune false-positive rates. + diff --git a/modules/governance/pages/guardrails/types-reference.adoc b/modules/control/pages/guardrails/types-reference.adoc similarity index 94% rename from modules/governance/pages/guardrails/types-reference.adoc rename to modules/control/pages/guardrails/types-reference.adoc index b18154b..f0f842a 100644 --- a/modules/governance/pages/guardrails/types-reference.adoc +++ b/modules/control/pages/guardrails/types-reference.adoc @@ -43,13 +43,13 @@ Each evaluator type has its own config schema, supported phase set, behavior-on- *Behavior on match*: Block. -*Cost*: Per-call LLM cost. Counts against the *evaluator's configured upstream provider* (typically a small classifier model, separate from the user-facing LLM). Token cost surfaces alongside the user-facing LLM call in the same transcript. See xref:governance:guardrails/cost-tracking.adoc[Cost tracking]. +*Cost*: Per-call LLM cost. Counts against the *evaluator's configured upstream provider* (typically a small classifier model, separate from the user-facing LLM). Token cost surfaces alongside the user-facing LLM call in the same transcript. *Gotchas:* * Threshold tuning matters: Too aggressive blocks legitimate traffic; too permissive lets toxic content through. Start at the classifier's recommended default and tune from violation review. * Latency adds to overall response time. If async-OUTPUT evaluation isn't supported for your model's stream type, the user-visible latency includes the classifier call. -* The classifier model itself can fail or be down. 
See xref:governance:guardrails/violations.adoc[Read violations] for the evaluator-down section. +* The classifier model itself can fail or be down. See xref:control:guardrails/violations.adoc[Read violations] for the evaluator-down section. == Custom webhook evaluator @@ -64,7 +64,7 @@ Each evaluator type has its own config schema, supported phase set, behavior-on- * *Request shape*: The gateway POSTs a JSON document containing the phase (`INPUT` or `OUTPUT`), the content payload (prompt or response text), request metadata (request ID for correlation, model identifier, attached provider), and any extra fields the contract specifies. * *Response shape*: Your endpoint returns a JSON document containing the decision (`pass` or `block`), an optional reason string surfaced in the violation entry, and (if redact-mode is supported) an optional redacted-content payload. * *Authentication*: The gateway authenticates to your webhook using a shared secret stored in the ADP secret store. mTLS or signed-JWT alternatives may be available. -* *Retry / timeout*: The gateway honors a default per-call timeout. On webhook unavailable, the evaluator-down behavior applies (see xref:governance:guardrails/violations.adoc[Read violations]). +* *Retry / timeout*: The gateway honors a default per-call timeout. On webhook unavailable, the evaluator-down behavior applies (see xref:control:guardrails/violations.adoc[Read violations]). // TODO: confirm webhook authentication options at GA. Open Q B4c in the companion plan. 
diff --git a/modules/governance/pages/guardrails/violations.adoc b/modules/control/pages/guardrails/violations.adoc similarity index 94% rename from modules/governance/pages/guardrails/violations.adoc rename to modules/control/pages/guardrails/violations.adoc index e328de9..8d50152 100644 --- a/modules/governance/pages/guardrails/violations.adoc +++ b/modules/control/pages/guardrails/violations.adoc @@ -26,8 +26,8 @@ A violation is distinct from an *evaluator failure*: A failure is when the evalu Violations surface in two places: -* *Transcripts*: Each request's transcript carries a violation entry per fired guardrail, alongside the LLM call entry, tool calls, and cost data. See xref:observability:transcripts.adoc[Read a transcript] for the full transcript walkthrough. -* *Metrics*: Aggregate violation counts per guardrail per provider per time window. See xref:observability:metrics.adoc[Metrics]. +* *Transcripts*: Each request's transcript carries a violation entry per fired guardrail, alongside the LLM call entry, tool calls, and cost data. See xref:monitor:transcripts.adoc[Read a transcript] for the full transcript walkthrough. +* *Metrics*: Aggregate violation counts per guardrail per provider per time window. See xref:monitor:metrics.adoc[Metrics]. // TODO: confirm the violation field shape in the transcript proto. The Transcripts plan (workflow #7) didn't call out a violation field specifically; coordinate with that workflow's author so the xref above resolves to a real proto field. Open Q C1 in the companion plan. 
@@ -91,6 +91,5 @@ Per the AI Gateway design, evaluators run async where possible: specifically, `O == Next steps -* xref:governance:guardrails/types-reference.adoc[Evaluator types reference] -* xref:governance:guardrails/cost-tracking.adoc[Cost tracking] -* xref:observability:transcripts.adoc[Read a transcript] +* xref:control:guardrails/types-reference.adoc[Evaluator types reference] +* xref:monitor:transcripts.adoc[Read a transcript] diff --git a/modules/control/pages/index.adoc b/modules/control/pages/index.adoc new file mode 100644 index 0000000..7d83b21 --- /dev/null +++ b/modules/control/pages/index.adoc @@ -0,0 +1,3 @@ += Control & govern +:description: Govern agent activity with the dashboard, guardrails, spending limits, and permissions. +:page-layout: index diff --git a/modules/governance/pages/permissions-overview.adoc b/modules/control/pages/permissions-overview.adoc similarity index 97% rename from modules/governance/pages/permissions-overview.adoc rename to modules/control/pages/permissions-overview.adoc index 5a84f35..35fbd0d 100644 --- a/modules/governance/pages/permissions-overview.adoc +++ b/modules/control/pages/permissions-overview.adoc @@ -45,7 +45,7 @@ Two more families belong to Redpanda Connect resources that ADP uses: These permissions only resolve on clusters that have ADP enabled. Granting them to a principal on a non-ADP cluster has no effect. -For the full list of permissions and which built-in role grants each one, see xref:governance:permissions-reference.adoc[ADP Permissions Reference]. +For the full list of permissions and which built-in role grants each one, see xref:control:permissions-reference.adoc[ADP Permissions Reference]. 
== Built-in roles @@ -94,4 +94,4 @@ ADP permissions are part of the same Cloud RBAC system that gates control-plane == Related topics -* xref:governance:permissions-reference.adoc[ADP Permissions Reference] +* xref:control:permissions-reference.adoc[ADP Permissions Reference] diff --git a/modules/governance/pages/permissions-reference.adoc b/modules/control/pages/permissions-reference.adoc similarity index 97% rename from modules/governance/pages/permissions-reference.adoc rename to modules/control/pages/permissions-reference.adoc index 888dd2e..4bbb5fa 100644 --- a/modules/governance/pages/permissions-reference.adoc +++ b/modules/control/pages/permissions-reference.adoc @@ -16,7 +16,7 @@ Use this reference to: * [ ] {learning-objective-2} * [ ] {learning-objective-3} -For an explanation of how permissions, roles, and role bindings fit together, see xref:governance:permissions-overview.adoc[About ADP Permissions]. +For an explanation of how permissions, roles, and role bindings fit together, see xref:control:permissions-overview.adoc[About ADP Permissions]. == How to read this reference @@ -234,7 +234,7 @@ The `dataplane_adp_agent_credential_*` permissions gate the OIDC client credenti == Spending permissions -The `dataplane_adp_spending_*` permissions gate the governance APIs that surface AI spend, request counts, and token volume. See xref:governance:budgets.adoc[Token Budgets and Limits] for what spending data ADP records automatically. +The `dataplane_adp_spending_*` permissions gate the governance APIs that surface AI spend, request counts, and token volume. See xref:control:budgets.adoc[Token Budgets and Limits] for what spending data ADP records automatically. 
[cols="2,2,1,1"] |=== @@ -430,4 +430,4 @@ The `dataplane_knowledgebase_*` permissions gate retrieval-augmented generation == Related topics -* xref:governance:permissions-overview.adoc[About ADP Permissions] +* xref:control:permissions-overview.adoc[About ADP Permissions] diff --git a/modules/ai-gateway/pages/admin/setup-guide.adoc b/modules/gateway/pages/admin/setup-guide.adoc similarity index 98% rename from modules/ai-gateway/pages/admin/setup-guide.adoc rename to modules/gateway/pages/admin/setup-guide.adoc index 857ba93..da61343 100644 --- a/modules/ai-gateway/pages/admin/setup-guide.adoc +++ b/modules/gateway/pages/admin/setup-guide.adoc @@ -247,7 +247,7 @@ Rate limits for MCP work the same way as LLM rate limits. Repeat for each MCP server you want to aggregate. -See xref:ai-gateway:aggregation.adoc[] for detailed information about MCP aggregation. +See xref:gateway:aggregation.adoc[] for detailed information about MCP aggregation. === Configure the MCP orchestrator @@ -328,4 +328,5 @@ Users can then discover and connect to the gateway using the information provide == Next steps -* xref:integrations:index.adoc[Integrations] +* xref:connect:claude-code.adoc[Connect Claude Code] +* xref:connect:remote-mcp-clients.adoc[Remote MCP clients] diff --git a/modules/ai-gateway/pages/aggregation.adoc b/modules/gateway/pages/aggregation.adoc similarity index 100% rename from modules/ai-gateway/pages/aggregation.adoc rename to modules/gateway/pages/aggregation.adoc diff --git a/modules/ai-gateway/pages/bedrock-setup.adoc b/modules/gateway/pages/bedrock-setup.adoc similarity index 92% rename from modules/ai-gateway/pages/bedrock-setup.adoc rename to modules/gateway/pages/bedrock-setup.adoc index 0810a66..3199b31 100644 --- a/modules/ai-gateway/pages/bedrock-setup.adoc +++ b/modules/gateway/pages/bedrock-setup.adoc @@ -8,7 +8,7 @@ // Source: cloudv2 `apps/aigw/docs/customer/bedrock-setup-guide.md` on origin/main, verified 2026-05-19. 
-This guide walks you through the AWS-side setup AI Gateway needs to invoke Amazon Bedrock, then through the Agentic Data Plane UI flow that registers Bedrock as an LLM provider. For background on how Bedrock foundation models, cross-region inference profiles, and IAM patterns map to the provider form, see xref:ai-gateway:configure-provider.adoc#bedrock-inference-profiles[AWS Bedrock: Inference profiles and IAM] on the main provider configuration page. +This guide walks you through the AWS-side setup AI Gateway needs to invoke Amazon Bedrock, then through the Agentic Data Plane UI flow that registers Bedrock as an LLM provider. For background on how Bedrock foundation models, cross-region inference profiles, and IAM patterns map to the provider form, see xref:gateway:configure-provider.adoc#bedrock-inference-profiles[AWS Bedrock: Inference profiles and IAM] on the main provider configuration page. After completing this guide, you will be able to: @@ -50,7 +50,7 @@ aws iam create-policy \ }' ---- -The second resource entry enables cross-region inference profiles such as `us.anthropic.claude-sonnet-4-6`, which AI Gateway uses when the model identifier carries a geography prefix. See xref:ai-gateway:configure-provider.adoc#bedrock-inference-profiles[AWS Bedrock: Inference profiles and IAM] for the full prefix list and pricing implications. +The second resource entry enables cross-region inference profiles such as `us.anthropic.claude-sonnet-4-6`, which AI Gateway uses when the model identifier carries a geography prefix. See xref:gateway:configure-provider.adoc#bedrock-inference-profiles[AWS Bedrock: Inference profiles and IAM] for the full prefix list and pricing implications. NOTE: Anthropic Claude 4.6 and later models cannot be invoked with the bare foundation-model ID and require an inference profile. Without the second `Resource` entry, those calls fail with `AccessDenied`. 
@@ -162,11 +162,11 @@ For example, requests to `eu.anthropic.claude-haiku-4-5` bill at the EU Haiku ra |The model isn't enabled in the region you chose. Open the AWS Bedrock console, switch to the target region, and enable model access for the foundation models you want to expose. |`Invocation of model ID … with on-demand throughput isn't supported` -|You called a Claude 4.6 or later model with a bare foundation-model ID. Switch to an inference profile, for example `us.anthropic.claude-sonnet-4-6` instead of `anthropic.claude-sonnet-4-6`. See xref:ai-gateway:configure-provider.adoc#bedrock-inference-profiles[AWS Bedrock: Inference profiles and IAM]. +|You called a Claude 4.6 or later model with a bare foundation-model ID. Switch to an inference profile, for example `us.anthropic.claude-sonnet-4-6` instead of `anthropic.claude-sonnet-4-6`. See xref:gateway:configure-provider.adoc#bedrock-inference-profiles[AWS Bedrock: Inference profiles and IAM]. |=== == Next steps -* xref:ai-gateway:configure-provider.adoc[Configure an LLM provider] -* xref:ai-gateway:connect-agent.adoc[Connect your agent] -* xref:governance:dashboard/overview.adoc[Read the governance overview] +* xref:gateway:configure-provider.adoc[Configure an LLM provider] +* xref:gateway:connect-agent.adoc[Connect your agent] +* xref:control:dashboard/overview.adoc[Read the governance overview] diff --git a/modules/ai-gateway/pages/builders/discover-gateways.adoc b/modules/gateway/pages/builders/discover-gateways.adoc similarity index 99% rename from modules/ai-gateway/pages/builders/discover-gateways.adoc rename to modules/gateway/pages/builders/discover-gateways.adoc index 42f01dc..af74d76 100644 --- a/modules/ai-gateway/pages/builders/discover-gateways.adoc +++ b/modules/gateway/pages/builders/discover-gateways.adoc @@ -270,4 +270,4 @@ echo -e "\n=== Gateway validated successfully ===" == Next steps -* xref:ai-gateway:connect-agent.adoc[Connect Your Agent] +* xref:gateway:connect-agent.adoc[Connect Your 
Agent] diff --git a/modules/ai-gateway/pages/configure-provider.adoc b/modules/gateway/pages/configure-provider.adoc similarity index 93% rename from modules/ai-gateway/pages/configure-provider.adoc rename to modules/gateway/pages/configure-provider.adoc index ed6060a..a1bfef3 100644 --- a/modules/ai-gateway/pages/configure-provider.adoc +++ b/modules/gateway/pages/configure-provider.adoc @@ -67,7 +67,7 @@ The *Provider type* card shows five cards. Pick the one that matches your upstre |Reach Gemini Pro, Flash, and multimodal models through Google AI Studio. Ideal for long-context workloads and image/video inputs. |*AWS Bedrock* -|Invoke foundation models (Claude, Llama, Titan, Nova, Mistral, AI21 Jamba) hosted inside your AWS account. Requires an AWS region and credentials (static, STS-assumed role, or the default credential chain). Supports the native Bedrock APIs (`InvokeModel`, `Converse`) and an OpenAI-compatible Chat Completions endpoint for `gpt-oss` models. See <<bedrock-inference-profiles>> for picking the right model identifier, and xref:ai-gateway:bedrock-setup.adoc[Set up AWS Bedrock as an LLM provider] for a step-by-step IAM and access-key walkthrough. +|Invoke foundation models (Claude, Llama, Titan, Nova, Mistral, AI21 Jamba) hosted inside your AWS account. Requires an AWS region and credentials (static, STS-assumed role, or the default credential chain). Supports the native Bedrock APIs (`InvokeModel`, `Converse`) and an OpenAI-compatible Chat Completions endpoint for `gpt-oss` models. See <<bedrock-inference-profiles>> for picking the right model identifier, and xref:gateway:bedrock-setup.adoc[Set up AWS Bedrock as an LLM provider] for a step-by-step IAM and access-key walkthrough. |*OpenAI-compatible* |Point at any OpenAI-compatible endpoint that ships `/v1/chat/completions` (vLLM, Ollama, LM Studio, LocalAI, Together, Groq, OpenRouter). Useful for self-hosted models and aggregator gateways. Requires a *Base URL*. Authentication is optional. 
@@ -123,7 +123,7 @@ Google AI:: + [IMPORTANT] ==== -Gemini uses the `x-goog-api-key` header for authentication, not `Authorization: Bearer`. This matters when you wire up clients. See xref:ai-gateway:connect-agent.adoc[Connect your agent]. +Gemini uses the `x-goog-api-key` header for authentication, not `Authorization: Bearer`. This matters when you wire up clients. See xref:gateway:connect-agent.adoc[Connect your agent]. ==== AWS Bedrock:: @@ -190,13 +190,13 @@ The detail page also carries a *Last 7 days* KPI strip (*TOTAL SPEND*, *REQUESTS [[transcript-logging]] == Configure transcript logging -By default, AI Gateway records the full request and response payload (including prompt content, completion content, and tool-call arguments and results) for every call this provider proxies, writing each call into xref:observability:transcripts.adoc[the Transcripts view] alongside token counts and latency. This powers turn-by-turn investigation and per-conversation drill-down in Governance. +By default, AI Gateway records the full request and response payload (including prompt content, completion content, and tool-call arguments and results) for every call this provider proxies, writing each call into xref:monitor:transcripts.adoc[the Transcripts view] alongside token counts and latency. This powers turn-by-turn investigation and per-conversation drill-down in Governance. Some workloads need to suppress that payload capture: regulated PII, customer secrets, or any traffic where the message body itself must not be retained. For those, configure a dedicated "sensitive" provider with transcript logging disabled. The toggle is on the provider's create and edit form. It is per-provider, not per-request: applications cannot opt in or out at call time. To split sensitive from non-sensitive traffic, create one provider with transcript logging on and another with it off, and route each application to whichever proxy URL matches its data class. 
-Disabling transcript logging does not suppress cost and usage telemetry. Token counts, latency, and provider/model attribution are still recorded, so the *Cost & Usage* tab and the xref:governance:dashboard/overview.adoc[Governance dashboard] continue to report spend for traffic on the provider; only the message bodies are withheld from the Transcripts view. +Disabling transcript logging does not suppress cost and usage telemetry. Token counts, latency, and provider/model attribution are still recorded, so the *Cost & Usage* tab and the xref:control:dashboard/overview.adoc[Governance dashboard] continue to report spend for traffic on the provider; only the message bodies are withheld from the Transcripts view. NOTE: Changing the toggle takes effect for new requests. Transcripts already captured under the previous setting are not retroactively redacted; delete or rotate the provider if you need to purge historical content. @@ -207,7 +207,7 @@ NOTE: Changing the toggle takes effect for new requests. Transcripts already cap . Click *Create provider*. The button activates after *Name* and *Type* are both set. The *Summary* panel checks them off as you fill them in. . On the provider's detail page, the *Connection* card shows your *Proxy URL*, *Discovery* URL, *Base URL*, and *API key ref*. Copy the *Proxy URL*: this is where your applications point. . Scroll to the *Verify connection* section. Pick a model from the dropdown and click *Test Connection*. The status updates from _Not tested yet_ to a pass/fail indicator. Use the *Show commands* disclosure if you want to see the equivalent curl or SDK call. -. To wire up an application, open *Connect your app* further down the page or follow xref:ai-gateway:connect-agent.adoc[Connect your agent]. +. To wire up an application, open *Connect your app* further down the page or follow xref:gateway:connect-agent.adoc[Connect your agent]. 
A successful Test Connection result confirms that the provider's credentials, region (Bedrock), and network path are all correct. If the call fails, see <<troubleshooting>>. @@ -258,7 +258,7 @@ Older 4.5 and earlier Claude models still accept bare IDs. Pricing varies by profile. The bare foundation-model ID and the `global.` profile share AWS's headline rate; geo profiles (`us.`, `eu.`, `apac.`, `au.`, `jp.`) carry approximately a 10% cross-region inference premium. Use `global.` when you want the headline rate and don't need a specific geography. Use `us.` / `eu.` / `apac.` when data residency matters. -AI Gateway preserves the regional prefix end to end when it records spend, so usage in the xref:governance:dashboard/overview.adoc[governance dashboard] and the *Cost & Usage* tab is attributed to the correct regional rate. A call to `eu.anthropic.claude-haiku-4-5` is billed at the EU Haiku rate, not the headline foundation-model rate. +AI Gateway preserves the regional prefix end to end when it records spend, so usage in the xref:control:dashboard/overview.adoc[governance dashboard] and the *Cost & Usage* tab is attributed to the correct regional rate. A call to `eu.anthropic.claude-haiku-4-5` is billed at the EU Haiku rate, not the headline foundation-model rate. === IAM policy patterns @@ -326,7 +326,7 @@ The *LLM Providers* list page is the at-a-glance home for every provider in your |First two model identifiers exposed by the provider, plus a `+N` overflow chip when more are configured. |*Spend (7d)* -|Spend over the last 7 days with a small sparkline and a "vs previous period" delta. The window is fixed at 7 days on this view. Longer-range analysis runs through the xref:governance:dashboard/overview.adoc[governance dashboard]. +|Spend over the last 7 days with a small sparkline and a "vs previous period" delta. The window is fixed at 7 days on this view. Longer-range analysis runs through the xref:control:dashboard/overview.adoc[governance dashboard].
|*Updated* |Relative timestamp of the last edit. @@ -387,10 +387,10 @@ The spend chart footer summarizes the selected view by cost bucket, including to |Confirm the client is sending its own `Authorization` header and the *API key* field on the provider is empty. |Gemini returns 401 -|Gemini uses the `x-goog-api-key` header, not `Authorization`. If you're seeing 401s on Gemini, check that the client is sending the correct header. See xref:ai-gateway:connect-agent.adoc[Connect your agent]. +|Gemini uses the `x-goog-api-key` header, not `Authorization`. If you're seeing 401s on Gemini, check that the client is sending the correct header. See xref:gateway:connect-agent.adoc[Connect your agent]. |Provider list empty or 403 -|Confirm your account has the `dataplane_adp_llmprovider_*` permissions in ADP. The Reader built-in role is the minimum required to list providers. The Writer role is required to create one. See xref:governance:permissions-reference.adoc#llm-provider-permissions[LLM provider permissions]. +|Confirm your account has the `dataplane_adp_llmprovider_*` permissions in ADP. The Reader built-in role is the minimum required to list providers. The Writer role is required to create one. See xref:control:permissions-reference.adoc#llm-provider-permissions[LLM provider permissions]. |=== // TODO: add screenshots of common error toasts once captured from the live environment. @@ -400,10 +400,10 @@ The spend chart footer summarizes the selected view by cost bucket, including to AI Gateway does not provide these capabilities. For current status, consult the ADP release notes. * *Multi-provider routing, failover, and retries across providers.* A synthetic provider that fans requests to multiple upstreams is not part of AI Gateway. -* *Spend limits.* Per-user, per-org, and global cost caps are not available. AI Gateway records spend and token usage for reporting in the provider *Cost & Usage* view and Governance, but it does not enforce budget caps. 
See xref:governance:budgets.adoc[Token budgets and limits]. +* *Spend limits.* Per-user, per-org, and global cost caps are not available. AI Gateway records spend and token usage for reporting in the provider *Cost & Usage* view and Governance, but it does not enforce budget caps. See xref:control:budgets.adoc[Token budgets and limits]. * *Rate limits.* Requests-per-second, per-minute, or per-day limits are not available. * *Managed MCP aggregation at the gateway.* Register MCP tool servers separately under *MCP Servers* in the Agentic Data Plane UI. == Next steps -* xref:ai-gateway:connect-agent.adoc[Connect your agent] +* xref:gateway:connect-agent.adoc[Connect your agent] diff --git a/modules/ai-gateway/pages/connect-agent.adoc b/modules/gateway/pages/connect-agent.adoc similarity index 97% rename from modules/ai-gateway/pages/connect-agent.adoc rename to modules/gateway/pages/connect-agent.adoc index a20bafc..015dc56 100644 --- a/modules/ai-gateway/pages/connect-agent.adoc +++ b/modules/gateway/pages/connect-agent.adoc @@ -17,7 +17,7 @@ After completing this guide, you will be able to: == Prerequisites -* A configured LLM provider. If you haven't created one yet, see xref:ai-gateway:configure-provider.adoc[Configure an LLM provider]. +* A configured LLM provider. If you haven't created one yet, see xref:gateway:configure-provider.adoc[Configure an LLM provider]. * For local development, nothing else. You'll install `rpk ai` in the next section. * For CI or programmatic clients: A Redpanda Cloud service account with OIDC client credentials. See xref:cloud-data-platform:security:cloud-authentication.adoc[Authenticate to Redpanda Cloud]. + @@ -338,7 +338,7 @@ response = httpx.post( print(response.json()) ---- -See xref:ai-gateway:configure-provider.adoc#bedrock-inference-profiles[the Bedrock provider reference] for inference-profile selection guidance. 
+See xref:gateway:configure-provider.adoc#bedrock-inference-profiles[the Bedrock provider reference] for inference-profile selection guidance. TIP: Bedrock's `Converse` API works the same way: send to `/model/\{MODEL_ID}/converse` with a Converse-shaped body. Or use the AWS SDK's `bedrockruntime` client and set its `BaseEndpoint` to the proxy URL; the SDK signs the request, AI Gateway re-signs server-side with the provider's credentials, and your client never sees AWS keys. -- @@ -440,7 +440,7 @@ AI Gateway returns standard HTTP status codes. The upstream provider's error bod === 403 Forbidden -* The service account may lack the required roles. Ask an admin to grant `dataplane_adp_llmprovider_get` at minimum to read provider config, and `dataplane_adp_llmprovider_invoke` to proxy LLM requests through AI Gateway. See xref:governance:permissions-reference.adoc#llm-provider-permissions[LLM provider permissions] or assign the LLMProviderInvoker built-in role for runtime-only access. +* The service account may lack the required roles. Ask an admin to grant `dataplane_adp_llmprovider_get` at minimum to read provider config, and `dataplane_adp_llmprovider_invoke` to proxy LLM requests through AI Gateway. See xref:control:permissions-reference.adoc#llm-provider-permissions[LLM provider permissions] or assign the LLMProviderInvoker built-in role for runtime-only access. * The provider may be disabled. Check the *Status* field on its *Connection* card. === Connection timeout or reset @@ -451,4 +451,4 @@ AI Gateway returns standard HTTP status codes. 
The upstream provider's error bod == Next steps -* xref:ai-gateway:configure-provider.adoc[Configure an LLM provider] +* xref:gateway:configure-provider.adoc[Configure an LLM provider] diff --git a/modules/ai-gateway/pages/gateway-architecture.adoc b/modules/gateway/pages/gateway-architecture.adoc similarity index 97% rename from modules/ai-gateway/pages/gateway-architecture.adoc rename to modules/gateway/pages/gateway-architecture.adoc index cc4ff0b..789442a 100644 --- a/modules/ai-gateway/pages/gateway-architecture.adoc +++ b/modules/gateway/pages/gateway-architecture.adoc @@ -6,7 +6,7 @@ :learning-objective-2: Explain the request lifecycle through policy evaluation stages :learning-objective-3: Identify supported providers, features, and current limitations -This page provides technical details about AI Gateway's architecture, request processing, and capabilities. For an overview of AI Gateway, see xref:ai-gateway:overview.adoc[] +This page provides technical details about AI Gateway's architecture, request processing, and capabilities. For an overview of AI Gateway, see xref:gateway:overview.adoc[]. == Architecture overview @@ -142,5 +142,5 @@ The gateway only loads and exposes specific tools when requested, which dramatic == Next steps -* xref:ai-gateway:gateway-quickstart.adoc[] -* xref:ai-gateway:aggregation.adoc[] +* xref:get-started:gateway-quickstart.adoc[] +* xref:gateway:aggregation.adoc[] diff --git a/modules/gateway/pages/index.adoc b/modules/gateway/pages/index.adoc new file mode 100644 index 0000000..bb5867a --- /dev/null +++ b/modules/gateway/pages/index.adoc @@ -0,0 +1,3 @@ += Routing & LLM settings +:description: Configure AI Gateway, LLM providers, and routing.
+:page-layout: index diff --git a/modules/ai-gateway/pages/overview.adoc b/modules/gateway/pages/overview.adoc similarity index 94% rename from modules/ai-gateway/pages/overview.adoc rename to modules/gateway/pages/overview.adoc index 939cd62..82f6a39 100644 --- a/modules/ai-gateway/pages/overview.adoc +++ b/modules/gateway/pages/overview.adoc @@ -46,7 +46,7 @@ Use the provider's own SDK: OpenAI, Anthropic, Google AI, AWS Bedrock, or any Op === Managed authentication -Applications authenticate to ADP with OIDC service accounts instead of long-lived provider API keys. Service accounts use the same role and audit model as every other ADP resource, and mint short-lived tokens that are easy to revoke. For local command-line workflows, use xref:reference:rpk/rpk-cloud/rpk-cloud-login.adoc[`rpk cloud login`] to authenticate and xref:reference:rpk/rpk-ai/rpk-ai.adoc[`rpk ai`] to talk to the gateway. CI and programmatic clients use the OIDC client-credentials grant directly. See xref:ai-gateway:connect-agent.adoc[Connect your agent]. +Applications authenticate to ADP with OIDC service accounts instead of long-lived provider API keys. Service accounts use the same role and audit model as every other ADP resource, and mint short-lived tokens that are easy to revoke. For local command-line workflows, use xref:reference:rpk/rpk-cloud/rpk-cloud-login.adoc[`rpk cloud login`] to authenticate and xref:reference:rpk/rpk-ai/rpk-ai.adoc[`rpk ai`] to talk to the gateway. CI and programmatic clients use the OIDC client-credentials grant directly. See xref:gateway:connect-agent.adoc[Connect your agent]. === Per-provider observability @@ -87,7 +87,7 @@ AI Gateway supports five provider types. The UI labels and short descriptions ma |Point at any OpenAI-compatible endpoint (vLLM, Ollama, LM Studio, LocalAI, Together, Groq, OpenRouter). Useful for self-hosted models and aggregator gateways that ship `/v1/chat/completions`. 
|=== -See xref:ai-gateway:configure-provider.adoc[Configure an LLM provider] for the full form reference for each type. +See xref:gateway:configure-provider.adoc[Configure an LLM provider] for the full form reference for each type. == When to use AI Gateway @@ -110,11 +110,11 @@ It is not the right fit when you: AI Gateway does not provide these capabilities. For current status, consult the ADP release notes. * *Multi-provider routing, failover, and retries.* A synthetic provider that fans requests to multiple upstreams is not part of AI Gateway. -* *Spend limits.* Per-user, per-org, and global cost caps are not available. AI Gateway records spend and token usage for reporting in the provider *Cost & Usage* view and Governance, but it does not enforce budget caps. See xref:governance:budgets.adoc[Token budgets and limits] for the read-only spending visibility. +* *Spend limits.* Per-user, per-org, and global cost caps are not available. AI Gateway records spend and token usage for reporting in the provider *Cost & Usage* view and Governance, but it does not enforce budget caps. See xref:control:budgets.adoc[Token budgets and limits] for the read-only spending visibility. * *Rate limits.* Requests-per-second, per-minute, or per-day caps are not available. * *Managed MCP aggregation at the gateway.* Register MCP tool servers separately under *MCP Servers* in the Agentic Data Plane UI. == Next steps -. xref:ai-gateway:configure-provider.adoc[Configure an LLM provider] -. xref:ai-gateway:connect-agent.adoc[Connect your agent] +. xref:gateway:configure-provider.adoc[Configure an LLM provider] +. 
xref:gateway:connect-agent.adoc[Connect your agent] diff --git a/modules/get-started/pages/adp-overview.adoc b/modules/get-started/pages/adp-overview.adoc index b1223dc..ebdd01f 100644 --- a/modules/get-started/pages/adp-overview.adoc +++ b/modules/get-started/pages/adp-overview.adoc @@ -30,7 +30,7 @@ For high availability, the gateway provides provider-agnostic routing with intel The gateway also supports tenancy modeling for teams, individuals, applications, and service accounts, giving you chargeback transparency for internal cost allocation. You can proxy both models and MCP gateways, centralizing compliance for all LLM interactions without locking into any single provider. -For more information, see xref:ai-gateway:overview.adoc[AI Gateway Overview]. +For more information, see xref:gateway:overview.adoc[AI Gateway Overview]. == MCP servers @@ -40,7 +40,7 @@ MCP servers are lightweight, support OIDC-based authentication, and enforce dete With real-time debugging capabilities, you reduce integration time while getting enterprise-grade security. You can reuse your existing infrastructure and data sources rather than building new integrations from scratch. -For more information, see xref:mcp:overview.adoc[MCP Servers Overview]. +For more information, see xref:connect:mcp-overview.adoc[MCP Servers Overview]. == AI agents @@ -50,13 +50,13 @@ What makes this practical at scale is Redpanda Connect. More than 300 connectors The result is faster time-to-production, lower maintenance (declarative definitions instead of imperative code), and organizational consistency across teams. -For more information, see xref:agents:overview.adoc[]. +For more information, see xref:connect:agents-overview.adoc[]. == Governance The glossterm:governance dashboard[] provides a cross-tenant view of agent activity, spending, MCP server inventory, and authorization events. 
It composes existing services into a single overview built for platform administrators and finance partners, surfacing per-provider spending, agent state, and the authorization decisions that gate every tool invocation. -For more information, see xref:governance:dashboard/index.adoc[Governance Dashboard Overview]. +For more information, see xref:control:dashboard/overview.adoc[Governance Dashboard Overview]. == Built-in governance controls @@ -66,7 +66,7 @@ Redpanda ADP addresses critical enterprise requirements across all components. * *Unified authorization*: All components use OIDC-based authentication with an on-behalf-of authorization model. When a user invokes an agent, the agent inherits the intersection of its own permissions and the user's permissions. This ensures proper data access scoping. -* *Complete observability*: Redpanda ADP provides two levels of inspection. Execution logs (glossterm:transcript[,transcripts]) capture every agent action with 100% sampling using OpenTelemetry standards. Real-time debugging tools let you inspect individual MCP server calls down to individual tool invocations with full timing data. You can view detailed agent actions in the Agentic Data Plane UI and replay data for agent evaluations. For more information, see xref:observability:concepts.adoc[Transcripts Overview]. +* *Complete observability*: Redpanda ADP provides two levels of inspection. Execution logs (glossterm:transcript[,transcripts]) capture every agent action with 100% sampling using OpenTelemetry standards. Real-time debugging tools let you inspect individual MCP server calls down to individual tool invocations with full timing data. You can view detailed agent actions in the Agentic Data Plane UI and replay data for agent evaluations. For more information, see xref:monitor:concepts.adoc[Transcripts Overview]. 
* *Compliance and audit*: For regulated industries with frameworks such as ISO 42001 and the EU AI Act, Redpanda ADP records every agent action and data source used in decision-making. Execution logs are stored in Redpanda topics and can be materialized to Iceberg tables for multi-year retention and analysis. @@ -83,4 +83,4 @@ Common Redpanda ADP use cases include: == Next steps * xref:get-started:byoc-quickstart.adoc[ADP Quickstart] — Set up ADP for your organization (administrators). -* xref:agents:quickstart.adoc[AI Agent Quickstart] — Build your first AI agent (builders). +* xref:get-started:quickstart.adoc[AI Agent Quickstart] — Build your first AI agent (builders). diff --git a/modules/get-started/pages/byoc-quickstart.adoc b/modules/get-started/pages/byoc-quickstart.adoc index 53fd6c0..adab893 100644 --- a/modules/get-started/pages/byoc-quickstart.adoc +++ b/modules/get-started/pages/byoc-quickstart.adoc @@ -21,7 +21,7 @@ After completing this quickstart, you will be able to: == Prerequisites * A Redpanda Cloud BYOC environment on AWS with ADP enabled. -* The Writer or Admin xref:governance:permissions-overview.adoc[built-in role] in ADP. The Reader role can list resources but cannot create them. +* The Writer or Admin xref:control:permissions-overview.adoc[built-in role] in ADP. The Reader role can list resources but cannot create them. * An Anthropic account. If you don't have one, the next section walks through signup. == What you'll build @@ -113,9 +113,9 @@ The catalog of available models is maintained by Redpanda. Models you select her . On the provider detail page, confirm the *Status* badge in the *Connection* card shows *Active*. The agent you build later in this quickstart will exercise the provider end-to-end. -If the *Status* badge stays in a failure state, see the xref:ai-gateway:configure-provider.adoc#troubleshooting[provider troubleshooting table]. 
The most common cause is a typo in the secret reference or a key that wasn't issued in the workspace ADP can read. +If the *Status* badge stays in a failure state, see the xref:gateway:configure-provider.adoc#troubleshooting[provider troubleshooting table]. The most common cause is a typo in the secret reference or a key that wasn't issued in the workspace ADP can read. -For the full provider field reference (transcript logging, Bedrock IAM, Anthropic auth passthrough, OpenAI-compatible endpoints), see xref:ai-gateway:configure-provider.adoc[Configure an LLM provider]. +For the full provider field reference (transcript logging, Bedrock IAM, Anthropic auth passthrough, OpenAI-compatible endpoints), see xref:gateway:configure-provider.adoc[Configure an LLM provider]. == Create an MCP server with a public OpenAPI spec @@ -123,7 +123,7 @@ The OpenAPI managed MCP server type takes any OpenAPI 3 (or Swagger 2) spec URL [NOTE] ==== -PetStore is a public demo that Swagger hosts for testing and is occasionally unavailable. If your agent later returns an HTTP 500 from PetStore, that's the upstream, not ADP. Retry the prompt, or move on to a real MCP server (xref:mcp:managed/managed-catalog.adoc[Managed MCP catalog]) once you've confirmed the rest of the flow works. +PetStore is a public demo that Swagger hosts for testing and is occasionally unavailable. If your agent later returns an HTTP 500 from PetStore, that's the upstream, not ADP. Retry the prompt, or move on to a real MCP server (xref:connect:managed/managed-catalog.adoc[Managed MCP catalog]) once you've confirmed the rest of the flow works. ==== . Open *MCP Servers* > *Create Server*. @@ -155,9 +155,9 @@ PetStore is a public demo that Swagger hosts for testing and is occasionally una + The exact tool count depends on the operations defined in the spec at fetch time. -. (Optional) Click *Ping* in the *MCP Inspector* card to confirm the server responds. 
If the Ping fails or the *STATUS* stays disconnected, see xref:mcp:managed/openapi.adoc#troubleshooting[OpenAPI MCP troubleshooting]. +. (Optional) Click *Ping* in the *MCP Inspector* card to confirm the server responds. If the Ping fails or the *STATUS* stays disconnected, see xref:connect:managed/openapi.adoc#troubleshooting[OpenAPI MCP troubleshooting]. -For the full OpenAPI MCP reference (auth modes, operation filters, base URL overrides), see xref:mcp:managed/openapi.adoc[OpenAPI managed MCP server]. +For the full OpenAPI MCP reference (auth modes, operation filters, base URL overrides), see xref:connect:managed/openapi.adoc[OpenAPI managed MCP server]. == Build your agent @@ -209,7 +209,7 @@ Rules: . Click *Create Agent* at the bottom of the form. -. Wait for the agent status to change from *Starting* to *Running*. ADP also creates the service account at this point; for details, see xref:agents:concepts.adoc#service-account-authorization[service account authorization]. +. Wait for the agent status to change from *Starting* to *Running*. ADP also creates the service account at this point; for details, see xref:connect:concepts.adoc#service-account-authorization[service account authorization]. == Test the agent in the Inspector @@ -244,7 +244,7 @@ Find a pet tagged "friendly" and tell me whether it's available. + The agent may call `findpetsbytags` first, then `getpetbyid` for one of the matches, then summarize. -If the agent hallucinates data instead of calling a tool, tighten the system prompt's "Always call a tool before answering" rule. See xref:agents:system-prompts.adoc[system prompt best practices] for patterns. +If the agent hallucinates data instead of calling a tool, tighten the system prompt's "Always call a tool before answering" rule. See xref:connect:system-prompts.adoc[system prompt best practices] for patterns. 
If a tool card shows an HTTP 500 error and the Artifact apologizes about the pet store API being unavailable, that's the public Swagger PetStore being intermittently down. The agent is behaving correctly (the "if a tool fails, say so and stop" rule in the system prompt is what produced the apology). Retry the prompt; PetStore usually recovers within a few minutes. @@ -268,7 +268,7 @@ Every agent run is recorded as a glossterm:transcript[] you can replay turn-by-t * The tool's response. * The agent's final response to the user. -For the full transcript field reference, see xref:observability:transcripts.adoc[Read a transcript]. +For the full transcript field reference, see xref:monitor:transcripts.adoc[Read a transcript]. endif::[] ifdef::env-adp-governance[] @@ -284,7 +284,7 @@ The glossterm:governance dashboard[] rolls up every LLM call and tool invocation * *Requests*: Reflects the LLM calls from your agent runs (one per turn). * *Tokens*: Input plus output tokens across the same calls. -For the full governance dashboard tour, see xref:governance:dashboard/overview.adoc[Read the governance overview]. +For the full governance dashboard tour, see xref:control:dashboard/overview.adoc[Read the governance overview]. 
endif::[] == Clean up @@ -307,8 +307,8 @@ If you want to keep the provider but rotate the key, edit the provider and chang Now that you have a working ADP setup, here are the most useful things to do next: -* Add more LLM providers (OpenAI, Google AI, AWS Bedrock, OpenAI-compatible): xref:ai-gateway:configure-provider.adoc[Configure an LLM provider] -* Swap the PetStore demo for a real upstream (Slack, Jira, SQL, Kafka, and more): xref:mcp:managed/managed-catalog.adoc[Managed MCP catalog] +* Add more LLM providers (OpenAI, Google AI, AWS Bedrock, OpenAI-compatible): xref:gateway:configure-provider.adoc[Configure an LLM provider] +* Swap the PetStore demo for a real upstream (Slack, Jira, SQL, Kafka, and more): xref:connect:managed/managed-catalog.adoc[Managed MCP catalog] ifdef::env-adp-governance[] -* Track spend across teams: xref:governance:budgets.adoc[Token budgets and limits] +* Track spend across teams: xref:control:budgets.adoc[Token budgets and limits] endif::[] diff --git a/modules/ai-gateway/pages/gateway-quickstart.adoc b/modules/get-started/pages/gateway-quickstart.adoc similarity index 98% rename from modules/ai-gateway/pages/gateway-quickstart.adoc rename to modules/get-started/pages/gateway-quickstart.adoc index 49ded00..c4f3b24 100644 --- a/modules/ai-gateway/pages/gateway-quickstart.adoc +++ b/modules/get-started/pages/gateway-quickstart.adoc @@ -422,7 +422,6 @@ const openai = new OpenAI({ == Next steps * xref:get-started:byoc-quickstart.adoc[ADP Quickstart] -* xref:ai-gateway:aggregation.adoc[] -* xref:integrations:index.adoc[] -* xref:ai-gateway:gateway-architecture.adoc[] -* xref:ai-gateway:overview.adoc[] +* xref:gateway:aggregation.adoc[] +* xref:gateway:gateway-architecture.adoc[] +* xref:gateway:overview.adoc[] diff --git a/modules/agents/pages/quickstart.adoc b/modules/get-started/pages/quickstart.adoc similarity index 86% rename from modules/agents/pages/quickstart.adoc rename to modules/get-started/pages/quickstart.adoc index 
05f9660..a48b390 100644 --- a/modules/agents/pages/quickstart.adoc +++ b/modules/get-started/pages/quickstart.adoc @@ -19,9 +19,9 @@ After completing this quickstart, you will be able to: * A Redpanda Cloud BYOC environment on AWS with ADP enabled. -* An xref:ai-gateway:gateway-quickstart.adoc[AI Gateway] configured with at least one LLM provider (OpenAI, Anthropic, Google AI, or AWS Bedrock). +* An xref:get-started:gateway-quickstart.adoc[AI Gateway] configured with at least one LLM provider (OpenAI, Anthropic, Google AI, or AWS Bedrock). -* An MCP server created by following xref:mcp:create-server.adoc[Create an MCP Server], with the following tools deployed: +* An MCP server created by following xref:connect:create-server.adoc[Create an MCP Server], with the following tools deployed: + ** `generate_input`: Generates fake user event data ** `redpanda_output`: Publishes data to Redpanda topics @@ -34,7 +34,7 @@ An Event Data Manager agent that: * Publishes events to Redpanda topics * Understands natural language requests like "Generate 5 login events and publish them" -The agent orchestrates the `generate_input` and `redpanda_output` tools you created in xref:mcp:create-server.adoc[]. +The agent orchestrates the `generate_input` and `redpanda_output` tools you created in xref:connect:create-server.adoc[]. == Create the agent @@ -87,7 +87,7 @@ Response format: . Select MCP tools: + * Click *Add MCP Server* -* Select the `event-data-generator` server you created in xref:mcp:create-server.adoc[] +* Select the `event-data-generator` server you created in xref:connect:create-server.adoc[] * Check both tools: ** `generate_input` ** `redpanda_output` @@ -98,7 +98,7 @@ Response format: . Review your configuration and click *Create Agent*. + -TIP: A service account is automatically created to authenticate your agent with ADP resources. 
For details about default permissions and how to manage service accounts, see xref:agents:concepts.adoc#service-account-authorization[Service account authorization]. +TIP: A service account is automatically created to authenticate your agent with ADP resources. For details about default permissions and how to manage service accounts, see xref:connect:concepts.adoc#service-account-authorization[Service account authorization]. . Wait for the agent status to change from *Starting* to *Running*. @@ -169,7 +169,7 @@ Try modifying the agent to change its behavior: == Troubleshoot -For comprehensive troubleshooting guidance, see xref:troubleshoot/troubleshoot-ai-agents.adoc[]. +For comprehensive troubleshooting guidance, see xref:monitor:troubleshoot-ai-agents.adoc[]. Common quickstart issue: @@ -178,8 +178,8 @@ Common quickstart issue: == Next steps * xref:get-started:byoc-quickstart.adoc[ADP Quickstart] -* xref:agents:overview.adoc[] -* xref:agents:create-agent.adoc[] -* xref:agents:system-prompts.adoc[] -* xref:agents:architecture-patterns.adoc[] -* xref:mcp:overview.adoc[] +* xref:connect:agents-overview.adoc[] +* xref:connect:create-agent.adoc[] +* xref:connect:system-prompts.adoc[] +* xref:connect:architecture-patterns.adoc[] +* xref:connect:mcp-overview.adoc[] diff --git a/modules/governance/pages/dashboard/index.adoc b/modules/governance/pages/dashboard/index.adoc deleted file mode 100644 index 22bcf9e..0000000 --- a/modules/governance/pages/dashboard/index.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= Governance Dashboard -:page-layout: index -:description: See your AI deployment's spending, fleet, and activity in one place, with drill-down into the transcript behind any number, agent, or event. 
- diff --git a/modules/governance/pages/guardrails/cost-tracking.adoc b/modules/governance/pages/guardrails/cost-tracking.adoc deleted file mode 100644 index 679c90b..0000000 --- a/modules/governance/pages/guardrails/cost-tracking.adoc +++ /dev/null @@ -1,69 +0,0 @@ -= Guardrail Cost Tracking -:description: See what each evaluator costs, where the cost surfaces in transcripts and dashboards, and how guardrail spend interacts with token budgets. -:page-topic-type: reference -:personas: platform_engineer, security_compliance_lead - -// TODO: this page lands at GA. The Guardrails plan (https://redpandadata.atlassian.net/wiki/spaces/DOC/pages/1881702438) lists this page as a should-ship deliverable; the cost-pool integration with Budgets fills in once eng confirms whether evaluator cost flows into the user-facing budget pool, a separate guardrail-evaluator pool, or both. Open Qs C2, C3 in the companion plan. - -Use this reference to: - -* [ ] Recognize the cost shape of each evaluator type (PII, Toxicity, Custom webhook) -* [ ] Locate guardrail-attributed cost in transcripts, metrics, and the governance dashboard -* [ ] Understand how guardrail spend interacts with token budgets and what knobs you have to cap it - -== Per-evaluator cost shape - -Each evaluator type has a different cost shape: - -[cols="1,2,2"] -|=== -|Type |Cost source |Where it surfaces - -|*PII* -|No per-call LLM cost. Compute time only, negligible for regex; non-trivial for entity-recognition. -|No transcript cost line. Compute time absorbed into gateway latency metrics. - -|*Toxicity* -|Per-call LLM cost. Counts against the *evaluator's configured upstream provider*: Typically a small classifier model, separate from the user-facing LLM. -|Per-call cost line in the transcript, alongside the user-facing LLM call. Aggregated into provider-breakdown views in the governance dashboard. - -|*Custom webhook* -|Gateway charges nothing per call. Your webhook's compute cost is your own infrastructure expense. 
-|Not captured in transcripts. Track in your webhook's own observability surface. -|=== - -== Where guardrail cost shows up - -Guardrail-attributed cost surfaces in three places, ordered from most granular to most aggregated: - -* *Transcripts*: Per-call cost line per fired evaluator, recorded alongside the user-facing LLM call. See xref:observability:transcripts.adoc[Read a transcript]. -* *Metrics*: Aggregate cost per guardrail per provider per time window. See xref:observability:metrics.adoc[Metrics]. -* *Governance dashboard*: Guardrail-attributed spend appears in the spend view, broken down by provider. See xref:governance:dashboard/overview.adoc[Read the governance overview]. - -// TODO: confirm whether the dashboard's spend view distinguishes guardrail-evaluator spend from user-facing LLM spend. Open Q C3 in the companion plan. - -== Capping guardrail cost - -Guardrail spend can grow unexpectedly when traffic spikes or when a Toxicity guardrail runs at `BOTH` phases on a high-throughput provider. Three knobs control it: - -* *Per-guardrail toggle*: Disable a guardrail to short-circuit its evaluator. The guardrail config is preserved; re-enable when ready. Useful as a kill switch when an evaluator's cost runs away. -* *Phase scoping*: Running a Toxicity evaluator at `OUTPUT` only (instead of `BOTH`) halves the per-request cost. -* *Token budgets*: See xref:governance:budgets.adoc[Token budgets and limits]. Guardrail evaluator cost flows into the same spending-event pipeline as user-facing LLM cost; per-provider breakdowns separate the two. - -// TODO: confirm whether evaluator cost flows into the same budget pool as user-facing LLM cost, or a separate guardrail-evaluator pool. The parent plan calls for "guardrail-cost separation documented" in the Budgets workflow GA scope. Open Q C2 in the companion plan. 
- -== Cost versus latency tradeoff - -Each evaluator type has a different cost-versus-latency profile: - -* *PII* is cheap and fast: Regex-based detection adds milliseconds, no LLM call. -* *Toxicity* is expensive and slow: The classifier call adds tokens and latency. -* *Custom webhook* is whatever your webhook makes it: Control your own infrastructure spend and latency profile. - -A typical optimization: disable Toxicity on `INPUT` and run it only on `OUTPUT`. Most policy violations are about what the model generates, not what the user asks; cutting the `INPUT` phase halves both the cost and the latency of the Toxicity guardrail without losing meaningful coverage. - -== Next steps - -* xref:governance:guardrails/types-reference.adoc[Evaluator types reference] -* xref:governance:budgets.adoc[Token budgets and limits] -* xref:governance:dashboard/overview.adoc[Read the governance overview] diff --git a/modules/governance/pages/guardrails/index.adoc b/modules/governance/pages/guardrails/index.adoc deleted file mode 100644 index 1792a2e..0000000 --- a/modules/governance/pages/guardrails/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= Guardrails -:description: Configurable safety and policy filters that run on the request or response side of every LLM call routed through AI Gateway. -:page-layout: index diff --git a/modules/governance/pages/index.adoc b/modules/governance/pages/index.adoc deleted file mode 100644 index 88395c0..0000000 --- a/modules/governance/pages/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= Governance -:description: Govern agent behavior with guardrails, token budgets, the kill switch, and the governance dashboard. -:page-layout: index diff --git a/modules/governance/pages/kill-switch.adoc b/modules/governance/pages/kill-switch.adoc deleted file mode 100644 index d461b99..0000000 --- a/modules/governance/pages/kill-switch.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= Kill Switch -:description: Emergency controls to stop AI agent operations. 
- -// TODO: Add content diff --git a/modules/integrations/pages/cline.adoc b/modules/integrations/pages/cline.adoc deleted file mode 100644 index cdf6a63..0000000 --- a/modules/integrations/pages/cline.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= Cline Integration -:description: Integrate Redpanda ADP with Cline. - -// TODO: Add content diff --git a/modules/integrations/pages/continue.adoc b/modules/integrations/pages/continue.adoc deleted file mode 100644 index 71e4664..0000000 --- a/modules/integrations/pages/continue.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= Continue Integration -:description: Integrate Redpanda ADP with Continue. - -// TODO: Add content diff --git a/modules/integrations/pages/copilot.adoc b/modules/integrations/pages/copilot.adoc deleted file mode 100644 index bca5892..0000000 --- a/modules/integrations/pages/copilot.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= GitHub Copilot Integration -:description: Integrate Redpanda ADP with GitHub Copilot. - -// TODO: Add content diff --git a/modules/integrations/pages/cursor.adoc b/modules/integrations/pages/cursor.adoc deleted file mode 100644 index 9100f87..0000000 --- a/modules/integrations/pages/cursor.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= Cursor Integration -:description: Integrate Redpanda ADP with Cursor IDE. - -// TODO: Add content diff --git a/modules/integrations/pages/index.adoc b/modules/integrations/pages/index.adoc deleted file mode 100644 index b0aff42..0000000 --- a/modules/integrations/pages/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= Integrations -:description: Connect ADP-managed LLM providers and MCP servers to AI development environments such as Claude Code, Cursor, Continue, Cline, and GitHub Copilot. 
-:page-layout: index diff --git a/modules/integrations/partials/integrations/cline-user.adoc b/modules/integrations/partials/integrations/cline-user.adoc deleted file mode 100644 index c05a6e6..0000000 --- a/modules/integrations/partials/integrations/cline-user.adoc +++ /dev/null @@ -1,710 +0,0 @@ -= Configure Cline with AI Gateway -:description: Configure Cline to use Redpanda AI Gateway for unified LLM access, MCP tool integration, and autonomous coding workflows. -:page-topic-type: how-to -:personas: agent_builder -:learning-objective-1: Configure Cline to connect to AI Gateway for LLM requests and MCP tools -:learning-objective-2: Set up autonomous mode with custom instructions and browser integration -:learning-objective-3: Verify Cline routes requests through the gateway and optimize for cost - -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up Cline (formerly Claude Dev) to route LLM requests and access MCP tools through the gateway's unified endpoints. - -After reading this page, you will be able to: - -* [ ] Configure Cline to connect to AI Gateway for LLM requests and MCP tools. -* [ ] Set up autonomous mode with custom instructions and browser integration. -* [ ] Verify Cline routes requests through the gateway and optimize for cost. 
- -== Prerequisites - -Before configuring Cline, ensure you have: - -* Cline VS Code extension installed (search for "Cline" in VS Code Extensions) -* An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) -* Your AI Gateway credentials: -** Gateway endpoint URL, which includes the gateway ID (for example, `\https://ai.prd.cloud.redpanda.com/gateway/v1/chat/completions`) -** API key with access to the gateway - -== About Cline - -Cline is an autonomous AI coding agent for VS Code that can: - -* Read and edit files in your workspace -* Execute terminal commands -* Browse the web for documentation and research -* Create and manage complex multi-file changes -* Work autonomously with approval checkpoints - -By routing Cline through AI Gateway, you gain centralized observability, cost controls, and the ability to aggregate multiple MCP servers into a single interface. - -== Configuration overview - -Cline supports two connection types for AI Gateway: - -[cols="1,2,2"] -|=== -|Connection type |Use for |Configuration location - -|OpenAI-compatible API -|LLM requests (chat, code generation) -|Cline Settings → API Configuration - -|MCP servers -|Tool discovery and execution -|Cline Settings → MCP Servers -|=== - -Both can route through AI Gateway independently or together, depending on your needs. - -== Configure LLM routing through gateway - -Set up Cline to route all LLM requests through your AI Gateway instead of directly to providers. - -=== Open Cline settings - -. Open VS Code -. Open Command Palette (Cmd+Shift+P or Ctrl+Shift+P) -. Search for `Cline: Open Settings` -. Select `Cline: Open Settings` - -Alternatively, click the gear icon in the Cline sidebar panel. - -=== Configure API provider - -In the Cline settings interface: - -. 
Navigate to *API Configuration* section -. Select *API Provider*: `OpenAI Compatible` -. Set *Base URL*: Your gateway endpoint URL (for example, `\https://ai.prd.cloud.redpanda.com/gateway/v1/chat/completions`). The gateway ID is embedded in the URL path. -. Set *API Key*: Your Redpanda API key - -=== Select model - -In the *Model* dropdown, enter the model using the `vendor/model_id` format: - -* For Anthropic Claude: `anthropic/claude-sonnet-4.5` -* For OpenAI: `openai/gpt-5.2` -* For other providers: `{provider}/{model-name}` - -The gateway routes the request based on this format. If you use a non-prefixed model name (for example, `claude-sonnet-4.5`), the gateway may not route correctly. - -=== Verify configuration - -. Click *Test Connection* in Cline settings -. Verify status shows "Connected" -. Send a test message in the Cline chat panel - -If the connection fails, see <<troubleshooting>>. - -== Configure MCP server integration - -Connect Cline to your AI Gateway's MCP endpoint to aggregate tools from multiple MCP servers. - -=== Add MCP server connection - -In the Cline settings interface: - -. Navigate to *MCP Servers* section -. Click *Add MCP Server* -. Configure the connection: -+ -[,json] ----- -{ - "name": "redpanda-ai-gateway", - "transport": "http", - "url": "<your-gateway-endpoint>/mcp", - "headers": { - "Authorization": "Bearer YOUR_API_KEY" - } -} ----- - -Replace placeholder values: - -* `<your-gateway-endpoint>` - Your gateway endpoint URL (the gateway ID is embedded in the URL path) -* `YOUR_API_KEY` - Your Redpanda API key - -=== Enable tool discovery - -After adding the MCP server: - -. Click *Refresh Tools* to discover available tools -. Verify that tools from your configured MCP servers appear in the tool list -. If using deferred tool loading, you'll see a search tool and MCP orchestrator tool instead of all tools upfront - -Tools are now available for Cline to use autonomously during coding sessions. 
- -=== Alternative: Manual configuration file - -For more control, edit the VS Code settings directly: - -. Open VS Code settings (Cmd+, or Ctrl+,) -. Search for `cline.mcpServers` -. Click *Edit in settings.json* -. Add the MCP server configuration: -+ -[,json] ----- -{ - "cline.mcpServers": [ - { - "name": "redpanda-ai-gateway", - "transport": "http", - "url": "/mcp", - "headers": { - "Authorization": "Bearer YOUR_API_KEY" - } - } - ] -} ----- - -Restart VS Code for changes to take effect. - -== Configure autonomous mode settings - -Optimize Cline's autonomous behavior when using AI Gateway. - -=== Set approval mode - -Control how often Cline requires your approval during autonomous tasks: - -[cols="1,2,2"] -|=== -|Mode |Behavior |Best for - -|*Always ask* -|Request approval for every action -|Testing, sensitive codebases, cost control - -|*Ask before terminal commands* -|Auto-approve file edits, ask for commands -|Trusted environments, faster iteration - -|*Autonomous* -|Complete tasks without interruption -|Well-scoped tasks, batch processing -|=== - -To set approval mode: - -. Open Cline settings -. Navigate to *Autonomous Mode* -. Select your preferred mode - -When using AI Gateway with spend limits, autonomous mode is safer because the gateway enforces budget controls even if Cline makes many requests. - -=== Configure custom instructions - -Add custom instructions to guide Cline's behavior and reduce token costs: - -. Open Cline settings -. Navigate to *Custom Instructions* -. Add instructions that reduce unnecessary requests: -+ -[,text] ----- -- Before making changes, analyze the codebase structure first -- Use existing code patterns instead of creating new ones -- Ask for clarification before large refactors -- Prefer small, incremental changes over complete rewrites -- Use MCP tools for research instead of multiple LLM calls ----- - -These instructions help Cline work more efficiently and reduce token usage. 
- -=== Enable browser integration - -Cline can use a browser to research documentation, which reduces the need for large context windows: - -. Open Cline settings -. Navigate to *Browser Integration* -. Enable *Allow Browser Access* -. Configure browser mode: -** *Headless* - Faster, lower resource usage -** *Visible* - See what Cline is browsing (useful for debugging) - -Browser integration is particularly useful with AI Gateway because: - -* Cline can look up current documentation instead of relying on outdated training data -* Reduces prompt token costs from pasting documentation into context -* Works with MCP tools that fetch web content - -== Verify configuration - -After configuring Cline, verify it connects correctly to your AI Gateway. - -=== Test LLM routing - -Send a test message in the Cline chat panel: - -. Open the Cline sidebar in VS Code -. Type a simple request: "Explain this file" (with a file open) -. Wait for response - -Then verify in the AI Gateway dashboard: - -. Sign in to ADP -. Navigate to your gateway's observability dashboard -. Filter by gateway ID -. Verify: -** Request appears in logs -** Model shows correct format (for example, `anthropic/claude-sonnet-4.5`) -** Token usage and cost are recorded - -If the request doesn't appear, see <<troubleshooting>>. - -=== Test MCP tool usage - -Verify Cline can discover and invoke MCP tools: - -. In the Cline chat, request a task that requires a tool -. For example: "Use the weather tool to check the forecast" -. Cline should: -** Discover the tool from the MCP server -** Invoke it with correct parameters -** Return the result - -Check the gateway dashboard for MCP tool invocation logs. - -=== Monitor token costs - -Track Cline's token usage to identify optimization opportunities: - -. Open the AI Gateway observability dashboard -. Filter by your gateway -. 
View metrics: -** Requests per hour -** Token usage per request (prompt + completion) -** Estimated cost per request - -High token costs may indicate: - -* Context windows that are too large (Cline includes many files unnecessarily) -* Repeated requests for the same information (use custom instructions to prevent this) -* Missing MCP tools that could replace multi-turn conversations - -== Advanced configuration - -=== Model selection strategies - -Different models have different cost and performance characteristics. Configure Cline to use the right model for each task: - -==== Strategy 1: Single high-quality model - -Use one premium model for all tasks. - -Configuration: - -* Model: `anthropic/claude-sonnet-4.5` -* Best for: Complex codebases, high-quality output requirements -* Cost: Higher, but consistent - -==== Strategy 2: Multiple Cline profiles - -Create separate VS Code workspace settings for different projects: - -.Project A (high complexity) -[,json] ----- -{ - "cline.apiProvider": "OpenAI Compatible", - "cline.baseURL": "", - "cline.model": "anthropic/claude-opus-4.6-5" -} ----- - -.Project B (simple tasks) -[,json] ----- -{ - "cline.apiProvider": "OpenAI Compatible", - "cline.baseURL": "", - "cline.model": "anthropic/claude-haiku" -} ----- - -=== Request timeout configuration - -For long-running tool executions or complex code generation: - -. Open VS Code settings -. Search for `cline.requestTimeout` -. Set timeout in milliseconds (default: 60000) -+ -[,json] ----- -{ - "cline.requestTimeout": 120000 -} ----- - -Increase this value if Cline times out during large refactoring tasks or when using slow MCP tools. - -=== Debug mode - -Enable debug logging to troubleshoot connection issues: - -. Open VS Code settings -. Search for `cline.debug` -. Enable debug mode: -+ -[,json] ----- -{ - "cline.debug": true -} ----- - -Debug logs appear in the VS Code Output panel: - -. Open Output panel (View → Output) -. Select "Cline" from the dropdown -. 
View HTTP request and response details - -Debug mode shows: - -* Full request and response payloads -* Gateway routing headers -* MCP tool discovery messages -* Error details - -=== Environment-based configuration - -Use different gateways for different environments without changing settings manually. - -IMPORTANT: VS Code's `.vscode/settings.json` does not natively support environment variable substitution with the `${VAR}` syntax shown below. You must either install an extension that provides variable substitution, replace the placeholders manually with actual values, or set environment variables before launching VS Code. - -Create workspace-specific configurations: - -.Development workspace (.vscode/settings.json) -[,json] ----- -{ - "cline.apiProvider": "OpenAI Compatible", - "cline.baseURL": "${GATEWAY_DEV_URL}" -} ----- - -.Production workspace (.vscode/settings.json) -[,json] ----- -{ - "cline.apiProvider": "OpenAI Compatible", - "cline.baseURL": "${GATEWAY_PROD_URL}" -} ----- - -Set environment variables before launching VS Code: - -[,bash] ----- -export GATEWAY_DEV_URL="" -export GATEWAY_PROD_URL="" ----- - -On Windows (PowerShell): - -[,powershell] ----- -$env:GATEWAY_DEV_URL = "" -$env:GATEWAY_PROD_URL = "" ----- - -[[troubleshooting]] -== Troubleshooting - -=== Cline shows "Connection failed" - -**Symptom**: Cline settings show connection failed, or requests return errors. - -**Causes and solutions**: - -. **Incorrect base URL** -+ -Verify your base URL does NOT include `/v1` or `/chat/completions`: -+ -[,text] ----- -# Correct - - -# Incorrect -/v1 -/chat/completions ----- -+ -Cline appends the correct path automatically. - -. **Authentication failure** -+ -Verify your API key is valid: -+ -[,bash] ----- -curl -H "Authorization: Bearer YOUR_API_KEY" \ - /v1/models ----- -+ -You should receive a list of available models. If you get `401 Unauthorized`, regenerate your API key in ADP. - -. 
**Gateway endpoint URL mismatch** -+ -Check that the gateway endpoint URL in your Cline configuration matches your gateway exactly. Copy it directly from the AI Gateway UI. - -. **Network connectivity issues** -+ -Test basic connectivity: -+ -[,bash] ----- -curl -I ----- -+ -If this times out, check your network configuration, firewall rules, or VPN connection. - -=== MCP tools not appearing - -**Symptom**: Cline doesn't see tools from the MCP server, or tool discovery fails. - -**Causes and solutions**: - -. **MCP endpoint incorrect** -+ -Verify the MCP endpoint is correct. It should be `{gateway-url}/mcp`, not just `{gateway-url}`: -+ -[,text] ----- -# Correct -/mcp - -# Incorrect - ----- - -. **No MCP servers configured in gateway** -+ -Verify your gateway has at least one MCP server enabled in the AI Gateway UI. - -. **Deferred tool loading enabled** -+ -If deferred tool loading is enabled, you'll see only a search tool initially. This is expected behavior. Tools load on-demand when Cline needs them. - -. **MCP server unreachable** -+ -Test the MCP endpoint directly: -+ -[,bash] ----- -curl -H "Authorization: Bearer YOUR_API_KEY" \ - /mcp ----- -+ -You should receive a valid MCP protocol response listing available tools. - -=== Requests not appearing in gateway dashboard - -**Symptom**: Cline works, but you don't see requests in the AI Gateway observability dashboard. - -**Causes and solutions**: - -. **Wrong gateway configured** -+ -Verify that the gateway endpoint URL in your Cline configuration matches the gateway you're viewing in the dashboard. - -. **Using direct provider connection** -+ -If you configured Cline with a provider's API directly (not the gateway URL), requests won't route through the gateway. Verify the base URL is your gateway endpoint. - -. **Log ingestion delay** -+ -Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. - -. 
**Model name format error** -+ -Ensure requests use the `vendor/model_id` format (for example, `anthropic/claude-sonnet-4.5`), not just the model name (for example, `claude-sonnet-4.5`). Check the model field in Cline settings. - -=== High token costs - -**Symptom**: Cline uses more tokens than expected, resulting in high costs. - -**Causes and solutions**: - -. **Large context windows** -+ -Cline may be including too many files in the context. Solutions: -+ -* Use custom instructions to limit file inclusion -* Create a `.clineignore` file to exclude unnecessary files -* Break large tasks into smaller, focused subtasks - -. **Repeated requests** -+ -Cline may be making redundant requests for the same information. Solutions: -+ -* Add custom instructions to prevent repeated analysis -* Use MCP tools to fetch external information instead of asking the LLM -* Enable caching in the gateway (if available) - -. **Wrong model selected** -+ -You may be using a premium model for simple tasks. Solutions: -+ -* Switch to a cost-effective model (for example, `anthropic/claude-haiku`) - -. **MCP tool overhead** -+ -If not using deferred tool loading, all tools load with every request. Solution: -+ -* Enable deferred tool loading in your AI Gateway configuration (see xref:ai-gateway:aggregation.adoc[]) - -=== Cline hangs or times out - -**Symptom**: Cline stops responding or shows timeout errors. - -**Causes and solutions**: - -. **Request timeout too low** -+ -Increase the timeout in VS Code settings: -+ -[,json] ----- -{ - "cline.requestTimeout": 120000 -} ----- - -. **Long-running MCP tool** -+ -Some MCP tools take time to execute. Check the gateway observability dashboard to see if tool execution is slow. - -. **Gateway rate limiting** -+ -You may be hitting rate limits. Check the dashboard for rate limit metrics and increase limits if needed. - -. **Provider outage** -+ -Check the AI Gateway dashboard for provider status. 
If the primary provider is down, configure failover (see xref:ai-gateway:gateway-quickstart.adoc#configure-provider-pool-with-fallback[Configure failover]). - -=== Settings changes not taking effect - -**Symptom**: Changes to Cline settings or VS Code configuration don't apply. - -**Solutions**: - -. **Reload VS Code** -+ -Some settings require reloading: -+ -* Open Command Palette (Cmd+Shift+P or Ctrl+Shift+P) -* Search for `Developer: Reload Window` -* Select and confirm - -. **Workspace settings override** -+ -Check if workspace settings (`.vscode/settings.json`) override user settings. Workspace settings take precedence. - -. **Invalid JSON syntax** -+ -If editing `settings.json` manually, validate JSON syntax. VS Code shows syntax errors in the editor. - -== Cost optimization tips - -=== Use the right model for each task - -Match model selection to task complexity: - -[cols="1,2,1"] -|=== -|Task type |Recommended model |Reason - -|Simple edits (typos, renames) -|`anthropic/claude-haiku` -|Low cost, fast - -|Code review, analysis -|`anthropic/claude-sonnet-4.5` -|Balanced quality and cost - -|Complex refactors, architecture -|`anthropic/claude-sonnet-4.5` or `anthropic/claude-opus-4.6-5` -|High quality for critical work -|=== - -=== Reduce context window size - -Limit the number of files Cline includes in requests: - -. Create a `.clineignore` file in your workspace root: -+ -[,text] ----- -# Exclude build artifacts -dist/ -build/ -node_modules/ - -# Exclude test files when not testing -**/*.test.js -**/*.spec.ts - -# Exclude documentation -docs/ -*.md ----- - -. 
Use custom instructions to guide file selection: -+ -[,text] ----- -- Only include files directly related to the task -- Ask which files to include if unsure -- Exclude test files unless specifically working on tests ----- - -=== Use MCP tools instead of large prompts - -Replace long documentation pastes with MCP tools: - -Before (high token cost): - -* User pastes API documentation into Cline chat -* Cline uses documentation to write integration code -* Thousands of tokens used for documentation - -After (low token cost): - -* Configure an MCP tool that searches API documentation -* Cline queries the tool for specific information as needed -* Only relevant sections included in context - -See xref:ai-gateway:aggregation.adoc[] for MCP tool configuration. - -=== Enable deferred tool loading - -If using multiple MCP servers, enable deferred tool loading in your gateway configuration to reduce token costs by 80-90%. - -This loads only essential tools initially. Cline queries for additional tools on-demand. - -=== Monitor and set spend limits - -Use AI Gateway spend limits to prevent runaway costs: - -. Navigate to your gateway in ADP -. Set monthly spend limit (for example, $500/month) -. Configure alerts before reaching limit - -The gateway automatically blocks requests that would exceed the limit. 
- -== Next steps - -* xref:ai-gateway:aggregation.adoc[] - -== Related pages - -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway -* xref:ai-gateway:gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits -* xref:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway diff --git a/modules/integrations/partials/integrations/cursor-admin.adoc b/modules/integrations/partials/integrations/cursor-admin.adoc deleted file mode 100644 index 3753339..0000000 --- a/modules/integrations/partials/integrations/cursor-admin.adoc +++ /dev/null @@ -1,684 +0,0 @@ -= Configure AI Gateway for Cursor IDE -:description: Configure Redpanda AI Gateway to support Cursor IDE clients. -:page-topic-type: how-to -:personas: platform_engineer -:learning-objective-1: Configure AI Gateway endpoints for Cursor IDE connectivity -:learning-objective-2: Set up OpenAI-compatible transforms across multiple providers -:learning-objective-3: Deploy multi-tenant authentication strategies for Cursor clients - -Configure Redpanda AI Gateway to support Cursor IDE clients accessing multiple LLM providers and MCP tools through OpenAI-compatible endpoints. - -After reading this page, you will be able to: - -* [ ] Configure AI Gateway endpoints for Cursor IDE connectivity. -* [ ] Set up OpenAI-compatible transforms across multiple providers. -* [ ] Deploy multi-tenant authentication strategies for Cursor clients. - -== Prerequisites - -* AI Gateway deployed on a BYOC environment running Redpanda version 25.3 or later -* Administrator access to the AI Gateway UI -* API keys for at least one LLM provider (Anthropic, OpenAI, or others) -* Understanding of xref:ai-gateway:gateway-architecture.adoc[AI Gateway concepts] - -== About Cursor IDE - -Cursor is an AI-powered code editor built on VS Code that integrates multiple LLM providers for code completion, chat, and inline editing. 
Unlike other AI assistants, Cursor uses OpenAI's API format for all providers and routes to different models using a `vendor/model` prefix notation. - -Key characteristics: - -* Sends all requests in OpenAI-compatible format to `/v1/chat/completions` -* Routes using model prefixes (for example, `openai/gpt-5.2`, `anthropic/claude-sonnet-4.5`) -* Limited support for custom headers (makes multi-tenant deployments challenging) -* Supports MCP protocol with a 40-tool limit -* Built-in code completion and chat modes -* Configuration through settings file (`~/.cursor/config.json`) - -== Architecture overview - -Cursor IDE connects to AI Gateway through standardized endpoints: - -* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/chat/completions` for all providers -* MCP endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp` for tool discovery and execution - -The gateway handles: - -. Authentication through bearer tokens in the `Authorization` header -. Gateway selection through the endpoint URL -. Model selection using vendor prefixes (for example, `anthropic/claude-sonnet-4.5`) -. Format transforms from OpenAI format to provider-native formats (for Anthropic, Google, and so on) -. MCP server aggregation for multi-tool workflows -. Request logging and cost tracking per gateway - -== Enable LLM providers - -Cursor IDE works with multiple providers through OpenAI-compatible transforms. Enable the providers your users will access. - -=== Configure Anthropic with OpenAI-compatible format - -Cursor sends OpenAI-formatted requests but can route to Anthropic models. Configure the gateway to transform these requests: - -. Navigate to *AI Gateway* > *Providers* in ADP -. Select *Anthropic* from the provider list -. Click *Add configuration* -. Enter your Anthropic API key -. Under *Format*, select *OpenAI-compatible* (enables automatic transform) -. 
Click *Save* - -The gateway now transforms OpenAI-format requests to Anthropic's native `/v1/messages` format. - -=== Configure OpenAI - -To enable OpenAI as a provider: - -. Navigate to *AI Gateway* > *Providers* -. Select *OpenAI* from the provider list -. Click *Add configuration* -. Enter your OpenAI API key -. Under *Format*, select *Native OpenAI* -. Click *Save* - -=== Configure additional providers - -Cursor supports many providers through OpenAI-compatible transforms. For each provider: - -. Add the provider configuration in the gateway -. Set the format to *OpenAI-compatible* (the gateway handles format transformation) -. Enable the transform layer to convert OpenAI request format to the provider's native format - -Common additional providers: - -* Google Gemini (requires OpenAI-compatible transform) -* Mistral AI (already OpenAI-compatible format) -* Together AI (already OpenAI-compatible format) - -=== Enable models in the catalog - -After enabling providers, enable specific models: - -. Navigate to *AI Gateway* > *Models* -. Enable the models you want Cursor clients to access -+ -Common models for Cursor: -+ -* `anthropic/claude-opus-4.6-5` -* `anthropic/claude-sonnet-4.5` -* `openai/gpt-5.2` -* `openai/gpt-5.2-mini` -* `openai/o1-mini` - -. Click *Save* - -Cursor uses the `vendor/model_id` format in requests. The gateway maps these to provider endpoints and applies the appropriate format transforms. - -== Create a gateway for Cursor clients - -Create a dedicated gateway to isolate Cursor traffic and apply specific policies. - -=== Gateway configuration - -. Navigate to *Agentic* > *AI Gateway* > *Gateways* -. Click *Create Gateway* -. Enter gateway details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`cursor-gateway` (or your preferred name) - -|Workspace -|Select the workspace for access control grouping - -|Description -|Gateway for Cursor IDE clients -|=== - -. Click *Create* -. 
Copy the gateway ID from the gateway details page - -The gateway ID is embedded in the gateway endpoint URL. - -=== Configure provider pools with fallback - -Cursor sends all requests to a single endpoint (`/v1/chat/completions`) and uses model prefixes (for example, `anthropic/claude-sonnet-4.5`, `openai/gpt-5.2`) to identify the target provider. - -. Navigate to the gateway's *LLM* tab -. Under *Provider pools*, click *Add pool* -. Configure a primary provider pool (for example, OpenAI): -+ -* Provider: OpenAI -* Model: All enabled OpenAI models -* Transform: None (already OpenAI format) -* Load balancing: Round robin (if multiple OpenAI configurations exist) - -. (Optional) Add a fallback pool (for example, Anthropic with the OpenAI-to-Anthropic transform). -. Click *Save* - -The gateway automatically fails over to the fallback pool when the primary returns 429, times out, or returns a 5xx error. - -=== Apply rate limits - -Prevent runaway usage from Cursor clients: - -. Navigate to the gateway's *LLM* tab -. Under *Rate Limit*, configure: -+ -[cols="1,2"] -|=== -|Setting |Recommended Value - -|Global rate limit -|150 requests per minute - -|Per-user rate limit -|15 requests per minute (if using user identification workarounds) -|=== - -. Click *Save* - -The gateway blocks requests exceeding these limits and returns HTTP 429 errors. - -==== Rate limit considerations for code completion - -Cursor's code completion feature generates frequent requests. Consider separate gateways for completion vs chat if you need different rate limits per use case: - -* Completion models (for example, `openai/gpt-5.2-mini`): Higher rate limits -* Chat models (for example, `anthropic/claude-sonnet-4.5`): Standard rate limits - -=== Set spending limits - -Control LLM costs across all providers: - -. 
Under *Spend Limit*, configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Monthly budget -|$7,000 (adjust based on expected usage) - -|Enforcement -|Block requests after budget exceeded - -|Alert threshold -|80% of budget (sends notification) -|=== - -. Click *Save* - -The gateway tracks estimated costs per request across all providers and blocks traffic when the monthly budget is exhausted. - -== Configure MCP tool aggregation - -Enable Cursor to discover and use tools from multiple MCP servers through a single endpoint. Note that Cursor has a 40-tool limit, so carefully select which MCP servers to aggregate. - -=== Add MCP servers - -. Navigate to the gateway's *MCP* tab -. Click *Add MCP Server* -. Enter server details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Display name -|Descriptive name (for example, `redpanda-data-tools`, `code-search-tools`) - -|Endpoint URL -|MCP server endpoint (for example, xref:integrations:remote-mcp-clients.adoc[Remote MCP server] URL) - -|Authentication -|Bearer token or other authentication mechanism -|=== - -. Click *Save* - -Repeat for each MCP server you want to aggregate, keeping in mind the 40-tool limit. - -=== Work within the 40-tool limit - -Cursor imposes a 40-tool limit on MCP integrations. To stay within this limit: - -* Aggregate only essential MCP servers -* Use deferred tool loading (see next section) -* Prioritize high-value tools over comprehensive tool sets -* Consider creating multiple gateways with different tool sets for different use cases - -Monitor the total tool count across all aggregated MCP servers: - -. Navigate to the gateway's *MCP* tab -. Review the *Total Tools* count displayed at the top -. If the count exceeds 40, remove low-priority MCP servers - -=== Enable deferred tool loading - -Reduce the effective tool count by deferring tool discovery: - -. Under *MCP Settings*, enable *Deferred tool loading* -. 
Click *Save* - -When enabled: - -* Cursor initially receives only a search tool and orchestrator tool (2 tools total) -* Cursor queries for specific tools by name when needed -* The underlying MCP servers can provide more than 40 tools, but only the search and orchestrator tools count against the limit -* Token usage decreases by 80-90% for configurations with many tools - -Deferred tool loading is the recommended approach for Cursor deployments with multiple MCP servers. - -=== Add the MCP orchestrator - -The MCP orchestrator reduces multi-step workflows to single calls: - -. Under *MCP Settings*, enable *MCP Orchestrator* -. Configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Orchestrator model -|Select a model with strong code generation capabilities (for example, `anthropic/claude-sonnet-4.5`) - -|Execution timeout -|30 seconds - -|Backend -|Select the Anthropic backend (orchestrator works best with Claude models) -|=== - -. Click *Save* - -Cursor can now invoke the orchestrator tool to execute complex, multi-step operations in a single request. - -== Configure authentication - -Cursor clients authenticate using bearer tokens in the `Authorization` header. - -=== Generate API tokens - -. Navigate to *Security* > *API Tokens* in ADP -. Click *Create Token* -. Enter token details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`cursor-access` - -|Scopes -|`ai-gateway:read`, `ai-gateway:write` - -|Expiration -|Set appropriate expiration based on security policies -|=== - -. Click *Create* -. Copy the token (it appears only once) - -Distribute this token to Cursor users through secure channels. - -=== Token rotation - -Implement token rotation for security: - -. Create a new token before the existing token expires -. Distribute the new token to users -. Monitor usage of the old token in the observability dashboard -. 
Revoke the old token after all users have migrated - -== Multi-tenant deployment strategies - -For organizations with multiple teams, use one of these multi-tenant strategies. - -=== Strategy 1: One gateway per team - -Create a separate gateway for each tenant or team. Each gateway has its own rate limits, spending limits, and API tokens. - -. In ADP, create one gateway per team (for example, `team-alpha-cursor-gateway`, `team-beta-cursor-gateway`). -. Distribute each team's gateway endpoint and API token to its users. -. Each team configures Cursor with their team's gateway URL. - -**Configuration example for Team Alpha:** - -[source,json] ----- -{ - "apiProvider": "openai", - "apiBaseUrl": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/{ALPHA_GATEWAY_ID}/v1", - "apiKey": "TEAM_ALPHA_TOKEN" -} ----- - -=== Strategy 2: Single shared gateway - -For simpler deployments, use one gateway for all Cursor users with shared access: - -. Create one gateway for all Cursor users. -. Generate a shared API token. -. Distribute the endpoint and token to all users. -. Use overall rate limits and spending limits to control aggregate usage. - -This approach is the simplest but provides no per-team cost tracking or isolation. - -=== Choosing a multi-tenant strategy - -[cols="1,2,2,1"] -|=== -|Strategy |Pros |Cons |Best For - -|One gateway per team -|Per-team budgets, rate limits, and isolation -|More gateways to manage, more tokens to distribute -|Organizations with multiple teams or budgets - -|Single shared gateway -|Simplest configuration and management -|No per-team isolation or limits -|Small organizations, proof of concept -|=== - -== Configure Cursor IDE clients - -Provide these instructions to users configuring Cursor IDE. 
- -=== Configuration file location - -Cursor uses a JSON configuration file: - -* macOS: `~/.cursor/config.json` -* Linux: `~/.cursor/config.json` -* Windows: `%USERPROFILE%\.cursor\config.json` - -=== Basic configuration - -Users configure Cursor with the AI Gateway endpoint: - -[source,json] ----- -{ - "apiProvider": "openai", - "apiBaseUrl": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1", - "apiKey": "YOUR_API_TOKEN", - "models": { - "chat": "anthropic/claude-sonnet-4.5", - "completion": "openai/gpt-5.2-mini" - } -} ----- - -Replace: - -* `{CLUSTER_ID}`: Your Redpanda cluster ID -* `YOUR_API_TOKEN`: The API token generated earlier - -If using a multi-tenant strategy, adjust the `apiBaseUrl` according to your chosen approach (subdomain, path prefix, or query parameter). - -=== Model selection - -Configure different models for different Cursor modes: - -[cols="1,2,1"] -|=== -|Mode |Recommended Model |Reason - -|Chat -|`anthropic/claude-sonnet-4.5` or `openai/gpt-5.2` -|High quality for complex questions - -|Code completion -|`openai/gpt-5.2-mini` -|Fast, cost-effective for frequent requests - -|Inline edit -|`anthropic/claude-sonnet-4.5` -|Balanced quality and speed for code modifications -|=== - -=== MCP server configuration - -Configure Cursor to connect to the aggregated MCP endpoint: - -[source,json] ----- -{ - "experimental": { - "mcpServers": { - "redpanda-ai-gateway": { - "type": "streamable-http", - "url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/mcp", - "headers": { - "Authorization": "Bearer YOUR_API_TOKEN" - } - } - } - } -} ----- - -If using a multi-tenant strategy, ensure the MCP URL matches the tenant configuration. - -This configuration: - -* Connects Cursor to the aggregated MCP endpoint -* Routes LLM requests through the AI Gateway with OpenAI-compatible transforms -* Includes authentication headers - -== Monitor Cursor usage - -Track Cursor activity through gateway observability features. - -=== View request logs - -. 
Navigate to *AI Gateway* > *Observability* > *Logs* -. Filter by gateway ID: `cursor-gateway` -. Review: -+ -* Request timestamps and duration -* Model used per request (with vendor prefix) -* Token usage (prompt and completion tokens) -* Estimated cost per request -* HTTP status codes and errors -* Transform operations (OpenAI to provider-native format) - -Cursor generates different request patterns: - -* Code completion: Many short requests with low token counts -* Chat: Longer requests with context and multi-turn conversations -* Inline edit: Medium-length requests with code context - -=== Analyze metrics - -. Navigate to *AI Gateway* > *Observability* > *Metrics* -. Select the Cursor gateway -. Review: -+ -[cols="1,2"] -|=== -|Metric |Purpose - -|Request volume by provider -|Identify which providers are most used through model prefix - -|Token usage by model -|Track consumption patterns (completion vs chat) - -|Estimated spend by provider -|Monitor costs across providers with transforms - -|Latency (p50, p95, p99) -|Detect transform overhead and provider-specific performance issues - -|Error rate by provider -|Identify failing providers or transform issues - -|Transform success rate -|Monitor OpenAI-to-provider format conversion success -|=== - - -=== Query logs through API - -Programmatically access logs for integration with monitoring systems: - -[source,bash] ----- -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ - -H "Authorization: Bearer YOUR_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "gateway_id": "GATEWAY_ID", - "start_time": "2026-01-01T00:00:00Z", - "end_time": "2026-01-14T23:59:59Z", - "limit": 100 - }' ----- - -== Security considerations - -Apply these security best practices for Cursor deployments. 
- -=== Limit token scope - -Create tokens with minimal required scopes: - -* `ai-gateway:read`: Required for MCP tool discovery -* `ai-gateway:write`: Required for LLM requests and tool execution - -Avoid granting broader scopes like `admin` or `cluster:write`. - -=== Implement network restrictions - -If Cursor clients connect from known networks, configure network policies: - -. Use cloud provider security groups to restrict access to AI Gateway endpoints -. Allowlist only the IP ranges where Cursor clients operate -. Monitor for unauthorized access attempts in request logs - -=== Enforce token expiration - -Set short token lifetimes for high-security environments: - -* Development environments: 90 days -* Production environments: 30 days - -Automate token rotation to reduce manual overhead. - -=== Audit tool access - -Review which MCP tools Cursor clients can access: - -. Periodically audit the MCP servers configured in the gateway -. Remove unused or deprecated MCP servers -. Monitor tool execution logs for unexpected behavior -. Ensure total tool count stays within Cursor's 40-tool limit - -=== Protect API keys in configuration - -Cursor stores the API token in plain text in `config.json`. Remind users to: - -* Never commit `config.json` to version control -* Use file system permissions to restrict access (for example, `chmod 600 ~/.cursor/config.json` on Unix-like systems) -* Rotate tokens if they suspect compromise -* Consider using environment variables for API keys (if Cursor supports this) - -=== Monitor transform operations - -Because Cursor requires OpenAI-compatible transforms for non-OpenAI providers: - -. Review transform success rates in metrics -. Monitor for transform failures that may leak request details -. Test transforms thoroughly before production deployment -. Keep transform logic updated as provider APIs evolve - -== Troubleshooting - -Common issues and solutions when configuring AI Gateway for Cursor. 
- -=== Cursor cannot connect to gateway - -Symptom: Connection errors when Cursor tries to discover tools or send LLM requests. - -Causes and solutions: - -* **Invalid base URL**: Verify `apiBaseUrl` matches the gateway endpoint (including multi-tenant prefix if applicable) -* **Expired token**: Generate a new API token and update the Cursor configuration -* **Network connectivity**: Verify the cluster endpoint is accessible from the client network -* **Provider not enabled**: Ensure at least one provider is enabled and has models in the catalog -* **Wrong gateway endpoint**: Verify the gateway endpoint URL is correct - -=== Model not found errors - -Symptom: Cursor shows "model not found" or similar errors. - -Causes and solutions: - -* **Model not enabled in catalog**: Enable the model in the gateway's model catalog -* **Incorrect model prefix**: Use the correct vendor prefix (for example, `anthropic/claude-sonnet-4.5` not just `claude-sonnet-4.5`) -* **Transform not configured**: Verify OpenAI-compatible transform is enabled for non-OpenAI providers - -=== Transform errors or unexpected responses - -Symptom: Responses are malformed or Cursor reports format errors. - -Causes and solutions: - -* **Transform disabled**: Ensure OpenAI-compatible transform is enabled for Anthropic and other non-OpenAI providers -* **Transform version mismatch**: Verify the transform is compatible with the current provider API version -* **Model-specific transform issues**: Some models may require specific transform configurations -* **Check transform logs**: Review logs for transform errors and stack traces - -=== Tools not appearing in Cursor - -Symptom: Cursor does not discover MCP tools. 
- -Causes and solutions: - -* **MCP configuration missing**: Ensure `experimental.mcpServers` is configured in Cursor settings -* **MCP servers not configured in gateway**: Add MCP server endpoints in the gateway's MCP tab -* **Exceeds 40-tool limit**: Reduce the number of aggregated tools or enable deferred tool loading -* **Deferred loading enabled but search failing**: Check that the search tool is correctly configured -* **MCP server authentication failing**: Verify MCP server authentication credentials in the gateway configuration - -=== High costs or token usage - -Symptom: Token usage and costs exceed expectations. - -Causes and solutions: - -* **Code completion using expensive model**: Configure completion mode to use `openai/gpt-5.2-mini` instead of larger models -* **Deferred tool loading disabled**: Enable deferred tool loading to reduce tokens by 80-90% -* **No rate limits**: Apply per-minute rate limits to prevent runaway usage -* **Missing spending limits**: Set monthly budget limits with blocking enforcement -* **Chat using wrong model**: Route chat requests to cost-effective models (for example, `anthropic/claude-sonnet-4.5` instead of `anthropic/claude-opus-4.6-5`) -* **Transform overhead**: Monitor if transforms add significant token overhead - -=== Requests failing with 429 errors - -Symptom: Cursor receives HTTP 429 Too Many Requests errors. - -Causes and solutions: - -* **Rate limit exceeded**: Review and increase rate limits if usage is legitimate (code completion needs higher limits) -* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover to alternate providers -* **Budget exhausted**: Verify monthly spending limit has not been reached -* **Per-user limits too restrictive**: Adjust per-user rate limits if using multi-tenant strategies - -=== Multi-tenant authorization failures - -Symptom: Requests fail authorization or hit the wrong gateway. 
- -Causes and solutions: - -* **Wrong gateway endpoint**: Verify each team uses the gateway URL that matches their tokens -* **Token does not match gateway**: Confirm the API token belongs to the gateway the user is calling - -== Next steps - -* xref:integrations:remote-mcp-clients.adoc[] diff --git a/modules/integrations/partials/integrations/cursor-user.adoc b/modules/integrations/partials/integrations/cursor-user.adoc deleted file mode 100644 index 4c27d60..0000000 --- a/modules/integrations/partials/integrations/cursor-user.adoc +++ /dev/null @@ -1,812 +0,0 @@ -= Configure Cursor IDE with AI Gateway -:description: Configure Cursor IDE to use Redpanda AI Gateway for unified LLM access, MCP tool integration, and AI-assisted coding. -:page-topic-type: how-to -:personas: agent_builder -:learning-objective-1: Configure Cursor IDE to route LLM requests through AI Gateway -:learning-objective-2: Set up MCP server integration for tool access through the gateway -:learning-objective-3: Optimize Cursor settings for multi-tenancy and cost control - -After xref:ai-gateway:gateway-quickstart.adoc[configuring your AI Gateway], set up Cursor IDE to route LLM requests and access MCP tools through the gateway's unified endpoints. - -After reading this page, you will be able to: - -* [ ] Configure Cursor IDE to route LLM requests through AI Gateway. -* [ ] Set up MCP server integration for tool access through the gateway. -* [ ] Optimize Cursor settings for multi-tenancy and cost control. 
- -== Prerequisites - -Before configuring Cursor IDE, ensure you have: - -* Cursor IDE installed (download from https://cursor.sh[cursor.sh^]) -* An active Redpanda AI Gateway with: -** At least one LLM provider enabled (see xref:ai-gateway:gateway-quickstart.adoc#step-1-enable-a-provider[Enable a provider]) -** A gateway created and configured (see xref:ai-gateway:gateway-quickstart.adoc#step-3-create-a-gateway[Create a gateway]) -* Your AI Gateway credentials: -** Gateway endpoint URL (for example, `\https://gw.ai.panda.com/v1/gateways/gateway-abc123`) -** API key with access to the gateway - -== About Cursor IDE - -Cursor IDE is an AI-powered code editor built on VS Code that provides: - -* Chat interface for code questions and generation -* AI-powered autocomplete with context awareness -* Codebase indexing for semantic search -* Inline code editing with AI assistance -* Terminal integration for command suggestions -* Native integration with multiple LLM providers - -By routing Cursor through AI Gateway, you gain centralized observability, cost controls, provider flexibility, and the ability to aggregate multiple MCP servers into a single interface. - -== Configuration methods - -Cursor IDE supports two configuration approaches for connecting to AI Gateway: - -[cols="1,2,2"] -|=== -|Method |Best for |Trade-offs - -|Settings UI -|Visual configuration, quick setup -|Limited to single provider configuration - -|Configuration file -|Multiple providers, environment-specific settings, version control -|Manual file editing required -|=== - -Choose the method that matches your workflow. The Settings UI is faster for getting started, while the configuration file provides more flexibility for production use. - -== Configure using Settings UI - -The Settings UI provides a visual interface for configuring Cursor's AI providers. - -=== Configure AI provider - -. 
Open Cursor Settings: -** macOS: *Cursor* > *Settings* or `Cmd+,` -** Windows/Linux: *File* > *Preferences* > *Settings* or `Ctrl+,` -. Navigate to *Features* > *AI* -. Under *OpenAI API*, configure the base URL and API key: - -[source,text] ----- -Override OpenAI Base URL: -Override OpenAI API Key: YOUR_REDPANDA_API_KEY ----- - -Replace placeholder values: - -* `` - Your gateway endpoint URL from the AI Gateway UI (includes gateway ID in the path) -* `YOUR_REDPANDA_API_KEY` - Your Redpanda API key - -=== Select models - -In the AI settings, configure which models to use: - -. Under *Model Selection*, choose your preferred model from the dropdown -. Cursor will automatically use the gateway endpoint configured above -. Models available depend on what you've enabled in your AI Gateway - -Model selection options: - -* `gpt-5.2` - Routes to OpenAI GPT-5.2 through your gateway -* `gpt-5.2-mini` - Routes to OpenAI GPT-5.2-mini (cost-effective) -* `claude-sonnet-4.5` - Routes to Anthropic Claude Sonnet (if enabled in gateway) -* `claude-opus-4.6` - Routes to Anthropic Claude Opus (if enabled in gateway) - -Note: When routing through AI Gateway, Cursor uses the OpenAI SDK format. The gateway automatically translates requests to the appropriate provider based on the model name. - -== Configure using configuration file - -For more control over provider settings, multi-environment configurations, or version control, edit Cursor's configuration file directly. 
- -=== Locate configuration file - -Cursor stores configuration in `settings.json`: - -* macOS: `~/Library/Application Support/Cursor/User/settings.json` -* Windows: `%APPDATA%\Cursor\User\settings.json` -* Linux: `~/.config/Cursor/User/settings.json` - -Create the directory structure if it doesn't exist: - -[,bash] ----- -# macOS -mkdir -p ~/Library/Application\ Support/Cursor/User - -# Linux -mkdir -p ~/.config/Cursor/User ----- - -=== Basic configuration - -Create or edit `settings.json` with the following structure: - -[,json] ----- -{ - "cursor.overrideOpenAIBaseUrl": "<your-gateway-endpoint>", - "cursor.overrideOpenAIApiKey": "YOUR_REDPANDA_API_KEY", - "cursor.cpp.defaultModel": "gpt-5.2", - "cursor.chat.defaultModel": "gpt-5.2" -} ----- - -Replace placeholder values: - -* `<your-gateway-endpoint>` - Your gateway endpoint URL from the AI Gateway UI -* `YOUR_REDPANDA_API_KEY` - Your Redpanda API key - -Configuration fields: - -* `cursor.overrideOpenAIBaseUrl` - Gateway endpoint URL (includes gateway ID in the path) -* `cursor.overrideOpenAIApiKey` - Your Redpanda API key (used for authentication) -* `cursor.cpp.defaultModel` - Model for autocomplete (`cpp` refers to Copilot++) -* `cursor.chat.defaultModel` - Model for chat interactions - -=== Multiple environment configuration - -To switch between development and production gateways, use workspace-specific settings. - -Create `.vscode/settings.json` in your project root: - -[,json] ----- -{ - "cursor.overrideOpenAIBaseUrl": "<your-gateway-endpoint>", - "openai.additionalHeaders": { - "x-environment": "staging" - } -} ----- - -Workspace settings override global settings. Use this to: - -* Route different projects through different gateways -* Use cost-effective models for internal projects -* Use premium models for customer-facing projects -* Add project-specific tracking headers - -=== Configuration with environment variables - -For sensitive credentials, avoid hardcoding values in `settings.json`. 
- -IMPORTANT: VS Code `settings.json` does not support `${VAR}` interpolation - such placeholders will be treated as literal strings. To use environment variables, generate the settings file dynamically with a script. - -==== Option 1: Generate settings.json with a script - -Create a setup script that reads environment variables and writes the actual values to `settings.json`: - -[,bash] ----- -#!/bin/bash -# setup-cursor-config.sh - -# Set your credentials -export REDPANDA_GATEWAY_ENDPOINT="https://gw.ai.panda.com/v1/gateways/gateway-abc123" -export REDPANDA_API_KEY="your-api-key" - -# Generate settings.json -cat > ~/.cursor/settings.json <", - "cursor.overrideOpenAIApiKey": "YOUR_REDPANDA_API_KEY", - "cursor.mcp": { - "servers": { - "redpanda-ai-gateway": { - "command": "node", - "args": [ - "-e", - "require('https').request({hostname:'',path:'//mcp',method:'GET',headers:{'Authorization':'Bearer YOUR_REDPANDA_API_KEY'}}).end()" - ] - } - } - } -} ----- - -This configuration uses Node.js to make HTTPS requests to the gateway's MCP endpoint. The gateway returns tool definitions that Cursor can use. - -Replace placeholder values: - -* `` - Your gateway endpoint URL from the AI Gateway UI -* `` - The hostname portion of your gateway endpoint (for example, `gw.ai.panda.com`) -* `` - The path portion of your gateway endpoint (for example, `v1/gateways/gateway-abc123`) -* `YOUR_REDPANDA_API_KEY` - Your Redpanda API key - -=== Enable deferred tool loading - -To work within Cursor's 40-tool limit, configure deferred tool loading in your AI Gateway: - -. Navigate to your gateway configuration in the AI Gateway UI -. Under *MCP Settings*, enable *Deferred Tool Loading* -. 
Save the gateway configuration - -When deferred loading is enabled: - -* Cursor receives only the search tool and orchestrator tool initially (2 tools total) -* When you ask Cursor to perform a task requiring a specific tool, it queries the gateway -* The gateway returns only the relevant tool definitions -* Total tool count stays well under the 40-tool limit - -== Verify configuration - -After configuring Cursor IDE, verify it connects correctly to your AI Gateway. - -=== Test chat interface - -. Open Cursor IDE -. Press `Cmd+L` (macOS) or `Ctrl+L` (Windows/Linux) to open the chat panel -. Type a simple question: "What does this function do?" (with a file open) -. Wait for response - -Then verify in the AI Gateway dashboard: - -. Sign in to ADP -. Navigate to your gateway's observability dashboard -. Filter by gateway ID -. Verify: -** Request appears in logs -** Model shows correct format (for example, `gpt-5.2`) -** Token usage and cost are recorded -** Request succeeded (status 200) - -If the request doesn't appear, see <<troubleshooting>>. - -=== Test inline code completion - -. Open a code file in Cursor -. Start typing a function definition -. Wait for inline suggestions to appear - -Autocomplete requests appear in the gateway dashboard with: - -* Lower token counts than chat requests -* Higher request frequency -* The autocomplete model you configured - -=== Test MCP tool integration - -If you configured MCP servers: - -. Open Cursor chat (`Cmd+L` or `Ctrl+L`) -. Ask a question that requires a tool: "What's the current date?" -. Cursor should: -** Discover available tools from the gateway -** Invoke the appropriate tool -** Return the result - -Check the gateway dashboard for MCP tool invocation logs. 
- -== Advanced configuration - -=== Custom request tracking headers - -Add custom headers for request tracking, user attribution, or routing policies: - -[,json] ----- -{ - "openai.additionalHeaders": { - "x-user-id": "developer-123", - "x-team": "backend", - "x-project": "api-service" - } -} ----- - -These headers flow into observability so administrators can track usage and costs per developer, team, or project, and generate team-specific usage reports. - -=== Model-specific settings - -Configure different settings for chat vs autocomplete: - -[,json] ----- -{ - "cursor.chat.defaultModel": "claude-sonnet-4.5", - "cursor.cpp.defaultModel": "gpt-5.2-mini", - "cursor.chat.temperature": 0.7, - "cursor.cpp.temperature": 0.2, - "cursor.chat.maxTokens": 4096, - "cursor.cpp.maxTokens": 512 -} ----- - -Settings explained: - -* Chat uses Claude Sonnet for reasoning depth -* Autocomplete uses GPT-5.2-mini for speed and cost efficiency -* Chat temperature (0.7) allows creative responses -* Autocomplete temperature (0.2) produces more deterministic code -* Chat allows longer responses (4096 tokens) -* Autocomplete limits responses (512 tokens) for speed - -=== Multi-tenancy with team-specific gateways - -For organizations with multiple teams sharing Cursor but requiring separate cost tracking and policies: - -[,json] ----- -{ - "cursor.overrideOpenAIBaseUrl": "${TEAM_GATEWAY_ENDPOINT}", - "cursor.overrideOpenAIApiKey": "${TEAM_API_KEY}", - "openai.additionalHeaders": { - "x-team": "${TEAM_NAME}" - } -} ----- - -The `${...}` values are placeholders: `settings.json` does not expand environment variables, so replace each one with a literal value. Each team configures their own: - -* `TEAM_GATEWAY_ENDPOINT` - Gateway endpoint URL with team-specific gateway ID in the path -* `TEAM_API_KEY` - Team-specific API key -* `TEAM_NAME` - Identifier for usage reports - -This approach enables: - -* Per-team cost attribution -* Separate budgets and rate limits -* Team-specific model access policies -* Independent observability dashboards - -=== Request timeout configuration - -Configure timeout for LLM and MCP requests: - -[,json] 
----- -{ - "cursor.requestTimeout": 30000, - "cursor.mcp.requestTimeout": 15000 -} ----- - -Timeout values are in milliseconds. Defaults: - -* LLM requests: 30000ms (30 seconds) -* MCP requests: 15000ms (15 seconds) - -Increase timeouts for: - -* Long-running MCP tools (database queries, web searches) -* High-latency network environments -* Complex reasoning tasks requiring extended processing - -=== Debug mode - -Enable debug logging to troubleshoot connection issues: - -[,json] ----- -{ - "cursor.debug": true, - "cursor.logLevel": "debug" -} ----- - -Debug mode shows: - -* HTTP request and response headers -* Model selection decisions -* Token usage calculations -* Error details with stack traces - -View debug logs: - -. Open Command Palette (`Cmd+Shift+P` or `Ctrl+Shift+P`) -. Type "Developer: Show Logs" -. Select "Extension Host" -. Filter by "cursor" - -[[troubleshooting]] -== Troubleshooting - -=== Cursor shows connection error - -**Symptom**: Cursor displays "Failed to connect to AI provider" or requests return errors. - -**Causes and solutions**: - -. **Incorrect base URL format** -+ -Verify the URL matches your gateway endpoint from the AI Gateway UI: -+ -[,text] ----- -# Correct - includes gateway ID in the path -"cursor.overrideOpenAIBaseUrl": "" - -# Incorrect - missing gateway path -"cursor.overrideOpenAIBaseUrl": "https://gw.ai.panda.com" ----- - -. **Authentication failure** -+ -Verify your API key is valid: -+ -[,bash] ----- -curl -H "Authorization: Bearer YOUR_API_KEY" \ - /models ----- -+ -You should receive a list of available models. If you get `401 Unauthorized`, regenerate your API key in ADP. - -. **Gateway endpoint URL mismatch** -+ -Verify that `cursor.overrideOpenAIBaseUrl` matches the gateway endpoint URL from the AI Gateway UI exactly. The URL includes the gateway ID in the path. - -. 
**Invalid JSON syntax** -+ -Validate your `settings.json` file: -+ -[,bash] ----- -# macOS/Linux -python3 -m json.tool ~/Library/Application\ Support/Cursor/User/settings.json - -# Or use jq -jq . ~/Library/Application\ Support/Cursor/User/settings.json ----- -+ -Fix any syntax errors reported. - -=== Autocomplete not working - -**Symptom**: Inline autocomplete suggestions don't appear or are very slow. - -**Causes and solutions**: - -. **No autocomplete model configured** -+ -Verify `cursor.cpp.defaultModel` is set in `settings.json`: -+ -[,json] ----- -{ - "cursor.cpp.defaultModel": "gpt-5.2-mini" -} ----- - -. **Model too slow** -+ -Use a faster, cost-effective model for autocomplete: -+ -[,json] ----- -{ - "cursor.cpp.defaultModel": "gpt-5.2-mini", - "cursor.cpp.maxTokens": 256 -} ----- -+ -Smaller models like GPT-5.2-mini or Claude Haiku provide faster responses ideal for autocomplete. - -. **Network latency** -+ -Check gateway latency in the observability dashboard. If p95 latency is over 500ms, autocomplete will feel slow. Consider: -+ -* Using a gateway in a closer geographic region -* Switching to a faster model -* Reducing `cursor.cpp.maxTokens` to 256 or lower - -. **Autocomplete disabled in settings** -+ -Verify autocomplete is enabled: -+ -. Open Settings (`Cmd+,` or `Ctrl+,`) -. Search for "cursor autocomplete" -. Ensure "Enable Autocomplete" is checked - -=== MCP tools not appearing - -**Symptom**: Cursor doesn't show tools from MCP servers, or shows error "Too many tools". - -**Causes and solutions**: - -. **40-tool limit exceeded** -+ -Cursor has a hard limit of 40 MCP tools. If your MCP servers expose more than 40 tools combined, enable deferred tool loading in your AI Gateway configuration. -+ -With deferred loading, only 2 tools (search + orchestrator) are sent to Cursor initially, staying well under the limit. - -. 
**MCP configuration missing** -+ -Verify the `cursor.mcp.servers` section exists in `settings.json`: -+ -[,json] ----- -{ - "cursor.mcp": { - "servers": { - "redpanda-ai-gateway": { - "command": "node", - "args": [/* ... */] - } - } - } -} ----- - -. **No MCP servers in gateway** -+ -Verify your gateway has at least one MCP server configured in the AI Gateway UI. - -. **MCP endpoint unreachable** -+ -Test connectivity to the MCP endpoint: -+ -[,bash] ----- -curl -H "Authorization: Bearer YOUR_API_KEY" \ - /mcp ----- -+ -You should receive a valid MCP protocol response. - -. **Cursor restart needed** -+ -MCP configuration changes require restarting Cursor: -+ -. Close all Cursor windows -. Relaunch Cursor -. Wait for MCP servers to initialize (may take 5-10 seconds) - -=== Requests not appearing in gateway dashboard - -**Symptom**: Cursor works, but requests don't appear in the AI Gateway observability dashboard. - -**Causes and solutions**: - -. **Wrong gateway endpoint** -+ -Verify that `cursor.overrideOpenAIBaseUrl` points to the correct gateway endpoint URL. The gateway ID is embedded in the URL path, so using the wrong endpoint routes requests to a different gateway. - -. **Using direct provider connection** -+ -If `cursor.overrideOpenAIBaseUrl` points directly to a provider (for example, `https://api.openai.com`), requests won't route through the gateway. Verify it points to your gateway endpoint. - -. **Log ingestion delay** -+ -Gateway logs can take 5-10 seconds to appear in the dashboard. Wait briefly and refresh. - -. **Workspace settings override** -+ -Check if `.vscode/settings.json` in your project root overrides global settings with different gateway configuration. - -=== High latency after gateway integration - -**Symptom**: Requests are slower after routing through the gateway. - -**Causes and solutions**: - -. **Gateway geographic distance** -+ -If your gateway is in a different region than you or the upstream provider, this adds network latency. 
Check gateway region in ADP. - -. **Provider pool failover** -+ -If your gateway is configured with fallback providers, check the logs to see if requests are failing over. Failover adds latency. - -. **Model mismatch** -+ -Verify you're using fast models for autocomplete: -+ -[,json] ----- -{ - "cursor.cpp.defaultModel": "gpt-5.2-mini" // Fast model -} ----- - -. **MCP tool aggregation overhead** -+ -Aggregating tools from multiple MCP servers adds processing time. Use deferred tool loading to reduce this overhead (see xref:ai-gateway:aggregation.adoc[]). - -=== Configuration changes not taking effect - -**Symptom**: Changes to `settings.json` don't apply. - -**Solutions**: - -. **Restart Cursor** -+ -Configuration changes require restarting Cursor: -+ -. Close all Cursor windows -. Relaunch Cursor - -. **Invalid JSON syntax** -+ -Validate JSON syntax: -+ -[,bash] ----- -python3 -m json.tool ~/Library/Application\ Support/Cursor/User/settings.json ----- - -. **Workspace settings overriding** -+ -Check if `.vscode/settings.json` in your project root overrides global settings. - -. **File permissions** -+ -Verify Cursor can read the configuration file: -+ -[,bash] ----- -# macOS -ls -la ~/Library/Application\ Support/Cursor/User/settings.json - -# Linux -ls -la ~/.config/Cursor/User/settings.json ----- -+ -Fix permissions if needed: -+ -[,bash] ----- -chmod 600 ~/Library/Application\ Support/Cursor/User/settings.json ----- - -== Cost optimization tips - -=== Use different models for chat and autocomplete - -Chat interactions benefit from reasoning depth, while autocomplete needs speed: - -[,json] ----- -{ - "cursor.chat.defaultModel": "claude-sonnet-4.5", - "cursor.cpp.defaultModel": "gpt-5.2-mini" -} ----- - -This can reduce costs by 5-10x for autocomplete while maintaining quality for chat. 
- -=== Limit token usage - -Reduce the maximum tokens for autocomplete to prevent runaway costs: - -[,json] ----- -{ - "cursor.cpp.maxTokens": 256, - "cursor.chat.maxTokens": 2048 -} ----- - -Autocomplete rarely needs more than 256 tokens, while chat responses can vary. - -=== Use MCP tools for documentation - -Instead of pasting large documentation into chat, create MCP tools that fetch relevant sections on-demand. This reduces token costs by including only needed information. - -=== Monitor usage patterns - -Use the AI Gateway dashboard to identify optimization opportunities: - -. Navigate to your gateway's observability dashboard -. Filter by Cursor requests (use custom header if configured) -. Analyze: -** Token usage per request type (chat vs autocomplete) -** Most expensive queries -** High-frequency low-value requests - -=== Team-based cost attribution - -Use custom headers to track costs per developer or team: - -[,json] ----- -{ - "openai.additionalHeaders": { - "x-user-id": "${USER_EMAIL}", - "x-team": "backend" - } -} ----- - -Generate team-specific cost reports from the gateway dashboard. - -=== Enable deferred MCP tool loading - -Configure deferred tool loading to reduce token costs by 80-90%: - -. Navigate to your gateway configuration -. Enable *Deferred Tool Loading* under MCP Settings -. Save configuration - -This sends only search + orchestrator tools initially, reducing token usage significantly. 
- -== Next steps - -* xref:ai-gateway:aggregation.adoc[] - -== Related pages - -* xref:ai-gateway:gateway-quickstart.adoc[]: Create and configure your AI Gateway -* xref:ai-gateway/gateway-architecture.adoc[]: Learn about AI Gateway architecture and benefits -* xref:ai-gateway/integrations/claude-code-user.adoc[]: Configure Claude Code with AI Gateway -* xref:ai-gateway/integrations/continue-user.adoc[]: Configure Continue.dev with AI Gateway -* xref:ai-gateway/integrations/cline-user.adoc[]: Configure Cline with AI Gateway diff --git a/modules/integrations/partials/integrations/github-copilot-admin.adoc b/modules/integrations/partials/integrations/github-copilot-admin.adoc deleted file mode 100644 index bb157af..0000000 --- a/modules/integrations/partials/integrations/github-copilot-admin.adoc +++ /dev/null @@ -1,688 +0,0 @@ -= Configure AI Gateway for GitHub Copilot -:description: Configure Redpanda AI Gateway to support GitHub Copilot clients. -:page-topic-type: how-to -:personas: platform_engineer -:learning-objective-1: Configure AI Gateway endpoints for GitHub Copilot connectivity -:learning-objective-2: Deploy multi-tenant authentication strategies for Copilot clients -:learning-objective-3: Set up model aliasing and BYOK provider pools for GitHub Copilot - -Configure Redpanda AI Gateway to support GitHub Copilot clients accessing multiple LLM providers through OpenAI-compatible endpoints with bring-your-own-key (BYOK) support. - -After reading this page, you will be able to: - -* [ ] Configure AI Gateway endpoints for GitHub Copilot connectivity. -* [ ] Deploy multi-tenant authentication strategies for Copilot clients. -* [ ] Set up model aliasing and BYOK provider pools for GitHub Copilot. 
- -== Prerequisites - -* AI Gateway deployed on a BYOC environment running Redpanda version 25.3 or later -* Administrator access to the AI Gateway UI -* API keys for at least one LLM provider (OpenAI, Anthropic, or others) -* Understanding of xref:ai-gateway/gateway-architecture.adoc[AI Gateway concepts] -* GitHub Copilot Business or Enterprise subscription (for BYOK and custom endpoints) - -== About GitHub Copilot - -GitHub Copilot is an AI-powered code completion tool that integrates with popular IDEs including VS Code, Visual Studio, JetBrains IDEs, and Neovim. GitHub Copilot uses OpenAI models by default but supports BYOK (bring your own key) configurations for Business and Enterprise customers. - -Key characteristics: - -* Sends all requests in OpenAI-compatible format to `/v1/chat/completions` -* Limited support for custom headers (similar to Cursor IDE) -* Supports BYOK for Business/Enterprise subscriptions -* Built-in code completion, chat, and inline editing modes -* Configuration through IDE settings or organization policies -* High request volume from code completion features - -== Architecture overview - -GitHub Copilot connects to AI Gateway through standardized endpoints: - -* LLM endpoint: `https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1/chat/completions` for all providers -* MCP endpoint support: Limited (GitHub Copilot does not natively support MCP protocol) - -The gateway handles: - -. Authentication through bearer tokens in the `Authorization` header -. Gateway selection through URL path or query parameters -. Model aliasing for friendly names -. Format transforms from OpenAI format to provider-native formats -. Request logging and cost tracking per gateway -. BYOK setup for different teams or users - -== Enable LLM providers - -GitHub Copilot works with multiple providers through OpenAI-compatible transforms. Enable the providers your users will access. - -=== Configure OpenAI (default provider) - -GitHub Copilot uses OpenAI by default. 
To enable OpenAI through the gateway: - -. Navigate to *AI Gateway* > *Providers* in ADP -. Select *OpenAI* from the provider list -. Click *Add configuration* -. Enter your OpenAI API key -. Under *Format*, select *Native OpenAI* -. Click *Save* - -=== Configure Anthropic with OpenAI-compatible format - -For BYOK deployments, you can route GitHub Copilot to Anthropic models. Configure the gateway to transform requests: - -. Navigate to *AI Gateway* > *Providers* -. Select *Anthropic* from the provider list -. Click *Add configuration* -. Enter your Anthropic API key -. Under *Format*, select *OpenAI-compatible* (enables automatic transform) -. Click *Save* - -The gateway now transforms OpenAI-format requests to Anthropic's native `/v1/messages` format. - -=== Configure additional providers - -GitHub Copilot supports multiple providers through OpenAI-compatible transforms. For each provider: - -. Add the provider configuration in the gateway -. Set the format to *OpenAI-compatible* (the gateway handles format transformation) -. Enable the transform layer to convert OpenAI request format to the provider's native format - -Common additional providers: - -* Google Gemini (requires OpenAI-compatible transform) -* Mistral AI (already OpenAI-compatible format) -* Azure OpenAI (already OpenAI-compatible format) - -=== Enable models in the catalog - -After enabling providers, enable specific models: - -. Navigate to *AI Gateway* > *Models* -. Enable the models you want GitHub Copilot clients to access -+ -Common models for GitHub Copilot: -+ -* `gpt-5.2` (OpenAI) -* `gpt-5.2-mini` (OpenAI) -* `o1-mini` (OpenAI) -* `claude-sonnet-4.5` (Anthropic, requires alias) - -. Click *Save* - -GitHub Copilot typically uses model names without vendor prefixes. You'll configure model aliasing in the next section to map friendly names to provider-specific models. 
- -== Create a gateway for GitHub Copilot clients - -Create a dedicated gateway to isolate GitHub Copilot traffic and apply specific policies. - -=== Gateway configuration - -. Navigate to *Agentic* > *AI Gateway* > *Gateways* -. Click *Create Gateway* -. Enter gateway details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`github-copilot-gateway` (or your preferred name) - -|Workspace -|Select the workspace for access control grouping - -|Description -|Gateway for GitHub Copilot clients -|=== - -. Click *Create* -. Copy the gateway ID from the gateway details page - -The gateway ID is required for sending requests to this gateway. - -=== Configure model aliasing - -GitHub Copilot expects model names like `gpt-5.2` without vendor prefixes. Configure aliases to map these to provider-specific models: - -. Navigate to the gateway's *Models* tab -. Click *Add Model Alias* -. Configure aliases: -+ -[cols="1,2,1"] -|=== -|Alias Name |Target Model |Provider - -|`gpt-5.2` -|`openai/gpt-5.2` -|OpenAI - -|`gpt-5.2-mini` -|`openai/gpt-5.2-mini` -|OpenAI - -|`claude-sonnet` -|`anthropic/claude-sonnet-4.5` -|Anthropic - -|`o1-mini` -|`openai/o1-mini` -|OpenAI -|=== - -. Click *Save* - -When GitHub Copilot requests `gpt-5.2`, the gateway routes to OpenAI's `gpt-5.2` model. Users can optionally request `claude-sonnet` for Anthropic models if the IDE configuration supports model selection. - -=== Configure provider pools with fallback - -GitHub Copilot sends all requests to a single endpoint (`/v1/chat/completions`). Configure a primary provider pool with optional fallback: - -. Navigate to the gateway's *LLM* tab -. Under *Provider pools*, click *Add pool* -. Configure the primary provider pool: -+ -* Provider: OpenAI -* Model: All enabled OpenAI models -* Transform: None (already OpenAI format) -* Load balancing: Round robin (if multiple OpenAI configurations exist) - -. (Optional) Add a fallback pool (for example, Anthropic) with the OpenAI-to-Anthropic transform. -. 
Click *Save* - -The gateway automatically fails over to the fallback pool when the primary returns 429, times out, or returns a 5xx error. - -=== Apply rate limits - -Prevent runaway usage from GitHub Copilot clients. Code completion features generate very high request volumes. - -. Navigate to the gateway's *LLM* tab -. Under *Rate Limit*, configure: -+ -[cols="1,2"] -|=== -|Setting |Recommended Value - -|Global rate limit -|300 requests per minute - -|Per-user rate limit -|30 requests per minute (if using user identification) -|=== - -. Click *Save* - -The gateway blocks requests exceeding these limits and returns HTTP 429 errors. - -==== Rate limit considerations for code completion - -GitHub Copilot's code completion feature generates extremely frequent requests (potentially dozens per minute per user). Consider: - -* Higher global rate limits than other AI coding assistants -* Separate rate limits for different request types if the gateway supports request classification -* Monitoring initial usage patterns to adjust limits appropriately - -=== Set spending limits - -Control LLM costs across all providers: - -. Under *Spend Limit*, configure: -+ -[cols="1,2"] -|=== -|Setting |Value - -|Monthly budget -|$10,000 (adjust based on expected usage) - -|Enforcement -|Block requests after budget exceeded - -|Alert threshold -|80% of budget (sends notification) -|=== - -. Click *Save* - -The gateway tracks estimated costs per request across all providers and blocks traffic when the monthly budget is exhausted. - -== Configure authentication - -GitHub Copilot clients authenticate using bearer tokens in the `Authorization` header. - -=== Generate API tokens - -. Navigate to *Security* > *API Tokens* in ADP -. Click *Create Token* -. Enter token details: -+ -[cols="1,2"] -|=== -|Field |Value - -|Name -|`copilot-access` - -|Scopes -|`ai-gateway:read`, `ai-gateway:write` - -|Expiration -|Set appropriate expiration based on security policies -|=== - -. Click *Create* -. 
Copy the token (it appears only once) - -Distribute this token to GitHub Copilot administrators through secure channels for organization-level configuration. - -=== Token rotation - -Implement token rotation for security: - -. Create a new token before the existing token expires -. Update organization-level GitHub Copilot configuration with the new token -. Monitor usage of the old token in (observability dashboard) -. Revoke the old token after the configuration update propagates - -== Multi-tenant deployment strategies - -GitHub Copilot has limited support for custom headers. The gateway ID is now embedded in the URL path, simplifying multi-tenancy. Use one of these strategies for BYOK deployments. - -=== Strategy 1: OAI Compatible Provider extension (recommended) - -For organizations using VS Code with GitHub Copilot, the OAI Compatible Provider extension enables custom headers for additional metadata. - -==== Install the extension - -. Navigate to VS Code Extensions Marketplace -. Search for "OAI Compatible Provider" -. Install the extension -. Restart VS Code - -==== Configure the extension - -. Open VS Code settings (JSON) -. Add gateway configuration: -+ -[source,json] ----- -{ - "oai-compatible-provider.providers": [ - { - "name": "Redpanda AI Gateway", - "baseUrl": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1", - "headers": { - "Authorization": "Bearer YOUR_API_TOKEN" - }, - "models": [ - "gpt-5.2", - "gpt-5.2-mini", - "claude-sonnet" - ] - } - ] -} ----- - -. Replace: -+ -* `{CLUSTER_ID}`: Your Redpanda cluster ID -* `YOUR_API_TOKEN`: Team-specific API token - -This approach allows true multi-tenancy with proper gateway isolation per team. 
- -**Benefits:** - -* Clean separation between tenants -* Standard authentication flow -* Works with any IDE supported by the extension - -**Limitations:** - -* Requires VS Code and extension installation -* Not available for all GitHub Copilot-supported IDEs -* Users must configure extension in addition to GitHub Copilot - -=== Strategy 2: Single-tenant mode - -For simpler deployments, configure a single gateway with shared access: - -. Create one gateway for all GitHub Copilot users -. Generate a shared API token -. Configure GitHub Copilot at the organization level -. Use rate limits and spending limits to control overall usage - -**Benefits:** - -* Simplest configuration -* No multi-tenant complexity -* Easy to manage - -**Limitations:** - -* No per-team cost tracking or limits -* Shared rate limits may impact individual teams -* All users have the same model access - -=== Choosing a multi-tenant strategy - -[cols="1,2,2,1"] -|=== -|Strategy |Pros |Cons |Best For - -|OAI Compatible Provider -|Clean tenant separation, custom headers -|Requires extension, VS Code only -|Organizations standardized on VS Code - -|Single-tenant -|Simplest configuration and management -|No per-team isolation or limits -|Small organizations, proof of concept -|=== - -== Configure GitHub Copilot clients - -Provide these instructions based on your chosen multi-tenant strategy. - -=== Organization-level configuration (GitHub Enterprise) - -For GitHub Enterprise customers, configure Copilot at the organization level: - -. Navigate to your organization settings on GitHub -. Go to *Copilot* > *Policies* -. Enable *Allow use of Copilot with custom models* -. Configure the custom endpoint: -+ -[source,json] ----- -{ - "api_base_url": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1", - "api_key": "YOUR_API_TOKEN" -} ----- - -This configuration applies to all users in the organization. 
- -=== IDE-specific configuration (individual users) - -For individual users or when organization-level configuration is not available: - -==== VS Code configuration - -. Open VS Code settings -. Search for "GitHub Copilot" -. Configure custom endpoint (if using OAI Compatible Provider): -+ -[source,json] ----- -{ - "github.copilot.advanced": { - "endpoint": "https://{CLUSTER_ID}.cloud.redpanda.com/ai-gateway/v1" - } -} ----- - -==== JetBrains IDEs - -. Open IDE Settings -. Navigate to *Tools* > *GitHub Copilot* -. Configure custom endpoint (support varies by IDE and Copilot version) - -==== Neovim - -. Edit Copilot configuration -. Add custom endpoint in the Copilot.vim or Copilot.lua configuration -. Refer to the Copilot.vim documentation for exact syntax - -=== Model selection - -Configure model preferences based on use case: - -[cols="1,2,1"] -|=== -|Use Case |Recommended Model |Reason - -|Code completion -|`gpt-5.2-mini` -|Fast, cost-effective for frequent requests - -|Code explanation -|`gpt-5.2` or `claude-sonnet` -|Higher quality for complex explanations - -|Code generation -|`gpt-5.2` or `claude-sonnet` -|Better at generating complete functions - -|Documentation -|`gpt-5.2-mini` -|Sufficient quality for docstrings and comments -|=== - -Model selection is typically configured at the organization level or through IDE settings. - -== Monitor GitHub Copilot usage - -Track GitHub Copilot activity through gateway observability features. - -=== View request logs - -. Navigate to *AI Gateway* > *Observability* > *Logs* -. Filter by gateway ID: `github-copilot-gateway` -. 
Review: -+ -* Request timestamps and duration -* Model used per request (including aliases) -* Token usage (prompt and completion tokens) -* Estimated cost per request -* HTTP status codes and errors -* Transform operations (OpenAI to provider-native format) - -GitHub Copilot generates distinct request patterns: - -* Code completion: Very high volume, short requests with low token counts -* Chat/explain: Medium volume, longer requests with code context -* Code generation: Lower volume, variable length requests - -=== Analyze metrics - -. Navigate to *AI Gateway* > *Observability* > *Metrics* -. Select the GitHub Copilot gateway -. Review: -+ -[cols="1,2"] -|=== -|Metric |Purpose - -|Request volume by model -|Identify most-used models through aliases - -|Token usage by model -|Track consumption patterns (completion vs chat) - -|Estimated spend by provider -|Monitor costs across providers with transforms - -|Latency (p50, p95, p99) -|Detect transform overhead and performance issues - -|Error rate by provider -|Identify failing providers or transform issues - -|Transform success rate -|Monitor OpenAI-to-provider format conversion success - -|Requests per user/tenant -|Track usage by team (if using multi-tenant strategies) -|=== - - -=== Query logs through API - -Programmatically access logs for integration with monitoring systems: - -[source,bash] ----- -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ - -H "Authorization: Bearer YOUR_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "gateway_id": "GATEWAY_ID", - "start_time": "2026-01-01T00:00:00Z", - "end_time": "2026-01-14T23:59:59Z", - "limit": 100 - }' ----- - -== Security considerations - -Apply these security best practices for GitHub Copilot deployments. 
- -=== Limit token scope - -Create tokens with minimal required scopes: - -* `ai-gateway:read`: Required for model discovery -* `ai-gateway:write`: Required for LLM requests - -Avoid granting broader scopes like `admin` or `cluster:write`. - -=== Implement network restrictions - -If GitHub Copilot clients connect from known networks, configure network policies: - -. Use cloud provider security groups to restrict access to AI Gateway endpoints -. Allowlist only the IP ranges where GitHub Copilot clients operate -. Monitor for unauthorized access attempts in request logs - -=== Enforce token expiration - -Set short token lifetimes for high-security environments: - -* Development environments: 90 days -* Production environments: 30 days - -Automate token rotation to reduce manual overhead. Coordinate with GitHub organization administrators when rotating tokens. - -=== Monitor transform operations - -Because GitHub Copilot may route to non-OpenAI providers through transforms: - -. Review transform success rates in metrics -. Monitor for transform failures that may leak request details -. Test transforms thoroughly before production deployment -. Keep transform logic updated as provider APIs evolve - -=== Audit model access - -Review which models GitHub Copilot clients can access: - -. Periodically audit enabled models and aliases -. Remove deprecated or unused model configurations -. Monitor model usage logs for unexpected patterns -. Ensure cost-effective models are used for high-volume completion requests - -=== Code completion security - -GitHub Copilot sends code context to LLM providers. 
Ensure: - -* Users understand what code context is sent with requests -* Proprietary code may be included in prompts -* Configure organization policies to limit code sharing if needed -* Review provider data retention policies -* Monitor logs for sensitive information in prompts (if logging includes prompt content) - -=== Organization-level controls - -For GitHub Enterprise customers: - -. Use organization-level policies to enforce custom endpoint usage -. Restrict which users can configure custom endpoints -. Monitor organization audit logs for configuration changes -. Implement approval workflows for endpoint changes - -== Troubleshooting - -Common issues and solutions when configuring AI Gateway for GitHub Copilot. - -=== GitHub Copilot cannot connect to gateway - -Symptom: Connection errors when GitHub Copilot tries to send requests. - -Causes and solutions: - -* **Invalid base URL**: Verify the configured endpoint matches the gateway URL -* **Expired token**: Generate a new API token and update the GitHub Copilot configuration -* **Network connectivity**: Verify the cluster endpoint is accessible from the client network -* **Provider not enabled**: Ensure at least one provider is enabled and has models in the catalog -* **TLS issues**: Verify the cluster has valid TLS certificates -* **Organization policy blocking custom endpoints**: Check GitHub organization settings - -=== Model not found errors - -Symptom: GitHub Copilot shows "model not found" or similar errors. - -Causes and solutions: - -* **Model not enabled in catalog**: Enable the model in the gateway's model catalog -* **Model alias missing**: Create an alias for the model name GitHub Copilot expects (for example, `gpt-5.2`) -* **Incorrect model name**: Verify GitHub Copilot is requesting a model name that exists in your aliases - -=== Transform errors or unexpected responses - -Symptom: Responses are malformed or GitHub Copilot reports format errors. 
- -Causes and solutions: - -* **Transform disabled**: Ensure OpenAI-compatible transform is enabled for non-OpenAI providers (for example, Anthropic) -* **Transform version mismatch**: Verify the transform is compatible with the current provider API version -* **Model-specific transform issues**: Some models may require specific transform configurations -* **Check transform logs**: Review logs for transform errors and stack traces -* **Response format incompatibility**: Verify the provider's response can be transformed to OpenAI format - -=== High costs or token usage - -Symptom: Token usage and costs exceed expectations. - -Causes and solutions: - -* **Code completion using expensive model**: Configure completion to use `gpt-5.2-mini` instead of larger models -* **No rate limits**: Apply per-minute rate limits to prevent runaway usage -* **Missing spending limits**: Set monthly budget limits with blocking enforcement -* **Chat using wrong model**: Ensure chat/explanation features use cost-effective models -* **Transform overhead**: Monitor if transforms add significant token overhead -* **High completion request volume**: Expected behavior, adjust budgets or implement stricter rate limits - -=== Requests failing with 429 errors - -Symptom: GitHub Copilot receives HTTP 429 Too Many Requests errors. - -Causes and solutions: - -* **Rate limit exceeded**: Review and increase rate limits if usage is legitimate (code completion needs very high limits) -* **Upstream provider rate limits**: Check if the upstream LLM provider is rate-limiting; configure failover to alternate providers -* **Budget exhausted**: Verify monthly spending limit has not been reached -* **Per-user limits too restrictive**: Adjust per-user rate limits if using multi-tenant strategies -* **Spike in usage**: Code completion can generate sudden usage spikes, consider burstable rate limits - -=== Multi-tenant authorization failures - -Symptom: Requests fail authorization or hit the wrong gateway. 
- -Causes and solutions: - -* **Wrong gateway endpoint**: Verify each team uses the gateway URL that matches their tokens -* **Organization policy override**: Verify GitHub organization settings aren't overriding user configurations -* **Extension not configured**: If using OAI Compatible Provider extension, verify proper installation and configuration - -=== Performance issues - -Symptom: Slow response times from GitHub Copilot. - -Causes and solutions: - -* **Transform latency**: Monitor metrics for transform processing time overhead -* **Provider latency**: Check latency metrics by provider to identify slow backends -* **Network latency**: Verify cluster is in a region with good connectivity to users -* **Cold start delays**: Some providers may have cold start latency on first request -* **Rate limiting overhead**: Check if rate limit enforcement is adding latency diff --git a/modules/integrations/partials/integrations/index.adoc b/modules/integrations/partials/integrations/index.adoc deleted file mode 100644 index 8150fff..0000000 --- a/modules/integrations/partials/integrations/index.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= AI Gateway Integrations -:description: Configure AI development tools and IDEs to connect to Redpanda AI Gateway for centralized LLM routing and MCP tool aggregation. -:page-layout: index - diff --git a/modules/mcp/pages/index.adoc b/modules/mcp/pages/index.adoc deleted file mode 100644 index ed0a8f1..0000000 --- a/modules/mcp/pages/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= MCP Servers -:description: Connect AI agents to your databases, queues, CRMs, and other business systems through MCP servers managed by the Agentic Data Plane. 
-:page-layout: index diff --git a/modules/mcp/pages/managed/index.adoc b/modules/mcp/pages/managed/index.adoc deleted file mode 100644 index 0d28037..0000000 --- a/modules/mcp/pages/managed/index.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= Managed Catalog -:page-layout: index -:description: Managed MCP servers Redpanda hosts in-process, with deep-dive guides for the most common types. - diff --git a/modules/observability/pages/byoa-telemetry.adoc b/modules/monitor/pages/byoa-telemetry.adoc similarity index 86% rename from modules/observability/pages/byoa-telemetry.adoc rename to modules/monitor/pages/byoa-telemetry.adoc index e1fe9de..a7018ff 100644 --- a/modules/observability/pages/byoa-telemetry.adoc +++ b/modules/monitor/pages/byoa-telemetry.adoc @@ -8,7 +8,7 @@ A *BYOA* (Bring Your Own Agent) is an agent you operate yourself, outside Redpanda's managed runtime. To make it visible across transcripts, the governance dashboard, cost rollups, and the agents list, your agent must emit OpenTelemetry traces with a specific minimum set of resource attributes and span attributes. This page documents that contract. -For the full OTLP ingestion flow (deploying the Connect pipeline, authenticating, sending traces over HTTP or gRPC), see xref:observability:ingest-custom-traces.adoc[Ingest custom traces]. This page focuses on _what_ to emit; that page covers _how_ to send it. +For the full OTLP ingestion flow (deploying the Connect pipeline, authenticating, sending traces over HTTP or gRPC), see xref:monitor:ingest-custom-traces.adoc[Ingest custom traces]. This page focuses on _what_ to emit; that page covers _how_ to send it. After reading this page, you will be able to: @@ -84,7 +84,7 @@ The dashboard degrades gracefully without these, but their presence lets the UI |Cache-hit token count on LLM-call spans. Surfaces in the *CACHED* bucket on the dashboard and in per-turn cost. 
Without it, the *CACHED* bucket reads `0` even when your agent reuses a system prompt that the upstream cached. |`gen_ai.input.messages` and `gen_ai.output.messages` -|Conversation content. Used to reconstruct turn content, and required for transcript history reconstruction when older spans are evicted from `redpanda.otel_traces` (see xref:observability:concepts.adoc#history-reconstruction[Reconstructed transcript history]). Without them, evicted spans render as empty turns rather than reconstructed turns. +|Conversation content. Used to reconstruct turn content, and required for transcript history reconstruction when older spans are evicted from `redpanda.otel_traces` (see xref:monitor:concepts.adoc#history-reconstruction[Reconstructed transcript history]). Without them, evicted spans render as empty turns rather than reconstructed turns. |=== [NOTE] @@ -94,7 +94,7 @@ Latency and timestamps come from OTel span `start_time` and `end_time` automatic == Span hierarchy -Transcripts read your agent's span tree to lay out turns. The recognized span types (matched by `gen_ai.operation.name` and span name) are documented in xref:observability:concepts.adoc[Observability]. The four span shapes are: +Transcripts read your agent's span tree to lay out turns. The recognized span types (matched by `gen_ai.operation.name` and span name) are documented in xref:monitor:concepts.adoc[Observability]. The four span shapes are: * *Top-level span*: One per agent invocation. Sets `gen_ai.operation.name = "invoke_agent"`, carries the conversation ID and service name. * *Reasoning or chat spans*: Set `gen_ai.operation.name = "chat"` for LLM calls. Carry the model, token counts, and provider attributes. @@ -118,13 +118,13 @@ If any field shows blank or zero unexpectedly, the corresponding attribute is mi BYOA agents authenticate against the OTLP ingest endpoint with a service-account access token from your organization. 
Send the token in `Authorization: Bearer <token>` (HTTP) or `authorization: Bearer <token>` (gRPC). -For the token-acquisition flow and endpoint URL format, see xref:observability:ingest-custom-traces.adoc[Ingest custom traces]. +For the token-acquisition flow and endpoint URL format, see xref:monitor:ingest-custom-traces.adoc[Ingest custom traces]. // TODO: confirm the standalone-ADP service-account auth model for OTLP ingest once the standalone product ships. The current page assumes federation in Redpanda Cloud, where service-account credentials come from Cloud Organization IAM. For standalone ADP, replace with the ADP-native auth model and update the cross-link. == Where to find code examples -The xref:observability:ingest-custom-traces.adoc[Ingest custom traces] page has full HTTP and gRPC examples in *Python*, *Node.js*, and *Go*, each instrumenting an LLM call with the GenAI semantic-convention attributes. Adapt the examples to your agent's framework. The attribute set is the same; only the OTel SDK ergonomics differ. +The xref:monitor:ingest-custom-traces.adoc[Ingest custom traces] page has full HTTP and gRPC examples in *Python*, *Node.js*, and *Go*, each instrumenting an LLM call with the GenAI semantic-convention attributes. Adapt the examples to your agent's framework. The attribute set is the same; only the OTel SDK ergonomics differ. // TODO: once the BYOA tutorials track ships at GA, link a worked end-to-end BYOA agent example here (from the Examples folder or a tutorials page). @@ -149,15 +149,15 @@ Common symptoms and fixes: |`gen_ai.operation.name = "execute_tool"` is missing on the tool span. Also confirm the tool span is parented to an assistant span, not the root. |Agent shows up in transcripts but not in the governance dashboard's Agents list -|Transcripts attribute by `service.name` resource attribute; the dashboard attributes by registered agent resource. BYOA agent registration ships separately.
See xref:agents:byoa-register.adoc[Register your own agent (BYOA)]. +|Transcripts attribute by `service.name` resource attribute; the dashboard attributes by registered agent resource. BYOA agent registration ships separately. See xref:connect:byoa-register.adoc[Register your own agent (BYOA)]. |Older turns in a long conversation render as `is_reconstructed` -|Spans were evicted from `redpanda.otel_traces` retention. Reconstruction works only if your agent emitted `gen_ai.input.messages` and `gen_ai.output.messages` on later spans. See xref:observability:concepts.adoc#history-reconstruction[Reconstructed transcript history]. +|Spans were evicted from `redpanda.otel_traces` retention. Reconstruction works only if your agent emitted `gen_ai.input.messages` and `gen_ai.output.messages` on later spans. See xref:monitor:concepts.adoc#history-reconstruction[Reconstructed transcript history]. |=== == Related topics -* xref:observability:ingest-custom-traces.adoc[Ingest custom traces] -* xref:observability:concepts.adoc[Observability] -* xref:observability:transcripts.adoc[Read a transcript] -* xref:agents:byoa-register.adoc[Register your own agent (BYOA)] +* xref:monitor:ingest-custom-traces.adoc[Ingest custom traces] +* xref:monitor:concepts.adoc[Observability] +* xref:monitor:transcripts.adoc[Read a transcript] +* xref:connect:byoa-register.adoc[Register your own agent (BYOA)] diff --git a/modules/observability/pages/concepts.adoc b/modules/monitor/pages/concepts.adoc similarity index 99% rename from modules/observability/pages/concepts.adoc rename to modules/monitor/pages/concepts.adoc index ffd230b..deefd31 100644 --- a/modules/observability/pages/concepts.adoc +++ b/modules/monitor/pages/concepts.adoc @@ -340,6 +340,6 @@ Trace data on `redpanda.otel_traces` is subject to a retention policy. 
When a tr == Next steps -* xref:observability:transcripts.adoc[] -* xref:agents:monitor.adoc[] -* xref:mcp:test-tools.adoc[] +* xref:monitor:transcripts.adoc[] +* xref:monitor:monitor-agents.adoc[] +* xref:connect:test-tools.adoc[] diff --git a/modules/monitor/pages/index.adoc b/modules/monitor/pages/index.adoc new file mode 100644 index 0000000..cc98d52 --- /dev/null +++ b/modules/monitor/pages/index.adoc @@ -0,0 +1,3 @@ += Monitor & debug +:description: See what your agents did, investigate runs, and check speed, cost, and errors. +:page-layout: index diff --git a/modules/observability/pages/ingest-custom-traces.adoc b/modules/monitor/pages/ingest-custom-traces.adoc similarity index 97% rename from modules/observability/pages/ingest-custom-traces.adoc rename to modules/monitor/pages/ingest-custom-traces.adoc index e49dfe5..6bf2e3d 100644 --- a/modules/observability/pages/ingest-custom-traces.adoc +++ b/modules/monitor/pages/ingest-custom-traces.adoc @@ -19,7 +19,7 @@ After reading this page, you will be able to: * A Redpanda Connect pipeline host (today: a Redpanda BYOC environment with Connect enabled). Ability to manage secrets on that host. // TODO: Replace with the standalone-ADP ingestion target once defined (may no longer require a Redpanda Cloud cluster). 
-* The latest version of xref:get-started:rpk-install.adoc[`rpk`] installed +* The latest version of xref:reference:rpk-install.adoc[`rpk`] installed * Custom agent or application instrumented with OpenTelemetry SDK * Basic understanding of the https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-agent-spans/[OpenTelemetry span format^] and https://opentelemetry.io/docs/specs/otlp/[OpenTelemetry Protocol (OTLP)^] @@ -60,7 +60,7 @@ For non-LangChain applications or custom instrumentation, continue with the sect Custom agents are applications with OpenTelemetry instrumentation that operate independently of Redpanda's Remote MCP servers or declarative agents (such as LangChain, CrewAI, or manually instrumented applications). -When these agents send traces to `redpanda.otel_traces`, you gain unified observability alongside Remote MCP server and declarative agent traces. See xref:observability:concepts.adoc#cross-service-transcripts[Cross-service transcripts] for details on how traces correlate across services. +When these agents send traces to `redpanda.otel_traces`, you gain unified observability alongside Remote MCP server and declarative agent traces. See xref:monitor:concepts.adoc#cross-service-transcripts[Cross-service transcripts] for details on how traces correlate across services. === Trace format requirements @@ -96,7 +96,7 @@ Optional but recommended fields: - `parentSpanId` for hierarchical traces - `attributes` for contextual information -For complete trace structure details, see xref:observability:concepts.adoc#understand-the-transcript-structure[Understand the transcript structure]. +For complete trace structure details, see xref:monitor:concepts.adoc#understand-the-transcript-structure[Understand the transcript structure]. 
== Configure the ingestion pipeline @@ -573,7 +573,7 @@ After your custom agent sends traces through the pipeline, they appear in the *T ==== Identify custom agent transcripts -Custom agent transcripts are identified by the `service.name` resource attribute, which differs from Redpanda's built-in services (`ai-agent` for declarative agents, `mcp-{server-id}` for MCP servers). See xref:observability:concepts.adoc#cross-service-transcripts[Cross-service transcripts] to understand how the `service.name` attribute identifies transcript sources. +Custom agent transcripts are identified by the `service.name` resource attribute, which differs from Redpanda's built-in services (`ai-agent` for declarative agents, `mcp-{server-id}` for MCP servers). See xref:monitor:concepts.adoc#cross-service-transcripts[Cross-service transcripts] to understand how the `service.name` attribute identifies transcript sources. Your custom agent transcripts display with: @@ -581,7 +581,7 @@ Your custom agent transcripts display with: * **Agent name** in span details (from the `gen_ai.agent.name` attribute) * **Operation names** like `"invoke_agent my-assistant"` indicating agent executions -For detailed instructions on filtering, searching, and navigating transcripts in the UI, see xref:observability:transcripts.adoc[View Transcripts]. +For detailed instructions on filtering, searching, and navigating transcripts in the UI, see xref:monitor:transcripts.adoc[View Transcripts]. 
==== Token usage tracking @@ -619,7 +619,7 @@ If requests succeed but traces do not appear in `redpanda.otel_traces`: == Next steps -* xref:observability:transcripts.adoc[] -* xref:agents:monitor.adoc[] +* xref:monitor:transcripts.adoc[] +* xref:monitor:monitor-agents.adoc[] * xref:connect:components:inputs/otlp_http.adoc[OTLP HTTP input reference] * xref:connect:components:inputs/otlp_grpc.adoc[OTLP gRPC input reference] diff --git a/modules/observability/pages/logs.adoc b/modules/monitor/pages/logs.adoc similarity index 100% rename from modules/observability/pages/logs.adoc rename to modules/monitor/pages/logs.adoc diff --git a/modules/observability/pages/metrics.adoc b/modules/monitor/pages/metrics.adoc similarity index 100% rename from modules/observability/pages/metrics.adoc rename to modules/monitor/pages/metrics.adoc diff --git a/modules/agents/pages/monitor.adoc b/modules/monitor/pages/monitor-agents.adoc similarity index 94% rename from modules/agents/pages/monitor.adoc rename to modules/monitor/pages/monitor-agents.adoc index d0709c4..fe02d96 100644 --- a/modules/agents/pages/monitor.adoc +++ b/modules/monitor/pages/monitor-agents.adoc @@ -14,18 +14,18 @@ After reading this page, you will be able to: * [ ] {learning-objective-2} * [ ] {learning-objective-3} -For conceptual background on traces and observability, see xref:observability:concepts.adoc[]. +For conceptual background on traces and observability, see xref:monitor:concepts.adoc[]. == Prerequisites -You must have a running agent. If you do not have one, see xref:agents:quickstart.adoc[]. +You must have a running agent. If you do not have one, see xref:get-started:quickstart.adoc[]. == Debug agent execution with Transcripts The *Transcripts* view shows execution traces with detailed timing, errors, and performance metrics. Use this view to debug issues, verify agent behavior, and monitor performance in real-time. 
:context: agent -include::observability:partial$transcripts-ui-guide.adoc[] +include::monitor:partial$transcripts-ui-guide.adoc[] === Check agent health @@ -71,7 +71,7 @@ Cost = (input_tokens x input_price) + (output_tokens x output_price) Example: GPT-5.2 with 4,302 input tokens and 1,340 output tokens at $0.00000175 per input token and $0.000014 per output token costs $0.026 per request. -For cost optimization strategies, see xref:agents:concepts.adoc#cost-calculation[Cost calculation]. +For cost optimization strategies, see xref:connect:concepts.adoc#cost-calculation[Cost calculation]. == Test agent behavior with Inspector @@ -94,5 +94,5 @@ Monitor iteration counts during complex requests to ensure they complete within == Next steps -* xref:observability:concepts.adoc[] -* xref:troubleshoot/troubleshoot-ai-agents.adoc[] +* xref:monitor:concepts.adoc[] +* xref:monitor:troubleshoot-ai-agents.adoc[] diff --git a/modules/observability/pages/transcripts.adoc b/modules/monitor/pages/transcripts.adoc similarity index 92% rename from modules/observability/pages/transcripts.adoc rename to modules/monitor/pages/transcripts.adoc index 96fb89a..fd29daf 100644 --- a/modules/observability/pages/transcripts.adoc +++ b/modules/monitor/pages/transcripts.adoc @@ -8,7 +8,7 @@ Use the Transcripts view to read a complete record of an agent or MCP server execution, turn by turn. Each transcript captures the conversation between the user, the agent, any LLM calls, and any tools it invoked, along with token usage, USD cost, latency, and any errors. -For conceptual background on the underlying OpenTelemetry data model, see xref:observability:concepts.adoc[]. +For conceptual background on the underlying OpenTelemetry data model, see xref:monitor:concepts.adoc[]. 
After reading this page, you will be able to: @@ -18,7 +18,7 @@ After reading this page, you will be able to: == Prerequisites -* xref:agents:create-agent.adoc[Running agent] or xref:mcp:create-server.adoc[MCP server] with at least one execution +* xref:connect:create-agent.adoc[Running agent] or xref:connect:create-server.adoc[MCP server] with at least one execution * Access to the Transcripts view (requires appropriate permissions to read the `redpanda.otel_traces` glossterm:topic[]) == Open the Transcripts view @@ -41,7 +41,7 @@ Each row in the list represents one execution (one trace). Columns include: * *USD cost*: Total cost for the execution, derived from per-model pricing. See <<usd-cost-shows-0,USD cost shows 0>> if this column shows `0`. * *Duration*: Wall-clock time between the first and last span. -A transcript marked _reconstructed_ is one in which some turns were rebuilt from LLM message context after the original spans were evicted from `redpanda.otel_traces`. See xref:observability:concepts.adoc#history-reconstruction[Reconstructed transcript history] for what that means. +A transcript marked _reconstructed_ is one in which some turns were rebuilt from LLM message context after the original spans were evicted from `redpanda.otel_traces`. See xref:monitor:concepts.adoc#history-reconstruction[Reconstructed transcript history] for what that means. // TODO: Confirm final column list on the GA Console UI. Today's labels likely shift. Verify against adp-production before merge. @@ -102,7 +102,7 @@ Turns are listed in order by role: * *ASSISTANT*: A response from the LLM. Shows the model, input/output token counts, USD cost for that turn, and latency. If the assistant turn called a tool, its tool calls are nested underneath. * *TOOL*: A tool invocation. Shows the tool name, the arguments passed, the result, and the latency of the call. -Any turn may carry the `is_reconstructed` marker.
Reconstructed turns preserve role order and the high-level content of the conversation but do not carry per-turn token counts, latency, or tool-call arguments. See xref:observability:concepts.adoc#history-reconstruction[Reconstructed transcript history] for the mechanics. +Any turn may carry the `is_reconstructed` marker. Reconstructed turns preserve role order and the high-level content of the conversation but do not carry per-turn token counts, latency, or tool-call arguments. See xref:monitor:concepts.adoc#history-reconstruction[Reconstructed transcript history] for the mechanics. === Errors @@ -148,7 +148,7 @@ If the failure happened during a tool call, the error is attached to the TOOL tu == Limitations * Large time windows sample the list to keep the UI responsive. The exact transcript you need may not be in the current page; narrow the time range or add filters. -* Reconstructed turns do not carry token counts, latency, or tool-call arguments for the reconstructed range. For byte-level fidelity, lower the ingestion lag or extend `redpanda.otel_traces` retention (see xref:observability:concepts.adoc#opentelemetry-traces-topic[How Redpanda stores trace data]). +* Reconstructed turns do not carry token counts, latency, or tool-call arguments for the reconstructed range. For byte-level fidelity, lower the ingestion lag or extend `redpanda.otel_traces` retention (see xref:monitor:concepts.adoc#opentelemetry-traces-topic[How Redpanda stores trace data]). * USD cost is only populated for models covered by the pricing table. // TODO: List which providers/models are priced at GA and what users see for un-priced ones (`0`, `null`, or an explicit "unknown" marker). // TODO: If the GA Console UI ships transcript export, document the entry point and output format here; otherwise omit. @@ -165,12 +165,12 @@ A transcript stays in `RUNNING` until the root span closes. 
Common causes: === USD cost shows 0 -`TranscriptUsage.usd_cost` is populated by the cost-reporting pipeline from the `gen_ai.usage.*` attributes on each LLM-call span combined with a per-model pricing table. For the full list of cost-bearing attributes (including the explicit USD-cost fields), see xref:observability:concepts.adoc#key-attributes-by-layer[Key attributes by layer]. +`TranscriptUsage.usd_cost` is populated by the cost-reporting pipeline from the `gen_ai.usage.*` attributes on each LLM-call span combined with a per-model pricing table. For the full list of cost-bearing attributes (including the explicit USD-cost fields), see xref:monitor:concepts.adoc#key-attributes-by-layer[Key attributes by layer]. // TODO: Document which providers/models are priced at GA. If cost is `0` for a transcript that clearly used tokens, check: -* The model is in the pricing table. To use a custom rate (negotiated contract, internal chargeback), see xref:governance:budgets.adoc#override-per-model-pricing[Override per-model pricing]. +* The model is in the pricing table. To use a custom rate (negotiated contract, internal chargeback), see xref:control:budgets.adoc#override-per-model-pricing[Override per-model pricing]. * The cost-reporting pipeline is enabled on your ADP environment. * The LLM-call spans carry the `gen_ai.usage.*` attributes the pipeline reads: Either the token-count inputs (`gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens`) or the explicit USD-cost fields listed on the concepts page. 
@@ -197,6 +197,6 @@ For long-running conversations, accept some reconstruction; for short conversati == Next steps -* xref:agents:monitor.adoc[] -* xref:mcp:test-tools.adoc[] -* xref:agents:troubleshoot/troubleshoot-ai-agents.adoc[] +* xref:monitor:monitor-agents.adoc[] +* xref:connect:test-tools.adoc[] +* xref:monitor:troubleshoot-ai-agents.adoc[] diff --git a/modules/agents/pages/troubleshoot/troubleshoot-ai-agents.adoc b/modules/monitor/pages/troubleshoot-ai-agents.adoc similarity index 94% rename from modules/agents/pages/troubleshoot/troubleshoot-ai-agents.adoc rename to modules/monitor/pages/troubleshoot-ai-agents.adoc index 0e8211d..b8777f7 100644 --- a/modules/agents/pages/troubleshoot/troubleshoot-ai-agents.adoc +++ b/modules/monitor/pages/troubleshoot-ai-agents.adoc @@ -67,7 +67,7 @@ NEVER respond about order status without calling the tool first. **Prevention:** * Write explicit tool selection criteria in system prompts -* Test agents with the xref:agents:system-prompts.adoc#evaluation-and-testing[systematic testing approach] +* Test agents with the xref:connect:system-prompts.adoc#evaluation-and-testing[systematic testing approach] * Use models appropriate for your task complexity === Calling wrong tools @@ -101,7 +101,7 @@ Use get_shipping_info when: **Prevention:** -* Follow tool design patterns in xref:mcp:overview.adoc[] +* Follow tool design patterns in xref:connect:mcp-overview.adoc[] * Limit each agent to 10-15 tools maximum * Test boundary cases where multiple tools might apply @@ -144,7 +144,7 @@ If a tool fails after 2 attempts: **Prevention:** * Design tools to return complete information in one call -* Set max iterations appropriate for task complexity (see xref:agents:concepts.adoc#why-iterations-matter[Why iterations matter]) +* Set max iterations appropriate for task complexity (see xref:connect:concepts.adoc#why-iterations-matter[Why iterations matter]) * Test with ambiguous requests that might cause loops === Making up information @@ 
-274,7 +274,7 @@ Efficiency guidelines: * Set appropriate max iterations (10-20 for simple, 30-40 for complex) * Design tools to return minimal necessary data * Monitor token usage trends -* See cost calculation guidance in xref:agents:concepts.adoc#cost-calculation[Cost calculation] +* See cost calculation guidance in xref:connect:concepts.adoc#cost-calculation[Cost calculation] == Tool execution issues @@ -438,7 +438,7 @@ The agent card is always available at `/.well-known/agent-card.json` according t * Always append `/.well-known/agent-card.json` to the agent endpoint URL * Test the agent card URL in a browser before using it in pipeline configuration -* See xref:agents:a2a-concepts.adoc#agent-card-location[Agent card location] for details +* See xref:connect:a2a-concepts.adoc#agent-card-location[Agent card location] for details === Pipeline integration failures @@ -472,14 +472,14 @@ processors: * Test pipeline-agent integration with low volume first * Size agent resources appropriately for event rate -* See integration patterns in xref:agents:pipeline-integration-patterns.adoc[] +* See integration patterns in xref:connect:pipeline-integration-patterns.adoc[] == Monitor and debug agents -For comprehensive guidance on monitoring agent activity, analyzing conversation history, tracking token usage, and debugging issues, see xref:agents:monitor.adoc[]. +For comprehensive guidance on monitoring agent activity, analyzing conversation history, tracking token usage, and debugging issues, see xref:monitor:monitor-agents.adoc[]. 
== Next steps -* xref:agents:system-prompts.adoc[] -* xref:mcp:overview.adoc[] -* xref:agents:architecture-patterns.adoc[] +* xref:connect:system-prompts.adoc[] +* xref:connect:mcp-overview.adoc[] +* xref:connect:architecture-patterns.adoc[] diff --git a/modules/ROOT/partials/observability-logs.adoc b/modules/monitor/partials/observability-logs.adoc similarity index 98% rename from modules/ROOT/partials/observability-logs.adoc rename to modules/monitor/partials/observability-logs.adoc index f4cbc29..3dde348 100644 --- a/modules/ROOT/partials/observability-logs.adoc +++ b/modules/monitor/partials/observability-logs.adoc @@ -22,7 +22,7 @@ Use logs for: * Understanding which provider handled a request * Investigating latency spikes or errors for specific users -Use metrics for: Aggregate analytics, trends, cost tracking across time. See xref:observability:metrics.adoc[]. +Use metrics for: Aggregate analytics, trends, cost tracking across time. See xref:monitor:metrics.adoc[]. == Where to find logs @@ -603,7 +603,7 @@ Retention period: // PLACEHOLDER: for example, 30 days, 90 days, configurable After retention period: * Logs are deleted automatically -* Aggregate metrics retained longer (see xref:observability:metrics.adoc[]) +* Aggregate metrics retained longer (see xref:monitor:metrics.adoc[]) Export logs (if needed for longer retention): @@ -746,4 +746,4 @@ Note: Cost estimates are approximate. Use provider invoices for billing. 
== Next steps -* xref:observability:metrics.adoc[] +* xref:monitor:metrics.adoc[] diff --git a/modules/observability/partials/observability-metrics.adoc b/modules/monitor/partials/observability-metrics.adoc similarity index 99% rename from modules/observability/partials/observability-metrics.adoc rename to modules/monitor/partials/observability-metrics.adoc index 258207c..56a8d2b 100644 --- a/modules/observability/partials/observability-metrics.adoc +++ b/modules/monitor/partials/observability-metrics.adoc @@ -22,7 +22,7 @@ Use metrics for: * Capacity planning * Model/provider comparison -Use logs for: Debugging specific requests, viewing full prompts/responses. See xref:observability:logs.adoc[]. +Use logs for: Debugging specific requests, viewing full prompts/responses. See xref:monitor:logs.adoc[]. == Where to find metrics @@ -850,9 +850,9 @@ Possible causes: Solution: 1. Remove filters, widen time range -2. Send test request (see xref:ai-gateway:gateway-quickstart.adoc[]) +2. Send test request (see xref:get-started:gateway-quickstart.adoc[]) 3. 
Check permissions with admin == Next steps -* xref:observability:logs.adoc[] +* xref:monitor:logs.adoc[] diff --git a/modules/observability/partials/transcripts-ui-guide.adoc b/modules/monitor/partials/transcripts-ui-guide.adoc similarity index 86% rename from modules/observability/partials/transcripts-ui-guide.adoc rename to modules/monitor/partials/transcripts-ui-guide.adoc index fb1daa6..bcc1be5 100644 --- a/modules/observability/partials/transcripts-ui-guide.adoc +++ b/modules/monitor/partials/transcripts-ui-guide.adoc @@ -23,8 +23,8 @@ // Valid values: "agent" | "mcp" // // DEPENDENCIES: -// - xref:observability:concepts.adoc#agent-transcript-hierarchy[] -// - xref:observability:concepts.adoc#mcp-server-transcript-hierarchy[] +// - xref:monitor:concepts.adoc#agent-transcript-hierarchy[] +// - xref:monitor:concepts.adoc#mcp-server-transcript-hierarchy[] // // CONTENT TYPE: // UI navigation and interface explanation (procedural context for how-to pages) @@ -66,10 +66,10 @@ The trace list shows nested operations with visual duration bars indicating how // Link to appropriate concepts section based on context ifeval::["{context}" == "agent"] -For details on span types, see xref:observability:concepts.adoc#agent-transcript-hierarchy[Agent transcript hierarchy]. +For details on span types, see xref:monitor:concepts.adoc#agent-transcript-hierarchy[Agent transcript hierarchy]. endif::[] ifeval::["{context}" == "mcp"] -For details on span types, see xref:observability:concepts.adoc#mcp-server-transcript-hierarchy[MCP server transcript hierarchy]. +For details on span types, see xref:monitor:concepts.adoc#mcp-server-transcript-hierarchy[MCP server transcript hierarchy]. endif::[] ==== Summary panel @@ -91,6 +91,6 @@ ifeval::["{context}" == "mcp"] * Service: The MCP server identifier endif::[] -If any turns were rebuilt from LLM message context after their original spans were evicted, the panel shows a _reconstructed_ marker on those turns. 
For the mechanics, see xref:observability:concepts.adoc#history-reconstruction[Reconstructed transcript history]. +If any turns were rebuilt from LLM message context after their original spans were evicted, the panel shows a _reconstructed_ marker on those turns. For the mechanics, see xref:monitor:concepts.adoc#history-reconstruction[Reconstructed transcript history]. // TODO: Re-verify this field list against the GA Console UI on adp-production. Beta labels may shift; update wording before GA. diff --git a/modules/observability/pages/index.adoc b/modules/observability/pages/index.adoc deleted file mode 100644 index 7c27515..0000000 --- a/modules/observability/pages/index.adoc +++ /dev/null @@ -1,3 +0,0 @@ -= Observability -:description: Inspect, debug, and govern agentic AI with execution transcripts, metrics, and logs from Redpanda's Agentic Data Plane. -:page-layout: index diff --git a/modules/observability/partials/observability-logs.adoc b/modules/observability/partials/observability-logs.adoc deleted file mode 100644 index f4cbc29..0000000 --- a/modules/observability/partials/observability-logs.adoc +++ /dev/null @@ -1,749 +0,0 @@ -= Observability: Logs -:description: Guide to AI Gateway request logs, including where to find logs, log fields, filtering, searching, inspecting requests, common analysis tasks, log retention, export options, privacy/security, and troubleshooting. -:page-topic-type: reference -:personas: platform_engineer, security_compliance_lead, agent_builder -:learning-objective-1: Locate and filter request logs to debug failures or reconstruct conversations -:learning-objective-2: Interpret log fields to diagnose performance and cost issues -:learning-objective-3: Export logs for compliance auditing or long-term analysis - -AI Gateway logs every LLM request that passes through it, capturing the full request/response history, token usage, cost, latency, and routing decisions. This page explains how to find, filter, and interpret request logs. 
- -== Before you begin - -* You have an active AI Gateway with at least one request processed. -* You have access to ADP. -* You have the appropriate permissions to view gateway logs. - -Use logs for: - -* Debugging specific failed requests -* Reconstructing user conversation sessions -* Auditing what prompts were sent and responses received -* Understanding which provider handled a request -* Investigating latency spikes or errors for specific users - -Use metrics for: Aggregate analytics, trends, cost tracking across time. See xref:observability:metrics.adoc[]. - -== Where to find logs - -1. Navigate to logs view: - * In the sidebar, navigate to *Agentic* > *AI Gateway* > *Gateways* > *{gateway-name}*, then select the *Logs* tab. - * Or: Gateway detail page -> Logs tab - -2. Select gateway: - * Filter by specific gateway, or view all gateways - * // PLACEHOLDER: screenshot of gateway selector - -3. Set time range: - * Default: Last 1 hour - * Options: Last 5 minutes, 1 hour, 24 hours, 7 days, 30 days, Custom - * // PLACEHOLDER: screenshot of time range picker - -== Request log fields - -Each log entry contains: - -=== Core request info - -[cols="1,2,2"] -|=== -| Field | Description | Example - -| *Request ID* -| Unique identifier for this request -| `req_abc123...` - -| *Timestamp* -| When request was received (UTC) -| `2025-01-11T14:32:10.123Z` - -| *Gateway ID* -| Which gateway handled this request -| `gw_abc123...` - -| *Gateway Name* -| Human-readable gateway name -| `production-gateway` - -| *Status* -| HTTP status code -| `200`, `400`, `429`, `500` - -| *Latency* -| Total request duration (ms) -| `1250ms` -|=== - -=== Model and provider info - -[cols="1,2,2"] -|=== -| Field | Description | Example - -| *Requested Model* -| Model specified in request -| `openai/gpt-5.2` - -| *Actual Model* -| Model that handled request (may differ due to routing) -| `anthropic/claude-sonnet-4.5` - -| *Provider* -| Which provider handled the request -| `OpenAI`, `Anthropic` - 
-| *Provider Pool* -| Pool used (primary/fallback) -| `primary`, `fallback` - -| *Fallback Triggered* -| Whether fallback was used -| `true`/`false` - -| *Fallback Reason* -| Why fallback occurred -| `rate_limit`, `timeout`, `5xx_error` -|=== - -=== Token and cost info - -[cols="1,2,2"] -|=== -| Field | Description | Example - -| *Prompt Tokens* -| Input tokens consumed -| `523` - -| *Completion Tokens* -| Output tokens generated -| `187` - -| *Total Tokens* -| Prompt + completion -| `710` - -| *Estimated Cost* -| Calculated cost for this request -| `$0.0142` - -| *Cost Breakdown* -| Per-token costs -| `Prompt: $0.005, Completion: $0.0092` -|=== - -=== Request content (expandable) - -[cols="1,2,2"] -|=== -| Field | Description | Notes - -| *Request Headers* -| All headers sent -| Includes authorization and custom headers - -| *Request Body* -| Full request payload -| Includes messages, parameters - -| *Response Headers* -| Headers returned -| // PLACEHOLDER: Any gateway-specific headers? 
- -| *Response Body* -| Full response payload -| Includes message content, metadata -|=== - -=== Policy info - -[cols="1,2,2"] -|=== -| Field | Description | Example - -| *Rate Limit Status* -| Whether rate limited -| `allowed`, `throttled`, `blocked` - -| *Spend Limit Status* -| Whether budget exceeded -| `allowed`, `blocked` - -| *Policy Stage* -| Where request was processed/blocked -| `rate_limit`, `routing`, `execution` -|=== - -=== Error info (if applicable) - -[cols="1,2,2"] -|=== -| Field | Description | Example - -| *Error Code* -| Gateway or provider error code -| `RATE_LIMIT_EXCEEDED`, `MODEL_NOT_FOUND` - -| *Error Message* -| Human-readable error -| `Request rate limit exceeded for gateway` - -| *Provider Error* -| Upstream provider error -| `OpenAI API returned 429: Rate limit exceeded` -|=== - -== Filter logs - -=== By gateway - -// PLACEHOLDER: Screenshot of gateway filter dropdown - -[source,text] ----- -Filter: Gateway = "production-gateway" ----- - - -Shows only requests for the selected gateway. - -Use case: Isolate production traffic from staging - -=== By model - -// PLACEHOLDER: Screenshot of model filter - -[source,text] ----- -Filter: Model = "openai/gpt-5.2" ----- - - -Shows only requests for specific model. - -Use case: Compare quality/cost between models - -=== By provider - -[source,text] ----- -Filter: Provider = "OpenAI" ----- - - -Shows only requests handled by specific provider. - -Use case: Investigate provider-specific issues - -=== By status - -[source,text] ----- -Filter: Status = "429" ----- - - -Shows only requests with specific HTTP status. 
- -Common filters: - -* `200`: Successful requests -* `400`: Bad requests (client errors) -* `401`: Authentication errors -* `429`: Rate limited requests -* `500`: Server errors -* `5xx`: All server errors - -Use case: Find all failed requests - -=== By time range - -[source,text] ----- -Filter: Timestamp >= "2025-01-11T14:00:00Z" AND Timestamp <= "2025-01-11T15:00:00Z" ----- - - -Use case: Investigate incident during specific time window - -=== By custom header - -[source,text] ----- -Filter: request.headers["x-user-id"] = "user_123" ----- - - -Shows only requests for specific user. - -Use case: Debug user-reported issue - -=== By token range - -[source,text] ----- -Filter: Total Tokens > 10000 ----- - - -Shows only high-token requests. - -Use case: Find expensive requests - -=== By latency - -[source,text] ----- -Filter: Latency > 5000ms ----- - - -Shows only slow requests. - -Use case: Investigate performance issues - -=== Combined filters - -[source,text] ----- -Gateway = "production-gateway" -AND Status >= 500 -AND Timestamp >= "last 24 hours" ----- - - -Shows production server errors in last 24 hours. - -// PLACEHOLDER: Screenshot of multiple filters applied - -== Search logs - -=== Full-text search (if supported) - -// PLACEHOLDER: Confirm if full-text search is available - -[source,text] ----- -Search: "specific error message" ----- - - -Searches across all text fields (error messages, request/response content). - -=== Search by request content - -[source,text] ----- -Search in Request Body: "user's actual question" ----- - - -Find requests containing specific prompt text. - -Use case: "A user said the AI gave a wrong answer about X" → Search for "X" in prompts - -=== Search by response content - -[source,text] ----- -Search in Response Body: "specific AI response phrase" ----- - - -Find responses containing specific text. 
- -Use case: Find all requests where AI mentioned a competitor name - -== Inspect individual requests - -Click any log entry to expand full details. - -// PLACEHOLDER: Screenshot of expanded log entry - -=== Request details tab - -Shows: - -* Full request headers -* Full request body (formatted JSON) -* All parameters (temperature, max_tokens, and so on) -* Custom headers used for routing - -Example: - -[source,json] ----- -{ - "model": "openai/gpt-5.2", - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant." - }, - { - "role": "user", - "content": "What is Redpanda?" - } - ], - "temperature": 0.7, - "max_tokens": 500 -} ----- - - -=== Response details tab - -Shows: - -* Full response headers -* Full response body (formatted JSON) -* Finish reason (`stop`, `length`, `content_filter`) -* Response metadata - -Example: - -[source,json] ----- -{ - "id": "chatcmpl-...", - "choices": [ - { - "message": { - "role": "assistant", - "content": "Redpanda is a streaming data platform..." 
- }, - "finish_reason": "stop" - } - ], - "usage": { - "prompt_tokens": 24, - "completion_tokens": 87, - "total_tokens": 111 - } -} ----- - - -=== Policy details tab - -Shows: - -* Provider pool used (primary/fallback) -* Fallback trigger reason (if applicable) -* Rate limit evaluation (allowed/blocked) -* Spend limit evaluation (allowed/blocked) - -Example: - -[source,yaml] ----- -Provider Pool: primary -Fallback Triggered: false - -Rate Limit: allowed (45/100 requests used) -Spend Limit: allowed ($1,234 / $50,000 budget used) ----- - - -=== Performance details tab - -Shows: - -* Total latency breakdown - * Gateway processing time: // PLACEHOLDER: Xms - * Provider API call time: // PLACEHOLDER: Xms - * Network time: // PLACEHOLDER: Xms -* Token generation rate (tokens/second) -* Time to first token (for streaming, if supported) - -Example: - -[source,text] ----- -Total Latency: 1,250ms -├─ Gateway Processing: 12ms -├─ Provider API Call: 1,215ms -└─ Network Overhead: 23ms - -Token Generation Rate: 71 tokens/second ----- - - -== Common log analysis tasks - -=== Task 1: "Why did this request fail?" - -1. Find the request: - - * Filter by timestamp (when user reported issue) - * Or search by request content - * Or filter by custom header (user ID) - -2. Check status: - - * `400` → Client error (bad request format, invalid parameters) - * `401` → Authentication issue - * `404` → Model not found - * `429` → Rate limited - * `500`/`5xx` → Provider or gateway error - -3. Check error message: - - * Gateway error: Issue with configuration, rate limits, and so on * Provider error: Issue with upstream API (OpenAI, Anthropic, and so on) - -4. Check fallback: - * Was fallback triggered? (May indicate primary provider issue) - -Common causes: - -* Model not enabled in gateway -* Rate limit exceeded -* Monthly budget exceeded -* Invalid API key for provider -* Provider outage/rate limit -* Malformed request - -=== Task 2: "Reconstruct a user's conversation" - -1. 
*Filter by user*: -+ -[source,text] ----- -Filter: request.headers["x-user-id"] = "user_123" ----- - -2. *Sort by timestamp* (ascending) - -3. *Review conversation flow*: - - * Each request shows prompt - * Each response shows AI reply - * Reconstruct full conversation thread - -Use case: User says "the AI contradicted itself" → View full conversation history - -=== Task 3: "Why is latency high for this user?" - -1. *Find user's requests*: -+ -[source,text] ----- -Filter: request.headers["x-user-id"] = "user_123" -AND Latency > 3000ms ----- - -2. *Check Performance Details*: - - * Is provider API slow? (Upstream latency) - * Is token generation rate normal? (Tokens/second) - -3. *Compare to other requests*: - - * Filter for same model - * Compare latency percentiles - * Identify if issue is user-specific or model-wide - -Common causes: - -* Provider performance degradation -* Large context windows (high token count) -* Network issues - -=== Task 4: "Which requests used the fallback provider?" - -1. *Filter by fallback*: -+ -[source,text] ----- -Filter: Fallback Triggered = true ----- - -2. *Group by Fallback Reason*: - - * Rate limit exceeded (primary provider throttled) - * Timeout (primary provider slow) - * 5xx error (primary provider error) - -3. *Analyze pattern*: - - * Is fallback happening frequently? (May indicate primary provider issue) - * Is fallback successful? (Check status of fallback requests) - -Use case: Verify failover is working as expected - -=== Task 5: "What did we spend on this customer today?" - -1. *Filter by customer*: -+ -[source,text] ----- -Filter: request.headers["x-customer-id"] = "customer_abc" -AND Timestamp >= "today" ----- - -2. *Sum estimated costs* (if UI supports): - - // PLACEHOLDER: Does UI have cost aggregation for filtered results? - * Total: $X.XX - * Breakdown by model - -3. *Export to CSV* (if supported): - - // PLACEHOLDER: Is CSV export available? 
- * For detailed billing analysis - -Use case: Chargeback/showback to customers - -== Log retention - -// PLACEHOLDER: Confirm log retention policy - -Retention period: // PLACEHOLDER: for example, 30 days, 90 days, configurable - -After retention period: - -* Logs are deleted automatically -* Aggregate metrics retained longer (see xref:observability:metrics.adoc[]) - -Export logs (if needed for longer retention): - -// PLACEHOLDER: Is log export available? Via API? CSV? - -== Log export - -// PLACEHOLDER: Confirm export capabilities - -=== Export to CSV - -// PLACEHOLDER: Add UI path for export, or indicate not available - -1. Apply filters for desired logs -2. Click "Export to CSV" -3. Download includes all filtered logs with full fields - -=== Export through API - -// PLACEHOLDER: If API is available for log export - -[source,bash] ----- -curl https://{CLUSTER_ID}.cloud.redpanda.com/api/ai-gateway/logs \ - -H "Authorization: Bearer ${REDPANDA_CLOUD_TOKEN}" \ - -G \ - --data-urlencode "gateway_id=gw_abc123" \ - --data-urlencode "start_time=2025-01-11T00:00:00Z" \ - --data-urlencode "end_time=2025-01-11T23:59:59Z" ----- - - -=== Integration with observability platforms - -// PLACEHOLDER: Are there integrations with external platforms? - -Supported integrations (if any): - -* OpenTelemetry export → Send logs to Jaeger, Datadog, New Relic -* CloudWatch Logs → For AWS deployments -* // PLACEHOLDER: Others? - - -== Privacy and security - -=== What is logged - -// PLACEHOLDER: Confirm what is logged by default - -AI Gateway logs by default: - -* Request headers (including custom headers) -* Request body (full prompt content) -* Response body (full AI response) -* Token usage, cost, latency -* Policy evaluations - -AI Gateway does not log (if applicable): - -* // PLACEHOLDER: Anything redacted? API keys? Specific headers? - -=== Redaction options - -// PLACEHOLDER: Are there options to redact PII or sensitive data? 
- -If redaction is supported: - -* Configure redaction rules for specific fields -* Mask PII (email addresses, phone numbers, and so on) -* Redact custom header values - -Example: - -[source,yaml] ----- -# PLACEHOLDER: Actual configuration format -redaction: - - field: request.headers.x-api-key - action: mask - - field: request.body.messages[].content - pattern: "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b" # Email regex - action: replace - replacement: "[REDACTED_EMAIL]" ----- - - -=== Access control - -// PLACEHOLDER: Who can view logs? RBAC? - -Permissions required: - -* View logs: // PLACEHOLDER: role/permission name -* Export logs: // PLACEHOLDER: role/permission name - -Audit trail: - -* Log access is audited (who viewed which logs, when) -* // PLACEHOLDER: Where to find audit trail? - -== Troubleshoot log issues - -=== Issue: "Logs not appearing for my request" - -Possible causes: - -1. Log ingestion delay (wait // PLACEHOLDER: Xs) -2. Wrong gateway ID filter -3. Request failed before reaching gateway (authentication error) -4. Time range filter too narrow - -Solution: - -1. Wait a moment and refresh -2. Remove all filters, search by timestamp -3. Check client-side error logs -4. Expand time range to "Last 1 hour" - -=== Issue: "Missing request/response content" - -Possible causes: - -1. Payload too large (// PLACEHOLDER: size limit?) -2. Redaction rules applied -3. // PLACEHOLDER: Other reasons? - -Solution: - -// PLACEHOLDER: How to retrieve full content if truncated? - -=== Issue: "Cost estimate incorrect" - -Possible causes: - -1. Cost estimate based on public pricing (may differ from your contract) -2. Provider changed pricing -3. // PLACEHOLDER: Other reasons? - -Note: Cost estimates are approximate. Use provider invoices for billing. 
- -== Next steps - -* xref:observability:metrics.adoc[] diff --git a/modules/reference/pages/index.adoc b/modules/reference/pages/index.adoc index 5640b28..a75d416 100644 --- a/modules/reference/pages/index.adoc +++ b/modules/reference/pages/index.adoc @@ -1,3 +1,3 @@ -= Reference -:description: Glossary and pipeline examples for the Redpanda Agentic Data Plane. += Settings reference +:description: Field and command reference for Agentic Data Plane. :page-layout: index diff --git a/modules/reference/pages/pipeline-examples.adoc b/modules/reference/pages/pipeline-examples.adoc deleted file mode 100644 index 471e49d..0000000 --- a/modules/reference/pages/pipeline-examples.adoc +++ /dev/null @@ -1,4 +0,0 @@ -= Pipeline Examples -:description: Example pipelines for common AI agent use cases. - -// TODO: Add content diff --git a/modules/get-started/pages/rpk-install.adoc b/modules/reference/pages/rpk-install.adoc similarity index 100% rename from modules/get-started/pages/rpk-install.adoc rename to modules/reference/pages/rpk-install.adoc