Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions docs/features/output-schema-validation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Output-Schema Validation (Spec 056 / Security Gateway Track A)

When an upstream MCP tool declares an `outputSchema`, MCPProxy can verify that
the tool's **structured response** conforms to that schema *before* it reaches
your agent. This protects the agent's context from a buggy or compromised
server injecting malformed, oversized, or unexpected data.

This is **Track A** of the MCP security-gateway hardening effort (Spec 054). It
validates `structuredContent` only; sanitisation/redaction of untrusted text
(Track B) and access control (Track C) are separate features.

## Configuration

Add an `output_validation` block to `~/.mcpproxy/mcp_config.json`:

```json
{
"output_validation": {
"mode": "warn",
"max_bytes": 5242880,
"max_depth": 64,
"missing_structured_content": "allow"
}
}
```

| Field | Values | Default | Meaning |
|-------|--------|---------|---------|
| `mode` | `off` \| `warn` \| `strict` | `warn` | `off` disables validation; `warn` forwards violations but logs them; `strict` blocks violations. |
| `max_bytes` | integer | `5242880` (5 MiB) | Max serialized size of the structured payload; larger payloads are a guard violation. |
| `max_depth` | integer | `64` | Max nesting depth of the structured payload; deeper payloads are a guard violation. |
| `missing_structured_content` | `allow` \| `block` | `allow` | In **strict** mode only: what to do when a tool declares a schema but returns no `structuredContent`. `allow` forwards it (recommended); `block` rejects it. |

If the block is **absent**, validation runs in `warn` mode with the defaults
above. This is safe to leave on: `warn` mode never blocks a response, it only
adds audit signal for tools that declare an output schema. Set `mode: "strict"`
once you've reviewed the warnings.

Config is hot-reloaded; changing `mode` does not require a restart.

## Behaviour

| Tool declares `outputSchema`? | Response has `structuredContent`? | Conforms? | `warn` | `strict` |
|---|---|---|---|---|
| No | — | — | forward (no-op) | forward (no-op) |
| Yes | No (text only) | — | forward (no-op) | forward, unless `missing_structured_content=block` |
| Yes | Yes | Yes | **forward unchanged** | **forward unchanged** |
| Yes | Yes | No | forward + audit | **block** + audit |
| Yes | Yes (oversized / too deep) | — | forward + audit (guard) | **block** + audit (guard) |
| Yes | upstream error result | — | forward (skip) | forward (skip) |

Key guarantees:

- **Lossless on success**: a conforming `structuredContent` is forwarded
byte-for-byte unchanged (validation runs on a read-only view).
- **Never blocks on a bad schema**: if a tool's declared schema cannot be
compiled, validation degrades to a no-op (logged once) — traffic is never
blocked just because the proxy failed to compile a schema.
- **Strict blocks return a clear error** to the agent:
`output schema validation failed: <keyword> at <path>: <detail>`.

## Auditing

Every violation is recorded as a `policy_decision` activity record whose
`decision` is `"blocked"` (strict mode) or `"warning"` (warn mode), together
with the violation detail:

```bash
mcpproxy activity list --type policy_decision # validation warnings + blocks
mcpproxy activity list --status blocked # strict blocks only
mcpproxy activity show <id> # tool, decision, reason
```

Or over the REST API:

```bash
curl -s -H "X-API-Key: $KEY" \
"http://127.0.0.1:8080/api/v1/activity?type=policy_decision&limit=5" | jq .
```

## Editions

Identical behaviour in the personal and server editions (no build-tag-specific
logic).
61 changes: 61 additions & 0 deletions e2e/stubs/outputschema/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Command outputschema is a minimal stdio MCP server used by the Spec 056
// output-schema-validation E2E test. It exposes three tools, all declaring the
// same output schema (an object with a required integer "id"):
//
// - conforming: returns structured {"id": 7} -> passes validation
// - bad_output: returns structured {"id": "not-an-int"} -> violates the schema
// - text_only: returns only text content (no structuredContent) -> the
// ContextForge #4042 case (declared schema, nothing to validate)
//
// It is intentionally dependency-light and deterministic so the proxy's
// validation behaviour can be asserted from curl/JSON-RPC.
package main

import (
"context"
"encoding/json"

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
)

// outputSchema is the JSON Schema that every tool in this stub declares as its
// output schema: a JSON object whose required "id" property must be an integer.
// Additional properties are explicitly allowed. main converts this text to a
// json.RawMessage and passes it to each tool definition.
const outputSchema = `{
"type": "object",
"properties": { "id": { "type": "integer" } },
"required": ["id"],
"additionalProperties": true
}`

// main builds the stub MCP server, registers the three fixture tools
// (conforming, bad_output, text_only) — each declaring outputSchema as its
// output schema — and then serves over stdio. It panics if ServeStdio returns
// an error.
func main() {
	stub := server.NewMCPServer("outputschema-stub", "1.0.0")
	schema := json.RawMessage(outputSchema)

	// register defines one tool carrying the shared output schema and attaches
	// its handler. Tools are added in call order.
	register := func(
		name, description string,
		handle func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error),
	) {
		tool := mcp.NewTool(name,
			mcp.WithDescription(description),
			mcp.WithRawOutputSchema(schema),
		)
		stub.AddTool(tool, handle)
	}

	// Structured payload matches the schema: "id" is an integer.
	register("conforming",
		"Returns a structured response that conforms to its output schema.",
		func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) {
			payload := map[string]any{"id": 7}
			return mcp.NewToolResultStructured(payload, `{"id":7}`), nil
		})

	// Structured payload breaks the schema: "id" is a string.
	register("bad_output",
		"Returns a structured response that VIOLATES its output schema (id is a string).",
		func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) {
			payload := map[string]any{"id": "not-an-int"}
			return mcp.NewToolResultStructured(payload, `{"id":"not-an-int"}`), nil
		})

	// Text-only result: a schema is declared but no structured content is sent.
	register("text_only",
		"Declares an output schema but returns only text content (no structuredContent).",
		func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) {
			return mcp.NewToolResultText("plain text, no structured content"), nil
		})

	err := server.ServeStdio(stub)
	if err != nil {
		panic(err)
	}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ require (
github.com/oklog/ulid/v2 v2.1.1
github.com/pkoukk/tiktoken-go v0.1.8
github.com/prometheus/client_golang v1.23.2
github.com/santhosh-tekuri/jsonschema/v6 v6.0.2
github.com/spf13/cobra v1.10.2
github.com/spf13/pflag v1.0.10
github.com/spf13/viper v1.21.0
Expand Down Expand Up @@ -119,7 +120,6 @@ require (
github.com/prometheus/procfs v0.16.1 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/sagikazarmark/locafero v0.11.0 // indirect
github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect
github.com/sergeymakinen/go-bmp v1.0.0 // indirect
github.com/sergeymakinen/go-ico v1.0.0-beta.0 // indirect
github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 // indirect
Expand Down
94 changes: 83 additions & 11 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ type Config struct {
// Sensitive data detection settings (Spec 026)
SensitiveDataDetection *SensitiveDataDetectionConfig `json:"sensitive_data_detection,omitempty" mapstructure:"sensitive-data-detection"`

// Output-schema validation settings (Spec 056)
OutputValidation *OutputValidationConfig `json:"output_validation,omitempty" mapstructure:"output-validation"`

// Telemetry settings (Spec 036)
Telemetry *TelemetryConfig `json:"telemetry,omitempty" mapstructure:"telemetry"`

Expand Down Expand Up @@ -239,9 +242,9 @@ type ServerConfig struct {
// when the server is configured with both Command and an HTTP/SSE URL — i.e.,
// mcpproxy starts the process AND connects via network. Stdio servers ignore
// this field. Zero or unset → 30s default.
LauncherWaitTimeout Duration `json:"launcher_wait_timeout,omitempty" mapstructure:"launcher_wait_timeout" swaggertype:"string"`
EnabledTools []string `json:"enabled_tools,omitempty" mapstructure:"enabled_tools"` // Allowlist: only these tools are exposed; mutually exclusive with disabled_tools
DisabledTools []string `json:"disabled_tools,omitempty" mapstructure:"disabled_tools"` // Denylist: these tools are hidden; mutually exclusive with enabled_tools
LauncherWaitTimeout Duration `json:"launcher_wait_timeout,omitempty" mapstructure:"launcher_wait_timeout" swaggertype:"string"`
EnabledTools []string `json:"enabled_tools,omitempty" mapstructure:"enabled_tools"` // Allowlist: only these tools are exposed; mutually exclusive with disabled_tools
DisabledTools []string `json:"disabled_tools,omitempty" mapstructure:"disabled_tools"` // Denylist: these tools are hidden; mutually exclusive with enabled_tools
}

// OAuthConfig represents OAuth configuration for a server
Expand Down Expand Up @@ -467,6 +470,71 @@ func (c *SensitiveDataDetectionConfig) GetEntropyThreshold() float64 {
return c.EntropyThreshold
}

// OutputValidationConfig holds the settings for output-schema validation
// (Spec 056). Every accessor method tolerates a nil receiver and then reports
// the default behaviour (warn mode, default limits, missing content allowed).
type OutputValidationConfig struct {
	// Mode is "off", "warn" or "strict"; default "warn". Any value other than
	// "off" or "strict" (including the empty string) behaves as warn.
	Mode string `json:"mode,omitempty" mapstructure:"mode"`

	// MaxBytes caps the structured payload size in bytes; default 5<<20.
	MaxBytes int `json:"max_bytes,omitempty" mapstructure:"max-bytes"`

	// MaxDepth caps the structured payload nesting depth; default 64.
	MaxDepth int `json:"max_depth,omitempty" mapstructure:"max-depth"`

	// MissingStructuredContent is "allow" or "block"; default "allow".
	MissingStructuredContent string `json:"missing_structured_content,omitempty" mapstructure:"missing-structured-content"`
}

// DefaultOutputValidationConfig returns a freshly allocated configuration
// populated with the defaults: warn mode, a 5 MiB byte cap, a depth cap of 64,
// and missing structured content allowed.
func DefaultOutputValidationConfig() *OutputValidationConfig {
	defaults := new(OutputValidationConfig)
	defaults.Mode = "warn"
	defaults.MaxBytes = 5 << 20
	defaults.MaxDepth = 64
	defaults.MissingStructuredContent = "allow"
	return defaults
}

// IsEnabled reports whether validation should run at all. Only an explicit
// Mode of "off" disables it; a nil receiver counts as enabled (warn).
func (c *OutputValidationConfig) IsEnabled() bool {
	return c == nil || c.Mode != "off"
}

// IsStrict reports whether Mode is exactly "strict". A nil receiver is not
// strict.
func (c *OutputValidationConfig) IsStrict() bool {
	return c != nil && c.Mode == "strict"
}

// IsWarn reports whether validation is enabled without being strict, i.e. the
// configuration is effectively in warn mode. A nil receiver yields true.
func (c *OutputValidationConfig) IsWarn() bool {
	if c.IsStrict() {
		return false
	}
	return c.IsEnabled()
}

// EffectiveMaxBytes returns MaxBytes when it is positive; a nil receiver or a
// zero or negative MaxBytes yields the default of 5<<20.
func (c *OutputValidationConfig) EffectiveMaxBytes() int {
	if c != nil && c.MaxBytes > 0 {
		return c.MaxBytes
	}
	return 5 << 20
}

// EffectiveMaxDepth returns MaxDepth when it is positive; a nil receiver or a
// zero or negative MaxDepth yields the default of 64.
func (c *OutputValidationConfig) EffectiveMaxDepth() int {
	if c != nil && c.MaxDepth > 0 {
		return c.MaxDepth
	}
	return 64
}

// BlockOnMissingStructured reports whether MissingStructuredContent is exactly
// "block". A nil receiver yields false, matching the "allow" default.
func (c *OutputValidationConfig) BlockOnMissingStructured() bool {
	return c != nil && c.MissingStructuredContent == "block"
}

// RegistryEntry represents a registry in the configuration
type RegistryEntry struct {
ID string `json:"id"`
Expand Down Expand Up @@ -523,14 +591,15 @@ func ConvertFromCursorFormat(cursorConfig *CursorMCPConfig) []*ServerConfig {

// ToolMetadata represents tool information stored in the index
type ToolMetadata struct {
Name string `json:"name"`
ServerName string `json:"server_name"`
Description string `json:"description"`
ParamsJSON string `json:"params_json"`
Hash string `json:"hash"`
Created time.Time `json:"created"`
Updated time.Time `json:"updated"`
Annotations *ToolAnnotations `json:"annotations,omitempty"`
Name string `json:"name"`
ServerName string `json:"server_name"`
Description string `json:"description"`
ParamsJSON string `json:"params_json"`
OutputSchemaJSON string `json:"output_schema_json,omitempty"` // declared output schema, raw JSON bytes (Spec 056)
Hash string `json:"hash"`
Created time.Time `json:"created"`
Updated time.Time `json:"updated"`
Annotations *ToolAnnotations `json:"annotations,omitempty"`
}

// ToolAnnotations represents MCP tool behavior hints
Expand Down Expand Up @@ -693,6 +762,9 @@ func DefaultConfig() *Config {
// Default sensitive data detection settings (enabled by default for security)
SensitiveDataDetection: DefaultSensitiveDataDetectionConfig(),

// Default output-schema validation settings (Spec 056)
OutputValidation: DefaultOutputValidationConfig(),

// Default registries for MCP server discovery
Registries: []RegistryEntry{
{
Expand Down
Loading
Loading