smart-mcp-proxy · Dumbris · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/docs/features/output-schema-validation.md b/docs/features/output-schema-validation.md
@@ -0,0 +1,83 @@
+# Output-Schema Validation (Spec 056 / Security Gateway Track A)
+
+When an upstream MCP tool declares an `outputSchema`, MCPProxy can verify that
+the tool's **structured response** conforms to that schema *before* it reaches
+your agent. This protects the agent's context from a buggy or compromised
+server injecting malformed, oversized, or unexpected data.
+
+This is **Track A** of the MCP security-gateway hardening effort (Spec 054). It
+validates `structuredContent` only; sanitisation/redaction of untrusted text
+(Track B) and access control (Track C) are separate features.
+
+## Configuration
+
+Add an `output_validation` block to `~/.mcpproxy/mcp_config.json`:
+
+```json
+{
+  "output_validation": {
+    "mode": "warn",
+    "max_bytes": 5242880,
+    "max_depth": 64,
+    "missing_structured_content": "allow"
+  }
+}
+```
+
+| Field | Values | Default | Meaning |
+|-------|--------|---------|---------|
+| `mode` | `off` \| `warn` \| `strict` | `warn` | `off` disables validation; `warn` forwards non-conforming responses and records each violation; `strict` blocks non-conforming responses and records each violation. |
+| `max_bytes` | integer | `5242880` (5 MiB) | Max serialized size of the structured payload, in bytes; larger payloads are a guard violation. Omitted or non-positive values use the default. |
+| `max_depth` | integer | `64` | Max nesting depth of the structured payload; deeper payloads are a guard violation. Omitted or non-positive values use the default. |
+| `missing_structured_content` | `allow` \| `block` | `allow` | In **strict** mode only: what to do when a tool declares a schema but returns no `structuredContent`. `allow` forwards it (recommended); `block` rejects it. |
+
+If the block is **absent**, validation runs in `warn` mode with the defaults
+above — safe to leave on, since it never blocks a working agent; it only adds
+audit signal for tools that declare an output schema. Set `mode: "strict"` once
+you've reviewed the warnings.
+
+Config is hot-reloaded; changing `mode` does not require a restart.
+
+## Behaviour
+
+| Tool declares `outputSchema`? | Response has `structuredContent`? | Conforms? | `warn` | `strict` |
+|---|---|---|---|---|
+| No | — | — | forward (no-op) | forward (no-op) |
+| Yes | No (text only) | — | forward (no-op) | forward, unless `missing_structured_content=block` |
+| Yes | Yes | Yes | **forward unchanged** | **forward unchanged** |
+| Yes | Yes | No | forward + audit | **block** + audit |
+| Yes | Yes (oversized / too deep) | — | forward + audit (guard) | **block** + audit (guard) |
+| Yes | upstream error result | — | forward (skip) | forward (skip) |
+
+Key guarantees:
+
+- **Lossless on success**: a conforming `structuredContent` is forwarded
+  byte-for-byte unchanged (validation runs on a read-only view).
+- **Never blocks on a bad schema**: if a tool's declared schema is itself not
+  compilable, validation degrades to a no-op (logged once) — it never blocks
+  traffic on the proxy's inability to compile a schema.
+- **Strict blocks return a clear error** to the agent:
+  `output schema validation failed: <keyword> at <path>: <detail>`.
+
+## Auditing
+
+Every violation (block or warn) is recorded as a `policy_decision` activity
+record with `decision = "blocked"` or `"warning"` and the violation detail:
+
+```bash
+mcpproxy activity list --type policy_decision    # validation warnings + blocks
+mcpproxy activity list --status blocked          # strict blocks only
+mcpproxy activity show <id>                       # tool, decision, reason
+```
+
+Or over the REST API:
+
+```bash
+curl -s -H "X-API-Key: $KEY" \
+  "http://127.0.0.1:8080/api/v1/activity?type=policy_decision&limit=5" | jq .
+```
+
+## Editions
+
+Identical behaviour in the personal and server editions (no build-tag-specific
+logic).
diff --git a/e2e/stubs/outputschema/main.go b/e2e/stubs/outputschema/main.go
@@ -0,0 +1,61 @@
+// Command outputschema is a minimal stdio MCP server used by the Spec 056
+// output-schema-validation E2E test. It exposes three tools, all declaring the
+// same output schema (an object with a required integer "id"):
+//
+//   - conforming:   returns structured {"id": 7}            -> passes validation
+//   - bad_output:   returns structured {"id": "not-an-int"} -> violates the schema
+//   - text_only:    returns only text content (no structuredContent) -> the
+//     ContextForge #4042 case (declared schema, nothing to validate)
+//
+// It is intentionally dependency-light and deterministic so the proxy's
+// validation behaviour can be asserted from curl/JSON-RPC.
+package main
+
+import (
+	"context"
+	"encoding/json"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+)
+
+const outputSchema = `{
+  "type": "object",
+  "properties": { "id": { "type": "integer" } },
+  "required": ["id"],
+  "additionalProperties": true
+}`
+
+func main() {
+	s := server.NewMCPServer("outputschema-stub", "1.0.0")
+
+	rawSchema := json.RawMessage(outputSchema) // one schema, declared by all three tools below
+
+	conforming := mcp.NewTool("conforming",
+		mcp.WithDescription("Returns a structured response that conforms to its output schema."),
+		mcp.WithRawOutputSchema(rawSchema),
+	)
+	s.AddTool(conforming, func(_ context.Context, _ mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		return mcp.NewToolResultStructured(map[string]any{"id": 7}, `{"id":7}`), nil // integer id: satisfies outputSchema
+	})
+
+	badOutput := mcp.NewTool("bad_output",
+		mcp.WithDescription("Returns a structured response that VIOLATES its output schema (id is a string)."),
+		mcp.WithRawOutputSchema(rawSchema),
+	)
+	s.AddTool(badOutput, func(_ context.Context, _ mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		return mcp.NewToolResultStructured(map[string]any{"id": "not-an-int"}, `{"id":"not-an-int"}`), nil // string id: deliberately breaks the "integer" constraint
+	})
+
+	textOnly := mcp.NewTool("text_only",
+		mcp.WithDescription("Declares an output schema but returns only text content (no structuredContent)."),
+		mcp.WithRawOutputSchema(rawSchema),
+	)
+	s.AddTool(textOnly, func(_ context.Context, _ mcp.CallToolRequest) (*mcp.CallToolResult, error) {
+		return mcp.NewToolResultText("plain text, no structured content"), nil // schema declared, but no structured payload returned
+	})
+
+	if err := server.ServeStdio(s); err != nil {
+		panic(err) // test stub: no recovery path, abort on any serve error
+	}
+}
diff --git a/go.mod b/go.mod
@@ -20,6 +20,7 @@ require (
 	github.com/oklog/ulid/v2 v2.1.1
 	github.com/pkoukk/tiktoken-go v0.1.8
 	github.com/prometheus/client_golang v1.23.2
+	github.com/santhosh-tekuri/jsonschema/v6 v6.0.2
 	github.com/spf13/cobra v1.10.2
 	github.com/spf13/pflag v1.0.10
 	github.com/spf13/viper v1.21.0
@@ -119,7 +120,6 @@ require (
 	github.com/prometheus/procfs v0.16.1 // indirect
 	github.com/rivo/uniseg v0.4.7 // indirect
 	github.com/sagikazarmark/locafero v0.11.0 // indirect
-	github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect
 	github.com/sergeymakinen/go-bmp v1.0.0 // indirect
 	github.com/sergeymakinen/go-ico v1.0.0-beta.0 // indirect
 	github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 // indirect

diff --git a/internal/config/config.go b/internal/config/config.go
@@ -140,6 +140,9 @@ type Config struct {
 	// Sensitive data detection settings (Spec 026)
 	SensitiveDataDetection *SensitiveDataDetectionConfig `json:"sensitive_data_detection,omitempty" mapstructure:"sensitive-data-detection"`
 
+	// Output-schema validation settings (Spec 056)
+	OutputValidation *OutputValidationConfig `json:"output_validation,omitempty" mapstructure:"output-validation"`
+
 	// Telemetry settings (Spec 036)
 	Telemetry *TelemetryConfig `json:"telemetry,omitempty" mapstructure:"telemetry"`
 
@@ -239,9 +242,9 @@ type ServerConfig struct {
 	// when the server is configured with both Command and an HTTP/SSE URL — i.e.,
 	// mcpproxy starts the process AND connects via network. Stdio servers ignore
 	// this field. Zero or unset → 30s default.
-	LauncherWaitTimeout Duration  `json:"launcher_wait_timeout,omitempty" mapstructure:"launcher_wait_timeout" swaggertype:"string"`
-	EnabledTools        []string  `json:"enabled_tools,omitempty" mapstructure:"enabled_tools"`   // Allowlist: only these tools are exposed; mutually exclusive with disabled_tools
-	DisabledTools       []string  `json:"disabled_tools,omitempty" mapstructure:"disabled_tools"` // Denylist: these tools are hidden; mutually exclusive with enabled_tools
+	LauncherWaitTimeout Duration `json:"launcher_wait_timeout,omitempty" mapstructure:"launcher_wait_timeout" swaggertype:"string"`
+	EnabledTools        []string `json:"enabled_tools,omitempty" mapstructure:"enabled_tools"`   // Allowlist: only these tools are exposed; mutually exclusive with disabled_tools
+	DisabledTools       []string `json:"disabled_tools,omitempty" mapstructure:"disabled_tools"` // Denylist: these tools are hidden; mutually exclusive with enabled_tools
 }
 
 // OAuthConfig represents OAuth configuration for a server
@@ -467,6 +470,71 @@ func (c *SensitiveDataDetectionConfig) GetEntropyThreshold() float64 {
 	return c.EntropyThreshold
 }
 
+// OutputValidationConfig controls output-schema validation behaviour (Spec 056).
+type OutputValidationConfig struct {
+	Mode                     string `json:"mode,omitempty" mapstructure:"mode"`                                             // "off" | "warn" | "strict"; default "warn"
+	MaxBytes                 int    `json:"max_bytes,omitempty" mapstructure:"max-bytes"`                                   // structured payload byte cap; values <= 0 fall back to the default 5<<20
+	MaxDepth                 int    `json:"max_depth,omitempty" mapstructure:"max-depth"`                                   // nesting depth cap; values <= 0 fall back to the default 64
+	MissingStructuredContent string `json:"missing_structured_content,omitempty" mapstructure:"missing-structured-content"` // "allow" | "block"; default "allow"
+}
+
+// DefaultOutputValidationConfig returns a new config with the defaults: warn mode, a 5 MiB byte cap, depth cap 64, missing structured content allowed.
+func DefaultOutputValidationConfig() *OutputValidationConfig {
+	return &OutputValidationConfig{
+		Mode:                     "warn",
+		MaxBytes:                 5 << 20,
+		MaxDepth:                 64,
+		MissingStructuredContent: "allow",
+	}
+}
+
+// IsEnabled reports whether validation runs: true for a nil receiver and for every Mode except "off" (including empty or unrecognised values).
+func (c *OutputValidationConfig) IsEnabled() bool {
+	if c == nil {
+		return true
+	}
+	return c.Mode != "off"
+}
+
+// IsStrict reports whether Mode is exactly "strict" (the comparison is case-sensitive). A nil receiver returns false.
+func (c *OutputValidationConfig) IsStrict() bool {
+	if c == nil {
+		return false
+	}
+	return c.Mode == "strict"
+}
+
+// IsWarn reports whether validation is enabled but not strict, i.e. Mode is anything other than
+// "off" or "strict" (empty and unrecognised values included). A nil receiver returns true (default is warn).
+func (c *OutputValidationConfig) IsWarn() bool {
+	return c.IsEnabled() && !c.IsStrict()
+}
+
+// EffectiveMaxBytes returns MaxBytes, falling back to 5<<20 when the receiver is nil or MaxBytes is zero or negative.
+func (c *OutputValidationConfig) EffectiveMaxBytes() int {
+	if c == nil || c.MaxBytes <= 0 {
+		return 5 << 20
+	}
+	return c.MaxBytes
+}
+
+// EffectiveMaxDepth returns MaxDepth, falling back to 64 when the receiver is nil or MaxDepth is zero or negative.
+func (c *OutputValidationConfig) EffectiveMaxDepth() int {
+	if c == nil || c.MaxDepth <= 0 {
+		return 64
+	}
+	return c.MaxDepth
+}
+
+// BlockOnMissingStructured reports whether MissingStructuredContent is exactly "block"; any other
+// value, including empty, returns false. A nil receiver returns false (default is "allow").
+func (c *OutputValidationConfig) BlockOnMissingStructured() bool {
+	if c == nil {
+		return false
+	}
+	return c.MissingStructuredContent == "block"
+}
+
 // RegistryEntry represents a registry in the configuration
 type RegistryEntry struct {
 	ID          string      `json:"id"`
@@ -523,14 +591,15 @@ func ConvertFromCursorFormat(cursorConfig *CursorMCPConfig) []*ServerConfig {
 
 // ToolMetadata represents tool information stored in the index
 type ToolMetadata struct {
-	Name        string           `json:"name"`
-	ServerName  string           `json:"server_name"`
-	Description string           `json:"description"`
-	ParamsJSON  string           `json:"params_json"`
-	Hash        string           `json:"hash"`
-	Created     time.Time        `json:"created"`
-	Updated     time.Time        `json:"updated"`
-	Annotations *ToolAnnotations `json:"annotations,omitempty"`
+	Name             string           `json:"name"`
+	ServerName       string           `json:"server_name"`
+	Description      string           `json:"description"`
+	ParamsJSON       string           `json:"params_json"`
+	OutputSchemaJSON string           `json:"output_schema_json,omitempty"` // declared output schema, raw JSON bytes (Spec 056)
+	Hash             string           `json:"hash"`
+	Created          time.Time        `json:"created"`
+	Updated          time.Time        `json:"updated"`
+	Annotations      *ToolAnnotations `json:"annotations,omitempty"`
 }
 
 // ToolAnnotations represents MCP tool behavior hints
@@ -693,6 +762,9 @@ func DefaultConfig() *Config {
 		// Default sensitive data detection settings (enabled by default for security)
 		SensitiveDataDetection: DefaultSensitiveDataDetectionConfig(),
 
+		// Default output-schema validation settings (Spec 056)
+		OutputValidation: DefaultOutputValidationConfig(),
+
 		// Default registries for MCP server discovery
 		Registries: []RegistryEntry{
 			{