diff --git a/docs/modelcontextprotocol-io/package-types.mdx b/docs/modelcontextprotocol-io/package-types.mdx index b7a31b903..6cac890f2 100644 --- a/docs/modelcontextprotocol-io/package-types.mdx +++ b/docs/modelcontextprotocol-io/package-types.mdx @@ -125,6 +125,57 @@ This MCP server manages Azure DevOps work items and pipelines. ``` +## Cargo (Rust) Packages + +For Cargo packages, the MCP Registry currently supports the official crates.io registry (`https://crates.io`) only. + +Cargo packages use `"registryType": "cargo"` in `server.json`. For example: + +```json server.json highlight={9} +{ + "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", + "name": "io.github.username/widget-mcp", + "title": "Widget", + "description": "Rust-native MCP server", + "version": "0.3.0", + "packages": [ + { + "registryType": "cargo", + "identifier": "widget-mcp", + "version": "0.3.0", + "transport": { + "type": "stdio" + } + } + ] +} +``` + +### Runtime Model + +Cargo's runtime model differs from npm/PyPI/NuGet. `cargo install <crate>` places the compiled binary in `~/.cargo/bin` (which is expected to be on `PATH`), after which MCP clients invoke it directly by name. There is no per-invocation runner equivalent to `npx` (npm), `uvx` (PyPI), or `dnx` (NuGet, .NET 10 SDK Preview 6+) — install is one-time, execution is by binary name. The Cargo example above intentionally omits `runtimeHint` for this reason. + +Rust MCP authors have two first-class distribution paths: + +- **Cargo (`registryType: cargo`)** — source-distributed via crates.io. End users need the Rust toolchain (`rustup`) to run `cargo install`. Idiomatic for the Rust ecosystem and consistent with how Rust CLIs are typically published. +- **MCPB (`registryType: mcpb`)** — prebuilt binary distributed via GitHub or GitLab Releases. End users need no toolchain. Right choice if the priority is "no Rust toolchain required." + +Both paths are supported; the choice is the author's. 
Native Cargo support exists so Rust authors who prefer source distribution are not forced into the MCPB binary-packaging workaround. + +### Ownership Verification + +The MCP Registry verifies ownership of Cargo packages by checking for the existence of an `mcp-name: $SERVER_NAME` string in the package README (which is rendered to HTML and served by crates.io's static CDN). The `$SERVER_NAME` portion **MUST** match the server name from `server.json`. For example: + +```markdown README.md highlight={5} +# Widget MCP Server + +A Rust-native MCP server for widget operations. + +- MCP Registry name: `mcp-name: io.github.username/widget-mcp` +``` + +**Cargo-specific gotcha:** Unlike PyPI and NuGet (which preserve HTML comments in their README rendering), **crates.io strips HTML comments during markdown→HTML conversion**. The `<!-- mcp-name: $SERVER_NAME -->` hidden-comment form that works for PyPI/NuGet **does not work for cargo** — the token will not appear in the rendered HTML the validator inspects. Cargo authors must include the `mcp-name:` token as visible markdown text. A simple bullet like the one in the example above (for instance, in a Links section of the README) is the recommended pattern. 
+ ## Docker/OCI Images For Docker/OCI images, the MCP Registry currently supports: diff --git a/docs/reference/api/openapi.yaml b/docs/reference/api/openapi.yaml index faf88950f..25ec5ce1b 100644 --- a/docs/reference/api/openapi.yaml +++ b/docs/reference/api/openapi.yaml @@ -659,10 +659,11 @@ components: properties: registryType: type: string - description: Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb') + description: Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'cargo', 'oci', 'nuget', 'mcpb') examples: - "npm" - "pypi" + - "cargo" - "oci" - "nuget" - "mcpb" @@ -673,6 +674,7 @@ components: examples: - "https://registry.npmjs.org" - "https://pypi.org" + - "https://crates.io" - "https://docker.io" - "https://api.nuget.org/v3/index.json" - "https://github.com" diff --git a/docs/reference/server-json/draft/server.schema.json b/docs/reference/server-json/draft/server.schema.json index 5c59335fc..0b9597a70 100644 --- a/docs/reference/server-json/draft/server.schema.json +++ b/docs/reference/server-json/draft/server.schema.json @@ -239,6 +239,7 @@ "examples": [ "https://registry.npmjs.org", "https://pypi.org", + "https://crates.io", "https://docker.io", "https://api.nuget.org/v3/index.json", "https://github.com", @@ -248,10 +249,11 @@ "type": "string" }, "registryType": { - "description": "Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb')", + "description": "Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'cargo', 'oci', 'nuget', 'mcpb')", "examples": [ "npm", "pypi", + "cargo", "oci", "nuget", "mcpb" diff --git a/docs/reference/server-json/generic-server-json.md b/docs/reference/server-json/generic-server-json.md index bb5cc44c6..7415988ae 100644 --- a/docs/reference/server-json/generic-server-json.md +++ b/docs/reference/server-json/generic-server-json.md @@ -372,6 +372,35 @@ The same `registryType` / `identifier` pattern 
works for other supported OCI hos } ``` +### Cargo (Rust) Package Example + +`cargo install <crate>` places the binary on PATH (via `~/.cargo/bin`); MCP clients invoke it directly by name. There is no single-shot equivalent of `npx` (npm), `uvx` (PyPI), or `dnx` (NuGet, .NET 10 SDK) for cargo — install once, run by name. + +```json +{ + "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", + "name": "io.github.example/widget-mcp", + "description": "Rust-native MCP server", + "title": "Widget", + "repository": { + "url": "https://github.com/example/widget-mcp", + "source": "github" + }, + "version": "0.3.0", + "packages": [ + { + "registryType": "cargo", + "registryBaseUrl": "https://crates.io", + "identifier": "widget-mcp", + "version": "0.3.0", + "transport": { + "type": "stdio" + } + } + ] +} +``` + ### NuGet (.NET) Package Example The `dnx` tool ships with the .NET 10 SDK, starting with Preview 6. diff --git a/internal/validators/package.go b/internal/validators/package.go index 7104f730b..b458244b1 100644 --- a/internal/validators/package.go +++ b/internal/validators/package.go @@ -23,6 +23,8 @@ func ValidatePackage(ctx context.Context, pkg model.Package, serverName string) return registries.ValidateOCI(ctx, pkg, serverName) case model.RegistryTypeMCPB: return registries.ValidateMCPB(ctx, pkg, serverName) + case model.RegistryTypeCargo: + return registries.ValidateCargo(ctx, pkg, serverName) default: return fmt.Errorf("unsupported registry type: %s", pkg.RegistryType) } diff --git a/internal/validators/registries/cargo.go b/internal/validators/registries/cargo.go new file mode 100644 index 000000000..18f7c0f16 --- /dev/null +++ b/internal/validators/registries/cargo.go @@ -0,0 +1,160 @@ +package registries + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/modelcontextprotocol/registry/pkg/model" +) + +var ( + ErrMissingIdentifierForCargo = 
errors.New("package identifier is required for Cargo packages") + ErrMissingVersionForCargo = errors.New("package version is required for Cargo packages") +) + +// CargoReadmeMetaResponse is the structure returned by the crates.io readme metadata endpoint. +// +// With `Accept: application/json`, crates.io's /api/v1/crates/{name}/{version}/readme +// endpoint returns 200 OK with a JSON body containing a `url` field that points to the +// rendered README on the static CDN. (Without the Accept header, or via HEAD, the same +// endpoint emits a 302 redirect to the CDN URL — the validator uses the JSON path so +// that crates.io controls where the README lives.) Validators must follow the pointer +// to retrieve the actual README content. +type CargoReadmeMetaResponse struct { + URL string `json:"url"` +} + +// ValidateCargo validates that a Cargo (crates.io) package contains the correct MCP server name. +// +// Verification mechanism: the `mcp-name: <server-name>` token is searched for in the package's +// rendered README. This mirrors the PyPI validator's README-token approach (see ValidatePyPI), +// requiring no Cargo.toml parsing on the registry side. Crate authors add a single line +// `mcp-name: io.github.OWNER/REPO` to their README before publishing. +// +// Two-call retrieval pattern: +// 1. GET https://crates.io/api/v1/crates/{name}/{version}/readme +// → 200 OK with JSON: {"url": "https://static.crates.io/readmes/.../...html"} +// 2. GET <url returned by step 1> +// → 200 OK with rendered README HTML, or 403 if the crate/version is missing +// +// The two-call pattern stays on the documented crates.io API surface rather than relying +// on the CDN URL layout being stable. 
+func ValidateCargo(ctx context.Context, pkg model.Package, serverName string) error { + // Set default registry base URL if empty + if pkg.RegistryBaseURL == "" { + pkg.RegistryBaseURL = model.RegistryURLCrates + } + + if pkg.Identifier == "" { + return ErrMissingIdentifierForCargo + } + + if pkg.Version == "" { + return ErrMissingVersionForCargo + } + + // Validate that MCPB-specific fields are not present + if pkg.FileSHA256 != "" { + return fmt.Errorf("cargo packages must not have 'fileSha256' field - this is only for MCPB packages") + } + + // Validate that the registry base URL matches crates.io exactly + if pkg.RegistryBaseURL != model.RegistryURLCrates { + return fmt.Errorf("registry type and base URL do not match: '%s' is not valid for registry type '%s'. Expected: %s", + pkg.RegistryBaseURL, model.RegistryTypeCargo, model.RegistryURLCrates) + } + + return validateCargoREADME(ctx, pkg, serverName) +} + +// validateCargoREADME performs the two-call README fetch and the mcp-name token +// check. It is split out from ValidateCargo so that httptest-based tests can +// drive the HTTP pipeline against a mock server (exposed via export_test.go), +// bypassing the exact-baseURL guard that ValidateCargo enforces for callers. +func validateCargoREADME(ctx context.Context, pkg model.Package, serverName string) error { + client := &http.Client{Timeout: 10 * time.Second} + // crates.io's crawler policy expects a non-generic User-Agent identifying the source. + userAgent := "MCP-Registry-Validator/1.0 (https://registry.modelcontextprotocol.io)" + + // Step 1: fetch the README pointer from the documented API endpoint. 
+ metaURL := fmt.Sprintf("%s/api/v1/crates/%s/%s/readme", + pkg.RegistryBaseURL, + url.PathEscape(pkg.Identifier), + url.PathEscape(pkg.Version)) + + metaReq, err := http.NewRequestWithContext(ctx, http.MethodGet, metaURL, nil) + if err != nil { + return fmt.Errorf("failed to create crates.io metadata request: %w", err) + } + metaReq.Header.Set("User-Agent", userAgent) + metaReq.Header.Set("Accept", "application/json") + + metaResp, err := client.Do(metaReq) + if err != nil { + return fmt.Errorf("failed to fetch package metadata from crates.io: %w", err) + } + defer metaResp.Body.Close() + + if metaResp.StatusCode != http.StatusOK { + // 5xx from the metadata endpoint is upstream availability, not a missing crate. + if metaResp.StatusCode >= 500 && metaResp.StatusCode < 600 { + return fmt.Errorf("crates.io upstream error fetching metadata for cargo package '%s' (status: %d) — likely transient, retry later", pkg.Identifier, metaResp.StatusCode) + } + return fmt.Errorf("cargo package '%s' metadata fetch failed (status: %d)", pkg.Identifier, metaResp.StatusCode) + } + + var meta CargoReadmeMetaResponse + if err := json.NewDecoder(metaResp.Body).Decode(&meta); err != nil { + return fmt.Errorf("failed to parse crates.io readme metadata: %w", err) + } + if meta.URL == "" { + return fmt.Errorf("cargo package '%s' metadata response missing 'url' field", pkg.Identifier) + } + + // Step 2: fetch the rendered README from the URL the API gave us. 
+ readmeReq, err := http.NewRequestWithContext(ctx, http.MethodGet, meta.URL, nil) + if err != nil { + return fmt.Errorf("failed to create crates.io readme request: %w", err) + } + readmeReq.Header.Set("User-Agent", userAgent) + readmeReq.Header.Set("Accept", "text/html") + + readmeResp, err := client.Do(readmeReq) + if err != nil { + return fmt.Errorf("failed to fetch rendered README from crates.io: %w", err) + } + defer readmeResp.Body.Close() + + // Missing crates and missing versions surface as 403 from static.crates.io + // (S3's default for missing keys), not 404. 5xx from the CDN is upstream + // availability — surface it as transient so callers can distinguish retryable + // failures from genuinely missing crates. + if readmeResp.StatusCode != http.StatusOK { + if readmeResp.StatusCode >= 500 && readmeResp.StatusCode < 600 { + return fmt.Errorf("crates.io upstream error fetching README for cargo package '%s' version '%s' (status: %d) — likely transient, retry later", pkg.Identifier, pkg.Version, readmeResp.StatusCode) + } + return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io (status: %d)", pkg.Identifier, pkg.Version, readmeResp.StatusCode) + } + + body, err := io.ReadAll(readmeResp.Body) + if err != nil { + return fmt.Errorf("failed to read rendered README: %w", err) + } + + // Search for the mcp-name: token. The token contains no characters + // that get HTML-escaped during README rendering (no <, >, &, ", '), so a direct + // substring match against the rendered HTML is reliable. + mcpNamePattern := "mcp-name: " + serverName + if strings.Contains(string(body), mcpNamePattern) { + return nil + } + + return fmt.Errorf("cargo package '%s' ownership validation failed. 
The server name '%s' must appear as 'mcp-name: %s' in the package README", pkg.Identifier, serverName, serverName) +} diff --git a/internal/validators/registries/cargo_test.go b/internal/validators/registries/cargo_test.go new file mode 100644 index 000000000..9e53e9a21 --- /dev/null +++ b/internal/validators/registries/cargo_test.go @@ -0,0 +1,411 @@ +package registries_test + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "sync/atomic" + "testing" + + "github.com/modelcontextprotocol/registry/internal/validators/registries" + "github.com/modelcontextprotocol/registry/pkg/model" + "github.com/stretchr/testify/assert" +) + +func TestValidateCargo_RealPackages(t *testing.T) { + ctx := context.Background() + + tests := []struct { + name string + packageName string + version string + serverName string + expectError bool + errorMessage string + }{ + { + name: "empty package identifier should fail", + packageName: "", + version: "0.1.0", + serverName: "io.github.example/test", + expectError: true, + errorMessage: "package identifier is required for Cargo packages", + }, + { + name: "empty package version should fail", + packageName: "rust-faf-mcp", + version: "", + serverName: "io.github.example/test", + expectError: true, + errorMessage: "package version is required for Cargo packages", + }, + { + name: "non-existent crate should fail", + packageName: generateRandomPackageName(), + version: "0.1.0", + serverName: "io.github.example/test", + expectError: true, + errorMessage: "not found", + }, + { + name: "non-existent version of real crate should fail", + packageName: "serde", + version: "99.99.99-not-real", + serverName: "io.github.example/test", + expectError: true, + errorMessage: "not found", + }, + { + name: "real crate without mcp-name token should fail", + packageName: "serde", // most-downloaded crate; no MCP server claim + version: "1.0.219", + serverName: "io.github.example/test", + expectError: 
true, + errorMessage: "ownership validation failed", + }, + { + name: "real crate with mismatched mcp-name should fail", + packageName: "tokio", + version: "1.40.0", + serverName: "io.github.example/completely-different-name", + expectError: true, + errorMessage: "ownership validation failed", + }, + { + name: "additional real crate without mcp-name (rand)", + packageName: "rand", + version: "0.9.0", + serverName: "io.github.example/test", + expectError: true, + errorMessage: "ownership validation failed", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + Identifier: tt.packageName, + Version: tt.version, + } + + err := registries.ValidateCargo(ctx, pkg, tt.serverName) + + if tt.expectError { + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.errorMessage) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestValidateCargo_RegistryBaseURLMismatch(t *testing.T) { + ctx := context.Background() + + tests := []struct { + name string + baseURL string + }{ + {name: "different host", baseURL: "https://example.com"}, + {name: "trailing slash", baseURL: "https://crates.io/"}, + {name: "http (not https)", baseURL: "http://crates.io"}, + {name: "subdomain", baseURL: "https://www.crates.io"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + RegistryBaseURL: tt.baseURL, + Identifier: "rust-faf-mcp", + Version: "0.2.2", + } + + err := registries.ValidateCargo(ctx, pkg, "io.github.Wolfe-Jam/rust-faf-mcp") + assert.Error(t, err) + assert.Contains(t, err.Error(), "registry type and base URL do not match") + }) + } +} + +func TestValidateCargo_RejectsMCPBOnlyFields(t *testing.T) { + ctx := context.Background() + + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + Identifier: "rust-faf-mcp", + Version: "0.2.2", + FileSHA256: 
"0000000000000000000000000000000000000000000000000000000000000000", + } + + err := registries.ValidateCargo(ctx, pkg, "io.github.Wolfe-Jam/rust-faf-mcp") + assert.Error(t, err) + assert.Contains(t, err.Error(), "cargo packages must not have 'fileSha256' field") +} + +// Server names follow io.github.OWNER/REPO and may contain dots, slashes, +// hyphens, underscores, and digits. None of these get HTML-escaped during +// README rendering, so substring match against the rendered HTML is reliable. +// These tests exercise format variations against a real crate that doesn't +// declare any mcp-name (serde) — every case fails ownership, but we verify +// the failure error preserves the exact server name unchanged. +func TestValidateCargo_ServerNameFormats(t *testing.T) { + ctx := context.Background() + + tests := []struct { + name string + serverName string + }{ + {name: "canonical io.github format", serverName: "io.github.Wolfe-Jam/rust-faf-mcp"}, + {name: "multiple hyphens", serverName: "io.github.example/multi-hyphen-test-name"}, + {name: "underscore", serverName: "io.github.example/snake_case_name"}, + {name: "numeric suffix", serverName: "io.github.example/server-v2"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + Identifier: "serde", + Version: "1.0.219", + } + + err := registries.ValidateCargo(ctx, pkg, tt.serverName) + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.serverName) + }) + } +} + +// TestValidateCargo_PositivePathMock exercises the success branch: a README +// that contains the exact mcp-name token must return no error. Uses httptest +// to stand in for crates.io, so the test is hermetic — it doesn't depend on +// any live crate publishing a specific mcp-name line or on network reachability. +// Calls the test-only ValidateCargoREADME shim so the mock URL can take the +// place of https://crates.io without tripping the exact-baseURL guard. 
+func TestValidateCargo_PositivePathMock(t *testing.T) { + ctx := context.Background() + const serverName = "io.github.test/positive-path" + + var mock *httptest.Server + mock = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/readme") { + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(map[string]string{ + "url": mock.URL + "/static-readme", + }); err != nil { + t.Fatalf("encode meta response: %v", err) + } + return + } + // Rendered README HTML containing the mcp-name token. + fmt.Fprintf(w, "

some content

mcp-name: %s

", serverName) + })) + defer mock.Close() + + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + RegistryBaseURL: mock.URL, + Identifier: "test-crate", + Version: "0.1.0", + } + + err := registries.ValidateCargoREADME(ctx, pkg, serverName) + assert.NoError(t, err, "validator should accept a README containing the exact mcp-name token") +} + +// TestValidateCargo_LivePositivePath is the live anchor — it validates against +// rust-faf-mcp v0.3.1 on real crates.io, the first crate published with the +// mcp-name token as visible markdown (v0.3.0 used a hidden HTML comment, which +// crates.io strips during README rendering — see package-types.mdx for the +// cargo-specific gotcha). Complements TestValidateCargo_PositivePathMock: +// the mock proves the validator works in principle (hermetic, fast); the live +// anchor proves it works against the real crates.io API + static CDN pipeline. +// +// If this test ever starts failing, check (in order): +// 1. Has rust-faf-mcp v0.3.1 been yanked or replaced? +// 2. Did crates.io change its README rendering pipeline (e.g., start +// stripping markdown lines that look like email-like tokens)? +// 3. Is the test machine network-blocked from crates.io or static.crates.io? +func TestValidateCargo_LivePositivePath(t *testing.T) { + ctx := context.Background() + + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + Identifier: "rust-faf-mcp", + Version: "0.3.1", + } + + err := registries.ValidateCargo(ctx, pkg, "io.github.Wolfe-Jam/rust-faf-mcp") + assert.NoError(t, err, "validator should accept the live rust-faf-mcp v0.3.1 crate (the canonical live anchor for cargo positive-path)") +} + +// TestValidateCargo_TransientUpstreamError exercises the 5xx-as-transient branch: +// a 502/503 from static.crates.io is upstream availability, not "crate missing", +// and the error message must signal a retryable failure rather than imply the +// crate doesn't exist (the previous behavior, flagged in PR #1207 review). 
+func TestValidateCargo_TransientUpstreamError(t *testing.T) { + ctx := context.Background() + + var mock *httptest.Server + mock = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/readme") { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{ + "url": mock.URL + "/static-readme", + }) + return + } + // Simulate static.crates.io 502 — upstream blip, not a missing crate. + http.Error(w, "bad gateway", http.StatusBadGateway) + })) + defer mock.Close() + + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + RegistryBaseURL: mock.URL, + Identifier: "test-crate", + Version: "0.1.0", + } + + err := registries.ValidateCargoREADME(ctx, pkg, "io.github.test/transient") + assert.Error(t, err) + assert.Contains(t, err.Error(), "transient") + assert.NotContains(t, err.Error(), "not found", "transient upstream errors should not be reported as 'not found'") +} + +// TestValidateCargoCombinedFixture exercises path-encoding and the full status +// matrix in one httptest fixture — the same pattern praised by @P4ST4S on +// PR #1321 for Go modules. One server dispatches on the crate identifier +// embedded in the URL path; wrong encoding routes to the fallback 500, which +// breaks the appropriate assertion. The explicit path assertion (assert.Equal +// on lastMetaPath) also catches encoding regressions directly. +func TestValidateCargoCombinedFixture(t *testing.T) { + ctx := context.Background() + const serverName = "io.github.test/combined" + + tests := []struct { + name string + crateName string + version string + metaStatus int + readmeStatus int + readmeBody string + wantErr bool + wantContains []string + wantNotContains []string + }{ + { + name: "happy_path_visible_token", + crateName: "combined-happy", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusOK, + readmeBody: fmt.Sprintf("

mcp-name: %s

", serverName), + }, + { + name: "metadata_404", + crateName: "combined-meta404", + version: "0.1.0", + metaStatus: http.StatusNotFound, + wantErr: true, + wantContains: []string{"metadata fetch failed", "status: 404"}, + }, + { + name: "readme_403_s3_not_found", + crateName: "combined-readme403", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusForbidden, + wantErr: true, + wantContains: []string{"not found", "status: 403"}, + }, + { + name: "readme_502_transient", + crateName: "combined-readme502", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusBadGateway, + wantErr: true, + wantContains: []string{"transient"}, + wantNotContains: []string{"not found"}, + }, + } + + // lastMetaPath captures the metadata request path seen by the handler so + // each sub-test can assert the exact url.PathEscape-encoded form. + var lastMetaPath atomic.Value + lastMetaPath.Store("") + + var srv *httptest.Server + srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + for i := range tests { + tt := &tests[i] + metaPath := fmt.Sprintf("/api/v1/crates/%s/%s/readme", + url.PathEscape(tt.crateName), url.PathEscape(tt.version)) + staticPath := "/readme-static/" + url.PathEscape(tt.crateName) + + if r.URL.Path == metaPath { + lastMetaPath.Store(r.URL.Path) + if tt.metaStatus != http.StatusOK { + http.Error(w, "simulated non-200", tt.metaStatus) + return + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"url": srv.URL + staticPath}) + return + } + if r.URL.Path == staticPath { + if tt.readmeStatus != http.StatusOK { + http.Error(w, "simulated non-200", tt.readmeStatus) + return + } + fmt.Fprint(w, tt.readmeBody) + return + } + } + http.Error(w, "unexpected path: "+r.URL.Path, http.StatusInternalServerError) + })) + defer srv.Close() + + for i := range tests { + tt := tests[i] + lastMetaPath.Store("") + t.Run(tt.name, func(t *testing.T) { + 
wantMetaPath := fmt.Sprintf("/api/v1/crates/%s/%s/readme", + url.PathEscape(tt.crateName), url.PathEscape(tt.version)) + + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + RegistryBaseURL: srv.URL, + Identifier: tt.crateName, + Version: tt.version, + } + + err := registries.ValidateCargoREADME(ctx, pkg, serverName) + + // Assert encode step: the validator must have requested exactly the + // url.PathEscape-encoded path; wrong encoding hits the fallback 500. + assert.Equal(t, wantMetaPath, lastMetaPath.Load().(string), + "meta request path must be exactly the url.PathEscape-encoded form") + + if tt.wantErr { + assert.Error(t, err) + for _, want := range tt.wantContains { + assert.Contains(t, err.Error(), want) + } + for _, notWant := range tt.wantNotContains { + assert.NotContains(t, err.Error(), notWant) + } + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/internal/validators/registries/export_test.go b/internal/validators/registries/export_test.go new file mode 100644 index 000000000..89878de24 --- /dev/null +++ b/internal/validators/registries/export_test.go @@ -0,0 +1,9 @@ +package registries + +// ValidateCargoREADME exposes the package-private validateCargoREADME to the +// external _test package so httptest-driven tests can exercise the README-fetch +// and mcp-name token-match pipeline against a mock server, bypassing the +// exact-baseURL guard that the public ValidateCargo enforces. +// +// Intended for cargo_test.go's positive-path and transient-error tests only. 
+var ValidateCargoREADME = validateCargoREADME diff --git a/pkg/model/constants.go b/pkg/model/constants.go index ead176d7f..98a3ba189 100644 --- a/pkg/model/constants.go +++ b/pkg/model/constants.go @@ -7,10 +7,12 @@ const ( RegistryTypeOCI = "oci" RegistryTypeNuGet = "nuget" RegistryTypeMCPB = "mcpb" + RegistryTypeCargo = "cargo" ) // Registry Base URLs - supported package registry base URLs const ( + RegistryURLCrates = "https://crates.io" RegistryURLGitHub = "https://github.com" RegistryURLGitLab = "https://gitlab.com" RegistryURLNPM = "https://registry.npmjs.org" diff --git a/tools/validate-examples/main.go b/tools/validate-examples/main.go index 2f8be56ff..4c48ae649 100644 --- a/tools/validate-examples/main.go +++ b/tools/validate-examples/main.go @@ -33,7 +33,7 @@ func main() { func runValidation() error { // Define what we validate and how - expectedServerJSONCount := 16 + expectedServerJSONCount := 17 targets := []validationTarget{ { path: filepath.Join("docs", "reference", "server-json", "generic-server-json.md"),