diff --git a/docs/modelcontextprotocol-io/package-types.mdx b/docs/modelcontextprotocol-io/package-types.mdx index 6cac890f..0367facc 100644 --- a/docs/modelcontextprotocol-io/package-types.mdx +++ b/docs/modelcontextprotocol-io/package-types.mdx @@ -87,6 +87,8 @@ This MCP server executes SQL queries and manages database connections. ``` +The `mcp-name:` token must be followed by a boundary — a newline, whitespace, an HTML tag, or the comment close `-->`. Keep it on its own line or inside ``; do not glue it directly to trailing characters such as a sentence-ending period (`…/database-query-mcp.`), which prevents the match. + ## NuGet Packages For NuGet packages, the MCP Registry currently supports the official NuGet registry (`https://api.nuget.org/v3/index.json`) only. @@ -125,6 +127,8 @@ This MCP server manages Azure DevOps work items and pipelines. ``` +The `mcp-name:` token must be followed by a boundary — a newline, whitespace, an HTML tag, or the comment close `-->`. Keep it on its own line or inside ``; do not glue it directly to trailing characters such as a sentence-ending period (`…/azure-devops-mcp.`), which prevents the match. + ## Cargo (Rust) Packages For Cargo packages, the MCP Registry currently supports the official crates.io registry (`https://crates.io`) only. diff --git a/internal/validators/registries/cargo.go b/internal/validators/registries/cargo.go index d90af778..5aa72b10 100644 --- a/internal/validators/registries/cargo.go +++ b/internal/validators/registries/cargo.go @@ -106,16 +106,37 @@ func cargoAllowedHosts(baseURL string) map[string]struct{} { return hosts } +// cargoURLAllowed reports whether u is a URL the validator may fetch for the +// given base. The host must be in allowedHosts; additionally, for the real +// crates.io base, the scheme must be https and the port must be the default — +// so a metadata response or redirect cannot downgrade to http or steer the +// validator at a non-standard port on an otherwise-trusted host. For test bases +// (httptest servers) only the host is checked, so mocks keep working. +func cargoURLAllowed(u *url.URL, baseURL string, allowedHosts map[string]struct{}) bool { + if _, ok := allowedHosts[u.Hostname()]; !ok { + return false + } + if baseURL == model.RegistryURLCrates { + if u.Scheme != "https" { + return false + } + if p := u.Port(); p != "" && p != "443" { + return false + } + } + return true +} + // newCargoHTTPClient builds the client used for all crates.io calls. The -// CheckRedirect policy pins every redirect hop to allowedHosts, so even though -// the initial URL is host-pinned, an upstream 3xx cannot redirect the validator -// to an unexpected host. -func newCargoHTTPClient(allowedHosts map[string]struct{}) *http.Client { +// CheckRedirect policy pins every redirect hop via cargoURLAllowed, so even +// though the initial URL is checked, an upstream 3xx cannot redirect the +// validator to an unexpected host, scheme, or port. +func newCargoHTTPClient(baseURL string, allowedHosts map[string]struct{}) *http.Client { return &http.Client{ Timeout: 10 * time.Second, CheckRedirect: func(req *http.Request, via []*http.Request) error { - if _, ok := allowedHosts[req.URL.Hostname()]; !ok { - return fmt.Errorf("refusing redirect to unexpected host %q", req.URL.Hostname()) + if !cargoURLAllowed(req.URL, baseURL, allowedHosts) { + return fmt.Errorf("refusing redirect to unexpected URL %q", req.URL.Redacted()) } if len(via) >= 10 { return errors.New("stopped after 10 redirects") @@ -125,37 +146,53 @@ func newCargoHTTPClient(allowedHosts map[string]struct{}) *http.Client { } } -// cargoVersionExists checks whether a specific crate version exists on crates.io, -// used to disambiguate a 403 from the README CDN. static.crates.io (S3) returns -// 403 both for a genuinely-missing crate/version AND for a crate that exists but -// has no rendered README, so a 403 alone cannot tell a publisher which it is. -// -// Returns (exists, determined): determined is false if the existence endpoint -// itself was unreachable or returned an unexpected status, in which case the -// caller should fall back to a generic message rather than assert existence. -func cargoVersionExists(ctx context.Context, client *http.Client, baseURL, identifier, version string) (exists, determined bool) { +// cargoVersionState is the outcome of probing the crate-version metadata +// endpoint, used to disambiguate a 403 from the README CDN. +type cargoVersionState int + +const ( + // cargoVersionUnknown: the probe returned an unexpected status we can't classify. + cargoVersionUnknown cargoVersionState = iota + // cargoVersionExists: the crate version exists (200). + cargoVersionExists + // cargoVersionMissing: the crate version does not exist (404). + cargoVersionMissing + // cargoVersionTransient: the probe failed for a retryable reason (network + // error, 429, or 5xx) — existence is undetermined and the caller should not + // report "not found". + cargoVersionTransient +) + +// probeCargoVersion checks whether a specific crate version exists on crates.io. +// static.crates.io (S3) returns 403 both for a genuinely-missing crate/version +// AND for a crate that exists but has no rendered README, so a 403 from the CDN +// alone cannot tell a publisher which it is; this probe disambiguates, while +// distinguishing a transient failure from a definitive missing/exists answer. +func probeCargoVersion(ctx context.Context, client *http.Client, baseURL, identifier, version string) cargoVersionState { versionURL := fmt.Sprintf("%s/api/v1/crates/%s/%s", baseURL, url.PathEscape(identifier), url.PathEscape(version)) req, err := http.NewRequestWithContext(ctx, http.MethodGet, versionURL, nil) if err != nil { - return false, false + return cargoVersionUnknown } req.Header.Set("User-Agent", cargoUserAgent) req.Header.Set("Accept", "application/json") resp, err := client.Do(req) if err != nil { - return false, false + return cargoVersionTransient } defer resp.Body.Close() - switch resp.StatusCode { - case http.StatusOK: - return true, true - case http.StatusNotFound: - return false, true + switch { + case resp.StatusCode == http.StatusOK: + return cargoVersionExists + case resp.StatusCode == http.StatusNotFound: + return cargoVersionMissing + case resp.StatusCode == http.StatusTooManyRequests, resp.StatusCode >= 500 && resp.StatusCode < 600: + return cargoVersionTransient default: - return false, false + return cargoVersionUnknown } } @@ -196,15 +233,21 @@ func cargoReadmeStatusError(ctx context.Context, client *http.Client, pkg model. // endpoint so the publisher gets an actionable message rather than a blanket // "not found". func cargoReadme403Error(ctx context.Context, client *http.Client, pkg model.Package, serverName string) error { - exists, determined := cargoVersionExists(ctx, client, pkg.RegistryBaseURL, pkg.Identifier, pkg.Version) - switch { - case determined && exists: - return fmt.Errorf("cargo package '%s' version '%s' exists on crates.io but has no rendered README. Add a README containing 'mcp-name: %s' and publish a new version", pkg.Identifier, pkg.Version, serverName) - case determined && !exists: + switch probeCargoVersion(ctx, client, pkg.RegistryBaseURL, pkg.Identifier, pkg.Version) { + case cargoVersionExists: + // The crate/version exists but the README CDN returned 403. The likely + // cause is a missing README, but a 403 is not definitive proof (e.g. a + // transient CDN/WAF block), so don't flatly assert "no README". + return fmt.Errorf("cargo package '%s' version '%s' exists on crates.io, but its rendered README could not be retrieved (status: 403). If it has no README, add one containing 'mcp-name: %s' and publish a new version", pkg.Identifier, pkg.Version, serverName) + case cargoVersionMissing: return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io", pkg.Identifier, pkg.Version) - default: - return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io (status: 403)", pkg.Identifier, pkg.Version) + case cargoVersionTransient: + return fmt.Errorf("crates.io could not confirm cargo package '%s' version '%s' (README status: 403, version check inconclusive) — likely transient, retry later", pkg.Identifier, pkg.Version) + case cargoVersionUnknown: + // Probe returned an unclassifiable status — fall through to the + // best-effort message below. } + return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io (status: 403)", pkg.Identifier, pkg.Version) } // validateCargoREADME performs the two-call README fetch and the mcp-name token @@ -213,7 +256,7 @@ func cargoReadme403Error(ctx context.Context, client *http.Client, pkg model.Pac // bypassing the exact-baseURL guard that ValidateCargo enforces for callers. func validateCargoREADME(ctx context.Context, pkg model.Package, serverName string) error { allowedHosts := cargoAllowedHosts(pkg.RegistryBaseURL) - client := newCargoHTTPClient(allowedHosts) + client := newCargoHTTPClient(pkg.RegistryBaseURL, allowedHosts) // Step 1: fetch the README pointer from the documented API endpoint. metaURL := fmt.Sprintf("%s/api/v1/crates/%s/%s/readme", @@ -246,14 +289,15 @@ func validateCargoREADME(ctx context.Context, pkg model.Package, serverName stri return fmt.Errorf("cargo package '%s' metadata response missing 'url' field", pkg.Identifier) } - // Pin the README pointer to an allowed host before fetching it, so a metadata - // response cannot steer the validator at an internal or attacker-chosen host. + // Pin the README pointer to an allowed host/scheme/port before fetching it, so + // a metadata response cannot steer the validator at an internal or + // attacker-chosen URL. readmeParsed, err := url.Parse(meta.URL) if err != nil || readmeParsed.Hostname() == "" { return fmt.Errorf("cargo package '%s': crates.io returned an unparseable README URL", pkg.Identifier) } - if _, ok := allowedHosts[readmeParsed.Hostname()]; !ok { - return fmt.Errorf("cargo package '%s': crates.io returned a README URL on unexpected host %q — refusing to fetch", pkg.Identifier, readmeParsed.Hostname()) + if !cargoURLAllowed(readmeParsed, pkg.RegistryBaseURL, allowedHosts) { + return fmt.Errorf("cargo package '%s': crates.io returned a README URL on an unexpected host/scheme %q — refusing to fetch", pkg.Identifier, readmeParsed.Redacted()) } // Step 2: fetch the rendered README from the (now host-validated) URL. @@ -287,5 +331,11 @@ func validateCargoREADME(ctx context.Context, pkg model.Package, serverName stri return nil } + // If the token IS present but glued to a trailing character, explain that + // rather than telling the publisher to add a token they can already see. + if trailing, glued := mcpNameTokenGluedTrailing(string(body), serverName); glued { + return fmt.Errorf("cargo package '%s' ownership validation failed: found 'mcp-name: %s' in the README, but it is immediately followed by %q rather than a boundary. The token must be followed by a space, newline, or an HTML tag — put it on its own line and publish a new version", pkg.Identifier, serverName, trailing) + } + return fmt.Errorf("cargo package '%s' ownership validation failed. The server name '%s' must appear as 'mcp-name: %s' in the package README", pkg.Identifier, serverName, serverName) } diff --git a/internal/validators/registries/cargo_internal_test.go b/internal/validators/registries/cargo_internal_test.go new file mode 100644 index 00000000..44273b53 --- /dev/null +++ b/internal/validators/registries/cargo_internal_test.go @@ -0,0 +1,47 @@ +package registries + +import ( + "net/url" + "testing" + + "github.com/modelcontextprotocol/registry/pkg/model" +) + +// TestCargoURLAllowed covers the SSRF allow-check: for the real crates.io base, +// the host must be allow-listed AND the scheme/port must be https/default; for a +// test (httptest) base only the host is checked so mocks keep working. +func TestCargoURLAllowed(t *testing.T) { + prodHosts := cargoAllowedHosts(model.RegistryURLCrates) // {crates.io, static.crates.io} + mockBase := "http://127.0.0.1:54321" + mockHosts := cargoAllowedHosts(mockBase) // {127.0.0.1} + + cases := []struct { + desc string + raw string + baseURL string + hosts map[string]struct{} + want bool + }{ + {"prod: https static.crates.io", "https://static.crates.io/readmes/x/x.html", model.RegistryURLCrates, prodHosts, true}, + {"prod: https crates.io", "https://crates.io/api/v1/crates/x/1.0.0", model.RegistryURLCrates, prodHosts, true}, + {"prod: http downgrade rejected", "http://static.crates.io/x", model.RegistryURLCrates, prodHosts, false}, + {"prod: non-default port rejected", "https://static.crates.io:8443/x", model.RegistryURLCrates, prodHosts, false}, + {"prod: explicit 443 ok", "https://static.crates.io:443/x", model.RegistryURLCrates, prodHosts, true}, + {"prod: foreign host rejected", "https://evil.example/x", model.RegistryURLCrates, prodHosts, false}, + {"prod: userinfo host is evil rejected", "https://static.crates.io@evil.example/x", model.RegistryURLCrates, prodHosts, false}, + {"test base: mock host any scheme/port ok", "http://127.0.0.1:54321/readme-static/x", mockBase, mockHosts, true}, + {"test base: foreign host rejected", "http://127.0.0.2:54321/x", mockBase, mockHosts, false}, + } + + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + u, err := url.Parse(tc.raw) + if err != nil { + t.Fatalf("parse %q: %v", tc.raw, err) + } + if got := cargoURLAllowed(u, tc.baseURL, tc.hosts); got != tc.want { + t.Fatalf("cargoURLAllowed(%q, base=%q) = %v, want %v", tc.raw, tc.baseURL, got, tc.want) + } + }) + } +} diff --git a/internal/validators/registries/cargo_test.go b/internal/validators/registries/cargo_test.go index d2c8128d..5328f452 100644 --- a/internal/validators/registries/cargo_test.go +++ b/internal/validators/registries/cargo_test.go @@ -340,6 +340,7 @@ func TestValidateCargoCombinedFixture(t *testing.T) { readmeStatus int readmeBody string versionExists bool // response for the /api/v1/crates/{n}/{v} existence probe (403 disambiguation) + versionProbe int // if non-zero, the existence probe returns this status (overrides versionExists) wantErr bool wantContains []string wantNotContains []string @@ -377,8 +378,9 @@ func TestValidateCargoCombinedFixture(t *testing.T) { wantNotContains: []string{"has no rendered README"}, }, { - // Crate/version exists but has no rendered README: CDN 403 + existence - // probe 200. Must NOT be reported as "not found". + // Crate/version exists but the README CDN 403s: existence probe 200. + // Must NOT be reported as "not found", and must not flatly assert the + // README is absent (a 403 isn't definitive proof of that). name: "readme_403_no_readme", crateName: "combined-readme403-noreadme", version: "0.1.0", @@ -386,7 +388,20 @@ func TestValidateCargoCombinedFixture(t *testing.T) { readmeStatus: http.StatusForbidden, versionExists: true, wantErr: true, - wantContains: []string{"has no rendered README"}, + wantContains: []string{"exists on crates.io", "could not be retrieved"}, + wantNotContains: []string{"not found"}, + }, + { + // CDN 403 + the existence probe itself is rate-limited (429): existence + // is undetermined, so report transient/retryable, NOT "not found". + name: "readme_403_probe_transient", + crateName: "combined-readme403-probe429", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusForbidden, + versionProbe: http.StatusTooManyRequests, + wantErr: true, + wantContains: []string{"transient"}, wantNotContains: []string{"not found"}, }, { @@ -421,6 +436,18 @@ func TestValidateCargoCombinedFixture(t *testing.T) { wantErr: true, wantContains: []string{"ownership validation failed"}, }, + { + // Token present but glued to a trailing period — the error must explain + // the boundary cause, not tell the publisher to add a token they can see. + name: "glued_trailing_period_explained", + crateName: "combined-glued", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusOK, + readmeBody: fmt.Sprintf("
mcp-name: %s.
", serverName), + wantErr: true, + wantContains: []string{"immediately followed by", `"."`}, + }, } // lastMetaPath captures the metadata request path seen by the handler so @@ -450,10 +477,13 @@ func TestValidateCargoCombinedFixture(t *testing.T) { } // Existence probe used to disambiguate a README 403. if r.URL.Path == versionPath { - if tt.versionExists { + switch { + case tt.versionProbe != 0: + http.Error(w, "simulated probe status", tt.versionProbe) + case tt.versionExists: w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(map[string]any{"version": map[string]string{"num": tt.version}}) - } else { + default: http.Error(w, "not found", http.StatusNotFound) } return diff --git a/internal/validators/registries/mcpname.go b/internal/validators/registries/mcpname.go index a757db3b..527aefb1 100644 --- a/internal/validators/registries/mcpname.go +++ b/internal/validators/registries/mcpname.go @@ -18,6 +18,28 @@ func isServerNameChar(c byte) bool { } } +// isMCPNameBoundary reports whether the content immediately following a matched +// server name terminates the token, so that the matched name is not merely a +// prefix of a longer declared name. +// +// A boundary is the end of content, any non-server-name character (whitespace, a +// newline, or an HTML tag delimiter such as `<`), or the start of an HTML comment +// close (`-->` / `--!>`). The comment-close case matters because PyPI and NuGet +// publishers commonly hide the token in ``, and authors +// (or minifiers) frequently omit the space before `-->`, producing +// ``. There the byte after NAME is `-`, which is a +// server-name character, so without this case the documented hidden-comment form +// would fail to validate. +func isMCPNameBoundary(rest string) bool { + if rest == "" { + return true + } + if !isServerNameChar(rest[0]) { + return true + } + return strings.HasPrefix(rest, "-->") || strings.HasPrefix(rest, "--!>") +} + // containsMCPNameToken reports whether the package README/description contains // the ownership token "mcp-name: