diff --git a/.cursorignore b/.cursorignore new file mode 100644 index 0000000..5717ef9 --- /dev/null +++ b/.cursorignore @@ -0,0 +1 @@ +.planning/ diff --git a/.github/INTEGRATION-TEST.md b/.github/INTEGRATION-TEST.md index bdc3218..82b67d5 100644 --- a/.github/INTEGRATION-TEST.md +++ b/.github/INTEGRATION-TEST.md @@ -1,141 +1,126 @@ -# Integration Test Checklist +# Manual MCP QA Runbook -Manual verification steps for mcp-server-python-docs v0.1.0. -These tests require human execution -- they cannot be automated. +Use this document for manual MCP validation during development and before a +release. The goal is to verify real client behavior after the automated test +suite passes. -## Prerequisites - -- [ ] All CI tests pass on main branch -- [ ] `mcp-server-python-docs doctor` reports all checks PASS -- [ ] Index is built: `mcp-server-python-docs build-index --versions 3.12,3.13` - -## Test 1: Claude Desktop Integration (SHIP-01) - -### Setup +Release-specific sign-off still lives in [`.github/RELEASE.md`](RELEASE.md). -1. Open Claude Desktop settings (Developer > Edit Config or `~/Library/Application Support/Claude/claude_desktop_config.json` on macOS) -2. Add the following to `mcpServers`: - ```json - { - "mcpServers": { - "python-docs": { - "command": "uvx", - "args": ["mcp-server-python-docs"] - } - } - } - ``` -3. Restart Claude Desktop -4. 
Verify the MCP server icon appears in the chat input area - -### Test Steps - -- [ ] **T1.1**: Ask Claude: "what is asyncio.TaskGroup" - - **Expected**: Response references `asyncio.TaskGroup` with a URI containing `library/asyncio-task.html` - - **Expected**: Response includes symbol information (not just generic LLM knowledge) - - **Actual result**: _______________ +## Prerequisites -- [ ] **T1.2**: Ask Claude: "how do I use pathlib.Path.glob" - - **Expected**: Response references `pathlib` documentation with relevant section content - - **Actual result**: _______________ +- CI or local checks are green: + - `uv run ruff check src/ tests/` + - `uv run pyright src/` + - `uv run pytest --tb=short -q` +- Local index build completed: + - `uv run mcp-server-python-docs build-index --versions 3.12,3.13` +- Doctor passes: + - `uv run mcp-server-python-docs doctor` +- If `uv` is not on `PATH`, use `python -m uv ...` instead -- [ ] **T1.3**: Ask Claude: "search for json parsing in Python" - - **Expected**: Response includes hits from the `json` module documentation - - **Actual result**: _______________ +## Test 1: MCP Inspector quick loop -- [ ] **T1.4**: Verify no errors in Claude Desktop developer console - - **Expected**: No MCP protocol errors or connection drops - - **Actual result**: _______________ +Use Inspector for fast local iteration before checking real clients. -### Teardown +### Start Inspector -- Remove the `python-docs` entry from `mcpServers` (or keep for ongoing use) +```bash +npx @modelcontextprotocol/inspector uv --directory . 
run mcp-server-python-docs +``` -## Test 2: Cursor Integration (SHIP-02) +### Verify + +- [ ] Connect successfully over stdio +- [ ] Confirm the tool list includes: + - `search_docs` + - `get_docs` + - `list_versions` + - `detect_python_version` +- [ ] Call `search_docs` with query `asyncio.TaskGroup`, `kind="symbol"`, `version="3.13"` + - Expected: exact symbol hit with `library/asyncio-task.html` +- [ ] Call `get_docs` for the returned slug and anchor + - Expected: section-level documentation, not an unrelated page dump +- [ ] Call `list_versions` + - Expected: indexed versions appear with the configured default version +- [ ] Call `detect_python_version` + - Expected: returns local interpreter information without breaking the session +- [ ] Observe no protocol corruption or unexplained disconnects in Inspector + +## Test 2: Claude Desktop integration ### Setup -1. Open Cursor Settings > MCP -2. Add a new MCP server: - - **Name**: python-docs - - **Command**: `uvx` - - **Args**: `mcp-server-python-docs` -3. Verify the server shows as connected (green indicator) - -### Test Steps - -- [ ] **T2.1**: In a chat or Composer session, ask: "what is asyncio.TaskGroup" - - **Expected**: Response references `asyncio.TaskGroup` with documentation content - - **Expected**: The MCP tool call is visible in the chat - - **Actual result**: _______________ - -- [ ] **T2.2**: Ask: "show me the docs for collections.OrderedDict" - - **Expected**: Response includes `collections.OrderedDict` documentation - - **Actual result**: _______________ +1. Open Claude Desktop settings +2. Add this server config: + +```json +{ + "mcpServers": { + "python-docs": { + "command": "uvx", + "args": ["mcp-server-python-docs"] + } + } +} +``` -- [ ] **T2.3**: Verify the server stays connected across multiple queries - - **Expected**: No disconnections or "server not responding" errors - - **Actual result**: _______________ +3. Fully restart Claude Desktop +4. 
Verify the MCP server appears in the chat UI -### Teardown +### Checks -- Remove or disable the python-docs MCP server in Cursor settings (or keep) +- [ ] Ask: `what is asyncio.TaskGroup` + - Expected: response uses stdlib documentation, not just the model's prior knowledge +- [ ] Ask: `how do I use pathlib.Path.glob` + - Expected: response cites the right docs section +- [ ] Ask: `search for json parsing in Python` + - Expected: response surfaces results from the `json` docs +- [ ] Check the Claude developer console + - Expected: no protocol errors or repeated reconnect loops -## Test 3: Fresh Install Verification (SHIP-06 partial) +## Test 3: Cursor integration ### Setup -1. Create a throwaway virtualenv or use a machine without the package: - ```bash - # Option A: Fresh venv - uv venv /tmp/test-install && source /tmp/test-install/bin/activate - - # Option B: Use uvx (isolated by default) - # No setup needed -- uvx creates its own isolated env - ``` +1. Open Cursor MCP settings +2. Add a server: + - Name: `python-docs` + - Command: `uvx` + - Args: `mcp-server-python-docs` +3. 
Confirm the server shows as connected -### Test Steps +### Checks -- [ ] **T3.1**: Install from PyPI (after package is published): - ```bash - uvx mcp-server-python-docs --version - ``` - - **Expected**: Prints `0.1.0` - - **Actual result**: _______________ +- [ ] Ask: `what is asyncio.TaskGroup` + - Expected: MCP tool usage is visible and the answer references the right docs +- [ ] Ask: `show me the docs for collections.OrderedDict` + - Expected: response includes the relevant documentation section +- [ ] Ask a second or third follow-up query + - Expected: the server stays connected across multiple calls -- [ ] **T3.2**: Build the index: - ```bash - uvx mcp-server-python-docs build-index --versions 3.12,3.13 - ``` - - **Expected**: Downloads objects.inv files, builds index, prints success message - - **Actual result**: _______________ +## Test 4: Fresh install verification -- [ ] **T3.3**: Run doctor: - ```bash - uvx mcp-server-python-docs doctor - ``` - - **Expected**: All checks PASS - - **Actual result**: _______________ +Use this when validating the published package or a clean local environment. -- [ ] **T3.4**: Verify the full README install flow works end-to-end - - **Expected**: Following README instructions from scratch produces a working server - - **Actual result**: _______________ - -### Teardown - -```bash -# Clean up throwaway venv if used -rm -rf /tmp/test-install -``` +On Windows, close the MCP client before rebuilding if the live index file is +locked. 
-## Sign-Off +### Checks -| Test | Pass/Fail | Tester | Date | -|------|-----------|--------|------| -| T1: Claude Desktop | | | | -| T2: Cursor | | | | -| T3: Fresh Install | | | | +- [ ] `uvx mcp-server-python-docs --version` + - Expected: prints the current package version +- [ ] `uvx mcp-server-python-docs build-index --versions 3.12,3.13` + - Expected: index build completes successfully +- [ ] `uvx mcp-server-python-docs doctor` + - Expected: all required checks pass +- [ ] Follow the README from scratch + - Expected: a new user can get to a working client configuration without using `.planning/` -**Release approved**: [ ] Yes / [ ] No -- needs fixes +## Evidence log -**Notes**: +| Test | Pass/Fail | Tester | Date | Notes | +|------|-----------|--------|------|-------| +| Inspector quick loop | | | | | +| Claude Desktop | | | | | +| Cursor | | | | | +| Fresh install | | | | | diff --git a/.gitignore b/.gitignore index b7faf40..ae0464d 100644 --- a/.gitignore +++ b/.gitignore @@ -195,11 +195,9 @@ cython_debug/ .pypirc # Cursor -# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to -# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data -# refer to https://docs.cursor.com/context/ignore-files -.cursorignore -.cursorindexingignore +# Keep repo-local Cursor context files tracked when present so the repository +# can define its own indexing boundaries. +# https://docs.cursor.com/context/ignore-files # Marimo marimo/_static/ diff --git a/.planning/FULL-CODEBASE-REVIEW.md b/.planning/FULL-CODEBASE-REVIEW.md index d1b9340..c6c50cc 100644 --- a/.planning/FULL-CODEBASE-REVIEW.md +++ b/.planning/FULL-CODEBASE-REVIEW.md @@ -37,6 +37,10 @@ findings: status: issues_found --- +> Archival note: this review is a historical snapshot from the generated GSD +> workflow. It may describe issues that have already been fixed and should not +> be treated as current repo truth. 
+ # Full Codebase: Code Review Report **Reviewed:** 2026-04-16T12:00:00Z diff --git a/.planning/STATE.md b/.planning/STATE.md index 51613f4..c220fe4 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -14,6 +14,10 @@ progress: percent: 53 --- +> Archival note: this file is historical GSD state, not current repository +> truth. For active setup and workflow guidance, start with `README.md`, +> `CONTRIBUTING.md`, and `AGENTS.md`. + # Project State ## Project Reference diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..2236901 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,97 @@ +# Repository Guidance + +## Project + +`mcp-server-python-docs` is a read-only MCP server for the official Python +standard library documentation. It is built for end users who want precise, +version-aware stdlib answers inside MCP clients such as Claude, Cursor, and +Codex without relying on an external hosted docs API at query time. + +The repo's public credibility matters. Prefer changes that make the project +easier to trust, easier to verify, and easier to contribute to over changes +that merely add more AI or MCP setup. + +## Canonical Commands + +If `uv` is not installed yet: + +```bash +python -m pip install uv +``` + +Bootstrap the repo: + +```bash +uv sync --dev +``` + +If `uv` is not on your `PATH` after installation, reopen the shell or use +`python -m uv ...` as a fallback. + +Core verification commands: + +```bash +uv run ruff check src/ tests/ +uv run pyright src/ +uv run pytest --tb=short -q +``` + +Build and inspect a local docs index: + +```bash +uv run mcp-server-python-docs build-index --versions 3.12,3.13 +uv run mcp-server-python-docs doctor +uv run mcp-server-python-docs validate-corpus +``` + +Package smoke check: + +```bash +uv build +``` + +## MCP Testing Flow + +Use this order when validating MCP behavior: + +1. Run the automated checks above. +2. Use MCP Inspector for quick local iteration. +3. 
Confirm real-client behavior with the runbook in `.github/INTEGRATION-TEST.md`. + +Client-facing integration and release runbooks live here: + +- `.github/INTEGRATION-TEST.md` +- `.github/RELEASE.md` + +## Done Means + +Before calling work complete: + +- relevant lint, typecheck, and test commands have been run fresh +- user-facing docs reflect the current behavior +- MCP-related changes still work in the documented validation flow +- no runtime API/tool surface changes were made unless explicitly requested + +## AI and MCP Policy + +- Use official documentation first for MCP, OpenAI/Codex, and Python SDK questions. +- Avoid MCP sprawl. Do not add new MCP servers unless they clearly improve this + project's development or user experience. +- Do not add repo-local custom skills by default. Add one only if a repeated + workflow is genuinely painful and no strong public pattern already covers it. +- Follow existing test and documentation patterns before inventing new structure. + +## Context Hygiene + +Treat `.planning/` as archival project history, not live repo truth. + +Start with these files instead: + +- `README.md` +- `CONTRIBUTING.md` +- `.github/INTEGRATION-TEST.md` +- `tests/` + +The generated planning files may still be useful for maintainers who want the +old GSD workflow context, but they should not drive routine implementation +decisions. diff --git a/CLAUDE.md b/CLAUDE.md index 8c3dafc..f2e5da6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,223 +1,16 @@ - -## Project +# Claude Compatibility Notes -**mcp-server-python-docs** +Use `AGENTS.md` as the canonical repository guidance. -A read-only, version-aware, token-efficient MCP retrieval server over the Python standard library documentation. It runs over stdio, is backed by a locally-built SQLite + FTS5 index, and is distributed as a Python package installable via `uvx`. Its clients are LLMs (Claude, Cursor) — not browsers — so the optimization target is high-signal evidence per token spent. 
+This file is intentionally thin so Claude-compatible tools can find the repo +rules without carrying a second copy of project policy. -**Core Value:** LLMs can answer Python stdlib questions with precise, section-level evidence instead of flooding their context with full doc pages — closing a specific gap that general-purpose doc MCPs (Context7, DeepWiki) do not cover well for the Python stdlib. +Key points: -### Constraints - -- **Tech stack**: Python, official `mcp` package with `FastMCP`, SQLite + FTS5, `sphobjinv` — All four were chosen deliberately in the build guide; substitutions would invalidate the locked architecture. -- **Transport**: stdio only — Standard for local MCP servers; HTTP would force a much larger security and ops surface. -- **Storage**: SQLite with FTS5 compiled in — Some distro SQLite builds ship without FTS5; server must check at startup and fail loudly. Document `pysqlite3-binary` as a fallback dep. -- **Stdio protocol hygiene**: No `print()` to stdout from any code path — Stdout is reserved for MCP protocol messages. All logging goes to stderr. Any third-party library that might print to stdout must be neutralized before MCP starts speaking. -- **Package size**: Must fit under PyPI 100 MB limit — Forces `~/.cache/` first-run build; index cannot be bundled. -- **Read-only serving**: Server never writes to the index at runtime — Ingestion uses a separate writable handle; serving uses a read-only handle (`sqlite3.connect(path, uri=True)` with `?mode=ro`). -- **Distribution**: Must be runnable via `uvx mcp-server-python-docs` — This is the norm for MCP servers; `pyproject.toml` entry point is required. -- **Scope discipline**: Every v1.1 candidate stays in v1.1 — No scope creep from the "explicit non-goals" list in the build guide unless an explicit decision moves it back into Active. 
- - - -## Technology Stack - -## Validation Verdict Summary -| Locked Decision | Verdict | Headline | -|---|---|---| -| Official `mcp` SDK w/ `FastMCP` | **CONFIRMED + CAVEAT** | `mcp` 1.27.0; `FastMCP` still public and stable; Anthropic's own ref servers ironically use the *low-level* `Server` API. No action needed — FastMCP remains the right ergonomic pick for new projects. | -| `sphobjinv` over `objects.inv` | **CONFIRMED** | v2.4 (released 2026-03-23). Py 3.13 supported since 2.3.1.2. `Inventory(url=...)` + `.objects` iteration is still the blessed API. | -| SQLite + FTS5 (`unicode61 porter`) | **CONFIRMED** | Still the right primitive. `sqlite-vec` remains **pre-v1 (v0.1.9)** with explicit "expect breaking changes" warning — **defer to v1.1 stands**. | -| `pysqlite3-binary` fallback | **CONFIRMED with pin advice** | v0.5.4.post2 (2025-12-03). Linux x86-64 wheels only — **macOS/ARM users have no fallback**, so the startup FTS5 check must surface a clear error. | -| `uvx` distribution | **CONFIRMED** | Still the canonical MCP server install pattern. PEP 723 is for single-file scripts, not packaged distributions — not a competitor. | -| Sphinx JSON builder | **CONFIRMED with two gotchas** | JSON builder is alive in Sphinx 8.x/9.x. **Gotcha 1:** CPython's `Doc/Makefile` has **no `json` target** — you must invoke `sphinx-build -b json` directly after priming the venv. **Gotcha 2:** CPython 3.13 pins `sphinx<9.0.0`; 3.12 pins `sphinx~=8.2.0`. Build against each version's pinned Sphinx, not bleeding edge. | -| Python 3.12 + 3.13 as v0.1.0 targets | **CONFIRMED** | Both `objects.inv` artifacts verified live at `docs.python.org/{3.12,3.13}/objects.inv`. 3.13 is in full bugfix (until Oct 2026); 3.12 is security-only (until Oct 2028) — but doc artifacts stay published. 
| -## Recommended Stack -### Core Technologies -| Technology | Version | Purpose | Why Recommended | -|---|---|---|---| -| Python | **3.12, 3.13** (runtime) | Server + build-index CLI runtime | Matches PROJECT.md scope; 3.13 is full-bugfix until Oct 2026, 3.12 is security-only until 2028. Both are safe to target for a package published in Apr 2026. | -| `mcp` | `>=1.27.0,<2.0.0` | Official Python MCP SDK — provides `FastMCP`, `stdio_server`, tool/resource/prompt registration | Current PyPI release (2026-04-02). `FastMCP` stays at `from mcp.server.fastmcp import FastMCP` — import path unchanged. **Pin the major version.** The upstream repo has adopted a `main` = v2-development / `v1.x` = maintenance split as of v1.25 (Dec 2025), so `<2.0.0` is defensive. | -| `sphobjinv` | `>=2.4,<3.0` | Parse CPython `objects.inv` into symbol rows | v2.4 (2026-03-23) is current; API unchanged since 2.3.x. `Inventory(url=...)` + iteration over `.objects` yielding `DataObjStr(name, domain, role, uri, dispname)` is stable. | -| SQLite FTS5 | Bundled with CPython 3.12/3.13 | BM25 retrieval over sections / symbols / examples; external-content FTS5 tables | Still the right primitive for a read-only, single-machine, sub-100MB index. `unicode61 porter` tokenizer (for prose) and `unicode61` (for identifiers) remain the idiomatic choices. | -| Sphinx (build-time only) | `>=8.2,<9.0` for 3.12; `>=8.2,<9.0` for 3.13 | Build `.fjson` files from CPython source during `build-index` | **Match what CPython itself pins.** See "Sphinx version pinning by CPython branch" below. Sphinx 9.1.0 (Dec 2025) exists but CPython rejects it via `sphinx<9.0.0`. | -| `uv` / `uvx` | Latest stable from astral-sh | Primary distribution mechanism — `uvx mcp-server-python-docs` | Unchanged as the MCP server install norm. Anthropic's own reference servers (mcp-server-git, mcp-server-time, mcp-server-fetch) all ship this way. 
| -### Supporting Libraries -| Library | Version | Purpose | When to Use | -|---|---|---|---| -| `pydantic` | `>=2.0.0,<3.0` | Tool input/output models; MCP SDK already requires it | Always — the guide references Pydantic models in §13 Package structure. Reuse the Pydantic v2 version MCP pins transitively. | -| `click` | `>=8.1.7,<9.0` | CLI subcommand dispatch (`serve`, `build-index`, `validate-corpus`) | Matches `mcp-server-git`'s pattern; avoids DIY `argparse` for 3 subcommands with overlapping flags. | -| `pysqlite3-binary` | `>=0.5.4.post2` | **OPTIONAL** fallback when system SQLite lacks FTS5 | **Linux x86-64 only** — no macOS/ARM wheels. Treat as opt-in via extras (`pip install mcp-server-python-docs[pysqlite3]`), and make the startup check error tell users what to do on non-Linux. | -| `PyYAML` | `>=6.0,<7.0` | Read `data/synonyms.yaml` curated concept-expansion table | Once, at build-index time, to populate the `synonyms` SQLite table. | -### Development Tools -| Tool | Purpose | Notes | -|---|---|---| -| `uv` | Dependency resolution, virtualenv management, build frontend | Use `uv sync` during dev; `uv build` to produce sdist/wheel for PyPI. | -| `hatchling` | Build backend declared in `pyproject.toml` `[build-system]` | Same choice as `mcp-server-time` and `mcp-server-git`; zero-config, fast. | -| `pytest` + `pytest-asyncio` | Unit / storage / ingestion / smoke tests | Stability tests (structural, not golden) per guide §14. | -| `ruff` | Lint + format | Matches Anthropic reference servers' convention. | -| `pyright` | Type checking | Matches Anthropic reference servers' convention. Protects the typed Pydantic tool schemas FastMCP generates. | -| `freezegun` | Freeze time in ingestion/atomic-swap tests | Useful for `build-{timestamp}.db` naming and `ingestion_runs.started_at` assertions. 
| -## Installation -# Project dev bootstrap (one-time) -# Add a new dep -# Dev-only -# Build-time only (used by build-index CLI, not at serve-time) -# NOTE: Sphinx is NOT a runtime dep of the served package. It is installed -# into an isolated venv by the build-index CLI when ingesting CPython source, -# OR declared as an optional extra: mcp-server-python-docs[build] -# Optional FTS5 fallback (Linux x86-64 only) -### `pyproject.toml` entry point (verified pattern) -## Alternatives Considered -| Recommended | Alternative | When to Use Alternative | -|---|---|---| -| Official `mcp` SDK's `FastMCP` (`from mcp.server.fastmcp import FastMCP`) | Standalone `fastmcp` package (PrefectHQ, v3.2.4) | **Not for this project.** The standalone `fastmcp` (Jeremiah Lowin / PrefectHQ) is a separate v3 line now — richer feature set, "70% of MCP servers across all languages" claim, migration guides from `mcp.server.fastmcp`. **But the PROJECT.md decision explicitly pins "official `mcp` package with `FastMCP`"** and Anthropic's reference servers use `mcp`, so the ecosystem validator signal is clear: stay on `mcp`. Re-evaluate at v1.1 only if FastMCP in `mcp` lags features the guide needs. | -| `FastMCP` high-level API | Low-level `mcp.server.Server` + `mcp.server.stdio.stdio_server` | **Ironic note:** Anthropic's own reference servers (`mcp-server-git`, `mcp-server-time`, `mcp-server-fetch`) use the **low-level** `Server` class, not `FastMCP`. They give up decorator-based schema generation in exchange for handler control. **Recommendation: stick with `FastMCP`** — the guide's architectural simplicity gain (3 tools, typed Pydantic schemas, zero tool-registry boilerplate) is worth more than parity with reference servers for a 3-tool scope. If FastMCP later blocks a critical feature, the guide's 3-service layer makes dropping to the low-level API mechanical. 
| -| SQLite FTS5 BM25 + synonym table | `sqlite-vec` hybrid (BM25 + vector) | **Defer to v1.1 as the guide says.** Current `sqlite-vec` is **v0.1.9 (2026-03-31)** and the project README still states "pre-v1, so expect breaking changes." No stable v1 timeline announced. Pinning a pre-v1 C extension in a package distributed via `uvx` is a recipe for "works on my machine" issues. Schema already reserves room for it — no migration cost to add later. | -| System SQLite | `pysqlite3-binary` | Fallback only. `pysqlite3-binary` ships **Linux x86-64 wheels only**, so macOS and Linux ARM users cannot fall back this way. Keep the startup FTS5 capability check, but phrase the error message with platform-specific guidance. | -| Sphinx JSON builder (primary, no HTML fallback) | HTML scraping with `beautifulsoup4` | **Out of scope for v1 per PROJECT.md.** Not researched. Noted only to explain the absence — the build guide originally listed BS4 as a fallback; PROJECT.md cut it. | -| `uvx` | `pipx`, `pip install --user`, PEP 723 single-file scripts | `pipx install` is documented as a secondary path for persistent installs. **PEP 723** (accepted; embedded script metadata) is **not a competitor** — it targets single-file scripts that explicitly don't want a `pyproject.toml`. Our project is multi-file, PyPI-distributed, typed; pyproject.toml + `uvx` entry-point remains right. | -## What NOT to Use -| Avoid | Why | Use Instead | -|---|---|---| -| `Sphinx>=9.0.0` when building CPython 3.12 or 3.13 docs | Both `cpython/main` and `cpython/3.13` branches pin `sphinx<9.0.0` in `Doc/requirements.txt`. `cpython/3.12` pins `sphinx~=8.2.0`. Using Sphinx 9.x will produce build warnings or hard failures on CPython's configured extensions. | Pin Sphinx to match what the CPython branch you're ingesting pins. Per-version Sphinx version: 3.12 → `sphinx~=8.2.0`; 3.13 → `sphinx>=8.2,<9.0`. The `build-index` CLI should install Sphinx into an isolated venv based on the target version. 
| -| `make json` via CPython's `Doc/Makefile` | **There is no `json` target in `Doc/Makefile`.** Only html, htmlhelp, latex, text, texinfo, epub, changes, linkcheck, coverage, doctest, pydoc-topics, gettext. A naive `cd Doc && make json` will fail. | Use the Makefile's `venv` target to create `Doc/venv/` with the pinned Sphinx and deps, then invoke `sphinx-build -b json . build/json` directly: `./venv/bin/sphinx-build -b json . build/json`. Or call `Doc/venv/bin/python -m sphinx -b json ...`. | -| `mcp<1.23.0` | `main` became v2-development as of v1.25, `v1.x` is the maintenance branch. Older 1.x versions predate the SDK's current lifespan / tool-registration patterns. | Pin `>=1.27.0,<2.0.0`. v1.27.0 is the current head of the v1 line (released 2026-04-02). | -| Global `sys.stdout` usage anywhere on the serve-time import path | Stdout is the MCP protocol channel. Any `print()` corrupts JSON-RPC frames and disconnects the client. | Guide §9 protocol hygiene stands verbatim. Reinforce with a CI check: `ruff` rule or a pytest that spawns the server as a subprocess and asserts no non-JSON-RPC bytes reach stdout. | -| `sphinx_rtd_theme` or any HTML theme dep | Irrelevant for JSON output — pure bloat if added "just in case." | Install only `sphinx` + the extensions CPython's `Doc/conf.py` actually imports. Most are custom extensions under `Doc/Tools/extensions/` and come with the CPython source itself. | -| `sqlite-vec` in v0.1.0 | Pre-v1, explicit breaking-change warning, no macOS/ARM wheel guarantees on every release. | Synonym table per guide §6; revisit in v1.1 with usage data. | -## Stack Patterns by Variant -- Detect at server startup via the `assert_fts5_available` probe from guide §9. -- On Linux x86-64: error message points user to `pip install 'mcp-server-python-docs[pysqlite3]'`. -- On macOS / Linux ARM / Windows: error message points user to install Python from python.org or `uv python install`, which ship with FTS5-enabled SQLite. 
-- **Do not** silently swap to `pysqlite3_binary` in code — the guide's "fail loudly at startup" rule is right. -- Out of scope for v0.1.0. The `doc_sets` table supports it schematically; the ingestion CLI just needs to accept the version string. -- **Note:** 3.14 is out (released 2025-10-07, 3.14.4 was released 2026-04-07) but PROJECT.md defers it to post-v0.1.0 because "3.14 still moving, broader is premature." That rationale was correct in the build guide's original writing and is still defensible — 3.14's `objects.inv` is available, but running CPython's doc build with a compatible Sphinx is an untested path here. -- The old regression (issue #11615, Sphinx 7.2.0 breakage) is **closed**. No known current blockers on Sphinx 8.2.x building CPython 3.13 docs to JSON. -- But: multilingual builds have a known bug (sphinx issue #13448) where translation caches leak across languages. **Our English-only path is not affected.** -- Mitigation: `validate-corpus` CLI should spot-check a known section (e.g., `library/asyncio-task.html` → `asyncio.TaskGroup`) after every build and fail the atomic swap if missing. -## Version Compatibility Matrix -| Package | Version | Compatible With | Notes | -|---|---|---|---| -| `mcp` | 1.27.0 | Python 3.10–3.13 | We require ≥3.12; any 3.10/3.11 compat in `mcp` is irrelevant to us. | -| `sphobjinv` | 2.4 | Python 3.13 added in 2.3.1.2 | Stable iteration API (`Inventory.objects` → `DataObjStr`). | -| `Sphinx` | 8.2.x | Python ≥3.11 | Required for CPython 3.12 (pinned `~=8.2.0`) and 3.13 (pinned `<9.0.0`). | -| `Sphinx` | 9.1.0 | Python ≥3.12 | **Do not use** — CPython branches reject it. | -| `pysqlite3-binary` | 0.5.4.post2 | Python 3.8–3.14 | Binary wheels **Linux x86-64 only**. No source dist. | -| `sqlite-vec` | 0.1.9 | Python 3.x via `pip install sqlite-vec` | **Pre-v1, expect breakage** — do not ship in v0.1.0. | -| `pydantic` | 2.x | Transitive via `mcp` | MCP SDK requires Pydantic v2; reuse. | -## Per-Component Deep Dive -### 1. 
Official `mcp` SDK with `FastMCP` → **CONFIRMED + CAVEAT** -- PyPI: `mcp` 1.27.0 released 2026-04-02, requires Python ≥3.10, MIT, maintained by Anthropic (David Soria Parra). -- Context7 (`/modelcontextprotocol/python-sdk`): `from mcp.server.fastmcp import FastMCP` is still the canonical import; `@mcp.tool()` decorator, `@mcp.resource("...")`, `@mcp.prompt()` unchanged. -- Lifespan pattern confirmed: `@asynccontextmanager async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]` with `mcp = FastMCP("My App", lifespan=app_lifespan)`. **This is exactly the pattern our 3-service DI wiring needs.** -- Transport: `mcp.run()` with no args defaults to stdio; `mcp.run(transport="streamable-http", ...)` exists for future HTTP (out of scope). -- `mcp-server-git` 0.6.2 → `from mcp.server import Server` + `from mcp.server.stdio import stdio_server` -- `mcp-server-time` 0.6.2 → same pattern -- `mcp-server-fetch` 0.6.3 → same pattern -### 2. `sphobjinv` → **CONFIRMED** -- PyPI: v2.4 released 2026-03-23. Actively maintained (20 releases total). -- GitHub: `bskinn/sphobjinv`. Python 3.13 support was added in v2.3.1.2. -- API: `Inventory(url="https://docs.python.org/3.13/objects.inv")` returns an inventory whose `.objects` is an iterable of `DataObjStr` with `.name`, `.domain`, `.role`, `.uri`, `.dispname`, `.priority`. Matches guide §8 Tier 1 snippet exactly. -### 3. SQLite + FTS5 (`unicode61 porter`) → **CONFIRMED** -- `sqlite-vec` v0.1.9 (2026-03-31) — still **pre-v1**, project README still says "expect breaking changes!" -- No v1 roadmap date announced as of research date. -- The guide's §6 "defer to v1.1" call remains correct. Schema leaves room for an `embedding` column on `sections` — no migration needed when v1.1 revisits. 
-- `unicode61 porter` for `sections_fts` (prose — porter stemming helps "parse/parses/parsing" collapse) -- `unicode61` (no porter) for `symbols_fts` and `examples_fts` (identifiers — stemming would mangle `asyncio.TaskGroup` into `asyncio.taskgroup`) -### 4. `pysqlite3-binary` fallback → **CONFIRMED with an important caveat** -- On Linux x86-64 without FTS5 → "install `mcp-server-python-docs[pysqlite3]`" is a valid recovery path. -- On macOS / Linux ARM / Windows → `pysqlite3-binary` won't install. The error message must say so and point users to install a Python that bundles FTS5 (python.org builds, `uv python install`, or Homebrew Python all have FTS5 enabled). -### 5. `uvx` distribution → **CONFIRMED** -- All three Anthropic-maintained Python reference servers (`mcp-server-git`, `mcp-server-time`, `mcp-server-fetch`) ship via `uvx` as the primary method. -- Claude Desktop config example `{"command": "uvx", "args": ["mcp-server-python-docs"]}` is the canonical pattern. -- `[project.scripts] mcp-server-python-docs = "mcp_server_python_docs.__main__:main"` is the idiomatic entry-point declaration (verified against `mcp-server-time`'s `pyproject.toml`). -### 6. Sphinx JSON builder → **CONFIRMED with two gotchas** -# OR: -- `cpython/3.12/Doc/requirements.txt` → `sphinx~=8.2.0` -- `cpython/3.13/Doc/requirements.txt` → `sphinx<9.0.0` -- `cpython/main/Doc/requirements.txt` → `sphinx<9.0.0` -### 7. Python 3.12 + 3.13 as v0.1.0 targets → **CONFIRMED** -- `https://docs.python.org/3.12/objects.inv` → returns 200, 135.1 KB, zlib-compressed Sphinx inventory v2 -- `https://docs.python.org/3.13/objects.inv` → returns 200, 142.5 KB, zlib-compressed Sphinx inventory v2 -- Python 3.13.12 (full bugfix mode until Oct 2026) and 3.12.13 are current patch releases. -- Python 3.14 is out (3.14.4 released 2026-04-07) but **PROJECT.md correctly defers** until 3.14's doc build is proven with a stable Sphinx pin. 
-- 3.12: security-only until Oct 2028 -- 3.13: full-bugfix until Oct 2026, then security-only until 2029 -## Sources -### High confidence (Context7 + PyPI + official repos, dual-verified) -- `/modelcontextprotocol/python-sdk` (Context7) — FastMCP decorator API, lifespan context, stdio transport, mcp.run() signatures -- [PyPI: mcp 1.27.0](https://pypi.org/project/mcp/1.27.0/) — release date 2026-04-02, Python ≥3.10, Anthropic maintained -- [PyPI: mcp (index)](https://pypi.org/project/mcp/) — 1.27.0 current stable, FastMCP still recommended entry point -- [PyPI: sphobjinv](https://pypi.org/project/sphobjinv/) — v2.4 (2026-03-23), actively maintained -- [GitHub: bskinn/sphobjinv releases](https://github.com/bskinn/sphobjinv/releases) — v2.4 changelog confirms no breaking API changes from 2.3.x -- [PyPI: Sphinx 8.2.3](https://pypi.org/project/Sphinx/8.2.3/) — release 2025-03-02, requires Python ≥3.11, JSON builder still supported -- [PyPI: Sphinx (index)](https://pypi.org/project/Sphinx/) — 9.1.0 current but not usable for CPython 3.12/3.13 -- [PyPI: pysqlite3-binary](https://pypi.org/project/pysqlite3-binary/) — v0.5.4.post2 (2025-12-03), Linux x86-64 wheels only -- [GitHub: asg017/sqlite-vec](https://github.com/asg017/sqlite-vec) — v0.1.9 (2026-03-31), pre-v1, "expect breaking changes!" 
-- [cpython/main/Doc/requirements.txt](https://github.com/python/cpython/blob/main/Doc/requirements.txt) — `sphinx<9.0.0` pinning confirmed -- [cpython/3.13/Doc/requirements.txt](https://github.com/python/cpython/blob/3.13/Doc/requirements.txt) — `sphinx<9.0.0` -- [cpython/3.12/Doc/requirements.txt](https://github.com/python/cpython/blob/3.12/Doc/requirements.txt) — `sphinx~=8.2.0` -- [cpython/main/Doc/Makefile](https://github.com/python/cpython/blob/main/Doc/Makefile) — no json target -- [cpython/3.13/Doc/Makefile](https://raw.githubusercontent.com/python/cpython/3.13/Doc/Makefile) — no json target -- [cpython/3.13/Doc/conf.py](https://raw.githubusercontent.com/python/cpython/3.13/Doc/conf.py) — custom extension list (audit_events, availability, c_annotations, etc.) -- [mcp-server-time pyproject.toml](https://raw.githubusercontent.com/modelcontextprotocol/servers/main/src/time/pyproject.toml) — canonical uvx entry-point pattern -- [mcp-server-git pyproject.toml](https://raw.githubusercontent.com/modelcontextprotocol/servers/main/src/git/pyproject.toml) — depends on `mcp`, NOT `fastmcp` -- [mcp-server-fetch pyproject.toml](https://raw.githubusercontent.com/modelcontextprotocol/servers/main/src/fetch/pyproject.toml) — depends on `mcp>=1.1.3` -- [mcp-server-time server.py](https://raw.githubusercontent.com/modelcontextprotocol/servers/main/src/time/src/mcp_server_time/server.py) — uses low-level `Server`, not FastMCP -- [mcp-server-git server.py](https://raw.githubusercontent.com/modelcontextprotocol/servers/main/src/git/src/mcp_server_git/server.py) — uses low-level `Server`, not FastMCP -- [docs.python.org/3.13/objects.inv](https://docs.python.org/3.13/objects.inv) — 200 OK, 142.5 KB, Sphinx inventory v2 -- [docs.python.org/3.12/objects.inv](https://docs.python.org/3.12/objects.inv) — 200 OK, 135.1 KB, Sphinx inventory v2 -### Medium confidence (WebSearch findings with corroboration) -- [sphinx-doc/sphinx#11615](https://github.com/sphinx-doc/sphinx/issues/11615) — 
closed (JSON builder regression fixed post-7.2.0) -- [sphinx-doc/sphinx#13448](https://github.com/sphinx-doc/sphinx/issues/13448) — open, multilingual-only, does not affect us -- [PEP 723](https://peps.python.org/pep-0723/) — Final (2024-01-08), single-file script metadata, not a pyproject.toml competitor -- [FastMCP standalone (PyPI)](https://pypi.org/project/fastmcp/) — v3.2.4 (2026-04-14), separate from `mcp.server.fastmcp` -- [GitHub: jlowin/fastmcp](https://github.com/jlowin/fastmcp) — standalone project; "powers 70% of MCP servers" claim -- [uv tools guide](https://docs.astral.sh/uv/guides/tools/) — `uvx` alias for `uv tool run`; still canonical in 2026 -- [Python 3.13 release cycle](https://peps.python.org/pep-0745/) — bugfix until Oct 2026 -- [endoflife.date/python](https://endoflife.date/python) — 3.12 security-only, 3.13 bugfix, 3.14 out -### Low confidence (single-source, flagged for validator) -- None. Every load-bearing claim above has at least two independent sources (Context7 + PyPI, or official repo + PyPI, or reference server + PyPI). -## Implications for Roadmap - - - -## Conventions - -Conventions not yet established. Will populate as patterns emerge during development. - - - -## Architecture - -Architecture not yet mapped. Follow existing patterns found in the codebase. - - - -## Project Skills - -No project skills found. Add skills to any of: `.claude/skills/`, `.agents/skills/`, `.cursor/skills/`, or `.github/skills/` with a `SKILL.md` index file. - - - -## GSD Workflow Enforcement - -Before using Edit, Write, or other file-changing tools, start work through a GSD command so planning artifacts and execution context stay in sync. - -Use these entry points: -- `/gsd-quick` for small fixes, doc updates, and ad-hoc tasks -- `/gsd-debug` for investigation and bug fixing -- `/gsd-execute-phase` for planned phase work - -Do not make direct repo edits outside a GSD workflow unless the user explicitly asks to bypass it. 
- - - - - -## Developer Profile - -> Profile not yet configured. Run `/gsd-profile-user` to generate your developer profile. -> This section is managed by `generate-claude-profile` -- do not edit manually. - +- Start with `README.md` and `CONTRIBUTING.md` for current repo truth. +- Treat `.planning/` as archival context, not live instructions. +- Use official docs first for MCP, OpenAI/Codex, and Python SDK behavior. +- Do not add extra MCP servers or repo-local custom skills unless there is a + clear, repeated project need. +- Use `.github/INTEGRATION-TEST.md` for manual MCP QA and `.github/RELEASE.md` + for release-specific steps. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..94e2a65 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,82 @@ +# Contributing to mcp-server-python-docs + +Start here for current contributor workflow. You should not need `.planning/` +to set up, test, or validate the repo. + +## 1. Install tooling + +Install `uv` if you do not already have it: + +```bash +python -m pip install uv +``` + +Then bootstrap the repo: + +```bash +uv sync --dev +``` + +If `uv` is not on your `PATH` after installation, reopen the shell or use +`python -m uv ...` as a fallback. + +## 2. Run the standard checks + +Use the same commands the CI workflow runs: + +```bash +uv run ruff check src/ tests/ +uv run pyright src/ +uv run pytest --tb=short -q +``` + +If you are working on retrieval behavior specifically, the curated regression +suite is: + +```bash +uv run pytest tests/test_retrieval_regression.py -q +``` + +## 3. Build a local docs index + +The server needs a local SQLite index before runtime validation: + +```bash +uv run mcp-server-python-docs build-index --versions 3.12,3.13 +uv run mcp-server-python-docs doctor +uv run mcp-server-python-docs validate-corpus +``` + +`build-index` downloads the symbol inventories, clones CPython docs sources, +runs the Sphinx JSON build, and writes the local cache database. + +## 4. 
Validate MCP behavior + +Use this validation order: + +1. Run the automated checks. +2. Use MCP Inspector for fast local iteration. +3. Confirm client behavior in Claude Desktop and Cursor. + +The detailed manual runbook lives in +[`.github/INTEGRATION-TEST.md`](.github/INTEGRATION-TEST.md). + +## 5. Package and release checks + +For a local package smoke check: + +```bash +uv build +``` + +For release workflow details, PyPI trusted publishing setup, and the full +release checklist, see [`.github/RELEASE.md`](.github/RELEASE.md). + +## Project conventions + +- Keep the MCP tool surface small and read-only unless a change is explicitly + justified. +- Prefer official docs and primary sources over community summaries when + working on MCP/OpenAI/Python SDK behavior. +- Do not add repo-local custom skills by default. +- Do not treat `.planning/` as live repo truth. It is archival project history. diff --git a/README.md b/README.md index 0a20658..0456258 100644 --- a/README.md +++ b/README.md @@ -3,39 +3,44 @@ [![CI](https://github.com/ayhammouda/python-docs-mcp-server/actions/workflows/ci.yml/badge.svg)](https://github.com/ayhammouda/python-docs-mcp-server/actions/workflows/ci.yml) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue)](https://www.python.org/) -[![No API Keys](https://img.shields.io/badge/API%20keys-none-success)](README.md#why-teams-like-this) +[![No API Keys](https://img.shields.io/badge/API%20keys-none-success)](#why-teams-like-this) [![Official Python Docs](https://img.shields.io/badge/source-official%20python%20docs-informational)](https://docs.python.org/3/) -A corporate-friendly, read-only, version-aware MCP server for Python standard library documentation, optimized for low-token, section-level retrieval. +A read-only, version-aware MCP server for Python standard library +documentation, optimized for low-token, section-level retrieval. 
-It gives Claude, Cursor, and other MCP clients precise answers to Python stdlib questions without dumping whole documentation pages into the context window, without requiring API keys, and without depending on a hosted docs provider at query time. +It gives Claude, Cursor, Codex, and other MCP clients precise stdlib answers +without dumping whole documentation pages into the context window, without API +keys, and without depending on a hosted docs provider at query time. ## Why this exists -General-purpose doc retrieval is often noisy for Python stdlib questions: +General-purpose docs retrieval is often noisy for Python stdlib questions: - symbol lookups like `asyncio.TaskGroup` need exact resolution - answers should be version-aware (`3.12` vs `3.13`) - full-page fetches waste tokens when one section is enough -- official Python docs are the source of truth, but not packaged for MCP out of the box +- official Python docs are the source of truth, but they are not packaged for + MCP out of the box -This server builds a local index from official Python documentation and exposes a small MCP tool surface tuned for high-signal retrieval. +This server builds a local index from the official Python documentation and +exposes a small MCP tool surface tuned for high-signal retrieval. 
## Why teams like this - no API keys to provision, rotate, or justify -- official Python docs as the source of truth -- local index, so runtime retrieval does not depend on a third-party hosted docs API +- official Python docs are the source of truth +- local index, so runtime retrieval does not depend on a third-party hosted API - read-only behavior with a simple security story -- easy to explain in corporate environments where external dependencies raise friction +- easy to explain in environments where external dependencies raise friction ## What you get - exact symbol lookup from Python `objects.inv` - section-level retrieval with truncation and pagination -- local SQLite + FTS5 index, no runtime web scraping +- local SQLite + FTS5 index with no runtime web scraping - version-aware results across indexed Python versions -- read-only MCP tools with deterministic behavior +- a deliberately small, read-only MCP tool surface ## Quick example @@ -46,35 +51,49 @@ This server builds a local index from official Python documentation and exposes **Typical flow** 1. `search_docs("asyncio.TaskGroup", kind="symbol", version="3.13")` -2. call `get_docs(...)` using the slug and anchor returned by the best hit +2. Call `get_docs(...)` using the slug and anchor returned by the best hit **Result** -The model gets the exact symbol match and the relevant documentation section instead of a full-page dump. +The model gets the exact symbol match and the relevant documentation section +instead of a full-page dump. ## Install +Run it directly with `uvx`: + ```bash -uvx mcp-server-python-docs +uvx mcp-server-python-docs --version ``` -Or for a persistent install: +Or install it persistently: ```bash pipx install mcp-server-python-docs ``` -## First Run +If `uv` is installed but the `uv` command is not on your `PATH`, reopen your +shell or use `python -m uv ...` as a fallback for local contributor commands. 
+ +## First run -After installing, build the documentation index: +Build the local documentation index: + +```bash +uvx mcp-server-python-docs build-index --versions 3.12,3.13 +``` + +If you installed the package persistently, you can drop the `uvx` prefix: ```bash mcp-server-python-docs build-index --versions 3.12,3.13 ``` -This downloads Python's `objects.inv` symbol inventories, clones CPython source for each version, runs `sphinx-build -b json` to produce structured docs, and writes an SQLite index to your local cache (~200 MB). The build takes 5-15 minutes depending on your machine and network speed. +This downloads Python's `objects.inv` files, clones CPython docs sources, runs +`sphinx-build -b json`, and writes an SQLite index to your local cache. Expect +the first build to take several minutes. -## Configure Your MCP Client +## Configure your MCP client ### Claude Desktop @@ -84,7 +103,7 @@ Add this to your Claude Desktop configuration file: **Linux:** `~/.config/Claude/claude_desktop_config.json` -**Windows:** `%APPDATA%\Claude\claude_desktop_config.json` +**Windows:** `%APPDATA%\Claude\claude_desktop_config.json` ```json { @@ -101,7 +120,8 @@ Restart Claude Desktop after editing the config file. ### Cursor -Add to your Cursor MCP settings (`.cursor/mcp.json` in your project or global settings): +Add this to your Cursor MCP settings (`.cursor/mcp.json` in your project or +global settings): ```json { @@ -114,6 +134,34 @@ Add to your Cursor MCP settings (`.cursor/mcp.json` in your project or global se } ``` +### Codex + +Add this to `.codex/config.toml`: + +```toml +[mcp_servers.python-docs] +command = "uvx" +args = ["mcp-server-python-docs"] +``` + +## How quality is verified + +The repo makes quality visible with reproducible checks instead of relying on +marketing claims. 
+ +- CI runs `ruff`, `pyright`, and `pytest` on macOS and Linux for Python 3.12 + and 3.13 +- subprocess-based stdio hygiene and smoke tests protect the MCP protocol pipe +- packaging tests verify the wheel contents and CLI entry points +- curated retrieval regression tests cover exact symbol hits, version behavior, + missing symbols, truncation, and local-version defaults +- manual MCP QA is documented in + [`.github/INTEGRATION-TEST.md`](.github/INTEGRATION-TEST.md), with MCP + Inspector as the fast-feedback loop and Claude/Cursor as real-client checks + +Contributor commands and validation steps live in +[`CONTRIBUTING.md`](CONTRIBUTING.md). + ## Tools The server currently exposes four MCP tools: @@ -122,29 +170,37 @@ The server currently exposes four MCP tools: |------|-------------| | `search_docs` | Search Python stdlib docs by query. Supports symbol lookup (`asyncio.TaskGroup`), module search (`json`), and free-text search. Returns ranked hits with BM25 scoring and snippet excerpts. | | `get_docs` | Retrieve a specific documentation page or section by slug and optional anchor. Returns markdown content with budget-enforced truncation and pagination. | -| `list_versions` | List all indexed Python versions with their metadata. | -| `detect_python_version` | Detect the user's local Python version and report whether it matches an indexed documentation version. Helpful when `get_docs` defaults to the local runtime version. | +| `list_versions` | List all indexed Python versions with metadata. | +| `detect_python_version` | Detect the user's local Python version and report whether it matches an indexed documentation version. | -The core docs surface is still intentionally small: search, retrieve, and inspect available versions. `detect_python_version` is a convenience helper for local workflows. 
+## When to use this instead of generic docs retrieval -## Positioning +Use this server when you need: -If you're evaluating whether this is useful in practice, the key point is simple: +- exact Python stdlib symbol resolution +- consistent version-aware answers across Python 3.12 and 3.13 +- token-efficient section retrieval from official docs +- a local, read-only MCP server with a simple operational story -**this is not a generic web fetcher for Python docs.** -It is a purpose-built MCP server for official Python documentation with exact symbol resolution, version awareness, token-efficient section retrieval, and a cleaner corporate story than API-key-based doc services. +Use a generic fetcher or broader docs MCP when you need: -Think of it as an MCP passthrough to the official Python docs, but indexed locally so LLMs can retrieve the right section without hauling entire pages into context. +- third-party package docs outside the Python stdlib +- arbitrary web pages +- mixed-source research across many frameworks ## Diagnostics -Run the built-in health check to verify your environment: +Check the local environment: ```bash -mcp-server-python-docs doctor +uvx mcp-server-python-docs doctor ``` -This checks Python version, SQLite FTS5 availability, cache directory, index presence, and free disk space. +Validate an existing index: + +```bash +uvx mcp-server-python-docs validate-corpus +``` ## Troubleshooting @@ -152,20 +208,21 @@ This checks Python version, SQLite FTS5 availability, cache directory, index pre If you see an error about SQLite FTS5 not being available: -**Linux x86-64:** +**Linux x86-64** + ```bash pip install 'mcp-server-python-docs[pysqlite3]' ``` -**macOS / Windows / Linux ARM:** +**macOS / Windows / Linux ARM** + Install Python from [python.org](https://www.python.org/) or use: + ```bash uv python install ``` -Python builds from python.org and `uv python install` include FTS5. Some Linux distribution Python packages strip FTS5 from SQLite. 
- -### uvx cache stale +### `uvx` cache stale If `uvx mcp-server-python-docs` runs an old version: @@ -181,7 +238,8 @@ uv cache clean mcp-server-python-docs ### Claude Desktop on Windows (MSIX) -The MSIX-packaged version of Claude Desktop on Windows may have restricted PATH access. If `uvx` is not found, specify the full path in your config: +The MSIX-packaged version of Claude Desktop on Windows may have restricted PATH +access. If `uvx` is not found, specify the full path in your config: ```json { @@ -194,17 +252,30 @@ The MSIX-packaged version of Claude Desktop on Windows may have restricted PATH } ``` -Replace `YOU` with your Windows username. Find the exact path with `where uvx` in a terminal. +Replace `YOU` with your Windows username. Find the exact path with `where uvx`. ### Restart after rebuild -After running `build-index` to update the documentation index, you must restart your MCP client (Claude Desktop, Cursor, etc.) to pick up the new index. The server opens the database in read-only mode at startup and does not detect changes to the index file at runtime. +After running `build-index`, restart your MCP client so it picks up the new +database file. The server opens the index read-only at startup and does not +hot-reload an updated database. + +On Windows, close the MCP client before rebuilding if the index file is locked. + +## Contributor workflow + +For contributor setup and verification: + +- [`CONTRIBUTING.md`](CONTRIBUTING.md) +- [`.github/INTEGRATION-TEST.md`](.github/INTEGRATION-TEST.md) +- [`.github/RELEASE.md`](.github/RELEASE.md) ## Support -Tested on macOS and Linux. Windows should work (uses `platformdirs` + `pathlib` for cross-platform paths) but is not verified on every release. +Tested on macOS and Linux. Windows should work, but it is not verified on +every release. -Python 3.12 and 3.13 are supported. When `search_docs` is called without a version, it searches across indexed versions. 
When `get_docs` is called without a version, it can default to the detected local Python runtime if a matching index exists. +Python 3.12 and 3.13 are currently supported. ## License diff --git a/src/mcp_server_python_docs/__main__.py b/src/mcp_server_python_docs/__main__.py index c952f89..c5517c1 100644 --- a/src/mcp_server_python_docs/__main__.py +++ b/src/mcp_server_python_docs/__main__.py @@ -48,9 +48,10 @@ def _consume_saved_stdout_fd() -> int: # === SIGPIPE HANDLER (HYGN-03) === # Ignore SIGPIPE so client disconnect doesn't crash with BrokenPipeError. -# Windows doesn't have SIGPIPE. -if hasattr(signal, "SIGPIPE"): - signal.signal(signal.SIGPIPE, signal.SIG_IGN) +# Windows does not expose SIGPIPE. +sigpipe = getattr(signal, "SIGPIPE", None) +if sigpipe is not None: + signal.signal(sigpipe, signal.SIG_IGN) # === LOGGING TO STDERR (HYGN-02) === import logging # noqa: E402 diff --git a/src/mcp_server_python_docs/ingestion/publish.py b/src/mcp_server_python_docs/ingestion/publish.py index a9d8fdb..5e4c414 100644 --- a/src/mcp_server_python_docs/ingestion/publish.py +++ b/src/mcp_server_python_docs/ingestion/publish.py @@ -238,7 +238,7 @@ def atomic_swap( logger.info("Previous index backed up to %s", previous) previous_path = previous - os.rename(new_db_path, target_path) + os.replace(new_db_path, target_path) logger.info("New index published at %s", target_path) return previous_path @@ -260,7 +260,7 @@ def rollback(target_path: Path | None = None) -> bool: previous = target_path.parent / (target_path.name + ".previous") if previous.exists(): - os.rename(previous, target_path) + os.replace(previous, target_path) logger.info("Rolled back to previous index") return True diff --git a/tests/fixtures/retrieval_regression_cases.json b/tests/fixtures/retrieval_regression_cases.json new file mode 100644 index 0000000..8b95ba3 --- /dev/null +++ b/tests/fixtures/retrieval_regression_cases.json @@ -0,0 +1,129 @@ +[ + { + "id": "exact_symbol_hit", + "operation": "search", + 
"input": { + "query": "asyncio.TaskGroup", + "kind": "symbol", + "version": "3.13", + "max_results": 5 + }, + "expect": { + "min_hits": 1, + "first_hit": { + "title": "asyncio.TaskGroup", + "slug": "library/asyncio-task.html", + "anchor": "asyncio.TaskGroup", + "version": "3.13" + } + } + }, + { + "id": "module_lookup", + "operation": "search", + "input": { + "query": "json parsing", + "kind": "section", + "version": "3.13", + "max_results": 5 + }, + "expect": { + "min_hits": 1, + "first_hit": { + "title": "JSON parsing", + "slug": "library/json.html", + "version": "3.13" + } + } + }, + { + "id": "cross_version_distinction", + "operation": "search", + "input": { + "query": "asyncio.TaskGroup", + "kind": "symbol", + "version": null, + "max_results": 5 + }, + "expect": { + "min_hits": 2, + "versions": [ + "3.12", + "3.13" + ] + } + }, + { + "id": "section_retrieval_by_anchor", + "operation": "get_docs", + "input": { + "slug": "library/asyncio-task.html", + "version": "3.13", + "anchor": "asyncio.TaskGroup", + "max_chars": 8000, + "start_index": 0 + }, + "expect": { + "version": "3.13", + "anchor": "asyncio.TaskGroup", + "title": "asyncio.TaskGroup", + "content_contains": "Python 3.13 TaskGroup documentation" + } + }, + { + "id": "truncation_behavior", + "operation": "get_docs", + "input": { + "slug": "library/asyncio-task.html", + "version": "3.13", + "max_chars": 40, + "start_index": 0 + }, + "expect": { + "version": "3.13", + "truncated": true, + "content_max_length": 40, + "next_start_index": true + } + }, + { + "id": "unsupported_version", + "operation": "get_docs", + "input": { + "slug": "library/asyncio-task.html", + "version": "3.99", + "max_chars": 8000, + "start_index": 0 + }, + "error": "VersionNotFoundError" + }, + { + "id": "missing_symbol", + "operation": "search", + "input": { + "query": "nonexistent.symbol_name", + "kind": "symbol", + "version": "3.13", + "max_results": 5 + }, + "expect": { + "hits": 0 + } + }, + { + "id": "local_version_defaulting", + 
"operation": "server_get_docs_defaulted", + "input": { + "slug": "library/asyncio-task.html", + "anchor": "asyncio.TaskGroup", + "max_chars": 8000, + "start_index": 0 + }, + "detected_python_version": "3.12", + "expect": { + "version": "3.12", + "anchor": "asyncio.TaskGroup", + "content_contains": "Python 3.12 TaskGroup documentation" + } + } +] diff --git a/tests/test_doctor.py b/tests/test_doctor.py index 64f616b..25b1d6c 100644 --- a/tests/test_doctor.py +++ b/tests/test_doctor.py @@ -7,6 +7,20 @@ import subprocess import sys import tempfile +from pathlib import Path + + +def _isolated_cache_env(tmpdir: str) -> dict[str, str]: + """Build subprocess env that forces platformdirs into a temp cache root.""" + tmp_path = Path(tmpdir) + overrides = { + "HOME": str(tmp_path), + "XDG_CACHE_HOME": str(tmp_path), + "LOCALAPPDATA": str(tmp_path / "AppData" / "Local"), + "APPDATA": str(tmp_path / "AppData" / "Roaming"), + "USERPROFILE": str(tmp_path), + } + return {**os.environ, **overrides} class TestDoctor: @@ -74,11 +88,7 @@ def test_doctor_reports_missing_index(self): capture_output=True, text=True, timeout=15, - env={ - **os.environ, - "HOME": tmpdir, - "XDG_CACHE_HOME": tmpdir, - }, + env=_isolated_cache_env(tmpdir), ) assert "FAIL: Index database" in result.stderr assert "build-index" in result.stderr @@ -91,11 +101,7 @@ def test_doctor_exit_code_on_failure(self): capture_output=True, text=True, timeout=15, - env={ - **os.environ, - "HOME": tmpdir, - "XDG_CACHE_HOME": tmpdir, - }, + env=_isolated_cache_env(tmpdir), ) assert result.returncode == 1 diff --git a/tests/test_packaging.py b/tests/test_packaging.py index 41a0863..5f707d9 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -5,6 +5,7 @@ """ from __future__ import annotations +import shutil import subprocess import sys import zipfile @@ -15,6 +16,15 @@ PROJECT_ROOT = Path(__file__).parent.parent +def _uv_command() -> list[str]: + """Return a runnable uv command on platforms where Scripts may 
not be on PATH.""" + uv_executable = shutil.which("uv") + if uv_executable is not None: + return [uv_executable] + base_executable = getattr(sys, "_base_executable", sys.executable) + return [base_executable, "-m", "uv"] + + class TestWheelContent: """PKG-04: Built wheel contains synonyms.yaml.""" @@ -23,7 +33,7 @@ def built_wheel(self, tmp_path_factory) -> Path: """Build the wheel using uv build and return its path.""" dist_dir = tmp_path_factory.mktemp("dist") result = subprocess.run( - ["uv", "build", "--wheel", "--out-dir", str(dist_dir)], + _uv_command() + ["build", "--wheel", "--out-dir", str(dist_dir)], capture_output=True, text=True, cwd=str(PROJECT_ROOT), diff --git a/tests/test_publish.py b/tests/test_publish.py index 901e965..99a9b5f 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -7,8 +7,11 @@ from __future__ import annotations import sqlite3 +import sys from pathlib import Path +import pytest + from mcp_server_python_docs.ingestion.publish import ( atomic_swap, compute_sha256, @@ -343,6 +346,9 @@ def test_server_survives_rebuild(self, tmp_path): 4. Asserts the original RO connection still works 5. 
New RO connection sees new data """ + if sys.platform == "win32": + pytest.skip("Windows locks the live SQLite file during atomic swap") + index_path = tmp_path / "index.db" # Step 1: Create initial populated DB diff --git a/tests/test_retrieval_regression.py b/tests/test_retrieval_regression.py new file mode 100644 index 0000000..4bc013d --- /dev/null +++ b/tests/test_retrieval_regression.py @@ -0,0 +1,214 @@ +"""Curated retrieval regression coverage for search/get_docs behavior.""" +from __future__ import annotations + +import json +from pathlib import Path +from types import SimpleNamespace + +import pytest + +from mcp_server_python_docs.app_context import AppContext +from mcp_server_python_docs.errors import VersionNotFoundError +from mcp_server_python_docs.server import create_server +from mcp_server_python_docs.services.content import ContentService +from mcp_server_python_docs.services.search import SearchService +from mcp_server_python_docs.services.version import VersionService +from mcp_server_python_docs.storage.db import bootstrap_schema, get_readwrite_connection + +_CASES_PATH = Path(__file__).parent / "fixtures" / "retrieval_regression_cases.json" +_REGRESSION_CASES = json.loads(_CASES_PATH.read_text()) + + +@pytest.fixture +def regression_db(tmp_path): + """A small multi-version docs index for retrieval regression tests.""" + db_path = tmp_path / "retrieval-regression.db" + conn = get_readwrite_connection(db_path) + bootstrap_schema(conn) + + conn.execute( + "INSERT INTO doc_sets (id, source, version, language, label, is_default, base_url) " + "VALUES (1, 'python-docs', '3.12', 'en', 'Python 3.12', 0, " + "'https://docs.python.org/3.12/')" + ) + conn.execute( + "INSERT INTO doc_sets (id, source, version, language, label, is_default, base_url) " + "VALUES (2, 'python-docs', '3.13', 'en', 'Python 3.13', 1, " + "'https://docs.python.org/3.13/')" + ) + + conn.execute( + "INSERT INTO documents (id, doc_set_id, uri, slug, title, content_text, char_count) " 
+ "VALUES (1, 1, 'library/asyncio-task.html', 'library/asyncio-task.html', " + "'asyncio Task', 'Python 3.12 asyncio task documentation.', 300)" + ) + conn.execute( + "INSERT INTO documents (id, doc_set_id, uri, slug, title, content_text, char_count) " + "VALUES (2, 2, 'library/asyncio-task.html', 'library/asyncio-task.html', " + "'asyncio Task', 'Python 3.13 asyncio task documentation.', 300)" + ) + conn.execute( + "INSERT INTO documents (id, doc_set_id, uri, slug, title, content_text, char_count) " + "VALUES (3, 2, 'library/json.html', 'library/json.html', " + "'json module', 'Python 3.13 json module documentation.', 240)" + ) + + asyncio_312 = ( + "Python 3.12 TaskGroup documentation for concurrent task management. " + "Use TaskGroup to supervise multiple child tasks and await them together." + ) + asyncio_313 = ( + "Python 3.13 TaskGroup documentation for concurrent task management. " + "The section explains structured concurrency and highlights 3.13 behavior." + ) + + conn.execute( + "INSERT INTO sections (id, document_id, uri, anchor, heading, level, ordinal, " + "content_text, char_count) " + "VALUES (1, 1, 'library/asyncio-task.html#asyncio.TaskGroup', " + "'asyncio.TaskGroup', 'asyncio.TaskGroup', 2, 0, ?, ?)", + (asyncio_312, len(asyncio_312)), + ) + conn.execute( + "INSERT INTO sections (id, document_id, uri, anchor, heading, level, ordinal, " + "content_text, char_count) " + "VALUES (2, 2, 'library/asyncio-task.html#asyncio.TaskGroup', " + "'asyncio.TaskGroup', 'asyncio.TaskGroup', 2, 0, ?, ?)", + (asyncio_313, len(asyncio_313)), + ) + conn.execute( + "INSERT INTO sections (id, document_id, uri, anchor, heading, level, ordinal, " + "content_text, char_count) " + "VALUES (3, 2, 'library/asyncio-task.html#introduction', " + "'introduction', 'Introduction', 1, 1, " + "'Introduction to asyncio tasks in Python 3.13.', 46)" + ) + conn.execute( + "INSERT INTO sections (id, document_id, uri, anchor, heading, level, ordinal, " + "content_text, char_count) " + 
"VALUES (4, 3, 'library/json.html#json-parsing', " + "'json-parsing', 'JSON parsing', 2, 0, " + "'Parse JSON strings with json.loads and inspect JSON objects safely.', 67)" + ) + + conn.execute( + "INSERT INTO symbols (id, doc_set_id, qualified_name, normalized_name, module, " + "symbol_type, uri, anchor) " + "VALUES (1, 1, 'asyncio.TaskGroup', 'asyncio.taskgroup', 'asyncio', " + "'class', 'library/asyncio-task.html#asyncio.TaskGroup', 'asyncio.TaskGroup')" + ) + conn.execute( + "INSERT INTO symbols (id, doc_set_id, qualified_name, normalized_name, module, " + "symbol_type, uri, anchor) " + "VALUES (2, 2, 'asyncio.TaskGroup', 'asyncio.taskgroup', 'asyncio', " + "'class', 'library/asyncio-task.html#asyncio.TaskGroup', 'asyncio.TaskGroup')" + ) + conn.execute( + "INSERT INTO symbols (id, doc_set_id, qualified_name, normalized_name, module, " + "symbol_type, uri, anchor) " + "VALUES (3, 2, 'json.loads', 'json.loads', 'json', " + "'function', 'library/json.html#json.loads', 'json.loads')" + ) + + conn.commit() + conn.execute("INSERT INTO sections_fts(sections_fts) VALUES('rebuild')") + conn.execute("INSERT INTO symbols_fts(symbols_fts) VALUES('rebuild')") + conn.execute("INSERT INTO examples_fts(examples_fts) VALUES('rebuild')") + conn.commit() + + yield conn + conn.close() + + +def _make_app_context(db, detected_python_version: str | None) -> AppContext: + """Build an AppContext for direct tool invocation tests.""" + return AppContext( + db=db, + index_path=Path("retrieval-regression.db"), + search_service=SearchService(db, {}), + content_service=ContentService(db), + version_service=VersionService(db), + detected_python_version=detected_python_version, + detected_python_source="test fixture", + ) + + +def _make_ctx(app_context: AppContext): + """Build a minimal FastMCP tool context shim.""" + return SimpleNamespace( + request_context=SimpleNamespace(lifespan_context=app_context) + ) + + +def _assert_search_expectations(result, expect: dict) -> None: + """Assert the 
expected shape of a search result.""" + if "hits" in expect: + assert len(result.hits) == expect["hits"] + return + + assert len(result.hits) >= expect["min_hits"] + if "first_hit" in expect: + first_hit = result.hits[0] + for field, value in expect["first_hit"].items(): + assert getattr(first_hit, field) == value + if "versions" in expect: + returned_versions = {hit.version for hit in result.hits} + assert set(expect["versions"]).issubset(returned_versions) + + +def _assert_docs_expectations(result, expect: dict) -> None: + """Assert the expected shape of a get_docs result.""" + if "version" in expect: + assert result.version == expect["version"] + if "anchor" in expect: + assert result.anchor == expect["anchor"] + if "title" in expect: + assert result.title == expect["title"] + if "content_contains" in expect: + assert expect["content_contains"] in result.content + if "truncated" in expect: + assert result.truncated is expect["truncated"] + if "content_max_length" in expect: + assert len(result.content) <= expect["content_max_length"] + if expect.get("next_start_index") is True: + assert result.next_start_index is not None + + +@pytest.mark.parametrize( + "case", + _REGRESSION_CASES, + ids=[case["id"] for case in _REGRESSION_CASES], +) +def test_retrieval_regression_cases(case, regression_db): + """Keep core retrieval and defaulting behavior stable over time.""" + search_service = SearchService(regression_db, {}) + content_service = ContentService(regression_db) + + if case["operation"] == "search": + result = search_service.search(**case["input"]) + _assert_search_expectations(result, case["expect"]) + return + + if case["operation"] == "get_docs": + if case.get("error") == "VersionNotFoundError": + with pytest.raises(VersionNotFoundError): + content_service.get_docs(**case["input"]) + return + + result = content_service.get_docs(**case["input"]) + _assert_docs_expectations(result, case["expect"]) + return + + if case["operation"] == "server_get_docs_defaulted": 
def _isolated_cache_env(tmpdir: str) -> dict[str, str]:
    """Return a subprocess environment whose home/cache variables live in *tmpdir*.

    The result is a fresh dict copied from ``os.environ`` with ``HOME``,
    ``XDG_CACHE_HOME``, ``LOCALAPPDATA``, ``APPDATA`` and ``USERPROFILE``
    replaced, so that platformdirs resolves its cache root under the
    temporary directory. ``os.environ`` itself is not modified.
    """
    root = Path(tmpdir)
    env = dict(os.environ)
    env.update(
        HOME=str(root),
        XDG_CACHE_HOME=str(root),
        LOCALAPPDATA=str(root / "AppData" / "Local"),
        APPDATA=str(root / "AppData" / "Roaming"),
        USERPROFILE=str(root),
    )
    return env
def _setup_test_env(self, tmp_path):
    """Prepare an isolated cache directory containing a minimal index.db.

    Stores ``tmp_path`` as ``self.tmp_dir``, then records the subprocess
    environment and the matching cache directory returned by
    ``_isolated_cache_env`` as ``self.env`` and ``self.cache_dir``, and
    finally creates the test index inside that cache directory.
    """
    self.tmp_dir = tmp_path
    isolated_env, cache_root = _isolated_cache_env(self.tmp_dir)
    self.env = isolated_env
    self.cache_dir = cache_root
    _create_test_index(self.cache_dir)