diff --git a/CLAUDE.md b/CLAUDE.md index 79448a2..dce0e54 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,6 +4,9 @@ ## Development workflow +Before running the e2e tests, ensure the test prerequisites from +HACKING.md are installed. + Always run the test suite before committing: ```bash diff --git a/HACKING.md b/HACKING.md index f485822..b322539 100644 --- a/HACKING.md +++ b/HACKING.md @@ -35,6 +35,11 @@ curl -fL https://github.com/astral-sh/uv/releases/latest/download/uv-aarch64-unk No sudo is required to run the VM or the test suite. +**Running tests inside a VM (nested):** If you are running the e2e +tests from inside the VM itself, uncomment the Debian cloud image +offloader rules in `allowlist.txt` — the image download redirects to +hosts outside `*.debian.org` that are blocked by default. + ## Running the tests ```bash diff --git a/allowlist.txt b/allowlist.txt index 25ecde5..731766d 100644 --- a/allowlist.txt +++ b/allowlist.txt @@ -13,6 +13,30 @@ # POST https://api.github.com/repos/myorg/myrepo/issues # GET https://api.openweathermap.org/data/2.5/weather +# ── mitmproxy CA certificate ─────────────────────────────────────── +# Magic domain served by mitmproxy over plain HTTP. The guest +# fetches the CA cert at boot before any HTTPS traffic. +GET http://mitm.it/cert/pem + +# ── OS package repos (Debian) ───────────────────────────────────── +# The VM runs Debian. deb.debian.org is the primary apt CDN (Fastly). +# cloud.debian.org hosts cloud image checksums (large files redirect +# to offloaders — see the "Debian cloud images" section below). +GET https://deb.debian.org/* +GET https://security.debian.org/* +GET https://cloud.debian.org/* + +# ── OS package repos (Ubuntu — uncomment if using an Ubuntu image) ─ +# GET https://archive.ubuntu.com/* +# GET https://security.ubuntu.com/* +# GET https://ports.ubuntu.com/* +# If your Ubuntu mirror is a geo subdomain (e.g. us.archive.ubuntu.com), +# add it here — domain wildcards are not supported. 
+ +# ── Python package repos ────────────────────────────────────────── +GET https://pypi.org/* +GET https://files.pythonhosted.org/* + # ── Claude Code ──────────────────────────────────────────────────── # Anthropic API — scoped to the v1 API prefix so only API calls are # permitted, not arbitrary requests to the domain. @@ -24,17 +48,41 @@ GET https://api.anthropic.com/v1/* GET https://api.anthropic.com/api/* POST https://api.anthropic.com/api/* -# Claude Code binary downloads from Google Cloud Storage. GET-only -# to prevent POST-based exfiltration. Scoped to the known Anthropic -# release bucket; paths vary by version and platform. +# Claude Code binary downloads from Google Cloud Storage. Scoped +# to the known Anthropic release bucket; paths vary by version and platform. GET https://storage.googleapis.com/claude-code-dist-86c565f3-f756-42ad-8dfa-d59b1c096819/* GET https://downloads.claude.ai/claude-code-releases/* GET https://api.anthropic.com/api/hello # ── uv (Python package manager) ─────────────────────────────────── # Installer script and binary download. The install script lives at -# astral.sh and redirects to a GitHub release asset whose URL varies -# by version and platform. +# astral.sh; binary downloads come from releases.astral.sh (or GitHub +# release assets as a fallback). URLs vary by version and platform. GET https://astral.sh/uv/install.sh +GET https://releases.astral.sh/github/uv/releases/* GET https://github.com/astral-sh/uv/releases/* GET https://release-assets.githubusercontent.com/github-production-release-asset/* + +# ── Docker Hub ──────────────────────────────────────────────────── +# Registry API — paths vary by image name, tag, and sha256 digest +# (e.g. /v2/library/hello-world/manifests/latest). Scoped to /v2/. +GET https://registry-1.docker.io/v2/* +# Auth tokens — the registry returns 401 with a token URL whose +# query parameters vary per request (scope, service, etc.). 
+GET https://auth.docker.io/token* +# Blob storage — the registry redirects layer downloads to this +# Cloudflare R2 bucket. Paths contain per-blob sha256 digests. +GET https://docker-images-prod.6aa30f8b08e16409b46e0173d6de2f56.r2.cloudflarestorage.com/registry-v2/* + +# ── Debian cloud images (nested VM testing only) ────────────────── +# Only needed when running the e2e test suite inside a VM (i.e. the +# tests boot a nested QEMU guest). See HACKING.md for details. +# cloud.debian.org (explicitly allowed above) serves checksums +# directly but 302-redirects large files (qcow2) to offloader hosts +# at Umea University. The offloader is deterministic per-URL (hash), +# so different images may hit different hosts. Paths vary by arch, +# release, and date. +# GET https://gemmei.ftp.acc.umu.se/images/cloud/* +# GET https://saimei.ftp.acc.umu.se/images/cloud/* +# GET https://laotzu.ftp.acc.umu.se/images/cloud/* +# GET https://chuangtzu.ftp.acc.umu.se/images/cloud/* diff --git a/cloud-init/user-data b/cloud-init/user-data index 6a8f638..044d6dd 100644 --- a/cloud-init/user-data +++ b/cloud-init/user-data @@ -5,6 +5,7 @@ users: lock_passwd: true sudo: ALL=(ALL) NOPASSWD:ALL shell: /bin/bash + groups: docker, kvm ssh_authorized_keys: - __SSH_PUB_KEY__ @@ -53,6 +54,16 @@ write_files: #!/bin/sh printf '\n\033]8;;%s\a%s\033]8;;\a\n\n' "$1" "$1" + # Docker daemon proxy configuration. write_files runs before the + # packages stage, so this override is already in place when docker.io + # is installed and systemd first loads the docker.service unit. 
+ - path: /etc/systemd/system/docker.service.d/proxy.conf + content: | + [Service] + Environment="HTTP_PROXY=http://__HOST_IP__:__PROXY_PORT__" + Environment="HTTPS_PROXY=http://__HOST_IP__:__PROXY_PORT__" + Environment="NO_PROXY=localhost,127.0.0.1,__HOST_IP__" + - path: /etc/systemd/system/mnt-9p.mount content: | [Unit] @@ -144,11 +155,12 @@ write_files: > POST https://api.anthropic.com/v1/* > GET https://api.anthropic.com/v1/* - ## Trusted infrastructure (always allowed) + ## Default allowlist - Package repos (debian.org, ubuntu.com, pypi.org) and the - mitmproxy CA endpoint (mitm.it) are trusted at the proxy level - and need no allowlist rules. + Package repos (deb.debian.org, pypi.org), the mitmproxy CA + endpoint (mitm.it), and other infrastructure are included in the + default allowlist.txt. All network access is governed by that + single file — there are no hidden trusted domains. - path: /etc/systemd/system/home-vm-shared.service content: | @@ -169,6 +181,7 @@ packages: - curl - bindfs - git + - docker.io runcmd: - mkdir -p /mnt/9p /home/vm/shared diff --git a/filter.py b/filter.py index 7ed5bd8..1ef27f9 100644 --- a/filter.py +++ b/filter.py @@ -1,19 +1,13 @@ """ mitmproxy allowlist filter — controls what the VM can access. -Traffic is filtered at two levels: +All network access is governed by allowlist.txt. Each non-blank, +non-comment line must be: -1. **Trusted domains** (below): infrastructure the VM needs to function — - package repos, CA cert endpoint. All HTTP methods and paths are allowed. - Edit these only when changing system-level dependencies. + METHOD https://hostname/path/pattern -2. **User rules** (allowlist.txt): per-method, per-URL patterns that grant - access to specific endpoints. Each non-blank, non-comment line must be: - - METHOD https://hostname/path/pattern - - Wildcards (*) are allowed only in the path, not in the hostname. - The filter reloads the file automatically when it changes. 
+Wildcards (*) are allowed only in the path, not in the hostname. +The filter reloads the file automatically when it changes. """ import json @@ -24,26 +18,6 @@ from mitmproxy import http -# ── Trusted domains ───────────────────────────────────────────────── -# Full-domain allowlist for system infrastructure. Patterns are -# matched with re.fullmatch against the request hostname. -TRUSTED_DOMAINS: list[str] = [ - # OS package repos — scoped to actual apt hostnames - r".*\.debian\.org", - "archive.ubuntu.com", - "security.ubuntu.com", - "ports.ubuntu.com", - r".*\.archive\.ubuntu\.com", - # Python package repos - "pypi.org", - r".*\.pypi\.org", - "files.pythonhosted.org", - # mitmproxy's magic domain that serves the CA cert - "mitm.it", -] - -_trusted = [re.compile(p) for p in TRUSTED_DOMAINS] - # ── Paths ─────────────────────────────────────────────────────────── ALLOWLIST_PATH = Path(__file__).parent / "allowlist.txt" BLOCKED_LOG = Path(__file__).parent / ".vm" / "blocked.jsonl" @@ -109,13 +83,9 @@ def is_allowed( ) -> bool: """Return True if the request is permitted. - Checks trusted domains first (all methods/paths allowed), then user - rules. A ``GET`` rule implicitly allows ``HEAD`` requests to the - same URL pattern. + A ``GET`` rule implicitly allows ``HEAD`` requests to the same URL + pattern. """ - if any(p.fullmatch(host) for p in _trusted): - return True - req_path = urlparse(url).path or "/" for rule_method, url_pattern in rules: diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 869945e..3216c92 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -237,7 +237,7 @@ def test_cloud_init_success(running_vm): SSH subprocess open during the entire cloud-init run (which includes package installation and can take several minutes in TCG mode). 
""" - deadline = time.monotonic() + 300 + deadline = time.monotonic() + 600 last_detail = "" while time.monotonic() < deadline: try: @@ -262,7 +262,7 @@ def test_cloud_init_success(running_vm): if "status: error" in r.stdout: pytest.fail(f"cloud-init finished with errors:\n{r.stdout}") time.sleep(10) - pytest.fail("cloud-init did not complete within 300s") + pytest.fail("cloud-init did not complete within 600s") def test_curl_http_pypi_org(running_vm): @@ -307,6 +307,31 @@ def test_curl_https_pypi_org(running_vm): ) +def test_docker_hello_world(running_vm): + """docker run hello-world should pull the image and print the greeting. + + Exercises the Docker daemon's proxy configuration (systemd service + override) and the Docker Hub allowlist rules. The daemon pulls the + image through mitmproxy, then runs the container locally. + """ + _progress("Running docker hello-world (includes image pull)…") + result = _vm_ssh( + "docker run hello-world 2>&1", + timeout=180, + ) + if result.returncode != 0: + _dump_logs() + pytest.fail( + f"docker run hello-world failed (rc={result.returncode})\n" + f"stdout: {result.stdout[:1000]}\n" + f"stderr: {result.stderr[:1000]}" + ) + assert "Hello from Docker!" in result.stdout, ( + f"Expected 'Hello from Docker!' 
in output.\n" + f"stdout: {result.stdout[:1000]}" + ) + + def test_blocked_domain(running_vm): """Requests to domains not in filter.py's allowlist should be blocked with 403.""" result = _vm_ssh( diff --git a/tests/test_filter.py b/tests/test_filter.py index c04b6dd..d53d58f 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -107,15 +107,12 @@ def test_missing_file_returns_empty(self, tmp_path): # --------------------------------------------------------------------------- class TestIsAllowed: - def test_trusted_domain_allows_any_method(self): - assert fm.is_allowed([], "GET", "pypi.org", "https://pypi.org/simple/") - assert fm.is_allowed([], "POST", "pypi.org", "https://pypi.org/") + def test_empty_rules_block_known_domains(self): + """With no rules, even well-known domains are blocked.""" + assert not fm.is_allowed([], "GET", "pypi.org", "https://pypi.org/simple/") + assert not fm.is_allowed([], "GET", "deb.debian.org", "http://deb.debian.org/") - def test_trusted_domain_regex(self): - assert fm.is_allowed([], "GET", "ftp.debian.org", "http://ftp.debian.org/") - assert fm.is_allowed([], "GET", "security.debian.org", "http://security.debian.org/") - - def test_non_trusted_domain_blocked(self): + def test_non_matching_domain_blocked(self): assert not fm.is_allowed([], "GET", "example.com", "http://example.com/") def test_method_url_rule_matching(self): diff --git a/uv.lock b/uv.lock index 129183c..3ae98a2 100644 --- a/uv.lock +++ b/uv.lock @@ -6,10 +6,6 @@ resolution-markers = [ "python_full_version < '3.14'", ] -[options] -exclude-newer = "2026-04-11T02:09:05.342972377Z" -exclude-newer-span = "PT0S" - [[package]] name = "agent-vm" version = "0.1.0" diff --git a/vm.py b/vm.py index f346a30..50d356a 100755 --- a/vm.py +++ b/vm.py @@ -438,7 +438,13 @@ def build_qemu_args(backend: Backend, memory: str) -> list[str]: def start_mitmproxy(proxy_port: int = PROXY_PORT) -> subprocess.Popen: """Start mitmdump in the background, logging to .vm/mitmdump.log.""" 
log_path = STATE_DIR / "mitmdump.log" - cmd = ["mitmdump", "--listen-host", "127.0.0.1", "-p", str(proxy_port)] + cmd = [ + "mitmdump", "--listen-host", "127.0.0.1", "-p", str(proxy_port), + # Stream large responses instead of buffering them in memory. + # Without this, a 200+ MB download (e.g. Claude Code binary) can + # OOM the process — especially in a nested VM with limited RAM. + "--set", "stream_large_bodies=1m", + ] # If this host itself uses an upstream proxy (e.g. we're inside a sandboxed # VM), forward mitmproxy's own outbound traffic through it.