diff --git a/.github/scripts/modal-run-integ-tests.sh b/.github/scripts/modal-run-integ-tests.sh
index e05702b9b..9336eb480 100755
--- a/.github/scripts/modal-run-integ-tests.sh
+++ b/.github/scripts/modal-run-integ-tests.sh
@@ -2,6 +2,12 @@
 # Run simulation integration tests
 # Usage: ./modal-run-integ-tests.sh [us-version]
 # Environment: beta runs all tests, prod excludes beta_only tests
+#
+# Required env vars (set in the calling workflow from org-wide GH secrets):
+#   GATEWAY_AUTH_ISSUER, GATEWAY_AUTH_AUDIENCE,
+#   GATEWAY_AUTH_CLIENT_ID, GATEWAY_AUTH_CLIENT_SECRET
+# Used to fetch an Auth0 client_credentials token that the pytest client
+# sends as Authorization: Bearer on every call to the gated gateway.
 
 set -euo pipefail
 
@@ -9,10 +15,80 @@ ENVIRONMENT="${1:?Environment required (beta or prod)}"
 BASE_URL="${2:?Base URL required}"
 US_VERSION="${3:-}"
 
+: "${GATEWAY_AUTH_ISSUER:?GATEWAY_AUTH_ISSUER is required to mint an integ-test token}"
+: "${GATEWAY_AUTH_AUDIENCE:?GATEWAY_AUTH_AUDIENCE is required to mint an integ-test token}"
+: "${GATEWAY_AUTH_CLIENT_ID:?GATEWAY_AUTH_CLIENT_ID is required to mint an integ-test token}"
+: "${GATEWAY_AUTH_CLIENT_SECRET:?GATEWAY_AUTH_CLIENT_SECRET is required to mint an integ-test token}"
+
+ISSUER="${GATEWAY_AUTH_ISSUER%/}"
+TOKEN_URL="$ISSUER/oauth/token"
+
+# Build the token-request JSON with Python so that any ", \, or newline in
+# the client secret is encoded correctly (Auth0-generated secrets are
+# random strings that routinely contain characters that break a shell
+# heredoc).
+TOKEN_REQUEST_JSON=$(
+  CLIENT_ID="$GATEWAY_AUTH_CLIENT_ID" \
+  CLIENT_SECRET="$GATEWAY_AUTH_CLIENT_SECRET" \
+  AUDIENCE="$GATEWAY_AUTH_AUDIENCE" \
+  python3 -c '
+import json, os
+print(json.dumps({
+    "client_id": os.environ["CLIENT_ID"],
+    "client_secret": os.environ["CLIENT_SECRET"],
+    "audience": os.environ["AUDIENCE"],
+    "grant_type": "client_credentials",
+}))
+'
+)
+
+echo "Requesting client_credentials access token from $TOKEN_URL"
+# The request body carries the client secret, so it is piped to curl on stdin
+# (--data-binary @-) instead of being passed on argv, where any local process
+# could read it. printf is a shell builtin, so it adds no argv exposure.
+#
+# curl's status is tested explicitly: under `set -e` a bare failing
+# assignment would abort the script before the error body that
+# --fail-with-body preserves was ever printed.
+if ! TOKEN_RESPONSE=$(
+  printf '%s' "$TOKEN_REQUEST_JSON" \
+    | curl --fail-with-body --silent --show-error \
+      --request POST "$TOKEN_URL" \
+      --header "content-type: application/json" \
+      --data-binary @-
+); then
+  echo "Token request to $TOKEN_URL failed; response body follows:" >&2
+  printf '%s\n' "$TOKEN_RESPONSE" >&2
+  exit 1
+fi
+
+ACCESS_TOKEN=$(
+  printf '%s' "$TOKEN_RESPONSE" | python3 -c '
+import json, sys
+data = json.load(sys.stdin)
+token = data.get("access_token")
+if not token:
+    sys.exit(f"Auth0 response missing access_token: {data}")
+print(token)
+'
+)
+if [ -z "$ACCESS_TOKEN" ]; then
+  echo "Failed to extract access_token from Auth0 response" >&2
+  exit 1
+fi
+
+# Register the token with the GitHub Actions log masker so it is redacted if
+# pytest (or anything downstream) echoes the environment or request headers.
+# Skipped outside Actions, where the command would just print the token.
+if [ "${GITHUB_ACTIONS:-}" = "true" ]; then
+  echo "::add-mask::$ACCESS_TOKEN"
+fi
+
 cd projects/policyengine-apis-integ
 uv sync --extra test
 
 export simulation_integ_test_base_url="$BASE_URL"
+export simulation_integ_test_access_token="$ACCESS_TOKEN"
 
 if [ -n "$US_VERSION" ]; then
     export simulation_integ_test_us_model_version="$US_VERSION"
diff --git a/.github/scripts/modal-sync-secrets.sh b/.github/scripts/modal-sync-secrets.sh
index b56384d88..0b0bd825a 100755
--- a/.github/scripts/modal-sync-secrets.sh
+++ b/.github/scripts/modal-sync-secrets.sh
@@ -25,4 +25,55 @@ if [ -n "${GCP_CREDENTIALS_JSON:-}" ]; then
         --force || true
 fi
 
+# Sync gateway auth secret. The gateway container consumes issuer+audience to
+# validate bearer tokens; client_id/secret are stored alongside so rotating the
+# Auth0 M2M app updates every consumer from one place.
+#
+# Fail loud if some but not all of the four GH secrets are present — a
+# partial config would silently leave the Modal secret stale or missing,
+# which surfaces as 503s from /require_auth on every gated request.
+GATEWAY_AUTH_VARS=(
+  GATEWAY_AUTH_ISSUER
+  GATEWAY_AUTH_AUDIENCE
+  GATEWAY_AUTH_CLIENT_ID
+  GATEWAY_AUTH_CLIENT_SECRET
+)
+present=()
+missing=()
+for var in "${GATEWAY_AUTH_VARS[@]}"; do
+  if [ -n "${!var:-}" ]; then
+    present+=("$var")
+  else
+    missing+=("$var")
+  fi
+done
+
+if [ ${#present[@]} -gt 0 ] && [ ${#missing[@]} -gt 0 ]; then
+  echo "Partial GATEWAY_AUTH_* GitHub secrets detected." >&2
+  echo "  Present: ${present[*]}" >&2
+  echo "  Missing: ${missing[*]}" >&2
+  echo "Refusing to write a partial gateway-auth Modal secret." >&2
+  exit 1
+fi
+
+if [ ${#present[@]} -eq ${#GATEWAY_AUTH_VARS[@]} ]; then
+  # Auth0 issuer strings are expected to end with "/" to match the `iss`
+  # claim and JWKS-url construction on the verifier side. Normalize here
+  # so an operator who stored the GH secret without the trailing slash
+  # doesn't silently break JWT validation on every gated call.
+  NORMALIZED_ISSUER="$GATEWAY_AUTH_ISSUER"
+  case "$NORMALIZED_ISSUER" in
+    */) ;;
+    *) NORMALIZED_ISSUER="$NORMALIZED_ISSUER/" ;;
+  esac
+
+  uv run modal secret create gateway-auth \
+    "GATEWAY_AUTH_ISSUER=$NORMALIZED_ISSUER" \
+    "GATEWAY_AUTH_AUDIENCE=$GATEWAY_AUTH_AUDIENCE" \
+    "GATEWAY_AUTH_CLIENT_ID=$GATEWAY_AUTH_CLIENT_ID" \
+    "GATEWAY_AUTH_CLIENT_SECRET=$GATEWAY_AUTH_CLIENT_SECRET" \
+    --env="$MODAL_ENV" \
+    --force
+fi
+
 echo "Modal secrets synced"
diff --git a/.github/workflows/modal-deploy.reusable.yml b/.github/workflows/modal-deploy.reusable.yml
index 07ce2d777..044c51081 100644
--- a/.github/workflows/modal-deploy.reusable.yml
+++ b/.github/workflows/modal-deploy.reusable.yml
@@ -55,6 +55,10 @@ jobs:
           MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
           LOGFIRE_TOKEN: ${{ secrets.LOGFIRE_TOKEN }}
           GCP_CREDENTIALS_JSON: ${{ secrets.GCP_CREDENTIALS_JSON }}
+          GATEWAY_AUTH_ISSUER: ${{ secrets.GATEWAY_AUTH_ISSUER }}
+          GATEWAY_AUTH_AUDIENCE: ${{ secrets.GATEWAY_AUTH_AUDIENCE }}
+          GATEWAY_AUTH_CLIENT_ID: ${{ secrets.GATEWAY_AUTH_CLIENT_ID }}
+          GATEWAY_AUTH_CLIENT_SECRET: ${{ secrets.GATEWAY_AUTH_CLIENT_SECRET }}
         run: ../../.github/scripts/modal-sync-secrets.sh "${{ inputs.modal_environment }}" "${{ inputs.environment }}"
 
       - name: Deploy simulation API to Modal
@@ -102,4 +106,9 @@ jobs:
         run: ./scripts/generate-clients.sh
 
       - name: Run simulation integration tests
+        env:
+          GATEWAY_AUTH_ISSUER: ${{ secrets.GATEWAY_AUTH_ISSUER }}
+          GATEWAY_AUTH_AUDIENCE: ${{ secrets.GATEWAY_AUTH_AUDIENCE }}
+          GATEWAY_AUTH_CLIENT_ID: ${{ secrets.GATEWAY_AUTH_CLIENT_ID }}
+          GATEWAY_AUTH_CLIENT_SECRET: ${{ secrets.GATEWAY_AUTH_CLIENT_SECRET }}
         run: .github/scripts/modal-run-integ-tests.sh "${{ inputs.environment }}" "${{ needs.deploy.outputs.simulation_api_url }}" "${{ needs.deploy.outputs.us_version }}"
diff --git a/projects/policyengine-api-simulation/src/modal/gateway/app.py b/projects/policyengine-api-simulation/src/modal/gateway/app.py
index 55d506007..796dfed6f 100644
--- a/projects/policyengine-api-simulation/src/modal/gateway/app.py
+++ b/projects/policyengine-api-simulation/src/modal/gateway/app.py
@@ -13,6 +13,12 @@
 # Stable app name - this should rarely change
 app = modal.App("policyengine-simulation-gateway")
 
+# Injects GATEWAY_AUTH_ISSUER, GATEWAY_AUTH_AUDIENCE, GATEWAY_AUTH_CLIENT_ID,
+# and GATEWAY_AUTH_CLIENT_SECRET. Only the issuer and audience are consumed
+# by this container (see gateway.auth); the client id/secret are kept in the
+# same secret so a single rotation updates every consumer at once.
+gateway_auth_secret = modal.Secret.from_name("gateway-auth")
+
 # Lightweight image for gateway - no heavy dependencies
 gateway_image = (
     modal.Image.debian_slim(python_version="3.13")
@@ -30,7 +36,7 @@
 )
 
 
-@app.function(image=gateway_image)
+@app.function(image=gateway_image, secrets=[gateway_auth_secret])
 @modal.asgi_app()
 def web_app():
     """
@@ -44,15 +50,21 @@ def web_app():
     """
     from fastapi import FastAPI
 
-    from src.modal.gateway.auth import enforce_production_auth_guard
+    from src.modal.gateway.auth import (
+        enforce_auth_configured_guard,
+        enforce_production_auth_guard,
+    )
     from src.modal.gateway.endpoints import router
 
-    # Startup guard: crash the container if GATEWAY_AUTH_DISABLED is set in
-    # a production-equivalent Modal environment, or set without the
-    # explicit acknowledgement env var. This prevents the bypass from
-    # accidentally shipping to prod if a dev deploy grabs the wrong secret
-    # bundle. See gateway.auth.enforce_production_auth_guard for the rules.
+    # Startup guards:
+    #   1. Crash if GATEWAY_AUTH_DISABLED is set in a production-equivalent
+    #      Modal env, or set without the explicit acknowledgement — prevents
+    #      the bypass from accidentally shipping to prod.
+    #   2. Crash if auth is enabled but issuer/audience aren't configured —
+    #      prevents a silently broken gateway that returns 503 on every
+    #      gated request.
     enforce_production_auth_guard()
+    enforce_auth_configured_guard()
 
     api = FastAPI(
         title="PolicyEngine Simulation Gateway",
diff --git a/projects/policyengine-api-simulation/src/modal/gateway/auth.py b/projects/policyengine-api-simulation/src/modal/gateway/auth.py
index 385113b7f..0d7fb80a6 100644
--- a/projects/policyengine-api-simulation/src/modal/gateway/auth.py
+++ b/projects/policyengine-api-simulation/src/modal/gateway/auth.py
@@ -92,6 +92,13 @@ def _get_decoder() -> JWTDecoder:
             f"{GATEWAY_AUTH_ISSUER_ENV} and {GATEWAY_AUTH_AUDIENCE_ENV} or "
             f"{GATEWAY_AUTH_DISABLED_ENV}=1 for local/test use."
         )
+    # The verifier expects issuer to end with "/" so that Auth0's `iss`
+    # claim matches and the JWKS URL is constructed correctly. Operators
+    # storing the secret without the trailing slash would otherwise see
+    # every gated request fail with an opaque JWKS-fetch or iss-mismatch
+    # error.
+    if not issuer.endswith("/"):
+        issuer = issuer + "/"
     return _build_decoder(issuer, audience)
 
 
@@ -169,6 +176,34 @@ def enforce_production_auth_guard() -> None:
         pass
 
 
+class AuthMisconfiguredError(RuntimeError):
+    """Refuse to start when the issuer/audience env vars are missing in prod."""
+
+
+def enforce_auth_configured_guard() -> None:
+    """Crash the ASGI factory if auth is enabled but misconfigured.
+
+    Without this, a missing ``GATEWAY_AUTH_ISSUER`` / ``GATEWAY_AUTH_AUDIENCE``
+    (e.g. the ``gateway-auth`` Modal secret failed to attach, or a GH secret
+    is misspelled) surfaces only as 503s at request time from
+    :func:`require_auth`. Fail fast at container boot so Modal's deploy
+    reports the misconfiguration instead of a silently broken gateway.
+    """
+    if _auth_disabled():
+        return
+
+    issuer = os.environ.get(GATEWAY_AUTH_ISSUER_ENV)
+    audience = os.environ.get(GATEWAY_AUTH_AUDIENCE_ENV)
+    if not issuer or not audience:
+        raise AuthMisconfiguredError(
+            "Gateway auth is enabled but "
+            f"{GATEWAY_AUTH_ISSUER_ENV}/{GATEWAY_AUTH_AUDIENCE_ENV} are not set "
+            "in the container environment. Verify the 'gateway-auth' Modal "
+            "secret is attached and synced from the GATEWAY_AUTH_* GitHub "
+            "Actions secrets."
+        )
+
+
 def require_auth(
     token: HTTPAuthorizationCredentials | None = Depends(_bearer_scheme),
 ) -> dict | None:
diff --git a/projects/policyengine-api-simulation/tests/gateway/test_auth.py b/projects/policyengine-api-simulation/tests/gateway/test_auth.py
index a9a98e416..d983fee71 100644
--- a/projects/policyengine-api-simulation/tests/gateway/test_auth.py
+++ b/projects/policyengine-api-simulation/tests/gateway/test_auth.py
@@ -268,3 +268,92 @@ def test__given_disabled_in_dev_with_correct_ack__then_allows_and_logs(
         assert any(
             "GATEWAY AUTH IS DISABLED" in record.message for record in caplog.records
         ), f"Expected critical auth-disabled banner, got {caplog.records!r}"
+
+
+class TestAuthConfiguredGuard:
+    """``enforce_auth_configured_guard`` crashes the ASGI factory at boot
+    when auth is enabled but issuer/audience env vars are missing."""
+
+    def test__given_auth_disabled__then_guard_noops(self, monkeypatch):
+        monkeypatch.setenv(auth_module.GATEWAY_AUTH_DISABLED_ENV, "1")
+        monkeypatch.delenv(auth_module.GATEWAY_AUTH_ISSUER_ENV, raising=False)
+        monkeypatch.delenv(auth_module.GATEWAY_AUTH_AUDIENCE_ENV, raising=False)
+
+        auth_module.enforce_auth_configured_guard()
+
+    def test__given_issuer_missing__then_raises(self, monkeypatch):
+        monkeypatch.delenv(auth_module.GATEWAY_AUTH_DISABLED_ENV, raising=False)
+        monkeypatch.delenv(auth_module.GATEWAY_AUTH_ISSUER_ENV, raising=False)
+        monkeypatch.setenv(auth_module.GATEWAY_AUTH_AUDIENCE_ENV, "aud")
+
+        with pytest.raises(auth_module.AuthMisconfiguredError):
+            auth_module.enforce_auth_configured_guard()
+
+    def test__given_audience_missing__then_raises(self, monkeypatch):
+        monkeypatch.delenv(auth_module.GATEWAY_AUTH_DISABLED_ENV, raising=False)
+        monkeypatch.setenv(
+            auth_module.GATEWAY_AUTH_ISSUER_ENV, "https://tenant.auth0.com/"
+        )
+        monkeypatch.delenv(auth_module.GATEWAY_AUTH_AUDIENCE_ENV, raising=False)
+
+        with pytest.raises(auth_module.AuthMisconfiguredError):
+            auth_module.enforce_auth_configured_guard()
+
+    def test__given_both_set__then_noops(self, monkeypatch):
+        monkeypatch.delenv(auth_module.GATEWAY_AUTH_DISABLED_ENV, raising=False)
+        monkeypatch.setenv(
+            auth_module.GATEWAY_AUTH_ISSUER_ENV, "https://tenant.auth0.com/"
+        )
+        monkeypatch.setenv(auth_module.GATEWAY_AUTH_AUDIENCE_ENV, "aud")
+
+        auth_module.enforce_auth_configured_guard()
+
+
+class TestIssuerNormalization:
+    """``_get_decoder`` appends a trailing "/" to issuer values that lack
+    one, so Auth0's ``iss`` claim and JWKS URL construction line up."""
+
+    def test__given_issuer_without_trailing_slash__then_decoder_receives_slash(
+        self, monkeypatch
+    ):
+        monkeypatch.setenv(
+            auth_module.GATEWAY_AUTH_ISSUER_ENV, "https://tenant.auth0.com"
+        )
+        monkeypatch.setenv(auth_module.GATEWAY_AUTH_AUDIENCE_ENV, "aud")
+        auth_module.reset_decoder_cache()
+
+        captured = {}
+
+        def fake_builder(issuer, audience):
+            captured["issuer"] = issuer
+            captured["audience"] = audience
+            return object()
+
+        monkeypatch.setattr(auth_module, "_build_decoder", fake_builder)
+
+        auth_module._get_decoder()
+
+        assert captured["issuer"] == "https://tenant.auth0.com/"
+        assert captured["audience"] == "aud"
+
+    def test__given_issuer_with_trailing_slash__then_decoder_receives_unchanged(
+        self, monkeypatch
+    ):
+        monkeypatch.setenv(
+            auth_module.GATEWAY_AUTH_ISSUER_ENV, "https://tenant.auth0.com/"
+        )
+        monkeypatch.setenv(auth_module.GATEWAY_AUTH_AUDIENCE_ENV, "aud")
+        auth_module.reset_decoder_cache()
+
+        captured = {}
+
+        def fake_builder(issuer, audience):
+            captured["issuer"] = issuer
+            captured["audience"] = audience
+            return object()
+
+        monkeypatch.setattr(auth_module, "_build_decoder", fake_builder)
+
+        auth_module._get_decoder()
+
+        assert captured["issuer"] == "https://tenant.auth0.com/"
diff --git a/projects/policyengine-apis-integ/tests/simulation/test_auth_smoke.py b/projects/policyengine-apis-integ/tests/simulation/test_auth_smoke.py
new file mode 100644
index 000000000..b0035ad82
--- /dev/null
+++ b/projects/policyengine-apis-integ/tests/simulation/test_auth_smoke.py
@@ -0,0 +1,74 @@
+"""Authenticated smoke tests that must pass in both beta and prod.
+
+These tests assert the end-to-end auth wiring is functional: the gateway
+has the ``gateway-auth`` Modal secret attached, the JWKS-fetch and token
+verification work against the configured Auth0 tenant, and the test
+harness can mint a bearer token that the gateway accepts.
+
+They intentionally do NOT use ``@pytest.mark.beta_only`` so they run in the
+prod deployment job too. Without an auth test in the prod integ suite, a
+misconfigured ``gateway-auth`` secret in the ``main`` Modal environment
+would pass CI while serving 503s to every real client.
+"""
+
+from __future__ import annotations
+
+import httpx
+import pytest
+
+from .conftest import settings
+
+
+pytestmark = pytest.mark.skipif(
+    not settings.access_token,
+    reason="Auth token not configured; skipping auth smoke tests (dev only).",
+)
+
+
+def _base() -> str:
+    return settings.base_url.rstrip("/")
+
+
+def test_gated_endpoint_rejects_missing_token() -> None:
+    """No ``Authorization`` header on a gated endpoint must be rejected.
+
+    Without a token the gateway's ``Depends(require_auth)`` surfaces a 403
+    (HTTPBearer auto_error=False + JWTDecoder rejects). A 2xx here means
+    the auth dependency is not actually wired and the gateway is open.
+    """
+    response = httpx.get(
+        f"{_base()}/jobs/auth-smoke-probe-no-token",
+        timeout=30.0,
+    )
+
+    assert response.status_code in (401, 403), (
+        f"Expected the gated /jobs endpoint to reject an unauthenticated "
+        f"request with 401/403, got {response.status_code}: {response.text[:200]}"
+    )
+
+
+def test_gated_endpoint_accepts_valid_token() -> None:
+    """With a valid bearer token the endpoint must advance past auth.
+
+    The probe job id will not resolve, so the expected status is 404.
+    Any auth-layer status (401, 403, 503) means the container's
+    ``gateway-auth`` secret is misattached or ``GATEWAY_AUTH_ISSUER`` /
+    ``GATEWAY_AUTH_AUDIENCE`` do not match the tenant that minted the
+    token — which is exactly the silent-failure mode this test guards.
+    """
+    response = httpx.get(
+        f"{_base()}/jobs/auth-smoke-probe-does-not-exist",
+        headers={"Authorization": f"Bearer {settings.access_token}"},
+        timeout=30.0,
+    )
+
+    auth_failures = {401, 403, 503}
+    assert response.status_code not in auth_failures, (
+        f"Gated endpoint rejected a valid token with {response.status_code}: "
+        f"{response.text[:200]}. Check that the gateway-auth Modal secret "
+        f"in the deploy environment matches the Auth0 tenant minting the token."
+    )
+    assert response.status_code == 404, (
+        f"Expected 404 for an unknown job id after auth, got "
+        f"{response.status_code}: {response.text[:200]}"
+    )