Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Workflow: scenario parity + live integration tests for the Python SDK.
name: Integration Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

jobs:
  # Parity check runs on every PR and push: confirms every scenario in
  # www.hotdata.dev/api/test-scenarios.yaml has a matching test file in this
  # repo. www.hotdata.dev is private, so we fetch via the GitHub App token —
  # same pattern as regenerate.yml.
  scenario-parity:
    runs-on: ubuntu-latest
    steps:
      - name: Generate GitHub App token
        id: app-token
        uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
        with:
          app-id: 3060111
          private-key: ${{ secrets.HOTDATA_AUTOMATION_PRIVATE_KEY }}
          owner: hotdata-dev
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.12'
      - name: Install PyYAML
        run: pip install --quiet pyyaml
      - name: Fetch scenarios manifest
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token }}
        run: |
          # -f fails the step on HTTP errors; the raw media type returns the
          # file body instead of the JSON contents envelope.
          curl -sS -f -L \
            -H "Accept: application/vnd.github.v3.raw" \
            -H "Authorization: Bearer $GH_TOKEN" \
            https://api.github.com/repos/hotdata-dev/www.hotdata.dev/contents/api/test-scenarios.yaml \
            -o test-scenarios.yaml
      - name: Check parity
        run: |
          python3 - <<'PY'
          import sys, pathlib, yaml
          scenarios = yaml.safe_load(pathlib.Path("test-scenarios.yaml").read_text())["scenarios"]
          missing = []
          for s in scenarios:
              # Scenarios listing "python" under optional_for may legitimately
              # have no test file in this repo.
              if "python" in (s.get("optional_for") or []):
                  continue
              expected = pathlib.Path("tests/integration") / f"test_{s['name']}.py"
              if not expected.exists():
                  missing.append(str(expected))
          if missing:
              # ::error:: surfaces the failure as a PR annotation; exit 1
              # fails the job (contrast: regenerate.yml only warns).
              print(f"::error::sdk-python is missing tests for {len(missing)} scenarios:")
              for m in missing:
                  print(f" - {m}")
              sys.exit(1)
          print(f"All {len(scenarios)} scenarios have corresponding test files.")
          PY

# Integration tests run against production. Skipped automatically by the
# conftest if HOTDATA_SDK_TEST_API_KEY / HOTDATA_SDK_TEST_WORKSPACE_ID aren't
# set (e.g. PRs from forks where secrets aren't injected).
  integration:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.12'
      - name: Install package and test deps
        run: |
          pip install --quiet -r requirements.txt -r test-requirements.txt
          # Editable install so tests import the checked-out sources.
          pip install --quiet -e .
      - name: Run integration tests
        env:
          # vars.* are non-secret repo variables; only the API key is masked.
          HOTDATA_SDK_TEST_API_URL: ${{ vars.HOTDATA_SDK_TEST_API_URL }}
          HOTDATA_SDK_TEST_API_KEY: ${{ secrets.HOTDATA_SDK_TEST_API_KEY }}
          HOTDATA_SDK_TEST_WORKSPACE_ID: ${{ vars.HOTDATA_SDK_TEST_WORKSPACE_ID }}
          HOTDATA_SDK_TEST_CONNECTION_ID: ${{ vars.HOTDATA_SDK_TEST_CONNECTION_ID }}
        run: pytest tests/integration -v
29 changes: 29 additions & 0 deletions .github/workflows/regenerate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,35 @@ jobs:
# cd away from the source tree so the import resolves against the installed wheel.
cd /tmp && python -c "import hotdata; print(hotdata.__version__)"

- name: Check integration test scenario parity
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
  run: |
    curl -sS -f -L \
      -H "Accept: application/vnd.github.v3.raw" \
      -H "Authorization: Bearer $GH_TOKEN" \
      https://api.github.com/repos/hotdata-dev/www.hotdata.dev/contents/api/test-scenarios.yaml \
      -o test-scenarios.yaml
    pip install --quiet pyyaml
    # Warning-only variant of the check in integration-tests.yml: a regen PR
    # should still open even when new scenarios don't have tests yet.
    python3 - <<'PY'
    import sys, pathlib, yaml
    scenarios = yaml.safe_load(pathlib.Path("test-scenarios.yaml").read_text())["scenarios"]
    missing = []
    for s in scenarios:
        if "python" in (s.get("optional_for") or []):
            continue
        expected = pathlib.Path("tests/integration") / f"test_{s['name']}.py"
        if not expected.exists():
            missing.append(str(expected))
    if missing:
        print(f"::warning::sdk-python is missing tests for {len(missing)} scenarios after regen:")
        for m in missing:
            print(f" - {m}")
    else:
        print(f"All {len(scenarios)} scenarios have corresponding test files.")
    PY
    # Keep the fetched manifest out of the regen commit.
    rm -f test-scenarios.yaml

- name: Create PR
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8
with:
Expand Down
Empty file added tests/__init__.py
Empty file.
Empty file added tests/integration/__init__.py
Empty file.
124 changes: 124 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Shared fixtures for SDK integration tests.

Tests run against production. See www.hotdata.dev/api/README.md for the
contract — env vars, naming conventions, blast-radius rules.
"""

from __future__ import annotations

import os
import uuid
from dataclasses import dataclass
from typing import Callable, Iterator

import pytest

from hotdata import ApiClient, Configuration
from hotdata.api.connections_api import ConnectionsApi
from hotdata.api.datasets_api import DatasetsApi
from hotdata.api.indexes_api import IndexesApi
from hotdata.api.saved_queries_api import SavedQueriesApi
from hotdata.api.secrets_api import SecretsApi
from hotdata.api.workspaces_api import WorkspacesApi


REQUIRED_ENV = ("HOTDATA_SDK_TEST_API_KEY", "HOTDATA_SDK_TEST_WORKSPACE_ID")
DEFAULT_API_URL = "https://api.hotdata.dev"


@dataclass(frozen=True)
class TestEnv:
    """Immutable snapshot of the HOTDATA_SDK_TEST_* environment.

    `connection_id` is optional: scenarios that need it skip when it is
    absent (see the `connection_id` fixture).
    """

    # Despite the Test* name this is not a test class; keep pytest from ever
    # trying to collect it if it gets imported into a collected test module.
    __test__ = False

    api_url: str
    api_key: str
    workspace_id: str
    connection_id: str | None


def _load_env() -> TestEnv:
    """Build a TestEnv from the environment, or skip the whole session.

    Skips (rather than fails) when any required variable is unset so that
    fork PRs without secrets pass CI cleanly.
    """
    absent = [var for var in REQUIRED_ENV if not os.environ.get(var)]
    if absent:
        pytest.skip(
            "SDK integration tests require env vars: " + ", ".join(absent)
        )
    # GitHub Actions sets `env:` keys even when the underlying secret/var is
    # unset, producing empty strings rather than absent keys. Use `or` to fall
    # back to the default for url and to None for the optional connection id.
    getenv = os.environ.get
    return TestEnv(
        api_url=getenv("HOTDATA_SDK_TEST_API_URL") or DEFAULT_API_URL,
        api_key=os.environ["HOTDATA_SDK_TEST_API_KEY"],
        workspace_id=os.environ["HOTDATA_SDK_TEST_WORKSPACE_ID"],
        connection_id=getenv("HOTDATA_SDK_TEST_CONNECTION_ID") or None,
    )


@pytest.fixture(scope="session")
def env() -> TestEnv:
    """Resolved test environment; skips the session if required vars are missing."""
    return _load_env()


@pytest.fixture(scope="session")
def api_client(env: TestEnv) -> Iterator[ApiClient]:
    """One authenticated ApiClient shared across the whole session."""
    cfg = Configuration(
        host=env.api_url,
        api_key=env.api_key,
        workspace_id=env.workspace_id,
    )
    # Context manager guarantees the underlying client is closed at teardown.
    with ApiClient(cfg) as shared_client:
        yield shared_client


@pytest.fixture(scope="session")
def workspace_id(env: TestEnv) -> str:
    """Seeded workspace id from HOTDATA_SDK_TEST_WORKSPACE_ID."""
    return env.workspace_id


@pytest.fixture(scope="session")
def connection_id(env: TestEnv) -> str:
    """Seeded connection id; skips scenarios that need one when unset."""
    if env.connection_id:
        return env.connection_id
    pytest.skip("HOTDATA_SDK_TEST_CONNECTION_ID required for this scenario")


@pytest.fixture
def sdkci_name() -> Callable[[str], str]:
    """Returns `sdkci-<scenario>-<uuid8>` so orphans are identifiable.

    See api/README.md — every test-created resource must use this prefix.
    """
    # NOTE: annotation fixed from "callable[[str], str]" — the builtin
    # `callable` is not a generic type; typing.Callable is the correct form.

    def _make(scenario: str) -> str:
        return f"sdkci-{scenario}-{uuid.uuid4().hex[:8]}"

    return _make


# Per-API client fixtures keep tests one-liner short and avoid every test
# instantiating its own *Api(api_client).
@pytest.fixture
def datasets_api(api_client: ApiClient) -> DatasetsApi:
    """DatasetsApi bound to the shared session client."""
    return DatasetsApi(api_client)


@pytest.fixture
def workspaces_api(api_client: ApiClient) -> WorkspacesApi:
    """WorkspacesApi bound to the shared session client."""
    return WorkspacesApi(api_client)


@pytest.fixture
def connections_api(api_client: ApiClient) -> ConnectionsApi:
    """ConnectionsApi bound to the shared session client."""
    return ConnectionsApi(api_client)


@pytest.fixture
def indexes_api(api_client: ApiClient) -> IndexesApi:
    """IndexesApi bound to the shared session client."""
    return IndexesApi(api_client)


@pytest.fixture
def saved_queries_api(api_client: ApiClient) -> SavedQueriesApi:
    """SavedQueriesApi bound to the shared session client."""
    return SavedQueriesApi(api_client)


@pytest.fixture
def secrets_api(api_client: ApiClient) -> SecretsApi:
    """SecretsApi bound to the shared session client."""
    return SecretsApi(api_client)
27 changes: 27 additions & 0 deletions tests/integration/test_auth_missing_token_401.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Scenario: auth_missing_token_401.

Calls without a bearer token return 401 with the documented ApiErrorResponse
shape. Uses an unauthenticated client built locally — does not touch the
session-scoped api_client.
"""

from __future__ import annotations

import pytest

from hotdata import ApiClient, Configuration
from hotdata.api.workspaces_api import WorkspacesApi
from hotdata.exceptions import ApiException


def test_auth_missing_token_401(env) -> None:
    """An unauthenticated request must fail with 401 and a non-empty body."""
    anon_config = Configuration(host=env.api_url)  # no api_key, no workspace_id
    with ApiClient(anon_config) as anon_client:
        with pytest.raises(ApiException) as excinfo:
            WorkspacesApi(anon_client).list_workspaces()
    assert excinfo.value.status == 401, (
        f"expected 401 without bearer token, got {excinfo.value.status}"
    )
    body = excinfo.value.body or ""
    assert body, "expected non-empty error body on 401"
32 changes: 32 additions & 0 deletions tests/integration/test_auth_unknown_workspace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Scenario: auth_unknown_workspace.

A valid bearer token combined with a fabricated workspace id (random UUID) must
return a 4xx error and never leak data from another workspace. Server may
respond 403 (forbidden) or 404 (not found) — both are acceptable.
"""

from __future__ import annotations

import uuid

import pytest

from hotdata import ApiClient, Configuration
from hotdata.api.datasets_api import DatasetsApi
from hotdata.exceptions import ApiException


def test_auth_unknown_workspace(env) -> None:
    """A real token plus a fabricated workspace id must 403/404, never leak data."""
    bogus_workspace = f"ws_{uuid.uuid4().hex}"
    cfg = Configuration(
        host=env.api_url,
        api_key=env.api_key,
        workspace_id=bogus_workspace,
    )
    with ApiClient(cfg) as client:
        with pytest.raises(ApiException) as excinfo:
            DatasetsApi(client).list_datasets()
    assert excinfo.value.status in (403, 404), (
        f"expected 403/404 for fabricated workspace, got {excinfo.value.status}"
    )
29 changes: 29 additions & 0 deletions tests/integration/test_connections_read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Scenario: connections_read.

Read-only lifecycle ops on the seeded connection — get, list, health check,
and cache purge. Does not create or delete connections in prod (would require
real datastore credentials in CI secrets).
"""

from __future__ import annotations

from hotdata.api.connections_api import ConnectionsApi


def test_connections_read(connections_api: ConnectionsApi, connection_id: str) -> None:
    """Read-only checks on the seeded connection: get, list, health, cache purge."""
    fetched = connections_api.get_connection(connection_id)
    assert fetched.id == connection_id
    assert fetched.source_type
    assert fetched.name

    page = connections_api.list_connections()
    seeded_present = any(c.id == connection_id for c in page.connections)
    assert seeded_present, (
        f"seeded connection {connection_id} not in list_connections"
    )

    health = connections_api.check_connection_health(connection_id)
    assert health.connection_id == connection_id
    assert health.healthy, f"seeded connection unhealthy: {health.error}"

    # purge_connection_cache returns None on success.
    connections_api.purge_connection_cache(connection_id)
56 changes: 56 additions & 0 deletions tests/integration/test_dataset_versioning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Scenario: dataset_versioning.

Create a dataset, exercise list_dataset_versions, pin to a specific version,
and verify the pin persists. Confirms the versioning surface is reachable and
consistent. Cleanup deletes the dataset; the pin is never explicitly removed.
"""

from __future__ import annotations

from hotdata.api.datasets_api import DatasetsApi
from hotdata.exceptions import ApiException
from hotdata.models.create_dataset_request import CreateDatasetRequest
from hotdata.models.dataset_source import DatasetSource
from hotdata.models.inline_data import InlineData
from hotdata.models.inline_dataset_source import InlineDatasetSource
from hotdata.models.update_dataset_request import UpdateDatasetRequest


def _inline_csv_source() -> DatasetSource:
    """Build a minimal two-row inline CSV DatasetSource for test datasets."""
    payload = InlineData(content="a,b\n1,2\n3,4\n", format="csv")
    return DatasetSource(InlineDatasetSource(inline=payload))


def test_dataset_versioning(datasets_api: DatasetsApi, sdkci_name) -> None:
    """Create a dataset, list its versions, pin version 1, verify the pin.

    The dataset is deleted in the finally block regardless of outcome.
    """
    label = sdkci_name("dataset-versioning")
    # Only set once creation succeeds, so cleanup never fires for a failed create.
    created_id: str | None = None

    try:
        created = datasets_api.create_dataset(
            CreateDatasetRequest(label=label, source=_inline_csv_source())
        )
        created_id = created.id

        # A freshly created dataset must expose at least one version, and
        # that first version must be numbered 1.
        versions = datasets_api.list_dataset_versions(created.id)
        assert versions.dataset_id == created.id
        assert versions.count >= 1
        assert any(v.version == 1 for v in versions.versions), (
            f"expected version 1 in {[v.version for v in versions.versions]}"
        )

        pinned = datasets_api.update_dataset(
            created.id, UpdateDatasetRequest(pinned_version=1)
        )
        assert pinned.pinned_version == 1
        assert pinned.latest_version >= 1

        # The pin must survive a round-trip through get_dataset.
        fetched = datasets_api.get_dataset(created.id)
        assert fetched.pinned_version == 1
    finally:
        # Best-effort cleanup: a delete failure must not mask a test failure.
        if created_id is not None:
            try:
                datasets_api.delete_dataset(created_id)
            except ApiException:
                pass
Loading