Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Workflow: scenario parity + live integration tests for the Python SDK.
name: Integration Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

jobs:
  # Parity check runs on every PR and push: confirms every scenario in
  # www.hotdata.dev/api/test-scenarios.yaml has a matching test file in this
  # repo. www.hotdata.dev is private, so we fetch via the GitHub App token —
  # same pattern as regenerate.yml.
  scenario-parity:
    runs-on: ubuntu-latest
    steps:
      - name: Generate GitHub App token
        id: app-token
        uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3
        with:
          app-id: 3060111
          private-key: ${{ secrets.HOTDATA_AUTOMATION_PRIVATE_KEY }}
          owner: hotdata-dev
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.12'
      - name: Install PyYAML
        run: pip install --quiet pyyaml
      - name: Fetch scenarios manifest
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token }}
        run: |
          # -f fails the step on HTTP errors; the raw media type returns the
          # file body instead of the JSON contents envelope.
          curl -sS -f -L \
            -H "Accept: application/vnd.github.v3.raw" \
            -H "Authorization: Bearer $GH_TOKEN" \
            https://api.github.com/repos/hotdata-dev/www.hotdata.dev/contents/api/test-scenarios.yaml \
            -o test-scenarios.yaml
      - name: Check parity
        run: |
          python3 - <<'PY'
          import sys, pathlib, yaml
          scenarios = yaml.safe_load(pathlib.Path("test-scenarios.yaml").read_text())["scenarios"]
          missing = []
          for s in scenarios:
              # Scenarios listing "python" under optional_for may legitimately
              # have no test file in this repo.
              if "python" in (s.get("optional_for") or []):
                  continue
              expected = pathlib.Path("tests/integration") / f"test_{s['name']}.py"
              if not expected.exists():
                  missing.append(str(expected))
          if missing:
              # ::error:: surfaces the failure as a PR annotation; exit 1
              # fails the job (contrast: regenerate.yml only warns).
              print(f"::error::sdk-python is missing tests for {len(missing)} scenarios:")
              for m in missing:
                  print(f" - {m}")
              sys.exit(1)
          print(f"All {len(scenarios)} scenarios have corresponding test files.")
          PY

# Integration tests run against production. Skipped automatically by the
# conftest if HOTDATA_SDK_TEST_API_KEY / HOTDATA_SDK_TEST_WORKSPACE_ID aren't
# set (e.g. PRs from forks where secrets aren't injected).
  integration:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.12'
      - name: Install package and test deps
        run: |
          pip install --quiet -r requirements.txt -r test-requirements.txt
          # Editable install so tests import the checked-out sources.
          pip install --quiet -e .
      - name: Run integration tests
        env:
          # vars.* are non-secret repo variables; only the API key is masked.
          HOTDATA_SDK_TEST_API_URL: ${{ vars.HOTDATA_SDK_TEST_API_URL }}
          HOTDATA_SDK_TEST_API_KEY: ${{ secrets.HOTDATA_SDK_TEST_API_KEY }}
          HOTDATA_SDK_TEST_WORKSPACE_ID: ${{ vars.HOTDATA_SDK_TEST_WORKSPACE_ID }}
          HOTDATA_SDK_TEST_CONNECTION_ID: ${{ vars.HOTDATA_SDK_TEST_CONNECTION_ID }}
        run: pytest tests/integration -v
29 changes: 29 additions & 0 deletions .github/workflows/regenerate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,35 @@ jobs:
# cd away from the source tree so the import resolves against the installed wheel.
cd /tmp && python -c "import hotdata; print(hotdata.__version__)"

- name: Check integration test scenario parity
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
  run: |
    curl -sS -f -L \
      -H "Accept: application/vnd.github.v3.raw" \
      -H "Authorization: Bearer $GH_TOKEN" \
      https://api.github.com/repos/hotdata-dev/www.hotdata.dev/contents/api/test-scenarios.yaml \
      -o test-scenarios.yaml
    pip install --quiet pyyaml
    # Warning-only variant of the check in integration-tests.yml: a regen PR
    # should still open even when new scenarios don't have tests yet.
    python3 - <<'PY'
    import sys, pathlib, yaml
    scenarios = yaml.safe_load(pathlib.Path("test-scenarios.yaml").read_text())["scenarios"]
    missing = []
    for s in scenarios:
        if "python" in (s.get("optional_for") or []):
            continue
        expected = pathlib.Path("tests/integration") / f"test_{s['name']}.py"
        if not expected.exists():
            missing.append(str(expected))
    if missing:
        print(f"::warning::sdk-python is missing tests for {len(missing)} scenarios after regen:")
        for m in missing:
            print(f" - {m}")
    else:
        print(f"All {len(scenarios)} scenarios have corresponding test files.")
    PY
    # Keep the fetched manifest out of the regen commit.
    rm -f test-scenarios.yaml

- name: Create PR
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8
with:
Expand Down
Empty file added tests/__init__.py
Empty file.
Empty file added tests/integration/__init__.py
Empty file.
124 changes: 124 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Shared fixtures for SDK integration tests.

Tests run against production. See www.hotdata.dev/api/README.md for the
contract — env vars, naming conventions, blast-radius rules.
"""

from __future__ import annotations

import os
import uuid
from dataclasses import dataclass
from typing import Callable, Iterator

import pytest

from hotdata import ApiClient, Configuration
from hotdata.api.connections_api import ConnectionsApi
from hotdata.api.datasets_api import DatasetsApi
from hotdata.api.indexes_api import IndexesApi
from hotdata.api.saved_queries_api import SavedQueriesApi
from hotdata.api.secrets_api import SecretsApi
from hotdata.api.workspaces_api import WorkspacesApi


REQUIRED_ENV = ("HOTDATA_SDK_TEST_API_KEY", "HOTDATA_SDK_TEST_WORKSPACE_ID")
DEFAULT_API_URL = "https://api.hotdata.dev"


@dataclass(frozen=True)
class TestEnv:
    """Immutable snapshot of the HOTDATA_SDK_TEST_* environment.

    `connection_id` is optional: scenarios that need it skip when it is
    absent (see the `connection_id` fixture).
    """

    # Despite the Test* name this is not a test class; keep pytest from ever
    # trying to collect it if it gets imported into a collected test module.
    __test__ = False

    api_url: str
    api_key: str
    workspace_id: str
    connection_id: str | None


def _load_env() -> TestEnv:
    """Build a TestEnv from the environment, or skip the whole session.

    Skips (rather than fails) when any required variable is unset so that
    fork PRs without secrets pass CI cleanly.
    """
    absent = [var for var in REQUIRED_ENV if not os.environ.get(var)]
    if absent:
        pytest.skip(
            "SDK integration tests require env vars: " + ", ".join(absent)
        )
    # GitHub Actions sets `env:` keys even when the underlying secret/var is
    # unset, producing empty strings rather than absent keys. Use `or` to fall
    # back to the default for url and to None for the optional connection id.
    getenv = os.environ.get
    return TestEnv(
        api_url=getenv("HOTDATA_SDK_TEST_API_URL") or DEFAULT_API_URL,
        api_key=os.environ["HOTDATA_SDK_TEST_API_KEY"],
        workspace_id=os.environ["HOTDATA_SDK_TEST_WORKSPACE_ID"],
        connection_id=getenv("HOTDATA_SDK_TEST_CONNECTION_ID") or None,
    )


@pytest.fixture(scope="session")
def env() -> TestEnv:
    """Resolved test environment; skips the session if required vars are missing."""
    return _load_env()


@pytest.fixture(scope="session")
def api_client(env: TestEnv) -> Iterator[ApiClient]:
    """One authenticated ApiClient shared across the whole session."""
    cfg = Configuration(
        host=env.api_url,
        api_key=env.api_key,
        workspace_id=env.workspace_id,
    )
    # Context manager guarantees the underlying client is closed at teardown.
    with ApiClient(cfg) as shared_client:
        yield shared_client


@pytest.fixture(scope="session")
def workspace_id(env: TestEnv) -> str:
    """Seeded workspace id from HOTDATA_SDK_TEST_WORKSPACE_ID."""
    return env.workspace_id


@pytest.fixture(scope="session")
def connection_id(env: TestEnv) -> str:
    """Seeded connection id; skips scenarios that need one when unset."""
    if env.connection_id:
        return env.connection_id
    pytest.skip("HOTDATA_SDK_TEST_CONNECTION_ID required for this scenario")


@pytest.fixture
def sdkci_name() -> Callable[[str], str]:
    """Returns `sdkci-<scenario>-<uuid8>` so orphans are identifiable.

    See api/README.md — every test-created resource must use this prefix.
    """
    # NOTE: annotation fixed from "callable[[str], str]" — the builtin
    # `callable` is not a generic type; typing.Callable is the correct form.

    def _make(scenario: str) -> str:
        return f"sdkci-{scenario}-{uuid.uuid4().hex[:8]}"

    return _make


# Per-API client fixtures keep tests one-liner short and avoid every test
# instantiating its own *Api(api_client).
@pytest.fixture
def datasets_api(api_client: ApiClient) -> DatasetsApi:
    """DatasetsApi bound to the shared session client."""
    return DatasetsApi(api_client)


@pytest.fixture
def workspaces_api(api_client: ApiClient) -> WorkspacesApi:
    """WorkspacesApi bound to the shared session client."""
    return WorkspacesApi(api_client)


@pytest.fixture
def connections_api(api_client: ApiClient) -> ConnectionsApi:
    """ConnectionsApi bound to the shared session client."""
    return ConnectionsApi(api_client)


@pytest.fixture
def indexes_api(api_client: ApiClient) -> IndexesApi:
    """IndexesApi bound to the shared session client."""
    return IndexesApi(api_client)


@pytest.fixture
def saved_queries_api(api_client: ApiClient) -> SavedQueriesApi:
    """SavedQueriesApi bound to the shared session client."""
    return SavedQueriesApi(api_client)


@pytest.fixture
def secrets_api(api_client: ApiClient) -> SecretsApi:
    """SecretsApi bound to the shared session client."""
    return SecretsApi(api_client)
27 changes: 27 additions & 0 deletions tests/integration/test_auth_missing_token_401.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Scenario: auth_missing_token_401.

Calls without a bearer token return 401 with the documented ApiErrorResponse
shape. Uses an unauthenticated client built locally — does not touch the
session-scoped api_client.
"""

from __future__ import annotations

import pytest

from hotdata import ApiClient, Configuration
from hotdata.api.workspaces_api import WorkspacesApi
from hotdata.exceptions import ApiException


def test_auth_missing_token_401(env) -> None:
    """An unauthenticated request must fail with 401 and a non-empty body."""
    anon_config = Configuration(host=env.api_url)  # no api_key, no workspace_id
    with ApiClient(anon_config) as anon_client:
        with pytest.raises(ApiException) as excinfo:
            WorkspacesApi(anon_client).list_workspaces()
    assert excinfo.value.status == 401, (
        f"expected 401 without bearer token, got {excinfo.value.status}"
    )
    body = excinfo.value.body or ""
    assert body, "expected non-empty error body on 401"
32 changes: 32 additions & 0 deletions tests/integration/test_auth_unknown_workspace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Scenario: auth_unknown_workspace.

A valid bearer token combined with a fabricated workspace id (random UUID) must
return a 4xx error and never leak data from another workspace. Server may
respond 403 (forbidden) or 404 (not found) — both are acceptable.
"""

from __future__ import annotations

import uuid

import pytest

from hotdata import ApiClient, Configuration
from hotdata.api.datasets_api import DatasetsApi
from hotdata.exceptions import ApiException


def test_auth_unknown_workspace(env) -> None:
    """A real token plus a fabricated workspace id must 403/404, never leak data."""
    bogus_workspace = f"ws_{uuid.uuid4().hex}"
    cfg = Configuration(
        host=env.api_url,
        api_key=env.api_key,
        workspace_id=bogus_workspace,
    )
    with ApiClient(cfg) as client:
        with pytest.raises(ApiException) as excinfo:
            DatasetsApi(client).list_datasets()
    assert excinfo.value.status in (403, 404), (
        f"expected 403/404 for fabricated workspace, got {excinfo.value.status}"
    )
29 changes: 29 additions & 0 deletions tests/integration/test_connections_read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Scenario: connections_read.

Read-only lifecycle ops on the seeded connection — get, list, health check,
and cache purge. Does not create or delete connections in prod (would require
real datastore credentials in CI secrets).
"""

from __future__ import annotations

from hotdata.api.connections_api import ConnectionsApi


def test_connections_read(connections_api: ConnectionsApi, connection_id: str) -> None:
    """Read-only checks on the seeded connection: get, list, health, cache purge."""
    fetched = connections_api.get_connection(connection_id)
    assert fetched.id == connection_id
    assert fetched.source_type
    assert fetched.name

    page = connections_api.list_connections()
    seeded_present = any(c.id == connection_id for c in page.connections)
    assert seeded_present, (
        f"seeded connection {connection_id} not in list_connections"
    )

    health = connections_api.check_connection_health(connection_id)
    assert health.connection_id == connection_id
    assert health.healthy, f"seeded connection unhealthy: {health.error}"

    # purge_connection_cache returns None on success.
    connections_api.purge_connection_cache(connection_id)
56 changes: 56 additions & 0 deletions tests/integration/test_dataset_versioning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Scenario: dataset_versioning.

Create a dataset, exercise list_dataset_versions, pin to a specific version,
and verify the pin persists. Confirms the versioning surface is reachable and
consistent. Cleanup deletes the dataset; the pin is never explicitly removed.
"""

from __future__ import annotations

from hotdata.api.datasets_api import DatasetsApi
from hotdata.exceptions import ApiException
from hotdata.models.create_dataset_request import CreateDatasetRequest
from hotdata.models.dataset_source import DatasetSource
from hotdata.models.inline_data import InlineData
from hotdata.models.inline_dataset_source import InlineDatasetSource
from hotdata.models.update_dataset_request import UpdateDatasetRequest


def _inline_csv_source() -> DatasetSource:
    """Build a minimal two-row inline CSV DatasetSource for test datasets."""
    payload = InlineData(content="a,b\n1,2\n3,4\n", format="csv")
    return DatasetSource(InlineDatasetSource(inline=payload))


def test_dataset_versioning(datasets_api: DatasetsApi, sdkci_name) -> None:
    """Create a dataset, list its versions, pin version 1, verify the pin.

    The dataset is deleted in the finally block regardless of outcome.
    """
    label = sdkci_name("dataset-versioning")
    # Only set once creation succeeds, so cleanup never fires for a failed create.
    created_id: str | None = None

    try:
        created = datasets_api.create_dataset(
            CreateDatasetRequest(label=label, source=_inline_csv_source())
        )
        created_id = created.id

        # A freshly created dataset must expose at least one version, and
        # that first version must be numbered 1.
        versions = datasets_api.list_dataset_versions(created.id)
        assert versions.dataset_id == created.id
        assert versions.count >= 1
        assert any(v.version == 1 for v in versions.versions), (
            f"expected version 1 in {[v.version for v in versions.versions]}"
        )

        pinned = datasets_api.update_dataset(
            created.id, UpdateDatasetRequest(pinned_version=1)
        )
        assert pinned.pinned_version == 1
        assert pinned.latest_version >= 1

        # The pin must survive a round-trip through get_dataset.
        fetched = datasets_api.get_dataset(created.id)
        assert fetched.pinned_version == 1
    finally:
        # Best-effort cleanup: a delete failure must not mask a test failure.
        if created_id is not None:
            try:
                datasets_api.delete_dataset(created_id)
            except ApiException:
                pass
Loading