From 25756e3521f5fe04197c06022415c3198eefcd0f Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 5 Jun 2026 16:07:42 -0700 Subject: [PATCH 1/2] test(integration): cover category-A scenarios from #207 --- tests/integration/conftest.py | 65 +++++++++++++ .../integration/test_connection_types_read.py | 27 ++++++ .../test_database_catalogs_attach.py | 42 +++++++++ .../test_database_contexts_crud.py | 55 +++++++++++ tests/integration/test_databases_lifecycle.py | 72 +++++++++++++++ .../test_embedding_providers_crud.py | 80 ++++++++++++++++ .../test_information_schema_read.py | 31 +++++++ tests/integration/test_jobs_read.py | 27 ++++++ .../test_managed_tables_lifecycle.py | 91 +++++++++++++++++++ tests/integration/test_saved_queries_read.py | 45 +++++++++ tests/integration/test_uploads_crud.py | 24 +++++ 11 files changed, 559 insertions(+) create mode 100644 tests/integration/test_connection_types_read.py create mode 100644 tests/integration/test_database_catalogs_attach.py create mode 100644 tests/integration/test_database_contexts_crud.py create mode 100644 tests/integration/test_databases_lifecycle.py create mode 100644 tests/integration/test_embedding_providers_crud.py create mode 100644 tests/integration/test_information_schema_read.py create mode 100644 tests/integration/test_jobs_read.py create mode 100644 tests/integration/test_managed_tables_lifecycle.py create mode 100644 tests/integration/test_saved_queries_read.py create mode 100644 tests/integration/test_uploads_crud.py diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 5ccd500..8d60bb6 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -14,13 +14,21 @@ import pytest from hotdata import ApiClient, Configuration +from hotdata.api.connection_types_api import ConnectionTypesApi from hotdata.api.connections_api import ConnectionsApi +from hotdata.api.database_context_api import DatabaseContextApi from hotdata.api.databases_api import DatabasesApi 
from hotdata.api.datasets_api import DatasetsApi +from hotdata.api.embedding_providers_api import EmbeddingProvidersApi from hotdata.api.indexes_api import IndexesApi +from hotdata.api.information_schema_api import InformationSchemaApi +from hotdata.api.jobs_api import JobsApi +from hotdata.api.refresh_api import RefreshApi from hotdata.api.saved_queries_api import SavedQueriesApi from hotdata.api.secrets_api import SecretsApi +from hotdata.api.uploads_api import UploadsApi from hotdata.api.workspaces_api import WorkspacesApi +from hotdata.exceptions import ApiException from hotdata.models.create_database_request import CreateDatabaseRequest @@ -152,3 +160,60 @@ def saved_queries_api(api_client: ApiClient) -> SavedQueriesApi: @pytest.fixture def secrets_api(api_client: ApiClient) -> SecretsApi: return SecretsApi(api_client) + + +@pytest.fixture +def database_context_api(api_client: ApiClient) -> DatabaseContextApi: + return DatabaseContextApi(api_client) + + +@pytest.fixture +def embedding_providers_api(api_client: ApiClient) -> EmbeddingProvidersApi: + return EmbeddingProvidersApi(api_client) + + +@pytest.fixture +def connection_types_api(api_client: ApiClient) -> ConnectionTypesApi: + return ConnectionTypesApi(api_client) + + +@pytest.fixture +def jobs_api(api_client: ApiClient) -> JobsApi: + return JobsApi(api_client) + + +@pytest.fixture +def information_schema_api(api_client: ApiClient) -> InformationSchemaApi: + return InformationSchemaApi(api_client) + + +@pytest.fixture +def uploads_api(api_client: ApiClient) -> UploadsApi: + return UploadsApi(api_client) + + +@pytest.fixture +def refresh_api(api_client: ApiClient) -> RefreshApi: + return RefreshApi(api_client) + + +@pytest.fixture +def scratch_database(databases_api: DatabasesApi, sdkci_name) -> Iterator[str]: + """Yields the id of a fresh, isolated database, deleting it on teardown. 
+ + The session-scoped `database_id` is the shared `sdkci-shared` db reused + across runs; scenarios that declare schemas/tables/contexts or attach + catalogs need their own throwaway database so they never touch seeded data + or collide with a parallel run. `expires_at` is a safety net: if teardown is + interrupted, the server reclaims the database rather than leaking it. + """ + created = databases_api.create_database( + CreateDatabaseRequest(name=sdkci_name("scratch"), expires_at="2h") + ) + try: + yield created.id + finally: + try: + databases_api.delete_database(created.id) + except ApiException: + pass diff --git a/tests/integration/test_connection_types_read.py b/tests/integration/test_connection_types_read.py new file mode 100644 index 0000000..5a2723e --- /dev/null +++ b/tests/integration/test_connection_types_read.py @@ -0,0 +1,27 @@ +"""Scenario: connection_types_read. + +Read-only: list_connection_types returns the available connector catalog, and +get_connection_type fetches one by name with its config schema. Mutates nothing. +""" + +from __future__ import annotations + +from hotdata.api.connection_types_api import ConnectionTypesApi + + +def test_connection_types_read(connection_types_api: ConnectionTypesApi) -> None: + listing = connection_types_api.list_connection_types() + assert listing.connection_types, "connector catalog is unexpectedly empty" + for ct in listing.connection_types: + assert ct.name + assert ct.label + + # Fetch one by name; prefer postgres if present, else the first entry. + names = [ct.name for ct in listing.connection_types] + target = "postgres" if "postgres" in names else names[0] + + detail = connection_types_api.get_connection_type(target) + assert detail.name == target + assert detail.label + # Each connector advertises a config schema clients use to build a request. 
+ assert detail.config_schema is not None diff --git a/tests/integration/test_database_catalogs_attach.py b/tests/integration/test_database_catalogs_attach.py new file mode 100644 index 0000000..1194943 --- /dev/null +++ b/tests/integration/test_database_catalogs_attach.py @@ -0,0 +1,42 @@ +"""Scenario: database_catalogs_attach. + +Attach the seeded connection to a fresh scratch database as a catalog (under an +alias), confirm it's reachable via get_database, then detach it. Reversible and +idempotent — it never mutates the connection itself, only the scratch +database's attachment list (which is torn down with the database). +""" + +from __future__ import annotations + +from hotdata.api.databases_api import DatabasesApi +from hotdata.models.attach_database_catalog_request import ( + AttachDatabaseCatalogRequest, +) + + +def test_database_catalogs_attach( + databases_api: DatabasesApi, scratch_database: str, connection_id: str +) -> None: + db_id = scratch_database + alias = "sdkci_cat" + + databases_api.attach_database_catalog( + db_id, + AttachDatabaseCatalogRequest(connection_id=connection_id, alias=alias), + ) + + detail = databases_api.get_database(db_id) + attached = [a for a in detail.attachments if a.connection_id == connection_id] + assert attached, ( + f"connection {connection_id} not in attachments after attach" + ) + assert any(a.alias == alias for a in attached), ( + f"alias {alias!r} not reflected in attachment list" + ) + + databases_api.detach_database_catalog(db_id, connection_id) + + detail_after = databases_api.get_database(db_id) + assert all(a.connection_id != connection_id for a in detail_after.attachments), ( + f"connection {connection_id} still attached after detach" + ) diff --git a/tests/integration/test_database_contexts_crud.py b/tests/integration/test_database_contexts_crud.py new file mode 100644 index 0000000..ca8e3d6 --- /dev/null +++ b/tests/integration/test_database_contexts_crud.py @@ -0,0 +1,55 @@ +"""Scenario: 
database_contexts_crud. + +Against a fresh scratch database: upsert a named context document, read it back, +confirm it appears in list_database_contexts, upsert the same name again to +verify replace-on-write, delete the context, and confirm it's gone. The +`scratch_database` fixture creates and tears down the owning database. +""" + +from __future__ import annotations + +import pytest + +from hotdata.api.database_context_api import DatabaseContextApi +from hotdata.exceptions import ApiException +from hotdata.models.upsert_database_context_request import ( + UpsertDatabaseContextRequest, +) + + +def test_database_contexts_crud( + database_context_api: DatabaseContextApi, scratch_database: str, sdkci_name +) -> None: + db_id = scratch_database + # Context keys follow dataset table-name rules (letter/underscore first). + name = "sdkci_" + sdkci_name("ctx").replace("-", "_") + initial = "First revision of the context document." + replaced = "Second revision — replace-on-write." + + upserted = database_context_api.upsert_database_context( + db_id, UpsertDatabaseContextRequest(name=name, content=initial) + ) + assert upserted.context.name == name + assert upserted.context.content == initial + + got = database_context_api.get_database_context(db_id, name) + assert got.context.name == name + assert got.context.content == initial + + listing = database_context_api.list_database_contexts(db_id) + assert any(c.name == name for c in listing.contexts), ( + f"context {name} not in list_database_contexts" + ) + + # Upsert reuses the same name: content is replaced, not appended. 
+ database_context_api.upsert_database_context( + db_id, UpsertDatabaseContextRequest(name=name, content=replaced) + ) + got2 = database_context_api.get_database_context(db_id, name) + assert got2.context.content == replaced + + database_context_api.delete_database_context(db_id, name) + + with pytest.raises(ApiException) as excinfo: + database_context_api.get_database_context(db_id, name) + assert excinfo.value.status == 404 diff --git a/tests/integration/test_databases_lifecycle.py b/tests/integration/test_databases_lifecycle.py new file mode 100644 index 0000000..e07b98b --- /dev/null +++ b/tests/integration/test_databases_lifecycle.py @@ -0,0 +1,72 @@ +"""Scenario: databases_lifecycle. + +Create a database (a metadata-only grouping that auto-provisions a default +catalog), read it back, confirm it appears in list_databases, declare a schema +and a table on its default catalog, then delete it and verify it's gone. + +Self-cleaning: the database is created with a short `expires_at` and deleted in +a finally block, so a failed assertion never leaks a database into prod. +""" + +from __future__ import annotations + +import pytest + +from hotdata.api.databases_api import DatabasesApi +from hotdata.exceptions import ApiException +from hotdata.models.add_managed_schema_request import AddManagedSchemaRequest +from hotdata.models.add_managed_table_request import AddManagedTableRequest +from hotdata.models.create_database_request import CreateDatabaseRequest + + +def test_databases_lifecycle(databases_api: DatabasesApi, sdkci_name) -> None: + name = sdkci_name("databases-lifecycle") + # Schema/table identifiers must be SQL identifiers (no dashes). 
+ schema_name = "sdkci_schema" + table_name = "sdkci_table" + db_id: str | None = None + + try: + created = databases_api.create_database( + CreateDatabaseRequest(name=name, expires_at="2h") + ) + db_id = created.id + assert created.id + assert created.name == name + assert created.default_connection_id, ( + "create_database must expose the auto-provisioned default catalog connection" + ) + + detail = databases_api.get_database(db_id) + assert detail.id == db_id + assert detail.name == name + assert detail.default_connection_id == created.default_connection_id + + listing = databases_api.list_databases() + assert any(d.id == db_id for d in listing.databases), ( + f"created database {db_id} not in list_databases" + ) + + schema_resp = databases_api.add_database_schema( + db_id, AddManagedSchemaRequest(name=schema_name) + ) + assert schema_resp.var_schema == schema_name + + table_resp = databases_api.add_database_table( + db_id, schema_name, AddManagedTableRequest(name=table_name) + ) + assert table_resp.var_schema == schema_name + assert table_resp.table == table_name + + databases_api.delete_database(db_id) + db_id = None + + with pytest.raises(ApiException) as excinfo: + databases_api.get_database(created.id) + assert excinfo.value.status == 404 + finally: + if db_id is not None: + try: + databases_api.delete_database(db_id) + except ApiException: + pass diff --git a/tests/integration/test_embedding_providers_crud.py b/tests/integration/test_embedding_providers_crud.py new file mode 100644 index 0000000..51c76a8 --- /dev/null +++ b/tests/integration/test_embedding_providers_crud.py @@ -0,0 +1,80 @@ +"""Scenario: embedding_providers_crud. + +Register an embedding provider, read it, confirm it appears in +list_embedding_providers, update it, then delete it. + +The scenario calls for a credential-free provider. 
It nominally asks for +`provider_type=local`, but the runtime currently rejects `local` +("not yet supported; use 'service'"), so we register a `service` provider with +**no** api_key/secret_name. A service provider's key is only consulted when +embeddings are actually generated (indexing) — never at create/get/update — so +this exercises the full CRUD surface without any real external credential and +without auto-creating a secret that would need cleanup. +""" + +from __future__ import annotations + +import pytest + +from hotdata.api.embedding_providers_api import EmbeddingProvidersApi +from hotdata.exceptions import ApiException +from hotdata.models.create_embedding_provider_request import ( + CreateEmbeddingProviderRequest, +) +from hotdata.models.update_embedding_provider_request import ( + UpdateEmbeddingProviderRequest, +) + + +def test_embedding_providers_crud( + embedding_providers_api: EmbeddingProvidersApi, sdkci_name +) -> None: + name = sdkci_name("embprov") + updated_name = sdkci_name("embprov-updated") + provider_id: str | None = None + + try: + created = embedding_providers_api.create_embedding_provider( + CreateEmbeddingProviderRequest( + name=name, + provider_type="service", + config={"model": "text-embedding-3-small"}, + ) + ) + provider_id = created.id + assert created.id + assert created.name == name + assert created.provider_type == "service" + + got = embedding_providers_api.get_embedding_provider(provider_id) + assert got.id == provider_id + assert got.name == name + + listing = embedding_providers_api.list_embedding_providers() + assert any(p.id == provider_id for p in listing.embedding_providers), ( + f"created provider {provider_id} not in list_embedding_providers" + ) + + updated = embedding_providers_api.update_embedding_provider( + provider_id, UpdateEmbeddingProviderRequest(name=updated_name) + ) + assert updated.id == provider_id + assert updated.name == updated_name + + # Read-after-update reflects the rename. 
+ assert embedding_providers_api.get_embedding_provider(provider_id).name == ( + updated_name + ) + + embedding_providers_api.delete_embedding_provider(provider_id) + provider_id = None + + with pytest.raises(ApiException) as excinfo: + embedding_providers_api.get_embedding_provider(created.id) + assert excinfo.value.status == 404 + finally: + if provider_id is not None: + try: + embedding_providers_api.delete_embedding_provider(provider_id) + except ApiException: + pass diff --git a/tests/integration/test_information_schema_read.py b/tests/integration/test_information_schema_read.py new file mode 100644 index 0000000..8cc38c1 --- /dev/null +++ b/tests/integration/test_information_schema_read.py @@ -0,0 +1,31 @@ +"""Scenario: information_schema_read. + +Read-only: information_schema returns the catalog/schema/table metadata visible +to the workspace. Verify the seeded connection's tables surface when filtered by +connection id, including column definitions. +""" + +from __future__ import annotations + +from hotdata.api.information_schema_api import InformationSchemaApi + + +def test_information_schema_read( + information_schema_api: InformationSchemaApi, connection_id: str +) -> None: + resp = information_schema_api.information_schema( + connection_id=connection_id, include_columns=True + ) + # Response is well-formed: count matches the page, has_more drives paging. + assert isinstance(resp.tables, list) + assert resp.count == len(resp.tables) + assert isinstance(resp.has_more, bool) + + assert resp.tables, ( + f"seeded connection {connection_id} exposed no tables in information_schema" + ) + for table in resp.tables: + assert table.table + assert table.var_schema + # include_columns=True must populate column definitions. 
+ assert table.columns is not None diff --git a/tests/integration/test_jobs_read.py b/tests/integration/test_jobs_read.py new file mode 100644 index 0000000..eb07c10 --- /dev/null +++ b/tests/integration/test_jobs_read.py @@ -0,0 +1,27 @@ +"""Scenario: jobs_read. + +Read-only: list_jobs returns the workspace job history, and get_job fetches a +single job by id (the first from the list, when any exist). Never starts a job; +tolerates an empty history. +""" + +from __future__ import annotations + +from hotdata.api.jobs_api import JobsApi + + +def test_jobs_read(jobs_api: JobsApi) -> None: + listing = jobs_api.list_jobs() + assert isinstance(listing.jobs, list) + + if not listing.jobs: + # A fresh workspace may have no job history — that's a valid state. + return + + first = listing.jobs[0] + assert first.id + assert first.status + + fetched = jobs_api.get_job(first.id) + assert fetched.id == first.id + assert fetched.status diff --git a/tests/integration/test_managed_tables_lifecycle.py b/tests/integration/test_managed_tables_lifecycle.py new file mode 100644 index 0000000..b3df299 --- /dev/null +++ b/tests/integration/test_managed_tables_lifecycle.py @@ -0,0 +1,91 @@ +"""Scenario: managed_tables_lifecycle. + +The heaviest scenario — it ties databases, uploads, and managed tables together +against a fresh scratch database (whose default catalog is a managed catalog): + + 1. declare a schema and a table on the database's default catalog connection, + 2. upload a small parquet file, + 3. load it into the table (load_managed_table), + 4. read get_table_profile, + 5. refresh the catalog metadata, + 6. purge_table_cache, + 7. delete the managed table. + +The scratch_database fixture tears the database (and its catalog) down, so the +test touches no seeded data. Skipped if pyarrow is unavailable (needed to author +the parquet payload). 
+""" + +from __future__ import annotations + +import io + +import pytest + +pa = pytest.importorskip("pyarrow") +pq = pytest.importorskip("pyarrow.parquet") + +from hotdata.api.connections_api import ConnectionsApi +from hotdata.api.databases_api import DatabasesApi +from hotdata.api.refresh_api import RefreshApi +from hotdata.api.uploads_api import UploadsApi +from hotdata.models.add_managed_schema_request import AddManagedSchemaRequest +from hotdata.models.add_managed_table_request import AddManagedTableRequest +from hotdata.models.load_managed_table_request import LoadManagedTableRequest +from hotdata.models.refresh_request import RefreshRequest + + +def _parquet_bytes() -> bytes: + table = pa.table({"x": [1, 2, 3], "msg": ["a", "b", "c"]}) + buf = io.BytesIO() + pq.write_table(table, buf) + return buf.getvalue() + + +def test_managed_tables_lifecycle( + databases_api: DatabasesApi, + connections_api: ConnectionsApi, + uploads_api: UploadsApi, + refresh_api: RefreshApi, + scratch_database: str, +) -> None: + # The database's auto-provisioned default catalog is a managed catalog, + # addressed through its default_connection_id. 
+ connection_id = databases_api.get_database(scratch_database).default_connection_id + schema_name = "sdkci_mt" + table_name = "sdkci_loaded" + + connections_api.add_managed_schema( + connection_id, AddManagedSchemaRequest(name=schema_name) + ) + connections_api.add_managed_table( + connection_id, schema_name, AddManagedTableRequest(name=table_name) + ) + + upload = uploads_api.upload_file( + body=_parquet_bytes(), _content_type="application/parquet" + ) + assert upload.id + + loaded = connections_api.load_managed_table( + connection_id, + schema_name, + table_name, + LoadManagedTableRequest(mode="replace", upload_id=upload.id), + ) + assert loaded.schema_name == schema_name + assert loaded.table_name == table_name + assert loaded.row_count == 3 + + profile = connections_api.get_table_profile(connection_id, schema_name, table_name) + assert profile.var_schema == schema_name + assert profile.table == table_name + assert profile.row_count == 3 + + # Refresh the catalog metadata for the managed connection. + refreshed = refresh_api.refresh(RefreshRequest(connection_id=connection_id)) + assert refreshed.actual_instance is not None + + # purge_table_cache and delete_managed_table both return None on success. + connections_api.purge_table_cache(connection_id, schema_name, table_name) + connections_api.delete_managed_table(connection_id, schema_name, table_name) diff --git a/tests/integration/test_saved_queries_read.py b/tests/integration/test_saved_queries_read.py new file mode 100644 index 0000000..06576c1 --- /dev/null +++ b/tests/integration/test_saved_queries_read.py @@ -0,0 +1,45 @@ +"""Scenario: saved_queries_read. + +Create a saved query, confirm it appears in list_saved_queries, fetch it by id +with get_saved_query, then delete it. Read-focused companion to +saved_query_versioning (which exercises the version history). 
+""" + +from __future__ import annotations + +from hotdata.api.saved_queries_api import SavedQueriesApi +from hotdata.exceptions import ApiException +from hotdata.models.create_saved_query_request import CreateSavedQueryRequest + + +def test_saved_queries_read(saved_queries_api: SavedQueriesApi, sdkci_name) -> None: + name = sdkci_name("savedq-read") + sql = "SELECT 42 AS answer" + created_id: str | None = None + + try: + created = saved_queries_api.create_saved_query( + CreateSavedQueryRequest(name=name, sql=sql, description="sdkci read test") + ) + created_id = created.id + assert created.id + assert created.name == name + + listing = saved_queries_api.list_saved_queries() + match = next((q for q in listing.queries if q.id == created_id), None) + assert match is not None, f"created saved query {created_id} not in list" + assert match.name == name + + got = saved_queries_api.get_saved_query(created_id) + assert got.id == created_id + assert got.name == name + assert got.sql == sql + + saved_queries_api.delete_saved_query(created_id) + created_id = None + finally: + if created_id is not None: + try: + saved_queries_api.delete_saved_query(created_id) + except ApiException: + pass diff --git a/tests/integration/test_uploads_crud.py b/tests/integration/test_uploads_crud.py new file mode 100644 index 0000000..770a37c --- /dev/null +++ b/tests/integration/test_uploads_crud.py @@ -0,0 +1,24 @@ +"""Scenario: uploads_crud. + +Upload a small file via upload_file, then confirm it appears in list_uploads. +There is no delete-upload endpoint, so the upload is not torn down in-test — +orphaned `sdkci` uploads are reclaimed by the nightly sweep. UploadInfo carries +no name field, so we identify the upload by the id returned from upload_file. 
+""" + +from __future__ import annotations + +from hotdata.api.uploads_api import UploadsApi + + +def test_uploads_crud(uploads_api: UploadsApi) -> None: + body = b"col_a,col_b\n1,2\n3,4\n" + + uploaded = uploads_api.upload_file(body=body, _content_type="text/csv") + assert uploaded.id + assert uploaded.size_bytes == len(body) + + listing = uploads_api.list_uploads() + match = next((u for u in listing.uploads if u.id == uploaded.id), None) + assert match is not None, f"upload {uploaded.id} not in list_uploads" + assert match.size_bytes == len(body) From 06c0743432fd73ba8c3b8240be39026fe9457720 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 5 Jun 2026 16:20:34 -0700 Subject: [PATCH 2/2] docs(test): align embedding scenario note with corrected contract --- tests/integration/test_embedding_providers_crud.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_embedding_providers_crud.py b/tests/integration/test_embedding_providers_crud.py index 51c76a8..9db7bb1 100644 --- a/tests/integration/test_embedding_providers_crud.py +++ b/tests/integration/test_embedding_providers_crud.py @@ -3,13 +3,12 @@ Register an embedding provider, read it, confirm it appears in list_embedding_providers, update it, then delete it. -The scenario calls for a credential-free provider. It nominally asks for -`provider_type=local`, but the runtime currently rejects `local` -("not yet supported; use 'service'"), so we register a `service` provider with -**no** api_key/secret_name. A service provider's key is only consulted when -embeddings are actually generated (indexing) — never at create/get/update — so -this exercises the full CRUD surface without any real external credential and -without auto-creating a secret that would need cleanup. +The scenario calls for a `service` provider with **no** api_key/secret_name +(not `local` — the runtime currently rejects `local` as "not yet supported"). 
+A service provider's key is only consulted when embeddings are actually +generated (indexing) — never at create/get/update — so this exercises the full +CRUD surface without any real external credential and without auto-creating a +secret that would need cleanup. """ from __future__ import annotations