From 3f2c1ce93e9a5a0124cf04e0a23d1af3ab42d9ed Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 22 Jun 2026 01:51:09 +0000 Subject: [PATCH] Lead ClickHouse audit sort key with tenant + LowCardinality columns New shared audit tables are created natively with ORDER BY (tenant, time, id) plus allow_nullable_key, so tenant-scoped time-range reads prune by tenant first. Single-tenant tables keep the historical (time, id) key. event, actorType, resourceType and country are now LowCardinality columns for smaller storage and faster scans (country wrapped as LowCardinality(Nullable)). The full standard secondary-index set is retained. Migrating a pre-existing (time, id) table (adding/materializing the projection, converting columns to LowCardinality, dropping redundant indexes) is an out-of-band operator step, NOT part of the library. The tenant-leading sort key is covered by a live integration test that runs setup() against the ClickHouse service and asserts system.tables.sorting_key leads with tenant. --- src/Audit/Adapter/ClickHouse.php | 30 ++++++++++-- tests/Audit/Adapter/ClickHouseTest.php | 66 +++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 5 deletions(-) diff --git a/src/Audit/Adapter/ClickHouse.php b/src/Audit/Adapter/ClickHouse.php index c0ee9b7..0ff5880 100644 --- a/src/Audit/Adapter/ClickHouse.php +++ b/src/Audit/Adapter/ClickHouse.php @@ -23,6 +23,16 @@ class ClickHouse extends SQL private const DEFAULT_DATABASE = 'default'; + /** + * @var list<string> + */ + private const LOW_CARDINALITY_COLUMNS = [ + 'event', + 'actorType', + 'resourceType', + 'country', + ]; + /** * Filter methods that must be supplied at least one value. Empty `values` * arrays for these methods are rejected up front so they can't silently @@ -766,6 +776,8 @@ public function setup(): void $tableName = $this->getTableName(); $escapedDatabaseAndTable = $this->escapeIdentifier($this->database) . '.' . $this->escapeIdentifier($tableName); + $orderByExpr = $this->sharedTables ? 
'(tenant, time, id)' : '(time, id)'; + // Create table with MergeTree engine for optimal performance $createTableSql = " CREATE TABLE IF NOT EXISTS {$escapedDatabaseAndTable} ( @@ -773,9 +785,9 @@ public function setup(): void " . implode(",\n ", $indexes) . " ) ENGINE = MergeTree() - ORDER BY (time, id) + ORDER BY {$orderByExpr} PARTITION BY toYYYYMM(time) - SETTINGS index_granularity = 8192 + SETTINGS index_granularity = 8192" . ($this->sharedTables ? ', allow_nullable_key = 1' : '') . " "; $this->query($createTableSql); @@ -1962,9 +1974,19 @@ protected function getColumnDefinition(string $id): string ? 'DateTime64(3)' : 'String'; - $nullable = !$attribute['required'] ? 'Nullable(' . $type . ')' : $type; + $required = (bool) $attribute['required']; + + if ($type === 'String' && \in_array($id, self::LOW_CARDINALITY_COLUMNS, true)) { + $columnType = $required + ? 'LowCardinality(String)' + : 'LowCardinality(Nullable(String))'; + + return "{$id} {$columnType}"; + } + + $columnType = !$required ? 'Nullable(' . $type . 
')' : $type; - return "{$id} {$nullable}"; + return "{$id} {$columnType}"; } /** diff --git a/tests/Audit/Adapter/ClickHouseTest.php b/tests/Audit/Adapter/ClickHouseTest.php index a5eaea1..8074243 100644 --- a/tests/Audit/Adapter/ClickHouseTest.php +++ b/tests/Audit/Adapter/ClickHouseTest.php @@ -25,7 +25,7 @@ protected function initializeAudit(): void $username = getenv('CLICKHOUSE_USER') ?: 'default'; $password = getenv('CLICKHOUSE_PASSWORD') ?: 'clickhouse'; $port = (int) (getenv('CLICKHOUSE_PORT') ?: 8123); - $secure = (bool) (getenv('CLICKHOUSE_SECURE') ?: false); + $secure = filter_var(getenv('CLICKHOUSE_SECURE') ?: false, FILTER_VALIDATE_BOOLEAN); $clickHouse = new ClickHouse( host: $host, @@ -934,4 +934,68 @@ public function testOrderRandomRejectedWithColumnOrder(): void Query::orderDesc('time'), ]); } + + public function testSharedTableSortKeyLeadsWithTenant(): void + { + $host = getenv('CLICKHOUSE_HOST') ?: 'clickhouse'; + $username = getenv('CLICKHOUSE_USER') ?: 'default'; + $password = getenv('CLICKHOUSE_PASSWORD') ?: 'clickhouse'; + $port = (int) (getenv('CLICKHOUSE_PORT') ?: 8123); + $secure = filter_var(getenv('CLICKHOUSE_SECURE') ?: false, FILTER_VALIDATE_BOOLEAN); + $database = getenv('CLICKHOUSE_DATABASE') ?: 'default'; + + $namespace = 'projtest_' . uniqid(); + + $adapter = new ClickHouse( + host: $host, + username: $username, + password: $password, + port: $port, + secure: $secure + ); + $adapter->setDatabase($database); + $adapter->setNamespace($namespace); + $adapter->setSharedTables(true); + $adapter->setTenant(1); + + $table = $namespace . '_audits'; + + $http = function (string $sql, array $params = []) use ($host, $port, $username, $password, $secure, $database): string { + $scheme = $secure ? 'https' : 'http'; + $url = "{$scheme}://{$host}:{$port}/?database=" . rawurlencode($database) + . '&user=' . rawurlencode($username) + . '&password=' . rawurlencode($password); + foreach ($params as $key => $value) { + $url .= '&param_' . 
rawurlencode((string) $key) . '=' . rawurlencode((string) $value); + } + $ctx = stream_context_create(['http' => [ + 'method' => 'POST', + 'header' => "Content-Type: text/plain\r\n", + 'content' => $sql, + 'timeout' => 15, + 'ignore_errors' => true, + ]]); + $out = @file_get_contents($url, false, $ctx); + + return $out === false ? '' : trim((string) $out); + }; + + try { + (new Audit($adapter))->setup(); + + $sortingKey = $http( + 'SELECT sorting_key FROM system.tables WHERE database = {db:String} AND name = {tbl:String}', + ['db' => $database, 'tbl' => $table] + ); + + $this->assertTrue( + str_starts_with(trim($sortingKey), 'tenant'), + "Expected sorting key to lead with 'tenant', got: {$sortingKey}" + ); + } finally { + $escDb = '`' . str_replace('`', '``', $database) . '`'; + $escTbl = '`' . str_replace('`', '``', $table) . '`'; + $http("DROP TABLE IF EXISTS {$escDb}.{$escTbl}"); + } + } }