From b38895643172d2addc637ee4494d706806db426a Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Wed, 24 Jun 2026 02:04:43 +0000 Subject: [PATCH] feat(schema): add resourceType dimension to events and gauges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resourceType is the API-level plural resource family the metric belongs to ('functions', 'sites', 'buckets', 'databases', …) — distinct from the existing singular `resource` tag (the row's own type: 'deployment', 'function', 'bucket'). Cloud's StatsResources and StatsUsage use this plural form everywhere in metric names (METRIC_RESOURCE_TYPE_*); making it a first-class column lets callers slice 'storage by resourceType' without parsing metric names. Library changes: - Metric.php - resourceType added to EVENT_COLUMNS and GAUGE_COLUMNS so it round-trips through extractColumns() during write. - String column (size 64) added to getEventSchema() and getGaugeSchema() so new tables include it from the start. - getEventIndexes() and getGaugeIndexes() include resourceType with a `set(0)` data-skipping index (low cardinality — ~10-20 distinct values across a project's lifetime, so set beats bloom_filter for selective filters). - getResourceType() accessor. - ClickHouse.php - EVENT_PROJECTIONS gains `p_by_resourceType` so grouped event aggregations on resourceType route to a sum-projection instead of scanning the base table. - GAUGE_PROJECTIONS gains `p_by_resourceType` and the combined `p_by_resourceType_resource` (the latter covers the common "storage breakdown by resourceType × resource" panel in one projection scan). - ensureGaugeDimColumns() extended with resourceType; new ensureEventDimColumns() handles the same migration for existing events tables. Both are called from setup() — projections that reference resourceType cannot materialize until the source column exists on both base tables. 
This is a backwards-compatible additive change: existing deployments get the new column via ALTER TABLE ADD COLUMN IF NOT EXISTS at next setup(); existing rows have NULL until publishers start populating the tag (cloud will in a follow-up). --- src/Usage/Adapter/ClickHouse.php | 27 +++++++++++++++++++++++++-- src/Usage/Metric.php | 30 +++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/src/Usage/Adapter/ClickHouse.php b/src/Usage/Adapter/ClickHouse.php index 9cf0cd5..aa3b3c3 100644 --- a/src/Usage/Adapter/ClickHouse.php +++ b/src/Usage/Adapter/ClickHouse.php @@ -682,6 +682,7 @@ private function formatParamValue(mixed $value): string ['name' => 'p_by_path', 'dims' => ['path']], ['name' => 'p_by_country', 'dims' => ['country']], ['name' => 'p_by_service', 'dims' => ['service']], + ['name' => 'p_by_resourceType', 'dims' => ['resourceType']], ]; /** @@ -695,8 +696,10 @@ private function formatParamValue(mixed $value): string private const GAUGE_PROJECTIONS = [ ['name' => 'p_by_service', 'dims' => ['service']], ['name' => 'p_by_resource', 'dims' => ['resource']], + ['name' => 'p_by_resourceType', 'dims' => ['resourceType']], ['name' => 'p_by_resourceId', 'dims' => ['resourceId']], ['name' => 'p_by_resource_resourceId', 'dims' => ['resource', 'resourceId']], + ['name' => 'p_by_resourceType_resource', 'dims' => ['resourceType', 'resource']], ]; /** @@ -741,6 +744,7 @@ public function setup(): void ); $this->ensureGaugeDimColumns(); + $this->ensureEventDimColumns(); // --- Per-dim projections on the events / gauges base tables --- $this->setLightweightMutationProjectionMode($this->getEventsTableName()); @@ -777,7 +781,7 @@ private function setLightweightMutationProjectionMode(string $baseTable): void } /** - * Backfill the service / resource columns on an existing gauges table. + * Backfill late-added dim columns on an existing gauges table. 
* setup() uses CREATE TABLE IF NOT EXISTS, so deployments that came up * before these columns were added never receive them — the gauge * projections would then fail because their SELECT references columns @@ -790,7 +794,26 @@ private function ensureGaugeDimColumns(): void $sql = "ALTER TABLE {$gaugesTable} " . 'ADD COLUMN IF NOT EXISTS service LowCardinality(Nullable(String)), ' - . 'ADD COLUMN IF NOT EXISTS resource LowCardinality(Nullable(String))'; + . 'ADD COLUMN IF NOT EXISTS resource LowCardinality(Nullable(String)), ' + . 'ADD COLUMN IF NOT EXISTS resourceType LowCardinality(Nullable(String))'; + + $this->query($sql); + } + + /** + * Backfill late-added dim columns on an existing events table. Same + * reasoning as ensureGaugeDimColumns — CREATE TABLE IF NOT EXISTS won't + * pick up columns added to the schema after the table was first created, + * and a per-dim projection on `resourceType` cannot be materialized until + * the source column exists on the base table. + */ + private function ensureEventDimColumns(): void + { + $eventsTable = $this->escapeIdentifier($this->database) + . '.' . $this->escapeIdentifier($this->getEventsTableName()); + + $sql = "ALTER TABLE {$eventsTable} " + . 
'ADD COLUMN IF NOT EXISTS resourceType LowCardinality(Nullable(String))'; $this->query($sql); } diff --git a/src/Usage/Metric.php b/src/Usage/Metric.php index c597685..3649e25 100644 --- a/src/Usage/Metric.php +++ b/src/Usage/Metric.php @@ -25,6 +25,7 @@ * 'status' => '201', * 'service' => 'storage', * 'resource' => 'bucket', + * 'resourceType' => 'buckets', * 'resourceId' => 'abc123', * 'resourceInternalId' => '42', * 'teamId' => 'team_x', @@ -51,7 +52,7 @@ class Metric extends ArrayObject */ public const EVENT_COLUMNS = [ 'path', 'method', 'status', - 'service', 'resource', 'resourceId', 'resourceInternalId', + 'service', 'resource', 'resourceType', 'resourceId', 'resourceInternalId', 'teamId', 'teamInternalId', 'country', 'region', 'hostname', 'osCode', 'osName', 'osVersion', @@ -63,7 +64,7 @@ class Metric extends ArrayObject /** * Gauge-specific column names that are extracted from tags into dedicated columns. */ - public const GAUGE_COLUMNS = ['service', 'resource', 'teamId', 'teamInternalId', 'resourceId', 'resourceInternalId']; + public const GAUGE_COLUMNS = ['service', 'resource', 'resourceType', 'teamId', 'teamInternalId', 'resourceId', 'resourceInternalId']; /** * Construct a new metric object. @@ -79,7 +80,7 @@ class Metric extends ArrayObject * Event-only dimension columns (see EVENT_COLUMNS): * - path / method / status: HTTP shape * - service: API service segment (storage, databases, …) - * - resource / resourceId / resourceInternalId: resource identity + * - resource / resourceType / resourceId / resourceInternalId: resource identity (`resource` is singular like 'bucket', `resourceType` is plural like 'buckets') * - teamId / teamInternalId: owning team identity * - country / region / hostname: geographic + caller origin * - osCode / osName / osVersion: parsed user-agent OS fields @@ -236,6 +237,19 @@ public function getResourceId(): ?string return is_string($resourceId) ? 
$resourceId : null; } + /** + * Get the API-level resource type the metric belongs to (e.g. 'functions', + * 'sites', 'buckets', 'databases'). Plural, contrast with `resource` + * which is the singular type of the row itself (e.g. 'deployment', + * 'function', 'bucket'). Low cardinality — useful as a group-by / + * filter dimension across a project's whole stats surface. + */ + public function getResourceType(): ?string + { + $v = $this->getAttribute('resourceType', null); + return is_string($v) ? $v : null; + } + /** * Get country code (event metrics only). * @@ -611,6 +625,7 @@ public static function getEventSchema(): array $stringColumn('status', 16), $stringColumn('service', 256), $stringColumn('resource', 256), + $stringColumn('resourceType', 64), $stringColumn('resourceId', 255), $stringColumn('resourceInternalId', 255), $stringColumn('teamId', 255), @@ -684,6 +699,7 @@ public static function getGaugeSchema(): array ], $stringColumn('service', 256), $stringColumn('resource', 256), + $stringColumn('resourceType', 64), $stringColumn('teamId', 255), $stringColumn('teamInternalId', 255), $stringColumn('resourceId', 255), @@ -713,13 +729,13 @@ public static function getEventIndexes(): array { $indexed = [ 'path', 'method', 'status', - 'service', 'resource', 'resourceId', 'resourceInternalId', + 'service', 'resource', 'resourceType', 'resourceId', 'resourceInternalId', 'teamId', 'teamInternalId', 'country', 'region', 'hostname', 'osName', 'clientType', 'clientName', 'deviceName', ]; - $setIndexed = ['status', 'method', 'country', 'service', 'clientType', 'osName']; + $setIndexed = ['status', 'method', 'country', 'service', 'resourceType', 'clientType', 'osName']; return array_map( static function (string $col) use ($setIndexed): array { @@ -745,9 +761,9 @@ static function (string $col) use ($setIndexed): array { */ public static function getGaugeIndexes(): array { - $indexed = ['service', 'resource', 'resourceId', 'resourceInternalId', 'teamId', 'teamInternalId']; 
+ $indexed = ['service', 'resource', 'resourceType', 'resourceId', 'resourceInternalId', 'teamId', 'teamInternalId']; - $setIndexed = ['service', 'resource']; + $setIndexed = ['service', 'resource', 'resourceType']; return array_map( static fn (string $col): array => [