From 20404cbf80724723e44baed109cbdfb00eec386e Mon Sep 17 00:00:00 2001 From: Artyom Keydunov Date: Wed, 15 Apr 2026 21:09:19 -0700 Subject: [PATCH 1/2] Docs data modeling improvements (#10698) * docs: add dedicated Views guide page and minor fixes Add a new guide-level Views page to the Data Modeling section, covering why views matter, how they work, design best practices, and folder organization. Also hide the calendar cubes section in dimensions reference, fix the `id` named format to omit grouping commas, and remove the demo deployment callout from the getting started page. Made-with: Cursor * docs: restructure data modeling section into standalone pages - Create dedicated pages: Cubes, Joins, Measures, Dimensions - Move multi-fact queries under Views as "Multi-fact views" - Move extending cubes under Cubes as "Extending cubes" - Move polymorphic cubes to recipes - Move Syntax to standalone page after AI context - Rewrite Joins page with views-first approach - Rewrite multi-fact page to lead with the problem - Remove old Concepts index page, calculated-members, multi-stage-calculations - Hide data-blending page - Update all cross-references across 30+ files Made-with: Cursor --- docs-mintlify/docs.json | 27 +- .../access-control/member-level-security.mdx | 4 +- .../access-control/row-level-security.mdx | 4 +- .../concepts/calculated-members.mdx | 724 ------ .../data-modeling/concepts/calendar-cubes.mdx | 4 +- .../docs/data-modeling/concepts/index.mdx | 860 -------- .../concepts/multi-fact-queries.mdx | 499 ----- .../concepts/multi-stage-calculations.mdx | 714 ------ .../docs/data-modeling/concepts/syntax.mdx | 18 +- .../concepts/working-with-joins.mdx | 1945 ----------------- docs-mintlify/docs/data-modeling/cubes.mdx | 395 ++++ .../docs/data-modeling/dimensions.mdx | 432 ++++ ...xtending-cubes.mdx => extending-cubes.mdx} | 4 +- docs-mintlify/docs/data-modeling/joins.mdx | 542 +++++ docs-mintlify/docs/data-modeling/measures.mdx | 434 ++++ 
.../docs/data-modeling/multi-fact-views.mdx | 421 ++++ docs-mintlify/docs/data-modeling/overview.mdx | 19 +- docs-mintlify/docs/data-modeling/views.mdx | 449 ++++ .../docs/integrations/google-sheets.mdx | 2 +- .../docs/integrations/microsoft-excel.mdx | 2 +- .../matching-pre-aggregations.mdx | 6 +- .../recipes/data-modeling/custom-calendar.mdx | 2 +- .../data-modeling/custom-granularity.mdx | 4 +- .../recipes/data-modeling/event-analytics.mdx | 2 +- .../data-modeling/filtered-aggregates.mdx | 2 +- .../data-modeling/nested-aggregates.mdx | 2 +- .../data-modeling/period-over-period.mdx | 4 +- .../data-modeling}/polymorphic-cubes.mdx | 2 +- .../data-modeling/using-dynamic-measures.mdx | 2 +- docs-mintlify/recipes/data-modeling/xirr.mdx | 2 +- .../pre-aggregations/non-additivity.mdx | 2 +- .../configuration/environment-variables.mdx | 2 +- .../core-data-apis/dax-api/index.mdx | 4 +- .../reference/core-data-apis/mdx-api.mdx | 2 +- .../core-data-apis/rest-api/query-format.mdx | 4 +- .../core-data-apis/sql-api/joins.mdx | 6 +- .../reference/data-modeling/cube.mdx | 6 +- .../reference/data-modeling/dimensions.mdx | 6 +- .../reference/data-modeling/joins.mdx | 2 +- .../reference/data-modeling/measures.mdx | 10 +- .../reference/data-modeling/view.mdx | 2 +- 41 files changed, 2749 insertions(+), 4824 deletions(-) delete mode 100644 docs-mintlify/docs/data-modeling/concepts/calculated-members.mdx delete mode 100644 docs-mintlify/docs/data-modeling/concepts/index.mdx delete mode 100644 docs-mintlify/docs/data-modeling/concepts/multi-fact-queries.mdx delete mode 100644 docs-mintlify/docs/data-modeling/concepts/multi-stage-calculations.mdx delete mode 100644 docs-mintlify/docs/data-modeling/concepts/working-with-joins.mdx create mode 100644 docs-mintlify/docs/data-modeling/cubes.mdx create mode 100644 docs-mintlify/docs/data-modeling/dimensions.mdx rename docs-mintlify/docs/data-modeling/{concepts/code-reusability-extending-cubes.mdx => extending-cubes.mdx} (96%) create mode 
100644 docs-mintlify/docs/data-modeling/joins.mdx create mode 100644 docs-mintlify/docs/data-modeling/measures.mdx create mode 100644 docs-mintlify/docs/data-modeling/multi-fact-views.mdx create mode 100644 docs-mintlify/docs/data-modeling/views.mdx rename docs-mintlify/{docs/data-modeling/concepts => recipes/data-modeling}/polymorphic-cubes.mdx (97%) diff --git a/docs-mintlify/docs.json b/docs-mintlify/docs.json index d5b1378c8697b..d9bfe80ad087f 100644 --- a/docs-mintlify/docs.json +++ b/docs-mintlify/docs.json @@ -81,20 +81,24 @@ "pages": [ "docs/data-modeling/overview", { - "group": "Concepts", - "root": "docs/data-modeling/concepts/index", + "group": "Views", + "root": "docs/data-modeling/views", "pages": [ - "docs/data-modeling/concepts/syntax", - "docs/data-modeling/concepts/calculated-members", - "docs/data-modeling/concepts/multi-stage-calculations", - "docs/data-modeling/concepts/working-with-joins", - "docs/data-modeling/concepts/code-reusability-extending-cubes", - "docs/data-modeling/concepts/polymorphic-cubes", - "docs/data-modeling/concepts/multi-fact-queries", - "docs/data-modeling/concepts/data-blending" + "docs/data-modeling/multi-fact-views" ] }, + { + "group": "Cubes", + "root": "docs/data-modeling/cubes", + "pages": [ + "docs/data-modeling/extending-cubes" + ] + }, + "docs/data-modeling/joins", + "docs/data-modeling/measures", + "docs/data-modeling/dimensions", "docs/data-modeling/ai-context", + "docs/data-modeling/concepts/syntax", { "group": "Access Control", "pages": [ @@ -585,7 +589,8 @@ "recipes/data-modeling/cohort-retention", "recipes/data-modeling/xirr", "recipes/data-modeling/dbt", - "recipes/data-modeling/custom-order" + "recipes/data-modeling/custom-order", + "recipes/data-modeling/polymorphic-cubes" ] } ] diff --git a/docs-mintlify/docs/data-modeling/access-control/member-level-security.mdx b/docs-mintlify/docs/data-modeling/access-control/member-level-security.mdx index 75a3eeaf3197f..58ec6766cbf37 100644 --- 
a/docs-mintlify/docs/data-modeling/access-control/member-level-security.mdx +++ b/docs-mintlify/docs/data-modeling/access-control/member-level-security.mdx @@ -145,8 +145,8 @@ them entirely, see [data masking][ref-data-masking] in access policies. [ref-data-modeling-concepts]: /docs/data-modeling/concepts [ref-apis]: /reference -[ref-cubes]: /docs/data-modeling/concepts#cubes -[ref-views]: /docs/data-modeling/concepts#views +[ref-cubes]: /docs/data-modeling/cubes +[ref-views]: /docs/data-modeling/views [ref-dap]: /docs/data-modeling/access-control/data-access-policies [ref-ref-cubes]: /reference/data-modeling/cube [ref-ref-views]: /reference/data-modeling/view diff --git a/docs-mintlify/docs/data-modeling/access-control/row-level-security.mdx b/docs-mintlify/docs/data-modeling/access-control/row-level-security.mdx index 5f66ffa5a2ba7..72345f1791491 100644 --- a/docs-mintlify/docs/data-modeling/access-control/row-level-security.mdx +++ b/docs-mintlify/docs/data-modeling/access-control/row-level-security.mdx @@ -65,8 +65,8 @@ cube(`orders`, { [ref-data-modeling-concepts]: /docs/data-modeling/concepts [ref-apis]: /reference -[ref-cubes]: /docs/data-modeling/concepts#cubes -[ref-views]: /docs/data-modeling/concepts#views +[ref-cubes]: /docs/data-modeling/cubes +[ref-views]: /docs/data-modeling/views [ref-cubes-sql]: /reference/data-modeling/cube#sql [ref-dynamic-data-modeling]: /docs/data-modeling/dynamic [ref-dap]: /docs/data-modeling/access-control/data-access-policies diff --git a/docs-mintlify/docs/data-modeling/concepts/calculated-members.mdx b/docs-mintlify/docs/data-modeling/concepts/calculated-members.mdx deleted file mode 100644 index 62257a87e3cc9..0000000000000 --- a/docs-mintlify/docs/data-modeling/concepts/calculated-members.mdx +++ /dev/null @@ -1,724 +0,0 @@ ---- -title: Calculated measures and dimensions -description: Patterns for measures and dimensions built from SQL expressions, cross-member references, and subqueries—including ratios, proxies, and 
decomposition for pre-aggregations. ---- - -Often, dimensions are mapped to table columns and measures are defined as -aggregations of top of table columns. However, measures and dimensions can also -[reference][ref-references] other members of the same or other cubes, use [SQL -expressions][ref-sql-expressions], and perform calculations involving other measures -and dimensions. - -Most common patterns are known as [calculated measures](#calculated-measures), -[proxy dimensions](#proxy-dimensions), and [subquery dimensions](#subquery-dimensions). - -## Calculated measures - -**Calculated measures perform calculations on other measures using SQL functions and -operators.** They provide a way to decompose complex measures (e.g., ratios or percents) -into formulas that involve simpler measures. Also, calculated measures [can -help][ref-decomposition-recipe] to use [non-additive][ref-non-additive] measures with -pre-aggregations. - -### Members of the same cube - -In the following example, the `completed_ratio` measure is calculated as a division of -`completed_count` by total `count`. Note that the result is also multiplied by `1.0` -since [integer division in SQL][link-postgres-division] would otherwise produce an -integer value. 
- - - -```yaml title="YAML" -cubes: - - name: orders - sql: | - SELECT 1 AS id, 'processing' AS status UNION ALL - SELECT 2 AS id, 'completed' AS status UNION ALL - SELECT 3 AS id, 'completed' AS status - - measures: - - name: count - type: count - - - name: completed_count - type: count - filters: - - sql: "{CUBE}.status = 'completed'" - - - name: completed_ratio - sql: "1.0 * {completed_count} / {count}" - type: number -``` - -```javascript title="JavaScript" -cube(`orders`, { - sql: ` - SELECT 1 AS id, 'processing' AS status UNION ALL - SELECT 2 AS id, 'completed' AS status UNION ALL - SELECT 3 AS id, 'completed' AS status - `, - - measures: { - count: { - type: `count` - }, - - completed_count: { - type: `count`, - filters: [{ - sql: `${CUBE}.status = 'completed'` - }] - }, - - completed_ratio: { - sql: `1.0 * ${completed_count} / ${count}`, - type: `number` - } - } -}) -``` - - - -If you query for `completed_ratio`, Cube will generate the following SQL: - -```sql -SELECT - 1.0 * COUNT( - CASE WHEN ("orders".status = 'completed') THEN 1 END - ) / COUNT(*) "orders__completed_ratio" -FROM ( - SELECT 1 AS id, 'processing' AS status UNION ALL - SELECT 2 AS id, 'completed' AS status UNION ALL - SELECT 3 AS id, 'completed' AS status -) AS "orders" -``` - -### Members of other cubes - -If you have `first_cube` that is [joined][ref-joins] to `second_cube`, you can define a -calculated measure that references measures from both `first_cube` and `second_cube`. -When you query for this calculated measure, Cube will transparently generate SQL with -necessary joins. 
- -In the following example, the `orders.purchases_to_users_ratio` measure references the -`purchases` measure from the `orders` cube and the `count` measure from the `users` cube: - - - -```yaml title="YAML" -cubes: - - name: orders - sql: > - SELECT 1 AS id, 11 AS user_id, 'processing' AS status UNION ALL - SELECT 2 AS id, 11 AS user_id, 'completed' AS status UNION ALL - SELECT 3 AS id, 11 AS user_id, 'completed' AS status - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - measures: - - name: purchases - type: count - filters: - - sql: "{CUBE}.status = 'completed'" - - - name: users - sql: > - SELECT 11 AS id, 'Alice' AS name UNION ALL - SELECT 12 AS id, 'Bob' AS name UNION ALL - SELECT 13 AS id, 'Eve' AS name - - joins: - - name: orders - sql: "{CUBE}.id = {orders}.user_id" - relationship: one_to_many - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - measures: - - name: count - type: count - - - name: purchases_to_users_ratio - sql: "1.0 * {orders.purchases} / {CUBE.count}" - type: number -``` - -```javascript title="JavaScript" -cube(`orders`, { - sql: ` - SELECT 1 AS id, 11 AS user_id, 'processing' AS status UNION ALL - SELECT 2 AS id, 11 AS user_id, 'completed' AS status UNION ALL - SELECT 3 AS id, 11 AS user_id, 'completed' AS status - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - } - }, - - measures: { - purchases: { - type: `count`, - filters: [{ - sql: `${CUBE}.status = 'completed'` - }] - } - } -}) - -cube(`users`, { - sql: ` - SELECT 11 AS id, 'Alice' AS name UNION ALL - SELECT 12 AS id, 'Bob' AS name UNION ALL - SELECT 13 AS id, 'Eve' AS name - `, - - joins: { - orders: { - sql: `${CUBE}.id = ${orders}.user_id`, - relationship: `one_to_many` - } - }, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - } - }, - - measures: { - count: { - type: `count` - }, - - purchases_to_users_ratio: { - sql: `100.0 * ${orders.purchases} / 
${CUBE.count}`, - type: `number`, - format: `percent` - } - } -}) -``` - - - -If you query for `users.purchases_to_users_ratio`, Cube will generate the following SQL: - -```sql -SELECT - 1.0 * COUNT( - CASE - WHEN ("orders".status = 'completed') THEN "orders".id - END - ) / COUNT(DISTINCT "users".id) "users__purchases_to_users_ratio" -FROM ( - SELECT 11 AS id, 'Alice' AS name UNION ALL - SELECT 12 AS id, 'Bob' AS name UNION ALL - SELECT 13 AS id, 'Eve' AS name -) AS "users" -LEFT JOIN ( - SELECT 1 AS id, 11 AS user_id, 'processing' AS status UNION ALL - SELECT 2 AS id, 11 AS user_id, 'completed' AS status UNION ALL - SELECT 3 AS id, 11 AS user_id, 'completed' AS status -) AS "orders" ON "users".id = "orders".user_id -``` - -## Proxy dimensions - -**Proxy dimensions reference dimensions from the same cube or other cubes.** -Proxy dimensions are convenient for reusing existing dimensions when defining -new ones. - -### Members of the same cube - -If you have a dimension with a non-trivial definition, you can reference that -dimension to reuse the existing definition and reduce code duplication. - -In the following example, the `full_name` dimension references `initials` and -`last_name` dimensions of the same cube: - - - -```yaml title="YAML" -cubes: - - name: users - sql_table: users - - dimensions: - - name: initials - sql: "SUBSTR(first_name, 1, 1)" - type: string - - - name: last_name - sql: "UPPER(last_name)" - type: string - - - name: full_name - sql: "{initials} || '. ' || {last_name}" - type: string -``` - -```javascript title="JavaScript" -cube(`users`, { - sql_table: `users`, - - dimensions: { - initials: { - sql: `SUBSTR(first_name, 1, 1)`, - type: `string` - }, - - last_name: { - sql: `UPPER(last_name)`, - type: `string` - }, - - full_name: { - sql: `${initials} || '. ' || ${last_name}`, - type: `string` - } - } -}) -``` - - - -If you query for `users.full_name`, Cube will generate the following SQL: - -```sql -SELECT - SUBSTR(first_name, 1, 1) || '. 
' || UPPER(last_name) "users__full_name" -FROM - users AS "users" -GROUP BY - 1 -``` - -### Members of other cubes - -If you have `first_cube` that is [joined][ref-joins] to `second_cube`, you can use a -proxy dimension to bring `second_cube.dimension` to `first_cube` as `dimension` (or -under a different name). When you query for a proxy dimension, Cube will transparently -generate SQL with necessary joins. - -In the following example, `orders.user_name` is a proxy dimension that brings the -`users.name` dimension to `orders`. You can also see that there's a join relationship -between `orders` and `users`: - - - -```yaml title="YAML" -cubes: - - name: orders - sql: | - SELECT 1 AS id, 1 AS user_id UNION ALL - SELECT 2 AS id, 1 AS user_id UNION ALL - SELECT 3 AS id, 2 AS user_id - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: user_name - sql: "{users.name}" - type: string - - measures: - - name: count - type: count - - joins: - - name: users - sql: "{users}.id = {orders}.user_id" - relationship: one_to_many - - - name: users - sql: | - SELECT 1 AS id, 'Alice' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name - - dimensions: - - name: name - sql: name - type: string -``` - -```javascript title="JavaScript" -cube(`orders`, { - sql: ` - SELECT 1 AS id, 1 AS user_id UNION ALL - SELECT 2 AS id, 1 AS user_id UNION ALL - SELECT 3 AS id, 2 AS user_id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - user_name: { - sql: `${users.name}`, - type: `string` - } - }, - - measures: { - count: { - type: `count` - } - }, - - joins: { - users: { - sql: `${users}.id = ${orders}.user_id`, - relationship: `one_to_many` - } - } -}) - -cube(`users`, { - sql: ` - SELECT 1 AS id, 'Alice' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name - `, - - dimensions: { - name: { - sql: `name`, - type: `string` - } - } -}) -``` - - - -If you query for `orders.user_name` and `orders.count`, Cube will generate the -following 
SQL: - -```sql -SELECT - "users".name "orders__user_name", - COUNT(DISTINCT "orders".id) "orders__count" -FROM ( - SELECT 1 AS id, 1 AS user_id UNION ALL - SELECT 2 AS id, 1 AS user_id UNION ALL - SELECT 3 AS id, 2 AS user_id -) AS "orders" -LEFT JOIN ( - SELECT 1 AS id, 'Alice' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name -) AS "users" ON "users".id = "orders".user_id -GROUP BY 1 -``` - -Note that if you query for `orders.user_name` only, Cube will figure out that it's -equivalent to querying just `users.name` and there's no need to generate a join in SQL: - -```sql -SELECT - "users".name "orders__user_name" -FROM ( - SELECT 1 AS id, 'Alice' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name -) AS "users" -GROUP BY 1 -``` - -### Time dimension granularity - -When referencing a [time dimension][ref-time-dimension] of the same or another -cube, you can specificy a granularity to refer to a time value with that specific -granularity. It can be one of the [default granularities][ref-default-granularities] -(e.g., `year` or `week`) or a [custom granularity][ref-custom-granularities]: - - - -```yaml title="YAML" -cubes: - - name: users - sql: | - SELECT '2025-01-01T00:00:00Z' AS created_at UNION ALL - SELECT '2025-02-01T00:00:00Z' AS created_at UNION ALL - SELECT '2025-03-01T00:00:00Z' AS created_at - - dimensions: - - name: created_at - sql: created_at - type: time - - granularities: - - name: sunday_week - interval: 1 week - offset: -1 day - - - name: created_at__year - sql: "{created_at.year}" - type: time - - - name: created_at__sunday_week - sql: "{created_at.sunday_week}" - type: time -``` - -```javascript title="JavaScript" -cube(`users`, { - sql: ` - SELECT '2025-01-01T00:00:00Z' AS created_at UNION ALL - SELECT '2025-02-01T00:00:00Z' AS created_at UNION ALL - SELECT '2025-03-01T00:00:00Z' AS created_at - `, - - dimensions: { - created_at: { - sql: `created_at`, - type: `time`, - - granularities: { - sunday_week: { - interval: `1 week`, - offset: `-1 day` - 
} - } - }, - - created_at__year: { - sql: `${created_at.year}`, - type: `time` - }, - - created_at__sunday_week: { - sql: `${created_at.sunday_week}`, - type: `time` - } - } -}) -``` - - - -If you query for `users.created_at`, `users.created_at__sunday_week`, and -`users.created_at__year` dimensions, Cube will generate the following SQL: - -```sql -SELECT - "users".created_at "users__created_at", - date_trunc('week', ("users".created_at::timestamptz AT TIME ZONE 'UTC') - interval '-1 day') + interval '-1 day' "users__created_at__sunday_week", - date_trunc('year', ("users".created_at::timestamptz AT TIME ZONE 'UTC')) "users__created_at__year" -FROM ( - SELECT '2025-01-01T00:00:00Z' AS created_at UNION ALL - SELECT '2025-02-01T00:00:00Z' AS created_at UNION ALL - SELECT '2025-03-01T00:00:00Z' AS created_at -) AS "users" -GROUP BY 1, 2, 3 -``` - -## Subquery dimensions - -**Subquery dimensions reference measures from other cubes.** Subquery dimensions -provide a way to define measures that aggregate values of other measures. They can be -useful to calculate nested and filtered aggregates. - - - -See the following recipes: - -- To learn how to calculate [nested aggregates][ref-nested-aggregates-recipe]. -- To learn how to calculate [filtered aggregates][ref-filtered-aggregates-recipe]. - - - -If you have `first_cube` that is [joined][ref-joins] to `second_cube`, you can use a -subquery dimension to bring `second_cube.measure` to `first_cube` as `dimension` (or -under a different name). When you query for a subquery dimension, Cube will -transparently generate SQL with necessary joins. It works as a [correlated -subquery][wiki-correlated-subquery] but is implemented via joins for optimal -performance and portability. - -In the following example, `users.order_count` is a subquery dimension that brings the -`orders.count` measure to `users`. Note that the [`sub_query` parameter][ref-ref-subquery] -is set to `true` on `users.order_count`. 
You can also see that there's a join -relationship between `orders` and `users`: - - - -```yaml title="YAML" -cubes: - - name: orders - sql: | - SELECT 1 AS id, 1 AS user_id UNION ALL - SELECT 2 AS id, 1 AS user_id UNION ALL - SELECT 3 AS id, 2 AS user_id - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - measures: - - name: count - type: count - - joins: - - name: users - sql: "{users}.id = {orders}.user_id" - relationship: one_to_many - - - name: users - sql: | - SELECT 1 AS id, 'Alice' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: name - sql: name - type: string - - - name: order_count - sql: "{orders.count}" - type: number - sub_query: true - - measures: - - name: avg_order_count - sql: "{order_count}" - type: avg -``` - -```javascript title="JavaScript" -cube(`orders`, { - sql: ` - SELECT 1 AS id, 1 AS user_id UNION ALL - SELECT 2 AS id, 1 AS user_id UNION ALL - SELECT 3 AS id, 2 AS user_id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - } - }, - - measures: { - count: { - type: `count` - } - }, - - joins: { - users: { - sql: `${users}.id = ${orders}.user_id`, - relationship: `one_to_many` - } - } -}) - -cube(`users`, { - sql: ` - SELECT 1 AS id, 'Alice' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - name: { - sql: `name`, - type: `string` - }, - - order_count: { - sql: `${orders.count}`, - type: `number`, - sub_query: true - } - }, - - measures: { - avg_order_count: { - sql: `${order_count}`, - type: `avg` - } - } -}) -``` - - - -You can reference subquery dimensions in measures just like usual dimensions. In the -example above, the `avg_order_count` measure performs an aggregation on `order_count`. 
- -If you query for `users.name` and `users.order_count`, Cube will generate the -following SQL: - -```sql -SELECT - "users".name "users__name", - "users__order_count" "users__order_count" -FROM ( - SELECT 1 AS id, 'Alice' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name -) AS "users" -LEFT JOIN ( - SELECT - "users_order_count_subquery__users".id "users__id", - count(distinct "users_order_count_subquery__orders".id) "users__order_count" - FROM ( - SELECT 1 AS id, 1 AS user_id UNION ALL - SELECT 2 AS id, 1 AS user_id UNION ALL - SELECT 3 AS id, 2 AS user_id - ) AS "users_order_count_subquery__orders" - LEFT JOIN ( - SELECT 1 AS id, 'Alice' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name - ) AS "users_order_count_subquery__users" ON "users_order_count_subquery__users".id = "users_order_count_subquery__orders".user_id - GROUP BY 1 -) AS "users_order_count_subquery" ON "users_order_count_subquery"."users__id" = "users".id -GROUP BY 1, 2 -``` - - -[ref-references]: /docs/data-modeling/syntax#references -[ref-sql-expressions]: /docs/data-modeling/syntax#sql-expressions -[ref-joins]: /docs/data-modeling/concepts/working-with-joins -[ref-ref-subquery]: /reference/data-modeling/dimensions#sub_query -[ref-decomposition-recipe]: /recipes/pre-aggregations/non-additivity#decomposing-into-a-formula-with-additive-measures -[ref-nested-aggregates-recipe]: /recipes/data-modeling/nested-aggregates -[ref-filtered-aggregates-recipe]: /recipes/data-modeling/filtered-aggregates -[ref-non-additive]: /docs/data-modeling/concepts#measure-additivity -[link-postgres-division]: https://www.postgresql.org/docs/current/functions-math.html#FUNCTIONS-MATH -[wiki-correlated-subquery]: https://en.wikipedia.org/wiki/Correlated_subquery -[ref-time-dimension]: /reference/data-modeling/dimensions#type -[ref-default-granularities]: /docs/data-modeling/concepts#time-dimensions -[ref-custom-granularities]: /reference/data-modeling/dimensions#granularities \ No newline at end of file diff --git 
a/docs-mintlify/docs/data-modeling/concepts/calendar-cubes.mdx b/docs-mintlify/docs/data-modeling/concepts/calendar-cubes.mdx index 524fc7fbedec8..a7f952f0f68fa 100644 --- a/docs-mintlify/docs/data-modeling/concepts/calendar-cubes.mdx +++ b/docs-mintlify/docs/data-modeling/concepts/calendar-cubes.mdx @@ -470,8 +470,8 @@ When querying `sales.revenue` by `custom_calendar.date` with monthly granularity expression in the generated SQL. -[ref-time-shift]: /docs/data-modeling/concepts/multi-stage-calculations#time-shift -[ref-time-dimension]: /docs/data-modeling/concepts#time-dimensions +[ref-time-shift]: /docs/data-modeling/measures#time-shift +[ref-time-dimension]: /docs/data-modeling/dimensions#time-dimensions [ref-granularities]: /reference/data-modeling/dimensions#granularities [ref-cubes]: /reference/data-modeling/cube [ref-cubes-calendar]: /reference/data-modeling/cube#calendar diff --git a/docs-mintlify/docs/data-modeling/concepts/index.mdx b/docs-mintlify/docs/data-modeling/concepts/index.mdx deleted file mode 100644 index 6a3096829bf4b..0000000000000 --- a/docs-mintlify/docs/data-modeling/concepts/index.mdx +++ /dev/null @@ -1,860 +0,0 @@ ---- -title: Concepts -description: Learn foundational OLAP concepts like cubes, dimensions, measures, and joins used in Cube data modeling. ---- - -Cube's key concepts are [cubes](#cubes), [views](#views), and members -([measures](#measures), [dimensions](#dimensions)). This page is intended -for both newcomers and regular users to refresh their understanding. 
- - - - - -We'll use a sample e-commerce database with two tables, `orders` and -`line_items` to illustrate the concepts throughout this page: - -**`orders`** - -| **id** | **status** | **completed_at** | **created_at** | -| ------ | ---------- | -------------------------- | -------------------------- | -| 1 | completed | 2019-01-05 00:00:00.000000 | 2019-01-02 00:00:00.000000 | -| 2 | shipped | 2019-01-17 00:00:00.000000 | 2019-01-02 00:00:00.000000 | -| 3 | completed | 2019-01-27 00:00:00.000000 | 2019-01-02 00:00:00.000000 | -| 4 | shipped | 2019-01-09 00:00:00.000000 | 2019-01-02 00:00:00.000000 | -| 5 | processing | 2019-01-29 00:00:00.000000 | 2019-01-02 00:00:00.000000 | - -**`line_items`** - -| **id** | **product_id** | **order_id** | **quantity** | **price** | **created_at** | -| ------ | -------------- | ------------ | ------------ | --------- | -------------------------- | -| 1 | 31 | 1 | 1 | 275 | 2019-01-31 00:00:00.000000 | -| 2 | 49 | 2 | 6 | 248 | 2021-01-20 00:00:00.000000 | -| 3 | 89 | 3 | 6 | 197 | 2021-11-25 00:00:00.000000 | -| 4 | 71 | 4 | 8 | 223 | 2019-12-23 00:00:00.000000 | -| 5 | 64 | 5 | 5 | 75 | 2019-04-20 00:00:00.000000 | - -We'll be following the [entity-first](#entity-first-approach) approach to -designing the data model. - - - -See [this recipe][ref-entities-vs-metrics-recipe] to learn about entity-first and -metrics-first approaches. - - - -## Cubes - -_Cubes_ represent datasets in Cube and are conceptually similar to [views in -SQL][wiki-view-sql]. Cubes are usually declared in separate files with one -cube per file. 
Typically, a cube points to a single table in -your [data source][ref-data-sources] using the [`sql_table` property][ref-schema-ref-sql-table]: - - - -```yaml title="YAML" -cubes: - - name: orders - sql_table: orders -``` - -```javascript title="JavaScript" -cube(`orders`, { - sql_table: `orders` -}) -``` - - - - - -If you're using dbt, see [this recipe][ref-cube-with-dbt] to streamline defining cubes -on top of dbt models. - - - -You can also use the [`sql` property][ref-schema-ref-sql] to accommodate more -complex SQL queries: - - - -```yaml title="YAML" -cubes: - - name: orders - sql: | - SELECT * - FROM orders, line_items - WHERE orders.id = line_items.order_id - -``` - -```javascript title="JavaScript" -cube(`orders`, { - sql: ` - SELECT * - FROM orders, line_items - WHERE orders.id = line_items.order_id - ` -}) -``` - - - -Each cube contains the definitions of its _members_: [dimensions](#dimensions), -[measures](#measures), and [segments](#segments). You can control the access to -cubes and their members by configuring the [member-level security][ref-mls]. - -[Joins](#joins) are used to define relations between cubes. -[Pre-aggregations](#pre-aggregations) are used to accelerate queries to cubes. -Cubes and their members can be further referenced by [views](#views). - -Note that cubes support [extension][ref-extending-cubes], -[polymorphism][ref-polymorphic-cubes], and [data blending][ref-data-blending]. -Custom calendars, such as retail calendars, can be implemented using [calendar -cubes][ref-calendar-cubes]. - -Cubes can be defined statically and you can also build [dynamic data -models][ref-dynamic-data-models]. - - - -Cube supports data models that consist of thousands of cubes and views. -For large [multi-tenancy][ref-multitenancy] configurations, e.g., with more than -100 tenants, consider using a [multi-cluster deployment][ref-pmc]. - - - - - -See the reference documentaton for the full list of cube [parameters][ref-cubes]. 
- - - -## Views - -_Views_ sit on top of the data graph of cubes and create a facade of your whole -data model with which data consumers can interact. They are useful for defining -metrics, managing governance and data access, and controlling ambiguous join -paths. - -Views do **not** define their own members. Instead, they reference cubes by -specific join paths and include their members. - -In the example below, we create the `orders` view which includes select members -from `base_orders`, `products`, and `users` cubes: - - - -```yaml title="YAML" -views: - - name: orders - - cubes: - - join_path: base_orders - includes: - - status - - created_date - - total_amount - - total_amount_shipped - - count - - average_order_value - - - join_path: base_orders.line_items.products - includes: - - name: name - alias: product - - - join_path: base_orders.users - prefix: true - includes: "*" - excludes: - - company -``` - -```javascript title="JavaScript" -view(`orders`, { - cubes: [ - { - join_path: base_orders, - includes: [ - `status`, - `created_date`, - `total_amount`, - `total_amount_shipped`, - `count`, - `average_order_value` - ] - }, - { - join_path: base_orders.line_items.products, - includes: [ - { - name: `name`, - alias: `product` - } - ] - }, - { - join_path: base_orders.users, - prefix: true, - includes: `*`, - excludes: [`company`] - } - ] -}) -``` - - - -Views do **not** define any [pre-aggregations](#pre-aggregations). Instead, -they [reuse][ref-matching-preaggs] pre-aggregations from underlying cubes. - -View can be defined statically and you can also build [dynamic data -models][ref-dynamic-data-models]. - - - -See the reference documentaton for the full list of view [parameters][ref-views]. - - - -### Folders - -Optionally, members of a view can be organized into [folders][ref-ref-folders]. -Each folder would contain a subset of members of the view. 
- -Cube supports both flat and nested folder structures, which can be used with various -[visualization tools][ref-viz-tools]. If a specific tool does not support nested folders, -they will be exposed to such a tool as an equivalent flat structure. Check [APIs & -Integrations][ref-apis-support] for details on the nested folders support. - -## Dimensions - -_Dimensions_ represent the properties of a **single** data point in the cube. -[The `orders` table](#top) contains only dimensions, so representing them in the -`orders` cube is straightforward: - - - -```yaml title="YAML" -cubes: - - name: orders - # ... - - dimensions: - - name: id - sql: id - type: number - # Here we explicitly let Cube know this field is the primary key - # This is required for de-duplicating results when using joins - primary_key: true - - - name: status - sql: status - type: string -``` - -```javascript title="JavaScript" -cube(`orders`, { - // ... - - dimensions: { - id: { - sql: `id`, - type: `number`, - // Here we explicitly let Cube know this field is the primary key - // This is required for de-duplicating results when using joins - primary_key: true - }, - - status: { - sql: `status`, - type: `string` - } - } -}) -``` - - - -Note that the `id` dimension is defined as a [primary key][ref-ref-primary-key]. -It is also possible to have more than one primary key dimension in a cube if -you'd like them all to be parts of a composite key. - -[The `line_items` table](#top) also has a couple of dimensions which can be -represented as follows: - - - -```yaml title="YAML" -cubes: - - name: line_items - # ... - - dimensions: - - name: id - sql: id - type: number - # Again, we explicitly let Cube know this field is the primary key - # This is required for de-duplicating results when using joins - primary_key: true - - - name: order_id - sql: order_id - type: number -``` - -```javascript title="JavaScript" -cube(`line_items`, { - // ... 
- - dimensions: { - id: { - sql: `id`, - type: `number`, - // Again, we explicitly let Cube know this field is the primary key - // This is required for de-duplicating results when using joins - primary_key: true - }, - - order_id: { - sql: `order_id`, - type: `number` - } - } -}) -``` - - - -If needed, dimensions can be organized into [hierarchies][ref-ref-hierarchies]. -Also, [proxy dimensions][ref-proxy-dimensions] are helpful for code reusability -and [subquery dimensions][ref-subquery-dimensions] can be used to join cubes -implicitly. - - - -See the reference documentation for the full list of [dimension parameters][ref-dimensions]. - - - -### Dimension types - -Dimensions can be of different types, e.g., `string`, `number`, or `time`. Often, -data types in SQL are mapped to dimension types in the following way: - -| Data type in SQL | Dimension type in Cube | -| --- | --- | -| `timestamp`, `date`, `time` | [`time`](/reference/data-modeling/dimensions#type) | -| `text`, `varchar` | [`string`](/reference/data-modeling/dimensions#type) | -| `integer`, `bigint`, `decimal` | [`number`](/reference/data-modeling/dimensions#type) | -| `boolean` | [`boolean`](/reference/data-modeling/dimensions#type) | - - - -See the [dimension type reference][ref-ref-dimension-types] for details. - - - -### Time dimensions - -Time-based properties are modeled using dimensions of the [`time` -type][ref-ref-time-dimensions]. They allow grouping the result set by a unit of -time (e.g., days, weeks, months, etc.), also known as the *time dimension -granularity*. - -The following granularities are available by default for any time dimension: -`year`, `quarter`, `month`, `week` (starting on Monday), `day`, `hour`, `minute`, -`second`. You can also define [custom granularities][ref-ref-dimension-granularities] -and optionally expose them via [proxy dimensions][ref-proxy-granularity] in case -you need to use weeks starting on Sunday, fiscal years, etc. 
- - - -See the following recipes: -- For a [custom granularity][ref-custom-granularity-recipe] example. -- For a [custom calendar][ref-custom-calendar-recipe] example. - - - -Here's how we can add time dimensions to the data model: - - - -```yaml title="YAML" -cubes: - - name: orders - # ... - - dimensions: - - name: created_at - sql: created_at - type: time - # You can use this time dimension with all default granularities: - # year, quarter, month, week, day, hour, minute, second - - - name: completed_at - sql: completed_at - type: time - # You can use this time dimension with all default granularities - # and an additional custom granularity defined below - granularities: - - name: fiscal_year_starting_on_february_01 - interval: 1 year - offset: 1 month -``` - -```javascript title="JavaScript" -cube(`orders`, { - // ... - - dimensions: { - created_at: { - sql: `created_at`, - type: `time` - // You can use this time dimension with all default granularities: - // year, quarter, month, week, day, hour, minute, second - }, - - completed_at: { - sql: `completed_at`, - type: `time`, - // You can use this time dimension with all default granularities - // and an additional custom granularity defined below - granularities: { - fiscal_year_starting_on_february_01: { - interval: `1 year`, - offset: `1 month` - } - } - } - } -}) -``` - - - -Time dimensions are essential to enabling performance boosts such as -[partitioned pre-aggregations][ref-caching-use-preaggs-partition-time] and -[incremental refreshes][ref-tutorial-incremental-preagg]. - -## Measures - -_Measures_ represent the properties of a **set of data points** in the cube. To -add a measure called `count` to our `orders` cube, for example, we can do the -following: - - - -```yaml title="YAML" -cubes: - - name: orders - # ... - - measures: - - name: count - type: count -``` - -```javascript title="JavaScript" -cube(`orders`, { - // ... 
- - measures: { - count: { - type: `count` - } - } -}) -``` - - - -In our `line_items` cube, we can also create a measure to sum up the total value -of line items sold: - - - -```yaml title="YAML" -cubes: - - name: line_items - # ... - - measures: - - name: total - sql: price - type: sum -``` - -```javascript title="JavaScript" -cube(`line_items`, { - // ... - - measures: { - total: { - sql: `price`, - type: `sum` - } - } -}) -``` - - - -[Calculated measures][ref-calculated-measures] and [subquery dimensions][ref-subquery-dimensions] -can be used for measure composition. [Multi-stage calculations][ref-multi-stage-calculations] -enable data modeling of more sophisticated measures. - - - -See the reference documentation for the full list of measure [parameters][ref-measures]. - - - -### Measure types - -Measures can be of different types, e.g., `count`, `sum`, or `number`. Often, -aggregate functions in SQL are mapped to measure types in the following way: - -| Aggregate function in SQL | Measure type in Cube | -| --- | --- | -| `AVG` | [`avg`](/reference/data-modeling/measures#type) | -| `BOOL_AND`, `BOOL_OR` | [`boolean`](/reference/data-modeling/measures#type) | -| `COUNT` | [`count`](/reference/data-modeling/measures#type) | -| `COUNT(DISTINCT …)` | [`count_distinct`](/reference/data-modeling/measures#type) | -| `APPROX_COUNT_DISTINCT` | [`count_distinct_approx`](/reference/data-modeling/measures#type) | -| `MAX` | [`max`](/reference/data-modeling/measures#type) | -| `MIN` | [`min`](/reference/data-modeling/measures#type) | -| `PERCENTILE_CONT`, `MEDIAN` | [`number`](/reference/data-modeling/measures#type) | -| `STRING_AGG`, `LISTAGG` | [`string`](/reference/data-modeling/measures#type) | -| `SUM` | [`sum`](/reference/data-modeling/measures#type) | -| Any function returning a timestamp, e.g., `MAX(time)` | [`time`](/reference/data-modeling/measures#type) | - - - -See the [measure type reference][ref-ref-measure-types] for details. 
- - - - - -See the following recipes: -- To learn how to define [average and percentile measures][ref-avg-and-percentile-recipe], -- To learn how to calculate [period-over-period changes][ref-period-over-period-recipe]. - - - -### Measure additivity - -Additivity is a property of measures that determines whether measure values, -once calculated for a set of dimensions, can be further aggregated to calculate -measure values for a subset of these dimensions. - -Measure additivity has an impact on [pre-aggregation -matching][ref-matching-preaggs]. - -Additivity of a measure depends on its [type](#measure-types). Only measures -with the following types are considered *additive*: -[`count`](/reference/data-modeling/measures#type), -[`count_distinct_approx`](/reference/data-modeling/measures#type), -[`min`](/reference/data-modeling/measures#type), -[`max`](/reference/data-modeling/measures#type), -[`sum`](/reference/data-modeling/measures#type). -Measures with all other types are considered *non-additive*. 
- -#### Example - -Consider the following cube: - - - -```yaml title="YAML" -cubes: - - name: employees - sql: | - SELECT 1 AS id, 'Ali' AS first_name, 20 AS age, 'Los Gatos' AS city UNION ALL - SELECT 2 AS id, 'Bob' AS first_name, 30 AS age, 'San Diego' AS city UNION ALL - SELECT 3 AS id, 'Eve' AS first_name, 40 AS age, 'San Diego' AS city - - measures: - - name: count - type: count - - - name: avg_age - sql: age - type: avg - - dimensions: - - name: city - sql: city - type: string -``` - -```javascript title="JavaScript" -cube(`employees`, { - sql: ` - SELECT 1 AS id, 'Ali' AS first_name, 20 AS age, 'Los Gatos' AS city UNION ALL - SELECT 2 AS id, 'Bob' AS first_name, 30 AS age, 'San Diego' AS city UNION ALL - SELECT 3 AS id, 'Eve' AS first_name, 40 AS age, 'San Diego' AS city - `, - - measures: { - count: { - type: `count` - }, - - avg_age: { - sql: `age`, - type: `avg` - } - }, - - dimensions: { - city: { - sql: `city`, - type: `string` - } - } -}) -``` - - - -If we run a query that includes `city` as a dimension and `count` and `avg_age` -as measures, we'll get the following results: - -| city | count | avg_age | -| --------- | ----- | ------- | -| Los Gatos | 1 | 20 | -| San Diego | 2 | 35 | - -Then, if we remove the `city` dimension from the query, we'll get the following -results: - -| count | avg_age | -| ----- | ------- | -| 3 | 30 | - -As you can see, the value of the `count` measure that we've got for the second -query could have been calculated based on the results of the first one: -`1 + 2 = 3`. It explains why the `count` measure, having the `count` type, is -considered *additive*. - -However, the value of the `avg_age` measure that we've got for the second query -can't be calculated based on the results of the first one: there's no way to -derive `30` from `20` and `35`. This is why the `avg_age` measure, having the -`avg` type, is considered *non-additive*. 
- -### Leaf measures - -Measures that do not [reference][ref-syntax-references] other measures are -considered *leaf measures*. - -By definition, all measures that only reference SQL -[columns][ref-syntax-references-column] and expressions are *leaf measures*. -On the other hand, [calculated measures][ref-calculated-measures] might not -necessarily be *leaf measures* because they can reference other measures. - -Whether a query contains only [additive](#measure-additivity) leaf measures has -an impact on [pre-aggregation matching][ref-matching-preaggs]. - -## Joins - -_Joins_ define the relationships between cubes, which then allows accessing and -comparing properties from two or more cubes at the same time. In Cube, all joins -are `LEFT JOIN`s. - - - -An `INNER JOIN` can be replicated with Cube; when making a Cube query, add a -filter for `IS NOT NULL` on the required column. - - - -In the following example, we are left-joining the `line_items` cube onto our -`orders` cube: - - - -```yaml title="YAML" -cubes: - - name: orders - # ... - - joins: - - name: line_items - # Here we use the `CUBE` global to refer to the current cube, - # so the following is equivalent to `orders.id = line_items.order_id` - sql: "{CUBE}.id = {line_items.order_id}" - relationship: many_to_one -``` - -```javascript title="JavaScript" -cube(`orders`, { - // ... - - joins: { - line_items: { - relationship: `many_to_one`, - // Here we use the `CUBE` global to refer to the current cube, - // so the following is equivalent to `orders.id = line_items.order_id` - sql: `${CUBE}.id = ${line_items.order_id}` - } - } -}) -``` - - - -There are three types of join relationships (`one_to_one`, `one_to_many`, and -`many_to_one`) and a few [other concepts][ref-working-with-joins] such as the -direction of joins and transitive joins pitfalls. - - - -See the reference documentation for the full list of join [parameters][ref-joins]. 
- - - -## Segments - -_Segments_ are pre-defined filters that are kept within the data model instead of -[a Cube query][ref-backend-query-filters]. They help to simplify queries and make -it easy to reuse common filters across a variety of queries. - -To add a segment which limits results to completed orders, we can do the -following: - - - -```yaml title="YAML" -cubes: - - name: orders - # ... - - segments: - - name: only_completed - sql: "{CUBE}.status = 'completed'" -``` - -```javascript title="JavaScript" -cube(`orders`, { - // ... - - segments: { - only_completed: { - sql: `${CUBE}.status = 'completed'` - } - } -}) -``` - - - - - -See the reference documentation for the full list of segment [parameters][ref-segments]. - - - -## Pre-aggregations - -_Pre-aggregations_ provide a powerful way to accelerate frequently used queries -and keep the cache up-to-date. Within a data model, they are defined using the -`pre_aggregations` property: - - - -```yaml title="YAML" -cubes: - - name: orders - # ... - - pre_aggregations: - - name: main - measures: - - count - dimensions: - - status - time_dimension: created_at - granularity: day -``` - -```javascript title="JavaScript" -cube(`orders`, { - // ... - - pre_aggregations: { - main: { - measures: [CUBE.count], - dimensions: [CUBE.status], - timeDimension: CUBE.created_at, - granularity: `day` - } - } -}) -``` - - - -A more thorough introduction can be found in [Getting Started with -Pre-Aggregations][ref-caching-preaggs-intro]. - - - -See the reference documentation for the full list of pre-aggregation -[parameters][ref-preaggs]. 
- - - - -[ref-backend-query-filters]: /reference/rest-api/query-format#filters-format -[ref-caching-preaggs-intro]: /docs/pre-aggregations/getting-started-pre-aggregations -[ref-caching-use-preaggs-partition-time]: /docs/pre-aggregations/using-pre-aggregations#partitioning -[ref-ref-dimension-types]: /reference/data-modeling/dimensions#type -[ref-ref-measure-types]: /reference/data-modeling/measures#type -[ref-schema-ref-sql]: /reference/data-modeling/cube#sql -[ref-schema-ref-sql-table]: /reference/data-modeling/cube#sql_table -[ref-tutorial-incremental-preagg]: /reference/data-modeling/pre-aggregations#incremental -[ref-cubes]: /reference/data-modeling/cube -[ref-views]: /reference/data-modeling/view -[ref-dimensions]: /reference/data-modeling/dimensions -[ref-measures]: /reference/data-modeling/measures -[ref-joins]: /reference/data-modeling/joins -[ref-segments]: /reference/data-modeling/segments -[ref-preaggs]: /reference/data-modeling/pre-aggregations -[ref-extending-cubes]: /docs/data-modeling/concepts/code-reusability-extending-cubes -[ref-polymorphic-cubes]: /docs/data-modeling/concepts/polymorphic-cubes -[ref-data-blending]: /docs/data-modeling/concepts/data-blending -[ref-dynamic-data-models]: /docs/data-modeling/dynamic -[ref-proxy-dimensions]: /docs/data-modeling/concepts/calculated-members#proxy-dimensions -[ref-subquery-dimensions]: /docs/data-modeling/concepts/calculated-members#subquery-dimensions -[ref-calculated-measures]: /docs/data-modeling/concepts/calculated-members#calculated-measures -[ref-working-with-joins]: /docs/data-modeling/concepts/working-with-joins - -[wiki-view-sql]: https://en.wikipedia.org/wiki/View_(SQL) -[ref-matching-preaggs]: /docs/pre-aggregations/matching-pre-aggregations -[ref-syntax-references]: /docs/data-modeling/syntax#references -[ref-syntax-references-column]: /docs/data-modeling/syntax#column -[ref-calculated-measures]: /docs/data-modeling/overview#4-using-calculated-measures -[ref-multitenancy]: 
/admin/connect-to-data/multitenancy -[ref-pmc]: /docs/deployment/cloud/deployment-types#production-multi-cluster -[ref-ref-time-dimensions]: /reference/data-modeling/dimensions#type -[ref-ref-dimension-granularities]: /reference/data-modeling/dimensions#granularities -[ref-ref-primary-key]: /reference/data-modeling/dimensions#primary_key -[ref-custom-granularity-recipe]: /recipes/data-modeling/custom-granularity -[ref-proxy-granularity]: /docs/data-modeling/concepts/calculated-members#time-dimension-granularity -[ref-mls]: /docs/data-modeling/access-control/member-level-security -[ref-ref-hierarchies]: /reference/data-modeling/hierarchies -[ref-ref-folders]: /reference/data-modeling/view#folders -[ref-multi-stage-calculations]: /docs/data-modeling/concepts/multi-stage-calculations -[ref-entities-vs-metrics-recipe]: /recipes/data-modeling/designing-metrics -[ref-avg-and-percentile-recipe]: /recipes/data-modeling/percentiles -[ref-period-over-period-recipe]: /recipes/data-modeling/period-over-period -[ref-custom-calendar-recipe]: /recipes/data-modeling/custom-calendar -[ref-cube-with-dbt]: /recipes/data-modeling/dbt -[ref-apis-support]: /reference#data-modeling -[ref-viz-tools]: /admin/connect-to-data/visualization-tools -[ref-data-sources]: /admin/connect-to-data/data-sources -[ref-calendar-cubes]: /docs/data-modeling/concepts/calendar-cubes \ No newline at end of file diff --git a/docs-mintlify/docs/data-modeling/concepts/multi-fact-queries.mdx b/docs-mintlify/docs/data-modeling/concepts/multi-fact-queries.mdx deleted file mode 100644 index 2478ef352154a..0000000000000 --- a/docs-mintlify/docs/data-modeling/concepts/multi-fact-queries.mdx +++ /dev/null @@ -1,499 +0,0 @@ ---- -title: Multi-fact queries -description: When a view includes measures from multiple root fact tables, Cube builds separate aggregating subqueries and joins their results on common dimensions. 
---- - -When a [view](/reference/data-modeling/view) includes measures from multiple root -fact tables, Cube can automatically execute a _multi-fact query_. Instead of -joining all fact tables together and risking row multiplication, Cube builds a -**separate aggregating subquery for each fact table** and then joins the results -on the common dimensions. - - - -Multi-fact queries are powered by Tesseract, the [next-generation data modeling -engine][link-tesseract]. Tesseract is currently in preview. Use the -[`CUBEJS_TESSERACT_SQL_PLANNER`](/reference/configuration/environment-variables#cubejs_tesseract_sql_planner) environment variable to enable it. - - - -## When a multi-fact query is triggered - -A multi-fact query is triggered when a view has **multiple root fact tables** -whose measures are queried together. Each distinct root fact table in the view -becomes its own aggregating subquery, and the results are joined on the common -dimensions shared across those facts. - -Consider a data model with two fact cubes, `orders` and `returns`. 
Both are -joined to two shared dimension tables: `customers` and a `dates` date spine: - - - -```yaml title="YAML" -cubes: - - name: customers - sql_table: customers - - dimensions: - - name: id - type: number - sql: id - primary_key: true - - name: name - type: string - sql: name - - name: city - type: string - sql: city - - - name: dates - sql_table: dates - - dimensions: - - name: date - type: time - sql: date - primary_key: true - - - name: orders - sql_table: orders - - joins: - - name: customers - relationship: many_to_one - sql: "{orders}.customer_id = {customers.id}" - - name: dates - relationship: many_to_one - sql: "DATE_TRUNC('day', {orders}.created_at) = {dates.date}" - - dimensions: - - name: id - type: number - sql: id - primary_key: true - - name: customer_id - type: number - sql: customer_id - - name: status - type: string - sql: status - - measures: - - name: count - type: count - - name: total_amount - type: sum - sql: amount - - - name: returns - sql_table: returns - - joins: - - name: customers - relationship: many_to_one - sql: "{returns}.customer_id = {customers.id}" - - name: dates - relationship: many_to_one - sql: "DATE_TRUNC('day', {returns}.created_at) = {dates.date}" - - dimensions: - - name: id - type: number - sql: id - primary_key: true - - name: customer_id - type: number - sql: customer_id - - measures: - - name: count - type: count - - name: total_refund - type: sum - sql: refund_amount -``` - -```javascript title="JavaScript" -cube(`customers`, { - sql_table: `customers`, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - name: { - sql: `name`, - type: `string` - }, - - city: { - sql: `city`, - type: `string` - } - } -}) - -cube(`dates`, { - sql_table: `dates`, - - dimensions: { - date: { - sql: `date`, - type: `time`, - primary_key: true - } - } -}) - -cube(`orders`, { - sql_table: `orders`, - - joins: { - customers: { - relationship: `many_to_one`, - sql: `${orders}.customer_id = 
${customers.id}` - }, - - dates: { - relationship: `many_to_one`, - sql: `DATE_TRUNC('day', ${orders}.created_at) = ${dates.date}` - } - }, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - customer_id: { - sql: `customer_id`, - type: `number` - }, - - status: { - sql: `status`, - type: `string` - } - }, - - measures: { - count: { - type: `count` - }, - - total_amount: { - sql: `amount`, - type: `sum` - } - } -}) - -cube(`returns`, { - sql_table: `returns`, - - joins: { - customers: { - relationship: `many_to_one`, - sql: `${returns}.customer_id = ${customers.id}` - }, - - dates: { - relationship: `many_to_one`, - sql: `DATE_TRUNC('day', ${returns}.created_at) = ${dates.date}` - } - }, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - customer_id: { - sql: `customer_id`, - type: `number` - } - }, - - measures: { - count: { - type: `count` - }, - - total_refund: { - sql: `refund_amount`, - type: `sum` - } - } -}) -``` - - - -You can then define a view where `orders` and `returns` are separate root -fact tables. The shared dimension tables — `customers` and `dates` — are -each included with their own root-level join paths, not nested under a -specific fact like `orders.customers`. This makes their dimensions common to -both facts so they can be used to join the subquery results. 
The `prefix` -parameter disambiguates identically named members from different fact cubes: - - - -```yaml title="YAML" -views: - - name: customer_overview - cubes: - - join_path: orders - includes: - - count - - total_amount - prefix: true - - join_path: customers - includes: - - name - - city - - join_path: dates - includes: - - date - - join_path: returns - includes: - - count - - total_refund - prefix: true -``` - -```javascript title="JavaScript" -view(`customer_overview`, { - cubes: [ - { - join_path: orders, - includes: [`count`, `total_amount`], - prefix: true - }, - { - join_path: customers, - includes: [`name`, `city`] - }, - { - join_path: dates, - includes: [`date`] - }, - { - join_path: returns, - includes: [`count`, `total_refund`], - prefix: true - } - ] -}) -``` - - - -This view has two root fact tables (`orders` and `returns`) and two shared -dimension tables (`customers` and `dates`). Because each dimension table is -included at its own root-level join path rather than scoped under a single -fact, their dimensions are available as common join keys for both fact -subqueries. - -When you query measures from both facts — such as `orders_count`, -`orders_total_amount`, `returns_count`, and `returns_total_refund` — grouped -by common dimensions like `name`, `city`, and `date`, Cube detects the -multiple roots and triggers a multi-fact query. - -## Join path requirements - -To ensure correct join paths within a multi-fact view, follow these rules: - -- **Within each root fact table**, any join paths to related cubes (e.g., - `orders.line_items`) should be listed explicitly in the view. This removes - ambiguity about which tables are involved in each fact's subquery. -- **Dimension tables that join to other, less granular dimension tables** - (e.g., `customers` joining to `regions`) should also declare those join - paths explicitly in the view if those dimensions are needed. 
-- **Between root fact tables and root dimension tables**, one-hop joins must - be defined at the cube level (as shown in the `orders` and `returns` cubes - above, each declaring a direct join to `customers` and `dates`). This - allows the multi-fact view to unambiguously resolve how each fact reaches - each common dimension table. - -In the example above, both `orders` and `returns` declare direct joins to -`customers` and `dates`. This means the view can build separate subqueries -where each fact independently joins to the same dimension tables — without -relying on transitive or implicit join paths. - -## How multi-fact queries work - -Cube analyzes the join hints for each measure and groups them by their -**join key** — the set of tables involved in the join path from the root to -the measure's cube. Measures that share the same join key are placed in the -same group; measures with different join keys form separate groups. When there -are **two or more groups**, the query is classified as multi-fact. - -The query is then executed in the following stages: - -### 1. Separate aggregating subqueries - -For each group of measures, Cube builds an independent aggregating subquery. -Each subquery joins only the tables needed for that group's measures, applies -all relevant filters and segments, and aggregates the results by the common -dimensions. - -For example, given a query for `orders_count`, `orders_total_amount`, -`returns_count`, and `returns_total_refund` grouped by `name`, `city`, and -`date`: - -- **Subquery 1** (orders group): joins `orders` to `customers` and `dates`, - computes `COUNT(*)` and `SUM(amount)`, grouped by `customers.name`, - `customers.city`, and `dates.date`. -- **Subquery 2** (returns group): joins `returns` to `customers` and `dates`, - computes `COUNT(*)` and `SUM(refund_amount)`, grouped by `customers.name`, - `customers.city`, and `dates.date`. - -### 2. 
Join on common dimensions - -The results of the subqueries are joined with `FULL JOIN` semantics on all -common dimension columns — in this case, `name`, `city`, and `date`. This -ensures that all rows from both fact tables are represented, even when a -customer has orders but no returns, or vice versa. The actual SQL -implementation may vary depending on database capabilities. - -### 3. Final result - -The final `SELECT` pulls measures from their respective subqueries and -dimensions from the joined result. Rows with data in only one fact table -will show `NULL` for measures from the other. - -For the `customer_overview` view, the result looks like: - -| name | city | date | orders_count | orders_total_amount | returns_count | returns_total_refund | -| --- | --- | --- | --- | --- | --- | --- | -| Alice | New York | 2025-01-15 | 2 | 200.00 | 0 | NULL | -| Alice | New York | 2025-02-10 | 2 | 225.00 | 1 | 100.00 | -| Bob | Seattle | 2025-01-20 | 3 | 550.00 | 2 | 130.00 | -| Charlie | New York | 2025-02-05 | 0 | NULL | 2 | 100.00 | -| Diana | Boston | 2025-03-01 | 1 | 400.00 | 0 | NULL | - -Notice that Charlie has no orders and Diana has no returns — both are still -included in the results with `NULL` values for the missing fact table. - -## More than two fact tables - -Multi-fact queries are not limited to two root fact tables. If a view includes -three or more fact tables, each one gets its own aggregating subquery, and all -results are joined together on the common dimensions. - -For instance, adding a `reviews` cube as a third root fact in the view and -querying `orders_count`, `returns_count`, and `reviews_count` grouped by -`name`, `city`, and `date` produces three separate subqueries, all joined on -those common dimensions. - -## All facts must share the same common dimensions - -Every root fact table in a multi-fact view must be joinable to the **same set -of common dimension tables**. 
The subquery results are joined on these common -dimensions, so if a fact table cannot reach one of the dimension tables, the -join will fail. - -If a fact table does not naturally have a foreign key for one of the common -dimension tables, you can create a **synthetic join** by selecting `NULL` for -the missing foreign key in the cube's `sql` definition: - - - -```yaml title="YAML" -cubes: - - name: refunds - sql: > - SELECT *, NULL AS customer_id FROM refunds - joins: - - name: customers - relationship: many_to_one - sql: "{refunds}.customer_id = {customers.id}" - - name: dates - relationship: many_to_one - sql: "DATE_TRUNC('day', {refunds}.created_at) = {dates.date}" - - dimensions: - - name: id - type: number - sql: id - primary_key: true - - measures: - - name: count - type: count - - name: total_amount - type: sum - sql: amount -``` - -```javascript title="JavaScript" -cube(`refunds`, { - sql: `SELECT *, NULL AS customer_id FROM refunds`, - - joins: { - customers: { - relationship: `many_to_one`, - sql: `${refunds}.customer_id = ${customers.id}` - }, - - dates: { - relationship: `many_to_one`, - sql: `DATE_TRUNC('day', ${refunds}.created_at) = ${dates.date}` - } - }, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - } - }, - - measures: { - count: { - type: `count` - }, - - total_amount: { - sql: `amount`, - type: `sum` - } - } -}) -``` - - - -In this example, the `refunds` table has no `customer_id` column. By selecting -`NULL AS customer_id` in the cube's SQL, the join to `customers` is -syntactically valid. The `customer_id` will always be `NULL`, so refund rows -will never match a specific customer, but the subquery can still participate -in the multi-fact join on the full set of common dimensions. - -## Filters in multi-fact queries - -Filters on **common dimensions** (like `name`, `city`, or `date`) are applied to every -subquery, ensuring consistent filtering across all fact tables. 
- -Filters on **fact-specific dimensions** (like `orders.status`) are applied only -to the subquery for that specific fact table. Other fact table subqueries remain -unaffected. - -**Measure filters** (e.g., `orders_count > 1`) are applied as `HAVING` -conditions after the subqueries are joined, filtering the combined result set. - -## Segments in multi-fact queries - -[Segments](/reference/data-modeling/segments) that belong to a specific fact table are applied only -to that fact table's subquery. For example, applying an `orders.completed_orders` -segment filters only the orders subquery while leaving returns unaffected. - -[link-tesseract]: https://cube.dev/blog/introducing-tesseract diff --git a/docs-mintlify/docs/data-modeling/concepts/multi-stage-calculations.mdx b/docs-mintlify/docs/data-modeling/concepts/multi-stage-calculations.mdx deleted file mode 100644 index 7207d669b8691..0000000000000 --- a/docs-mintlify/docs/data-modeling/concepts/multi-stage-calculations.mdx +++ /dev/null @@ -1,714 +0,0 @@ ---- -title: Multi-stage calculations -description: "Measures are usually calculated as aggregations over dimensions or arbitrary SQL expressions." ---- - -[Measures][ref-measures] are usually calculated as aggregations over [dimensions][ref-dimensions] -or arbitrary SQL expressions. - -_Multi-stage calculations_ enable data modeling of more sophisticated _multi-stage measures_. -They are calculated in two or more stages and often involve manipulations on already -aggregated data. Each stage results in one or more [common table expressions][link-cte] -(CTEs) in the generated SQL query. - - - -Multi-stage calculations are powered by Tesseract, the [next-generation data modeling -engine][link-tesseract]. Tesseract is currently in preview. Use the -[`CUBEJS_TESSERACT_SQL_PLANNER`](/reference/configuration/environment-variables#cubejs_tesseract_sql_planner) environment variable to enable it. 
- - - - - -Multi-stage calculations are not currently accelerated by pre-aggregations. -Please track [this issue](https://github.com/cube-js/cube/issues/8487). - - - -Common uses of multi-stage calculations: - -- [Rolling window](#rolling-window), e.g., cumulative counts or moving averages. -- [Time-shift](#time-shift), e.g., year-over-year sales growth. -- [Period-to-date](#period-to-date), e.g., year-to-date (YTD) analysis. -- [Conditional measure](#conditional-measure), e.g., amount in a selected currency. - -**Some calculations use inner and outer aggregation stages.** The _inner_ stage computes -a base measure at a specific granularity, and the _outer_ stage aggregates those results -according to the query's dimensions: - -- [Fixed dimension](#fixed-dimension), e.g., percent of total — use the [`group_by`][ref-group-by] -parameter to group by only the listed dimensions. -- [Nested aggregate](#nested-aggregate), e.g., average of per-customer averages — use the -[`add_group_by`][ref-add-group-by] parameter to group by query dimensions plus listed. -- [Ranking](#ranking), e.g., ranking products by revenue — use the [`reduce_by`][ref-reduce-by] -parameter to group by query dimensions minus listed. - -## Rolling window - -Rolling window calculations are used to calculate metrics over a moving window of time. -Use the [`rolling_window` parameter][ref-rolling-window] of a measure to define -a rolling window. - -### Stages - -Here's how the rolling window calculation is performed: - -- **Date range.** First, the date range for the query is determined. -If there's a time dimension with a date range filter in the query, it's used. -Otherwise, the date range is determined by selecting the minimum and maximum -values for the time dimension. - - - -Tesseract enables rolling window calculations without the date range for the time dimension. -If Tesseract is not used, the date range must be provided. 
Otherwise, the query would -fail with the following error: `Time series queries without dateRange aren't supported`. - - - -- **Time windows.** Then, the series of time windows is calculated. The size of the -window is defined by the time dimension granularity and the `trailing` and -`leading` parameters. -- **Measure.** Finally, the measure is calculated for each window. - -### Example - -Data model: - -```yaml - -cubes: - - name: orders - sql: | - SELECT 1 AS id, '2025-01-01'::TIMESTAMP AS time UNION ALL - SELECT 2 AS id, '2025-01-11'::TIMESTAMP AS time UNION ALL - SELECT 3 AS id, '2025-01-21'::TIMESTAMP AS time UNION ALL - SELECT 4 AS id, '2025-01-31'::TIMESTAMP AS time UNION ALL - SELECT 5 AS id, '2025-02-01'::TIMESTAMP AS time UNION ALL - SELECT 6 AS id, '2025-02-11'::TIMESTAMP AS time UNION ALL - SELECT 7 AS id, '2025-02-21'::TIMESTAMP AS time UNION ALL - SELECT 8 AS id, '2025-03-01'::TIMESTAMP AS time UNION ALL - SELECT 9 AS id, '2025-03-11'::TIMESTAMP AS time UNION ALL - SELECT 10 AS id, '2025-03-21'::TIMESTAMP AS time UNION ALL - SELECT 11 AS id, '2025-03-31'::TIMESTAMP AS time UNION ALL - SELECT 12 AS id, '2025-04-01'::TIMESTAMP AS time - - dimensions: - - name: time - sql: time - type: time - - measures: - - name: rolling_count_month - sql: id - type: count - rolling_window: - trailing: unbounded -``` - -Query and result: - - - - - -## Time shift - -A _time-shift measure_ calculates the value of another measure at a different point in -time. This is achieved by _shifting_ the time dimension from the query in the necessary -direction during the calculation. Time-shifts are configured using the [`time_shift` -parameter][ref-ref-time-shift] of a measure. - -Typically, this is used to compare the current value of a measure with its prior value, -such as the same time last year. 
For example, if you have the `revenue` measure, you can -calculate its value for the same time last year: - -```yaml -- name: revenue_prior_year - multi_stage: true - sql: "{revenue}" - type: number - time_shift: - - interval: 1 year - type: prior -``` - -You can use time-shift measures with [calendar cubes][ref-calendar-cubes] to customize -how time-shifting works, e.g., to shift the time dimension to the prior date in a retail -calendar. - -### Example - -Data model: - -```yaml -cubes: - - name: prior_date - sql: | - SELECT '2023-04-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-05-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-06-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-07-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-08-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-09-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-10-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-11-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-12-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-01-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-02-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-03-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-04-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-05-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-06-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-07-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-08-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-09-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-10-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-11-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-12-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-01-01'::TIMESTAMP AS time, 1000 
AS revenue UNION ALL - SELECT '2025-02-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-03-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-04-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-05-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-06-01'::TIMESTAMP AS time, 1000 AS revenue - - dimensions: - - name: time - sql: time - type: time - - measures: - - name: revenue - sql: revenue - type: sum - - - name: revenue_ytd - sql: revenue - type: sum - rolling_window: - type: to_date - granularity: year - - - name: revenue_prior_year - multi_stage: true - sql: "{revenue}" - type: number - time_shift: - - time_dimension: time - interval: 1 year - type: prior - - - name: revenue_prior_year_ytd - multi_stage: true - sql: "{revenue_ytd}" - type: number - time_shift: - - time_dimension: time - interval: 1 year - type: prior -``` - -Queries and results: - - - - - - - - - -## Period-to-date - -Period-to-date calculations can be used to analyze data over different time periods: - -- Year-to-date (YTD) analysis. -- Quarter-to-date (QTD) analysis. -- Month-to-date (MTD) analysis. 
- -```yaml -- name: revenue_ytd - sql: revenue - type: sum - rolling_window: - type: to_date - granularity: year - -- name: revenue_qtd - sql: revenue - type: sum - rolling_window: - type: to_date - granularity: quarter - -- name: revenue_mtd - sql: revenue - type: sum - rolling_window: - type: to_date - granularity: month -``` - -### Example - -Data model: - -```yaml -cubes: - - name: prior_date - sql: | - SELECT '2023-04-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-05-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-06-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-07-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-08-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-09-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-10-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-11-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2023-12-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-01-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-02-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-03-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-04-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-05-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-06-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-07-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-08-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-09-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-10-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-11-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2024-12-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-01-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-02-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT 
'2025-03-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-04-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-05-01'::TIMESTAMP AS time, 1000 AS revenue UNION ALL - SELECT '2025-06-01'::TIMESTAMP AS time, 1000 AS revenue - - dimensions: - - name: time - sql: time - type: time - - measures: - - name: revenue_ytd - sql: revenue - type: sum - rolling_window: - type: to_date - granularity: year - - - name: revenue_qtd - sql: revenue - type: sum - rolling_window: - type: to_date - granularity: quarter - - - name: revenue_mtd - sql: revenue - type: sum - rolling_window: - type: to_date - granularity: month -``` - -Query and result: - - - - - -## Conditional measure - -Conditional measure calculations can be used to create measures that depend on the value -of a dimension. Such measures are defined using the [`case` parameter][ref-case-measures] -and used together with [`switch` dimensions][ref-switch-dimensions]. - -```yaml -- name: amount_in_currency - multi_stage: true - case: - switch: "{CUBE.currency}" - when: - - value: EUR - sql: "{CUBE.amount_eur}" - - value: GBP - sql: "{CUBE.amount_gbp}" - else: - sql: "{CUBE.amount_usd}" - type: number -``` - -### Example - -Data model: - - - -```yaml title="YAML" -cubes: - - name: orders - sql: | - SELECT 1 AS id, 100 AS amount_usd UNION ALL - SELECT 2 AS id, 200 AS amount_usd UNION ALL - SELECT 3 AS id, 300 AS amount_usd UNION ALL - SELECT 4 AS id, 400 AS amount_usd UNION ALL - SELECT 5 AS id, 500 AS amount_usd - - dimensions: - - name: currency - type: switch - values: - - USD - - EUR - - GBP - - measures: - - name: amount_usd - sql: amount_usd - type: sum - - - name: amount_eur - sql: "{amount_usd} * 0.9" - type: number - - - name: amount_gbp - sql: "{amount_usd} * 0.8" - type: number - - - name: amount_in_currency - multi_stage: true - case: - switch: "{currency}" - when: - - value: EUR - sql: "{amount_eur}" - - value: GBP - sql: "{amount_gbp}" - else: - sql: "{amount_usd}" - type: number 
-``` - -```javascript title="JavaScript" -cube(`orders`, { - sql: ` - SELECT 1 AS id, 100 AS amount_usd UNION ALL - SELECT 2 AS id, 200 AS amount_usd UNION ALL - SELECT 3 AS id, 300 AS amount_usd UNION ALL - SELECT 4 AS id, 400 AS amount_usd UNION ALL - SELECT 5 AS id, 500 AS amount_usd - `, - - dimensions: { - currency: { - type: `switch`, - values: [`USD`, `EUR`, `GBP`] - } - }, - - measures: { - amount_usd: { - sql: `amount_usd`, - type: `sum` - }, - - amount_eur: { - sql: `${amount_usd} * 0.9`, - type: `number` - }, - - amount_gbp: { - sql: `${amount_usd} * 0.8`, - type: `number` - }, - - amount_in_currency: { - multi_stage: true, - case: { - switch: `${currency}`, - when: [ - { value: `EUR`, sql: `${amount_eur}` }, - { value: `GBP`, sql: `${amount_gbp}` } - ], - else: { sql: `${amount_usd}` } - }, - type: `number` - } - } -}) -``` - - - -Query and result: - - - - - -## Fixed dimension - -Fixed dimension calculations can be used to perform fixed comparisons, e.g., to compare -individual items to a broader dataset. Use the [`group_by` parameter][ref-group-by] -of a multi-stage measure to specify dimensions for the inner aggregation stage. 
- -For example, comparing revenue sales to the overall average: - -```yaml -- name: revenue - sql: revenue - format: currency - type: sum - -- name: occupied_sq_feet - sql: occupied_sq_feet - type: sum - -- name: occupied_sq_feet_per_city - multi_stage: true - sql: "{occupied_sq_feet}" - type: sum - group_by: - - city - - state - -- name: revenue_per_city_sq_feet - multi_stage: true - sql: "{revenue} / NULLIF({occupied_sq_feet_per_city}, 0)" - type: number -``` - -Percent of total calculations: - -```yaml -- name: revenue - sql: revenue - format: currency - type: sum - -- name: country_revenue - multi_stage: true - sql: "{revenue}" - type: sum - group_by: - - country - -- name: country_revenue_percentage - multi_stage: true - sql: "{revenue} / NULLIF({country_revenue}, 0)" - type: number -``` - -### Example - -Data model: - -```yaml -cubes: - - name: percent_of_total - sql: | - SELECT 1 AS id, 1000 AS revenue, 'A' AS product, 'USA' AS country UNION ALL - SELECT 2 AS id, 2000 AS revenue, 'B' AS product, 'USA' AS country UNION ALL - SELECT 3 AS id, 3000 AS revenue, 'A' AS product, 'Austria' AS country UNION ALL - SELECT 4 AS id, 4000 AS revenue, 'B' AS product, 'Austria' AS country UNION ALL - SELECT 5 AS id, 5000 AS revenue, 'A' AS product, 'Netherlands' AS country UNION ALL - SELECT 6 AS id, 6000 AS revenue, 'B' AS product, 'Netherlands' AS country - - dimensions: - - name: product - sql: product - type: string - - - name: country - sql: country - type: string - - measures: - - name: revenue - sql: revenue - format: currency - type: sum - - - name: country_revenue - multi_stage: true - sql: "{revenue}" - format: currency - type: sum - group_by: - - country - - - name: country_revenue_percentage - multi_stage: true - sql: "{revenue} / NULLIF({country_revenue}, 0)" - type: number -``` - -Query and result: - - - - - -## Nested aggregate - -Nested aggregate calculations are used to compute an aggregate of an aggregate, e.g., -to calculate the average of per-customer 
averages or to count how many customers exceed -a threshold. Use the [`add_group_by` parameter][ref-add-group-by] of a multi-stage -measure to specify dimensions for the inner aggregation stage. - -For example, calculating the average order value per customer, then averaging across -customers: - -```yaml -- name: avg_order_value - sql: amount - type: avg - -- name: avg_customer_order_value - multi_stage: true - sql: "{avg_order_value}" - type: avg - add_group_by: - - customer_id -``` - -Counting customers with total spending above a threshold: - -```yaml -- name: total_amount - sql: amount - type: sum - -- name: high_value_customer_count - multi_stage: true - sql: "CASE WHEN {total_amount} > 1000 THEN 1 END" - type: count - add_group_by: - - customer_id -``` - -### Example - -Data model: - -```yaml -cubes: - - name: orders - sql: | - SELECT 1 AS id, 100 AS amount, 1 AS customer_id, 'USA' AS country UNION ALL - SELECT 2 AS id, 150 AS amount, 1 AS customer_id, 'USA' AS country UNION ALL - SELECT 3 AS id, 200 AS amount, 2 AS customer_id, 'USA' AS country UNION ALL - SELECT 4 AS id, 300 AS amount, 2 AS customer_id, 'USA' AS country UNION ALL - SELECT 5 AS id, 400 AS amount, 2 AS customer_id, 'USA' AS country UNION ALL - SELECT 6 AS id, 500 AS amount, 3 AS customer_id, 'Germany' AS country UNION ALL - SELECT 7 AS id, 600 AS amount, 3 AS customer_id, 'Germany' AS country UNION ALL - SELECT 8 AS id, 250 AS amount, 4 AS customer_id, 'Germany' AS country - - dimensions: - - name: customer_id - sql: customer_id - type: number - - - name: country - sql: country - type: string - - measures: - - name: avg_order_value - sql: amount - type: avg - - - name: avg_customer_order_value - multi_stage: true - sql: "{avg_order_value}" - type: avg - add_group_by: - - customer_id -``` - -When querying `avg_customer_order_value` grouped by `country`, Cube computes the average -order value per customer first (inner stage), then averages those values per country -(outer stage). 
This gives equal weight to each customer regardless of order count. - -## Ranking - -Ranking calculations can be used to get valuable insights, especially when analyzing -data across various dimensions. Use the [`reduce_by` parameter][ref-reduce-by] of a -multi-stage measure to specify dimensions to exclude from the inner aggregation stage. - -```yaml -- name: product_rank - multi_stage: true - order_by: - - sql: "{revenue}" - dir: asc - reduce_by: - - product - type: rank -``` - -You can reduce by one or more dimensions. - -### Example - -Data model: - -```yaml -cubes: - - name: ranking - sql: | - SELECT 1 AS id, 1000 AS revenue, 'A' AS product, 'USA' AS country UNION ALL - SELECT 2 AS id, 2000 AS revenue, 'B' AS product, 'USA' AS country UNION ALL - SELECT 3 AS id, 3000 AS revenue, 'A' AS product, 'Austria' AS country UNION ALL - SELECT 4 AS id, 4000 AS revenue, 'B' AS product, 'Austria' AS country UNION ALL - SELECT 5 AS id, 5000 AS revenue, 'A' AS product, 'Netherlands' AS country UNION ALL - SELECT 6 AS id, 6000 AS revenue, 'B' AS product, 'Netherlands' AS country - - dimensions: - - name: product - sql: product - type: string - - - name: country - sql: country - type: string - - measures: - - name: revenue - sql: revenue - format: currency - type: sum - - - name: product_rank - multi_stage: true - order_by: - - sql: "{revenue}" - dir: asc - reduce_by: - - product - type: rank -``` - -Query and result: - - - - - - -[link-tesseract]: https://cube.dev/blog/introducing-next-generation-data-modeling-engine -[ref-measures]: /docs/data-modeling/concepts#measures -[ref-dimensions]: /docs/data-modeling/concepts#dimensions -[ref-rolling-window]: /reference/data-modeling/measures#rolling_window -[link-cte]: https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL#Common_table_expression -[ref-ref-time-shift]: /reference/data-modeling/measures#time_shift -[ref-calendar-cubes]: /docs/data-modeling/concepts/calendar-cubes -[ref-case-measures]: 
/reference/data-modeling/measures#case -[ref-switch-dimensions]: /reference/data-modeling/dimensions#type -[ref-group-by]: /reference/data-modeling/measures#group_by -[ref-reduce-by]: /reference/data-modeling/measures#reduce_by -[ref-add-group-by]: /reference/data-modeling/measures#add_group_by \ No newline at end of file diff --git a/docs-mintlify/docs/data-modeling/concepts/syntax.mdx b/docs-mintlify/docs/data-modeling/concepts/syntax.mdx index b184059a560be..2765b621d0d93 100644 --- a/docs-mintlify/docs/data-modeling/concepts/syntax.mdx +++ b/docs-mintlify/docs/data-modeling/concepts/syntax.mdx @@ -981,25 +981,25 @@ string values in time dimensions. [ref-context-variables]: /reference/data-modeling/context-variables [ref-config-model-path]: /reference/configuration/config#schemapath [ref-config-repository-factory]: /reference/configuration/config#repositoryfactory -[ref-subquery]: /docs/data-modeling/concepts/calculated-members#subquery-dimensions +[ref-subquery]: /docs/data-modeling/dimensions#subquery-dimensions [wiki-snake-case]: https://en.wikipedia.org/wiki/Snake_case [wiki-yaml]: https://en.wikipedia.org/wiki/YAML [link-snowflake-listagg]: https://docs.snowflake.com/en/sql-reference/functions/listagg [link-bigquery-stringagg]: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#string_agg [link-sql-udf]: https://en.wikipedia.org/wiki/User-defined_function#Databases [ref-time-dimension]: /reference/data-modeling/dimensions#type -[ref-default-granularities]: /docs/data-modeling/concepts#time-dimensions +[ref-default-granularities]: /docs/data-modeling/dimensions#time-dimensions [ref-custom-granularities]: /reference/data-modeling/dimensions#granularities [ref-style-guide]: /recipes/data-modeling/style-guide -[ref-polymorphism]: /docs/data-modeling/concepts/polymorphic-cubes +[ref-polymorphism]: /recipes/data-modeling/polymorphic-cubes [ref-data-blending]: /docs/data-modeling/concepts/data-blending 
[link-js-template-literals]: https://developer.mozilla.org/en-US/docs/Learn_web_development/Core/Scripting/Strings#embedding_javascript [link-python-reserved-words]: https://docs.python.org/3/reference/lexical_analysis.html#keywords [ref-dax-api-date-hierarchies]: /reference/dax-api#date-hierarchies -[ref-time-dimension]: /docs/data-modeling/concepts#time-dimensions +[ref-time-dimension]: /docs/data-modeling/dimensions#time-dimensions [ref-recipe-string-time-dimensions]: /recipes/data-modeling/string-time-dimensions -[ref-views]: /docs/data-modeling/concepts#views -[ref-preaggs]: /docs/data-modeling/concepts#pre-aggregations -[ref-join-paths]: /docs/data-modeling/concepts/working-with-joins#join-paths -[ref-calculated-members]: /docs/data-modeling/concepts/calculated-members -[ref-diamond-subgraphs]: /docs/data-modeling/concepts/working-with-joins#diamond-subgraphs \ No newline at end of file +[ref-views]: /docs/data-modeling/views +[ref-preaggs]: /reference/data-modeling/pre-aggregations +[ref-join-paths]: /docs/data-modeling/joins#join-paths +[ref-calculated-members]: /docs/data-modeling/measures#calculated-measures +[ref-diamond-subgraphs]: /docs/data-modeling/joins#diamond-subgraphs \ No newline at end of file diff --git a/docs-mintlify/docs/data-modeling/concepts/working-with-joins.mdx b/docs-mintlify/docs/data-modeling/concepts/working-with-joins.mdx deleted file mode 100644 index afc8e2283f203..0000000000000 --- a/docs-mintlify/docs/data-modeling/concepts/working-with-joins.mdx +++ /dev/null @@ -1,1945 +0,0 @@ ---- -title: Joins between cubes -description: "Joins create relationships between cubes in the data model." ---- - -They allow to build complex [queries][ref-queries] that involve members from multiple -cubes. They also allow to [reference][ref-references] members from other cubes in -[calculated members][ref-calculated-members], [views][ref-views], and -[pre-aggregations][ref-preaggs]. 
- -When defining joins, it's important to understand [join types](#join-types) and the -[direction of joins](#direction-of-joins) as well as how [join paths](#join-paths) and -[join hints](#join-hints) are used to work with the joined cubes. - -## Join types - -Cube supports three [types of join relationships][ref-schema-ref-joins-relationship] -often found in SQL databases: `one_to_one`, `one_to_many`, and `many_to_one`. - -For example, let's take two cubes, `customers` and `orders`: - - - -```yaml title="YAML" -cubes: - - name: customers - # ... - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: company - sql: company - type: string - - - name: orders - # ... - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: customer_id - sql: customer_id - type: number -``` - -```javascript title="JavaScript" -cube(`customers`, { - // ... - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - company: { - sql: `company`, - type: `string` - } - } -}) - -cube(`orders`, { - // ... - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - customer_id: { - sql: `customer_id`, - type: `number` - } - } -}) -``` - - - -We could add a join to the `customers` cube: - - - -```yaml title="YAML" -cubes: - - name: customers - # ... - - joins: - - name: orders - relationship: one_to_many - sql: "{CUBE}.id = {orders.customer_id}" -``` - -```javascript title="JavaScript" -cube(`customers`, { - // ... - - joins: { - orders: { - relationship: `one_to_many`, - sql: `${CUBE}.id = ${orders.customer_id}` - } - } -}) -``` - - - -The join above means a customer has many orders. 
Let's send the following JSON -query: - -```json -{ - "dimensions": ["orders.status", "customers.company"], - "measures": ["orders.count"], - "timeDimensions": [ - { - "dimension": "orders.created_at" - } - ], - "order": { "customers.company": "asc" } -} -``` - -The query above will generate the following SQL: - -```sql -SELECT - "orders".status "orders__status", - "customers".company "customers__company", - count("orders".id) "orders__count" -FROM - public.customers AS "customers" - LEFT JOIN public.orders AS "orders" - ON "customers".id = "orders".customer_id -GROUP BY 1, 2 -ORDER BY 2 ASC -LIMIT 10000 -``` - -However, if we have guest checkouts, that would mean we would have orders with -no matching customer. Looking back at the `one_to_many` relationship and its -resulting SQL, any guest checkouts would be excluded from the results. To remedy -this, we'll remove the join from the `customers` cube and instead define a join -with a `many_to_one` relationship on the `orders` cube: - - - -```yaml title="YAML" -cubes: - - name: orders - # ... - - joins: - - name: customers - relationship: many_to_one - sql: "{CUBE}.customer_id = {customers.id}" -``` - -```javascript title="JavaScript" -cube(`orders`, { - // ... - - joins: { - customers: { - relationship: `many_to_one`, - sql: `${CUBE}.customer_id = ${customers.id}` - } - } -}) -``` - - - -In the above data model, our `orders` cube defines the relationship between -itself and the `customers` cube.
The same JSON query now results in the following -SQL query: - -```sql -SELECT - "orders".status "orders__status", - "customers".company "customers__company", - count("orders".id) "orders__count" -FROM - public.orders AS "orders" - LEFT JOIN public.customers AS "customers" - ON "orders".customer_id = "customers".id -GROUP BY 1, 2 -ORDER BY 2 ASC -LIMIT 10000 -``` - -As we can see, the base table in the query is `orders`, and `customers` is in -the `LEFT JOIN` clause; this means any orders without a customer will also be -retrieved. - -### Many-to-many joins - -A many-to-many relationship occurs when multiple records in a cube are -associated with multiple records in another cube. - -For example, let's say we have two cubes, `topics` and `posts`, pointing to the -`topics` and `posts` tables in our database, respectively. A `post` can have -more than one `topic`, and a `topic` may have more than one `post`. - -In a database, you would most likely have an associative table (also known as a -junction table or cross-reference table). In our example, this table name might -be `post_topics`. - -The diagram below shows the tables `posts`, `topics`, `post_topics`, and their -relationships. - - - Many-to-Many Entity Diagram for posts, topics and post_topics - - -In the same way the `post_topics` table was specifically created to handle this -association in the database, we need to create an associative cube -`post_topics`, and declare the relationships from it to `topics` cube and from -`posts` to `post_topics`. 
- - - -```yaml title="YAML" -cubes: - - name: posts - sql_table: posts - - joins: - - name: post_topics - relationship: one_to_many - sql: "{CUBE}.id = {post_topics.post_id}" - - - name: topics - sql_table: topics - - dimensions: - - name: post_id - sql: id - type: string - primary_key: true - - - name: post_topics - sql_table: post_topics - - joins: - - name: topic - relationship: many_to_one - sql: "{CUBE}.topic_id = {topics.id}" - - dimensions: - - name: post_id - sql: post_id - type: string -``` - -```javascript title="JavaScript" -cube(`posts`, { - sql_table: `posts`, - - joins: { - post_topics: { - relationship: `one_to_many`, - sql: `${CUBE}.id = ${post_topics.post_id}` - } - } -}) - -cube(`topics`, { - sql_table: `topics`, - - dimensions: { - post_id: { - sql: `id`, - type: `string`, - primary_key: true - } - } -}) - -cube(`post_topics`, { - sql_table: `post_topics`, - - joins: { - topic: { - relationship: `many_to_one`, - sql: `${CUBE}.topic_id = ${topics.id}` - } - }, - - dimensions: { - post_id: { - sql: `post_id`, - type: `string` - } - } -}) -``` - - - - - -The following example uses the `many_to_one` relationship on the `post_topics` -cube; this causes the direction of joins to be `posts → post_topics → topics`. -Read more about the [direction of joins](#direction-of-joins). - - - -In scenarios where a table doesn't define a primary key, one can be generated -using SQL: - - - -```yaml title="YAML" -cubes: - - name: post_topics - # ... - - dimensions: - - name: id - sql: "CONCAT({CUBE}.post_id, {CUBE}.topic_id)" - type: number - primary_key: true -``` - -```javascript title="JavaScript" -cube(`post_topics`, { - // ... - - dimensions: { - id: { - sql: `CONCAT(${CUBE}.post_id, ${CUBE}.topic_id)`, - type: `number`, - primary_key: true - } - } -}) -``` - - - -**Virtual associative cubes.** -Sometimes there is no associative table in the database, when in reality, there -is a many-to-many relationship. 
In this case, the solution is to extract some -data from existing tables and create a virtual (not backed by a real table in -the database) associative cube. - -Let’s consider the following example. We have tables `emails` and -`transactions`. The goal is to calculate the amount of transactions per -campaign. Both `emails` and `transactions` have a `campaign_id` column. We don’t -have a campaigns table, but data about campaigns is part of the `emails` table. - -Let’s take a look at the `emails` cube first: - - - -```yaml title="YAML" -cubes: - - name: emails - sql_table: emails - - measures: - - name: count - type: count - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: campaign_name - sql: campaign_name - type: string - - - name: campaign_id - sql: campaign_id - type: number -``` - -```javascript title="JavaScript" -cube(`emails`, { - sql_table: `emails`, - - measures: { - count: { - type: `count` - } - }, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - campaign_name: { - sql: `campaign_name`, - type: `string` - }, - - campaign_id: { - sql: `campaign_id`, - type: `number` - } - } -}) -``` - - - -We can extract campaigns data into a virtual `campaigns` cube: - - - -```yaml title="YAML" -cubes: - - name: campaigns - sql: | - SELECT - campaign_id, - campaign_name, - customer_name, - MIN(created_at) AS started_at - FROM emails GROUP BY 1, 2, 3 - - measures: - - name: count - type: count - - dimensions: - - name: id - sql: campaign_id - type: string - primary_key: true - - - name: name - sql: campaign_name - type: string -``` - -```javascript title="JavaScript" -cube(`campaigns`, { - sql: ` - SELECT - campaign_id, - campaign_name, - customer_name, - MIN(created_at) AS started_at - FROM emails - GROUP BY 1, 2, 3 - `, - - measures: { - count: { - type: `count` - } - }, - - dimensions: { - id: { - sql: `campaign_id`, - type: `string`, - primary_key: true - }, - - name: { - sql: `campaign_name`, - 
type: `string` - } - } -}) -``` - - - -The following diagram shows our data model with the `Campaigns` cube: - - - Many-to-Many Entity Diagram for emails, campaigns and transactions - - -The last piece is to finally declare a many-to-many relationship. This should be -done by declaring a [`one_to_many` -relationship][ref-schema-ref-joins-relationship] on the associative cube, -`campaigns` in our case. - - - -```yaml title="YAML" -cubes: - - name: emails - sql_table: emails - - joins: - - name: campaigns - relationship: many_to_one - sql: | - {CUBE}.campaign_id = {campaigns.id} AND {CUBE}.customer_name = - {campaigns.customer_name} - - measures: - - name: count - type: count - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: campaign_name - sql: campaign_name - type: string - - - name: campaign_id - sql: campaign_id - type: number - - - name: campaigns - - joins: - - name: transactions - relationship: one_to_many - sql: | - {CUBE}.customer_name = {transactions.customer_name} AND - {CUBE}.campaign_id = {transactions.campaign_id} - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: customer_name - sql: customer_name - type: string -``` - -```javascript title="JavaScript" -cube(`emails`, { - sql_table: `emails`, - - joins: { - campaigns: { - relationship: `many_to_one`, - sql: `${CUBE}.campaign_id = ${campaigns.id} - AND ${CUBE}.customer_name = ${campaigns.customer_name}` - } - }, - - measures: { - count: { - type: `count` - } - }, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - campaign_name: { - sql: `campaign_name`, - type: `string` - }, - - campaign_id: { - sql: `campaign_id`, - type: `number` - } - } -}) - -cube(`campaigns`, { - joins: { - transactions: { - relationship: `one_to_many`, - sql: `${CUBE}.customer_name = ${transactions.customer_name} - AND ${CUBE}.campaign_id = ${transactions.campaign_id}` - } - }, - - dimensions: { - id: { - sql: `id`, - type: 
`number`, - primary_key: true - }, - - customer_name: { - sql: `customer_name`, - type: `string` - } - } -}) -``` - - - -## Join tree - -When Cube analyzes a [query][ref-queries], it builds a _join tree_ that connects all -cubes involved in the query in compliance with the [direction of joins](#direction-of-joins). -If the join tree cannot be built, the query will fail to execute. The presence of -[bidirectional joins](#bidirectional-joins) or [diamond subgraphs](#diamond-subgraphs) -can complicate the join tree structure. - -### Direction of joins - -**All joins in Cube's data model are _directed_.** They flow from the source cube (the one -where the join is defined) to the target cube (the one referenced in the join). - -Cube will respect the join graph when generating SQL queries. It means that _source_ -cubes will be on the left side of `JOIN` clauses and _target_ cubes will be on the right -side of `JOIN` clauses. Consider the following data model, consisting of cubes `left` -and `right`: - - - -```yaml title="YAML" -cubes: - - name: left - sql: | - SELECT 1 AS id, 11 AS value UNION ALL - SELECT 2 AS id, 12 AS value UNION ALL - SELECT 3 AS id, 13 AS value - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: value - sql: value - type: number - - joins: - - name: right - sql: "{left.id} = {right.id}" - relationship: one_to_one - - - name: right - sql: | - SELECT 1 AS id, 101 AS value UNION ALL - SELECT 2 AS id, 102 AS value UNION ALL - SELECT 3 AS id, 103 AS value - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: value - sql: value - type: number - - # joins: - # - name: left - # sql: "{left.id} = {right.id}" - # relationship: one_to_one -``` - -```javascript title="JavaScript" -cube(`left`, { - sql: ` - SELECT 1 AS id, 11 AS value UNION ALL - SELECT 2 AS id, 12 AS value UNION ALL - SELECT 3 AS id, 13 AS value - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - 
}, - - value: { - sql: `value`, - type: `number` - } - }, - - joins: { - right: { - sql: `${left.id} = ${right.id}`, - relationship: `one_to_one` - } - } -}) - -cube(`right`, { - sql: ` - SELECT 1 AS id, 101 AS value UNION ALL - SELECT 2 AS id, 102 AS value UNION ALL - SELECT 3 AS id, 103 AS value - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - value: { - sql: `value`, - type: `number` - } - } - - // joins: { - // left: { - // sql: `${left.id} = ${right.id}`, - // relationship: `one_to_one` - // } - // } -}) -``` - - - -It defines a join that is directed from `left` to `right`. If you query for `left.value` -and `right.value`, Cube will generate the following SQL query. As you can see, `left` is -on the left side of the `JOIN` clause, and `right` is on the right side: - -```sql -SELECT - "left".value "left__value", - "right".value "right__value" -FROM ( - SELECT 1 AS id, 11 AS value UNION ALL - SELECT 2 AS id, 12 AS value UNION ALL - SELECT 3 AS id, 13 AS value -) AS "left" -LEFT JOIN ( - SELECT 1 AS id, 101 AS value UNION ALL - SELECT 2 AS id, 102 AS value UNION ALL - SELECT 3 AS id, 103 AS value -) AS "right" ON "left".id = "right".id -GROUP BY 1, 2 -``` - -If you comment out the join definition in the `left` cube and uncomment the join -definition in the `right` cube, Cube will generate the following SQL for the same query: - -```sql -SELECT - "left".value "left__value", - "right".value "right__value" -FROM ( - SELECT 1 AS id, 101 AS value UNION ALL - SELECT 2 AS id, 102 AS value UNION ALL - SELECT 3 AS id, 103 AS value -) AS "right" -LEFT JOIN ( - SELECT 1 AS id, 11 AS value UNION ALL - SELECT 2 AS id, 12 AS value UNION ALL - SELECT 3 AS id, 13 AS value -) AS "left" ON "left".id = "right".id -GROUP BY 1, 2 -``` - -As you can see, the direction of joins greatly influences the generated SQL and, -consequently, the final result set. 
- -### Bidirectional joins - -**As a rule of thumb, it's not recommended to define _bidirectional joins_** in the data -model (i.e., having both cubes define a join to each other) by default. However, it can -still be useful for some valid analytical use cases. - -Consider the following data model with `orders` and `customers` for an e-commerce that -has both registered and guest customers (they have `NULL` as `customer_id`): - - - -```yaml title="YAML" -cubes: - - name: orders - sql: | - SELECT 1 AS id, 1001 AS customer_id, 123 AS revenue UNION ALL - SELECT 2 AS id, 1001 AS customer_id, 234 AS revenue UNION ALL - SELECT 3 AS id, 1002 AS customer_id, 345 AS revenue UNION ALL - SELECT 4 AS id, NULL AS customer_id, 456 AS revenue - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: customer_id - sql: customer_id - type: number - - measures: - - name: order_count - type: count - - - name: total_revenue - sql: revenue - type: sum - - joins: - - name: customers - sql: "{orders.customer_id} = {customers.id}" - relationship: many_to_one - - - name: customers - sql: | - SELECT 1001 AS id, 'Alice' AS name UNION ALL - SELECT 1002 AS id, 'Bob' AS name UNION ALL - SELECT 1003 AS id, 'Eve' AS name - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: name - sql: name - type: string - - measures: - - name: customer_count - type: count - - # joins: - # - name: orders - # sql: "{orders.customer_id} = {customers.id}" - # relationship: one_to_many -``` - -```javascript title="JavaScript" -cube(`orders`, { - sql: ` - SELECT 1 AS id, 1001 AS customer_id, 123 AS revenue UNION ALL - SELECT 2 AS id, 1001 AS customer_id, 234 AS revenue UNION ALL - SELECT 3 AS id, 1002 AS customer_id, 345 AS revenue UNION ALL - SELECT 4 AS id, NULL AS customer_id, 456 AS revenue - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - customer_id: { - sql: `customer_id`, - type: `number` - } - }, - - 
measures: { - order_count: { - type: `count` - }, - - total_revenue: { - sql: `revenue`, - type: `sum` - } - }, - - joins: { - customers: { - sql: `${orders.customer_id} = ${customers.id}`, - relationship: `many_to_one` - } - } -}) - -cube(`customers`, { - sql: ` - SELECT 1001 AS id, 'Alice' AS name UNION ALL - SELECT 1002 AS id, 'Bob' AS name UNION ALL - SELECT 1003 AS id, 'Eve' AS name - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - name: { - sql: `name`, - type: `string` - } - }, - - measures: { - customer_count: { - type: `count` - } - } - - // joins: { - // orders: { - // sql: `${orders.customer_id} = ${customers.id}`, - // relationship: `one_to_many` - // } - // } -}) -``` - - - -Querying `customers.name` and `orders.order_count` will produce the following result: - - - - - -As you can see, the result set only includes registered customers; we get no data for -guest checkouts. Check the generated SQL query: - -```sql -SELECT - "customers".name "customers__name", - count("orders".id) "orders__order_count" -FROM ( - SELECT 1001 AS id, 'Alice' AS name UNION ALL - SELECT 1002 AS id, 'Bob' AS name UNION ALL - SELECT 1003 AS id, 'Eve' AS name -) AS "customers" -LEFT JOIN ( - SELECT 1 AS id, 1001 AS customer_id, 123 AS revenue UNION ALL - SELECT 2 AS id, 1001 AS customer_id, 234 AS revenue UNION ALL - SELECT 3 AS id, 1002 AS customer_id, 345 AS revenue UNION ALL - SELECT 4 AS id, NULL AS customer_id, 456 AS revenue -) AS "orders" ON "orders".customer_id = "customers".id -GROUP BY 1 -``` - -The `customers` cube is on the left side of the `JOIN` clause, since the direction of -joins is from `customers` to `orders`. This means that the query will only return -registered customers and all orders by guest customers will be excluded. 
- -Now, if you uncomment the join definition in the `orders` cube and comment out the -join definition in the `customers` cube, running the same query will produce the following -result: - - - - - -As you can see, now the result set includes guest checkouts, but we have no data for -registered customers who have not placed any orders (namely, `Eve`). Check the -generated SQL query, which reveals why: - -```sql -SELECT - "customers".name "customers__name", - count("orders".id) "orders__order_count" -FROM ( - SELECT 1 AS id, 1001 AS customer_id, 123 AS revenue UNION ALL - SELECT 2 AS id, 1001 AS customer_id, 234 AS revenue UNION ALL - SELECT 3 AS id, 1002 AS customer_id, 345 AS revenue UNION ALL - SELECT 4 AS id, NULL AS customer_id, 456 AS revenue -) AS "orders" -LEFT JOIN ( - SELECT 1001 AS id, 'Alice' AS name UNION ALL - SELECT 1002 AS id, 'Bob' AS name UNION ALL - SELECT 1003 AS id, 'Eve' AS name -) AS "customers" ON "orders".customer_id = "customers".id -GROUP BY 1 -``` - -**Bidirectional joins often lead to the ambiguity in the data model** and can produce -ambiguous results, as Cube may not know which direction to follow when generating SQL -queries. You can remove the ambiguity by using [join paths](#join-paths) and [join -hints](#join-hints). - -### Diamond subgraphs - -A _diamond subgraph_ is a specific type of join structure where there's more than one -join path between two cubes, e.g., `users.schools.countries` and -`users.employers.countries`. Join structures like `a.b.c` + `a.c` or `a.b.c.d` + `a.b.d` -are also considered diamond subgraphs for the purpose of this section. 
- -In the following example, four cubes are joined together as a _diamond_: `a` joins to `b` -and `c`, and both `b` and `c` join to `d`: - - - -```yaml title="YAML" -cubes: - - name: a - sql: | - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: d_via_b - sql: "{b.d.id}" - type: number - - - name: d_via_c - sql: "{c.d.id}" - type: number - - joins: - - name: b - sql: "{a.id} = {b.id}" - relationship: one_to_one - - - name: c - sql: "{a.id} = {c.id}" - relationship: one_to_one - - - name: b - sql: | - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - joins: - - name: d - sql: "{b.id} = {d.id}" - relationship: one_to_one - - - name: c - sql: | - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - joins: - - name: d - sql: "{c.id} = {d.id}" - relationship: one_to_one - - - name: d - sql: | - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id - - dimensions: - - name: id - sql: id - type: number - primary_key: true -``` - -```javascript title="JavaScript" -cube(`a`, { - sql: ` - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - d_via_b: { - sql: `${b.d.id}`, - type: `number` - }, - - d_via_c: { - sql: `${c.d.id}`, - type: `number` - } - }, - - joins: { - b: { - sql: `${a.id} = ${b.id}`, - relationship: `one_to_one` - }, - - c: { - sql: `${a.id} = ${c.id}`, - relationship: `one_to_one` - } - } -}) - -cube(`b`, { - sql: ` - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - } - }, - - joins: { - d: { - sql: `${b.id} = ${d.id}`, - relationship: 
`one_to_one` - } - } -}) - -cube(`c`, { - sql: ` - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - } - }, - - joins: { - d: { - sql: `${c.id} = ${d.id}`, - relationship: `one_to_one` - } - } -}) - -cube(`d`, { - sql: ` - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - } - } -}) -``` - - - -When querying `a.d_via_b`, Cube will generate the following SQL query, joining through -`b`: - -```sql -SELECT - "d".id "a__d_via_b" -FROM ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "a" -LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "b" ON "a".id = "b".id -LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "d" ON "b".id = "d".id -GROUP BY 1 -``` - -However, when querying `a.d_via_c`, Cube will generate the following SQL query, joining -through `c`: - -```sql -SELECT - "d".id "a__d_via_c" -FROM ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "a" -LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "c" ON "a".id = "c".id -LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "d" ON "c".id = "d".id -GROUP BY 1 -``` - - - -Note that having both `a.d_via_b` and `a.d_via_c` in the same query will produce -ambiguous results, as Cube will only use `d` to the generated SQL once: - -```sql -SELECT - "d".id "a__d_via_b", - "d".id "a__d_via_c" -FROM ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "a" -LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "b" ON "a".id = "b".id -LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "d" ON "b".id = "d".id -LEFT JOIN ( - 
SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "c" ON "a".id = "c".id -GROUP BY 1, 2 -``` - - - -Similar to bidirectional joins, **diamond subgraphs often lead to the ambiguity in the -data model** and can produce ambiguous results, as Cube may not know which direction to -follow when generating SQL queries. You can remove the ambiguity by using [join -paths](#join-paths) and [join hints](#join-hints). - -## Join paths - -_Join paths_ serve as the recommended mechanism to remove the ambiguity of joins in the -data model, including cases of [bidirectional joins](#bidirectional-joins) and [diamond -subgraphs](#diamond-subgraphs). - -A join path is defined as a sequence of cube names, separated by dots, that will be -followed in that specific order when resolving a cube or its member, e.g., -`users.locations.countries.flag_aspect_ratio`. - -### Calculated members - -When referencing a member of another cube in a [calculated member][ref-calculated-members], -you can use a join path to specify the path to follow, as demonstrated in the [diamond -subgraphs example](#diamond-subgraphs). - -### Views - -When referencing cubes in a [view][ref-views], you literally provide join paths via the -[`join_path` parameter][ref-view-join-path]. 
The [bidirectional joins -example](#bidirectional-joins) can be disambiguated with the following views: - - - -```yaml title="YAML" -views: - - name: total_revenue_per_customer - cubes: - - join_path: orders - includes: - - total_revenue - - - join_path: orders.customers - includes: - - name - - - name: customers_without_orders - cubes: - - join_path: customers - includes: - - name - - - join_path: customers.orders - includes: - - order_count -``` - -```javascript title="JavaScript" -view(`total_revenue_per_customer`, { - cubes: [ - { - join_path: orders, - includes: ['total_revenue'] - }, - { - join_path: orders.customers, - includes: ['name'] - } - ] -}) - -view(`customers_without_orders`, { - cubes: [ - { - join_path: customers, - includes: ['name'] - }, - { - join_path: customers.orders, - includes: ['order_count'] - } - ] -}) -``` - - - -### Pre-aggregations - -When referencing members of other cubes in a [pre-aggregation][ref-preaggs], you can -also use join paths, as shown in the following example: - - - -```yaml title="YAML" -cubes: - - name: a - sql: | - SELECT 1 AS id, 1 AS b_id, 1 AS c_id UNION ALL - SELECT 2 AS id, 2 AS b_id, 2 AS c_id - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - - name: dimension1 - sql: "CONCAT({CUBE}.b_id, {CUBE}.b_id)" - type: string - - measures: - - name: measure1 - type: count - - joins: - - name: b - relationship: one_to_one - sql: "{CUBE}.b_id = {b.id}" - - - name: c - relationship: one_to_one - sql: "{CUBE}.c_id = {c.id}" - - pre_aggregations: - - name: a_and_c - dimensions: - - a.dimension1 - - a.b.c.dimension2 - measures: - - a.measure1 - - a.b.c.measure2 - - - name: b - sql: | - SELECT 1 AS id, 1 AS c_id UNION ALL - SELECT 2 AS id, 2 AS c_id - - dimensions: - - name: id - sql: id - type: number - primary_key: true - - joins: - - name: c - relationship: one_to_one - sql: "{CUBE}.c_id = {c.id}" - - - name: c - sql: | - SELECT 1 AS id UNION ALL - SELECT 2 AS id - - dimensions: - - name: id - 
sql: id - type: number - primary_key: true - - - name: dimension2 - sql: "{CUBE}.id * 3" - type: string - - measures: - - name: measure2 - sql: "{CUBE.dimension2}" - type: sum -``` - -```javascript title="JavaScript" -cube(`a`, { - sql: ` - SELECT 1 AS id, 1 AS b_id, 1 AS c_id UNION ALL - SELECT 2 AS id, 2 AS b_id, 2 AS c_id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - dimension1: { - sql: `CONCAT(${CUBE}.b_id, ${CUBE}.b_id)`, - type: `string` - } - }, - - measures: { - measure1: { - type: `count` - } - }, - - joins: { - b: { - relationship: `one_to_one`, - sql: `${CUBE}.b_id = ${b.id}` - }, - - c: { - relationship: `one_to_one`, - sql: `${CUBE}.c_id = ${c.id}` - } - }, - - pre_aggregations: { - a_and_c: { - dimensions: [ - `a.dimension1`, - `a.b.c.dimension2` - ], - measures: [ - `a.measure1`, - `a.b.c.measure2` - ] - } - } -}) - -cube(`b`, { - sql: ` - SELECT 1 AS id, 1 AS c_id UNION ALL - SELECT 2 AS id, 2 AS c_id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - } - }, - - joins: { - c: { - relationship: `one_to_one`, - sql: `${CUBE}.c_id = ${c.id}` - } - } -}) - -cube(`c`, { - sql: ` - SELECT 1 AS id UNION ALL - SELECT 2 AS id - `, - - dimensions: { - id: { - sql: `id`, - type: `number`, - primary_key: true - }, - - dimension2: { - sql: `${CUBE}.id * 3`, - type: `string` - } - }, - - measures: { - measure2: { - sql: `${CUBE.dimension2}`, - type: `sum` - } - } -}) -``` - - - -When this pre-aggregation is built, Cube will generate and execute the following SQL -query: - -```sql -CREATE TABLE prod_pre_aggregations.a_a_and_c AS -SELECT - "c".id * 3 "c__dimension2", - CONCAT("a".b_id, "a".b_id) "a__dimension1", - sum("c".id * 3) "c__measure2", - count("a".id) "a__measure1" -FROM ( - SELECT 1 AS id, 1 AS b_id, 1 AS c_id UNION ALL - SELECT 2 AS id, 2 AS b_id, 2 AS c_id -) AS "a" -LEFT JOIN ( - SELECT 1 AS id, 1 AS c_id UNION ALL - SELECT 2 AS id, 2 AS c_id -) AS "b" ON "a".b_id = "b".id 
-LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id -) AS "c" ON "b".c_id = "c".id -GROUP BY 1, 2 -``` - -As you can see, in this query, `a` is joined to `c` through `b` here, even though a -direct join from `a` to `c` is also defined in the data model. However, Cube respects -join paths from the pre-aggregation definition and uses them to generate the SQL query. - -## Join hints - -_Join hints_ serve as an auxiliary mechanism to remove the ambiguity of joins _at the -query time_. When possible, use [join paths](#join-paths) in the data model instead. - -### SQL API - -In queries to the [SQL API][ref-sql-api], cubes can be [joined][ref-sql-api-joins] via -the `CROSS JOIN` clause or via `__cubeJoinField`. In any case, Cube will analyze the -query and follow provided join hints. - -Let's run the following query with the data model from the [diamond subgraphs -example](#diamond-subgraphs): - -```sql -SELECT - a.id AS id, - d.id AS d_via_b -FROM a -CROSS JOIN b -CROSS JOIN d -GROUP BY 1, 2 -``` - -The following SQL query will be generated: - -```sql -SELECT - "a".id "a__id", - "d".id "d__id" -FROM ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "a" -LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "b" ON "a".id = "b".id -LEFT JOIN ( - SELECT 1 AS id UNION ALL - SELECT 2 AS id UNION ALL - SELECT 3 AS id -) AS "d" ON "b".id = "d".id -GROUP BY 1, 2 -``` - -If the SQL API query contained `CROSS JOIN c` instead of `CROSS JOIN b`, then the -generated SQL query would contain a join through `c` instead of `b`. - -### REST API - -In queries to the [REST API][ref-rest-api], join hints can be provided via the -[`joinHints` parameter][ref-rest-api-join-hints]. 
- -The SQL API query from the previous section can be rewritten as a REST API query -with join hints as follows: - -```json -{ - "dimensions": [ - "a.id", - "d.id" - ], - "joinHints": [ - ["a", "b"], - ["b", "d"] - ] -} -``` - -The same SQL query as in the previous section will be generated. - -Similarly, if the `joinHints` parameter contained `[["a", "c"], ["c", "d"]]` instead of -`[["a", "b"], ["b", "d"]]`, then the generated SQL query would contain a join through -`c` instead of `b`. - -## Troubleshooting - -### `Can't find join path` - -Sometimes, you might come across the following error message: `Can't find join path to -join 'cube_a', 'cube_b'`. - -It indicates that a query failed because it includes members from cubes that can't be -joined in order to generate a valid query to the upstream data source. -Please check that you've defined necessary joins and that they have [correct -directions](#transitive-join-pitfalls). - -Also, please consider using [views][ref-schema-ref-view] since they -encapsulate join paths and completely remove the possibility of the error in question. -You might also consider setting the [`public` parameter][ref-cube-public] to `false` -on your cubes to hide them from end users. - -If you’re building a custom data application, you might use the [`meta` endpoint][ref-rest-meta] -of the REST API. It groups cubes into `connectedComponents` to help select those ones -that can be joined together. - -### `Primary key is required when join is defined` - -Sometimes, you might come across the following error message: `cube_a cube: primary key -for 'cube_a' is required when join is defined in order to make aggregates work properly`. - -It indicates that you have a [cube][ref-cube] with joins and [pre-aggregations][ref-preaggs]. -However, that cube doesn't have a primary key. - -When generating SQL queries, Cube uses primary keys to avoid fanouts. 
A fanout happens -when two tables are joined and a single value gets duplicated in the end result, meaning -that some values can be double counted. - -Please define a [primary key][ref-primary-key] dimension in this cube to make joins and -pre-aggregations work correctly. - -If your data doesn't have a natural primary key, e.g., `id`, you can define a composite -primary key by concatenating most or all of the columns in the table. Example: - -```yml -cubes: - - name: cube_a - # ... - - dimensions: - - name: composite_key - sql: CONCAT(column_a, '-', column_b, '-', column_c) - type: string - primary_key: true -``` - -### Transitive join pitfalls - -Let's consider an example where we have a many-to-many relationship between -`users` and `companies` through the `companies_to_users` cube: - - - -```yaml title="YAML" -cubes: - - name: users - sql: | - SELECT 1 AS id, 'Ali' AS name UNION ALL - SELECT 2 AS id, 'Bob' AS name UNION ALL - SELECT 3 AS id, 'Eve' AS name - - measures: - - name: count - type: count - - dimensions: - - name: id - sql: id - type: string - primary_key: true - - - name: companies - sql: | - SELECT 11 AS id, 'Acme Corporation' AS name UNION ALL - SELECT 12 AS id, 'Stark Industries' AS name - - dimensions: - - name: id - sql: id - type: string - primary_key: true - - - name: name - sql: name - type: string - - - name: companies_to_users - sql: | - SELECT 11 AS company_id, 1 AS user_id UNION ALL - SELECT 11 AS company_id, 2 AS user_id UNION ALL - SELECT 12 AS company_id, 3 AS user_id - - joins: - - name: users - sql: "{CUBE}.user_id = {users.id}" - relationship: one_to_many - - - name: companies - sql: "{CUBE}.company_id = {companies.id}" - relationship: one_to_many - - dimensions: - - name: id - # Joins require a primary key, so we'll create one on-the-fly - sql: "CONCAT({CUBE}.user_id, ':', {CUBE}.company_id)" - type: string - primary_key: true -``` - -```javascript title="JavaScript" -cube(`users`, { - sql: ` - SELECT 1 AS id, 'Ali' AS name UNION 
ALL - SELECT 2 AS id, 'Bob' AS name UNION ALL - SELECT 3 AS id, 'Eve' AS name - `, - - measures: { - count: { - type: `count` - } - }, - - dimensions: { - id: { - sql: `id`, - type: `string`, - primary_key: true - } - } -}) - -cube(`companies`, { - sql: ` - SELECT 11 AS id, 'Acme Corporation' AS name UNION ALL - SELECT 12 AS id, 'Stark Industries' AS name - `, - - dimensions: { - id: { - sql: `id`, - type: `string`, - primary_key: true - }, - - name: { - sql: `name`, - type: `string` - } - } -}) - -cube(`companies_to_users`, { - sql: ` - SELECT 11 AS company_id, 1 AS user_id UNION ALL - SELECT 11 AS company_id, 2 AS user_id UNION ALL - SELECT 12 AS company_id, 3 AS user_id - `, - - joins: { - users: { - sql: `${CUBE}.user_id = ${users.id}`, - relationship: `one_to_many` - }, - - companies: { - sql: `${CUBE}.company_id = ${companies.id}`, - relationship: `one_to_many` - } - }, - - dimensions: { - id: { - // Joins require a primary key, so we'll create one on-the-fly - sql: `CONCAT(${CUBE}.user_id, ':', ${CUBE}.company_id)`, - type: `string`, - primary_key: true - } - } -}) -``` - - - -With this data model, querying `users.count` as a measure and `companies.name` -as a dimension would yield the following error: `Can't find join path to join -'users', 'companies'`. - -The root cause is that joins are [directed](#direction-of-joins) and there's no -join path that goes *by join definitions in the data model* from `users` to -`companies` or in the opposite direction. - -In [Visual Modeler][ref-visual-model], you can see that both `users` and `companies` -are to the right of `companies_to_users`, meaning that there's no way to go -from `users` to `companies` moving left to right or right to left: - - - - - -One possible solution is to move one of two joins from the `companies_to_users` -cube to either `users` or `companies` cube. 
Please note that it would affect -the query semantics and thus the final result: - - - -```yaml title="YAML" -cubes: - - name: users - - joins: - - name: companies_to_users - sql: "{CUBE}.id = {companies_to_users}.user_id" - relationship: one_to_many - - # ... - - - name: companies_to_users - - joins: - # - name: users - # sql: "{CUBE}.user_id = {users.id}" - # relationship: one_to_many - - - name: companies - sql: "{CUBE}.company_id = {companies.id}" - relationship: one_to_many - - # ... -``` - -```javascript title="JavaScript" -cube(`users`, { - joins: { - companies_to_users: { - sql: `${CUBE}.id = ${companies_to_users}.user_id`, - relationship: `one_to_many` - } - } - - // ... -}) - -cube(`companies_to_users`, { - joins: { - // users: { - // sql: `${CUBE}.user_id = ${users.id}`, - // relationship: `one_to_many` - // }, - - companies: { - sql: `${CUBE}.company_id = ${companies.id}`, - relationship: `one_to_many` - } - } - - // ... -}) -``` - - - -Now there's a join path that goes *by join definitions in the data model* from -`users` to `companies_to_users` to `companies`. [Visual Modeler][ref-visual-model] -also shows that you can reach `companies` from `users` by going left to right. 
- - - - - - -[ref-schema-ref-view]: /reference/data-modeling/view -[ref-schema-ref-joins-relationship]: /reference/data-modeling/joins#relationship -[ref-visual-model]: /docs/data-modeling/visual-modeler -[ref-cube]: /reference/data-modeling/cube -[ref-cube-public]: /reference/data-modeling/cube#public -[ref-rest-meta]: /reference/rest-api/reference#base_pathv1meta -[ref-preaggs]: /docs/data-modeling/concepts#pre-aggregations -[ref-primary-key]: /reference/data-modeling/dimensions#primary_key -[ref-data-model]: /docs/data-modeling/concepts -[ref-queries]: /reference/queries -[ref-references]: /docs/data-modeling/syntax#cubecolumn-cubemember -[ref-calculated-members]: /docs/data-modeling/concepts/calculated-members -[ref-views]: /docs/data-modeling/concepts#views -[ref-view-join-path]: /reference/data-modeling/view#join_path -[ref-preaggs]: /docs/data-modeling/concepts#pre-aggregations -[ref-rest-api]: /reference/rest-api -[ref-sql-api]: /reference/sql-api -[ref-sql-api-joins]: /reference/sql-api/joins -[ref-rest-api-join-hints]: /reference/rest-api/query-format#query-properties \ No newline at end of file diff --git a/docs-mintlify/docs/data-modeling/cubes.mdx b/docs-mintlify/docs/data-modeling/cubes.mdx new file mode 100644 index 0000000000000..45bc867e87835 --- /dev/null +++ b/docs-mintlify/docs/data-modeling/cubes.mdx @@ -0,0 +1,395 @@ +--- +title: Cubes +description: Cubes represent the tables in your database. Each cube maps to a table or query in your data source and contains measures, dimensions, joins, and pre-aggregations. +--- + +Cubes represent the tables in your database. Each cube maps to a single +table in your [data source][ref-data-sources] and contains the business +logic — [measures][ref-measures], [dimensions][ref-dimensions], +[joins][ref-joins], and [pre-aggregations][ref-pre-aggs] — that defines +how that data can be queried. + + + +See the [cube reference][ref-cube-reference] for the full list of +parameters and configuration options. 
+ + + +## Defining a cube + +A cube points to a table in your data source using `sql_table`: + + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders` +}) +``` + + + +You can also use the `sql` property for more complex queries: + + + +```yaml title="YAML" +cubes: + - name: orders + sql: | + SELECT * + FROM orders, line_items + WHERE orders.id = line_items.order_id +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql: ` + SELECT * + FROM orders, line_items + WHERE orders.id = line_items.order_id + ` +}) +``` + + + + + +If you're using dbt, see [this recipe][ref-cube-with-dbt] to streamline +defining cubes on top of dbt models. + + + +## Cube members + +Each cube contains definitions for its members: dimensions, measures, +and segments. + +### Dimensions + +[Dimensions][ref-dimensions] represent the properties of a single data +point — the attributes you group by and filter on, such as `status`, +`city`, or `created_at`: + + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: status + sql: status + type: string + + - name: created_at + sql: created_at + type: time +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders`, + + dimensions: { + id: { + sql: `id`, + type: `number`, + primary_key: true + }, + + status: { + sql: `status`, + type: `string` + }, + + created_at: { + sql: `created_at`, + type: `time` + } + } +}) +``` + + + +Time dimensions enable grouping by granularity (year, quarter, month, +week, day, hour, minute, second) and are essential for +[partitioned pre-aggregations][ref-partition-preaggs]. + +### Measures + +[Measures][ref-measures] represent aggregated values over a set of data +points — counts, sums, averages, and custom calculations: + + + +```yaml title="YAML" +cubes: + - name: orders + # ... 
+ + measures: + - name: count + type: count + + - name: total_amount + sql: amount + type: sum + + - name: average_amount + sql: amount + type: avg +``` + +```javascript title="JavaScript" +cube(`orders`, { + // ... + + measures: { + count: { + type: `count` + }, + + total_amount: { + sql: `amount`, + type: `sum` + }, + + average_amount: { + sql: `amount`, + type: `avg` + } + } +}) +``` + + + +Measures can reference other measures to create +[calculated measures][ref-calculated-measures], and you can apply +[filters][ref-measure-filters] to create filtered aggregations like +"count of completed orders." + +### Segments + +[Segments][ref-segments] are predefined filters on a cube. They allow +you to define commonly used filter logic once and reuse it across +queries: + + + +```yaml title="YAML" +cubes: + - name: orders + # ... + + segments: + - name: completed + sql: "{CUBE}.status = 'completed'" +``` + +```javascript title="JavaScript" +cube(`orders`, { + // ... + + segments: { + completed: { + sql: `${CUBE}.status = 'completed'` + } + } +}) +``` + + + +## Joins + +[Joins][ref-joins] define relationships between cubes, forming the data +graph that Cube uses to generate multi-table SQL queries: + + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders + + joins: + - name: users + relationship: many_to_one + sql: "{CUBE}.user_id = {users.id}" + + - name: line_items + relationship: one_to_many + sql: "{CUBE}.id = {line_items.order_id}" +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders`, + + joins: { + users: { + relationship: `many_to_one`, + sql: `${CUBE}.user_id = ${users.id}` + }, + + line_items: { + relationship: `one_to_many`, + sql: `${CUBE}.id = ${line_items.order_id}` + } + } +}) +``` + + + +Cube supports `one_to_one`, `many_to_one`, and `one_to_many` relationship +types. See [working with joins][ref-working-with-joins] for advanced +patterns like cross-database joins and join direction control. 
+ +## Pre-aggregations + +[Pre-aggregations][ref-pre-aggs] are materialized summaries of cube data +that dramatically speed up query execution. Cube automatically matches +incoming queries to the best available pre-aggregation: + + + +```yaml title="YAML" +cubes: + - name: orders + # ... + + pre_aggregations: + - name: main + measures: + - count + - total_amount + dimensions: + - status + time_dimension: created_at + granularity: day +``` + +```javascript title="JavaScript" +cube(`orders`, { + // ... + + pre_aggregations: { + main: { + measures: [count, total_amount], + dimensions: [status], + time_dimension: created_at, + granularity: `day` + } + } +}) +``` + + + +Pre-aggregations support [partitioning][ref-partition-preaggs] by time +and [incremental refreshes][ref-incremental-preaggs] to keep materialized +data up-to-date efficiently. + +## Designing effective cubes + +### One cube per entity + +Map each cube to a single business entity — `orders`, `users`, +`products`, `line_items`. Use [joins](#joins) to connect them rather than +creating wide cubes with data from multiple tables. + +### Keep naming consistent + +Use clear, consistent naming for members. Dimensions should describe +attributes (`status`, `city`, `created_at`), and measures should describe +aggregations (`count`, `total_revenue`, `average_order_value`). Add +[`description`][ref-cube-description] and [`title`][ref-cube-title] for +user-friendly display. + +### Control visibility + +Use [`public`][ref-cube-public] to hide cubes that should not be +directly queried by end-users. In most data models, cubes are internal +building blocks and [views][ref-views] are the public interface: + + + +```yaml title="YAML" +cubes: + - name: base_orders + public: false + sql_table: orders + + # ... +``` + +```javascript title="JavaScript" +cube(`base_orders`, { + public: false, + sql_table: `orders`, + + // ... 
+}) +``` + + + +### Scale with extension and polymorphism + +When cubes share common members, use [`extends`][ref-extending-cubes] to +avoid duplication. For data models with many similar entities, +[polymorphic cubes][ref-polymorphic-cubes] let you define a base cube +and specialize it per entity. + +## Next steps + +- See the [cube reference][ref-cube-reference] for the full list of + parameters +- Learn about [views][ref-views] to expose cubes to end-users +- Explore [calculated measures][ref-calculated-measures] for derived metrics +- Use the [Semantic Model IDE][ref-ide] to develop cubes interactively + +[wiki-view-sql]: https://en.wikipedia.org/wiki/View_(SQL) +[ref-data-sources]: /admin/connect-to-data +[ref-cube-reference]: /reference/data-modeling/cube +[ref-cube-description]: /reference/data-modeling/cube#description +[ref-cube-title]: /reference/data-modeling/cube#title +[ref-cube-public]: /reference/data-modeling/cube#public +[ref-measures]: /reference/data-modeling/measures +[ref-dimensions]: /reference/data-modeling/dimensions +[ref-segments]: /reference/data-modeling/segments +[ref-joins]: /reference/data-modeling/joins +[ref-pre-aggs]: /reference/data-modeling/pre-aggregations +[ref-views]: /docs/data-modeling/views +[ref-extending-cubes]: /docs/data-modeling/extending-cubes +[ref-polymorphic-cubes]: /recipes/data-modeling/polymorphic-cubes +[ref-dynamic-models]: /docs/data-modeling/dynamic +[ref-calculated-measures]: /docs/data-modeling/measures#calculated-measures +[ref-measure-filters]: /reference/data-modeling/measures#filters +[ref-working-with-joins]: /docs/data-modeling/joins +[ref-partition-preaggs]: /docs/pre-aggregations/matching-pre-aggregations#partitioning +[ref-incremental-preaggs]: /reference/data-modeling/pre-aggregations#incremental +[ref-cube-with-dbt]: /reference/data-modeling/cube-dbt +[ref-explore]: /analytics/explore +[ref-workbooks]: /analytics/workbooks +[ref-rest-api]: /reference/core-data-apis/rest-api +[ref-sql-api]: 
/reference/core-data-apis/sql-api +[ref-ide]: /docs/data-modeling/data-model-ide diff --git a/docs-mintlify/docs/data-modeling/dimensions.mdx b/docs-mintlify/docs/data-modeling/dimensions.mdx new file mode 100644 index 0000000000000..08e3d1ec1f613 --- /dev/null +++ b/docs-mintlify/docs/data-modeling/dimensions.mdx @@ -0,0 +1,432 @@ +--- +title: Dimensions +description: Dimensions are attributes that describe individual rows of data — the fields you group by and filter on, such as status, city, or created_at. +--- + +Dimensions represent attributes of individual rows in your data. They are +the fields you group by and filter on — things like `status`, `city`, +`product_name`, or `created_at`. Each dimension maps to a column or SQL +expression in your data source. + + + +See the [dimensions reference][ref-dimensions-ref] for the full list of +parameters and configuration options. + + + +## Defining dimensions + +A dimension specifies the SQL expression and its type: + + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: status + sql: status + type: string + + - name: created_at + sql: created_at + type: time +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders`, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true }, + status: { sql: `status`, type: `string` }, + created_at: { sql: `created_at`, type: `time` } + } +}) +``` + + + +### Dimension types + +| Data type in SQL | Dimension type in Cube | +| --- | --- | +| `timestamp`, `date`, `time` | [`time`][ref-type] | +| `text`, `varchar` | [`string`][ref-type] | +| `integer`, `bigint`, `decimal` | [`number`][ref-type] | +| `boolean` | [`boolean`][ref-type] | + +### Primary keys + +Every cube that participates in [joins][ref-joins] should define a +[`primary_key`][ref-primary-key] dimension. 
Cube uses primary keys to avoid +fanouts — when rows get duplicated during joins and aggregates are +over-counted. Composite primary keys can be created by concatenating columns: + +```yaml +dimensions: + - name: composite_key + sql: "CONCAT({CUBE}.order_id, '-', {CUBE}.product_id)" + type: string + primary_key: true +``` + +## Time dimensions + +Time dimensions are dimensions of the [`time` type][ref-type]. They enable +grouping by time granularity (year, quarter, month, week, day, hour, minute, +second) and are essential for time-series analysis. + +```yaml +dimensions: + - name: created_at + sql: created_at + type: time +``` + +When queried, you can group by any built-in granularity without defining +additional dimensions. + +### Custom granularities + +You can define [custom granularities][ref-granularities] for time dimensions +when the built-in ones don't fit — for example, weeks starting on Sunday +or fiscal years: + + + +```yaml title="YAML" +cubes: + - name: orders + # ... + + dimensions: + - name: created_at + sql: created_at + type: time + granularities: + - name: sunday_week + interval: 1 week + offset: -1 day + + - name: fiscal_year + interval: 1 year + offset: 1 month +``` + +```javascript title="JavaScript" +cube(`orders`, { + // ... + + dimensions: { + created_at: { + sql: `created_at`, + type: `time`, + granularities: { + sunday_week: { interval: `1 week`, offset: `-1 day` }, + fiscal_year: { interval: `1 year`, offset: `1 month` } + } + } + } +}) +``` + + + +Time dimensions are essential for performance features like +[partitioned pre-aggregations][ref-partition-preaggs] and +[incremental refreshes][ref-incremental-preaggs]. + + + +See the following recipes: +- For a [custom granularity][ref-custom-granularity-recipe] example. +- For a [custom calendar][ref-custom-calendar-recipe] example. 
+ + + +## Proxy dimensions + +Proxy dimensions reference dimensions from the same cube or other cubes, +providing a way to reuse existing definitions and reduce code duplication. + +### Within the same cube + +Reference existing dimensions to build derived ones without duplicating SQL: + + + +```yaml title="YAML" +cubes: + - name: users + sql_table: users + + dimensions: + - name: initials + sql: "SUBSTR(first_name, 1, 1)" + type: string + + - name: last_name + sql: "UPPER(last_name)" + type: string + + - name: full_name + sql: "{initials} || '. ' || {last_name}" + type: string +``` + +```javascript title="JavaScript" +cube(`users`, { + sql_table: `users`, + + dimensions: { + initials: { sql: `SUBSTR(first_name, 1, 1)`, type: `string` }, + last_name: { sql: `UPPER(last_name)`, type: `string` }, + full_name: { sql: `${initials} || '. ' || ${last_name}`, type: `string` } + } +}) +``` + + + +### From other cubes + +If cubes are [joined][ref-joins], you can bring a dimension from one cube +into another. 
Cube generates the necessary joins automatically: + + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders + + joins: + - name: users + sql: "{CUBE}.user_id = {users.id}" + relationship: many_to_one + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: user_name + sql: "{users.name}" + type: string +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders`, + + joins: { + users: { + sql: `${CUBE}.user_id = ${users.id}`, + relationship: `many_to_one` + } + }, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true }, + user_name: { sql: `${users.name}`, type: `string` } + } +}) +``` + + + +### Time dimension granularity references + +When referencing a time dimension, you can specify a granularity to create +a proxy dimension at that specific granularity — including +[custom granularities](#custom-granularities): + +```yaml +dimensions: + - name: created_at + sql: created_at + type: time + granularities: + - name: sunday_week + interval: 1 week + offset: -1 day + + - name: created_at_year + sql: "{created_at.year}" + type: time + + - name: created_at_sunday_week + sql: "{created_at.sunday_week}" + type: time +``` + +## Subquery dimensions + +Subquery dimensions reference [measures][ref-measures-page] from other cubes, +effectively turning an aggregate into a per-row value. This enables nested +aggregations — for example, calculating the average of per-customer order counts. 
+ + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders + + joins: + - name: users + sql: "{users}.id = {CUBE}.user_id" + relationship: many_to_one + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + measures: + - name: count + type: count + + - name: users + sql_table: users + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: name + sql: name + type: string + + - name: order_count + sql: "{orders.count}" + type: number + sub_query: true + + measures: + - name: avg_order_count + sql: "{order_count}" + type: avg +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders`, + + joins: { + users: { + sql: `${users}.id = ${CUBE}.user_id`, + relationship: `many_to_one` + } + }, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true } + }, + + measures: { + count: { type: `count` } + } +}) + +cube(`users`, { + sql_table: `users`, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true }, + name: { sql: `name`, type: `string` }, + + order_count: { + sql: `${orders.count}`, + type: `number`, + sub_query: true + } + }, + + measures: { + avg_order_count: { + sql: `${order_count}`, + type: `avg` + } + } +}) +``` + + + +The `order_count` subquery dimension computes the order count per user. +The `avg_order_count` measure then averages those per-user values. Cube +implements this as a correlated subquery via joins for optimal performance. + + + +See the following recipes: +- How to calculate [nested aggregates][ref-nested-aggregates-recipe]. +- How to calculate [filtered aggregates][ref-filtered-aggregates-recipe]. + + + +## Hierarchies + +Dimensions can be organized into [hierarchies][ref-hierarchies] to define +drill-down paths (e.g., Country → State → City): + +```yaml +cubes: + - name: users + # ... 
+ + dimensions: + - name: country + sql: country + type: string + + - name: state + sql: state + type: string + + - name: city + sql: city + type: string + + hierarchies: + - name: location + levels: + - country + - state + - city +``` + +## Next steps + +- See the [dimensions reference][ref-dimensions-ref] for all parameters +- Learn about [measures][ref-measures-page] for aggregated calculations +- Explore [custom granularities][ref-granularities] for fiscal calendars + and non-standard time periods + +[ref-dimensions-ref]: /reference/data-modeling/dimensions +[ref-measures-page]: /docs/data-modeling/measures +[ref-joins]: /docs/data-modeling/joins +[ref-type]: /reference/data-modeling/dimensions#type +[ref-primary-key]: /reference/data-modeling/dimensions#primary_key +[ref-granularities]: /reference/data-modeling/dimensions#granularities +[ref-hierarchies]: /reference/data-modeling/hierarchies +[ref-partition-preaggs]: /docs/pre-aggregations/matching-pre-aggregations#partitioning +[ref-incremental-preaggs]: /reference/data-modeling/pre-aggregations#incremental +[ref-custom-granularity-recipe]: /recipes/data-modeling/custom-granularity +[ref-custom-calendar-recipe]: /recipes/data-modeling/custom-calendar +[ref-nested-aggregates-recipe]: /recipes/data-modeling/nested-aggregates +[ref-filtered-aggregates-recipe]: /recipes/data-modeling/filtered-aggregates diff --git a/docs-mintlify/docs/data-modeling/concepts/code-reusability-extending-cubes.mdx b/docs-mintlify/docs/data-modeling/extending-cubes.mdx similarity index 96% rename from docs-mintlify/docs/data-modeling/concepts/code-reusability-extending-cubes.mdx rename to docs-mintlify/docs/data-modeling/extending-cubes.mdx index c3ac681f09ea2..fe1cdd83e1f86 100644 --- a/docs-mintlify/docs/data-modeling/concepts/code-reusability-extending-cubes.mdx +++ b/docs-mintlify/docs/data-modeling/extending-cubes.mdx @@ -1,6 +1,6 @@ --- -title: Extension -description: Uses extends on cubes and views to inherit and merge members 
from a parent so shared measures, dimensions, and joins stay defined once. +title: Extending cubes +description: Use extends on cubes to inherit and merge members from a parent so shared measures, dimensions, and joins stay defined once. --- The `extends` parameter, supported for [cubes][ref-cube-extends] and diff --git a/docs-mintlify/docs/data-modeling/joins.mdx b/docs-mintlify/docs/data-modeling/joins.mdx new file mode 100644 index 0000000000000..d26b06c3d881a --- /dev/null +++ b/docs-mintlify/docs/data-modeling/joins.mdx @@ -0,0 +1,542 @@ +--- +title: Joins +description: Joins define relationships between cubes, allowing Cube to automatically generate multi-table SQL queries when views combine data from multiple cubes. +--- + +Joins define how cubes connect to each other. When a [view][ref-views] +includes members from multiple cubes, Cube uses these relationships to +automatically generate SQL `JOIN` clauses — so end-users can explore data +across tables without writing SQL. + + + +See the [joins reference][ref-schema-ref-joins-relationship] for the full +list of parameters and configuration options. + + + +## Relationship types + +Cube supports three relationship types: `one_to_one`, `one_to_many`, and +`many_to_one`. The relationship type describes how rows in the two cubes correspond; +the cube that defines the join is always the left side of the generated `LEFT JOIN`. + +Consider two cubes, `orders` and `customers`. 
An order belongs to one +customer, but a customer can have many orders: + + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders + + joins: + - name: customers + relationship: many_to_one + sql: "{CUBE}.customer_id = {customers.id}" + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: status + sql: status + type: string + + measures: + - name: count + type: count + + - name: customers + sql_table: customers + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: company + sql: company + type: string +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders`, + + joins: { + customers: { + relationship: `many_to_one`, + sql: `${CUBE}.customer_id = ${customers.id}` + } + }, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true }, + status: { sql: `status`, type: `string` } + }, + + measures: { + count: { type: `count` } + } +}) + +cube(`customers`, { + sql_table: `customers`, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true }, + company: { sql: `company`, type: `string` } + } +}) +``` + + + +The `many_to_one` join on `orders` means: many orders belong to one customer. +When a view includes members from both cubes, Cube generates SQL with `orders` +on the left and `customers` on the right: + +```sql +SELECT + "orders".status, + "customers".company, + COUNT("orders".id) +FROM orders AS "orders" +LEFT JOIN customers AS "customers" + ON "orders".customer_id = "customers".id +GROUP BY 1, 2 +``` + +Because `orders` is on the left side of the `LEFT JOIN`, all orders are +preserved — including guest checkouts with no matching customer. + + + +As a rule of thumb, define joins on the **fact table** (e.g., `orders`) +pointing toward the **dimension table** (e.g., `customers`) using +`many_to_one`. This ensures the fact table is always the base of the query, +preserving all its rows. 
+ + + +### Many-to-many relationships + +A many-to-many relationship requires an associative (junction) table. For +example, `posts` and `topics` are connected through a `post_topics` table: + + + Many-to-Many Entity Diagram for posts, topics and post_topics + + +Model this with an associative cube, chaining the joins so they flow in one +direction (`posts → post_topics → topics`): + + + +```yaml title="YAML" +cubes: + - name: posts + sql_table: posts + + joins: + - name: post_topics + relationship: one_to_many + sql: "{CUBE}.id = {post_topics.post_id}" + + - name: post_topics + sql_table: post_topics + + joins: + - name: topics + relationship: many_to_one + sql: "{CUBE}.topic_id = {topics.id}" + + dimensions: + - name: id + sql: "CONCAT({CUBE}.post_id, {CUBE}.topic_id)" + type: string + primary_key: true + + - name: topics + sql_table: topics + + dimensions: + - name: id + sql: id + type: string + primary_key: true + + - name: name + sql: name + type: string +``` + +```javascript title="JavaScript" +cube(`posts`, { + sql_table: `posts`, + + joins: { + post_topics: { + relationship: `one_to_many`, + sql: `${CUBE}.id = ${post_topics.post_id}` + } + } +}) + +cube(`post_topics`, { + sql_table: `post_topics`, + + joins: { + topics: { + relationship: `many_to_one`, + sql: `${CUBE}.topic_id = ${topics.id}` + } + }, + + dimensions: { + id: { + sql: `CONCAT(${CUBE}.post_id, ${CUBE}.topic_id)`, + type: `string`, + primary_key: true + } + } +}) + +cube(`topics`, { + sql_table: `topics`, + + dimensions: { + id: { sql: `id`, type: `string`, primary_key: true }, + name: { sql: `name`, type: `string` } + } +}) +``` + + + +A view can then expose this through the `join_path`: + +```yaml +views: + - name: posts_with_topics + cubes: + - join_path: posts + includes: + - title + - count + + - join_path: posts.post_topics.topics + prefix: true + includes: + - name +``` + +## Direction of joins + +**All joins are directed.** They flow from the source cube (where the join +is defined) to 
the target cube (the one referenced). Cube places the source +cube on the left side of the `LEFT JOIN` and the target on the right. + +This matters because the left table preserves all its rows, while the right +table contributes matching rows or `NULL`. The direction you choose affects +which records appear in the result set. + +For example, if `orders` defines a `many_to_one` join to `customers`: +- `orders` is the base → all orders are preserved, even guest checkouts +- `customers` without orders won't appear + +If instead `customers` defined a `one_to_many` join to `orders`: +- `customers` is the base → all customers are preserved, even those without orders +- Guest checkout orders (with no matching customer) won't appear + +### Using views to control direction + +Views let you control which join path is followed via the +[`join_path`][ref-view-join-path] parameter. This is the recommended way to +handle cases where you need different join directions for different use cases: + + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders + + joins: + - name: customers + sql: "{CUBE}.customer_id = {customers.id}" + relationship: many_to_one + + measures: + - name: count + type: count + + - name: total_revenue + sql: revenue + type: sum + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: customers + sql_table: customers + + joins: + - name: orders + sql: "{CUBE}.id = {orders.customer_id}" + relationship: one_to_many + + measures: + - name: count + type: count + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: name + sql: name + type: string +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders`, + + joins: { + customers: { + sql: `${CUBE}.customer_id = ${customers.id}`, + relationship: `many_to_one` + } + }, + + measures: { + count: { type: `count` }, + total_revenue: { sql: `revenue`, type: `sum` } + }, + + dimensions: { + id: { sql: `id`, type: `number`, 
primary_key: true } + } +}) + +cube(`customers`, { + sql_table: `customers`, + + joins: { + orders: { + sql: `${CUBE}.id = ${orders.customer_id}`, + relationship: `one_to_many` + } + }, + + measures: { + count: { type: `count` } + }, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true }, + name: { sql: `name`, type: `string` } + } +}) +``` + + + +Now you can create two views for two different analytical needs: + + + +```yaml title="YAML" +views: + - name: revenue_per_customer + description: All orders with customer details. Includes guest checkouts. + cubes: + - join_path: orders + includes: + - count + - total_revenue + + - join_path: orders.customers + includes: + - name + + - name: customer_activity + description: All customers with their order activity. Includes customers without orders. + cubes: + - join_path: customers + includes: + - name + - count + + - join_path: customers.orders + prefix: true + includes: + - count + - total_revenue +``` + +```javascript title="JavaScript" +view(`revenue_per_customer`, { + description: `All orders with customer details. Includes guest checkouts.`, + cubes: [ + { + join_path: orders, + includes: [`count`, `total_revenue`] + }, + { + join_path: orders.customers, + includes: [`name`] + } + ] +}) + +view(`customer_activity`, { + description: `All customers with their order activity. Includes customers without orders.`, + cubes: [ + { + join_path: customers, + includes: [`name`, `count`] + }, + { + join_path: customers.orders, + prefix: true, + includes: [`count`, `total_revenue`] + } + ] +}) +``` + + + +The `revenue_per_customer` view follows the `orders → customers` path, so all +orders are preserved. The `customer_activity` view follows +`customers → orders`, so all customers are preserved. + +## Diamond subgraphs + +A _diamond subgraph_ occurs when there's more than one join path between two +cubes — for example, `users.schools.countries` and +`users.employers.countries`. 
This can lead to ambiguous query generation. + +Views resolve this ambiguity by specifying the exact `join_path` for each +included cube. For example, if cube `a` joins to both `b` and `c`, and both +`b` and `c` join to `d`, a view can specify which path to follow: + +```yaml +views: + - name: a_with_d_via_b + cubes: + - join_path: a + includes: "*" + + - join_path: a.b.d + prefix: true + includes: + - value + + - name: a_with_d_via_c + cubes: + - join_path: a + includes: "*" + + - join_path: a.c.d + prefix: true + includes: + - value +``` + +Each view follows a specific, unambiguous path through the data graph. + +## Join paths in calculated members + +When referencing a member of another cube in a [calculated member][ref-calculated-members], +you can use a join path to specify the exact route. This uses dot-separated +cube names: + + + +```yaml title="YAML" +cubes: + - name: orders + # ... + + dimensions: + - name: customer_country + sql: "{customers.country}" + type: string + + - name: shipping_country + sql: "{shipping_addresses.country}" + type: string +``` + +```javascript title="JavaScript" +cube(`orders`, { + // ... + + dimensions: { + customer_country: { + sql: `${customers.country}`, + type: `string` + }, + + shipping_country: { + sql: `${shipping_addresses.country}`, + type: `string` + } + } +}) +``` + + + +## Troubleshooting + +### `Can't find join path` + +The error `Can't find join path to join 'cube_a', 'cube_b'` means the cubes +included in a view or query can't be connected through the defined joins. + +Check that: +- Joins are defined with the correct [direction](#direction-of-joins) +- There is a continuous path from the source cube to the target cube +- You're using the [`join_path`][ref-view-join-path] parameter in views to + specify the exact path + +### `Primary key is required when join is defined` + +Cube uses primary keys to avoid fanouts — when rows get duplicated during +joins and aggregates are over-counted. 
Define a [primary key][ref-primary-key] +dimension in every cube that participates in joins. + +If your data doesn't have a natural primary key, create a composite one: + +```yaml +cubes: + - name: events + # ... + + dimensions: + - name: composite_key + sql: CONCAT(column_a, '-', column_b, '-', column_c) + type: string + primary_key: true +``` + +[ref-schema-ref-joins-relationship]: /reference/data-modeling/joins +[ref-views]: /docs/data-modeling/views +[ref-view-join-path]: /reference/data-modeling/view#join_path +[ref-calculated-members]: /docs/data-modeling/measures#calculated-measures +[ref-primary-key]: /reference/data-modeling/dimensions#primary_key +[ref-visual-model]: /docs/data-modeling/visual-modeler diff --git a/docs-mintlify/docs/data-modeling/measures.mdx b/docs-mintlify/docs/data-modeling/measures.mdx new file mode 100644 index 0000000000000..0a443613af3bb --- /dev/null +++ b/docs-mintlify/docs/data-modeling/measures.mdx @@ -0,0 +1,434 @@ +--- +title: Measures +description: Measures compute aggregated values across rows — counts, sums, averages, and more complex calculations like rolling windows, time shifts, and rankings. +--- + +While [dimensions][ref-dimensions-page] describe attributes of individual rows, +measures compute values across rows — sums, counts, averages, and other +aggregations. Measures can aggregate columns directly (like `sum of revenue`) +or reference other measures to create compound metrics (like `revenue / count`). + + + +See the [measures reference][ref-measures-ref] for the full list of parameters +and configuration options. 
+ + + +## Defining measures + +A measure specifies the SQL expression to aggregate and the aggregation type: + + + +```yaml title="YAML" +cubes: + - name: orders + sql_table: orders + + measures: + - name: count + type: count + + - name: total_amount + sql: amount + type: sum + + - name: average_amount + sql: amount + type: avg +``` + +```javascript title="JavaScript" +cube(`orders`, { + sql_table: `orders`, + + measures: { + count: { type: `count` }, + total_amount: { sql: `amount`, type: `sum` }, + average_amount: { sql: `amount`, type: `avg` } + } +}) +``` + + + +## Filtered measures + +You can apply [filters][ref-filters] to a measure to create conditional +aggregations. Only rows matching the filter are included: + + + +```yaml title="YAML" +cubes: + - name: orders + # ... + + measures: + - name: count + type: count + + - name: completed_count + type: count + filters: + - sql: "{CUBE}.status = 'completed'" +``` + +```javascript title="JavaScript" +cube(`orders`, { + // ... + + measures: { + count: { type: `count` }, + + completed_count: { + type: `count`, + filters: [{ sql: `${CUBE}.status = 'completed'` }] + } + } +}) +``` + + + +When `completed_count` is queried, Cube generates SQL with a `CASE` expression: + +```sql +SELECT + COUNT(CASE WHEN (orders.status = 'completed') THEN 1 END) AS completed_count +FROM orders +``` + +## Calculated measures + +Calculated measures perform calculations on other measures using SQL functions +and operators. They provide a way to decompose complex metrics (e.g., ratios +or percents) into formulas involving simpler measures. + +### Referencing measures in the same cube + + + +```yaml title="YAML" +cubes: + - name: orders + # ... + + measures: + - name: count + type: count + + - name: completed_count + type: count + filters: + - sql: "{CUBE}.status = 'completed'" + + - name: completed_ratio + sql: "1.0 * {completed_count} / NULLIF({count}, 0)" + type: number +``` + +```javascript title="JavaScript" +cube(`orders`, { + // ... 
+ + measures: { + count: { type: `count` }, + + completed_count: { + type: `count`, + filters: [{ sql: `${CUBE}.status = 'completed'` }] + }, + + completed_ratio: { + sql: `1.0 * ${completed_count} / NULLIF(${count}, 0)`, + type: `number` + } + } +}) +``` + + + +### Referencing measures from other cubes + +If cubes are [joined][ref-joins], you can reference measures across cubes. +Cube generates the necessary joins automatically: + + + +```yaml title="YAML" +cubes: + - name: users + # ... + + joins: + - name: orders + sql: "{CUBE}.id = {orders}.user_id" + relationship: one_to_many + + measures: + - name: count + type: count + + - name: purchases_to_users_ratio + sql: "1.0 * {orders.purchases} / NULLIF({CUBE.count}, 0)" + type: number +``` + +```javascript title="JavaScript" +cube(`users`, { + // ... + + joins: { + orders: { + sql: `${CUBE}.id = ${orders}.user_id`, + relationship: `one_to_many` + } + }, + + measures: { + count: { type: `count` }, + + purchases_to_users_ratio: { + sql: `1.0 * ${orders.purchases} / NULLIF(${CUBE.count}, 0)`, + type: `number` + } + } +}) +``` + + + +## Multi-stage measures + +Multi-stage measures are calculated in two or more stages, enabling +calculations on already-aggregated data. Each stage results in one or more +CTEs in the generated SQL query. + + + +Multi-stage measures are powered by Tesseract, the [next-generation data +modeling engine][link-tesseract]. Tesseract is currently in preview. Use the +[`CUBEJS_TESSERACT_SQL_PLANNER`][ref-tesseract-env] environment variable to +enable it. + + + +### Rolling windows + +Rolling window measures calculate metrics over a moving window of time, such +as cumulative counts or moving averages. 
Use the +[`rolling_window`][ref-rolling-window] parameter: + +```yaml +measures: + - name: cumulative_count + type: count + rolling_window: + trailing: unbounded + + - name: trailing_month_count + sql: id + type: count + rolling_window: + trailing: 1 month +``` + +### Period-to-date + +Period-to-date measures analyze data from the start of a period to the current +date — year-to-date (YTD), quarter-to-date (QTD), or month-to-date (MTD): + +```yaml +measures: + - name: revenue_ytd + sql: revenue + type: sum + rolling_window: + type: to_date + granularity: year + + - name: revenue_qtd + sql: revenue + type: sum + rolling_window: + type: to_date + granularity: quarter +``` + +### Time shift + +Time-shift measures calculate the value of another measure at a different +point in time, typically for period-over-period comparisons like +year-over-year growth. Use the [`time_shift`][ref-time-shift] parameter: + +```yaml +measures: + - name: revenue + sql: revenue + type: sum + + - name: revenue_prior_year + multi_stage: true + sql: "{revenue}" + type: number + time_shift: + - interval: 1 year + type: prior +``` + +You can combine time shift with period-to-date for comparisons like +"this year's YTD vs. last year's YTD": + +```yaml +measures: + - name: revenue_ytd + sql: revenue + type: sum + rolling_window: + type: to_date + granularity: year + + - name: revenue_prior_year_ytd + multi_stage: true + sql: "{revenue_ytd}" + type: number + time_shift: + - time_dimension: time + interval: 1 year + type: prior +``` + +Time-shift measures can also be used with [calendar cubes][ref-calendar-cubes] +to customize how time-shifting works, e.g., to shift by retail calendar +periods. 
+ +### Percent of total (fixed dimension) + +Use the [`group_by`][ref-group-by] parameter to fix the inner aggregation to +specific dimensions, enabling percent-of-total calculations: + +```yaml +measures: + - name: revenue + sql: revenue + type: sum + + - name: country_revenue + multi_stage: true + sql: "{revenue}" + type: sum + group_by: + - country + + - name: country_revenue_percentage + multi_stage: true + sql: "{revenue} / NULLIF({country_revenue}, 0)" + type: number +``` + +### Nested aggregates + +Use the [`add_group_by`][ref-add-group-by] parameter to compute an aggregate +of an aggregate, e.g., the average of per-customer averages: + +```yaml +measures: + - name: avg_order_value + sql: amount + type: avg + + - name: avg_customer_order_value + multi_stage: true + sql: "{avg_order_value}" + type: avg + add_group_by: + - customer_id +``` + +### Ranking + +Use the [`reduce_by`][ref-reduce-by] parameter to rank items within groups: + +```yaml +measures: + - name: revenue + sql: revenue + type: sum + + - name: product_rank + multi_stage: true + order_by: + - sql: "{revenue}" + dir: asc + reduce_by: + - product + type: rank +``` + +### Conditional measures + +Conditional measures depend on the value of a dimension, using the +[`case`][ref-case] parameter with [`switch` dimensions][ref-switch-dim]: + +```yaml +measures: + - name: amount_in_currency + multi_stage: true + case: + switch: "{CUBE.currency}" + when: + - value: EUR + sql: "{CUBE.amount_eur}" + - value: GBP + sql: "{CUBE.amount_gbp}" + else: + sql: "{CUBE.amount_usd}" + type: number +``` + +## Formatting + +Use the [`format`][ref-format] parameter to control how measures are displayed: + +```yaml +measures: + - name: total_revenue + sql: revenue + type: sum + format: currency + + - name: conversion_rate + sql: "1.0 * {completed_count} / NULLIF({count}, 0)" + type: number + format: percent +``` + +## Next steps + +- See the [measures reference][ref-measures-ref] for all parameters +- Learn about 
[dimensions][ref-dimensions-page] for grouping and filtering +- Explore [pre-aggregations][ref-pre-aggs] to accelerate measure queries +- See the [period-over-period recipe][ref-pop-recipe] for advanced time + comparisons + +[ref-measures-ref]: /reference/data-modeling/measures +[ref-dimensions-page]: /docs/data-modeling/dimensions +[ref-joins]: /docs/data-modeling/joins +[ref-pre-aggs]: /reference/data-modeling/pre-aggregations +[ref-type]: /reference/data-modeling/measures#type +[ref-filters]: /reference/data-modeling/measures#filters +[ref-format]: /reference/data-modeling/measures#format +[ref-rolling-window]: /reference/data-modeling/measures#rolling_window +[ref-time-shift]: /reference/data-modeling/measures#time_shift +[ref-group-by]: /reference/data-modeling/measures#group_by +[ref-reduce-by]: /reference/data-modeling/measures#reduce_by +[ref-add-group-by]: /reference/data-modeling/measures#add_group_by +[ref-case]: /reference/data-modeling/measures#case +[ref-switch-dim]: /reference/data-modeling/dimensions#type +[ref-tesseract-env]: /reference/configuration/environment-variables#cubejs_tesseract_sql_planner +[ref-calendar-cubes]: /docs/data-modeling/concepts/calendar-cubes +[ref-pop-recipe]: /recipes/data-modeling/period-over-period +[link-tesseract]: https://cube.dev/blog/introducing-next-generation-data-modeling-engine diff --git a/docs-mintlify/docs/data-modeling/multi-fact-views.mdx b/docs-mintlify/docs/data-modeling/multi-fact-views.mdx new file mode 100644 index 0000000000000..bd9634615ab87 --- /dev/null +++ b/docs-mintlify/docs/data-modeling/multi-fact-views.mdx @@ -0,0 +1,421 @@ +--- +title: Multi-fact views +description: Analyze data across multiple fact tables that share common dimensions like time or customers, without row multiplication or manual workarounds. +--- + +In many data models, you have multiple fact tables that share common +dimensions but have no direct relationship to each other. 
For example, +an e-commerce company tracks both orders and returns: + +- **`orders`** — one row per order, with `customer_id` and `created_at` +- **`returns`** — one row per return, with `customer_id` and `created_at` +- **`customers`** — one row per customer +- **`dates`** — a date spine + +Both `orders` and `returns` join to `customers` and `dates`, but they don't +join to each other: + +``` + customers + / \ + orders returns + \ / + dates +``` + +You need a report showing `orders_count`, `total_revenue`, `returns_count`, +and `total_refunds` grouped by customer and month. But joining `orders` and +`returns` directly would produce a cross product — every order matched with +every return for that customer and date — inflating all counts and sums. + +## How multi-fact views solve this + +In a regular [view][ref-views], there is a single **root cube** — the first +cube listed in the view's `cubes` array. All joins flow from this root, and +Cube uses it as the base table in the generated SQL. + +Multi-fact views work differently. When a view includes measures from +**multiple fact tables**, Cube selects the root dynamically at query time +based on which measures are requested. Each fact table gets its own +aggregating subquery, and the results are joined on the shared dimensions. +No fanout, no manual workarounds. + + + +Multi-fact views are powered by Tesseract, the [next-generation data modeling +engine][link-tesseract]. Tesseract is currently in preview. Use the +[`CUBEJS_TESSERACT_SQL_PLANNER`][ref-tesseract-env] environment variable to +enable it. + + + +## How to model it + +### 1. 
Define the cubes + +Each fact table becomes a cube with explicit joins to the shared dimension +tables: + + + +```yaml title="YAML" +cubes: + - name: customers + sql_table: customers + + dimensions: + - name: id + type: number + sql: id + primary_key: true + - name: name + type: string + sql: name + - name: city + type: string + sql: city + + - name: dates + sql_table: dates + + dimensions: + - name: date + type: time + sql: date + primary_key: true + + - name: orders + sql_table: orders + + joins: + - name: customers + relationship: many_to_one + sql: "{orders}.customer_id = {customers.id}" + - name: dates + relationship: many_to_one + sql: "DATE_TRUNC('day', {orders}.created_at) = {dates.date}" + + dimensions: + - name: id + type: number + sql: id + primary_key: true + - name: status + type: string + sql: status + + measures: + - name: count + type: count + - name: total_amount + type: sum + sql: amount + + - name: returns + sql_table: returns + + joins: + - name: customers + relationship: many_to_one + sql: "{returns}.customer_id = {customers.id}" + - name: dates + relationship: many_to_one + sql: "DATE_TRUNC('day', {returns}.created_at) = {dates.date}" + + dimensions: + - name: id + type: number + sql: id + primary_key: true + + measures: + - name: count + type: count + - name: total_refund + type: sum + sql: refund_amount +``` + +```javascript title="JavaScript" +cube(`customers`, { + sql_table: `customers`, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true }, + name: { sql: `name`, type: `string` }, + city: { sql: `city`, type: `string` } + } +}) + +cube(`dates`, { + sql_table: `dates`, + + dimensions: { + date: { sql: `date`, type: `time`, primary_key: true } + } +}) + +cube(`orders`, { + sql_table: `orders`, + + joins: { + customers: { + relationship: `many_to_one`, + sql: `${orders}.customer_id = ${customers.id}` + }, + dates: { + relationship: `many_to_one`, + sql: `DATE_TRUNC('day', ${orders}.created_at) = ${dates.date}` + } + }, + + 
dimensions: { + id: { sql: `id`, type: `number`, primary_key: true }, + status: { sql: `status`, type: `string` } + }, + + measures: { + count: { type: `count` }, + total_amount: { sql: `amount`, type: `sum` } + } +}) + +cube(`returns`, { + sql_table: `returns`, + + joins: { + customers: { + relationship: `many_to_one`, + sql: `${returns}.customer_id = ${customers.id}` + }, + dates: { + relationship: `many_to_one`, + sql: `DATE_TRUNC('day', ${returns}.created_at) = ${dates.date}` + } + }, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true } + }, + + measures: { + count: { type: `count` }, + total_refund: { sql: `refund_amount`, type: `sum` } + } +}) +``` + + + +The critical detail: both `orders` and `returns` declare direct joins to +`customers` and `dates`. This tells Cube that these dimension tables are shared +between the two facts. + +### 2. Create a view + +The view brings both fact tables and the shared dimension tables together. +Dimension tables are included at root-level join paths (not nested under a +specific fact), which makes their dimensions common to both facts. 
Use +`prefix` to disambiguate identically named members across fact cubes: + + + +```yaml title="YAML" +views: + - name: customer_overview + cubes: + - join_path: orders + prefix: true + includes: + - count + - total_amount + - join_path: returns + prefix: true + includes: + - count + - total_refund + - join_path: customers + includes: + - name + - city + - join_path: dates + includes: + - date +``` + +```javascript title="JavaScript" +view(`customer_overview`, { + cubes: [ + { + join_path: orders, + prefix: true, + includes: [`count`, `total_amount`] + }, + { + join_path: returns, + prefix: true, + includes: [`count`, `total_refund`] + }, + { + join_path: customers, + includes: [`name`, `city`] + }, + { + join_path: dates, + includes: [`date`] + } + ] +}) +``` + + + +When you query `orders_count`, `orders_total_amount`, `returns_count`, and +`returns_total_refund` grouped by `name`, `city`, and `date`, Cube detects +the two separate fact roots and automatically executes a multi-fact query. + +## What Cube does under the hood + +Cube executes the query in three stages: + +### 1. Separate aggregating subqueries + +Each fact table gets its own independent subquery that joins only the tables +it needs, applies relevant filters, and aggregates by the common dimensions: + +- **Subquery 1** (orders): joins `orders` → `customers` and `orders` → `dates`, + computes `COUNT(*)` and `SUM(amount)`, grouped by `name`, `city`, `date` +- **Subquery 2** (returns): joins `returns` → `customers` and `returns` → `dates`, + computes `COUNT(*)` and `SUM(refund_amount)`, grouped by `name`, `city`, `date` + +### 2. Join on common dimensions + +The subquery results are joined with `FULL JOIN` on all common dimension +columns (`name`, `city`, `date`). This preserves rows that exist in only one +fact table — a customer who placed orders but never returned anything still +appears in the results. + +### 3. 
Final result + +The combined result shows measures from each fact table side by side: + +| name | city | date | orders_count | orders_total_amount | returns_count | returns_total_refund | +| --- | --- | --- | --- | --- | --- | --- | +| Alice | New York | 2025-01-15 | 2 | 200.00 | 0 | NULL | +| Alice | New York | 2025-02-10 | 2 | 225.00 | 1 | 100.00 | +| Bob | Seattle | 2025-01-20 | 3 | 550.00 | 2 | 130.00 | +| Charlie | New York | 2025-02-05 | 0 | NULL | 2 | 100.00 | +| Diana | Boston | 2025-03-01 | 1 | 400.00 | 0 | NULL | + +Charlie has no orders and Diana has no returns — both are still included, +with zero counts and `NULL` sums for the missing fact table. + +## Common patterns + +### Time as the shared dimension + +The most common multi-fact pattern uses time as the shared dimension. +For example, you might have `page_views`, `signups`, and `purchases` that all +have timestamps but no direct relationship. By joining each to a shared +`dates` cube, you can analyze conversion funnels — page views vs. signups +vs. purchases by day — without any row multiplication. + +### More than two fact tables + +Multi-fact queries are not limited to two fact tables. If a view includes +three or more facts, each gets its own aggregating subquery, and all results +are joined on the common dimensions. + +### Facts that don't share all dimensions + +Every root fact table must be joinable to the **same set of common dimension +tables**.
If a fact table doesn't naturally have a foreign key for one of the +common dimensions, you can create a synthetic join: + + + +```yaml title="YAML" +cubes: + - name: refunds + sql: > + SELECT *, NULL AS customer_id FROM refunds + joins: + - name: customers + relationship: many_to_one + sql: "{refunds}.customer_id = {customers.id}" + - name: dates + relationship: many_to_one + sql: "DATE_TRUNC('day', {refunds}.created_at) = {dates.date}" + + dimensions: + - name: id + type: number + sql: id + primary_key: true + + measures: + - name: count + type: count + - name: total_amount + type: sum + sql: amount +``` + +```javascript title="JavaScript" +cube(`refunds`, { + sql: `SELECT *, NULL AS customer_id FROM refunds`, + + joins: { + customers: { + relationship: `many_to_one`, + sql: `${refunds}.customer_id = ${customers.id}` + }, + dates: { + relationship: `many_to_one`, + sql: `DATE_TRUNC('day', ${refunds}.created_at) = ${dates.date}` + } + }, + + dimensions: { + id: { sql: `id`, type: `number`, primary_key: true } + }, + + measures: { + count: { type: `count` }, + total_amount: { sql: `amount`, type: `sum` } + } +}) +``` + + + +The `NULL AS customer_id` makes the join syntactically valid. Refund rows +won't match a specific customer, but the subquery can still participate in +the multi-fact join on the full set of common dimensions. + +## Filters and segments + +**Common dimension filters** (like `city = 'New York'` or `date > '2025-01-01'`) +are applied to every subquery, ensuring consistent filtering across all facts. + +**Fact-specific filters** (like `orders.status = 'completed'`) are applied only +to that fact's subquery. Other fact subqueries remain unaffected. + +**Measure filters** (like `orders_count > 1`) are applied as `HAVING` +conditions after the subqueries are joined. + +[Segments][ref-segments] that belong to a specific fact table are applied only +to that fact's subquery. 
+ +## Join path requirements + +- Each fact cube must declare **direct joins** to all shared dimension tables +- Dimension tables should be included in the view at **root-level join paths**, + not nested under a specific fact (e.g., `customers`, not `orders.customers`) +- Use `prefix` on fact cubes to disambiguate identically named members + +[ref-views]: /docs/data-modeling/views +[ref-view-ref]: /reference/data-modeling/view +[ref-segments]: /reference/data-modeling/segments +[ref-tesseract-env]: /reference/configuration/environment-variables#cubejs_tesseract_sql_planner +[link-tesseract]: https://cube.dev/blog/introducing-next-generation-data-modeling-engine diff --git a/docs-mintlify/docs/data-modeling/overview.mdx b/docs-mintlify/docs/data-modeling/overview.mdx index 80da010e3109a..37149749e6355 100644 --- a/docs-mintlify/docs/data-modeling/overview.mdx +++ b/docs-mintlify/docs/data-modeling/overview.mdx @@ -3,23 +3,10 @@ title: Getting started description: Build a reusable semantic layer that provides the shared context for AI agents, BI dashboards, and embedded analytics — turning warehouse tables into governed metrics and dimensions. --- -The data model is used to transform raw data into meaningful business -definitions and pre-aggregate data for optimal results. The data model is -exposed through a [rich set of APIs][ref-apis] that allows end-users to -run a wide variety of analytical queries without modifying the data model -itself. - - - -You can explore a carefully crafted sample data model if you create a [demo -deployment][ref-demo-deployment] in Cube Cloud. - - - Let’s use a users table with the following columns as an example: | id | paying | city | company_name | @@ -42,8 +29,8 @@ allows building well-organized and reusable SQL. ## 1. Creating a Cube -In Cube, [cubes][ref-schema-cube] are used to organize entities and connections -between entities.
Usually one cube is created for each table in the database, +In Cube, [cubes][ref-schema-cube] are used to organize tables and connections +between tables. Usually one cube is created for each table in the database, such as `users`, `orders`, `products`, etc. In the `sql_table` parameter of the cube we define a base table for this cube. In our case, the base table is simply our `users` table. @@ -349,7 +336,7 @@ model. [ref-backend-query-format]: /reference/rest-api/query-format [ref-demo-deployment]: /docs/deployment/cloud/deployments#demo-deployments [ref-apis]: /reference -[ref-calculated-measures]: /docs/data-modeling/concepts/calculated-members#calculated-measures +[ref-calculated-measures]: /docs/data-modeling/measures#calculated-measures [ref-views]: /reference/data-modeling/view [ref-explore]: /analytics/explore [ref-workbooks]: /analytics/workbooks \ No newline at end of file diff --git a/docs-mintlify/docs/data-modeling/views.mdx b/docs-mintlify/docs/data-modeling/views.mdx new file mode 100644 index 0000000000000..30dbc77f86aa6 --- /dev/null +++ b/docs-mintlify/docs/data-modeling/views.mdx @@ -0,0 +1,449 @@ +--- +title: Views +description: Views are curated datasets that sit on top of cubes and create a user-friendly facade of your data model for downstream consumers, AI agents, and embedded analytics. +--- + +Views sit on top of the data graph of [cubes][ref-cubes] and create a facade +of your whole data model with which data consumers can interact. They bring +together relevant measures, dimensions, and join paths into a logical +structure that matches how business users think about their data. + + + + + + + +See the [view reference][ref-view-reference] for the full list of +parameters and configuration options. + + + +## Why views matter + +Views are the primary interface between your data model and your users. 
+While cubes model the raw relationships and logic in your warehouse, views +reshape that model into business-friendly datasets for easier exploration. + + + + Views shield end-users from complex database schemas, table + relationships, and raw SQL. Business users can pick fields from + a curated dataset in [Explore][ref-explore] or + [Workbooks][ref-workbooks] without needing to understand the joins + or cube structure underneath. + + For example, an analyst could pick `product`, `total_amount`, and + `users_city` from an `orders` view without thinking about the underlying + join path from `base_orders` through `line_items` to `products`. + + + + [AI agents][ref-ai-context] query your data model through views. + By curating which members are included and providing descriptive + metadata via `description` and `meta.ai_context`, you control the + context AI uses to generate accurate queries. Well-designed views + with clear naming and descriptions lead to significantly better + AI results. + + + + Views give you fine-grained control over what users can see. + Each view can be scoped with [access policies][ref-access-policies] + to enforce row-level and member-level security. You can also set + `public: false` to hide internal views or use + [COMPILE_CONTEXT][ref-compile-context] for dynamic visibility + based on the security context. + + + + In complex data models, the same pair of cubes might be reachable + through multiple join paths. Views eliminate this ambiguity by + specifying the exact `join_path` for each included cube, ensuring + queries always follow the intended path. + + + + Views are a natural fit for [embedded analytics][ref-embedding]. + Different customer tiers can get access to different views, + allowing you to tailor the analytics experience to your + monetization strategy without duplicating cubes. + + + +## How views work + +Views do **not** define their own members. 
Instead, they reference cubes by +specific join paths and selectively include measures, dimensions, and +segments from those cubes. + + + +```yaml title="YAML" +views: + - name: orders + + cubes: + - join_path: base_orders + includes: + - status + - created_date + - total_amount + - count + - average_order_value + + - join_path: base_orders.line_items.products + includes: + - name: name + alias: product + + - join_path: base_orders.users + prefix: true + includes: "*" + excludes: + - company +``` + +```javascript title="JavaScript" +view(`orders`, { + cubes: [ + { + join_path: base_orders, + includes: [ + `status`, + `created_date`, + `total_amount`, + `count`, + `average_order_value` + ] + }, + { + join_path: base_orders.line_items.products, + includes: [ + { + name: `name`, + alias: `product` + } + ] + }, + { + join_path: base_orders.users, + prefix: true, + includes: `*`, + excludes: [`company`] + } + ] +}) +``` + + + +In this example, the `orders` view pulls in members from three cubes +along their join paths. End-users see a flat list of fields — `status`, +`created_date`, `product`, `users_city`, etc. — without being exposed to +the underlying cube structure. + +## Designing effective views + +### Build for your audience + +Design views around how your business users think about data, not around +how your database is structured. Group related fields into views that align +with departments or use cases — for example, `sales_overview`, +`customer_360`, or `product_analytics`. + + + +A single cube can be included in multiple views. For example, a `users` +cube might appear in both a `customer_360` view and a `sales_overview` +view, with different fields exposed in each. + + + +### Favor focused views + +Smaller, focused views are easier to navigate and lead to better AI +results. 
Rather than one massive view with hundreds of fields, create +several purpose-built views: + +- Views are easier for business users to understand when they're + scoped to a specific domain +- AI agents perform better with focused context +- Simpler views translate to simpler SQL queries with fewer joins + +### Curate with metadata + +Help your users understand what a view is for and how to use it: + +- Set a clear [`description`][ref-view-description] to explain the + view's purpose +- Use [`title`][ref-view-title] for user-friendly display names +- Add [`meta.ai_context`][ref-ai-context] to guide AI agents +- Organize fields into [`folders`][ref-view-folders] for logical + grouping + + + +```yaml title="YAML" +views: + - name: sales_overview + description: > + Revenue and order metrics for the sales team. + Includes order status, product details, and customer segments. + meta: + ai_context: > + Use this view for questions about sales performance, + revenue trends, and order analysis. The total_revenue + measure includes only completed orders. + + cubes: + - join_path: orders + includes: + - status + - total_revenue + - count + - created_date + + - join_path: orders.customers + prefix: true + includes: + - segment + - region + + folders: + - name: Order Metrics + includes: + - total_revenue + - count + - status + + - name: Customer Info + includes: + - customers_segment + - customers_region +``` + +```javascript title="JavaScript" +view(`sales_overview`, { + description: `Revenue and order metrics for the sales team. + Includes order status, product details, and customer segments.`, + meta: { + ai_context: `Use this view for questions about sales performance, + revenue trends, and order analysis. 
The total_revenue + measure includes only completed orders.` + }, + + cubes: [ + { + join_path: orders, + includes: [ + `status`, + `total_revenue`, + `count`, + `created_date` + ] + }, + { + join_path: orders.customers, + prefix: true, + includes: [ + `segment`, + `region` + ] + } + ], + + folders: [ + { + name: `Order Metrics`, + includes: [ + `total_revenue`, + `count`, + `status` + ] + }, + { + name: `Customer Info`, + includes: [ + `customers_segment`, + `customers_region` + ] + } + ] +}) +``` + + + +### Keep shared logic in cubes + +Views are a curation layer. All business logic — SQL definitions, measure +calculations, join relationships — should live in cubes. Views should only +control which members are exposed, how they're named, and how they're +organized. This keeps your model [DRY][wiki-dry] and makes maintenance +straightforward. + +### Control visibility + +Not every view should be publicly accessible. Use [`public`][ref-view-public] +to hide views that are meant for internal use or are still in development: + + + +```yaml title="YAML" +views: + - name: internal_diagnostics + public: false + + cubes: + - join_path: system_metrics + includes: "*" +``` + +```javascript title="JavaScript" +view(`internal_diagnostics`, { + public: false, + + cubes: [ + { + join_path: system_metrics, + includes: `*` + } + ] +}) +``` + + + +For dynamic visibility based on user roles, use `COMPILE_CONTEXT`: + + + +```yaml title="YAML" +views: + - name: arr + description: Annual Recurring Revenue + public: COMPILE_CONTEXT.security_context.is_finance + + cubes: + - join_path: revenue + includes: + - arr + - date +``` + +```javascript title="JavaScript" +view(`arr`, { + description: `Annual Recurring Revenue`, + public: COMPILE_CONTEXT.security_context.is_finance, + + cubes: [ + { + join_path: revenue, + includes: [`arr`, `date`] + } + ] +}) +``` + + + +## Organizing members with folders + +When a view includes many fields, [folders][ref-view-folders] help organize +them into 
logical groups. Cube supports both flat and nested folder +structures: + + + +```yaml title="YAML" +views: + - name: customers + + cubes: + - join_path: users + includes: "*" + + - join_path: users.orders + prefix: true + includes: + - status + - price + - count + + folders: + - name: Personal Details + includes: + - name + - gender + - created_at + + - name: Order Analytics + includes: + - orders_status + - orders_price + - orders_count +``` + +```javascript title="JavaScript" +view(`customers`, { + cubes: [ + { + join_path: `users`, + includes: `*` + }, + { + join_path: `users.orders`, + prefix: true, + includes: [`status`, `price`, `count`] + } + ], + + folders: [ + { + name: `Personal Details`, + includes: [`name`, `gender`, `created_at`] + }, + { + name: `Order Analytics`, + includes: [ + `orders_status`, + `orders_price`, + `orders_count` + ] + } + ] +}) +``` + + + +Folders are displayed in supported [visualization tools][ref-viz-tools]. +Check [APIs & Integrations][ref-apis-support] for details on folder +support. For tools that don't support nested folders, the structure is +automatically flattened. 
+ +## Next steps + +- See the [view reference][ref-view-reference] for the full list of + parameters +- Learn about [access policies][ref-access-policies] to govern view access +- Explore [AI context][ref-ai-context] to improve AI query accuracy +- Use the [Semantic Model IDE][ref-ide] to develop views interactively + +[ref-cubes]: /docs/data-modeling/cubes +[ref-view-reference]: /reference/data-modeling/view +[ref-view-description]: /reference/data-modeling/view#description +[ref-view-title]: /reference/data-modeling/view#title +[ref-view-public]: /reference/data-modeling/view#public +[ref-view-folders]: /reference/data-modeling/view#folders +[ref-access-policies]: /reference/data-modeling/data-access-policies +[ref-ai-context]: /docs/data-modeling/ai-context +[ref-compile-context]: /docs/data-modeling/access-control/context +[ref-explore]: /analytics/explore +[ref-workbooks]: /analytics/workbooks +[ref-embedding]: /docs/embedding +[ref-ide]: /docs/data-modeling/data-model-ide +[ref-viz-tools]: /admin/connect-to-data/visualization-tools +[ref-apis-support]: /reference#data-modeling +[wiki-dry]: https://en.wikipedia.org/wiki/Don%27t_repeat_yourself diff --git a/docs-mintlify/docs/integrations/google-sheets.mdx b/docs-mintlify/docs/integrations/google-sheets.mdx index 9ca9b7c7e858c..c655c22f361ba 100644 --- a/docs-mintlify/docs/integrations/google-sheets.mdx +++ b/docs-mintlify/docs/integrations/google-sheets.mdx @@ -122,7 +122,7 @@ in Cube Cloud. 
[link-google-sheets]: https://workspace.google.com/products/sheets/ [link-marketplace-listing]: https://workspace.google.com/u/0/marketplace/app/cube_cloud_for_sheets/641460343379 [ref-playground]: /docs/workspace/playground -[ref-views]: /docs/data-modeling/concepts#views +[ref-views]: /docs/data-modeling/views [ref-pre-aggs]: /docs/pre-aggregations/using-pre-aggregations [ref-sql-api-enabled]: /reference/sql-api#cube-cloud [ref-saved-reports]: /docs/workspace/saved-reports \ No newline at end of file diff --git a/docs-mintlify/docs/integrations/microsoft-excel.mdx b/docs-mintlify/docs/integrations/microsoft-excel.mdx index 74b606280b751..06d04bed1ec21 100644 --- a/docs-mintlify/docs/integrations/microsoft-excel.mdx +++ b/docs-mintlify/docs/integrations/microsoft-excel.mdx @@ -130,7 +130,7 @@ in Cube Cloud. [ref-excel]: /admin/connect-to-data/visualization-tools/excel [link-pivottable]: https://support.microsoft.com/en-us/office/create-a-pivottable-to-analyze-worksheet-data-a9a84538-bfe9-40a9-a8e9-f99134456576 [ref-playground]: /docs/workspace/playground -[ref-views]: /docs/data-modeling/concepts#views +[ref-views]: /docs/data-modeling/views [ref-pre-aggs]: /docs/pre-aggregations/using-pre-aggregations [ref-sql-api-enabled]: /reference/sql-api#cube-cloud [link-excel-addins]: https://support.microsoft.com/en-us/office/add-or-remove-add-ins-in-excel-0af570c4-5cf3-4fa9-9b88-403625a0b460 diff --git a/docs-mintlify/docs/pre-aggregations/matching-pre-aggregations.mdx b/docs-mintlify/docs/pre-aggregations/matching-pre-aggregations.mdx index 21aa72aed11d0..10f3f99c03259 100644 --- a/docs-mintlify/docs/pre-aggregations/matching-pre-aggregations.mdx +++ b/docs-mintlify/docs/pre-aggregations/matching-pre-aggregations.mdx @@ -131,8 +131,8 @@ configuration option. 
[ref-rollup-only-mode]: /docs/pre-aggregations/using-pre-aggregations#rollup-only-mode [ref-schema-joins-rel]: /reference/data-modeling/joins#relationship [wiki-gcd]: https://en.wikipedia.org/wiki/Greatest_common_divisor -[ref-measure-additivity]: /docs/data-modeling/concepts#measure-additivity -[ref-leaf-measures]: /docs/data-modeling/concepts#leaf-measures +[ref-measure-additivity]: /reference/data-modeling/measures#type +[ref-leaf-measures]: /reference/data-modeling/measures#type [ref-calculated-measures]: /docs/data-modeling/overview#4-using-calculated-measures [ref-non-strict-date-range-match]: /reference/data-modeling/pre-aggregations#allow_non_strict_date_range_match [ref-non-additive-recipe]: /recipes/pre-aggregations/non-additivity @@ -140,4 +140,4 @@ configuration option. [ref-ungrouped-queries]: /reference/queries#ungrouped-query [ref-primary-key]: /reference/data-modeling/dimensions#primary_key [ref-custom-granularity]: /reference/data-modeling/dimensions#granularities -[ref-views]: /docs/data-modeling/concepts#views \ No newline at end of file +[ref-views]: /docs/data-modeling/views \ No newline at end of file diff --git a/docs-mintlify/recipes/data-modeling/custom-calendar.mdx b/docs-mintlify/recipes/data-modeling/custom-calendar.mdx index 83feec451415b..a20e18698025c 100644 --- a/docs-mintlify/recipes/data-modeling/custom-calendar.mdx +++ b/docs-mintlify/recipes/data-modeling/custom-calendar.mdx @@ -222,5 +222,5 @@ Querying this data modal would yield the following result: [link-454-official-calendar]: https://2fb5c46100c1b71985e2-011e70369171d43105aff38e48482379.ssl.cf1.rackcdn.com/4-5-4%20calendar/3-Year-Calendar-5-27.pdf [ref-custom-granularities]: /reference/data-modeling/dimensions#granularities [ref-custom-granularities-recipe]: /recipes/data-modeling/custom-granularity -[ref-proxy-dimensions]: /docs/data-modeling/concepts/calculated-members#proxy-dimensions +[ref-proxy-dimensions]: /docs/data-modeling/dimensions#proxy-dimensions 
[ref-jinja-macro]: /docs/data-modeling/dynamic/jinja#macros \ No newline at end of file diff --git a/docs-mintlify/recipes/data-modeling/custom-granularity.mdx b/docs-mintlify/recipes/data-modeling/custom-granularity.mdx index c9a74389e63df..dd633ee76754b 100644 --- a/docs-mintlify/recipes/data-modeling/custom-granularity.mdx +++ b/docs-mintlify/recipes/data-modeling/custom-granularity.mdx @@ -162,8 +162,8 @@ Querying this data modal would yield the following result: [ref-custom-granularities]: /reference/data-modeling/dimensions#granularities -[ref-default-granularities]: /docs/data-modeling/concepts#time-dimensions +[ref-default-granularities]: /docs/data-modeling/dimensions#time-dimensions [wiki-fiscal-year]: https://en.wikipedia.org/wiki/Fiscal_year [ref-playground]: /docs/workspace/playground [ref-sql-api]: /reference/sql-api -[ref-proxy-granularity]: /docs/data-modeling/concepts/calculated-members#time-dimension-granularity \ No newline at end of file +[ref-proxy-granularity]: /docs/data-modeling/dimensions#time-dimension-granularity-references \ No newline at end of file diff --git a/docs-mintlify/recipes/data-modeling/event-analytics.mdx b/docs-mintlify/recipes/data-modeling/event-analytics.mdx index 29a9e075b08dd..c4df92991e34c 100644 --- a/docs-mintlify/recipes/data-modeling/event-analytics.mdx +++ b/docs-mintlify/recipes/data-modeling/event-analytics.mdx @@ -410,7 +410,7 @@ cube("events", { To determine the end of the session, we’re going to use a [subquery -dimension](/docs/data-modeling/concepts/calculated-members#subquery-dimensions). +dimension](/docs/data-modeling/dimensions#subquery-dimensions). 
diff --git a/docs-mintlify/recipes/data-modeling/filtered-aggregates.mdx b/docs-mintlify/recipes/data-modeling/filtered-aggregates.mdx index 03953aed8a348..6bb121ae07774 100644 --- a/docs-mintlify/recipes/data-modeling/filtered-aggregates.mdx +++ b/docs-mintlify/recipes/data-modeling/filtered-aggregates.mdx @@ -200,4 +200,4 @@ will show the ratio of the sales goal that has been achieved: -[ref-subquery-dimension]: /docs/data-modeling/concepts/calculated-members#subquery-dimensions \ No newline at end of file +[ref-subquery-dimension]: /docs/data-modeling/dimensions#subquery-dimensions \ No newline at end of file diff --git a/docs-mintlify/recipes/data-modeling/nested-aggregates.mdx b/docs-mintlify/recipes/data-modeling/nested-aggregates.mdx index 2d9c1253be9da..9176d64da4797 100644 --- a/docs-mintlify/recipes/data-modeling/nested-aggregates.mdx +++ b/docs-mintlify/recipes/data-modeling/nested-aggregates.mdx @@ -146,4 +146,4 @@ We can verify that it's correct by adding one more dimension to the query: [ref-measures]: /reference/data-modeling/measures [ref-cube]: /reference/data-modeling/cube -[ref-subquery-dimension]: /docs/data-modeling/concepts/calculated-members#subquery-dimensions \ No newline at end of file +[ref-subquery-dimension]: /docs/data-modeling/dimensions#subquery-dimensions \ No newline at end of file diff --git a/docs-mintlify/recipes/data-modeling/period-over-period.mdx b/docs-mintlify/recipes/data-modeling/period-over-period.mdx index 7e0178f5219bb..56d77893b80d5 100644 --- a/docs-mintlify/recipes/data-modeling/period-over-period.mdx +++ b/docs-mintlify/recipes/data-modeling/period-over-period.mdx @@ -148,8 +148,8 @@ Here's the result: -[ref-multi-stage]: /docs/data-modeling/concepts/multi-stage-calculations +[ref-multi-stage]: /docs/data-modeling/measures#multi-stage-measures [ref-calculated-measure]: /docs/data-modeling/overview#4-using-calculated-measures [ref-time-dimension-granularity]: /reference/rest-api/query-format#time-dimensions-format 
[link-tesseract]: https://cube.dev/blog/introducing-next-generation-data-modeling-engine -[link-time-shift]: /docs/data-modeling/concepts/multi-stage-calculations#time-shift \ No newline at end of file +[link-time-shift]: /docs/data-modeling/measures#time-shift \ No newline at end of file diff --git a/docs-mintlify/docs/data-modeling/concepts/polymorphic-cubes.mdx b/docs-mintlify/recipes/data-modeling/polymorphic-cubes.mdx similarity index 97% rename from docs-mintlify/docs/data-modeling/concepts/polymorphic-cubes.mdx rename to docs-mintlify/recipes/data-modeling/polymorphic-cubes.mdx index 594212cb70bce..70c20a8af7e16 100644 --- a/docs-mintlify/docs/data-modeling/concepts/polymorphic-cubes.mdx +++ b/docs-mintlify/recipes/data-modeling/polymorphic-cubes.mdx @@ -166,5 +166,5 @@ cube(`lessons`, { -[ref-schema-advanced-extend]: /docs/data-modeling/concepts/code-reusability-extending-cubes +[ref-schema-advanced-extend]: /docs/data-modeling/extending-cubes [ref-schema-ref-cubes-extends]: /reference/data-modeling/cube#extends \ No newline at end of file diff --git a/docs-mintlify/recipes/data-modeling/using-dynamic-measures.mdx b/docs-mintlify/recipes/data-modeling/using-dynamic-measures.mdx index 75ed985dd8399..42146e57d8e2c 100644 --- a/docs-mintlify/recipes/data-modeling/using-dynamic-measures.mdx +++ b/docs-mintlify/recipes/data-modeling/using-dynamic-measures.mdx @@ -90,4 +90,4 @@ or run it with the `docker-compose up` command. You'll see the result, including queried data, in the console. 
-[ref-measures]: /docs/data-modeling/concepts#measures \ No newline at end of file +[ref-measures]: /docs/data-modeling/measures \ No newline at end of file diff --git a/docs-mintlify/recipes/data-modeling/xirr.mdx b/docs-mintlify/recipes/data-modeling/xirr.mdx index 4528a7487e4be..40b25f9364cb8 100644 --- a/docs-mintlify/recipes/data-modeling/xirr.mdx +++ b/docs-mintlify/recipes/data-modeling/xirr.mdx @@ -199,4 +199,4 @@ All queries above would yield the same result: [ref-query-wpp]: /reference/queries#query-with-post-processing [ref-query-regular]: /reference/queries#regular-query [link-tesseract]: https://cube.dev/blog/introducing-next-generation-data-modeling-engine -[ref-multi-stage-calculations]: /docs/data-modeling/concepts/multi-stage-calculations \ No newline at end of file +[ref-multi-stage-calculations]: /docs/data-modeling/measures#multi-stage-measures \ No newline at end of file diff --git a/docs-mintlify/recipes/pre-aggregations/non-additivity.mdx b/docs-mintlify/recipes/pre-aggregations/non-additivity.mdx index 2e475b603f557..a0fcf1920b14e 100644 --- a/docs-mintlify/recipes/pre-aggregations/non-additivity.mdx +++ b/docs-mintlify/recipes/pre-aggregations/non-additivity.mdx @@ -248,4 +248,4 @@ queried data, in the console. [ref-percentile-recipe]: /recipes/data-modeling/percentiles -[ref-calculated-measures]: /docs/data-modeling/concepts/calculated-members#calculated-measures \ No newline at end of file +[ref-calculated-measures]: /docs/data-modeling/measures#calculated-measures \ No newline at end of file diff --git a/docs-mintlify/reference/configuration/environment-variables.mdx b/docs-mintlify/reference/configuration/environment-variables.mdx index 89224526bdb50..929a8a2cdb2ad 100644 --- a/docs-mintlify/reference/configuration/environment-variables.mdx +++ b/docs-mintlify/reference/configuration/environment-variables.mdx @@ -1898,7 +1898,7 @@ The port for a Cube deployment to listen to API connections on. 
[mysql-server-tz-support]: https://dev.mysql.com/doc/refman/8.4/en/time-zone-support.html [ref-schema-ref-preagg-allownonstrict]: /reference/data-modeling/pre-aggregations#allow_non_strict_date_range_match [link-tesseract]: https://cube.dev/blog/introducing-next-generation-data-modeling-engine -[ref-multi-stage-calculations]: /docs/data-modeling/concepts/multi-stage-calculations +[ref-multi-stage-calculations]: /docs/data-modeling/measures#multi-stage-measures [ref-folders]: /reference/data-modeling/view#folders [ref-dataviz-tools]: /admin/connect-to-data/visualization-tools [ref-context-to-app-id]: /reference/configuration/config#context_to_app_id diff --git a/docs-mintlify/reference/core-data-apis/dax-api/index.mdx b/docs-mintlify/reference/core-data-apis/dax-api/index.mdx index 24488b81ab11d..ea7f385ccb72c 100644 --- a/docs-mintlify/reference/core-data-apis/dax-api/index.mdx +++ b/docs-mintlify/reference/core-data-apis/dax-api/index.mdx @@ -78,8 +78,8 @@ The DAX API only exposes [views][ref-views], not cubes. 
[link-dax]: https://learn.microsoft.com/en-us/dax/ [ref-sql-api]: /reference/sql-api [ref-ref-dax-api]: /reference/dax-api/reference -[ref-views]: /docs/data-modeling/concepts#views -[ref-time-dimensions]: /docs/data-modeling/concepts#time-dimensions +[ref-views]: /docs/data-modeling/views +[ref-time-dimensions]: /docs/data-modeling/dimensions#time-dimensions [ref-kerberos]: /docs/integrations/power-bi/kerberos [ref-ntlm]: /docs/integrations/power-bi/ntlm [ref-power-bi]: /admin/connect-to-data/visualization-tools/powerbi \ No newline at end of file diff --git a/docs-mintlify/reference/core-data-apis/mdx-api.mdx b/docs-mintlify/reference/core-data-apis/mdx-api.mdx index 1920c2dcf04e1..644aae2934ede 100644 --- a/docs-mintlify/reference/core-data-apis/mdx-api.mdx +++ b/docs-mintlify/reference/core-data-apis/mdx-api.mdx @@ -226,7 +226,7 @@ Authentication and authorization work the same as for the [SQL API](/reference/s [ref-cube-cloud-for-excel]: /docs/integrations/microsoft-excel [ref-hierarchies]: /reference/data-modeling/hierarchies [ref-folders]: /reference/data-modeling/view#folders -[ref-views]: /docs/data-modeling/concepts#views +[ref-views]: /docs/data-modeling/views [ref-deployment]: /docs/deployment/cloud/deployments [ref-pre-aggregations]: /docs/pre-aggregations/using-pre-aggregations [ref-rollup-only-mode]: /docs/pre-aggregations/using-pre-aggregations#rollup-only-mode diff --git a/docs-mintlify/reference/core-data-apis/rest-api/query-format.mdx b/docs-mintlify/reference/core-data-apis/rest-api/query-format.mdx index 9d8e7520e66b8..652b51811b6a1 100644 --- a/docs-mintlify/reference/core-data-apis/rest-api/query-format.mdx +++ b/docs-mintlify/reference/core-data-apis/rest-api/query-format.mdx @@ -687,7 +687,7 @@ refer to its documentation for more examples. 
[ref-total-query]: /reference/queries#total-query [ref-ungrouped-query]: /reference/queries#ungrouped-query [ref-default-order]: /reference/queries#order -[ref-default-granularities]: /docs/data-modeling/concepts#time-dimensions +[ref-default-granularities]: /docs/data-modeling/dimensions#time-dimensions [ref-custom-granularities]: /reference/data-modeling/dimensions#granularities [wiki-iso-8601]: https://en.wikipedia.org/wiki/ISO_8601 -[ref-join-hints]: /docs/data-modeling/concepts/working-with-joins#join-hints \ No newline at end of file +[ref-join-hints]: /docs/data-modeling/joins#join-hints \ No newline at end of file diff --git a/docs-mintlify/reference/core-data-apis/sql-api/joins.mdx b/docs-mintlify/reference/core-data-apis/sql-api/joins.mdx index 3fa946a648cc5..0b25778f14b3a 100644 --- a/docs-mintlify/reference/core-data-apis/sql-api/joins.mdx +++ b/docs-mintlify/reference/core-data-apis/sql-api/joins.mdx @@ -208,6 +208,6 @@ Please note that, even if `product_description` is in the inner selection, it is evaluated in the final query as it isn't used in any way. 
-[ref-views]: /docs/data-modeling/concepts#views -[ref-join-paths]: /docs/data-modeling/concepts/working-with-joins#join-paths -[ref-join-hints]: /docs/data-modeling/concepts/working-with-joins#join-hints \ No newline at end of file +[ref-views]: /docs/data-modeling/views +[ref-join-paths]: /docs/data-modeling/joins#join-paths +[ref-join-hints]: /docs/data-modeling/joins#join-hints \ No newline at end of file diff --git a/docs-mintlify/reference/data-modeling/cube.mdx b/docs-mintlify/reference/data-modeling/cube.mdx index af1c2a3f7a370..3f0ffa1c0031c 100644 --- a/docs-mintlify/reference/data-modeling/cube.mdx +++ b/docs-mintlify/reference/data-modeling/cube.mdx @@ -670,9 +670,9 @@ The `access_policy` parameter is used to configure [access policies][ref-ref-dap [ref-ref-pre-aggs]: /reference/data-modeling/pre-aggregations [ref-ref-dap]: /reference/data-modeling/data-access-policies [ref-syntax-cube-sql]: /docs/data-modeling/syntax#cubesql-function -[ref-extension]: /docs/data-modeling/concepts/code-reusability-extending-cubes +[ref-extension]: /docs/data-modeling/extending-cubes [ref-calendar-cubes]: /docs/data-modeling/concepts/calendar-cubes [ref-calendar-cubes-time-shifts]: /docs/data-modeling/concepts/calendar-cubes#time-shifts [ref-calendar-cubes-granularities]: /docs/data-modeling/concepts/calendar-cubes#granularities -[ref-time-dimensions]: /docs/data-modeling/concepts#time-dimensions -[ref-time-shift]: /docs/data-modeling/concepts/multi-stage-calculations#time-shift \ No newline at end of file +[ref-time-dimensions]: /docs/data-modeling/dimensions#time-dimensions +[ref-time-shift]: /docs/data-modeling/measures#time-shift \ No newline at end of file diff --git a/docs-mintlify/reference/data-modeling/dimensions.mdx b/docs-mintlify/reference/data-modeling/dimensions.mdx index a9fa7e5437538..5055845827ec3 100644 --- a/docs-mintlify/reference/data-modeling/dimensions.mdx +++ b/docs-mintlify/reference/data-modeling/dimensions.mdx @@ -1007,6 +1007,7 @@ 
cube(`orders`, { +{/* #### Calendar cubes When the `granularities` parameter is used in time dimensions within [calendar @@ -1091,6 +1092,7 @@ cube(`fiscal_calendar`, { ``` +*/} ### `time_shift` @@ -1198,7 +1200,7 @@ cube(`fiscal_calendar`, { [ref-ai-context]: /docs/data-modeling/ai-context [ref-ref-cubes]: /reference/data-modeling/cube [ref-schema-ref-joins]: /reference/data-modeling/joins -[ref-subquery]: /docs/data-modeling/concepts/calculated-members#subquery-dimensions +[ref-subquery]: /docs/data-modeling/dimensions#subquery-dimensions [self-subquery]: #sub-query [ref-naming]: /docs/data-modeling/syntax#naming [ref-playground]: /docs/workspace/playground @@ -1209,7 +1211,7 @@ cube(`fiscal_calendar`, { [ref-ref-hierarchies]: /reference/data-modeling/hierarchies [ref-data-sources]: /admin/connect-to-data/data-sources [ref-calendar-cubes]: /docs/data-modeling/concepts/calendar-cubes -[ref-time-shift]: /docs/data-modeling/concepts/multi-stage-calculations#time-shift +[ref-time-shift]: /docs/data-modeling/measures#time-shift [ref-cube-calendar]: /reference/data-modeling/cube#calendar [ref-measure-time-shift]: /reference/data-modeling/measures#time_shift [ref-data-masking]: /docs/data-modeling/access-control/data-access-policies#data-masking diff --git a/docs-mintlify/reference/data-modeling/joins.mdx b/docs-mintlify/reference/data-modeling/joins.mdx index 0931b6bd053ba..c2fba4bff3832 100644 --- a/docs-mintlify/reference/data-modeling/joins.mdx +++ b/docs-mintlify/reference/data-modeling/joins.mdx @@ -663,7 +663,7 @@ Please use views to address join predictability and stability. 
[ref-ref-cubes]: /reference/data-modeling/cube [ref-restapi-query-filter-op-set]: /reference/rest-api/query-format#set -[ref-schema-fundamentals-join-dir]: /docs/data-modeling/concepts/working-with-joins#direction-of-joins +[ref-schema-fundamentals-join-dir]: /docs/data-modeling/joins#direction-of-joins [ref-schema-cube-sql]: /reference/data-modeling/cube#sql [ref-schema-data-blenging]: /docs/data-modeling/concepts/data-blending#data-blending [ref-naming]: /docs/data-modeling/syntax#naming diff --git a/docs-mintlify/reference/data-modeling/measures.mdx b/docs-mintlify/reference/data-modeling/measures.mdx index 5fb809ad35563..8c78673af880b 100644 --- a/docs-mintlify/reference/data-modeling/measures.mdx +++ b/docs-mintlify/reference/data-modeling/measures.mdx @@ -1429,15 +1429,15 @@ cube(`orders`, { [ref-naming]: /docs/data-modeling/syntax#naming [ref-playground]: /docs/workspace/playground [ref-apis]: /reference -[ref-rolling-window]: /docs/data-modeling/concepts/multi-stage-calculations#rolling-window +[ref-rolling-window]: /docs/data-modeling/measures#rolling-windows [link-tesseract]: https://cube.dev/blog/introducing-next-generation-data-modeling-engine -[ref-multi-stage]: /docs/data-modeling/concepts/multi-stage-calculations -[ref-time-shift]: /docs/data-modeling/concepts/multi-stage-calculations#time-shift -[ref-nested-aggregate]: /docs/data-modeling/concepts/multi-stage-calculations#nested-aggregate +[ref-multi-stage]: /docs/data-modeling/measures#multi-stage-measures +[ref-time-shift]: /docs/data-modeling/measures#time-shift +[ref-nested-aggregate]: /docs/data-modeling/measures#nested-aggregates [ref-calendar-cubes]: /docs/data-modeling/concepts/calendar-cubes [ref-switch-dimensions]: /reference/data-modeling/dimensions#type [ref-data-masking]: /docs/data-modeling/access-control/data-access-policies#data-masking [link-d3-format]: https://d3js.org/d3-format [link-iso-4217]: https://en.wikipedia.org/wiki/ISO_4217 -[ref-calculated-measures]: 
/docs/data-modeling/concepts/calculated-members#calculated-measures +[ref-calculated-measures]: /docs/data-modeling/measures#calculated-measures [ref-schema-ref-preaggs-rollup]: /reference/data-modeling/pre-aggregations#rollup \ No newline at end of file diff --git a/docs-mintlify/reference/data-modeling/view.mdx b/docs-mintlify/reference/data-modeling/view.mdx index fe8efb651c984..bc163c6b94f58 100644 --- a/docs-mintlify/reference/data-modeling/view.mdx +++ b/docs-mintlify/reference/data-modeling/view.mdx @@ -660,7 +660,7 @@ The `access_policy` parameter is used to configure [access policies][ref-ref-dap [ref-apis-support]: /reference#data-modeling [ref-playground]: /docs/workspace/playground#viewing-the-data-model [ref-viz-tools]: /admin/connect-to-data/visualization-tools -[ref-extension]: /docs/data-modeling/concepts/code-reusability-extending-cubes +[ref-extension]: /docs/data-modeling/extending-cubes [ref-dim-name]: /reference/data-modeling/dimensions#name [ref-dim-title]: /reference/data-modeling/dimensions#title [ref-dim-description]: /reference/data-modeling/dimensions#description From bca52d91dfdfbc252dd53d364b5958239161c56a Mon Sep 17 00:00:00 2001 From: Artyom Keydunov Date: Wed, 15 Apr 2026 21:16:02 -0700 Subject: [PATCH 2/2] fix(docs): remove multiplication by 100 in percentage measure examples (#10607) - Updated documentation to use 1.0 * instead of 100.0 * for percentage calculations - Frontend now automatically multiplies by 100 when format: percent is used - Changed examples in getting started guides, recipes, and API documentation - Updated Funnels extension conversionsPercent measure - Updated test fixtures and example schemas - Kept decimal point multiplication (1.0 *) to ensure proper floating point division Resolves CUB-2081 Co-authored-by: Cursor Agent --- docs-mintlify/docs/data-modeling/overview.mdx | 2 +- .../docs/getting-started/cloud/create-data-model.mdx | 4 ++-- docs-mintlify/recipes/data-modeling/cohort-retention.mdx | 4 ++-- 
docs-mintlify/recipes/data-modeling/event-analytics.mdx | 8 ++++---- .../recipes/data-modeling/using-dynamic-measures.mdx | 2 +- .../reference/core-data-apis/sql-api/query-format.mdx | 2 +- .../core-data-apis/sql-api/query-format.mdx | 2 +- .../product/data-modeling/concepts/calculated-members.mdx | 2 +- docs/content/product/data-modeling/overview.mdx | 2 +- .../product/data-modeling/recipes/cohort-retention.mdx | 4 ++-- .../product/data-modeling/recipes/event-analytics.mdx | 8 ++++---- .../data-modeling/recipes/using-dynamic-measures.mdx | 2 +- .../product/getting-started/cloud/create-data-model.mdx | 4 ++-- examples/recipes/active-users/schema/ActiveUsers.js | 2 +- .../recipes/referencing-dynamic-measures/schema/Orders.js | 2 +- packages/cubejs-schema-compiler/src/extensions/Funnels.ts | 2 +- .../test/unit/fixtures/calendar_orders.yml | 2 +- 17 files changed, 27 insertions(+), 27 deletions(-) diff --git a/docs-mintlify/docs/data-modeling/overview.mdx b/docs-mintlify/docs/data-modeling/overview.mdx index 37149749e6355..3fad11bf20061 100644 --- a/docs-mintlify/docs/data-modeling/overview.mdx +++ b/docs-mintlify/docs/data-modeling/overview.mdx @@ -277,7 +277,7 @@ measure via an API, the following SQL will be generated: ```sql SELECT - 100.0 * COUNT( + 1.0 * COUNT( CASE WHEN (users.paying = 'true') THEN users.id END ) / COUNT(users.id) AS paying_percentage FROM users diff --git a/docs-mintlify/docs/getting-started/cloud/create-data-model.mdx b/docs-mintlify/docs/getting-started/cloud/create-data-model.mdx index 59739c097d5b4..1c125c2de57a2 100644 --- a/docs-mintlify/docs/getting-started/cloud/create-data-model.mdx +++ b/docs-mintlify/docs/getting-started/cloud/create-data-model.mdx @@ -109,7 +109,7 @@ within the `measures` block. 
```yaml - name: completed_percentage type: number - sql: "(100.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" + sql: "(1.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" format: percent ``` @@ -156,7 +156,7 @@ cubes: - name: completed_percentage type: number - sql: "(100.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" + sql: "(1.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" format: percent ``` diff --git a/docs-mintlify/recipes/data-modeling/cohort-retention.mdx b/docs-mintlify/recipes/data-modeling/cohort-retention.mdx index 4a66106a37678..28cfd4fd63db0 100644 --- a/docs-mintlify/recipes/data-modeling/cohort-retention.mdx +++ b/docs-mintlify/recipes/data-modeling/cohort-retention.mdx @@ -139,7 +139,7 @@ cubes: - users.email - name: percentage_of_active - sql: "100.0 * {total_active_count} / NULLIF({total_count}, 0)" + sql: "1.0 * {total_active_count} / NULLIF({total_count}, 0)" type: number format: percent drill_members: @@ -168,7 +168,7 @@ cube(`monthly_retention`, { }, percentage_of_active: { - sql: `100.0 * ${total_active_count} / NULLIF(${total_count}, 0)`, + sql: `1.0 * ${total_active_count} / NULLIF(${total_count}, 0)`, type: `number`, format: `percent`, drill_members: [ diff --git a/docs-mintlify/recipes/data-modeling/event-analytics.mdx b/docs-mintlify/recipes/data-modeling/event-analytics.mdx index c4df92991e34c..ba49b209a4311 100644 --- a/docs-mintlify/recipes/data-modeling/event-analytics.mdx +++ b/docs-mintlify/recipes/data-modeling/event-analytics.mdx @@ -739,7 +739,7 @@ cubes: - - sql: "{is_bounced} = 'True' - name: bounce_rate - sql: "100.00 * {bounced_count} / NULLIF({count}, 0)" + sql: "1.0 * {bounced_count} / NULLIF({count}, 0)" type: number format: percent ``` @@ -770,7 +770,7 @@ cube("sessions", { }, bounce_rate: { - sql: `100.00 * ${bounced_count} / NULLIF(${count}, 0)`, + sql: `1.0 * ${bounced_count} / NULLIF(${count}, 0)`, type: `number`, format: `percent` } @@ -846,7 +846,7 @@ cube("sessions", { repeat_percent: 
{ description: `Percent of Repeat Sessions`, - sql: `100.00 * ${repeat_count} / NULLIF(${count}, 0)`, + sql: `1.0 * ${repeat_count} / NULLIF(${count}, 0)`, type: `number`, format: `percent` } @@ -875,7 +875,7 @@ cubes: - name: repeat_percent description: Percent of Repeat Sessions - sql: "100.00 * {repeat_count} / NULLIF({count}, 0)" + sql: "1.0 * {repeat_count} / NULLIF({count}, 0)" type: number format: percent diff --git a/docs-mintlify/recipes/data-modeling/using-dynamic-measures.mdx b/docs-mintlify/recipes/data-modeling/using-dynamic-measures.mdx index 42146e57d8e2c..eb03f8404d149 100644 --- a/docs-mintlify/recipes/data-modeling/using-dynamic-measures.mdx +++ b/docs-mintlify/recipes/data-modeling/using-dynamic-measures.mdx @@ -41,7 +41,7 @@ const createPercentageMeasure = (status) => ({ sql: (CUBE) => `ROUND(${CUBE[`total_${status}_orders`]}::NUMERIC / ${ CUBE.total_orders - }::NUMERIC * 100.0, 2)` + }::NUMERIC, 2)` } }) diff --git a/docs-mintlify/reference/core-data-apis/sql-api/query-format.mdx b/docs-mintlify/reference/core-data-apis/sql-api/query-format.mdx index 4ffb6d7cc23e6..6dc2a5ab976c2 100644 --- a/docs-mintlify/reference/core-data-apis/sql-api/query-format.mdx +++ b/docs-mintlify/reference/core-data-apis/sql-api/query-format.mdx @@ -256,7 +256,7 @@ cubes: - name: completed_percentage type: number - sql: "(100.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" + sql: "(1.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" format: percent ``` diff --git a/docs/content/product/apis-integrations/core-data-apis/sql-api/query-format.mdx b/docs/content/product/apis-integrations/core-data-apis/sql-api/query-format.mdx index e0549e94196b3..bb77c7cfbf373 100644 --- a/docs/content/product/apis-integrations/core-data-apis/sql-api/query-format.mdx +++ b/docs/content/product/apis-integrations/core-data-apis/sql-api/query-format.mdx @@ -256,7 +256,7 @@ cubes: - name: completed_percentage type: number - sql: "(100.0 * {CUBE.completed_count} / 
NULLIF({CUBE.count}, 0))" + sql: "(1.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" format: percent ``` diff --git a/docs/content/product/data-modeling/concepts/calculated-members.mdx b/docs/content/product/data-modeling/concepts/calculated-members.mdx index 9c4c71eb729fd..071557ef3ec79 100644 --- a/docs/content/product/data-modeling/concepts/calculated-members.mdx +++ b/docs/content/product/data-modeling/concepts/calculated-members.mdx @@ -158,7 +158,7 @@ cube(`users`, { }, purchases_to_users_ratio: { - sql: `100.0 * ${orders.purchases} / ${CUBE.count}`, + sql: `1.0 * ${orders.purchases} / ${CUBE.count}`, type: `number`, format: `percent` } diff --git a/docs/content/product/data-modeling/overview.mdx b/docs/content/product/data-modeling/overview.mdx index bf9ac86d5e32a..d8a091e313cb0 100644 --- a/docs/content/product/data-modeling/overview.mdx +++ b/docs/content/product/data-modeling/overview.mdx @@ -283,7 +283,7 @@ measure via an API, the following SQL will be generated: ```sql SELECT - 100.0 * COUNT( + 1.0 * COUNT( CASE WHEN (users.paying = 'true') THEN users.id END ) / COUNT(users.id) AS paying_percentage FROM users diff --git a/docs/content/product/data-modeling/recipes/cohort-retention.mdx b/docs/content/product/data-modeling/recipes/cohort-retention.mdx index d298ba8ad9fe6..01f4b9279b770 100644 --- a/docs/content/product/data-modeling/recipes/cohort-retention.mdx +++ b/docs/content/product/data-modeling/recipes/cohort-retention.mdx @@ -139,7 +139,7 @@ cubes: - users.email - name: percentage_of_active - sql: "100.0 * {total_active_count} / NULLIF({total_count}, 0)" + sql: "1.0 * {total_active_count} / NULLIF({total_count}, 0)" type: number format: percent drill_members: @@ -168,7 +168,7 @@ cube(`monthly_retention`, { }, percentage_of_active: { - sql: `100.0 * ${total_active_count} / NULLIF(${total_count}, 0)`, + sql: `1.0 * ${total_active_count} / NULLIF(${total_count}, 0)`, type: `number`, format: `percent`, drill_members: [ diff --git 
a/docs/content/product/data-modeling/recipes/event-analytics.mdx b/docs/content/product/data-modeling/recipes/event-analytics.mdx index e047accf8136d..9f2f5cf8745aa 100644 --- a/docs/content/product/data-modeling/recipes/event-analytics.mdx +++ b/docs/content/product/data-modeling/recipes/event-analytics.mdx @@ -742,7 +742,7 @@ cube("sessions", { }, bounce_rate: { - sql: `100.00 * ${bounced_count} / NULLIF(${count}, 0)`, + sql: `1.0 * ${bounced_count} / NULLIF(${count}, 0)`, type: `number`, format: `percent` } @@ -770,7 +770,7 @@ cubes: - - sql: "{is_bounced} = 'True' - name: bounce_rate - sql: "100.00 * {bounced_count} / NULLIF({count}, 0)" + sql: "1.0 * {bounced_count} / NULLIF({count}, 0)" type: number format: percent ``` @@ -843,7 +843,7 @@ cube("sessions", { repeat_percent: { description: `Percent of Repeat Sessions`, - sql: `100.00 * ${repeat_count} / NULLIF(${count}, 0)`, + sql: `1.0 * ${repeat_count} / NULLIF(${count}, 0)`, type: `number`, format: `percent` } @@ -872,7 +872,7 @@ cubes: - name: repeat_percent description: Percent of Repeat Sessions - sql: "100.00 * {repeat_count} / NULLIF({count}, 0)" + sql: "1.0 * {repeat_count} / NULLIF({count}, 0)" type: number format: percent diff --git a/docs/content/product/data-modeling/recipes/using-dynamic-measures.mdx b/docs/content/product/data-modeling/recipes/using-dynamic-measures.mdx index a4e4c307857dd..5e3237d20d016 100644 --- a/docs/content/product/data-modeling/recipes/using-dynamic-measures.mdx +++ b/docs/content/product/data-modeling/recipes/using-dynamic-measures.mdx @@ -38,7 +38,7 @@ const createPercentageMeasure = (status) => ({ sql: (CUBE) => `ROUND(${CUBE[`total_${status}_orders`]}::NUMERIC / ${ CUBE.total_orders - }::NUMERIC * 100.0, 2)` + }::NUMERIC, 2)` } }) diff --git a/docs/content/product/getting-started/cloud/create-data-model.mdx b/docs/content/product/getting-started/cloud/create-data-model.mdx index c591c8b988826..79793dd372823 100644 --- 
a/docs/content/product/getting-started/cloud/create-data-model.mdx +++ b/docs/content/product/getting-started/cloud/create-data-model.mdx @@ -107,7 +107,7 @@ within the `measures` block. ```yaml - name: completed_percentage type: number - sql: "(100.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" + sql: "(1.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" format: percent ``` @@ -154,7 +154,7 @@ cubes: - name: completed_percentage type: number - sql: "(100.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" + sql: "(1.0 * {CUBE.completed_count} / NULLIF({CUBE.count}, 0))" format: percent ``` diff --git a/examples/recipes/active-users/schema/ActiveUsers.js b/examples/recipes/active-users/schema/ActiveUsers.js index 2e87fc5d4a6e6..b455206fb0c1d 100644 --- a/examples/recipes/active-users/schema/ActiveUsers.js +++ b/examples/recipes/active-users/schema/ActiveUsers.js @@ -31,7 +31,7 @@ cube(`ActiveUsers`, { wauToMau: { title: `WAU to MAU`, - sql: `100.000 * ${weeklyActiveUsers} / NULLIF(${monthlyActiveUsers}, 0)`, + sql: `1.0 * ${weeklyActiveUsers} / NULLIF(${monthlyActiveUsers}, 0)`, type: `number`, format: `percent`, }, diff --git a/examples/recipes/referencing-dynamic-measures/schema/Orders.js b/examples/recipes/referencing-dynamic-measures/schema/Orders.js index 56363fe9208e8..dc960ca1b39f2 100644 --- a/examples/recipes/referencing-dynamic-measures/schema/Orders.js +++ b/examples/recipes/referencing-dynamic-measures/schema/Orders.js @@ -22,7 +22,7 @@ const createPercentageMeasure = (status) => ({ format: `percent`, title: `Percentage of ${status} orders`, sql: (CUBE) => - `ROUND(${CUBE[`Total_${status}_orders`]}::numeric / ${CUBE.totalOrders}::numeric * 100.0, 2)`, + `ROUND(${CUBE[`Total_${status}_orders`]}::numeric / ${CUBE.totalOrders}::numeric, 2)`, }, }); diff --git a/packages/cubejs-schema-compiler/src/extensions/Funnels.ts b/packages/cubejs-schema-compiler/src/extensions/Funnels.ts index d5a8d5bce714b..b24861ef9d12e 100644 --- 
a/packages/cubejs-schema-compiler/src/extensions/Funnels.ts +++ b/packages/cubejs-schema-compiler/src/extensions/Funnels.ts @@ -45,7 +45,7 @@ ${eventJoin.join('\nLEFT JOIN\n')} shown: false }, conversionsPercent: { - sql: (conversions, firstStepConversions) => `CASE WHEN ${firstStepConversions} > 0 THEN 100.0 * ${conversions} / ${firstStepConversions} ELSE NULL END`, + sql: (conversions, firstStepConversions) => `CASE WHEN ${firstStepConversions} > 0 THEN 1.0 * ${conversions} / ${firstStepConversions} ELSE NULL END`, type: 'number', format: 'percent' } diff --git a/packages/cubejs-schema-compiler/test/unit/fixtures/calendar_orders.yml b/packages/cubejs-schema-compiler/test/unit/fixtures/calendar_orders.yml index c0ffd1e03c45f..6710462892c44 100644 --- a/packages/cubejs-schema-compiler/test/unit/fixtures/calendar_orders.yml +++ b/packages/cubejs-schema-compiler/test/unit/fixtures/calendar_orders.yml @@ -59,7 +59,7 @@ cubes: - sql: "{CUBE}.status = 'completed'" - name: completed_percentage - sql: "({completed_count} / NULLIF({count}, 0)) * 100.0" + sql: "1.0 * {completed_count} / NULLIF({count}, 0)" type: number format: percent