From 7e5540dae0313cddbe41197ac95832ffd990c368 Mon Sep 17 00:00:00 2001 From: sid597 Date: Thu, 18 Jun 2026 12:57:54 +0530 Subject: [PATCH 1/5] [ENG-1847] Define shared cross-app node content contract --- apps/obsidian/src/utils/rid.ts | 42 +---- packages/database/package.json | 4 +- packages/database/src/crossAppNodeContract.ts | 95 ++++++++++ .../database/src/fixtures/crossAppNodes.ts | 164 ++++++++++++++++++ packages/database/src/lib/rid.ts | 38 ++++ 5 files changed, 306 insertions(+), 37 deletions(-) create mode 100644 packages/database/src/crossAppNodeContract.ts create mode 100644 packages/database/src/fixtures/crossAppNodes.ts create mode 100644 packages/database/src/lib/rid.ts diff --git a/apps/obsidian/src/utils/rid.ts b/apps/obsidian/src/utils/rid.ts index c96db9e3d..08ef6aed2 100644 --- a/apps/obsidian/src/utils/rid.ts +++ b/apps/obsidian/src/utils/rid.ts @@ -1,36 +1,6 @@ -// Functions to express a pair of spaceUri, sourceLocalId as a single string, and back. -// We're following https://github.com/BlockScience/rid-lib: -// Either a Web URL, with the last segment as the sourceLocalId; -// OR the format `orn:.:/` -// With the assumption that the sourceUri has the form : -// The subtype may be omitted. - -export const spaceUriAndLocalIdToRid = ( - spaceUri: string, - localId: string, - subtype?: string, -): string => { - if (spaceUri.startsWith("http")) return `${spaceUri}/${localId}`; - const parts = spaceUri.split(":"); - if (parts.length === 2) - return subtype - ? `orn:${parts[0]}.${subtype}:${parts[1]}/${localId}` - : `orn:${parts[0]}:${parts[1]}/${localId}`; - throw new Error("Unrecognized spaceUri"); -}; - -export const ridToSpaceUriAndLocalId = ( - rid: string, -): { spaceUri: string; sourceLocalId: string } => { - const m = rid.match(/^orn:(\w+)\.(\w+):(.*)\/([^/]+)$/); - if (m) { - return { spaceUri: `${m[1]}:${m[3]}`, sourceLocalId: m[4]! 
}; - } - const m2 = rid.match(/^orn:(\w+):(.*)\/([^/]+)$/); - if (m2) { - return { spaceUri: `${m2[1]}:${m2[2]}`, sourceLocalId: m2[3]! }; - } - const parts = rid.split("/"); - const sourceLocalId = parts.pop()!; - return { spaceUri: parts.join("/"), sourceLocalId }; -}; +// The RID helpers now live in the shared database package so Roam and Obsidian +// share one cross-app identity format. See @repo/database/lib/rid. +export { + spaceUriAndLocalIdToRid, + ridToSpaceUriAndLocalId, +} from "@repo/database/lib/rid"; diff --git a/packages/database/package.json b/packages/database/package.json index b2c0de431..9a814f52f 100644 --- a/packages/database/package.json +++ b/packages/database/package.json @@ -11,7 +11,9 @@ "default": "./src/dbDotEnv.mjs" }, "./dbTypes": "./src/dbTypes.ts", - "./inputTypes": "./src/inputTypes.ts" + "./inputTypes": "./src/inputTypes.ts", + "./crossAppNodeContract": "./src/crossAppNodeContract.ts", + "./fixtures/*": "./src/fixtures/*.ts" }, "typesVersions": { "*": { diff --git a/packages/database/src/crossAppNodeContract.ts b/packages/database/src/crossAppNodeContract.ts new file mode 100644 index 000000000..7f6582b37 --- /dev/null +++ b/packages/database/src/crossAppNodeContract.ts @@ -0,0 +1,95 @@ +import type { Enums } from "./dbTypes"; + +/** + * Shared cross-app discourse-node content contract (MVP0). + * + * This is the payload that lets Roam and Obsidian discover, import and refresh + * each other's discourse nodes. It is a typed *view* over data that already + * persists through `@repo/database/inputTypes` (`LocalConceptDataInput` / + * `LocalContentDataInput`) and the `upsert_concepts` / `upsert_content` RPCs — + * it does NOT introduce a new persistence path. Build/parse the `rid` with the + * helpers in `@repo/database/lib/rid`. The full spec — field-by-field mapping to + * the Concept/Content tables and markdown fidelity limits — lives on Linear + * issue ENG-1847. + */ + +/** Source app a shared node originates from. 
Mirrors the DB `Platform` enum. */ +export type Platform = Enums<"Platform">; // "Roam" | "Obsidian" + +/** Persisted content scales. Mirrors the DB `ContentVariant` enum. */ +export type ContentVariant = Enums<"ContentVariant">; + +/** + * The Content variants every shared node must persist: + * - `direct`: the import-list title. + * - `full`: a self-sufficient markdown body the destination can materialize + * without querying the source app. + */ +export const SHARED_NODE_CONTENT_VARIANTS = [ + "direct", + "full", +] as const satisfies readonly ContentVariant[]; + +/** + * MIME type of the `full` variant in MVP0. Markdown is the v0 content model; + * atJSON is the planned v1 successor (F16). Keep this as the single place that + * names the format so v1 does not have to hunt down hardcoded strings. + */ +export const FULL_CONTENT_FORMAT = "text/markdown"; + +/** Identity of the node-type schema the destination maps to / creates from. */ +export type CrossAppNodeType = { + /** + * `source_local_id` of the node-type *schema* Concept in the source space + * (the Concept with `is_schema = true`). Maps to + * `LocalConceptDataInput.schema_represented_by_local_id` on the instance. + */ + sourceLocalId: string; + /** Human-readable node-type label, e.g. "Claim". */ + label: string; +}; + +/** The required content variants of a shared node. */ +export type CrossAppNodeContent = { + /** Import-list title. Persisted as the `direct` Content variant (`text`). */ + direct: { value: string }; + /** + * Self-sufficient markdown body. Persisted as the `full` Content variant + * (`text`); `format` is the contract-level media type for that text in MVP0. + */ + full: { format: typeof FULL_CONTENT_FORMAT; value: string }; +}; + +/** + * Stable cross-app identity (F9). The triple + * (`sourceApp`, `sourceSpace.url`, `sourceLocalId`) is equivalent to `rid`; + * build/parse `rid` with `spaceUriAndLocalIdToRid` / `ridToSpaceUriAndLocalId` + * from `@repo/database/lib/rid`. 
Duplicate-prevention and refresh must key on + * this identity, never on the display title. + */ +export type CrossAppNodeIdentity = { + sourceApp: Platform; + /** + * Source space: `Space.url` (portable cross-app id) and `Space.name` + * (display). Do not use numeric `Space.id` as the payload identity; it is + * local to the receiving database. + */ + sourceSpace: { url: string; name: string }; + /** The node's `source_local_id` within its source space. */ + sourceLocalId: string; + /** Stable cross-app id derived from (`sourceSpace.url`, `sourceLocalId`). */ + rid: string; +}; + +/** The shared cross-app discourse-node payload (discovery + import facing). */ +export type CrossAppNode = CrossAppNodeIdentity & { + nodeType: CrossAppNodeType; + content: CrossAppNodeContent; + /** + * ISO-8601 source last-modified time. Use the source node modified timestamp, + * or the latest `Content.last_modified` across the required `direct` and + * `full` variants when deriving from persisted rows. Basis for freshness + * (F13), refresh, and duplicate-prevention. + */ + sourceModifiedAt: string; +}; diff --git a/packages/database/src/fixtures/crossAppNodes.ts b/packages/database/src/fixtures/crossAppNodes.ts new file mode 100644 index 000000000..b98ecbc13 --- /dev/null +++ b/packages/database/src/fixtures/crossAppNodes.ts @@ -0,0 +1,164 @@ +import type { + LocalConceptDataInput, + LocalContentDataInput, +} from "../inputTypes"; +import { + FULL_CONTENT_FORMAT, + type CrossAppNode, +} from "../crossAppNodeContract"; +import { spaceUriAndLocalIdToRid } from "../lib/rid"; + +/** + * Reference fixtures for the cross-app node content contract (ENG-1847). + * + * Each fixture pairs the contract-level `CrossAppNode` with the existing + * `LocalConceptDataInput` + `LocalContentDataInput[]` it persists as — showing + * downstream Roam/Obsidian tickets exactly how the contract maps onto + * `upsert_concepts` / `upsert_content` without redefining the payload. 
The + * fixtures use the `space_url` / `author_local_id` string keys so they stay + * portable; the live source apps pass their resolved numeric `space_id` / + * `author_id` from `SupabaseContext` instead. + */ +export type CrossAppNodeFixture = { + node: CrossAppNode; + concept: LocalConceptDataInput; + contents: LocalContentDataInput[]; +}; + +// --- Roam-origin node: a Claim shared from a Roam graph --------------------- + +const ROAM_SPACE_URL = "https://roamresearch.com/#/app/MAPLab"; +const ROAM_NODE_ID = "tgWb6JozF"; // a Roam block/page uid +const ROAM_CLAIM_SCHEMA_ID = "rCLM0schema"; // source_local_id of the Claim schema Concept +const ROAM_NODE_RID = spaceUriAndLocalIdToRid(ROAM_SPACE_URL, ROAM_NODE_ID); + +const roamFullMarkdown = `# Sleep improves memory consolidation + +Multiple studies show that sleep after learning strengthens memory traces. + +- Supported by [[EVD]] - Rasch & Born 2013 +`; + +export const roamOriginNode: CrossAppNodeFixture = { + node: { + sourceApp: "Roam", + sourceSpace: { url: ROAM_SPACE_URL, name: "MAPLab" }, + sourceLocalId: ROAM_NODE_ID, + rid: ROAM_NODE_RID, + nodeType: { sourceLocalId: ROAM_CLAIM_SCHEMA_ID, label: "Claim" }, + content: { + direct: { value: "Sleep improves memory consolidation" }, + full: { format: FULL_CONTENT_FORMAT, value: roamFullMarkdown }, + }, + sourceModifiedAt: "2026-06-12T14:00:00.000Z", + }, + concept: { + space_url: ROAM_SPACE_URL, + name: "Sleep improves memory consolidation", + source_local_id: ROAM_NODE_ID, + schema_represented_by_local_id: ROAM_CLAIM_SCHEMA_ID, + is_schema: false, + author_local_id: "roam-account-uid", + created: "2026-06-10T09:00:00.000Z", + last_modified: "2026-06-12T14:00:00.000Z", + }, + contents: [ + { + space_url: ROAM_SPACE_URL, + source_local_id: ROAM_NODE_ID, + variant: "direct", + scale: "document", + text: "Sleep improves memory consolidation", + author_local_id: "roam-account-uid", + created: "2026-06-10T09:00:00.000Z", + last_modified: "2026-06-12T14:00:00.000Z", 
+ }, + { + space_url: ROAM_SPACE_URL, + source_local_id: ROAM_NODE_ID, + variant: "full", + scale: "document", + text: roamFullMarkdown, + author_local_id: "roam-account-uid", + created: "2026-06-10T09:00:00.000Z", + last_modified: "2026-06-12T14:00:00.000Z", + }, + ], +}; + +// --- Obsidian-origin node: an Evidence note shared from an Obsidian vault ---- + +const OBSIDIAN_VAULT_ID = "9a8b7c6d5e4f3210"; // app.appId +const OBSIDIAN_SPACE_URL = `obsidian:${OBSIDIAN_VAULT_ID}`; +const OBSIDIAN_NODE_ID = "0192f1a0-7b3c-7e2a-9f10-1a2b3c4d5e6f"; // uuidv7 nodeInstanceId +const OBSIDIAN_EVD_SCHEMA_ID = "evd-7c1f9a2b"; // nodeTypeId +const OBSIDIAN_FILE_PATH = "Discourse Nodes/EVD - REM sleep and recall.md"; +const OBSIDIAN_TITLE = "EVD - REM sleep and recall"; // file basename +const OBSIDIAN_NODE_RID = spaceUriAndLocalIdToRid( + OBSIDIAN_SPACE_URL, + OBSIDIAN_NODE_ID, + "note", +); + +// Obsidian's `full` variant is the entire file as read from the vault, which +// includes the YAML frontmatter — a known markdown-fidelity wrinkle the +// destination materialization (ENG-1858 / ENG-1872) must handle. +const obsidianFullMarkdown = `--- +nodeTypeId: ${OBSIDIAN_EVD_SCHEMA_ID} +nodeInstanceId: ${OBSIDIAN_NODE_ID} +--- + +# REM sleep correlates with recall + +Participants with more REM sleep showed better next-day recall. 
+`; + +export const obsidianOriginNode: CrossAppNodeFixture = { + node: { + sourceApp: "Obsidian", + sourceSpace: { url: OBSIDIAN_SPACE_URL, name: "Research Vault" }, + sourceLocalId: OBSIDIAN_NODE_ID, + rid: OBSIDIAN_NODE_RID, + nodeType: { sourceLocalId: OBSIDIAN_EVD_SCHEMA_ID, label: "Evidence" }, + content: { + direct: { value: OBSIDIAN_TITLE }, + full: { format: FULL_CONTENT_FORMAT, value: obsidianFullMarkdown }, + }, + sourceModifiedAt: "2026-06-14T10:30:00.000Z", + }, + concept: { + space_url: OBSIDIAN_SPACE_URL, + name: OBSIDIAN_FILE_PATH, // Obsidian uses the file path as the Concept name + source_local_id: OBSIDIAN_NODE_ID, + schema_represented_by_local_id: OBSIDIAN_EVD_SCHEMA_ID, + is_schema: false, + author_local_id: "obsidian-account-uid", + created: "2026-06-13T08:00:00.000Z", + last_modified: "2026-06-14T10:30:00.000Z", + literal_content: { label: OBSIDIAN_TITLE }, + }, + contents: [ + { + space_url: OBSIDIAN_SPACE_URL, + source_local_id: OBSIDIAN_NODE_ID, + variant: "direct", + scale: "document", + text: OBSIDIAN_TITLE, + author_local_id: "obsidian-account-uid", + created: "2026-06-13T08:00:00.000Z", + last_modified: "2026-06-14T10:30:00.000Z", + metadata: { filePath: OBSIDIAN_FILE_PATH }, + }, + { + space_url: OBSIDIAN_SPACE_URL, + source_local_id: OBSIDIAN_NODE_ID, + variant: "full", + scale: "document", + text: obsidianFullMarkdown, + author_local_id: "obsidian-account-uid", + created: "2026-06-13T08:00:00.000Z", + last_modified: "2026-06-14T10:30:00.000Z", + metadata: { filePath: OBSIDIAN_FILE_PATH }, + }, + ], +}; diff --git a/packages/database/src/lib/rid.ts b/packages/database/src/lib/rid.ts new file mode 100644 index 000000000..768ff6d8a --- /dev/null +++ b/packages/database/src/lib/rid.ts @@ -0,0 +1,38 @@ +// Express a pair of (spaceUri, sourceLocalId) as a single stable cross-app id +// (RID), and parse it back. Shared by Roam and Obsidian so both apps use one +// identity format for cross-app share / discovery / import / refresh. 
+// We follow https://github.com/BlockScience/rid-lib: +// Either a Web URL, with the last segment as the sourceLocalId; +// OR the format `orn:<type>.<subtype>:<path>/<localId>` +// With the assumption that the spaceUri has the form `<type>:<path>` +// The subtype may be omitted. + +export const spaceUriAndLocalIdToRid = ( + spaceUri: string, + localId: string, + subtype?: string, +): string => { + if (spaceUri.startsWith("http")) return `${spaceUri}/${localId}`; + const parts = spaceUri.split(":"); + if (parts.length === 2) + return subtype + ? `orn:${parts[0]}.${subtype}:${parts[1]}/${localId}` + : `orn:${parts[0]}:${parts[1]}/${localId}`; + throw new Error("Unrecognized spaceUri"); +}; + +export const ridToSpaceUriAndLocalId = ( + rid: string, +): { spaceUri: string; sourceLocalId: string } => { + const m = rid.match(/^orn:(\w+)\.(\w+):(.*)\/([^/]+)$/); + if (m) { + return { spaceUri: `${m[1]}:${m[3]}`, sourceLocalId: m[4]! }; + } + const m2 = rid.match(/^orn:(\w+):(.*)\/([^/]+)$/); + if (m2) { + return { spaceUri: `${m2[1]}:${m2[2]}`, sourceLocalId: m2[3]! }; + } + const parts = rid.split("/"); + const sourceLocalId = parts.pop()!; + return { spaceUri: parts.join("/"), sourceLocalId }; +}; From 186cbec375a28e870fa682a47b85b5ecee66d76f Mon Sep 17 00:00:00 2001 From: sid597 Date: Fri, 19 Jun 2026 10:42:45 +0530 Subject: [PATCH 2/5] [ENG-1849] Add node-type schema dependency to cross-app node fixtures The cross-app node contract (ENG-1847) carries nodeType = { sourceLocalId, label } and the instance Concept references it via schema_represented_by_local_id, but the fixtures persisted no is_schema:true schema Concept for that id. Add the required schemaConcept (Roam Claim, Obsidian Evidence) carrying stable source identity + label only -- no source_data/format/color/tag, per the contract's "without redefining schema shape".
Roam's existing 5-min sync already persists the schema Concept per contract (convertDgToSupabaseConcepts -> discourseNodeSchemaToLocalConcept; all types on initial sync, edited types incrementally via nodeTypeSince), so no Roam sync code change is needed for F4/F9/F13. Stacked on eng-1847 (PR #1129, unmerged). --- .../database/src/fixtures/crossAppNodes.ts | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/packages/database/src/fixtures/crossAppNodes.ts b/packages/database/src/fixtures/crossAppNodes.ts index b98ecbc13..a08f4faa6 100644 --- a/packages/database/src/fixtures/crossAppNodes.ts +++ b/packages/database/src/fixtures/crossAppNodes.ts @@ -12,15 +12,17 @@ import { spaceUriAndLocalIdToRid } from "../lib/rid"; * Reference fixtures for the cross-app node content contract (ENG-1847). * * Each fixture pairs the contract-level `CrossAppNode` with the existing - * `LocalConceptDataInput` + `LocalContentDataInput[]` it persists as — showing - * downstream Roam/Obsidian tickets exactly how the contract maps onto - * `upsert_concepts` / `upsert_content` without redefining the payload. The - * fixtures use the `space_url` / `author_local_id` string keys so they stay - * portable; the live source apps pass their resolved numeric `space_id` / - * `author_id` from `SupabaseContext` instead. + * persistence rows it maps onto — the node-type `schemaConcept` it depends on, + * the instance `concept`, and its `LocalContentDataInput[]` — showing downstream + * Roam/Obsidian tickets exactly how the contract maps onto `upsert_concepts` / + * `upsert_content` without redefining the payload. The fixtures use the + * `space_url` / `author_local_id` string keys so they stay portable; the live + * source apps pass their resolved numeric `space_id` / `author_id` from + * `SupabaseContext` instead. 
*/ export type CrossAppNodeFixture = { node: CrossAppNode; + schemaConcept: LocalConceptDataInput; concept: LocalConceptDataInput; contents: LocalContentDataInput[]; }; @@ -52,6 +54,16 @@ export const roamOriginNode: CrossAppNodeFixture = { }, sourceModifiedAt: "2026-06-12T14:00:00.000Z", }, + schemaConcept: { + space_url: ROAM_SPACE_URL, + name: "Claim", + source_local_id: ROAM_CLAIM_SCHEMA_ID, + is_schema: true, + author_local_id: "roam-account-uid", + created: "2026-06-01T09:00:00.000Z", + last_modified: "2026-06-01T09:00:00.000Z", + literal_content: { label: "Claim" }, + }, concept: { space_url: ROAM_SPACE_URL, name: "Sleep improves memory consolidation", @@ -126,6 +138,16 @@ export const obsidianOriginNode: CrossAppNodeFixture = { }, sourceModifiedAt: "2026-06-14T10:30:00.000Z", }, + schemaConcept: { + space_url: OBSIDIAN_SPACE_URL, + name: "Evidence", + source_local_id: OBSIDIAN_EVD_SCHEMA_ID, + is_schema: true, + author_local_id: "obsidian-account-uid", + created: "2026-06-01T08:00:00.000Z", + last_modified: "2026-06-01T08:00:00.000Z", + literal_content: { label: "Evidence" }, + }, concept: { space_url: OBSIDIAN_SPACE_URL, name: OBSIDIAN_FILE_PATH, // Obsidian uses the file path as the Concept name From add4831df77fe82d6be5e232befd281f6fa985ea Mon Sep 17 00:00:00 2001 From: sid597 Date: Fri, 19 Jun 2026 14:04:20 +0530 Subject: [PATCH 3/5] [ENG-1848] Add Roam full markdown content variant for shared nodes --- .../convertRoamNodeToFullContent.fixture.ts | 58 +++++++++++++ .../src/utils/convertRoamNodeToFullContent.ts | 81 +++++++++++++++++++ apps/roam/src/utils/syncDgNodesToSupabase.ts | 8 +- 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 apps/roam/src/utils/convertRoamNodeToFullContent.fixture.ts create mode 100644 apps/roam/src/utils/convertRoamNodeToFullContent.ts diff --git a/apps/roam/src/utils/convertRoamNodeToFullContent.fixture.ts b/apps/roam/src/utils/convertRoamNodeToFullContent.fixture.ts new file mode 100644 index 
000000000..df12c7afa --- /dev/null +++ b/apps/roam/src/utils/convertRoamNodeToFullContent.fixture.ts @@ -0,0 +1,58 @@ +import type { TreeNode } from "roamjs-components/types"; +import { + FULL_CONTENT_FORMAT, + type CrossAppNodeContent, +} from "@repo/database/crossAppNodeContract"; +import { buildFullMarkdown } from "./convertRoamNodeToFullContent"; + +/** + * Reference fixture for ENG-1848 ("tests or fixtures cover representative Roam + * block content becoming `full` markdown"). The Roam app has no unit-test + * runner, so this exercises the real `buildFullMarkdown` transform on an + * in-memory Roam page tree and types the result against the ENG-1847 contract's + * `full` variant. Downstream importer validation (ENG-1857) can assert against + * `roamClaimFullMarkdownFixture.full.value`, which evaluates to: + * + * # Sleep improves memory consolidation + * + * - Multiple studies show that sleep after learning strengthens memory traces. + * - Supporting evidence: + * - [[EVD]] - Rasch & Born 2013 + */ + +const block = (text: string, children: TreeNode[] = []): TreeNode => ({ + text, + children, + order: 0, + parents: [], + uid: "", + heading: 0, + open: true, + viewType: "bullet", + blockViewType: "outline", + editTime: new Date(0), + textAlign: "left", + props: { imageResize: {}, iframe: {} }, +}); + +const title = "Sleep improves memory consolidation"; + +const blocks: TreeNode[] = [ + block( + "Multiple studies show that sleep after learning strengthens memory traces.", + ), + block("Supporting evidence:", [block("[[EVD]] - Rasch & Born 2013")]), +]; + +export const roamClaimFullMarkdownFixture: { + title: string; + blocks: TreeNode[]; + full: CrossAppNodeContent["full"]; +} = { + title, + blocks, + full: { + format: FULL_CONTENT_FORMAT, + value: buildFullMarkdown({ title, blocks }), + }, +}; diff --git a/apps/roam/src/utils/convertRoamNodeToFullContent.ts b/apps/roam/src/utils/convertRoamNodeToFullContent.ts new file mode 100644 index 000000000..630a9bc74 --- 
/dev/null +++ b/apps/roam/src/utils/convertRoamNodeToFullContent.ts @@ -0,0 +1,81 @@ +import { toMarkdown } from "./pageToMarkdown"; +import { type RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; +import { type DiscourseNode } from "./getDiscourseNodes"; +import getFullTreeByParentUid from "roamjs-components/queries/getFullTreeByParentUid"; +import getPageViewType from "roamjs-components/queries/getPageViewType"; +import type { TreeNode, ViewType } from "roamjs-components/types"; +import type { LocalContentDataInput } from "@repo/database/inputTypes"; + +/** + * Builds the `full` cross-app content variant for Roam discourse nodes. + * + * Per the shared cross-app node contract (ENG-1847, + * `@repo/database/crossAppNodeContract`), every shared node must persist a + * `full` variant: a self-sufficient markdown body the destination app can + * materialize without querying Roam. Roam previously emitted only the `direct` + * title content; this fills that gap (ENG-1848, F2/F3). The body reuses the + * existing `toMarkdown` page serializer with block-refs and embeds inlined for + * self-sufficiency, prefixed with the node title as an H1 — matching the + * contract's Roam fixture. Known MVP0 markdown-fidelity limits live on F3. + */ + +const FULL_MARKDOWN_OPTS = { + refs: true, + embeds: true, + simplifiedFilename: false, + removeSpecialCharacters: false, + maxFilenameLength: 64, + linkType: "alias", + allNodes: [] as DiscourseNode[], +}; + +export const buildFullMarkdown = ({ + title, + blocks, + viewType = "bullet", +}: { + title: string; + blocks: TreeNode[]; + viewType?: ViewType; +}): string => { + const body = blocks + .filter((block) => !!block.text || !!block.children?.length) + .map((block) => + toMarkdown({ c: block, v: viewType, i: 0, opts: FULL_MARKDOWN_OPTS }), + ) + .join("\n") + .trim(); + return body ? 
`# ${title}\n\n${body}\n` : `# ${title}\n`; +}; + +export const convertRoamNodeToFullContent = ({ + nodes, +}: { + nodes: RoamDiscourseNodeData[]; +}): LocalContentDataInput[] => + nodes.flatMap((node) => { + try { + const title = node.node_title ?? node.text; + const blocks = getFullTreeByParentUid(node.source_local_id).children; + const viewType = getPageViewType(title) || "bullet"; + return [ + { + author_local_id: node.author_local_id, + source_local_id: node.source_local_id, + created: new Date(node.created || Date.now()).toISOString(), + last_modified: new Date( + node.last_modified || Date.now(), + ).toISOString(), + text: buildFullMarkdown({ title, blocks, viewType }), + variant: "full", + scale: "document", + }, + ]; + } catch (error) { + console.error( + `convertRoamNodeToFullContent: failed to build full markdown for ${node.source_local_id}:`, + error, + ); + return []; + } + }); diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index 69ab5d5cb..512e9bf23 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -18,6 +18,7 @@ import { } from "./conceptConversion"; import { fetchEmbeddingsForNodes } from "./upsertNodesAsContentWithEmbeddings"; import { convertRoamNodeToLocalContent } from "./upsertNodesAsContentWithEmbeddings"; +import { convertRoamNodeToFullContent } from "./convertRoamNodeToFullContent"; import type { DGSupabaseClient } from "@repo/database/lib/client"; import { intersection } from "@repo/utils/setOperations"; import type { Json, Enums } from "@repo/database/dbTypes"; @@ -618,6 +619,9 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( const allNodeInstancesAsLocalContent = convertRoamNodeToLocalContent({ nodes: roamNodes, }); + const fullContent = convertRoamNodeToFullContent({ + nodes: roamNodes, + }); let nodesWithEmbeddings: LocalContentDataInput[]; try { @@ -658,7 +662,9 @@ export const 
upsertNodesToSupabaseAsContentWithEmbeddings = async ( } }; - await uploadBatches(chunk(nodesWithEmbeddings, BATCH_SIZE)); + await uploadBatches( + chunk([...nodesWithEmbeddings, ...fullContent], BATCH_SIZE), + ); }; const getAllUsers = async (): Promise => { From 87a0551e9ce14e17f883f9e9e0a42b20c0d3ca9e Mon Sep 17 00:00:00 2001 From: sid597 Date: Mon, 22 Jun 2026 02:36:23 +0530 Subject: [PATCH 4/5] [ENG-1849] Persist Roam schema labels --- apps/roam/src/utils/conceptConversion.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/roam/src/utils/conceptConversion.ts b/apps/roam/src/utils/conceptConversion.ts index 0851df84a..83abd1a14 100644 --- a/apps/roam/src/utils/conceptConversion.ts +++ b/apps/roam/src/utils/conceptConversion.ts @@ -78,17 +78,21 @@ export const discourseNodeSchemaToLocalConcept = ( node: DiscourseNode, ): LocalConceptDataInput => { const titleParts = node.text.split("/"); + const label = titleParts[titleParts.length - 1] ?? node.text; const result: LocalConceptDataInput = { space_id: context.spaceId, name: node.text, source_local_id: node.type, is_schema: true, + literal_content: { + label, + }, /* eslint-enable @typescript-eslint/naming-convention */ ...getNodeExtraData(node.type), }; if (node.template !== undefined) result.literal_content = { - label: titleParts[titleParts.length - 1], + label, template: templateToText(node.template), }; return result; From 554884cf5894595bae40a377be87e04fa77810fd Mon Sep 17 00:00:00 2001 From: sid597 Date: Mon, 22 Jun 2026 21:40:26 +0530 Subject: [PATCH 5/5] [ENG-1852] Keep Roam shared content fresh --- .../convertRoamNodeToFullContent.example.ts | 9 +- .../convertRoamNodeToFullContent.fixture.ts | 58 ---- ...ertRoamNodeToFullContent.simple.example.ts | 46 ++++ .../src/utils/convertRoamNodeToFullContent.ts | 12 +- apps/roam/src/utils/syncDgNodesToSupabase.ts | 259 +++++++++++++++--- 5 files changed, 281 insertions(+), 103 deletions(-) delete mode 100644 
apps/roam/src/utils/convertRoamNodeToFullContent.fixture.ts create mode 100644 apps/roam/src/utils/convertRoamNodeToFullContent.simple.example.ts diff --git a/apps/roam/src/utils/convertRoamNodeToFullContent.example.ts b/apps/roam/src/utils/convertRoamNodeToFullContent.example.ts index 1e5c96310..8912d1a31 100644 --- a/apps/roam/src/utils/convertRoamNodeToFullContent.example.ts +++ b/apps/roam/src/utils/convertRoamNodeToFullContent.example.ts @@ -3,11 +3,10 @@ import type { CrossAppNode } from "@repo/database/crossAppNodeContract"; import { buildFullMarkdown } from "./convertRoamNodeToFullContent"; /** - * Typed example for ENG-1848 ("tests or fixtures cover representative Roam - * block content becoming `full` markdown"). This is not a concrete test; it - * documents the `tree.children` shape returned by `getFullTreeByParentUid` for - * a real Roam claim page and type-checks the generated markdown against the - * contract. + * Typed example for ENG-1848 full markdown coverage. This is not a concrete + * test; it documents the `tree.children` shape returned by + * `getFullTreeByParentUid` for a real Roam claim page and type-checks the + * generated markdown against the contract. * * Derived from: * https://roamresearch.com/#/app/plugin-testing-akamatsulab2/page/dnHNmYwe5 diff --git a/apps/roam/src/utils/convertRoamNodeToFullContent.fixture.ts b/apps/roam/src/utils/convertRoamNodeToFullContent.fixture.ts deleted file mode 100644 index df12c7afa..000000000 --- a/apps/roam/src/utils/convertRoamNodeToFullContent.fixture.ts +++ /dev/null @@ -1,58 +0,0 @@ -import type { TreeNode } from "roamjs-components/types"; -import { - FULL_CONTENT_FORMAT, - type CrossAppNodeContent, -} from "@repo/database/crossAppNodeContract"; -import { buildFullMarkdown } from "./convertRoamNodeToFullContent"; - -/** - * Reference fixture for ENG-1848 ("tests or fixtures cover representative Roam - * block content becoming `full` markdown"). 
The Roam app has no unit-test - * runner, so this exercises the real `buildFullMarkdown` transform on an - * in-memory Roam page tree and types the result against the ENG-1847 contract's - * `full` variant. Downstream importer validation (ENG-1857) can assert against - * `roamClaimFullMarkdownFixture.full.value`, which evaluates to: - * - * # Sleep improves memory consolidation - * - * - Multiple studies show that sleep after learning strengthens memory traces. - * - Supporting evidence: - * - [[EVD]] - Rasch & Born 2013 - */ - -const block = (text: string, children: TreeNode[] = []): TreeNode => ({ - text, - children, - order: 0, - parents: [], - uid: "", - heading: 0, - open: true, - viewType: "bullet", - blockViewType: "outline", - editTime: new Date(0), - textAlign: "left", - props: { imageResize: {}, iframe: {} }, -}); - -const title = "Sleep improves memory consolidation"; - -const blocks: TreeNode[] = [ - block( - "Multiple studies show that sleep after learning strengthens memory traces.", - ), - block("Supporting evidence:", [block("[[EVD]] - Rasch & Born 2013")]), -]; - -export const roamClaimFullMarkdownFixture: { - title: string; - blocks: TreeNode[]; - full: CrossAppNodeContent["full"]; -} = { - title, - blocks, - full: { - format: FULL_CONTENT_FORMAT, - value: buildFullMarkdown({ title, blocks }), - }, -}; diff --git a/apps/roam/src/utils/convertRoamNodeToFullContent.simple.example.ts b/apps/roam/src/utils/convertRoamNodeToFullContent.simple.example.ts new file mode 100644 index 000000000..9b56a2999 --- /dev/null +++ b/apps/roam/src/utils/convertRoamNodeToFullContent.simple.example.ts @@ -0,0 +1,46 @@ +import type { TreeNode } from "roamjs-components/types"; +import type { CrossAppNode } from "@repo/database/crossAppNodeContract"; +import { buildFullMarkdown } from "./convertRoamNodeToFullContent"; + +/** + * Small typed example for ENG-1848/ENG-1852 full markdown validation. 
The Roam + * app has no unit-test runner, so this keeps a compact in-memory tree that + * type-checks the generated markdown against the shared cross-app contract. + */ + +const block = (text: string, children: TreeNode[] = []): TreeNode => ({ + text, + children, + order: 0, + parents: [], + uid: "", + heading: 0, + open: true, + viewType: "bullet", + blockViewType: "outline", + editTime: new Date(0), + textAlign: "left", + props: { imageResize: {}, iframe: {} }, +}); + +const title = "Sleep improves memory consolidation"; + +const blocks: TreeNode[] = [ + block( + "Multiple studies show that sleep after learning strengthens memory traces.", + ), + block("Supporting evidence:", [block("[[EVD]] - Rasch & Born 2013")]), +]; + +export const roamClaimFullMarkdownSimpleExample: { + title: string; + blocks: TreeNode[]; + full: CrossAppNode["content"]["full"]; +} = { + title, + blocks, + full: { + format: "text/markdown", + value: buildFullMarkdown({ title, blocks }), + }, +}; diff --git a/apps/roam/src/utils/convertRoamNodeToFullContent.ts b/apps/roam/src/utils/convertRoamNodeToFullContent.ts index 88369bb98..3981f825d 100644 --- a/apps/roam/src/utils/convertRoamNodeToFullContent.ts +++ b/apps/roam/src/utils/convertRoamNodeToFullContent.ts @@ -1,11 +1,19 @@ import { toMarkdown } from "./pageToMarkdown"; -import { type RoamDiscourseNodeData } from "./getAllDiscourseNodesSince"; import { type DiscourseNode } from "./getDiscourseNodes"; import getFullTreeByParentUid from "roamjs-components/queries/getFullTreeByParentUid"; import getPageViewType from "roamjs-components/queries/getPageViewType"; import type { TreeNode, ViewType } from "roamjs-components/types"; import type { LocalContentDataInput } from "@repo/database/inputTypes"; +export type RoamFullContentNode = { + author_local_id: string; + source_local_id: string; + created: string | number; + last_modified: string | number; + text: string; + node_title?: string; +}; + const FULL_MARKDOWN_OPTS = { refs: true, embeds: 
true, @@ -38,7 +46,7 @@ export const buildFullMarkdown = ({ export const convertRoamNodeToFullContent = ({ nodes, }: { - nodes: RoamDiscourseNodeData[]; + nodes: RoamFullContentNode[]; }): LocalContentDataInput[] => nodes.flatMap((node) => { try { diff --git a/apps/roam/src/utils/syncDgNodesToSupabase.ts b/apps/roam/src/utils/syncDgNodesToSupabase.ts index aaa95db71..6e35963fe 100644 --- a/apps/roam/src/utils/syncDgNodesToSupabase.ts +++ b/apps/roam/src/utils/syncDgNodesToSupabase.ts @@ -3,6 +3,7 @@ import { getAllDiscourseNodesSince, nodeTypeSince, } from "./getAllDiscourseNodesSince"; +import getDiscourseNodeFormatExpression from "./getDiscourseNodeFormatExpression"; import { cleanupOrphanedNodes } from "./cleanupOrphanedNodes"; import { getLoggedInClient, @@ -18,7 +19,10 @@ import { } from "./conceptConversion"; import { fetchEmbeddingsForNodes } from "./upsertNodesAsContentWithEmbeddings"; import { convertRoamNodeToLocalContent } from "./upsertNodesAsContentWithEmbeddings"; -import { convertRoamNodeToFullContent } from "./convertRoamNodeToFullContent"; +import { + convertRoamNodeToFullContent, + type RoamFullContentNode, +} from "./convertRoamNodeToFullContent"; import type { DGSupabaseClient } from "@repo/database/lib/client"; import { intersection } from "@repo/utils/setOperations"; import type { Json, Enums } from "@repo/database/dbTypes"; @@ -42,6 +46,7 @@ const SYNC_TIMEOUT = "60s"; // must be less than half the SYNC_INTERVAL. 
const BATCH_SIZE = 200; const CONCEPT_BATCH_SIZE = 200; const END_SYNC_TASK_RESULT_VERSION = 1; +const DEFAULT_SYNC_TIME = new Date("1970-01-01").getTime(); type SyncPhaseDurations = Record; @@ -556,16 +561,29 @@ export const convertDgToSupabaseConcepts = async ({ nodesSince, since, allNodeTypes, + sharedNodeTypeIds = new Set(), supabaseClient, context, }: { nodesSince: RoamDiscourseNodeData[]; since: number | undefined; allNodeTypes: DiscourseNode[]; + sharedNodeTypeIds?: ReadonlySet; supabaseClient: DGSupabaseClient; context: SupabaseContext; }) => { - const nodeTypes = await nodeTypeSince(since, allNodeTypes); + const changedNodeTypes = await nodeTypeSince(since, allNodeTypes); + const nodeTypesByUid = new Map( + changedNodeTypes.map((nodeType) => [nodeType.type, nodeType]), + ); + + allNodeTypes.forEach((nodeType) => { + if (sharedNodeTypeIds.has(nodeType.type)) { + nodeTypesByUid.set(nodeType.type, nodeType); + } + }); + const nodeTypes = Array.from(nodeTypesByUid.values()); + await upsertNodeSchemaToContent({ nodeTypesUids: nodeTypes.map((node) => node.type), spaceId: context.spaceId, @@ -606,15 +624,44 @@ export const convertDgToSupabaseConcepts = async ({ }); }; +const uploadContentBatches = async ({ + content, + supabaseClient, + context, +}: { + content: LocalContentDataInput[]; + supabaseClient: DGSupabaseClient; + context: SupabaseContext; +}): Promise => { + if (content.length === 0) { + return; + } + + const batches = chunk(content, BATCH_SIZE); + + for (let idx = 0; idx < batches.length; idx++) { + const batch = batches[idx]; + + const { error } = await supabaseClient.rpc("upsert_content", { + data: batch as Json, + v_space_id: context.spaceId, + v_creator_id: context.userId, + content_as_document: true, + }); + + if (error) { + throw new Error(`upsert_content failed for batch ${idx + 1}`, { + cause: error, + }); + } + } +}; + export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( roamNodes: RoamDiscourseNodeData[], supabaseClient: 
DGSupabaseClient, context: SupabaseContext, - options: { includeFullContent?: boolean } = {}, ): Promise => { - const { userId } = context; - const { includeFullContent = false } = options; - if (roamNodes.length === 0) { return; } @@ -622,34 +669,6 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( nodes: roamNodes, }); - const uploadBatches = async ( - batches: LocalContentDataInput[][], - ): Promise => { - for (let idx = 0; idx < batches.length; idx++) { - const batch = batches[idx]; - - const { error } = await supabaseClient.rpc("upsert_content", { - data: batch as Json, - v_space_id: context.spaceId, - v_creator_id: userId, - content_as_document: true, - }); - - if (error) { - throw new Error(`upsert_content failed for batch ${idx + 1}`, { - cause: error, - }); - } - } - }; - - if (includeFullContent) { - const fullContent = convertRoamNodeToFullContent({ - nodes: roamNodes, - }); - await uploadBatches(chunk(fullContent, BATCH_SIZE)); - } - let nodesWithEmbeddings: LocalContentDataInput[]; try { nodesWithEmbeddings = await fetchEmbeddingsForNodes( @@ -672,7 +691,32 @@ export const upsertNodesToSupabaseAsContentWithEmbeddings = async ( ); } - await uploadBatches(chunk(nodesWithEmbeddings, BATCH_SIZE)); + await uploadContentBatches({ + content: nodesWithEmbeddings, + supabaseClient, + context, + }); +}; + +const upsertRoamNodesToSupabaseAsFullContent = async ({ + nodes, + supabaseClient, + context, +}: { + nodes: RoamFullContentNode[]; + supabaseClient: DGSupabaseClient; + context: SupabaseContext; +}): Promise => { + if (nodes.length === 0) { + return; + } + + const fullContent = convertRoamNodeToFullContent({ nodes }); + await uploadContentBatches({ + content: fullContent, + supabaseClient, + context, + }); }; const getAllUsers = async (): Promise => { @@ -782,6 +826,108 @@ const getAllMissingOrNewDiscourseNodes = async ({ ]; }; +const getSharedSourceLocalIds = async ({ + supabaseClient, + spaceId, +}: { + supabaseClient: DGSupabaseClient; 
+ spaceId: number; +}): Promise> => { + const sharedResources = await getAllPages( + supabaseClient + .from("ResourceAccess") + .select("source_local_id") + .eq("space_id", spaceId) + .order("source_local_id") + .order("account_uid"), + 1000, + ); + + if (!Array.isArray(sharedResources)) throw sharedResources; + + return new Set(sharedResources.map((resource) => resource.source_local_id)); +}; + +type SharedFullContentUpdateRow = { + author_local_id: string; + source_local_id: string; + created: number; + node_edit_time: number; + page_edit_time: number; + text: string; +}; + +type SharedFullContentUpdate = { + fullContentNode: RoamFullContentNode; + nodeTypeId: string; +}; + +const getSharedRoamNodesWithFullContentUpdatesSince = async ({ + sourceLocalIds, + since, + nodeTypes, +}: { + sourceLocalIds: ReadonlySet; + since: number | undefined; + nodeTypes: DiscourseNode[]; +}): Promise => { + const sharedSourceLocalIds = Array.from(sourceLocalIds); + if (sharedSourceLocalIds.length === 0 || nodeTypes.length === 0) { + return []; + } + + const sinceMs = since ?? DEFAULT_SYNC_TIME; + const query = `[ + :find ?node-title ?uid ?nodeCreateTime ?nodeEditTime ?pageEditTime ?author_local_id + :keys text source_local_id created node_edit_time page_edit_time author_local_id + :in $ [?sharedUid ...] 
?since + :where + [?node :block/uid ?sharedUid] + [?node :node/title ?node-title] + [?node :block/uid ?uid] + [?node :create/time ?nodeCreateTime] + [?node :create/user ?user-eid] + [?user-eid :user/uid ?author_local_id] + [(get-else $ ?node :edit/time ?nodeCreateTime) ?nodeEditTime] + [(get-else $ ?node :page/edit-time ?nodeEditTime) ?pageEditTime] + [or + [(> ?nodeEditTime ?since)] + [(> ?pageEditTime ?since)]] + ]`; + + const rows = (await window.roamAlphaAPI.data.backend.q( + query, + sharedSourceLocalIds, + sinceMs, + )) as unknown[] as SharedFullContentUpdateRow[]; + const typeMatchers = nodeTypes.map((node) => ({ + node, + regex: getDiscourseNodeFormatExpression(node.format), + })); + + return rows.flatMap((row) => { + const matchingNodeType = typeMatchers.find(({ regex }) => + regex.test(row.text), + )?.node; + if (matchingNodeType === undefined) { + return []; + } + + return [ + { + fullContentNode: { + author_local_id: row.author_local_id, + source_local_id: row.source_local_id, + created: row.created, + last_modified: Math.max(row.node_edit_time, row.page_edit_time), + text: row.text, + }, + nodeTypeId: matchingNodeType.type, + }, + ]; + }); +}; + export const createOrUpdateDiscourseEmbedding = async ( showToast = false, ): Promise => { @@ -899,7 +1045,7 @@ export const createOrUpdateDiscourseEmbedding = async ( (n) => n.backedBy === "user", ); - const allNodeInstances = await measureSyncPhase({ + const changedNodeInstances = await measureSyncPhase({ phase: isInitialSync ? 
"getAllMissingOrNewDiscourseNodes" : "getAllDiscourseNodesSince", @@ -914,6 +1060,32 @@ export const createOrUpdateDiscourseEmbedding = async ( }) : getAllDiscourseNodesSince(sinceTime, allDgNodeTypes), }); + const sharedSourceLocalIds = await measureSyncPhase({ + phase: "getSharedSourceLocalIds", + phases, + operation: () => + getSharedSourceLocalIds({ + supabaseClient: activeSupabaseClient, + spaceId: activeContext.spaceId, + }), + }); + const sharedFullContentUpdates = await measureSyncPhase({ + phase: "getSharedFullContentUpdates", + phases, + operation: () => + getSharedRoamNodesWithFullContentUpdatesSince({ + sourceLocalIds: sharedSourceLocalIds, + since: sinceTime, + nodeTypes: allDgNodeTypes, + }), + }); + const sharedFullContentNodes = sharedFullContentUpdates.map( + (update) => update.fullContentNode, + ); + const sharedNodeTypeIds = new Set( + sharedFullContentUpdates.map((update) => update.nodeTypeId), + ); + await measureSyncPhase({ phase: "upsertUsers", phases, @@ -925,19 +1097,30 @@ export const createOrUpdateDiscourseEmbedding = async ( phases, operation: () => upsertNodesToSupabaseAsContentWithEmbeddings( - allNodeInstances, + changedNodeInstances, activeSupabaseClient, activeContext, ), }); + await measureSyncPhase({ + phase: "upsertFullContent", + phases, + operation: () => + upsertRoamNodesToSupabaseAsFullContent({ + nodes: sharedFullContentNodes, + supabaseClient: activeSupabaseClient, + context: activeContext, + }), + }); await measureSyncPhase({ phase: "convertConcepts", phases, operation: () => convertDgToSupabaseConcepts({ - nodesSince: allNodeInstances, + nodesSince: changedNodeInstances, since: sinceTime, allNodeTypes: allDgNodeTypes, + sharedNodeTypeIds, supabaseClient: activeSupabaseClient, context: activeContext, }),