Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions scientific-terminology-unit-guard/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Scientific Terminology And Unit Guard

Self-contained SCIBASE real-time collaborative editor slice for issue #12. The guard checks whether collaborator edits keep scientific terminology, acronyms, units, and equation variables consistent before WYSIWYG, Markdown, LaTeX, or publication exports proceed.

## Why this slice is distinct

Existing #12 submissions cover broad editor foundations, operation replay, offline conflict resolution, notebook/kernel collaboration, reference formatting, round-trip export, accessibility parity, suggestion provenance, chat mentions, notification visibility, task dependencies, section locks, and figure/table review lanes. This module focuses only on shared scientific language consistency while multiple collaborators edit the same manuscript.

## Run

```bash
npm test
npm run demo
npm run demo:video
```

Demo artifacts are written to `reports/`, including JSON, Markdown, SVG, GIF, and MP4 files.
59 changes: 59 additions & 0 deletions scientific-terminology-unit-guard/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
const fs = require("fs");
const path = require("path");

const { assessTerminologyAndUnits } = require("./index");
const { cleanDocument, riskyDocument } = require("./sample-data");

// Every demo artifact is written into a `reports` directory that sits next to this script.
const reportsDir = path.join(__dirname, "reports");
// `recursive: true` makes the call a no-op when the directory already exists.
fs.mkdirSync(reportsDir, { recursive: true });

/**
 * Renders an assessment report as a Markdown document.
 *
 * @param {string} name - Scenario label printed under the title.
 * @param {object} report - Report providing `title`, `decision`, `summary`,
 *   `findings` and `releaseCriteria`.
 * @returns {string} Markdown text terminated by a single newline.
 */
function markdownReport(name, report) {
  const describeFinding = (entry) =>
    `- ${entry.severity.toUpperCase()} ${entry.code}: ${entry.message}`;

  let findingsSection = "- No terminology or unit findings.";
  if (report.findings.length) {
    findingsSection = report.findings.map(describeFinding).join("\n");
  }

  const { blocksReviewed, suggestionsReviewed } = report.summary;

  // Each element is one output line; empty strings become the blank separator
  // lines, and the trailing empty string yields the final newline.
  const lines = [
    `# ${report.title}`,
    "",
    `Scenario: ${name}`,
    "",
    `Decision: ${report.decision.toUpperCase()}`,
    "",
    `Reviewed ${blocksReviewed} document blocks and ${suggestionsReviewed} collaborator suggestions.`,
    "",
    "## Findings",
    "",
    findingsSection,
    "",
    "## Release Criteria",
    "",
    report.releaseCriteria.map((criterion) => `- ${criterion}`).join("\n"),
    "",
  ];
  return lines.join("\n");
}

/**
 * Escapes the XML special characters so a value can be embedded in SVG text
 * content or attribute values without breaking the markup.
 *
 * @param {*} value - Value to embed; it is converted with `String()`.
 * @returns {string} Escaped text.
 */
function escapeXml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}

/**
 * Renders a summary card for an assessment report as an SVG document.
 *
 * The badge is red for a "hold" decision, amber for "revise" and green for
 * any other decision.
 *
 * @param {object} report - Report providing `documentId`, `decision` and
 *   `summary.{findings,high,medium,low}`.
 * @returns {string} SVG markup.
 */
function svgReport(report) {
  const color =
    report.decision === "hold"
      ? "#dc2626"
      : report.decision === "revise"
        ? "#d97706"
        : "#16a34a";

  // Document data may contain markup characters; escape everything that is
  // interpolated so the generated file always stays well-formed XML.
  const documentId = escapeXml(report.documentId);
  const decisionLabel = escapeXml(report.decision.toUpperCase());
  const total = escapeXml(report.summary.findings);
  const high = escapeXml(report.summary.high);
  const medium = escapeXml(report.summary.medium);
  const low = escapeXml(report.summary.low);

  return `<svg xmlns="http://www.w3.org/2000/svg" width="900" height="420" viewBox="0 0 900 420">
<rect width="900" height="420" fill="#111827"/>
<text x="42" y="70" fill="#f9fafb" font-family="Arial" font-size="34">Scientific Terminology Unit Guard</text>
<text x="42" y="118" fill="#cbd5e1" font-family="Arial" font-size="20">${documentId}</text>
<rect x="42" y="156" width="220" height="82" rx="8" fill="${color}"/>
<text x="68" y="207" fill="#fff" font-family="Arial" font-size="30">${decisionLabel}</text>
<text x="42" y="286" fill="#e5e7eb" font-family="Arial" font-size="22">Findings: ${total}</text>
<text x="42" y="326" fill="#fecaca" font-family="Arial" font-size="20">High: ${high}</text>
<text x="172" y="326" fill="#fed7aa" font-family="Arial" font-size="20">Medium: ${medium}</text>
<text x="342" y="326" fill="#bfdbfe" font-family="Arial" font-size="20">Low: ${low}</text>
<text x="42" y="372" fill="#9ca3af" font-family="Arial" font-size="18">Synthetic collaborative manuscript data only.</text>
</svg>`;
}

/**
 * Assesses one sample document and writes its JSON, Markdown and SVG reports
 * into the reports directory, then logs a one-line summary.
 *
 * @param {string} name - Scenario name, used as the artifact file stem.
 * @param {object} document - Raw document passed to the guard.
 */
function writeScenarioArtifacts(name, document) {
  const report = assessTerminologyAndUnits(document);
  const artifactPath = (extension) => path.join(reportsDir, `${name}.${extension}`);

  fs.writeFileSync(artifactPath("json"), JSON.stringify(report, null, 2));
  fs.writeFileSync(artifactPath("md"), markdownReport(name, report));
  fs.writeFileSync(artifactPath("svg"), svgReport(report));
  console.log(`${name}: ${report.decision} (${report.summary.findings} findings)`);
}

writeScenarioArtifacts("clean-document", cleanDocument);
writeScenarioArtifacts("risky-document", riskyDocument);
46 changes: 46 additions & 0 deletions scientific-terminology-unit-guard/demo_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from pathlib import Path

import imageio.v3 as iio
import numpy as np
from PIL import Image, ImageDraw, ImageFont


# Directory containing this script; generated media goes into its "reports" subfolder.
ROOT = Path(__file__).resolve().parent
REPORTS = ROOT / "reports"
# exist_ok=True keeps repeated runs from failing when the folder is already there.
REPORTS.mkdir(exist_ok=True)


def font(size):
    """Return a font for drawing text at ``size``.

    Tries each known TrueType file name in order and returns the first one
    that loads; when none can be opened, falls back to Pillow's default font.
    """
    candidates = ("arial.ttf", "segoeui.ttf")
    for candidate in candidates:
        try:
            loaded = ImageFont.truetype(candidate, size)
        except OSError:
            # This font file is not available; try the next candidate.
            continue
        return loaded
    return ImageFont.load_default()


# (title, subtitle) pairs, rendered as one slide each in the demo animation.
slides = [
    ("Terminology + Unit Guard", "Real-time collaborative editor #12"),
    ("Checks", "glossary drift + acronym first-use gaps"),
    ("Checks", "unit conflicts + equation variable mismatch"),
    ("Decision", "hold publication export until shared language is consistent"),
]

# Derived from the list so the "Slide i/N" footer stays correct if slides change.
total_slides = len(slides)
# Each slide image is repeated this many times to hold it on screen.
frames_per_slide = 14

frames = []
for index, (title, subtitle) in enumerate(slides, start=1):
    image = Image.new("RGB", (960, 544), "#172033")
    draw = ImageDraw.Draw(image)
    draw.rectangle((46, 54, 914, 490), outline="#a3e635", width=3)
    draw.text((82, 124), title, fill="#f8fafc", font=font(42))
    draw.text((82, 206), subtitle, fill="#ecfccb", font=font(26))
    draw.rectangle((82, 326, 742, 382), fill="#365314")
    draw.text(
        (104, 342),
        "collaborator suggestions cannot introduce scientific drift",
        fill="#f7fee7",
        font=font(22),
    )
    draw.text(
        (82, 438),
        f"Slide {index}/{total_slides} - synthetic reviewer artifact",
        fill="#cbd5e1",
        font=font(20),
    )
    frames.extend([image] * frames_per_slide)

gif_path = REPORTS / "demo.gif"
mp4_path = REPORTS / "demo.mp4"
# The same frame sequence is written twice: as an endlessly looping GIF and as an MP4.
frames[0].save(gif_path, save_all=True, append_images=frames[1:], duration=120, loop=0)
iio.imwrite(mp4_path, [np.asarray(frame) for frame in frames], fps=8, codec="libx264")
print(f"wrote {gif_path}")
print(f"wrote {mp4_path}")
212 changes: 212 additions & 0 deletions scientific-terminology-unit-guard/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// Severity labels attached to every finding. assessTerminologyAndUnits counts
// findings per label and derives the release decision from those counts.
const HIGH = "high";
const MEDIUM = "medium";
const LOW = "low";

/**
 * Validates that `value` is a string with at least one non-whitespace character.
 *
 * @param {*} value - Candidate value.
 * @param {string} field - Field name used in the error message.
 * @returns {string} The value with surrounding whitespace removed.
 * @throws {TypeError} When `value` is not a string or is blank.
 */
function requireString(value, field) {
  const isUsable = typeof value === "string" && value.trim().length > 0;
  if (isUsable) {
    return value.trim();
  }
  throw new TypeError(`${field} must be a non-empty string`);
}

/**
 * Validates that `value` is an array and hands it back unchanged.
 *
 * @param {*} value - Candidate value.
 * @param {string} field - Field name used in the error message.
 * @returns {Array} The same array instance that was passed in.
 * @throws {TypeError} When `value` is not an array.
 */
function list(value, field) {
  if (Array.isArray(value)) {
    return value;
  }
  throw new TypeError(`${field} must be an array`);
}

/**
 * Converts a token to a trimmed string; every falsy input becomes "".
 *
 * @param {*} value - Raw token.
 * @returns {string} Trimmed text, or "" for falsy input.
 */
function normalizeToken(value) {
  const text = value ? String(value) : "";
  return text.trim();
}

/**
 * Validates a raw document and returns it in the shape the analysis works on.
 *
 * Missing lookup tables default to `{}`, missing lists default to `[]`, and
 * missing text defaults to "".
 *
 * @param {object} raw - Raw document with `documentId`, `title` and optional
 *   `glossary`, `units`, `variables`, `blocks` and `suggestions`.
 * @returns {object} Normalized document.
 * @throws {TypeError} When a required string is missing or blank, or when a
 *   list field is present but not an array.
 */
function normalizeDocument(raw) {
  const normalizeBlock = (block) => {
    const id = requireString(block.id, "block.id");
    const author = requireString(block.author, "block.author");
    const text = String(block.text || "");
    const acronyms = list(block.acronyms || [], "block.acronyms");
    const units = list(block.units || [], "block.units");
    const variables = list(block.variables || [], "block.variables");
    return { id, author, text, acronyms, units, variables };
  };

  const normalizeSuggestion = (suggestion) => {
    const id = requireString(suggestion.id, "suggestion.id");
    const author = requireString(suggestion.author, "suggestion.author");
    const targetBlock = requireString(suggestion.targetBlock, "suggestion.targetBlock");
    const text = String(suggestion.text || "");
    const acronyms = list(suggestion.acronyms || [], "suggestion.acronyms");
    const units = list(suggestion.units || [], "suggestion.units");
    const variables = list(suggestion.variables || [], "suggestion.variables");
    return { id, author, targetBlock, text, acronyms, units, variables };
  };

  // Fields are validated in declaration order so the first invalid field is
  // the one that gets reported.
  const documentId = requireString(raw.documentId, "documentId");
  const title = requireString(raw.title, "title");
  const glossary = raw.glossary || {};
  const units = raw.units || {};
  const variables = raw.variables || {};
  const blocks = list(raw.blocks || [], "blocks").map((block) => normalizeBlock(block));
  const suggestions = list(raw.suggestions || [], "suggestions").map((suggestion) =>
    normalizeSuggestion(suggestion)
  );

  return { documentId, title, glossary, units, variables, blocks, suggestions };
}

/**
 * Builds a finding record tied to the block or suggestion that caused it.
 *
 * @param {string} code - Finding code.
 * @param {string} severity - Severity label.
 * @param {{id: string, author: string}} source - Originating block or suggestion.
 * @param {string} message - Description of the problem.
 * @param {string} remediation - Suggested fix.
 * @returns {object} Record with `code`, `severity`, `sourceId`, `author`,
 *   `message` and `remediation`.
 */
function finding(code, severity, source, message, remediation) {
  const { id: sourceId, author } = source;
  return { code, severity, sourceId, author, message, remediation };
}

/**
 * Reads `key` from a lookup table only when it is the table's own property.
 *
 * A plain `table[key]` also resolves inherited members such as `constructor`
 * or `toString`, which would make an unregistered token look registered.
 *
 * @param {object} table - Glossary, unit or variable table.
 * @param {string} key - Token to look up.
 * @returns {*} The registered value, or `undefined` when the key is not registered.
 */
function ownEntry(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Checks one block or suggestion against the document's shared glossary,
 * canonical units and variable definitions, appending a finding for every
 * inconsistency.
 *
 * @param {object} doc - Normalized document providing `glossary`, `units`
 *   and `variables` lookup tables.
 * @param {object} source - Normalized block or suggestion with `acronyms`,
 *   `units` and `variables` arrays.
 * @param {object[]} findings - Output array; it is mutated in place.
 * @returns {void}
 */
function analyzeSource(doc, source, findings) {
  for (const item of source.acronyms) {
    const short = normalizeToken(item.short);
    const expected = normalizeToken(ownEntry(doc.glossary, short));
    const observed = normalizeToken(item.long);
    if (!short) {
      // Entries without an acronym carry nothing to check.
      continue;
    }
    if (!expected) {
      findings.push(
        finding(
          "UNKNOWN_ACRONYM",
          MEDIUM,
          source,
          `${short} is used but is absent from the shared glossary.`,
          "Add the acronym to the shared glossary or replace it before publication export."
        )
      );
    } else if (!observed) {
      findings.push(
        finding(
          "MISSING_FIRST_USE_EXPANSION",
          MEDIUM,
          source,
          `${short} appears without its first-use expansion.`,
          `Expand ${short} as "${expected}" at first use in the collaborative document.`
        )
      );
    } else if (observed.toLowerCase() !== expected.toLowerCase()) {
      // Expansions are compared case-insensitively.
      findings.push(
        finding(
          "ACRONYM_DRIFT",
          HIGH,
          source,
          `${short} is defined as "${observed}" but glossary expects "${expected}".`,
          "Resolve the conflicting expansion in the suggestion or block before accepting changes."
        )
      );
    }
  }

  for (const item of source.units) {
    const quantity = normalizeToken(item.quantity);
    const unit = normalizeToken(item.unit);
    const expected = normalizeToken(ownEntry(doc.units, quantity));
    if (!expected) {
      findings.push(
        finding(
          "UNKNOWN_QUANTITY_UNIT",
          LOW,
          source,
          `${quantity} has unit "${unit}" but no canonical unit is registered.`,
          "Register a canonical unit or mark the quantity as intentionally free-form."
        )
      );
    } else if (unit !== expected) {
      // Units are compared exactly, including letter case.
      findings.push(
        finding(
          "UNIT_CONFLICT",
          HIGH,
          source,
          `${quantity} uses "${unit}" but canonical unit is "${expected}".`,
          "Convert or annotate the value before export so collaborators compare like with like."
        )
      );
    }
  }

  for (const item of source.variables) {
    const symbol = normalizeToken(item.symbol);
    const meaning = normalizeToken(item.meaning);
    const expected = normalizeToken(ownEntry(doc.variables, symbol));
    if (!expected) {
      findings.push(
        finding(
          "UNREGISTERED_VARIABLE",
          LOW,
          source,
          `${symbol} is used in an equation context without a shared definition.`,
          "Add the variable to the equation legend before accepting the edit."
        )
      );
    } else if (meaning.toLowerCase() !== expected.toLowerCase()) {
      // Meanings are compared case-insensitively.
      findings.push(
        finding(
          "VARIABLE_MEANING_DRIFT",
          HIGH,
          source,
          `${symbol} means "${meaning}" here but shared definition is "${expected}".`,
          "Resolve the variable definition mismatch before rendering equations or exports."
        )
      );
    }
  }
}

/**
 * Runs the terminology and unit guard over a collaborative document.
 *
 * Every block and every suggestion is analyzed, suggestions that point at a
 * non-existent block are flagged, and the findings are rolled up into a
 * decision: "hold" when any high-severity finding exists, otherwise "revise"
 * when any medium-severity finding exists, otherwise "release".
 *
 * @param {object} rawDocument - Raw document; validated by `normalizeDocument`.
 * @returns {object} Report with `documentId`, `title`, `decision`, `summary`,
 *   `findings` and `releaseCriteria`.
 * @throws {TypeError} When the raw document fails validation.
 */
function assessTerminologyAndUnits(rawDocument) {
  const doc = normalizeDocument(rawDocument);
  const findings = [];

  for (const block of doc.blocks) {
    analyzeSource(doc, block, findings);
  }
  for (const suggestion of doc.suggestions) {
    analyzeSource(doc, suggestion, findings);
  }

  // Only block ids are valid targets. Including suggestion ids here would let
  // a suggestion that points at another suggestion (or at itself) slip through
  // even though no such block exists.
  const blockIds = new Set(doc.blocks.map((block) => block.id));
  for (const suggestion of doc.suggestions) {
    if (!blockIds.has(suggestion.targetBlock)) {
      findings.push(
        finding(
          "ORPHAN_SUGGESTION_TARGET",
          MEDIUM,
          suggestion,
          `Suggestion targets missing block ${suggestion.targetBlock}.`,
          "Retarget or close the suggestion before merging collaborative edits."
        )
      );
    }
  }

  const countOf = (severity) => findings.filter((item) => item.severity === severity).length;
  const high = countOf(HIGH);
  const medium = countOf(MEDIUM);
  const low = countOf(LOW);

  // Low-severity findings are reported but never block a release.
  const decision = high > 0 ? "hold" : medium > 0 ? "revise" : "release";

  return {
    documentId: doc.documentId,
    title: doc.title,
    decision,
    summary: {
      blocksReviewed: doc.blocks.length,
      suggestionsReviewed: doc.suggestions.length,
      findings: findings.length,
      high,
      medium,
      low,
    },
    findings,
    releaseCriteria: [
      "Shared glossary acronyms keep one expansion across all blocks and suggestions.",
      "Scientific quantities use the canonical unit selected for the collaborative document.",
      "Equation variables keep one meaning before WYSIWYG, Markdown, LaTeX, or export render.",
      "Suggestions cannot introduce terminology drift while being accepted into the manuscript.",
    ],
  };
}

// Public API: the guard entry point plus the document normalizer.
module.exports = {
assessTerminologyAndUnits,
normalizeDocument,
};
13 changes: 13 additions & 0 deletions scientific-terminology-unit-guard/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"name": "scientific-terminology-unit-guard",
"version": "1.0.0",
"description": "Collaborative terminology and unit consistency guard for SCIBASE editor issue #12",
"main": "index.js",
"type": "commonjs",
"scripts": {
"test": "node test.js",
"demo": "node demo.js",
"demo:video": "python demo_video.py"
},
"license": "MIT"
}
20 changes: 20 additions & 0 deletions scientific-terminology-unit-guard/reports/clean-document.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"documentId": "editor-term-001",
"title": "Collaborative catalyst manuscript",
"decision": "release",
"summary": {
"blocksReviewed": 2,
"suggestionsReviewed": 1,
"findings": 0,
"high": 0,
"medium": 0,
"low": 0
},
"findings": [],
"releaseCriteria": [
"Shared glossary acronyms keep one expansion across all blocks and suggestions.",
"Scientific quantities use the canonical unit selected for the collaborative document.",
"Equation variables keep one meaning before WYSIWYG, Markdown, LaTeX, or export render.",
"Suggestions cannot introduce terminology drift while being accepted into the manuscript."
]
}
18 changes: 18 additions & 0 deletions scientific-terminology-unit-guard/reports/clean-document.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Collaborative catalyst manuscript

Scenario: clean-document

Decision: RELEASE

Reviewed 2 document blocks and 1 collaborator suggestions.

## Findings

- No terminology or unit findings.

## Release Criteria

- Shared glossary acronyms keep one expansion across all blocks and suggestions.
- Scientific quantities use the canonical unit selected for the collaborative document.
- Equation variables keep one meaning before WYSIWYG, Markdown, LaTeX, or export render.
- Suggestions cannot introduce terminology drift while being accepted into the manuscript.
Loading