diff --git a/docs/mcp-pilot-guide.md b/docs/mcp-pilot-guide.md
index 9fbaa55b..45dd7502 100644
--- a/docs/mcp-pilot-guide.md
+++ b/docs/mcp-pilot-guide.md
@@ -445,6 +445,14 @@ NitroX is Provar's Hybrid Model for locators — it maps Salesforce component-ba
**Background:** A regression in 1.5.0 (PDX-479) traced to authoring guidance that steered LLMs toward a per-step construction pattern. Multi-call construction drops scenario numbers (e.g. Scenario 1 → Scenario 3, no Scenario 2), flattens asserts that should be nested inside `UiWithScreen` clauses, and produces inconsistent assert API IDs across the case. This scenario exists so the regression class is exercised in pilot evaluation and cannot recur silently.
+**Defense in depth.** Three layers protect against the multi-call construction pattern:
+
+1. **Prompt and resource guidance** — authoring prompts and the MCP step-reference resource describe single-call construction as the contract.
+2. **Tool-description contract** — `provar_testcase_generate` and `provar_testcase_step_edit` descriptions explicitly mark generate as constructor-only and step_edit as amendment-only, so the LLM reads the contract at every call site (including compact schema mode).
+3. **Runtime guard** — `provar_testcase_generate` rejects the exact shape that would produce a skeleton-only file: `steps:[]` + `dry_run:false` + `output_path`. The rejection returns `STEPS_REQUIRED` with `details.suggestion` telling the LLM to pass the full step tree in one call. Empty-steps shapes that don't write a file (dry-run preview, no `output_path`) remain allowed.
+
+If a pilot LLM falls into the multi-call pattern despite the description contract, the runtime guard converts the failure into an actionable error rather than a silently broken file on disk.
+
**Prompt:**
> "Create a Provar test case `AccountFlow.testcase` that covers three scenarios:
diff --git a/docs/mcp.md b/docs/mcp.md
index 7a08fed2..45a2ec51 100644
--- a/docs/mcp.md
+++ b/docs/mcp.md
@@ -765,10 +765,22 @@ AssertValues uses **flat** argument structure (`expectedValue`, `actualValue`, `
**Error codes**
-| Code | Meaning |
-| ------------------ | --------------------------------------------------------------------- |
-| `TESTCASE_INVALID` | Generated XML failed structural validation (see `details.validation`) |
-| `FILE_EXISTS` | `output_path` already exists and `overwrite=false` |
+| Code | Meaning |
+| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `TESTCASE_INVALID` | Generated XML failed structural validation (see `details.validation`) |
+| `FILE_EXISTS` | `output_path` already exists and `overwrite=false` |
+| `STEPS_REQUIRED` | Called with `steps:[]` + `dry_run:false` + `output_path` — constructing a test case requires the full step tree on the write path. `details.suggestion` tells the caller how to fix. |
+
+**`STEPS_REQUIRED`.** The rejected shape is `steps:[]` + `dry_run:false` + `output_path`. Constructing a test case requires the full step tree in a single call; passing an empty array on the write path would produce a skeleton-only file. All other empty-steps shapes remain allowed:
+
+| `steps.length` | `dry_run` | `output_path` | Result |
+| -------------- | ------------- | ------------- | ------------------------------------------------------- |
+| 0 | `true` | any | Allowed — preserves skeleton inspection / IDE preview |
+| 0 | `false` | absent | Allowed — no file would be written anyway |
+| 0 | `false` | **present** | **Rejected** with `STEPS_REQUIRED` (no file is written) |
+| ≥ 1 | true or false | any | Allowed — normal happy path |
+
+`details.suggestion` instructs the caller to pass the FULL step tree in a single call, clarifies that `provar_testcase_step_edit` is for amendment-only, and notes the `dry_run=true` escape hatch for skeleton inspection.
---
diff --git a/scripts/mcp-smoke.cjs b/scripts/mcp-smoke.cjs
index 7d138dc1..50cf628d 100644
--- a/scripts/mcp-smoke.cjs
+++ b/scripts/mcp-smoke.cjs
@@ -177,6 +177,19 @@ async function runTests() {
dry_run: true,
});
+ // ── 6b. provar_testcase_generate STEPS_REQUIRED runtime guard (PDX-483) ───
+ // Drives the rejected shape (steps:[] + dry_run:false + output_path) so the
+ // PDX-479 regression-class shape is exercised on every smoke run. The smoke
+ // framework counts any JSON-RPC response as PASS; the assertion that the
+ // body carries error_code='STEPS_REQUIRED' lives in scripts/pdx-482-validate.cjs.
+ if (inGroup('authoring'))
+ await callTool('provar_testcase_generate', {
+ test_case_name: 'PDX-483 Guard Smoke',
+ steps: [],
+ dry_run: false,
+ output_path: path.join(TMP, 'pdx483-smoke-rejected.testcase'),
+ });
+
// ── 7. provar_testcase_validate ───────────────────────────────────────────
if (inGroup('validation')) await callTool('provar_testcase_validate', { content: '' });
diff --git a/scripts/pdx-482-validate.cjs b/scripts/pdx-482-validate.cjs
index 5b4e5473..8685e9d6 100644
--- a/scripts/pdx-482-validate.cjs
+++ b/scripts/pdx-482-validate.cjs
@@ -1,16 +1,26 @@
-// PDX-482 validation: confirm the construct/amend contract is reachable at the
-// MCP protocol surface in BOTH standard and compact schema modes.
+// PDX-482 / PDX-483 validation: confirm the construct/amend contract is reachable
+// at the MCP protocol surface and that the PDX-483 runtime guard rejects the
+// PDX-479 multi-call pattern shape.
//
-// The LLM reads tools/list before every tool call, so every assertion here is
-// on bytes the LLM literally sees at the call site. Compact mode coverage is
-// critical because the adversarial review identified that PROVAR_MCP_SCHEMA_MODE=compact
-// silently swapped the description for a contract-free one-liner.
+// PDX-482 (standard + compact modes): assertions on tools/list — every byte the
+// LLM literally sees at the call site. Compact mode coverage is critical because
+// the adversarial review identified that PROVAR_MCP_SCHEMA_MODE=compact silently
+// swapped the description for a contract-free one-liner.
+//
+// PDX-483 (runtime-guard mode): drives a real tools/call with the rejected shape
+// (steps:[]+dry_run:false+output_path) and asserts the response is a structured
+// STEPS_REQUIRED error with a non-empty details.suggestion. This catches a
+// regression class that the tools/list assertions cannot reach: the passive
+// contract surviving in the description while the active guard silently
+// regresses (e.g. a refactor reorders the handler so writes happen before the
+// check).
//
// yarn compile
// node scripts/pdx-482-validate.cjs
'use strict';
+const fs = require('fs');
const { spawn } = require('child_process');
const os = require('os');
const path = require('path');
@@ -238,13 +248,156 @@ function compactAssertions(toolList, record) {
}
}
+// ── PDX-483 runtime guard: tools/call assertion ─────────────────────────────
+// Drives a real tools/call(provar_testcase_generate, ...) with the rejected
+// shape (steps:[] + dry_run:false + output_path) and asserts the response is
+// a structured STEPS_REQUIRED error. This is the only check that catches a
+// silent regression where the passive description survives but the active
+// runtime guard is removed or reordered after a side effect.
+function runRuntimeGuardValidation() {
+ return new Promise((resolve, reject) => {
+ const server = spawn(process.execPath, [entry, 'mcp', 'start', '--allowed-paths', TMP, '--no-update-check'], {
+ stdio: ['pipe', 'pipe', 'inherit'],
+ env: { ...process.env },
+ });
+
+ let nextId = 1;
+ const pending = new Map();
+ let buf = '';
+
+ server.stdout.on('data', (chunk) => {
+ buf += chunk.toString('utf-8');
+ let nl;
+ while ((nl = buf.indexOf('\n')) !== -1) {
+ const line = buf.slice(0, nl).trim();
+ buf = buf.slice(nl + 1);
+ if (!line) continue;
+ try {
+ const msg = JSON.parse(line);
+ const cb = pending.get(msg.id);
+ if (cb) {
+ pending.delete(msg.id);
+ cb(msg);
+ }
+ } catch {
+ /* ignore */
+ }
+ }
+ });
+
+ const rpc = (method, params) => {
+ const id = nextId++;
+ const req = JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n';
+ return new Promise((rpcResolve, rpcReject) => {
+ pending.set(id, rpcResolve);
+ setTimeout(() => {
+ if (pending.has(id)) {
+ pending.delete(id);
+ rpcReject(new Error(`Timeout waiting for ${method}`));
+ }
+ }, 10000);
+ server.stdin.write(req);
+ });
+ };
+
+ const results = [];
+ const record = (label, ok, detail) => {
+ results.push({ label: `[runtime-guard] ${label}`, ok, detail });
+ };
+
+ (async () => {
+ await rpc('initialize', {
+ protocolVersion: '2024-11-05',
+ capabilities: {},
+ clientInfo: { name: 'pdx-483-validate', version: '1.0.0' },
+ });
+
+ // Use a unique tmp path so a leftover file from a prior run can't mask the assertion.
+ const outPath = path.join(TMP, `pdx483-validate-${Date.now()}.testcase`);
+ try {
+ if (fs.existsSync(outPath)) fs.unlinkSync(outPath);
+ } catch {
+ /* best-effort */
+ }
+
+ const callRes = await rpc('tools/call', {
+ name: 'provar_testcase_generate',
+ arguments: {
+ test_case_name: 'PDX-483 validate',
+ steps: [],
+ dry_run: false,
+ output_path: outPath,
+ },
+ });
+
+ // MCP tools/call returns { result: { content: [{ type, text }], isError? } }.
+ // The tool's error body is JSON-encoded in content[0].text.
+ const result = callRes.result;
+ record(
+ 'tools/call returned a result (no protocol-level error)',
+ !!result && !callRes.error,
+ callRes.error ? JSON.stringify(callRes.error).slice(0, 120) : 'protocol OK'
+ );
+ record(
+ 'result.isError === true (tool-level rejection)',
+ result?.isError === true,
+ `isError: ${String(result?.isError)} — rejection must surface at content level`
+ );
+
+ let body = null;
+ try {
+ body = JSON.parse(result?.content?.[0]?.text ?? '{}');
+ } catch (parseErr) {
+ record('content[0].text parses as JSON', false, parseErr.message);
+ }
+ record(
+ 'error_code === "STEPS_REQUIRED"',
+ body?.error_code === 'STEPS_REQUIRED',
+ `error_code: ${body?.error_code} — must match the documented code from docs/mcp.md`
+ );
+ record(
+ 'retryable === false',
+ body?.retryable === false,
+ 'STEPS_REQUIRED is a contract violation — retrying with the same payload would never succeed'
+ );
+ record(
+ 'details.suggestion is a non-empty string',
+ typeof body?.details?.suggestion === 'string' && body.details.suggestion.length > 0,
+ 'details.suggestion must tell the LLM how to self-correct (canonical multi-call rejection text)'
+ );
+ record(
+ 'details.suggestion mentions "FULL step tree"',
+ typeof body?.details?.suggestion === 'string' && body.details.suggestion.includes('FULL step tree'),
+ 'suggestion must point the LLM at the single-call pattern'
+ );
+ record(
+ 'details.suggestion mentions dry_run=true escape hatch',
+ typeof body?.details?.suggestion === 'string' && body.details.suggestion.includes('dry_run=true'),
+ 'suggestion must mention dry_run=true for legitimate skeleton-inspection callers'
+ );
+ record(
+ 'no file written at output_path (zero side effects)',
+ !fs.existsSync(outPath),
+ 'STEPS_REQUIRED must run BEFORE fs.writeFileSync — no skeleton on disk'
+ );
+
+ server.stdin.end();
+ resolve(results);
+ })().catch((err) => {
+ server.kill();
+ reject(err);
+ });
+ });
+}
+
(async () => {
const standardResults = await runValidation('standard', {}, standardAssertions);
// Explicitly null out the env var on the standard pass to ensure no leakage.
// For compact, set PROVAR_MCP_SCHEMA_MODE=compact via the spawn env.
const compactResults = await runValidation('compact', { PROVAR_MCP_SCHEMA_MODE: 'compact' }, compactAssertions);
+ const runtimeGuardResults = await runRuntimeGuardValidation();
- const allResults = [...standardResults, ...compactResults];
+ const allResults = [...standardResults, ...compactResults, ...runtimeGuardResults];
let pass = 0;
let fail = 0;
@@ -256,7 +409,7 @@ function compactAssertions(toolList, record) {
fail++;
}
}
- console.log(`\nPDX-482 validation: ${pass} passed, ${fail} failed`);
+ console.log(`\nPDX-482/PDX-483 validation: ${pass} passed, ${fail} failed`);
process.exit(fail > 0 ? 1 : 0);
})().catch((err) => {
console.error('Validation script error:', err);
diff --git a/src/mcp/tools/testCaseGenerate.ts b/src/mcp/tools/testCaseGenerate.ts
index 4d7d64dc..3249cb3d 100644
--- a/src/mcp/tools/testCaseGenerate.ts
+++ b/src/mcp/tools/testCaseGenerate.ts
@@ -256,6 +256,32 @@ export function registerTestCaseGenerate(server: McpServer, config: ServerConfig
target_uri: input.target_uri,
});
+ // PDX-483: active runtime guard for the PDX-479 regression pattern.
+ // Rejects the exact shape that produces a contract-violating skeleton on
+ // disk: empty steps[] + non-dry-run + persistence target. Other empty-
+ // steps shapes (dry_run preview, no output_path) remain allowed.
+ if (input.steps.length === 0 && !input.dry_run && input.output_path) {
+ const err = makeError(
+ 'STEPS_REQUIRED',
+ 'provar_testcase_generate was called with an empty steps[] array and a target output_path. ' +
+ 'This produces a contract-violating skeleton (the PDX-479 regression pattern) and is rejected.',
+ requestId,
+ false,
+ {
+ suggestion:
+ 'Pass the FULL step tree to provar_testcase_generate in a single call. ' +
+ 'provar_testcase_step_edit is for amending an already-validated test case ' +
+ '(single-step add, attribute fix, debug edit), not for constructing one from scratch. ' +
+ 'If you genuinely want a skeleton for inspection, set dry_run=true.',
+ }
+ );
+ log('warn', 'provar_testcase_generate: STEPS_REQUIRED', {
+ requestId,
+ output_path: input.output_path,
+ });
+ return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(err) }] };
+ }
+
try {
const xmlContent = buildTestCaseXml(input);
const filePath: string | undefined = input.output_path ? path.resolve(input.output_path) : undefined;
diff --git a/test/unit/mcp/testCaseGenerate.test.ts b/test/unit/mcp/testCaseGenerate.test.ts
index b2049463..af65578d 100644
--- a/test/unit/mcp/testCaseGenerate.test.ts
+++ b/test/unit/mcp/testCaseGenerate.test.ts
@@ -382,11 +382,16 @@ describe('provar_testcase_generate', () => {
});
describe('writing to disk', () => {
+ // Each disk-write test uses a non-empty steps[] so the PDX-483 STEPS_REQUIRED
+ // guard (which rejects steps:[]+dry_run:false+output_path) does not fire.
+ // These tests assert *other* behaviour: file write, overwrite, mkdirp, path policy.
+ const SMOKE_STEPS = [{ api_id: 'UiConnect', name: 'Connect', attributes: {} }];
+
it('writes file when dry_run=false and output_path provided', () => {
const outPath = path.join(tmpDir, 'Login.testcase');
const result = server.call('provar_testcase_generate', {
test_case_name: 'Login',
- steps: [],
+ steps: SMOKE_STEPS,
output_path: outPath,
dry_run: false,
overwrite: false,
@@ -415,7 +420,7 @@ describe('provar_testcase_generate', () => {
const result = server.call('provar_testcase_generate', {
test_case_name: 'Existing',
- steps: [],
+ steps: SMOKE_STEPS,
output_path: outPath,
dry_run: false,
overwrite: false,
@@ -431,7 +436,7 @@ describe('provar_testcase_generate', () => {
const result = server.call('provar_testcase_generate', {
test_case_name: 'Existing',
- steps: [],
+ steps: SMOKE_STEPS,
output_path: outPath,
dry_run: false,
overwrite: true,
@@ -446,7 +451,7 @@ describe('provar_testcase_generate', () => {
const outPath = path.join(tmpDir, 'tests', 'suite', 'Login.testcase');
server.call('provar_testcase_generate', {
test_case_name: 'Login',
- steps: [],
+ steps: SMOKE_STEPS,
output_path: outPath,
dry_run: false,
overwrite: false,
@@ -456,14 +461,144 @@ describe('provar_testcase_generate', () => {
});
});
+ // ── PDX-483 runtime guard: reject empty steps[] on non-dry-run with output_path ──
+ // The PDX-479 regression class arose from agents calling generate with steps:[]
+ // intending to append later via step_edit. The passive contract (PDX-482) lives in
+ // the description; the active runtime guard rejects the exact shape that produces
+ // a contract-violating file on disk. The 6 edge cases below pin down which empty-
+ // steps shapes are allowed (dry-run preview, inspection-only) vs rejected (file write).
+ describe('STEPS_REQUIRED runtime guard (PDX-483)', () => {
+ const SINGLE_STEP = [{ api_id: 'UiConnect', name: 'Connect', attributes: {} }];
+
+ it('allows steps:[] + dry_run:true + no output_path (skeleton inspection)', () => {
+ const result = server.call('provar_testcase_generate', {
+ test_case_name: 'Skeleton Inspect',
+ steps: [],
+ dry_run: true,
+ overwrite: false,
+ });
+ assert.equal(isError(result), false, 'dry-run skeleton inspection must remain allowed');
+ assert.equal(parseText(result)['written'], false);
+ });
+
+ it('allows steps:[] + dry_run:true + output_path provided (dry-run preview wins)', () => {
+ const outPath = path.join(tmpDir, 'DryRunWithPath.testcase');
+ const result = server.call('provar_testcase_generate', {
+ test_case_name: 'DryRun With Path',
+ steps: [],
+ output_path: outPath,
+ dry_run: true,
+ overwrite: false,
+ });
+ assert.equal(isError(result), false, 'dry-run wins over output_path — no file is written');
+ assert.equal(fs.existsSync(outPath), false, 'file must not be written in dry_run mode');
+ });
+
+ it('allows steps:[] + dry_run:false + no output_path (no persistence target)', () => {
+ const result = server.call('provar_testcase_generate', {
+ test_case_name: 'No Output Path',
+ steps: [],
+ dry_run: false,
+ overwrite: false,
+ });
+ assert.equal(isError(result), false, 'no output_path means no file write — TODO-only XML is harmless');
+ assert.equal(parseText(result)['written'], false);
+ });
+
+ it('REJECTS steps:[] + dry_run:false + output_path with STEPS_REQUIRED', () => {
+ const outPath = path.join(tmpDir, 'Empty.testcase');
+ const result = server.call('provar_testcase_generate', {
+ test_case_name: 'Empty Build',
+ steps: [],
+ output_path: outPath,
+ dry_run: false,
+ overwrite: false,
+ });
+ assert.equal(isError(result), true, 'multi-call construction pattern must be rejected');
+ const body = parseText(result);
+ assert.equal(body['error_code'], 'STEPS_REQUIRED');
+ assert.equal(body['retryable'], false);
+ const details = body['details'] as Record;
+ assert.ok(details, 'error must include details');
+ const suggestion = details['suggestion'];
+ assert.ok(typeof suggestion === 'string', 'details.suggestion must be a string');
+ assert.ok(suggestion.length > 0, 'details.suggestion must be non-empty');
+ assert.ok(
+ suggestion.includes('FULL step tree'),
+ 'suggestion must instruct passing the FULL step tree in a single call'
+ );
+ assert.ok(
+ suggestion.includes('dry_run=true'),
+ 'suggestion must mention the dry_run=true escape hatch for skeleton inspection'
+ );
+ });
+
+ it('STEPS_REQUIRED rejection writes NO file (assertion: fs.existsSync === false)', () => {
+ const outPath = path.join(tmpDir, 'NeverWritten.testcase');
+ server.call('provar_testcase_generate', {
+ test_case_name: 'Never Written',
+ steps: [],
+ output_path: outPath,
+ dry_run: false,
+ overwrite: false,
+ });
+ assert.equal(
+ fs.existsSync(outPath),
+ false,
+ 'STEPS_REQUIRED rejection must run BEFORE fs.writeFileSync — no skeleton on disk'
+ );
+ });
+
+ it('allows non-empty steps + dry_run:false + output_path (happy path — normal write)', () => {
+ const outPath = path.join(tmpDir, 'HappyPath.testcase');
+ const result = server.call('provar_testcase_generate', {
+ test_case_name: 'Happy Path',
+ steps: SINGLE_STEP,
+ output_path: outPath,
+ dry_run: false,
+ overwrite: false,
+ });
+ assert.equal(isError(result), false, 'normal write path must remain unchanged');
+ assert.equal(parseText(result)['written'], true);
+ assert.equal(fs.existsSync(outPath), true, 'happy-path file must be written');
+ });
+
+ // Path-policy ordering check: the guard must fire BEFORE assertPathAllowed
+ // so that a caller in the rejected shape gets STEPS_REQUIRED (the actionable
+ // root-cause error), not PATH_NOT_ALLOWED (which would mislead about the fix).
+ it('STEPS_REQUIRED fires BEFORE path policy when both would reject', () => {
+ const strictServer = new MockMcpServer();
+ registerTestCaseGenerate(strictServer as never, { allowedPaths: [tmpDir] });
+ const result = strictServer.call('provar_testcase_generate', {
+ test_case_name: 'Outside And Empty',
+ steps: [],
+ // Path outside allowedPaths AND empty steps — STEPS_REQUIRED must win
+ // because its suggestion is the actionable one (path is moot if no steps).
+ output_path: path.join(os.tmpdir(), 'outside-and-empty.testcase'),
+ dry_run: false,
+ overwrite: false,
+ });
+ assert.equal(isError(result), true);
+ assert.equal(
+ parseText(result)['error_code'],
+ 'STEPS_REQUIRED',
+ 'STEPS_REQUIRED must fire before assertPathAllowed — the empty-payload root cause is what the LLM needs to see'
+ );
+ });
+ });
+
describe('path policy', () => {
+ // Uses a non-empty steps[] to bypass the PDX-483 STEPS_REQUIRED guard so
+ // the assertion targets the PATH_NOT_ALLOWED branch specifically.
+ const SMOKE_STEPS = [{ api_id: 'UiConnect', name: 'Connect', attributes: {} }];
+
it('returns PATH_NOT_ALLOWED when output_path is outside allowedPaths', () => {
const strictServer = new MockMcpServer();
registerTestCaseGenerate(strictServer as never, { allowedPaths: [tmpDir] });
const result = strictServer.call('provar_testcase_generate', {
test_case_name: 'Evil',
- steps: [],
+ steps: SMOKE_STEPS,
output_path: path.join(os.tmpdir(), 'evil.testcase'),
dry_run: false,
overwrite: false,