diff --git a/.changeset/olive-keys-cross.md b/.changeset/olive-keys-cross.md
new file mode 100644
index 000000000..c3d00036f
--- /dev/null
+++ b/.changeset/olive-keys-cross.md
@@ -0,0 +1,5 @@
+---
+"braintrust": patch
+---
+
+fix: Validate inline row origin for evals
diff --git a/js/src/framework.test.ts b/js/src/framework.test.ts
index 12bcbb8c1..87135dd28 100644
--- a/js/src/framework.test.ts
+++ b/js/src/framework.test.ts
@@ -166,6 +166,90 @@ function makeTestScorer(
 }
 
 describe("runEvaluator", () => {
+  test("preserves a valid inline origin", async () => {
+    const origin: {
+      object_type: "dataset";
+      object_id: string;
+      id: string;
+      _xact_id: string;
+      created: string;
+    } = {
+      object_type: "dataset",
+      object_id: "00000000-0000-0000-0000-000000000001",
+      id: "dataset-row-1",
+      _xact_id: "100",
+      created: "2026-06-01T00:00:00.000Z",
+    };
+
+    const out = await runEvaluator(
+      null,
+      {
+        projectName: "proj",
+        evalName: "eval",
+        data: [{ input: 1, origin }],
+        task: async (input: number) => input * 2,
+        scores: [],
+      },
+      new NoopProgressReporter(),
+      [],
+      undefined,
+    );
+
+    expect(out.results[0].origin).toEqual(origin);
+  });
+
+  test("rejects an invalid inline origin", async () => {
+    const origin: {
+      object_type: "dataset";
+      object_id: string;
+      id: string;
+    } = {
+      object_type: "dataset",
+      object_id: "not-a-uuid",
+      id: "dataset-row-1",
+    };
+    const task = vi.fn(async (input: number) => input * 2);
+
+    await expect(
+      runEvaluator(
+        null,
+        {
+          projectName: "proj",
+          evalName: "eval",
+          data: [{ input: 1, origin }],
+          task,
+          scores: [],
+        },
+        new NoopProgressReporter(),
+        [],
+        undefined,
+      ),
+    ).rejects.toThrow();
+    expect(task).not.toHaveBeenCalled();
+  });
+
+  test("treats a null inline origin as absent", async () => {
+    // Rows deserialized from JSON can carry an explicit `origin: null`.
+    const data = JSON.parse('[{"input": 1, "origin": null}]');
+    const task = vi.fn(async (input: number) => input * 2);
+
+    await runEvaluator(
+      null,
+      {
+        projectName: "proj",
+        evalName: "eval",
+        data,
+        task,
+        scores: [],
+      },
+      new NoopProgressReporter(),
+      [],
+      undefined,
+    );
+
+    expect(task).toHaveBeenCalledTimes(1);
+  });
+
   describe("errors", () => {
     test("task errors generate no scores", async () => {
       const out = await runEvaluator(
diff --git a/js/src/framework.ts b/js/src/framework.ts
index 3183a575a..4f0ec0c89 100644
--- a/js/src/framework.ts
+++ b/js/src/framework.ts
@@ -10,6 +10,7 @@ import {
 } from "../util/index";
 import {
   type GitMetadataSettingsType as GitMetadataSettings,
+  ObjectReference as ObjectReferenceSchema,
   type ObjectReferenceType as ObjectReference,
   type RepoInfoType as RepoInfo,
   type SSEProgressEventDataType as SSEProgressEventData,
@@ -1179,8 +1180,16 @@ async function runEvaluatorInternal(
               : Dataset.isDataset(evaluator.data)
                 ? evaluator.data
                 : undefined;
+          // Validate a caller-supplied origin up front. A nullish origin
+          // (`null` as well as `undefined`) means "no inline origin", which
+          // matches the `??` fallback this replaces; any other value must
+          // pass the ObjectReference schema or `parse` throws.
+          const inlineOrigin =
+            datum.origin == null
+              ? undefined
+              : ObjectReferenceSchema.parse(datum.origin);
           const origin =
-            datum.origin ??
+            inlineOrigin ??
             (eventDataset && datum.id && datum._xact_id
               ? {
                   object_type: "dataset",