Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/olive-keys-cross.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"braintrust": patch
---

fix: Validate inline row origin for evals
62 changes: 62 additions & 0 deletions js/src/framework.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,68 @@ function makeTestScorer(
}

describe("runEvaluator", () => {
test("preserves a valid inline origin", async () => {
  // Dataset reference supplied directly on the eval row. The UUID-shaped
  // `object_id` is presumably what the origin validation expects — the
  // schema itself is not visible from this test.
  const validOrigin: {
    object_type: "dataset";
    object_id: string;
    id: string;
    _xact_id: string;
    created: string;
  } = {
    object_type: "dataset",
    object_id: "00000000-0000-0000-0000-000000000001",
    id: "dataset-row-1",
    _xact_id: "100",
    created: "2026-06-01T00:00:00.000Z",
  };

  // Run a single-row eval with no scorers; the row carries the inline origin.
  const { results } = await runEvaluator(
    null,
    {
      projectName: "proj",
      evalName: "eval",
      data: [{ input: 1, origin: validOrigin }],
      task: async (input: number) => input * 2,
      scores: [],
    },
    new NoopProgressReporter(),
    [],
    undefined,
  );

  // The origin reported on the first result must deep-equal the one supplied.
  expect(results[0].origin).toEqual(validOrigin);
});

test("rejects an invalid inline origin", async () => {
  // Same shape as a dataset reference, but `object_id` is deliberately not
  // UUID-shaped, so the run is expected to reject.
  const malformedOrigin: {
    object_type: "dataset";
    object_id: string;
    id: string;
  } = {
    object_type: "dataset",
    object_id: "not-a-uuid",
    id: "dataset-row-1",
  };
  // Mocked task so we can check afterwards whether it was ever invoked.
  const taskSpy = vi.fn(async (input: number) => input * 2);

  const pendingRun = runEvaluator(
    null,
    {
      projectName: "proj",
      evalName: "eval",
      data: [{ input: 1, origin: malformedOrigin }],
      task: taskSpy,
      scores: [],
    },
    new NoopProgressReporter(),
    [],
    undefined,
  );

  // The run must reject, and the task must never have been called for the
  // row carrying the bad origin.
  await expect(pendingRun).rejects.toThrow();
  expect(taskSpy).not.toHaveBeenCalled();
});

describe("errors", () => {
test("task errors generate no scores", async () => {
const out = await runEvaluator(
Expand Down
7 changes: 6 additions & 1 deletion js/src/framework.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
} from "../util/index";
import {
type GitMetadataSettingsType as GitMetadataSettings,
ObjectReference as ObjectReferenceSchema,
type ObjectReferenceType as ObjectReference,
type RepoInfoType as RepoInfo,
type SSEProgressEventDataType as SSEProgressEventData,
Expand Down Expand Up @@ -1179,8 +1180,12 @@ async function runEvaluatorInternal(
: Dataset.isDataset(evaluator.data)
? evaluator.data
: undefined;
const inlineOrigin =
datum.origin === undefined
? undefined
: ObjectReferenceSchema.parse(datum.origin);
const origin =
datum.origin ??
inlineOrigin ??
(eventDataset && datum.id && datum._xact_id
? {
object_type: "dataset",
Expand Down
Loading