Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/all-goats-double.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"braintrust": minor
---

feat: Add reporter for `vitest-evals`
22 changes: 22 additions & 0 deletions e2e/config/pr-comment-scenarios.json
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,28 @@
{ "variantKey": "github-copilot-v0-auto", "label": "Auto-hook" }
]
},
{
"scenarioDirName": "test-framework-evals-vitest",
"label": "Vitest Test Framework Evals",
"metadataScenario": "test-framework-evals-vitest",
"variants": [
{ "variantKey": "v2", "label": "v2" },
{ "variantKey": "v3", "label": "v3" },
{ "variantKey": "v4.1", "label": "v4.1" },
{
"variantKey": "vitest-evals-reporter",
"label": "vitest-evals reporter"
}
],
"evals": [
{
"entry": "scenario.vitest-evals-reporter.ts",
"experimentNameTemplate": "vitest-evals-reporter-{testRunId}",
"label": "vitest-evals reporter",
"variantKey": "vitest-evals-reporter"
}
]
},
{
"scenarioDirName": "openai-agents-instrumentation",
"label": "OpenAI Agents Instrumentation",
Expand Down
106 changes: 82 additions & 24 deletions e2e/helpers/mock-braintrust-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,19 @@ interface StartMockBraintrustServerOptions {
}

const DEFAULT_API_KEY = "mock-braintrust-api-key";
const PROD_FORWARDING_SKIPPED_HEADERS = new Set([
"authorization",
"connection",
"content-length",
"host",
"keep-alive",
"proxy-authenticate",
"proxy-authorization",
"te",
"trailer",
"transfer-encoding",
"upgrade",
]);

function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value);
Expand Down Expand Up @@ -265,6 +278,7 @@ export async function startMockBraintrustServer(
const events: CapturedLogEvent[] = [];
const mergedRows = new Map<string, CapturedLogRow>();
const projectsByName = new Map<string, { id: string; name: string }>();
const prodForwardingErrors: string[] = [];
const experimentsByProjectAndName = new Map<
string,
{
Expand Down Expand Up @@ -382,18 +396,55 @@ export async function startMockBraintrustServer(
return created;
}

function trackProdForwarding(promise: Promise<void>): void {
/**
 * Appends one formatted failure entry to `prodForwardingErrors`.
 *
 * @param context - Label placed before the failure detail in the entry.
 * @param error - The caught value. An `Error` contributes its `message`;
 *   any other value is converted with `String(...)`.
 */
function recordProdForwardingError(context: string, error: unknown): void {
  let detail: string;
  if (error instanceof Error) {
    detail = error.message;
  } else {
    detail = String(error);
  }

  prodForwardingErrors.push(`${context}: ${detail}`);
}

function trackProdForwarding(context: string, promise: Promise<void>): void {
pendingProdForwarding.add(promise);
void promise.then(
() => {
pendingProdForwarding.delete(promise);
},
() => {
(error) => {
recordProdForwardingError(context, error);
pendingProdForwarding.delete(promise);
},
);
}

/**
 * Produces the request variant used for prod forwarding.
 *
 * When `prodForwarding` is set and the captured JSON body is a record, any
 * top-level `org_id` / `org_name` fields are overwritten with
 * `prodForwarding.orgId` / `prodForwarding.orgName`. Fields that are absent
 * are not added.
 *
 * @param capturedRequest - The request as captured by the mock server.
 * @returns The same `capturedRequest` object when nothing was rewritten;
 *   otherwise a shallow copy whose `jsonBody` and `rawBody` carry the
 *   rewritten payload.
 */
function requestForProdForwarding(
  capturedRequest: CapturedRequest,
): CapturedRequest {
  const sourceBody = capturedRequest.jsonBody;
  if (!prodForwarding || !isRecord(sourceBody)) {
    return capturedRequest;
  }

  // Work on a copy so the captured request is never mutated.
  const patchedBody = clone(sourceBody) as Record<string, unknown>;

  const overrides: ReadonlyArray<readonly [string, string]> = [
    ["org_id", prodForwarding.orgId],
    ["org_name", prodForwarding.orgName],
  ];

  let rewroteAny = false;
  for (const [field, replacement] of overrides) {
    if (field in patchedBody) {
      // Assigning to an existing key keeps its position in the serialized body.
      patchedBody[field] = replacement;
      rewroteAny = true;
    }
  }

  if (rewroteAny) {
    // Keep the raw payload in sync with the rewritten JSON.
    return {
      ...capturedRequest,
      jsonBody: patchedBody as JsonValue,
      rawBody: JSON.stringify(patchedBody),
    };
  }

  return capturedRequest;
}

async function forwardProdRequest(
capturedRequest: CapturedRequest,
options: { drainResponseBody?: boolean } = {},
Expand All @@ -402,22 +453,18 @@ export async function startMockBraintrustServer(
throw new Error("prodForwarding is not enabled");
}

const baseUrl = capturedRequest.path.startsWith("/api/")
const prodRequest = requestForProdForwarding(capturedRequest);
const baseUrl = prodRequest.path.startsWith("/api/")
? prodForwarding.appUrl
: prodForwarding.apiUrl;
const url = new URL(capturedRequest.path, baseUrl);
for (const [key, value] of Object.entries(capturedRequest.query)) {
const url = new URL(prodRequest.path, baseUrl);
for (const [key, value] of Object.entries(prodRequest.query)) {
url.searchParams.set(key, value);
}

const headers = new Headers();
for (const [key, value] of Object.entries(capturedRequest.headers)) {
if (
key === "authorization" ||
key === "connection" ||
key === "content-length" ||
key === "host"
) {
for (const [key, value] of Object.entries(prodRequest.headers)) {
if (PROD_FORWARDING_SKIPPED_HEADERS.has(key)) {
continue;
}

Expand All @@ -427,16 +474,19 @@ export async function startMockBraintrustServer(

const response = await fetch(url, {
body:
capturedRequest.method === "GET" || capturedRequest.method === "HEAD"
prodRequest.method === "GET" || prodRequest.method === "HEAD"
? undefined
: capturedRequest.rawBody,
: prodRequest.rawBody,
headers,
method: capturedRequest.method,
method: prodRequest.method,
});

if (!response.ok) {
const responseText = await response.text().catch(() => "");
throw new Error(
`prodForwarding failed for ${capturedRequest.method} ${capturedRequest.path}: ${response.status} ${response.statusText}`,
`prodForwarding failed for ${capturedRequest.method} ${capturedRequest.path}: ${response.status} ${response.statusText}${
responseText ? `: ${responseText.slice(0, 500)}` : ""
}`,
);
}

Expand Down Expand Up @@ -523,7 +573,8 @@ export async function startMockBraintrustServer(
});
return;
}
} catch {
} catch (error) {
recordProdForwardingError("POST /api/project/register", error);
// Fall back to local registration so e2e assertions still run.
}
}
Expand Down Expand Up @@ -583,7 +634,8 @@ export async function startMockBraintrustServer(
});
return;
}
} catch {
} catch (error) {
recordProdForwardingError("POST /api/experiment/register", error);
// Fall back to local registration so e2e assertions still run.
}
}
Expand Down Expand Up @@ -668,11 +720,10 @@ export async function startMockBraintrustServer(
}
if (prodForwarding) {
trackProdForwarding(
"POST /logs3",
forwardProdRequest(capturedRequest, {
drainResponseBody: true,
})
.then(() => undefined)
.catch(() => undefined),
}).then(() => undefined),
);
}
respondJson(res, 200, { ok: true });
Expand All @@ -685,11 +736,10 @@ export async function startMockBraintrustServer(
) {
if (prodForwarding) {
trackProdForwarding(
"POST /otel/v1/traces",
forwardProdRequest(capturedRequest, {
drainResponseBody: true,
})
.then(() => undefined)
.catch(() => undefined),
}).then(() => undefined),
);
}
respondJson(res, 200, { ok: true });
Expand Down Expand Up @@ -723,6 +773,14 @@ export async function startMockBraintrustServer(
while (pendingProdForwarding.size > 0) {
await Promise.allSettled([...pendingProdForwarding]);
}
if (prodForwardingErrors.length > 0) {
throw new Error(
[
"Braintrust prod forwarding failed:",
...prodForwardingErrors.map((message) => `- ${message}`),
].join("\n"),
);
}
},
events,
payloads,
Expand Down
6 changes: 5 additions & 1 deletion e2e/helpers/prod-forwarding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ export interface ProdForwarding {
apiKey: string;
apiUrl: string;
appUrl: string;
orgId: string;
orgName: string;
projectId: string;
projectName: string;
}
Expand Down Expand Up @@ -36,14 +38,16 @@ export async function initializeProdForwarding(): Promise<void> {
const projectId = await logger.id;
const state = logger.loggingState;

if (!state.apiUrl || !state.appUrl) {
if (!state.apiUrl || !state.appUrl || !state.orgId || !state.orgName) {
throw new Error("Braintrust login did not resolve prodForwarding URLs");
}

prodForwarding = {
apiKey,
apiUrl: state.apiUrl,
appUrl: state.appUrl,
orgId: state.orgId,
orgName: state.orgName,
projectId,
projectName,
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
{
"span_tree": [
{
"name": "vitest-evals braintrust reporter > approves refundable invoice",
"type": "eval",
"children": [
{
"name": "classify refund",
"type": "llm",
"children": [
{
"name": "lookupInvoice",
"type": "tool",
"children": [],
"attributes": {
"external_parent_id": "model-span",
"external_span_id": "tool-span",
"gen_ai.tool.name": "lookupInvoice",
"trace_id": "refund-trace",
"vitest_evals_kind": "tool"
},
"metadata": {
"traceName": "refund trace"
}
}
],
"attributes": {
"external_span_id": "model-span",
"gen_ai.request.model": "deterministic-refund-model",
"trace_id": "refund-trace",
"vitest_evals_kind": "model"
},
"metadata": {
"traceName": "refund trace"
}
}
],
"attributes": {
"framework": "vitest",
"reporter": "vitest-evals"
},
"input": {
"input": "Refund invoice inv_123",
"test": "vitest-evals braintrust reporter > approves refundable invoice"
},
"output": {
"message": "Invoice inv_123 is refundable and the refund is approved.",
"status": "approved"
},
"scores": {
"StatusJudge": 1,
"avg_score": 1,
"pass": 1
},
"metadata": {
"artifacts": {
"case": "vitest-evals-reporter",
"scenario": "test-framework-evals-vitest",
"testRunId": "<run:1>"
},
"errors": [],
"failureMessages": [],
"file": "<repo>/e2e/scenarios/test-framework-evals-vitest/runner.vitest-evals-reporter.case.ts",
"fullName": "vitest-evals braintrust reporter > approves refundable invoice",
"harnessName": "braintrust-refund-harness",
"relativeFile": "runner.vitest-evals-reporter.case.ts",
"scoreMetadata": {
"StatusJudge": {
"expectedStatus": "approved",
"observedStatus": "approved"
}
},
"session": {
"messages": [
{
"content": "Refund invoice inv_123",
"role": "user"
},
{
"content": "Invoice inv_123 is refundable and the refund is approved.",
"role": "assistant"
}
]
},
"status": "passed",
"testId": "-2057137040_0_0",
"thresholdFailed": false
},
"metrics": {
"duration_ms": 0,
"input_tokens": 11,
"output_tokens": 13,
"tool_calls": 1,
"total_tokens": 24
}
}
]
}
Loading