diff --git a/.changeset/new-papayas-warn.md b/.changeset/new-papayas-warn.md new file mode 100644 index 000000000..2d22c85da --- /dev/null +++ b/.changeset/new-papayas-warn.md @@ -0,0 +1,5 @@ +--- +"braintrust": minor +--- + +add responses flavor to tool builder diff --git a/e2e/helpers/mock-braintrust-server.ts b/e2e/helpers/mock-braintrust-server.ts index 802c32d52..098329adc 100644 --- a/e2e/helpers/mock-braintrust-server.ts +++ b/e2e/helpers/mock-braintrust-server.ts @@ -72,6 +72,13 @@ interface StartMockBraintrustServerOptions { } const DEFAULT_API_KEY = "mock-braintrust-api-key"; +const DEFAULT_ORG_ID = "00000000-0000-4000-8000-000000000000"; + +interface MockProject { + id: string; + name: string; + org_id: string; +} function isRecord(value: unknown): value is Record { return typeof value === "object" && value !== null && !Array.isArray(value); @@ -254,6 +261,26 @@ function capturedRequestFrom( }; } +async function respondForwardedResponse( + response: ServerResponse, + forwardedResponse: Response, +): Promise { + const contentType = forwardedResponse.headers.get("content-type") ?? 
""; + if (contentType.includes("application/json")) { + respondJson( + response, + forwardedResponse.status, + await forwardedResponse.json(), + ); + return; + } + + response.writeHead(forwardedResponse.status, { + "Content-Type": contentType || "text/plain", + }); + response.end(await forwardedResponse.text()); +} + export async function startMockBraintrustServer( options: StartMockBraintrustServerOptions = {}, ): Promise { @@ -264,7 +291,7 @@ export async function startMockBraintrustServer( const payloads: CapturedLogPayload[] = []; const events: CapturedLogEvent[] = []; const mergedRows = new Map(); - const projectsByName = new Map(); + const projectsByName = new Map(); const experimentsByProjectAndName = new Map< string, { @@ -282,6 +309,7 @@ export async function startMockBraintrustServer( projectsByName.set(prodForwarding.projectName, { id: prodForwarding.projectId, name: prodForwarding.projectName, + org_id: DEFAULT_ORG_ID, }); } @@ -308,7 +336,7 @@ export async function startMockBraintrustServer( } } - function projectForName(name: string): { id: string; name: string } { + function projectForName(name: string): MockProject { const existing = projectsByName.get(name); if (existing) { return existing; @@ -320,25 +348,28 @@ export async function startMockBraintrustServer( ? prodForwarding.projectId : randomUUID(), name, + org_id: DEFAULT_ORG_ID, }; projectsByName.set(name, created); return created; } - function upsertProject(project: { id: string; name: string }): { + function upsertProject(project: { id: string; name: string; - } { + org_id?: string; + }): MockProject { const created = { id: project.id, name: project.name, + org_id: project.org_id ?? 
DEFAULT_ORG_ID, }; projectsByName.set(project.name, created); return created; } function experimentForProject( - project: { id: string; name: string }, + project: MockProject, name: string, ): { created: string; @@ -363,7 +394,7 @@ export async function startMockBraintrustServer( } function upsertExperiment( - project: { id: string; name: string }, + project: MockProject, experiment: { created: string; id: string; name: string }, ): { created: string; @@ -429,8 +460,9 @@ export async function startMockBraintrustServer( }); if (!response.ok) { + const responseBody = await response.text(); throw new Error( - `prodForwarding failed for ${capturedRequest.method} ${capturedRequest.path}: ${response.status} ${response.statusText}`, + `prodForwarding failed for ${capturedRequest.method} ${capturedRequest.path}: ${response.status} ${response.statusText}${responseBody ? ` (${responseBody})` : ""}`, ); } @@ -509,6 +541,10 @@ export async function startMockBraintrustServer( project: upsertProject({ id: forwardedBody.project.id, name: forwardedBody.project.name, + org_id: + typeof forwardedBody.project.org_id === "string" + ? forwardedBody.project.org_id + : undefined, }), }); return; @@ -560,6 +596,10 @@ export async function startMockBraintrustServer( const forwardedProject = upsertProject({ id: forwardedBody.project.id, name: forwardedBody.project.name, + org_id: + typeof forwardedBody.project.org_id === "string" + ? 
forwardedBody.project.org_id + : undefined, }); const forwardedExperiment = upsertExperiment(forwardedProject, { created: forwardedBody.experiment.created, @@ -587,6 +627,31 @@ export async function startMockBraintrustServer( return; } + if ( + prodForwarding && + capturedRequest.method === "POST" && + capturedRequest.path === "/insert-functions" + ) { + await respondForwardedResponse( + res, + await forwardProdRequest(capturedRequest), + ); + return; + } + + if ( + prodForwarding && + capturedRequest.method === "GET" && + (capturedRequest.path === "/v1/prompt" || + capturedRequest.path.startsWith("/v1/prompt/")) + ) { + await respondForwardedResponse( + res, + await forwardProdRequest(capturedRequest), + ); + return; + } + if ( capturedRequest.method === "GET" && capturedRequest.path === "/version" diff --git a/e2e/scenarios/prompt-flavors-prod/__snapshots__/scenario.test.ts.snap b/e2e/scenarios/prompt-flavors-prod/__snapshots__/scenario.test.ts.snap new file mode 100644 index 000000000..98b113c3f --- /dev/null +++ b/e2e/scenarios/prompt-flavors-prod/__snapshots__/scenario.test.ts.snap @@ -0,0 +1,327 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`prompt-flavors-prod loads production prompts and builds each flavor > prompt-builds 1`] = ` +{ + "chatPrompt": { + "build": { + "max_tokens": 42, + "messages": [ + { + "content": "What is the weather in Paris?", + "role": "user", + }, + ], + "model": "gpt-4o-mini", + "response_format": { + "json_schema": { + "name": "weather_summary", + "schema": { + "additionalProperties": false, + "properties": { + "forecast": { + "type": "string", + }, + }, + "required": [ + "forecast", + ], + "type": "object", + }, + "strict": true, + }, + "type": "json_schema", + }, + "verbosity": "medium", + }, + "id": "", + "projectId": "", + "slug": "e2e-prompt-flavors-chat-e2e-", + "version": "", + }, + "completionPrompt": { + "build": { + "model": "gpt-4o-mini", + "prompt": "Summarize prompt flavors in one line.", 
+ }, + "id": "", + "slug": "e2e-prompt-flavors-completion-e2e-", + "version": "", + }, + "responsesPrompt": { + "build": { + "input": [ + { + "content": "What is the weather in Paris?", + "role": "user", + "type": "message", + }, + ], + "max_output_tokens": 42, + "model": "gpt-4o-mini", + "text": { + "format": { + "name": "weather_summary", + "schema": { + "additionalProperties": false, + "properties": { + "forecast": { + "type": "string", + }, + }, + "required": [ + "forecast", + ], + "type": "object", + }, + "strict": true, + "type": "json_schema", + }, + "verbosity": "medium", + }, + }, + "buildWithAttachments": { + "input": [ + { + "content": "What is the weather in Paris?", + "role": "user", + "type": "message", + }, + ], + "max_output_tokens": 42, + "model": "gpt-4o-mini", + "text": { + "format": { + "name": "weather_summary", + "schema": { + "additionalProperties": false, + "properties": { + "forecast": { + "type": "string", + }, + }, + "required": [ + "forecast", + ], + "type": "object", + }, + "strict": true, + "type": "json_schema", + }, + "verbosity": "medium", + }, + }, + "id": "", + "slug": "e2e-prompt-flavors-chat-e2e-", + "version": "", + }, +} +`; + +exports[`prompt-flavors-prod loads production prompts and builds each flavor > request-flow 1`] = ` +[ + { + "headers": null, + "jsonBody": null, + "method": "POST", + "path": "/api/apikey/login", + "query": null, + "rawBody": null, + }, + { + "headers": null, + "jsonBody": { + "project_name": "", + }, + "method": "POST", + "path": "/api/project/register", + "query": null, + "rawBody": { + "project_name": "", + }, + }, + { + "headers": null, + "jsonBody": { + "functions": [ + { + "description": "", + "function_data": { + "type": "prompt", + }, + "name": "E2E prompt flavors chat e2e-", + "project_id": "", + "prompt_data": { + "options": { + "model": "gpt-4o-mini", + "params": { + "max_tokens": 42, + "response_format": { + "json_schema": { + "name": "weather_summary", + "schema": { + 
"additionalProperties": false, + "properties": { + "forecast": { + "type": "string", + }, + }, + "required": [ + "forecast", + ], + "type": "object", + }, + "strict": true, + }, + "type": "json_schema", + }, + "verbosity": "medium", + }, + }, + "prompt": { + "messages": [ + { + "content": "What is the weather in {{city}}?", + "role": "user", + }, + ], + "type": "chat", + }, + }, + "slug": "e2e-prompt-flavors-chat-e2e-", + }, + { + "description": "", + "function_data": { + "type": "prompt", + }, + "name": "E2E prompt flavors completion e2e-", + "project_id": "", + "prompt_data": { + "options": { + "model": "gpt-4o-mini", + }, + "prompt": { + "content": "Summarize {{topic}} in one line.", + "type": "completion", + }, + }, + "slug": "e2e-prompt-flavors-completion-e2e-", + }, + ], + }, + "method": "POST", + "path": "/insert-functions", + "query": null, + "rawBody": { + "functions": [ + { + "description": "", + "function_data": { + "type": "prompt", + }, + "name": "E2E prompt flavors chat e2e-", + "project_id": "", + "prompt_data": { + "options": { + "model": "gpt-4o-mini", + "params": { + "max_tokens": 42, + "response_format": { + "json_schema": { + "name": "weather_summary", + "schema": { + "additionalProperties": false, + "properties": { + "forecast": { + "type": "string", + }, + }, + "required": [ + "forecast", + ], + "type": "object", + }, + "strict": true, + }, + "type": "json_schema", + }, + "verbosity": "medium", + }, + }, + "prompt": { + "messages": [ + { + "content": "What is the weather in {{city}}?", + "role": "user", + }, + ], + "type": "chat", + }, + }, + "slug": "e2e-prompt-flavors-chat-e2e-", + }, + { + "description": "", + "function_data": { + "type": "prompt", + }, + "name": "E2E prompt flavors completion e2e-", + "project_id": "", + "prompt_data": { + "options": { + "model": "gpt-4o-mini", + }, + "prompt": { + "content": "Summarize {{topic}} in one line.", + "type": "completion", + }, + }, + "slug": "e2e-prompt-flavors-completion-e2e-", + }, + ], + }, 
+ }, + { + "headers": null, + "jsonBody": null, + "method": "GET", + "path": "/v1/prompt", + "query": { + "project_name": "", + "slug": "e2e-prompt-flavors-chat-e2e-", + }, + "rawBody": null, + }, + { + "headers": null, + "jsonBody": null, + "method": "GET", + "path": "/v1/prompt/", + "query": null, + "rawBody": null, + }, + { + "headers": null, + "jsonBody": null, + "method": "GET", + "path": "/v1/prompt", + "query": { + "project_name": "", + "slug": "e2e-prompt-flavors-completion-e2e-", + }, + "rawBody": null, + }, + { + "headers": null, + "jsonBody": null, + "method": "GET", + "path": "/v1/prompt", + "query": { + "project_name": "", + "slug": "e2e-prompt-flavors-completion-e2e-", + "version": "", + }, + "rawBody": null, + }, +] +`; diff --git a/e2e/scenarios/prompt-flavors-prod/package.json b/e2e/scenarios/prompt-flavors-prod/package.json new file mode 100644 index 000000000..0bde5e5ab --- /dev/null +++ b/e2e/scenarios/prompt-flavors-prod/package.json @@ -0,0 +1,8 @@ +{ + "name": "@braintrust/e2e-prompt-flavors-prod", + "private": true, + "type": "module", + "dependencies": { + "openai": "6.25.0" + } +} diff --git a/e2e/scenarios/prompt-flavors-prod/pnpm-lock.yaml b/e2e/scenarios/prompt-flavors-prod/pnpm-lock.yaml new file mode 100644 index 000000000..929557b67 --- /dev/null +++ b/e2e/scenarios/prompt-flavors-prod/pnpm-lock.yaml @@ -0,0 +1,31 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + openai: + specifier: 6.25.0 + version: 6.25.0 + +packages: + + openai@6.25.0: + resolution: {integrity: sha512-mEh6VZ2ds2AGGokWARo18aPISI1OhlgdEIC1ewhkZr8pSIT31dec0ecr9Nhxx0JlybyOgoAT1sWeKtwPZzJyww==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.25 || ^4.0 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + +snapshots: + + openai@6.25.0: {} diff --git a/e2e/scenarios/prompt-flavors-prod/scenario.test.ts 
b/e2e/scenarios/prompt-flavors-prod/scenario.test.ts new file mode 100644 index 000000000..78a79c258 --- /dev/null +++ b/e2e/scenarios/prompt-flavors-prod/scenario.test.ts @@ -0,0 +1,106 @@ +import { expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; +import { + prepareScenarioDir, + resolveScenarioDir, + withScenarioHarness, +} from "../../helpers/scenario-harness"; +import { summarizeRequest } from "../../helpers/trace-summary"; + +const scenarioDir = await prepareScenarioDir({ + scenarioDir: resolveScenarioDir(import.meta.url), +}); + +function parseScenarioSummary(stdout: string): Json { + const lines = stdout + .split("\n") + .map((line) => line.trim()) + .filter(Boolean); + const lastLine = lines.at(-1); + if (!lastLine) { + throw new Error("Scenario did not emit a JSON summary"); + } + + return JSON.parse(lastLine) as Json; +} + +type ScenarioExecutions = { + chatPrompt: { + api: string; + finishReason: string | null; + hasContent: boolean; + }; + completionPrompt: { + api: string; + hasOutputText: boolean; + outputItemTypes: string[]; + status: string | null; + }; + responsesPrompt: { + api: string; + hasOutputText: boolean; + outputItemTypes: string[]; + status: string | null; + }; +}; + +type ScenarioSummary = { + builds: Json; + executions: ScenarioExecutions; +}; + +test("prompt-flavors-prod loads production prompts and builds each flavor", async () => { + await withScenarioHarness( + async ({ requestCursor, requestsAfter, runScenarioDir }) => { + const cursor = requestCursor(); + const result = await runScenarioDir({ + scenarioDir, + timeoutMs: 180_000, + }); + const summary = parseScenarioSummary(result.stdout) as ScenarioSummary; + + expect(normalizeForSnapshot(summary.builds)).toMatchSnapshot( + "prompt-builds", + ); + + expect(summary.executions.chatPrompt.api).toBe("chat.completions.create"); + expect(summary.executions.chatPrompt.hasContent).toBe(true); + 
expect(summary.executions.responsesPrompt.api).toBe("responses.create"); + expect(summary.executions.responsesPrompt.hasOutputText).toBe(true); + expect(summary.executions.responsesPrompt.outputItemTypes).toEqual( + expect.arrayContaining(["message"]), + ); + expect(["completed", "incomplete"]).toContain( + summary.executions.responsesPrompt.status, + ); + expect(summary.executions.completionPrompt.api).toBe("responses.create"); + expect(summary.executions.completionPrompt.hasOutputText).toBe(true); + expect(summary.executions.completionPrompt.outputItemTypes).toEqual( + expect.arrayContaining(["message"]), + ); + expect(["completed", "incomplete"]).toContain( + summary.executions.completionPrompt.status, + ); + + const requests = requestsAfter( + cursor, + (request) => + request.path === "/api/apikey/login" || + request.path === "/api/project/register" || + request.path === "/insert-functions" || + request.path === "/v1/prompt" || + request.path.startsWith("/v1/prompt/"), + ); + + expect( + normalizeForSnapshot( + requests.map((request) => + summarizeRequest(request, { + normalizeJsonRawBody: true, + }), + ) as Json, + ), + ).toMatchSnapshot("request-flow"); + }, + ); +}); diff --git a/e2e/scenarios/prompt-flavors-prod/scenario.ts b/e2e/scenarios/prompt-flavors-prod/scenario.ts new file mode 100644 index 000000000..823078be4 --- /dev/null +++ b/e2e/scenarios/prompt-flavors-prod/scenario.ts @@ -0,0 +1,198 @@ +import OpenAI from "openai"; +import { loadPrompt, projects } from "braintrust"; + +function requiredEnv(name: string): string { + const value = process.env[name]?.trim(); + if (!value) { + throw new Error(`Missing required environment variable: ${name}`); + } + return value; +} + +function slugSuffix(value: string): string { + return value.toLowerCase().replace(/[^a-z0-9]+/g, "-"); +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function omitSpanInfo( + prompt: T, +): Omit { + const { span_info: 
_spanInfo, ...rest } = prompt; + return rest; +} + +async function loadPromptWithRetry( + options: Parameters[0], + attempts = 12, +): Promise>> { + let lastError: unknown; + for (let attempt = 1; attempt <= attempts; attempt += 1) { + try { + return await loadPrompt(options); + } catch (error) { + lastError = error; + if (attempt === attempts) { + break; + } + await sleep(1_000); + } + } + + throw lastError instanceof Error ? lastError : new Error(String(lastError)); +} + +const runId = requiredEnv("BRAINTRUST_E2E_RUN_ID"); +const projectName = requiredEnv("BRAINTRUST_E2E_PROJECT_NAME"); +const openAIApiKey = requiredEnv("OPENAI_API_KEY"); +const suffix = slugSuffix(runId); +const openAI = new OpenAI({ + apiKey: openAIApiKey, + ...(process.env.OPENAI_BASE_URL + ? { baseURL: process.env.OPENAI_BASE_URL } + : {}), +}); + +const chatSlug = `e2e-prompt-flavors-chat-${suffix}`; +const completionSlug = `e2e-prompt-flavors-completion-${suffix}`; + +const project = projects.create({ name: projectName }); + +project.prompts.create({ + name: `E2E prompt flavors chat ${runId}`, + slug: chatSlug, + messages: [ + { + role: "user", + content: "What is the weather in {{city}}?", + }, + ], + model: "gpt-4o-mini", + params: { + max_tokens: 42, + response_format: { + type: "json_schema", + json_schema: { + name: "weather_summary", + schema: { + additionalProperties: false, + type: "object", + properties: { + forecast: { type: "string" }, + }, + required: ["forecast"], + }, + strict: true, + }, + }, + verbosity: "medium", + }, +}); + +project.prompts.create({ + name: `E2E prompt flavors completion ${runId}`, + slug: completionSlug, + model: "gpt-4o-mini", + prompt: "Summarize {{topic}} in one line.", +}); + +await project.publish(); + +const chatPrompt = await loadPromptWithRetry({ + projectName, + slug: chatSlug, +}); + +const chatPromptById = await loadPromptWithRetry({ + id: chatPrompt.id, +}); + +const completionPrompt = await loadPromptWithRetry({ + projectName, + slug: 
completionSlug, +}); + +const completionPromptByVersion = await loadPromptWithRetry({ + projectName, + slug: completionSlug, + version: completionPrompt.version, +}); + +const buildArgs = { + city: "Paris", +}; + +const chatBuild = omitSpanInfo(chatPrompt.build(buildArgs)); +const responsesBuild = omitSpanInfo( + chatPromptById.build(buildArgs, { + flavor: "responses", + }), +); +const responsesBuildWithAttachments = omitSpanInfo( + await chatPromptById.buildWithAttachments(buildArgs, { flavor: "responses" }), +); +const completionBuild = omitSpanInfo( + completionPromptByVersion.build( + { topic: "prompt flavors" }, + { flavor: "completion" }, + ), +); + +const { reasoning_effort: _chatReasoningEffort, ...chatExecutionParams } = + chatBuild; + +const chatExecution = await openAI.chat.completions.create(chatExecutionParams); +const responsesExecution = await openAI.responses.create(responsesBuild); +const completionExecution = await openAI.responses.create({ + input: completionBuild.prompt, + model: completionBuild.model, + max_output_tokens: 64, +}); + +const summary = { + builds: { + chatPrompt: { + id: chatPrompt.id, + projectId: chatPrompt.projectId, + slug: chatPrompt.slug, + version: chatPrompt.version, + build: chatBuild, + }, + responsesPrompt: { + id: chatPromptById.id, + slug: chatPromptById.slug, + version: chatPromptById.version, + build: responsesBuild, + buildWithAttachments: responsesBuildWithAttachments, + }, + completionPrompt: { + id: completionPromptByVersion.id, + slug: completionPromptByVersion.slug, + version: completionPromptByVersion.version, + build: completionBuild, + }, + }, + executions: { + chatPrompt: { + api: "chat.completions.create", + finishReason: chatExecution.choices[0]?.finish_reason ?? null, + hasContent: (chatExecution.choices[0]?.message.content?.length ?? 
0) > 0, + }, + responsesPrompt: { + api: "responses.create", + hasOutputText: responsesExecution.output_text.trim().length > 0, + outputItemTypes: responsesExecution.output.map((item) => item.type), + status: responsesExecution.status, + }, + completionPrompt: { + api: "responses.create", + hasOutputText: completionExecution.output_text.trim().length > 0, + outputItemTypes: completionExecution.output.map((item) => item.type), + status: completionExecution.status, + }, + }, +}; + +process.stdout.write(`${JSON.stringify(summary)}\n`); diff --git a/js/src/exports.ts b/js/src/exports.ts index 158479847..bf586d9ad 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -6,7 +6,9 @@ export type { ChatPrompt, CompiledPrompt, CompiledPromptParams, + CompiledResponsesPromptParams, CompletionPrompt, + ResponsesPrompt, ContextParentSpanIds, DataSummary, DatasetSummary, diff --git a/js/src/logger.ts b/js/src/logger.ts index 2dd3d5150..e405dd605 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -78,6 +78,19 @@ import { type PromptBlockDataType as PromptBlockData, type ResponseFormatJsonSchemaType as ResponseFormatJsonSchema, } from "./generated_types"; +import type { + EasyInputMessage, + FunctionTool, + ResponseFormatTextConfig, + ResponseFunctionCallOutputItemList, + ResponseFunctionToolCall, + ResponseInputContent as OpenAIResponseInputContent, + ResponseCreateParamsBase, + ResponseCreateParamsNonStreaming, + ResponseReasoningItem, + ResponseCreateParamsStreaming, + ResponseTextConfig, +} from "openai/resources/responses/responses"; const BRAINTRUST_ATTACHMENT = BraintrustAttachmentReferenceSchema.shape.type.value; @@ -7193,30 +7206,94 @@ export type ChatPrompt = { messages: OpenAIMessage[]; tools?: ChatCompletionTool[]; }; +export type ResponsesTextFormat = ResponseFormatTextConfig; +export type ResponsesTextConfig = ResponseTextConfig; +export type ResponsesReasoningConfig = { + effort?: CompiledPromptReasoningEffort; +}; +export type ResponsesToolChoice = 
NonNullable< + ResponseCreateParamsBase["tool_choice"] +>; +export type ResponsesTool = FunctionTool; +export type ResponsesInputContent = OpenAIResponseInputContent; +export type ResponsesInputItem = + | EasyInputMessage + | ResponseFunctionToolCall + | { + type: "function_call_output"; + call_id: string; + output: string | ResponseFunctionCallOutputItemList; + id?: string | null; + status?: "in_progress" | "completed" | "incomplete" | null; + } + | ResponseReasoningItem; +export type CompiledResponsesPromptParams = Omit< + CompiledPromptParams, + | "function_call" + | "frequency_penalty" + | "maxOutputTokens" + | "max_completion_tokens" + | "max_tokens" + | "n" + | "presence_penalty" + | "reasoning_effort" + | "response_format" + | "stop" + | "stream" + | "stream_options" + | "tool_choice" + | "verbosity" +> & { + max_output_tokens?: number; + reasoning?: ResponsesReasoningConfig; + text?: ResponsesTextConfig; + tool_choice?: ResponsesToolChoice; +} & ( + | { + stream?: ResponseCreateParamsNonStreaming["stream"]; + stream_options?: ResponseCreateParamsBase["stream_options"]; + } + | { + stream: ResponseCreateParamsStreaming["stream"]; + stream_options?: ResponseCreateParamsBase["stream_options"]; + } + ); +export type ResponsesPrompt = { + input: ResponsesInputItem[]; + tools?: ResponsesTool[]; +}; export type CompletionPrompt = { prompt: string; }; -export type CompiledPrompt = - CompiledPromptParams & { - span_info?: { - name?: string; - spanAttributes?: Record; - metadata: { - prompt: { - variables: Record; - id: string; - project_id: string; - version: string; - }; +type CompiledPromptSpanInfo = { + span_info?: { + name?: string; + spanAttributes?: Record; + metadata: { + prompt: { + variables: Record; + id: string; + project_id: string; + version: string; }; }; - } & (Flavor extends "chat" + }; +}; + +export type CompiledPrompt = + (Flavor extends "responses" + ? 
CompiledResponsesPromptParams + : CompiledPromptParams) & + CompiledPromptSpanInfo & + (Flavor extends "chat" ? ChatPrompt : Flavor extends "completion" ? CompletionPrompt - : // eslint-disable-next-line @typescript-eslint/no-empty-object-type - {}); + : Flavor extends "responses" + ? ResponsesPrompt + : // eslint-disable-next-line @typescript-eslint/no-empty-object-type + {}); export type DefaultPromptArgs = Partial< CompiledPromptParams & AnyModelParam & ChatPrompt & CompletionPrompt @@ -7481,6 +7558,377 @@ export function renderPromptParams( return params; } +function toResponsesTextFormat( + responseFormat: unknown, +): ResponsesTextFormat | undefined { + if (!isObject(responseFormat) || typeof responseFormat.type !== "string") { + return undefined; + } + + if (responseFormat.type === "text" || responseFormat.type === "json_object") { + return { type: responseFormat.type }; + } + + if ( + responseFormat.type === "json_schema" && + isObject(responseFormat.json_schema) && + typeof responseFormat.json_schema.name === "string" && + isObject(responseFormat.json_schema.schema) + ) { + return { + type: "json_schema", + name: responseFormat.json_schema.name, + ...(typeof responseFormat.json_schema.description === "string" + ? { description: responseFormat.json_schema.description } + : {}), + schema: responseFormat.json_schema.schema, + ...("strict" in responseFormat.json_schema + ? 
{ strict: responseFormat.json_schema.strict as boolean | null } + : {}), + }; + } + + return undefined; +} + +function toResponsesToolChoice( + toolChoice: unknown, + functionCall: unknown, +): ResponsesToolChoice | undefined { + if ( + toolChoice === "auto" || + toolChoice === "none" || + toolChoice === "required" + ) { + return toolChoice; + } + + if ( + isObject(toolChoice) && + toolChoice.type === "function" && + isObject(toolChoice.function) && + typeof toolChoice.function.name === "string" + ) { + return { + type: "function", + name: toolChoice.function.name, + }; + } + + if (isObject(toolChoice) && typeof toolChoice.type === "string") { + return toolChoice as unknown as ResponsesToolChoice; + } + + if (functionCall === "auto" || functionCall === "none") { + return functionCall; + } + + if (isObject(functionCall) && typeof functionCall.name === "string") { + return { + type: "function", + name: functionCall.name, + }; + } + + return undefined; +} + +function toResponsesTools( + tools: ChatCompletionTool[] | undefined, +): ResponsesTool[] | undefined { + if (!tools) { + return undefined; + } + + return tools.flatMap((tool) => { + if ( + tool.type !== "function" || + !("function" in tool) || + !tool.function || + typeof tool.function.name !== "string" + ) { + return []; + } + + const toolFunction = tool.function as Record; + + return [ + { + type: "function", + name: tool.function.name, + parameters: tool.function.parameters ?? null, + strict: + typeof toolFunction.strict === "boolean" ? toolFunction.strict : null, + ...(typeof tool.function.description === "string" + ? 
{ description: tool.function.description } + : {}), + }, + ]; + }); +} + +function toResponsesContent( + content: unknown, +): string | ResponsesInputContent[] | undefined { + if (typeof content === "string") { + return content; + } + + if (!Array.isArray(content)) { + return undefined; + } + + return content.flatMap((part) => { + if (!isObject(part) || typeof part.type !== "string") { + return []; + } + + switch (part.type) { + case "text": + return typeof part.text === "string" + ? [{ type: "input_text" as const, text: part.text }] + : []; + case "image_url": + return [ + { + type: "input_image" as const, + detail: + isObject(part.image_url) && + typeof part.image_url.detail === "string" + ? (part.image_url.detail as "low" | "high" | "auto") + : "auto", + ...(isObject(part.image_url) && + typeof part.image_url.url === "string" + ? { image_url: part.image_url.url } + : {}), + }, + ]; + case "file": + return [ + { + type: "input_file" as const, + ...(isObject(part.file) && typeof part.file.file_data === "string" + ? { file_data: part.file.file_data } + : {}), + ...(isObject(part.file) && typeof part.file.file_id === "string" + ? { file_id: part.file.file_id } + : {}), + ...(isObject(part.file) && typeof part.file.filename === "string" + ? { filename: part.file.filename } + : {}), + }, + ]; + default: + return []; + } + }); +} + +function toResponsesInput(messages: Message[]): ResponsesInputItem[] { + return messages.flatMap((message, messageIndex) => { + switch (message.role) { + case "system": + case "user": + case "developer": + case "model": { + const content = toResponsesContent(message.content); + if (content === undefined) { + return []; + } + + return [ + { + type: "message", + role: message.role === "model" ? 
"assistant" : message.role, + content, + }, + ]; + } + case "assistant": { + const items: ResponsesInputItem[] = []; + const content = toResponsesContent(message.content); + if ( + content !== undefined && + !(typeof content === "string" && content.length === 0) && + !(Array.isArray(content) && content.length === 0) + ) { + items.push({ + type: "message", + role: "assistant", + content, + }); + } + + if (Array.isArray(message.reasoning)) { + items.push( + ...message.reasoning + .filter( + (reasoning): reasoning is { id?: string; content: string } => + isObject(reasoning) && typeof reasoning.content === "string", + ) + .map((reasoning, reasoningIndex) => ({ + type: "reasoning" as const, + id: + typeof reasoning.id === "string" + ? reasoning.id + : `reasoning_${messageIndex}_${reasoningIndex}`, + summary: [ + { + type: "summary_text" as const, + text: reasoning.content, + }, + ], + })), + ); + } + + if (isObject(message.function_call)) { + items.push({ + type: "function_call", + call_id: message.function_call.name, + name: message.function_call.name, + arguments: message.function_call.arguments, + }); + } + + if (Array.isArray(message.tool_calls)) { + items.push( + ...message.tool_calls + .filter( + (toolCall) => + toolCall.type === "function" && + typeof toolCall.id === "string" && + isObject(toolCall.function), + ) + .map((toolCall) => ({ + type: "function_call" as const, + call_id: toolCall.id, + name: toolCall.function.name, + arguments: toolCall.function.arguments, + })), + ); + } + + return items; + } + case "tool": { + const output = toResponsesContent(message.content); + if (output === undefined) { + return []; + } + + return [ + { + type: "function_call_output", + call_id: message.tool_call_id, + output, + }, + ]; + } + case "function": { + const output = toResponsesContent(message.content); + if (output === undefined) { + return []; + } + + return [ + { + type: "function_call_output", + call_id: message.name, + output, + }, + ]; + } + default: { + const _: 
/**
 * Renders a prompt's model params and reshapes the chat-style keys into the
 * key layout used by the "responses" flavor.
 *
 * Key mapping performed here:
 * - The first numeric value among `max_output_tokens`, `maxOutputTokens`,
 *   `max_completion_tokens` and `max_tokens` (in that order) is emitted as
 *   `max_output_tokens`.
 * - `response_format` (via `toResponsesTextFormat`) and `verbosity` are
 *   merged into `text`, on top of any object-valued `text` already present.
 * - `reasoning_effort` is merged into `reasoning.effort`, on top of any
 *   object-valued `reasoning` already present.
 * - `tool_choice` / `function_call` are passed to `toResponsesToolChoice`
 *   and the result, when defined, is emitted as `tool_choice`.
 * - `frequency_penalty`, `presence_penalty`, `n` and `stop` are removed from
 *   the output.
 *
 * Every other rendered key is passed through untouched.
 *
 * @param params Model params to render; may be undefined.
 * @param args Template variables forwarded to `renderPromptParams`.
 * @param options Rendering options forwarded to `renderPromptParams`.
 * @returns The rendered params in responses-flavor layout.
 */
function renderResponsesPromptParams(
  params: ModelParams | undefined,
  args: Record<string, unknown>,
  options: { strict?: boolean; templateFormat?: TemplateFormat } = {},
): CompiledResponsesPromptParams {
  const rendered = (renderPromptParams(params, args, options) ?? {}) as Record<
    string,
    unknown
  >;

  // Only object-valued `text` / `reasoning` are used as merge bases.
  const text = isObject(rendered.text) ? rendered.text : undefined;
  const reasoning = isObject(rendered.reasoning)
    ? rendered.reasoning
    : undefined;

  // Order matters: the first key holding a number wins.
  const tokenLimitKeys = [
    "max_output_tokens",
    "maxOutputTokens",
    "max_completion_tokens",
    "max_tokens",
  ];
  const tokenLimitKey = tokenLimitKeys.find(
    (key) => typeof rendered[key] === "number",
  );
  const maxOutputTokens =
    tokenLimitKey !== undefined
      ? (rendered[tokenLimitKey] as number)
      : undefined;

  const textFormat = toResponsesTextFormat(rendered.response_format);
  const responsesToolChoice = toResponsesToolChoice(
    rendered.tool_choice,
    rendered.function_call,
  );

  // Strip every key that is either remapped below or intentionally dropped;
  // whatever remains in `rest` is forwarded as-is.
  const {
    function_call: _functionCall,
    frequency_penalty: _frequencyPenalty,
    maxOutputTokens: _maxOutputTokens,
    max_completion_tokens: _maxCompletionTokens,
    max_output_tokens: _maxOutputTokensSnake,
    max_tokens: _maxTokens,
    n: _n,
    presence_penalty: _presencePenalty,
    reasoning_effort: _reasoningEffort,
    response_format: _responseFormat,
    stop: _stop,
    tool_choice: _toolChoice,
    verbosity: _verbosity,
    ...rest
  } = rendered;

  return {
    ...rest,
    ...(maxOutputTokens !== undefined
      ? { max_output_tokens: maxOutputTokens }
      : {}),
    ...(textFormat !== undefined || rendered.verbosity !== undefined || text
      ? {
          text: {
            ...(text ?? {}),
            ...(textFormat !== undefined ? { format: textFormat } : {}),
            ...(rendered.verbosity !== undefined
              ? { verbosity: rendered.verbosity as AnyModelParam["verbosity"] }
              : {}),
          },
        }
      : {}),
    ...(reasoning || rendered.reasoning_effort !== undefined
      ? {
          reasoning: {
            ...(reasoning ?? {}),
            ...(rendered.reasoning_effort !== undefined
              ? {
                  effort:
                    rendered.reasoning_effort as CompiledPromptReasoningEffort,
                }
              : {}),
          },
        }
      : {}),
    ...(responsesToolChoice !== undefined
      ? { tool_choice: responsesToolChoice }
      : {}),
  } as CompiledResponsesPromptParams;
}
/**
 * `Omit` that distributes over union members, so each member of `T` keeps its
 * own remaining keys instead of collapsing to the keys common to all members.
 */
type DistributiveOmit<T, K extends PropertyKey> = T extends unknown
  ? Omit<T, K>
  : never;

/**
 * Returns a shallow copy of `prompt` without its `span_info` key.
 * The input object is not modified.
 *
 * @param prompt Object that may carry a `span_info` property.
 * @returns A new object holding every own enumerable property except `span_info`.
 */
function omitSpanInfo<T extends { span_info?: unknown }>(
  prompt: T,
): DistributiveOmit<T, "span_info"> {
  const { span_info: _spanInfo, ...responseParams } = prompt;
  return responseParams as DistributiveOmit<T, "span_info">;
}

/**
 * Identity function whose only purpose is the compile-time check that
 * `params` is assignable to `ResponseCreateParams`.
 *
 * @param params Value to type-check.
 * @returns The same `params` reference.
 */
function assertResponseCreateParams(
  params: ResponseCreateParams,
): ResponseCreateParams {
  return params;
}
// Builds a Prompt around the given prompt_data; every responses-flavor test
// below uses the same row metadata and differs only in prompt_data.
const makeResponsesFlavorPrompt = (promptData: PromptData) =>
  new Prompt(
    {
      id: "1",
      _xact_id: "xact_123",
      created: "2023-10-01T00:00:00Z",
      project_id: "project_123",
      prompt_session_id: "session_123",
      name: "test",
      slug: "test",
      prompt_data: promptData,
    },
    {},
    true,
  );

// build() with flavor "responses" emits `input` and flattened tools, and no
// chat-style `messages` key.
test("supports responses flavor in build()", () => {
  const prompt = makeResponsesFlavorPrompt({
    options: {
      model: "gpt-4o",
    },
    prompt: {
      type: "chat",
      messages: [{ role: "user", content: "Hello {{name}}" }],
      tools: JSON.stringify([
        {
          type: "function",
          function: {
            name: "greet",
            parameters: {
              type: "object",
              properties: {},
            },
          },
        },
      ]),
    },
  });

  const result = prompt.build({ name: "World" }, { flavor: "responses" });
  const responseParams = assertResponseCreateParams(omitSpanInfo(result));

  expectTypeOf(result).toExtend<CompiledPrompt<"responses">>();
  expect(responseParams).toMatchObject({
    model: "gpt-4o",
    input: [
      {
        type: "message",
        role: "user",
        content: "Hello World",
      },
    ],
    tools: [
      {
        type: "function",
        name: "greet",
      },
    ],
  });
  expect(responseParams).not.toHaveProperty("messages");
});

// Same contract as above, through the async buildWithAttachments() entry point.
test("supports responses flavor in buildWithAttachments()", async () => {
  const prompt = makeResponsesFlavorPrompt({
    options: {
      model: "gpt-4o",
    },
    prompt: {
      type: "chat",
      messages: [{ role: "user", content: "Hello {{name}}" }],
    },
  });

  const result = await prompt.buildWithAttachments(
    { name: "World" },
    { flavor: "responses" },
  );
  const responseParams = assertResponseCreateParams(omitSpanInfo(result));

  expectTypeOf(result).toExtend<CompiledPrompt<"responses">>();
  expect(responseParams).toMatchObject({
    model: "gpt-4o",
    input: [
      {
        type: "message",
        role: "user",
        content: "Hello World",
      },
    ],
  });
  expect(responseParams).not.toHaveProperty("messages");
});

// Chat-style params (max_tokens, reasoning_effort, verbosity, response_format,
// tool_choice) must come out under their responses-flavor keys.
test("responses flavor maps chat params to responses params", () => {
  const prompt = makeResponsesFlavorPrompt({
    options: {
      model: "gpt-4o",
      params: {
        max_tokens: 42,
        reasoning_effort: "low",
        verbosity: "high",
        response_format: {
          type: "json_schema",
          json_schema: {
            name: "schema",
            schema: {
              type: "object",
              properties: {
                greeting: { type: "string" },
              },
            },
            strict: true,
          },
        },
        tool_choice: {
          type: "function",
          function: { name: "greet" },
        },
      },
    },
    prompt: {
      type: "chat",
      messages: [{ role: "user", content: "Hello {{name}}" }],
      tools: JSON.stringify([
        {
          type: "function",
          function: {
            name: "greet",
            description: "Greet the user",
            strict: true,
            parameters: {
              type: "object",
              properties: {
                name: { type: "string" },
              },
              required: ["name"],
            },
          },
        },
      ]),
    },
  });

  const result = prompt.build({ name: "World" }, { flavor: "responses" });
  const responseParams = assertResponseCreateParams(omitSpanInfo(result));

  expect(responseParams).toMatchObject({
    model: "gpt-4o",
    max_output_tokens: 42,
    reasoning: { effort: "low" },
    text: {
      verbosity: "high",
      format: {
        type: "json_schema",
        name: "schema",
        strict: true,
        schema: {
          type: "object",
          properties: {
            greeting: { type: "string" },
          },
        },
      },
    },
    tool_choice: {
      type: "function",
      name: "greet",
    },
    tools: [
      {
        type: "function",
        name: "greet",
        description: "Greet the user",
        parameters: {
          type: "object",
          properties: {
            name: { type: "string" },
          },
          required: ["name"],
        },
      },
    ],
  });
  expect(responseParams).not.toHaveProperty("max_tokens");
  expect(responseParams).not.toHaveProperty("response_format");
  expect(responseParams).not.toHaveProperty("reasoning_effort");
});

// Multi-part user content plus an assistant tool call and its tool result must
// be converted into the exact sequence of input items asserted below.
test("responses flavor converts tool-call message history to input items", () => {
  const prompt = makeResponsesFlavorPrompt({
    options: {
      model: "gpt-4o",
    },
    prompt: {
      type: "chat",
      messages: [
        {
          role: "user",
          content: [
            {
              type: "text",
              text: "What is the weather in Paris?",
            },
            {
              type: "image_url",
              image_url: {
                url: "https://example.com/weather-map.png",
              },
            },
          ],
        },
      ],
    },
  });

  const result = prompt.build(
    {},
    {
      flavor: "responses",
      messages: [
        {
          role: "assistant",
          content: "Let me check.",
          tool_calls: [
            {
              id: "call_123",
              type: "function",
              function: {
                name: "get_weather",
                arguments: '{"location":"Paris"}',
              },
            },
          ],
        },
        {
          role: "tool",
          tool_call_id: "call_123",
          content: "Sunny and 72F",
        },
      ],
    },
  );
  const responseParams = assertResponseCreateParams(omitSpanInfo(result));

  expect(responseParams.input).toEqual([
    {
      type: "message",
      role: "user",
      content: [
        {
          type: "input_text",
          text: "What is the weather in Paris?",
        },
        {
          type: "input_image",
          image_url: "https://example.com/weather-map.png",
          detail: "auto",
        },
      ],
    },
    {
      type: "message",
      role: "assistant",
      content: "Let me check.",
    },
    {
      type: "function_call",
      call_id: "call_123",
      name: "get_weather",
      arguments: '{"location":"Paris"}',
    },
    {
      type: "function_call_output",
      call_id: "call_123",
      output: "Sunny and 72F",
    },
  ]);
});