From 2e45178ff3842350a8a14a6997aa6a2d3d420b9e Mon Sep 17 00:00:00 2001 From: mikee-b <112516913+mikee-b@users.noreply.github.com> Date: Thu, 28 May 2026 13:51:55 -0400 Subject: [PATCH 1/4] fix(integrations/zendesk): Prevent nullable user fields (#15225) --- .../zendesk/integration.definition.ts | 2 +- .../zendesk/src/definitions/schemas.ts | 28 ++++++++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/integrations/zendesk/integration.definition.ts b/integrations/zendesk/integration.definition.ts index d75bf82d8b2..bda05f74d75 100644 --- a/integrations/zendesk/integration.definition.ts +++ b/integrations/zendesk/integration.definition.ts @@ -6,7 +6,7 @@ import { actions, events, configuration, channels, states, user } from './src/de export default new sdk.IntegrationDefinition({ name: 'zendesk', title: 'Zendesk', - version: '3.1.2', + version: '3.1.3', icon: 'icon.svg', description: 'Optimize your support workflow. Trigger workflows from ticket updates as well as manage tickets, access conversations, and engage with customers.', diff --git a/integrations/zendesk/src/definitions/schemas.ts b/integrations/zendesk/src/definitions/schemas.ts index 4687e85b0fe..e4091fe9939 100644 --- a/integrations/zendesk/src/definitions/schemas.ts +++ b/integrations/zendesk/src/definitions/schemas.ts @@ -1,5 +1,5 @@ import { z } from '@botpress/sdk' -import { omit } from 'lodash' +import { omit, pickBy } from 'lodash' const requesterSchema = z.object({ name: z.string().optional().title('Name').describe('Requester name'), @@ -73,23 +73,31 @@ export const userSchema = z.object({ userFields: z.record(z.string()).optional().title('User Fields').describe('Custom user fields'), }) -const _zdUserSchema = userSchema.transform((data) => ({ - ...omit(data, ['createdAt', 'updatedAt', 'externalId', 'userFields', 'remotePhotoUrl']), - created_at: data.createdAt, - updated_at: data.updatedAt, - external_id: data.externalId, - user_fields: data.userFields, - remote_photo_url: data.remotePhotoUrl, -})) +const _zdUserSchema = userSchema + .omit({ userFields: true }) + .extend({ + userFields: z.record(z.string().nullable()).optional(), + }) + .transform((data) => ({ + ...omit(data, ['createdAt', 'updatedAt', 'externalId', 'userFields', 'remotePhotoUrl']), + created_at: data.createdAt, + updated_at: data.updatedAt, + external_id: data.externalId, + user_fields: data.userFields, + remote_photo_url: data.remotePhotoUrl, + })) export type ZendeskUser = z.output export type User = z.input export const transformUser = (ticket: ZendeskUser): User => { + const userFields = ticket.user_fields + ? (pickBy(ticket.user_fields, (value): value is string => value !== null) as Record) + : undefined return { ...omit(ticket, ['external_id', 'user_fields', 'created_at', 'updated_at', 'remote_photo_url']), externalId: ticket.external_id, - userFields: ticket.user_fields, + userFields, createdAt: ticket.created_at, updatedAt: ticket.updated_at, remotePhotoUrl: ticket.remote_photo_url, From cb8b30b0bb2fcf784a33311b0c22db0b5e21f284 Mon Sep 17 00:00:00 2001 From: Yann Allard <42552874+allardy@users.noreply.github.com> Date: Thu, 28 May 2026 15:27:02 -0400 Subject: [PATCH 2/4] feat(cognitive): add tts and image (#15189) --- packages/cognitive/e2e/cognitive-beta.test.ts | 240 ++++++++ .../cognitive-v2/cognitive-beta-image.test.ts | 61 +++ .../cognitive-v2/cognitive-beta-tts.test.ts | 76 +++ packages/cognitive/src/cognitive-v2/index.ts | 182 +++++- packages/cognitive/src/cognitive-v2/models.ts | 516 +++++++++++++++++- packages/cognitive/src/cognitive-v2/types.ts | 122 ++++- packages/cognitive/src/schemas.gen.ts | 2 + packages/common/src/llm/schemas.ts | 2 + 8 files changed, 1178 insertions(+), 23 deletions(-) create mode 100644 packages/cognitive/e2e/cognitive-beta.test.ts create mode 100644 packages/cognitive/src/cognitive-v2/cognitive-beta-image.test.ts create mode 100644 packages/cognitive/src/cognitive-v2/cognitive-beta-tts.test.ts diff --git a/packages/cognitive/e2e/cognitive-beta.test.ts b/packages/cognitive/e2e/cognitive-beta.test.ts new file mode 100644 index 00000000000..9f314c660a5 --- /dev/null +++ b/packages/cognitive/e2e/cognitive-beta.test.ts @@ -0,0 +1,240 @@ +import 'dotenv/config' +import { describe, test, expect, beforeAll } from 'vitest' +import { CognitiveBeta, CognitiveStreamChunk, TtsStreamChunk } from '../src/cognitive-v2' + +const apiUrl = process.env.COGNITIVE_API_URL ?? process.env.CLOUD_API_ENDPOINT ?? 'https://api.botpress.dev' +const botId = process.env.CLOUD_BOT_ID +const token = process.env.CLOUD_PAT + +const hasCreds = !!botId && !!token + +describe.skipIf(!hasCreds)('CognitiveBeta e2e — TTS', () => { + let beta: CognitiveBeta + + beforeAll(() => { + beta = new CognitiveBeta({ apiUrl, botId, token, timeout: 120_000 }) + }) + + test('listVoices returns a non-empty array of well-formed voices', async () => { + const voices = await beta.listVoices() + expect(Array.isArray(voices)).toBe(true) + expect(voices.length).toBeGreaterThan(0) + + const v = voices[0]! + expect(typeof v.id).toBe('string') + expect(typeof v.displayName).toBe('string') + expect(typeof v.provider).toBe('string') + expect(Array.isArray(v.models)).toBe(true) + }, 30_000) + + test('listVoices honors the model filter', async () => { + const voices = await beta.listVoices({ model: 'openai:tts-1' }) + expect(voices.length).toBeGreaterThan(0) + for (const v of voices) { + expect(v.provider).toBe('openai') + expect(v.models).toContain('tts-1') + } + }, 30_000) + + test('generateAudio returns a playable audio URL', async () => { + const res = await beta.generateAudio({ + model: 'openai:tts-1', + input: 'Hello world.', + voice: 'alloy', + format: 'mp3', + }) + + expect(res.output.audioUrl).toMatch(/^https?:\/\//) + expect(res.metadata.provider).toBe('openai') + expect(res.metadata.model).toContain('tts-1') + expect(res.metadata.voice).toBe('alloy') + expect(res.metadata.format).toBe('mp3') + expect(res.metadata.characterCount).toBe('Hello world.'.length) + expect(res.metadata.cost).toBeGreaterThanOrEqual(0) + }, 60_000) + + test('generateAudioStream yields chunks ending with a finished chunk', async () => { + const chunks: TtsStreamChunk[] = [] + for await (const chunk of beta.generateAudioStream({ + model: 'openai:tts-1', + input: 'Streaming test.', + voice: 'alloy', + format: 'mp3', + })) { + chunks.push(chunk) + } + + expect(chunks.length).toBeGreaterThan(0) + + // Intermediate audio chunks are provider-dependent — some providers stream raw audio bytes, + // others (e.g. openai:tts-1) only emit the final chunk with the hosted URL. + const audioChunks = chunks.filter((c): c is Extract => c.finished === false) + for (const c of audioChunks) { + expect(typeof c.audio).toBe('string') + expect(c.audio.length).toBeGreaterThan(0) + } + + const final = chunks[chunks.length - 1]! + expect(final.finished).toBe(true) + if (final.finished) { + expect(final.audioUrl).toMatch(/^https?:\/\//) + expect(final.metadata.provider).toBe('openai') + expect(final.metadata.characterCount).toBe('Streaming test.'.length) + } + }, 90_000) +}) + +describe.skipIf(!hasCreds)('CognitiveBeta e2e — Text generation', () => { + let beta: CognitiveBeta + + beforeAll(() => { + beta = new CognitiveBeta({ apiUrl, botId, token, timeout: 120_000 }) + }) + + test('generateText returns output and usage metadata', async () => { + const res = await beta.generateText({ + messages: [{ role: 'user', content: 'Reply with exactly: pong' }], + model: 'auto', + maxTokens: 500, + }) + + expect(typeof res.output).toBe('string') + expect(res.output.length).toBeGreaterThan(0) + expect(res.metadata.provider).toBeTruthy() + expect(typeof res.metadata.model).toBe('string') + expect(res.metadata.usage.inputTokens).toBeGreaterThan(0) + expect(res.metadata.usage.outputTokens).toBeGreaterThan(0) + expect(res.metadata.cost).toBeGreaterThanOrEqual(0) + }, 60_000) + + test('generateTextStream yields chunks and ends with metadata', async () => { + const chunks: CognitiveStreamChunk[] = [] + for await (const chunk of beta.generateTextStream({ + messages: [{ role: 'user', content: 'Count from 1 to 3, one per line.' }], + model: 'auto', + maxTokens: 500, + })) { + chunks.push(chunk) + } + + expect(chunks.length).toBeGreaterThan(0) + + const aggregated = chunks.map((c) => c.output ?? '').join('') + expect(aggregated.length).toBeGreaterThan(0) + + const final = chunks[chunks.length - 1]! + expect(final.metadata).toBeDefined() + expect(final.metadata?.provider).toBeTruthy() + expect(typeof final.metadata?.model).toBe('string') + }, 90_000) +}) + +describe.skipIf(!hasCreds)('CognitiveBeta e2e — Transcription', () => { + let beta: CognitiveBeta + let audioUrl: string + + beforeAll(async () => { + beta = new CognitiveBeta({ apiUrl, botId, token, timeout: 120_000 }) + + const audio = await beta.generateAudio({ + model: 'openai:tts-1', + input: 'The quick brown fox jumps over the lazy dog.', + voice: 'alloy', + format: 'mp3', + }) + + if (!audio.output.audioUrl) { + throw new Error('generateAudio returned no audioUrl; cannot run transcription e2e') + } + audioUrl = audio.output.audioUrl + }, 60_000) + + test('transcribeAudio returns text and metadata', async () => { + const res = await beta.transcribeAudio({ + url: audioUrl, + model: 'fast', + options: { skipCache: true }, + }) + + expect(typeof res.output).toBe('string') + expect(res.output.length).toBeGreaterThan(0) + expect(res.metadata.provider).toBeTruthy() + expect(typeof res.metadata.model).toBe('string') + expect(res.metadata.durationSeconds).toBeGreaterThan(0) + expect(res.metadata.cost).toBeGreaterThanOrEqual(0) + }, 90_000) +}) + +describe.skipIf(!hasCreds)('CognitiveBeta e2e — Models', () => { + let beta: CognitiveBeta + + beforeAll(() => { + beta = new CognitiveBeta({ apiUrl, botId, token, timeout: 60_000 }) + }) + + test('listModels returns a non-empty array of well-formed models', async () => { + const models = await beta.listModels() + expect(Array.isArray(models)).toBe(true) + expect(models.length).toBeGreaterThan(0) + + const m = models[0]! + expect(typeof m.id).toBe('string') + expect(typeof m.name).toBe('string') + expect(typeof m.description).toBe('string') + expect(typeof m.input.maxTokens).toBe('number') + expect(typeof m.input.costPer1MTokens).toBe('number') + expect(typeof m.output.maxTokens).toBe('number') + expect(typeof m.output.costPer1MTokens).toBe('number') + expect(Array.isArray(m.tags)).toBe(true) + expect(['production', 'preview', 'deprecated', 'discontinued']).toContain(m.lifecycle) + }, 30_000) +}) + +describe.skipIf(!hasCreds)('CognitiveBeta e2e — Image generation', () => { + let beta: CognitiveBeta + + beforeAll(() => { + beta = new CognitiveBeta({ apiUrl, botId, token, timeout: 180_000 }) + }) + + test('generateImage returns a hosted image URL with sensible metadata', async () => { + const res = await beta.generateImage({ + model: 'fast', + prompt: 'A solid red circle on a white background, minimal flat illustration.', + size: '1024x1024', + quality: 'low', + format: 'png', + }) + + expect(res.output.imageUrl).toMatch(/^https?:\/\//) + expect(typeof res.metadata.provider).toBe('string') + expect(res.metadata.provider.length).toBeGreaterThan(0) + expect(typeof res.metadata.model).toBe('string') + expect(res.metadata.format).toBe('png') + expect(typeof res.metadata.size).toBe('string') + expect(res.metadata.size.length).toBeGreaterThan(0) + expect(res.metadata.cost).toBeGreaterThanOrEqual(0) + }, 180_000) + + test('generateImage emits request and response events', async () => { + const events: string[] = [] + const offReq = beta.on('request', (req) => events.push(`request:${req.type}`)) + const offRes = beta.on('response', (req) => events.push(`response:${req.type}`)) + + try { + await beta.generateImage({ + model: 'fast', + prompt: 'A blue square on a white background.', + size: '1024x1024', + quality: 'low', + format: 'png', + }) + + expect(events).toContain('request:generateImage') + expect(events).toContain('response:generateImage') + } finally { + offReq() + offRes() + } + }, 180_000) +}) diff --git a/packages/cognitive/src/cognitive-v2/cognitive-beta-image.test.ts b/packages/cognitive/src/cognitive-v2/cognitive-beta-image.test.ts new file mode 100644 index 00000000000..2fc4f701990 --- /dev/null +++ b/packages/cognitive/src/cognitive-v2/cognitive-beta-image.test.ts @@ -0,0 +1,61 @@ +import { describe, test, expect, vi } from 'vitest' +import { CognitiveBeta } from './index' + +describe('CognitiveBeta.generateImage', () => { + test('POSTs to /v2/cognitive/generate-image and returns parsed body', async () => { + const beta = new CognitiveBeta({ apiUrl: 'http://x', botId: 'b', token: 't' }) + const post = vi.fn().mockResolvedValue({ + data: { + output: { imageUrl: 'https://x/abc.png' }, + metadata: { provider: 'openai', model: 'openai:gpt-image-1', size: '1024x1024', format: 'png', cost: 0.04 }, + }, + }) + ;(beta as any)._axiosClient = { post } + + const result = await beta.generateImage({ prompt: 'a corgi astronaut', size: '1024x1024' }) + + expect(post).toHaveBeenCalledWith( + '/v2/cognitive/generate-image', + expect.objectContaining({ prompt: 'a corgi astronaut', size: '1024x1024' }), + expect.any(Object) + ) + expect(result.output.imageUrl).toBe('https://x/abc.png') + expect(result.metadata.provider).toBe('openai') + expect(result.metadata.format).toBe('png') + }) + + test('emits request and response events around the call', async () => { + const beta = new CognitiveBeta({ apiUrl: 'http://x' }) + const post = vi.fn().mockResolvedValue({ + data: { + output: { imageUrl: 'https://x/y.png' }, + metadata: { provider: 'openai', model: 'openai:gpt-image-1', size: '1024x1024', format: 'png', cost: 0 }, + }, + }) + ;(beta as any)._axiosClient = { post } + + const onRequest = vi.fn() + const onResponse = vi.fn() + beta.on('request', onRequest) + beta.on('response', onResponse) + + await beta.generateImage({ model: 'openai:gpt-image-1', prompt: 'hello', quality: 'high' }) + + expect(onRequest).toHaveBeenCalledTimes(1) + expect(onResponse).toHaveBeenCalledTimes(1) + const [req] = onRequest.mock.calls[0]! + expect(req.type).toBe('generateImage') + }) + + test('emits error event when the call rejects', async () => { + const beta = new CognitiveBeta({ apiUrl: 'http://x' }) + const post = vi.fn().mockRejectedValue(new Error('boom')) + ;(beta as any)._axiosClient = { post } + + const onError = vi.fn() + beta.on('error', onError) + + await expect(beta.generateImage({ prompt: 'x' })).rejects.toThrow('boom') + expect(onError).toHaveBeenCalledTimes(1) + }) +}) diff --git a/packages/cognitive/src/cognitive-v2/cognitive-beta-tts.test.ts b/packages/cognitive/src/cognitive-v2/cognitive-beta-tts.test.ts new file mode 100644 index 00000000000..f350151654d --- /dev/null +++ b/packages/cognitive/src/cognitive-v2/cognitive-beta-tts.test.ts @@ -0,0 +1,76 @@ +import { describe, test, expect, vi } from 'vitest' +import { CognitiveBeta } from './index' + +describe('CognitiveBeta.generateAudio', () => { + test('POSTs to /v2/cognitive/generate-audio and returns parsed body', async () => { + const beta = new CognitiveBeta({ apiUrl: 'http://x', botId: 'b', token: 't' }) + const post = vi.fn().mockResolvedValue({ + data: { + output: { audioUrl: 'https://x/abc.mp3' }, + metadata: { provider: 'openai', cost: 0.0001 }, + }, + }) + ;(beta as any)._axiosClient = { post } + + const result = await beta.generateAudio({ model: 'auto', input: 'hi', voice: 'alloy' }) + + expect(post).toHaveBeenCalledWith( + '/v2/cognitive/generate-audio', + expect.objectContaining({ input: 'hi', voice: 'alloy' }), + expect.any(Object) + ) + expect(result.output.audioUrl).toBe('https://x/abc.mp3') + expect(result.metadata.provider).toBe('openai') + }) + + test('emits request and response events around the call', async () => { + const beta = new CognitiveBeta({ apiUrl: 'http://x' }) + const post = vi.fn().mockResolvedValue({ + data: { output: { audioUrl: 'https://x/y.mp3' }, metadata: { provider: 'openai', cost: 0 } }, + }) + ;(beta as any)._axiosClient = { post } + + const onRequest = vi.fn() + const onResponse = vi.fn() + beta.on('request', onRequest) + beta.on('response', onResponse) + + await beta.generateAudio({ model: 'openai:tts-1', input: 'hello', voice: 'alloy' }) + + expect(onRequest).toHaveBeenCalledTimes(1) + expect(onResponse).toHaveBeenCalledTimes(1) + const [req] = onRequest.mock.calls[0]! + expect(req.type).toBe('generateAudio') + }) +}) + +describe('CognitiveBeta.listVoices', () => { + test('GETs /v2/cognitive/voices with optional filter', async () => { + const beta = new CognitiveBeta({ apiUrl: 'http://x' }) + const get = vi.fn().mockResolvedValue({ + data: { + voices: [{ id: 'alloy', displayName: 'Alloy', provider: 'openai', models: ['tts-1'] }], + }, + }) + ;(beta as any)._axiosClient = { get } + + const voices = await beta.listVoices({ model: 'openai:tts-1' }) + + expect(get).toHaveBeenCalledWith( + '/v2/cognitive/voices', + expect.objectContaining({ params: { model: 'openai:tts-1' } }) + ) + expect(voices).toHaveLength(1) + expect(voices[0]!.id).toBe('alloy') + }) + + test('GETs without params when no filter provided', async () => { + const beta = new CognitiveBeta({ apiUrl: 'http://x' }) + const get = vi.fn().mockResolvedValue({ data: { voices: [] } }) + ;(beta as any)._axiosClient = { get } + + await beta.listVoices() + + expect(get).toHaveBeenCalledWith('/v2/cognitive/voices', expect.objectContaining({ params: {} })) + }) +}) diff --git a/packages/cognitive/src/cognitive-v2/index.ts b/packages/cognitive/src/cognitive-v2/index.ts index 866cdd77caa..272f103cb3a 100644 --- a/packages/cognitive/src/cognitive-v2/index.ts +++ b/packages/cognitive/src/cognitive-v2/index.ts @@ -8,18 +8,42 @@ import { CognitiveStreamChunk, TranscribeRequest, TranscribeResponse, + TtsRequest, + TtsResponse, + TtsStreamChunk, + TtsMetadata, + ImageRequest, + ImageResponse, + ImageMetadata, + Voice, Model, } from './types' -export { CognitiveRequest, CognitiveResponse, CognitiveStreamChunk, TranscribeRequest, TranscribeResponse } +export { + CognitiveRequest, + CognitiveResponse, + CognitiveStreamChunk, + TranscribeRequest, + TranscribeResponse, + TtsRequest, + TtsResponse, + TtsStreamChunk, + TtsMetadata, + ImageRequest, + ImageResponse, + ImageMetadata, + Voice, +} export type BetaTextRequest = { type: 'generateText'; input: CognitiveRequest } export type BetaTranscribeRequest = { type: 'transcribeAudio'; input: TranscribeRequest } -export type BetaRequest = BetaTextRequest | BetaTranscribeRequest +export type BetaTtsRequest = { type: 'generateAudio'; input: TtsRequest } +export type BetaImageRequest = { type: 'generateImage'; input: ImageRequest } +export type BetaRequest = BetaTextRequest | BetaTranscribeRequest | BetaTtsRequest | BetaImageRequest export type BetaEvents = { request: (req: BetaRequest) => void - response: (req: BetaRequest, res: CognitiveResponse | TranscribeResponse) => void + response: (req: BetaRequest, res: CognitiveResponse | TranscribeResponse | TtsResponse | ImageResponse) => void error: (req: BetaRequest, error: any) => void retry: (req: BetaRequest, error: any) => void } @@ -137,6 +161,67 @@ export class CognitiveBeta { return data.models } + public async listVoices(filter: { model?: string; language?: string } = {}): Promise { + const { data } = await this._withServerRetry(() => + this._axiosClient.get<{ voices: Voice[] }>('/v2/cognitive/voices', { + params: filter, + paramsSerializer: { encode: encodeURIComponent }, + }) + ) + + return data.voices + } + + public async generateAudio(input: TtsRequest, options: RequestOptions = {}): Promise { + const signal = options.signal ?? AbortSignal.timeout(this._timeout) + const req: BetaTtsRequest = { type: 'generateAudio', input } + + this._events.emit('request', req) + + try { + const { data } = await this._withServerRetry( + () => + this._axiosClient.post('/v2/cognitive/generate-audio', input, { + signal, + timeout: options.timeout ?? this._timeout, + }), + options, + req + ) + + this._events.emit('response', req, data) + return data + } catch (error) { + this._events.emit('error', req, error) + throw error + } + } + + public async generateImage(input: ImageRequest, options: RequestOptions = {}): Promise { + const signal = options.signal ?? AbortSignal.timeout(this._timeout) + const req: BetaImageRequest = { type: 'generateImage', input } + + this._events.emit('request', req) + + try { + const { data } = await this._withServerRetry( + () => + this._axiosClient.post('/v2/cognitive/generate-image', input, { + signal, + timeout: options.timeout ?? this._timeout, + }), + options, + req + ) + + this._events.emit('response', req, data) + return data + } catch (error) { + this._events.emit('error', req, error) + throw error + } + } + public async transcribeAudio(input: TranscribeRequest, options: RequestOptions = {}) { const signal = options.signal ?? AbortSignal.timeout(this._timeout) const req: BetaTranscribeRequest = { type: 'transcribeAudio', input } @@ -270,6 +355,97 @@ export class CognitiveBeta { } } + public async *generateAudioStream( + input: TtsRequest, + options: RequestOptions = {} + ): AsyncGenerator { + const signal = options.signal ?? AbortSignal.timeout(this._timeout) + const req: BetaTtsRequest = { type: 'generateAudio', input } + let finalChunk: Extract | undefined + + this._events.emit('request', req) + + try { + if (isBrowser()) { + const res = await fetch(`${this._apiUrl}/v2/cognitive/generate-audio-stream`, { + method: 'POST', + headers: { + ...this._headers, + 'Content-Type': 'application/json', + }, + credentials: this._withCredentials ? 'include' : 'omit', + body: JSON.stringify(input), + signal, + }) + + if (!res.ok) { + const text = await res.text().catch(() => '') + const err = new Error(`HTTP ${res.status}: ${text || res.statusText}`) + ;(err as any).response = { status: res.status, data: text } + throw err + } + + const body = res.body + if (!body) { + throw new Error('No response body received for streaming request') + } + + const reader = body.getReader() + const iterable = (async function* () { + for (;;) { + const { value, done } = await reader.read() + if (done) { + break + } + if (value) { + yield value + } + } + })() + + for await (const obj of this._ndjson(iterable)) { + if (obj.finished) { + finalChunk = obj + } + yield obj + } + } else { + const res = await this._withServerRetry( + () => + this._axiosClient.post('/v2/cognitive/generate-audio-stream', input, { + responseType: 'stream', + signal, + timeout: options.timeout ?? this._timeout, + }), + options, + req + ) + + const nodeStream: AsyncIterable = res.data as any + if (!nodeStream) { + throw new Error('No response body received for streaming request') + } + + for await (const obj of this._ndjson(nodeStream)) { + if (obj.finished) { + finalChunk = obj + } + yield obj + } + } + + if (finalChunk) { + this._events.emit('response', req, { + output: { audioUrl: finalChunk.audioUrl }, + metadata: finalChunk.metadata, + }) + } + } catch (error) { + this._events.emit('error', req, error) + throw error + } + } + private async *_ndjson(stream: AsyncIterable): AsyncGenerator { const decoder = new TextDecoder('utf-8') let buffer = '' diff --git a/packages/cognitive/src/cognitive-v2/models.ts b/packages/cognitive/src/cognitive-v2/models.ts index 208cbc670a6..1c82bfd626d 100644 --- a/packages/cognitive/src/cognitive-v2/models.ts +++ b/packages/cognitive/src/cognitive-v2/models.ts @@ -1,6 +1,28 @@ import { Model } from './types' export const models: Record = { + 'openai:gpt-5.5': { + id: 'openai:gpt-5.5', + name: 'GPT-5.5', + description: + 'GPT-5.5 is OpenAI\'s latest frontier model, described as "a new class of intelligence for coding and professional work". It features a 1M+ context window with adaptive reasoning and configurable effort levels, and supports vision, tool use, structured outputs, and server-side web search.', + input: { + maxTokens: 1047576, + costPer1MTokens: 5, + }, + output: { + maxTokens: 128000, + costPer1MTokens: 30, + }, + tags: ['recommended', 'reasoning', 'general-purpose', 'vision', 'coding', 'agents'], + lifecycle: 'production', + capabilities: { + supportsImages: true, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: true, + }, + }, 'openai:gpt-5.4-2026-03-05': { id: 'openai:gpt-5.4-2026-03-05', name: 'GPT-5.4', @@ -310,8 +332,8 @@ export const models: Record = { maxTokens: 32768, costPer1MTokens: 0.4, }, - tags: ['low-cost', 'vision', 'general-purpose'], - lifecycle: 'production', + tags: ['deprecated', 'low-cost', 'vision', 'general-purpose'], + lifecycle: 'deprecated', capabilities: { supportsImages: true, supportsAudio: false, @@ -333,8 +355,8 @@ export const models: Record = { maxTokens: 100000, costPer1MTokens: 4.4, }, - tags: ['reasoning', 'general-purpose', 'coding'], - lifecycle: 'production', + tags: ['deprecated', 'reasoning', 'general-purpose', 'coding'], + lifecycle: 'deprecated', capabilities: { supportsImages: false, supportsAudio: false, @@ -356,8 +378,8 @@ export const models: Record = { maxTokens: 100000, costPer1MTokens: 60, }, - tags: ['reasoning', 'vision', 'general-purpose'], - lifecycle: 'production', + tags: ['deprecated', 'reasoning', 'vision', 'general-purpose'], + lifecycle: 'deprecated', capabilities: { supportsImages: true, supportsAudio: false, @@ -544,6 +566,177 @@ export const models: Record = { supportsSearch: false, }, }, + 'openai:tts-1': { + id: 'openai:tts-1', + name: 'OpenAI TTS-1', + description: 'Standard text-to-speech, low latency', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech', 'recommended'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'openai:tts-1-hd': { + id: 'openai:tts-1-hd', + name: 'OpenAI TTS-1 HD', + description: 'High-definition text-to-speech', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'openai:gpt-4o-mini-tts': { + id: 'openai:gpt-4o-mini-tts', + name: 'GPT-4o Mini TTS', + description: 'Steerable text-to-speech with voice instructions', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech', 'recommended'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'openai:gpt-image-2': { + id: 'openai:gpt-image-2', + name: 'OpenAI gpt-image-2', + description: + "OpenAI's newest native multimodal image generation model. Highest quality, accepts input images for editing.", + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['image-generation', 'recommended'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'openai:gpt-image-1.5': { + id: 'openai:gpt-image-1.5', + name: 'OpenAI gpt-image-1.5', + description: + 'Flagship native multimodal image generation. Strong text rendering, accepts input images for editing.', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['image-generation', 'recommended'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'openai:gpt-image-1-mini': { + id: 'openai:gpt-image-1-mini', + name: 'OpenAI gpt-image-1-mini', + description: 'Affordable variant of gpt-image-1.5 for high-volume, cost-sensitive image generation.', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['image-generation', 'low-cost'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'openai:gpt-image-1': { + id: 'openai:gpt-image-1', + name: 'OpenAI gpt-image-1', + description: 'Original OpenAI native multimodal image generation model. Superseded by gpt-image-1.5 / gpt-image-2.', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['image-generation'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'anthropic:claude-opus-4-7': { + id: 'anthropic:claude-opus-4-7', + name: 'Claude Opus 4.7', + description: + "Claude Opus 4.7 is Anthropic's most capable generally available model, with a step-change improvement in agentic coding over Claude Opus 4.6. Features adaptive thinking for dynamic reasoning allocation, substantially improved vision capabilities, and task budgets for agentic loops. Uses a new tokenizer that may use up to 35% more tokens for the same text.", + input: { + maxTokens: 1000000, + costPer1MTokens: 5, + }, + output: { + maxTokens: 128000, + costPer1MTokens: 25, + }, + tags: ['recommended', 'reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + lifecycle: 'production', + capabilities: { + supportsImages: true, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: true, + }, + }, 'anthropic:claude-opus-4-6': { id: 'anthropic:claude-opus-4-6', name: 'Claude Opus 4.6', @@ -648,7 +841,7 @@ export const models: Record = { costPer1MTokens: 15, }, tags: ['recommended', 'reasoning', 'agents', 'vision', 'general-purpose', 'coding'], - lifecycle: 'production', + lifecycle: 'deprecated', capabilities: { supportsImages: true, supportsAudio: false, @@ -850,7 +1043,7 @@ export const models: Record = { costPer1MTokens: 1.25, }, tags: ['deprecated', 'low-cost', 'general-purpose'], - lifecycle: 'deprecated', + lifecycle: 'discontinued', capabilities: { supportsImages: true, supportsAudio: false, @@ -1037,6 +1230,133 @@ export const models: Record = { }, aliases: ['gemini-3-pro-preview'], }, + 'google-ai:gemini-2.5-flash-preview-tts': { + id: 'google-ai:gemini-2.5-flash-preview-tts', + name: 'Gemini 2.5 Flash TTS', + description: 'Native Gemini text-to-speech, fast tier', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech', 'preview'], + lifecycle: 'preview', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'google-ai:gemini-2.5-pro-preview-tts': { + id: 'google-ai:gemini-2.5-pro-preview-tts', + name: 'Gemini 2.5 Pro TTS', + description: 'Native Gemini text-to-speech, pro tier', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech', 'preview'], + lifecycle: 'preview', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'google-ai:imagen-4.0-ultra-generate-001': { + id: 'google-ai:imagen-4.0-ultra-generate-001', + name: 'Imagen 4 Ultra', + description: "Google's highest-fidelity Imagen 4 variant.", + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['image-generation', 'recommended'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'google-ai:imagen-4.0-generate-001': { + id: 'google-ai:imagen-4.0-generate-001', + name: 'Imagen 4', + description: "Google's standard Imagen 4 image generation model.", + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['image-generation', 'recommended'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'google-ai:imagen-4.0-fast-generate-001': { + id: 'google-ai:imagen-4.0-fast-generate-001', + name: 'Imagen 4 Fast', + description: 'Speed-optimized Imagen 4 variant.', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['image-generation', 'low-cost'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'google-ai:gemini-2.5-flash-image': { + id: 'google-ai:gemini-2.5-flash-image', + name: 'Gemini 2.5 Flash Image', + description: + 'Gemini-native image generation (formerly "Nano Banana"). Token-billed; ~$0.039 per 1024×1024 image. Supports image editing.', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['image-generation'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, 'cerebras:gpt-oss-120b': { id: 'cerebras:gpt-oss-120b', name: 'GPT-OSS 120B (Preview)', @@ -1116,8 +1436,8 @@ export const models: Record = { maxTokens: 16000, costPer1MTokens: 0.1, }, - tags: ['low-cost', 'general-purpose'], - lifecycle: 'production', + tags: ['deprecated', 'low-cost', 'general-purpose'], + lifecycle: 'deprecated', capabilities: { supportsImages: false, supportsAudio: false, @@ -1147,6 +1467,52 @@ export const models: Record = { supportsSearch: false, }, }, + 'groq:qwen3-32b': { + id: 'groq:qwen3-32b', + name: 'Qwen3 32B (Preview)', + description: + 'Qwen3-32B is a reasoning model from Alibaba. It excels in code-gen, tool-calling, and advanced reasoning. Served as a preview model on Groq with fast inference speeds.', + input: { + maxTokens: 131000, + costPer1MTokens: 0.29, + }, + output: { + maxTokens: 16384, + costPer1MTokens: 0.59, + }, + tags: ['preview', 'reasoning', 'general-purpose'], + lifecycle: 'preview', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + aliases: ['qwen/qwen3-32b'], + }, + 'groq:llama-4-scout-17b-16e-instruct': { + id: 'groq:llama-4-scout-17b-16e-instruct', + name: 'Llama 4 Scout 17B (Preview)', + description: + 'Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, using 16 experts per forward pass and activating 17 billion parameters out of a total of 109B. Supports multimodal input (text and image) with multilingual output. Served as a preview model on Groq.', + input: { + maxTokens: 128000, + costPer1MTokens: 0.11, + }, + output: { + maxTokens: 8192, + costPer1MTokens: 0.34, + }, + tags: ['preview', 'vision', 'general-purpose', 'low-cost'], + lifecycle: 'preview', + capabilities: { + supportsImages: true, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + aliases: ['meta-llama/llama-4-scout-17b-16e-instruct'], + }, 'groq:gpt-oss-20b': { id: 'groq:gpt-oss-20b', name: 'GPT-OSS 20B (Preview)', @@ -1712,6 +2078,52 @@ export const models: Record = { supportsSearch: false, }, }, + 'fireworks-ai:kimi-k2p6': { + id: 'fireworks-ai:kimi-k2p6', + name: 'Kimi K2.6', + description: + 'Kimi K2.6 is an open-source, native multimodal agentic model with a 1 trillion parameter mixture-of-experts architecture. It delivers strong performance on agentic and reasoning tasks with a 262K context window.', + input: { + maxTokens: 262144, + costPer1MTokens: 0.95, + }, + output: { + maxTokens: 16384, + costPer1MTokens: 4, + }, + tags: ['recommended', 'reasoning', 'general-purpose', 'agents', 'vision'], + lifecycle: 'production', + capabilities: { + supportsImages: true, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + aliases: ['accounts/fireworks/models/kimi-k2p6'], + }, + 'fireworks-ai:kimi-k2p5': { + id: 'fireworks-ai:kimi-k2p5', + name: 'Kimi K2.5', + description: + 'Kimi K2.5 is an open-source mixture-of-experts agentic model with strong reasoning and tool-use capabilities. Features a 262K context window at a cost-effective price point.', + input: { + maxTokens: 262144, + costPer1MTokens: 0.6, + }, + output: { + maxTokens: 16384, + costPer1MTokens: 3, + }, + tags: ['reasoning', 'general-purpose', 'agents'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + aliases: ['accounts/fireworks/models/kimi-k2p5'], + }, 'fireworks-ai:qwen3-8b': { id: 'fireworks-ai:qwen3-8b', name: 'Qwen3 8B', @@ -2194,6 +2606,90 @@ export const models: Record = { supportsSearch: false, }, }, + 'elevenlabs:eleven_v3': { + id: 'elevenlabs:eleven_v3', + name: 'ElevenLabs v3 (Alpha)', + description: 'Most expressive ElevenLabs model, alpha quality', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech', 'preview'], + lifecycle: 'preview', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'elevenlabs:eleven_multilingual_v2': { + id: 'elevenlabs:eleven_multilingual_v2', + name: 'ElevenLabs Multilingual v2', + description: 'Production multilingual voice synthesis (29 languages)', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech', 'recommended'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'elevenlabs:eleven_turbo_v2_5': { + id: 'elevenlabs:eleven_turbo_v2_5', + name: 'ElevenLabs Turbo v2.5', + description: 'Fast multilingual TTS', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech', 'low-cost'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, + 'elevenlabs:eleven_flash_v2_5': { + id: 'elevenlabs:eleven_flash_v2_5', + name: 'ElevenLabs Flash v2.5', + description: 'Lowest latency TTS, ~75ms TTFB', + input: { + maxTokens: 1, + costPer1MTokens: 0, + }, + output: { + maxTokens: 1, + costPer1MTokens: 0, + }, + tags: ['text-to-speech', 'low-cost', 'recommended'], + lifecycle: 'production', + capabilities: { + supportsImages: false, + supportsAudio: false, + supportsTranscription: false, + supportsSearch: false, + }, + }, } export const defaultModel: Model = { diff --git a/packages/cognitive/src/cognitive-v2/types.ts b/packages/cognitive/src/cognitive-v2/types.ts index ce1a50676b8..9dbbaf43d7e 100644 --- a/packages/cognitive/src/cognitive-v2/types.ts +++ b/packages/cognitive/src/cognitive-v2/types.ts @@ -6,32 +6,45 @@ export type Models = | 'anthropic:claude-haiku-4-5-reasoning-20251001' | 'anthropic:claude-opus-4-5-20251101' | 'anthropic:claude-opus-4-6' - | 'anthropic:claude-sonnet-4-20250514' + | 'anthropic:claude-opus-4-7' | 'anthropic:claude-sonnet-4-5-20250929' | 'anthropic:claude-sonnet-4-6' | 'cerebras:gpt-oss-120b' - | 'cerebras:llama3.1-8b' + | 'elevenlabs:eleven_flash_v2_5' + | 'elevenlabs:eleven_multilingual_v2' + | 'elevenlabs:eleven_turbo_v2_5' + | 'elevenlabs:eleven_v3' | 'fireworks-ai:deepseek-v3p1' | 'fireworks-ai:deepseek-v3p2' | 'fireworks-ai:gpt-oss-120b' | 'fireworks-ai:gpt-oss-20b' + | 'fireworks-ai:kimi-k2p5' + | 'fireworks-ai:kimi-k2p6' | 'fireworks-ai:llama-v3p3-70b-instruct' | 'fireworks-ai:qwen3-8b' | 'google-ai:gemini-2.5-flash' + | 'google-ai:gemini-2.5-flash-image' | 'google-ai:gemini-2.5-flash-lite' + | 'google-ai:gemini-2.5-flash-preview-tts' | 'google-ai:gemini-2.5-pro' + | 'google-ai:gemini-2.5-pro-preview-tts' | 'google-ai:gemini-3-flash' | 'google-ai:gemini-3.1-flash-lite' | 'google-ai:gemini-3.1-pro' + | 'google-ai:imagen-4.0-fast-generate-001' + | 'google-ai:imagen-4.0-generate-001' + | 'google-ai:imagen-4.0-ultra-generate-001' | 'groq:gpt-oss-120b' | 'groq:gpt-oss-20b' | 'groq:llama-3.1-8b-instant' | 'groq:llama-3.3-70b-versatile' + | 'groq:llama-4-scout-17b-16e-instruct' + | 'groq:qwen3-32b' | 'openai:gpt-4.1-2025-04-14' | 'openai:gpt-4.1-mini-2025-04-14' - | 'openai:gpt-4.1-nano-2025-04-14' | 'openai:gpt-4o-2024-11-20' | 'openai:gpt-4o-mini-2024-07-18' + | 'openai:gpt-4o-mini-tts' | 'openai:gpt-5-2025-08-07' | 'openai:gpt-5-mini-2025-08-07' | 'openai:gpt-5-nano-2025-08-07' @@ -41,10 +54,15 @@ export type Models = | 'openai:gpt-5.4-2026-03-05' | 'openai:gpt-5.4-mini-2026-03-17' | 'openai:gpt-5.4-nano-2026-03-17' - | 'openai:o1-2024-12-17' + | 'openai:gpt-5.5' + | 'openai:gpt-image-1' + | 'openai:gpt-image-1-mini' + | 'openai:gpt-image-1.5' + | 'openai:gpt-image-2' | 'openai:o3-2025-04-16' - | 'openai:o3-mini-2025-01-31' | 'openai:o4-mini-2025-04-16' + | 'openai:tts-1' + | 'openai:tts-1-hd' | 'openrouter:gpt-oss-120b' | 'xai:grok-3' | 'xai:grok-3-mini' @@ -84,8 +102,12 @@ export type Models = | 'google-ai:gemini-3.1-flash-lite-preview' | 'google-ai:models/gemini-2.0-flash' | 'google-ai:gemini-3-pro-preview' + | 'groq:qwen/qwen3-32b' + | 'groq:meta-llama/llama-4-scout-17b-16e-instruct' | 'groq:openai/gpt-oss-20b' | 'groq:openai/gpt-oss-120b' + | 'fireworks-ai:accounts/fireworks/models/kimi-k2p6' + | 'fireworks-ai:accounts/fireworks/models/kimi-k2p5' | 'fireworks-ai:accounts/fireworks/models/qwen3-8b' | 'fireworks-ai:accounts/fireworks/models/gpt-oss-20b' | 'fireworks-ai:accounts/fireworks/models/gpt-oss-120b' @@ -292,13 +314,12 @@ export type TranscribeRequest = { options?: CommonRequestOptions } -/** - * Transcription metadata. Picks shared fields from CognitiveMetadata and adds transcription-specific ones. - */ -export type TranscribeMetadata = Pick< +type BaseBetaMetadata = Pick< CognitiveMetadata, 'requestId' | 'provider' | 'cost' | 'latency' | 'cached' | 'fallbackPath' | 'debug' -> & { +> + +export type TranscribeMetadata = BaseBetaMetadata & { /** Full model ID including provider (e.g. groq:whisper-large-v3-turbo) */ model: string /** Audio duration in seconds */ @@ -312,6 +333,85 @@ export type TranscribeResponse = { metadata: TranscribeMetadata } +export type TtsRequest = { + /** TTS model or ordered list of models to try. Additional models are used as fallback. */ + model: string | string[] + input: string + /** Voice id (provider-specific). Use listVoices() to discover available voices. Omit to use the model default voice. */ + voice?: string + /** Audio format. Defaults to mp3. */ + format?: 'mp3' | 'opus' | 'wav' + speed?: number + /** Optional natural-language voice steering instructions (provider-dependent) */ + instructions?: string + language?: string + options?: CommonRequestOptions & { + /** Number of days the generated audio URL should be retained */ + expirationDays?: number + } + meta?: Record +} + +export type TtsMetadata = BaseBetaMetadata & { + /** Full model ID including provider (e.g. openai:tts-1) */ + model: string + voice: string + format: string + /** Number of input characters synthesized */ + characterCount: number + /** Generated audio duration in seconds (omitted by providers that do not expose duration) */ + durationSeconds?: number +} + +export type TtsResponse = { + output: { audioUrl: string | null } + metadata: TtsMetadata +} + +export type TtsStreamChunk = + | { audio: string; finished: false } + | { audioUrl: string | null; metadata: TtsMetadata; finished: true; error?: string } + +export type Voice = { + id: string + displayName: string + provider: string + gender?: 'male' | 'female' | 'neutral' + description?: string + languages?: string[] + tags?: string[] + models: string[] +} + +export type ImageRequest = { + /** Image model or ordered list of models to try. Additional models are used as fallback. Defaults to auto. */ + model?: string | string[] + prompt: string + /** Output size in pixels (e.g. 1024x1024) or aspect ratio (e.g. 16:9). Defaults to the model default. */ + size?: string + quality?: 'low' | 'medium' | 'high' | 'auto' + /** Output image format. Defaults to png. */ + format?: 'png' | 'jpeg' + options?: CommonRequestOptions & { + /** Number of days the generated image URL should be retained */ + expirationDays?: number + } + meta?: Record +} + +export type ImageMetadata = BaseBetaMetadata & { + /** Full model ID including provider (e.g. openai:gpt-image-1) */ + model: string + /** Resolved output size (pixels or ratio) */ + size: string + quality?: string + format: string +} + +export type ImageResponse = + | { output: { imageUrl: string }; metadata: ImageMetadata; error?: never } + | { output: { imageUrl: null }; metadata: ImageMetadata; error: string } + export type ModelTag = | 'recommended' | 'deprecated' @@ -326,6 +426,8 @@ export type ModelTag = | 'reasoning' | 'preview' | 'speech-to-text' + | 'image-generation' + | 'text-to-speech' export type Model = { id: string diff --git a/packages/cognitive/src/schemas.gen.ts b/packages/cognitive/src/schemas.gen.ts index 231fb1e77db..a2049a8e152 100644 --- a/packages/cognitive/src/schemas.gen.ts +++ b/packages/cognitive/src/schemas.gen.ts @@ -155,6 +155,8 @@ export type Model = { | 'reasoning' | 'preview' | 'speech-to-text' + | 'image-generation' + | 'text-to-speech' > input: { maxTokens: number diff --git a/packages/common/src/llm/schemas.ts b/packages/common/src/llm/schemas.ts index f8e4f28fd27..5641245236f 100644 --- a/packages/common/src/llm/schemas.ts +++ b/packages/common/src/llm/schemas.ts @@ -71,6 +71,8 @@ export const ModelSchema = ModelRefSchema.extend({ 'reasoning', 'preview', 'speech-to-text', + 'image-generation', + 'text-to-speech', ]) ), input: z.object({ From 78eb4041b10fe2704222d71fa89055ff14a6c355 Mon Sep 17 00:00:00 2001 From: Yiming Su Date: Thu, 28 May 2026 16:11:35 -0400 Subject: [PATCH 3/4] fix(cli): surface diagnostic context bp dev swallows (#15219) --- packages/cli/package.json | 2 +- packages/cli/src/cli.ts | 6 +- .../command-implementations/base-command.ts | 4 +- .../command-implementations/dev-command.ts | 13 +++- packages/cli/src/errors.test.ts | 63 ++++++++++++++++++ packages/cli/src/errors.ts | 64 +++++++++++++++++-- packages/cli/src/utils/tunnel-utils.ts | 22 +++++-- 7 files changed, 153 insertions(+), 21 deletions(-) create mode 100644 packages/cli/src/errors.test.ts diff --git a/packages/cli/package.json b/packages/cli/package.json index 931b99fb8ca..488fd208f32 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@botpress/cli", - "version": "6.8.1", + "version": "6.8.2", "description": "Botpress CLI", "scripts": { "build": "pnpm run build:types && pnpm run bundle && pnpm run template:gen", diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index e02de405a30..99c1a6d092e 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -9,7 +9,8 @@ import { registerYargs } from './register-yargs' const logError = (thrown: unknown) => { const error = errors.BotpressCLIError.map(thrown) - new Logger().error(error.message) + // genuine crashes only: print the full chain so headless callers (no -v) still get the reason. + new Logger().error(errors.BotpressCLIError.fullStack(error)) } const onError = (thrown: unknown) => { @@ -18,7 +19,8 @@ const onError = (thrown: unknown) => { } const yargsFail = (msg: string) => { - logError(`${msg}\n`) + // usage errors are bad input, not crashes; show the clean message and help, never a stack. + new Logger().error(`${msg}\n`) yargs.showHelp() process.exit(1) } diff --git a/packages/cli/src/command-implementations/base-command.ts b/packages/cli/src/command-implementations/base-command.ts index 6c0c3b7f98c..77f78f7dca6 100644 --- a/packages/cli/src/command-implementations/base-command.ts +++ b/packages/cli/src/command-implementations/base-command.ts @@ -27,9 +27,7 @@ export abstract class BaseCommand { const error = errors.BotpressCLIError.map(thrown) this.logger.error(error.message) - - const stack = error.stack ?? 'No stack trace available' - this.logger.debug(`[${this._cmdName}] ${stack}`) + this.logger.debug(`[${this._cmdName}] ${errors.BotpressCLIError.fullStack(error)}`) exitCode = 1 } finally { diff --git a/packages/cli/src/command-implementations/dev-command.ts b/packages/cli/src/command-implementations/dev-command.ts index 0396e853bb7..26b1eeb56c2 100644 --- a/packages/cli/src/command-implementations/dev-command.ts +++ b/packages/cli/src/command-implementations/dev-command.ts @@ -135,12 +135,16 @@ export class DevCommand extends ProjectCommand { tunnel.send(res) }) .catch((thrown) => { - const err = errors.BotpressCLIError.wrap(thrown, 'An error occurred while handling request') + const err = errors.BotpressCLIError.wrap( + thrown, + `An error occurred while handling request ${req.method} ${req.path}` + ) this.logger.error(err.message) + this.logger.debug(errors.BotpressCLIError.fullStack(err)) tunnel.send({ requestId: req.id, status: 500, - body: err.message, + body: 'Internal error while handling request', }) }) }) @@ -210,6 +214,7 @@ export class DevCommand extends ProjectCommand { } catch (thrown) { const error = errors.BotpressCLIError.wrap(thrown, 'Build failed') this.logger.error(error.message) + this.logger.debug(errors.BotpressCLIError.fullStack(error)) return } @@ -296,7 +301,7 @@ export class DevCommand extends ProjectCommand { }, this.logger ).catch((thrown) => { - throw errors.BotpressCLIError.wrap(thrown, 'Could not start dev worker') + throw errors.BotpressCLIError.wrap(thrown, `Could not start dev worker on port ${port}`) }) return worker @@ -321,6 +326,7 @@ export class DevCommand extends ProjectCommand { const resp = await api.client.getIntegration({ id: devId }).catch(async (thrown) => { const err = errors.BotpressCLIError.wrap(thrown, `Could not find existing dev integration with id "${devId}"`) this.logger.warn(err.message) + this.logger.debug(errors.BotpressCLIError.fullStack(err)) return { integration: undefined } }) @@ -372,6 +378,7 @@ export class DevCommand extends ProjectCommand { const resp = await api.client.getBot({ id: devId }).catch(async (thrown) => { const err = errors.BotpressCLIError.wrap(thrown, `Could not find existing dev bot with id "${devId}"`) this.logger.warn(err.message) + this.logger.debug(errors.BotpressCLIError.fullStack(err)) return { bot: undefined } }) diff --git a/packages/cli/src/errors.test.ts b/packages/cli/src/errors.test.ts new file mode 100644 index 00000000000..83cf77e40ce --- /dev/null +++ b/packages/cli/src/errors.test.ts @@ -0,0 +1,63 @@ +import { AxiosError } from 'axios' +import { describe, expect, it } from 'vitest' +import { BotpressCLIError } from './errors' + +describe('BotpressCLIError.map', () => { + it('maps a bare Error without duplicating its message', () => { + const mapped = BotpressCLIError.map(new Error('boom')) + expect(mapped).toBeInstanceOf(BotpressCLIError) + expect(mapped.message).toBe('boom') + }) + + it('preserves the original thrown error as the cause', () => { + const original = new Error('boom') + const mapped = BotpressCLIError.map(original) + expect(mapped.cause()).toBe(original) + }) + + it('returns a BotpressCLIError unchanged (idempotent)', () => { + const err = new BotpressCLIError('already mapped') + expect(BotpressCLIError.map(err)).toBe(err) + }) +}) + +describe('BotpressCLIError.wrap', () => { + it('chains the cause message when the cause has one', () => { + const wrapped = BotpressCLIError.wrap(new Error('real cause'), 'Build failed') + expect(wrapped.message).toBe('Build failed: real cause') + }) + + it('omits the dangling colon when the cause has no message, but keeps it for fullStack', () => { + const wrapped = BotpressCLIError.wrap(new Error('', { cause: new Error('DEEP_CAUSE_MARKER') }), 'Build failed') + expect(wrapped.message).toBe('Build failed') + expect(BotpressCLIError.fullStack(wrapped)).toContain('DEEP_CAUSE_MARKER') + }) +}) + +describe('BotpressCLIError.fullStack', () => { + it('walks the preserved cause so the original throw site is included', () => { + const mapped = BotpressCLIError.map(new Error('boom')) + // 'caused by:' only appears when a cause is preserved; it would be absent if map() severed it + expect(BotpressCLIError.fullStack(mapped)).toContain('caused by:') + }) + + it('recursively follows native Error.cause', () => { + const inner = new Error('INNER_CAUSE_MARKER') + const outer = new Error('outer', { cause: inner }) + + const mapped = BotpressCLIError.map(outer) + expect(mapped.cause()).toBe(outer) // outer is preserved (one level) + expect(BotpressCLIError.fullStack(mapped)).toContain('INNER_CAUSE_MARKER') + }) + + it('follows axios transport causes without changing the mapped message', () => { + const cause = new Error('AXIOS_CAUSE_MARKER') + const axiosError = new AxiosError('') + axiosError.cause = cause + + const mapped = BotpressCLIError.map(axiosError) + + expect(mapped.message).toBe('') + expect(BotpressCLIError.fullStack(mapped)).toContain('AXIOS_CAUSE_MARKER') + }) +}) diff --git a/packages/cli/src/errors.ts b/packages/cli/src/errors.ts index fa5d433612f..a79ea7f8d30 100644 --- a/packages/cli/src/errors.ts +++ b/packages/cli/src/errors.ts @@ -10,6 +10,11 @@ const isKnownApiError = (e: unknown): e is KnownApiError => client.isApiError(e) export class BotpressCLIError extends VError { public static wrap(thrown: unknown, message: string): BotpressCLIError { const err = BotpressCLIError.map(thrown) + if (!err.message.trim()) { + // the cause carries no message of its own; avoid rendering a dangling ": " + // while still keeping it as a cause so fullStack can surface the deeper chain under --verbose + return new BotpressCLIError(message ?? '', { cause: err }) + } return new BotpressCLIError(err, message ?? '') } @@ -39,20 +44,60 @@ export class BotpressCLIError extends VError { return HTTPError.fromAxios(thrown) } if (thrown instanceof Error) { - const { message } = thrown - return new BotpressCLIError(message) + return new BotpressCLIError(thrown.message, { cause: thrown }) } return new BotpressCLIError(String(thrown)) } public constructor(error: BotpressCLIError, message: string) public constructor(message: string) - public constructor(first: BotpressCLIError | string, second?: string) { + public constructor(message: string, opts: { cause?: Error }) + public constructor(first: BotpressCLIError | string, second?: string | { cause?: Error }) { if (typeof first === 'string') { + if (typeof second === 'object') { + // preserve the original error as a cause without duplicating its message into ours. + // `skipCauseMessage` is supported by verror at runtime (validated against verror@1.10.1) + // but missing from @types/verror, so the option object is typed inline rather than as + // VError.Options. The message-neutrality this relies on is guarded by errors.test.ts. + super({ cause: second.cause, skipCauseMessage: true } as { cause?: Error; skipCauseMessage: boolean }, first) + return + } super(first) return } - super(first, second!) + super(first, second as string) + } + + // VError.fullStack only follows VError causes; this also follows native `Error.cause`/axios + // causes (with a cycle guard). static to mirror VError's own `fullStack(err)`. + public static fullStack(err: Error): string { + return BotpressCLIError._fullStack(err, new Set()) + } + + private static _fullStack(err: Error, seen: Set): string { + if (seen.has(err)) { + return '[Circular error cause]' + } + seen.add(err) + + const stack = err.stack || err.message + const cause = BotpressCLIError._cause(err) + + if (!cause) { + return stack + } + + return `${stack}\ncaused by: ${BotpressCLIError._fullStack(cause, seen)}` + } + + private static _cause(err: Error): Error | undefined { + const vErrorCause = VError.cause(err) + if (vErrorCause) { + return vErrorCause + } + + const nativeCause = (err as { cause?: unknown }).cause + return nativeCause instanceof Error ? nativeCause : undefined } } @@ -73,14 +118,21 @@ export class ExclusiveIntegrationFeatureError extends BotpressCLIError { export class HTTPError extends BotpressCLIError { public constructor( public readonly status: number | undefined, - message: string + message: string, + opts?: { cause?: Error } ) { + if (opts?.cause) { + super(message, opts) + return + } super(message) } public static fromAxios(e: AxiosError<{ message?: string }>): HTTPError { const message = this._axiosMsg(e) - return new HTTPError(e.response?.status, message) + // keep the axios error as a cause so fullStack can show the transport-level chain under --verbose. + // only its message/stack are ever rendered; never serialize this cause — its config holds auth headers. + return new HTTPError(e.response?.status, message, { cause: e }) } public static fromApi(e: KnownApiError): HTTPError { diff --git a/packages/cli/src/utils/tunnel-utils.ts b/packages/cli/src/utils/tunnel-utils.ts index 2d49081296b..95ee4ef8d73 100644 --- a/packages/cli/src/utils/tunnel-utils.ts +++ b/packages/cli/src/utils/tunnel-utils.ts @@ -1,4 +1,5 @@ import { TunnelTail, ClientCloseEvent, ClientErrorEvent, errors } from '@bpinternal/tunnel' +import { BotpressCLIError } from '../errors' import { Logger } from '../logger' import { EventEmitter } from './event-emitter' @@ -22,9 +23,14 @@ export type ReconnectedEvent = { } export class ReconnectionFailedError extends Error { - public constructor(public readonly event: ReconnectionTriggerEvent) { + public constructor( + public readonly event: ReconnectionTriggerEvent, + cause?: Error + ) { const reason = ReconnectionFailedError._reason(event) - super(`Reconnection failed: ${reason}`) + const message = cause ? `Reconnection failed: ${reason}: ${cause.message}` : `Reconnection failed: ${reason}` + const options = cause ? { cause } : undefined + super(message, options) } private static _reason(event: ReconnectionTriggerEvent): string { @@ -46,7 +52,7 @@ export class TunnelSupervisor { private _started = false public readonly events = new EventEmitter<{ - connectionFailed: ReconnectionTriggerEvent + connectionFailed: { ev: ReconnectionTriggerEvent; cause: Error } manuallyClosed: null connected: { tunnel: TunnelTail @@ -86,8 +92,8 @@ export class TunnelSupervisor { } return new Promise((resolve, reject) => { - this.events.on('connectionFailed', (ev) => { - reject(new ReconnectionFailedError(ev)) + this.events.on('connectionFailed', ({ ev, cause }) => { + reject(new ReconnectionFailedError(ev, cause)) }) this.events.on('manuallyClosed', () => { @@ -111,7 +117,11 @@ export class TunnelSupervisor { .then((t) => { this._tunnel = t }) - .catch(() => this.events.emit('connectionFailed', ev)) + .catch((thrown) => { + // carry the real failure as the cause; the dev server then tears down and the single + // "running the dev server" error surfaces this reason (avoids a duplicate log line here) + this.events.emit('connectionFailed', { ev, cause: BotpressCLIError.map(thrown) }) + }) } private async _reconnect(ev: ReconnectionTriggerEvent): Promise { From 32477377117d80ce001b57dd9d658d8e8a6dab84 Mon Sep 17 00:00:00 2001 From: Mak <98408710+makhlouf1102@users.noreply.github.com> Date: Thu, 28 May 2026 16:34:28 -0400 Subject: [PATCH 4/4] feat(monday): add OAuth setup wizard step (#15229) Co-authored-by: Makhlouf Hennine --- integrations/monday/integration.definition.ts | 2 +- .../monday/src/oauth-wizard/wizard.ts | 91 +++++++++++-------- 2 files changed, 56 insertions(+), 37 deletions(-) diff --git a/integrations/monday/integration.definition.ts b/integrations/monday/integration.definition.ts index 2b9b28deb25..d147f0ad981 100644 --- a/integrations/monday/integration.definition.ts +++ b/integrations/monday/integration.definition.ts @@ -5,7 +5,7 @@ export default new IntegrationDefinition({ name: 'monday', title: 'Monday', description: 'Manage items in Monday boards.', - version: '1.1.2', + version: '1.1.3', readme: 'hub.md', icon: 'icon.svg', states: { diff --git a/integrations/monday/src/oauth-wizard/wizard.ts b/integrations/monday/src/oauth-wizard/wizard.ts index 70a4697af26..9cd38b85f24 100644 --- a/integrations/monday/src/oauth-wizard/wizard.ts +++ b/integrations/monday/src/oauth-wizard/wizard.ts @@ -1,38 +1,58 @@ -import { OAUTH_IDENTIFIER_HEADER, RuntimeError, type Response } from '@botpress/sdk' +import * as oauthWizard from '@botpress/common/src/oauth-wizard' +import { RuntimeError, type Response } from '@botpress/sdk' import { createOAuthMondayClient } from 'src/misc/auth' import { exchangeCodeForTokens } from 'src/misc/monday-client' import * as bp from '.botpress' const OAUTH_CONFIGURATION_ERROR_MESSAGE = 'Unable to complete the Monday OAuth setup. Please try again.' -const BASE_WIZARD_PATH = '/oauth/wizard/' const DISABLE_INTERSTITIAL_HEADER = { 'x-bp-disable-interstitial': 'true' } as const const SCOPES = 'boards:read boards:write' -export const handler = async (props: bp.HandlerProps) => { - if (!isOAuthWizardUrl(props.req.path)) { - throw new RuntimeError('Invalid OAuth wizard URL') - } +type WizardHandler = oauthWizard.WizardStepHandler - const stepId = props.req.path.slice(BASE_WIZARD_PATH.length) - const query = new URLSearchParams(props.req.query) +const getMondayInstallUrl = () => { + const url = new URL('https://auth.monday.com/oauth2/authorize') + url.search = new URLSearchParams({ + client_id: bp.secrets.CLIENT_ID, + response_type: 'install', + }).toString() + return url.toString() +} - if (stepId === 'oauth-redirect') { - return await _oauthRedirectHandler(props) - } +export const handler = async (props: bp.HandlerProps) => { + const wizard = new oauthWizard.OAuthWizardBuilder(props) + .addStep({ id: 'start', handler: _startHandler }) + .addStep({ id: 'oauth-redirect', handler: _oauthRedirectHandler }) + .addStep({ id: 'oauth-callback', handler: _oauthCallbackHandler }) + .build() - if (stepId === 'oauth-callback') { - return await _oauthCallbackHandler(props, query) - } + return await wizard.handleRequest() +} - throw new RuntimeError(`Unknown OAuth wizard step: ${stepId}`) +const _startHandler: WizardHandler = ({ responses }) => { + return responses.displayButtons({ + pageTitle: 'Connect Monday.com', + htmlOrMarkdownPageContents: + `1. Open the Monday.com install page and install the Botpress app in your workspace.\n` + + '2. Come back to this page after the installation is complete.\n' + + '3. Click **Next step** to start the OAuth connection.', + buttons: [ + { + action: 'navigate', + label: 'Next step', + navigateToStep: 'oauth-redirect', + buttonType: 'primary', + }, + ], + }) } -const _oauthRedirectHandler = async ({ ctx }: bp.HandlerProps) => { +const _oauthRedirectHandler: WizardHandler = async ({ ctx, responses }) => { try { const url = new URL('https://auth.monday.com/oauth2/authorize') const params = new URLSearchParams({ client_id: bp.secrets.CLIENT_ID, - redirect_uri: getOAuthRedirectUri(), + redirect_uri: getOAuthRedirectUri().toString(), response_type: 'code', scope: SCOPES, state: ctx.webhookId, @@ -40,26 +60,29 @@ const _oauthRedirectHandler = async ({ ctx }: bp.HandlerProps) => { }) url.search = params.toString() - return redirectToUrl(url) + return responses.redirectToExternalUrl(url.toString()) } catch (thrown) { - return redirectToInterstitial(false, _formatWizardError(thrown, OAUTH_CONFIGURATION_ERROR_MESSAGE)) + return responses.endWizard({ + success: false, + errorMessage: _formatWizardError(thrown, OAUTH_CONFIGURATION_ERROR_MESSAGE), + }) } } -const _oauthCallbackHandler = async ({ ctx, client }: bp.HandlerProps, query: URLSearchParams) => { +const _oauthCallbackHandler: WizardHandler = async ({ ctx, client, query, responses, setIntegrationIdentifier }) => { try { const code = query.get('code') const state = query.get('state') if (!code) { - return redirectToInterstitial(false, 'Missing OAuth code') + return responses.endWizard({ success: false, errorMessage: 'Missing OAuth code' }) } if (state !== ctx.webhookId) { - return redirectToInterstitial(false, 'Invalid OAuth state') + return responses.endWizard({ success: false, errorMessage: 'Invalid OAuth state' }) } - const credentials = await _exchangeCodeForTokens({ code, redirectUri: getOAuthRedirectUri() }) + const credentials = await _exchangeCodeForTokens({ code, redirectUri: getOAuthRedirectUri().toString() }) const mondayClient = createOAuthMondayClient(credentials.accessToken) await mondayClient.validateAccessToken() @@ -72,18 +95,14 @@ const _oauthCallbackHandler = async ({ ctx, client }: bp.HandlerProps, query: UR }, }) - await client.configureIntegration({ identifier: ctx.webhookId }) + setIntegrationIdentifier(ctx.webhookId) - const response = redirectToInterstitial(true) - return { - ...response, - headers: { - ...response.headers, - [OAUTH_IDENTIFIER_HEADER]: ctx.webhookId, - }, - } + return responses.endWizard({ success: true }) } catch (thrown) { - return redirectToInterstitial(false, _formatWizardError(thrown, OAUTH_CONFIGURATION_ERROR_MESSAGE)) + return responses.endWizard({ + success: false, + errorMessage: _formatWizardError(thrown, OAUTH_CONFIGURATION_ERROR_MESSAGE), + }) } } @@ -106,11 +125,11 @@ const _exchangeCodeForTokens = async ({ code, redirectUri }: { code: string; red } } -const getWizardStepUrl = (stepId: string) => new URL(`${BASE_WIZARD_PATH}${stepId}`, process.env.BP_WEBHOOK_URL) +const getWizardStepUrl = (stepId: string) => oauthWizard.getWizardStepUrl(stepId) -const getOAuthRedirectUri = () => getWizardStepUrl('oauth-callback').toString() +const getOAuthRedirectUri = () => getWizardStepUrl('oauth-callback') -export const isOAuthWizardUrl = (path: string) => path.startsWith(BASE_WIZARD_PATH) +export const isOAuthWizardUrl = oauthWizard.isOAuthWizardUrl const redirectToUrl = (url: URL): Response => ({ status: 303,