Commit c431968

Minimize CanopyWave routing change
1 parent 3cafdd4 commit c431968

1 file changed: web/src/llm-api/canopywave.ts
Lines changed: 33 additions & 120 deletions
@@ -34,7 +34,8 @@ interface CanopyWavePricing {
   outputCostPerToken: number
 }

-/** Single source of truth: CanopyWave model metadata and per-model pricing. */
+/** Single source of truth for CanopyWave model metadata and pricing.
+ * Kept as one map so adding a model can't drift between routing and billing. */
 const CANOPYWAVE_MODELS: Record<
   string,
   { canopywaveId: string; pricing: CanopyWavePricing }
@@ -52,7 +53,7 @@ const CANOPYWAVE_MODELS: Record<
     pricing: {
       inputCostPerToken: 0.95 / 1_000_000,
       cachedInputCostPerToken: 0.16 / 1_000_000,
-      outputCostPerToken: 4.0 / 1_000_000,
+      outputCostPerToken: 4.00 / 1_000_000,
     },
   },
 }
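Note: the rewritten doc comment is the design point of this hunk. Routing (canopywaveId) and billing (pricing) live in the same entry, so a new model is wired up in exactly one place. A hypothetical entry following the shape in this diff (the model ID and prices below are made up, not real CanopyWave data):

interface CanopyWavePricing {
  inputCostPerToken: number
  cachedInputCostPerToken: number
  outputCostPerToken: number
}

// Hypothetical model entry; not in the real map.
const exampleEntry: { canopywaveId: string; pricing: CanopyWavePricing } = {
  canopywaveId: 'vendor/example-model-v1',
  pricing: {
    inputCostPerToken: 1.0 / 1_000_000, // $1.00 per million input tokens
    cachedInputCostPerToken: 0.25 / 1_000_000,
    outputCostPerToken: 5.0 / 1_000_000,
  },
}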
@@ -75,12 +76,7 @@ function getCanopyWavePricing(model: string): CanopyWavePricing {
   return entry.pricing
 }

-type StreamState = {
-  responseText: string
-  reasoningText: string
-  ttftMs: number | null
-  billedAlready: boolean
-}
+type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean }

 type LineResult = {
   state: StreamState
@@ -129,39 +125,15 @@ function createCanopyWaveRequest(params: {
   })
 }

-function extractUsageAndCost(
-  usage: Record<string, unknown> | undefined | null,
-  model: string,
-): UsageData {
-  if (!usage)
-    return {
-      inputTokens: 0,
-      outputTokens: 0,
-      cacheReadInputTokens: 0,
-      reasoningTokens: 0,
-      cost: 0,
-    }
-  const promptDetails = usage.prompt_tokens_details as
-    | Record<string, unknown>
-    | undefined
-    | null
-  const completionDetails = usage.completion_tokens_details as
-    | Record<string, unknown>
-    | undefined
-    | null
-
-  const inputTokens =
-    typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0
-  const outputTokens =
-    typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0
-  const cacheReadInputTokens =
-    typeof promptDetails?.cached_tokens === 'number'
-      ? promptDetails.cached_tokens
-      : 0
-  const reasoningTokens =
-    typeof completionDetails?.reasoning_tokens === 'number'
-      ? completionDetails.reasoning_tokens
-      : 0
+function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
+  if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
+  const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
+  const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
+
+  const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0
+  const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0
+  const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0
+  const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0

   const pricing = getCanopyWavePricing(model)
   const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
@@ -170,13 +142,7 @@ function extractUsageAndCost(
     cacheReadInputTokens * pricing.cachedInputCostPerToken +
     outputTokens * pricing.outputCostPerToken

-  return {
-    inputTokens,
-    outputTokens,
-    cacheReadInputTokens,
-    reasoningTokens,
-    cost,
-  }
+  return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
 }

 export async function handleCanopyWaveNonStream({
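Note: with extractUsageAndCost collapsed, the entire billing formula fits on a few lines. A self-contained sketch of the math using the pricing constants from this diff (the token counts are illustrative, not real traffic):

const pricing = {
  inputCostPerToken: 0.95 / 1_000_000,
  cachedInputCostPerToken: 0.16 / 1_000_000,
  outputCostPerToken: 4.00 / 1_000_000,
}
const inputTokens = 10_000
const cacheReadInputTokens = 6_000 // subset of inputTokens served from cache
const outputTokens = 2_000

// Cached tokens are carved out of the input count so they aren't billed at both rates.
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
const cost =
  nonCachedInputTokens * pricing.inputCostPerToken + // 4_000 * 0.95 / 1M = $0.00380
  cacheReadInputTokens * pricing.cachedInputCostPerToken + // 6_000 * 0.16 / 1M = $0.00096
  outputTokens * pricing.outputCostPerToken // 2_000 * 4.00 / 1M = $0.00800
console.log(cost.toFixed(5)) // "0.01276"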
@@ -198,10 +164,7 @@ export async function handleCanopyWaveNonStream({
 }) {
   const originalModel = body.model
   const startTime = new Date()
-  const { clientId, clientRequestId, costMode } = extractRequestMetadata({
-    body,
-    logger,
-  })
+  const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger })

   const response = await createCanopyWaveRequest({ body, originalModel, fetch })

@@ -211,10 +174,7 @@

   const data = await response.json()
   const content = data.choices?.[0]?.message?.content ?? ''
-  const reasoningText =
-    data.choices?.[0]?.message?.reasoning_content ??
-    data.choices?.[0]?.message?.reasoning ??
-    ''
+  const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
   const usageData = extractUsageAndCost(data.usage, originalModel)

   insertMessageToBigQuery({
@@ -281,10 +241,7 @@ export async function handleCanopyWaveStream({
 }) {
   const originalModel = body.model
   const startTime = new Date()
-  const { clientId, clientRequestId, costMode } = extractRequestMetadata({
-    body,
-    logger,
-  })
+  const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger })

   const response = await createCanopyWaveRequest({ body, originalModel, fetch })

@@ -298,12 +255,7 @@ export async function handleCanopyWaveStream({
   }

   let heartbeatInterval: NodeJS.Timeout
-  let state: StreamState = {
-    responseText: '',
-    reasoningText: '',
-    ttftMs: null,
-    billedAlready: false,
-  }
+  let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null, billedAlready: false }
   let clientDisconnected = false

   const stream = new ReadableStream({
@@ -364,13 +316,9 @@

         if (!clientDisconnected) {
           try {
-            controller.enqueue(
-              new TextEncoder().encode(lineResult.patchedLine),
-            )
+            controller.enqueue(new TextEncoder().encode(lineResult.patchedLine))
           } catch {
-            logger.warn(
-              'Client disconnected during stream, continuing for billing',
-            )
+            logger.warn('Client disconnected during stream, continuing for billing')
             clientDisconnected = true
           }
         }
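Note: the try/catch kept here (only reflowed) is load-bearing: enqueue() throws once the consumer has cancelled the stream, and that exception is the only disconnect signal, so the handler swallows it and keeps draining upstream for billing. A minimal demo of that failure mode, assuming a web-streams runtime such as Node 18+:

const stream = new ReadableStream<Uint8Array>({
  start(controller) {
    // Simulate a chunk arriving after the client has gone away.
    setTimeout(() => {
      try {
        controller.enqueue(new TextEncoder().encode('late chunk'))
      } catch {
        console.log('consumer cancelled; keep reading upstream so billing still lands')
      }
    }, 10)
  },
})
await stream.getReader().cancel() // the consumer walks away immediately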
@@ -490,17 +438,13 @@ async function handleLine({
   }

   const patchedLine = `data: ${JSON.stringify(obj)}\n`
-  return {
-    state: result.state,
-    billedCredits: result.billedCredits,
-    patchedLine,
-  }
+  return { state: result.state, billedCredits: result.billedCredits, patchedLine }
 }

 function isFinalChunk(data: Record<string, unknown>): boolean {
   const choices = data.choices as Array<Record<string, unknown>> | undefined
   if (!choices || choices.length === 0) return true
-  return choices.some((c) => c.finish_reason != null)
+  return choices.some(c => c.finish_reason != null)
 }

 async function handleResponse({
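Note: the tightened isFinalChunk is easy to sanity-check against representative chunks (sample payloads, not captured traffic). Missing or empty choices are deliberately treated as final, presumably because usage-only chunks can arrive with an empty choices array:

function isFinalChunk(data: Record<string, unknown>): boolean {
  const choices = data.choices as Array<Record<string, unknown>> | undefined
  if (!choices || choices.length === 0) return true
  return choices.some(c => c.finish_reason != null)
}

isFinalChunk({ choices: [{ delta: { content: 'hi' }, finish_reason: null }] }) // false: mid-stream delta
isFinalChunk({ choices: [{ delta: {}, finish_reason: 'stop' }] }) // true: finish_reason is set
isFinalChunk({ choices: [], usage: { total_tokens: 42 } }) // true: usage-only chunk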
@@ -532,24 +476,11 @@ async function handleResponse({
   logger: Logger
   insertMessage: InsertMessageBigqueryFn
 }): Promise<{ state: StreamState; billedCredits?: number }> {
-  state = handleStreamChunk({
-    data,
-    state,
-    startTime,
-    logger,
-    userId,
-    agentId,
-    model: originalModel,
-  })
+  state = handleStreamChunk({ data, state, startTime, logger, userId, agentId, model: originalModel })

   // Some providers send cumulative usage on EVERY chunk (not just the final one),
   // so we must only bill once on the final chunk to avoid charging N times.
-  if (
-    'error' in data ||
-    !data.usage ||
-    state.billedAlready ||
-    !isFinalChunk(data)
-  ) {
+  if ('error' in data || !data.usage || state.billedAlready || !isFinalChunk(data)) {
     // Strip usage from non-final chunks and duplicate final chunks
     // so the SDK doesn't see multiple usage objects
     if (data.usage && (!isFinalChunk(data) || state.billedAlready)) {
@@ -558,10 +489,7 @@
     return { state }
   }

-  const usageData = extractUsageAndCost(
-    data.usage as Record<string, unknown>,
-    originalModel,
-  )
+  const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
   const messageId = typeof data.id === 'string' ? data.id : 'unknown'

   state.billedAlready = true
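Note: the guard above collapses to one line, but its behavior is worth spelling out. A toy simulation of the cumulative-usage provider described in the comment, using a simplified version of the same guard (the chunk sequence is hypothetical):

type Chunk = {
  choices: Array<{ finish_reason: string | null }>
  usage?: { completion_tokens: number }
}
// Cumulative usage arrives on every chunk, not just the last one.
const chunks: Chunk[] = [
  { choices: [{ finish_reason: null }], usage: { completion_tokens: 1 } },
  { choices: [{ finish_reason: null }], usage: { completion_tokens: 2 } },
  { choices: [{ finish_reason: 'stop' }], usage: { completion_tokens: 2 } },
]

let billedAlready = false
let billedTimes = 0
for (const chunk of chunks) {
  const final = chunk.choices.some(c => c.finish_reason != null)
  if (!chunk.usage || billedAlready || !final) continue // same shape as the guard above
  billedAlready = true
  billedTimes++
}
console.log(billedTimes) // 1 — billed exactly once, on the final chunk, not N times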
@@ -649,27 +577,17 @@ function handleStreamChunk({
     if (state.responseText.length >= MAX_BUFFER_SIZE) {
       state.responseText =
         state.responseText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---'
-      logger.warn(
-        { userId, agentId, model },
-        'Response text buffer truncated at 1MB',
-      )
+      logger.warn({ userId, agentId, model }, 'Response text buffer truncated at 1MB')
     }
   }

-  const reasoningDelta =
-    typeof delta?.reasoning_content === 'string'
-      ? delta.reasoning_content
-      : typeof delta?.reasoning === 'string'
-        ? delta.reasoning
-        : ''
+  const reasoningDelta = typeof delta?.reasoning_content === 'string' ? delta.reasoning_content
+    : typeof delta?.reasoning === 'string' ? delta.reasoning
+    : ''

   // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls)
-  const hasToolCallsDelta =
-    delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0
-  if (
-    state.ttftMs === null &&
-    (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)
-  ) {
+  const hasToolCallsDelta = delta?.tool_calls != null && (delta.tool_calls as unknown[])?.length > 0
+  if (state.ttftMs === null && (contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta)) {
     state.ttftMs = Date.now() - startTime.getTime()
   }
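Note: the collapsed TTFT condition now reads in one line: stamp the clock on the first meaningful delta (content, reasoning, or tool calls) and never again. A toy version with the delta shape simplified from the hunk above:

let ttftMs: number | null = null
const startTime = new Date()

function onDelta(delta: { content?: string; reasoning?: string; tool_calls?: unknown[] }) {
  const hasText = (delta.content ?? '') !== '' || (delta.reasoning ?? '') !== ''
  const hasToolCalls = delta.tool_calls != null && delta.tool_calls.length > 0
  if (ttftMs === null && (hasText || hasToolCalls)) {
    ttftMs = Date.now() - startTime.getTime() // set once, on the first real token
  }
}

onDelta({}) // role-only/empty delta: TTFT stays null
onDelta({ content: 'Hello' }) // first real token: TTFT is stamped
onDelta({ content: ' world' }) // later deltas leave it alone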
@@ -678,10 +596,7 @@
     if (state.reasoningText.length >= MAX_BUFFER_SIZE) {
       state.reasoningText =
         state.reasoningText.slice(0, MAX_BUFFER_SIZE) + '\n---[TRUNCATED]---'
-      logger.warn(
-        { userId, agentId, model },
-        'Reasoning text buffer truncated at 1MB',
-      )
+      logger.warn({ userId, agentId, model }, 'Reasoning text buffer truncated at 1MB')
     }
   }

@@ -715,9 +630,7 @@ export class CanopyWaveError extends Error {
   }
 }

-async function parseCanopyWaveError(
-  response: Response,
-): Promise<CanopyWaveError> {
+async function parseCanopyWaveError(response: Response): Promise<CanopyWaveError> {
   const errorText = await response.text()
   let errorBody: CanopyWaveError['errorBody']
   try {
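Note: the hunk cuts off inside parseCanopyWaveError's try block, so its body isn't shown here. The usual shape for this kind of helper — an assumption about the pattern, not a quote from this file — is JSON.parse with a raw-text fallback:

// Hedged sketch only; parseCanopyWaveError's real body is not in this diff.
async function parseProviderError(response: Response): Promise<Error> {
  const errorText = await response.text()
  let detail: unknown
  try {
    detail = JSON.parse(errorText) // structured error body, if the provider sent JSON
  } catch {
    detail = errorText // otherwise keep the raw text for the log
  }
  return new Error(`Upstream ${response.status}: ${JSON.stringify(detail)}`)
}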
