@@ -34,7 +34,8 @@ interface CanopyWavePricing {
3434 outputCostPerToken : number
3535}
3636
37- /** Single source of truth: CanopyWave model metadata and per-model pricing. */
37+ /** Single source of truth for CanopyWave model metadata and pricing.
38+ * Kept as one map so adding a model can't drift between routing and billing. */
3839const CANOPYWAVE_MODELS : Record <
3940 string ,
4041 { canopywaveId : string ; pricing : CanopyWavePricing }
@@ -52,7 +53,7 @@ const CANOPYWAVE_MODELS: Record<
5253 pricing : {
5354 inputCostPerToken : 0.95 / 1_000_000 ,
5455 cachedInputCostPerToken : 0.16 / 1_000_000 ,
55- outputCostPerToken : 4.0 / 1_000_000 ,
56+ outputCostPerToken : 4.00 / 1_000_000 ,
5657 } ,
5758 } ,
5859}
@@ -75,12 +76,7 @@ function getCanopyWavePricing(model: string): CanopyWavePricing {
7576 return entry . pricing
7677}
7778
78- type StreamState = {
79- responseText : string
80- reasoningText : string
81- ttftMs : number | null
82- billedAlready : boolean
83- }
79+ type StreamState = { responseText : string ; reasoningText : string ; ttftMs : number | null ; billedAlready : boolean }
8480
8581type LineResult = {
8682 state : StreamState
@@ -129,39 +125,15 @@ function createCanopyWaveRequest(params: {
129125 } )
130126}
131127
132- function extractUsageAndCost (
133- usage : Record < string , unknown > | undefined | null ,
134- model : string ,
135- ) : UsageData {
136- if ( ! usage )
137- return {
138- inputTokens : 0 ,
139- outputTokens : 0 ,
140- cacheReadInputTokens : 0 ,
141- reasoningTokens : 0 ,
142- cost : 0 ,
143- }
144- const promptDetails = usage . prompt_tokens_details as
145- | Record < string , unknown >
146- | undefined
147- | null
148- const completionDetails = usage . completion_tokens_details as
149- | Record < string , unknown >
150- | undefined
151- | null
152-
153- const inputTokens =
154- typeof usage . prompt_tokens === 'number' ? usage . prompt_tokens : 0
155- const outputTokens =
156- typeof usage . completion_tokens === 'number' ? usage . completion_tokens : 0
157- const cacheReadInputTokens =
158- typeof promptDetails ?. cached_tokens === 'number'
159- ? promptDetails . cached_tokens
160- : 0
161- const reasoningTokens =
162- typeof completionDetails ?. reasoning_tokens === 'number'
163- ? completionDetails . reasoning_tokens
164- : 0
128+ function extractUsageAndCost ( usage : Record < string , unknown > | undefined | null , model : string ) : UsageData {
129+ if ( ! usage ) return { inputTokens : 0 , outputTokens : 0 , cacheReadInputTokens : 0 , reasoningTokens : 0 , cost : 0 }
130+ const promptDetails = usage . prompt_tokens_details as Record < string , unknown > | undefined | null
131+ const completionDetails = usage . completion_tokens_details as Record < string , unknown > | undefined | null
132+
133+ const inputTokens = typeof usage . prompt_tokens === 'number' ? usage . prompt_tokens : 0
134+ const outputTokens = typeof usage . completion_tokens === 'number' ? usage . completion_tokens : 0
135+ const cacheReadInputTokens = typeof promptDetails ?. cached_tokens === 'number' ? promptDetails . cached_tokens : 0
136+ const reasoningTokens = typeof completionDetails ?. reasoning_tokens === 'number' ? completionDetails . reasoning_tokens : 0
165137
166138 const pricing = getCanopyWavePricing ( model )
167139 const nonCachedInputTokens = Math . max ( 0 , inputTokens - cacheReadInputTokens )
@@ -170,13 +142,7 @@ function extractUsageAndCost(
170142 cacheReadInputTokens * pricing . cachedInputCostPerToken +
171143 outputTokens * pricing . outputCostPerToken
172144
173- return {
174- inputTokens,
175- outputTokens,
176- cacheReadInputTokens,
177- reasoningTokens,
178- cost,
179- }
145+ return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
180146}
181147
182148export async function handleCanopyWaveNonStream ( {
@@ -198,10 +164,7 @@ export async function handleCanopyWaveNonStream({
198164} ) {
199165 const originalModel = body . model
200166 const startTime = new Date ( )
201- const { clientId, clientRequestId, costMode } = extractRequestMetadata ( {
202- body,
203- logger,
204- } )
167+ const { clientId, clientRequestId, costMode } = extractRequestMetadata ( { body, logger } )
205168
206169 const response = await createCanopyWaveRequest ( { body, originalModel, fetch } )
207170
@@ -211,10 +174,7 @@ export async function handleCanopyWaveNonStream({
211174
212175 const data = await response . json ( )
213176 const content = data . choices ?. [ 0 ] ?. message ?. content ?? ''
214- const reasoningText =
215- data . choices ?. [ 0 ] ?. message ?. reasoning_content ??
216- data . choices ?. [ 0 ] ?. message ?. reasoning ??
217- ''
177+ const reasoningText = data . choices ?. [ 0 ] ?. message ?. reasoning_content ?? data . choices ?. [ 0 ] ?. message ?. reasoning ?? ''
218178 const usageData = extractUsageAndCost ( data . usage , originalModel )
219179
220180 insertMessageToBigQuery ( {
@@ -281,10 +241,7 @@ export async function handleCanopyWaveStream({
281241} ) {
282242 const originalModel = body . model
283243 const startTime = new Date ( )
284- const { clientId, clientRequestId, costMode } = extractRequestMetadata ( {
285- body,
286- logger,
287- } )
244+ const { clientId, clientRequestId, costMode } = extractRequestMetadata ( { body, logger } )
288245
289246 const response = await createCanopyWaveRequest ( { body, originalModel, fetch } )
290247
@@ -298,12 +255,7 @@ export async function handleCanopyWaveStream({
298255 }
299256
300257 let heartbeatInterval : NodeJS . Timeout
301- let state : StreamState = {
302- responseText : '' ,
303- reasoningText : '' ,
304- ttftMs : null ,
305- billedAlready : false ,
306- }
258+ let state : StreamState = { responseText : '' , reasoningText : '' , ttftMs : null , billedAlready : false }
307259 let clientDisconnected = false
308260
309261 const stream = new ReadableStream ( {
@@ -364,13 +316,9 @@ export async function handleCanopyWaveStream({
364316
365317 if ( ! clientDisconnected ) {
366318 try {
367- controller . enqueue (
368- new TextEncoder ( ) . encode ( lineResult . patchedLine ) ,
369- )
319+ controller . enqueue ( new TextEncoder ( ) . encode ( lineResult . patchedLine ) )
370320 } catch {
371- logger . warn (
372- 'Client disconnected during stream, continuing for billing' ,
373- )
321+ logger . warn ( 'Client disconnected during stream, continuing for billing' )
374322 clientDisconnected = true
375323 }
376324 }
@@ -490,17 +438,13 @@ async function handleLine({
490438 }
491439
492440 const patchedLine = `data: ${ JSON . stringify ( obj ) } \n`
493- return {
494- state : result . state ,
495- billedCredits : result . billedCredits ,
496- patchedLine,
497- }
441+ return { state : result . state , billedCredits : result . billedCredits , patchedLine }
498442}
499443
500444function isFinalChunk ( data : Record < string , unknown > ) : boolean {
501445 const choices = data . choices as Array < Record < string , unknown > > | undefined
502446 if ( ! choices || choices . length === 0 ) return true
503- return choices . some ( ( c ) => c . finish_reason != null )
447+ return choices . some ( c => c . finish_reason != null )
504448}
505449
506450async function handleResponse ( {
@@ -532,24 +476,11 @@ async function handleResponse({
532476 logger : Logger
533477 insertMessage : InsertMessageBigqueryFn
534478} ) : Promise < { state : StreamState ; billedCredits ?: number } > {
535- state = handleStreamChunk ( {
536- data,
537- state,
538- startTime,
539- logger,
540- userId,
541- agentId,
542- model : originalModel ,
543- } )
479+ state = handleStreamChunk ( { data, state, startTime, logger, userId, agentId, model : originalModel } )
544480
545481 // Some providers send cumulative usage on EVERY chunk (not just the final one),
546482 // so we must only bill once on the final chunk to avoid charging N times.
547- if (
548- 'error' in data ||
549- ! data . usage ||
550- state . billedAlready ||
551- ! isFinalChunk ( data )
552- ) {
483+ if ( 'error' in data || ! data . usage || state . billedAlready || ! isFinalChunk ( data ) ) {
553484 // Strip usage from non-final chunks and duplicate final chunks
554485 // so the SDK doesn't see multiple usage objects
555486 if ( data . usage && ( ! isFinalChunk ( data ) || state . billedAlready ) ) {
@@ -558,10 +489,7 @@ async function handleResponse({
558489 return { state }
559490 }
560491
561- const usageData = extractUsageAndCost (
562- data . usage as Record < string , unknown > ,
563- originalModel ,
564- )
492+ const usageData = extractUsageAndCost ( data . usage as Record < string , unknown > , originalModel )
565493 const messageId = typeof data . id === 'string' ? data . id : 'unknown'
566494
567495 state . billedAlready = true
@@ -649,27 +577,17 @@ function handleStreamChunk({
649577 if ( state . responseText . length >= MAX_BUFFER_SIZE ) {
650578 state . responseText =
651579 state . responseText . slice ( 0 , MAX_BUFFER_SIZE ) + '\n---[TRUNCATED]---'
652- logger . warn (
653- { userId, agentId, model } ,
654- 'Response text buffer truncated at 1MB' ,
655- )
580+ logger . warn ( { userId, agentId, model } , 'Response text buffer truncated at 1MB' )
656581 }
657582 }
658583
659- const reasoningDelta =
660- typeof delta ?. reasoning_content === 'string'
661- ? delta . reasoning_content
662- : typeof delta ?. reasoning === 'string'
663- ? delta . reasoning
664- : ''
584+ const reasoningDelta = typeof delta ?. reasoning_content === 'string' ? delta . reasoning_content
585+ : typeof delta ?. reasoning === 'string' ? delta . reasoning
586+ : ''
665587
666588 // Track time to first token (TTFT) - set on first meaningful delta (content, reasoning, or tool_calls)
667- const hasToolCallsDelta =
668- delta ?. tool_calls != null && ( delta . tool_calls as unknown [ ] ) ?. length > 0
669- if (
670- state . ttftMs === null &&
671- ( contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta )
672- ) {
589+ const hasToolCallsDelta = delta ?. tool_calls != null && ( delta . tool_calls as unknown [ ] ) ?. length > 0
590+ if ( state . ttftMs === null && ( contentDelta !== '' || reasoningDelta !== '' || hasToolCallsDelta ) ) {
673591 state . ttftMs = Date . now ( ) - startTime . getTime ( )
674592 }
675593
@@ -678,10 +596,7 @@ function handleStreamChunk({
678596 if ( state . reasoningText . length >= MAX_BUFFER_SIZE ) {
679597 state . reasoningText =
680598 state . reasoningText . slice ( 0 , MAX_BUFFER_SIZE ) + '\n---[TRUNCATED]---'
681- logger . warn (
682- { userId, agentId, model } ,
683- 'Reasoning text buffer truncated at 1MB' ,
684- )
599+ logger . warn ( { userId, agentId, model } , 'Reasoning text buffer truncated at 1MB' )
685600 }
686601 }
687602
@@ -715,9 +630,7 @@ export class CanopyWaveError extends Error {
715630 }
716631}
717632
718- async function parseCanopyWaveError (
719- response : Response ,
720- ) : Promise < CanopyWaveError > {
633+ async function parseCanopyWaveError ( response : Response ) : Promise < CanopyWaveError > {
721634 const errorText = await response . text ( )
722635 let errorBody : CanopyWaveError [ 'errorBody' ]
723636 try {
0 commit comments