Skip to content

Commit 6dfbb3b

Browse files
jahooma and claude authored
Route Kimi K2.6 requests through CanopyWave (#550)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent f58c850 commit 6dfbb3b

3 files changed

Lines changed: 58 additions & 27 deletions

File tree

scripts/test-canopywave-long.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,10 @@ const MODEL_CONFIGS: Record<string, ModelConfig> = {
3333
outputCostPerToken: 1.20 / 1_000_000,
3434
},
3535
kimi: {
36-
// Pricing is approximate — based on public Moonshot k2 rates; CanopyWave may differ.
3736
id: 'moonshotai/kimi-k2.6',
38-
inputCostPerToken: 0.60 / 1_000_000,
39-
cachedInputCostPerToken: 0.15 / 1_000_000,
40-
outputCostPerToken: 2.50 / 1_000_000,
37+
inputCostPerToken: 0.95 / 1_000_000,
38+
cachedInputCostPerToken: 0.16 / 1_000_000,
39+
outputCostPerToken: 4.00 / 1_000_000,
4140
},
4241
}
4342

web/src/app/api/v1/chat/completions/_post.ts

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -532,9 +532,10 @@ export async function postChatCompletions(params: {
532532
if (bodyStream) {
533533
// Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
534534
const useSiliconFlow = false // isSiliconFlowModel(typedBody.model)
535-
const useCanopyWave = false // isCanopyWaveModel(typedBody.model)
536-
const useFireworks = isFireworksModel(typedBody.model)
537-
const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model)
535+
const useCanopyWave = isCanopyWaveModel(typedBody.model)
536+
const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model)
537+
const useOpenAIDirect =
538+
!useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model)
538539
const stream = useSiliconFlow
539540
? await handleSiliconFlowStream({
540541
body: typedBody,
@@ -606,12 +607,12 @@ export async function postChatCompletions(params: {
606607
})
607608
} else {
608609
// Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
609-
// TEMPORARILY DISABLED: route through OpenRouter
610610
const model = typedBody.model
611611
const useSiliconFlow = false // isSiliconFlowModel(model)
612-
const useCanopyWave = false // isCanopyWaveModel(model)
613-
const useFireworks = isFireworksModel(model)
614-
const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model)
612+
const useCanopyWave = isCanopyWaveModel(model)
613+
const useFireworks = !useCanopyWave && isFireworksModel(model)
614+
const shouldUseOpenAIEndpoint =
615+
!useCanopyWave && !useFireworks && isOpenAIDirectModel(model)
615616

616617
const nonStreamRequest = useSiliconFlow
617618
? handleSiliconFlowNonStream({

web/src/llm-api/canopywave.ts

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,52 @@ const canopywaveAgent = new Agent({
2626
bodyTimeout: 0,
2727
})
2828

29-
/** Map from OpenRouter model IDs to CanopyWave model IDs */
30-
const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
31-
'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
29+
// CanopyWave per-token pricing (dollars per token)
30+
interface CanopyWavePricing {
31+
inputCostPerToken: number
32+
cachedInputCostPerToken: number
33+
outputCostPerToken: number
34+
}
35+
36+
/** Single source of truth: which OpenRouter model IDs we route through
37+
* CanopyWave, the corresponding CanopyWave model ID, and per-model pricing.
38+
* Kept as one map so adding a model can't drift between routing and billing. */
39+
const CANOPYWAVE_MODELS: Record<
40+
string,
41+
{ canopywaveId: string; pricing: CanopyWavePricing }
42+
> = {
43+
'minimax/minimax-m2.5': {
44+
canopywaveId: 'minimax/minimax-m2.5',
45+
pricing: {
46+
inputCostPerToken: 0.27 / 1_000_000,
47+
cachedInputCostPerToken: 0.03 / 1_000_000,
48+
outputCostPerToken: 1.08 / 1_000_000,
49+
},
50+
},
51+
'moonshotai/kimi-k2.6': {
52+
canopywaveId: 'moonshotai/kimi-k2.6',
53+
pricing: {
54+
inputCostPerToken: 0.95 / 1_000_000,
55+
cachedInputCostPerToken: 0.16 / 1_000_000,
56+
outputCostPerToken: 4.00 / 1_000_000,
57+
},
58+
},
3259
}
3360

3461
export function isCanopyWaveModel(model: string): boolean {
35-
return model in CANOPYWAVE_MODEL_MAP
62+
return model in CANOPYWAVE_MODELS
3663
}
3764

3865
function getCanopyWaveModelId(openrouterModel: string): string {
39-
return CANOPYWAVE_MODEL_MAP[openrouterModel] ?? openrouterModel
66+
return CANOPYWAVE_MODELS[openrouterModel]?.canopywaveId ?? openrouterModel
67+
}
68+
69+
function getCanopyWavePricing(model: string): CanopyWavePricing {
70+
const entry = CANOPYWAVE_MODELS[model]
71+
if (!entry) {
72+
throw new Error(`No CanopyWave pricing found for model: ${model}`)
73+
}
74+
return entry.pricing
4075
}
4176

4277
type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null; billedAlready: boolean }
@@ -85,12 +120,7 @@ function createCanopyWaveRequest(params: {
85120
})
86121
}
87122

88-
// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5
89-
const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000
90-
const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
91-
const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000
92-
93-
function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
123+
function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
94124
if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
95125
const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
96126
const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
@@ -100,11 +130,12 @@ function extractUsageAndCost(usage: Record<string, unknown> | undefined | null):
100130
const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0
101131
const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0
102132

133+
const pricing = getCanopyWavePricing(model)
103134
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
104135
const cost =
105-
nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN +
106-
cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN +
107-
outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN
136+
nonCachedInputTokens * pricing.inputCostPerToken +
137+
cacheReadInputTokens * pricing.cachedInputCostPerToken +
138+
outputTokens * pricing.outputCostPerToken
108139

109140
return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
110141
}
@@ -139,7 +170,7 @@ export async function handleCanopyWaveNonStream({
139170
const data = await response.json()
140171
const content = data.choices?.[0]?.message?.content ?? ''
141172
const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
142-
const usageData = extractUsageAndCost(data.usage)
173+
const usageData = extractUsageAndCost(data.usage, originalModel)
143174

144175
insertMessageToBigQuery({
145176
messageId: data.id,
@@ -453,7 +484,7 @@ async function handleResponse({
453484
return { state }
454485
}
455486

456-
const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
487+
const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
457488
const messageId = typeof data.id === 'string' ? data.id : 'unknown'
458489

459490
state.billedAlready = true

0 commit comments

Comments (0)