diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts
index 36d6b75c5..dd5630930 100644
--- a/agents/__tests__/editor.test.ts
+++ b/agents/__tests__/editor.test.ts
@@ -67,6 +67,11 @@ describe('editor agent', () => {
expect(glmEditor.model).toBe('z-ai/glm-5.1')
})
+ test('creates kimi editor', () => {
+ const kimiEditor = createCodeEditor({ model: 'kimi' })
+ expect(kimiEditor.model).toBe('moonshotai/kimi-k2.6')
+ })
+
test('creates minimax editor', () => {
const minimaxEditor = createCodeEditor({ model: 'minimax' })
expect(minimaxEditor.model).toBe('minimax/minimax-m2.7')
@@ -84,6 +89,12 @@ describe('editor agent', () => {
expect(glmEditor.instructionsPrompt).not.toContain('</think>')
})
+ test('kimi editor does not include think tags in instructions', () => {
+ const kimiEditor = createCodeEditor({ model: 'kimi' })
+ expect(kimiEditor.instructionsPrompt).not.toContain('<think>')
+ expect(kimiEditor.instructionsPrompt).not.toContain('</think>')
+ })
+
test('minimax editor does not include think tags in instructions', () => {
const minimaxEditor = createCodeEditor({ model: 'minimax' })
expect(minimaxEditor.instructionsPrompt).not.toContain('<think>')
diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
index 1a81f948b..b1e24efff 100644
--- a/agents/base2/base2.ts
+++ b/agents/base2/base2.ts
@@ -25,7 +25,7 @@ export function createBase2(
const isFree = mode === 'free' || mode === 'lite'
const isSonnet = false
- const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7'
+ const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7'
return {
publisher,
diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts
index 29225f0c2..6dbb4bb3c 100644
--- a/agents/editor/editor-lite.ts
+++ b/agents/editor/editor-lite.ts
@@ -3,7 +3,7 @@ import { createCodeEditor } from './editor'
import type { AgentDefinition } from '../types/agent-definition'
const definition: AgentDefinition = {
- ...createCodeEditor({ model: 'glm' }),
+ ...createCodeEditor({ model: 'kimi' }),
id: 'editor-lite',
}
export default definition
diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts
index c98544d0f..bb31eaaeb 100644
--- a/agents/editor/editor.ts
+++ b/agents/editor/editor.ts
@@ -4,7 +4,7 @@ import { publisher } from '../constants'
import type { AgentDefinition } from '../types/agent-definition'
export const createCodeEditor = (options: {
- model: 'gpt-5' | 'opus' | 'glm' | 'minimax'
+ model: 'gpt-5' | 'opus' | 'glm' | 'kimi' | 'minimax'
}): Omit<AgentDefinition, 'id'> => {
const { model } = options
return {
@@ -14,6 +14,8 @@ export const createCodeEditor = (options: {
? 'openai/gpt-5.1'
: options.model === 'minimax'
? 'minimax/minimax-m2.7'
+ : options.model === 'kimi'
+ ? 'moonshotai/kimi-k2.6'
: options.model === 'glm'
? 'z-ai/glm-5.1'
: 'anthropic/claude-opus-4.7',
@@ -67,7 +69,7 @@ OR for new files or major rewrites:
}
-${model === 'gpt-5' || model === 'glm' || model === 'minimax'
+${model === 'gpt-5' || model === 'glm' || model === 'kimi' || model === 'minimax'
? ''
: `Before you start writing your implementation, you should use <think> tags to think about the best way to implement the changes.
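
A minimal usage sketch of the widened model option (import path assumed from the file layout; expected values per the tests above):

    import { createCodeEditor } from './agents/editor/editor'

    // 'kimi' resolves to the new Kimi K2.6 id and, like 'gpt-5', 'glm', and
    // 'minimax', skips the <think>-tag instructions branch.
    const kimiEditor = createCodeEditor({ model: 'kimi' })
    console.log(kimiEditor.model) // 'moonshotai/kimi-k2.6'
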
diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts
index feafb87c4..888cadf4f 100644
--- a/agents/reviewer/code-reviewer-lite.ts
+++ b/agents/reviewer/code-reviewer-lite.ts
@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
const definition: SecretAgentDefinition = {
id: 'code-reviewer-lite',
publisher,
- ...createReviewer('z-ai/glm-5.1'),
+ ...createReviewer('moonshotai/kimi-k2.6'),
}
export default definition
diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts
index 3608f3631..088dd1dca 100644
--- a/agents/types/agent-definition.ts
+++ b/agents/types/agent-definition.ts
@@ -423,6 +423,7 @@ export type ModelName =
// Other open source models
| 'moonshotai/kimi-k2'
| 'moonshotai/kimi-k2:nitro'
+ | 'moonshotai/kimi-k2.6'
| 'z-ai/glm-5'
| 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
index a453a1538..ddc2922ab 100644
--- a/cli/src/components/freebuff-model-selector.tsx
+++ b/cli/src/components/freebuff-model-selector.tsx
@@ -5,7 +5,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'
import { Button } from './button'
import {
FALLBACK_FREEBUFF_MODEL_ID,
- FREEBUFF_GLM_MODEL_ID,
+ FREEBUFF_KIMI_MODEL_ID,
FREEBUFF_MODELS,
getFreebuffDeploymentAvailabilityLabel,
isFreebuffModelAvailable,
@@ -25,8 +25,8 @@ import {
import type { KeyEvent } from '@opentui/core'
const FREEBUFF_MODEL_SELECTOR_MODELS = [
- ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID),
- ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID),
+ ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_KIMI_MODEL_ID),
+ ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_KIMI_MODEL_ID),
]
/**
@@ -72,7 +72,7 @@ export const FreebuffModelSelector: React.FC = () => {
// unavailable (e.g. deployment hours close while the picker is open),
// swap to the always-available fallback so Enter doesn't POST a model
// the server will immediately reject. In-memory only — the user's saved
- // preference (e.g. GLM) is preserved for the next launch.
+ // preference (e.g. Kimi) is preserved for the next launch.
if (
(session?.status === 'none' || !session) &&
!isFreebuffModelAvailable(selectedModel, new Date(now))
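
A condensed sketch of the availability guard that comment describes; `setSelectedModel` stands in for the component's actual state setter, and the import path follows the one used in web/src/llm-api/fireworks.ts:

    import {
      FALLBACK_FREEBUFF_MODEL_ID,
      isFreebuffModelAvailable,
    } from '@codebuff/common/constants/freebuff-models'

    // If the saved pick (e.g. Kimi) closed while the picker was open, show
    // the always-available fallback for this run without persisting the swap.
    if (!isFreebuffModelAvailable(selectedModel, new Date(now))) {
      setSelectedModel(FALLBACK_FREEBUFF_MODEL_ID)
    }
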
diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx
index 7cc0aca4a..cdd28e5d5 100644
--- a/cli/src/components/waiting-room-screen.tsx
+++ b/cli/src/components/waiting-room-screen.tsx
@@ -260,7 +260,7 @@ export const WaitingRoomScreen: React.FC = ({
Elapsed
{formatElapsed(elapsedMs)}
- {/* Per-model session quota (e.g. GLM 5.1 caps at 5/12h). Only
+ {/* Per-model session quota (e.g. Kimi K2.6 caps at 5/12h). Only
rendered for rate-limited models so the Minimax queue stays
clutter-free. */}
{session.rateLimit && (
@@ -343,7 +343,7 @@ export const WaitingRoomScreen: React.FC = ({
>
)}
- {/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
+ {/* Per-model session quota exhausted (e.g. 5+ Kimi sessions in the
last 12h). Terminal for this run — the user can exit and come
back once the oldest session in the window rolls off. */}
{session?.status === 'rate_limited' && (
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
index 463a49126..c78d4bbd0 100644
--- a/cli/src/hooks/use-freebuff-session.ts
+++ b/cli/src/hooks/use-freebuff-session.ts
@@ -104,7 +104,7 @@ async function callSession(
return body
}
}
- // 429 from POST is the per-model session-quota reject (e.g. too many GLM
+ // 429 from POST is the per-model session-quota reject (e.g. too many Kimi
// sessions in the last 12h). Terminal for the current poll — the CLI shows
// a screen explaining the limit and when the user can try again. The 429
// status (rather than 200) keeps older CLIs in their error path so they
@@ -442,9 +442,9 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
}
if (next.status === 'model_unavailable') {
// Server says the requested model isn't available right now (e.g.
- // GLM outside deployment hours). Flip to the always-available
+ // Kimi outside deployment hours). Flip to the always-available
// fallback for this run. In-memory only — `setSelectedModel`
- // doesn't persist, so the user's saved preference (e.g. GLM)
+ // doesn't persist, so the user's saved preference (e.g. Kimi)
// is preserved for their next launch during deployment hours.
useFreebuffModelStore
.getState()
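
A sketch of the 429 branch described above, with the body shape taken from common/src/types/freebuff-session.ts; `showRateLimitScreen` is a hypothetical stand-in for the CLI's limit screen:

    if (resp.status === 429) {
      // Per-model quota reject: terminal for this poll. Returning 429
      // rather than 200 keeps older CLIs in their generic error path.
      const body = (await resp.json()) as {
        status: 'rate_limited'
        retryAfterMs: number
      }
      showRateLimitScreen(body.retryAfterMs) // hypothetical helper
    }
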
diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts
index 0d01d2762..752f6bb28 100644
--- a/common/src/__tests__/freebuff-models.test.ts
+++ b/common/src/__tests__/freebuff-models.test.ts
@@ -1,11 +1,17 @@
import { describe, expect, test } from 'bun:test'
import {
+ DEFAULT_FREEBUFF_MODEL_ID,
+ FREEBUFF_KIMI_MODEL_ID,
getFreebuffDeploymentAvailabilityLabel,
isFreebuffDeploymentHours,
} from '../constants/freebuff-models'
describe('freebuff model availability', () => {
+ test('defaults to Kimi K2.6', () => {
+ expect(DEFAULT_FREEBUFF_MODEL_ID).toBe(FREEBUFF_KIMI_MODEL_ID)
+ })
+
test('formats the close time in the user local timezone while deployment is open', () => {
expect(
getFreebuffDeploymentAvailabilityLabel(new Date('2026-01-05T18:00:00Z'), {
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index 308e12df6..4a2a4a147 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<ModelName>> = {
// Root orchestrator
'base2-free': new Set([
'minimax/minimax-m2.7',
- 'z-ai/glm-5.1',
+ 'moonshotai/kimi-k2.6',
]),
// File exploration agents
@@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record> = {
// Editor for free mode
'editor-lite': new Set([
'minimax/minimax-m2.7',
- 'z-ai/glm-5.1',
+ 'moonshotai/kimi-k2.6',
]),
// Code reviewer for free mode
'code-reviewer-lite': new Set([
'minimax/minimax-m2.7',
- 'z-ai/glm-5.1',
+ 'moonshotai/kimi-k2.6',
]),
}
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index 8b3e9d82d..9c6ff423e 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -21,7 +21,7 @@ export interface FreebuffModelOption {
* the caller's local timezone. The CLI should render
* `getFreebuffDeploymentAvailabilityLabel()` instead. */
export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day'
-export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
+export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York'
const FREEBUFF_PACIFIC_TIMEZONE = 'America/Los_Angeles'
@@ -47,8 +47,8 @@ export const FREEBUFF_MODELS = [
availability: 'always',
},
{
- id: FREEBUFF_GLM_MODEL_ID,
- displayName: 'GLM 5.1',
+ id: FREEBUFF_KIMI_MODEL_ID,
+ displayName: 'Kimi K2.6',
tagline: 'Smartest',
availability: 'deployment_hours',
},
@@ -57,15 +57,15 @@ export const FREEBUFF_MODELS = [
export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
/** What new freebuff users see selected in the picker. May not be currently
- * available (GLM is closed outside deployment hours); callers that need an
+ * available (Kimi is closed outside deployment hours); callers that need an
* always-available id for resolution / auto-fallbacks should use
* FALLBACK_FREEBUFF_MODEL_ID instead. */
-export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID
+export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_KIMI_MODEL_ID
/** Always-available fallback used when the requested model can't be served
* right now (unknown id, deployment hours closed, etc.). Kept distinct from
* DEFAULT_FREEBUFF_MODEL_ID so a new user's "preferred default" can be the
- * smartest model without auto-flipping anyone to a closed deployment. */
+ * smartest model without auto-flipping anyone to a closed serverless model. */
export const FALLBACK_FREEBUFF_MODEL_ID: FreebuffModelId =
FREEBUFF_MINIMAX_MODEL_ID
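
How the two ids divide the work, as a sketch for a caller that must resolve to a servable model right now (signature of isFreebuffModelAvailable as used in the CLI selector above):

    import {
      DEFAULT_FREEBUFF_MODEL_ID,
      FALLBACK_FREEBUFF_MODEL_ID,
      isFreebuffModelAvailable,
    } from '@codebuff/common/constants/freebuff-models'

    // New users see Kimi selected in the picker, but anything that must
    // serve immediately resolves through the always-open Minimax fallback.
    const resolveModel = (now: Date) =>
      isFreebuffModelAvailable(DEFAULT_FREEBUFF_MODEL_ID, now)
        ? DEFAULT_FREEBUFF_MODEL_ID
        : FALLBACK_FREEBUFF_MODEL_ID
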
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index 3608f3631..088dd1dca 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -423,6 +423,7 @@ export type ModelName =
// Other open source models
| 'moonshotai/kimi-k2'
| 'moonshotai/kimi-k2:nitro'
+ | 'moonshotai/kimi-k2.6'
| 'z-ai/glm-5'
| 'z-ai/glm-5.1'
| 'z-ai/glm-4.6'
diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts
index 31fc4c87e..428a73df4 100644
--- a/common/src/types/freebuff-session.ts
+++ b/common/src/types/freebuff-session.ts
@@ -9,7 +9,7 @@
/**
* Per-model usage counter surfaced to the CLI so the waiting-room UI can
* render "N of M sessions used" alongside queue/active state. Present when
- * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
+ * the joined model has a rate limit applied (today: Kimi K2.6 with 5 admits
* per 12-hour window). `recentCount` is the number of admissions inside
* `windowHours` at the time the response was produced — see also the
* standalone `rate_limited` status for the reject path.
@@ -72,7 +72,7 @@ export type FreebuffSessionServerResponse =
queueDepthByModel: Record<string, number>
estimatedWaitMs: number
queuedAt: string
- /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+ /** Rate-limit quota for rate-limited models (Kimi K2.6 today). Absent
* for unlimited models or when the status was produced outside the
* rate-limit check path (e.g. pure read via GET). */
rateLimit?: FreebuffSessionRateLimit
@@ -85,7 +85,7 @@ export type FreebuffSessionServerResponse =
admittedAt: string
expiresAt: string
remainingMs: number
- /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+ /** Rate-limit quota for rate-limited models (Kimi K2.6 today). Absent
* for unlimited models or when the status was produced outside the
* rate-limit check path (e.g. pure read via GET). */
rateLimit?: FreebuffSessionRateLimit
@@ -131,7 +131,7 @@ export type FreebuffSessionServerResponse =
/** User has an active session bound to a different model. Returned
* from POST /session when they pick a new model without ending their
* current session first. The CLI shows a confirmation prompt: "End
- * your active GLM session to switch?" → on confirm, DELETE then
+ * your active Kimi session to switch?" → on confirm, DELETE then
* re-POST with the new model. */
status: 'model_locked'
currentModel: string
@@ -152,7 +152,7 @@ export type FreebuffSessionServerResponse =
}
| {
/** User has used up their per-model admission quota in the rolling
- * window (GLM 5.1: 5 one-hour sessions per 12h). Returned from POST
+ * window (Kimi K2.6: 5 one-hour sessions per 12h). Returned from POST
* /session before the user is placed in the queue. `retryAfterMs` is
* the time until the oldest admission inside the window falls off
* and one quota slot opens up — clients should show the user when
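
A worked example of `retryAfterMs` under the Kimi limit (5 admits per rolling 12h), matching the public-api tests later in this patch:

    // The oldest of 5 in-window admits landed 11h ago, so its slot frees
    // up when it ages past the 12h window, i.e. in 1h.
    const windowMs = 12 * 60 * 60 * 1000
    const oldestAdmitMs = Date.now() - 11 * 60 * 60 * 1000
    const retryAfterMs = oldestAdmitMs + windowMs - Date.now() // 3_600_000
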
diff --git a/freebuff/README.md b/freebuff/README.md
index 0749fc7c0..1ba4405f6 100644
--- a/freebuff/README.md
+++ b/freebuff/README.md
@@ -54,7 +54,7 @@ freebuff
**How can it be free?** Freebuff is supported by ads shown in the CLI.
-**What models do you use?** GLM 5.1 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.
+**What models do you use?** Kimi K2.6 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.
**Are you training on my data?** No. We only use model providers that do not train on our requests. Your code stays yours.
diff --git a/freebuff/SPEC.md b/freebuff/SPEC.md
index 195081533..5fad08369 100644
--- a/freebuff/SPEC.md
+++ b/freebuff/SPEC.md
@@ -84,7 +84,7 @@ Freebuff only supports **FREE mode**. All mode-related features are stripped.
| `/agent:gpt-5` | Premium agent, not available in free tier |
| `/review` | Uses thinker-gpt under the hood |
| `/publish` | Agent publishing not available in free tier |
-| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free model (GLM 5.1) |
+| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free model (Kimi K2.6) |
### Commands to KEEP
diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx
index 3cff424a3..6a016272e 100644
--- a/freebuff/web/src/app/home-client.tsx
+++ b/freebuff/web/src/app/home-client.tsx
@@ -31,7 +31,7 @@ const faqs = [
{
question: 'What models do you use?',
answer:
- 'GLM 5.1 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research.\n\nConnect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.',
+ 'Kimi K2.6 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research.\n\nConnect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.',
},
{
question: 'Which countries is Freebuff available in?',
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index e0b531c70..ce28f91e0 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -642,7 +642,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
expect(body.countryBlockReason).toBe('anonymized_or_unknown_country')
})
- it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => {
+ it('lets freebuff use Kimi K2.6 through Fireworks availability rules', async () => {
const fetchedBodies: Record<string, unknown>[] = []
const fetchViaFireworks = mock(
async (_url: string | URL | Request, init?: RequestInit) => {
@@ -650,7 +650,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
return new Response(
JSON.stringify({
id: 'test-id',
- model: 'accounts/fireworks/models/glm-5p1',
+ model: 'accounts/fireworks/models/kimi-k2p6',
choices: [{ message: { content: 'test response' } }],
usage: {
prompt_tokens: 10,
@@ -672,7 +672,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
method: 'POST',
headers: allowedFreeModeHeaders('test-api-key-new-free'),
body: JSON.stringify({
- model: 'z-ai/glm-5.1',
+ model: 'moonshotai/kimi-k2.6',
stream: false,
codebuff_metadata: {
run_id: 'run-free',
@@ -701,9 +701,9 @@ describe('/api/v1/chat/completions POST endpoint', () => {
expect(response.status).toBe(200)
expect(fetchedBodies).toHaveLength(1)
expect(fetchedBodies[0].model).toBe(
- 'accounts/fireworks/models/glm-5p1',
+ 'accounts/fireworks/models/kimi-k2p6',
)
- expect(body.model).toBe('z-ai/glm-5.1')
+ expect(body.model).toBe('moonshotai/kimi-k2.6')
expect(body.provider).toBe('Fireworks')
} else {
expect(response.status).toBe(503)
diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
index 4c55a6458..54481dca8 100644
--- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
+++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
@@ -281,10 +281,10 @@ describe('POST /api/v1/freebuff/session', () => {
expect(body.status).toBe('queued')
})
- test('returns model_unavailable for GLM outside deployment hours', async () => {
+ test('returns model_unavailable for Kimi outside deployment hours', async () => {
const sessionDeps = makeSessionDeps()
const resp = await postFreebuffSession(
- makeReq('ok', { model: 'z-ai/glm-5.1' }),
+ makeReq('ok', { model: 'moonshotai/kimi-k2.6' }),
makeDeps(sessionDeps, 'u1'),
)
expect(resp.status).toBe(409)
diff --git a/web/src/app/docs/[category]/[slug]/page.tsx b/web/src/app/docs/[category]/[slug]/page.tsx
index 44d5174e0..21d093d49 100644
--- a/web/src/app/docs/[category]/[slug]/page.tsx
+++ b/web/src/app/docs/[category]/[slug]/page.tsx
@@ -33,7 +33,7 @@ const FAQ_ITEMS = [
{
question: 'What model does Codebuff use?',
answer:
- 'Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or GLM 5.1 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research.',
+ 'Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or Kimi K2.6 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research.',
},
{
question: 'Can I use my Claude Pro or Max subscription with Codebuff?',
diff --git a/web/src/content/advanced/how-does-it-work.mdx b/web/src/content/advanced/how-does-it-work.mdx
index 08f13366f..79d2ecab3 100644
--- a/web/src/content/advanced/how-does-it-work.mdx
+++ b/web/src/content/advanced/how-does-it-work.mdx
@@ -24,8 +24,8 @@ The main agent ("Buffy") runs on Claude Opus 4.7. It reads your prompt, gathers
- [**Code Searcher**](/publishers/codebuff/agents/code-searcher) - grep-style pattern matching
- [**Researcher**](/publishers/codebuff/agents/researcher) (Gemini 3.1 Flash Lite) - web and docs lookup
- [**Thinker**](/publishers/codebuff/agents/thinker) (Claude Opus 4.7, GPT-5.4) - works through hard problems
-- [**Editor**](/publishers/codebuff/agents/editor) (Claude Opus 4.7, GPT-5.1, GLM 5.1) - writes and modifies code
-- [**Reviewer**](/publishers/codebuff/agents/reviewer) (Claude Opus 4.7, GLM 5.1 in Lite mode) - catches bugs and style issues
+- [**Editor**](/publishers/codebuff/agents/editor) (Claude Opus 4.7, GPT-5.1, Kimi K2.6) - writes and modifies code
+- [**Reviewer**](/publishers/codebuff/agents/reviewer) (Claude Opus 4.7, Kimi K2.6 in Lite mode) - catches bugs and style issues
- [**Basher**](/publishers/codebuff/agents/basher) (Gemini 3.1 Flash Lite) - runs terminal commands
## Best-of-N Selection (Max Mode)
diff --git a/web/src/content/advanced/what-models.mdx b/web/src/content/advanced/what-models.mdx
index 6fb3cd736..f3dc59b38 100644
--- a/web/src/content/advanced/what-models.mdx
+++ b/web/src/content/advanced/what-models.mdx
@@ -19,7 +19,7 @@ The main agent ("Buffy") coordinates everything:
| Default | Opus 4.7 |
| Plan | Opus 4.7 |
| Max | Opus 4.7 |
- | Lite | GLM 5.1 |
+ | Lite | Kimi K2.6 |
## Subagents
@@ -29,7 +29,7 @@ The orchestrator spawns these for specific jobs:
| Task | Models |
|------|--------|
- | Code editing | Claude Opus 4.7, GLM 5.1 |
+ | Code editing | Claude Opus 4.7, Kimi K2.6 |
| Thinking/reasoning | Claude Opus 4.7, GPT-5.4 |
| Code review | Claude Opus 4.7, GPT-5.4 |
| File discovery | Gemini 3.1 Flash Lite, Gemini 2.5 Flash Lite |
@@ -37,4 +37,4 @@ The orchestrator spawns these for specific jobs:
| Web/docs research | Gemini 3.1 Flash Lite |
-Max mode runs multiple implementations in parallel and picks the best one. Default mode runs a single implementation pass. Lite mode uses GLM 5.1 and includes code review support.
+Max mode runs multiple implementations in parallel and picks the best one. Default mode runs a single implementation pass. Lite mode uses Kimi K2.6 and includes code review support.
diff --git a/web/src/content/help/faq.mdx b/web/src/content/help/faq.mdx
index 477adbd8f..bfd1df083 100644
--- a/web/src/content/help/faq.mdx
+++ b/web/src/content/help/faq.mdx
@@ -13,7 +13,7 @@ Software development: Writing features, tests, and scripts across common languag
## What model does Codebuff use?
-Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or GLM 5.1 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research. See [What models do you use?](/docs/advanced/what-models) for the full breakdown.
+Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or Kimi K2.6 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research. See [What models do you use?](/docs/advanced/what-models) for the full breakdown.
## Can I use my Claude Pro or Max subscription with Codebuff?
diff --git a/web/src/content/tips/modes.mdx b/web/src/content/tips/modes.mdx
index 1b67daecd..acab5d8aa 100644
--- a/web/src/content/tips/modes.mdx
+++ b/web/src/content/tips/modes.mdx
@@ -15,7 +15,7 @@ Codebuff has four modes. Switch during a session with `Shift+Tab` or `/mode:` co
| Default | Claude Opus 4.7 | editor | Yes |
| Max | Claude Opus 4.7 | editor-multi-prompt | Yes |
| Plan | Claude Opus 4.7 | None | No |
- | Lite | GLM 5.1 | None | No |
+ | Lite | Kimi K2.6 | None | No |
## Default
@@ -60,7 +60,7 @@ Switch to this mode with `/mode:plan`.
## Lite
-GLM 5.1, cheaper and faster.
+Kimi K2.6, cheaper and faster.
An efficient mode for most coding tasks.
diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
index 00ccf1f81..2d897767a 100644
--- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts
+++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
@@ -12,6 +12,7 @@ import {
import type { Logger } from '@codebuff/common/types/contracts/logger'
const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1'
+const KIMI_STANDARD_MODEL_ID = 'accounts/fireworks/models/kimi-k2p6'
const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea'
const TEST_DEPLOYMENT_MAP = {
'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID,
@@ -91,6 +92,14 @@ describe('Fireworks deployment routing', () => {
model: 'z-ai/glm-5.1',
messages: [{ role: 'user' as const, content: 'test' }],
}
+ const kimiBody = {
+ model: 'moonshotai/kimi-k2.6',
+ messages: [{ role: 'user' as const, content: 'test' }],
+ }
+ const kimiLiteBody = {
+ ...kimiBody,
+ codebuff_metadata: { cost_mode: 'lite' },
+ }
const liteBody = {
...minimalBody,
codebuff_metadata: { cost_mode: 'lite' },
@@ -143,6 +152,55 @@ describe('Fireworks deployment routing', () => {
expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
})
+ it('uses serverless API for Kimi during hours without a deployment', async () => {
+ const fetchCalls: string[] = []
+
+ const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+ const body = JSON.parse(init?.body as string)
+ fetchCalls.push(body.model)
+ return new Response(JSON.stringify({ ok: true }), { status: 200 })
+ }) as unknown as typeof globalThis.fetch
+
+ const response = await createFireworksRequestWithFallback({
+ body: kimiBody as never,
+ originalModel: 'moonshotai/kimi-k2.6',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ deploymentMap: {
+ 'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID,
+ },
+ sessionId: 'test-user-id',
+ now: IN_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(200)
+ expect(fetchCalls).toEqual([KIMI_STANDARD_MODEL_ID])
+ })
+
+ it('keeps Kimi unavailable outside hours when no deployment is mapped', async () => {
+ const mockFetch = mock(async () => {
+ throw new Error('should not fetch outside deployment hours')
+ }) as unknown as typeof globalThis.fetch
+
+ const response = await createFireworksRequestWithFallback({
+ body: kimiBody as never,
+ originalModel: 'moonshotai/kimi-k2.6',
+ fetch: mockFetch,
+ logger,
+ useCustomDeployment: true,
+ deploymentMap: {
+ 'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID,
+ },
+ sessionId: 'test-user-id',
+ now: BEFORE_DEPLOYMENT_HOURS,
+ })
+
+ expect(response.status).toBe(503)
+ const body = await response.json()
+ expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
+ })
+
it('keeps GLM unavailable outside hours when no deployment is mapped', async () => {
const mockFetch = mock(async () => {
throw new Error('should not fetch outside deployment hours')
@@ -356,7 +414,7 @@ describe('Fireworks deployment routing', () => {
expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
})
- it('falls back to the standard Fireworks API in lite mode outside deployment hours', async () => {
+ it('falls back to the standard Fireworks API for Kimi lite mode outside deployment hours', async () => {
const fetchCalls: string[] = []
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
@@ -366,8 +424,8 @@ describe('Fireworks deployment routing', () => {
}) as unknown as typeof globalThis.fetch
const response = await createFireworksRequestWithFallback({
- body: liteBody as never,
- originalModel: 'z-ai/glm-5.1',
+ body: kimiLiteBody as never,
+ originalModel: 'moonshotai/kimi-k2.6',
fetch: mockFetch,
logger,
useCustomDeployment: true,
@@ -377,7 +435,7 @@ describe('Fireworks deployment routing', () => {
})
expect(response.status).toBe(200)
- expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
+ expect(fetchCalls).toEqual([KIMI_STANDARD_MODEL_ID])
})
it('returns non-5xx responses from deployment without fallback (e.g. 429)', async () => {
diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 9a5b2ba12..bf14f57fc 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -48,14 +48,6 @@ const CANOPYWAVE_MODELS: Record<
outputCostPerToken: 1.08 / 1_000_000,
},
},
- 'moonshotai/kimi-k2.6': {
- canopywaveId: 'moonshotai/kimi-k2.6',
- pricing: {
- inputCostPerToken: 0.95 / 1_000_000,
- cachedInputCostPerToken: 0.16 / 1_000_000,
- outputCostPerToken: 4.00 / 1_000_000,
- },
- },
}
export function isCanopyWaveModel(model: string): boolean {
diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts
index b0013e62a..6bd5851fe 100644
--- a/web/src/llm-api/fireworks.ts
+++ b/web/src/llm-api/fireworks.ts
@@ -2,7 +2,7 @@ import { Agent } from 'undici'
import {
FREEBUFF_DEPLOYMENT_HOURS_LABEL,
- FREEBUFF_GLM_MODEL_ID,
+ FREEBUFF_KIMI_MODEL_ID,
isFreebuffDeploymentHours,
} from '@codebuff/common/constants/freebuff-models'
import { PROFIT_MARGIN } from '@codebuff/common/constants/limits'
@@ -36,12 +36,14 @@ const fireworksAgent = new Agent({
const FIREWORKS_MODEL_MAP: Record<string, string> = {
'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5',
'minimax/minimax-m2.7': 'accounts/fireworks/models/minimax-m2p7',
+ 'moonshotai/kimi-k2.6': 'accounts/fireworks/models/kimi-k2p6',
'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1',
}
/** Models that stay limited to freebuff deployment hours even on serverless. */
const FIREWORKS_HOURS_GATED_MODELS = new Set([
- FREEBUFF_GLM_MODEL_ID,
+ FREEBUFF_KIMI_MODEL_ID,
+ 'z-ai/glm-5.1',
])
/** Flag to enable custom Fireworks deployments (set to false to use global API only) */
@@ -169,6 +171,11 @@ const FIREWORKS_PRICING_MAP: Record<string, { inputCostPerToken: number; cachedInputCostPerToken: number; outputCostPerToken: number }> = {
cachedInputCostPerToken: 0.06 / 1_000_000,
outputCostPerToken: 1.20 / 1_000_000,
},
+ 'moonshotai/kimi-k2.6': {
+ inputCostPerToken: 0.95 / 1_000_000,
+ cachedInputCostPerToken: 0.16 / 1_000_000,
+ outputCostPerToken: 4.00 / 1_000_000,
+ },
'z-ai/glm-5.1': {
inputCostPerToken: 1.40 / 1_000_000,
cachedInputCostPerToken: 0.26 / 1_000_000,
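
Net routing for Kimi after this change, as inferred from the deployment tests above (the real control flow lives in createFireworksRequestWithFallback):

    // 1. Hours gate: 'moonshotai/kimi-k2.6' is in FIREWORKS_HOURS_GATED_MODELS,
    //    so outside deployment hours it returns 503 DEPLOYMENT_OUTSIDE_HOURS,
    //    except in lite mode, which falls back to the serverless API.
    // 2. No deploymentMap entry exists for Kimi, so inside hours requests go
    //    straight to the serverless id:
    const serverlessId = FIREWORKS_MODEL_MAP['moonshotai/kimi-k2.6']
    // => 'accounts/fireworks/models/kimi-k2p6'
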
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index f46a0f8c4..7f08d2bdd 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -203,12 +203,12 @@ describe('requestSession', () => {
test('deployment-hours-only model is unavailable outside deployment hours', async () => {
const state = await requestSession({
userId: 'u1',
- model: 'z-ai/glm-5.1',
+ model: 'moonshotai/kimi-k2.6',
deps,
})
expect(state).toEqual({
status: 'model_unavailable',
- requestedModel: 'z-ai/glm-5.1',
+ requestedModel: 'moonshotai/kimi-k2.6',
availableHours: '9am ET-5pm PT every day',
})
expect(deps.rows.size).toBe(0)
@@ -216,18 +216,18 @@ describe('requestSession', () => {
test('queued response includes a per-model depth snapshot for the selector', async () => {
deps._tick(new Date('2026-04-17T16:00:00Z'))
- // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both.
+ // Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both.
await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
deps._tick(new Date(deps._now().getTime() + 1000))
await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps })
deps._tick(new Date(deps._now().getTime() + 1000))
- await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps })
+ await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.6', deps })
const state = await getSessionState({ userId: 'u1', deps })
if (state.status !== 'queued') throw new Error('unreachable')
expect(state.queueDepthByModel).toEqual({
[DEFAULT_MODEL]: 2,
- 'z-ai/glm-5.1': 1,
+ 'moonshotai/kimi-k2.6': 1,
})
})
@@ -302,7 +302,7 @@ describe('requestSession', () => {
})
test('instant-admit: per-model capacities are independent', async () => {
- // MiniMax saturated at 1 active, GLM still has room.
+ // MiniMax saturated at 1 active, Kimi still has room.
const admitDeps = makeDeps({
getInstantAdmitCapacity: (model) =>
model === DEFAULT_MODEL ? 1 : 10,
@@ -316,25 +316,25 @@ describe('requestSession', () => {
})
const s3 = await requestSession({
userId: 'u3',
- model: 'z-ai/glm-5.1',
+ model: 'moonshotai/kimi-k2.6',
deps: admitDeps,
})
expect(s2.status).toBe('queued')
expect(s3.status).toBe('active')
})
- // Per-user rate limit (5 GLM admissions per 12h) — the wire limit is
+ // Per-user rate limit (5 Kimi admissions per 12h) — the wire limit is
// hard-coded in public-api.ts, so tests seed the fake admit log directly
- // rather than configuring it. GLM also has deployment-hours gating, so
+ // rather than configuring it. Kimi also has deployment-hours gating, so
// these tests bump `now` into the open window (12pm ET on a weekday)
// before issuing the request.
- const GLM_MODEL = 'z-ai/glm-5.1'
- const GLM_LIMIT = 5
- const GLM_WINDOW_HOURS = 12
- const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
+ const KIMI_MODEL = 'moonshotai/kimi-k2.6'
+ const KIMI_LIMIT = 5
+ const KIMI_WINDOW_HOURS = 12
+ const KIMI_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
- test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => {
- deps._tick(GLM_OPEN_TIME)
+ test('rate_limited: 5th Kimi admit in window blocks the 6th attempt', async () => {
+ deps._tick(KIMI_OPEN_TIME)
// Seed 5 admits inside the 12h window, spaced so we can verify retryAfter
// points at the oldest one sliding off.
const now = deps._now()
@@ -343,22 +343,22 @@ describe('requestSession', () => {
for (const hoursAgo of ages) {
deps.admits.push({
user_id: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
})
}
const state = await requestSession({
userId: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
deps,
})
expect(state.status).toBe('rate_limited')
if (state.status !== 'rate_limited') throw new Error('unreachable')
- expect(state.model).toBe(GLM_MODEL)
- expect(state.limit).toBe(GLM_LIMIT)
- expect(state.windowHours).toBe(GLM_WINDOW_HOURS)
- expect(state.recentCount).toBe(GLM_LIMIT)
+ expect(state.model).toBe(KIMI_MODEL)
+ expect(state.limit).toBe(KIMI_LIMIT)
+ expect(state.windowHours).toBe(KIMI_WINDOW_HOURS)
+ expect(state.recentCount).toBe(KIMI_LIMIT)
// Oldest admit is 11h ago; slot opens when it hits 12h, i.e. in 1h.
expect(state.retryAfterMs).toBe(60 * 60 * 1000)
// Blocked before any row is written — the user doesn't take a queue slot.
@@ -366,21 +366,21 @@ describe('requestSession', () => {
})
test('rate_limited: admits outside the 12h window do not count', async () => {
- deps._tick(GLM_OPEN_TIME)
+ deps._tick(KIMI_OPEN_TIME)
// 5 admits, each just over 12h old → all fall off the window.
const now = deps._now()
for (let i = 0; i < 5; i++) {
deps.admits.push({
user_id: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
admitted_at: new Date(
- now.getTime() - (GLM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i),
+ now.getTime() - (KIMI_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i),
),
})
}
const state = await requestSession({
userId: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
deps,
})
expect(state.status).toBe('queued')
@@ -408,41 +408,41 @@ describe('requestSession', () => {
expect(state.rateLimit).toBeUndefined()
})
- test('queued GLM response carries the current admit count', async () => {
- deps._tick(GLM_OPEN_TIME)
+ test('queued Kimi response carries the current admit count', async () => {
+ deps._tick(KIMI_OPEN_TIME)
const now = deps._now()
// 2 admits in the window — under the limit so the user still queues.
deps.admits.push({
user_id: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
})
deps.admits.push({
user_id: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
})
const state = await requestSession({
userId: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
deps,
})
if (state.status !== 'queued') throw new Error('unreachable')
expect(state.rateLimit).toEqual({
- model: GLM_MODEL,
- limit: GLM_LIMIT,
- windowHours: GLM_WINDOW_HOURS,
+ model: KIMI_MODEL,
+ limit: KIMI_LIMIT,
+ windowHours: KIMI_WINDOW_HOURS,
recentCount: 2,
})
})
- test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => {
- // Reclaim path: user has an active+unexpired GLM session and restarts
+ test('rate_limited: takeover of an active Kimi row is allowed even when at cap', async () => {
+ // Reclaim path: user has an active+unexpired Kimi session and restarts
// the CLI. POST must rotate their instance id (takeover) and NOT reject
// with rate_limited — otherwise they'd be stranded with a live session
// they can't reconnect to. The 5th admission is already in the log, so
// this also exercises "at the cap" rather than "over the cap".
- deps._tick(GLM_OPEN_TIME)
+ deps._tick(KIMI_OPEN_TIME)
const now = deps._now()
// Seed 5 prior admits (the cap), with the latest one matching the
// active row we're about to install.
@@ -450,7 +450,7 @@ describe('requestSession', () => {
for (const hoursAgo of ages) {
deps.admits.push({
user_id: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
})
}
@@ -461,7 +461,7 @@ describe('requestSession', () => {
user_id: 'u1',
status: 'active',
active_instance_id: 'inst-pre',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
queued_at: admittedAt,
admitted_at: admittedAt,
expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
@@ -471,27 +471,27 @@ describe('requestSession', () => {
const state = await requestSession({
userId: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
deps,
})
expect(state.status).toBe('active')
if (state.status !== 'active') throw new Error('unreachable')
// Instance id rotated; quota snapshot still reflects the full window.
expect(state.instanceId).not.toBe('inst-pre')
- expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+ expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT)
})
- test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => {
+ test('rate_limited: reclaim of a queued Kimi row is allowed even when at cap', async () => {
// Same reclaim exception for queued rows: if a user has already queued
// (say they slipped in just before their 5th admit landed), a subsequent
// POST from the same CLI must preserve their queue position instead of
// flipping to rate_limited.
- deps._tick(GLM_OPEN_TIME)
+ deps._tick(KIMI_OPEN_TIME)
const now = deps._now()
- for (let i = 0; i < GLM_LIMIT; i++) {
+ for (let i = 0; i < KIMI_LIMIT; i++) {
deps.admits.push({
user_id: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000),
})
}
@@ -500,7 +500,7 @@ describe('requestSession', () => {
user_id: 'u1',
status: 'queued',
active_instance_id: 'inst-pre',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
queued_at: queuedAt,
admitted_at: null,
expires_at: null,
@@ -510,7 +510,7 @@ describe('requestSession', () => {
const state = await requestSession({
userId: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
deps,
})
expect(state.status).toBe('queued')
@@ -518,20 +518,20 @@ describe('requestSession', () => {
// Same position (1) since we preserved queued_at and nobody else is
// ahead; the instance id rotated so any prior CLI is superseded.
expect(state.instanceId).not.toBe('inst-pre')
- expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+ expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT)
})
- test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => {
+ test('rate_limited: expired Kimi row is not a reclaim — quota still applies', async () => {
// The stored row's expires_at is in the past, so it doesn't represent
// an in-flight session. This POST is effectively a fresh request and
// must be blocked by the quota.
- deps._tick(GLM_OPEN_TIME)
+ deps._tick(KIMI_OPEN_TIME)
const now = deps._now()
const ages = [11, 4, 3, 2, 1]
for (const hoursAgo of ages) {
deps.admits.push({
user_id: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
})
}
@@ -540,7 +540,7 @@ describe('requestSession', () => {
user_id: 'u1',
status: 'active',
active_instance_id: 'inst-pre',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
queued_at: admittedAt,
admitted_at: admittedAt,
expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
@@ -549,7 +549,7 @@ describe('requestSession', () => {
})
const state = await requestSession({
userId: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
deps,
})
expect(state.status).toBe('rate_limited')
@@ -557,18 +557,18 @@ describe('requestSession', () => {
test('instant-admit bumps the quota count for the freshly-written admit row', async () => {
const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 })
- admitDeps._tick(GLM_OPEN_TIME)
+ admitDeps._tick(KIMI_OPEN_TIME)
// 1 existing admit in the window; this new call should instant-admit and
// write a second row, so the response's recentCount reflects 2.
const now = admitDeps._now()
admitDeps.admits.push({
user_id: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
})
const state = await requestSession({
userId: 'u1',
- model: GLM_MODEL,
+ model: KIMI_MODEL,
deps: admitDeps,
})
if (state.status !== 'active') throw new Error('unreachable')
@@ -636,16 +636,16 @@ describe('getSessionState', () => {
// Regression: the POST response attached rateLimit, but GET polls did
// not — so the "Sessions N/M used" line flashed once then disappeared on
// the next 5s poll. GET must attach the same quota snapshot. Rate
- // limits only apply to GLM, so this test uses GLM explicitly (inside
+ // limits only apply to Kimi, so this test uses Kimi explicitly (inside
// deployment hours) rather than the Minimax DEFAULT_MODEL.
deps._tick(new Date('2026-04-17T16:00:00Z'))
const now = deps._now()
deps.admits.push({
user_id: 'u1',
- model: 'z-ai/glm-5.1',
+ model: 'moonshotai/kimi-k2.6',
admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
})
- await requestSession({ userId: 'u1', model: 'z-ai/glm-5.1', deps })
+ await requestSession({ userId: 'u1', model: 'moonshotai/kimi-k2.6', deps })
const row = deps.rows.get('u1')!
row.status = 'active'
row.admitted_at = now
@@ -658,7 +658,7 @@ describe('getSessionState', () => {
})
if (state.status !== 'active') throw new Error('unreachable')
expect(state.rateLimit).toEqual({
- model: 'z-ai/glm-5.1',
+ model: 'moonshotai/kimi-k2.6',
limit: 5,
windowHours: 12,
recentCount: 1,
diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts
index 52dc82c12..215059b84 100644
--- a/web/src/server/free-session/__tests__/session-view.test.ts
+++ b/web/src/server/free-session/__tests__/session-view.test.ts
@@ -7,7 +7,7 @@ import type { InternalSessionRow } from '../types'
const WAIT_PER_SPOT_MS = 24_000
const GRACE_MS = 30 * 60_000
-const TEST_MODEL = 'z-ai/glm-5.1'
+const TEST_MODEL = 'moonshotai/kimi-k2.6'
function row(overrides: Partial<InternalSessionRow> = {}): InternalSessionRow {
const now = new Date('2026-04-17T12:00:00Z')
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts
index 10071b35f..6d162c461 100644
--- a/web/src/server/free-session/config.ts
+++ b/web/src/server/free-session/config.ts
@@ -48,7 +48,7 @@ export function getSessionGraceMs(): number {
* queue).
*/
const INSTANT_ADMIT_CAPACITY: Record<string, number> = {
- 'z-ai/glm-5.1': 50,
+ 'moonshotai/kimi-k2.6': 50,
'minimax/minimax-m2.7': 1000,
}
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index 528cd4ab3..75c2f24ff 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -37,7 +37,7 @@ import type {
/**
* Per-model admission rate limits. Keyed by freebuff model id; a model not
- * in the map has no rate limit applied. Today only GLM 5.1 is limited
+ * in the map has no rate limit applied. Today only Kimi K2.6 is limited
* (Minimax is cheap enough to leave unlimited).
*
* Hard-coded rather than env-driven: the values need to be observable in the
@@ -45,7 +45,7 @@ import type {
* queued/active responses — changing them is a deliberate, typed edit.
*/
const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
- 'z-ai/glm-5.1': { limit: 5, windowHours: 12 },
+ 'moonshotai/kimi-k2.6': { limit: 5, windowHours: 12 },
}
/** Fetch the caller's current quota snapshot for `model`, or undefined if the
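
A sketch of the enforcement read this map drives; the listRecentAdmits parameters are assumed, and `rejectRateLimited` is a hypothetical stand-in for building the typed reject:

    const rule = RATE_LIMITS[model] // undefined: model is unlimited
    if (rule) {
      const admits = await listRecentAdmits({ userId, model, deps })
      if (admits.length >= rule.limit) {
        // Reject before any row is written; the user takes no queue slot.
        return rejectRateLimited(model, rule, admits)
      }
    }
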
diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts
index 8831ad7a8..d22835658 100644
--- a/web/src/server/free-session/store.ts
+++ b/web/src/server/free-session/store.ts
@@ -466,7 +466,7 @@ export async function promoteQueuedUser(params: {
* the oldest is needed to compute `retryAfterMs` when the window is full,
* so one query covers both the check and the reject path.
*
- * Drives the per-user, per-model rate limit (e.g. at most 5 GLM sessions in
+ * Drives the per-user, per-model rate limit (e.g. at most 5 Kimi sessions in
* the last 12h) enforced before `joinOrTakeOver`.
*/
export async function listRecentAdmits(params: {
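
A sketch of the dual-purpose read that comment describes; oldest-first ordering and the `admitted_at` field are assumed from the test fakes above:

    // One query answers both: the count decides pass/fail, and the oldest
    // in-window row gives retryAfterMs on the reject path.
    const rows = await listRecentAdmits({ userId, model, deps })
    const blocked = rows.length >= limit
    const retryAfterMs = blocked
      ? rows[0].admitted_at.getTime() + windowMs - now.getTime()
      : undefined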