CodebuffAI · jahooma · Apr 29, 2026 · Apr 28, 2026 · Apr 29, 2026
@@ -67,6 +67,11 @@ describe('editor agent', () => {
       expect(glmEditor.model).toBe('z-ai/glm-5.1')
     })
 
+    test('creates kimi editor', () => {
+      const kimiEditor = createCodeEditor({ model: 'kimi' })
+      expect(kimiEditor.model).toBe('moonshotai/kimi-k2.6')
+    })
+
     test('creates minimax editor', () => {
       const minimaxEditor = createCodeEditor({ model: 'minimax' })
       expect(minimaxEditor.model).toBe('minimax/minimax-m2.7')
@@ -84,6 +89,12 @@ describe('editor agent', () => {
       expect(glmEditor.instructionsPrompt).not.toContain('</think>')
     })
 
+    test('kimi editor does not include think tags in instructions', () => {
+      const kimiEditor = createCodeEditor({ model: 'kimi' })
+      expect(kimiEditor.instructionsPrompt).not.toContain('<think>')
+      expect(kimiEditor.instructionsPrompt).not.toContain('</think>')
+    })
+
     test('minimax editor does not include think tags in instructions', () => {
       const minimaxEditor = createCodeEditor({ model: 'minimax' })
       expect(minimaxEditor.instructionsPrompt).not.toContain('<think>')

@@ -25,7 +25,7 @@ export function createBase2(
   const isFree = mode === 'free' || mode === 'lite'
 
   const isSonnet = false
-  const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7'
+  const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7'
 
   return {
     publisher,

@@ -3,7 +3,7 @@ import { createCodeEditor } from './editor'
 import type { AgentDefinition } from '../types/agent-definition'
 
 const definition: AgentDefinition = {
-  ...createCodeEditor({ model: 'glm' }),
+  ...createCodeEditor({ model: 'kimi' }),
   id: 'editor-lite',
 }
 export default definition
@@ -4,7 +4,7 @@ import { publisher } from '../constants'
 import type { AgentDefinition } from '../types/agent-definition'
 
 export const createCodeEditor = (options: {
-  model: 'gpt-5' | 'opus' | 'glm' | 'minimax'
+  model: 'gpt-5' | 'opus' | 'glm' | 'kimi' | 'minimax'
 }): Omit<AgentDefinition, 'id'> => {
   const { model } = options
   return {
@@ -14,6 +14,8 @@ export const createCodeEditor = (options: {
         ? 'openai/gpt-5.1'
         : options.model === 'minimax'
           ? 'minimax/minimax-m2.7'
+        : options.model === 'kimi'
+          ? 'moonshotai/kimi-k2.6'
         : options.model === 'glm'
           ? 'z-ai/glm-5.1'
           : 'anthropic/claude-opus-4.7',
@@ -67,7 +69,7 @@ OR for new files or major rewrites:
 }
 </codebuff_tool_call>
 
-${model === 'gpt-5' || model === 'glm' || model === 'minimax'
+${model === 'gpt-5' || model === 'glm' || model === 'kimi' || model === 'minimax'
         ? ''
         : `Before you start writing your implementation, you should use <think> tags to think about the best way to implement the changes.
 

@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
 const definition: SecretAgentDefinition = {
   id: 'code-reviewer-lite',
   publisher,
-  ...createReviewer('z-ai/glm-5.1'),
+  ...createReviewer('moonshotai/kimi-k2.6'),
 }
 
 export default definition
@@ -423,6 +423,7 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
+  | 'moonshotai/kimi-k2.6'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'

@@ -5,7 +5,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'
 import { Button } from './button'
 import {
   FALLBACK_FREEBUFF_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
+  FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MODELS,
   getFreebuffDeploymentAvailabilityLabel,
   isFreebuffModelAvailable,
@@ -25,8 +25,8 @@ import {
 import type { KeyEvent } from '@opentui/core'
 
 const FREEBUFF_MODEL_SELECTOR_MODELS = [
-  ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID),
-  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID),
+  ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_KIMI_MODEL_ID),
+  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_KIMI_MODEL_ID),
 ]
 
 /**
@@ -72,7 +72,7 @@ export const FreebuffModelSelector: React.FC = () => {
     // unavailable (e.g. deployment hours close while the picker is open),
     // swap to the always-available fallback so Enter doesn't POST a model
     // the server will immediately reject. In-memory only — the user's saved
-    // preference (e.g. GLM) is preserved for the next launch.
+    // preference (e.g. Kimi) is preserved for the next launch.
     if (
       (session?.status === 'none' || !session) &&
       !isFreebuffModelAvailable(selectedModel, new Date(now))

@@ -260,7 +260,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
                   <span>Elapsed </span>
                   {formatElapsed(elapsedMs)}
                 </text>
-                {/* Per-model session quota (e.g. GLM 5.1 caps at 5/12h). Only
+                {/* Per-model session quota (e.g. Kimi K2.6 caps at 5/12h). Only
                     rendered for rate-limited models so the Minimax queue stays
                     clutter-free. */}
                 {session.rateLimit && (
@@ -343,7 +343,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
             </>
           )}
 
-          {/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
+          {/* Per-model session quota exhausted (e.g. 5+ Kimi sessions in the
               last 12h). Terminal for this run — the user can exit and come
               back once the oldest session in the window rolls off. */}
           {session?.status === 'rate_limited' && (

@@ -104,7 +104,7 @@ async function callSession(
       return body
     }
   }
-  // 429 from POST is the per-model session-quota reject (e.g. too many GLM
+  // 429 from POST is the per-model session-quota reject (e.g. too many Kimi
   // sessions in the last 12h). Terminal for the current poll — the CLI shows
   // a screen explaining the limit and when the user can try again. The 429
   // status (rather than 200) keeps older CLIs in their error path so they
@@ -442,9 +442,9 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
         }
         if (next.status === 'model_unavailable') {
           // Server says the requested model isn't available right now (e.g.
-          // GLM outside deployment hours). Flip to the always-available
+          // Kimi outside deployment hours). Flip to the always-available
           // fallback for this run. In-memory only — `setSelectedModel`
-          // doesn't persist, so the user's saved preference (e.g. GLM)
+          // doesn't persist, so the user's saved preference (e.g. Kimi)
           // is preserved for their next launch during deployment hours.
           useFreebuffModelStore
             .getState()

@@ -1,11 +1,17 @@
 import { describe, expect, test } from 'bun:test'
 
 import {
+  DEFAULT_FREEBUFF_MODEL_ID,
+  FREEBUFF_KIMI_MODEL_ID,
   getFreebuffDeploymentAvailabilityLabel,
   isFreebuffDeploymentHours,
 } from '../constants/freebuff-models'
 
 describe('freebuff model availability', () => {
+  test('defaults to Kimi K2.6', () => {
+    expect(DEFAULT_FREEBUFF_MODEL_ID).toBe(FREEBUFF_KIMI_MODEL_ID)
+  })
+
   test('formats the close time in the user local timezone while deployment is open', () => {
     expect(
       getFreebuffDeploymentAvailabilityLabel(new Date('2026-01-05T18:00:00Z'), {

@@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Root orchestrator
   'base2-free': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 
   // File exploration agents
@@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Editor for free mode
   'editor-lite': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 
   // Code reviewer for free mode
   'code-reviewer-lite': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 }
 

@@ -21,7 +21,7 @@ export interface FreebuffModelOption {
  *  the caller's local timezone. The CLI should render
  *  `getFreebuffDeploymentAvailabilityLabel()` instead. */
 export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day'
-export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
+export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
 export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
 const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York'
 const FREEBUFF_PACIFIC_TIMEZONE = 'America/Los_Angeles'
@@ -47,8 +47,8 @@ export const FREEBUFF_MODELS = [
     availability: 'always',
   },
   {
-    id: FREEBUFF_GLM_MODEL_ID,
-    displayName: 'GLM 5.1',
+    id: FREEBUFF_KIMI_MODEL_ID,
+    displayName: 'Kimi K2.6',
     tagline: 'Smartest',
     availability: 'deployment_hours',
   },
@@ -57,15 +57,15 @@ export const FREEBUFF_MODELS = [
 export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
 
 /** What new freebuff users see selected in the picker. May not be currently
- *  available (GLM is closed outside deployment hours); callers that need an
+ *  available (Kimi is closed outside deployment hours); callers that need an
  *  always-available id for resolution / auto-fallbacks should use
  *  FALLBACK_FREEBUFF_MODEL_ID instead. */
-export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID
+export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_KIMI_MODEL_ID
 
 /** Always-available fallback used when the requested model can't be served
  *  right now (unknown id, deployment hours closed, etc.). Kept distinct from
  *  DEFAULT_FREEBUFF_MODEL_ID so a new user's "preferred default" can be the
- *  smartest model without auto-flipping anyone to a closed deployment. */
+ *  smartest model without auto-flipping anyone to a currently closed model. */
 export const FALLBACK_FREEBUFF_MODEL_ID: FreebuffModelId =
   FREEBUFF_MINIMAX_MODEL_ID
 

@@ -423,6 +423,7 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
+  | 'moonshotai/kimi-k2.6'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'

@@ -9,7 +9,7 @@
 /**
  * Per-model usage counter surfaced to the CLI so the waiting-room UI can
  * render "N of M sessions used" alongside queue/active state. Present when
- * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
+ * the joined model has a rate limit applied (today: Kimi K2.6 with 5 admits
  * per 12-hour window). `recentCount` is the number of admissions inside
  * `windowHours` at the time the response was produced — see also the
  * standalone `rate_limited` status for the reject path.
@@ -72,7 +72,7 @@ export type FreebuffSessionServerResponse =
       queueDepthByModel: Record<string, number>
       estimatedWaitMs: number
       queuedAt: string
-      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+      /** Rate-limit quota for rate-limited models (Kimi K2.6 today). Absent
        *  for unlimited models or when the status was produced outside the
        *  rate-limit check path (e.g. pure read via GET). */
       rateLimit?: FreebuffSessionRateLimit
@@ -85,7 +85,7 @@ export type FreebuffSessionServerResponse =
       admittedAt: string
       expiresAt: string
       remainingMs: number
-      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+      /** Rate-limit quota for rate-limited models (Kimi K2.6 today). Absent
        *  for unlimited models or when the status was produced outside the
        *  rate-limit check path (e.g. pure read via GET). */
       rateLimit?: FreebuffSessionRateLimit
@@ -131,7 +131,7 @@ export type FreebuffSessionServerResponse =
       /** User has an active session bound to a different model. Returned
        *  from POST /session when they pick a new model without ending their
        *  current session first. The CLI shows a confirmation prompt: "End
-       *  your active GLM session to switch?" → on confirm, DELETE then
+       *  your active Kimi session to switch?" → on confirm, DELETE then
        *  re-POST with the new model. */
       status: 'model_locked'
       currentModel: string
@@ -152,7 +152,7 @@ export type FreebuffSessionServerResponse =
     }
   | {
       /** User has used up their per-model admission quota in the rolling
-       *  window (GLM 5.1: 5 one-hour sessions per 12h). Returned from POST
+       *  window (Kimi K2.6: 5 one-hour sessions per 12h). Returned from POST
        *  /session before the user is placed in the queue. `retryAfterMs` is
        *  the time until the oldest admission inside the window falls off
        *  and one quota slot opens up — clients should show the user when

@@ -54,7 +54,7 @@ freebuff
 
 **How can it be free?** Freebuff is supported by ads shown in the CLI.
 
-**What models do you use?** GLM 5.1 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.
+**What models do you use?** Kimi K2.6 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.
 
 **Are you training on my data?** No. We only use model providers that do not train on our requests. Your code stays yours.
 

@@ -84,7 +84,7 @@ Freebuff only supports **FREE mode**. All mode-related features are stripped.
 | `/agent:gpt-5` | Premium agent, not available in free tier |
 | `/review` | Uses thinker-gpt under the hood |
 | `/publish` | Agent publishing not available in free tier |
-| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free model (GLM 5.1) |
+| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free model (Kimi K2.6) |
 
 ### Commands to KEEP
 

@@ -31,7 +31,7 @@ const faqs = [
   {
     question: 'What models do you use?',
     answer:
-      'GLM 5.1 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research.\n\nConnect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.',
+      'Kimi K2.6 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research.\n\nConnect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.',
   },
   {
     question: 'Which countries is Freebuff available in?',

@@ -642,15 +642,15 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       expect(body.countryBlockReason).toBe('anonymized_or_unknown_country')
     })
 
-    it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => {
+    it('lets freebuff use Kimi K2.6 through Fireworks availability rules', async () => {
       const fetchedBodies: Record<string, unknown>[] = []
       const fetchViaFireworks = mock(
         async (_url: string | URL | Request, init?: RequestInit) => {
           fetchedBodies.push(JSON.parse(init?.body as string))
           return new Response(
             JSON.stringify({
               id: 'test-id',
-              model: 'accounts/fireworks/models/glm-5p1',
+              model: 'accounts/fireworks/models/kimi-k2p6',
               choices: [{ message: { content: 'test response' } }],
               usage: {
                 prompt_tokens: 10,
@@ -672,7 +672,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: allowedFreeModeHeaders('test-api-key-new-free'),
           body: JSON.stringify({
-            model: 'z-ai/glm-5.1',
+            model: 'moonshotai/kimi-k2.6',
             stream: false,
             codebuff_metadata: {
               run_id: 'run-free',
@@ -701,9 +701,9 @@ describe('/api/v1/chat/completions POST endpoint', () => {
         expect(response.status).toBe(200)
         expect(fetchedBodies).toHaveLength(1)
         expect(fetchedBodies[0].model).toBe(
-          'accounts/fireworks/models/glm-5p1',
+          'accounts/fireworks/models/kimi-k2p6',
         )
-        expect(body.model).toBe('z-ai/glm-5.1')
+        expect(body.model).toBe('moonshotai/kimi-k2.6')
         expect(body.provider).toBe('Fireworks')
       } else {
         expect(response.status).toBe(503)

@@ -281,10 +281,10 @@ describe('POST /api/v1/freebuff/session', () => {
     expect(body.status).toBe('queued')
   })
 
-  test('returns model_unavailable for GLM outside deployment hours', async () => {
+  test('returns model_unavailable for Kimi outside deployment hours', async () => {
     const sessionDeps = makeSessionDeps()
     const resp = await postFreebuffSession(
-      makeReq('ok', { model: 'z-ai/glm-5.1' }),
+      makeReq('ok', { model: 'moonshotai/kimi-k2.6' }),
       makeDeps(sessionDeps, 'u1'),
     )
     expect(resp.status).toBe(409)

@@ -33,7 +33,7 @@ const FAQ_ITEMS = [
   {
     question: 'What model does Codebuff use?',
     answer:
-      'Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or GLM 5.1 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research.',
+      'Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or Kimi K2.6 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research.',
   },
   {
     question: 'Can I use my Claude Pro or Max subscription with Codebuff?',

@@ -24,8 +24,8 @@ The main agent ("Buffy") runs on Claude Opus 4.7. It reads your prompt, gathers
 - [**Code Searcher**](/publishers/codebuff/agents/code-searcher) - grep-style pattern matching
 - [**Researcher**](/publishers/codebuff/agents/researcher) (Gemini 3.1 Flash Lite) - web and docs lookup
 - [**Thinker**](/publishers/codebuff/agents/thinker) (Claude Opus 4.7, GPT-5.4) - works through hard problems
-- [**Editor**](/publishers/codebuff/agents/editor) (Claude Opus 4.7, GPT-5.1, GLM 5.1) - writes and modifies code
-- [**Reviewer**](/publishers/codebuff/agents/reviewer) (Claude Opus 4.7, GLM 5.1 in Lite mode) - catches bugs and style issues
+- [**Editor**](/publishers/codebuff/agents/editor) (Claude Opus 4.7, GPT-5.1, Kimi K2.6) - writes and modifies code
+- [**Reviewer**](/publishers/codebuff/agents/reviewer) (Claude Opus 4.7, Kimi K2.6 in Lite mode) - catches bugs and style issues
 - [**Basher**](/publishers/codebuff/agents/basher) (Gemini 3.1 Flash Lite) - runs terminal commands
 
 ## Best-of-N Selection (Max Mode)