Support legacy GLM free sessions

jahooma · jahooma · commit 1de36687b1b7 · 2026-04-30T11:08:39.000-07:00
diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts
@@ -3,11 +3,15 @@ import { describe, expect, test } from 'bun:test'
 import {
   DEFAULT_FREEBUFF_MODEL_ID,
   FREEBUFF_GEMINI_PRO_MODEL_ID,
+  FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MODELS,
+  SUPPORTED_FREEBUFF_MODELS,
   getFreebuffDeploymentAvailabilityLabel,
   isFreebuffDeploymentHours,
+  isFreebuffModelId,
   isFreebuffModelAvailable,
+  isSupportedFreebuffModelId,
 } from '../constants/freebuff-models'
 
 describe('freebuff model availability', () => {
@@ -33,6 +37,17 @@ describe('freebuff model availability', () => {
     expect(DEFAULT_FREEBUFF_MODEL_ID).toBe(FREEBUFF_KIMI_MODEL_ID)
   })
 
+  test('supports GLM 5.1 as a legacy server-side model without selecting it for new clients', () => {
+    expect(FREEBUFF_MODELS.map((model) => model.id)).not.toContain(
+      FREEBUFF_GLM_MODEL_ID, // must be absent from FREEBUFF_MODELS
+    )
+    expect(SUPPORTED_FREEBUFF_MODELS.map((model) => model.id)).toContain(
+      FREEBUFF_GLM_MODEL_ID, // must be present in SUPPORTED_FREEBUFF_MODELS
+    )
+    expect(isFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(false) // the two id guards are expected to disagree on the legacy id
+    expect(isSupportedFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(true)
+  })
+
   test('formats the close time in the user local timezone while deployment is open', () => {
     expect(
       getFreebuffDeploymentAvailabilityLabel(new Date('2026-01-05T18:00:00Z'), {
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
@@ -1,6 +1,6 @@
 import { parseAgentId } from '../util/agent-id-parsing'
 
-import { FREEBUFF_MODELS } from './freebuff-models'
+import { SUPPORTED_FREEBUFF_MODELS } from './freebuff-models'
 
 import type { CostMode } from './model-config'
 
@@ -20,7 +20,9 @@ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const
 const FREEBUFF_ROOT_AGENT_ID_SET: ReadonlySet<string> = new Set(
   FREEBUFF_ROOT_AGENT_IDS,
 )
-const FREEBUFF_SELECTABLE_MODEL_IDS = FREEBUFF_MODELS.map((model) => model.id)
+const FREEBUFF_ALLOWED_MODEL_IDS = SUPPORTED_FREEBUFF_MODELS.map(
+  (model) => model.id, // ids of all supported models, legacy entries included
+)
 
 /**
  * Agents that are allowed to run in FREE mode.
@@ -32,7 +34,7 @@ const FREEBUFF_SELECTABLE_MODEL_IDS = FREEBUFF_MODELS.map((model) => model.id)
  */
 export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Root orchestrator
-  'base2-free': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
+  'base2-free': new Set(FREEBUFF_ALLOWED_MODEL_IDS),
 
   // File exploration agents
   'file-picker': new Set(['google/gemini-2.5-flash-lite']),
@@ -44,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   'researcher-docs': new Set(['google/gemini-3.1-flash-lite-preview']),
 
   // Command execution
-  'basher': new Set(['google/gemini-3.1-flash-lite-preview']),
+  basher: new Set(['google/gemini-3.1-flash-lite-preview']),
 
   // Editor for free mode
-  'editor-lite': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
+  'editor-lite': new Set(FREEBUFF_ALLOWED_MODEL_IDS),
 
   // Code reviewer for free mode
-  'code-reviewer-lite': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
+  'code-reviewer-lite': new Set(FREEBUFF_ALLOWED_MODEL_IDS),
 }
 
 /**
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
@@ -22,6 +22,7 @@ export interface FreebuffModelOption {
  *  `getFreebuffDeploymentAvailabilityLabel()` instead. */
 export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day'
 export const FREEBUFF_GEMINI_PRO_MODEL_ID = 'google/gemini-3.1-pro-preview'
+export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
 export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
 export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
 const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York'
@@ -61,7 +62,23 @@ export const FREEBUFF_MODELS = [
   },
 ] as const satisfies readonly FreebuffModelOption[]
 
+export const LEGACY_FREEBUFF_MODELS = [
+  {
+    id: FREEBUFF_GLM_MODEL_ID, // listed here instead of FREEBUFF_MODELS; merged into SUPPORTED_FREEBUFF_MODELS
+    displayName: 'GLM 5.1',
+    tagline: 'Legacy',
+    availability: 'deployment_hours', // not 'always': isFreebuffModelAvailable() gates it on isFreebuffDeploymentHours()
+  },
+] as const satisfies readonly FreebuffModelOption[]
+
+export const SUPPORTED_FREEBUFF_MODELS = [
+  ...FREEBUFF_MODELS, // current entries first, so lookups hit them before legacy ones
+  ...LEGACY_FREEBUFF_MODELS, // legacy entries appended after the current list
+] as const satisfies readonly FreebuffModelOption[]
+
 export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
+export type SupportedFreebuffModelId =
+  (typeof SUPPORTED_FREEBUFF_MODELS)[number]['id'] // union of the `id` literals of every supported entry
 
 /** What new freebuff users see selected in the picker. May not be currently
  *  available (Kimi is closed outside deployment hours); callers that need an
@@ -89,9 +106,22 @@ export function resolveFreebuffModel(
   return isFreebuffModelId(id) ? id : FALLBACK_FREEBUFF_MODEL_ID
 }
 
+export function isSupportedFreebuffModelId(
+  id: string | null | undefined,
+): id is SupportedFreebuffModelId { // type guard: true only for ids listed in SUPPORTED_FREEBUFF_MODELS
+  if (!id) return false // null, undefined and '' are rejected before the lookup
+  return SUPPORTED_FREEBUFF_MODELS.some((m) => m.id === id) // exact string match on the id
+}
+
+export function resolveSupportedFreebuffModel(
+  id: string | null | undefined,
+): SupportedFreebuffModelId {
+  return isSupportedFreebuffModelId(id) ? id : FALLBACK_FREEBUFF_MODEL_ID // missing or unsupported ids resolve to the fallback id
+}
+
 export function getFreebuffModel(id: string): FreebuffModelOption {
   return (
-    FREEBUFF_MODELS.find((m) => m.id === id) ??
+    SUPPORTED_FREEBUFF_MODELS.find((m) => m.id === id) ?? // legacy ids now return their own entry rather than the fallback
     FREEBUFF_MODELS.find((m) => m.id === FALLBACK_FREEBUFF_MODEL_ID)!
   )
 }
@@ -242,7 +272,7 @@ export function isFreebuffModelAvailable(
   id: string,
   now: Date = new Date(),
 ): boolean {
-  const model = FREEBUFF_MODELS.find((m) => m.id === id)
+  const model = SUPPORTED_FREEBUFF_MODELS.find((m) => m.id === id)
   if (!model) return false
   return model.availability === 'always' || isFreebuffDeploymentHours(now)
 }
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -3,6 +3,7 @@ import { NextRequest } from 'next/server'
 
 import {
   FREEBUFF_GEMINI_PRO_MODEL_ID,
+  FREEBUFF_GLM_MODEL_ID,
   isFreebuffDeploymentHours,
 } from '@codebuff/common/constants/freebuff-models'
 import { formatQuotaResetCountdown, postChatCompletions } from '../_post'
@@ -741,6 +742,74 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       }
     })
 
+    it('lets old freebuff clients keep using GLM 5.1 through Fireworks availability rules', async () => {
+      const fetchedBodies: Record<string, unknown>[] = [] // parsed request bodies captured by the mock fetch
+      const fetchViaFireworks = mock(
+        async (_url: string | URL | Request, init?: RequestInit) => {
+          fetchedBodies.push(JSON.parse(init?.body as string))
+          return new Response(
+            JSON.stringify({
+              id: 'test-id',
+              model: 'accounts/fireworks/models/glm-5p1',
+              choices: [{ message: { content: 'test response' } }],
+              usage: {
+                prompt_tokens: 10,
+                completion_tokens: 20,
+                total_tokens: 30,
+              },
+            }),
+            {
+              status: 200,
+              headers: { 'Content-Type': 'application/json' },
+            },
+          )
+        },
+      ) as unknown as typeof globalThis.fetch
+
+      const req = new NextRequest(
+        'http://localhost:3000/api/v1/chat/completions',
+        {
+          method: 'POST',
+          headers: allowedFreeModeHeaders('test-api-key-new-free'),
+          body: JSON.stringify({
+            model: FREEBUFF_GLM_MODEL_ID, // the legacy id is what the client requests
+            stream: false,
+            codebuff_metadata: {
+              run_id: 'run-free',
+              client_id: 'test-client-id-123',
+              cost_mode: 'free',
+            },
+          }),
+        },
+      )
+
+      const response = await postChatCompletions({
+        req,
+        getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
+        logger: mockLogger,
+        trackEvent: mockTrackEvent,
+        getUserUsageData: mockGetUserUsageData,
+        getAgentRunFromId: mockGetAgentRunFromId,
+        fetch: fetchViaFireworks,
+        insertMessageBigquery: mockInsertMessageBigquery,
+        loggerWithContext: mockLoggerWithContext,
+        checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
+      })
+
+      const body = await response.json()
+      if (isFreebuffDeploymentHours()) { // NOTE(review): called without a date, so presumably the branch taken depends on when the test runs — confirm
+        expect(response.status).toBe(200)
+        expect(fetchedBodies).toHaveLength(1)
+        expect(fetchedBodies[0].model).toBe('accounts/fireworks/models/glm-5p1') // upstream request is expected to carry the Fireworks model id
+        expect(body.model).toBe(FREEBUFF_GLM_MODEL_ID) // response is expected to report the requested id, not the upstream one
+        expect(body.provider).toBe('Fireworks')
+      } else {
+        expect(response.status).toBe(503)
+        expect(fetchedBodies).toHaveLength(0) // no upstream request is expected in this branch
+        expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
+      }
+    })
+
     it('lets freebuff use Gemini 3.1 Pro through the free-mode allowlist', async () => {
       const req = new NextRequest(
         'http://localhost:3000/api/v1/chat/completions',
diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts
@@ -1,5 +1,7 @@
 import { describe, expect, test } from 'bun:test'
 
+import { FREEBUFF_GLM_MODEL_ID } from '@codebuff/common/constants/freebuff-models'
+
 import { runAdmissionTick } from '../admission'
 
 import type { AdmissionDeps } from '../admission'
@@ -8,7 +10,9 @@ import type { FireworksHealth, FleetHealth } from '../fireworks-health'
 const NOW = new Date('2026-04-17T12:00:00Z')
 const TEST_MODEL = 'test-model'
 
-function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDeps & {
+function makeAdmissionDeps(
+  overrides: Partial<AdmissionDeps> = {},
+): AdmissionDeps & {
   calls: { admit: number }
 } {
   const calls = { admit: 0 }
@@ -37,7 +41,10 @@ function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDep
   return deps
 }
 
-function fleet(health: FireworksHealth, model: string = TEST_MODEL): FleetHealth {
+function fleet(
+  health: FireworksHealth,
+  model: string = TEST_MODEL, // key of the single-entry map returned below
+): FleetHealth {
   return { [model]: health }
 }
 
@@ -106,6 +113,17 @@ describe('runAdmissionTick', () => {
     expect(result.skipped).toBeNull()
   })
 
+  test('legacy GLM 5.1 is admitted during deployment hours', async () => {
+    const deps = makeAdmissionDeps({
+      models: [FREEBUFF_GLM_MODEL_ID], // only the legacy model is passed as an override
+      now: () => new Date('2026-04-17T16:00:00Z'), // fixed instant; the test title treats it as inside deployment hours
+      getFleetHealth: async () => ({ [FREEBUFF_GLM_MODEL_ID]: 'healthy' }),
+    })
+    const result = await runAdmissionTick(deps)
+    expect(result.admitted).toBe(1)
+    expect(result.skipped).toBeNull()
+  })
+
   test('propagates expiry count and admit count together', async () => {
     const deps = makeAdmissionDeps({
       sweepExpired: async () => 2,
diff --git a/web/src/server/free-session/__tests__/config.test.ts b/web/src/server/free-session/__tests__/config.test.ts
@@ -1,6 +1,9 @@
 import { describe, expect, test } from 'bun:test'
 
-import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
+import {
+  FREEBUFF_MODELS,
+  SUPPORTED_FREEBUFF_MODELS,
+} from '@codebuff/common/constants/freebuff-models'
 
 import { getInstantAdmitCapacity } from '../config'
 
@@ -10,4 +13,10 @@ describe('free session config', () => {
       expect(getInstantAdmitCapacity(model.id)).toBeGreaterThan(0)
     }
   })
+
+  test('every supported freebuff model has instant-admit capacity', () => {
+    for (const model of SUPPORTED_FREEBUFF_MODELS) { // covers the LEGACY_FREEBUFF_MODELS entries as well as FREEBUFF_MODELS
+      expect(getInstantAdmitCapacity(model.id)).toBeGreaterThan(0)
+    }
+  })
 })
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -2,6 +2,7 @@ import { beforeEach, describe, expect, test } from 'bun:test'
 
 import {
   FREEBUFF_GEMINI_PRO_MODEL_ID,
+  FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
 } from '@codebuff/common/constants/freebuff-models'
 
@@ -223,6 +224,54 @@ describe('requestSession', () => {
     expect(deps.rows.size).toBe(0)
   })
 
+  test('legacy GLM 5.1 model is still accepted for old clients during deployment hours', async () => {
+    deps._tick(new Date('2026-04-17T16:00:00Z')) // presumably sets the fixture clock; the title treats this instant as deployment hours
+    const state = await requestSession({
+      userId: 'u1',
+      model: FREEBUFF_GLM_MODEL_ID,
+      deps,
+    })
+    expect(state.status).toBe('queued')
+    if (state.status !== 'queued') throw new Error('unreachable') // guard before reading rateLimit below
+    expect(deps.rows.get('u1')?.model).toBe(FREEBUFF_GLM_MODEL_ID) // the stored row is expected to keep the legacy id
+    expect(state.rateLimit).toEqual({
+      model: FREEBUFF_GLM_MODEL_ID,
+      limit: 5,
+      windowHours: 12,
+      recentCount: 0,
+    })
+  })
+
+  test('legacy GLM 5.1 active session can be reclaimed outside deployment hours', async () => {
+    const admittedAt = new Date(deps._now().getTime() - 10 * 60 * 1000) // 10 minutes before deps._now(); NOTE(review): no _tick call here, so "outside deployment hours" relies on the fixture's initial clock — confirm
+    deps.rows.set('u1', {
+      user_id: 'u1',
+      status: 'active',
+      active_instance_id: 'inst-pre', // seeded instance id; asserted to change below
+      model: FREEBUFF_GLM_MODEL_ID,
+      queued_at: admittedAt,
+      admitted_at: admittedAt,
+      expires_at: new Date(deps._now().getTime() + SESSION_LEN),
+      created_at: admittedAt,
+      updated_at: admittedAt,
+    })
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: FREEBUFF_GLM_MODEL_ID,
+      deps,
+    })
+    expect(state.status).toBe('active')
+    if (state.status !== 'active') throw new Error('unreachable') // guard before reading instanceId and rateLimit
+    expect(state.instanceId).not.toBe('inst-pre') // a different instance id is expected after the request
+    expect(state.rateLimit).toEqual({
+      model: FREEBUFF_GLM_MODEL_ID,
+      limit: 5,
+      windowHours: 12,
+      recentCount: 0,
+    })
+  })
+
   test('queued response includes a per-model depth snapshot for the selector', async () => {
     deps._tick(new Date('2026-04-17T16:00:00Z'))
     // Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both.
@@ -436,6 +485,29 @@ describe('requestSession', () => {
     expect(deps.rows.has('u1')).toBe(false)
   })
 
+  test('rate_limited: legacy GLM 5.1 keeps the deployment-hours quota', async () => {
+    deps._tick(KIMI_OPEN_TIME)
+    const now = deps._now()
+    for (let i = 0; i < KIMI_LIMIT; i++) { // seed KIMI_LIMIT prior admits for the legacy model
+      deps.admits.push({
+        user_id: 'u1',
+        model: FREEBUFF_GLM_MODEL_ID,
+        admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000), // spaced one hour apart: 1h, 2h, ... before now
+      })
+    }
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: FREEBUFF_GLM_MODEL_ID,
+      deps,
+    })
+    expect(state.status).toBe('rate_limited')
+    if (state.status !== 'rate_limited') throw new Error('unreachable') // guard before reading the rate-limit fields
+    expect(state.model).toBe(FREEBUFF_GLM_MODEL_ID)
+    expect(state.limit).toBe(KIMI_LIMIT) // GLM is expected to report the same limit constant used for Kimi
+    expect(state.windowHours).toBe(KIMI_WINDOW_HOURS)
+  })
+
   test('rate_limited: admits outside the 12h window do not count', async () => {
     deps._tick(KIMI_OPEN_TIME)
     // 5 admits, each just over 12h old → all fall off the window.
diff --git a/web/src/server/free-session/admission.ts b/web/src/server/free-session/admission.ts
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts