Skip to content

Commit 1de3668

Browse files
committed
Support legacy GLM free sessions
1 parent c431968 commit 1de3668

10 files changed

Lines changed: 262 additions & 36 deletions

File tree

common/src/__tests__/freebuff-models.test.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@ import { describe, expect, test } from 'bun:test'
33
import {
44
DEFAULT_FREEBUFF_MODEL_ID,
55
FREEBUFF_GEMINI_PRO_MODEL_ID,
6+
FREEBUFF_GLM_MODEL_ID,
67
FREEBUFF_KIMI_MODEL_ID,
78
FREEBUFF_MODELS,
9+
SUPPORTED_FREEBUFF_MODELS,
810
getFreebuffDeploymentAvailabilityLabel,
911
isFreebuffDeploymentHours,
12+
isFreebuffModelId,
1013
isFreebuffModelAvailable,
14+
isSupportedFreebuffModelId,
1115
} from '../constants/freebuff-models'
1216

1317
describe('freebuff model availability', () => {
@@ -33,6 +37,17 @@ describe('freebuff model availability', () => {
3337
expect(DEFAULT_FREEBUFF_MODEL_ID).toBe(FREEBUFF_KIMI_MODEL_ID)
3438
})
3539

40+
test('supports GLM 5.1 as a legacy server-side model without selecting it for new clients', () => {
  // Collect both id lists up front so the assertions below read as a
  // side-by-side comparison of "selectable" versus "supported".
  const selectableIds = FREEBUFF_MODELS.map((model) => model.id)
  const supportedIds = SUPPORTED_FREEBUFF_MODELS.map((model) => model.id)

  // GLM must not be offered in the selectable list...
  expect(selectableIds).not.toContain(FREEBUFF_GLM_MODEL_ID)
  expect(isFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(false)

  // ...but must still be recognized as a supported id.
  expect(supportedIds).toContain(FREEBUFF_GLM_MODEL_ID)
  expect(isSupportedFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(true)
})
50+
3651
test('formats the close time in the user local timezone while deployment is open', () => {
3752
expect(
3853
getFreebuffDeploymentAvailabilityLabel(new Date('2026-01-05T18:00:00Z'), {

common/src/constants/free-agents.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { parseAgentId } from '../util/agent-id-parsing'
22

3-
import { FREEBUFF_MODELS } from './freebuff-models'
3+
import { SUPPORTED_FREEBUFF_MODELS } from './freebuff-models'
44

55
import type { CostMode } from './model-config'
66

@@ -20,7 +20,9 @@ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const
2020
const FREEBUFF_ROOT_AGENT_ID_SET: ReadonlySet<string> = new Set(
2121
FREEBUFF_ROOT_AGENT_IDS,
2222
)
23-
const FREEBUFF_SELECTABLE_MODEL_IDS = FREEBUFF_MODELS.map((model) => model.id)
23+
const FREEBUFF_ALLOWED_MODEL_IDS = SUPPORTED_FREEBUFF_MODELS.map(
24+
(model) => model.id,
25+
)
2426

2527
/**
2628
* Agents that are allowed to run in FREE mode.
@@ -32,7 +34,7 @@ const FREEBUFF_SELECTABLE_MODEL_IDS = FREEBUFF_MODELS.map((model) => model.id)
3234
*/
3335
export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
3436
// Root orchestrator
35-
'base2-free': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
37+
'base2-free': new Set(FREEBUFF_ALLOWED_MODEL_IDS),
3638

3739
// File exploration agents
3840
'file-picker': new Set(['google/gemini-2.5-flash-lite']),
@@ -44,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
4446
'researcher-docs': new Set(['google/gemini-3.1-flash-lite-preview']),
4547

4648
// Command execution
47-
'basher': new Set(['google/gemini-3.1-flash-lite-preview']),
49+
basher: new Set(['google/gemini-3.1-flash-lite-preview']),
4850

4951
// Editor for free mode
50-
'editor-lite': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
52+
'editor-lite': new Set(FREEBUFF_ALLOWED_MODEL_IDS),
5153

5254
// Code reviewer for free mode
53-
'code-reviewer-lite': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
55+
'code-reviewer-lite': new Set(FREEBUFF_ALLOWED_MODEL_IDS),
5456
}
5557

5658
/**

common/src/constants/freebuff-models.ts

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ export interface FreebuffModelOption {
2222
* `getFreebuffDeploymentAvailabilityLabel()` instead. */
2323
export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day'
2424
export const FREEBUFF_GEMINI_PRO_MODEL_ID = 'google/gemini-3.1-pro-preview'
25+
export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
2526
export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
2627
export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
2728
const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York'
@@ -61,7 +62,23 @@ export const FREEBUFF_MODELS = [
6162
},
6263
] as const satisfies readonly FreebuffModelOption[]
6364

65+
/**
 * Models that are not part of `FREEBUFF_MODELS` but whose ids are still
 * recognized. Currently only GLM 5.1, which follows the `deployment_hours`
 * availability rule.
 */
export const LEGACY_FREEBUFF_MODELS = [
  {
    id: FREEBUFF_GLM_MODEL_ID,
    displayName: 'GLM 5.1',
    tagline: 'Legacy',
    availability: 'deployment_hours',
  },
] as const satisfies readonly FreebuffModelOption[]

/**
 * Every model id that is accepted: the `FREEBUFF_MODELS` entries first,
 * followed by the legacy entries.
 */
export const SUPPORTED_FREEBUFF_MODELS = [
  ...FREEBUFF_MODELS,
  ...LEGACY_FREEBUFF_MODELS,
] as const satisfies readonly FreebuffModelOption[]

/** Union of the ids listed in `FREEBUFF_MODELS`. */
export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']

/** Union of the ids listed in `SUPPORTED_FREEBUFF_MODELS` (includes legacy ids). */
export type SupportedFreebuffModelId =
  (typeof SUPPORTED_FREEBUFF_MODELS)[number]['id']
6582

6683
/** What new freebuff users see selected in the picker. May not be currently
6784
* available (Kimi is closed outside deployment hours); callers that need an
@@ -89,9 +106,22 @@ export function resolveFreebuffModel(
89106
return isFreebuffModelId(id) ? id : FALLBACK_FREEBUFF_MODEL_ID
90107
}
91108

109+
/**
 * Type guard for model ids listed in `SUPPORTED_FREEBUFF_MODELS`.
 *
 * @param id - Candidate model id; `null`, `undefined` and the empty string
 *   are rejected without consulting the list.
 * @returns `true` when `id` equals the `id` of a supported model.
 */
export function isSupportedFreebuffModelId(
  id: string | null | undefined,
): id is SupportedFreebuffModelId {
  if (id === null || id === undefined || id === '') {
    return false
  }
  for (const model of SUPPORTED_FREEBUFF_MODELS) {
    if (model.id === id) return true
  }
  return false
}
115+
116+
/**
 * Normalizes a possibly missing or unrecognized model id.
 *
 * @param id - Requested model id, if any.
 * @returns `id` unchanged when it passes `isSupportedFreebuffModelId`;
 *   otherwise `FALLBACK_FREEBUFF_MODEL_ID`.
 */
export function resolveSupportedFreebuffModel(
  id: string | null | undefined,
): SupportedFreebuffModelId {
  if (isSupportedFreebuffModelId(id)) {
    return id
  }
  return FALLBACK_FREEBUFF_MODEL_ID
}
121+
92122
export function getFreebuffModel(id: string): FreebuffModelOption {
93123
return (
94-
FREEBUFF_MODELS.find((m) => m.id === id) ??
124+
SUPPORTED_FREEBUFF_MODELS.find((m) => m.id === id) ??
95125
FREEBUFF_MODELS.find((m) => m.id === FALLBACK_FREEBUFF_MODEL_ID)!
96126
)
97127
}
@@ -242,7 +272,7 @@ export function isFreebuffModelAvailable(
242272
id: string,
243273
now: Date = new Date(),
244274
): boolean {
245-
const model = FREEBUFF_MODELS.find((m) => m.id === id)
275+
const model = SUPPORTED_FREEBUFF_MODELS.find((m) => m.id === id)
246276
if (!model) return false
247277
return model.availability === 'always' || isFreebuffDeploymentHours(now)
248278
}

web/src/app/api/v1/chat/completions/__tests__/completions.test.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { NextRequest } from 'next/server'
33

44
import {
55
FREEBUFF_GEMINI_PRO_MODEL_ID,
6+
FREEBUFF_GLM_MODEL_ID,
67
isFreebuffDeploymentHours,
78
} from '@codebuff/common/constants/freebuff-models'
89
import { formatQuotaResetCountdown, postChatCompletions } from '../_post'
@@ -741,6 +742,74 @@ describe('/api/v1/chat/completions POST endpoint', () => {
741742
}
742743
})
743744

745+
it('lets old freebuff clients keep using GLM 5.1 through Fireworks availability rules', async () => {
  // Payload the stubbed upstream replies with on every call.
  const upstreamPayload = {
    id: 'test-id',
    model: 'accounts/fireworks/models/glm-5p1',
    choices: [{ message: { content: 'test response' } }],
    usage: {
      prompt_tokens: 10,
      completion_tokens: 20,
      total_tokens: 30,
    },
  }

  // Every request body handed to the stubbed fetch is recorded here so the
  // test can check which upstream model id was requested.
  const fetchedBodies: Record<string, unknown>[] = []
  const fetchViaFireworks = mock(
    async (_url: string | URL | Request, init?: RequestInit) => {
      fetchedBodies.push(JSON.parse(init?.body as string))
      return new Response(JSON.stringify(upstreamPayload), {
        status: 200,
        headers: { 'Content-Type': 'application/json' },
      })
    },
  ) as unknown as typeof globalThis.fetch

  const requestPayload = {
    model: FREEBUFF_GLM_MODEL_ID,
    stream: false,
    codebuff_metadata: {
      run_id: 'run-free',
      client_id: 'test-client-id-123',
      cost_mode: 'free',
    },
  }
  const req = new NextRequest('http://localhost:3000/api/v1/chat/completions', {
    method: 'POST',
    headers: allowedFreeModeHeaders('test-api-key-new-free'),
    body: JSON.stringify(requestPayload),
  })

  const response = await postChatCompletions({
    req,
    getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
    logger: mockLogger,
    trackEvent: mockTrackEvent,
    getUserUsageData: mockGetUserUsageData,
    getAgentRunFromId: mockGetAgentRunFromId,
    fetch: fetchViaFireworks,
    insertMessageBigquery: mockInsertMessageBigquery,
    loggerWithContext: mockLoggerWithContext,
    checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
  })
  const body = await response.json()

  // NOTE(review): isFreebuffDeploymentHours() is called without an explicit
  // time, so which set of assertions runs appears to depend on when the suite
  // is executed.
  if (!isFreebuffDeploymentHours()) {
    // Closed: the request is rejected before anything is sent upstream.
    expect(response.status).toBe(503)
    expect(fetchedBodies).toHaveLength(0)
    expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
    return
  }

  // Open: exactly one upstream call, addressed to the Fireworks model id,
  // while the response still reports the id the client asked for.
  expect(response.status).toBe(200)
  expect(fetchedBodies).toHaveLength(1)
  expect(fetchedBodies[0].model).toBe('accounts/fireworks/models/glm-5p1')
  expect(body.model).toBe(FREEBUFF_GLM_MODEL_ID)
  expect(body.provider).toBe('Fireworks')
})
812+
744813
it('lets freebuff use Gemini 3.1 Pro through the free-mode allowlist', async () => {
745814
const req = new NextRequest(
746815
'http://localhost:3000/api/v1/chat/completions',

web/src/server/free-session/__tests__/admission.test.ts

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import { describe, expect, test } from 'bun:test'
22

3+
import { FREEBUFF_GLM_MODEL_ID } from '@codebuff/common/constants/freebuff-models'
4+
35
import { runAdmissionTick } from '../admission'
46

57
import type { AdmissionDeps } from '../admission'
@@ -8,7 +10,9 @@ import type { FireworksHealth, FleetHealth } from '../fireworks-health'
810
const NOW = new Date('2026-04-17T12:00:00Z')
911
const TEST_MODEL = 'test-model'
1012

11-
function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDeps & {
13+
function makeAdmissionDeps(
14+
overrides: Partial<AdmissionDeps> = {},
15+
): AdmissionDeps & {
1216
calls: { admit: number }
1317
} {
1418
const calls = { admit: 0 }
@@ -37,7 +41,10 @@ function makeAdmissionDeps(overrides: Partial<AdmissionDeps> = {}): AdmissionDep
3741
return deps
3842
}
3943

40-
function fleet(health: FireworksHealth, model: string = TEST_MODEL): FleetHealth {
44+
function fleet(
45+
health: FireworksHealth,
46+
model: string = TEST_MODEL,
47+
): FleetHealth {
4148
return { [model]: health }
4249
}
4350

@@ -106,6 +113,17 @@ describe('runAdmissionTick', () => {
106113
expect(result.skipped).toBeNull()
107114
})
108115

116+
test('legacy GLM 5.1 is admitted during deployment hours', async () => {
  // 16:00 UTC is the instant this test treats as "during deployment hours".
  const clockIso = '2026-04-17T16:00:00Z'
  const deps = makeAdmissionDeps({
    models: [FREEBUFF_GLM_MODEL_ID],
    now: () => new Date(clockIso),
    getFleetHealth: async () => ({ [FREEBUFF_GLM_MODEL_ID]: 'healthy' }),
  })

  const { admitted, skipped } = await runAdmissionTick(deps)

  expect(admitted).toBe(1)
  expect(skipped).toBeNull()
})
126+
109127
test('propagates expiry count and admit count together', async () => {
110128
const deps = makeAdmissionDeps({
111129
sweepExpired: async () => 2,

web/src/server/free-session/__tests__/config.test.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import { describe, expect, test } from 'bun:test'
22

3-
import { FREEBUFF_MODELS } from '@codebuff/common/constants/freebuff-models'
3+
import {
4+
FREEBUFF_MODELS,
5+
SUPPORTED_FREEBUFF_MODELS,
6+
} from '@codebuff/common/constants/freebuff-models'
47

58
import { getInstantAdmitCapacity } from '../config'
69

@@ -10,4 +13,10 @@ describe('free session config', () => {
1013
expect(getInstantAdmitCapacity(model.id)).toBeGreaterThan(0)
1114
}
1215
})
16+
17+
test('every supported freebuff model has instant-admit capacity', () => {
  // Covers legacy ids as well, since they are part of the supported list.
  SUPPORTED_FREEBUFF_MODELS.forEach((model) => {
    expect(getInstantAdmitCapacity(model.id)).toBeGreaterThan(0)
  })
})
1322
})

web/src/server/free-session/__tests__/public-api.test.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { beforeEach, describe, expect, test } from 'bun:test'
22

33
import {
44
FREEBUFF_GEMINI_PRO_MODEL_ID,
5+
FREEBUFF_GLM_MODEL_ID,
56
FREEBUFF_KIMI_MODEL_ID,
67
} from '@codebuff/common/constants/freebuff-models'
78

@@ -223,6 +224,54 @@ describe('requestSession', () => {
223224
expect(deps.rows.size).toBe(0)
224225
})
225226

227+
test('legacy GLM 5.1 model is still accepted for old clients during deployment hours', async () => {
  deps._tick(new Date('2026-04-17T16:00:00Z'))

  // Rate-limit snapshot expected for a user with no prior GLM admits.
  const expectedRateLimit = {
    model: FREEBUFF_GLM_MODEL_ID,
    limit: 5,
    windowHours: 12,
    recentCount: 0,
  }

  const state = await requestSession({
    userId: 'u1',
    model: FREEBUFF_GLM_MODEL_ID,
    deps,
  })

  expect(state.status).toBe('queued')
  // Narrows the union so `rateLimit` is accessible below.
  if (state.status !== 'queued') throw new Error('unreachable')

  // The stored row keeps the legacy id.
  const storedRow = deps.rows.get('u1')
  expect(storedRow?.model).toBe(FREEBUFF_GLM_MODEL_ID)
  expect(state.rateLimit).toEqual(expectedRateLimit)
})
244+
245+
test('legacy GLM 5.1 active session can be reclaimed outside deployment hours', async () => {
  // NOTE(review): the clock is not set explicitly here; the test appears to
  // rely on the suite's default time being outside deployment hours — confirm.
  const TEN_MINUTES_MS = 10 * 60 * 1000
  const admittedAt = new Date(deps._now().getTime() - TEN_MINUTES_MS)
  const expiresAt = new Date(deps._now().getTime() + SESSION_LEN)

  // Seed an already-active GLM session owned by a previous instance.
  deps.rows.set('u1', {
    user_id: 'u1',
    status: 'active',
    active_instance_id: 'inst-pre',
    model: FREEBUFF_GLM_MODEL_ID,
    queued_at: admittedAt,
    admitted_at: admittedAt,
    expires_at: expiresAt,
    created_at: admittedAt,
    updated_at: admittedAt,
  })

  const state = await requestSession({
    userId: 'u1',
    model: FREEBUFF_GLM_MODEL_ID,
    deps,
  })

  expect(state.status).toBe('active')
  // Narrows the union so `instanceId` and `rateLimit` are accessible below.
  if (state.status !== 'active') throw new Error('unreachable')

  // The session stays active but the instance id differs from the seeded one.
  expect(state.instanceId).not.toBe('inst-pre')

  const expectedRateLimit = {
    model: FREEBUFF_GLM_MODEL_ID,
    limit: 5,
    windowHours: 12,
    recentCount: 0,
  }
  expect(state.rateLimit).toEqual(expectedRateLimit)
})
274+
226275
test('queued response includes a per-model depth snapshot for the selector', async () => {
227276
deps._tick(new Date('2026-04-17T16:00:00Z'))
228277
// Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both.
@@ -436,6 +485,29 @@ describe('requestSession', () => {
436485
expect(deps.rows.has('u1')).toBe(false)
437486
})
438487

488+
test('rate_limited: legacy GLM 5.1 keeps the deployment-hours quota', async () => {
  deps._tick(KIMI_OPEN_TIME)
  const now = deps._now()
  const ONE_HOUR_MS = 60 * 60 * 1000

  // Seed KIMI_LIMIT earlier GLM admits for the user, spaced one hour apart
  // going back from `now` (1h ago, 2h ago, ...).
  for (let hoursAgo = 1; hoursAgo <= KIMI_LIMIT; hoursAgo++) {
    deps.admits.push({
      user_id: 'u1',
      model: FREEBUFF_GLM_MODEL_ID,
      admitted_at: new Date(now.getTime() - hoursAgo * ONE_HOUR_MS),
    })
  }

  const state = await requestSession({
    userId: 'u1',
    model: FREEBUFF_GLM_MODEL_ID,
    deps,
  })

  expect(state.status).toBe('rate_limited')
  // Narrows the union so the rate-limit fields are accessible below.
  if (state.status !== 'rate_limited') throw new Error('unreachable')

  // GLM is expected to report the same limit and window as the Kimi constants.
  expect(state.model).toBe(FREEBUFF_GLM_MODEL_ID)
  expect(state.limit).toBe(KIMI_LIMIT)
  expect(state.windowHours).toBe(KIMI_WINDOW_HOURS)
})
510+
439511
test('rate_limited: admits outside the 12h window do not count', async () => {
440512
deps._tick(KIMI_OPEN_TIME)
441513
// 5 admits, each just over 12h old → all fall off the window.

0 commit comments

Comments
 (0)