Skip to content

Commit d0fbe5a

Browse files
committed
disable glm deployment
1 parent 18b0f12 commit d0fbe5a

5 files changed

Lines changed: 87 additions & 9 deletions

File tree

freebuff/e2e/tests/slash-commands.e2e.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ const KEPT_COMMANDS = [
3838
'/theme:toggle',
3939
]
4040

41-
describe('Freebuff: Slash Commands', () => {
41+
describe.skip('Freebuff: Slash Commands', () => {
4242
let session: FreebuffSession | null = null
4343

4444
afterEach(async () => {

web/src/app/api/v1/chat/completions/__tests__/completions.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
644644
return new Response(
645645
JSON.stringify({
646646
id: 'test-id',
647-
model: 'accounts/james-65d217/deployments/mjb4i7ea',
647+
model: 'accounts/fireworks/models/glm-5p1',
648648
choices: [{ message: { content: 'test response' } }],
649649
usage: {
650650
prompt_tokens: 10,
@@ -695,7 +695,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
695695
expect(response.status).toBe(200)
696696
expect(fetchedBodies).toHaveLength(1)
697697
expect(fetchedBodies[0].model).toBe(
698-
'accounts/james-65d217/deployments/mjb4i7ea',
698+
'accounts/fireworks/models/glm-5p1',
699699
)
700700
expect(body.model).toBe('z-ai/glm-5.1')
701701
expect(body.provider).toBe('Fireworks')

web/src/llm-api/__tests__/fireworks-deployment.test.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ import type { Logger } from '@codebuff/common/types/contracts/logger'
1313

1414
const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1'
1515
const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea'
16+
const TEST_DEPLOYMENT_MAP = { // Passed as the `deploymentMap` param in deployment-routing tests so they do not depend on the contents of the production FIREWORKS_DEPLOYMENT_MAP.
17+
'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID,
18+
}
1619
const IN_DEPLOYMENT_HOURS = new Date('2026-04-17T16:00:00Z') // Friday, 12pm ET / 9am PT
1720
const BEFORE_DEPLOYMENT_HOURS = new Date('2026-04-17T12:59:00Z') // Friday, 8:59am ET
1821
const AFTER_DEPLOYMENT_HOURS = new Date('2026-04-18T00:00:00Z') // Friday, 5pm PT
@@ -108,6 +111,7 @@ describe('Fireworks deployment routing', () => {
108111
fetch: mockFetch,
109112
logger,
110113
useCustomDeployment: false,
114+
now: IN_DEPLOYMENT_HOURS,
111115
sessionId: 'test-user-id',
112116
})
113117

@@ -116,6 +120,49 @@ describe('Fireworks deployment routing', () => {
116120
expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID)
117121
})
118122

123+
it('uses standard API for GLM during hours when no deployment is mapped', async () => { // With no GLM deployment mapped, an in-hours request should go to the standard model ID.
124+
const fetchCalls: string[] = []
125+
126+
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
127+
const body = JSON.parse(init?.body as string)
128+
fetchCalls.push(body.model) // record the model ID each outgoing request targets
129+
return new Response(JSON.stringify({ ok: true }), { status: 200 })
130+
}) as unknown as typeof globalThis.fetch
131+
132+
const response = await createFireworksRequestWithFallback({
133+
body: minimalBody as never,
134+
originalModel: 'z-ai/glm-5.1',
135+
fetch: mockFetch,
136+
logger,
137+
useCustomDeployment: true, // NOTE(review): no `deploymentMap` is passed, so the default FIREWORKS_DEPLOYMENT_MAP is used; this test only holds while that map has no 'z-ai/glm-5.1' entry.
138+
sessionId: 'test-user-id',
139+
now: IN_DEPLOYMENT_HOURS,
140+
})
141+
142+
expect(response.status).toBe(200)
143+
expect(fetchCalls).toEqual([STANDARD_MODEL_ID]) // exactly one request, addressed to the standard (non-deployment) model ID
144+
})
145+
146+
it('keeps GLM unavailable outside hours when no deployment is mapped', async () => { // GLM stays gated by deployment hours even when it has no custom deployment.
147+
const mockFetch = mock(async () => {
148+
throw new Error('should not fetch outside deployment hours') // the request is expected to be rejected before any fetch is attempted
149+
}) as unknown as typeof globalThis.fetch
150+
151+
const response = await createFireworksRequestWithFallback({
152+
body: minimalBody as never,
153+
originalModel: 'z-ai/glm-5.1',
154+
fetch: mockFetch,
155+
logger,
156+
useCustomDeployment: true, // NOTE(review): no `deploymentMap` is passed, so the default FIREWORKS_DEPLOYMENT_MAP is used; "no deployment is mapped" relies on that map having no 'z-ai/glm-5.1' entry.
157+
sessionId: 'test-user-id',
158+
now: BEFORE_DEPLOYMENT_HOURS, // Friday 8:59am ET per the constant's definition
159+
})
160+
161+
expect(response.status).toBe(503)
162+
const body = await response.json()
163+
expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
164+
})
165+
119166
it('tries custom deployment during deployment hours', async () => {
120167
const fetchCalls: string[] = []
121168

@@ -131,6 +178,7 @@ describe('Fireworks deployment routing', () => {
131178
fetch: mockFetch,
132179
logger,
133180
useCustomDeployment: true,
181+
deploymentMap: TEST_DEPLOYMENT_MAP,
134182
sessionId: 'test-user-id',
135183
now: IN_DEPLOYMENT_HOURS,
136184
})
@@ -164,6 +212,7 @@ describe('Fireworks deployment routing', () => {
164212
fetch: mockFetch,
165213
logger,
166214
useCustomDeployment: true,
215+
deploymentMap: TEST_DEPLOYMENT_MAP,
167216
sessionId: 'test-user-id',
168217
now: IN_DEPLOYMENT_HOURS,
169218
})
@@ -197,6 +246,7 @@ describe('Fireworks deployment routing', () => {
197246
fetch: mockFetch,
198247
logger,
199248
useCustomDeployment: true,
249+
deploymentMap: TEST_DEPLOYMENT_MAP,
200250
sessionId: 'test-user-id',
201251
now: IN_DEPLOYMENT_HOURS,
202252
})
@@ -224,6 +274,7 @@ describe('Fireworks deployment routing', () => {
224274
fetch: mockFetch,
225275
logger,
226276
useCustomDeployment: true,
277+
deploymentMap: TEST_DEPLOYMENT_MAP,
227278
sessionId: 'test-user-id',
228279
now: IN_DEPLOYMENT_HOURS,
229280
})
@@ -249,6 +300,7 @@ describe('Fireworks deployment routing', () => {
249300
fetch: mockFetch,
250301
logger,
251302
useCustomDeployment: true,
303+
deploymentMap: TEST_DEPLOYMENT_MAP,
252304
sessionId: 'test-user-id',
253305
now: IN_DEPLOYMENT_HOURS,
254306
})
@@ -272,6 +324,7 @@ describe('Fireworks deployment routing', () => {
272324
fetch: mockFetch,
273325
logger,
274326
useCustomDeployment: true,
327+
deploymentMap: TEST_DEPLOYMENT_MAP,
275328
sessionId: 'test-user-id',
276329
now: BEFORE_DEPLOYMENT_HOURS,
277330
})
@@ -293,6 +346,7 @@ describe('Fireworks deployment routing', () => {
293346
fetch: mockFetch,
294347
logger,
295348
useCustomDeployment: true,
349+
deploymentMap: TEST_DEPLOYMENT_MAP,
296350
sessionId: 'test-user-id',
297351
now: BEFORE_DEPLOYMENT_HOURS,
298352
})
@@ -317,6 +371,7 @@ describe('Fireworks deployment routing', () => {
317371
fetch: mockFetch,
318372
logger,
319373
useCustomDeployment: true,
374+
deploymentMap: TEST_DEPLOYMENT_MAP,
320375
sessionId: 'test-user-id',
321376
now: BEFORE_DEPLOYMENT_HOURS,
322377
})
@@ -343,6 +398,7 @@ describe('Fireworks deployment routing', () => {
343398
fetch: mockFetch,
344399
logger,
345400
useCustomDeployment: true,
401+
deploymentMap: TEST_DEPLOYMENT_MAP,
346402
sessionId: 'test-user-id',
347403
now: IN_DEPLOYMENT_HOURS,
348404
})
@@ -371,6 +427,7 @@ describe('Fireworks deployment routing', () => {
371427
fetch: mockFetch,
372428
logger,
373429
useCustomDeployment: false,
430+
now: IN_DEPLOYMENT_HOURS,
374431
sessionId: 'test-user-id',
375432
})
376433

@@ -397,6 +454,7 @@ describe('Fireworks deployment routing', () => {
397454
fetch: mockFetch,
398455
logger,
399456
useCustomDeployment: false,
457+
now: IN_DEPLOYMENT_HOURS,
400458
sessionId: 'test-user-id',
401459
})
402460

@@ -423,6 +481,7 @@ describe('Fireworks deployment routing', () => {
423481
fetch: mockFetch,
424482
logger,
425483
useCustomDeployment: false,
484+
now: IN_DEPLOYMENT_HOURS,
426485
sessionId: 'test-user-id',
427486
})
428487

@@ -450,6 +509,7 @@ describe('Fireworks deployment routing', () => {
450509
fetch: mockFetch,
451510
logger,
452511
useCustomDeployment: false,
512+
now: IN_DEPLOYMENT_HOURS,
453513
sessionId: 'test-user-id',
454514
})
455515

@@ -476,6 +536,7 @@ describe('Fireworks deployment routing', () => {
476536
fetch: mockFetch,
477537
logger,
478538
useCustomDeployment: false,
539+
now: IN_DEPLOYMENT_HOURS,
479540
sessionId: 'test-user-id',
480541
})
481542

@@ -502,6 +563,7 @@ describe('Fireworks deployment routing', () => {
502563
fetch: mockFetch,
503564
logger,
504565
useCustomDeployment: false,
566+
now: IN_DEPLOYMENT_HOURS,
505567
sessionId: 'test-user-id',
506568
})
507569

@@ -529,6 +591,7 @@ describe('Fireworks deployment routing', () => {
529591
fetch: mockFetch,
530592
logger,
531593
useCustomDeployment: true,
594+
deploymentMap: TEST_DEPLOYMENT_MAP,
532595
sessionId: 'test-user-id',
533596
now: IN_DEPLOYMENT_HOURS,
534597
})
@@ -563,6 +626,7 @@ describe('Fireworks deployment routing', () => {
563626
fetch: mockFetch,
564627
logger,
565628
useCustomDeployment: true,
629+
deploymentMap: TEST_DEPLOYMENT_MAP,
566630
sessionId: 'test-user-id',
567631
now: IN_DEPLOYMENT_HOURS,
568632
})
@@ -588,6 +652,7 @@ describe('Fireworks deployment routing', () => {
588652
fetch: mockFetch,
589653
logger,
590654
useCustomDeployment: true,
655+
deploymentMap: TEST_DEPLOYMENT_MAP,
591656
sessionId: 'test-user-id',
592657
now: IN_DEPLOYMENT_HOURS,
593658
})
@@ -614,6 +679,7 @@ describe('Fireworks deployment routing', () => {
614679
fetch: mockFetch,
615680
logger,
616681
useCustomDeployment: true,
682+
deploymentMap: TEST_DEPLOYMENT_MAP,
617683
sessionId: 'test-user-id',
618684
now: IN_DEPLOYMENT_HOURS,
619685
})

web/src/llm-api/fireworks-config.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export const FIREWORKS_ACCOUNT_ID = 'james-65d217'
1010

1111
export const FIREWORKS_DEPLOYMENT_MAP: Record<string, string> = {
1212
// 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9',
13-
'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea',
13+
// Disabled: route GLM 5.1 through the Fireworks serverless API during
14+
// availability hours instead of the dedicated deployment.
15+
// 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea',
1416
// 'minimax/minimax-m2.7': 'accounts/james-65d217/deployments/nrdudqxd',
1517
}

web/src/llm-api/fireworks.ts

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { Agent } from 'undici'
22

33
import {
44
FREEBUFF_DEPLOYMENT_HOURS_LABEL,
5+
FREEBUFF_GLM_MODEL_ID,
56
isFreebuffDeploymentHours,
67
} from '@codebuff/common/constants/freebuff-models'
78
import { PROFIT_MARGIN } from '@codebuff/common/constants/limits'
@@ -38,6 +39,11 @@ const FIREWORKS_MODEL_MAP: Record<string, string> = {
3839
'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1',
3940
}
4041

42+
/** Models that stay limited to freebuff deployment hours even on serverless. The outside-hours check in createFireworksRequestWithFallback tests membership in this set, not whether the model has a custom deployment mapped. */
43+
const FIREWORKS_HOURS_GATED_MODELS = new Set<string>([
44+
FREEBUFF_GLM_MODEL_ID,
45+
])
46+
4147
/** Flag to enable custom Fireworks deployments (set to false to use global API only) */
4248
const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true
4349

@@ -706,9 +712,10 @@ async function parseFireworksError(response: Response): Promise<FireworksError>
706712
}
707713

708714
/**
709-
* Uses custom Fireworks deployments only during deployment hours. Deployment
710-
* mapped models never fall back to the serverless API outside hours, during
711-
* cooldown, or after deployment 5xxs; those states surface as provider errors
715+
* Uses custom Fireworks deployments only during deployment hours. Some models
716+
* are still availability-gated even when served by the Fireworks serverless
717+
* API. Deployment-mapped models never fall back to the serverless API during
718+
* cooldown or after deployment 5xxs; those states surface as provider errors
712719
* so freebuff can offer MiniMax as the always-on option.
713720
*/
714721
export async function createFireworksRequestWithFallback(params: {
@@ -717,20 +724,23 @@ export async function createFireworksRequestWithFallback(params: {
717724
fetch: typeof globalThis.fetch
718725
logger: Logger
719726
useCustomDeployment?: boolean
727+
deploymentMap?: Record<string, string>
720728
sessionId: string
721729
now?: Date
722730
}): Promise<Response> {
723731
const { body, originalModel, fetch, logger, sessionId } = params
724732
const now = params.now ?? new Date()
725733
const useCustomDeployment = params.useCustomDeployment ?? FIREWORKS_USE_CUSTOM_DEPLOYMENT
726-
const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel]
734+
const deploymentMap = params.deploymentMap ?? FIREWORKS_DEPLOYMENT_MAP
735+
const deploymentModelId = deploymentMap[originalModel]
727736
const hasDeployment = useCustomDeployment && Boolean(deploymentModelId)
737+
const isHoursGatedModel = FIREWORKS_HOURS_GATED_MODELS.has(originalModel)
728738
const shouldFallbackToStandardApi = body.codebuff_metadata?.cost_mode === 'lite'
729739

730740
const createStandardApiRequest = () =>
731741
createFireworksRequest({ body, originalModel, fetch, sessionId })
732742

733-
if (hasDeployment && !isDeploymentHours(now)) {
743+
if (isHoursGatedModel && !isDeploymentHours(now)) {
734744
if (shouldFallbackToStandardApi) {
735745
logger.info(
736746
{ model: originalModel },

0 commit comments

Comments
 (0)