Skip to content

Commit 5603283

Browse files
committed
Limit GLM session quota window to 12 hours (was 20 hours)
1 parent 35819f6 commit 5603283

7 files changed

Lines changed: 19 additions & 19 deletions

File tree

cli/src/components/waiting-room-screen.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
230230
<span>Elapsed </span>
231231
{formatElapsed(elapsedMs)}
232232
</text>
233-
{/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only
233+
{/* Per-model session quota (e.g. GLM 5.1 caps at 5/12h). Only
234234
rendered for rate-limited models so the Minimax queue stays
235235
clutter-free. */}
236236
{session.rateLimit && (
@@ -298,7 +298,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
298298
)}
299299

300300
{/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
301-
last 20h). Terminal for this run — the user can exit and come
301+
last 12h). Terminal for this run — the user can exit and come
302302
back once the oldest session in the window rolls off. */}
303303
{session?.status === 'rate_limited' && (
304304
<>

cli/src/hooks/use-freebuff-session.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ async function callSession(
101101
}
102102
}
103103
// 429 from POST is the per-model session-quota reject (e.g. too many GLM
104-
// sessions in the last 20h). Terminal for the current poll — the CLI shows
104+
// sessions in the last 12h). Terminal for the current poll — the CLI shows
105105
// a screen explaining the limit and when the user can try again. The 429
106106
// status (rather than 200) keeps older CLIs in their error path so they
107107
// back off instead of tight-polling an unrecognized 200 body.

common/src/types/freebuff-session.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Per-model usage counter surfaced to the CLI so the waiting-room UI can
1111
* render "N of M sessions used" alongside queue/active state. Present when
1212
* the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
13-
* per 20-hour window). `recentCount` is the number of admissions inside
13+
* per 12-hour window). `recentCount` is the number of admissions inside
1414
* `windowHours` at the time the response was produced — see also the
1515
* standalone `rate_limited` status for the reject path.
1616
*/
@@ -132,7 +132,7 @@ export type FreebuffSessionServerResponse =
132132
}
133133
| {
134134
/** User has used up their per-model admission quota in the rolling
135-
* window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST
135+
* window (GLM 5.1: 5 one-hour sessions per 12h). Returned from POST
136136
* /session before the user is placed in the queue. `retryAfterMs` is
137137
* the time until the oldest admission inside the window falls off
138138
* and one quota slot opens up — clients should show the user when

packages/internal/src/db/schema.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ export const freeSession = pgTable(
873873

874874
/**
875875
* Audit log of every admission — one row per queued→active transition. Used
876-
* to rate-limit heavy users (e.g. no more than 5 GLM sessions per 20h).
876+
* to rate-limit heavy users (e.g. no more than 5 GLM sessions per 12h).
877877
*
878878
* Separate from `free_session` because that table is one-row-per-user (state,
879879
* not history); the UPSERT path there would otherwise destroy prior admissions.

web/src/server/free-session/__tests__/public-api.test.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -323,23 +323,23 @@ describe('requestSession', () => {
323323
expect(s3.status).toBe('active')
324324
})
325325

326-
// Per-user rate limit (5 GLM admissions per 20h) — the wire limit is
326+
// Per-user rate limit (5 GLM admissions per 12h) — the wire limit is
327327
// hard-coded in public-api.ts, so tests seed the fake admit log directly
328328
// rather than configuring it. GLM also has deployment-hours gating, so
329329
// these tests bump `now` into the open window (12pm ET on a weekday)
330330
// before issuing the request.
331331
const GLM_MODEL = 'z-ai/glm-5.1'
332332
const GLM_LIMIT = 5
333-
const GLM_WINDOW_HOURS = 20
333+
const GLM_WINDOW_HOURS = 12
334334
const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
335335

336336
test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => {
337337
deps._tick(GLM_OPEN_TIME)
338-
// Seed 5 admits inside the 20h window, spaced so we can verify retryAfter
338+
// Seed 5 admits inside the 12h window, spaced so we can verify retryAfter
339339
// points at the oldest one sliding off.
340340
const now = deps._now()
341-
// Oldest: 19h ago (still in window). Next 4: 1h, 2h, 3h, 4h ago.
342-
const ages = [19, 4, 3, 2, 1]
341+
// Oldest: 11h ago (still in window). Next 4: 1h, 2h, 3h, 4h ago.
342+
const ages = [11, 4, 3, 2, 1]
343343
for (const hoursAgo of ages) {
344344
deps.admits.push({
345345
user_id: 'u1',
@@ -359,15 +359,15 @@ describe('requestSession', () => {
359359
expect(state.limit).toBe(GLM_LIMIT)
360360
expect(state.windowHours).toBe(GLM_WINDOW_HOURS)
361361
expect(state.recentCount).toBe(GLM_LIMIT)
362-
// Oldest admit is 19h ago; slot opens when it hits 20h, i.e. in 1h.
362+
// Oldest admit is 11h ago; slot opens when it hits 12h, i.e. in 1h.
363363
expect(state.retryAfterMs).toBe(60 * 60 * 1000)
364364
// Blocked before any row is written — the user doesn't take a queue slot.
365365
expect(deps.rows.has('u1')).toBe(false)
366366
})
367367

368-
test('rate_limited: admits outside the 20h window do not count', async () => {
368+
test('rate_limited: admits outside the 12h window do not count', async () => {
369369
deps._tick(GLM_OPEN_TIME)
370-
// 5 admits, each just over 20h old → all fall off the window.
370+
// 5 admits, each just over 12h old → all fall off the window.
371371
const now = deps._now()
372372
for (let i = 0; i < 5; i++) {
373373
deps.admits.push({
@@ -446,7 +446,7 @@ describe('requestSession', () => {
446446
const now = deps._now()
447447
// Seed 5 prior admits (the cap), with the latest one matching the
448448
// active row we're about to install.
449-
const ages = [19, 4, 3, 2, 0]
449+
const ages = [11, 4, 3, 2, 0]
450450
for (const hoursAgo of ages) {
451451
deps.admits.push({
452452
user_id: 'u1',
@@ -527,7 +527,7 @@ describe('requestSession', () => {
527527
// must be blocked by the quota.
528528
deps._tick(GLM_OPEN_TIME)
529529
const now = deps._now()
530-
const ages = [19, 4, 3, 2, 1]
530+
const ages = [11, 4, 3, 2, 1]
531531
for (const hoursAgo of ages) {
532532
deps.admits.push({
533533
user_id: 'u1',
@@ -660,7 +660,7 @@ describe('getSessionState', () => {
660660
expect(state.rateLimit).toEqual({
661661
model: 'z-ai/glm-5.1',
662662
limit: 5,
663-
windowHours: 20,
663+
windowHours: 12,
664664
recentCount: 1,
665665
})
666666
})

web/src/server/free-session/public-api.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ import type { InternalSessionRow, SessionStateResponse } from './types'
4141
* queued/active responses — changing them is a deliberate, typed edit.
4242
*/
4343
const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
44-
'z-ai/glm-5.1': { limit: 5, windowHours: 20 },
44+
'z-ai/glm-5.1': { limit: 5, windowHours: 12 },
4545
}
4646

4747
/** Fetch the caller's current quota snapshot for `model`, or undefined if the

web/src/server/free-session/store.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ export async function promoteQueuedUser(params: {
436436
* so one query covers both the check and the reject path.
437437
*
438438
* Drives the per-user, per-model rate limit (e.g. at most 5 GLM sessions in
439-
* the last 20h) enforced before `joinOrTakeOver`.
439+
* the last 12h) enforced before `joinOrTakeOver`.
440440
*/
441441
export async function listRecentAdmits(params: {
442442
userId: string

0 commit comments

Comments
 (0)