Skip to content

Commit b7c0155

Browse files
authored
Limit GLM sessions to 12 hours (#553)
1 parent b305351 commit b7c0155

7 files changed

Lines changed: 28 additions & 25 deletions

File tree

cli/src/components/waiting-room-screen.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
230230
<span>Elapsed </span>
231231
{formatElapsed(elapsedMs)}
232232
</text>
233-
{/* Per-model session quota (e.g. GLM 5.1 caps at 5/20h). Only
233+
{/* Per-model session quota (e.g. GLM 5.1 caps at 5 sessions per 12h). Only
234234
rendered for rate-limited models so the Minimax queue stays
235235
clutter-free. */}
236236
{session.rateLimit && (
@@ -298,7 +298,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
298298
)}
299299

300300
{/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
301-
last 20h). Terminal for this run — the user can exit and come
301+
last 12h). Terminal for this run — the user can exit and come
302302
back once the oldest session in the window rolls off. */}
303303
{session?.status === 'rate_limited' && (
304304
<>

cli/src/hooks/use-freebuff-session.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ async function callSession(
101101
}
102102
}
103103
// 429 from POST is the per-model session-quota reject (e.g. too many GLM
104-
// sessions in the last 20h). Terminal for the current poll — the CLI shows
104+
// sessions in the last 12h). Terminal for the current poll — the CLI shows
105105
// a screen explaining the limit and when the user can try again. The 429
106106
// status (rather than 200) keeps older CLIs in their error path so they
107107
// back off instead of tight-polling an unrecognized 200 body.

common/src/types/freebuff-session.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Per-model usage counter surfaced to the CLI so the waiting-room UI can
1111
* render "N of M sessions used" alongside queue/active state. Present when
1212
* the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
13-
* per 20-hour window). `recentCount` is the number of admissions inside
13+
* per 12-hour window). `recentCount` is the number of admissions inside
1414
* `windowHours` at the time the response was produced — see also the
1515
* standalone `rate_limited` status for the reject path.
1616
*/
@@ -132,7 +132,7 @@ export type FreebuffSessionServerResponse =
132132
}
133133
| {
134134
/** User has used up their per-model admission quota in the rolling
135-
* window (GLM 5.1: 5 one-hour sessions per 20h). Returned from POST
135+
* window (GLM 5.1: 5 one-hour sessions per 12h). Returned from POST
136136
* /session before the user is placed in the queue. `retryAfterMs` is
137137
* the time until the oldest admission inside the window falls off
138138
* and one quota slot opens up — clients should show the user when

packages/internal/src/db/schema.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ export const freeSession = pgTable(
873873

874874
/**
875875
* Audit log of every admission — one row per queued→active transition. Used
876-
* to rate-limit heavy users (e.g. no more than 5 GLM sessions per 20h).
876+
* to rate-limit heavy users (e.g. no more than 5 GLM sessions per 12h).
877877
*
878878
* Separate from `free_session` because that table is one-row-per-user (state,
879879
* not history); the UPSERT path there would otherwise destroy prior admissions.

web/src/server/free-session/__tests__/public-api.test.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -323,23 +323,23 @@ describe('requestSession', () => {
323323
expect(s3.status).toBe('active')
324324
})
325325

326-
// Per-user rate limit (5 GLM admissions per 20h) — the wire limit is
326+
// Per-user rate limit (5 GLM admissions per 12h) — the wire limit is
327327
// hard-coded in public-api.ts, so tests seed the fake admit log directly
328328
// rather than configuring it. GLM also has deployment-hours gating, so
329329
// these tests bump `now` into the open window (12pm ET on a weekday)
330330
// before issuing the request.
331331
const GLM_MODEL = 'z-ai/glm-5.1'
332332
const GLM_LIMIT = 5
333-
const GLM_WINDOW_HOURS = 20
333+
const GLM_WINDOW_HOURS = 12
334334
const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
335335

336336
test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => {
337337
deps._tick(GLM_OPEN_TIME)
338-
// Seed 5 admits inside the 20h window, spaced so we can verify retryAfter
338+
// Seed 5 admits inside the 12h window, spaced so we can verify retryAfter
339339
// points at the oldest one sliding off.
340340
const now = deps._now()
341-
// Oldest: 19h ago (still in window). Next 4: 1h, 2h, 3h, 4h ago.
342-
const ages = [19, 4, 3, 2, 1]
341+
// Oldest: 11h ago (still in window). Next 4: 4h, 3h, 2h, 1h ago.
342+
const ages = [11, 4, 3, 2, 1]
343343
for (const hoursAgo of ages) {
344344
deps.admits.push({
345345
user_id: 'u1',
@@ -359,15 +359,15 @@ describe('requestSession', () => {
359359
expect(state.limit).toBe(GLM_LIMIT)
360360
expect(state.windowHours).toBe(GLM_WINDOW_HOURS)
361361
expect(state.recentCount).toBe(GLM_LIMIT)
362-
// Oldest admit is 19h ago; slot opens when it hits 20h, i.e. in 1h.
362+
// Oldest admit is 11h ago; slot opens when it hits 12h, i.e. in 1h.
363363
expect(state.retryAfterMs).toBe(60 * 60 * 1000)
364364
// Blocked before any row is written — the user doesn't take a queue slot.
365365
expect(deps.rows.has('u1')).toBe(false)
366366
})
367367

368-
test('rate_limited: admits outside the 20h window do not count', async () => {
368+
test('rate_limited: admits outside the 12h window do not count', async () => {
369369
deps._tick(GLM_OPEN_TIME)
370-
// 5 admits, each just over 20h old → all fall off the window.
370+
// 5 admits, each just over 12h old → all fall off the window.
371371
const now = deps._now()
372372
for (let i = 0; i < 5; i++) {
373373
deps.admits.push({
@@ -446,7 +446,7 @@ describe('requestSession', () => {
446446
const now = deps._now()
447447
// Seed 5 prior admits (the cap), with the latest one matching the
448448
// active row we're about to install.
449-
const ages = [19, 4, 3, 2, 0]
449+
const ages = [11, 4, 3, 2, 0]
450450
for (const hoursAgo of ages) {
451451
deps.admits.push({
452452
user_id: 'u1',
@@ -527,7 +527,7 @@ describe('requestSession', () => {
527527
// must be blocked by the quota.
528528
deps._tick(GLM_OPEN_TIME)
529529
const now = deps._now()
530-
const ages = [19, 4, 3, 2, 1]
530+
const ages = [11, 4, 3, 2, 1]
531531
for (const hoursAgo of ages) {
532532
deps.admits.push({
533533
user_id: 'u1',
@@ -660,7 +660,7 @@ describe('getSessionState', () => {
660660
expect(state.rateLimit).toEqual({
661661
model: 'z-ai/glm-5.1',
662662
limit: 5,
663-
windowHours: 20,
663+
windowHours: 12,
664664
recentCount: 1,
665665
})
666666
})

web/src/server/free-session/public-api.ts

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,25 +41,28 @@ import type { InternalSessionRow, SessionStateResponse } from './types'
4141
* queued/active responses — changing them is a deliberate, typed edit.
4242
*/
4343
const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
44-
'z-ai/glm-5.1': { limit: 5, windowHours: 20 },
44+
'z-ai/glm-5.1': { limit: 5, windowHours: 12 },
4545
}
4646

4747
/** Fetch the caller's current quota snapshot for `model`, or undefined if the
4848
* model isn't rate-limited. Used by both POST (after admit) and GET polls so
4949
* the CLI's "N of M sessions used" line stays live instead of disappearing
50-
* after the first poll. Also returns the oldest admit in-window so callers
51-
* that need `retryAfterMs` don't have to re-query. */
50+
* after the first poll. Also returns the oldest admit in-window and the
51+
* window duration so callers that need `retryAfterMs` don't have to re-query
52+
* or duplicate the window math. */
5253
async function fetchRateLimitSnapshot(
5354
userId: string,
5455
model: string,
5556
deps: SessionDeps,
5657
): Promise<
57-
{ info: FreebuffSessionRateLimit; oldest: Date | null } | undefined
58+
| { info: FreebuffSessionRateLimit; oldest: Date | null; windowMs: number }
59+
| undefined
5860
> {
5961
const cfg = RATE_LIMITS[model]
6062
if (!cfg) return undefined
6163
const now = nowOf(deps)
62-
const since = new Date(now.getTime() - cfg.windowHours * 60 * 60 * 1000)
64+
const windowMs = cfg.windowHours * 60 * 60 * 1000
65+
const since = new Date(now.getTime() - windowMs)
6366
const admits = await deps.listRecentAdmits({
6467
userId,
6568
model,
@@ -74,6 +77,7 @@ async function fetchRateLimitSnapshot(
7477
recentCount: admits.length,
7578
},
7679
oldest: admits[0] ?? null,
80+
windowMs,
7781
}
7882
}
7983

@@ -271,10 +275,9 @@ export async function requestSession(params: {
271275
if (snapshot && snapshot.info.recentCount >= snapshot.info.limit) {
272276
// Oldest admit's window-anniversary is when one slot opens back up.
273277
// Clamped at 0 so a clock skew can't surface a negative retry-after.
274-
const windowMs = snapshot.info.windowHours * 60 * 60 * 1000
275278
const retryAfterMs = Math.max(
276279
0,
277-
(snapshot.oldest?.getTime() ?? 0) + windowMs - now.getTime(),
280+
(snapshot.oldest?.getTime() ?? 0) + snapshot.windowMs - now.getTime(),
278281
)
279282
return {
280283
status: 'rate_limited',

web/src/server/free-session/store.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ export async function promoteQueuedUser(params: {
436436
* so one query covers both the check and the reject path.
437437
*
438438
* Drives the per-user, per-model rate limit (e.g. at most 5 GLM sessions in
439-
* the last 20h) enforced before `joinOrTakeOver`.
439+
* the last 12h) enforced before `joinOrTakeOver`.
440440
*/
441441
export async function listRecentAdmits(params: {
442442
userId: string

0 commit comments

Comments
 (0)