From 0faee3d3cc6b5a607b852048b0642cf0cf6aeb01 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Wed, 29 Apr 2026 16:06:19 -0700 Subject: [PATCH 1/2] Add Gemini Pro freebuff model --- agents/base2/base2-gemini-no-editor-evals.ts | 13 ++ agents/base2/base2.ts | 22 ++- .../components/freebuff-model-selector.tsx | 10 +- common/src/__tests__/freebuff-models.test.ts | 21 +++ common/src/constants/free-agents.ts | 28 +-- common/src/constants/freebuff-models.ts | 7 + common/src/types/contracts/database.ts | 1 + common/src/types/freebuff-session.ts | 13 +- .../completions/__tests__/completions.test.ts | 165 +++++++++++++++++- web/src/app/api/v1/chat/completions/_post.ts | 45 ++++- .../free-session/__tests__/public-api.test.ts | 52 ++++++ web/src/server/free-session/config.ts | 10 +- web/src/server/free-session/public-api.ts | 9 +- 13 files changed, 356 insertions(+), 40 deletions(-) create mode 100644 agents/base2/base2-gemini-no-editor-evals.ts diff --git a/agents/base2/base2-gemini-no-editor-evals.ts b/agents/base2/base2-gemini-no-editor-evals.ts new file mode 100644 index 000000000..e092edb51 --- /dev/null +++ b/agents/base2/base2-gemini-no-editor-evals.ts @@ -0,0 +1,13 @@ +import { createBase2 } from './base2' + +const definition = { + ...createBase2('free', { + noAskUser: true, + model: 'google/gemini-3.1-pro-preview', + providerOptions: {}, + }), + id: 'base2-gemini-no-editor-evals', + displayName: 'Buffy the Gemini Evals Orchestrator', +} + +export default definition diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 1a81f948b..bacc90b48 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -12,12 +12,16 @@ export function createBase2( hasNoValidation?: boolean planOnly?: boolean noAskUser?: boolean + model?: SecretAgentDefinition['model'] + providerOptions?: SecretAgentDefinition['providerOptions'] }, ): Omit { const { hasNoValidation = mode === 'fast', planOnly = false, noAskUser = false, + model: modelOverride, + providerOptions, } = options ?? {} const isDefault = mode === 'default' const isFast = mode === 'fast' @@ -25,16 +29,20 @@ export function createBase2( const isFree = mode === 'free' || mode === 'lite' const isSonnet = false - const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7' + const model = + modelOverride ?? (isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7') + const defaultProviderOptions = isFree + ? { + data_collection: 'deny' as const, + } + : { + only: ['amazon-bedrock'], + } return { publisher, model, - providerOptions: isFree ? { - data_collection: 'deny', - } : { - only: ['amazon-bedrock'], - }, + providerOptions: providerOptions ?? defaultProviderOptions, displayName: 'Buffy the Orchestrator', spawnerPrompt: 'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks', @@ -150,8 +158,6 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u isMax && `- IMPORTANT: You must spawn the editor-multi-prompt agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`, isFree && - '- Implement code changes using the str_replace or write_file tools directly.', - isFree && '- Spawn a code-reviewer-lite to review the changes after you have implemented the changes.', '- Spawn bashers sequentially if the second command depends on the the first.', isDefault && diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index a453a1538..f553ce398 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -5,6 +5,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react' import { Button } from './button' import { FALLBACK_FREEBUFF_MODEL_ID, + FREEBUFF_GEMINI_PRO_MODEL_ID, FREEBUFF_GLM_MODEL_ID, FREEBUFF_MODELS, getFreebuffDeploymentAvailabilityLabel, @@ -25,8 +26,15 @@ import { import type { KeyEvent } from '@opentui/core' const FREEBUFF_MODEL_SELECTOR_MODELS = [ + ...FREEBUFF_MODELS.filter( + (model) => model.id === FREEBUFF_GEMINI_PRO_MODEL_ID, + ), ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID), - ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID), + ...FREEBUFF_MODELS.filter( + (model) => + model.id !== FREEBUFF_GEMINI_PRO_MODEL_ID && + model.id !== FREEBUFF_GLM_MODEL_ID, + ), ] /** diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts index 0d01d2762..664c4c3ef 100644 --- a/common/src/__tests__/freebuff-models.test.ts +++ b/common/src/__tests__/freebuff-models.test.ts @@ -1,11 +1,32 @@ import { describe, expect, test } from 'bun:test' import { + FREEBUFF_GEMINI_PRO_MODEL_ID, + FREEBUFF_MODELS, getFreebuffDeploymentAvailabilityLabel, isFreebuffDeploymentHours, + isFreebuffModelAvailable, } from '../constants/freebuff-models' describe('freebuff model availability', () => { + test('includes Gemini 3.1 Pro as an always-available option', () => { + expect(FREEBUFF_MODELS.map((model) => model.id)).toContain( + FREEBUFF_GEMINI_PRO_MODEL_ID, + ) + expect( + isFreebuffModelAvailable( + FREEBUFF_GEMINI_PRO_MODEL_ID, + new Date('2026-01-05T18:00:00Z'), + ), + ).toBe(true) + expect( + isFreebuffModelAvailable( + FREEBUFF_GEMINI_PRO_MODEL_ID, + new Date('2026-01-05T12:00:00Z'), + ), + ).toBe(true) + }) + test('formats the close time in the user local timezone while deployment is open', () => { expect( getFreebuffDeploymentAvailabilityLabel(new Date('2026-01-05T18:00:00Z'), { diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 308e12df6..5f020cf8e 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -1,5 +1,7 @@ import { parseAgentId } from '../util/agent-id-parsing' +import { FREEBUFF_MODELS } from './freebuff-models' + import type { CostMode } from './model-config' /** @@ -15,6 +17,10 @@ export const FREE_COST_MODE = 'free' as const * every user's apparent activity. */ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const +const FREEBUFF_ROOT_AGENT_ID_SET: ReadonlySet = new Set( + FREEBUFF_ROOT_AGENT_IDS, +) +const FREEBUFF_SELECTABLE_MODEL_IDS = FREEBUFF_MODELS.map((model) => model.id) /** * Agents that are allowed to run in FREE mode. @@ -26,10 +32,7 @@ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const */ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator - 'base2-free': new Set([ - 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', - ]), + 'base2-free': new Set(FREEBUFF_SELECTABLE_MODEL_IDS), // File exploration agents 'file-picker': new Set(['google/gemini-2.5-flash-lite']), @@ -44,16 +47,10 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'basher': new Set(['google/gemini-3.1-flash-lite-preview']), // Editor for free mode - 'editor-lite': new Set([ - 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', - ]), + 'editor-lite': new Set(FREEBUFF_SELECTABLE_MODEL_IDS), // Code reviewer for free mode - 'code-reviewer-lite': new Set([ - 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', - ]), + 'code-reviewer-lite': new Set(FREEBUFF_SELECTABLE_MODEL_IDS), } /** @@ -87,6 +84,13 @@ export function isFreeMode(costMode: CostMode | string | undefined): boolean { return costMode === FREE_COST_MODE } +export function isFreebuffRootAgent(fullAgentId: string): boolean { + const { publisherId, agentId } = parseAgentId(fullAgentId) + if (!agentId) return false + if (publisherId && publisherId !== 'codebuff') return false + return FREEBUFF_ROOT_AGENT_ID_SET.has(agentId) +} + /** * Check if a specific agent is allowed to use a specific model in FREE mode. * This is the strictest check - validates both the agent AND model combination. diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 8b3e9d82d..2394a03e4 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -21,6 +21,7 @@ export interface FreebuffModelOption { * the caller's local timezone. The CLI should render * `getFreebuffDeploymentAvailabilityLabel()` instead. */ export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day' +export const FREEBUFF_GEMINI_PRO_MODEL_ID = 'google/gemini-3.1-pro-preview' export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7' const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York' @@ -40,6 +41,12 @@ interface LocalTimeFormatOptions { } export const FREEBUFF_MODELS = [ + { + id: FREEBUFF_GEMINI_PRO_MODEL_ID, + displayName: 'Gemini 3.1 Pro', + tagline: 'Deepest, 1/day', + availability: 'always', + }, { id: FREEBUFF_MINIMAX_MODEL_ID, displayName: 'MiniMax M2.7', diff --git a/common/src/types/contracts/database.ts b/common/src/types/contracts/database.ts index 88685c720..bcb29b74a 100644 --- a/common/src/types/contracts/database.ts +++ b/common/src/types/contracts/database.ts @@ -35,6 +35,7 @@ export type GetUserInfoFromApiKeyFn = ( type AgentRun = { agent_id: string + ancestor_run_ids: string[] status: 'running' | 'completed' | 'failed' | 'cancelled' } export type AgentRunColumn = keyof AgentRun diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts index 31fc4c87e..f638bb942 100644 --- a/common/src/types/freebuff-session.ts +++ b/common/src/types/freebuff-session.ts @@ -9,10 +9,9 @@ /** * Per-model usage counter surfaced to the CLI so the waiting-room UI can * render "N of M sessions used" alongside queue/active state. Present when - * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits - * per 12-hour window). `recentCount` is the number of admissions inside - * `windowHours` at the time the response was produced — see also the - * standalone `rate_limited` status for the reject path. + * the joined model has a rate limit applied. `recentCount` is the number of + * admissions inside `windowHours` at the time the response was produced — + * see also the standalone `rate_limited` status for the reject path. */ export interface FreebuffSessionRateLimit { model: string @@ -72,7 +71,7 @@ export type FreebuffSessionServerResponse = queueDepthByModel: Record estimatedWaitMs: number queuedAt: string - /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent + /** Rate-limit quota for rate-limited models. Absent * for unlimited models or when the status was produced outside the * rate-limit check path (e.g. pure read via GET). */ rateLimit?: FreebuffSessionRateLimit @@ -85,7 +84,7 @@ export type FreebuffSessionServerResponse = admittedAt: string expiresAt: string remainingMs: number - /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent + /** Rate-limit quota for rate-limited models. Absent * for unlimited models or when the status was produced outside the * rate-limit check path (e.g. pure read via GET). */ rateLimit?: FreebuffSessionRateLimit @@ -152,7 +151,7 @@ export type FreebuffSessionServerResponse = } | { /** User has used up their per-model admission quota in the rolling - * window (GLM 5.1: 5 one-hour sessions per 12h). Returned from POST + * window. Returned from POST * /session before the user is placed in the queue. `retryAfterMs` is * the time until the oldest admission inside the window falls off * and one quota slot opens up — clients should show the user when diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index e0b531c70..a4b46278e 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1,8 +1,23 @@ -import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' +import { + afterEach, + beforeEach, + describe, + expect, + mock, + it, + spyOn, +} from 'bun:test' import { NextRequest } from 'next/server' -import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models' +import { + FREEBUFF_GEMINI_PRO_MODEL_ID, + isFreebuffDeploymentHours, +} from '@codebuff/common/constants/freebuff-models' import { formatQuotaResetCountdown, postChatCompletions } from '../_post' +import { + resetFreeModeRateLimits, + FREE_MODE_RATE_LIMITS, +} from '../free-mode-rate-limiter' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' @@ -36,6 +51,10 @@ describe('/api/v1/chat/completions POST endpoint', () => { id: 'user-new-free', banned: false, }, + 'test-api-key-new-free-gemini': { + id: 'user-new-free-gemini', + banned: false, + }, } const mockGetUserInfoFromApiKey: GetUserInfoFromApiKeyFn = async ({ @@ -73,6 +92,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) beforeEach(() => { + resetFreeModeRateLimits() nextQuotaReset = new Date( Date.now() + 3 * 24 * 60 * 60 * 1000 + 5 * 60 * 1000, ).toISOString() @@ -119,6 +139,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { if (runId === 'run-123') { return { agent_id: 'agent-123', + ancestor_run_ids: [], status: 'running', } } @@ -126,12 +147,28 @@ describe('/api/v1/chat/completions POST endpoint', () => { return { // Real free-mode allowlisted agent (see FREE_MODE_AGENT_MODELS). agent_id: 'base2-free', + ancestor_run_ids: [], + status: 'running', + } + } + if (runId === 'run-reviewer-direct') { + return { + agent_id: 'code-reviewer-lite', + ancestor_run_ids: [], + status: 'running', + } + } + if (runId === 'run-reviewer-child') { + return { + agent_id: 'code-reviewer-lite', + ancestor_run_ids: ['run-free'], status: 'running', } } if (runId === 'run-completed') { return { agent_id: 'agent-123', + ancestor_run_ids: [], status: 'completed', } } @@ -700,9 +737,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { if (isFreebuffDeploymentHours()) { expect(response.status).toBe(200) expect(fetchedBodies).toHaveLength(1) - expect(fetchedBodies[0].model).toBe( - 'accounts/fireworks/models/glm-5p1', - ) + expect(fetchedBodies[0].model).toBe('accounts/fireworks/models/glm-5p1') expect(body.model).toBe('z-ai/glm-5.1') expect(body.provider).toBe('Fireworks') } else { @@ -712,6 +747,126 @@ describe('/api/v1/chat/completions POST endpoint', () => { } }) + it('lets freebuff use Gemini 3.1 Pro through the free-mode allowlist', async () => { + const req = new NextRequest( + 'http://localhost:3000/api/v1/chat/completions', + { + method: 'POST', + headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'), + body: JSON.stringify({ + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + stream: false, + codebuff_metadata: { + run_id: 'run-free', + client_id: 'test-client-id-123', + cost_mode: 'free', + }, + }), + }, + ) + + const response = await postChatCompletions({ + req, + getUserInfoFromApiKey: mockGetUserInfoFromApiKey, + logger: mockLogger, + trackEvent: mockTrackEvent, + getUserUsageData: mockGetUserUsageData, + getAgentRunFromId: mockGetAgentRunFromId, + fetch: mockFetch, + insertMessageBigquery: mockInsertMessageBigquery, + loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, + }) + + expect(response.status).toBe(200) + }) + + it('rejects standalone free-mode reviewer runs even when the model is allowlisted', async () => { + const req = new NextRequest( + 'http://localhost:3000/api/v1/chat/completions', + { + method: 'POST', + headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'), + body: JSON.stringify({ + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + stream: false, + codebuff_metadata: { + run_id: 'run-reviewer-direct', + client_id: 'test-client-id-123', + cost_mode: 'free', + }, + }), + }, + ) + + const response = await postChatCompletions({ + req, + getUserInfoFromApiKey: mockGetUserInfoFromApiKey, + logger: mockLogger, + trackEvent: mockTrackEvent, + getUserUsageData: mockGetUserUsageData, + getAgentRunFromId: mockGetAgentRunFromId, + fetch: mockFetch, + insertMessageBigquery: mockInsertMessageBigquery, + loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, + }) + + expect(response.status).toBe(403) + const body = await response.json() + expect(body.error).toBe('free_mode_invalid_agent_hierarchy') + }) + + it('counts child reviewer Gemini requests toward the free-mode request limit', async () => { + const nowSpy = spyOn(Date, 'now').mockImplementation( + () => 1_000_000_000_000, + ) + try { + const postFreeRequest = (runId: string) => + postChatCompletions({ + req: new NextRequest( + 'http://localhost:3000/api/v1/chat/completions', + { + method: 'POST', + headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'), + body: JSON.stringify({ + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + stream: false, + codebuff_metadata: { + run_id: runId, + client_id: 'test-client-id-123', + cost_mode: 'free', + }, + }), + }, + ), + getUserInfoFromApiKey: mockGetUserInfoFromApiKey, + logger: mockLogger, + trackEvent: mockTrackEvent, + getUserUsageData: mockGetUserUsageData, + getAgentRunFromId: mockGetAgentRunFromId, + fetch: mockFetch, + insertMessageBigquery: mockInsertMessageBigquery, + loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, + }) + + for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { + const response = await postFreeRequest( + i === 0 ? 'run-reviewer-child' : 'run-free', + ) + expect(response.status).toBe(200) + } + + const limited = await postFreeRequest('run-free') + expect(limited.status).toBe(429) + const body = await limited.json() + expect(body.error).toBe('free_mode_rate_limited') + } finally { + nowSpy.mockRestore() + } + }) + it('skips credit check when in FREE mode even with 0 credits', async () => { const req = new NextRequest( 'http://localhost:3000/api/v1/chat/completions', diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index 5f9c2b7e6..0a7771d46 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -1,6 +1,7 @@ import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events' import { BYOK_OPENROUTER_HEADER } from '@codebuff/common/constants/byok' import { + isFreebuffRootAgent, isFreeMode, isFreeModeAllowedAgentModel, } from '@codebuff/common/constants/free-agents' @@ -323,7 +324,7 @@ export async function postChatCompletions(params: { const agentRun = await getAgentRunFromId({ runId: runIdFromBody, userId, - fields: ['agent_id', 'status'], + fields: ['agent_id', 'ancestor_run_ids', 'status'], }) if (!agentRun) { trackEvent({ @@ -341,7 +342,11 @@ export async function postChatCompletions(params: { ) } - const { agent_id: agentId, status: agentRunStatus } = agentRun + const { + agent_id: agentId, + ancestor_run_ids: ancestorRunIds, + status: agentRunStatus, + } = agentRun if (agentRunStatus !== 'running') { trackEvent({ @@ -392,6 +397,42 @@ export async function postChatCompletions(params: { ) } + if (isFreeModeRequest && !isFreebuffRootAgent(agentId)) { + const rootRunId = ancestorRunIds[0] + const rootRun = rootRunId + ? await getAgentRunFromId({ + runId: rootRunId, + userId, + fields: ['agent_id', 'status'], + }) + : null + if ( + !rootRun || + rootRun.status !== 'running' || + !isFreebuffRootAgent(rootRun.agent_id) + ) { + trackEvent({ + event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR, + userId, + properties: { + error: 'free_mode_invalid_agent_hierarchy', + agentId, + runId: runIdFromBody, + rootRunId, + }, + logger, + }) + return NextResponse.json( + { + error: 'free_mode_invalid_agent_hierarchy', + message: + 'Free mode subagents must run under an active freebuff session root.', + }, + { status: 403 }, + ) + } + } + // Freebuff waiting-room gate. Only enforced for free-mode requests, and // only when FREEBUFF_WAITING_ROOM_ENABLED=true — otherwise this is a // no-op that returns { ok: true, reason: 'disabled' } without a DB hit. diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index f46a0f8c4..fbe2fde43 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -1,5 +1,7 @@ import { beforeEach, describe, expect, test } from 'bun:test' +import { FREEBUFF_GEMINI_PRO_MODEL_ID } from '@codebuff/common/constants/freebuff-models' + import { checkSessionAdmissible, endUserSession, @@ -332,6 +334,56 @@ describe('requestSession', () => { const GLM_LIMIT = 5 const GLM_WINDOW_HOURS = 12 const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z') + const GEMINI_LIMIT = 1 + const GEMINI_WINDOW_HOURS = 24 + + test('rate_limited: Gemini 3.1 Pro allows one admit per 24h', async () => { + deps._tick(GLM_OPEN_TIME) + const now = deps._now() + deps.admits.push({ + user_id: 'u1', + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + admitted_at: new Date(now.getTime() - 23 * 60 * 60 * 1000), + }) + + const state = await requestSession({ + userId: 'u1', + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + deps, + }) + expect(state.status).toBe('rate_limited') + if (state.status !== 'rate_limited') throw new Error('unreachable') + expect(state.model).toBe(FREEBUFF_GEMINI_PRO_MODEL_ID) + expect(state.limit).toBe(GEMINI_LIMIT) + expect(state.windowHours).toBe(GEMINI_WINDOW_HOURS) + expect(state.recentCount).toBe(GEMINI_LIMIT) + expect(state.retryAfterMs).toBe(60 * 60 * 1000) + expect(deps.rows.has('u1')).toBe(false) + }) + + test('rate_limited: Gemini 3.1 Pro admit outside 24h window does not count', async () => { + deps._tick(GLM_OPEN_TIME) + const now = deps._now() + deps.admits.push({ + user_id: 'u1', + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + admitted_at: new Date(now.getTime() - 25 * 60 * 60 * 1000), + }) + + const state = await requestSession({ + userId: 'u1', + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + deps, + }) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + expect(state.rateLimit).toEqual({ + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + limit: GEMINI_LIMIT, + windowHours: GEMINI_WINDOW_HOURS, + recentCount: 0, + }) + }) test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => { deps._tick(GLM_OPEN_TIME) diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 10071b35f..5c1a6945a 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -1,3 +1,8 @@ +import { + FREEBUFF_GEMINI_PRO_MODEL_ID, + FREEBUFF_GLM_MODEL_ID, + FREEBUFF_MINIMAX_MODEL_ID, +} from '@codebuff/common/constants/freebuff-models' import { env } from '@codebuff/internal/env' /** @@ -48,8 +53,9 @@ export function getSessionGraceMs(): number { * queue). */ const INSTANT_ADMIT_CAPACITY: Record = { - 'z-ai/glm-5.1': 50, - 'minimax/minimax-m2.7': 1000, + [FREEBUFF_GEMINI_PRO_MODEL_ID]: 50, + [FREEBUFF_GLM_MODEL_ID]: 50, + [FREEBUFF_MINIMAX_MODEL_ID]: 1000, } export function getInstantAdmitCapacity(id: string): number { diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 528cd4ab3..ba01567fc 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -1,5 +1,7 @@ import { FREEBUFF_DEPLOYMENT_HOURS_LABEL, + FREEBUFF_GEMINI_PRO_MODEL_ID, + FREEBUFF_GLM_MODEL_ID, isFreebuffModelAvailable, isFreebuffModelId as isSelectableFreebuffModel, resolveFreebuffModel, @@ -37,15 +39,16 @@ import type { /** * Per-model admission rate limits. Keyed by freebuff model id; a model not - * in the map has no rate limit applied. Today only GLM 5.1 is limited - * (Minimax is cheap enough to leave unlimited). + * in the map has no rate limit applied. Minimax is cheap enough to leave + * unlimited. * * Hard-coded rather than env-driven: the values need to be observable in the * code review, and the CLI already renders the numbers via `rateLimit` on * queued/active responses — changing them is a deliberate, typed edit. */ const RATE_LIMITS: Record = { - 'z-ai/glm-5.1': { limit: 5, windowHours: 12 }, + [FREEBUFF_GEMINI_PRO_MODEL_ID]: { limit: 1, windowHours: 24 }, + [FREEBUFF_GLM_MODEL_ID]: { limit: 5, windowHours: 12 }, } /** Fetch the caller's current quota snapshot for `model`, or undefined if the From f5029df4c16feefd40d42d1e5680e379240f2039 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Wed, 29 Apr 2026 17:29:56 -0700 Subject: [PATCH 2/2] Fix free mode rate limit test timeout --- .../completions/__tests__/completions.test.ts | 85 +++++++------------ 1 file changed, 29 insertions(+), 56 deletions(-) diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index a4b46278e..8822f94dc 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1,12 +1,4 @@ -import { - afterEach, - beforeEach, - describe, - expect, - mock, - it, - spyOn, -} from 'bun:test' +import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test' import { NextRequest } from 'next/server' import { @@ -15,8 +7,8 @@ import { } from '@codebuff/common/constants/freebuff-models' import { formatQuotaResetCountdown, postChatCompletions } from '../_post' import { + checkFreeModeRateLimit, resetFreeModeRateLimits, - FREE_MODE_RATE_LIMITS, } from '../free-mode-rate-limiter' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' @@ -818,53 +810,34 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) it('counts child reviewer Gemini requests toward the free-mode request limit', async () => { - const nowSpy = spyOn(Date, 'now').mockImplementation( - () => 1_000_000_000_000, - ) - try { - const postFreeRequest = (runId: string) => - postChatCompletions({ - req: new NextRequest( - 'http://localhost:3000/api/v1/chat/completions', - { - method: 'POST', - headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'), - body: JSON.stringify({ - model: FREEBUFF_GEMINI_PRO_MODEL_ID, - stream: false, - codebuff_metadata: { - run_id: runId, - client_id: 'test-client-id-123', - cost_mode: 'free', - }, - }), - }, - ), - getUserInfoFromApiKey: mockGetUserInfoFromApiKey, - logger: mockLogger, - trackEvent: mockTrackEvent, - getUserUsageData: mockGetUserUsageData, - getAgentRunFromId: mockGetAgentRunFromId, - fetch: mockFetch, - insertMessageBigquery: mockInsertMessageBigquery, - loggerWithContext: mockLoggerWithContext, - checkSessionAdmissible: mockCheckSessionAdmissibleAllow, - }) - - for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) { - const response = await postFreeRequest( - i === 0 ? 'run-reviewer-child' : 'run-free', - ) - expect(response.status).toBe(200) - } + const response = await postChatCompletions({ + req: new NextRequest('http://localhost:3000/api/v1/chat/completions', { + method: 'POST', + headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'), + body: JSON.stringify({ + model: FREEBUFF_GEMINI_PRO_MODEL_ID, + stream: false, + codebuff_metadata: { + run_id: 'run-reviewer-child', + client_id: 'test-client-id-123', + cost_mode: 'free', + }, + }), + }), + getUserInfoFromApiKey: mockGetUserInfoFromApiKey, + logger: mockLogger, + trackEvent: mockTrackEvent, + getUserUsageData: mockGetUserUsageData, + getAgentRunFromId: mockGetAgentRunFromId, + fetch: mockFetch, + insertMessageBigquery: mockInsertMessageBigquery, + loggerWithContext: mockLoggerWithContext, + checkSessionAdmissible: mockCheckSessionAdmissibleAllow, + }) - const limited = await postFreeRequest('run-free') - expect(limited.status).toBe(429) - const body = await limited.json() - expect(body.error).toBe('free_mode_rate_limited') - } finally { - nowSpy.mockRestore() - } + expect(response.status).toBe(200) + expect(checkFreeModeRateLimit('user-new-free-gemini').limited).toBe(false) + expect(checkFreeModeRateLimit('user-new-free-gemini').limited).toBe(true) }) it('skips credit check when in FREE mode even with 0 credits', async () => {