From 0faee3d3cc6b5a607b852048b0642cf0cf6aeb01 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Wed, 29 Apr 2026 16:06:19 -0700
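Subject: [PATCH 1/2] Add Gemini Pro freebuff model

Add google/gemini-3.1-pro-preview (FREEBUFF_GEMINI_PRO_MODEL_ID) as an
always-available freebuff model, listed first in the CLI model selector.

- Limit Gemini Pro to 1 session admission per 24 hours and give it an
  instant-admit capacity of 50.
- Build the free-mode model allowlists for base2-free, editor-lite and
  code-reviewer-lite out of FREEBUFF_MODELS rather than repeating ids.
- Let createBase2 accept optional `model` and `providerOptions`
  overrides, and add the base2-gemini-no-editor-evals agent on top.
- Drop the free-mode prompt line that told the orchestrator to make code
  changes directly with str_replace or write_file.
- Reject free-mode chat completions from a non-root agent with 403
  free_mode_invalid_agent_hierarchy unless the first entry of its
  ancestor_run_ids is a running freebuff root agent run.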
---
 agents/base2/base2-gemini-no-editor-evals.ts  |  13 ++
 agents/base2/base2.ts                         |  22 ++-
 .../components/freebuff-model-selector.tsx    |  10 +-
 common/src/__tests__/freebuff-models.test.ts  |  21 +++
 common/src/constants/free-agents.ts           |  28 +--
 common/src/constants/freebuff-models.ts       |   7 +
 common/src/types/contracts/database.ts        |   1 +
 common/src/types/freebuff-session.ts          |  13 +-
 .../completions/__tests__/completions.test.ts | 165 +++++++++++++++++-
 web/src/app/api/v1/chat/completions/_post.ts  |  45 ++++-
 .../free-session/__tests__/public-api.test.ts |  52 ++++++
 web/src/server/free-session/config.ts         |  10 +-
 web/src/server/free-session/public-api.ts     |   9 +-
 13 files changed, 356 insertions(+), 40 deletions(-)
 create mode 100644 agents/base2/base2-gemini-no-editor-evals.ts

diff --git a/agents/base2/base2-gemini-no-editor-evals.ts b/agents/base2/base2-gemini-no-editor-evals.ts
new file mode 100644
index 000000000..e092edb51
--- /dev/null
+++ b/agents/base2/base2-gemini-no-editor-evals.ts
@@ -0,0 +1,13 @@
+import { createBase2 } from './base2'
+
+const definition = {
+  ...createBase2('free', {
+    noAskUser: true,
+    model: 'google/gemini-3.1-pro-preview',
+    providerOptions: {},
+  }),
+  id: 'base2-gemini-no-editor-evals',
+  displayName: 'Buffy the Gemini Evals Orchestrator',
+}
+
+export default definition
diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
index 1a81f948b..bacc90b48 100644
--- a/agents/base2/base2.ts
+++ b/agents/base2/base2.ts
@@ -12,12 +12,16 @@ export function createBase2(
     hasNoValidation?: boolean
     planOnly?: boolean
     noAskUser?: boolean
+    model?: SecretAgentDefinition['model']
+    providerOptions?: SecretAgentDefinition['providerOptions']
   },
 ): Omit<SecretAgentDefinition, 'id'> {
   const {
     hasNoValidation = mode === 'fast',
     planOnly = false,
     noAskUser = false,
+    model: modelOverride,
+    providerOptions,
   } = options ?? {}
   const isDefault = mode === 'default'
   const isFast = mode === 'fast'
@@ -25,16 +29,20 @@ export function createBase2(
   const isFree = mode === 'free' || mode === 'lite'
 
   const isSonnet = false
-  const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7'
+  const model =
+    modelOverride ?? (isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7')
+  const defaultProviderOptions = isFree
+    ? {
+        data_collection: 'deny' as const,
+      }
+    : {
+        only: ['amazon-bedrock'],
+      }
 
   return {
     publisher,
     model,
-    providerOptions: isFree ? {
-      data_collection: 'deny',
-    } : {
-      only: ['amazon-bedrock'],
-    },
+    providerOptions: providerOptions ?? defaultProviderOptions,
     displayName: 'Buffy the Orchestrator',
     spawnerPrompt:
       'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks',
@@ -150,8 +158,6 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u
         isMax &&
         `- IMPORTANT: You must spawn the editor-multi-prompt agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`,
         isFree &&
-        '- Implement code changes using the str_replace or write_file tools directly.',
-        isFree &&
         '- Spawn a code-reviewer-lite to review the changes after you have implemented the changes.',
         '- Spawn bashers sequentially if the second command depends on the the first.',
         isDefault &&
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
index a453a1538..f553ce398 100644
--- a/cli/src/components/freebuff-model-selector.tsx
+++ b/cli/src/components/freebuff-model-selector.tsx
@@ -5,6 +5,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'
 import { Button } from './button'
 import {
   FALLBACK_FREEBUFF_MODEL_ID,
+  FREEBUFF_GEMINI_PRO_MODEL_ID,
   FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_MODELS,
   getFreebuffDeploymentAvailabilityLabel,
@@ -25,8 +26,15 @@ import {
 import type { KeyEvent } from '@opentui/core'
 
 const FREEBUFF_MODEL_SELECTOR_MODELS = [
+  ...FREEBUFF_MODELS.filter(
+    (model) => model.id === FREEBUFF_GEMINI_PRO_MODEL_ID,
+  ),
   ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID),
-  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID),
+  ...FREEBUFF_MODELS.filter(
+    (model) =>
+      model.id !== FREEBUFF_GEMINI_PRO_MODEL_ID &&
+      model.id !== FREEBUFF_GLM_MODEL_ID,
+  ),
 ]
 
 /**
diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts
index 0d01d2762..664c4c3ef 100644
--- a/common/src/__tests__/freebuff-models.test.ts
+++ b/common/src/__tests__/freebuff-models.test.ts
@@ -1,11 +1,32 @@
 import { describe, expect, test } from 'bun:test'
 
 import {
+  FREEBUFF_GEMINI_PRO_MODEL_ID,
+  FREEBUFF_MODELS,
   getFreebuffDeploymentAvailabilityLabel,
   isFreebuffDeploymentHours,
+  isFreebuffModelAvailable,
 } from '../constants/freebuff-models'
 
 describe('freebuff model availability', () => {
+  test('includes Gemini 3.1 Pro as an always-available option', () => {
+    expect(FREEBUFF_MODELS.map((model) => model.id)).toContain(
+      FREEBUFF_GEMINI_PRO_MODEL_ID,
+    )
+    expect(
+      isFreebuffModelAvailable(
+        FREEBUFF_GEMINI_PRO_MODEL_ID,
+        new Date('2026-01-05T18:00:00Z'),
+      ),
+    ).toBe(true)
+    expect(
+      isFreebuffModelAvailable(
+        FREEBUFF_GEMINI_PRO_MODEL_ID,
+        new Date('2026-01-05T12:00:00Z'),
+      ),
+    ).toBe(true)
+  })
+
   test('formats the close time in the user local timezone while deployment is open', () => {
     expect(
       getFreebuffDeploymentAvailabilityLabel(new Date('2026-01-05T18:00:00Z'), {
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index 308e12df6..5f020cf8e 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -1,5 +1,7 @@
 import { parseAgentId } from '../util/agent-id-parsing'
 
+import { FREEBUFF_MODELS } from './freebuff-models'
+
 import type { CostMode } from './model-config'
 
 /**
@@ -15,6 +17,10 @@ export const FREE_COST_MODE = 'free' as const
  * every user's apparent activity.
  */
 export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const
+const FREEBUFF_ROOT_AGENT_ID_SET: ReadonlySet<string> = new Set(
+  FREEBUFF_ROOT_AGENT_IDS,
+)
+const FREEBUFF_SELECTABLE_MODEL_IDS = FREEBUFF_MODELS.map((model) => model.id)
 
 /**
  * Agents that are allowed to run in FREE mode.
@@ -26,10 +32,7 @@ export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const
  */
 export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Root orchestrator
-  'base2-free': new Set([
-    'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
-  ]),
+  'base2-free': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
 
   // File exploration agents
   'file-picker': new Set(['google/gemini-2.5-flash-lite']),
@@ -44,16 +47,10 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   'basher': new Set(['google/gemini-3.1-flash-lite-preview']),
 
   // Editor for free mode
-  'editor-lite': new Set([
-    'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
-  ]),
+  'editor-lite': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
 
   // Code reviewer for free mode
-  'code-reviewer-lite': new Set([
-    'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
-  ]),
+  'code-reviewer-lite': new Set(FREEBUFF_SELECTABLE_MODEL_IDS),
 }
 
 /**
@@ -87,6 +84,13 @@ export function isFreeMode(costMode: CostMode | string | undefined): boolean {
   return costMode === FREE_COST_MODE
 }
 
+export function isFreebuffRootAgent(fullAgentId: string): boolean {
+  const { publisherId, agentId } = parseAgentId(fullAgentId)
+  if (!agentId) return false
+  if (publisherId && publisherId !== 'codebuff') return false
+  return FREEBUFF_ROOT_AGENT_ID_SET.has(agentId)
+}
+
 /**
  * Check if a specific agent is allowed to use a specific model in FREE mode.
  * This is the strictest check - validates both the agent AND model combination.
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index 8b3e9d82d..2394a03e4 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -21,6 +21,7 @@ export interface FreebuffModelOption {
  *  the caller's local timezone. The CLI should render
  *  `getFreebuffDeploymentAvailabilityLabel()` instead. */
 export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day'
+export const FREEBUFF_GEMINI_PRO_MODEL_ID = 'google/gemini-3.1-pro-preview'
 export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
 export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
 const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York'
@@ -40,6 +41,12 @@ interface LocalTimeFormatOptions {
 }
 
 export const FREEBUFF_MODELS = [
+  {
+    id: FREEBUFF_GEMINI_PRO_MODEL_ID,
+    displayName: 'Gemini 3.1 Pro',
+    tagline: 'Deepest, 1/day',
+    availability: 'always',
+  },
   {
     id: FREEBUFF_MINIMAX_MODEL_ID,
     displayName: 'MiniMax M2.7',
diff --git a/common/src/types/contracts/database.ts b/common/src/types/contracts/database.ts
index 88685c720..bcb29b74a 100644
--- a/common/src/types/contracts/database.ts
+++ b/common/src/types/contracts/database.ts
@@ -35,6 +35,7 @@ export type GetUserInfoFromApiKeyFn = <T extends UserColumn>(
 
 type AgentRun = {
   agent_id: string
+  ancestor_run_ids: string[]
   status: 'running' | 'completed' | 'failed' | 'cancelled'
 }
 export type AgentRunColumn = keyof AgentRun
diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts
index 31fc4c87e..f638bb942 100644
--- a/common/src/types/freebuff-session.ts
+++ b/common/src/types/freebuff-session.ts
@@ -9,10 +9,9 @@
 /**
  * Per-model usage counter surfaced to the CLI so the waiting-room UI can
  * render "N of M sessions used" alongside queue/active state. Present when
- * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
- * per 12-hour window). `recentCount` is the number of admissions inside
- * `windowHours` at the time the response was produced — see also the
- * standalone `rate_limited` status for the reject path.
+ * the joined model has a rate limit applied. `recentCount` is the number of
+ * admissions inside `windowHours` at the time the response was produced —
+ * see also the standalone `rate_limited` status for the reject path.
  */
 export interface FreebuffSessionRateLimit {
   model: string
@@ -72,7 +71,7 @@ export type FreebuffSessionServerResponse =
       queueDepthByModel: Record<string, number>
       estimatedWaitMs: number
       queuedAt: string
-      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+      /** Rate-limit quota for rate-limited models. Absent
        *  for unlimited models or when the status was produced outside the
        *  rate-limit check path (e.g. pure read via GET). */
       rateLimit?: FreebuffSessionRateLimit
@@ -85,7 +84,7 @@ export type FreebuffSessionServerResponse =
       admittedAt: string
       expiresAt: string
       remainingMs: number
-      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+      /** Rate-limit quota for rate-limited models. Absent
        *  for unlimited models or when the status was produced outside the
        *  rate-limit check path (e.g. pure read via GET). */
       rateLimit?: FreebuffSessionRateLimit
@@ -152,7 +151,7 @@ export type FreebuffSessionServerResponse =
     }
   | {
       /** User has used up their per-model admission quota in the rolling
-       *  window (GLM 5.1: 5 one-hour sessions per 12h). Returned from POST
+       *  window. Returned from POST
        *  /session before the user is placed in the queue. `retryAfterMs` is
        *  the time until the oldest admission inside the window falls off
        *  and one quota slot opens up — clients should show the user when
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index e0b531c70..a4b46278e 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -1,8 +1,23 @@
-import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test'
+import {
+  afterEach,
+  beforeEach,
+  describe,
+  expect,
+  mock,
+  it,
+  spyOn,
+} from 'bun:test'
 import { NextRequest } from 'next/server'
 
-import { isFreebuffDeploymentHours } from '@codebuff/common/constants/freebuff-models'
+import {
+  FREEBUFF_GEMINI_PRO_MODEL_ID,
+  isFreebuffDeploymentHours,
+} from '@codebuff/common/constants/freebuff-models'
 import { formatQuotaResetCountdown, postChatCompletions } from '../_post'
+import {
+  resetFreeModeRateLimits,
+  FREE_MODE_RATE_LIMITS,
+} from '../free-mode-rate-limiter'
 
 import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics'
 import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery'
@@ -36,6 +51,10 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       id: 'user-new-free',
       banned: false,
     },
+    'test-api-key-new-free-gemini': {
+      id: 'user-new-free-gemini',
+      banned: false,
+    },
   }
 
   const mockGetUserInfoFromApiKey: GetUserInfoFromApiKeyFn = async ({
@@ -73,6 +92,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
   })
 
   beforeEach(() => {
+    resetFreeModeRateLimits()
     nextQuotaReset = new Date(
       Date.now() + 3 * 24 * 60 * 60 * 1000 + 5 * 60 * 1000,
     ).toISOString()
@@ -119,6 +139,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       if (runId === 'run-123') {
         return {
           agent_id: 'agent-123',
+          ancestor_run_ids: [],
           status: 'running',
         }
       }
@@ -126,12 +147,28 @@ describe('/api/v1/chat/completions POST endpoint', () => {
         return {
           // Real free-mode allowlisted agent (see FREE_MODE_AGENT_MODELS).
           agent_id: 'base2-free',
+          ancestor_run_ids: [],
+          status: 'running',
+        }
+      }
+      if (runId === 'run-reviewer-direct') {
+        return {
+          agent_id: 'code-reviewer-lite',
+          ancestor_run_ids: [],
+          status: 'running',
+        }
+      }
+      if (runId === 'run-reviewer-child') {
+        return {
+          agent_id: 'code-reviewer-lite',
+          ancestor_run_ids: ['run-free'],
           status: 'running',
         }
       }
       if (runId === 'run-completed') {
         return {
           agent_id: 'agent-123',
+          ancestor_run_ids: [],
           status: 'completed',
         }
       }
@@ -700,9 +737,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       if (isFreebuffDeploymentHours()) {
         expect(response.status).toBe(200)
         expect(fetchedBodies).toHaveLength(1)
-        expect(fetchedBodies[0].model).toBe(
-          'accounts/fireworks/models/glm-5p1',
-        )
+        expect(fetchedBodies[0].model).toBe('accounts/fireworks/models/glm-5p1')
         expect(body.model).toBe('z-ai/glm-5.1')
         expect(body.provider).toBe('Fireworks')
       } else {
@@ -712,6 +747,126 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       }
     })
 
+    it('lets freebuff use Gemini 3.1 Pro through the free-mode allowlist', async () => {
+      const req = new NextRequest(
+        'http://localhost:3000/api/v1/chat/completions',
+        {
+          method: 'POST',
+          headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'),
+          body: JSON.stringify({
+            model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+            stream: false,
+            codebuff_metadata: {
+              run_id: 'run-free',
+              client_id: 'test-client-id-123',
+              cost_mode: 'free',
+            },
+          }),
+        },
+      )
+
+      const response = await postChatCompletions({
+        req,
+        getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
+        logger: mockLogger,
+        trackEvent: mockTrackEvent,
+        getUserUsageData: mockGetUserUsageData,
+        getAgentRunFromId: mockGetAgentRunFromId,
+        fetch: mockFetch,
+        insertMessageBigquery: mockInsertMessageBigquery,
+        loggerWithContext: mockLoggerWithContext,
+        checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
+      })
+
+      expect(response.status).toBe(200)
+    })
+
+    it('rejects standalone free-mode reviewer runs even when the model is allowlisted', async () => {
+      const req = new NextRequest(
+        'http://localhost:3000/api/v1/chat/completions',
+        {
+          method: 'POST',
+          headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'),
+          body: JSON.stringify({
+            model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+            stream: false,
+            codebuff_metadata: {
+              run_id: 'run-reviewer-direct',
+              client_id: 'test-client-id-123',
+              cost_mode: 'free',
+            },
+          }),
+        },
+      )
+
+      const response = await postChatCompletions({
+        req,
+        getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
+        logger: mockLogger,
+        trackEvent: mockTrackEvent,
+        getUserUsageData: mockGetUserUsageData,
+        getAgentRunFromId: mockGetAgentRunFromId,
+        fetch: mockFetch,
+        insertMessageBigquery: mockInsertMessageBigquery,
+        loggerWithContext: mockLoggerWithContext,
+        checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
+      })
+
+      expect(response.status).toBe(403)
+      const body = await response.json()
+      expect(body.error).toBe('free_mode_invalid_agent_hierarchy')
+    })
+
+    it('counts child reviewer Gemini requests toward the free-mode request limit', async () => {
+      const nowSpy = spyOn(Date, 'now').mockImplementation(
+        () => 1_000_000_000_000,
+      )
+      try {
+        const postFreeRequest = (runId: string) =>
+          postChatCompletions({
+            req: new NextRequest(
+              'http://localhost:3000/api/v1/chat/completions',
+              {
+                method: 'POST',
+                headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'),
+                body: JSON.stringify({
+                  model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+                  stream: false,
+                  codebuff_metadata: {
+                    run_id: runId,
+                    client_id: 'test-client-id-123',
+                    cost_mode: 'free',
+                  },
+                }),
+              },
+            ),
+            getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
+            logger: mockLogger,
+            trackEvent: mockTrackEvent,
+            getUserUsageData: mockGetUserUsageData,
+            getAgentRunFromId: mockGetAgentRunFromId,
+            fetch: mockFetch,
+            insertMessageBigquery: mockInsertMessageBigquery,
+            loggerWithContext: mockLoggerWithContext,
+            checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
+          })
+
+        for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) {
+          const response = await postFreeRequest(
+            i === 0 ? 'run-reviewer-child' : 'run-free',
+          )
+          expect(response.status).toBe(200)
+        }
+
+        const limited = await postFreeRequest('run-free')
+        expect(limited.status).toBe(429)
+        const body = await limited.json()
+        expect(body.error).toBe('free_mode_rate_limited')
+      } finally {
+        nowSpy.mockRestore()
+      }
+    })
+
     it('skips credit check when in FREE mode even with 0 credits', async () => {
       const req = new NextRequest(
         'http://localhost:3000/api/v1/chat/completions',
diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts
index 5f9c2b7e6..0a7771d46 100644
--- a/web/src/app/api/v1/chat/completions/_post.ts
+++ b/web/src/app/api/v1/chat/completions/_post.ts
@@ -1,6 +1,7 @@
 import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
 import { BYOK_OPENROUTER_HEADER } from '@codebuff/common/constants/byok'
 import {
+  isFreebuffRootAgent,
   isFreeMode,
   isFreeModeAllowedAgentModel,
 } from '@codebuff/common/constants/free-agents'
@@ -323,7 +324,7 @@ export async function postChatCompletions(params: {
     const agentRun = await getAgentRunFromId({
       runId: runIdFromBody,
       userId,
-      fields: ['agent_id', 'status'],
+      fields: ['agent_id', 'ancestor_run_ids', 'status'],
     })
     if (!agentRun) {
       trackEvent({
@@ -341,7 +342,11 @@ export async function postChatCompletions(params: {
       )
     }
 
-    const { agent_id: agentId, status: agentRunStatus } = agentRun
+    const {
+      agent_id: agentId,
+      ancestor_run_ids: ancestorRunIds,
+      status: agentRunStatus,
+    } = agentRun
 
     if (agentRunStatus !== 'running') {
       trackEvent({
@@ -392,6 +397,42 @@ export async function postChatCompletions(params: {
       )
     }
 
+    if (isFreeModeRequest && !isFreebuffRootAgent(agentId)) {
+      const rootRunId = ancestorRunIds?.[0] // NOTE(review): null-safe; confirm column nullability
+      const rootRun = rootRunId
+        ? await getAgentRunFromId({
+            runId: rootRunId,
+            userId,
+            fields: ['agent_id', 'status'],
+          })
+        : null
+      if (
+        !rootRun ||
+        rootRun.status !== 'running' ||
+        !isFreebuffRootAgent(rootRun.agent_id)
+      ) {
+        trackEvent({
+          event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR,
+          userId,
+          properties: {
+            error: 'free_mode_invalid_agent_hierarchy',
+            agentId,
+            runId: runIdFromBody,
+            rootRunId,
+          },
+          logger,
+        })
+        return NextResponse.json(
+          {
+            error: 'free_mode_invalid_agent_hierarchy',
+            message:
+              'Free mode subagents must run under an active freebuff session root.',
+          },
+          { status: 403 },
+        )
+      }
+    }
+
     // Freebuff waiting-room gate. Only enforced for free-mode requests, and
     // only when FREEBUFF_WAITING_ROOM_ENABLED=true — otherwise this is a
     // no-op that returns { ok: true, reason: 'disabled' } without a DB hit.
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index f46a0f8c4..fbe2fde43 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -1,5 +1,7 @@
 import { beforeEach, describe, expect, test } from 'bun:test'
 
+import { FREEBUFF_GEMINI_PRO_MODEL_ID } from '@codebuff/common/constants/freebuff-models'
+
 import {
   checkSessionAdmissible,
   endUserSession,
@@ -332,6 +334,56 @@ describe('requestSession', () => {
   const GLM_LIMIT = 5
   const GLM_WINDOW_HOURS = 12
   const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
+  const GEMINI_LIMIT = 1
+  const GEMINI_WINDOW_HOURS = 24
+
+  test('rate_limited: Gemini 3.1 Pro allows one admit per 24h', async () => {
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    deps.admits.push({
+      user_id: 'u1',
+      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      admitted_at: new Date(now.getTime() - 23 * 60 * 60 * 1000),
+    })
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      deps,
+    })
+    expect(state.status).toBe('rate_limited')
+    if (state.status !== 'rate_limited') throw new Error('unreachable')
+    expect(state.model).toBe(FREEBUFF_GEMINI_PRO_MODEL_ID)
+    expect(state.limit).toBe(GEMINI_LIMIT)
+    expect(state.windowHours).toBe(GEMINI_WINDOW_HOURS)
+    expect(state.recentCount).toBe(GEMINI_LIMIT)
+    expect(state.retryAfterMs).toBe(60 * 60 * 1000)
+    expect(deps.rows.has('u1')).toBe(false)
+  })
+
+  test('rate_limited: Gemini 3.1 Pro admit outside 24h window does not count', async () => {
+    deps._tick(GLM_OPEN_TIME)
+    const now = deps._now()
+    deps.admits.push({
+      user_id: 'u1',
+      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      admitted_at: new Date(now.getTime() - 25 * 60 * 60 * 1000),
+    })
+
+    const state = await requestSession({
+      userId: 'u1',
+      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      deps,
+    })
+    expect(state.status).toBe('queued')
+    if (state.status !== 'queued') throw new Error('unreachable')
+    expect(state.rateLimit).toEqual({
+      model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+      limit: GEMINI_LIMIT,
+      windowHours: GEMINI_WINDOW_HOURS,
+      recentCount: 0,
+    })
+  })
 
   test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => {
     deps._tick(GLM_OPEN_TIME)
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts
index 10071b35f..5c1a6945a 100644
--- a/web/src/server/free-session/config.ts
+++ b/web/src/server/free-session/config.ts
@@ -1,3 +1,8 @@
+import {
+  FREEBUFF_GEMINI_PRO_MODEL_ID,
+  FREEBUFF_GLM_MODEL_ID,
+  FREEBUFF_MINIMAX_MODEL_ID,
+} from '@codebuff/common/constants/freebuff-models'
 import { env } from '@codebuff/internal/env'
 
 /**
@@ -48,8 +53,9 @@ export function getSessionGraceMs(): number {
  * queue).
  */
 const INSTANT_ADMIT_CAPACITY: Record<string, number> = {
-  'z-ai/glm-5.1': 50,
-  'minimax/minimax-m2.7': 1000,
+  [FREEBUFF_GEMINI_PRO_MODEL_ID]: 50,
+  [FREEBUFF_GLM_MODEL_ID]: 50,
+  [FREEBUFF_MINIMAX_MODEL_ID]: 1000,
 }
 
 export function getInstantAdmitCapacity(id: string): number {
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index 528cd4ab3..ba01567fc 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -1,5 +1,7 @@
 import {
   FREEBUFF_DEPLOYMENT_HOURS_LABEL,
+  FREEBUFF_GEMINI_PRO_MODEL_ID,
+  FREEBUFF_GLM_MODEL_ID,
   isFreebuffModelAvailable,
   isFreebuffModelId as isSelectableFreebuffModel,
   resolveFreebuffModel,
@@ -37,15 +39,16 @@ import type {
 
 /**
  * Per-model admission rate limits. Keyed by freebuff model id; a model not
- * in the map has no rate limit applied. Today only GLM 5.1 is limited
- * (Minimax is cheap enough to leave unlimited).
+ * in the map has no rate limit applied. Minimax is cheap enough to leave
+ * unlimited.
  *
  * Hard-coded rather than env-driven: the values need to be observable in the
  * code review, and the CLI already renders the numbers via `rateLimit` on
  * queued/active responses — changing them is a deliberate, typed edit.
  */
 const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
-  'z-ai/glm-5.1': { limit: 5, windowHours: 12 },
+  [FREEBUFF_GEMINI_PRO_MODEL_ID]: { limit: 1, windowHours: 24 },
+  [FREEBUFF_GLM_MODEL_ID]: { limit: 5, windowHours: 12 },
 }
 
 /** Fetch the caller's current quota snapshot for `model`, or undefined if the

From f5029df4c16feefd40d42d1e5680e379240f2039 Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Wed, 29 Apr 2026 17:29:56 -0700
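Subject: [PATCH 2/2] Fix free mode rate limit test timeout

The child-reviewer rate limit test no longer freezes Date.now and sends
FREE_MODE_RATE_LIMITS.PER_SECOND + 1 requests through
postChatCompletions. It now sends one run-reviewer-child request and
then calls checkFreeModeRateLimit directly for the same user, expecting
the first check to pass and the second to be limited.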
---
 .../completions/__tests__/completions.test.ts | 85 +++++++------------
 1 file changed, 29 insertions(+), 56 deletions(-)

diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index a4b46278e..8822f94dc 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -1,12 +1,4 @@
-import {
-  afterEach,
-  beforeEach,
-  describe,
-  expect,
-  mock,
-  it,
-  spyOn,
-} from 'bun:test'
+import { afterEach, beforeEach, describe, expect, mock, it } from 'bun:test'
 import { NextRequest } from 'next/server'
 
 import {
@@ -15,8 +7,8 @@ import {
 } from '@codebuff/common/constants/freebuff-models'
 import { formatQuotaResetCountdown, postChatCompletions } from '../_post'
 import {
+  checkFreeModeRateLimit,
   resetFreeModeRateLimits,
-  FREE_MODE_RATE_LIMITS,
 } from '../free-mode-rate-limiter'
 
 import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics'
@@ -818,53 +810,34 @@ describe('/api/v1/chat/completions POST endpoint', () => {
     })
 
     it('counts child reviewer Gemini requests toward the free-mode request limit', async () => {
-      const nowSpy = spyOn(Date, 'now').mockImplementation(
-        () => 1_000_000_000_000,
-      )
-      try {
-        const postFreeRequest = (runId: string) =>
-          postChatCompletions({
-            req: new NextRequest(
-              'http://localhost:3000/api/v1/chat/completions',
-              {
-                method: 'POST',
-                headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'),
-                body: JSON.stringify({
-                  model: FREEBUFF_GEMINI_PRO_MODEL_ID,
-                  stream: false,
-                  codebuff_metadata: {
-                    run_id: runId,
-                    client_id: 'test-client-id-123',
-                    cost_mode: 'free',
-                  },
-                }),
-              },
-            ),
-            getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
-            logger: mockLogger,
-            trackEvent: mockTrackEvent,
-            getUserUsageData: mockGetUserUsageData,
-            getAgentRunFromId: mockGetAgentRunFromId,
-            fetch: mockFetch,
-            insertMessageBigquery: mockInsertMessageBigquery,
-            loggerWithContext: mockLoggerWithContext,
-            checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
-          })
-
-        for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_SECOND; i++) {
-          const response = await postFreeRequest(
-            i === 0 ? 'run-reviewer-child' : 'run-free',
-          )
-          expect(response.status).toBe(200)
-        }
+      const response = await postChatCompletions({
+        req: new NextRequest('http://localhost:3000/api/v1/chat/completions', {
+          method: 'POST',
+          headers: allowedFreeModeHeaders('test-api-key-new-free-gemini'),
+          body: JSON.stringify({
+            model: FREEBUFF_GEMINI_PRO_MODEL_ID,
+            stream: false,
+            codebuff_metadata: {
+              run_id: 'run-reviewer-child',
+              client_id: 'test-client-id-123',
+              cost_mode: 'free',
+            },
+          }),
+        }),
+        getUserInfoFromApiKey: mockGetUserInfoFromApiKey,
+        logger: mockLogger,
+        trackEvent: mockTrackEvent,
+        getUserUsageData: mockGetUserUsageData,
+        getAgentRunFromId: mockGetAgentRunFromId,
+        fetch: mockFetch,
+        insertMessageBigquery: mockInsertMessageBigquery,
+        loggerWithContext: mockLoggerWithContext,
+        checkSessionAdmissible: mockCheckSessionAdmissibleAllow,
+      })
 
-        const limited = await postFreeRequest('run-free')
-        expect(limited.status).toBe(429)
-        const body = await limited.json()
-        expect(body.error).toBe('free_mode_rate_limited')
-      } finally {
-        nowSpy.mockRestore()
-      }
+      expect(response.status).toBe(200)
+      expect(checkFreeModeRateLimit('user-new-free-gemini').limited).toBe(false)
+      expect(checkFreeModeRateLimit('user-new-free-gemini').limited).toBe(true)
     })
 
     it('skips credit check when in FREE mode even with 0 credits', async () => {