PostHog · jurajmajerik · May 8, 2026 · May 8, 2026 · greptile-apps · May 8, 2026
diff --git a/apps/code/src/renderer/features/setup/prompts.ts b/apps/code/src/renderer/features/setup/prompts.ts
@@ -4,7 +4,7 @@ After the integration is wired up, also instrument error tracking and session re
 
 Run autonomously with sensible defaults — do not ask the user questions. If the PostHog API key isn't already in the project's env files and you can't read it from the PostHog MCP server, leave a placeholder env var and note it in the PR body rather than blocking.`;
 
-export const DISCOVERY_PROMPT = `You are analyzing this codebase to find the highest-value first tasks for the developer.
+const DISCOVERY_PROMPT_BASE = `You are analyzing this codebase to find the highest-value first tasks for the developer.
 
 Scan the codebase for issues in two tiers. Tier 1 applies to every repo. Tier 2 only applies when PostHog is already installed (look for posthog-js, posthog-node, posthog-react-native or similar PostHog SDK imports).
 
@@ -21,7 +21,29 @@ Scan the codebase for issues in two tiers. Tier 1 applies to every repo. Tier 2
 - **Stale feature flags**: Flags that are always evaluated the same way, flags referenced in code but never toggled, flags guarding code that shipped long ago. Category: stale_feature_flag
 - **Error tracking gaps**: Catch blocks that swallow errors without reporting, missing error boundaries, untracked 5xx responses. Category: error_tracking
 - **Event tracking improvements**: Key user actions (signup, purchase, invite, upgrade) with no analytics event, events missing useful properties (plan, user role, page context). Category: event_tracking
-- **Funnel weak spots**: Multi-step flows (onboarding, checkout, activation) where intermediate steps have no tracking, making drop-off invisible. Category: funnel
+- **Funnel weak spots**: Multi-step flows (onboarding, checkout, activation) where intermediate steps have no tracking, making drop-off invisible. Category: funnel`;
+
+const DISCOVERY_PROMPT_EXPERIMENT_TIER = `
+
+## Tier 3 -- Experiment opportunities (only when PostHog SDK is detected)
+
+- **Experimentable surfaces**: User-facing surfaces where an A/B test would meaningfully inform a product decision — pricing pages, paywalls, primary CTAs, signup/onboarding flows, empty states, recommendation lists, upgrade prompts. Category: experiment
+  - Title: a one-line hypothesis ("Test 'Get started free' vs 'Sign up' on landing CTA")
+  - Description: state the hypothesis as a sentence — what you would change and why you think it would move the metric
+  - Impact: name the primary metric you would measure (e.g. "Sign-up conversion on /landing") and what a winning variant would look like
+  - Recommendation: describe the control and test variants concretely (exact copy, layout change, or behavior), and note any flag wiring required (\`posthog.getFeatureFlag\`)
+  - Only suggest experiments where: (a) the surface is in code you can point at, (b) the variant is implementable without backend changes you can't see, and (c) the metric is something a typical PostHog event would capture
+
+If you find at least one credible Tier 3 experiment opportunity, include at least one experiment-category task in your output — even if doing so displaces a lower-impact Tier 1/2 finding. Do not fabricate an experiment to fill the slot: if no credible candidate exists, omit the category entirely.`;
+
+const BASE_ALLOWED_CATEGORIES =
+  "bug, security, dead_code, duplication, performance, stale_feature_flag, error_tracking, event_tracking, funnel";
+
+function buildDiscoveryRules(includeExperiments: boolean): string {
+  const allowed = includeExperiments
+    ? `${BASE_ALLOWED_CATEGORIES}, experiment`
+    : BASE_ALLOWED_CATEGORIES;
+  return `
 
 ## Rules
 
@@ -33,6 +55,16 @@ Scan the codebase for issues in two tiers. Tier 1 applies to every repo. Tier 2
 - Prioritize by impact. Lead with findings that save the most time or prevent the most damage.
 - Do NOT suggest documentation, comment, or style/formatting changes.
 - Maximum 4 tasks. Quality over quantity.
-- Allowed \`category\` values: bug, security, dead_code, duplication, performance, stale_feature_flag, error_tracking, event_tracking, funnel. Do NOT emit any other category.
+- Allowed \`category\` values: ${allowed}. Do NOT emit any other category.
 
 When you are done analyzing, call create_output with your findings.`;
+}
+
+/** Base prompt + experiment tier (only when enabled) + rules section. */
+export function buildDiscoveryPrompt(opts: {
+  includeExperiments: boolean;
+}): string {
+  const enabled = opts.includeExperiments;
+  const tier = enabled ? DISCOVERY_PROMPT_EXPERIMENT_TIER : "";
+  return [DISCOVERY_PROMPT_BASE, tier, buildDiscoveryRules(enabled)].join("");
+}
diff --git a/apps/code/src/renderer/features/setup/services/setupRunService.ts b/apps/code/src/renderer/features/setup/services/setupRunService.ts
@@ -1,16 +1,21 @@
 import { getAuthenticatedClient } from "@features/auth/hooks/authClient";
 import { fetchAuthState } from "@features/auth/hooks/authQueries";
-import { DISCOVERY_PROMPT } from "@features/setup/prompts";
+import { buildDiscoveryPrompt } from "@features/setup/prompts";
 import { useSetupStore } from "@features/setup/stores/setupStore";
 import {
+  buildTaskDiscoverySchema,
   type DiscoveredTask,
-  TASK_DISCOVERY_JSON_SCHEMA,
 } from "@features/setup/types";
 import { trpcClient } from "@renderer/trpc/client";
+import { EXPERIMENT_SUGGESTIONS_FLAG } from "@shared/constants";
 import { isTerminalStatus, type Task } from "@shared/types";
 import { ANALYTICS_EVENTS } from "@shared/types/analytics";
 import { getCloudUrlFromRegion } from "@shared/utils/urls";
-import { captureException, track } from "@utils/analytics";
+import {
+  captureException,
+  isFeatureFlagEnabled,
+  track,
+} from "@utils/analytics";
 import { logger } from "@utils/logger";
 import { injectable } from "inversify";
 
@@ -349,10 +354,16 @@ export class SetupRunService {
         return;
       }
 
+      const includeExperiments =
+        isFeatureFlagEnabled(EXPERIMENT_SUGGESTIONS_FLAG) ||
+        import.meta.env.DEV;
+      const discoveryPrompt = buildDiscoveryPrompt({ includeExperiments });
+      const discoverySchema = buildTaskDiscoverySchema({ includeExperiments });
+
       const task = (await client.createTask({
         title: "Discover first tasks",
-        description: DISCOVERY_PROMPT,
-        json_schema: TASK_DISCOVERY_JSON_SCHEMA as Record<string, unknown>,
+        description: discoveryPrompt,
+        json_schema: discoverySchema,
       })) as unknown as Task;
       if (abort.signal.aborted) return;
 
@@ -375,14 +386,14 @@ export class SetupRunService {
         apiHost,
         projectId,
         permissionMode: "bypassPermissions",
-        jsonSchema: TASK_DISCOVERY_JSON_SCHEMA as Record<string, unknown>,
+        jsonSchema: discoverySchema,
       });
       if (abort.signal.aborted) return;
 
       trpcClient.agent.prompt
         .mutate({
           sessionId: taskRun.id,
-          prompt: [{ type: "text", text: DISCOVERY_PROMPT }],
+          prompt: [{ type: "text", text: discoveryPrompt }],
         })
         .catch((err) => {
           log.error("Failed to send discovery prompt", { error: err });

diff --git a/apps/code/src/renderer/features/setup/types.ts b/apps/code/src/renderer/features/setup/types.ts
@@ -14,7 +14,8 @@ export interface DiscoveredTask {
     | "error_tracking"
     | "event_tracking"
     | "funnel"
-    | "posthog_setup";
+    | "posthog_setup"
+    | "experiment";
   source: DiscoveredTaskSource;
   file?: string;
   lineHint?: number;
@@ -23,69 +24,84 @@ export interface DiscoveredTask {
   prompt?: string;
 }
 
-export const TASK_DISCOVERY_JSON_SCHEMA = {
-  type: "object",
-  properties: {
-    tasks: {
-      type: "array",
-      items: {
-        type: "object",
-        properties: {
-          id: { type: "string", description: "A short kebab-case identifier" },
-          title: {
-            type: "string",
-            description:
-              "Short, action-oriented header — under 60 characters. No file paths or line numbers.",
-          },
-          description: {
-            type: "string",
-            description:
-              "A clear paragraph (2–4 sentences) describing the problem: what's wrong and the conditions under which it manifests. Do NOT include the file path or line number — those go in the file/lineHint fields.",
-          },
-          category: {
-            type: "string",
-            enum: [
-              "bug",
-              "security",
-              "dead_code",
-              "duplication",
-              "performance",
-              "stale_feature_flag",
-              "error_tracking",
-              "event_tracking",
-              "funnel",
-            ],
-          },
-          file: {
-            type: "string",
-            description: "Relative file path where the issue lives",
-          },
-          lineHint: {
-            type: "integer",
-            description: "Approximate line number",
-          },
-          impact: {
-            type: "string",
-            description:
-              "Why this matters — concrete impact, blast radius, or risk. 1–3 sentences. Be specific (e.g. 'silently drops auth errors so users see a successful login UI even when backend rejects them').",
-          },
-          recommendation: {
-            type: "string",
-            description:
-              "Suggested approach to fix, in plain prose. 2–4 sentences pointing at the right shape of the fix without writing the patch. Reference any specific functions, types, or files involved.",
+const BASE_CATEGORY_ENUM = [
+  "bug",
+  "security",
+  "dead_code",
+  "duplication",
+  "performance",
+  "stale_feature_flag",
+  "error_tracking",
+  "event_tracking",
+  "funnel",
+] as const;
+
+export function buildTaskDiscoverySchema({
+  includeExperiments,
+}: {
+  includeExperiments: boolean;
+}): Record<string, unknown> {
+  const categoryEnum = includeExperiments
+    ? [...BASE_CATEGORY_ENUM, "experiment"]
+    : [...BASE_CATEGORY_ENUM];
+
+  return {
+    type: "object",
+    properties: {
+      tasks: {
+        type: "array",
+        items: {
+          type: "object",
+          properties: {
+            id: {
+              type: "string",
+              description: "A short kebab-case identifier",
+            },
+            title: {
+              type: "string",
+              description:
+                "Short, action-oriented header — under 60 characters. No file paths or line numbers.",
+            },
+            description: {
+              type: "string",
+              description:
+                "A clear paragraph (2–4 sentences) describing the problem: what's wrong and the conditions under which it manifests. Do NOT include the file path or line number — those go in the file/lineHint fields. For experiment-category tasks, state the hypothesis being tested instead of a problem.",
+            },
+            category: {
+              type: "string",
+              enum: categoryEnum,
+            },
+            file: {
+              type: "string",
+              description: "Relative file path where the issue lives",
+            },
+            lineHint: {
+              type: "integer",
+              description: "Approximate line number",
+            },
+            impact: {
+              type: "string",
+              description:
+                "Why this matters — concrete impact, blast radius, or risk. 1–3 sentences. For experiment-category tasks, state the metric you would measure and the outcome a winning variant would produce.",
+            },
+            recommendation: {
+              type: "string",
+              description:
+                "Suggested approach to fix, in plain prose. 2–4 sentences pointing at the right shape of the fix without writing the patch. Reference any specific functions, types, or files involved. For experiment-category tasks, describe the proposed control and test variants concretely.",
+            },
           },
+          required: [
+            "id",
+            "title",
+            "description",
+            "category",
+            "impact",
+            "recommendation",
+          ],
         },
-        required: [
-          "id",
-          "title",
-          "description",
-          "category",
-          "impact",
-          "recommendation",
-        ],
+        maxItems: 4,
       },
-      maxItems: 4,
     },
-  },
-  required: ["tasks"],
-} as const;
+    required: ["tasks"],
+  };
+}
diff --git a/apps/code/src/renderer/features/setup/utils/buildDiscoveredTaskPrompt.ts b/apps/code/src/renderer/features/setup/utils/buildDiscoveredTaskPrompt.ts
@@ -1,7 +1,40 @@
 import type { DiscoveredTask } from "@features/setup/types";
+import { SKILL_BUTTONS } from "@features/skill-buttons/prompts";
+
+/**
+ * Builds the prompt text for an experiment-category task: the
+ * "run-experiment" skill-button prompt, then the hypothesis (title and
+ * description) and, when present, the metric, variants and code surface.
+ */
+function buildExperimentTaskPrompt(task: DiscoveredTask): string {
+  const { title, description, impact, recommendation, file, lineHint } = task;
+  const lines: string[] = [
+    SKILL_BUTTONS["run-experiment"].prompt,
+    "",
+    "Use the analysis below as the starting point.",
+    "",
+    `Hypothesis: ${title}`,
+    "",
+    description,
+  ];
+
+  // Each optional section is preceded by an empty separator line.
+  if (impact) lines.push("", "Primary metric:", impact);
+  if (recommendation) lines.push("", "Proposed variants:", recommendation);
+  if (file) {
+    // A falsy lineHint (absent or 0) yields the bare file path.
+    const surface = lineHint ? `${file}:${lineHint}` : file;
+    lines.push("", `Surface: ${surface}`);
+  }
+
+  return lines.join("\n");
+}
 
 export function buildDiscoveredTaskPrompt(task: DiscoveredTask): string {
   if (task.prompt) return task.prompt;
+  if (task.category === "experiment") {
+    return buildExperimentTaskPrompt(task);
+  }
 
   const sections: string[] = [
     "Investigate this issue and implement the fix. Open a PR if appropriate.",

diff --git a/apps/code/src/renderer/features/setup/utils/categoryConfig.ts b/apps/code/src/renderer/features/setup/utils/categoryConfig.ts
@@ -5,6 +5,7 @@ import {
   ChartLine,
   Copy,
   Flag,
+  Flask,
   Funnel,
   Lightning,
   Lock,
@@ -37,6 +38,7 @@ export const CATEGORY_CONFIG: Record<
   event_tracking: { icon: ChartLine, color: "blue", label: "Event tracking" },
   funnel: { icon: Funnel, color: "violet", label: "Funnel" },
   posthog_setup: { icon: Sparkle, color: "violet", label: "PostHog setup" },
+  experiment: { icon: Flask, color: "purple", label: "Experiment" },
 };
 
 // Fallback when a `DiscoveredTask.category` somehow doesn't match the map

diff --git a/apps/code/src/shared/constants.ts b/apps/code/src/shared/constants.ts
@@ -1,5 +1,7 @@
 export const BILLING_FLAG = "posthog-code-billing";
 export const INBOX_GATED_DUE_TO_SCALE_FLAG = "inbox-gated-due-to-scale";
+export const EXPERIMENT_SUGGESTIONS_FLAG =
+  "posthog-code-experiment-suggestions";
 export const BRANCH_PREFIX = "posthog-code/";
 export const DATA_DIR = ".posthog-code";
 export const WORKTREES_DIR = ".posthog-code/worktrees";

diff --git a/apps/code/src/shared/types/analytics.ts b/apps/code/src/shared/types/analytics.ts
@@ -285,7 +285,8 @@ type SetupDiscoveredTaskCategory =
   | "error_tracking"
   | "event_tracking"
   | "funnel"
-  | "posthog_setup";
+  | "posthog_setup"
+  | "experiment";
 
 export interface SetupViewedProperties {
   discovery_status: "idle" | "running" | "done" | "error";