Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Features
- Expand PostHog telemetry coverage to close the 16 server-side and 12 web-UI gaps surfaced by the May audit (#376). Server-side adds `cli_install_success` / `cli_install_failure` / `cli_uninstall_success` / `cli_uninstall_failure` / `cli_list_invoked` / `cli_parse_error` / `cli_unexpected_error` / `hook_dispatch_error` (CLI lifecycle outcomes in `bin/failproofai.mjs`), `hook_stdin_error` / `hook_payload_parse_error` (hook handler input errors in `src/hooks/handler.ts`), `policy_evaluation_error` (builtin policy crashes in `src/hooks/policy-evaluator.ts`, distinct from the existing `custom_hook_error`), `custom_policy_validation_failed` / `custom_hooks_load_error` / `policy_params_validation_warning` / `scope_validation_failed` / `hook_write_failed` / `multi_scope_warning_shown` / `cli_detection_summary` / `beta_policies_installed` (manager / loader / install-prompt internals), and `first_install` / `version_changed` (lifecycle detection in `scripts/postinstall.mjs` via a new `~/.failproofai/last-version` file). Web-UI adds `policies_tab_switched` / `activity_filter_changed` (debounced) / `activity_row_toggled` / `activity_copy_clicked` / `activity_pagination_changed` / `cli_selection_toggled` / `cli_install_remove_submitted` / `cli_reinstall_submitted` / `policy_config_modal_opened` / `policy_config_modal_closed` / `action_error_displayed` / `hooks_install_from_error_clicked` via `usePostHog()` in `app/policies/hooks-client.tsx`. The deny-/instruct-only condition at `handler.ts:344` (allow-path tracking) is intentionally left unchanged. All events go through the existing helpers (`trackHookEvent`, `trackInstallEvent`, `captureClientEvent`) and honor `FAILPROOFAI_TELEMETRY_DISABLED=1`.
- Add a first-run install prompt on bare `failproofai` invocations. PostHog showed only ~10% of npm-installed users ever ran `failproofai policies --install`; the no-args dashboard launch now detects "zero hooks installed across any detected CLI" and offers to run the existing interactive policy-selection inline (covering all of Claude Code, Codex, Copilot, Cursor, OpenCode, Pi, Gemini). Non-TTY contexts (CI, piped invocations) print a short stderr hint and fall through to the dashboard. New `src/hooks/first-run-nudge.ts` module, a guard in `bin/failproofai.mjs` before `launch("start")`, plus four new PostHog events (`first_run_nudge_shown`, `_accepted`, `_declined`, `_skipped_noninteractive`) so the uplift is measurable. Postinstall message extended with a "Next steps" block when the brand-new-user case is detected (`!configured && !registered`). Opt-out via `FAILPROOFAI_NO_FIRST_RUN=1`.
- Add `failproofai audit` command (beta) — retrospectively scan past agent transcripts across all 7 CLIs and report wasteful/risky behavior via the 39 builtin policies + 8 new audit-only detectors (`redundant-cd-cwd`, `prefer-edit-over-read-cat`, `prefer-edit-over-sed-awk`, `prefer-write-over-heredoc`, `sleep-polling-loop`, `find-from-root`, `git-commit-no-verify`, `reread-after-edit`). Outputs ANSI table + markdown report; supports `--cli`, `--project`, `--since`, `--policy`, `--limit`, `--show-examples`, `--report`, `--no-report`, `--json`, `--no-cache`. Per-transcript cache at `~/.failproofai/cache/audit/` auto-invalidates on policy/detector code changes (#377).

### Docs
- Document the new first-run prompt in the README and `docs/introduction.mdx` quickstart snippets (calling out that `failproofai policies --install` is now optional — running bare `failproofai` will offer to do it), and add a new "First-run prompt" section to `docs/cli/environment-variables.mdx` for `FAILPROOFAI_NO_FIRST_RUN=1`. The Chinese mirror and the 14 translated env-vars files are left for the translation-sync workflow.
Expand Down
166 changes: 166 additions & 0 deletions __tests__/audit/detectors.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// @vitest-environment node
import { describe, it, expect } from "vitest";
import type { NormalizedToolEvent } from "../../src/audit/types";
import { redundantCdCwd } from "../../src/audit/detectors/redundant-cd-cwd";
import { preferEditOverReadCat } from "../../src/audit/detectors/prefer-edit-over-read-cat";
import { preferEditOverSedAwk } from "../../src/audit/detectors/prefer-edit-over-sed-awk";
import { preferWriteOverHeredoc } from "../../src/audit/detectors/prefer-write-over-heredoc";
import { sleepPollingLoop } from "../../src/audit/detectors/sleep-polling-loop";
import { findFromRoot } from "../../src/audit/detectors/find-from-root";
import { gitCommitNoVerify } from "../../src/audit/detectors/git-commit-no-verify";
import { rereadAfterEdit } from "../../src/audit/detectors/reread-after-edit";

/**
 * Fabricates a normalized `Bash` tool event for detector tests.
 *
 * Everything except the shell command and the working directory is fixed
 * fixture metadata (cli "claude", session "sess-1", a constant timestamp).
 *
 * @param cmd - Shell command placed in `toolInput.command`.
 * @param cwd - Working directory recorded on the event; defaults to `/home/u/proj`.
 * @returns The assembled event.
 */
function bash(cmd: string, cwd = "/home/u/proj"): NormalizedToolEvent {
  const shellTool = "Bash";
  const event: NormalizedToolEvent = {
    cli: "claude",
    sessionId: "sess-1",
    transcriptPath: "/tmp/t.jsonl",
    cwd: cwd,
    timestamp: "2026-05-21T00:00:00.000Z",
    toolName: shellTool,
    rawToolName: shellTool,
    toolInput: { command: cmd },
  };
  return event;
}

/**
 * Fabricates a normalized event for an arbitrary tool call.
 *
 * The same `name` is used for both `toolName` and `rawToolName`; the remaining
 * fields are fixed fixture metadata (cli "claude", cwd `/home/u/proj`, ...).
 *
 * @param name - Tool name, e.g. "Edit" or "Read".
 * @param input - Tool input payload, stored as-is (not copied) in `toolInput`.
 * @returns The assembled event.
 */
function tool(name: string, input: Record<string, unknown>): NormalizedToolEvent {
  const event: NormalizedToolEvent = {
    cli: "claude",
    sessionId: "sess-1",
    transcriptPath: "/tmp/t.jsonl",
    cwd: "/home/u/proj",
    timestamp: "2026-05-21T00:00:00.000Z",
    toolName: name,
    rawToolName: name,
    toolInput: input,
  };
  return event;
}

// redundant-cd-cwd: a `cd` into the event's own cwd before a command is a hit;
// a `cd` elsewhere or no `cd` at all is not.
describe("redundant-cd-cwd", () => {
  // Runs the detector on a Bash event (fixture default cwd) with a fresh state object.
  const detect = (cmd: string) => redundantCdCwd.detect(bash(cmd), {});

  it("matches `cd <cwd> && cmd`", () => {
    const hit = detect("cd /home/u/proj && pnpm test");
    expect(hit?.example).toContain("cd /home/u/proj && pnpm test");
  });

  it("does not match cd to a different path", () => {
    const hit = detect("cd /tmp && ls");
    expect(hit).toBeNull();
  });

  it("does not match bare cmd without cd", () => {
    const hit = detect("pnpm test");
    expect(hit).toBeNull();
  });
});

// prefer-edit-over-read-cat: plain `cat`/`head` of a source file is a hit;
// env files, piped or redirected output, and extensionless files are not.
describe("prefer-edit-over-read-cat", () => {
  // Runs the detector on a Bash event with a fresh state object.
  const detect = (cmd: string) => preferEditOverReadCat.detect(bash(cmd), {});

  it("matches `cat foo.ts`", () => {
    expect(detect("cat src/foo.ts")?.example).toBe("cat src/foo.ts");
  });
  it("matches `head -50 bar.py`", () => {
    expect(detect("head -50 bar.py")).not.toBeNull();
  });
  it("does not match `cat .env`", () => {
    expect(detect("cat .env")).toBeNull();
  });
  it("does not match piped `cat`", () => {
    expect(detect("cat foo.ts | wc -l")).toBeNull();
  });
  // Title names the exact command under test. It previously said `foo.txt > out`
  // while the command used `foo.ts > /tmp/out`.
  it("does not match `cat foo.ts > /tmp/out`", () => {
    expect(detect("cat foo.ts > /tmp/out")).toBeNull();
  });
  it("does not match `cat unknownext`", () => {
    expect(detect("cat README")).toBeNull();
  });
});

// prefer-edit-over-sed-awk: in-place `sed -i` and `awk ... > out` are hits;
// `sed` used as a stream filter without -i is not.
describe("prefer-edit-over-sed-awk", () => {
  // Runs the detector on a Bash event with a fresh state object.
  const detect = (cmd: string) => preferEditOverSedAwk.detect(bash(cmd), {});

  it("matches `sed -i`", () => {
    const hit = detect("sed -i 's/foo/bar/g' file.ts");
    expect(hit).not.toBeNull();
  });

  it("matches `awk '...' file > out`", () => {
    const hit = detect("awk '{print $1}' file > out");
    expect(hit).not.toBeNull();
  });

  it("does not match `sed 's/x/y/'` without -i", () => {
    const hit = detect("echo x | sed 's/x/y/'");
    expect(hit).toBeNull();
  });
});

// prefer-write-over-heredoc: a heredoc or multi-line echo redirected to a file
// is a hit; a heredoc inside a `$()` command substitution is not.
describe("prefer-write-over-heredoc", () => {
  // Runs the detector on a Bash event with a fresh state object.
  const detect = (cmd: string) => preferWriteOverHeredoc.detect(bash(cmd), {});

  it("matches `cat <<EOF > file`", () => {
    const heredocToFile = "cat <<'EOF' > out.md\nhello\nEOF";
    expect(detect(heredocToFile)).not.toBeNull();
  });

  it("does not match `cat <<EOF` inside `$()`", () => {
    const heredocInSubstitution = `git commit -m "$(cat <<'EOF'\nfeat\nEOF\n)"`;
    expect(detect(heredocInSubstitution)).toBeNull();
  });

  it("matches `echo \"multi\\nline\" > file`", () => {
    // The `\n` here is a real newline character inside the quoted echo argument.
    const multiLineEcho = 'echo "a\nb" > out';
    expect(detect(multiLineEcho)).not.toBeNull();
  });
});

// sleep-polling-loop: `sleep 60`, `sleep 5m` and a while-loop containing a
// sleep are hits; `sleep 1` is not.
describe("sleep-polling-loop", () => {
  // Runs the detector on a Bash event with a fresh state object.
  const detect = (cmd: string) => sleepPollingLoop.detect(bash(cmd), {});

  it("matches `sleep 60`", () => {
    const hit = detect("sleep 60");
    expect(hit).not.toBeNull();
  });

  it("matches `sleep 5m`", () => {
    const hit = detect("sleep 5m");
    expect(hit).not.toBeNull();
  });

  it("matches while-sleep loop", () => {
    const hit = detect("while true; do echo x; sleep 5; done");
    expect(hit).not.toBeNull();
  });

  it("does not match `sleep 1`", () => {
    const hit = detect("sleep 1");
    expect(hit).toBeNull();
  });
});

// find-from-root: `find` starting at `/` or `/home` is a hit; `find` starting
// at `.` or a relative directory is not.
describe("find-from-root", () => {
  // Runs the detector on a Bash event with a fresh state object.
  const detect = (cmd: string) => findFromRoot.detect(bash(cmd), {});

  it("matches `find /`", () => {
    const hit = detect("find / -name '*.ts'");
    expect(hit).not.toBeNull();
  });

  it("matches `find /home`", () => {
    const hit = detect("find /home -name foo");
    expect(hit).not.toBeNull();
  });

  it("does not match `find . -name foo`", () => {
    const hit = detect("find . -name foo");
    expect(hit).toBeNull();
  });

  it("does not match `find src`", () => {
    const hit = detect("find src -name foo");
    expect(hit).toBeNull();
  });
});

// git-commit-no-verify: both the long `--no-verify` flag and the short `-n`
// form are hits; a plain `git commit -m` is not.
describe("git-commit-no-verify", () => {
  // Runs the detector on a Bash event with a fresh state object.
  const detect = (cmd: string) => gitCommitNoVerify.detect(bash(cmd), {});

  it("matches `git commit --no-verify`", () => {
    const hit = detect("git commit --no-verify -m foo");
    expect(hit).not.toBeNull();
  });

  it("matches short `git commit -n`", () => {
    const hit = detect("git commit -n -m foo");
    expect(hit).not.toBeNull();
  });

  it("does not match plain `git commit -m`", () => {
    const hit = detect("git commit -m foo");
    expect(hit).toBeNull();
  });
});

// reread-after-edit: a Read of a file that was just Edited/Written is a hit.
// The detector is stateful, so every event within one test shares one state object.
describe("reread-after-edit", () => {
  // Creates an isolated state object and returns a function that feeds one
  // tool event through the detector using that state.
  function newSession() {
    const state = {};
    return (name: string, input: Record<string, unknown>) =>
      rereadAfterEdit.detect(tool(name, input), state);
  }

  it("matches Read of file just Edited", () => {
    const step = newSession();
    const afterEdit = step("Edit", { file_path: "/a/b.ts" });
    expect(afterEdit).toBeNull();
    const afterRead = step("Read", { file_path: "/a/b.ts" });
    expect(afterRead?.example).toContain("/a/b.ts");
  });

  it("matches Read after Write", () => {
    const step = newSession();
    step("Write", { file_path: "/a/b.ts" });
    const afterRead = step("Read", { file_path: "/a/b.ts" });
    expect(afterRead).not.toBeNull();
  });

  it("does not match Read of a different file", () => {
    const step = newSession();
    step("Edit", { file_path: "/a/b.ts" });
    const afterRead = step("Read", { file_path: "/a/other.ts" });
    expect(afterRead).toBeNull();
  });

  it("decays after window of 5 tool calls", () => {
    const step = newSession();
    step("Edit", { file_path: "/a/b.ts" });
    // Six unrelated tool calls between the Edit and the Read.
    let remaining = 6;
    while (remaining > 0) {
      step("Bash", { command: "x" });
      remaining -= 1;
    }
    const afterRead = step("Read", { file_path: "/a/b.ts" });
    expect(afterRead).toBeNull();
  });
});
87 changes: 87 additions & 0 deletions __tests__/audit/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// @vitest-environment node
import { describe, it, expect, beforeAll, afterAll } from "vitest";
import { mkdtempSync, writeFileSync, rmSync, mkdirSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { runAudit } from "../../src/audit";
import { resetReplay } from "../../src/audit/replay";

/**
 * Builds a minimal Claude JSONL transcript with four assistant tool-use events:
 * 1. Bash(env) — should trigger protect-env-vars (builtin)
 * 2. Bash(cd <cwd> && pnpm test) — should trigger redundant-cd-cwd (detector)
 * 3. Edit(file_path) — sets up the reread check
 * 4. Read(file_path) of the same file — should trigger reread-after-edit
 *
 * Entries are chained through `parentUuid` (the first has `null`) and carry
 * UTC timestamps one hour apart starting at 2026-05-21T00:00:00.000Z, so the
 * output is byte-identical regardless of the machine's timezone.
 *
 * @param cwd - Working directory recorded on every entry and used in the commands/paths.
 * @param sessionId - Session id recorded on every entry.
 * @returns The transcript as newline-separated JSON objects (no trailing newline).
 */
function buildFixtureTranscript(cwd: string, sessionId: string): string {
  const lines: object[] = [];
  let prevUuid: string | null = null;
  function pushAssistantToolUse(name: string, input: Record<string, unknown>) {
    // Position of this entry; drives the uuid, the tool-use id and the hour offset.
    const index = lines.length;
    const uuid = `uuid-${index}`;
    lines.push({
      type: "assistant",
      uuid,
      parentUuid: prevUuid,
      sessionId,
      cwd,
      // Date.UTC instead of the local-time constructor keeps the fixture deterministic.
      timestamp: new Date(Date.UTC(2026, 4, 21, index)).toISOString(),
      message: {
        role: "assistant",
        content: [{ type: "tool_use", id: `tu-${index}`, name, input }],
      },
    });
    prevUuid = uuid;
  }
  pushAssistantToolUse("Bash", { command: "env" });
  pushAssistantToolUse("Bash", { command: `cd ${cwd} && pnpm test` });
  pushAssistantToolUse("Edit", { file_path: `${cwd}/foo.ts`, old_string: "a", new_string: "b" });
  pushAssistantToolUse("Read", { file_path: `${cwd}/foo.ts` });
  return lines.map((l) => JSON.stringify(l)).join("\n");
}

// End-to-end check of runAudit() against a single on-disk fixture transcript.
// CLAUDE_PROJECTS_PATH is redirected to a temp directory for the duration of
// the suite (presumably the audit's Claude transcript discovery reads it —
// confirm against src/audit) and restored afterwards.
describe("runAudit() end-to-end on a fixture transcript", () => {
  let tmpRoot: string;
  let origEnv: string | undefined;

  beforeAll(() => {
    tmpRoot = mkdtempSync(join(tmpdir(), "failproofai-audit-fixture-"));
    origEnv = process.env.CLAUDE_PROJECTS_PATH;
    process.env.CLAUDE_PROJECTS_PATH = tmpRoot;

    // Create one project with one transcript.
    const projectDir = join(tmpRoot, "-tmp-myproj");
    mkdirSync(projectDir, { recursive: true });
    const sessionId = "11111111-2222-3333-4444-555555555555";
    const transcriptPath = join(projectDir, `${sessionId}.jsonl`);
    const transcriptCwd = "/tmp/myproj";
    writeFileSync(transcriptPath, buildFixtureTranscript(transcriptCwd, sessionId));
    resetReplay();
  });

  afterAll(() => {
    // Compare against undefined rather than truthiness: an originally empty
    // (but set) variable must be restored, not deleted.
    if (origEnv !== undefined) process.env.CLAUDE_PROJECTS_PATH = origEnv;
    else delete process.env.CLAUDE_PROJECTS_PATH;
    rmSync(tmpRoot, { recursive: true, force: true });
  });

  it("counts builtin + detector hits across the fixture transcript", async () => {
    const result = await runAudit({ clis: ["claude"], noCache: true, noReport: true });
    expect(result.transcripts.scanned).toBeGreaterThanOrEqual(1);

    const names = result.results.map((r) => r.name);
    // Builtin policy hit.
    expect(names.some((n) => n.includes("protect-env-vars"))).toBe(true);
    // Audit-only detector hits.
    expect(names).toContain("redundant-cd-cwd");
    expect(names).toContain("reread-after-edit");
  });

  it("filters by --policy", async () => {
    const result = await runAudit({
      clis: ["claude"],
      noCache: true,
      noReport: true,
      policies: ["redundant-cd-cwd"],
    });
    // Only the requested policy may appear in the results.
    expect(result.results.map((r) => r.name)).toEqual(["redundant-cd-cwd"]);
  });
});
52 changes: 52 additions & 0 deletions __tests__/audit/replay.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// @vitest-environment node
import { describe, it, expect, beforeEach } from "vitest";
import { resetReplay, replayEvent } from "../../src/audit/replay";
import type { NormalizedToolEvent } from "../../src/audit/types";

/**
 * Fabricates a normalized `Bash` tool event for the replay tests.
 *
 * Only the shell command varies; the rest is fixed fixture metadata
 * (cli "claude", session "sess-1", cwd `/home/u/proj`, a constant timestamp).
 *
 * @param command - Shell command placed in `toolInput.command`.
 * @returns The assembled event.
 */
function bash(command: string): NormalizedToolEvent {
  const shellTool = "Bash";
  const event: NormalizedToolEvent = {
    cli: "claude",
    sessionId: "sess-1",
    transcriptPath: "/tmp/t.jsonl",
    cwd: "/home/u/proj",
    timestamp: "2026-05-21T00:00:00.000Z",
    toolName: shellTool,
    rawToolName: shellTool,
    toolInput: { command: command },
  };
  return event;
}

// Replay engine: feeds synthetic tool events through replayEvent() and checks
// which policies report hits. Replay state is reset before every test.
describe("replay engine", () => {
  beforeEach(() => {
    resetReplay();
  });

  // Replays one Bash command and returns the policy names of the resulting hits.
  async function policyNamesFor(command: string) {
    const hits = await replayEvent(bash(command));
    return hits.map((h) => h.policyName);
  }

  it("triggers protect-env-vars on `env`", async () => {
    const names = await policyNamesFor("env");
    const fired = names.some((n) => n.includes("protect-env-vars"));
    expect(fired).toBe(true);
  });

  it("triggers block-force-push on `git push --force` to a non-protected branch", async () => {
    // The target is `feature` rather than main/master so that block-push-master
    // does not short-circuit before block-force-push can fire.
    const names = await policyNamesFor("git push --force origin feature");
    const fired = names.some((n) => n.includes("block-force-push"));
    expect(fired).toBe(true);
  });

  it("does not fire on a plain `ls`", async () => {
    const hits = await replayEvent(bash("ls -la"));
    const denials = hits.filter((h) => h.decision === "deny");
    expect(denials).toHaveLength(0);
  });

  it("synthesizes PostToolUse when toolResultText is set", async () => {
    // JWT-shaped placeholder (three dot-separated chunks) meant to trip
    // sanitize-jwt on PostToolUse without resembling a real API key.
    const fakeJwt = ["eyJhbGciOiJIUzI1NiJ9", "eyJzdWIiOiJ0ZXN0In0", "test-sig-xyz"].join(".");
    const event: NormalizedToolEvent = {
      ...bash("echo token"),
      toolResultText: `Authorization: Bearer ${fakeJwt}`,
    };
    const hits = await replayEvent(event);
    const sawPostToolUse = hits.some((h) => h.eventType === "PostToolUse");
    expect(sawPostToolUse).toBe(true);
  });
});
Loading