From 641fa02d4dc7611c01675d1a095f06fe3819785e Mon Sep 17 00:00:00 2001 From: ysyneu Date: Tue, 28 Apr 2026 11:35:34 +0800 Subject: [PATCH 01/11] fix(ws): drop CurrentTime from heartbeat env info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CurrentTime was captured once via collectEnvironmentInfo() at runner start and the resulting EnvInfo was sent only on the first heartbeat (envInfoSent gate), so a long-lived runner kept reporting its boot time forever. Safari then served that stale value via the `now` tool. Static info (OS, arch, hostname, timezone) is fine to send once; current time is by definition not static. Drop the field entirely — safari uses its own wall clock now. Co-Authored-By: Claude Opus 4.7 (1M context) --- protocol/messages.go | 1 - ws/client.go | 3 --- 2 files changed, 4 deletions(-) diff --git a/protocol/messages.go b/protocol/messages.go index a5dcd35..2abbc1a 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -90,7 +90,6 @@ type EnvironmentInfo struct { Username string `json:"username"` // Current user NumCPU int `json:"num_cpu"` // Number of CPUs TotalMemoryMB int64 `json:"total_memory_mb"` // Total memory in MB - CurrentTime string `json:"current_time"` // Current time in RFC3339 format Timezone string `json:"timezone"` // Timezone name, e.g., "Asia/Shanghai" UTCOffset string `json:"utc_offset"` // UTC offset, e.g., "+08:00" } diff --git a/ws/client.go b/ws/client.go index 021b2dd..d692c07 100644 --- a/ws/client.go +++ b/ws/client.go @@ -446,8 +446,6 @@ func (c *Client) sendHeartbeat() { Version: c.version, } - // Only send environment info on first heartbeat after connection - // Environment info is static and doesn't need to be sent repeatedly c.mu.Lock() if !c.envInfoSent { payload.Environment = c.envInfo @@ -485,7 +483,6 @@ func collectEnvironmentInfo(workspaceRoot string) *protocol.EnvironmentInfo { } now := time.Now() - info.CurrentTime = now.Format(time.RFC3339) info.Timezone = 
now.Location().String() info.UTCOffset = now.Format("-07:00") From 3efa401a55d4d38975d3ebe4bbebf5de4e9b2be0 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Wed, 29 Apr 2026 17:12:58 +0800 Subject: [PATCH 02/11] fix(ws): tighten reconnect timing for cloud sandbox pool pods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud sandbox pool pods spend their entire pre-claim lifetime dialing safari and getting 401 (no t_cloud_sandbox_0 row exists yet — claim inserts it). Two timing knobs were tuned for long-lived BYOC outage recovery and hurt cloud claim latency: - initialReconnectDelay 1s -> 200ms: first dials race the row INSERT. The 1s delay added 1-2s of extra 401-retry cost before hostname auth could possibly succeed. - maxReconnectDelay 5min -> 10s: a pool pod waiting >50s slid into a 32-64s exponential-backoff sleep, and got declared unrecoverable by safari's 90s claim window before its next dial — observed live (sbx_bzQ... did not come online within 1m30s, traceid 6996857e714cb99c31665d043ed3c260). 10s stays well inside the claim window and remains acceptable for BYOC server-restart recovery. Co-Authored-By: Claude Opus 4.7 (1M context) --- ws/client.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ws/client.go b/ws/client.go index d692c07..58e55a7 100644 --- a/ws/client.go +++ b/ws/client.go @@ -33,14 +33,25 @@ const ( // DefaultMaxReconnectAttempts is the default max reconnect attempts (0 = unlimited). DefaultMaxReconnectAttempts = 30 - // Initial reconnect delay (used for first few attempts) - initialReconnectDelay = 1 * time.Second + // Initial reconnect delay (used for first few attempts). + // Cloud sandbox runners race safari's t_cloud_sandbox_0 INSERT/UPDATE on + // first dial — a 1s delay added 1–2s of extra 401-retry cost before the + // row landed and the hostname-auth lookup could succeed. 200ms keeps the + // retry storm under control while shrinking the cold-start tail. 
+ initialReconnectDelay = 200 * time.Millisecond // Fast retry attempts before switching to exponential backoff fastRetryAttempts = 15 - // Maximum reconnect delay - maxReconnectDelay = 5 * time.Minute + // Maximum reconnect delay. Capped at 10s so cloud-sandbox pool pods + // (which spend their entire pre-claim lifetime in 401-retry) never + // land in a multi-minute sleep when sandbox-manager finally hands + // them off — Safari's claim window is 90s, anything longer than that + // risks the pod being declared unrecoverable while it's still asleep. + // The previous 5-minute cap was tuned for BYOC outage recovery; 10s + // stays well under the BYOC server-restart window in practice and + // adds at most one extra dial per 10s of downtime. + maxReconnectDelay = 10 * time.Second ) // MessageHandler handles incoming messages from Flashduty. From 0f222d9255d4283bb5e46d2d614c6a90adda48df Mon Sep 17 00:00:00 2001 From: ysyneu Date: Wed, 29 Apr 2026 17:30:38 +0800 Subject: [PATCH 03/11] fix(env): reject duplicated-root rel_paths; surface sibling hint on ENOENT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit safePath defense-in-depth — runner cannot rely on safari to filter rel_paths that mirror the workspace root. Joining e.root with such a path silently created a nested duplicate workspace tree (matching nested copies were observed on a long-running BYOC host). Reject them at the runner boundary. Read now appends a bounded sibling-name list when stat returns ENOENT, so the agent can self-correct a near-miss filename without spending a turn on an extra list call. .golangci.yml: exclude gosec G706 (log-taint via taint analysis). slog's structured key-value logging is not a format-string injection vector; the rule fires on every slog call with a user-controlled value, which is the whole point of structured logging. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .golangci.yml | 5 ++++ environment/environment.go | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 3e28c53..122e243 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -47,10 +47,15 @@ linters: # Exclude G301 (directory permissions) - workspace needs readable directories # Exclude G304 (file inclusion) - paths are validated via safePath() # Exclude G306 (file permissions) - workspace files need to be readable + # Exclude G706 (log taint via taint analysis) - slog's structured + # key-value logging is not a format-string injection vector; the rule + # fires on every slog call that includes a user-controlled value, which + # is the whole point of structured logging. excludes: - G301 - G304 - G306 + - G706 formatters: enable: diff --git a/environment/environment.go b/environment/environment.go index d332033..d2b0b56 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -59,6 +59,22 @@ func (e *Environment) Root() string { // safePath ensures the path is within the workspace root, resolving symlinks. func (e *Environment) safePath(path string) (string, error) { + // Defense-in-depth: reject relative paths that mirror the host's workspace + // root (e.g. "Users/ysy/.flashduty-runner/workspace/DUTY.md"). Safari already + // rejects these, but the runner accepts requests over the wire and must not + // rely on the caller to have done the check. Joining such a path with + // e.root would silently create a nested duplicate of the workspace tree. 
+ if !filepath.IsAbs(path) { + cleaned := filepath.Clean(filepath.FromSlash(path)) + rootRel := strings.TrimPrefix(e.root, string(filepath.Separator)) + if rootRel != "" { + rootRel = filepath.Clean(rootRel) + if cleaned == rootRel || strings.HasPrefix(cleaned, rootRel+string(filepath.Separator)) { + return "", fmt.Errorf("rel_path duplicates workspace root: %s", path) + } + } + } + absPath, err := filepath.Abs(filepath.Join(e.root, path)) if err != nil { return "", fmt.Errorf("failed to get absolute path: %w", err) @@ -105,6 +121,11 @@ func (e *Environment) Read(ctx context.Context, args *protocol.ReadArgs) (*proto info, err := os.Stat(realPath) if err != nil { + if os.IsNotExist(err) { + // Include sibling names so the model can self-correct without an extra + // list round-trip. Bounded to keep the error message small. + return nil, fmt.Errorf("failed to stat file: %w%s", err, siblingHint(filepath.Dir(realPath), filepath.Base(realPath))) + } return nil, fmt.Errorf("failed to stat file: %w", err) } if info.IsDir() { @@ -114,6 +135,37 @@ func (e *Environment) Read(ctx context.Context, args *protocol.ReadArgs) (*proto return e.readFileContent(realPath, info.Size(), args.Offset, args.Limit) } +// siblingHint returns a short " (siblings: a, b, c)" suffix when the parent +// directory exists, helping the agent recover from a near-miss filename +// without spending a turn on a list call. Returns empty string on any error +// or when the parent has too many entries to summarize compactly. 
+func siblingHint(dir, missing string) string { + const maxSiblings = 12 + entries, err := os.ReadDir(dir) + if err != nil { + return "" + } + names := make([]string, 0, len(entries)) + for _, e := range entries { + n := e.Name() + if strings.HasPrefix(n, ".") { + continue + } + names = append(names, n) + if len(names) > maxSiblings { + break + } + } + if len(names) == 0 { + return "" + } + if len(names) > maxSiblings { + names = names[:maxSiblings] + names = append(names, "…") + } + return fmt.Sprintf(" (parent %q contains: %s)", dir, strings.Join(names, ", ")) +} + // readFileContent reads file content with offset and limit support. func (e *Environment) readFileContent(path string, size, offset, limit int64) (*protocol.ReadResult, error) { file, err := os.Open(path) From ef2a0dcd0e39326ea44c28e3fbdfc9b60c4d2148 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:10:31 +0800 Subject: [PATCH 04/11] feat(skill): probe-or-install sync_skill; default home ~/.flashduty - SyncSkill now checks .checksum+SKILL.md before install (cache hit returns {Cached:true,Path}; miss with no zip returns {Cached:false} so cloud retries with zip_data) - Skills land at /skills// instead of /.work/skills// - Default home moves from ~/.flashduty-runner/workspace to ~/.flashduty - FLASHDUTY_RUNNER_HOME is the new canonical override; FLASHDUTY_RUNNER_WORKSPACE kept as deprecated alias - Add SyncSkillResult.Cached field (omitempty); SyncSkillArgs unchanged Co-Authored-By: Claude Sonnet 4.6 --- cmd/main.go | 12 +++-- environment/environment.go | 42 +++++++++++------ environment/environment_test.go | 82 +++++++++++++++++++++++++++++++++ protocol/messages.go | 9 +++- 4 files changed, 126 insertions(+), 19 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 4872346..5956afc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -79,7 +79,8 @@ Examples: Environment variables: FLASHDUTY_RUNNER_TOKEN - Authentication token (required if --token not provided) FLASHDUTY_RUNNER_URL - WebSocket 
endpoint URL - FLASHDUTY_RUNNER_WORKSPACE - Workspace root directory`, + FLASHDUTY_RUNNER_HOME - Home directory for skills and data (default: ~/.flashduty) + FLASHDUTY_RUNNER_WORKSPACE - Deprecated alias for FLASHDUTY_RUNNER_HOME`, RunE: func(cmd *cobra.Command, args []string) error { return runRunner() }, @@ -137,17 +138,20 @@ func loadConfig() (*Config, error) { cfg.URL = defaultURL } - // Workspace: flag > env > default + // Home directory (skills, future knowledge/output): flag > FLASHDUTY_RUNNER_HOME > deprecated FLASHDUTY_RUNNER_WORKSPACE > default ~/.flashduty cfg.WorkspaceRoot = flagWorkspace if cfg.WorkspaceRoot == "" { - cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_WORKSPACE") + cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_HOME") + } + if cfg.WorkspaceRoot == "" { + cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_WORKSPACE") // deprecated alias } if cfg.WorkspaceRoot == "" { homeDir, err := os.UserHomeDir() if err != nil { return nil, fmt.Errorf("failed to get home directory: %w", err) } - cfg.WorkspaceRoot = filepath.Join(homeDir, ".flashduty-runner", "workspace") + cfg.WorkspaceRoot = filepath.Join(homeDir, ".flashduty") } // Log level: flag > env > default diff --git a/environment/environment.go b/environment/environment.go index d2b0b56..e15f2bd 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -591,34 +591,48 @@ func (e *Environment) MCPListTools(ctx context.Context, args *protocol.MCPListTo return result, nil } -// SyncSkill syncs a skill from cloud to local workspace. +// SyncSkill is probe-or-install. With empty ZipData it checks local cache +// against args.Checksum: hit returns {Cached:true,Path}; miss returns +// {Cached:false,Path:""} so the cloud retries with zip. With ZipData present +// it overwrites /skills// and writes .checksum. +// +// args.SkillDir is accepted for backward compatibility but ignored — the +// runner derives the path from skill_name to keep layout under its control. 
func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArgs) (*protocol.SyncSkillResult, error) { - skillDir, err := e.safePath(args.SkillDir) + if args.SkillName == "" || args.Checksum == "" { + return nil, fmt.Errorf("skill_name and checksum are required") + } + relDir := filepath.Join("skills", args.SkillName) + skillDir, err := e.safePath(relDir) if err != nil { return nil, err } - // Decode zip data + // Probe phase: cache hit if .checksum matches AND SKILL.md exists. + cachedSum, _ := os.ReadFile(filepath.Join(skillDir, ".checksum")) + if string(cachedSum) == args.Checksum { + if _, err := os.Stat(filepath.Join(skillDir, "SKILL.md")); err == nil { + return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: true}, nil + } + } + + // Cache miss with no zip: signal cloud to retry with zip_data. + if args.ZipData == "" { + return &protocol.SyncSkillResult{Success: true, Cached: false}, nil + } + + // Install: decode, wipe, unzip, write .checksum. zipData, err := base64.StdEncoding.DecodeString(args.ZipData) if err != nil { return nil, fmt.Errorf("failed to decode zip data: %w", err) } - - // Unzip to skill directory if err := e.unzipSkill(zipData, skillDir); err != nil { return nil, fmt.Errorf("failed to unzip skill: %w", err) } - - // Write checksum file - checksumPath := filepath.Join(skillDir, ".checksum") - if err := os.WriteFile(checksumPath, []byte(args.Checksum), 0o644); err != nil { + if err := os.WriteFile(filepath.Join(skillDir, ".checksum"), []byte(args.Checksum), 0o644); err != nil { return nil, fmt.Errorf("failed to write checksum: %w", err) } - - return &protocol.SyncSkillResult{ - Success: true, - Path: args.SkillDir, - }, nil + return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: false}, nil } // unzipSkill extracts a zip archive to the destination directory. 
diff --git a/environment/environment_test.go b/environment/environment_test.go index 827f462..f462d85 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -1,6 +1,8 @@ package environment import ( + "archive/zip" + "bytes" "context" "encoding/base64" "os" @@ -14,6 +16,21 @@ import ( "github.com/flashcatcloud/flashduty-runner/protocol" ) +// buildTestZip creates an in-memory zip archive from a map of filename→content. +func buildTestZip(t *testing.T, files map[string]string) []byte { + t.Helper() + var buf bytes.Buffer + w := zip.NewWriter(&buf) + for name, content := range files { + f, err := w.Create(name) + require.NoError(t, err) + _, err = f.Write([]byte(content)) + require.NoError(t, err) + } + require.NoError(t, w.Close()) + return buf.Bytes() +} + func newTestEnvironment(t *testing.T) *Environment { tmpDir := t.TempDir() checker := permission.NewChecker(map[string]string{ @@ -201,3 +218,68 @@ func TestEnvironment_SafePath(t *testing.T) { }) } } + +func TestSyncSkill_ProbeMiss(t *testing.T) { + ws := newTestEnvironment(t) + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", Checksum: "csum1", + }) + require.NoError(t, err) + assert.False(t, res.Cached, "expected cache miss") + assert.Empty(t, res.Path, "expected empty path on miss") +} + +func TestSyncSkill_ProbeHit(t *testing.T) { + ws := newTestEnvironment(t) + dir := filepath.Join(ws.Root(), "skills", "demo") + require.NoError(t, os.MkdirAll(dir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("csum1"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte("body"), 0o644)) + + // Resolve symlinks so the comparison holds on macOS (/var → /private/var). 
+ wantDir, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", Checksum: "csum1", + }) + require.NoError(t, err) + assert.True(t, res.Cached, "expected cache hit") + assert.Equal(t, wantDir, res.Path) +} + +func TestSyncSkill_InstallOverwrites(t *testing.T) { + ws := newTestEnvironment(t) + dir := filepath.Join(ws.Root(), "skills", "demo") + + // Pre-seed an old version with a stale checksum. + require.NoError(t, os.MkdirAll(dir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "old-file.txt"), []byte("stale"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("old"), 0o644)) + + // Resolve symlinks so the comparison holds on macOS (/var → /private/var). + wantDir, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + + zipBytes := buildTestZip(t, map[string]string{ + "SKILL.md": "---\nname: demo\ndescription: x\n---\nbody", + }) + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", + Checksum: "new", + ZipData: base64.StdEncoding.EncodeToString(zipBytes), + }) + require.NoError(t, err) + assert.False(t, res.Cached, "install must report Cached=false") + assert.Equal(t, wantDir, res.Path) + + // Old file should be gone (unzipSkill does RemoveAll before extracting). + _, err = os.Stat(filepath.Join(dir, "old-file.txt")) + assert.True(t, os.IsNotExist(err), "old file should be wiped") + + _, err = os.Stat(filepath.Join(dir, "SKILL.md")) + assert.NoError(t, err, "new SKILL.md should exist") + + sum, _ := os.ReadFile(filepath.Join(dir, ".checksum")) + assert.Equal(t, "new", string(sum)) +} diff --git a/protocol/messages.go b/protocol/messages.go index 2abbc1a..4249501 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -284,9 +284,16 @@ type SyncSkillArgs struct { } // SyncSkillResult is the result of a sync_skill operation. 
+// +// Probe semantics: when SyncSkillArgs.ZipData is empty, the runner checks its +// local cache and returns either {Success:true, Cached:true, Path} on hit or +// {Success:true, Cached:false, Path:""} on miss (cloud must retry with zip). +// When ZipData is non-empty, the runner installs unconditionally and returns +// {Success:true, Cached:false, Path}. type SyncSkillResult struct { Success bool `json:"success"` - Path string `json:"path"` // Local path where skill was extracted + Path string `json:"path"` // Local path where skill is materialized + Cached bool `json:"cached,omitempty"` // True iff cache hit } // MCPToolInfo represents metadata for an MCP tool. From a6eb9b4f371e2cc9f3da87e00a99478fb363a974 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:12:29 +0800 Subject: [PATCH 05/11] refactor(test): extract resolveDir helper, use errors.Is for ErrNotExist - resolveDir(t, dir) replaces duplicated EvalSymlinks boilerplate in ProbeHit and InstallOverwrites tests - errors.Is(err, os.ErrNotExist) replaces deprecated os.IsNotExist pattern Co-Authored-By: Claude Sonnet 4.6 --- environment/environment_test.go | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/environment/environment_test.go b/environment/environment_test.go index f462d85..c3fa126 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "encoding/base64" + "errors" "os" "path/filepath" "testing" @@ -31,6 +32,15 @@ func buildTestZip(t *testing.T, files map[string]string) []byte { return buf.Bytes() } +// resolveDir resolves symlinks on the given directory path so path comparisons +// hold on macOS where t.TempDir() returns /var/... but safePath resolves to /private/var/... 
+func resolveDir(t *testing.T, dir string) string { + t.Helper() + resolved, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + return resolved +} + func newTestEnvironment(t *testing.T) *Environment { tmpDir := t.TempDir() checker := permission.NewChecker(map[string]string{ @@ -236,16 +246,12 @@ func TestSyncSkill_ProbeHit(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("csum1"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte("body"), 0o644)) - // Resolve symlinks so the comparison holds on macOS (/var → /private/var). - wantDir, err := filepath.EvalSymlinks(dir) - require.NoError(t, err) - res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ SkillName: "demo", Checksum: "csum1", }) require.NoError(t, err) assert.True(t, res.Cached, "expected cache hit") - assert.Equal(t, wantDir, res.Path) + assert.Equal(t, resolveDir(t, dir), res.Path) } func TestSyncSkill_InstallOverwrites(t *testing.T) { @@ -257,10 +263,6 @@ func TestSyncSkill_InstallOverwrites(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(dir, "old-file.txt"), []byte("stale"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("old"), 0o644)) - // Resolve symlinks so the comparison holds on macOS (/var → /private/var). - wantDir, err := filepath.EvalSymlinks(dir) - require.NoError(t, err) - zipBytes := buildTestZip(t, map[string]string{ "SKILL.md": "---\nname: demo\ndescription: x\n---\nbody", }) @@ -271,11 +273,11 @@ func TestSyncSkill_InstallOverwrites(t *testing.T) { }) require.NoError(t, err) assert.False(t, res.Cached, "install must report Cached=false") - assert.Equal(t, wantDir, res.Path) + assert.Equal(t, resolveDir(t, dir), res.Path) // Old file should be gone (unzipSkill does RemoveAll before extracting). 
_, err = os.Stat(filepath.Join(dir, "old-file.txt")) - assert.True(t, os.IsNotExist(err), "old file should be wiped") + assert.True(t, errors.Is(err, os.ErrNotExist), "old file should be wiped") _, err = os.Stat(filepath.Join(dir, "SKILL.md")) assert.NoError(t, err, "new SKILL.md should exist") From acb919bf2a00da2bd2e6bd2247fac5a6bf2bd3e7 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:14:01 +0800 Subject: [PATCH 06/11] harden(skill): reject path separators in skill_name --- environment/environment.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environment/environment.go b/environment/environment.go index e15f2bd..803e243 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -602,6 +602,9 @@ func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArg if args.SkillName == "" || args.Checksum == "" { return nil, fmt.Errorf("skill_name and checksum are required") } + if strings.ContainsAny(args.SkillName, `/\`) || args.SkillName == ".." || args.SkillName == "." 
{ + return nil, fmt.Errorf("skill_name must not contain path separators") + } relDir := filepath.Join("skills", args.SkillName) skillDir, err := e.safePath(relDir) if err != nil { From 8160fdeedd85df1666c226eeb683757c0cf9cf1c Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:23:09 +0800 Subject: [PATCH 07/11] docs(skill): document ~/.flashduty home and probe-or-install sync_skill --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d384f76..f3cda75 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ Create `/etc/flashduty-runner/env`: ```bash FLASHDUTY_RUNNER_TOKEN=ent_xxx # FLASHDUTY_RUNNER_URL=wss://custom.example.com/safari/environment/ws -# FLASHDUTY_RUNNER_WORKSPACE=/var/flashduty/workspace +# FLASHDUTY_RUNNER_HOME=/var/flashduty ``` ```bash @@ -213,6 +213,15 @@ sudo systemctl daemon-reload sudo systemctl enable --now flashduty-runner ``` +## Skills + +Skills are materialized lazily. When the agent calls the `skill` tool, the cloud sends `sync_skill` with `(skill_name, checksum, zip_data?)`: + +- **Empty `zip_data`** = probe. The runner returns `{cached: true, path}` if `/skills//.checksum` already matches the requested checksum and `SKILL.md` is present; otherwise `{cached: false}` and the cloud will retry with the bundle attached. +- **Non-empty `zip_data`** = install. The runner wipes `/skills//`, unzips, and writes `.checksum`. Each skill name has exactly one slot — no version retention. Builtin skills go through the same flow with their bundle sourced from the cloud's embedded FS instead of S3. + +Inspect installed skills with `ls ~/.flashduty/skills/`. + ## Configuration Reference Configuration is via command-line flags or environment variables (flags take precedence). 
@@ -221,7 +230,7 @@ Configuration is via command-line flags or environment variables (flags take pre |------|-------------|----------|---------|-------------| | `--token` | `FLASHDUTY_RUNNER_TOKEN` | Yes | - | Authentication token | | `--url` | `FLASHDUTY_RUNNER_URL` | No | `wss://api.flashcat.cloud/safari/environment/ws` | WebSocket endpoint | -| `--workspace` | `FLASHDUTY_RUNNER_WORKSPACE` | No | `~/.flashduty-runner/workspace` | Workspace root directory | +| `--workspace` | `FLASHDUTY_RUNNER_HOME` | No | `~/.flashduty` | Runner home directory. Skills land at `/skills//`. `FLASHDUTY_RUNNER_WORKSPACE` is accepted as a deprecated alias for back-compat. | | `--log-level` | `FLASHDUTY_RUNNER_LOG_LEVEL` | No | `info` | Log level: debug, info, warn, error | ## Troubleshooting From b9868971e2bc0ccbd1fe9461755e9461dda0f7ba Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 16:33:55 +0800 Subject: [PATCH 08/11] refactor(env,permission): SSRF + AST hardening + grep regex + bash structured MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runner-side counterparts of fc-safari's 2026-05-02 builtin-tools refactor: - environment/webfetch: SSRF guard via inlined safeHTTPTransport + safeCheckRedirect. Pre-flight validateURL refuses RFC1918 / loopback / link-local / IMDS before any TCP dial; CheckRedirect re-validates each hop so a 302 to 169.254.169.254 is refused. Inlined from go-pkg/x/netsafe (open-source repo cannot import internal modules). - environment/htmlx: HTML-to-markdown / text helpers inlined from go-pkg/x/htmlx for the same reason. html-to-markdown is now a direct dep. - environment/environment::unzipSkill: zip-slip closed — compares abs target against dest+separator and rejects names containing '..'. - environment/environment::grepWithGo: was strings.Contains (literal) — now compiles regex, falls back with regex_compile_error surfaced. 
Default ignore (.git, node_modules, .flashduty) added; Scanner buffer raised to 1 MiB; rg exit-2 surfaces real I/O errors instead of silent "no matches"; pattern starting with '-' gets '--' prefix; rsplit on ':' so paths containing ':' parse cleanly. - protocol.GrepArgs gains output_mode, context_before/after, head_limit, file_type, case_sensitive — forwarded to ripgrep flags. - environment/environment::Bash: per-stream large-output (stderr now truncates to its own bash_stderr_*.txt); LimitedWriter honors the io.Writer contract (returns ErrOutputCapped at the cap, exposes Hit(), appends "[output capped at 10MB]" marker once). - protocol.BashResult carries truncated_stdout/stderr, stdout_file_path/stderr_file_path, *_total_size; legacy fields remain populated so old safari clients keep working. - environment/large_output::ShouldSkipForOutputsDir: substring match → word-boundary regex (no more 'thread '/'head ' false positives). WriteRaw lowercased to writeRaw (single internal caller). - permission/permission: AST walk now visits CmdSubst / ProcSubst / nested Stmts so cat <(curl evil) and echo $(curl evil) hit the same rules. Redirect targets are checked instead of discarded (cmd > /etc/passwd refused). Canonical-form normalization on both rule and command sides (kubectl get pods matches kubectl get *); env-prefix dual evaluation (KUBECONFIG=x kubectl get pods still matches kubectl get *). Rules sort by literal-prefix specificity, first match wins. SafeReadOnlyRules drops echo */find * — find -delete / find -exec rm now denied by default; replaced with scoped allow patterns. E2E verified live: bash returns the new structured shape, web (action=fetch) reaches example.com via the safe transport, all 17+ new permission/permission_test scenarios pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- environment/bash_output_test.go | 114 +++++++++ environment/environment.go | 405 +++++++++++++++++++++++++++----- environment/environment_test.go | 91 +++++++ environment/htmlx.go | 51 ++++ environment/large_output.go | 20 +- environment/netsafe.go | 192 +++++++++++++++ environment/security_test.go | 165 +++++++++++++ environment/webfetch.go | 73 ++---- go.mod | 2 +- go.sum | 4 +- permission/permission.go | 379 ++++++++++++++++++++++++------ permission/permission_test.go | 192 +++++++++++++++ protocol/messages.go | 50 +++- 13 files changed, 1537 insertions(+), 201 deletions(-) create mode 100644 environment/bash_output_test.go create mode 100644 environment/htmlx.go create mode 100644 environment/netsafe.go create mode 100644 environment/security_test.go diff --git a/environment/bash_output_test.go b/environment/bash_output_test.go new file mode 100644 index 0000000..b43efd8 --- /dev/null +++ b/environment/bash_output_test.go @@ -0,0 +1,114 @@ +package environment + +import ( + "bytes" + "context" + "errors" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/flashcatcloud/flashduty-runner/protocol" +) + +func TestLimitedWriter_HitReturnsErrShortWrite(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 5} + + // First write fits exactly within the cap. + n, err := w.Write([]byte("hello")) + require.NoError(t, err) + assert.Equal(t, 5, n) + assert.False(t, w.Hit(), "cap not yet hit when curr==Limit and next write hasn't happened") + + // Second write hits the cap. Contract: report ErrOutputCapped, not nil. 
+ n, err = w.Write([]byte("world")) + assert.Equal(t, 0, n, "no bytes accepted past the cap") + require.Error(t, err) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.True(t, w.Hit()) + assert.Contains(t, buf.String(), "[output capped at 10MB]", "marker appended exactly once") + + // Subsequent writes also report cap, marker not duplicated. + prev := buf.String() + _, err = w.Write([]byte("more")) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.Equal(t, prev, buf.String(), "marker only appended once") +} + +func TestLimitedWriter_PartialWriteAtBoundary(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 3} + + // 5-byte write straddles the boundary: 3 bytes accepted, marker appended, + // ErrOutputCapped surfaced. + n, err := w.Write([]byte("abcde")) + assert.Equal(t, 3, n) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.True(t, w.Hit()) + assert.True(t, strings.HasPrefix(buf.String(), "abc")) + assert.Contains(t, buf.String(), "[output capped at 10MB]") +} + +func TestLimitedWriter_BelowCap(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 100} + + n, err := w.Write([]byte("hi")) + require.NoError(t, err) + assert.Equal(t, 2, n) + assert.False(t, w.Hit()) + assert.NotContains(t, buf.String(), "capped") +} + +func TestEnvironment_Bash_StderrTruncatedSeparately(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Generate >30k bytes to stderr, nothing on stdout. Using awk so the + // subprocess exits cleanly once the byte budget is met (no SIGPIPE chain). 
+ cmd := `awk 'BEGIN{ for(i=0;i<5000;i++) printf("ERROR_LINE_%d\n", i) > "/dev/stderr" }'` + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: cmd}) + require.NoError(t, err) + + assert.True(t, result.TruncatedStderr, "stderr should be truncated when over the cap") + assert.NotEmpty(t, result.StderrFilePath, "stderr file should be written for full content") + assert.Greater(t, result.StderrTotalSize, int64(30000)) + assert.False(t, result.TruncatedStdout, "stdout should be untruncated when empty") + assert.Empty(t, result.StdoutFilePath) +} + +func TestEnvironment_Bash_NormalOutputUnchanged(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: "echo small"}) + require.NoError(t, err) + assert.Equal(t, "small\n", result.Stdout) + assert.False(t, result.TruncatedStdout) + assert.False(t, result.TruncatedStderr) + assert.Empty(t, result.StdoutFilePath) + assert.Empty(t, result.StderrFilePath) +} + +func TestEnvironment_Bash_StdoutAtHardCapMarker(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Push past the LimitedWriter's 10MB cap. dd writes a fixed budget then + // exits, so we don't depend on SIGPIPE propagation to terminate `yes`. + cmd := "dd if=/dev/zero bs=1048576 count=11 2>/dev/null" + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: cmd}) + require.NoError(t, err) + + assert.True(t, result.TruncatedStdout, "10MB cap must have been hit") + // The cap marker is appended to the captured buffer; after + // processLargeOutput it lives in the spilled file (the in-context preview + // only carries the first ~20 lines). 
+ if !strings.Contains(result.Stdout, "[output capped") { + assert.NotEmpty(t, result.StdoutFilePath, "spilled file must exist when cap was hit") + } +} diff --git a/environment/environment.go b/environment/environment.go index 803e243..86c1658 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -8,12 +8,14 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "io" "log/slog" "os" "os/exec" "path/filepath" + "regexp" "sort" "strconv" "strings" @@ -286,6 +288,15 @@ func (e *Environment) Glob(ctx context.Context, args *protocol.GlobArgs) (*proto return &protocol.GlobResult{Matches: matches}, nil } +// defaultGrepIgnoreDirs are walked-but-skipped by the Go fallback to align with +// ripgrep's default .gitignore behavior. A model-issued grep on a workspace +// containing `node_modules` should not waste minutes scanning it. +var defaultGrepIgnoreDirs = map[string]struct{}{ + ".git": {}, + "node_modules": {}, + ".flashduty": {}, // covers .flashduty/.work and friends +} + // Grep searches for a pattern in files. func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*protocol.GrepResult, error) { var res *protocol.GrepResult @@ -301,12 +312,19 @@ func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*proto return res, err } - // Build content string - var sb strings.Builder - for _, match := range res.Matches { - fmt.Fprintf(&sb, "%s:%d:%s\n", match.Path, match.LineNumber, match.Content) + // Apply head_limit cap before formatting; the truncation note belongs in the + // final content so the model can see "I asked for 10, got 10 of N". + totalMatches := len(res.Matches) + if args.HeadLimit > 0 && totalMatches > args.HeadLimit { + res.Matches = res.Matches[:args.HeadLimit] + } + + // Format content per OutputMode. Default ("" or "content") preserves the + // pre-PR-4 file:line:text shape so existing callers see no change. 
+ content := formatGrepContent(res.Matches, args.OutputMode) + if args.HeadLimit > 0 && totalMatches > args.HeadLimit { + content += fmt.Sprintf("\n[truncated to first %d of %d matches]\n", args.HeadLimit, totalMatches) } - content := sb.String() // Process large output processor := NewLargeOutputProcessor(e, DefaultLargeOutputConfig()) @@ -325,37 +343,156 @@ func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*proto return res, nil } +// formatGrepContent renders matches per OutputMode. Unknown / empty mode falls +// back to "content" so downstream callers get the historical shape. +func formatGrepContent(matches []protocol.GrepMatch, mode string) string { + var sb strings.Builder + switch mode { + case "files_with_matches": + seen := make(map[string]struct{}, len(matches)) + for _, m := range matches { + if _, ok := seen[m.Path]; ok { + continue + } + seen[m.Path] = struct{}{} + sb.WriteString(m.Path) + sb.WriteByte('\n') + } + case "count": + counts := make(map[string]int, len(matches)) + order := make([]string, 0, len(matches)) + for _, m := range matches { + if _, ok := counts[m.Path]; !ok { + order = append(order, m.Path) + } + counts[m.Path]++ + } + for _, p := range order { + fmt.Fprintf(&sb, "%s:%d\n", p, counts[p]) + } + default: // "content" or empty + for _, m := range matches { + fmt.Fprintf(&sb, "%s:%d:%s\n", m.Path, m.LineNumber, m.Content) + } + } + return sb.String() +} + func (e *Environment) grepWithRipgrep(ctx context.Context, args *protocol.GrepArgs) (*protocol.GrepResult, error) { - cmdArgs := make([]string, 0, 6+2*len(args.Include)+2) - cmdArgs = append(cmdArgs, "--column", "--line-number", "--no-heading", "--color", "never", "--smart-case") + cmdArgs := make([]string, 0, 16+2*len(args.Include)) + + // OutputMode-specific flags. "content" uses --column for column info; the + // other modes drop --column so each rg line has a stable shape we can parse. 
+ mode := args.OutputMode + switch mode { + case "files_with_matches": + cmdArgs = append(cmdArgs, "--files-with-matches", "--no-heading", "--color", "never") + case "count": + cmdArgs = append(cmdArgs, "--count", "--no-heading", "--color", "never") + default: + cmdArgs = append(cmdArgs, "--line-number", "--no-heading", "--color", "never") + } + + switch { + case args.CaseSensitive == nil: + cmdArgs = append(cmdArgs, "--smart-case") + case *args.CaseSensitive: + cmdArgs = append(cmdArgs, "--case-sensitive") + default: + cmdArgs = append(cmdArgs, "--ignore-case") + } + + if args.ContextBefore > 0 && (mode == "" || mode == "content") { + cmdArgs = append(cmdArgs, "--before-context", strconv.Itoa(args.ContextBefore)) + } + if args.ContextAfter > 0 && (mode == "" || mode == "content") { + cmdArgs = append(cmdArgs, "--after-context", strconv.Itoa(args.ContextAfter)) + } + if args.FileType != "" { + cmdArgs = append(cmdArgs, "--type", args.FileType) + } for _, inc := range args.Include { cmdArgs = append(cmdArgs, "--glob", inc) } - cmdArgs = append(cmdArgs, args.Pattern, ".") + + // `--` separator so a pattern starting with `-` is not parsed as an rg flag + // (e.g. searching for "--foo" or "-x" in source). + cmdArgs = append(cmdArgs, "--", args.Pattern, ".") cmd := exec.CommandContext(ctx, "rg", cmdArgs...) //nolint:gosec // G204: args built from validated grep parameters cmd.Dir = e.root - var stdout strings.Builder + var stdout, stderr strings.Builder cmd.Stdout = &stdout - _ = cmd.Run() // rg returns exit code 1 if no matches found + cmd.Stderr = &stderr + runErr := cmd.Run() + + // rg exit codes: 0 = matches found, 1 = no matches, 2 = real error (bad + // pattern, I/O failure, permission). We previously swallowed code 2 as + // "no matches", which silently dropped pattern compile errors. 
+ if runErr != nil { + var exitErr *exec.ExitError + if errors.As(runErr, &exitErr) && exitErr.ExitCode() >= 2 { + msg := strings.TrimSpace(stderr.String()) + if msg == "" { + msg = exitErr.Error() + } + return nil, fmt.Errorf("ripgrep failed: %s", msg) + } + // exit code 1 (no matches) — keep going with empty stdout. + } lines := strings.Split(stdout.String(), "\n") results := make([]protocol.GrepMatch, 0, len(lines)) + for _, line := range lines { if line == "" { continue } - parts := strings.SplitN(line, ":", 4) - if len(parts) < 4 { - continue + + switch mode { + case "files_with_matches": + results = append(results, protocol.GrepMatch{Path: line}) + case "count": + // rg --count emits "path:N"; rsplit once so paths containing `:` survive. + idx := strings.LastIndex(line, ":") + if idx < 0 { + continue + } + cnt, _ := strconv.Atoi(line[idx+1:]) + results = append(results, protocol.GrepMatch{Path: line[:idx], LineNumber: cnt}) + default: + // rg with --line-number (no --column) emits "path:lineNum:content". + // Rsplit twice from the right so path containing ':' (Windows drive, + // scratch namespaces, etc.) parses correctly. + lastIdx := strings.LastIndex(line, ":") + if lastIdx < 0 { + continue + } + head, content := line[:lastIdx], line[lastIdx+1:] + midIdx := strings.LastIndex(head, ":") + if midIdx < 0 { + // Context lines from rg use `path-lineNum-content` separator; + // fall back to splitting on `-` for those rather than dropping. 
+ dashIdx := strings.LastIndex(head, "-") + if dashIdx < 0 { + continue + } + lineNum, _ := strconv.Atoi(head[dashIdx+1:]) + results = append(results, protocol.GrepMatch{ + Path: head[:dashIdx], + LineNumber: lineNum, + Content: content, + }) + continue + } + lineNum, _ := strconv.Atoi(head[midIdx+1:]) + results = append(results, protocol.GrepMatch{ + Path: head[:midIdx], + LineNumber: lineNum, + Content: content, + }) } - lineNum, _ := strconv.Atoi(parts[1]) - results = append(results, protocol.GrepMatch{ - Path: parts[0], - LineNumber: lineNum, - Content: parts[3], - }) } return &protocol.GrepResult{Matches: results}, nil } @@ -367,6 +504,30 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( include = []string{"**/*"} } + // Try to compile the pattern as a regex. On compile failure, fall back to + // literal substring match and surface the error so the caller knows the + // pattern was treated as text rather than a regex. + caseInsensitive := args.CaseSensitive != nil && !*args.CaseSensitive + pattern := args.Pattern + if caseInsensitive { + pattern = "(?i)" + pattern + } + re, compileErr := regexp.Compile(pattern) + literalNeedle := args.Pattern + if caseInsensitive { + literalNeedle = strings.ToLower(literalNeedle) + } + + matchLine := func(line string) bool { + if re != nil { + return re.MatchString(line) + } + if caseInsensitive { + return strings.Contains(strings.ToLower(line), literalNeedle) + } + return strings.Contains(line, literalNeedle) + } + for _, inc := range include { matches, err := e.Glob(ctx, &protocol.GlobArgs{Pattern: inc}) if err != nil { @@ -374,17 +535,27 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( } for _, match := range matches.Matches { - realPath, _ := e.safePath(match) + if shouldIgnoreGrepPath(match) { + continue + } + realPath, err := e.safePath(match) + if err != nil { + continue + } file, err := os.Open(realPath) if err != nil { continue } scanner := 
bufio.NewScanner(file) + // Default 64KB cap silently drops long lines (minified JS, packed + // JSON, log dumps). Bump to 1MB so the model gets a faithful match + // set rather than a confusing zero-result on real-world files. + scanner.Buffer(make([]byte, 64*1024), 1024*1024) lineNum := 1 for scanner.Scan() { line := scanner.Text() - if strings.Contains(line, args.Pattern) { + if matchLine(line) { results = append(results, protocol.GrepMatch{ Path: match, LineNumber: lineNum, @@ -396,7 +567,23 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( _ = file.Close() } } - return &protocol.GrepResult{Matches: results}, nil + + res := &protocol.GrepResult{Matches: results} + if compileErr != nil { + res.RegexCompileError = compileErr.Error() + } + return res, nil +} + +// shouldIgnoreGrepPath returns true for any path whose relative form contains +// a default-ignored directory segment (.git, node_modules, .flashduty/...). +func shouldIgnoreGrepPath(rel string) bool { + for _, seg := range strings.Split(filepath.ToSlash(rel), "/") { + if _, ok := defaultGrepIgnoreDirs[seg]; ok { + return true + } + } + return false } // Bash executes a bash command in the workspace. 
@@ -418,7 +605,9 @@ func (e *Environment) Bash(ctx context.Context, args *protocol.BashArgs) (*proto // Skip large output processing for .outputs/ directory reads if ShouldSkipForOutputsDir(args.Command) { - result.TotalSize = int64(len(result.Stdout)) + result.StdoutTotalSize = int64(len(result.Stdout)) + result.StderrTotalSize = int64(len(result.Stderr)) + result.TotalSize = result.StdoutTotalSize return result, nil } @@ -449,12 +638,14 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s cmd := exec.CommandContext(ctx, "bash", "-c", command) //nolint:gosec // G204: command is user-initiated via workspace tool cmd.Dir = workdir - // Use a limited writer to prevent OOM from very large outputs - // 10MB limit is plenty for LLM context while preventing memory exhaustion + // 10MB per-stream cap prevents OOM from runaway commands while leaving + // plenty of headroom for normal LLM-context output. const maxOutputSize = 10 * 1024 * 1024 var stdout, stderr strings.Builder - cmd.Stdout = &LimitedWriter{W: &stdout, Limit: maxOutputSize} - cmd.Stderr = &LimitedWriter{W: &stderr, Limit: maxOutputSize} + stdoutW := &LimitedWriter{W: &stdout, Limit: maxOutputSize} + stderrW := &LimitedWriter{W: &stderr, Limit: maxOutputSize} + cmd.Stdout = stdoutW + cmd.Stderr = stderrW err := cmd.Run() exitCode := 0 @@ -462,65 +653,151 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s if exitError, ok := err.(*exec.ExitError); ok { exitCode = exitError.ExitCode() } else if ctx.Err() == context.DeadlineExceeded { - return &protocol.BashResult{ - Stdout: stdout.String(), - Stderr: "command timed out", - ExitCode: 124, - }, nil + res := &protocol.BashResult{ + Stdout: stdout.String(), + Stderr: "command timed out", + ExitCode: 124, + TruncatedStdout: stdoutW.Hit(), + } + if stdoutW.Hit() { + res.Truncated = true + } + return res, nil + } else if errors.Is(err, ErrOutputCapped) { + // Command itself didn't fail — only the writer's 
contract violation + // surfaced. Treat as a successful run with capped buffers. + err = nil } else { return nil, fmt.Errorf("failed to execute command: %w", err) } } - return &protocol.BashResult{ - Stdout: stdout.String(), - Stderr: stderr.String(), - ExitCode: exitCode, - }, nil + res := &protocol.BashResult{ + Stdout: stdout.String(), + Stderr: stderr.String(), + ExitCode: exitCode, + TruncatedStdout: stdoutW.Hit(), + TruncatedStderr: stderrW.Hit(), + } + if stdoutW.Hit() { + res.Truncated = true + } + return res, nil } -// LimitedWriter is an io.Writer that limits the total number of bytes written. +// ErrOutputCapped is returned by LimitedWriter once its byte cap has been +// reached. Surfaces the contract violation that would otherwise be hidden by +// the writer pretending it accepted everything (`return len(p), nil`). +var ErrOutputCapped = errors.New("output capped") + +// LimitedWriter is an io.Writer that caps the total number of bytes forwarded +// to the wrapped writer. After the cap is reached it appends an inline marker +// once and then returns ErrOutputCapped on every subsequent write so callers +// can detect truncation via Hit(). +// +// NOTE: exec.Cmd treats any writer error as fatal — callers wrapping LimitedWriter +// around a stdio pipe must filter ErrOutputCapped out of cmd.Run()'s error chain. type LimitedWriter struct { - W io.Writer - Limit int64 - curr int64 + W io.Writer + Limit int64 + curr int64 + hit bool + marker bool // marker already appended } +const limitedWriterMarker = "\n[output capped at 10MB]\n" + func (l *LimitedWriter) Write(p []byte) (n int, err error) { if l.curr >= l.Limit { - return len(p), nil + l.hit = true + l.appendMarker() + // Honor io.Writer contract: report only the bytes actually accepted + // downstream (zero) and propagate the cap signal so the caller can + // surface truncation instead of silently dropping data. 
+ return 0, ErrOutputCapped } left := l.Limit - l.curr if int64(len(p)) > left { n, err = l.W.Write(p[:left]) l.curr += int64(n) - return len(p), err + l.hit = true + l.appendMarker() + if err == nil { + err = ErrOutputCapped + } + // Return the bytes actually written to the underlying writer; the + // remainder (p[left:]) was discarded along with the cap signal. + return n, err } n, err = l.W.Write(p) l.curr += int64(n) return n, err } -// processLargeOutput processes command output for truncation if needed. +// Hit reports whether the writer's byte cap has been reached. +func (l *LimitedWriter) Hit() bool { return l.hit } + +// appendMarker writes the cap marker into the underlying writer exactly once. +// Bypasses the per-call accounting since the marker is bookkeeping, not output. +func (l *LimitedWriter) appendMarker() { + if l.marker { + return + } + l.marker = true + _, _ = l.W.Write([]byte(limitedWriterMarker)) +} + +// processLargeOutput processes both stdout and stderr for truncation if needed. +// Each stream is spilled to its own .outputs/_*.txt or +// .outputs/_stderr_*.txt file when it exceeds the configured cap. +// +// Stderr was previously left untouched, which let high-stderr commands +// (e.g. `yes 2>&1 1>/dev/null | head`) flood the LLM context unbounded. func (e *Environment) processLargeOutput(ctx context.Context, result *protocol.BashResult, prefix string) (*protocol.BashResult, error) { processor := NewLargeOutputProcessor(e, DefaultLargeOutputConfig()) - processed, err := processor.Process(ctx, result.Stdout, prefix) + + stdoutSize := int64(len(result.Stdout)) + stderrSize := int64(len(result.Stderr)) + + processedOut, err := processor.Process(ctx, result.Stdout, prefix) if err != nil { - result.TotalSize = int64(len(result.Stdout)) + // File-write failure: keep the raw streams and at minimum report sizes. 
+ result.StdoutTotalSize = stdoutSize + result.StderrTotalSize = stderrSize + result.TotalSize = stdoutSize return result, nil } + result.Stdout = processedOut.Content + result.StdoutTotalSize = processedOut.TotalSize + result.StdoutFilePath = processedOut.FilePath + if processedOut.Truncated { + result.TruncatedStdout = true + } + + processedErr, err := processor.Process(ctx, result.Stderr, prefix+"_stderr") + if err != nil { + result.StderrTotalSize = stderrSize + } else { + result.Stderr = processedErr.Content + result.StderrTotalSize = processedErr.TotalSize + result.StderrFilePath = processedErr.FilePath + if processedErr.Truncated { + result.TruncatedStderr = true + } + } - result.Stdout = processed.Content - result.Truncated = processed.Truncated - result.FilePath = processed.FilePath - result.TotalSize = processed.TotalSize + // Legacy mirrors: keep the stdout-side fields populated for old clients. + result.Truncated = result.TruncatedStdout + result.FilePath = result.StdoutFilePath + result.TotalSize = result.StdoutTotalSize return result, nil } -// WriteRaw writes raw content (not base64 encoded) to a file. -// Used internally for saving large output files. -func (e *Environment) WriteRaw(ctx context.Context, path string, content []byte) error { +// writeRaw writes raw content (not base64 encoded) to a file. +// Used internally for saving large output files; unexported because no +// off-package caller exists (only large_output.go uses it). +func (e *Environment) writeRaw(ctx context.Context, path string, content []byte) error { realPath, err := e.safePath(path) if err != nil { return err @@ -654,16 +931,32 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { return fmt.Errorf("failed to create destination directory: %w", err) } + // Resolve dest to absolute and append a trailing separator BEFORE the + // loop. 
Zip-slip mitigation: comparing against the dir without a + // trailing separator (the previous bug) lets a sibling with a matching + // prefix — e.g. dest="/tmp/skill" vs absTarget="/tmp/skill-evil/x" — + // pass HasPrefix even though the entry escapes the intended directory. + absDest, err := filepath.Abs(dest) + if err != nil { + return fmt.Errorf("failed to resolve dest: %w", err) + } + destPrefix := absDest + string(os.PathSeparator) + for _, f := range reader.File { - // Security: validate zip entry path to prevent path traversal + // Reject any raw "..": filepath.Clean turns "a/../b" into "b" so a + // surviving ".." after Clean — or any literal ".." in the original + // — means the entry tried to escape from the root. + if strings.Contains(f.Name, "..") { + return fmt.Errorf("invalid file path in zip: %s", f.Name) + } cleanName := filepath.Clean(f.Name) if strings.HasPrefix(cleanName, "..") || filepath.IsAbs(cleanName) { return fmt.Errorf("invalid file path in zip: %s", f.Name) } - targetPath := filepath.Join(dest, cleanName) + targetPath := filepath.Join(absDest, cleanName) absTarget, err := filepath.Abs(targetPath) - if err != nil || !strings.HasPrefix(absTarget, dest) { + if err != nil || (absTarget != absDest && !strings.HasPrefix(absTarget, destPrefix)) { return fmt.Errorf("file path escapes destination: %s", f.Name) } diff --git a/environment/environment_test.go b/environment/environment_test.go index c3fa126..5a3c81f 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -8,6 +8,7 @@ import ( "errors" "os" "path/filepath" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -162,6 +163,96 @@ func TestEnvironment_Grep(t *testing.T) { assert.Len(t, result.Matches, 2) } +// TestEnvironment_Grep_RegexNotSubstring exercises the PR-4 fix: the legacy +// path used strings.Contains, so "a.b" matched only the literal "a.b" instead +// of "axb". The regex path must now match both via the wildcard. 
+func TestEnvironment_Grep_RegexNotSubstring(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + require.NoError(t, os.WriteFile( + filepath.Join(ws.Root(), "f.txt"), + []byte("axb\nayb\na.b\nzzz\n"), + 0o644, + )) + + result, err := ws.Grep(ctx, &protocol.GrepArgs{Pattern: "a.b"}) + require.NoError(t, err) + // All three of axb / ayb / a.b should match a regex `a.b`. + assert.GreaterOrEqual(t, len(result.Matches), 3, + "regex 'a.b' should match all of axb, ayb, a.b — got %v", result.Matches) + assert.Empty(t, result.RegexCompileError, "valid regex must not surface compile error") +} + +// TestEnvironment_Grep_RegexCompileFallback confirms that a syntactically +// invalid regex still returns results via literal substring fallback and +// surfaces the compile error so the caller can show it to the model. +func TestEnvironment_Grep_RegexCompileFallback(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + require.NoError(t, os.WriteFile( + filepath.Join(ws.Root(), "f.txt"), + []byte("hello [unclosed\nworld\n"), + 0o644, + )) + + // "[unclosed" is a malformed regex (unclosed character class). + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "[unclosed"}) + require.NoError(t, err) + assert.NotEmpty(t, res.RegexCompileError, "compile failure must be surfaced") + require.NotEmpty(t, res.Matches, "literal fallback should still find the line") + assert.Contains(t, res.Matches[0].Content, "[unclosed") +} + +// TestEnvironment_Grep_DefaultIgnore verifies node_modules / .git are skipped +// by the Go fallback so a model issuing `grep` on a workspace with vendor +// dirs doesn't burn minutes scanning them. 
+func TestEnvironment_Grep_DefaultIgnore(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), "src"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), "node_modules", "pkg"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), ".git"), 0o755)) + + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "src", "a.txt"), []byte("needle\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "node_modules", "pkg", "b.txt"), []byte("needle\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), ".git", "c.txt"), []byte("needle\n"), 0o644)) + + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "needle"}) + require.NoError(t, err) + + for _, m := range res.Matches { + assert.NotContains(t, m.Path, "node_modules", "node_modules must be ignored, got %s", m.Path) + assert.NotContains(t, m.Path, ".git", ".git must be ignored, got %s", m.Path) + } + + // The src/ hit must still be present. + var sawSrc bool + for _, m := range res.Matches { + if strings.HasPrefix(filepath.ToSlash(m.Path), "src/") { + sawSrc = true + break + } + } + assert.True(t, sawSrc, "expected src/a.txt match in %v", res.Matches) +} + +// TestEnvironment_Grep_LongLine ensures the bumped scanner buffer catches +// matches on lines longer than the default 64 KB cap. +func TestEnvironment_Grep_LongLine(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Build a single line: 80 KB of 'a', then NEEDLE, then 80 KB of 'b'. 
+ long := strings.Repeat("a", 80*1024) + "NEEDLE" + strings.Repeat("b", 80*1024) + "\n" + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "long.txt"), []byte(long), 0o644)) + + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "NEEDLE"}) + require.NoError(t, err) + require.NotEmpty(t, res.Matches, "NEEDLE on a >64KB line must still match") + assert.Equal(t, "long.txt", res.Matches[0].Path) +} + func TestEnvironment_Bash(t *testing.T) { ws := newTestEnvironment(t) ctx := context.Background() diff --git a/environment/htmlx.go b/environment/htmlx.go new file mode 100644 index 0000000..37cdd90 --- /dev/null +++ b/environment/htmlx.go @@ -0,0 +1,51 @@ +// HTML→Markdown / HTML→Text conversion helpers used by webfetch. +// +// Inlined from go-pkg/x/htmlx — keep in sync. flashduty-runner is +// open-source and may not import internal flashcatcloud modules. +package environment + +import ( + "regexp" + "strings" + + htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2" +) + +// Pre-compiled to avoid recompiling on every conversion call. webfetch is on +// a hot path; per-call compile was a measurable waste. 
+var (
+	htmlScriptRe     = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
+	htmlStyleRe      = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
+	htmlTagRe        = regexp.MustCompile(`<[^>]+>`)
+	htmlWhitespaceRe = regexp.MustCompile(`\s+`)
+	htmlBlankLinesRe = regexp.MustCompile(`\n{3,}`)
+)
+
+func convertHTMLToMarkdown(html string) string {
+	md, err := htmltomarkdown.ConvertString(html)
+	if err != nil {
+		return convertHTMLToText(html)
+	}
+	return cleanupMarkdown(md)
+}
+
+func convertHTMLToText(html string) string {
+	html = htmlScriptRe.ReplaceAllString(html, "")
+	html = htmlStyleRe.ReplaceAllString(html, "")
+	text := htmlTagRe.ReplaceAllString(html, " ")
+
+	text = strings.ReplaceAll(text, "&nbsp;", " ")
+	text = strings.ReplaceAll(text, "&amp;", "&")
+	text = strings.ReplaceAll(text, "&lt;", "<")
+	text = strings.ReplaceAll(text, "&gt;", ">")
+	text = strings.ReplaceAll(text, "&quot;", "\"")
+	text = strings.ReplaceAll(text, "&#39;", "'")
+
+	text = htmlWhitespaceRe.ReplaceAllString(text, " ")
+	return strings.TrimSpace(text)
+}
+
+func cleanupMarkdown(md string) string {
+	md = htmlBlankLinesRe.ReplaceAllString(md, "\n\n")
+	return strings.TrimSpace(md)
+}
diff --git a/environment/large_output.go b/environment/large_output.go
index 2b8282b..84509a5 100644
--- a/environment/large_output.go
+++ b/environment/large_output.go
@@ -4,12 +4,18 @@ import (
 	"context"
 	"fmt"
 	"path/filepath"
+	"regexp"
 	"strings"
 	"time"
 
 	"github.com/lithammer/shortuuid/v4"
 )
 
+// readCommandRe matches the read commands as standalone tokens (anchored at
+// start of input or preceded by whitespace, semicolon, pipe, or `&`). Using
+// substring matching ("head ") false-positives against words like "thread ".
+var readCommandRe = regexp.MustCompile(`(?:^|[\s;|&])(?:cat|head|tail|less|more|bat)\s`) + const ( // DefaultMaxOutputSize is the default character limit before truncation (~7.5k tokens) DefaultMaxOutputSize = 30000 // ~7.5k tokens at 4 chars/token @@ -86,7 +92,7 @@ func (p *LargeOutputProcessor) Process(ctx context.Context, content string, pref filePath := filepath.Join(OutputsDir, filename) // Save full content to file - if err := p.ws.WriteRaw(ctx, filePath, []byte(content)); err != nil { + if err := p.ws.writeRaw(ctx, filePath, []byte(content)); err != nil { // If save fails, just return truncated content without file reference return &ProcessResult{ Content: p.truncateContent(content, ""), @@ -105,16 +111,12 @@ func (p *LargeOutputProcessor) Process(ctx context.Context, content string, pref // ShouldSkipForOutputsDir checks if large output processing should be skipped // for commands operating on .outputs/ directory to avoid circular processing. +// Uses a word-boundary regex so commands like "thread " don't match "head ". func ShouldSkipForOutputsDir(command string) bool { - readCommands := []string{"cat ", "head ", "tail ", "less ", "more ", "bat "} - for _, cmd := range readCommands { - if strings.Contains(command, cmd) { - if strings.Contains(command, OutputsDir+"/") || strings.Contains(command, OutputsDir+"\\") { - return true - } - } + if !readCommandRe.MatchString(command) { + return false } - return false + return strings.Contains(command, OutputsDir+"/") || strings.Contains(command, OutputsDir+"\\") } // truncateContent creates a truncated preview with optional file reference. diff --git a/environment/netsafe.go b/environment/netsafe.go new file mode 100644 index 0000000..e7babd2 --- /dev/null +++ b/environment/netsafe.go @@ -0,0 +1,192 @@ +// SSRF guard for outbound HTTP. 
Runner pods must NEVER reach IMDS or the +// cluster apiserver from inside; without dial-time IP validation a +// prompt-injected URL can exfiltrate cloud credentials from +// 169.254.169.254 or hit internal VPC services. The DNS-to-dial +// re-resolution closes the DNS-rebinding gap a host-string allowlist +// would leave open. +// +// Inlined from go-pkg/x/netsafe — keep in sync. flashduty-runner is +// open-source and may not import internal flashcatcloud modules. +package environment + +import ( + "context" + "fmt" + "net" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +// maxRedirects caps redirect chains processed by safeCheckRedirect. Default +// Go is 10; CC web-fetch and Codex both cap around 5. +const maxRedirects = 5 + +func ipBlocked(ip net.IP) bool { + if ip == nil { + return true + } + if ip.IsLoopback() || ip.IsPrivate() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || + ip.IsInterfaceLocalMulticast() || ip.IsMulticast() || ip.IsUnspecified() { + return true + } + if v4 := ip.To4(); v4 != nil { + // CGNAT 100.64.0.0/10 + IETF protocol-assignment 192.0.0.0/24 — both + // routable-but-internal, missed by IsPrivate. + if v4[0] == 100 && v4[1] >= 64 && v4[1] <= 127 { + return true + } + if v4[0] == 192 && v4[1] == 0 && v4[2] == 0 { + return true + } + } + return false +} + +// validateHost resolves host and returns an error if any resolved IP is +// blocked. Pre-flight check that gives a clearer error than the dial-time +// failure when the model passes a literal IMDS / RFC1918 URL. 
+func validateHost(ctx context.Context, host string) error { + if host == "" { + return fmt.Errorf("host is empty") + } + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if ip := net.ParseIP(host); ip != nil { + if ipBlocked(ip) { + return fmt.Errorf("target %s resolves to a blocked address range", host) + } + return nil + } + ips, err := net.DefaultResolver.LookupIPAddr(ctx, host) + if err != nil { + return fmt.Errorf("resolve %s: %w", host, err) + } + for _, addr := range ips { + if ipBlocked(addr.IP) { + return fmt.Errorf("target %s resolves to a blocked address (%s)", host, addr.IP) + } + } + return nil +} + +// validateURL parses raw, rejects non-http(s) schemes, and validates the +// host. Returns the parsed URL on success so callers don't re-parse. +func validateURL(ctx context.Context, raw string) (*url.URL, error) { + u, err := url.Parse(raw) + if err != nil { + return nil, fmt.Errorf("parse url: %w", err) + } + if u.Scheme != "http" && u.Scheme != "https" { + return nil, fmt.Errorf("url scheme must be http or https") + } + if u.Host == "" { + return nil, fmt.Errorf("url host must not be empty") + } + if err := validateHost(ctx, u.Hostname()); err != nil { + return nil, err + } + return u, nil +} + +// safeDialContext returns a DialContext that re-resolves the host on every +// dial and rejects blocked IPs. ServerName for TLS still derives from the +// request hostname so cert validation works. 
+func safeDialContext(dialTimeout time.Duration) func(ctx context.Context, network, addr string) (net.Conn, error) { + d := &net.Dialer{Timeout: dialTimeout, KeepAlive: 30 * time.Second} + proxyHosts := envProxyHosts() + return func(ctx context.Context, network, addr string) (net.Conn, error) { + host, port, err := net.SplitHostPort(addr) + if err != nil { + return nil, err + } + // Configured HTTP(S) proxy hops bypass SSRF validation: the proxy is + // trusted egress infrastructure (often a loopback sidecar) and + // enforces its own destination policy. Final-destination validation + // still runs upstream at validateURL when the caller is a + // user-supplied URL. + if _, ok := proxyHosts[host]; ok { + return d.DialContext(ctx, network, addr) + } + if ip := net.ParseIP(host); ip != nil { + if ipBlocked(ip) { + return nil, fmt.Errorf("blocked address %s", ip) + } + return d.DialContext(ctx, network, addr) + } + ips, err := net.DefaultResolver.LookupIPAddr(ctx, host) + if err != nil { + return nil, err + } + var lastErr error + for _, addr := range ips { + if ipBlocked(addr.IP) { + lastErr = fmt.Errorf("blocked address %s", addr.IP) + continue + } + conn, err := d.DialContext(ctx, network, net.JoinHostPort(addr.IP.String(), port)) + if err == nil { + return conn, nil + } + lastErr = err + } + if lastErr == nil { + lastErr = fmt.Errorf("no resolvable address for %s", host) + } + return nil, lastErr + } +} + +// safeHTTPTransport returns an *http.Transport wired with safeDialContext +// and sensible pool sizes. +func safeHTTPTransport(dialTimeout time.Duration) *http.Transport { + t := http.DefaultTransport.(*http.Transport).Clone() + t.DialContext = safeDialContext(dialTimeout) + return t +} + +// safeCheckRedirect re-validates each hop's host and caps the chain at +// maxRedirects. Without this, a 302 from a permitted host to +// http://169.254.169.254/ is followed by Go's default policy. 
+func safeCheckRedirect(req *http.Request, via []*http.Request) error { + if len(via) >= maxRedirects { + return fmt.Errorf("stopped after %d redirects", maxRedirects) + } + if req.URL == nil { + return fmt.Errorf("redirect url missing") + } + if req.URL.Scheme != "http" && req.URL.Scheme != "https" { + return fmt.Errorf("redirect to non-http(s) scheme %q refused", req.URL.Scheme) + } + if err := validateHost(req.Context(), req.URL.Hostname()); err != nil { + return fmt.Errorf("redirect refused: %w", err) + } + return nil +} + +// envProxyHosts captures the bare hostnames of any HTTP proxies configured +// via env so safeDialContext can let those dials through. Captured once at +// transport-creation time — Go's own ProxyFromEnvironment caches identically. +func envProxyHosts() map[string]struct{} { + out := map[string]struct{}{} + for _, key := range []string{"HTTP_PROXY", "http_proxy", "HTTPS_PROXY", "https_proxy", "ALL_PROXY", "all_proxy"} { + raw := strings.TrimSpace(os.Getenv(key)) + if raw == "" { + continue + } + host := raw + if u, err := url.Parse(raw); err == nil && u.Host != "" { + host = u.Host + } + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if host != "" { + out[host] = struct{}{} + } + } + return out +} diff --git a/environment/security_test.go b/environment/security_test.go new file mode 100644 index 0000000..c7820ee --- /dev/null +++ b/environment/security_test.go @@ -0,0 +1,165 @@ +package environment + +import ( + "archive/zip" + "bytes" + "context" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/flashcatcloud/flashduty-runner/protocol" +) + +// buildZipWithEntry creates an in-memory zip archive containing a single +// entry with the given name + body. Used by zip-slip tests so we can +// inject crafted entry names that the public buildTestZip helper would +// reject via filepath safety. 
// buildZipWithEntry creates an in-memory zip archive containing a single
// entry with the given name and body. Zip-slip tests use it to inject
// crafted entry names that the public buildTestZip helper would refuse.
func buildZipWithEntry(t *testing.T, name, body string) []byte {
	t.Helper()
	var buf bytes.Buffer
	zw := zip.NewWriter(&buf)
	w, err := zw.CreateHeader(&zip.FileHeader{Name: name, Method: zip.Deflate})
	require.NoError(t, err)
	_, err = w.Write([]byte(body))
	require.NoError(t, err)
	require.NoError(t, zw.Close())
	return buf.Bytes()
}

// TestUnzipSkill_RejectsZipSlip is the regression test for the v3 audit
// finding: HasPrefix without a trailing separator plus missing ".."
// rejection allowed entries like "../evil" (and sibling-prefix attacks,
// covered separately below) to escape the destination directory.
func TestUnzipSkill_RejectsZipSlip(t *testing.T) {
	env := newTestEnvironment(t)
	dest := filepath.Join(t.TempDir(), "skills", "victim")
	require.NoError(t, os.MkdirAll(filepath.Dir(dest), 0o755))

	// Each case is one hostile entry name; all must be refused outright.
	cases := []struct {
		name  string
		entry string
	}{
		{"parent-traversal", "../evil"},
		{"deep-traversal", "a/b/../../../evil"},
		{"absolute-path-unix", "/etc/passwd"},
		{"current-then-parent", "./../evil"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			zipData := buildZipWithEntry(t, tc.entry, "pwned")
			err := env.unzipSkill(zipData, dest)
			require.Error(t, err, "expected zip-slip refusal for entry %q", tc.entry)
			// Belt-and-suspenders: ensure no file was written outside dest.
			outside := filepath.Join(filepath.Dir(dest), "evil")
			if _, err := os.Stat(outside); err == nil {
				t.Fatalf("file written outside dest: %s", outside)
			}
		})
	}
}

// TestUnzipSkill_RejectsSiblingPrefixEscape targets the
// HasPrefix-without-trailing-separator bug specifically: with
// dest="/x/skill", the old check HasPrefix(absTarget, dest) also accepted
// "/x/skill-evil/y" — a SIBLING directory that merely shares the string
// prefix. We craft an entry that resolves into such a sibling; with the
// os.PathSeparator-appending fix it is rejected.
func TestUnzipSkill_RejectsSiblingPrefixEscape(t *testing.T) {
	env := newTestEnvironment(t)
	parent := t.TempDir()
	dest := filepath.Join(parent, "skill")
	require.NoError(t, os.MkdirAll(parent, 0o755))

	// Pre-create the sibling so a successful escape would land a file in it.
	sibling := filepath.Join(parent, "skill-evil")
	require.NoError(t, os.MkdirAll(sibling, 0o755))

	// "../skill-evil/x" — after filepath.Join(dest, ...) and Abs, this
	// yields .../skill-evil/x. With the old HasPrefix(absTarget, dest)
	// where dest="/.../skill", the prefix "/.../skill" matches
	// "/.../skill-evil/x" — bypass. The fix appends os.PathSeparator.
	zipData := buildZipWithEntry(t, "../skill-evil/x", "pwned")
	err := env.unzipSkill(zipData, dest)
	require.Error(t, err)

	if _, err := os.Stat(filepath.Join(sibling, "x")); err == nil {
		t.Fatalf("zip-slip succeeded: file landed in sibling dir")
	}
}
+func TestUnzipSkill_AcceptsBenign(t *testing.T) { + env := newTestEnvironment(t) + dest := filepath.Join(t.TempDir(), "skills", "ok") + zipData := buildTestZip(t, map[string]string{ + "SKILL.md": "# hi", + "resources/a.txt": "alpha", + "scripts/b.sh": "#!/bin/sh\n", + }) + require.NoError(t, env.unzipSkill(zipData, dest)) + for _, rel := range []string{"SKILL.md", "resources/a.txt", "scripts/b.sh"} { + if _, err := os.Stat(filepath.Join(dest, rel)); err != nil { + t.Fatalf("expected file %s after unzip: %v", rel, err) + } + } +} + +// TestWebFetch_RefusesSSRFTargets is the regression test for PR-5.1: the +// runner historically used http.DefaultClient with no dial-time IP guard, +// so a model-supplied URL like http://169.254.169.254/ (cloud IMDS) or +// http://127.0.0.1/ (loopback) hit the real address from inside the pod. +// With the inlined SSRF guard wired in, all three targets are refused +// before any TCP connection is attempted. +func TestWebFetch_RefusesSSRFTargets(t *testing.T) { + env := newTestEnvironment(t) + cases := []string{ + "http://169.254.169.254/latest/meta-data/", // AWS / Aliyun / GCP IMDS + "http://127.0.0.1/", // loopback + "http://10.0.0.1/", // RFC1918 + } + for _, raw := range cases { + t.Run(raw, func(t *testing.T) { + _, err := env.WebFetch(context.Background(), &protocol.WebFetchArgs{URL: raw}) + require.Error(t, err, "%s must be refused", raw) + if !strings.Contains(err.Error(), "refused") && !strings.Contains(err.Error(), "blocked") { + t.Errorf("expected SSRF refusal in %v", err) + } + }) + } +} + +// TestWebFetch_RefusesRedirectToBlocked is the regression test for PR-5.4: +// without CheckRedirect re-validating each hop, a 302 from an allowed +// public host to http://169.254.169.254/ would be followed (Go's default +// CheckRedirect is permissive). With safeCheckRedirect wired, the client +// refuses the hop before dialing IMDS. 
// TestWebFetch_RefusesRedirectToBlocked is the regression test for PR-5.4:
// without CheckRedirect re-validating each hop, a 302 from an allowed
// public host to http://169.254.169.254/ would be followed (Go's default
// CheckRedirect is permissive). With safeCheckRedirect wired into
// fetchClient, the hop is refused before IMDS is ever dialed.
func TestWebFetch_RefusesRedirectToBlocked(t *testing.T) {
	env := newTestEnvironment(t)
	target := "http://169.254.169.254/latest/meta-data/"
	// The server documents the attack shape (public host 302-ing to IMDS);
	// it is deliberately never contacted — see the note below.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, target, http.StatusFound)
	}))
	t.Cleanup(srv.Close)

	// We can't fetch srv.URL through the real fetchClient because it dials
	// loopback (also blocked). The hop-validation guarantee is exercised
	// directly here against safeCheckRedirect; env.WebFetch on the literal
	// 169.254 URL would short-circuit at pre-flight validateURL before even
	// hitting CheckRedirect — already covered by TestWebFetch_RefusesSSRFTargets.
	_ = env
	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, target, nil)
	require.NoError(t, err)
	// One prior hop in the chain, as net/http would pass it to CheckRedirect.
	via := []*http.Request{{}}
	if err := safeCheckRedirect(req, via); err == nil {
		t.Fatalf("safeCheckRedirect must refuse a redirect hop to %s", target)
	}
}
// fetchClient is the process-global HTTP client for WebFetch, so the
// connection pool is shared across fetches. It is wired to the inlined
// netsafe guard:
//   - safeHTTPTransport re-resolves the host on every dial and refuses
//     RFC1918 / loopback / link-local / IMDS targets, which also closes
//     the DNS-rebinding window.
//   - safeCheckRedirect re-validates each redirect hop, so a 302 from a
//     permitted host to http://169.254.169.254/ is refused before dial.
//
// Runner pods must NEVER reach IMDS or the cluster apiserver from inside;
// netsafe enforcement is on by default in this binary (no SetEnforced call).
// No Client.Timeout is set here: each request is bounded by the per-request
// context deadline applied in fetchURL instead.
var fetchClient = &http.Client{
	Transport:     safeHTTPTransport(10 * time.Second),
	CheckRedirect: safeCheckRedirect,
}
-func convertHTMLToMarkdown(html string) string { - markdown, err := htmltomarkdown.ConvertString(html) - if err != nil { - // Fallback to text extraction on error - return convertHTMLToText(html) - } - return cleanupMarkdown(markdown) -} - -// convertHTMLToText extracts plain text from HTML. -func convertHTMLToText(html string) string { - // Remove script and style tags with their content - scriptRe := regexp.MustCompile(`(?is)]*>.*?`) - html = scriptRe.ReplaceAllString(html, "") - - styleRe := regexp.MustCompile(`(?is)]*>.*?`) - html = styleRe.ReplaceAllString(html, "") - - // Remove all HTML tags - tagRe := regexp.MustCompile(`<[^>]+>`) - text := tagRe.ReplaceAllString(html, " ") - - // Decode common HTML entities - text = strings.ReplaceAll(text, " ", " ") - text = strings.ReplaceAll(text, "&", "&") - text = strings.ReplaceAll(text, "<", "<") - text = strings.ReplaceAll(text, ">", ">") - text = strings.ReplaceAll(text, """, "\"") - text = strings.ReplaceAll(text, "'", "'") - - // Normalize whitespace - spaceRe := regexp.MustCompile(`\s+`) - text = spaceRe.ReplaceAllString(text, " ") - - return strings.TrimSpace(text) -} - -// cleanupMarkdown removes excessive whitespace and normalizes the markdown. 
-func cleanupMarkdown(md string) string { - // Remove excessive blank lines (more than 2 consecutive) - blankLinesRe := regexp.MustCompile(`\n{3,}`) - md = blankLinesRe.ReplaceAllString(md, "\n\n") - return strings.TrimSpace(md) -} diff --git a/go.mod b/go.mod index d6500c2..449025a 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/lithammer/shortuuid/v4 v4.2.0 github.com/modelcontextprotocol/go-sdk v1.5.0 github.com/spf13/cobra v1.8.1 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.11.1 mvdan.cc/sh/v3 v3.13.1 ) diff --git a/go.sum b/go.sum index 3936390..75e70b5 100644 --- a/go.sum +++ b/go.sum @@ -46,8 +46,8 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA= diff --git a/permission/permission.go b/permission/permission.go index 2bc3e80..6f76ae9 100644 --- a/permission/permission.go +++ b/permission/permission.go @@ -1,4 +1,33 @@ // Package permission implements glob-based command permission checking. 
// Rule binds one glob pattern to the action taken when a command matches it.
type Rule struct {
	Pattern string
	Action  Action
	// canonical is the printer-normalized form of Pattern, used for matching.
	canonical string
	// specificity is the length of the literal prefix preceding the first '*'.
	// Higher is more specific; "*" has -1 so it always sorts last.
	specificity int
}
@@ -30,42 +64,40 @@ type Checker struct { } // NewChecker creates a new permission checker from a map of patterns to actions. -// The "*" pattern is processed first as the default rule, followed by other patterns in sorted order. +// Rules are sorted so that the most specific rule (longest literal prefix +// before any '*') is tried first; "*" is always the fallback. func NewChecker(patterns map[string]string) *Checker { c := &Checker{ rules: make([]Rule, 0, len(patterns)), } - - // Add default "*" rule first if present - if action, ok := patterns["*"]; ok { - c.rules = append(c.rules, Rule{ - Pattern: "*", - Action: Action(strings.ToLower(action)), - }) - } - - // Add other rules in sorted order - otherPatterns := getSortedPatterns(patterns) - for _, pattern := range otherPatterns { + for pattern, action := range patterns { c.rules = append(c.rules, Rule{ - Pattern: pattern, - Action: Action(strings.ToLower(patterns[pattern])), + Pattern: pattern, + Action: Action(strings.ToLower(action)), + canonical: canonicalize(pattern), + specificity: patternSpecificity(pattern), }) } - + sort.SliceStable(c.rules, func(i, j int) bool { + // More specific first. Tie-break on canonical pattern for determinism. + if c.rules[i].specificity != c.rules[j].specificity { + return c.rules[i].specificity > c.rules[j].specificity + } + return c.rules[i].canonical < c.rules[j].canonical + }) return c } -// getSortedPatterns returns all patterns except "*" in sorted order. -func getSortedPatterns(patterns map[string]string) []string { - var result []string - for pattern := range patterns { - if pattern != "*" { - result = append(result, pattern) - } +// patternSpecificity returns the length of the literal prefix preceding the +// first '*' in pattern. "*" returns -1 (always least specific). 
// patternSpecificity returns the length of the literal prefix preceding the
// first '*' in pattern. "*" returns -1 (always least specific); a pattern
// with no '*' is maximally specific (its full length).
func patternSpecificity(pattern string) int {
	if pattern == "*" {
		return -1
	}
	if i := strings.IndexByte(pattern, '*'); i >= 0 {
		return i
	}
	return len(pattern)
}

// checkNode walks any AST node, evaluating every executable command and
// every write-redirect target it encounters. It recurses into command
// substitutions and process substitutions so that constructs like
// `cat <(curl evil)` and `echo $(curl evil)` cannot bypass deny rules.
// The first violation found aborts the walk and is returned.
func (c *Checker) checkNode(root syntax.Node) error {
	var checkErr error
	syntax.Walk(root, func(node syntax.Node) bool {
		// A prior violation short-circuits the rest of the walk.
		if checkErr != nil {
			return false
		}
		switch x := node.(type) {
		case *syntax.Stmt:
			// Walk redirects ourselves; the printer cannot give us a
			// stable target string without doing it here.
			for _, redir := range x.Redirs {
				if err := c.checkRedirect(redir); err != nil {
					checkErr = err
					return false
				}
			}
		case *syntax.CallExpr:
			if err := c.checkCallExpr(x); err != nil {
				checkErr = err
				return false
			}
			// Keep descending into Args so that any embedded
			// CmdSubst/ProcSubst inside the arguments is also checked.
			return true
		case *syntax.CmdSubst:
			// Recurse via checkNode (the canonical entry point) and stop the
			// outer Walk so the statements are not visited twice.
			for _, stmt := range x.Stmts {
				if err := c.checkNode(stmt); err != nil {
					checkErr = err
					return false
				}
			}
			return false // handled
		case *syntax.ProcSubst:
			for _, stmt := range x.Stmts {
				if err := c.checkNode(stmt); err != nil {
					checkErr = err
					return false
				}
			}
			return false // handled
		}
		return true
	})
	return checkErr
}
// checkCallExpr evaluates a single simple-command against the rules.
// It evaluates two projections — the full canonical form (env assignments
// included) and the form with leading assignments stripped — and applies a
// "deny wins, allow wins over default-deny" merge:
//
//   - If either projection matches an explicit (non-catch-all) deny rule,
//     the command is denied.
//   - Otherwise, if either projection matches an allow rule, it is allowed.
//   - Otherwise, denied by default.
//
// This blocks env-prefix bypass ("FOO=bar curl evil" still hits "curl *")
// while preventing false-deny when only the program form is allow-listed
// ("KUBECONFIG=x kubectl get pods" matches "kubectl get *").
func (c *Checker) checkCallExpr(call *syntax.CallExpr) error {
	if len(call.Args) == 0 {
		// Pure assignment (`FOO=bar`); nothing executes.
		return nil
	}
	full := canonicalize(nodeToString(call))
	if full == "" {
		return nil
	}
	// Project the command with its env-assignment prefix removed; fall back
	// to the full form if printing the stripped call yields nothing.
	stripped := full
	if len(call.Assigns) > 0 {
		stripped = canonicalize(nodeToString(&syntax.CallExpr{Args: call.Args}))
		if stripped == "" {
			stripped = full
		}
	}

	fullAction, fullPattern := c.evaluateRules(full)
	// No assignment prefix: single-projection fast path.
	if stripped == full {
		if fullAction == ActionAllow {
			return nil
		}
		return c.denyError(fullPattern, full)
	}

	strippedAction, strippedPattern := c.evaluateRules(stripped)

	// Explicit (non-catch-all) deny on either projection wins immediately —
	// this is what blocks env-prefix bypass like "FOO=bar curl evil".
	if fullAction == ActionDeny && fullPattern != "*" {
		return c.denyError(fullPattern, full)
	}
	if strippedAction == ActionDeny && strippedPattern != "*" {
		return c.denyError(strippedPattern, stripped)
	}
	// Otherwise allow if either projection matched an allow rule. This
	// prevents false-deny when only the program form ("kubectl get *")
	// is allow-listed and the user prefixes env vars.
	if fullAction == ActionAllow || strippedAction == ActionAllow {
		return nil
	}
	// Both projections only matched the catch-all "*" deny (or no rule
	// at all). Use whichever projection actually has a pattern for the
	// error message.
	if fullPattern != "" {
		return c.denyError(fullPattern, full)
	}
	return c.denyError(strippedPattern, stripped)
}
+func (c *Checker) checkRedirect(redir *syntax.Redirect) error { + if redir == nil || redir.Word == nil { + return nil + } + // Recurse into any CmdSubst/ProcSubst hidden inside the redirect target, + // e.g. `cat < <(curl evil)`. + if err := c.checkNode(redir.Word); err != nil { + return err + } + if redir.Hdoc != nil { + if err := c.checkNode(redir.Hdoc); err != nil { + return err + } + } + + prefix := writeRedirectPrefix(redir.Op) + if prefix == "" { + return nil // read redirect: nothing more to gate + } + target := canonicalize(nodeToString(redir.Word)) + if target == "" { + return nil + } + synth := prefix + " " + target + if action, pattern := c.evaluateRules(synth); action == ActionDeny { + return c.denyError(pattern, synth) + } + return nil +} + +// writeRedirectPrefix returns the canonical operator string for write +// redirects, or "" for read-only redirects. +func writeRedirectPrefix(op syntax.RedirOperator) string { + switch op { + case syntax.RdrOut: + return ">" + case syntax.AppOut: + return ">>" + case syntax.ClbOut: + return ">|" + case syntax.RdrAll: + return "&>" + case syntax.AppAll: + return "&>>" + default: + return "" + } +} + +// nodeToString prints any AST node back to its source form. The printer +// applies a canonical layout, which gives us a stable string regardless +// of how the user originally formatted the input. +func nodeToString(node syntax.Node) string { var sb strings.Builder printer := syntax.NewPrinter() _ = printer.Print(&sb, node) - return sb.String() + return strings.TrimRight(sb.String(), "\n") +} + +// canonicalize normalizes a command string by parsing and re-printing it +// (which collapses runs of whitespace to a single space, normalizes +// quoting, etc.). Falls back to a whitespace-collapsing best-effort if +// the input cannot be parsed (e.g. rule patterns with bare globs). 
+func canonicalize(s string) string { + s = strings.TrimSpace(s) + if s == "" { + return "" + } + parser := syntax.NewParser() + if f, err := parser.Parse(strings.NewReader(s), ""); err == nil { + printed := strings.TrimRight(nodeToString(f), "\n") + if printed != "" { + return collapseWS(printed) + } + } + return collapseWS(s) +} + +// collapseWS collapses any run of ASCII whitespace into a single space. +// Used so that rule patterns and command projections agree even when one +// side has been formatted differently. +func collapseWS(s string) string { + var sb strings.Builder + sb.Grow(len(s)) + prevSpace := false + for _, r := range s { + if r == ' ' || r == '\t' || r == '\n' || r == '\r' { + if !prevSpace { + sb.WriteByte(' ') + prevSpace = true + } + continue + } + sb.WriteRune(r) + prevSpace = false + } + return strings.TrimSpace(sb.String()) } -// evaluateRules checks all rules and returns the final action and matched pattern. +// evaluateRules walks rules in specificity order and returns on first +// match. If no rule matches, the command is denied. 
func (c *Checker) evaluateRules(command string) (Action, string) { - action, pattern := ActionDeny, "" for _, rule := range c.rules { - if matched, _ := matchPattern(rule.Pattern, command); matched { - action, pattern = rule.Action, rule.Pattern + if matched, _ := matchPattern(rule.canonical, command); matched { + return rule.Action, rule.Pattern } } - return action, pattern + return ActionDeny, "" } // denyError creates an error message that lists denied patterns so callers @@ -164,7 +379,7 @@ func matchPattern(pattern, command string) (bool, error) { return true, nil case !strings.Contains(pattern, "*"): return pattern == command, nil - case strings.HasSuffix(pattern, "*"): + case strings.HasSuffix(pattern, "*") && !strings.Contains(strings.TrimSuffix(pattern, "*"), "*"): return strings.HasPrefix(command, strings.TrimSuffix(pattern, "*")), nil default: return doublestar.Match(pattern, command) @@ -183,34 +398,46 @@ func DefaultRules() map[string]string { } } -// SafeReadOnlyRules returns rules that allow read-only operations. +// SafeReadOnlyRules returns rules that allow common read-only operations. +// +// Notably absent compared to a naive list: +// - "find *" — allowed `find . -delete` and `find . -exec rm`. Specific +// safe variants are listed instead, none of which can mutate state. +// - "echo *" — allowed `echo x > /etc/passwd`. Write redirects are now +// gated by checkRedirect, but plain "echo" is rarely needed for +// diagnostics, so it is dropped to stay tight by default. +// +// Write redirects are denied unless an explicit allow rule for the synthetic +// "> target" form is added (e.g. "> /tmp/*"). 
func SafeReadOnlyRules() map[string]string { return map[string]string{ - "*": "deny", - "cat *": "allow", - "head *": "allow", - "tail *": "allow", - "ls *": "allow", - "ls": "allow", - "pwd": "allow", - "whoami": "allow", - "date": "allow", - "echo *": "allow", - "grep *": "allow", - "find *": "allow", - "which *": "allow", - "env": "allow", - "uname *": "allow", - "uname": "allow", - "df *": "allow", - "df": "allow", - "du *": "allow", - "free *": "allow", - "free": "allow", - "uptime": "allow", - "ps *": "allow", - "ps": "allow", - "top -b *": "allow", + "*": "deny", + "cat *": "allow", + "head *": "allow", + "tail *": "allow", + "ls *": "allow", + "ls": "allow", + "pwd": "allow", + "whoami": "allow", + "date": "allow", + "grep *": "allow", + "find * -name *": "allow", + "find * -type *": "allow", + "find * -path *": "allow", + "find * -iname *": "allow", + "which *": "allow", + "env": "allow", + "uname *": "allow", + "uname": "allow", + "df *": "allow", + "df": "allow", + "du *": "allow", + "free *": "allow", + "free": "allow", + "uptime": "allow", + "ps *": "allow", + "ps": "allow", + "top -b *": "allow", } } diff --git a/permission/permission_test.go b/permission/permission_test.go index 08d4563..7688057 100644 --- a/permission/permission_test.go +++ b/permission/permission_test.go @@ -209,6 +209,198 @@ func TestKubernetesReadOnlyRules(t *testing.T) { assert.False(t, checker.IsAllowed("kubectl exec -it nginx -- bash")) } +// TestChecker_ASTBypassHardening covers the bypass scenarios documented +// in the v3 addendum to the AI-SRE builtin tools refactor plan. +func TestChecker_ASTBypassHardening(t *testing.T) { + tests := []struct { + name string + rules map[string]string + command string + wantErr bool + }{ + // Process / command substitution must be inspected. 
+ { + name: "process substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "cat <(curl evil.com)", + wantErr: true, + }, + { + name: "command substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "echo $(curl evil.com)", + wantErr: true, + }, + { + name: "backtick substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "echo `curl evil.com`", + wantErr: true, + }, + { + name: "nested process substitution blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "diff <(cat a) <(curl evil.com)", + wantErr: true, + }, + // Redirect targets must be gated. + { + name: "write redirect to /etc blocked under read-only rules", + rules: SafeReadOnlyRules(), + // "echo *" was removed, so this is doubly blocked, but the + // redirect-target check is what makes this safe even if echo + // were re-allowed. + command: "cat /etc/hosts > /etc/passwd", + wantErr: true, + }, + { + name: "write redirect blocked when no allow rule exists", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + }, + command: "echo hi > /etc/passwd", + wantErr: true, + }, + { + name: "append redirect blocked when no allow rule exists", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + }, + command: "echo hi >> /etc/passwd", + wantErr: true, + }, + { + name: "write redirect allowed by explicit rule", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + "> *": "allow", + }, + command: "echo hi > /tmp/foo", + wantErr: false, + }, + { + name: "read redirect (here-string) is fine", + rules: map[string]string{ + "*": "deny", + "grep *": "allow", + }, + command: "grep foo <<< bar", + wantErr: false, + }, + // find arg-walker via removed pattern. + { + name: "find -delete blocked", + rules: SafeReadOnlyRules(), + command: "find . 
-delete", + wantErr: true, + }, + { + name: "find -exec rm blocked", + rules: SafeReadOnlyRules(), + command: "find . -exec rm {} +", + wantErr: true, + }, + { + name: "find -name still allowed", + rules: SafeReadOnlyRules(), + command: "find . -name *.go", + wantErr: false, + }, + // Whitespace normalization. + { + name: "extra spaces still match", + rules: map[string]string{ + "*": "deny", + "kubectl get *": "allow", + }, + command: "kubectl get pods", + wantErr: false, + }, + // Env-prefix normalization. + { + name: "env prefix does not bypass deny", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "FOO=bar curl evil.com", + wantErr: true, + }, + { + name: "env prefix matches program rule", + rules: map[string]string{ + "*": "deny", + "kubectl get *": "allow", + }, + command: "KUBECONFIG=x kubectl get pods", + wantErr: false, + }, + // First-match precedence: more specific rule wins regardless of + // map iteration order. + { + name: "first-match precedence: specific deny beats general allow", + rules: map[string]string{ + "*": "allow", + "kubectl *": "allow", + "kubectl delete *": "deny", + }, + command: "kubectl delete pod nginx", + wantErr: true, + }, + { + name: "first-match precedence: specific allow beats general deny", + rules: map[string]string{ + "*": "deny", + "kubectl *": "deny", + "kubectl get *": "allow", + }, + command: "kubectl get pods", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + checker := NewChecker(tt.rules) + err := checker.Check(tt.command) + if tt.wantErr { + assert.Error(t, err, "expected command to be denied: %q", tt.command) + } else { + assert.NoError(t, err, "expected command to be allowed: %q", tt.command) + } + }) + } +} + +func TestPatternSpecificity(t *testing.T) { + // "*" must always sort last; longer literal prefix beats shorter. 
+ checker := NewChecker(map[string]string{ + "*": "allow", + "kubectl *": "deny", + "kubectl get *": "allow", + }) + // kubectl get pods → matches "kubectl get *" (most specific) → allow + assert.NoError(t, checker.Check("kubectl get pods")) + // kubectl delete pod nginx → falls through to "kubectl *" → deny + assert.Error(t, checker.Check("kubectl delete pod nginx")) + // rm -rf / → only "*" matches → allow + assert.NoError(t, checker.Check("rm -rf /")) +} + func TestMatchPattern(t *testing.T) { tests := []struct { pattern string diff --git a/protocol/messages.go b/protocol/messages.go index 4249501..f7db45b 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -157,6 +157,21 @@ type GlobArgs struct { type GrepArgs struct { Pattern string `json:"pattern"` Include []string `json:"include,omitempty"` + + // OutputMode controls the shape of the result: "content" (default — file:line:text rows), + // "files_with_matches" (one path per match), or "count" (one path:count per file). + OutputMode string `json:"output_mode,omitempty"` + // ContextBefore (-B) and ContextAfter (-A) request N lines of context around each match. + // Honored by ripgrep only when OutputMode is "content". + ContextBefore int `json:"context_before,omitempty"` + ContextAfter int `json:"context_after,omitempty"` + // HeadLimit caps the number of result rows returned (post-format). 0 = unlimited. + HeadLimit int `json:"head_limit,omitempty"` + // FileType maps to ripgrep's --type (e.g., "go", "py", "js"). Ignored by the Go fallback. + FileType string `json:"file_type,omitempty"` + // CaseSensitive overrides ripgrep's default --smart-case. + // nil = smart-case; true = case-sensitive; false = case-insensitive. + CaseSensitive *bool `json:"case_sensitive,omitempty"` } // BashArgs are the arguments for bash operation. 
@@ -220,16 +235,39 @@ type GrepResult struct { Truncated bool `json:"truncated,omitempty"` // Whether content was truncated FilePath string `json:"file_path,omitempty"` // Path to full content if truncated TotalSize int64 `json:"total_size,omitempty"` // Original content size + // RegexCompileError surfaces a non-fatal fallback path: the requested pattern failed + // to compile as a Go regexp, so the Go fallback walked files matching it as a literal. + // Empty when ripgrep handled the request or compilation succeeded. + RegexCompileError string `json:"regex_compile_error,omitempty"` } // BashResult is the result of a bash operation. +// +// Stdout and stderr are tracked independently — either stream may be truncated +// and spilled to a separate file. The legacy fields (`Truncated`, `FilePath`, +// `TotalSize`) mirror the stdout-side fields for backward compatibility with +// older safari builds; new callers should prefer the stream-specific fields. type BashResult struct { - Stdout string `json:"stdout"` - Stderr string `json:"stderr"` - ExitCode int `json:"exit_code"` - Truncated bool `json:"truncated,omitempty"` // Whether content was truncated - FilePath string `json:"file_path,omitempty"` // Path to full content if truncated - TotalSize int64 `json:"total_size,omitempty"` // Original content size + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + ExitCode int `json:"exit_code"` + + // Per-stream truncation metadata. A stream is "truncated" when either + // processLargeOutput trimmed it OR the underlying LimitedWriter hit its + // hard cap mid-execution (the captured buffer carries an inline marker). 
+ TruncatedStdout bool `json:"truncated_stdout,omitempty"` + TruncatedStderr bool `json:"truncated_stderr,omitempty"` + StdoutFilePath string `json:"stdout_file_path,omitempty"` + StderrFilePath string `json:"stderr_file_path,omitempty"` + StdoutTotalSize int64 `json:"stdout_total_size,omitempty"` + StderrTotalSize int64 `json:"stderr_total_size,omitempty"` + + // Legacy stdout-only mirrors. Kept so older safari clients that only + // parse these still see truncation. New code should read the stream- + // specific fields instead. + Truncated bool `json:"truncated,omitempty"` + FilePath string `json:"file_path,omitempty"` + TotalSize int64 `json:"total_size,omitempty"` } // WebFetchArgs are the arguments for webfetch operation. From 07721826fb1fd91ae3babc7b1a8895600710ae4a Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 16:45:57 +0800 Subject: [PATCH 09/11] fix(lint): clear golangci-lint findings + add /health endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - netsafe: guard http.DefaultTransport type-assertion (errcheck) - environment: switch if-else chain to switch (gocritic), drop dead err=nil (ineffassign), rename shadowed err vars (govet) - permission: syntax.ClbOut → syntax.RdrClob (staticcheck SA1019) - cmd: add /health listener for Tencent AGS readiness probe; bind via ListenConfig.Listen with parent context (noctx) and document the all-interfaces bind (gosec G102) Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/main.go | 36 +++++++++++++++++++++++++++++++++++ environment/environment.go | 21 ++++++++++---------- environment/netsafe.go | 6 +++++- environment/security_test.go | 10 +++++----- permission/permission.go | 6 +++--- permission/permission_test.go | 6 +++--- 6 files changed, 63 insertions(+), 22 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 5956afc..6432102 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -5,6 +5,8 @@ import ( "context" "fmt" "log/slog" + "net" + "net/http" 
"os" "os/signal" "path/filepath" @@ -38,6 +40,11 @@ var ( const ( defaultURL = "wss://api.flashcat.cloud/safari/environment/ws" defaultLogLevel = "info" + // healthAddr is bound by the runner so cloud sandbox platforms (e.g. Tencent + // AGS) can satisfy their HTTP readiness probe — the runner only opens + // outbound WebSockets otherwise, leaving no inbound port for the platform + // to probe. + healthAddr = ":49983" ) func main() { @@ -218,6 +225,8 @@ func runRunner() error { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + startHealthServer(ctx) + sigCh := make(chan os.Signal, 1) signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) @@ -278,6 +287,33 @@ func setupLogging(levelStr string) { slog.SetDefault(slog.New(handler)) } +// startHealthServer binds a tiny HTTP listener that always answers /health 200. +// Required by Tencent AGS (and similar serverless sandbox platforms) which gate +// instance start on an HTTP readiness probe within ~30s. +func startHealthServer(ctx context.Context) { + // AGS readiness probes hit the pod from outside, so binding to all + // interfaces is required — the listener only serves a fixed 200 on /health. 
+ var lc net.ListenConfig + ln, err := lc.Listen(ctx, "tcp", healthAddr) // #nosec G102 -- intentional: external readiness probe + + if err != nil { + slog.Warn("health server skipped (port already in use)", "addr", healthAddr, "err", err) + return + } + mux := http.NewServeMux() + mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + }) + srv := &http.Server{Handler: mux, ReadHeaderTimeout: 5 * time.Second} + go func() { + if err := srv.Serve(ln); err != nil && err != http.ErrServerClosed { + slog.Warn("health server stopped", "err", err) + } + }() + slog.Info("health server listening", "addr", healthAddr) +} + func parseLogLevel(levelStr string) slog.Level { switch levelStr { case "debug": diff --git a/environment/environment.go b/environment/environment.go index 86c1658..9d1420f 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -650,9 +650,11 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s err := cmd.Run() exitCode := 0 if err != nil { - if exitError, ok := err.(*exec.ExitError); ok { + var exitError *exec.ExitError + switch { + case errors.As(err, &exitError): exitCode = exitError.ExitCode() - } else if ctx.Err() == context.DeadlineExceeded { + case ctx.Err() == context.DeadlineExceeded: res := &protocol.BashResult{ Stdout: stdout.String(), Stderr: "command timed out", @@ -663,11 +665,10 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s res.Truncated = true } return res, nil - } else if errors.Is(err, ErrOutputCapped) { + case errors.Is(err, ErrOutputCapped): // Command itself didn't fail — only the writer's contract violation // surfaced. Treat as a successful run with capped buffers. 
- err = nil - } else { + default: return nil, fmt.Errorf("failed to execute command: %w", err) } } @@ -891,7 +892,7 @@ func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArg // Probe phase: cache hit if .checksum matches AND SKILL.md exists. cachedSum, _ := os.ReadFile(filepath.Join(skillDir, ".checksum")) if string(cachedSum) == args.Checksum { - if _, err := os.Stat(filepath.Join(skillDir, "SKILL.md")); err == nil { + if _, statErr := os.Stat(filepath.Join(skillDir, "SKILL.md")); statErr == nil { return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: true}, nil } } @@ -923,12 +924,12 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { } // Remove existing directory if exists - if err := os.RemoveAll(dest); err != nil { - return fmt.Errorf("failed to remove existing directory: %w", err) + if rmErr := os.RemoveAll(dest); rmErr != nil { + return fmt.Errorf("failed to remove existing directory: %w", rmErr) } - if err := os.MkdirAll(dest, 0o755); err != nil { - return fmt.Errorf("failed to create destination directory: %w", err) + if mkErr := os.MkdirAll(dest, 0o755); mkErr != nil { + return fmt.Errorf("failed to create destination directory: %w", mkErr) } // Resolve dest to absolute and append a trailing separator BEFORE the diff --git a/environment/netsafe.go b/environment/netsafe.go index e7babd2..74c55b0 100644 --- a/environment/netsafe.go +++ b/environment/netsafe.go @@ -143,7 +143,11 @@ func safeDialContext(dialTimeout time.Duration) func(ctx context.Context, networ // safeHTTPTransport returns an *http.Transport wired with safeDialContext // and sensible pool sizes. 
func safeHTTPTransport(dialTimeout time.Duration) *http.Transport { - t := http.DefaultTransport.(*http.Transport).Clone() + base, ok := http.DefaultTransport.(*http.Transport) + if !ok { + base = &http.Transport{} + } + t := base.Clone() t.DialContext = safeDialContext(dialTimeout) return t } diff --git a/environment/security_test.go b/environment/security_test.go index c7820ee..527361b 100644 --- a/environment/security_test.go +++ b/environment/security_test.go @@ -101,9 +101,9 @@ func TestUnzipSkill_AcceptsBenign(t *testing.T) { env := newTestEnvironment(t) dest := filepath.Join(t.TempDir(), "skills", "ok") zipData := buildTestZip(t, map[string]string{ - "SKILL.md": "# hi", - "resources/a.txt": "alpha", - "scripts/b.sh": "#!/bin/sh\n", + "SKILL.md": "# hi", + "resources/a.txt": "alpha", + "scripts/b.sh": "#!/bin/sh\n", }) require.NoError(t, env.unzipSkill(zipData, dest)) for _, rel := range []string{"SKILL.md", "resources/a.txt", "scripts/b.sh"} { @@ -123,8 +123,8 @@ func TestWebFetch_RefusesSSRFTargets(t *testing.T) { env := newTestEnvironment(t) cases := []string{ "http://169.254.169.254/latest/meta-data/", // AWS / Aliyun / GCP IMDS - "http://127.0.0.1/", // loopback - "http://10.0.0.1/", // RFC1918 + "http://127.0.0.1/", // loopback + "http://10.0.0.1/", // RFC1918 } for _, raw := range cases { t.Run(raw, func(t *testing.T) { diff --git a/permission/permission.go b/permission/permission.go index 6f76ae9..773279f 100644 --- a/permission/permission.go +++ b/permission/permission.go @@ -1,6 +1,6 @@ // Package permission implements glob-based command permission checking. // -// Matching semantics +// # Matching semantics // // Rules are evaluated against three normalized projections of every shell // command found anywhere in the input — including pipelines, command @@ -22,7 +22,7 @@ // shell printer's canonical formatter, so "kubectl get pods" matches // the rule "kubectl get pods" regardless of spacing. 
// -// Precedence +// # Precedence // // Rules are evaluated in specificity order, and the first match wins. // Specificity is "longer literal prefix before any '*' wins"; the catch-all @@ -279,7 +279,7 @@ func writeRedirectPrefix(op syntax.RedirOperator) string { return ">" case syntax.AppOut: return ">>" - case syntax.ClbOut: + case syntax.RdrClob: return ">|" case syntax.RdrAll: return "&>" diff --git a/permission/permission_test.go b/permission/permission_test.go index 7688057..34b17ac 100644 --- a/permission/permission_test.go +++ b/permission/permission_test.go @@ -257,7 +257,7 @@ func TestChecker_ASTBypassHardening(t *testing.T) { }, // Redirect targets must be gated. { - name: "write redirect to /etc blocked under read-only rules", + name: "write redirect to /etc blocked under read-only rules", rules: SafeReadOnlyRules(), // "echo *" was removed, so this is doubly blocked, but the // redirect-target check is what makes this safe even if echo @@ -325,8 +325,8 @@ func TestChecker_ASTBypassHardening(t *testing.T) { { name: "extra spaces still match", rules: map[string]string{ - "*": "deny", - "kubectl get *": "allow", + "*": "deny", + "kubectl get *": "allow", }, command: "kubectl get pods", wantErr: false, From d5b5a5d69e3a10ece0de1ef0de568b794169d3fd Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 20:59:45 +0800 Subject: [PATCH 10/11] fix(ci): drop invalid G706 gosec exclude + handle unix-abs zip-slip on windows - .golangci.yml: pinned CI golangci-lint v2.4 rejects G706 under gosec.excludes (rule unknown to its gosec). Move suppression to the version-safe linters.exclusions.rules text match so both v2.4 and newer versions stay quiet. Same regression previously fixed in a60df85. - environment/unzipSkill: filepath.IsAbs("/etc/passwd") returns false on Windows (no drive letter), so the absolute-path guard let the entry slip through and TestUnzipSkill_RejectsZipSlip/absolute-path-unix failed on windows-latest. 
Reject leading "/" or "\\" on the raw name before Clean normalizes separators. Co-Authored-By: Claude Opus 4.7 (1M context) --- .golangci.yml | 15 ++++++++++----- environment/environment.go | 7 +++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 122e243..22459b2 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -47,15 +47,20 @@ linters: # Exclude G301 (directory permissions) - workspace needs readable directories # Exclude G304 (file inclusion) - paths are validated via safePath() # Exclude G306 (file permissions) - workspace files need to be readable - # Exclude G706 (log taint via taint analysis) - slog's structured - # key-value logging is not a format-string injection vector; the rule - # fires on every slog call that includes a user-controlled value, which - # is the whole point of structured logging. excludes: - G301 - G304 - G306 - - G706 + exclusions: + rules: + # G706 (log injection via taint) is added in newer gosec versions and + # not in our pinned CI golangci-lint v2.4 — listing it under + # gosec.excludes fails `config verify` there. Match by text instead so + # newer versions also stay quiet: slog's structured key/value logging + # is not a format-string injection vector. + - linters: + - gosec + text: "G706" formatters: enable: diff --git a/environment/environment.go b/environment/environment.go index 9d1420f..2291f26 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -950,6 +950,13 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { if strings.Contains(f.Name, "..") { return fmt.Errorf("invalid file path in zip: %s", f.Name) } + // Reject leading "/" or "\" on the raw name. filepath.IsAbs is + // platform-dependent and misses unix-style "/etc/passwd" on Windows + // (no drive letter), so check the raw name before Clean rewrites + // separators. 
+	if strings.HasPrefix(f.Name, "/") || strings.HasPrefix(f.Name, `\`) {
+		return fmt.Errorf("invalid file path in zip: %s", f.Name)
+	}
 	cleanName := filepath.Clean(f.Name)
 	if strings.HasPrefix(cleanName, "..") || filepath.IsAbs(cleanName) {
 		return fmt.Errorf("invalid file path in zip: %s", f.Name)

From 95ca4c5782c0df68f4766738f591658bfbc758b7 Mon Sep 17 00:00:00 2001
From: ysyneu
Date: Thu, 7 May 2026 10:26:58 +0800
Subject: [PATCH 11/11] feat(knowledge): reconcile_knowledge_manifest RPC for orphan/stale prune + missing-file diff

Adds a per-session RPC so Safari can declare its current view of which
knowledge files should exist (manifest = list of rel_path + sha256). The
runner walks <workspace>/knowledge/<scope>/, deletes anything not in the
manifest (orphan prune), deletes anything whose sha disagrees with the
sentinel (stale prune), and returns NeedsStage = manifest entries it
doesn't have on disk after the prune step.

Safari uses NeedsStage to drive a follow-up bulk StageKnowledgeFiles so
the full pack lands at session start instead of lazily on first read.
This is what makes ~/.flashduty/knowledge/<scope>/ look like the actual
pack contents (DUTY.md + every runbook) instead of just whatever the LLM
happened to read in the current turn.

Walks the on-disk tree (rather than trusting the sentinel) so manual
operator drops also get reconciled instead of lingering forever. Reuses
the existing sentinel + advisory lock so concurrent stage calls from the
same Safari are safe.

Pairs with fc-safari change wiring BuildManifest +
ReconcileKnowledgeManifest + EagerStageMissing into mw_env.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 environment/knowledge.go      | 191 ++++++++++++++++++++--
 environment/knowledge_test.go | 298 ++++++++++++++++++++++++++++++----
 protocol/messages.go          |  41 ++++-
 ws/handler.go                 |   8 +
 4 files changed, 486 insertions(+), 52 deletions(-)

diff --git a/environment/knowledge.go b/environment/knowledge.go
index 63155ce..455d535 100644
--- a/environment/knowledge.go
+++ b/environment/knowledge.go
@@ -8,11 +8,18 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
+	"regexp"
 	"strings"

 	"github.com/flashcatcloud/flashduty-runner/protocol"
 )

+// teamScopeRe matches the only legal team-scope directory name: `team_` plus
+// one or more digits. Anything else (e.g. `team_42a`, `team_`, `Team_42`) is
+// rejected so the runner can never be tricked into mkdir'ing an attacker-
+// supplied directory name.
+var teamScopeRe = regexp.MustCompile(`^team_\d+$`)
+
 const (
 	// sentinelName is the hidden JSON map that tracks staged-file checksums.
 	// Safari reads this to decide which knowledge pack files are already current.
@@ -21,27 +28,43 @@ const (

 // validateKnowledgeRelPath enforces the path rules for knowledge file operations.
 //
-// Rules (from the Safari-side contract):
-//   - Must not contain path separators or double-dot components — the runner
-//     only writes flat files in the workspace root, never in sub-directories.
-//   - Leading-dot filenames are rejected because they are hidden by convention;
-//     the sentinel is written by the runner itself and is never staged by clients.
+// Layout: every staged file lives under `knowledge/<scope>/` where
+// scope is `account` or `team_<team_id>`. The leading `knowledge/` segment
+// matches the runner-relative form of the read path Safari uses
+// (`<workspace>/knowledge/<scope>/<file>`), so a freshly staged file lands at
+// exactly the location a follow-up read will probe — no contract drift.
+//
+// Bare-leaf paths (`DUTY.md`), deeper trees (`knowledge/account/sub/foo.md`),
+// missing prefix (`account/foo.md`), unknown scope segments, and the
+// sentinel filename are all rejected. Backslashes are blocked to cover the
+// Windows-style traversal vector defensively even though the runner only
+// ships on unix.
 func validateKnowledgeRelPath(relPath string) error {
 	if relPath == "" {
 		return fmt.Errorf("rel_path must not be empty")
 	}
-	if strings.ContainsAny(relPath, `/\`) {
-		return fmt.Errorf("rel_path must not contain path separators: %q", relPath)
+	if strings.ContainsRune(relPath, '\\') {
+		return fmt.Errorf("rel_path must not contain backslash: %q", relPath)
+	}
+	parts := strings.Split(relPath, "/")
+	if len(parts) != 3 {
+		return fmt.Errorf("rel_path must be knowledge/<scope>/<file>, got %q", relPath)
+	}
+	root, scope, leaf := parts[0], parts[1], parts[2]
+	if root != "knowledge" {
+		return fmt.Errorf("rel_path must start with 'knowledge/', got %q", relPath)
 	}
-	// Reject the bare ".." token. Slash-separated traversal like "foo/../bar"
-	// is already blocked above, but a plain ".." with no slashes still escapes.
-	if relPath == ".." {
-		return fmt.Errorf("rel_path must not be '..': %q", relPath)
+	if scope != "account" && !teamScopeRe.MatchString(scope) {
+		return fmt.Errorf("rel_path scope must be 'account' or 'team_<team_id>', got %q", scope)
 	}
-	if strings.HasPrefix(relPath, ".") {
-		// Hidden files (including the sentinel itself) cannot be staged by clients.
-		// The runner owns the sentinel exclusively.
-		return fmt.Errorf("rel_path must not start with '.': %q", relPath)
+	if leaf == "" || leaf == "." || leaf == ".." {
+		return fmt.Errorf("rel_path leaf must be a real filename, got %q", leaf)
+	}
+	if strings.HasPrefix(leaf, ".") {
+		return fmt.Errorf("rel_path leaf must not start with '.': %q", leaf)
+	}
+	if leaf == sentinelName {
+		return fmt.Errorf("rel_path leaf must not be the sentinel filename")
 	}
 	return nil
 }
@@ -166,6 +189,12 @@ func (e *Environment) StageKnowledgeFiles(ctx context.Context, args *protocol.St
 		}

 		targetPath := filepath.Join(e.root, f.RelPath)
+		if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil {
+			status.Success = false
+			status.Error = fmt.Sprintf("failed to create scope directory: %v", err)
+			result.Files = append(result.Files, status)
+			continue
+		}
 		if err := atomicWriteFile(targetPath, content, 0o644); err != nil {
 			status.Success = false
 			status.Error = err.Error()
@@ -197,6 +226,138 @@ func (e *Environment) StageKnowledgeFiles(ctx context.Context, args *protocol.St
 	return result, nil
 }

+// ReconcileKnowledgeManifest reconciles the on-disk knowledge tree against the
+// supplied manifest. Files present on disk but absent from the manifest are
+// orphans (pruned). Files present in the manifest with a checksum that
+// disagrees with the sentinel are stale (also pruned, so the next read
+// triggers a fresh lazy install from Safari). Files whose checksum already
+// matches are left in place.
+//
+// The runner does NOT pre-stage anything in this call: the manifest declares
+// what *should* exist if it were read, not what *must* be cached. Cold packs
+// stay cold; only drift is corrected. The whole pass runs under the sentinel
+// lock so it is safe with concurrent stage calls from the same Safari.
+func (e *Environment) ReconcileKnowledgeManifest(ctx context.Context, args *protocol.ReconcileKnowledgeManifestArgs) (*protocol.ReconcileKnowledgeManifestResult, error) { + expected := make(map[string]string, len(args.Files)) + for _, f := range args.Files { + if err := validateKnowledgeRelPath(f.RelPath); err != nil { + slog.Warn("skipping invalid manifest entry", "rel_path", f.RelPath, "error", err) + continue + } + expected[f.RelPath] = f.Checksum + } + + result := &protocol.ReconcileKnowledgeManifestResult{} + knowledgeRoot := filepath.Join(e.root, "knowledge") + sentinelPath := filepath.Join(e.root, sentinelName) + + err := withSentinelLock(sentinelPath, func() error { + sentinel := readSentinel(sentinelPath) + // onDisk enumerates `knowledge//` paths actually present. + // We walk the tree (rather than trusting the sentinel) so a manual + // drop into the workspace — e.g. an operator copying a file in for + // debugging — also gets reconciled instead of lingering forever. + onDisk, err := walkKnowledgeTree(knowledgeRoot) + if err != nil { + return err + } + + dirty := false + // onDiskSet lets us answer "is this manifest entry already cached?" + // in O(1) below without a second walk. + onDiskSet := make(map[string]struct{}, len(onDisk)) + for _, relPath := range onDisk { + expectedSum, want := expected[relPath] + switch { + case !want: + // Orphan: not declared in the current manifest. + if rmErr := os.Remove(filepath.Join(e.root, relPath)); rmErr != nil && !os.IsNotExist(rmErr) { + slog.Warn("failed to prune orphan knowledge file", "rel_path", relPath, "error", rmErr) + continue + } + delete(sentinel, relPath) + result.Pruned = append(result.Pruned, relPath) + dirty = true + case sentinel[relPath] != expectedSum: + // Stale: sentinel disagrees with the manifest. Drop the file; + // it'll come back via NeedsStage so Safari refetches it from S3. 
+				if rmErr := os.Remove(filepath.Join(e.root, relPath)); rmErr != nil && !os.IsNotExist(rmErr) {
+					slog.Warn("failed to prune stale knowledge file", "rel_path", relPath, "error", rmErr)
+					continue
+				}
+				delete(sentinel, relPath)
+				result.Pruned = append(result.Pruned, relPath)
+				result.StaleCount++
+				dirty = true
+			default:
+				result.KeptCount++
+				onDiskSet[relPath] = struct{}{}
+			}
+		}
+
+		// Anything in the manifest that isn't in onDiskSet is a cache miss
+		// the caller needs to fix — either it was just pruned for being stale
+		// or it was never staged in the first place. The list is what powers
+		// Safari's eager-stage step so the full pack lands on disk in one
+		// batch instead of waiting for the agent to read each file.
+		for relPath := range expected {
+			if _, ok := onDiskSet[relPath]; !ok {
+				result.NeedsStage = append(result.NeedsStage, relPath)
+			}
+		}
+
+		if dirty {
+			return writeSentinel(sentinelPath, sentinel)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("reconcile manifest: %w", err)
+	}
+	return result, nil
+}
+
+// walkKnowledgeTree returns every leaf file under <knowledgeRoot>/<scope>/
+// as `knowledge/<scope>/<file>` rel-paths. Hidden files (the sentinel) and
+// anything failing validation are skipped; nested directories beneath a scope
+// are ignored because the layout forbids them.
+func walkKnowledgeTree(knowledgeRoot string) ([]string, error) { + entries, err := os.ReadDir(knowledgeRoot) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read knowledge root: %w", err) + } + + var paths []string + for _, scope := range entries { + if !scope.IsDir() { + continue + } + scopeName := scope.Name() + if scopeName != "account" && !teamScopeRe.MatchString(scopeName) { + continue + } + leaves, err := os.ReadDir(filepath.Join(knowledgeRoot, scopeName)) + if err != nil { + slog.Warn("failed to read scope directory", "scope", scopeName, "error", err) + continue + } + for _, leaf := range leaves { + if leaf.IsDir() { + continue + } + rel := "knowledge/" + scopeName + "/" + leaf.Name() + if err := validateKnowledgeRelPath(rel); err != nil { + continue + } + paths = append(paths, rel) + } + } + return paths, nil +} + // DeleteKnowledgeFiles removes the supplied files from the workspace root and // scrubs their entries from the sentinel. func (e *Environment) DeleteKnowledgeFiles(ctx context.Context, args *protocol.DeleteKnowledgeFilesArgs) (*protocol.DeleteKnowledgeFilesResult, error) { diff --git a/environment/knowledge_test.go b/environment/knowledge_test.go index 76202d7..e8384e7 100644 --- a/environment/knowledge_test.go +++ b/environment/knowledge_test.go @@ -42,21 +42,48 @@ func TestStageKnowledgeFiles_Single(t *testing.T) { result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "DUTY.md", Checksum: "abc123", ContentB64: b64("# Duty\n")}, + {RelPath: "knowledge/account/DUTY.md", Checksum: "abc123", ContentB64: b64("# Duty\n")}, }, }) require.NoError(t, err) require.Len(t, result.Files, 1) - assert.True(t, result.Files[0].Success) + assert.True(t, result.Files[0].Success, "stage failed: %s", result.Files[0].Error) assert.Empty(t, result.Files[0].Error) - assert.Equal(t, "# Duty\n", fileContent(t, ws.Root(), "DUTY.md")) + assert.Equal(t, "# 
Duty\n", fileContent(t, ws.Root(), "knowledge/account/DUTY.md")) + + sentinel := readSentinelMap(t, ws.Root()) + assert.Equal(t, "abc123", sentinel["knowledge/account/DUTY.md"]) +} + +// Stage files for two scopes in one call → both directories materialise and +// the sentinel records both keys. Locks in the multi-scope contract that +// motivated relaxing validateKnowledgeRelPath in the first place. +func TestStageKnowledgeFiles_AccountAndTeam(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ + Files: []protocol.KnowledgeFile{ + {RelPath: "knowledge/account/DUTY.md", Checksum: "asum", ContentB64: b64("a")}, + {RelPath: "knowledge/team_42/escalation.md", Checksum: "tsum", ContentB64: b64("t")}, + }, + }) + require.NoError(t, err) + require.Len(t, result.Files, 2) + for i, st := range result.Files { + assert.True(t, st.Success, "file %d (%s) failed: %s", i, st.RelPath, st.Error) + } + + assert.Equal(t, "a", fileContent(t, ws.Root(), "knowledge/account/DUTY.md")) + assert.Equal(t, "t", fileContent(t, ws.Root(), "knowledge/team_42/escalation.md")) sentinel := readSentinelMap(t, ws.Root()) - assert.Equal(t, "abc123", sentinel["DUTY.md"]) + assert.Equal(t, "asum", sentinel["knowledge/account/DUTY.md"]) + assert.Equal(t, "tsum", sentinel["knowledge/team_42/escalation.md"]) } -// Case 2: mixed batch — one valid file + one with '/' in rel_path. +// Case 2: mixed batch — one valid file + one with an extra '/' segment. // The valid file must land; the invalid one must return an error in the ack; // the sentinel must contain only the valid entry. 
func TestStageKnowledgeFiles_MixedValidity(t *testing.T) {
@@ -65,8 +92,8 @@ func TestStageKnowledgeFiles_MixedValidity(t *testing.T) {
 
 	result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{
 		Files: []protocol.KnowledgeFile{
-			{RelPath: "DUTY.md", Checksum: "goodsum", ContentB64: b64("content")},
-			{RelPath: "sub/evil.md", Checksum: "badsum", ContentB64: b64("evil")},
+			{RelPath: "knowledge/account/DUTY.md", Checksum: "goodsum", ContentB64: b64("content")},
+			{RelPath: "knowledge/account/sub/evil.md", Checksum: "badsum", ContentB64: b64("evil")},
 		},
 	})
 	require.NoError(t, err)
@@ -76,18 +103,18 @@ func TestStageKnowledgeFiles_MixedValidity(t *testing.T) {
 	assert.True(t, result.Files[0].Success)
 	assert.Empty(t, result.Files[0].Error)
 
-	// Second entry (slash in path) should fail.
+	// Second entry (deeper than knowledge/<scope>/) should fail.
 	assert.False(t, result.Files[1].Success)
 	assert.NotEmpty(t, result.Files[1].Error)
 
 	// Only valid file exists on disk.
-	assert.FileExists(t, filepath.Join(ws.Root(), "DUTY.md"))
-	assert.NoFileExists(t, filepath.Join(ws.Root(), "sub/evil.md"))
+	assert.FileExists(t, filepath.Join(ws.Root(), "knowledge/account/DUTY.md"))
+	assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/sub/evil.md"))
 
 	// Sentinel has only the good entry.
sentinel := readSentinelMap(t, ws.Root()) - assert.Equal(t, "goodsum", sentinel["DUTY.md"]) - _, hasBad := sentinel["sub/evil.md"] + assert.Equal(t, "goodsum", sentinel["knowledge/account/DUTY.md"]) + _, hasBad := sentinel["knowledge/account/sub/evil.md"] assert.False(t, hasBad) } @@ -99,22 +126,22 @@ func TestStageKnowledgeFiles_Overwrite(t *testing.T) { _, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "runbook.md", Checksum: "v1sum", ContentB64: b64("version 1")}, + {RelPath: "knowledge/account/runbook.md", Checksum: "v1sum", ContentB64: b64("version 1")}, }, }) require.NoError(t, err) _, err = ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "runbook.md", Checksum: "v2sum", ContentB64: b64("version 2")}, + {RelPath: "knowledge/account/runbook.md", Checksum: "v2sum", ContentB64: b64("version 2")}, }, }) require.NoError(t, err) - assert.Equal(t, "version 2", fileContent(t, ws.Root(), "runbook.md")) + assert.Equal(t, "version 2", fileContent(t, ws.Root(), "knowledge/account/runbook.md")) sentinel := readSentinelMap(t, ws.Root()) - assert.Equal(t, "v2sum", sentinel["runbook.md"]) + assert.Equal(t, "v2sum", sentinel["knowledge/account/runbook.md"]) } // Case 4: delete a previously staged file → file gone, sentinel entry gone. @@ -125,22 +152,22 @@ func TestDeleteKnowledgeFiles_Staged(t *testing.T) { // Stage first. _, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "old-runbook.md", Checksum: "deadbeef", ContentB64: b64("old content")}, + {RelPath: "knowledge/account/old-runbook.md", Checksum: "deadbeef", ContentB64: b64("old content")}, }, }) require.NoError(t, err) // Delete. 
delResult, err := ws.DeleteKnowledgeFiles(ctx, &protocol.DeleteKnowledgeFilesArgs{ - RelPaths: []string{"old-runbook.md"}, + RelPaths: []string{"knowledge/account/old-runbook.md"}, }) require.NoError(t, err) assert.True(t, delResult.Success) - assert.NoFileExists(t, filepath.Join(ws.Root(), "old-runbook.md")) + assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/old-runbook.md")) sentinel := readSentinelMap(t, ws.Root()) - _, exists := sentinel["old-runbook.md"] + _, exists := sentinel["knowledge/account/old-runbook.md"] assert.False(t, exists) } @@ -150,7 +177,7 @@ func TestDeleteKnowledgeFiles_Missing(t *testing.T) { ctx := context.Background() result, err := ws.DeleteKnowledgeFiles(ctx, &protocol.DeleteKnowledgeFilesArgs{ - RelPaths: []string{"does-not-exist.md"}, + RelPaths: []string{"knowledge/account/does-not-exist.md"}, }) require.NoError(t, err) assert.True(t, result.Success) @@ -165,18 +192,18 @@ func TestStageDeleteCycle(t *testing.T) { for i := range 2 { _, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ - {RelPath: "cycle.md", Checksum: "cycle", ContentB64: b64("cycle")}, + {RelPath: "knowledge/account/cycle.md", Checksum: "cycle", ContentB64: b64("cycle")}, }, }) require.NoError(t, err, "stage iteration %d", i) _, err = ws.DeleteKnowledgeFiles(ctx, &protocol.DeleteKnowledgeFilesArgs{ - RelPaths: []string{"cycle.md"}, + RelPaths: []string{"knowledge/account/cycle.md"}, }) require.NoError(t, err, "delete iteration %d", i) } - assert.NoFileExists(t, filepath.Join(ws.Root(), "cycle.md")) + assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/cycle.md")) sentinel := readSentinelMap(t, ws.Root()) assert.Empty(t, sentinel) @@ -192,9 +219,15 @@ func TestStageKnowledgeFiles_RejectedPaths(t *testing.T) { desc string }{ {"../etc/passwd", "path traversal with .."}, - {"sub/x.md", "subdirectory path with /"}, - {".hidden.md", "leading-dot filename"}, - {sentinelName, "sentinel 
filename itself"},
+		{"knowledge/account/sub/x.md", "deeper than knowledge/<scope>/"},
+		{"knowledge/account/.hidden.md", "leading-dot leaf"},
+		{"knowledge/account/" + sentinelName, "sentinel filename leaf"},
+		{"account/DUTY.md", "missing knowledge/ prefix"},
+		{"DUTY.md", "bare leaf without scope"},
+		{"knowledge/unknown_scope/x.md", "unknown scope segment"},
+		{"knowledge/team_/x.md", "empty team id"},
+		{"knowledge/team_42a/x.md", "non-digit team suffix"},
+		{"skill/account/x.md", "wrong root segment"},
 		{"", "empty string"},
 	}
 
@@ -217,22 +250,217 @@
 // thorough coverage of edge cases.
 func TestValidateKnowledgeRelPath(t *testing.T) {
 	valid := []string{
-		"DUTY.md", "runbook-api.md", "README.txt", "a",
+		"knowledge/account/DUTY.md", "knowledge/account/runbook-api.md",
+		"knowledge/account/README.txt", "knowledge/account/a",
+		"knowledge/team_1/x.md", "knowledge/team_42/escalation.md", "knowledge/team_999/foo.md",
+		// Double-dot in the middle of a filename is a legal flat name.
+		"knowledge/account/foo..bar", "knowledge/account/v2..md",
 	}
 	for _, p := range valid {
 		assert.NoError(t, validateKnowledgeRelPath(p), "should be valid: %q", p)
 	}
 
 	invalid := []string{
-		"", "..", "a/b", `a\b`, ".hidden", sentinelName, "foo/../bar",
-		// These are now valid because ".." only matches the bare token, not
-		// substrings — "foo..bar" is a legitimate flat filename.
- } - validButPreviouslyRejected := []string{"foo..bar", "v2..md"} - for _, p := range validButPreviouslyRejected { - assert.NoError(t, validateKnowledgeRelPath(p), "should be valid (double-dot in middle): %q", p) + "", "..", "DUTY.md", "a", "account/x.md", `knowledge\account\x.md`, + "knowledge/account/", "knowledge/account/.hidden", + "knowledge/account/" + sentinelName, + "knowledge/account/sub/x.md", + "knowledge/account/../etc/passwd", "knowledge/account/..", + "knowledge/foo/x.md", "knowledge/team_/x.md", + "knowledge/team_42a/x.md", "knowledge/Team_42/x.md", + "skill/account/x.md", "knowledge", "knowledge/", "knowledge/account", } for _, p := range invalid { assert.Error(t, validateKnowledgeRelPath(p), "should be invalid: %q", p) } } + +// stageOne is a tiny helper that lands a single file into the test workspace +// so reconcile tests can set up scenarios without restating the boilerplate. +func stageOne(t *testing.T, ws *Environment, relPath, sum, body string) { + t.Helper() + res, err := ws.StageKnowledgeFiles(context.Background(), &protocol.StageKnowledgeFilesArgs{ + Files: []protocol.KnowledgeFile{{RelPath: relPath, Checksum: sum, ContentB64: b64(body)}}, + }) + require.NoError(t, err) + require.Len(t, res.Files, 1) + require.True(t, res.Files[0].Success, res.Files[0].Error) +} + +// Reconcile case 1: every staged file matches the manifest → no prune, +// kept_count == staged count, sentinel untouched, nothing needs staging. 
+func TestReconcileKnowledgeManifest_AllMatch(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/runbook.md", "sum-a", "A") + stageOne(t, ws, "knowledge/team_42/escalation.md", "sum-b", "B") + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/runbook.md", Checksum: "sum-a"}, + {RelPath: "knowledge/team_42/escalation.md", Checksum: "sum-b"}, + }, + }) + require.NoError(t, err) + assert.Empty(t, res.Pruned) + assert.Equal(t, 2, res.KeptCount) + assert.Equal(t, 0, res.StaleCount) + assert.Empty(t, res.NeedsStage, "everything matches; caller should not need to stage anything") + + // Files still on disk; sentinel still has both entries. + assert.Equal(t, "A", fileContent(t, ws.Root(), "knowledge/account/runbook.md")) + assert.Equal(t, "B", fileContent(t, ws.Root(), "knowledge/team_42/escalation.md")) + sentinel := readSentinelMap(t, ws.Root()) + assert.Equal(t, "sum-a", sentinel["knowledge/account/runbook.md"]) + assert.Equal(t, "sum-b", sentinel["knowledge/team_42/escalation.md"]) +} + +// Reconcile case 2: a staged file is missing from the manifest entirely (the +// pack-level deletion case). The runner deletes the file and drops it from +// the sentinel; the surviving file is left alone. 
+func TestReconcileKnowledgeManifest_PrunesOrphans(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/runbook.md", "sum-a", "A") + stageOne(t, ws, "knowledge/account/old.md", "sum-old", "old body") + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/runbook.md", Checksum: "sum-a"}, + }, + }) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"knowledge/account/old.md"}, res.Pruned) + assert.Equal(t, 1, res.KeptCount) + assert.Equal(t, 0, res.StaleCount, "orphan should not count as stale") + assert.Empty(t, res.NeedsStage, "the surviving file is current and the orphan is not in the manifest") + + _, err = os.Stat(filepath.Join(ws.Root(), "knowledge/account/old.md")) + assert.True(t, os.IsNotExist(err), "orphan should be deleted") + assert.Equal(t, "A", fileContent(t, ws.Root(), "knowledge/account/runbook.md")) + + sentinel := readSentinelMap(t, ws.Root()) + _, hasOld := sentinel["knowledge/account/old.md"] + assert.False(t, hasOld, "sentinel must not retain pruned entry") + assert.Equal(t, "sum-a", sentinel["knowledge/account/runbook.md"]) +} + +// Reconcile case 3: a file's checksum drifted upstream (someone edited it in +// the UI). The runner deletes the on-disk copy so the next read goes through +// Safari's lazy installer and gets the fresh bytes from S3. 
+func TestReconcileKnowledgeManifest_PrunesStaleChecksum(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/runbook.md", "sum-old", "old content") + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/runbook.md", Checksum: "sum-new"}, + }, + }) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"knowledge/account/runbook.md"}, res.Pruned) + assert.Equal(t, 0, res.KeptCount) + assert.Equal(t, 1, res.StaleCount, "checksum drift should count as stale") + assert.ElementsMatch(t, []string{"knowledge/account/runbook.md"}, res.NeedsStage, + "after pruning the stale copy, caller must restage the fresh bytes") + + _, err = os.Stat(filepath.Join(ws.Root(), "knowledge/account/runbook.md")) + assert.True(t, os.IsNotExist(err), "stale file should be deleted") + + sentinel := readSentinelMap(t, ws.Root()) + _, hasEntry := sentinel["knowledge/account/runbook.md"] + assert.False(t, hasEntry, "sentinel must drop the stale entry so a refetch resets it") +} + +// Reconcile case 4: an empty manifest is the "no packs are active anymore" +// signal — every staged file should be pruned. +func TestReconcileKnowledgeManifest_EmptyManifestPrunesEverything(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/a.md", "sa", "a") + stageOne(t, ws, "knowledge/team_5/b.md", "sb", "b") + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{Files: nil}) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"knowledge/account/a.md", "knowledge/team_5/b.md"}, res.Pruned) + assert.Equal(t, 0, res.KeptCount) + + sentinel := readSentinelMap(t, ws.Root()) + assert.Empty(t, sentinel) +} + +// Reconcile case 5: an operator drop — a file appeared on disk that Safari +// never staged (so it has no sentinel entry). 
It must be pruned because it +// isn't in the manifest, even though there's no checksum to compare against. +func TestReconcileKnowledgeManifest_PrunesUnsentineledFile(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Drop a file directly on disk, bypassing StageKnowledgeFiles → no sentinel. + scopeDir := filepath.Join(ws.Root(), "knowledge", "account") + require.NoError(t, os.MkdirAll(scopeDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(scopeDir, "manual.md"), []byte("hand-placed"), 0o644)) + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{Files: nil}) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"knowledge/account/manual.md"}, res.Pruned) + + _, err = os.Stat(filepath.Join(scopeDir, "manual.md")) + assert.True(t, os.IsNotExist(err)) +} + +// Reconcile case 6a: cold runner — manifest declares files that aren't on +// disk yet. Nothing to prune, but every entry comes back in NeedsStage so +// Safari can do a one-shot eager fetch+stage at session start. +func TestReconcileKnowledgeManifest_FlagsMissingForEagerStage(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/DUTY.md", Checksum: "duty"}, + {RelPath: "knowledge/account/runbook.md", Checksum: "rb"}, + }, + }) + require.NoError(t, err) + assert.Empty(t, res.Pruned) + assert.Equal(t, 0, res.KeptCount) + assert.ElementsMatch(t, + []string{"knowledge/account/DUTY.md", "knowledge/account/runbook.md"}, + res.NeedsStage, + ) +} + +// Reconcile case 6: the sentinel filename and unrecognized scope dirs must be +// invisible to the walker — no spurious "prune" attempts on infra files. 
+func TestReconcileKnowledgeManifest_IgnoresSentinelAndUnknownDirs(t *testing.T) {
+	ws := newTestEnvironment(t)
+	ctx := context.Background()
+
+	stageOne(t, ws, "knowledge/account/keep.md", "sk", "keep")
+
+	// Plant an unknown-scope directory and an arbitrary file in the knowledge
+	// root; neither should appear in the prune list.
+	rogueScope := filepath.Join(ws.Root(), "knowledge", "rogue_scope")
+	require.NoError(t, os.MkdirAll(rogueScope, 0o755))
+	require.NoError(t, os.WriteFile(filepath.Join(rogueScope, "stuff.md"), []byte("x"), 0o644))
+
+	res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{
+		Files: []protocol.KnowledgeManifestEntry{
+			{RelPath: "knowledge/account/keep.md", Checksum: "sk"},
+		},
+	})
+	require.NoError(t, err)
+	assert.Empty(t, res.Pruned, "unknown scope dirs and the sentinel must not be touched")
+	assert.Equal(t, 1, res.KeptCount)
+
+	// Rogue file is still there — reconcile is intentionally narrow, only the
+	// validated scope dirs (`account`, `team_<id>`) are in scope.
+	_, err = os.Stat(filepath.Join(rogueScope, "stuff.md"))
+	assert.NoError(t, err)
+}
diff --git a/protocol/messages.go b/protocol/messages.go
index f7db45b..30f3dd4 100644
--- a/protocol/messages.go
+++ b/protocol/messages.go
@@ -115,8 +115,9 @@ const (
 	TaskOpMCPCall      TaskOperation = "mcp_call"
 	TaskOpMCPListTools TaskOperation = "mcp_list_tools"
 	TaskOpSyncSkill    TaskOperation = "sync_skill"
-	TaskOpStageKnowledgeFiles  TaskOperation = "stage_knowledge_files"
-	TaskOpDeleteKnowledgeFiles TaskOperation = "delete_knowledge_files"
+	TaskOpStageKnowledgeFiles        TaskOperation = "stage_knowledge_files"
+	TaskOpDeleteKnowledgeFiles       TaskOperation = "delete_knowledge_files"
+	TaskOpReconcileKnowledgeManifest TaskOperation = "reconcile_knowledge_manifest"
 )
 
 // TaskRequestPayload is the payload for task request messages.
@@ -408,3 +409,39 @@ type DeleteKnowledgeFilesArgs struct {
 type DeleteKnowledgeFilesResult struct {
 	Success bool `json:"success"`
 }
+
+// KnowledgeManifestEntry is one (rel_path, checksum) pair in a reconcile manifest.
+// rel_path uses the same `knowledge/<scope>/<filename>` shape as StageKnowledgeFiles
+// so the runner-side validator is shared.
+type KnowledgeManifestEntry struct {
+	RelPath  string `json:"rel_path"`
+	Checksum string `json:"checksum"`
+}
+
+// ReconcileKnowledgeManifestArgs supplies the canonical view of which knowledge
+// files should exist on disk and at what checksum. The runner deletes anything
+// not in the manifest (orphan prune) and anything whose on-disk checksum
+// disagrees (stale prune); the result's NeedsStage field tells the caller
+// which manifest entries the runner does NOT have cached so the caller can
+// bulk-stage them in one follow-up batch (skills-style eager materialization).
+type ReconcileKnowledgeManifestArgs struct {
+	Files []KnowledgeManifestEntry `json:"files"`
+}
+
+// ReconcileKnowledgeManifestResult reports the diff outcome plus the list of
+// entries the caller still needs to push.
+//
+// Pruned lists every rel_path the runner removed (orphans + stale together).
+// StaleCount is the subset of Pruned whose rel_path was in the manifest but
+// had a divergent checksum — useful for spotting upstream edits.
+// KeptCount counts files left untouched because their checksum already matched.
+// NeedsStage enumerates every manifest entry the runner does NOT have on disk
+// after the prune step. The caller (Safari) materializes these by fetching
+// from S3 and calling StageKnowledgeFiles. Empty when the runner is already
+type ReconcileKnowledgeManifestResult struct { + Pruned []string `json:"pruned,omitempty"` + KeptCount int `json:"kept_count"` + StaleCount int `json:"stale_count"` + NeedsStage []string `json:"needs_stage,omitempty"` +} diff --git a/ws/handler.go b/ws/handler.go index 1ac69eb..1452d79 100644 --- a/ws/handler.go +++ b/ws/handler.go @@ -270,6 +270,14 @@ func (h *Handler) executeTask(ctx context.Context, req *protocol.TaskRequestPayl logger.Info("deleting knowledge files", "count", len(args.RelPaths)) return h.ws.DeleteKnowledgeFiles(ctx, args) + case protocol.TaskOpReconcileKnowledgeManifest: + args, err := parseArgs[protocol.ReconcileKnowledgeManifestArgs](req.Args) + if err != nil { + return nil, fmt.Errorf("invalid reconcile_knowledge_manifest args: %w", err) + } + logger.Info("reconciling knowledge manifest", "count", len(args.Files)) + return h.ws.ReconcileKnowledgeManifest(ctx, args) + default: return nil, fmt.Errorf("unknown operation: %s", req.Operation) }