anomalyco · paymog · May 26, 2026
diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts
@@ -6,6 +6,49 @@ export type AbsolutePath = typeof AbsolutePath.Type
 export const RelativePath = Schema.String.pipe(Schema.brand("RelativePath"))
 export type RelativePath = typeof RelativePath.Type
 
+/**
+ * String input intended to flow to a filesystem operation (fopen, stat, etc.).
+ *
+ * Some open-weight models occasionally emit file paths wrapped in markdown
+ * auto-links, e.g. `"[notes.md](http://notes.md)"`. This is post-training
+ * chat distribution leaking through the tool boundary: the model has been
+ * rewarded for auto-linking in conversational output and applies that prior
+ * to fields where it makes no sense. Encoding the intent at the schema
+ * level — "this string is going to fopen, not into a chat bubble" — plugs
+ * the leak for every path field at once.
+ *
+ * Only the degenerate case (link text equals the URL with protocol stripped)
+ * is rewritten. Real markdown like `[click](https://example.com)` passes
+ * through untouched.
+ *
+ * Replaces `Schema.String` in tool parameter definitions for fields that
+ * carry filesystem paths. The annotation lives on the encoded side so the
+ * JSON Schema emitted to the LLM still carries the description.
+ *
+ *   filePath: FilePathInput({ description: "The absolute path to the file" })
+ */
+export const FilePathInput = (annotations?: { readonly description?: string }) => {
+  // The description (when given and non-empty) is attached to the encoded,
+  // wire-side string; an absent or empty description leaves it un-annotated.
+  const description = annotations?.description
+  const encoded = !description ? Schema.String : Schema.String.annotate({ description })
+  return encoded.pipe(
+    Schema.decodeTo(Schema.String, {
+      // Decoding rewrites degenerate markdown auto-links back to the bare path.
+      decode: SchemaGetter.transform(unwrapDegenerateAutoLink),
+      // Encoding hands the string back unchanged.
+      encode: SchemaGetter.passthrough({ strict: false }),
+    }),
+  )
+}
+
+/**
+ * Rewrites degenerate markdown auto-links of the form `[text](url)` back to
+ * `text`, where `url` — after an optional `http://` or `https://` prefix is
+ * set aside — is exactly equal to `text`. Links whose text differs from the
+ * protocol-less URL are left untouched.
+ *
+ * @param input raw string as received from the caller
+ * @returns `input` with every degenerate auto-link replaced by its link text
+ */
+function unwrapDegenerateAutoLink(input: string): string {
+  // One global pass covers both shapes: a string that is nothing but the
+  // auto-link, and auto-links embedded inside a longer path. A separate
+  // anchored match for the whole-string case would capture the same groups
+  // and yield the same result, so it is not needed.
+  return input.replace(/\[([^\]]+)\]\((https?:\/\/)?([^)]+)\)/g, (match, text, _proto, url) =>
+    text === url ? text : match,
+  )
+}
+
 /**
  * Integer greater than zero.
  */

diff --git a/packages/core/test/schema.test.ts b/packages/core/test/schema.test.ts
@@ -0,0 +1,43 @@
+import { describe, expect, test } from "bun:test"
+import { Schema } from "effect"
+import { FilePathInput } from "../src/schema"
+
+// Decodes `input` synchronously through a fresh, un-annotated FilePathInput schema.
+const decode = (input: unknown) => {
+  const schema = FilePathInput()
+  return Schema.decodeUnknownSync(schema)(input)
+}
+
+describe("FilePathInput", () => {
+  test("plain string passes through unchanged", () => {
+    const plain = "/Users/x/proj/notes.md"
+    expect(decode(plain)).toBe(plain)
+  })
+
+  test("degenerate auto-link (text == url-without-protocol) is unwrapped", () => {
+    // http, https and protocol-less variants all collapse to the link text.
+    const wrappedVariants = ["[notes.md](http://notes.md)", "[notes.md](https://notes.md)", "[notes.md](notes.md)"]
+    for (const wrapped of wrappedVariants) {
+      expect(decode(wrapped)).toBe("notes.md")
+    }
+  })
+
+  test("real markdown link (text != url-without-protocol) is preserved", () => {
+    const realLinks = ["[click](https://example.com)", "[home](https://x.com)"]
+    for (const link of realLinks) {
+      expect(decode(link)).toBe(link)
+    }
+  })
+
+  test("embedded degenerate auto-link inside a longer path is unwrapped in place", () => {
+    const decoded = decode("/Users/x/proj/[notes.md](http://notes.md)")
+    expect(decoded).toBe("/Users/x/proj/notes.md")
+  })
+
+  test("embedded real markdown link inside a longer string is preserved", () => {
+    const sentence = "see [click](https://example.com) for more"
+    expect(decode(sentence)).toBe(sentence)
+  })
+
+  test("non-string input is rejected by the underlying String schema", () => {
+    const decodeResult = Schema.decodeUnknownResult(FilePathInput())
+    const result = decodeResult(123)
+    expect(result._tag).toBe("Failure")
+  })
+
+  test("description annotation lands on the encoded side (JSON Schema-visible)", () => {
+    // Wrap the field in a struct and read the description off the emitted property.
+    const wrapped = Schema.Struct({ p: FilePathInput({ description: "path to read" }) })
+    const json = Schema.toJsonSchemaDocument(wrapped, { additionalProperties: true })
+    const prop = (json.schema as any).properties.p
+    expect(prop.description).toBe("path to read")
+  })
+})
diff --git a/packages/opencode/src/tool/edit.ts b/packages/opencode/src/tool/edit.ts
@@ -17,6 +17,7 @@ import { InstanceState } from "@/effect/instance-state"
 import { Snapshot } from "@/snapshot"
 import { assertExternalDirectoryEffect } from "./external-directory"
 import { AppFileSystem } from "@opencode-ai/core/filesystem"
+import { FilePathInput } from "@opencode-ai/core/schema"
 import * as Bom from "@/util/bom"
 
 function normalizeLineEndings(text: string): string {
@@ -45,7 +46,7 @@ function lock(filePath: string) {
 }
 
 export const Parameters = Schema.Struct({
-  filePath: Schema.String.annotate({ description: "The absolute path to the file to modify" }),
+  filePath: FilePathInput({ description: "The absolute path to the file to modify" }),
   oldString: Schema.String.annotate({ description: "The text to replace" }),
   newString: Schema.String.annotate({
     description: "The text to replace it with (must be different from oldString)",

diff --git a/packages/opencode/src/tool/lsp.ts b/packages/opencode/src/tool/lsp.ts
@@ -7,6 +7,7 @@ import { InstanceState } from "@/effect/instance-state"
 import { pathToFileURL } from "url"
 import { assertExternalDirectoryEffect } from "./external-directory"
 import { AppFileSystem } from "@opencode-ai/core/filesystem"
+import { FilePathInput } from "@opencode-ai/core/schema"
 
 const operations = [
   "goToDefinition",
@@ -22,7 +23,7 @@ const operations = [
 
 export const Parameters = Schema.Struct({
   operation: Schema.Literals(operations).annotate({ description: "The LSP operation to perform" }),
-  filePath: Schema.String.annotate({ description: "The absolute or relative path to the file" }),
+  filePath: FilePathInput({ description: "The absolute or relative path to the file" }),
   line: Schema.Int.check(Schema.isGreaterThanOrEqualTo(1)).annotate({
     description: "The line number (1-based, as shown in editors)",
   }),

diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts
@@ -1,5 +1,5 @@
 import { Effect, Option, Schema, Scope, Stream } from "effect"
-import { NonNegativeInt } from "@opencode-ai/core/schema"
+import { FilePathInput, NonNegativeInt } from "@opencode-ai/core/schema"
 import * as path from "path"
 import * as Tool from "./tool"
 import { AppFileSystem } from "@opencode-ai/core/filesystem"
@@ -27,7 +27,7 @@ class ReadStop extends Schema.TaggedErrorClass<ReadStop>()("ReadStop", {}) {}
 // Schema output is identical (`type: "number"`), so the LLM view is
 // unchanged; purely CLI-facing uses must now send numbers rather than strings.
 export const Parameters = Schema.Struct({
-  filePath: Schema.String.annotate({ description: "The absolute path to the file or directory to read" }),
+  filePath: FilePathInput({ description: "The absolute path to the file or directory to read" }),
   offset: Schema.optional(NonNegativeInt).annotate({
     description: "The line number to start reading from (1-indexed)",
   }),
@@ -292,14 +292,29 @@ export const ReadTool = Tool.define(
         return yield* Effect.fail(new Error(`Cannot read binary file: ${filepath}`))
       }
 
-      const file = yield* lines(filepath, { limit: params.limit ?? DEFAULT_READ_LIMIT, offset: params.offset || 1 })
+      const limit = params.limit ?? DEFAULT_READ_LIMIT
+      const offset = params.offset || 1
+      const file = yield* lines(filepath, { limit, offset })
       if (file.count < file.offset && !(file.count === 0 && file.offset === 1)) {
         return yield* Effect.fail(
           new Error(`Offset ${file.offset} is out of range for this file (${file.count} lines)`),
         )
       }
 
+      // Surface the offset/limit pairing decision back to the model. Some
+      // models call this tool with only one of the two and previously had
+      // no signal as to what the other was filled with — so they could not
+      // self-correct on the next turn. Note the message is informational
+      // (no `Error:` prefix) so it doesn't read as a failure.
+      const pairingNote =
+        params.offset !== undefined && params.limit === undefined
+          ? `Note: limit was not provided; defaulted to ${DEFAULT_READ_LIMIT} lines. To control the window, pass both offset and limit.\n`
+          : params.limit !== undefined && params.offset === undefined
+            ? `Note: offset was not provided; defaulted to 1 (start of file). To control the window, pass both offset and limit.\n`
+            : ""
+
       let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>\n"].join("\n")
+      if (pairingNote) output += pairingNote
       output += file.raw.map((line, i) => `${i + file.offset}: ${line}`).join("\n")
 
       const last = file.offset + file.raw.length - 1

diff --git a/packages/opencode/src/tool/repair.ts b/packages/opencode/src/tool/repair.ts
@@ -0,0 +1,196 @@
+/**
+ * Tool-input repair layer for open-weight models.
+ *
+ * Background: open-weight models (deepseek, qwen, glm, ...) fail tool calls in
+ * a small, repeatable set of shape-level ways. Strict schema rejection sends
+ * them into recovery loops, because the raw "Expected X, got Y" error is
+ * rarely enough for the model to find the fix on its own.
+ *
+ * Approach: validate first, repair on failure. We let the schema decode run
+ * unchanged; only when it fails do we walk the parse error's issue tree to
+ * locate the failing paths, apply targeted shape repairs at those paths, and
+ * re-decode. Successful inputs are never touched — there is no preprocessing
+ * that could corrupt a valid call.
+ *
+ * The four shape repairs were chosen by surveying the failure modes most
+ * commonly reported against opencode (see issue #26498). Ordering matters:
+ * the JSON-array-string repair must fire before the bare-string wrap, or a
+ * stringified array like `'["a","b"]'` would be wrapped into
+ * `['["a","b"]']`. The `repairAt` switch encodes that ordering at each path.
+ */
+import { Effect } from "effect"
+
+type Path = ReadonlyArray<string | number>
+
+/**
+ * Effect Schema parse issues form a tree:
+ *   Composite { issues: Issue[] }
+ *   Pointer   { path: (string|number)[]; issue: Issue }
+ *   <leaf>    { _tag: "MissingKey" | "InvalidType" | "AnyOf" | ... }
+ *
+ * We collect (path, leafTag) for each leaf so we can repair at the exact
+ * location where the schema disagreed.
+ */
+const ISSUE_MARKER = "~effect/SchemaIssue/Issue"
+
+export function collectFailures(issue: unknown): Array<{ path: Path; tag: string }> {
+  const leaves: Array<{ path: Path; tag: string }> = []
+
+  // Depth-first walk. `prefix` is the path accumulated from enclosing Pointer
+  // nodes; Composite nodes fan out without extending it.
+  const visit = (node: any, prefix: Path): void => {
+    // Anything that is not an object carrying the issue marker is ignored.
+    const isIssue = node !== null && typeof node === "object" && ISSUE_MARKER in node
+    if (!isIssue) return
+
+    switch (node._tag) {
+      case "Pointer":
+        visit(node.issue, [...prefix, ...(node.path ?? [])])
+        return
+      case "Composite":
+        for (const child of node.issues ?? []) visit(child, prefix)
+        return
+      default:
+        // Every other tag is treated as a leaf; a missing tag is recorded as "Unknown".
+        leaves.push({ path: prefix, tag: node._tag ?? "Unknown" })
+    }
+  }
+
+  visit(issue, [])
+  return leaves
+}
+
+/**
+ * Recursively copies arrays and objects so repairs can mutate the copy
+ * without touching the caller's input. Primitives and `null` are returned
+ * as-is; objects are rebuilt from their own enumerable string keys.
+ *
+ * @param value the value to copy
+ * @returns a structurally equal copy sharing no arrays or objects with `value`
+ */
+function cloneDeep<T>(value: T): T {
+  if (value === null || typeof value !== "object") return value
+  if (Array.isArray(value)) return value.map(cloneDeep) as unknown as T
+  const out: Record<string, unknown> = {}
+  for (const key of Object.keys(value as object)) {
+    // Define the property instead of assigning it: an own key named
+    // "__proto__" (which JSON.parse can produce) would otherwise hit the
+    // prototype setter, replacing the copy's prototype and dropping the key.
+    Object.defineProperty(out, key, {
+      value: cloneDeep((value as any)[key]),
+      writable: true,
+      enumerable: true,
+      configurable: true,
+    })
+  }
+  return out as unknown as T
+}
+
+// Cheap pre-filter for strings that, ignoring surrounding whitespace, start
+// with `[` and end with `]`. Whether they are valid JSON is decided by parsing.
+const JSON_ARRAY_RE = /^\s*\[[\s\S]*\]\s*$/
+
+/**
+ * Applies the first repair, in the fixed order below, that fits the value
+ * currently stored at `parent[key]`, mutating `parent` in place.
+ *
+ *   1. `null`                       → remove the slot
+ *   2. string that parses to a JSON array → replace with the parsed array
+ *   3. object with no own keys (`{}`)     → remove the slot
+ *   4. any other defined, non-array value, when the leaf tag is
+ *      `InvalidType` or `AnyOf`           → wrap as `[value]`
+ *
+ * "Remove the slot" means `splice` for array parents and `delete` for
+ * object parents. Step 2 must run before step 4, otherwise a stringified
+ * array would be wrapped instead of parsed.
+ *
+ * NOTE(review): steps 1 and 3 do not check that the position is optional,
+ * and step 4 only looks at the leaf tag — it does not confirm that the
+ * schema expected an array at this path. Confirm that is acceptable for
+ * type mismatches unrelated to arrays.
+ *
+ * @param parent container (array or object) holding the failing value
+ * @param key index or property name of the failing value within `parent`
+ * @param leafTag `_tag` of the leaf issue reported at this path
+ * @returns true if a repair was applied, false otherwise
+ */
+function repairAt(parent: any, key: string | number, leafTag: string): boolean {
+  const value = parent[key]
+
+  // Removes the slot from its container and reports the repair as applied.
+  const dropSlot = (): boolean => {
+    if (Array.isArray(parent)) parent.splice(Number(key), 1)
+    else delete parent[key]
+    return true
+  }
+
+  // (1) null placeholder.
+  if (value === null) return dropSlot()
+
+  // (2) stringified array; falls through when the text is not valid JSON
+  // or does not parse to an array.
+  if (typeof value === "string" && JSON_ARRAY_RE.test(value)) {
+    const parsed = parseJsonSafe(value)
+    if (Array.isArray(parsed)) {
+      parent[key] = parsed
+      return true
+    }
+  }
+
+  // (3) empty-object placeholder. `value` cannot be null here because of (1).
+  const isEmptyObject = typeof value === "object" && !Array.isArray(value) && Object.keys(value).length === 0
+  if (isEmptyObject) return dropSlot()
+
+  // (4) wrap a lone value into a one-element array.
+  const tagAllowsWrap = leafTag === "InvalidType" || leafTag === "AnyOf"
+  if (!tagAllowsWrap || Array.isArray(value) || value === undefined) return false
+  parent[key] = [value]
+  return true
+}
+
+/**
+ * Parses `input` as JSON, yielding `undefined` instead of throwing when the
+ * text is not syntactically valid JSON.
+ */
+function parseJsonSafe(input: string): unknown {
+  let parsed: unknown = undefined
+  // Callers already guarantee a string; the try/catch exists solely to
+  // absorb a syntax error from the parser.
+  // eslint-disable-next-line no-restricted-syntax
+  try {
+    parsed = JSON.parse(input)
+  } catch {
+    parsed = undefined
+  }
+  return parsed
+}
+
+/**
+ * Resolves `path` inside `root` down to the container that holds the final
+ * segment. Returns that container plus the final segment, or `undefined`
+ * when the path is empty or any container along the way is not an object.
+ */
+function navigate(root: any, path: Path): { parent: any; key: string | number } | undefined {
+  const lastIndex = path.length - 1
+  if (lastIndex < 0) return undefined
+
+  const isContainer = (candidate: any): boolean => candidate != null && typeof candidate === "object"
+
+  // Descend through every segment except the last one.
+  let cursor: any = root
+  for (const segment of path.slice(0, lastIndex)) {
+    if (!isContainer(cursor)) return undefined
+    cursor = cursor[segment]
+  }
+
+  if (!isContainer(cursor)) return undefined
+  return { parent: cursor, key: path[lastIndex] }
+}
+
+/**
+ * Apply targeted repairs to a deep copy of `input`, guided by the leaf
+ * failures found in the validator's issue tree. Only paths the schema
+ * explicitly disagreed at are touched; `input` itself is never mutated.
+ *
+ * A pass stops as soon as a repair changes the length of an array: the
+ * remaining failure paths were recorded against the original indices and
+ * may now point at different elements. Re-decoding and calling `repair`
+ * again with the fresh issue picks up whatever is left.
+ *
+ * @param input the raw value that failed validation
+ * @param issue the validator's issue tree for `input`
+ * @returns the repaired copy and a `path:tag` label per applied repair, or
+ *   `undefined` if no repair was applicable (caller should surface the
+ *   original validation error)
+ */
+export function repair(input: unknown, issue: unknown): { value: unknown; repairs: string[] } | undefined {
+  const failures = collectFailures(issue)
+  if (failures.length === 0) return undefined
+  const out = cloneDeep(input)
+  const applied: string[] = []
+  for (const { path, tag } of failures) {
+    const target = navigate(out, path)
+    if (!target) continue
+    const lengthBefore = Array.isArray(target.parent) ? target.parent.length : undefined
+    if (!repairAt(target.parent, target.key, tag)) continue
+    applied.push(`${path.join(".")}:${tag}`)
+    // An element was removed from an array, so later indices have shifted;
+    // applying the remaining stale paths could rewrite valid elements.
+    if (lengthBefore !== undefined && target.parent.length !== lengthBefore) break
+  }
+  if (applied.length === 0) return undefined
+  return { value: out, repairs: applied }
+}
+
+// Effect Schema short-circuits at the first failing element of an array or
+// struct, so a single decode-then-repair pass can only fix one path at a
+// time. We loop until either the input parses cleanly or no further repair
+// applies. The bound is generous relative to the four-shape catalogue but
+// guarantees termination if a repair somehow re-introduces a failure.
+const MAX_REPAIR_ROUNDS = 6
+
+/**
+ * Attempt to recover from a tool-input validation failure. On success,
+ * annotates the current span with the repairs applied so per-tool repair
+ * rates can be watched in telemetry. On terminal failure, surfaces the
+ * original error so the model still sees the schema-level explanation it
+ * can act on (not a repair-induced cascade).
+ *
+ * @param decode the decoder to re-run against each repaired candidate
+ * @param rawInput the input that originally failed to decode
+ * @param error the original decode error; its `issue` property seeds the
+ *   first repair round and it is the error reported on every failure path
+ * @returns an effect that succeeds with the decoded value of a repaired
+ *   input, or fails with `error`
+ */
+export function recover<A, E, R>(
+  decode: (input: unknown) => Effect.Effect<A, E, R>,
+  rawInput: unknown,
+  error: unknown,
+): Effect.Effect<A, E, R> {
+  return Effect.gen(function* () {
+    // `current` is the latest repaired candidate; `currentError` is the
+    // error produced by the most recent decode attempt.
+    let current: unknown = rawInput
+    let currentError: unknown = error
+    // Labels of every repair applied so far, across all rounds.
+    const repairs: string[] = []
+    for (let round = 0; round < MAX_REPAIR_ROUNDS; round++) {
+      // The issue tree is read off the error's `issue` property; an error
+      // without one yields no failures, so `repair` returns undefined.
+      const attempt = repair(current, (currentError as any)?.issue)
+      // Nothing repairable: fail with the original error, not the latest one.
+      if (!attempt) return yield* Effect.fail(error as E)
+      repairs.push(...attempt.repairs)
+      current = attempt.value
+      // Capture the outcome so a failed decode can be inspected here
+      // instead of aborting the generator.
+      const exit = yield* Effect.exit(decode(current))
+      if (exit._tag === "Success") {
+        yield* Effect.annotateCurrentSpan("tool.input_repaired", repairs.join(","))
+        return exit.value
+      }
+      // Use the failure carried by the cause as the next round's error; if
+      // none is found, fall back to the original error.
+      const failure = exit.cause.reasons.find((r: any) => r._tag === "Fail" || r._tag === "FailReason")
+      currentError = (failure as any)?.error ?? error
+    }
+    // Round budget exhausted without a clean decode.
+    return yield* Effect.fail(error as E)
+  })
+}
diff --git a/packages/opencode/src/tool/tool.ts b/packages/opencode/src/tool/tool.ts
@@ -4,6 +4,7 @@ import type { MessageV2 } from "../session/message-v2"
 import type { Permission } from "../permission"
 import type { SessionID, MessageID } from "../session/schema"
 import * as Truncate from "./truncate"
+import * as Repair from "./repair"
 import { Agent } from "@/agent/agent"
 
 interface Metadata {
@@ -117,6 +118,13 @@ function wrap<Parameters extends Schema.Decoder<unknown>, Result extends Metadat
         }
         return Effect.gen(function* () {
           const decoded = yield* decode(args).pipe(
+            // Open-weight models commonly emit a small, repeatable set of
+            // shape mistakes (null at optional fields, stringified arrays,
+            // empty-object placeholders, bare scalars where arrays were
+            // expected). On parse failure we let the validator's own issue
+            // list localize the bug, apply targeted repairs at those paths,
+            // and re-decode. Successful inputs are never touched.
+            Effect.catch((error) => Repair.recover(decode, args, error)),
             Effect.mapError(
               (error) =>
                 new InvalidArgumentsError({