Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions packages/core/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,49 @@ export type AbsolutePath = typeof AbsolutePath.Type
export const RelativePath = Schema.String.pipe(Schema.brand("RelativePath"))
export type RelativePath = typeof RelativePath.Type

/**
 * Schema factory for string tool parameters whose value is headed for a
 * filesystem operation (fopen, stat, etc.) rather than for display.
 *
 * Motivation: some open-weight models occasionally wrap a file path in a
 * markdown auto-link, e.g. `"[notes.md](http://notes.md)"`. That is a
 * chat-formatting habit leaking through the tool boundary. Declaring the
 * intent once, at the schema level, handles it for every path field that
 * uses this factory instead of patching each tool separately.
 *
 * Decoding passes the incoming string through `unwrapDegenerateAutoLink`,
 * which only rewrites the degenerate case where the link text equals the
 * URL with its protocol stripped. A genuine link such as
 * `[click](https://example.com)` is left untouched. Encoding is a plain
 * passthrough.
 *
 * Use it in place of `Schema.String` in tool parameter definitions. The
 * description is attached to the encoded (input-side) string schema so that
 * the JSON Schema emitted to the LLM still carries it.
 *
 * @param annotations Optional annotations; a truthy `description` is
 *   attached to the encoded string schema, anything else yields the bare
 *   `Schema.String` on the encoded side.
 * @returns A schema decoding a string to a string with degenerate
 *   auto-links unwrapped.
 *
 * @example
 *   filePath: FilePathInput({ description: "The absolute path to the file" })
 */
export const FilePathInput = (annotations?: { readonly description?: string }) => {
  const description = annotations?.description
  // Truthiness check on purpose: an empty description is treated the same
  // as no description at all.
  const encoded = description ? Schema.String.annotate({ description }) : Schema.String
  const unwrapOnDecode = Schema.decodeTo(Schema.String, {
    decode: SchemaGetter.transform(unwrapDegenerateAutoLink),
    encode: SchemaGetter.passthrough({ strict: false }),
  })
  return encoded.pipe(unwrapOnDecode)
}

/**
 * Replaces degenerate markdown auto-links with their link text.
 *
 * A link `[text](url)` is degenerate when `text` equals `url` after an
 * optional leading `http://` or `https://` has been stripped from the URL.
 * Every other link is returned exactly as written.
 *
 * @param input Raw string as received from the model.
 * @returns The input with each degenerate auto-link collapsed to its text.
 */
function unwrapDegenerateAutoLink(input: string): string {
  // Pass 1: the entire input is a single auto-link and nothing else.
  const anchored = /^\[([^\]]+)\]\((https?:\/\/)?([^)]+)\)$/.exec(input)
  if (anchored !== null) {
    const [, label, , target] = anchored
    if (label === target) return label
  }

  // Pass 2: auto-links embedded in a longer string (for example a path whose
  // last segment got linkified). Non-degenerate links are put back verbatim.
  const collapse = (link: string, label: string, _scheme: string | undefined, target: string): string => {
    if (label === target) return label
    return link
  }
  return input.replace(/\[([^\]]+)\]\((https?:\/\/)?([^)]+)\)/g, collapse)
}

/**
* Integer greater than zero.
*/
Expand Down
43 changes: 43 additions & 0 deletions packages/core/test/schema.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { describe, expect, test } from "bun:test"
import { Schema } from "effect"
import { FilePathInput } from "../src/schema"

// Decodes `input` with a fresh, un-annotated FilePathInput schema; throws on
// a decode failure.
function decode(input: unknown) {
  return Schema.decodeUnknownSync(FilePathInput())(input)
}

describe("FilePathInput", () => {
  test("plain string passes through unchanged", () => {
    const plain = "/Users/x/proj/notes.md"
    expect(decode(plain)).toBe(plain)
  })

  test("degenerate auto-link (text == url-without-protocol) is unwrapped", () => {
    // Same link text with http, https, and no protocol at all.
    const wrappedForms = ["[notes.md](http://notes.md)", "[notes.md](https://notes.md)", "[notes.md](notes.md)"]
    for (const wrapped of wrappedForms) {
      expect(decode(wrapped)).toBe("notes.md")
    }
  })

  test("real markdown link (text != url-without-protocol) is preserved", () => {
    const realLinks = ["[click](https://example.com)", "[home](https://x.com)"]
    for (const link of realLinks) {
      expect(decode(link)).toBe(link)
    }
  })

  test("embedded degenerate auto-link inside a longer path is unwrapped in place", () => {
    const linkified = "/Users/x/proj/[notes.md](http://notes.md)"
    expect(decode(linkified)).toBe("/Users/x/proj/notes.md")
  })

  test("embedded real markdown link inside a longer string is preserved", () => {
    const sentence = "see [click](https://example.com) for more"
    expect(decode(sentence)).toBe(sentence)
  })

  test("non-string input is rejected by the underlying String schema", () => {
    const result = Schema.decodeUnknownResult(FilePathInput())(123)
    expect(result._tag).toBe("Failure")
  })

  test("description annotation lands on the encoded side (JSON Schema-visible)", () => {
    // Wrap the field in a struct and inspect the property in the generated
    // JSON Schema document.
    const wrapped = Schema.Struct({ p: FilePathInput({ description: "path to read" }) })
    const json = Schema.toJsonSchemaDocument(wrapped, { additionalProperties: true })
    const prop = (json.schema as any).properties.p
    expect(prop.description).toBe("path to read")
  })
})
3 changes: 2 additions & 1 deletion packages/opencode/src/tool/edit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { InstanceState } from "@/effect/instance-state"
import { Snapshot } from "@/snapshot"
import { assertExternalDirectoryEffect } from "./external-directory"
import { AppFileSystem } from "@opencode-ai/core/filesystem"
import { FilePathInput } from "@opencode-ai/core/schema"
import * as Bom from "@/util/bom"

function normalizeLineEndings(text: string): string {
Expand Down Expand Up @@ -45,7 +46,7 @@ function lock(filePath: string) {
}

export const Parameters = Schema.Struct({
filePath: Schema.String.annotate({ description: "The absolute path to the file to modify" }),
filePath: FilePathInput({ description: "The absolute path to the file to modify" }),
oldString: Schema.String.annotate({ description: "The text to replace" }),
newString: Schema.String.annotate({
description: "The text to replace it with (must be different from oldString)",
Expand Down
3 changes: 2 additions & 1 deletion packages/opencode/src/tool/lsp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { InstanceState } from "@/effect/instance-state"
import { pathToFileURL } from "url"
import { assertExternalDirectoryEffect } from "./external-directory"
import { AppFileSystem } from "@opencode-ai/core/filesystem"
import { FilePathInput } from "@opencode-ai/core/schema"

const operations = [
"goToDefinition",
Expand All @@ -22,7 +23,7 @@ const operations = [

export const Parameters = Schema.Struct({
operation: Schema.Literals(operations).annotate({ description: "The LSP operation to perform" }),
filePath: Schema.String.annotate({ description: "The absolute or relative path to the file" }),
filePath: FilePathInput({ description: "The absolute or relative path to the file" }),
line: Schema.Int.check(Schema.isGreaterThanOrEqualTo(1)).annotate({
description: "The line number (1-based, as shown in editors)",
}),
Expand Down
21 changes: 18 additions & 3 deletions packages/opencode/src/tool/read.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Effect, Option, Schema, Scope, Stream } from "effect"
import { NonNegativeInt } from "@opencode-ai/core/schema"
import { FilePathInput, NonNegativeInt } from "@opencode-ai/core/schema"
import * as path from "path"
import * as Tool from "./tool"
import { AppFileSystem } from "@opencode-ai/core/filesystem"
Expand Down Expand Up @@ -27,7 +27,7 @@ class ReadStop extends Schema.TaggedErrorClass<ReadStop>()("ReadStop", {}) {}
// Schema output is identical (`type: "number"`), so the LLM view is
// unchanged; purely CLI-facing uses must now send numbers rather than strings.
export const Parameters = Schema.Struct({
filePath: Schema.String.annotate({ description: "The absolute path to the file or directory to read" }),
filePath: FilePathInput({ description: "The absolute path to the file or directory to read" }),
offset: Schema.optional(NonNegativeInt).annotate({
description: "The line number to start reading from (1-indexed)",
}),
Expand Down Expand Up @@ -292,14 +292,29 @@ export const ReadTool = Tool.define(
return yield* Effect.fail(new Error(`Cannot read binary file: ${filepath}`))
}

const file = yield* lines(filepath, { limit: params.limit ?? DEFAULT_READ_LIMIT, offset: params.offset || 1 })
const limit = params.limit ?? DEFAULT_READ_LIMIT
const offset = params.offset || 1
const file = yield* lines(filepath, { limit, offset })
if (file.count < file.offset && !(file.count === 0 && file.offset === 1)) {
return yield* Effect.fail(
new Error(`Offset ${file.offset} is out of range for this file (${file.count} lines)`),
)
}

// Surface the offset/limit pairing decision back to the model. Some
// models call this tool with only one of the two and previously had
// no signal as to what the other was filled with — so they could not
// self-correct on the next turn. Note the message is informational
// (no `Error:` prefix) so it doesn't read as a failure.
const pairingNote =
params.offset !== undefined && params.limit === undefined
? `Note: limit was not provided; defaulted to ${DEFAULT_READ_LIMIT} lines. To control the window, pass both offset and limit.\n`
: params.limit !== undefined && params.offset === undefined
? `Note: offset was not provided; defaulted to 1 (start of file). To control the window, pass both offset and limit.\n`
: ""

let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>\n"].join("\n")
if (pairingNote) output += pairingNote
output += file.raw.map((line, i) => `${i + file.offset}: ${line}`).join("\n")

const last = file.offset + file.raw.length - 1
Expand Down
196 changes: 196 additions & 0 deletions packages/opencode/src/tool/repair.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
/**
* Tool-input repair layer for open-weight models.
*
* Background: open-weight models (deepseek, qwen, glm, ...) fail tool calls in
* a small, repeatable set of shape-level ways. Strict schema rejection sends
* them into recovery loops, because the raw "Expected X, got Y" error is
* rarely enough for the model to find the fix on its own.
*
* Approach: validate first, repair on failure. We let the schema decode run
* unchanged; only when it fails do we walk the parse error's issue tree to
* locate the failing paths, apply targeted shape repairs at those paths, and
* re-decode. Successful inputs are never touched — there is no preprocessing
* that could corrupt a valid call.
*
* The four shape repairs were chosen by surveying the failure modes most
* commonly reported against opencode (see issue #26498). Ordering matters:
* the JSON-array-string repair must fire before the bare-string wrap, or a
* stringified array like `'["a","b"]'` would be wrapped into
* `['["a","b"]']`. The fixed sequence of checks in `repairAt` encodes that ordering at each path.
*/
import { Effect } from "effect"

type Path = ReadonlyArray<string | number>

/**
 * Effect Schema parse issues form a tree:
 *   Composite { issues: Issue[] }
 *   Pointer   { path: (string|number)[]; issue: Issue }
 *   <leaf>    { _tag: "MissingKey" | "InvalidType" | "AnyOf" | ... }
 *
 * Only objects carrying this marker key are treated as issue nodes.
 */
const ISSUE_MARKER = "~effect/SchemaIssue/Issue"

/**
 * Flattens an issue tree into its leaves, each paired with the path at which
 * it occurred, so repairs can be applied at the exact location where the
 * schema disagreed.
 *
 * `Pointer` nodes extend the current path, `Composite` nodes fan out to
 * their children, and every other marked node is reported as a leaf. Values
 * that are not marked issue objects are ignored.
 *
 * @param issue Root of the issue tree (any value; non-issues yield `[]`).
 * @returns Leaves in depth-first order as `{ path, tag }`; a leaf without a
 *   `_tag` is reported with the tag `"Unknown"`.
 */
export function collectFailures(issue: unknown): Array<{ path: Path; tag: string }> {
  function* leaves(node: any, prefix: Path): Generator<{ path: Path; tag: string }> {
    const isIssueNode = Boolean(node) && typeof node === "object" && ISSUE_MARKER in node
    if (!isIssueNode) return

    switch (node._tag) {
      case "Pointer":
        yield* leaves(node.issue, [...prefix, ...(node.path ?? [])])
        return
      case "Composite":
        for (const child of node.issues ?? []) yield* leaves(child, prefix)
        return
      default:
        yield { path: prefix, tag: node._tag ?? "Unknown" }
    }
  }

  return [...leaves(issue, [])]
}

/**
 * Recursively copies a JSON-like value so repairs can mutate the copy
 * without touching the caller's input.
 *
 * Primitives and `null` are returned as-is. Arrays are copied element by
 * element. Any other object is copied as a plain object holding its own
 * enumerable string-keyed properties.
 *
 * The copy is assembled with `Object.fromEntries`, which defines each key as
 * an own data property. Plain assignment (`out[key] = ...`) would run the
 * inherited `__proto__` setter for a key named `"__proto__"` (which
 * `JSON.parse` can produce as an own key), silently dropping that key and
 * replacing the clone's prototype.
 *
 * @param value Value to copy; expected to be acyclic.
 * @returns A structurally equal deep copy of `value`.
 */
function cloneDeep<T>(value: T): T {
  if (value === null || typeof value !== "object") return value
  if (Array.isArray(value)) return value.map(cloneDeep) as unknown as T
  const copiedEntries = Object.entries(value as Record<string, unknown>).map(
    ([key, child]) => [key, cloneDeep(child)] as const,
  )
  return Object.fromEntries(copiedEntries) as unknown as T
}

// Matches a string that, ignoring surrounding whitespace, starts with `[` and
// ends with `]`, i.e. one that looks like a serialized JSON array.
const JSON_ARRAY_RE = /^\s*\[[\s\S]*\]\s*$/

/**
 * Applies the first repair that fits the current value of `parent[key]` and
 * reports whether anything was changed. `parent` is mutated in place.
 *
 * The checks run in a fixed, load-bearing order:
 *   1. `null`                          → remove the slot
 *   2. JSON-array-shaped string        → replace with the parsed array
 *   3. object with no own keys (`{}`)  → remove the slot
 *   4. any other defined non-array value, when the leaf tag is
 *      `InvalidType` or `AnyOf`        → wrap as `[value]`
 *
 * Check (2) has to come before (4), otherwise a stringified array would be
 * wrapped instead of parsed. Check (4) is last because it is the broadest.
 * This function sees only the leaf tag, not the type the schema expected, so
 * (4) fires for every `InvalidType`/`AnyOf` leaf holding a non-array value.
 * Callers are expected to re-validate the result.
 *
 * "Remove the slot" means `splice` for an array parent (later elements
 * shift down by one) and `delete` for any other parent.
 *
 * @param parent Container holding the offending value.
 * @param key Property name or array index inside `parent`.
 * @param leafTag `_tag` of the schema issue leaf reported at this location.
 * @returns `true` if a repair was applied, otherwise `false`.
 */
function repairAt(parent: any, key: string | number, leafTag: string): boolean {
  const value = parent[key]

  const removeSlot = (): boolean => {
    if (Array.isArray(parent)) {
      parent.splice(Number(key), 1)
    } else {
      delete parent[key]
    }
    return true
  }

  // (1) null placeholder.
  if (value === null) return removeSlot()

  // (2) Stringified array. A string that merely looks like an array but does
  // not parse to one falls through to the remaining checks.
  if (typeof value === "string" && JSON_ARRAY_RE.test(value)) {
    const parsed = parseJsonSafe(value)
    if (Array.isArray(parsed)) {
      parent[key] = parsed
      return true
    }
  }

  // (3) Empty-object placeholder. `value` cannot be null here because of (1).
  const isKeylessObject = typeof value === "object" && !Array.isArray(value) && Object.keys(value).length === 0
  if (isKeylessObject) return removeSlot()

  // (4) Bare value → single-element array, gated on the leaf tag.
  const tagPermitsWrap = leafTag === "InvalidType" || leafTag === "AnyOf"
  if (tagPermitsWrap && value !== undefined && !Array.isArray(value)) {
    parent[key] = [value]
    return true
  }

  return false
}

/**
 * Parses `input` as JSON without throwing.
 *
 * @param input Text to parse.
 * @returns The parsed value, or `undefined` when `input` is not valid JSON.
 */
function parseJsonSafe(input: string): unknown {
  let parsed: unknown = undefined
  // Callers already narrow to strings; the try/catch exists solely to absorb
  // a syntactic parse failure.
  // eslint-disable-next-line no-restricted-syntax
  try {
    parsed = JSON.parse(input)
  } catch {
    // Not valid JSON: leave `parsed` as undefined.
  }
  return parsed
}

/**
 * Resolves `path` inside `root` down to the container that holds the final
 * segment.
 *
 * @param root Value to walk into.
 * @param path Property names / array indices leading to the target slot.
 * @returns The container and the final segment as `{ parent, key }`, or
 *   `undefined` when `path` is empty or when any container along the way
 *   (including the final one) is not a non-null object.
 */
function navigate(root: any, path: Path): { parent: any; key: string | number } | undefined {
  if (path.length === 0) return undefined

  const isContainer = (candidate: any): boolean => candidate != null && typeof candidate === "object"
  const lastIndex = path.length - 1

  // Descend through every segment except the last one.
  let cursor: any = root
  for (const segment of path.slice(0, lastIndex)) {
    if (!isContainer(cursor)) return undefined
    cursor = cursor[segment]
  }

  return isContainer(cursor) ? { parent: cursor, key: path[lastIndex] } : undefined
}

/**
 * Total order over failure paths used by `repair`: inside the same array,
 * higher indices sort first, and a deeper path sorts before its own prefix.
 * Numeric segments sort before string segments; string segments ascending.
 */
function comparePathsForRepair(a: Path, b: Path): number {
  const shared = Math.min(a.length, b.length)
  for (let i = 0; i < shared; i++) {
    const x = a[i]
    const y = b[i]
    if (x === y) continue
    if (typeof x === "number" && typeof y === "number") return y - x
    if (typeof x === "number") return -1
    if (typeof y === "number") return 1
    return x < y ? -1 : 1
  }
  return b.length - a.length
}

/**
 * Apply targeted repairs to a copy of `input` based on the validator's own
 * issue list. The schema is the prior; we only spend repair budget at paths
 * the schema explicitly disagreed at.
 *
 * Failure paths are computed against the original input, but some repairs
 * remove an array element in place, which shifts every later index. To keep
 * pending paths valid, failures are visited with higher array indices first,
 * and a slot that has already been repaired in this pass is not repaired
 * again (a second repair at a spliced index would hit its neighbour).
 * As a consequence, `repairs` is listed in visiting order rather than in the
 * order the validator reported the failures.
 *
 * @param input Raw tool input; never mutated.
 * @param issue Root of the validator's issue tree.
 * @returns The repaired copy plus one `"<dotted.path>:<leafTag>"` entry per
 *   applied repair, or `undefined` if no repair was applicable (caller
 *   should surface the original validation error).
 */
export function repair(input: unknown, issue: unknown): { value: unknown; repairs: string[] } | undefined {
  const failures = collectFailures(issue)
  if (failures.length === 0) return undefined

  // Array#sort is stable, so several failures reported at the same path keep
  // their original relative order and each tag still gets its chance.
  const ordered = [...failures].sort((left, right) => comparePathsForRepair(left.path, right.path))

  const out = cloneDeep(input)
  const applied: string[] = []
  const repairedSlots = new Set<string>()
  for (const { path, tag } of ordered) {
    // Segments are stringified so that index 0 and key "0" name the same slot.
    const slotId = JSON.stringify(path.map(String))
    if (repairedSlots.has(slotId)) continue
    const target = navigate(out, path)
    if (!target) continue
    if (repairAt(target.parent, target.key, tag)) {
      repairedSlots.add(slotId)
      applied.push(`${path.join(".")}:${tag}`)
    }
  }
  if (applied.length === 0) return undefined
  return { value: out, repairs: applied }
}

// Upper bound on decode → repair → decode cycles. Per the original author's
// note, Effect Schema short-circuits at the first failing element of an array
// or struct, so one pass can only fix one path at a time and several rounds
// may be needed. The bound is generous relative to the four-shape catalogue
// and guarantees termination should a repair re-introduce a failure.
const MAX_REPAIR_ROUNDS = 6

/**
 * Attempt to recover from a tool-input validation failure by alternating
 * `repair` and `decode` for at most `MAX_REPAIR_ROUNDS` rounds.
 *
 * On success the current span is annotated with `tool.input_repaired`
 * (comma-joined list of applied repairs) so per-tool repair rates can be
 * watched in telemetry. On terminal failure the ORIGINAL `error` is
 * surfaced, not the error of a later round, so the model sees the
 * schema-level explanation for what it actually sent.
 *
 * @param decode Decoder to re-run against each repaired candidate.
 * @param rawInput Input that failed validation; never mutated.
 * @param error Failure from the initial decode; its `issue` property is used
 *   to locate the first round of repairs.
 * @returns An effect yielding the decoded value, or failing with `error`.
 */
export function recover<A, E, R>(
  decode: (input: unknown) => Effect.Effect<A, E, R>,
  rawInput: unknown,
  error: unknown,
): Effect.Effect<A, E, R> {
  return Effect.gen(function* () {
    const appliedSoFar: string[] = []
    let candidate: unknown = rawInput
    let latestError: unknown = error
    let roundsLeft = MAX_REPAIR_ROUNDS

    while (roundsLeft > 0) {
      roundsLeft -= 1

      const patched = repair(candidate, (latestError as any)?.issue)
      // Nothing left to repair: give up with the original error.
      if (!patched) return yield* Effect.fail(error as E)

      appliedSoFar.push(...patched.repairs)
      candidate = patched.value

      const exit = yield* Effect.exit(decode(candidate))
      if (exit._tag === "Success") {
        yield* Effect.annotateCurrentSpan("tool.input_repaired", appliedSoFar.join(","))
        return exit.value
      }

      // Drive the next round from this attempt's failure; if the cause holds
      // no failure reason, fall back to the original error.
      const failed = exit.cause.reasons.find((r: any) => r._tag === "Fail" || r._tag === "FailReason")
      latestError = (failed as any)?.error ?? error
    }

    return yield* Effect.fail(error as E)
  })
}
8 changes: 8 additions & 0 deletions packages/opencode/src/tool/tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import type { MessageV2 } from "../session/message-v2"
import type { Permission } from "../permission"
import type { SessionID, MessageID } from "../session/schema"
import * as Truncate from "./truncate"
import * as Repair from "./repair"
import { Agent } from "@/agent/agent"

interface Metadata {
Expand Down Expand Up @@ -117,6 +118,13 @@ function wrap<Parameters extends Schema.Decoder<unknown>, Result extends Metadat
}
return Effect.gen(function* () {
const decoded = yield* decode(args).pipe(
// Open-weight models commonly emit a small, repeatable set of
// shape mistakes (null at optional fields, stringified arrays,
// empty-object placeholders, bare scalars where arrays were
// expected). On parse failure we let the validator's own issue
// list localize the bug, apply targeted repairs at those paths,
// and re-decode. Successful inputs are never touched.
Effect.catch((error) => Repair.recover(decode, args, error)),
Effect.mapError(
(error) =>
new InvalidArgumentsError({
Expand Down
Loading
Loading