diff --git a/packages/opencode/src/plugin/github-copilot/copilot.ts b/packages/opencode/src/plugin/github-copilot/copilot.ts index d24d9b9daed6..140567251e9d 100644 --- a/packages/opencode/src/plugin/github-copilot/copilot.ts +++ b/packages/opencode/src/plugin/github-copilot/copilot.ts @@ -10,6 +10,8 @@ import { MessageV2 } from "@/session/message-v2" const log = Log.create({ service: "plugin.copilot" }) const CLIENT_ID = "Ov23li8tweQw6odWQebz" +const API_VERSION = "2026-06-01" +const UTILITY_MODELS = ["gpt-5.4-nano", "gpt-4.1", "gpt-4o", "gpt-4o-mini"] // Add a small safety buffer when polling to avoid hitting the server // slightly too early due to clock skew / timer drift. const OAUTH_POLLING_SAFETY_MARGIN_MS = 3000 // 3 seconds @@ -56,11 +58,13 @@ function fix(model: Model, url: string): Model { export async function CopilotAuthPlugin(input: PluginInput): Promise { const sdk = input.client + let models: Record = {} return { provider: { id: "github-copilot", async models(provider, ctx) { if (ctx.auth?.type !== "oauth") { + models = {} return Object.fromEntries(Object.entries(provider.models).map(([id, model]) => [id, fix(model, base())])) } @@ -71,14 +75,23 @@ export async function CopilotAuthPlugin(input: PluginInput): Promise { { Authorization: `Bearer ${auth.refresh}`, "User-Agent": `opencode/${InstallationVersion}`, + "X-GitHub-Api-Version": API_VERSION, }, provider.models, - ).catch((error) => { - log.error("failed to fetch copilot models", { error }) - return Object.fromEntries( - Object.entries(provider.models).map(([id, model]) => [id, fix(model, base(auth.enterpriseUrl))]), - ) - }) + ) + .then((result) => { + models = result.models + return Object.fromEntries( + Object.entries(result.models).filter(([, model]) => result.pickerEnabled.has(model.api.id)), + ) + }) + .catch((error) => { + models = {} + log.error("failed to fetch copilot models", { error }) + return Object.fromEntries( + Object.entries(provider.models).map(([id, model]) => [id, fix(model, base(auth.enterpriseUrl))]), + ) + }) }, }, auth: { @@ -342,9 +355,19 @@ export async function CopilotAuthPlugin(input: PluginInput): Promise { output.options.toolStreaming = false } }, + "experimental.provider.small_model": async (incoming, output) => { + if (incoming.provider.id !== "github-copilot") return + // GitHub exposes utility models for title generation without including them in the picker. + output.model = UTILITY_MODELS.map((id) => models[id]).find((model) => model !== undefined) + }, "chat.headers": async (incoming, output) => { if (!incoming.model.providerID.includes("github-copilot")) return + output.headers["X-GitHub-Api-Version"] = API_VERSION + if (incoming.agent === "title") { + output.headers["X-Interaction-Type"] = "agent-session-name-generation" + } + if (incoming.model.api.npm === "@ai-sdk/anthropic") { output.headers["anthropic-beta"] = "interleaved-thinking-2025-05-14" } diff --git a/packages/opencode/src/plugin/github-copilot/models.ts b/packages/opencode/src/plugin/github-copilot/models.ts index a488be4a4853..7e6608e6036e 100644 --- a/packages/opencode/src/plugin/github-copilot/models.ts +++ b/packages/opencode/src/plugin/github-copilot/models.ts @@ -1,53 +1,81 @@ import type { Model } from "@opencode-ai/sdk/v2" -import { Schema } from "effect" +import { Option, Schema } from "effect" -export const schema = Schema.Struct({ - data: Schema.Array( +const item = Schema.Struct({ + model_picker_enabled: Schema.Boolean, + id: Schema.String, + name: Schema.String, + // every version looks like: `{model.id}-YYYY-MM-DD` + version: Schema.String, + supported_endpoints: Schema.optional(Schema.Array(Schema.String)), + policy: Schema.optional( + Schema.Struct({ + state: Schema.optional(Schema.String), + }), + ), + billing: Schema.optional( Schema.Struct({ - model_picker_enabled: Schema.Boolean, - id: Schema.String, - name: Schema.String, - // every version looks like: `{model.id}-YYYY-MM-DD` - version: Schema.String, - supported_endpoints: Schema.optional(Schema.Array(Schema.String)), - policy: Schema.optional( + token_prices: Schema.optional( Schema.Struct({ - state: Schema.optional(Schema.String), + batch_size: Schema.Number, + default: Schema.Struct({ + cache_price: Schema.Number, + input_price: Schema.Number, + output_price: Schema.Number, + }), }), ), - capabilities: Schema.Struct({ - family: Schema.String, - limits: Schema.Struct({ - max_context_window_tokens: Schema.Number, - max_output_tokens: Schema.Number, - max_prompt_tokens: Schema.Number, - vision: Schema.optional( - Schema.Struct({ - max_prompt_image_size: Schema.Number, - max_prompt_images: Schema.Number, - supported_media_types: Schema.Array(Schema.String), - }), - ), - }), - supports: Schema.Struct({ - adaptive_thinking: Schema.optional(Schema.Boolean), - max_thinking_budget: Schema.optional(Schema.Number), - min_thinking_budget: Schema.optional(Schema.Number), - reasoning_effort: Schema.optional(Schema.Array(Schema.String)), - streaming: Schema.Boolean, - structured_outputs: Schema.optional(Schema.Boolean), - tool_calls: Schema.Boolean, - vision: Schema.optional(Schema.Boolean), - }), - }), }), ), + capabilities: Schema.Struct({ + family: Schema.String, + limits: Schema.optional( + Schema.Struct({ + max_context_window_tokens: Schema.optional(Schema.Number), + max_output_tokens: Schema.optional(Schema.Number), + max_prompt_tokens: Schema.optional(Schema.Number), + vision: Schema.optional( + Schema.Struct({ + max_prompt_image_size: Schema.Number, + max_prompt_images: Schema.Number, + supported_media_types: Schema.Array(Schema.String), + }), + ), + }), + ), + supports: Schema.Struct({ + adaptive_thinking: Schema.optional(Schema.Boolean), + max_thinking_budget: Schema.optional(Schema.Number), + min_thinking_budget: Schema.optional(Schema.Number), + reasoning_effort: Schema.optional(Schema.Array(Schema.String)), + streaming: Schema.optional(Schema.Boolean), + structured_outputs: Schema.optional(Schema.Boolean), + tool_calls: Schema.optional(Schema.Boolean), + vision: Schema.optional(Schema.Boolean), + }), + }), +}) + +export const schema = Schema.Struct({ + data: Schema.Array(Schema.Unknown), }) -type Item = Schema.Schema.Type["data"][number] +type Item = Schema.Schema.Type +type SelectableItem = Item & { + capabilities: Item["capabilities"] & { + limits: NonNullable & { + max_output_tokens: number + max_prompt_tokens: number + } + supports: Item["capabilities"]["supports"] & { + tool_calls: boolean + } + } +} const decodeModels = Schema.decodeUnknownSync(schema) +const decodeItem = Schema.decodeUnknownOption(item) -function build(key: string, remote: Item, url: string, prev?: Model): Model { +function build(key: string, remote: SelectableItem, url: string, prev?: Model): Model { const reasoning = !!remote.capabilities.supports.adaptive_thinking || !!remote.capabilities.supports.reasoning_effort?.length || @@ -58,6 +86,9 @@ function build(key: string, remote: Item, url: string, prev?: Model): Model { (remote.capabilities.limits.vision?.supported_media_types ?? []).some((item) => item.startsWith("image/")) const isMsgApi = remote.supported_endpoints?.includes("/v1/messages") + const prices = remote.billing?.token_prices + // Copilot prices are AIC per billing batch; OpenCode stores USD per million tokens. + const usdPerMillion = prices ? 10_000 / prices.batch_size : 0 const model: Model = { id: key, @@ -70,7 +101,7 @@ function build(key: string, remote: Item, url: string, prev?: Model): Model { // API response wins status: "active", limit: { - context: remote.capabilities.limits.max_context_window_tokens, + context: remote.capabilities.limits.max_context_window_tokens ?? remote.capabilities.limits.max_prompt_tokens, input: remote.capabilities.limits.max_prompt_tokens, output: remote.capabilities.limits.max_output_tokens, }, @@ -99,9 +130,13 @@ function build(key: string, remote: Item, url: string, prev?: Model): Model { family: prev?.family ?? remote.capabilities.family, name: prev?.name ?? remote.name, cost: { - input: 0, - output: 0, - cache: { read: 0, write: 0 }, + input: (prices?.default.input_price ?? 0) * usdPerMillion, + output: (prices?.default.output_price ?? 0) * usdPerMillion, + cache: { + read: (prices?.default.cache_price ?? 0) * usdPerMillion, + // `/models` exposes cached-input reads only; per-request billing accounts for cache writes. + write: 0, + }, }, options: prev?.options ?? {}, headers: prev?.headers ?? {}, @@ -154,11 +189,20 @@ function build(key: string, remote: Item, url: string, prev?: Model): Model { return model } +function usable(item: Item): item is SelectableItem { + return ( + item.policy?.state !== "disabled" && + item.capabilities.limits?.max_output_tokens !== undefined && + item.capabilities.limits.max_prompt_tokens !== undefined && + item.capabilities.supports.tool_calls !== undefined + ) +} + export async function get( baseURL: string, headers: HeadersInit = {}, existing: Record = {}, -): Promise> { +): Promise<{ models: Record; pickerEnabled: Set }> { const data = await fetch(`${baseURL}/models`, { headers, signal: AbortSignal.timeout(5_000), @@ -171,7 +215,10 @@ export async function get( const result = { ...existing } const remote = new Map( - data.data.filter((m) => m.model_picker_enabled && m.policy?.state !== "disabled").map((m) => [m.id, m] as const), + data.data.flatMap((raw) => { + const item = Option.getOrUndefined(decodeItem(raw)) + return item && usable(item) ? ([[item.id, item]] as const) : [] + }), ) // prune existing models whose api.id isn't in the endpoint response @@ -190,7 +237,10 @@ export async function get( result[id] = build(id, m, baseURL) } - return result + return { + models: result, + pickerEnabled: new Set([...remote].filter(([, item]) => item.model_picker_enabled).map(([id]) => id)), + } } export * as CopilotModels from "./models" diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index ad860be92abc..9859b81e88f4 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -1765,6 +1765,19 @@ export const layer = Layer.effect( const provider = s.providers[providerID] if (!provider) return undefined + const experimental = yield* plugin.trigger<"experimental.provider.small_model">( + "experimental.provider.small_model", + { provider: toPublicInfo(provider) }, + { model: undefined }, + ) + if (experimental.model) { + return { + ...experimental.model, + id: ProviderV2.ModelID.make(experimental.model.id), + providerID: ProviderV2.ID.make(experimental.model.providerID), + } + } + const defaultPriority = [ "claude-haiku-4-5", "claude-haiku-4.5", diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index ebaad3e9306d..1851df2d7c7b 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -280,6 +280,8 @@ const live: Layer.Layer< return { type: "ai-sdk" as const, result: streamText({ + // Copilot returns the authoritative billed amount only in provider-specific response fields. + includeRawChunks: input.model.providerID.includes("github-copilot"), onError(error) { l.error("stream error", { error, diff --git a/packages/opencode/src/session/llm/ai-sdk.ts b/packages/opencode/src/session/llm/ai-sdk.ts index ad3e66a40bdc..8db8985d7b09 100644 --- a/packages/opencode/src/session/llm/ai-sdk.ts +++ b/packages/opencode/src/session/llm/ai-sdk.ts @@ -14,6 +14,7 @@ export function adapterState() { currentTextID: undefined as string | undefined, currentReasoningID: undefined as string | undefined, toolNames: {} as Record, + copilotTotalNanoAiu: undefined as number | undefined, } } @@ -26,6 +27,20 @@ function providerMetadata(value: unknown): ProviderMetadata | undefined { return Schema.is(ProviderMetadata)(value) ? value : undefined } +// Temporary AI SDK bridge: Copilot billing survives only in raw provider chunks here. +// Move this extraction into @opencode-ai/llm when Copilot is handled by the native runtime. +function copilotTotalNanoAiu(value: unknown) { + if (!value || typeof value !== "object") return + const raw = value as Record + const response = + raw.response && typeof raw.response === "object" ? (raw.response as Record) : undefined + const usage = raw.copilot_usage ?? response?.copilot_usage + if (!usage || typeof usage !== "object") return + const total = (usage as Record).total_nano_aiu + if (typeof total !== "number" || !Number.isFinite(total) || total < 0) return + return total +} + function usage(value: unknown) { if (!value || typeof value !== "object") return undefined const item = value as { @@ -70,14 +85,28 @@ export function toLLMEvents( return Effect.succeed([LLMEvent.stepStart({ index: state.step })]) case "finish-step": - return Effect.sync(() => [ - LLMEvent.stepFinish({ - index: state.step++, - reason: finishReason(event.finishReason), - usage: usage(event.usage), - providerMetadata: providerMetadata(event.providerMetadata), - }), - ]) + return Effect.sync(() => { + const original = providerMetadata(event.providerMetadata) + const metadata = + state.copilotTotalNanoAiu === undefined + ? original + : { + ...original, + copilot: { + ...original?.copilot, + totalNanoAiu: state.copilotTotalNanoAiu, + }, + } + state.copilotTotalNanoAiu = undefined + return [ + LLMEvent.stepFinish({ + index: state.step++, + reason: finishReason(event.finishReason), + usage: usage(event.usage), + providerMetadata: metadata, + }), + ] + }) case "finish": return Effect.sync(() => { @@ -238,11 +267,16 @@ export function toLLMEvents( case "abort": case "source": case "file": - case "raw": case "tool-output-denied": case "tool-approval-request": return Effect.succeed([]) + case "raw": + return Effect.sync(() => { + state.copilotTotalNanoAiu = copilotTotalNanoAiu(event.rawValue) ?? state.copilotTotalNanoAiu + return [] + }) + default: { const _exhaustive: never = event void _exhaustive diff --git a/packages/opencode/src/session/session.ts b/packages/opencode/src/session/session.ts index a8a867a425c9..a9b57231881d 100644 --- a/packages/opencode/src/session/session.ts +++ b/packages/opencode/src/session/session.ts @@ -436,18 +436,22 @@ export const getUsage = (input: { model: Provider.Model; usage: Usage; metadata? (input.model.cost?.experimentalOver200K && contextTokens > 200_000 ? input.model.cost.experimentalOver200K : input.model.cost) + const totalNanoAiu = input.metadata?.["copilot"]?.["totalNanoAiu"] return { - cost: safe( - new Decimal(0) - .add(new Decimal(tokens.input).mul(costInfo?.input ?? 0).div(1_000_000)) - .add(new Decimal(tokens.output).mul(costInfo?.output ?? 0).div(1_000_000)) - .add(new Decimal(tokens.cache.read).mul(costInfo?.cache?.read ?? 0).div(1_000_000)) - .add(new Decimal(tokens.cache.write).mul(costInfo?.cache?.write ?? 0).div(1_000_000)) - // TODO: update models.dev to have better pricing model, for now: - // charge reasoning tokens at the same rate as output tokens - .add(new Decimal(tokens.reasoning).mul(costInfo?.output ?? 0).div(1_000_000)) - .toNumber(), - ), + cost: + typeof totalNanoAiu === "number" && Number.isFinite(totalNanoAiu) && totalNanoAiu >= 0 + ? new Decimal(totalNanoAiu).div(100_000_000_000).toNumber() + : safe( + new Decimal(0) + .add(new Decimal(tokens.input).mul(costInfo?.input ?? 0).div(1_000_000)) + .add(new Decimal(tokens.output).mul(costInfo?.output ?? 0).div(1_000_000)) + .add(new Decimal(tokens.cache.read).mul(costInfo?.cache?.read ?? 0).div(1_000_000)) + .add(new Decimal(tokens.cache.write).mul(costInfo?.cache?.write ?? 0).div(1_000_000)) + // TODO: update models.dev to have better pricing model, for now: + // charge reasoning tokens at the same rate as output tokens + .add(new Decimal(tokens.reasoning).mul(costInfo?.output ?? 0).div(1_000_000)) + .toNumber(), + ), tokens, } } diff --git a/packages/opencode/test/plugin/github-copilot-models.test.ts b/packages/opencode/test/plugin/github-copilot-models.test.ts index 939247f09b4e..1a63f3cb92f4 100644 --- a/packages/opencode/test/plugin/github-copilot-models.test.ts +++ b/packages/opencode/test/plugin/github-copilot-models.test.ts @@ -57,7 +57,7 @@ test("preserves temperature support from existing provider models", async () => ), ) as unknown as typeof fetch - const models = await CopilotModels.get( + const result = await CopilotModels.get( "https://api.githubcopilot.com", {}, { @@ -112,11 +112,81 @@ test("preserves temperature support from existing provider models", async () => }, }, ) + const models = result.models expect(models["gpt-4o"].capabilities.temperature).toBe(true) expect(models["brand-new"].capabilities.temperature).toBe(true) }) +test("converts Copilot AIC token prices to USD per million tokens", async () => { + globalThis.fetch = mock(() => + Promise.resolve( + new Response( + JSON.stringify({ + data: [ + { + model_picker_enabled: true, + id: "gpt-5", + name: "GPT-5", + version: "gpt-5-2026-06-01", + billing: { + token_prices: { + batch_size: 500000, + default: { + input_price: 500, + output_price: 3000, + cache_price: 50, + }, + }, + }, + capabilities: { + family: "gpt", + limits: { + max_context_window_tokens: 200000, + max_output_tokens: 16384, + max_prompt_tokens: 200000, + }, + supports: { + streaming: true, + tool_calls: true, + }, + }, + }, + { + model_picker_enabled: true, + id: "incomplete-internal-model", + name: "Incomplete Internal Model", + version: "incomplete-internal-model-2026-06-01", + capabilities: { + family: "internal", + supports: {}, + }, + }, + { + model_picker_enabled: false, + id: "ignored-non-chat-record", + }, + ], + }), + { status: 200 }, + ), + ), + ) as unknown as typeof fetch + + const models = (await CopilotModels.get("https://api.githubcopilot.com")).models + + expect(models["gpt-5"].cost).toEqual({ + input: 10, + output: 60, + cache: { + read: 1, + write: 0, + }, + }) + expect(models["incomplete-internal-model"]).toBeUndefined() + expect(models["ignored-non-chat-record"]).toBeUndefined() +}) + test("clears existing variants so refreshed models calculate provider-specific variants", async () => { globalThis.fetch = mock(() => Promise.resolve( @@ -150,7 +220,7 @@ test("clears existing variants so refreshed models calculate provider-specific v ), ) as unknown as typeof fetch - const models = await CopilotModels.get( + const result = await CopilotModels.get( "https://api.githubcopilot.com", {}, { @@ -210,6 +280,7 @@ test("clears existing variants so refreshed models calculate provider-specific v }, }, ) + const models = result.models expect(models["claude-opus-4.7"].api.npm).toBe("@ai-sdk/anthropic") expect(models["claude-opus-4.7"].variants).toBeUndefined() diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 9bff89c348ad..de797691378c 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -1674,6 +1674,20 @@ describe("SessionNs.getUsage", () => { expect(result.cost).toBe(3 + 1.5) }) + test("uses authoritative Copilot billed cost when provided", () => { + const result = SessionNs.getUsage({ + model: createModel({ + context: 100_000, + output: 32_000, + cost: { input: 3, output: 15, cache: { read: 0.3, write: 0.3 } }, + }), + usage: usage({ inputTokens: 11_774, outputTokens: 39, totalTokens: 11_813 }), + metadata: { copilot: { totalNanoAiu: 4_473_525_000 } }, + }) + + expect(result.cost).toBe(0.04473525) + }) + test("uses matching context cost tier before over-200k fallback", () => { const model = createModel({ context: 1_000_000, diff --git a/packages/opencode/test/session/llm.test.ts b/packages/opencode/test/session/llm.test.ts index 2376750eeae0..4a465c2abd5a 100644 --- a/packages/opencode/test/session/llm.test.ts +++ b/packages/opencode/test/session/llm.test.ts @@ -500,6 +500,57 @@ describe("session.llm.ai-sdk adapter", () => { expect(result.tokens.cache.write).toBe(300) expect(result.tokens.cache.read).toBe(200) }) + + test("captures Copilot billed usage from raw Anthropic message deltas per step", async () => { + const events = await adapt([ + uncheckedAdapterEvent({ + type: "raw", + rawValue: { + type: "message_delta", + copilot_usage: { total_nano_aiu: 4_473_525_000 }, + }, + }), + { + type: "finish-step", + response: { id: "msg_test", timestamp: new Date(0), modelId: "claude-sonnet-4.6" }, + finishReason: "stop", + rawFinishReason: "end_turn", + usage: { + inputTokens: 11_774, + outputTokens: 39, + totalTokens: 11_813, + inputTokenDetails: { noCacheTokens: 3, cacheReadTokens: 0, cacheWriteTokens: 11_771 }, + outputTokenDetails: { textTokens: 39, reasoningTokens: undefined }, + }, + providerMetadata: { anthropic: { cacheCreationInputTokens: 11_771 } }, + }, + { + type: "finish-step", + response: { id: "msg_follow_up", timestamp: new Date(0), modelId: "claude-sonnet-4.6" }, + finishReason: "stop", + rawFinishReason: "end_turn", + usage: { + inputTokens: 1, + outputTokens: 1, + totalTokens: 2, + inputTokenDetails: { noCacheTokens: 1, cacheReadTokens: 0, cacheWriteTokens: 0 }, + outputTokenDetails: { textTokens: 1, reasoningTokens: undefined }, + }, + providerMetadata: { anthropic: {} }, + }, + ]) + + expect(events[0]).toMatchObject({ + type: "step-finish", + providerMetadata: { + anthropic: { cacheCreationInputTokens: 11_771 }, + copilot: { totalNanoAiu: 4_473_525_000 }, + }, + }) + expect(events[1]).toMatchObject({ type: "step-finish", providerMetadata: { anthropic: {} } }) + if (events[1].type !== "step-finish") throw new Error("expected step-finish") + expect(events[1].providerMetadata?.copilot).toBeUndefined() + }) }) type Capture = { diff --git a/packages/plugin/src/index.ts b/packages/plugin/src/index.ts index 3c710d076a38..edfa0139dfca 100644 --- a/packages/plugin/src/index.ts +++ b/packages/plugin/src/index.ts @@ -294,6 +294,7 @@ export interface Hooks { system: string[] }, ) => Promise + "experimental.provider.small_model"?: (input: { provider: ProviderV2 }, output: { model?: ModelV2 }) => Promise /** * Called before session compaction starts. Allows plugins to customize * the compaction prompt.