diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 03a4e1a3b..ca2cfac40 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -1,6 +1,9 @@ # Tok Architecture -Tok is a high-performance Go library and CLI tool that cuts LLM token costs by 60–90% through prompt compression, output filtering, and transparent command rewriting. +Tok is a Go **library** (no CLI, no binary) that cuts LLM token costs by 60–90% +through prompt compression, output filtering, cost estimation, and secret +detection. It is consumed by `hawk`, `eyrie`, `yaad`, and any other Go program +that needs to keep LLM context windows lean. --- @@ -8,36 +11,21 @@ Tok is a high-performance Go library and CLI tool that cuts LLM token costs by 6 ``` ┌─────────────────────────────────────────────────────────────────┐ -│ AI Coding Agents │ -│ Claude Code | Cursor | Copilot | Gemini CLI | ... │ +│ Consumer Application │ +│ hawk | eyrie | yaad | custom Go service │ └────────────────────────────┬────────────────────────────────────┘ - │ + │ import "github.com/GrayCodeAI/tok" ┌────────────────────────────▼────────────────────────────────────┐ -│ Shell Hooks (transparent) │ -│ bash_hook.sh | powershell_hook.ps1 | node_hook.js │ -└────────────────────────────┬────────────────────────────────────┘ - │ -┌────────────────────────────▼────────────────────────────────────┐ -│ Tok CLI │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ -│ │ Command │ │ Filter │ │ Rewrite │ │ -│ │ Runner │ │ Selector │ │ Engine │ │ -│ │ │ │ (80 TOML │ │ (transparent │ │ -│ │ │ │ configs) │ │ command prefixing) │ │ -│ └──────┬───────┘ └──────┬───────┘ └──────────┬───────────┘ │ -│ │ │ │ │ -│ ┌──────▼────────────────▼─────────────────────▼───────────┐ │ -│ │ Compression Pipeline (50+ layers) │ │ -│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────────┐ │ │ -│ │ │ Pre │ │ Core │ │Semantic │ │ Advanced │ │ │ -│ │ │ (0-0.5) │ │ (1-10) │ │(11-20) │ │ (21-50) │ │ │ -│ │ └─────────┘ └─────────┘ └─────────┘ └─────────────┘ │ │ -│ 
└─────────────────────────────────────────────────────────┘ │ -└────────────────────────────┬────────────────────────────────────┘ - │ -┌────────────────────────────▼────────────────────────────────────┐ -│ Library API │ -│ tok.Compress() | tok.EstimateTokens() | tok.StreamCompress() │ +│ tok package │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ +│ │ Compress │ │ Estimate │ │ Cost / Rate-limit / │ │ +│ │ (31-layer │ │ Tokens │ │ Secret detection │ │ +│ │ pipeline) │ │ (BPE) │ │ (33 patterns) │ │ +│ └──────┬───────┘ └──────┬───────┘ └──────────┬───────────┘ │ +│ └────────────────┼─────────────────────┘ │ +│ ▼ │ +│ internal/filter (31 layers) + internal/core (BPE) │ +│ + internal/secrets + internal/cache + internal/extract │ └─────────────────────────────────────────────────────────────────┘ ``` @@ -48,25 +36,82 @@ Tok is a high-performance Go library and CLI tool that cuts LLM token costs by 6 ### Public API ```go -// Compress reduces text while preserving meaning. -func Compress(text string, opts ...Option) (*Result, error) +// Compress runs the 31-layer pipeline (plus any opt-in post-stages) and +// returns the compressed text and per-stage stats. Safe to call with no +// options; sensible defaults apply. +func Compress(text string, opts ...Option) (string, Stats) + +// EstimateTokens returns the estimated token count for text. BPE-backed +// when a model is supplied, heuristic otherwise. +func EstimateTokens(text string) int +func EstimateTokensForModel(text, model string) int +func EstimateTokensPrecise(text string) int +func EstimateTokensFast(text string) int + +// Cost & pricing. +func GetModelPricing(model string) (ModelPricing, bool) +func RegisterModelPricing(model string, inputPer1K, outputPer1K float64) +func EstimateCostSavings(stats Stats, model string) float64 +func ListModels() []string + +// Secret detection. +type SecretDetector struct{ ... 
} +func NewSecretDetector() *SecretDetector +func DefaultSecretDetector() *SecretDetector +func IsSensitiveFilename(path string) (bool, secrets.FilenameMatch) + +// Output extraction. +func ExtractJSON(text string) (string, bool) +func ExtractJSONArray(text string) (string, bool) +func ExtractAllJSON(text string) []string +func CompressJSON(text string, maxItems int) string +func CompressLog(text string) string + +// Reusable compressor. +type Compressor struct{ ... } +func NewCompressor(opts ...Option) *Compressor +func (c *Compressor) Compress(text string) (string, Stats) -// EstimateTokens returns fast token count estimate. -func EstimateTokens(text string, model ...string) int +// Context-window optimizer. +type ContextOptimizer struct{ ... } +func NewContextOptimizer(opts ...Option) *ContextOptimizer -// Compressor is a reusable compression instance. -type Compressor struct { ... } -func NewCompressor(opts ...Option) *Compressor -func (c *Compressor) Compress(text string) (*Result, error) +// Strategy advisor. +type CompressionAdvisor struct{ ... } +func NewCompressionAdvisor() *CompressionAdvisor + +// Rate-limit / usage tracker. +type UsageTracker struct{ ... } +func NewUsageTracker(opts ...UsageOption) *UsageTracker + +// Persistent gain tracker (SQLite). +type Tracker struct{ ... 
} +func NewTracker(ctx context.Context) (*Tracker, error) +func NewTrackerAt(path string) (*Tracker, error) ``` ### Functional Options ```go -tok.WithMode(tok.ModeFull) // Compression mode (lite/fast/balanced/full/aggressive/wenyan-ultra) -tok.WithBudget(10000) // Token budget limit -tok.WithTier(tok.TierAdvanced) // Pipeline tier (core/semantic/advanced/specialized) -tok.WithQuery("relevant context") // Query-aware compression +tok.WithMode(tok.ModeFull) // Compression intensity +tok.WithBudget(10000) // Hard token budget on output +tok.WithTier(tok.TierCore) // Pipeline tier (surface/trim/extract/core/code/log/thread/adaptive) +tok.WithQuery("relevant context") // Query-aware compression +tok.WithModel("gpt-4o") // Enables cost calculation + BPE +tok.WithCodeAware("go") // Symbol-preserving guard for source code +tok.WithCustomFilters(rules) // Append user TOML regex rules +tok.WithPerplexityGuided(scorer, 0.4) // LLMLingua-style selective drop +``` + +### Preset Variables + +```go +tok.Minimal // Lightest pass — entropy + AST + budget +tok.Aggressive // Full pipeline, every layer flipped on +tok.Surface // Output filtering only (good for already-compressed text) +tok.Adaptive // Auto-detect content type, choose tier +tok.Code // Symbol-preserving, comment stripping, structure kept +tok.Log // Collapse repeated INFO/DEBUG runs, keep ERROR verbatim ``` --- @@ -75,7 +120,8 @@ tok.WithQuery("relevant context") // Query-aware compression ### Architecture -The pipeline is a multi-layer compression engine with 50+ layers organized in 5 tiers: +The pipeline is a multi-stage compression engine. 
Each stage mutates the input +in place and updates the shared `PipelineContext`: ``` Input Text @@ -88,14 +134,22 @@ Input Text │ └───────────────────────────────────────────────────┘ │ │ │ │ ┌───────────────────────────────────────────────────┐ │ -│ │ Layer 0: QuantumLock (KV-cache alignment) │ │ -│ │ Layer 0.5: Photon (image handling) │ │ -│ │ Layer 1: Entropy filtering │ │ -│ │ Layer 2: Perplexity scoring │ │ -│ │ Layer 3: AST preservation │ │ -│ │ Layer 4: Goal-driven compression │ │ -│ │ ... │ │ -│ │ Layer 50: ContextCrunch (experimental) │ │ +│ │ Pre (0-0.5) : QuantumLock (KV-cache align), │ │ +│ │ Photon (image handling) │ │ +│ │ Core (1-10) : Entropy, Perplexity, AST, │ │ +│ │ Goal-Driven, Contrastive, N-gram, │ │ +│ │ Evaluator-Heads, Gist, Hierarchical,│ │ +│ │ Budget │ │ +│ │ Sem. (11-20) : Compaction, Attribution, H2O, │ │ +│ │ AttentionSink, MetaToken, │ │ +│ │ SemanticChunk, SketchStore, │ │ +│ │ LazyPruner, SemanticAnchor, │ │ +│ │ AgentMemory │ │ +│ │ Adv. (21-40) : MarginalInfoGain, NearDedup, │ │ +│ │ CoTCompress, DiffAdapt, EPiC, │ │ +│ │ GraphCoT, and ~15 more │ │ +│ │ Spec.(41-50) : ContextCrunch, SearchCrunch, │ │ +│ │ AdaptiveLearning (5K+ token input) │ │ │ └───────────────────────────────────────────────────┘ │ │ │ │ ┌───────────────────────────────────────────────────┐ │ @@ -110,17 +164,16 @@ Compressed Text + Stats ### Tier System | Tier | Layers | Purpose | Auto-Enabled | -|------|--------|---------|-------------| +|------|--------|---------|--------------| | Pre | 0-0.5 | QuantumLock, Photon | Always | | Core | 1-10 | Entropy, Perplexity, AST, Goal-Driven, Contrastive, N-gram, Evaluator, Gist, Hierarchical, Budget | Always | | Semantic | 11-20 | Compaction, Attribution, H2O, AttentionSink, MetaToken, SemanticChunk, SketchStore, LazyPruner, SemanticAnchor, AgentMemory | Always | | Advanced | 21-40 | 20 research-based layers (MarginalInfoGain, NearDedup, CoTCompress, DiffAdapt, EPiC, GraphCoT, etc.) 
| Auto for large inputs | -| Specialized | 41-50 | Experimental (ContextCrunch, SearchCrunch, AdaptiveLearning, etc.) | Auto for 5K+ tokens | +| Specialized | 41-50 | Experimental (ContextCrunch, SearchCrunch, AdaptiveLearning) | Auto for 5K+ tokens | ### Layer Interface ```go -// Filter is the core layer interface. type Filter interface { Name() string Apply(input string, ctx *PipelineContext) (string, error) @@ -159,24 +212,41 @@ type PipelineContext struct { | Package | Purpose | Key Files | |---------|---------|-----------| -| `tok.go` | Public API | `Compress()`, `EstimateTokens()` | -| `options.go` | Functional options | `WithMode()`, `WithBudget()`, `WithTier()` | -| `compressor.go` | Reusable compressor | `NewCompressor()` | -| `stream.go` | Streaming compression | `StreamCompressor` | -| `optimizer.go` | Context optimization | `ContextOptimizer` | -| `chunker.go` | Code-aware chunking | `CodeChunker` | -| `advisor.go` | Strategy recommendations | `CompressionAdvisor` | -| `ratelimit.go` | Usage tracking | `UsageTracker` | -| `secrets.go` | Secret detection | `SecretDetector` | -| `internal/filter/` | Pipeline engine (50+ layers) | `coordinator.go`, `layer_*.go` | -| `internal/core/` | Token estimation, command runner | `estimator.go`, `runner.go` | -| `internal/cache/` | Multi-level caching | `cache.go`, `watcher.go` | -| `internal/config/` | Configuration | `config.go` | -| `internal/fastops/` | SIMD-optimized operations | `simd_amd64.go`, `generic.go` | -| `internal/secrets/` | Secret patterns | `patterns.go` | -| `filters/` | TOML filter configs (80 files) | `jest.toml`, `eslint.toml`, etc. 
| -| `agents/` | AI agent integration (15 configs) | `claude-code.toml`, `cursor.toml` | -| `hooks/` | Shell integration | `bash_hook.sh`, `powershell_hook.ps1` | +| `tok.go` | Public Compress + EstimateTokens entry points | `Compress()`, `EstimateTokens*` | +| `options.go` | Functional options + preset variables | `WithMode()`, `WithBudget()`, `WithTier()`, `Minimal/Aggressive/Surface/Adaptive/Code/Log` | +| `compressor.go` | Reusable `Compressor` (caches pipeline) | `Compressor`, `NewCompressor` | +| `stream.go` | Streaming compression (delta-only) | `StreamCompressor` | +| `optimizer.go` | Token-budget context optimizer | `ContextOptimizer`, `Greedy/Balanced/PriorityOptimize` | +| `chunker.go` | Source-code chunking (130+ language map) | `ChunkCode`, `RegisterChunker` | +| `advisor.go` | Strategy recommender + content classifier | `CompressionAdvisor`, `ClassifyContent` | +| `ratelimit.go` | Usage tracker w/ thresholds | `UsageTracker`, `FormatUsageBar` | +| `secrets.go` | Secret detection facade (33 patterns internally) | `SecretDetector`, `IsSensitiveFilename` | +| `tracker.go` | Persistent gain tracker (SQLite/WAL) | `Tracker`, `NewTrackerAt` | +| `entropy.go` | Shannon-entropy helpers | `ShannonEntropy`, `IsHighEntropy` | +| `extract.go` | Brace-balanced JSON extraction | `ExtractJSON*` | +| `jsoncrunch.go` | JSON array sampler | `CompressJSON` | +| `logcrunch.go` | Log-line level detector + run collapse | `CompressLog` | +| `profile.go` | Named/versioned compression profiles (TOML) | `LoadProfile`, `BuiltinProfile*` | +| `filters.go` | Custom regex filter DSL (TOML) | `LoadFilterRules`, `CustomFilter` | +| `codeaware.go` | Symbol-preserving code guard | `WithCodeAware`, `codeProtector` | +| `perplexity.go` | LLMLingua-style selective drop | `WithPerplexityGuided` | +| `mcp/server.go` | MCP server with real `count_tokens`, `estimate_cost`, `compress_text`, `redact_secrets` tools | `NewTokServer` | +| `internal/filter/` | Pipeline engine — 31 layers + tier 
configs + presets | `pipeline_*.go`, `presets.go`, `tier_config.go` | +| `internal/core/` | BPE tokenizer, batch processor, runner | `estimator.go`, `cost.go` | +| `internal/cache/` | Multi-level cache with git-aware watcher | `cache.go`, `git_watcher.go` | +| `internal/extract/` | Brace-balanced JSON extraction impl | `extract.go` | +| `internal/fastops/` | SIMD-accelerated primitives | `simd_amd64.go`, `simd_amd64.s` | +| `internal/secrets/` | 33 secret regex patterns + filename detector | `secrets.go`, `filename.go` | +| `internal/tracking/` | SQLite-backed gain tracker | `tracking.go` | +| `internal/utils/` | slog adapter, helpers | `logger.go` | +| `filters/` | 80 per-tool TOML filter configs (jest, eslint, go, kubectl, terraform, etc.) | one TOML per tool | +| `commands/` | 6 TOML agent-command definitions (pr-review, tok-commit, tok-compress, tok-help, tok-review, tok) | one TOML per command | +| `config/` | Example TOML + tokman.yaml | `example.toml` | +| `rules/` | ast-grep `no-fmt-println` rule + tok agent-activation prompt | `no-fmt-println.yaml`, `tok-activate.md` | +| `skills/` | 5 Claude-style agent skills (`tok`, `tok-commit`, `tok-compress`, `tok-help`, `tok-review`) | `SKILL.md` per skill | +| `benchmarks/` | Benchmark harness (run.sh + results.md template) | `run.sh` | +| `evals/` | Prompt-compression eval | `pipeline-bench.sh`, `prompts/en.txt` | +| `types/` | Cross-eco exported types (mirrors hawk's `shared/types/`) | `finding.go`, `severity.go` | --- @@ -185,29 +255,42 @@ type PipelineContext struct { ### Compression Request ``` -1. Application calls tok.Compress(text, opts...) -2. Options parsed (mode, budget, tier, query) -3. Content type detected (code, log, markdown, etc.) +1. Consumer calls tok.Compress(text, opts...) +2. Options parsed (mode, budget, tier, query, model, code-aware, custom rules) +3. Content type detected (code, log, markdown, data, etc.) 4. Adaptive tier selection based on input size -5. 
PipelineCoordinator created (from pool for reuse) +5. PipelineCoordinator created (from sync.Pool for reuse) 6. Layers executed sequentially: a. Each layer receives input + PipelineContext b. Layer transforms text (remove, compress, restructure) c. PipelineContext updated (tokens saved, quality score) d. Early exit if budget met -7. Quality guardrails validate output -8. Result returned (compressed text + stats) +7. Optional post-stages: perplexity-guided drop → custom TOML rules +8. Quality guardrails validate output (no accidental whitespace/structure loss) +9. Stats computed: originalTokens, finalTokens, tokensSaved, reductionPct, cost +10. Result returned (compressed text + stats) +``` + +### Secret Detection Request + +``` +1. Consumer calls det := tok.NewSecretDetector() +2. det.DetectSecrets(text) iterates the 33-pattern registry +3. Each pattern: compiled regex; on match, record (type, span, value) +4. det.RedactSecrets(text) replaces matches with [REDACTED:] +5. Optional: DetectAndRedactWithEntropy(text, threshold) adds Shannon-entropy + pass to catch high-entropy blobs the regex table misses ``` -### Command Rewriting +### Cost Calculation Request ``` -1. Shell hook intercepts: "cargo test" -2. Tok CLI receives command -3. Filter config loaded: filters/jest.toml -4. Command rewritten: "tok test-runner cargo test" -5. Output captured and filtered through pipeline -6. Filtered output returned to agent +1. Consumer calls tok.GetModelPricing(model) → ModelPricing + (returns zero-value + false for unknown models; consumer may call + tok.RegisterModelPricing to add custom entries) +2. Cost = (inputTokens/1000)*InputPricePer1K + (outputTokens/1000)*OutputPricePer1K +3. 
For compression savings: tok.EstimateCostSavings(stats, model) + conservatively assumes saved tokens would have been input tokens ``` --- @@ -216,17 +299,18 @@ type PipelineContext struct { ### Object Pooling -`CoordinatorPool` reuses pipeline coordinators for 10-20x speedup: +`coordinator_pool.go` reuses pipeline coordinators via `sync.Pool` for a +**10–20× speedup** over per-call `NewCompressor()` construction. ```go var coordinatorPool = sync.Pool{ - New: func() interface{} { return NewPipelineCoordinator() }, + New: func() interface{} { return filter.NewPipelineCoordinator() }, } ``` ### SIMD Optimization -`internal/fastops/` provides SIMD-accelerated string operations: +`internal/fastops/` provides SIMD-accelerated string operations on amd64: | Operation | Generic | SIMD (AVX2) | Speedup | |-----------|---------|-------------|---------| @@ -234,34 +318,43 @@ var coordinatorPool = sync.Pool{ | Whitespace norm | 80ns | 25ns | 3.2x | | Char counting | 60ns | 20ns | 3.0x | -Build tags: `simd_avx2`, `simd_neon` (auto-detected at runtime) +Build tag: `simd_avx2` (auto-detected at runtime). ### Token Estimation Two modes: -- **Heuristic**: ~0.3ns/op (character-based estimate) -- **BPE**: ~2ns/op (tiktoken-compatible, precise) +- **Heuristic** (`EstimateTokensFast`): ~0.3 ns/op, character-based estimate +- **BPE** (`EstimateTokensPrecise` / `EstimateTokensForModel`): ~2 ns/op, + tiktoken-compatible (cl100k, o200k, p50k, r50k encodings) + +`internal/core/estimator.go` uses a 64-shard sharded LRU token cache for BPE +counts (FNV-64a keyed, atomic hit counter). + +### Buffer Pooling + +`internal/filter/bytepool.go` provides `BytePool` + `FastStringBuilder` to +reduce GC pressure on hot paths. --- ## Filter Configuration -80 TOML-based command filter definitions in `filters/`: +80 per-tool TOML filter configs in `filters/` (one per CLI tool: jest, eslint, +go, kubectl, terraform, vitest, playwright, aws, swift, etc.). 
Each declares +which pipeline layers to run and a per-tool token budget. Loaded via +`tok.LoadFilterRules` and applied via `WithCustomFilters`. ```toml # filters/jest.toml -[command] -name = "jest" -pattern = "^jest\\b" - -[filter] -layers = ["ansi_strip", "whitespace", "error_extract", "summary"] -output_max_tokens = 2000 - -[patterns] -error = "^\\s*●\\s+" -pass = "^\\s*✓\\s+" -fail = "^\\s*✕\\s+" +[[rule]] +name = "strip-ansi" +pattern = '\x1b\[[0-9;]*m' +replacement = "" + +[[rule]] +name = "collapse-blank-lines" +pattern = '\n{3,}' +replacement = '\n\n' ``` --- @@ -270,11 +363,12 @@ fail = "^\\s*✕\\s+" ### Secret Detection -Pattern + entropy-based detection: +Pattern + entropy-based detection across 33 patterns: ```go type SecretDetector struct { - patterns []*regexp.Regexp // 15+ secret formats + patterns []*regexp.Regexp // 33 secret formats + entropy EntropyAnalyzer // optional Shannon-entropy pass } func (d *SecretDetector) DetectSecrets(text string) []SecretMatch @@ -282,15 +376,28 @@ func (d *SecretDetector) RedactSecrets(text string) string func (d *SecretDetector) DetectAndRedactWithEntropy(text string, threshold float64) string ``` -Supported patterns: API keys, JWT tokens, AWS keys, GitHub tokens, private keys, connection strings, etc. +Supported patterns include: AWS access keys, GitHub PATs, Slack tokens, Google +API keys, Stripe keys, OpenAI/Anthropic keys, JWTs, RSA/EC/OpenSSH private +keys, SendGrid, Twilio, Heroku, DigitalOcean, npm, PyPI, Docker registry, +generic API keys, passwords, DB connection strings, Bearer tokens. + +`tok.IsSensitiveFilename` complements content scanning with a 3-layer +filename detector (exact basename, sensitive directory, name token) for +`.env`, `id_rsa`, `/home/*/.ssh/...`, etc. 
--- ## Build & Release - **Language**: Go 1.26+, zero CGO -- **Binary**: Single static binary -- **Platforms**: linux/darwin/windows/freebsd × amd64/arm64/386 -- **Distribution**: `go install`, Homebrew tap, deb/rpm (nfpm), Docker -- **Release**: GoReleaser with SHA-256 checksums, release-please automation -- **CI**: 3 workflows (ci.yml, quality.yml, security.yml) +- **Type**: Library — no binary, no CLI (`.goreleaser.yml` ships source + archive + SPDX SBOM only) +- **Distribution**: `go get github.com/GrayCodeAI/tok` +- **Versioning**: `VERSION` file is the single source of truth; embedded via + `//go:embed` in `version.go`; bumped by release-please from Conventional + Commits +- **CI**: 3 workflows (`ci.yml` for fmt/vet/lint/test/security, `release.yml` + for GoReleaser, `scorecard.yml` for OpenSSF) +- **Coverage gate**: 60% (codecov) + +The consumer-facing CLI is `hawk tok ...`, which embeds this library. diff --git a/CITATION.cff b/CITATION.cff index 4c07056a7..e14cc2dbc 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,5 +1,5 @@ cff-version: 1.2.0 -title: "Tok: Token-Aware CLI Proxy with 31-Layer Compression Pipeline" +title: "Tok: A Go Library for Prompt Compression, Output Filtering, Token Estimation, and Secret Detection" message: "If you use Tok in your research, please cite it using these metadata." type: software authors: @@ -15,18 +15,22 @@ keywords: - llm - ai-coding-assistant - compression - - cli-proxy - context-window - - claude-code - - cursor - - copilot + - bpe + - token-estimation + - secret-detection + - go-library abstract: >- - Tok is a token-aware CLI proxy that intercepts CLI commands and applies - a 31-layer compression pipeline to reduce token usage for AI coding assistants. - Built on research from 120+ papers, it achieves 60-90% token reduction on - common development operations. 
The pipeline includes entropy filtering, - perplexity pruning, goal-driven selection, AST preservation, contrastive - ranking, and 15+ additional research-backed compression layers. + Tok is a pure Go library (no CGO, no CLI) that provides prompt compression, + output filtering, BPE token estimation, cost calculation, and secret + detection for AI coding agents and other LLM workloads. It exposes a + 31-layer compression pipeline (entropy, LLMLingua-style perplexity + pruning, AST preservation, gisting, H2O, attention-sink, ChunkKV, and + more) and ships a 33-pattern secret scanner with optional Shannon-entropy + analysis. Built on research from 50+ papers, it achieves 60-90% token + reduction on common development operations. The library is consumed by + hawk, eyrie, and yaad in the hawk-eco ecosystem, and is available to any + Go program via `go get github.com/GrayCodeAI/tok`. references: - type: article title: "Selective Context for Language Models" diff --git a/mcp/server.go b/mcp/server.go index 23148650e..27aab049f 100644 --- a/mcp/server.go +++ b/mcp/server.go @@ -1,8 +1,27 @@ +// Package mcp provides a minimal in-memory MCP (Model Context Protocol) server +// pre-loaded with tok tools. +// +// The implementation is deliberately small: it is an in-process registry + +// dispatcher with no stdio/HTTP/SSE transport. Host applications that need +// transport wire this package's *MCPServer into their own MCP daemon (for +// example, hawk's internal/mcp package exposes it to MCP-compatible agents). +// +// The four default tools wired up by NewTokServer call the real tok package +// APIs rather than stub implementations: +// +// - count_tokens → tok.EstimateTokensForModel (BPE) or tok.EstimateTokens +// - estimate_cost → tok.GetModelPricing, applied per 1K input and output tokens +// - compress_text → tok.Compress with the caller-supplied options +// - redact_secrets → tok.SecretDetector (patterns, optional entropy pass) +// +// Tool authors can also use the bare NewServer + RegisterTool for custom surfaces.
package mcp import ( "context" "fmt" + + tok "github.com/GrayCodeAI/tok" ) type ToolHandler func(ctx context.Context, params map[string]interface{}) (interface{}, error) @@ -23,14 +42,19 @@ type toolEntry struct { handler ToolHandler } +// NewServer creates an empty MCP server with the given name. Tools must be +// registered with RegisterTool before the server is useful. func NewServer(name string) *MCPServer { return &MCPServer{name: name, tools: make(map[string]toolEntry)} } +// RegisterTool adds a tool to the server. Re-registering an existing name +// overwrites the previous definition and handler. func (s *MCPServer) RegisterTool(name, description string, schema map[string]interface{}, handler ToolHandler) { s.tools[name] = toolEntry{def: ToolDef{Name: name, Description: description, InputSchema: schema}, handler: handler} } +// ListTools returns the registered tool definitions. Order is not stable. func (s *MCPServer) ListTools() []ToolDef { defs := make([]ToolDef, 0, len(s.tools)) for _, e := range s.tools { @@ -39,6 +63,9 @@ func (s *MCPServer) ListTools() []ToolDef { return defs } +// HandleRequest dispatches a JSON-RPC-style request to the registered tool. +// Supported methods: "tools/list" (no params) and "tools/call" (params must +// include "name" and optional "arguments" map). func (s *MCPServer) HandleRequest(ctx context.Context, method string, params map[string]interface{}) (interface{}, error) { switch method { case "tools/list": @@ -62,76 +89,226 @@ func (s *MCPServer) HandleRequest(ctx context.Context, method string, params map } } -// NewTokServer creates an MCP server pre-registered with tok tools. +// NewTokServer creates an MCP server pre-registered with the standard tok +// tool surface. 
The registered tools call the real tok package APIs: +// +// - count_tokens(text, model) → real BPE-backed token count +// - estimate_cost(model, inputTokens, outputTokens) → real pricing-table cost +// - compress_text(text, mode) → real 31-layer compression pipeline +// - redact_secrets(text) → real 33-pattern secret detector func NewTokServer() *MCPServer { s := NewServer("tok") - s.RegisterTool("count_tokens", "Count tokens in text for a given model", map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{ - "text": map[string]interface{}{"type": "string"}, - "model": map[string]interface{}{"type": "string"}, + s.RegisterTool("count_tokens", + "Count tokens in text using tok's BPE tokenizer (cl100k/o200k/etc.). Pass model='heuristic' for a fast character-based estimate.", + map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "text": map[string]interface{}{"type": "string", "description": "Input text to count tokens for"}, + "model": map[string]interface{}{"type": "string", "description": "Model name (e.g. 
gpt-4o, claude-sonnet) or 'heuristic' for a fast estimate"}, + }, + "required": []string{"text"}, }, - "required": []string{"text", "model"}, - }, func(ctx context.Context, p map[string]interface{}) (interface{}, error) { - text, _ := p["text"].(string) - model, _ := p["model"].(string) - if text == "" || model == "" { - return nil, fmt.Errorf("count_tokens: text and model required") - } - return map[string]interface{}{"count": len(text) / 4, "model": model}, nil - }) - - s.RegisterTool("estimate_cost", "Estimate dollar cost for input/output tokens", map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{ - "model": map[string]interface{}{"type": "string"}, - "inputTokens": map[string]interface{}{"type": "number"}, - "outputTokens": map[string]interface{}{"type": "number"}, + countTokensHandler) + + s.RegisterTool("estimate_cost", + "Estimate the dollar cost for input + output tokens at the model's registered price-per-1K. Returns 0 for unknown models.", + map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "model": map[string]interface{}{"type": "string", "description": "Model name (e.g. 
gpt-4o, claude-sonnet)"}, + "inputTokens": map[string]interface{}{"type": "number", "description": "Number of input tokens"}, + "outputTokens": map[string]interface{}{"type": "number", "description": "Number of output tokens"}, + }, + "required": []string{"model", "inputTokens", "outputTokens"}, }, - "required": []string{"model", "inputTokens", "outputTokens"}, - }, func(ctx context.Context, p map[string]interface{}) (interface{}, error) { - model, _ := p["model"].(string) - if model == "" { - return nil, fmt.Errorf("estimate_cost: model required") - } - in, _ := p["inputTokens"].(float64) - out, _ := p["outputTokens"].(float64) - return map[string]interface{}{"model": model, "inputTokens": in, "outputTokens": out, "totalCost": 0.0}, nil - }) - - s.RegisterTool("compress_text", "Compress text by removing redundant whitespace", map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{ - "text": map[string]interface{}{"type": "string"}, + estimateCostHandler) + + s.RegisterTool("compress_text", + "Compress text using tok's full pipeline. Returns the compressed string and per-stage stats. 
Optional 'mode' parameter selects the compression preset (minimal, aggressive, surface, adaptive, code, log).", + map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "text": map[string]interface{}{"type": "string", "description": "Text to compress"}, + "mode": map[string]interface{}{ + "type": "string", + "description": "Compression preset: minimal | aggressive | surface | adaptive | code | log (default: minimal)", + "enum": []string{"minimal", "aggressive", "surface", "adaptive", "code", "log"}, + }, + "budget": map[string]interface{}{"type": "number", "description": "Optional token budget to enforce"}, + }, + "required": []string{"text"}, }, - "required": []string{"text"}, - }, func(ctx context.Context, p map[string]interface{}) (interface{}, error) { - text, _ := p["text"].(string) - if text == "" { - return nil, fmt.Errorf("compress_text: text required") - } - result := collapseWS(text) - return map[string]interface{}{"original": len(text), "compressed": len(result), "text": result}, nil - }) + compressTextHandler) + + s.RegisterTool("redact_secrets", + "Detect and redact secrets (API keys, AWS tokens, GitHub tokens, private keys, JWTs, etc.) from text. 
Returns redacted text and the count of matches found.", + map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "text": map[string]interface{}{"type": "string", "description": "Text to scan for secrets"}, + "entropyThreshold": map[string]interface{}{"type": "number", "description": "Optional Shannon-entropy threshold (default 4.5) to also catch high-entropy blobs that pattern matching misses"}, + }, + "required": []string{"text"}, + }, + redactSecretsHandler) return s } -func collapseWS(s string) string { - out := make([]byte, 0, len(s)) - space := false - for i := 0; i < len(s); i++ { - c := s[i] - if c == ' ' || c == '\t' || c == '\n' || c == '\r' { - if !space { - out = append(out, ' ') - space = true - } - } else { - out = append(out, c) - space = false +// countTokensHandler wires the MCP count_tokens tool to tok's real estimator. +func countTokensHandler(_ context.Context, p map[string]interface{}) (interface{}, error) { + text, _ := p["text"].(string) + if text == "" { + return nil, fmt.Errorf("count_tokens: text required") + } + model, _ := p["model"].(string) + if model == "" || model == "heuristic" { + return map[string]interface{}{ + "text": text, + "model": "heuristic", + "count": tok.EstimateTokens(text), + }, nil + } + return map[string]interface{}{ + "text": text, + "model": model, + "count": tok.EstimateTokensForModel(text, model), + }, nil +} + +// estimateCostHandler wires the MCP estimate_cost tool to tok's real +// pricing registry. Returns totalCost = input/1000 * inputPrice + +// output/1000 * outputPrice. totalCost is 0 for unknown models. 
+func estimateCostHandler(_ context.Context, p map[string]interface{}) (interface{}, error) { + model, _ := p["model"].(string) + if model == "" { + return nil, fmt.Errorf("estimate_cost: model required") + } + in, okIn := numberFromParams(p, "inputTokens") + out, okOut := numberFromParams(p, "outputTokens") + if !okIn || !okOut { + return nil, fmt.Errorf("estimate_cost: inputTokens and outputTokens required") + } + pricing, found := tok.GetModelPricing(model) + if !found { + return map[string]interface{}{ + "model": model, + "inputTokens": in, + "outputTokens": out, + "totalCost": 0.0, + "currency": "USD", + "known": false, + "warning": fmt.Sprintf("model %q is not in the pricing registry; call tok.RegisterModelPricing to add it", model), + }, nil + } + total := (in/1000)*pricing.InputPricePer1K + (out/1000)*pricing.OutputPricePer1K + return map[string]interface{}{ + "model": model, + "inputTokens": in, + "outputTokens": out, + "inputPricePer1K": pricing.InputPricePer1K, + "outputPricePer1K": pricing.OutputPricePer1K, + "totalCost": total, + "currency": "USD", + "known": true, + }, nil +} + +// compressTextHandler wires the MCP compress_text tool to tok.Compress. +// Optional mode parameter maps to one of the public preset variables. +func compressTextHandler(_ context.Context, p map[string]interface{}) (interface{}, error) { + text, _ := p["text"].(string) + if text == "" { + return nil, fmt.Errorf("compress_text: text required") + } + mode, _ := p["mode"].(string) + opts := buildCompressOptions(mode, p) + out, stats := tok.Compress(text, opts...) + return map[string]interface{}{ + "original": text, + "compressed": out, + "originalTokens": stats.OriginalTokens, + "finalTokens": stats.FinalTokens, + "tokensSaved": stats.TokensSaved, + "reductionPct": stats.ReductionPercent, + "mode": mode, + "model": stats.Model, + "costSavingsUSD": stats.CostSavings, + }, nil +} + +// redactSecretsHandler wires the MCP redact_secrets tool to the real +// SecretDetector. 
If entropyThreshold is set, the detector also runs +// Shannon-entropy analysis to catch blobs the pattern table misses. +func redactSecretsHandler(_ context.Context, p map[string]interface{}) (interface{}, error) { + text, _ := p["text"].(string) + if text == "" { + return nil, fmt.Errorf("redact_secrets: text required") + } + entropy, hasEntropy := numberFromParams(p, "entropyThreshold") + det := tok.NewSecretDetector() + var matches []tok.SecretMatch + var redacted string + if hasEntropy { + matches = det.DetectSecrets(text) + redacted = det.RedactSecrets(text) + if entropy > 0 { + redacted = det.DetectAndRedactWithEntropy(text, entropy) } + } else { + matches = det.DetectSecrets(text) + redacted = det.RedactSecrets(text) + } + return map[string]interface{}{ + "original": text, + "redacted": redacted, + "matchCount": len(matches), + }, nil +} + +// buildCompressOptions maps the MCP compress_text 'mode' enum to the +// corresponding tok preset Option, plus an optional budget. Unknown modes +// fall back to the default (no preset) which lets the pipeline use its +// auto-tier selection. +func buildCompressOptions(mode string, p map[string]interface{}) []tok.Option { + var opts []tok.Option + switch mode { + case "minimal": + opts = append(opts, tok.Minimal) + case "aggressive": + opts = append(opts, tok.Aggressive) + case "surface": + opts = append(opts, tok.Surface) + case "adaptive": + opts = append(opts, tok.Adaptive) + case "code": + opts = append(opts, tok.Code) + case "log": + opts = append(opts, tok.Log) + } + if budget, ok := numberFromParams(p, "budget"); ok && budget > 0 { + opts = append(opts, tok.WithBudget(int(budget))) + } + return opts +} + +// numberFromParams returns the float64 value of a key, supporting both +// JSON-number and JSON-decimal inputs as commonly produced by MCP clients. 
// Besides float64 (what encoding/json produces when a JSON number is decoded
// into an interface{}), every built-in float and integer width is accepted so
// in-process callers can pass typed values. A missing key, a nil value, or a
// value of any other type yields (0, false).
func numberFromParams(p map[string]interface{}, key string) (float64, bool) {
	// Indexing a missing key yields a nil interface, which matches no case
	// below and therefore falls through to the "not a number" return.
	switch n := p[key].(type) {
	case float64:
		return n, true
	case float32:
		return float64(n), true
	case int:
		return float64(n), true
	case int8:
		return float64(n), true
	case int16:
		return float64(n), true
	case int32:
		return float64(n), true
	case int64:
		return float64(n), true
	case uint:
		return float64(n), true
	case uint8:
		return float64(n), true
	case uint16:
		return float64(n), true
	case uint32:
		return float64(n), true
	case uint64:
		return float64(n), true
	}
	return 0, false
}

// ---------------------------------------------------------------------------
// Remainder of this line of the collapsed patch, reflowed one diff row per
// line so that nothing is lost: the header and import hunk of
// mcp/server_test.go, followed by the lead-in comment for the tests below.
//
//	diff --git a/mcp/server_test.go b/mcp/server_test.go
//	index aa5f4cb81..194ec210e 100644
//	--- a/mcp/server_test.go
//	+++ b/mcp/server_test.go
//	@@ -2,7 +2,10 @@ package mcp
//	 import (
//	 	"context"
//	+	"strings"
//	 	"testing"
//	+
//	+	tok "github.com/GrayCodeAI/tok"
//	 )
//	 func TestNewServer(t *testing.T) {
//	@@ -114,3 +117,253 @@ func TestHandleRequest_UnknownMethod(t *testing.T) {
//	 		t.Fatalf("unexpected error message: %v", err)
//	 	}
//	 }
//
// NewTokServer is pre-loaded with real tool handlers that delegate to the tok
// package. These tests exercise the wire-up so a future regression that
// reintroduces the legacy stub bodies (e.g. count_tokens=len/4,
// estimate_cost=0, compress_text=collapseWS) fails CI.
+ +func TestNewTokServer_RegistersAllTools(t *testing.T) { + s := NewTokServer() + defs := s.ListTools() + got := map[string]bool{} + for _, d := range defs { + got[d.Name] = true + } + for _, want := range []string{"count_tokens", "estimate_cost", "compress_text", "redact_secrets"} { + if !got[want] { + t.Errorf("expected registered tool %q in %v", want, got) + } + } +} + +func TestNewTokServer_CountTokens_Heuristic(t *testing.T) { + s := NewTokServer() + res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{ + "name": "count_tokens", + "arguments": map[string]interface{}{ + "text": "hello world from tok", + "model": "heuristic", + }, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + m, ok := res.(map[string]interface{}) + if !ok { + t.Fatalf("expected map, got %T", res) + } + count, ok := m["count"].(int) + if !ok { + t.Fatalf("expected int count, got %T", m["count"]) + } + if count != tok.EstimateTokens("hello world from tok") { + t.Fatalf("heuristic count %d != EstimateTokens %d (stub regression?)", + count, tok.EstimateTokens("hello world from tok")) + } + if count <= 0 { + t.Fatalf("expected positive count, got %d", count) + } +} + +func TestNewTokServer_CountTokens_ForModel(t *testing.T) { + s := NewTokServer() + res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{ + "name": "count_tokens", + "arguments": map[string]interface{}{ + "text": "hello world", + "model": "gpt-4o", + }, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + m := res.(map[string]interface{}) + if m["model"] != "gpt-4o" { + t.Fatalf("expected model gpt-4o, got %v", m["model"]) + } + if _, ok := m["count"].(int); !ok { + t.Fatalf("expected int count, got %T", m["count"]) + } +} + +func TestNewTokServer_CountTokens_MissingText(t *testing.T) { + s := NewTokServer() + _, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{ + "name": "count_tokens", + 
"arguments": map[string]interface{}{"model": "gpt-4o"}, + }) + if err == nil { + t.Fatal("expected error when text is missing") + } +} + +func TestNewTokServer_EstimateCost_KnownModel(t *testing.T) { + s := NewTokServer() + res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{ + "name": "estimate_cost", + "arguments": map[string]interface{}{ + "model": "gpt-4o", + "inputTokens": 1000.0, + "outputTokens": 500.0, + }, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + m := res.(map[string]interface{}) + if m["known"] != true { + t.Fatalf("expected known=true, got %v", m["known"]) + } + cost, ok := m["totalCost"].(float64) + if !ok { + t.Fatalf("expected float64 totalCost, got %T", m["totalCost"]) + } + pricing, _ := tok.GetModelPricing("gpt-4o") + want := (1000.0/1000)*pricing.InputPricePer1K + (500.0/1000)*pricing.OutputPricePer1K + if cost != want { + t.Fatalf("totalCost = %v, want %v (stub regression?)", cost, want) + } + if cost == 0 { + t.Fatal("expected non-zero cost for gpt-4o, got 0 (legacy stub regression?)") + } +} + +func TestNewTokServer_EstimateCost_UnknownModel(t *testing.T) { + s := NewTokServer() + res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{ + "name": "estimate_cost", + "arguments": map[string]interface{}{ + "model": "totally-fake-model-xyz", + "inputTokens": 100.0, + "outputTokens": 200.0, + }, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + m := res.(map[string]interface{}) + if m["known"] != false { + t.Fatalf("expected known=false, got %v", m["known"]) + } + if m["warning"] == nil { + t.Fatal("expected warning for unknown model") + } +} + +func TestNewTokServer_CompressText_RealPipeline(t *testing.T) { + s := NewTokServer() + // Aggressive mode on repetitive natural-language prose will drop most + // filler words. 
We don't assert specific content survives — we assert + // the *real* pipeline ran, not the legacy stub (which either no-op'd + // or only collapsed whitespace). The token-count and length + // comparison is sufficient evidence. + input := strings.Repeat( + "The rain in spain stays mainly in the plain. ", 100, + ) + res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{ + "name": "compress_text", + "arguments": map[string]interface{}{ + "text": input, + "mode": "aggressive", + }, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + m := res.(map[string]interface{}) + compressed, _ := m["compressed"].(string) + if compressed == "" { + t.Fatal("expected non-empty compressed text") + } + if compressed == input { + t.Fatal("compressed text equals input (stub regression: pipeline not invoked?)") + } + origTokens, _ := m["originalTokens"].(int) + finalTokens, _ := m["finalTokens"].(int) + if origTokens <= 0 || finalTokens <= 0 { + t.Fatalf("expected positive token counts, got orig=%d final=%d", origTokens, finalTokens) + } + if finalTokens >= origTokens { + t.Fatalf("aggressive compression should reduce tokens: orig=%d final=%d", origTokens, finalTokens) + } + // Stub regression guard: legacy compress_text only collapsed + // whitespace, which would still drop *some* bytes but leave the + // final token count close to the original. A 4× reduction is a + // strong signal the 31-layer pipeline actually ran. + if finalTokens*4 > origTokens { + t.Fatalf("expected aggressive mode to reduce tokens by >= 4x, got orig=%d final=%d", + origTokens, finalTokens) + } +} + +func TestNewTokServer_CompressText_RespectsBudget(t *testing.T) { + s := NewTokServer() + // Repetitive filler is exactly what aggressive mode is designed to + // chew through, so this gives the budget a chance to clamp the result. + input := strings.Repeat("lorem ipsum dolor sit amet consectetur. 
", 200) + res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{ + "name": "compress_text", + "arguments": map[string]interface{}{ + "text": input, + "mode": "aggressive", + "budget": 50.0, + }, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + m := res.(map[string]interface{}) + finalTokens, _ := m["finalTokens"].(int) + if finalTokens > 60 { + t.Fatalf("budget not respected: finalTokens=%d, want <= 60", finalTokens) + } +} + +func TestNewTokServer_RedactSecrets(t *testing.T) { + s := NewTokServer() + input := "config: AKIAIOSFODNN7EXAMPLE key=ghp_1234567890abcdefghijklmnopqrstuvwxyz" + res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{ + "name": "redact_secrets", + "arguments": map[string]interface{}{"text": input}, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + m := res.(map[string]interface{}) + redacted, _ := m["redacted"].(string) + if redacted == input { + t.Fatal("expected redacted text to differ from input (no secret detected?)") + } + if count, _ := m["matchCount"].(int); count < 2 { + t.Fatalf("expected at least 2 secrets (AWS + GitHub), got %d", count) + } +} + +func TestNumberFromParams(t *testing.T) { + cases := []struct { + name string + input map[string]interface{} + key string + want float64 + wantOK bool + }{ + {"float64", map[string]interface{}{"x": 1.5}, "x", 1.5, true}, + {"float32", map[string]interface{}{"x": float32(1.5)}, "x", 1.5, true}, + {"int", map[string]interface{}{"x": int(7)}, "x", 7, true}, + {"int64", map[string]interface{}{"x": int64(8)}, "x", 8, true}, + {"missing", map[string]interface{}{}, "x", 0, false}, + {"wrong type", map[string]interface{}{"x": "1.5"}, "x", 0, false}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got, ok := numberFromParams(c.input, c.key) + if ok != c.wantOK { + t.Fatalf("ok=%v, want %v", ok, c.wantOK) + } + if ok && got != c.want { + t.Fatalf("got %v, want %v", got, 
c.want) + } + }) + } +}