diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 03a4e1a3b..ca2cfac40 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -1,6 +1,9 @@
 # Tok Architecture
 
-Tok is a high-performance Go library and CLI tool that cuts LLM token costs by 60–90% through prompt compression, output filtering, and transparent command rewriting.
+Tok is a Go **library** (no CLI, no binary) that cuts LLM token costs by 60–90%
+through prompt compression, output filtering, cost estimation, and secret
+detection. It is consumed by `hawk`, `eyrie`, `yaad`, and any other Go program
+that needs to keep LLM context windows lean.
 
 ---
 
@@ -8,36 +11,21 @@ Tok is a high-performance Go library and CLI tool that cuts LLM token costs by 6
 
 ```
 ┌─────────────────────────────────────────────────────────────────┐
-│                     AI Coding Agents                             │
-│         Claude Code | Cursor | Copilot | Gemini CLI | ...       │
+│                     Consumer Application                          │
+│         hawk  |  eyrie  |  yaad  |  custom Go service             │
 └────────────────────────────┬────────────────────────────────────┘
-                             │
+                             │ import "github.com/GrayCodeAI/tok"
 ┌────────────────────────────▼────────────────────────────────────┐
-│                      Shell Hooks (transparent)                   │
-│         bash_hook.sh | powershell_hook.ps1 | node_hook.js       │
-└────────────────────────────┬────────────────────────────────────┘
-                             │
-┌────────────────────────────▼────────────────────────────────────┐
-│                         Tok CLI                                  │
-│  ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐    │
-│  │ Command      │ │ Filter       │ │ Rewrite              │    │
-│  │ Runner       │ │ Selector     │ │ Engine               │    │
-│  │              │ │ (80 TOML     │ │ (transparent         │    │
-│  │              │ │  configs)    │ │  command prefixing)   │    │
-│  └──────┬───────┘ └──────┬───────┘ └──────────┬───────────┘    │
-│         │                │                     │                │
-│  ┌──────▼────────────────▼─────────────────────▼───────────┐    │
-│  │              Compression Pipeline (50+ layers)            │    │
-│  │  ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────────┐   │    │
-│  │  │  Pre    │ │  Core   │ │Semantic │ │  Advanced   │   │    │
-│  │  │ (0-0.5) │ │ (1-10)  │ │(11-20)  │ │  (21-50)    │   │    │
-│  │  └─────────┘ └─────────┘ └─────────┘ └─────────────┘   │    │
-│  └─────────────────────────────────────────────────────────┘    │
-└────────────────────────────┬────────────────────────────────────┘
-                             │
-┌────────────────────────────▼────────────────────────────────────┐
-│                      Library API                                 │
-│  tok.Compress() | tok.EstimateTokens() | tok.StreamCompress()   │
+│                         tok package                               │
+│  ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐       │
+│  │ Compress     │ │ Estimate     │ │ Cost / Rate-limit /  │       │
+│  │ (31-layer    │ │ Tokens       │ │ Secret detection     │       │
+│  │  pipeline)   │ │ (BPE)        │ │ (33 patterns)        │       │
+│  └──────┬───────┘ └──────┬───────┘ └──────────┬───────────┘       │
+│         └────────────────┼─────────────────────┘                  │
+│                          ▼                                        │
+│   internal/filter (31 layers) + internal/core (BPE)               │
+│   + internal/secrets + internal/cache + internal/extract          │
 └─────────────────────────────────────────────────────────────────┘
 ```
 
@@ -48,25 +36,82 @@ Tok is a high-performance Go library and CLI tool that cuts LLM token costs by 6
 ### Public API
 
 ```go
-// Compress reduces text while preserving meaning.
-func Compress(text string, opts ...Option) (*Result, error)
+// Compress runs the 31-layer pipeline (plus any opt-in post-stages) and
+// returns the compressed text and per-stage stats. Safe to call with no
+// options; sensible defaults apply.
+func Compress(text string, opts ...Option) (string, Stats)
+
+// EstimateTokens and its variants return an estimated token count for text:
+// BPE-backed (Precise, ForModel) or a fast character heuristic (Fast).
+func EstimateTokens(text string) int
+func EstimateTokensForModel(text, model string) int
+func EstimateTokensPrecise(text string) int
+func EstimateTokensFast(text string) int
+
+// Cost & pricing.
+func GetModelPricing(model string) (ModelPricing, bool)
+func RegisterModelPricing(model string, inputPer1K, outputPer1K float64)
+func EstimateCostSavings(stats Stats, model string) float64
+func ListModels() []string
+
+// Secret detection.
+type SecretDetector struct{ ... }
+func NewSecretDetector() *SecretDetector
+func DefaultSecretDetector() *SecretDetector
+func IsSensitiveFilename(path string) (bool, secrets.FilenameMatch)
+
+// Output extraction.
+func ExtractJSON(text string) (string, bool)
+func ExtractJSONArray(text string) (string, bool)
+func ExtractAllJSON(text string) []string
+func CompressJSON(text string, maxItems int) string
+func CompressLog(text string) string
+
+// Reusable compressor.
+type Compressor struct{ ... }
+func NewCompressor(opts ...Option) *Compressor
+func (c *Compressor) Compress(text string) (string, Stats)
 
-// EstimateTokens returns fast token count estimate.
-func EstimateTokens(text string, model ...string) int
+// Context-window optimizer.
+type ContextOptimizer struct{ ... }
+func NewContextOptimizer(opts ...Option) *ContextOptimizer
 
-// Compressor is a reusable compression instance.
-type Compressor struct { ... }
-func NewCompressor(opts ...Option) *Compressor
-func (c *Compressor) Compress(text string) (*Result, error)
+// Strategy advisor.
+type CompressionAdvisor struct{ ... }
+func NewCompressionAdvisor() *CompressionAdvisor
+
+// Rate-limit / usage tracker.
+type UsageTracker struct{ ... }
+func NewUsageTracker(opts ...UsageOption) *UsageTracker
+
+// Persistent gain tracker (SQLite).
+type Tracker struct{ ... }
+func NewTracker(ctx context.Context) (*Tracker, error)
+func NewTrackerAt(path string) (*Tracker, error)
 ```
 
 ### Functional Options
 
 ```go
-tok.WithMode(tok.ModeFull)        // Compression mode (lite/fast/balanced/full/aggressive/wenyan-ultra)
-tok.WithBudget(10000)              // Token budget limit
-tok.WithTier(tok.TierAdvanced)     // Pipeline tier (core/semantic/advanced/specialized)
-tok.WithQuery("relevant context")  // Query-aware compression
+tok.WithMode(tok.ModeFull)           // Compression intensity
+tok.WithBudget(10000)                 // Hard token budget on output
+tok.WithTier(tok.TierCore)            // Pipeline tier (surface/trim/extract/core/code/log/thread/adaptive)
+tok.WithQuery("relevant context")     // Query-aware compression
+tok.WithModel("gpt-4o")               // Enables cost calculation + BPE
+tok.WithCodeAware("go")               // Symbol-preserving guard for source code
+tok.WithCustomFilters(rules)          // Append user TOML regex rules
+tok.WithPerplexityGuided(scorer, 0.4) // LLMLingua-style selective drop
+```
+
+### Preset Variables
+
+```go
+tok.Minimal     // Lightest pass — entropy + AST + budget
+tok.Aggressive  // Full pipeline, every layer flipped on
+tok.Surface     // Output filtering only (good for already-compressed text)
+tok.Adaptive    // Auto-detect content type, choose tier
+tok.Code        // Symbol-preserving, comment stripping, structure kept
+tok.Log         // Collapse repeated INFO/DEBUG runs, keep ERROR verbatim
 ```
 
 ---
@@ -75,7 +120,8 @@ tok.WithQuery("relevant context")  // Query-aware compression
 
 ### Architecture
 
-The pipeline is a multi-layer compression engine with 50+ layers organized in 5 tiers:
+The pipeline is a multi-stage compression engine. Each stage receives the
+current text, returns the transformed text, and updates the shared `PipelineContext`:
 
 ```
 Input Text
@@ -88,14 +134,22 @@ Input Text
 │  └───────────────────────────────────────────────────┘  │
 │                                                          │
 │  ┌───────────────────────────────────────────────────┐  │
-│  │ Layer 0: QuantumLock (KV-cache alignment)          │  │
-│  │ Layer 0.5: Photon (image handling)                 │  │
-│  │ Layer 1: Entropy filtering                         │  │
-│  │ Layer 2: Perplexity scoring                        │  │
-│  │ Layer 3: AST preservation                          │  │
-│  │ Layer 4: Goal-driven compression                   │  │
-│  │ ...                                                │  │
-│  │ Layer 50: ContextCrunch (experimental)             │  │
+│  │ Pre  (0-0.5)  : QuantumLock (KV-cache align),      │  │
+│  │                 Photon (image handling)             │  │
+│  │ Core (1-10)   : Entropy, Perplexity, AST,           │  │
+│  │                 Goal-Driven, Contrastive, N-gram,   │  │
+│  │                 Evaluator-Heads, Gist, Hierarchical,│  │
+│  │                 Budget                               │  │
+│  │ Sem. (11-20)  : Compaction, Attribution, H2O,        │  │
+│  │                 AttentionSink, MetaToken,            │  │
+│  │                 SemanticChunk, SketchStore,          │  │
+│  │                 LazyPruner, SemanticAnchor,          │  │
+│  │                 AgentMemory                          │  │
+│  │ Adv. (21-40)  : MarginalInfoGain, NearDedup,         │  │
+│  │                 CoTCompress, DiffAdapt, EPiC,        │  │
+│  │                 GraphCoT, and ~15 more               │  │
+│  │ Spec.(41-50)  : ContextCrunch, SearchCrunch,        │  │
+│  │                 AdaptiveLearning (5K+ token input)   │  │
 │  └───────────────────────────────────────────────────┘  │
 │                                                          │
 │  ┌───────────────────────────────────────────────────┐  │
@@ -110,17 +164,16 @@ Compressed Text + Stats
 ### Tier System
 
 | Tier | Layers | Purpose | Auto-Enabled |
-|------|--------|---------|-------------|
+|------|--------|---------|--------------|
 | Pre | 0-0.5 | QuantumLock, Photon | Always |
 | Core | 1-10 | Entropy, Perplexity, AST, Goal-Driven, Contrastive, N-gram, Evaluator, Gist, Hierarchical, Budget | Always |
 | Semantic | 11-20 | Compaction, Attribution, H2O, AttentionSink, MetaToken, SemanticChunk, SketchStore, LazyPruner, SemanticAnchor, AgentMemory | Always |
 | Advanced | 21-40 | 20 research-based layers (MarginalInfoGain, NearDedup, CoTCompress, DiffAdapt, EPiC, GraphCoT, etc.) | Auto for large inputs |
-| Specialized | 41-50 | Experimental (ContextCrunch, SearchCrunch, AdaptiveLearning, etc.) | Auto for 5K+ tokens |
+| Specialized | 41-50 | Experimental (ContextCrunch, SearchCrunch, AdaptiveLearning) | Auto for 5K+ tokens |
 
 ### Layer Interface
 
 ```go
-// Filter is the core layer interface.
 type Filter interface {
     Name() string
     Apply(input string, ctx *PipelineContext) (string, error)
@@ -159,24 +212,41 @@ type PipelineContext struct {
 
 | Package | Purpose | Key Files |
 |---------|---------|-----------|
-| `tok.go` | Public API | `Compress()`, `EstimateTokens()` |
-| `options.go` | Functional options | `WithMode()`, `WithBudget()`, `WithTier()` |
-| `compressor.go` | Reusable compressor | `NewCompressor()` |
-| `stream.go` | Streaming compression | `StreamCompressor` |
-| `optimizer.go` | Context optimization | `ContextOptimizer` |
-| `chunker.go` | Code-aware chunking | `CodeChunker` |
-| `advisor.go` | Strategy recommendations | `CompressionAdvisor` |
-| `ratelimit.go` | Usage tracking | `UsageTracker` |
-| `secrets.go` | Secret detection | `SecretDetector` |
-| `internal/filter/` | Pipeline engine (50+ layers) | `coordinator.go`, `layer_*.go` |
-| `internal/core/` | Token estimation, command runner | `estimator.go`, `runner.go` |
-| `internal/cache/` | Multi-level caching | `cache.go`, `watcher.go` |
-| `internal/config/` | Configuration | `config.go` |
-| `internal/fastops/` | SIMD-optimized operations | `simd_amd64.go`, `generic.go` |
-| `internal/secrets/` | Secret patterns | `patterns.go` |
-| `filters/` | TOML filter configs (80 files) | `jest.toml`, `eslint.toml`, etc. |
-| `agents/` | AI agent integration (15 configs) | `claude-code.toml`, `cursor.toml` |
-| `hooks/` | Shell integration | `bash_hook.sh`, `powershell_hook.ps1` |
+| `tok.go` | Public Compress + EstimateTokens entry points | `Compress()`, `EstimateTokens*` |
+| `options.go` | Functional options + preset variables | `WithMode()`, `WithBudget()`, `WithTier()`, `Minimal/Aggressive/Surface/Adaptive/Code/Log` |
+| `compressor.go` | Reusable `Compressor` (caches pipeline) | `Compressor`, `NewCompressor` |
+| `stream.go` | Streaming compression (delta-only) | `StreamCompressor` |
+| `optimizer.go` | Token-budget context optimizer | `ContextOptimizer`, `Greedy/Balanced/PriorityOptimize` |
+| `chunker.go` | Source-code chunking (130+ language map) | `ChunkCode`, `RegisterChunker` |
+| `advisor.go` | Strategy recommender + content classifier | `CompressionAdvisor`, `ClassifyContent` |
+| `ratelimit.go` | Usage tracker w/ thresholds | `UsageTracker`, `FormatUsageBar` |
+| `secrets.go` | Secret detection facade (33 patterns internally) | `SecretDetector`, `IsSensitiveFilename` |
+| `tracker.go` | Persistent gain tracker (SQLite/WAL) | `Tracker`, `NewTrackerAt` |
+| `entropy.go` | Shannon-entropy helpers | `ShannonEntropy`, `IsHighEntropy` |
+| `extract.go` | Brace-balanced JSON extraction | `ExtractJSON*` |
+| `jsoncrunch.go` | JSON array sampler | `CompressJSON` |
+| `logcrunch.go` | Log-line level detector + run collapse | `CompressLog` |
+| `profile.go` | Named/versioned compression profiles (TOML) | `LoadProfile`, `BuiltinProfile*` |
+| `filters.go` | Custom regex filter DSL (TOML) | `LoadFilterRules`, `CustomFilter` |
+| `codeaware.go` | Symbol-preserving code guard | `WithCodeAware`, `codeProtector` |
+| `perplexity.go` | LLMLingua-style selective drop | `WithPerplexityGuided` |
+| `mcp/server.go` | MCP server with real `count_tokens`, `estimate_cost`, `compress_text`, `redact_secrets` tools | `NewTokServer` |
+| `internal/filter/` | Pipeline engine — 31 layers + tier configs + presets | `pipeline_*.go`, `presets.go`, `tier_config.go` |
+| `internal/core/` | BPE tokenizer, batch processor, runner | `estimator.go`, `cost.go` |
+| `internal/cache/` | Multi-level cache with git-aware watcher | `cache.go`, `git_watcher.go` |
+| `internal/extract/` | Brace-balanced JSON extraction impl | `extract.go` |
+| `internal/fastops/` | SIMD-accelerated primitives | `simd_amd64.go`, `simd_amd64.s` |
+| `internal/secrets/` | 33 secret regex patterns + filename detector | `secrets.go`, `filename.go` |
+| `internal/tracking/` | SQLite-backed gain tracker | `tracking.go` |
+| `internal/utils/` | slog adapter, helpers | `logger.go` |
+| `filters/` | 80 per-tool TOML filter configs (jest, eslint, go, kubectl, terraform, etc.) | one TOML per tool |
+| `commands/` | 6 TOML agent-command definitions (pr-review, tok-commit, tok-compress, tok-help, tok-review, tok) | one TOML per command |
+| `config/` | Example TOML + tokman.yaml | `example.toml` |
+| `rules/` | ast-grep `no-fmt-println` rule + tok agent-activation prompt | `no-fmt-println.yaml`, `tok-activate.md` |
+| `skills/` | 5 Claude-style agent skills (`tok`, `tok-commit`, `tok-compress`, `tok-help`, `tok-review`) | `SKILL.md` per skill |
+| `benchmarks/` | Benchmark harness (run.sh + results.md template) | `run.sh` |
+| `evals/` | Prompt-compression eval | `pipeline-bench.sh`, `prompts/en.txt` |
+| `types/` | Cross-eco exported types (mirrors hawk's `shared/types/`) | `finding.go`, `severity.go` |
 
 ---
 
@@ -185,29 +255,42 @@ type PipelineContext struct {
 ### Compression Request
 
 ```
-1. Application calls tok.Compress(text, opts...)
-2. Options parsed (mode, budget, tier, query)
-3. Content type detected (code, log, markdown, etc.)
+1. Consumer calls tok.Compress(text, opts...)
+2. Options parsed (mode, budget, tier, query, model, code-aware, custom rules)
+3. Content type detected (code, log, markdown, data, etc.)
 4. Adaptive tier selection based on input size
-5. PipelineCoordinator created (from pool for reuse)
+5. PipelineCoordinator created (from sync.Pool for reuse)
 6. Layers executed sequentially:
    a. Each layer receives input + PipelineContext
    b. Layer transforms text (remove, compress, restructure)
    c. PipelineContext updated (tokens saved, quality score)
    d. Early exit if budget met
-7. Quality guardrails validate output
-8. Result returned (compressed text + stats)
+7. Optional post-stages: perplexity-guided drop → custom TOML rules
+8. Quality guardrails validate output (no accidental whitespace/structure loss)
+9. Stats computed: originalTokens, finalTokens, tokensSaved, reductionPct, cost
+10. Result returned (compressed text + stats)
+```
+
+### Secret Detection Request
+
+```
+1. Consumer calls det := tok.NewSecretDetector()
+2. det.DetectSecrets(text) iterates the 33-pattern registry
+3. Each pattern: compiled regex; on match, record (type, span, value)
+4. det.RedactSecrets(text) replaces matches with [REDACTED:<type>]
+5. Optional: DetectAndRedactWithEntropy(text, threshold) adds Shannon-entropy
+   pass to catch high-entropy blobs the regex table misses
 ```
 
-### Command Rewriting
+### Cost Calculation Request
 
 ```
-1. Shell hook intercepts: "cargo test"
-2. Tok CLI receives command
-3. Filter config loaded: filters/jest.toml
-4. Command rewritten: "tok test-runner cargo test"
-5. Output captured and filtered through pipeline
-6. Filtered output returned to agent
+1. Consumer calls tok.GetModelPricing(model) → ModelPricing
+   (returns zero-value + false for unknown models; consumer may call
+    tok.RegisterModelPricing to add custom entries)
+2. Cost = (inputTokens/1000)*InputPricePer1K + (outputTokens/1000)*OutputPricePer1K
+3. For compression savings: tok.EstimateCostSavings(stats, model)
+   conservatively assumes saved tokens would have been input tokens
 ```
 
 ---
@@ -216,17 +299,18 @@ type PipelineContext struct {
 
 ### Object Pooling
 
-`CoordinatorPool` reuses pipeline coordinators for 10-20x speedup:
+`coordinator_pool.go` reuses pipeline coordinators via `sync.Pool` for a
+**10–20× speedup** over per-call `NewCompressor()` construction.
 
 ```go
 var coordinatorPool = sync.Pool{
-    New: func() interface{} { return NewPipelineCoordinator() },
+    New: func() interface{} { return filter.NewPipelineCoordinator() },
 }
 ```
 
 ### SIMD Optimization
 
-`internal/fastops/` provides SIMD-accelerated string operations:
+`internal/fastops/` provides SIMD-accelerated string operations on amd64:
 
 | Operation | Generic | SIMD (AVX2) | Speedup |
 |-----------|---------|-------------|---------|
@@ -234,34 +318,43 @@ var coordinatorPool = sync.Pool{
 | Whitespace norm | 80ns | 25ns | 3.2x |
 | Char counting | 60ns | 20ns | 3.0x |
 
-Build tags: `simd_avx2`, `simd_neon` (auto-detected at runtime)
+Build tag: `simd_avx2` (auto-detected at runtime).
 
 ### Token Estimation
 
 Two modes:
-- **Heuristic**: ~0.3ns/op (character-based estimate)
-- **BPE**: ~2ns/op (tiktoken-compatible, precise)
+- **Heuristic** (`EstimateTokensFast`): ~0.3 ns/op, character-based estimate
+- **BPE** (`EstimateTokensPrecise` / `EstimateTokensForModel`): ~2 ns/op,
+  tiktoken-compatible (cl100k, o200k, p50k, r50k encodings)
+
+`internal/core/estimator.go` uses a 64-shard LRU token cache for BPE
+counts (FNV-64a keyed, atomic hit counter).
+
+### Buffer Pooling
+
+`internal/filter/bytepool.go` provides `BytePool` + `FastStringBuilder` to
+reduce GC pressure on hot paths.
 
 ---
 
 ## Filter Configuration
 
-80 TOML-based command filter definitions in `filters/`:
+80 per-tool TOML filter configs in `filters/` (one per CLI tool: jest, eslint,
+go, kubectl, terraform, vitest, playwright, aws, swift, etc.). Each declares
+a list of regex rules (`name`, `pattern`, `replacement`). Loaded via
+`tok.LoadFilterRules` and applied via `WithCustomFilters`.
 
 ```toml
 # filters/jest.toml
-[command]
-name = "jest"
-pattern = "^jest\\b"
-
-[filter]
-layers = ["ansi_strip", "whitespace", "error_extract", "summary"]
-output_max_tokens = 2000
-
-[patterns]
-error = "^\\s*●\\s+"
-pass = "^\\s*✓\\s+"
-fail = "^\\s*✕\\s+"
+[[rule]]
+name = "strip-ansi"
+pattern = '\x1b\[[0-9;]*m'
+replacement = ""
+
+[[rule]]
+name = "collapse-blank-lines"
+pattern = '\n{3,}'
+replacement = '\n\n'
 ```
 
 ---
@@ -270,11 +363,12 @@ fail = "^\\s*✕\\s+"
 
 ### Secret Detection
 
-Pattern + entropy-based detection:
+Regex-pattern detection (33 patterns) with an optional Shannon-entropy pass:
 
 ```go
 type SecretDetector struct {
-    patterns []*regexp.Regexp  // 15+ secret formats
+    patterns []*regexp.Regexp  // 33 secret formats
+    entropy  EntropyAnalyzer   // optional Shannon-entropy pass
 }
 
 func (d *SecretDetector) DetectSecrets(text string) []SecretMatch
@@ -282,15 +376,28 @@ func (d *SecretDetector) RedactSecrets(text string) string
 func (d *SecretDetector) DetectAndRedactWithEntropy(text string, threshold float64) string
 ```
 
-Supported patterns: API keys, JWT tokens, AWS keys, GitHub tokens, private keys, connection strings, etc.
+Supported patterns include: AWS access keys, GitHub PATs, Slack tokens, Google
+API keys, Stripe keys, OpenAI/Anthropic keys, JWTs, RSA/EC/OpenSSH private
+keys, SendGrid, Twilio, Heroku, DigitalOcean, npm, PyPI, Docker registry,
+generic API keys, passwords, DB connection strings, Bearer tokens.
+
+`tok.IsSensitiveFilename` complements content scanning with a 3-layer
+filename detector (exact basename, sensitive directory, name token) for
+`.env`, `id_rsa`, `/home/*/.ssh/...`, etc.
 
 ---
 
 ## Build & Release
 
 - **Language**: Go 1.26+, zero CGO
-- **Binary**: Single static binary
-- **Platforms**: linux/darwin/windows/freebsd × amd64/arm64/386
-- **Distribution**: `go install`, Homebrew tap, deb/rpm (nfpm), Docker
-- **Release**: GoReleaser with SHA-256 checksums, release-please automation
-- **CI**: 3 workflows (ci.yml, quality.yml, security.yml)
+- **Type**: Library — no binary, no CLI (`.goreleaser.yml` ships source
+  archive + SPDX SBOM only)
+- **Distribution**: `go get github.com/GrayCodeAI/tok`
+- **Versioning**: `VERSION` file is the single source of truth; embedded via
+  `//go:embed` in `version.go`; bumped by release-please from Conventional
+  Commits
+- **CI**: 3 workflows (`ci.yml` for fmt/vet/lint/test/security, `release.yml`
+  for GoReleaser, `scorecard.yml` for OpenSSF)
+- **Coverage gate**: 60% (codecov)
+
+The consumer-facing CLI is `hawk tok ...`, which embeds this library.
diff --git a/CITATION.cff b/CITATION.cff
index 4c07056a7..e14cc2dbc 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,5 +1,5 @@
 cff-version: 1.2.0
-title: "Tok: Token-Aware CLI Proxy with 31-Layer Compression Pipeline"
+title: "Tok: A Go Library for Prompt Compression, Output Filtering, Token Estimation, and Secret Detection"
 message: "If you use Tok in your research, please cite it using these metadata."
 type: software
 authors:
@@ -15,18 +15,22 @@ keywords:
   - llm
   - ai-coding-assistant
   - compression
-  - cli-proxy
   - context-window
-  - claude-code
-  - cursor
-  - copilot
+  - bpe
+  - token-estimation
+  - secret-detection
+  - go-library
 abstract: >-
-  Tok is a token-aware CLI proxy that intercepts CLI commands and applies
-  a 31-layer compression pipeline to reduce token usage for AI coding assistants.
-  Built on research from 120+ papers, it achieves 60-90% token reduction on
-  common development operations. The pipeline includes entropy filtering,
-  perplexity pruning, goal-driven selection, AST preservation, contrastive
-  ranking, and 15+ additional research-backed compression layers.
+  Tok is a pure Go library (no CGO, no CLI) that provides prompt compression,
+  output filtering, BPE token estimation, cost calculation, and secret
+  detection for AI coding agents and other LLM workloads. It exposes a
+  31-layer compression pipeline (entropy, LLMLingua-style perplexity
+  pruning, AST preservation, gisting, H2O, attention-sink, ChunkKV, and
+  more) and ships a 33-pattern secret scanner with optional Shannon-entropy
+  analysis. Built on research from 50+ papers, it achieves 60-90% token
+  reduction on common development operations. The library is consumed by
+  hawk, eyrie, and yaad in the hawk-eco ecosystem, and is available to any
+  Go program via `go get github.com/GrayCodeAI/tok`.
 references:
   - type: article
     title: "Selective Context for Language Models"
diff --git a/mcp/server.go b/mcp/server.go
index 23148650e..27aab049f 100644
--- a/mcp/server.go
+++ b/mcp/server.go
@@ -1,8 +1,27 @@
+// Package mcp provides a minimal in-memory MCP (Model Context Protocol) server
+// pre-loaded with tok tools.
+//
+// The implementation is deliberately small: it is an in-process registry +
+// dispatcher with no stdio/HTTP/SSE transport. Host applications that need
+// transport wire this package's *MCPServer into their own MCP daemon (for
+// example, hawk's internal/mcp package exposes it to MCP-compatible agents).
+//
+// The four default tools wired up by NewTokServer call the real tok package
+// APIs rather than stub implementations:
+//
+//   - count_tokens      → tok.EstimateTokensForModel (BPE) or tok.EstimateTokens
+//   - estimate_cost     → tok.GetModelPricing rates applied to inputTokens and outputTokens
+//   - compress_text     → tok.Compress with the caller-supplied options
+//   - redact_secrets    → tok's 33-pattern secret detector
+//
+// Tool authors can also use NewServer + RegisterTool for custom surfaces.
 package mcp
 
 import (
 	"context"
 	"fmt"
+
+	tok "github.com/GrayCodeAI/tok"
 )
 
 type ToolHandler func(ctx context.Context, params map[string]interface{}) (interface{}, error)
@@ -23,14 +42,19 @@ type toolEntry struct {
 	handler ToolHandler
 }
 
+// NewServer creates an empty MCP server with the given name. Tools must be
+// registered with RegisterTool before the server is useful.
 func NewServer(name string) *MCPServer {
 	return &MCPServer{name: name, tools: make(map[string]toolEntry)}
 }
 
+// RegisterTool adds a tool to the server. Re-registering an existing name
+// overwrites the previous definition and handler.
 func (s *MCPServer) RegisterTool(name, description string, schema map[string]interface{}, handler ToolHandler) {
 	s.tools[name] = toolEntry{def: ToolDef{Name: name, Description: description, InputSchema: schema}, handler: handler}
 }
 
+// ListTools returns the registered tool definitions. Order is not stable.
 func (s *MCPServer) ListTools() []ToolDef {
 	defs := make([]ToolDef, 0, len(s.tools))
 	for _, e := range s.tools {
@@ -39,6 +63,9 @@ func (s *MCPServer) ListTools() []ToolDef {
 	return defs
 }
 
+// HandleRequest dispatches a JSON-RPC-style request by method name.
+// Supported methods: "tools/list" (no params) and "tools/call" (params must
+// include "name" and may include an "arguments" map).
 func (s *MCPServer) HandleRequest(ctx context.Context, method string, params map[string]interface{}) (interface{}, error) {
 	switch method {
 	case "tools/list":
@@ -62,76 +89,226 @@ func (s *MCPServer) HandleRequest(ctx context.Context, method string, params map
 	}
 }
 
-// NewTokServer creates an MCP server pre-registered with tok tools.
+// NewTokServer creates an MCP server pre-registered with the standard tok
+// tool surface. The registered tools call the real tok package APIs:
+//
+//   - count_tokens(text, model) → BPE-backed token count (heuristic if model is empty or "heuristic")
+//   - estimate_cost(model, inputTokens, outputTokens) → real pricing-table cost
+//   - compress_text(text, mode, budget) → real 31-layer compression pipeline
+//   - redact_secrets(text, entropyThreshold) → real 33-pattern secret detector
 func NewTokServer() *MCPServer {
 	s := NewServer("tok")
 
-	s.RegisterTool("count_tokens", "Count tokens in text for a given model", map[string]interface{}{
-		"type": "object",
-		"properties": map[string]interface{}{
-			"text":  map[string]interface{}{"type": "string"},
-			"model": map[string]interface{}{"type": "string"},
+	s.RegisterTool("count_tokens",
+		"Count tokens in text using tok's BPE tokenizer (cl100k/o200k/etc.). Pass model='heuristic' for a fast character-based estimate.",
+		map[string]interface{}{
+			"type": "object",
+			"properties": map[string]interface{}{
+				"text":  map[string]interface{}{"type": "string", "description": "Input text to count tokens for"},
+				"model": map[string]interface{}{"type": "string", "description": "Model name (e.g. gpt-4o, claude-sonnet) or 'heuristic' for a fast estimate"},
+			},
+			"required": []string{"text"},
 		},
-		"required": []string{"text", "model"},
-	}, func(ctx context.Context, p map[string]interface{}) (interface{}, error) {
-		text, _ := p["text"].(string)
-		model, _ := p["model"].(string)
-		if text == "" || model == "" {
-			return nil, fmt.Errorf("count_tokens: text and model required")
-		}
-		return map[string]interface{}{"count": len(text) / 4, "model": model}, nil
-	})
-
-	s.RegisterTool("estimate_cost", "Estimate dollar cost for input/output tokens", map[string]interface{}{
-		"type": "object",
-		"properties": map[string]interface{}{
-			"model":        map[string]interface{}{"type": "string"},
-			"inputTokens":  map[string]interface{}{"type": "number"},
-			"outputTokens": map[string]interface{}{"type": "number"},
+		countTokensHandler)
+
+	s.RegisterTool("estimate_cost",
+		"Estimate the dollar cost for input + output tokens at the model's registered price-per-1K. Returns 0 for unknown models.",
+		map[string]interface{}{
+			"type": "object",
+			"properties": map[string]interface{}{
+				"model":        map[string]interface{}{"type": "string", "description": "Model name (e.g. gpt-4o, claude-sonnet)"},
+				"inputTokens":  map[string]interface{}{"type": "number", "description": "Number of input tokens"},
+				"outputTokens": map[string]interface{}{"type": "number", "description": "Number of output tokens"},
+			},
+			"required": []string{"model", "inputTokens", "outputTokens"},
 		},
-		"required": []string{"model", "inputTokens", "outputTokens"},
-	}, func(ctx context.Context, p map[string]interface{}) (interface{}, error) {
-		model, _ := p["model"].(string)
-		if model == "" {
-			return nil, fmt.Errorf("estimate_cost: model required")
-		}
-		in, _ := p["inputTokens"].(float64)
-		out, _ := p["outputTokens"].(float64)
-		return map[string]interface{}{"model": model, "inputTokens": in, "outputTokens": out, "totalCost": 0.0}, nil
-	})
-
-	s.RegisterTool("compress_text", "Compress text by removing redundant whitespace", map[string]interface{}{
-		"type": "object",
-		"properties": map[string]interface{}{
-			"text": map[string]interface{}{"type": "string"},
+		estimateCostHandler)
+
+	s.RegisterTool("compress_text",
+		"Compress text using tok's full pipeline. Returns the compressed string and per-stage stats. Optional 'mode' parameter selects the compression preset (minimal, aggressive, surface, adaptive, code, log).",
+		map[string]interface{}{
+			"type": "object",
+			"properties": map[string]interface{}{
+				"text": map[string]interface{}{"type": "string", "description": "Text to compress"},
+				"mode": map[string]interface{}{
+					"type":        "string",
+					"description": "Compression preset: minimal | aggressive | surface | adaptive | code | log (default: minimal)",
+					"enum":        []string{"minimal", "aggressive", "surface", "adaptive", "code", "log"},
+				},
+				"budget": map[string]interface{}{"type": "number", "description": "Optional token budget to enforce"},
+			},
+			"required": []string{"text"},
 		},
-		"required": []string{"text"},
-	}, func(ctx context.Context, p map[string]interface{}) (interface{}, error) {
-		text, _ := p["text"].(string)
-		if text == "" {
-			return nil, fmt.Errorf("compress_text: text required")
-		}
-		result := collapseWS(text)
-		return map[string]interface{}{"original": len(text), "compressed": len(result), "text": result}, nil
-	})
+		compressTextHandler)
+
+	s.RegisterTool("redact_secrets",
+		"Detect and redact secrets (API keys, AWS tokens, GitHub tokens, private keys, JWTs, etc.) from text. Returns redacted text and the count of matches found.",
+		map[string]interface{}{
+			"type": "object",
+			"properties": map[string]interface{}{
+				"text":             map[string]interface{}{"type": "string", "description": "Text to scan for secrets"},
+				"entropyThreshold": map[string]interface{}{"type": "number", "description": "Optional Shannon-entropy threshold (default 4.5) to also catch high-entropy blobs that pattern matching misses"},
+			},
+			"required": []string{"text"},
+		},
+		redactSecretsHandler)
 
 	return s
 }
 
-func collapseWS(s string) string {
-	out := make([]byte, 0, len(s))
-	space := false
-	for i := 0; i < len(s); i++ {
-		c := s[i]
-		if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
-			if !space {
-				out = append(out, ' ')
-				space = true
-			}
-		} else {
-			out = append(out, c)
-			space = false
+// countTokensHandler wires the MCP count_tokens tool to tok's real estimator.
+func countTokensHandler(_ context.Context, p map[string]interface{}) (interface{}, error) {
+	text, _ := p["text"].(string)
+	if text == "" {
+		return nil, fmt.Errorf("count_tokens: text required")
+	}
+	model, _ := p["model"].(string)
+	if model == "" || model == "heuristic" {
+		return map[string]interface{}{
+			"text":  text,
+			"model": "heuristic",
+			"count": tok.EstimateTokens(text),
+		}, nil
+	}
+	return map[string]interface{}{
+		"text":  text,
+		"model": model,
+		"count": tok.EstimateTokensForModel(text, model),
+	}, nil
+}
+
+// estimateCostHandler wires the MCP estimate_cost tool to tok's real
+// pricing registry. Returns totalCost = input/1000 * inputPrice +
+// output/1000 * outputPrice. totalCost is 0 for unknown models.
+func estimateCostHandler(_ context.Context, p map[string]interface{}) (interface{}, error) {
+	model, _ := p["model"].(string)
+	if model == "" {
+		return nil, fmt.Errorf("estimate_cost: model required")
+	}
+	in, okIn := numberFromParams(p, "inputTokens")
+	out, okOut := numberFromParams(p, "outputTokens")
+	if !okIn || !okOut {
+		return nil, fmt.Errorf("estimate_cost: inputTokens and outputTokens required")
+	}
+	pricing, found := tok.GetModelPricing(model)
+	if !found {
+		return map[string]interface{}{
+			"model":        model,
+			"inputTokens":  in,
+			"outputTokens": out,
+			"totalCost":    0.0,
+			"currency":     "USD",
+			"known":        false,
+			"warning":      fmt.Sprintf("model %q is not in the pricing registry; call tok.RegisterModelPricing to add it", model),
+		}, nil
+	}
+	total := (in/1000)*pricing.InputPricePer1K + (out/1000)*pricing.OutputPricePer1K
+	return map[string]interface{}{
+		"model":            model,
+		"inputTokens":      in,
+		"outputTokens":     out,
+		"inputPricePer1K":  pricing.InputPricePer1K,
+		"outputPricePer1K": pricing.OutputPricePer1K,
+		"totalCost":        total,
+		"currency":         "USD",
+		"known":            true,
+	}, nil
+}
+
+// compressTextHandler wires the MCP compress_text tool to tok.Compress.
+// Optional mode parameter maps to one of the public preset variables.
+func compressTextHandler(_ context.Context, p map[string]interface{}) (interface{}, error) {
+	text, _ := p["text"].(string)
+	if text == "" {
+		return nil, fmt.Errorf("compress_text: text required")
+	}
+	mode, _ := p["mode"].(string)
+	opts := buildCompressOptions(mode, p)
+	out, stats := tok.Compress(text, opts...)
+	return map[string]interface{}{
+		"original":       text,
+		"compressed":     out,
+		"originalTokens": stats.OriginalTokens,
+		"finalTokens":    stats.FinalTokens,
+		"tokensSaved":    stats.TokensSaved,
+		"reductionPct":   stats.ReductionPercent,
+		"mode":           mode,
+		"model":          stats.Model,
+		"costSavingsUSD": stats.CostSavings,
+	}, nil
+}
+
+// redactSecretsHandler wires the MCP redact_secrets tool to the real
+// SecretDetector. A positive entropyThreshold switches redaction to
+// DetectAndRedactWithEntropy; matchCount always comes from DetectSecrets.
+func redactSecretsHandler(_ context.Context, p map[string]interface{}) (interface{}, error) {
+	text, _ := p["text"].(string)
+	if text == "" {
+		return nil, fmt.Errorf("redact_secrets: text required")
+	}
+	entropy, hasEntropy := numberFromParams(p, "entropyThreshold")
+	det := tok.NewSecretDetector()
+	var matches []tok.SecretMatch
+	var redacted string
+	if hasEntropy {
+		matches = det.DetectSecrets(text)
+		redacted = det.RedactSecrets(text)
+		if entropy > 0 {
+			redacted = det.DetectAndRedactWithEntropy(text, entropy)
 		}
+	} else {
+		matches = det.DetectSecrets(text)
+		redacted = det.RedactSecrets(text)
+	}
+	return map[string]interface{}{
+		"original":   text,
+		"redacted":   redacted,
+		"matchCount": len(matches),
+	}, nil
+}
+
+// buildCompressOptions maps the MCP compress_text 'mode' enum to the
+// corresponding tok preset Option, plus a budget option when 'budget' is a
+// positive number. Empty or unknown modes add no preset, which lets the
+// pipeline use its auto-tier selection.
+func buildCompressOptions(mode string, p map[string]interface{}) []tok.Option {
+	var opts []tok.Option
+	switch mode {
+	case "minimal":
+		opts = append(opts, tok.Minimal)
+	case "aggressive":
+		opts = append(opts, tok.Aggressive)
+	case "surface":
+		opts = append(opts, tok.Surface)
+	case "adaptive":
+		opts = append(opts, tok.Adaptive)
+	case "code":
+		opts = append(opts, tok.Code)
+	case "log":
+		opts = append(opts, tok.Log)
+	}
+	if budget, ok := numberFromParams(p, "budget"); ok && budget > 0 {
+		opts = append(opts, tok.WithBudget(int(budget)))
+	}
+	return opts
+}
+
+// numberFromParams returns p[key] as a float64. It accepts float64, float32,
+// int, and int64 values; a missing key or any other type yields (0, false).
+func numberFromParams(p map[string]interface{}, key string) (float64, bool) {
+	v, ok := p[key]
+	if !ok {
+		return 0, false
+	}
+	switch n := v.(type) {
+	case float64:
+		return n, true
+	case float32:
+		return float64(n), true
+	case int:
+		return float64(n), true
+	case int64:
+		return float64(n), true
 	}
-	return string(out)
+	return 0, false
 }
diff --git a/mcp/server_test.go b/mcp/server_test.go
index aa5f4cb81..194ec210e 100644
--- a/mcp/server_test.go
+++ b/mcp/server_test.go
@@ -2,7 +2,10 @@ package mcp
 
 import (
 	"context"
+	"strings"
 	"testing"
+
+	tok "github.com/GrayCodeAI/tok"
 )
 
 func TestNewServer(t *testing.T) {
@@ -114,3 +117,253 @@ func TestHandleRequest_UnknownMethod(t *testing.T) {
 		t.Fatalf("unexpected error message: %v", err)
 	}
 }
+
+// NewTokServer is pre-loaded with real tool handlers that delegate to the tok
+// package. These tests exercise the wire-up so a future regression that
+// reintroduces the legacy stub bodies (e.g. count_tokens=len/4,
+// estimate_cost=0, compress_text=collapseWS) fails CI.
+
+func TestNewTokServer_RegistersAllTools(t *testing.T) {
+	s := NewTokServer()
+	defs := s.ListTools()
+	got := map[string]bool{}
+	for _, d := range defs {
+		got[d.Name] = true
+	}
+	for _, want := range []string{"count_tokens", "estimate_cost", "compress_text", "redact_secrets"} {
+		if !got[want] {
+			t.Errorf("expected registered tool %q in %v", want, got)
+		}
+	}
+}
+
+func TestNewTokServer_CountTokens_Heuristic(t *testing.T) {
+	s := NewTokServer()
+	res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{
+		"name": "count_tokens",
+		"arguments": map[string]interface{}{
+			"text":  "hello world from tok",
+			"model": "heuristic",
+		},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	m, ok := res.(map[string]interface{})
+	if !ok {
+		t.Fatalf("expected map, got %T", res)
+	}
+	count, ok := m["count"].(int)
+	if !ok {
+		t.Fatalf("expected int count, got %T", m["count"])
+	}
+	if count != tok.EstimateTokens("hello world from tok") {
+		t.Fatalf("heuristic count %d != EstimateTokens %d (stub regression?)",
+			count, tok.EstimateTokens("hello world from tok"))
+	}
+	if count <= 0 {
+		t.Fatalf("expected positive count, got %d", count)
+	}
+}
+
+func TestNewTokServer_CountTokens_ForModel(t *testing.T) {
+	s := NewTokServer()
+	res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{
+		"name": "count_tokens",
+		"arguments": map[string]interface{}{
+			"text":  "hello world",
+			"model": "gpt-4o",
+		},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	m := res.(map[string]interface{})
+	if m["model"] != "gpt-4o" {
+		t.Fatalf("expected model gpt-4o, got %v", m["model"])
+	}
+	if _, ok := m["count"].(int); !ok {
+		t.Fatalf("expected int count, got %T", m["count"])
+	}
+}
+
+func TestNewTokServer_CountTokens_MissingText(t *testing.T) {
+	s := NewTokServer()
+	_, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{
+		"name":      "count_tokens",
+		"arguments": map[string]interface{}{"model": "gpt-4o"},
+	})
+	if err == nil {
+		t.Fatal("expected error when text is missing")
+	}
+}
+
+func TestNewTokServer_EstimateCost_KnownModel(t *testing.T) {
+	s := NewTokServer()
+	res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{
+		"name": "estimate_cost",
+		"arguments": map[string]interface{}{
+			"model":        "gpt-4o",
+			"inputTokens":  1000.0,
+			"outputTokens": 500.0,
+		},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	m := res.(map[string]interface{})
+	if m["known"] != true {
+		t.Fatalf("expected known=true, got %v", m["known"])
+	}
+	cost, ok := m["totalCost"].(float64)
+	if !ok {
+		t.Fatalf("expected float64 totalCost, got %T", m["totalCost"])
+	}
+	pricing, _ := tok.GetModelPricing("gpt-4o")
+	want := (1000.0/1000)*pricing.InputPricePer1K + (500.0/1000)*pricing.OutputPricePer1K
+	if cost != want {
+		t.Fatalf("totalCost = %v, want %v (stub regression?)", cost, want)
+	}
+	if cost == 0 {
+		t.Fatal("expected non-zero cost for gpt-4o, got 0 (legacy stub regression?)")
+	}
+}
+
+func TestNewTokServer_EstimateCost_UnknownModel(t *testing.T) {
+	s := NewTokServer()
+	res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{
+		"name": "estimate_cost",
+		"arguments": map[string]interface{}{
+			"model":        "totally-fake-model-xyz",
+			"inputTokens":  100.0,
+			"outputTokens": 200.0,
+		},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	m := res.(map[string]interface{})
+	if m["known"] != false {
+		t.Fatalf("expected known=false, got %v", m["known"])
+	}
+	if m["warning"] == nil {
+		t.Fatal("expected warning for unknown model")
+	}
+}
+
+func TestNewTokServer_CompressText_RealPipeline(t *testing.T) {
+	s := NewTokServer()
+	// Aggressive mode on repetitive natural-language prose will drop most
+	// filler words. We don't assert specific content survives — we assert
+	// the *real* pipeline ran, not the legacy stub (which either no-op'd
+	// or only collapsed whitespace). The token-count and length
+	// comparison is sufficient evidence.
+	input := strings.Repeat(
+		"The rain in spain stays mainly in the plain. ", 100,
+	)
+	res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{
+		"name": "compress_text",
+		"arguments": map[string]interface{}{
+			"text": input,
+			"mode": "aggressive",
+		},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	m := res.(map[string]interface{})
+	compressed, _ := m["compressed"].(string)
+	if compressed == "" {
+		t.Fatal("expected non-empty compressed text")
+	}
+	if compressed == input {
+		t.Fatal("compressed text equals input (stub regression: pipeline not invoked?)")
+	}
+	origTokens, _ := m["originalTokens"].(int)
+	finalTokens, _ := m["finalTokens"].(int)
+	if origTokens <= 0 || finalTokens <= 0 {
+		t.Fatalf("expected positive token counts, got orig=%d final=%d", origTokens, finalTokens)
+	}
+	if finalTokens >= origTokens {
+		t.Fatalf("aggressive compression should reduce tokens: orig=%d final=%d", origTokens, finalTokens)
+	}
+	// Stub regression guard: legacy compress_text only collapsed
+	// whitespace, which would still drop *some* bytes but leave the
+	// final token count close to the original. A 4× reduction is a
+	// strong signal the 31-layer pipeline actually ran.
+	if finalTokens*4 > origTokens {
+		t.Fatalf("expected aggressive mode to reduce tokens by >= 4x, got orig=%d final=%d",
+			origTokens, finalTokens)
+	}
+}
+
+func TestNewTokServer_CompressText_RespectsBudget(t *testing.T) {
+	s := NewTokServer()
+	// Repetitive filler is exactly what aggressive mode is designed to
+	// chew through, so this gives the budget a chance to clamp the result.
+	input := strings.Repeat("lorem ipsum dolor sit amet consectetur. ", 200)
+	res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{
+		"name": "compress_text",
+		"arguments": map[string]interface{}{
+			"text":   input,
+			"mode":   "aggressive",
+			"budget": 50.0,
+		},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	m := res.(map[string]interface{})
+	finalTokens, _ := m["finalTokens"].(int)
+	if finalTokens > 60 {
+		t.Fatalf("budget not respected: finalTokens=%d, want <= 60", finalTokens)
+	}
+}
+
+func TestNewTokServer_RedactSecrets(t *testing.T) {
+	s := NewTokServer()
+	input := "config: AKIAIOSFODNN7EXAMPLE key=ghp_1234567890abcdefghijklmnopqrstuvwxyz"
+	res, err := s.HandleRequest(context.Background(), "tools/call", map[string]interface{}{
+		"name":      "redact_secrets",
+		"arguments": map[string]interface{}{"text": input},
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	m := res.(map[string]interface{})
+	redacted, _ := m["redacted"].(string)
+	if redacted == input {
+		t.Fatal("expected redacted text to differ from input (no secret detected?)")
+	}
+	if count, _ := m["matchCount"].(int); count < 2 {
+		t.Fatalf("expected at least 2 secrets (AWS + GitHub), got %d", count)
+	}
+}
+
+func TestNumberFromParams(t *testing.T) {
+	cases := []struct {
+		name   string
+		input  map[string]interface{}
+		key    string
+		want   float64
+		wantOK bool
+	}{
+		{"float64", map[string]interface{}{"x": 1.5}, "x", 1.5, true},
+		{"float32", map[string]interface{}{"x": float32(1.5)}, "x", 1.5, true},
+		{"int", map[string]interface{}{"x": int(7)}, "x", 7, true},
+		{"int64", map[string]interface{}{"x": int64(8)}, "x", 8, true},
+		{"missing", map[string]interface{}{}, "x", 0, false},
+		{"wrong type", map[string]interface{}{"x": "1.5"}, "x", 0, false},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			got, ok := numberFromParams(c.input, c.key)
+			if ok != c.wantOK {
+				t.Fatalf("ok=%v, want %v", ok, c.wantOK)
+			}
+			if ok && got != c.want {
+				t.Fatalf("got %v, want %v", got, c.want)
+			}
+		})
+	}
+}