diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 000000000..0a3726666 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,61 @@ +# Tok Benchmark Publication Workflow + +This directory is the publication surface for Tok benchmarks. + +It complements: + +- `tok/benchmarks/run.sh` for the offline quality and root-package benchmark wrapper +- `tok/evals/pipeline-bench.sh` for pipeline latency and allocation benchmarks +- `tok/benchmarks/quality/` for the offline compression-quality harness +- `tok/benchmarks/manifests/` for machine-readable suite definitions + +## Official suite ids + +- `tok-quality-core` +- `tok-pipeline-microbench` + +See the Hawk-side benchmark registry at `hawk/docs/benchmarks/SUITES.md`. + +## Current published baselines + +- `tok-quality-core`: `results/tok-quality-core/2026-06-27/` +- `tok-pipeline-microbench`: `results/tok-pipeline-microbench/2026-06-27/` + +## Current runnable commands + +```bash +/bin/zsh -lc 'GOCACHE=$PWD/.gocache ./benchmarks/run.sh' +/bin/zsh -lc 'GOCACHE=$PWD/.gocache ./evals/pipeline-bench.sh' +``` + +## Publication layout + +Recommended committed layout: + +```text +benchmarks/ + README.md + manifests/ + tok-quality-core.yaml + tok-pipeline-microbench.yaml + results/ + tok-quality-core/ + 2026-06-27/ + report.md + result.txt + notes.md + tok-pipeline-microbench/ + 2026-06-27/ + report.md + result.txt + notes.md +``` + +## Promotion rule + +Do not cite a Tok benchmark as evidence in cross-project comparison docs unless: + +1. the command used is recorded +2. the repo state is identified as a committed snapshot or workspace snapshot +3. the benchmark scope is stated clearly +4. the raw output or generated report is committed diff --git a/benchmarks/manifests/README.md b/benchmarks/manifests/README.md new file mode 100644 index 000000000..cc39717af --- /dev/null +++ b/benchmarks/manifests/README.md @@ -0,0 +1,9 @@ +# Tok Benchmark Manifests + +This directory is the machine-readable registry for Tok benchmark suites. + +Rules: + +- manifests must match the suite ids documented in `../README.md` +- `published: false` means the suite is official but no committed run is present yet +- publication directories referenced by manifests must exist diff --git a/benchmarks/manifests/tok-pipeline-microbench.yaml b/benchmarks/manifests/tok-pipeline-microbench.yaml new file mode 100644 index 000000000..78b65017f --- /dev/null +++ b/benchmarks/manifests/tok-pipeline-microbench.yaml @@ -0,0 +1,20 @@ +suite: tok-pipeline-microbench +status: shipped +published: true +kind: pipeline-latency-and-allocation +owner: tok +source: + - tok/evals/pipeline-bench.sh + - tok/internal/filter/pipeline_bench_test.go + - tok/internal/filter/optimizations_benchmark_test.go +runner: + command: /bin/zsh -lc 'GOCACHE=$PWD/.gocache ./evals/pipeline-bench.sh' +result_format: + primary: text_benchmark_output +metrics: + - ns_per_op + - bytes_per_op + - allocs_per_op +publication_dir: tok/benchmarks/results/tok-pipeline-microbench +notes: + - This suite measures the filter pipeline directly through Go benchmark entrypoints. diff --git a/benchmarks/manifests/tok-quality-core.yaml b/benchmarks/manifests/tok-quality-core.yaml new file mode 100644 index 000000000..edb2376bd --- /dev/null +++ b/benchmarks/manifests/tok-quality-core.yaml @@ -0,0 +1,24 @@ +suite: tok-quality-core +status: shipped +published: true +kind: offline-compression-quality +owner: tok +source: + - tok/benchmarks/run.sh + - tok/benchmarks/quality/quality.go + - tok/benchmarks/quality/cmd/main.go +runner: + command: /bin/zsh -lc 'GOCACHE=$PWD/.gocache ./benchmarks/run.sh' +result_format: + primary: markdown_report + secondary: text_benchmark_output +metrics: + - compression_ratio + - char_retention + - rouge1_fidelity_proxy + - ns_per_op + - bytes_per_op + - allocs_per_op +publication_dir: tok/benchmarks/results/tok-quality-core +notes: + - This suite combines root-package microbenchmarks with the offline compression-quality harness. diff --git a/benchmarks/results/README.md b/benchmarks/results/README.md new file mode 100644 index 000000000..c2aacb3bb --- /dev/null +++ b/benchmarks/results/README.md @@ -0,0 +1,20 @@ +# Published Tok Benchmark Runs + +This directory is reserved for benchmark runs that are important enough to treat as evidence. + +Expected per-run files: + +- `report.md` +- `result.txt` +- `notes.md` + +Only commit runs that are intended to serve as: + +- baselines +- release notes evidence +- comparison evidence in docs + +Current published runs: + +- `tok-quality-core/2026-06-27/` +- `tok-pipeline-microbench/2026-06-27/` diff --git a/benchmarks/results/tok-pipeline-microbench/2026-06-27/notes.md b/benchmarks/results/tok-pipeline-microbench/2026-06-27/notes.md new file mode 100644 index 000000000..4d4953a09 --- /dev/null +++ b/benchmarks/results/tok-pipeline-microbench/2026-06-27/notes.md @@ -0,0 +1,14 @@ +# Provenance Notes + +- Command: + `/bin/zsh -lc 'GOCACHE=$PWD/.gocache ./evals/pipeline-bench.sh'` +- Verification: + benchmark command exited successfully with `PASS` +- Environment: + local workspace run on `2026-06-27` + `goos=darwin` + `goarch=arm64` + `cpu=Apple M1` +- Caveats: + pipeline timings are local benchmark measurements rather than CI medians + this suite measures the internal filter pipeline, not end-to-end answer quality diff --git a/benchmarks/results/tok-pipeline-microbench/2026-06-27/report.md b/benchmarks/results/tok-pipeline-microbench/2026-06-27/report.md new file mode 100644 index 000000000..38bbdd507 --- /dev/null +++ b/benchmarks/results/tok-pipeline-microbench/2026-06-27/report.md @@ -0,0 +1,34 @@ +# Tok Pipeline Microbenchmark Baseline + +## Metadata + +- Suite: `tok-pipeline-microbench` +- Date: `2026-06-27` +- Model: none +- Provider: none +- Commit: current workspace snapshot +- Command: `/bin/zsh -lc 'GOCACHE=$PWD/.gocache ./evals/pipeline-bench.sh'` + +## Headline Metrics + +- ProcessSmall: `20342 ns/op`, `8707 B/op`, `90 allocs/op` +- ProcessMedium: `78233 ns/op`, `44992 B/op`, `70 allocs/op` +- ProcessWithBudget: `43841 ns/op`, `24232 B/op`, `45 allocs/op` +- EstimateTokens Small: `18.89 ns/op` +- EstimateTokens Medium: `8442 ns/op` +- EstimateTokens Large: `127261 ns/op` +- Layer Entropy: `3508 ns/op`, `1930 B/op`, `22 allocs/op` +- Layer Perplexity: `3775 ns/op`, `1607 B/op`, `27 allocs/op` +- ProcessParallel: `1637 ns/op`, `3104 B/op`, `19 allocs/op` + +## Notes + +- This is the first committed repo-owned baseline artifact for `tok-pipeline-microbench`. +- The runner targets the internal filter pipeline directly through Go benchmark entrypoints. +- The script completed successfully and printed the benchmark summary footer. + +## Comparison Summary + +- Previous baseline: none committed +- Change since baseline: initial published baseline +- Interpretation: the core filter pipeline stays in the tens-of-microseconds range for small and medium process cases, with low-single-microsecond parallel path timings on this local harness. diff --git a/benchmarks/results/tok-pipeline-microbench/2026-06-27/result.txt b/benchmarks/results/tok-pipeline-microbench/2026-06-27/result.txt new file mode 100644 index 000000000..920a7e309 --- /dev/null +++ b/benchmarks/results/tok-pipeline-microbench/2026-06-27/result.txt @@ -0,0 +1,19 @@ +==> tok pipeline benchmarks (internal/filter) + +goos: darwin +goarch: arm64 +pkg: github.com/GrayCodeAI/tok/internal/filter +cpu: Apple M1 +BenchmarkPipeline_ProcessSmall-8 58953 20342 ns/op 8707 B/op 90 allocs/op +BenchmarkPipeline_ProcessMedium-8 15304 78233 ns/op 44992 B/op 70 allocs/op +BenchmarkPipeline_ProcessWithBudget-8 27705 43841 ns/op 24232 B/op 45 allocs/op +BenchmarkEstimateTokens_Small-8 63821442 18.89 ns/op 0 B/op 0 allocs/op +BenchmarkEstimateTokens_Medium-8 142575 8442 ns/op 0 B/op 0 allocs/op +BenchmarkEstimateTokens_Large-8 9369 127261 ns/op 0 B/op 0 allocs/op +BenchmarkLayer_Entropy-8 334741 3508 ns/op 1930 B/op 22 allocs/op +BenchmarkLayer_Perplexity-8 317840 3775 ns/op 1607 B/op 27 allocs/op +BenchmarkPipeline_ProcessParallel-8 729633 1637 ns/op 3104 B/op 19 allocs/op +PASS +ok github.com/GrayCodeAI/tok/internal/filter 13.689s + +pipeline-bench: complete (see ns/op + B/op + allocs/op above) diff --git a/benchmarks/results/tok-pipeline-microbench/README.md b/benchmarks/results/tok-pipeline-microbench/README.md new file mode 100644 index 000000000..f83fdef81 --- /dev/null +++ b/benchmarks/results/tok-pipeline-microbench/README.md @@ -0,0 +1,13 @@ +# `tok-pipeline-microbench` Published Runs + +Current published runs: + +- `2026-06-27/` + +Each published run includes: + +- `report.md` +- `result.txt` +- `notes.md` + +Reference manifest: `../../manifests/tok-pipeline-microbench.yaml` diff --git a/benchmarks/results/tok-quality-core/2026-06-27/notes.md b/benchmarks/results/tok-quality-core/2026-06-27/notes.md new file mode 100644 index 000000000..4c648b94e --- /dev/null +++ b/benchmarks/results/tok-quality-core/2026-06-27/notes.md @@ -0,0 +1,14 @@ +# Provenance Notes + +- Command: + `/bin/zsh -lc 'GOCACHE=$PWD/.gocache ./benchmarks/run.sh'` +- Verification: + `/bin/zsh -lc 'GOCACHE=$PWD/.gocache go test ./... -count=1'` +- Environment: + local workspace run on `2026-06-27` + `goos=darwin` + `goarch=arm64` + `cpu=Apple M1` +- Caveats: + root-package benchmark timings are point-in-time local measurements, not CI medians + the quality harness uses a checked-in offline sample corpus rather than an external public benchmark dataset diff --git a/benchmarks/results/tok-quality-core/2026-06-27/report.md b/benchmarks/results/tok-quality-core/2026-06-27/report.md new file mode 100644 index 000000000..266b211e2 --- /dev/null +++ b/benchmarks/results/tok-quality-core/2026-06-27/report.md @@ -0,0 +1,36 @@ +# Tok Quality Core Baseline + +## Metadata + +- Suite: `tok-quality-core` +- Date: `2026-06-27` +- Model: none +- Provider: none +- Commit: current workspace snapshot +- Command: `/bin/zsh -lc 'GOCACHE=$PWD/.gocache ./benchmarks/run.sh'` + +## Headline Metrics + +- CountTokens 100B: `97.64 ns/op`, `1024.19 MB/s` +- CountTokens 100KB: `133845 ns/op`, `765.06 MB/s` +- Compress 100B Minimal: `43500 ns/op`, `63080 B/op`, `222 allocs/op` +- Compress 100B Aggressive: `31372 ns/op`, `62684 B/op`, `221 allocs/op` +- Compress 100KB Minimal: `1485713 ns/op`, `1190139 B/op`, `244 allocs/op` +- Compress 100KB Aggressive: `1492485 ns/op`, `1184748 B/op`, `236 allocs/op` +- BPEEncode 100KB: `131183 ns/op`, `780.59 MB/s` +- Offline quality harness summary: + - `surface`: ratio `0.921`, char retention `0.950`, fidelity `0.973` + - `trim`: ratio `0.922`, char retention `0.950`, fidelity `0.975` + - `extract`: ratio `0.456`, char retention `0.382`, fidelity `0.368` + +## Notes + +- This is the first committed repo-owned baseline artifact for `tok-quality-core`. +- The root-package benchmark section and offline quality harness both completed successfully in the same command. +- The quality harness remains fully offline and uses the checked-in sample corpus. + +## Comparison Summary + +- Previous baseline: none committed +- Change since baseline: initial published baseline +- Interpretation: Tok’s core token-count and encode paths stay sub-millisecond at large input sizes, while the offline quality harness shows that `extract` delivers the strongest compression with a clear fidelity tradeoff versus `surface` and `trim`. diff --git a/benchmarks/results/tok-quality-core/2026-06-27/result.txt b/benchmarks/results/tok-quality-core/2026-06-27/result.txt new file mode 100644 index 000000000..d62f7713c --- /dev/null +++ b/benchmarks/results/tok-quality-core/2026-06-27/result.txt @@ -0,0 +1,31 @@ +tok Benchmark Runner +==================== + +==> Go benchmarks (root package) +goos: darwin +goarch: arm64 +pkg: github.com/GrayCodeAI/tok +cpu: Apple M1 +BenchmarkCountTokens/100B-8 11460319 97.64 ns/op 1024.19 MB/s 0 B/op 0 allocs/op +BenchmarkCountTokens/1KB-8 918771 1299 ns/op 788.31 MB/s 0 B/op 0 allocs/op +BenchmarkCountTokens/10KB-8 89722 13331 ns/op 768.12 MB/s 0 B/op 0 allocs/op +BenchmarkCountTokens/100KB-8 9020 133845 ns/op 765.06 MB/s 0 B/op 0 allocs/op +BenchmarkCompress/100B/Minimal-8 27205 43500 ns/op 2.30 MB/s 63080 B/op 222 allocs/op +BenchmarkCompress/100B/Aggressive-8 37798 31372 ns/op 3.19 MB/s 62684 B/op 221 allocs/op +BenchmarkCompress/1KB/Minimal-8 27144 44158 ns/op 23.19 MB/s 75664 B/op 242 allocs/op +BenchmarkCompress/1KB/Aggressive-8 21780 54690 ns/op 18.72 MB/s 74355 B/op 235 allocs/op +BenchmarkCompress/10KB/Minimal-8 6374 193246 ns/op 52.99 MB/s 175568 B/op 245 allocs/op +BenchmarkCompress/10KB/Aggressive-8 5500 189578 ns/op 54.01 MB/s 173413 B/op 238 allocs/op +BenchmarkCompress/100KB/Minimal-8 806 1485713 ns/op 68.92 MB/s 1190139 B/op 244 allocs/op +BenchmarkCompress/100KB/Aggressive-8 804 1492485 ns/op 68.61 MB/s 1184748 B/op 236 allocs/op +BenchmarkBPEEncode/100B-8 9851370 120.9 ns/op 827.08 MB/s 0 B/op 0 allocs/op +BenchmarkBPEEncode/1KB-8 911469 1312 ns/op 780.75 MB/s 0 B/op 0 allocs/op +BenchmarkBPEEncode/10KB-8 91712 13102 ns/op 781.55 MB/s 0 B/op 0 allocs/op +BenchmarkBPEEncode/100KB-8 9166 131183 ns/op 780.59 MB/s 0 B/op 0 allocs/op +PASS +ok github.com/GrayCodeAI/tok 24.210s + +==> Compression-quality harness +quality benchmark: 15 samples x 4 tiers -> benchmarks/quality-results.md + +Results written to benchmarks/quality-results.md diff --git a/benchmarks/results/tok-quality-core/README.md b/benchmarks/results/tok-quality-core/README.md new file mode 100644 index 000000000..0233b2968 --- /dev/null +++ b/benchmarks/results/tok-quality-core/README.md @@ -0,0 +1,13 @@ +# `tok-quality-core` Published Runs + +Current published runs: + +- `2026-06-27/` + +Each published run includes: + +- `report.md` +- `result.txt` +- `notes.md` + +Reference manifest: `../../manifests/tok-quality-core.yaml` diff --git a/internal/codeaware/tokenizer.go b/internal/codeaware/tokenizer.go index cf9ea82d1..b531b2e4f 100644 --- a/internal/codeaware/tokenizer.go +++ b/internal/codeaware/tokenizer.go @@ -1,4 +1,4 @@ -package tok +package codeaware import ( "strings" diff --git a/internal/codeaware/tokenizer_test.go b/internal/codeaware/tokenizer_test.go index 55300d7df..f0785844d 100644 --- a/internal/codeaware/tokenizer_test.go +++ b/internal/codeaware/tokenizer_test.go @@ -1,4 +1,4 @@ -package tok +package codeaware import ( "strings" diff --git a/internal/config/config.go b/internal/config/config.go index e06a0b945..6299a0d50 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -102,25 +102,7 @@ type EntropyFilterConfig struct { // All disabled by default unless a preset/profile enables them. // Field names match the original flat PipelineConfig fields for backward compatibility. type ResearchLayersConfig struct { - EnableDiffAdapt bool `mapstructure:"enable_difft_adapt"` // Difficulty-adaptive pruning - EnableEPiC bool `mapstructure:"enable_epic"` // Causal-edge preservation - EnableSSDP bool `mapstructure:"enable_ssdp"` // ToT branch pruning - EnableAgentOCR bool `mapstructure:"enable_agent_ocr"` // Turn-density compression - EnableS2MAD bool `mapstructure:"enable_s2_mad"` // Agreement collapse - EnableACON bool `mapstructure:"enable_acon"` // Adaptive context optimization - EnableLatentCollab bool `mapstructure:"enable_latent_collab"` // Latent collaboration merge - EnableGraphCoT bool `mapstructure:"enable_graph_cot"` // Graph-CoT compression - EnableRoleBudget bool `mapstructure:"enable_role_budget"` // Role-aware budgeting - EnableSWEAdaptive bool `mapstructure:"enable_swe_adaptive_loop"` // SWE adaptive prune loop - EnableAgentOCRHist bool `mapstructure:"enable_agent_ocr_history"` // AgentOCR history compaction - EnablePlanBudget bool `mapstructure:"enable_plan_budget"` // Plan-and-budget controller - EnableLightMem bool `mapstructure:"enable_lightmem"` // Lightweight memory reuse - EnablePathShorten bool `mapstructure:"enable_path_shorten"` // Path/identifier shortening - EnableJSONSampler bool `mapstructure:"enable_json_sampler"` // JSON statistical sampler - EnableContextCrunch bool `mapstructure:"enable_context_crunch"` // Context crunch (merged log + diff folding) - EnableSearchCrunch bool `mapstructure:"enable_search_crunch"` // Search result dedup stage - EnableStructColl bool `mapstructure:"enable_structural_collapse"` // Structural boilerplate collapse - EnableResearchPack bool `mapstructure:"enable_research_pack"` // One-toggle research bundle + EnableResearchPack bool `mapstructure:"enable_research_pack"` // One-toggle research bundle } // PipelineConfig controls the 20-layer compression pipeline. @@ -195,8 +177,6 @@ type PipelineConfig struct { ExtractiveTailLines int `mapstructure:"extractive_tail_lines"` // Tail lines to preserve ExtractiveSignalLines int `mapstructure:"extractive_signal_lines"` // Signal lines to preserve EnableQualityGuardrail bool `mapstructure:"enable_quality_guardrail"` // Auto-fallback on quality risk - EnablePlannedLayers bool `mapstructure:"enable_planned_layers"` // Enable experimental 30-49 layer pack - // LLM Compaction (Layer 11) - moved to embedded CompactionConfig // Access as: cfg.EnableCompaction, cfg.CompactionThreshold, etc. @@ -251,7 +231,7 @@ type PipelineConfig struct { AgentMemoryExtractFn string `mapstructure:"agent_memory_extract_fn"` // Extraction function type // Research Layers (31-49) - moved to embedded ResearchLayersConfig - // Access as: cfg.EnableDiffAdapt, cfg.EnableEPiC, cfg.ResearchPack, etc. + // Access as: cfg.EnableResearchPack, etc. // Perplexity Filter (Layer 2) detailed settings - moved to embedded PerplexityFilterConfig // Entropy Filter (Layer 1) detailed settings - moved to embedded EntropyFilterConfig @@ -443,7 +423,6 @@ func Defaults() *Config { ExtractiveTailLines: 60, ExtractiveSignalLines: 120, EnableQualityGuardrail: false, - EnablePlannedLayers: false, // Embedded sub-configs CompactionConfig: CompactionConfig{ @@ -629,41 +608,23 @@ type envAlias struct { // envAliasRegistry maps non-standard env var names to their config keys. // Registered once at init time — no allocations on each Load() call. var envAliasRegistry = map[string]envAlias{ - "TOK_DB_PATH": {key: "tracking.database_path", parser: "string"}, - "TOK_TELEMETRY_DISABLED": {key: "tracking.telemetry", parser: "bool-not"}, - "TOK_AUDIT_DIR": {key: "hooks.audit_dir", parser: "string"}, - "TOK_TEE_DIR": {key: "hooks.tee_dir", parser: "string"}, - "TOK_TEE": {key: "hooks.tee_enabled", parser: "bool"}, - "TOK_HOOK_AUDIT": {key: "hooks.audit_enabled", parser: "bool"}, - "TOK_BUDGET": {key: "pipeline.default_budget", parser: "int"}, - "TOK_MODE": {key: "filter.mode", parser: "string"}, - "TOK_PRESET": {key: "pipeline.preset", parser: "string"}, - "TOK_MAX_CONTEXT": {key: "pipeline.max_context_tokens", parser: "int"}, - "TOK_CACHE_SIZE": {key: "pipeline.cache_max_size", parser: "int"}, - "TOK_ENTROPY_THRESHOLD": {key: "pipeline.entropy_threshold", parser: "float"}, - "TOK_COMPACTION": {key: "pipeline.enable_compaction", parser: "bool"}, - "TOK_H2O": {key: "pipeline.enable_h2o", parser: "bool"}, - "TOK_ATTENTION_SINK": {key: "pipeline.enable_attention_sink", parser: "bool"}, - "TOK_DIFF_ADAPT": {key: "pipeline.enable_difft_adapt", parser: "bool"}, - "TOK_EPIC": {key: "pipeline.enable_epic", parser: "bool"}, - "TOK_SSDP": {key: "pipeline.enable_ssdp", parser: "bool"}, - "TOK_AGENT_OCR": {key: "pipeline.enable_agent_ocr", parser: "bool"}, - "TOK_S2_MAD": {key: "pipeline.enable_s2_mad", parser: "bool"}, - "TOK_ACON": {key: "pipeline.enable_acon", parser: "bool"}, - "TOK_LATENT_COLLAB": {key: "pipeline.enable_latent_collab", parser: "bool"}, - "TOK_GRAPH_COT": {key: "pipeline.enable_graph_cot", parser: "bool"}, - "TOK_ROLE_BUDGET": {key: "pipeline.enable_role_budget", parser: "bool"}, - "TOK_SWE_ADAPTIVE": {key: "pipeline.enable_swe_adaptive_loop", parser: "bool"}, - "TOK_AGENT_OCR_HISTORY": {key: "pipeline.enable_agent_ocr_history", parser: "bool"}, - "TOK_PLAN_BUDGET": {key: "pipeline.enable_plan_budget", parser: "bool"}, - "TOK_LIGHTMEM": {key: "pipeline.enable_lightmem", parser: "bool"}, - "TOK_PATH_SHORTEN": {key: "pipeline.enable_path_shorten", parser: "bool"}, - "TOK_JSON_SAMPLER": {key: "pipeline.enable_json_sampler", parser: "bool"}, - "TOK_LOG_CRUNCH": {key: "pipeline.enable_log_crunch", parser: "bool"}, - "TOK_SEARCH_CRUNCH": {key: "pipeline.enable_search_crunch", parser: "bool"}, - "TOK_DIFF_CRUNCH": {key: "pipeline.enable_diff_crunch", parser: "bool"}, - "TOK_STRUCTURAL_COLLAPSE": {key: "pipeline.enable_structural_collapse", parser: "bool"}, - "TOK_RESEARCH_PACK": {key: "pipeline.enable_research_pack", parser: "bool"}, + "TOK_DB_PATH": {key: "tracking.database_path", parser: "string"}, + "TOK_TELEMETRY_DISABLED": {key: "tracking.telemetry", parser: "bool-not"}, + "TOK_AUDIT_DIR": {key: "hooks.audit_dir", parser: "string"}, + "TOK_TEE_DIR": {key: "hooks.tee_dir", parser: "string"}, + "TOK_TEE": {key: "hooks.tee_enabled", parser: "bool"}, + "TOK_HOOK_AUDIT": {key: "hooks.audit_enabled", parser: "bool"}, + "TOK_BUDGET": {key: "pipeline.default_budget", parser: "int"}, + "TOK_MODE": {key: "filter.mode", parser: "string"}, + "TOK_PRESET": {key: "pipeline.preset", parser: "string"}, + "TOK_MAX_CONTEXT": {key: "pipeline.max_context_tokens", parser: "int"}, + "TOK_CACHE_SIZE": {key: "pipeline.cache_max_size", parser: "int"}, + "TOK_ENTROPY_THRESHOLD": {key: "pipeline.entropy_threshold", parser: "float"}, + "TOK_COMPACTION": {key: "pipeline.enable_compaction", parser: "bool"}, + "TOK_H2O": {key: "pipeline.enable_h2o", parser: "bool"}, + "TOK_ATTENTION_SINK": {key: "pipeline.enable_attention_sink", parser: "bool"}, + + "TOK_RESEARCH_PACK": {key: "pipeline.enable_research_pack", parser: "bool"}, } // parseEnvValue converts a raw env string according to the alias parser type. diff --git a/internal/filter/adaptive_context_optimize.go b/internal/filter/adaptive_context_optimize.go deleted file mode 100644 index b37adf870..000000000 --- a/internal/filter/adaptive_context_optimize.go +++ /dev/null @@ -1,158 +0,0 @@ -package filter - -import ( - "math" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "ACON: Optimizing Context Compression for Long-Context LLMs" — ICLR 2026 -// ACONFilter implements adaptive context optimization — dynamically adjusts -// compression based on content complexity and context length. -type ACONFilter struct { - targetRatio float64 -} - -// NewACONFilter creates a new ACON-style context compression filter. -func NewACONFilter() *ACONFilter { - return &ACONFilter{targetRatio: 0.6} -} - -// Apply applies adaptive context compression. -func (f *ACONFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - original := input - tokens := core.EstimateTokens(input) - - complexity := f.complexityScore(input) - - targetRatio := f.targetRatio - if complexity > 0.7 { - targetRatio = 0.8 - } else if complexity < 0.3 { - targetRatio = 0.4 - } - - if mode == ModeAggressive { - targetRatio *= 0.7 - } - - keepCount := int(math.Ceil(float64(tokens) * targetRatio)) - - lines := strings.Split(input, "\n") - type lineInfo struct { - line string - score float64 - idx int - } - - scored := make([]lineInfo, len(lines)) - for i, line := range lines { - scored[i] = lineInfo{line: line, score: f.lineScore(line), idx: i} - } - - for i := 1; i < len(scored); i++ { - for j := i; j > 0 && scored[j].score > scored[j-1].score; j-- { - scored[j], scored[j-1] = scored[j-1], scored[j] - } - } - - keepLines := keepCount / 10 - if keepLines < 1 { - keepLines = 1 - } - if keepLines > len(scored) { - keepLines = len(scored) - } - - kept := make(map[int]string) - for i := 0; i < keepLines; i++ { - kept[scored[i].idx] = scored[i].line - } - - var result []string - for i := 0; i < len(lines); i++ { - if l, ok := kept[i]; ok { - result = append(result, l) - } - } - - if len(result) == 0 { - return input, 0 - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(original) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func (f *ACONFilter) complexityScore(input string) float64 { - lines := strings.Split(input, "\n") - if len(lines) == 0 { - return 0 - } - - uniqueWords := make(map[string]bool) - totalWords := 0 - for _, line := range lines { - words := strings.Fields(line) - for _, w := range words { - uniqueWords[strings.ToLower(w)] = true - totalWords++ - } - } - - if totalWords == 0 { - return 0 - } - - return float64(len(uniqueWords)) / float64(totalWords) -} - -func (f *ACONFilter) lineScore(line string) float64 { - score := 0.0 - trimmed := strings.TrimSpace(line) - - if trimmed == "" { - return 0 - } - - if strings.ContainsAny(trimmed, "{}[]()") { - score += 2.0 - } - - lower := strings.ToLower(trimmed) - for _, kw := range []string{ - "error", "fail", "panic", "func", "class", "type", - "import", "return", "struct", "interface", - } { - if strings.Contains(lower, kw) { - score += 1.5 - } - } - - for _, c := range trimmed { - if c >= '0' && c <= '9' { - score += 0.5 - break - } - } - - if strings.Contains(trimmed, "/") { - score += 1.0 - } - - score += float64(len(trimmed)) / 100.0 - - return score -} - -// Name returns the layer name. -func (f *ACONFilter) Name() string { return "36_acon" } diff --git a/internal/filter/agent_density_compress.go b/internal/filter/agent_density_compress.go deleted file mode 100644 index d805617d6..000000000 --- a/internal/filter/agent_density_compress.go +++ /dev/null @@ -1,183 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "AgentOCR: Content-Density Aware Compression for Multi-Turn Agent Trajectories" -// arXiv 2026 -// -// AgentOCRFilter operates on multi-turn agent outputs (tool call sequences, conversation turns). -// It measures the "content density" of each turn — the ratio of information-bearing lines -// to total lines — and collapses low-density turns into a single summary stub while -// preserving high-density turns in full. -// -// Content density signals: -// - Information-bearing: error lines, code lines, unique-term-rich lines -// - Filler: empty lines, repeated terms, pure-whitespace lines -// -// Turn detection: turns are separated by patterns like: -// - "Human:", "Assistant:", "User:", "Agent:", "System:" -// - "", "", markdown "## Turn N" -// -// Density thresholds: -// - Low density (<0.30): collapse to "[Turn summary: N lines, M tokens]" -// - Medium density (0.30–0.65): keep first+last few lines + omission marker -// - High density (>0.65): preserve fully -type AgentOCRFilter struct { - lowDensityThreshold float64 - highDensityThreshold float64 - contextLines int // lines to keep at start/end of medium-density turns -} - -// NewAgentOCRFilter creates a new agent turn content-density filter. -func NewAgentOCRFilter() *AgentOCRFilter { - return &AgentOCRFilter{ - lowDensityThreshold: 0.30, - highDensityThreshold: 0.65, - contextLines: 3, - } -} - -// Name returns the filter name. -func (f *AgentOCRFilter) Name() string { return "34_agent_ocr" } - -// Apply collapses low-density agent turns, trims medium-density ones. -func (f *AgentOCRFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - - turns := f.parseTurns(lines) - if len(turns) < 2 { - return input, 0 - } - - lowThresh := f.lowDensityThreshold - highThresh := f.highDensityThreshold - if mode == ModeAggressive { - lowThresh = 0.45 - highThresh = 0.75 - } - - var resultLines []string - changed := false - - for _, t := range turns { - turnLines := lines[t.start : t.end+1] - density := f.contentDensity(turnLines) - - switch { - case density >= highThresh: - // High density: preserve fully - resultLines = append(resultLines, turnLines...) - case density >= lowThresh: - // Medium density: keep head + tail + marker - ctx := f.contextLines - if len(turnLines) <= ctx*2+1 { - resultLines = append(resultLines, turnLines...) - } else { - resultLines = append(resultLines, turnLines[:ctx]...) - omitted := len(turnLines) - ctx*2 - resultLines = append(resultLines, "[... "+itoa(omitted)+" lines omitted (density="+aocFmtPct(density)+") ...]") - resultLines = append(resultLines, turnLines[len(turnLines)-ctx:]...) - changed = true - } - default: - // Low density: collapse to stub - tokens := core.EstimateTokens(strings.Join(turnLines, "\n")) - resultLines = append(resultLines, turnLines[0]) // keep the header line - resultLines = append(resultLines, "[collapsed: "+itoa(len(turnLines)-1)+" lines / ~"+itoa(tokens)+" tokens (low density)]") - changed = true - } - } - - if !changed { - return input, 0 - } - - output := strings.Join(resultLines, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -type agentTurn struct{ start, end int } - -var aocTurnHeaders = []string{ - "human:", "assistant:", "user:", "agent:", "system:", - "", "", "", "", - "## turn ", "# turn ", -} - -// parseTurns splits lines into agent turns by header patterns. -func (f *AgentOCRFilter) parseTurns(lines []string) []agentTurn { - var headers []int - for i, line := range lines { - lower := strings.ToLower(strings.TrimSpace(line)) - for _, h := range aocTurnHeaders { - if strings.HasPrefix(lower, h) { - headers = append(headers, i) - break - } - } - } - - if len(headers) < 2 { - return nil - } - - var turns []agentTurn - for k := 0; k < len(headers); k++ { - end := len(lines) - 1 - if k+1 < len(headers) { - end = headers[k+1] - 1 - } - turns = append(turns, agentTurn{headers[k], end}) - } - return turns -} - -// contentDensity measures the fraction of information-bearing lines in a turn. -func (f *AgentOCRFilter) contentDensity(lines []string) float64 { - if len(lines) == 0 { - return 1.0 - } - infoBearing := 0 - for _, line := range lines { - if f.isInfoBearing(line) { - infoBearing++ - } - } - return float64(infoBearing) / float64(len(lines)) -} - -// isInfoBearing returns true if a line carries substantive information. -func (f *AgentOCRFilter) isInfoBearing(line string) bool { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - return false - } - if isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { - return true - } - // Lines with ≥4 distinct tokens are likely substantive - terms := ltTokenize(line) - unique := make(map[string]bool) - for _, t := range terms { - unique[t] = true - } - return len(unique) >= 4 -} - -// aocFmtPct formats a float as a short percentage string. -func aocFmtPct(f float64) string { - pct := int(f * 100) - return itoa(pct) + "%" -} diff --git a/internal/filter/agent_history_compress.go b/internal/filter/agent_history_compress.go deleted file mode 100644 index cb39e8de9..000000000 --- a/internal/filter/agent_history_compress.go +++ /dev/null @@ -1,124 +0,0 @@ -package filter - -import ( - "sort" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// AgentOCRHistoryFilter compacts older conversation turns while preserving recent turns. -type AgentOCRHistoryFilter struct { - recentTurns int -} - -// NewAgentOCRHistoryFilter creates the history-focused AgentOCR extension. -func NewAgentOCRHistoryFilter() *AgentOCRHistoryFilter { - return &AgentOCRHistoryFilter{recentTurns: 3} -} - -// Name returns the filter name. -func (f *AgentOCRHistoryFilter) Name() string { return "41_agent_ocr_history" } - -// Apply compresses old low-density turns and preserves recent high-signal turns. -func (f *AgentOCRHistoryFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - turns := parseRoleTurns(lines) - if len(turns) < 4 { - return input, 0 - } - - keepRecent := f.recentTurns - if mode == ModeAggressive { - keepRecent = 2 - } - cut := len(turns) - keepRecent - if cut < 1 { - cut = 1 - } - - out := make([]string, 0, len(lines)) - changed := false - for i, t := range turns { - seg := lines[t.start : t.end+1] - if i >= cut { - out = append(out, seg...) - continue - } - - out = append(out, lines[t.start]) - kept := ocrHistoryTopLines(seg[1:], mode) - if len(kept) > 0 { - out = append(out, kept...) - } - omitted := len(seg) - 1 - len(kept) - if omitted > 0 { - out = append(out, "[agent-ocr-history: "+itoa(omitted)+" lines compacted]") - changed = true - } - } - - if !changed { - return input, 0 - } - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func ocrHistoryTopLines(lines []string, mode Mode) []string { - if len(lines) == 0 { - return nil - } - type cand struct { - idx int - score float64 - } - cands := make([]cand, 0, len(lines)) - for i, line := range lines { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - continue - } - score := 0.0 - if isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { - score += 2.0 - } - if strings.ContainsAny(line, ":=/") { - score += 0.7 - } - score += float64(len(ltTokenize(line))) / 10.0 - cands = append(cands, cand{idx: i, score: score}) - } - - if len(cands) == 0 { - return nil - } - sort.Slice(cands, func(i, j int) bool { return cands[i].score > cands[j].score }) - - limit := 2 - if mode == ModeAggressive { - limit = 1 - } - if limit > len(cands) { - limit = len(cands) - } - - pick := make(map[int]bool, limit) - for i := 0; i < limit; i++ { - pick[cands[i].idx] = true - } - out := make([]string, 0, limit) - for i, line := range lines { - if pick[i] { - out = append(out, line) - } - } - return out -} diff --git a/internal/filter/auto_content_compress.go b/internal/filter/auto_content_compress.go deleted file mode 100644 index a00e7ac24..000000000 --- a/internal/filter/auto_content_compress.go +++ /dev/null @@ -1,109 +0,0 @@ -package filter - -import ( - "regexp" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// ContextCrunchFilter combines Layer 46 (LogCrunch) and Layer 48 (DiffCrunch) -// into a unified context-folding layer that auto-detects content type. -// -// This merged layer: -// - Auto-detects if input is logs or diffs -// - Applies appropriate folding strategy -// - Handles both types with unified logic -type ContextCrunchFilter struct { - logCrunch *LogCrunchFilter - diffCrunch *DiffCrunchFilter -} - -var ( - ccDiffPattern = regexp.MustCompile(`^(diff --git|@@|\+\+\+|---) `) - ccLogPattern = regexp.MustCompile(`(?i)\b(info|debug|warn|error|fatal)\b.*\d{4}-\d{2}-\d{2}`) - ccTimestampPattern = regexp.MustCompile(`\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}`) -) - -// NewContextCrunchFilter creates a new context crunch filter. -// This replaces both NewLogCrunchFilter() and NewDiffCrunchFilter(). -func NewContextCrunchFilter() *ContextCrunchFilter { - return &ContextCrunchFilter{ - logCrunch: NewLogCrunchFilter(), - diffCrunch: NewDiffCrunchFilter(), - } -} - -// Name returns the filter name. -func (c *ContextCrunchFilter) Name() string { return "46_context_crunch" } - -// ContextContentType represents the detected content type for context crunching. -type ContextContentType int - -const ( - ContextContentTypeUnknown ContextContentType = iota - ContextContentTypeLog - ContextContentTypeDiff -) - -// Apply auto-detects content type and applies appropriate folding. -func (c *ContextCrunchFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 20 { - return input, 0 - } - - // Auto-detect content type - contentType := c.detectContentType(lines) - - switch contentType { - case ContextContentTypeDiff: - return c.diffCrunch.Apply(input, mode) - case ContextContentTypeLog: - return c.logCrunch.Apply(input, mode) - default: - // Try both and use the one that saves more tokens - logOutput, logSaved := c.logCrunch.Apply(input, mode) - diffOutput, diffSaved := c.diffCrunch.Apply(input, mode) - - if logSaved > diffSaved { - return logOutput, logSaved - } - return diffOutput, diffSaved - } -} - -// detectContentType analyzes input to determine if it's logs or diffs. -func (c *ContextCrunchFilter) detectContentType(lines []string) ContextContentType { - diffIndicators := 0 - logIndicators := 0 - - for _, line := range lines { - if ccDiffPattern.MatchString(line) { - diffIndicators++ - } - if ccLogPattern.MatchString(line) || ccTimestampPattern.MatchString(line) { - logIndicators++ - } - } - - // Need at least 2 diff indicators to be confident - if diffIndicators >= 2 { - return ContextContentTypeDiff - } - // Need at least 3 log indicators - if logIndicators >= 3 { - return ContextContentTypeLog - } - - return ContextContentTypeUnknown -} - -// EstimateTokens provides token estimation for the filter. -func (c *ContextCrunchFilter) EstimateTokens(text string) int { - return core.EstimateTokens(text) -} diff --git a/internal/filter/causal_edge_preserve.go b/internal/filter/causal_edge_preserve.go deleted file mode 100644 index 0428a9867..000000000 --- a/internal/filter/causal_edge_preserve.go +++ /dev/null @@ -1,160 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "EPiC: Effective Prompting for Imitation-based Condensation of Long CoT Traces" -// arXiv:2505.xxxxx — 2025 -// -// EPiCFilter identifies "causal edge" lines in chain-of-thought reasoning traces — -// lines that explicitly reference or build upon conclusions from prior steps — -// and protects them from compression. -// -// Causal connectives are the load-bearing joints of a reasoning chain: -// - "therefore", "thus", "so", "hence", "consequently" -// - "because", "since", "given that", "due to" -// - "this means", "which implies", "it follows that" -// - "building on", "using the result from", "from step N" -// -// Without these connectives, a compressed trace loses its logical continuity. -// EPiC's core contribution is identifying that these inter-step linkages must be -// preserved even when the surrounding content is dropped. -// -// Implementation: score each line; lines with connective markers are anchored at 1.0. -// Non-connective lines are scored by term novelty relative to a running seen-set. -// Lines below the novelty threshold are dropped. -type EPiCFilter struct { - noveltyThreshold float64 // min fraction of new terms to retain a line -} - -// NewEPiCFilter creates a new EPiC causal-edge preservation filter. -func NewEPiCFilter() *EPiCFilter { - return &EPiCFilter{ - noveltyThreshold: 0.35, - } -} - -// Name returns the filter name. -func (f *EPiCFilter) Name() string { return "32_epic" } - -// Apply identifies causal edges and drops low-novelty non-causal lines. -func (f *EPiCFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 6 { - return input, 0 - } - - // Only apply to reasoning-heavy content - if !epicLooksLikeCoT(lines) { - return input, 0 - } - - threshold := f.noveltyThreshold - if mode == ModeAggressive { - threshold = 0.50 - } - - seen := make(map[string]bool) - var result []string - - for _, line := range lines { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - result = append(result, line) - continue - } - - // Always preserve structural anchors - if isErrorLine(line) || isWarningLine(line) || isHeadingLine(line) || isCodeLine(line) { - result = append(result, line) - epicAddTerms(seen, line) - continue - } - - // Causal edge lines are always preserved - if epicIsCausalEdge(trimmed) { - result = append(result, line) - epicAddTerms(seen, line) - continue - } - - // Score novelty: fraction of this line's terms not yet seen - terms := ltTokenize(line) - if len(terms) == 0 { - result = append(result, line) - continue - } - newTerms := 0 - for _, t := range terms { - if !seen[t] { - newTerms++ - } - } - novelty := float64(newTerms) / float64(len(terms)) - - if novelty >= threshold { - result = append(result, line) - epicAddTerms(seen, line) - } - } - - if len(result) == len(lines) { - return input, 0 - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// epicIsCausalEdge returns true if the line contains a causal/logical connective. -func epicIsCausalEdge(line string) bool { - lower := strings.ToLower(line) - causalMarkers := []string{ - "therefore", "thus,", "hence,", "so,", "consequently", - "because ", "since ", "given that", "due to", - "this means", "which implies", "it follows", - "building on", "from step ", "from the previous", - "using this", "using the result", "we can conclude", - "in conclusion", "as a result", "for this reason", - } - for _, marker := range causalMarkers { - if strings.Contains(lower, marker) { - return true - } - } - return false -} - -// epicLooksLikeCoT returns true if the input has reasoning-trace characteristics. -func epicLooksLikeCoT(lines []string) bool { - causalCount := 0 - stepCount := 0 - for _, line := range lines { - lower := strings.ToLower(strings.TrimSpace(line)) - if epicIsCausalEdge(lower) { - causalCount++ - } - if isReasoningLine(line) { - stepCount++ - } - } - return causalCount >= 2 || stepCount >= 3 -} - -// epicAddTerms adds all tokens from a line to the seen set. -func epicAddTerms(seen map[string]bool, line string) { - for _, t := range ltTokenize(line) { - seen[t] = true - } -} diff --git a/internal/filter/chain_of_thought_compress.go b/internal/filter/chain_of_thought_compress.go deleted file mode 100644 index 80a55c21e..000000000 --- a/internal/filter/chain_of_thought_compress.go +++ /dev/null @@ -1,206 +0,0 @@ -package filter - -import ( - "regexp" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "TokenSkip: Controllable Chain-of-Thought Compression" -// arXiv:2502.12067 — 2025 -// -// CoTCompressFilter detects chain-of-thought reasoning traces in output and -// applies token-budget-controlled compression: -// - ModeMinimal: truncate CoT to first 30% + summary marker -// - ModeAggressive: replace entire CoT block with a token-count stub -// -// Applicable when tok wraps tools that emit LLM reasoning output -// (e.g. claude --verbose, agent traces, reasoning model output). -// -// Patterns detected: -// - XML-style: ..., ... -// - Markdown step blocks: "Step 1:", "Let me think", numbered reasoning -// - Reflection loops: "Wait,", "Actually,", "Let me reconsider" -type CoTCompressFilter struct { - xmlThinkRe *regexp.Regexp - xmlReasoningRe *regexp.Regexp - stepPrefixRe *regexp.Regexp - reflectionRe *regexp.Regexp - minBlockLines int // minimum CoT block size before compressing -} - -// NewCoTCompressFilter creates a new TokenSkip-inspired chain-of-thought compressor. -func NewCoTCompressFilter() *CoTCompressFilter { - return &CoTCompressFilter{ - xmlThinkRe: regexp.MustCompile(`(?s)(.*?)`), - xmlReasoningRe: regexp.MustCompile(`(?s)(.*?)`), - stepPrefixRe: regexp.MustCompile(`(?i)^(step\s+\d+[:.)]|let me (think|consider|analyze)|firstly,|secondly,|thirdly,|finally,)`), - reflectionRe: regexp.MustCompile(`(?i)^(wait,|actually,|let me reconsider|hmm,|on second thought)`), - minBlockLines: 4, - } -} - -// Name returns the filter name. -func (f *CoTCompressFilter) Name() string { return "23_cot_compress" } - -// Apply compresses chain-of-thought blocks according to mode. -func (f *CoTCompressFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - output := input - - // Handle XML-style think blocks first (most common in modern LLM output) - output = f.compressXMLBlocks(output, mode) - - // Handle markdown-style reasoning sections - output = f.compressMarkdownCoT(output, mode) - - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func (f *CoTCompressFilter) compressXMLBlocks(input string, mode Mode) string { - for _, re := range []*regexp.Regexp{f.xmlThinkRe, f.xmlReasoningRe} { - input = re.ReplaceAllStringFunc(input, func(match string) string { - // Extract inner content - inner := re.FindStringSubmatch(match) - if len(inner) < 2 { - return match - } - content := inner[1] - toks := core.EstimateTokens(content) - - if mode == ModeAggressive { - return "[thinking: " + tokLabel(toks) + " compressed]" - } - - // ModeMinimal: keep first 30% - lines := strings.Split(strings.TrimSpace(content), "\n") - if len(lines) < f.minBlockLines { - return match - } - keep := len(lines) * 30 / 100 - if keep < 2 { - keep = 2 - } - tag := xmlTagName(re) - truncated := strings.Join(lines[:keep], "\n") - return "<" + tag + ">\n" + truncated + "\n[... " + tokLabel(toks*70/100) + " omitted]\n" - }) - } - return input -} - -func (f *CoTCompressFilter) compressMarkdownCoT(input string, mode Mode) string { - lines := strings.Split(input, "\n") - - // Find runs of reasoning lines (step prefixes or reflection markers) - type run struct{ start, end int } - var runs []run - inRun := false - runStart := 0 - - for i, line := range lines { - trimmed := strings.TrimSpace(line) - isReasoning := f.stepPrefixRe.MatchString(trimmed) || f.reflectionRe.MatchString(trimmed) - if isReasoning && !inRun { - inRun = true - runStart = i - } else if !isReasoning && trimmed == "" && inRun { - // Blank line ends a reasoning run - if i-runStart >= f.minBlockLines { - runs = append(runs, run{runStart, i - 1}) - } - inRun = false - } - } - if inRun && len(lines)-runStart >= f.minBlockLines { - runs = append(runs, run{runStart, len(lines) - 1}) - } - - if len(runs) == 0 { - return input - } - - suppress := make(map[int]bool) - annotation := make(map[int]string) - - for _, r := range runs { - block := lines[r.start : r.end+1] - toks := core.EstimateTokens(strings.Join(block, "\n")) - - if mode == ModeAggressive { - // Replace entire run with a single stub line - annotation[r.start] = "[reasoning: " + tokLabel(toks) + " compressed]" - for i := r.start + 1; i <= r.end; i++ { - suppress[i] = true - } - } else { - // ModeMinimal: keep first 30%, suppress the rest - keep := (r.end - r.start + 1) * 30 / 100 - if keep < 2 { - keep = 2 - } - cutoff := r.start + keep - omitted := r.end - cutoff + 1 - if omitted > 0 { - annotation[cutoff] = "[... " + tokLabel(toks*(100-30)/100) + " reasoning omitted]" - for i := cutoff + 1; i <= r.end; i++ { - suppress[i] = true - } - } - } - } - - var result []string - for i, line := range lines { - if suppress[i] { - continue - } - if ann, ok := annotation[i]; ok { - if mode == ModeAggressive { - result = append(result, ann) - } else { - result = append(result, line) - result = append(result, ann) - } - continue - } - result = append(result, line) - } - - return strings.Join(result, "\n") -} - -func tokLabel(n int) string { - if n < 1000 { - return "~" + cotItoa(n) + " tok" - } - return "~" + cotItoa(n/1000) + "k tok" -} - -func cotItoa(n int) string { - if n == 0 { - return "0" - } - buf := make([]byte, 0, 10) - for n > 0 { - buf = append([]byte{byte('0' + n%10)}, buf...) - n /= 10 - } - return string(buf) -} - -func xmlTagName(re *regexp.Regexp) string { - src := re.String() - if strings.Contains(src, "reasoning") { - return "reasoning" - } - return "think" -} diff --git a/internal/filter/coding_agent_context.go b/internal/filter/coding_agent_context.go deleted file mode 100644 index 9bfb9f877..000000000 --- a/internal/filter/coding_agent_context.go +++ /dev/null @@ -1,366 +0,0 @@ -package filter - -import ( - "math" - "sort" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "SWE-Pruner: Self-Adaptive Context Pruning for Coding Agents" -// arXiv:2601.16746 — Wang et al., Shanghai Jiao Tong, 2026 -// -// CodingAgentContextFilter specialises context pruning for the structured -// tool outputs that coding agents (Claude Code, Cursor, etc.) receive: -// file reads, bash output, search results, git diffs, test output, compile logs. -// -// Unlike the general GoalDrivenFilter (CRF scoring against query terms), -// this filter is structure-aware: it identifies the output type and applies -// a type-specific compression strategy, then self-adjusts the compression -// ratio based on observed output density. -// -// Type-specific strategies: -// -// file_read — elide unchanged middle sections, keep head+tail -// bash_output — keep last N lines (most recent = most relevant) -// search_hits — one result per unique file path -// git_diff — keep ±-lines only, drop context lines in aggressive mode -// test_output — keep FAIL/PASS summary + failing assertions -// compile_log — keep error/warning lines, collapse repeated warnings -type CodingAgentContextFilter struct { - headLines int // lines to keep at head of file reads - tailLines int // lines to keep at tail of bash output - maxResults int // max search results per file path - baseRatio float64 // baseline keep ratio for unlabeled output -} - -// NewCodingAgentContextFilter creates a self-adaptive coding agent context filter. -func NewCodingAgentContextFilter() *CodingAgentContextFilter { - return &CodingAgentContextFilter{ - headLines: 30, - tailLines: 50, - maxResults: 3, - baseRatio: 0.6, - } -} - -// Name returns the filter name. -func (f *CodingAgentContextFilter) Name() string { return "24_coding_agent_ctx" } - -// Apply detects output type and applies the appropriate compression strategy. -func (f *CodingAgentContextFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 8 { - return input, 0 - } - - outputType := f.detectType(lines) - - var output string - switch outputType { - case "git_diff": - output = f.compressDiff(lines, mode) - case "test_output": - output = f.compressTestOutput(lines, mode) - case "compile_log": - output = f.compressCompileLog(lines, mode) - case "bash_output": - output = f.compressBashOutput(lines, mode) - case "search_hits": - output = f.compressSearchHits(lines, mode) - case "file_read": - output = f.compressFileRead(lines, mode) - default: - output = f.compressGeneric(lines, mode) - } - - if output == "" || output == input { - return input, 0 - } - - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// detectType identifies the output type from leading lines. -func (f *CodingAgentContextFilter) detectType(lines []string) string { - head := strings.Join(firstN(lines, 10), "\n") - headL := strings.ToLower(head) - - if strings.Contains(head, "diff --git") || strings.HasPrefix(lines[0], "--- ") { - return "git_diff" - } - if strings.Contains(headL, "=== run") || strings.Contains(headL, "--- fail") || - strings.Contains(headL, "--- pass") || strings.Contains(headL, "test session starts") || - strings.Contains(headL, "running ") && strings.Contains(headL, "test") { - return "test_output" - } - if strings.Contains(headL, "error[e") || strings.Contains(headL, "compiling ") || - strings.Contains(headL, "building [") || strings.Contains(headL, ": error:") { - return "compile_log" - } - // Search results: lines starting with "filepath:linenum:content" - if looksLikeSearchResults(lines) { - return "search_hits" - } - // File read: lots of indented/code lines, no diff markers - if isLikelyFileRead(lines) { - return "file_read" - } - // Long bash output: no special structure - if len(lines) > 30 { - return "bash_output" - } - return "generic" -} - -// compressDiff keeps +/- lines; drops @@ context lines in aggressive mode. -func (f *CodingAgentContextFilter) compressDiff(lines []string, mode Mode) string { - var result []string - for _, line := range lines { - if strings.HasPrefix(line, "diff ") || strings.HasPrefix(line, "index ") || - strings.HasPrefix(line, "--- ") || strings.HasPrefix(line, "+++ ") { - result = append(result, line) - continue - } - if strings.HasPrefix(line, "@@") { - if mode != ModeAggressive { - result = append(result, line) - } - continue - } - if strings.HasPrefix(line, "+") || strings.HasPrefix(line, "-") { - result = append(result, line) - continue - } - // Context line - if mode != ModeAggressive { - result = append(result, line) - } - } - return strings.Join(result, "\n") -} - -// compressTestOutput keeps FAIL summary + failing test names + assertion lines. -func (f *CodingAgentContextFilter) compressTestOutput(lines []string, mode Mode) string { - var result []string - inFailBlock := false - - for _, line := range lines { - lower := strings.ToLower(line) - isSummary := strings.Contains(lower, "passed") || strings.Contains(lower, "failed") || - strings.Contains(lower, "ok") && strings.Contains(lower, "test") || - strings.HasPrefix(lower, "failures:") || strings.HasPrefix(lower, "test result") - isFailLine := strings.Contains(lower, "fail") || strings.Contains(lower, "panic") || - strings.Contains(lower, "assert") || strings.Contains(lower, "expected") || - strings.Contains(lower, "got ") || strings.HasPrefix(line, "---") || - strings.HasPrefix(line, "FAIL") || strings.HasPrefix(line, "--- FAIL") - - if isSummary { - result = append(result, line) - inFailBlock = false - continue - } - if isFailLine { - result = append(result, line) - inFailBlock = true - continue - } - if inFailBlock && mode != ModeAggressive { - result = append(result, line) // context within fail block - } - } - return strings.Join(result, "\n") -} - -// compressCompileLog keeps error/warning lines; collapses repeated warnings. -func (f *CodingAgentContextFilter) compressCompileLog(lines []string, mode Mode) string { - var result []string - seenWarnings := make(map[string]int) // pattern → count - - for _, line := range lines { - lower := strings.ToLower(line) - isErr := strings.Contains(lower, "error") || strings.Contains(lower, "fatal") - isWarn := strings.Contains(lower, "warning") || strings.Contains(lower, "warn:") - isNote := strings.Contains(lower, "note:") || strings.Contains(lower, "help:") - isSummary := strings.HasPrefix(lower, "error[") || strings.Contains(lower, "aborting due") || - strings.Contains(lower, "build failed") || strings.Contains(lower, "build finished") - - if isErr || isSummary { - result = append(result, line) - continue - } - if isWarn { - // Normalise warning to its pattern (strip line numbers) - pattern := warnPattern(line) - seenWarnings[pattern]++ - if seenWarnings[pattern] == 1 { - result = append(result, line) - } else if seenWarnings[pattern] == 2 && mode != ModeAggressive { - result = append(result, line+" [repeated]") - } - continue - } - if isNote && mode != ModeAggressive { - result = append(result, line) - } - } - return strings.Join(result, "\n") -} - -// compressBashOutput keeps last tailLines lines (most recent = most relevant). -func (f *CodingAgentContextFilter) compressBashOutput(lines []string, mode Mode) string { - keep := f.tailLines - if mode == ModeAggressive { - keep /= 2 - } - if len(lines) <= keep { - return strings.Join(lines, "\n") - } - omitted := len(lines) - keep - stub := "[... " + itoa(omitted) + " lines omitted]\n" - return stub + strings.Join(lines[len(lines)-keep:], "\n") -} - -// compressSearchHits keeps at most maxResults matches per file path. -func (f *CodingAgentContextFilter) compressSearchHits(lines []string, mode Mode) string { - maxPer := f.maxResults - if mode == ModeAggressive { - maxPer = 1 - } - fileCounts := make(map[string]int) - var result []string - for _, line := range lines { - path := extractFilePath(line) - if path == "" { - result = append(result, line) - continue - } - fileCounts[path]++ - if fileCounts[path] <= maxPer { - result = append(result, line) - } - } - return strings.Join(result, "\n") -} - -// compressFileRead keeps head + tail, elides middle with a stub. -func (f *CodingAgentContextFilter) compressFileRead(lines []string, mode Mode) string { - head := f.headLines - tail := f.headLines - if mode == ModeAggressive { - head /= 2 - tail /= 2 - } - total := head + tail - if len(lines) <= total { - return strings.Join(lines, "\n") - } - omitted := len(lines) - total - stub := "... (" + itoa(omitted) + " lines omitted) ..." - result := make([]string, 0, head+tail+1) - result = append(result, lines[:head]...) - result = append(result, stub) - result = append(result, lines[len(lines)-tail:]...) - return strings.Join(result, "\n") -} - -// compressGeneric applies a simple keep-ratio to unlabeled output. -func (f *CodingAgentContextFilter) compressGeneric(lines []string, mode Mode) string { - ratio := f.baseRatio - if mode == ModeAggressive { - ratio *= 0.6 - } - keep := int(math.Ceil(float64(len(lines)) * ratio)) - if keep >= len(lines) { - return strings.Join(lines, "\n") - } - // Keep structurally important lines first, then fill to budget - type scored struct { - idx int - score float64 - } - scores := make([]scored, len(lines)) - for i, line := range lines { - scores[i] = scored{idx: i, score: structuralBonus(line)} - } - sort.Slice(scores, func(a, b int) bool { return scores[a].score > scores[b].score }) - - kept := make(map[int]bool) - for _, s := range scores[:keep] { - kept[s.idx] = true - } - var result []string - for i, line := range lines { - if kept[i] { - result = append(result, line) - } - } - return strings.Join(result, "\n") -} - -// -- helpers -- - -func firstN(lines []string, n int) []string { - if n > len(lines) { - n = len(lines) - } - return lines[:n] -} - -func looksLikeSearchResults(lines []string) bool { - matches := 0 - for _, line := range firstN(lines, 15) { - if colonIdx := strings.Index(line, ":"); colonIdx > 0 { - prefix := line[:colonIdx] - if strings.Contains(prefix, "/") || strings.HasSuffix(prefix, ".go") || - strings.HasSuffix(prefix, ".rs") || strings.HasSuffix(prefix, ".ts") { - matches++ - } - } - } - return matches >= 3 -} - -func isLikelyFileRead(lines []string) bool { - indented := 0 - for _, line := range firstN(lines, 20) { - if strings.HasPrefix(line, "\t") || strings.HasPrefix(line, " ") { - indented++ - } - } - return indented >= 8 -} - -func warnPattern(line string) string { - // Strip digits to normalize warning patterns - var b strings.Builder - for _, ch := range line { - if ch >= '0' && ch <= '9' { - b.WriteRune('#') - } else { - b.WriteRune(ch) - } - } - return b.String() -} - -func extractFilePath(line string) string { - idx := strings.Index(line, ":") - if idx <= 0 { - return "" - } - prefix := line[:idx] - if strings.Contains(prefix, "/") || strings.Contains(prefix, ".") { - return prefix - } - return "" -} diff --git a/internal/filter/content_route_strategy.go b/internal/filter/content_route_strategy.go index 7117f7e91..5c6327407 100644 --- a/internal/filter/content_route_strategy.go +++ b/internal/filter/content_route_strategy.go @@ -86,7 +86,7 @@ func (p *PipelineCoordinator) applyExtractivePrefilter(input string) (string, in continue } l := strings.ToLower(line) - if isErrorLine(line) || isWarningLine(line) || isCodeLine(line) || isReasoningLine(line) || epicIsCausalEdge(line) { + if isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { keep[i] = true signals++ continue diff --git a/internal/filter/critical_action_retain.go b/internal/filter/critical_action_retain.go deleted file mode 100644 index 869548e09..000000000 --- a/internal/filter/critical_action_retain.go +++ /dev/null @@ -1,216 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "CARL: Critical Action Focused Reinforcement Learning for Multi-Step Agent" -// arXiv:2512.04949 — 2025 -// -// CARLFilter identifies "critical" vs "non-critical" tool-call entries in -// agent output sequences and drops the non-critical ones. -// -// Criticality is defined as: did this action cause an observable state change? -// -// Critical: error output, file writes/deletes, test failures, non-empty diffs, -// non-zero exit codes, assertion failures, CRUD operations -// Non-critical: empty results, successful no-ops, pure info queries, -// repeated identical results, health checks -// -// CARL's key insight (from RL perspective): in a long agent trajectory, most -// actions are "maintenance" (checking state, listing files, echoing info) and -// carry no new causal information. Keeping only critical actions and their -// immediate context preserves the trajectory's causal skeleton at a fraction -// of the token cost. -type CARLFilter struct { - criticalPatterns []string - nonCriticalPatterns []string - contextLines int // lines of context to keep around critical entries - entryHeaderRe []string -} - -// NewCARLFilter creates a new CARL critical-action filter. -func NewCARLFilter() *CARLFilter { - return &CARLFilter{ - criticalPatterns: []string{ - "error", "fail", "failed", "exception", "panic", "fatal", - "assert", "expected", "got ", "mismatch", "undefined", - "permission denied", "no such file", "not found", - "exit code", "exit status", "returncode", - "created", "deleted", "removed", "written", "saved", - "diff --git", "--- a/", "+++ b/", "@@ -", "@@ +", - "test failed", "assertion failed", - }, - nonCriticalPatterns: []string{ - "(no output)", "(empty)", "total 0", - "nothing to commit", "up to date", - "ok\n", "ok \t", - "200 ok", "status: ok", "health: ok", - "already exists", "already up to date", - }, - contextLines: 2, - entryHeaderRe: []string{ - "tool:", "result:", "output:", "stdout:", "stderr:", - "", "", - }, - } -} - -// Name returns the filter name. -func (f *CARLFilter) Name() string { return "29_carl" } - -// Apply drops non-critical agent tool-call entries. -func (f *CARLFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - - // If not agent-like output, skip - if !f.looksLikeAgentOutput(lines) { - return input, 0 - } - - entries := f.parseEntries(lines) - if len(entries) < 2 { - return input, 0 - } - - critThreshold := 0.3 - if mode == ModeAggressive { - critThreshold = 0.5 - } - - suppress := make(map[int]bool) - for _, e := range entries { - score := f.criticalityScore(lines[e.start : e.end+1]) - if score < critThreshold { - for i := e.start; i <= e.end; i++ { - suppress[i] = true - } - } - } - - if len(suppress) == 0 { - return input, 0 - } - - var result []string - for i, line := range lines { - if !suppress[i] { - result = append(result, line) - } - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -type agentEntry struct{ start, end int } - -// parseEntries segments the output into tool-call result blocks. -func (f *CARLFilter) parseEntries(lines []string) []agentEntry { - var entries []agentEntry - inEntry := false - start := 0 - - for i, line := range lines { - lower := strings.ToLower(strings.TrimSpace(line)) - isHeader := false - for _, h := range f.entryHeaderRe { - if strings.HasPrefix(lower, h) { - isHeader = true - break - } - } - if isHeader { - if inEntry && i > start { - entries = append(entries, agentEntry{start, i - 1}) - } - start = i - inEntry = true - } - } - if inEntry { - entries = append(entries, agentEntry{start, len(lines) - 1}) - } - return entries -} - -// criticalityScore returns 0.0..1.0 for how critical an entry is. -func (f *CARLFilter) criticalityScore(lines []string) float64 { - if len(lines) == 0 { - return 0 - } - - text := strings.ToLower(strings.Join(lines, "\n")) - - // Non-critical patterns immediately signal low criticality - for _, p := range f.nonCriticalPatterns { - if strings.Contains(text, p) { - return 0.05 - } - } - - // Empty-ish entries - contentLines := 0 - for _, line := range lines { - if strings.TrimSpace(line) != "" { - contentLines++ - } - } - if contentLines <= 1 { - return 0.1 - } - - // Score by critical pattern hits - hits := 0 - for _, p := range f.criticalPatterns { - if strings.Contains(text, p) { - hits++ - } - } - - score := float64(hits) / float64(len(f.criticalPatterns)) - if score > 1.0 { - score = 1.0 - } - - // Bonus for diff markers or explicit error/fail lines - if strings.Contains(text, "error") || strings.Contains(text, "fail") { - score += 0.3 - } - if strings.Contains(text, "diff --git") || strings.Contains(text, "--- a/") || - strings.Contains(text, "+++ b/") { - score += 0.4 - } - if score > 1.0 { - score = 1.0 - } - return score -} - -// looksLikeAgentOutput returns true if the input seems to contain agent tool results. -func (f *CARLFilter) looksLikeAgentOutput(lines []string) bool { - count := 0 - for _, line := range lines { - lower := strings.ToLower(strings.TrimSpace(line)) - for _, h := range f.entryHeaderRe { - if strings.HasPrefix(lower, h) { - count++ - break - } - } - if count >= 2 { - return true - } - } - return false -} diff --git a/internal/filter/diff_fold_compress.go b/internal/filter/diff_fold_compress.go deleted file mode 100644 index 88e24c0d5..000000000 --- a/internal/filter/diff_fold_compress.go +++ /dev/null @@ -1,143 +0,0 @@ -package filter - -import ( - "fmt" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// DiffCrunchFilter compacts large diffs by pruning repetitive unchanged context lines. -type DiffCrunchFilter struct{} - -func NewDiffCrunchFilter() *DiffCrunchFilter { return &DiffCrunchFilter{} } - -func (f *DiffCrunchFilter) Name() string { return "48_diff_crunch" } - -type diffHunk struct { - header string - contextPre []string - changes []string - contextPost []string -} - -func (f *DiffCrunchFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - if len(lines) < 20 { - return input, 0 - } - if !looksLikeDiff(lines) { - return input, 0 - } - - hunks := parseUnifiedDiff(lines) - if len(hunks) == 0 { - return input, 0 - } - - contextWindow := 3 - if mode == ModeAggressive { - contextWindow = 2 - } - - out := make([]string, 0, len(lines)) - changed := false - - for _, hunk := range hunks { - out = append(out, hunk.header) - - // Fold pre-context - if len(hunk.contextPre) > contextWindow { - out = append(out, hunk.contextPre[:contextWindow]...) - out = append(out, fmt.Sprintf("[... %d context lines folded ...]", len(hunk.contextPre)-contextWindow)) - changed = true - } else { - out = append(out, hunk.contextPre...) - } - - // Always keep changes - out = append(out, hunk.changes...) - - // Fold post-context - if len(hunk.contextPost) > contextWindow { - out = append(out, hunk.contextPost[:contextWindow]...) - out = append(out, fmt.Sprintf("[... %d context lines folded ...]", len(hunk.contextPost)-contextWindow)) - changed = true - } else { - out = append(out, hunk.contextPost...) - } - } - - if !changed { - return input, 0 - } - - out = append(out, "[diff-crunch: context folded]") - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func parseUnifiedDiff(lines []string) []diffHunk { - hunks := []diffHunk{} - var current *diffHunk - inChanges := false - - for _, line := range lines { - // Hunk header - if strings.HasPrefix(line, "@@") { - if current != nil { - hunks = append(hunks, *current) - } - current = &diffHunk{header: line} - inChanges = false - continue - } - - if current == nil { - continue - } - - // File headers - if strings.HasPrefix(line, "diff --git") || strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { - current.header = line - continue - } - - // Changes - if strings.HasPrefix(line, "+") || strings.HasPrefix(line, "-") { - current.changes = append(current.changes, line) - inChanges = true - continue - } - - // Context lines - if !inChanges { - current.contextPre = append(current.contextPre, line) - } else { - current.contextPost = append(current.contextPost, line) - } - } - - if current != nil { - hunks = append(hunks, *current) - } - - return hunks -} - -func looksLikeDiff(lines []string) bool { - hits := 0 - for _, line := range lines { - if strings.HasPrefix(line, "diff --git") || strings.HasPrefix(line, "@@") || strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { - hits++ - } - } - return hits >= 2 -} diff --git a/internal/filter/difficulty_adaptive_compress.go b/internal/filter/difficulty_adaptive_compress.go deleted file mode 100644 index 8d8cb929f..000000000 --- a/internal/filter/difficulty_adaptive_compress.go +++ /dev/null @@ -1,187 +0,0 @@ -package filter - -import ( - "math" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "DiffAdapt: Difficulty-Adaptive Token Compression for LLM Inference" -// ICLR 2026 -// -// DiffAdaptFilter is a meta-controller that measures input "difficulty" — -// the structural complexity of the text — and scales the compression ratio -// applied downstream accordingly. -// -// Difficulty is estimated via three signals: -// 1. Vocabulary entropy: high entropy → rich, non-repetitive content → harder to compress -// 2. Nesting depth: indented blocks and bracket nesting → structured code/data -// 3. Average line length: longer lines tend to carry more information density -// -// The filter then prunes lines whose per-line information score falls below a -// difficulty-scaled threshold. High-difficulty inputs use a tighter threshold -// (preserve more); low-difficulty inputs use a looser threshold (compress more). -// -// This filter complements the BudgetEnforcer by acting BEFORE budget enforcement: -// it shapes the content distribution so the budget layer has better material to work -// with. Unlike static threshold filters, DiffAdapt adjusts dynamically per input. -type DiffAdaptFilter struct { - baseThreshold float64 // baseline per-line score threshold (difficulty=0.5) - minThreshold float64 // floor when input is very easy (low complexity) - maxThreshold float64 // ceiling when input is very hard (high complexity) -} - -// NewDiffAdaptFilter creates a new difficulty-adaptive compression filter. -func NewDiffAdaptFilter() *DiffAdaptFilter { - return &DiffAdaptFilter{ - baseThreshold: 0.30, - minThreshold: 0.15, - maxThreshold: 0.55, - } -} - -// Name returns the filter name. -func (f *DiffAdaptFilter) Name() string { return "31_difft_adapt" } - -// Apply measures input difficulty and prunes low-scoring lines adaptively. -func (f *DiffAdaptFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 8 { - return input, 0 - } - - difficulty := f.measureDifficulty(lines) - - // Scale threshold inversely with difficulty: - // difficulty=0.0 → maxThreshold (easy content → compress aggressively) - // difficulty=1.0 → minThreshold (hard content → compress conservatively) - threshold := f.maxThreshold - difficulty*(f.maxThreshold-f.minThreshold) - if mode == ModeAggressive { - threshold *= 0.75 // push thresholds down → more lines dropped - } - - // Score each line and drop below-threshold lines that aren't anchors - termFreq := daTermFrequency(lines) - var result []string - for _, line := range lines { - if strings.TrimSpace(line) == "" { - result = append(result, line) - continue - } - if isErrorLine(line) || isWarningLine(line) || isHeadingLine(line) { - result = append(result, line) - continue - } - score := daLineScore(line, termFreq, len(lines)) - if score >= threshold { - result = append(result, line) - } - } - - if len(result) == len(lines) { - return input, 0 - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// measureDifficulty returns a 0.0–1.0 score for input complexity. -func (f *DiffAdaptFilter) measureDifficulty(lines []string) float64 { - if len(lines) == 0 { - return 0.5 - } - - // Signal 1: vocabulary entropy - termCounts := make(map[string]int) - totalTerms := 0 - for _, line := range lines { - for _, t := range ltTokenize(line) { - termCounts[t]++ - totalTerms++ - } - } - entropy := 0.0 - if totalTerms > 0 { - for _, count := range termCounts { - p := float64(count) / float64(totalTerms) - if p > 0 { - entropy -= p * math.Log2(p) - } - } - } - // Normalise: most CLI output sits between 3-10 bits. Map to 0-1. - entropyScore := math.Min(entropy/10.0, 1.0) - - // Signal 2: nesting depth (indentation level) - totalDepth := 0 - for _, line := range lines { - if strings.TrimSpace(line) == "" { - continue - } - spaces := 0 - countSpaces: - for _, ch := range line { - switch ch { - case ' ': - spaces++ - case '\t': - spaces += 4 - default: - break countSpaces - } - } - totalDepth += spaces / 4 - } - avgDepth := float64(totalDepth) / float64(len(lines)) - nestScore := math.Min(avgDepth/6.0, 1.0) - - // Signal 3: average line length - totalLen := 0 - for _, line := range lines { - totalLen += len(line) - } - avgLen := float64(totalLen) / float64(len(lines)) - lenScore := math.Min(avgLen/120.0, 1.0) - - // Weighted combination - return 0.5*entropyScore + 0.3*nestScore + 0.2*lenScore -} - -// daTermFrequency builds a term frequency map across all lines. -func daTermFrequency(lines []string) map[string]int { - freq := make(map[string]int) - for _, line := range lines { - for _, t := range ltTokenize(line) { - freq[t]++ - } - } - return freq -} - -// daLineScore scores a line by its average inverse term frequency (rare terms = high score). -func daLineScore(line string, termFreq map[string]int, nLines int) float64 { - terms := ltTokenize(line) - if len(terms) == 0 { - return 0 - } - score := 0.0 - for _, t := range terms { - freq := termFreq[t] - if freq == 0 { - freq = 1 - } - // ITF: lines with rare terms score higher - score += 1.0 / float64(freq) - } - return score / float64(len(terms)) * float64(nLines) / 10.0 -} diff --git a/internal/filter/graph_reasoning_compress.go b/internal/filter/graph_reasoning_compress.go deleted file mode 100644 index 891a032cb..000000000 --- a/internal/filter/graph_reasoning_compress.go +++ /dev/null @@ -1,116 +0,0 @@ -package filter - -import ( - "math" - "sort" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// GraphCoTFilter keeps high-centrality reasoning lines in long traces. -type GraphCoTFilter struct { - targetRatio float64 -} - -// NewGraphCoTFilter creates a graph-CoT style filter. -func NewGraphCoTFilter() *GraphCoTFilter { - return &GraphCoTFilter{targetRatio: 0.55} -} - -// Name returns the filter name. -func (f *GraphCoTFilter) Name() string { return "38_graph_cot" } - -// Apply scores reasoning lines and keeps high-centrality nodes. -func (f *GraphCoTFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 8 || !epicLooksLikeCoT(lines) { - return input, 0 - } - - ratio := f.targetRatio - if mode == ModeAggressive { - ratio = 0.40 - } - target := int(math.Ceil(float64(len(lines)) * ratio)) - if target < 2 { - target = 2 - } - - termFreq := daTermFrequency(lines) - type cand struct { - idx int - score float64 - } - cands := make([]cand, 0, len(lines)) - for i, line := range lines { - cands = append(cands, cand{idx: i, score: graphLineScore(line, termFreq, len(lines))}) - } - sort.Slice(cands, func(i, j int) bool { return cands[i].score > cands[j].score }) - - keep := make(map[int]bool, target) - for i := 0; i < len(cands) && len(keep) < target; i++ { - keep[cands[i].idx] = true - } - // Anchors - for i, line := range lines { - if strings.TrimSpace(line) != "" { - keep[i] = true - break - } - } - for i := len(lines) - 1; i >= 0; i-- { - if strings.TrimSpace(lines[i]) != "" { - keep[i] = true - break - } - } - - var out []string - for i, line := range lines { - if keep[i] { - out = append(out, line) - } - } - - if len(out) >= len(lines) { - return input, 0 - } - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func graphLineScore(line string, termFreq map[string]int, nLines int) float64 { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - return 0 - } - score := 0.0 - if isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { - score += 3.0 - } - if epicIsCausalEdge(trimmed) || isReasoningLine(line) { - score += 1.5 - } - terms := ltTokenize(line) - if len(terms) > 0 { - rare := 0.0 - for _, t := range terms { - f := termFreq[t] - if f <= 0 { - f = 1 - } - rare += 1.0 / float64(f) - } - score += rare * (float64(nLines) / 10.0) / float64(len(terms)) - } - return score -} diff --git a/internal/filter/group_merge_semantic_align.go b/internal/filter/group_merge_semantic_align.go deleted file mode 100644 index 1b0ba888d..000000000 --- a/internal/filter/group_merge_semantic_align.go +++ /dev/null @@ -1,272 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "GMSA: Enhancing Context Compression via Group Merging and Layer Semantic Alignment" -// arXiv:2505.12215 — 2025 -// -// GMSAFilter operates at paragraph/chunk level (blank-line separated blocks), -// complementing NearDedupFilter (line-level) and PerceptionCompressFilter (window-level). -// -// Two phases: -// 1. Group Merging — cluster similar chunks by term-overlap similarity, collapse -// each cluster to its best representative with a count annotation. -// 2. Semantic Alignment — after merging, reorder the surviving chunks so that -// "anchor" content (errors, headings, key results) floats to the top, -// maximizing information density in the region most attended by LLMs. -// -// Key insight: repeated paragraph-length explanations of the same concept -// (common in verbose documentation, long error reports, and agentic outputs) -// produce more waste than repeated individual lines, and require chunk-level -// detection that line-level filters miss. -type GMSAFilter struct { - similarityThreshold float64 // min term-overlap fraction to group chunks - minChunkLines int // chunks shorter than this are never merged - alignEnabled bool // whether to apply semantic alignment phase -} - -// NewGMSAFilter creates a new GMSA group-merge + semantic-alignment filter. -func NewGMSAFilter() *GMSAFilter { - return &GMSAFilter{ - similarityThreshold: 0.40, // containment-based; lower = more aggressive merging - minChunkLines: 3, - alignEnabled: true, - } -} - -// Name returns the filter name. -func (f *GMSAFilter) Name() string { return "28_gmsa" } - -// Apply applies group merging and semantic alignment. -func (f *GMSAFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - thresh := f.similarityThreshold - if mode == ModeAggressive { - thresh = 0.40 - } - - chunks := f.splitChunks(input) - if len(chunks) < 2 { - return input, 0 - } - - // Phase 1: Group Merging - merged := f.groupMerge(chunks, thresh) - - // Phase 2: Semantic Alignment - if f.alignEnabled { - merged = f.semanticAlign(merged) - } - - output := f.joinChunks(merged) - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -type textChunk struct { - lines []string - termSet map[string]bool - anchor float64 // structural importance score - suppressed bool - annotation string // e.g. "[+2 similar chunks merged]" -} - -// splitChunks splits input on blank lines into paragraph-sized chunks. -func (f *GMSAFilter) splitChunks(input string) []*textChunk { - lines := strings.Split(input, "\n") - var chunks []*textChunk - var cur []string - - flush := func() { - if len(cur) >= f.minChunkLines { - c := &textChunk{lines: append([]string{}, cur...)} - c.termSet = gmsaTermSet(cur) - c.anchor = gmsaAnchorScore(cur) - chunks = append(chunks, c) - } else if len(cur) > 0 { - // short chunk: add as single-line pass-through with special flag - c := &textChunk{lines: append([]string{}, cur...), anchor: 1.0} - c.termSet = gmsaTermSet(cur) - chunks = append(chunks, c) - } - cur = cur[:0] - } - - for _, line := range lines { - if strings.TrimSpace(line) == "" { - flush() - } else { - cur = append(cur, line) - } - } - flush() - return chunks -} - -// groupMerge clusters similar chunks and keeps best representatives. -func (f *GMSAFilter) groupMerge(chunks []*textChunk, threshold float64) []*textChunk { - n := len(chunks) - parent := make([]int, n) - for i := range parent { - parent[i] = i - } - var find func(int) int - find = func(x int) int { - if parent[x] != x { - parent[x] = find(parent[x]) - } - return parent[x] - } - union := func(x, y int) { - px, py := find(x), find(y) - if px != py { - parent[px] = py - } - } - - // Only consider chunks that are large enough to merge - for i := 0; i < n; i++ { - if len(chunks[i].lines) < f.minChunkLines { - continue - } - // Look ahead up to 10 chunks - limit := i + 10 - if limit > n { - limit = n - } - for j := i + 1; j < limit; j++ { - if len(chunks[j].lines) < f.minChunkLines { - continue - } - if gmsaOverlap(chunks[i].termSet, chunks[j].termSet) >= threshold { - union(i, j) - } - } - } - - // Build cluster groups - groups := make(map[int][]int) - for i := range chunks { - root := find(i) - groups[root] = append(groups[root], i) - } - - // For clusters with ≥ 2 members, keep the best (highest anchor score + most terms) - for _, members := range groups { - if len(members) < 2 { - continue - } - bestIdx := members[0] - for _, idx := range members[1:] { - c := chunks[idx] - best := chunks[bestIdx] - if c.anchor > best.anchor || (c.anchor == best.anchor && len(c.termSet) > len(best.termSet)) { - bestIdx = idx - } - } - count := len(members) - 1 - chunks[bestIdx].annotation = "[+" + itoa(count) + " similar chunks merged]" - for _, idx := range members { - if idx != bestIdx { - chunks[idx].suppressed = true - } - } - } - - var result []*textChunk - for _, c := range chunks { - if !c.suppressed { - result = append(result, c) - } - } - return result -} - -// semanticAlign reorders surviving chunks: anchors (errors/headings/results) first. -func (f *GMSAFilter) semanticAlign(chunks []*textChunk) []*textChunk { - // Stable sort: high-anchor chunks move to front, low-anchor to back - // Use insertion sort to preserve relative order within tiers - n := len(chunks) - for i := 1; i < n; i++ { - for j := i; j > 0 && chunks[j].anchor > chunks[j-1].anchor+0.5; j-- { - chunks[j], chunks[j-1] = chunks[j-1], chunks[j] - } - } - return chunks -} - -// joinChunks reassembles chunks into a string with blank-line separators. -func (f *GMSAFilter) joinChunks(chunks []*textChunk) string { - var parts []string - for _, c := range chunks { - block := strings.Join(c.lines, "\n") - if c.annotation != "" { - block += "\n" + c.annotation - } - parts = append(parts, block) - } - return strings.Join(parts, "\n\n") -} - -// -- helpers -- - -func gmsaTermSet(lines []string) map[string]bool { - set := make(map[string]bool) - for _, line := range lines { - for _, t := range ltTokenize(line) { - set[t] = true - } - } - return set -} - -func gmsaOverlap(a, b map[string]bool) float64 { - if len(a) == 0 || len(b) == 0 { - return 0 - } - shared := 0 - for t := range a { - if b[t] { - shared++ - } - } - // Containment similarity: shared / min(|A|, |B|) - // Better than Jaccard for detecting when one chunk is a paraphrase of another, - // since paraphrases use synonyms and the smaller set is more constrained. - smaller := len(a) - if len(b) < smaller { - smaller = len(b) - } - if smaller == 0 { - return 0 - } - return float64(shared) / float64(smaller) -} - -func gmsaAnchorScore(lines []string) float64 { - score := 0.0 - for _, line := range lines { - switch { - case isErrorLine(line) || isWarningLine(line): - score += 3.0 - case isHeadingLine(line): - score += 2.0 - case isCodeLine(line): - score += 0.5 - } - } - if len(lines) > 0 { - score /= float64(len(lines)) - } - return score -} diff --git a/internal/filter/json_stream_sampler.go b/internal/filter/json_stream_sampler.go deleted file mode 100644 index ac8d8aa2a..000000000 --- a/internal/filter/json_stream_sampler.go +++ /dev/null @@ -1,80 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// JSONSamplerFilter down-samples dense JSON line streams while preserving anchors. -type JSONSamplerFilter struct{} - -func NewJSONSamplerFilter() *JSONSamplerFilter { return &JSONSamplerFilter{} } - -func (f *JSONSamplerFilter) Name() string { return "45_json_sampler" } - -func (f *JSONSamplerFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - if len(lines) < 20 { - return input, 0 - } - - jsonLike := 0 - for _, line := range lines { - if isJSONLikeLine(line) { - jsonLike++ - } - } - if float64(jsonLike)/float64(len(lines)) < 0.55 { - return input, 0 - } - - stride := 4 - if mode == ModeAggressive { - stride = 6 - } - - out := make([]string, 0, len(lines)/2) - for i, line := range lines { - trim := strings.TrimSpace(line) - if i < 4 || i >= len(lines)-4 || isErrorLine(line) || isWarningLine(line) { - out = append(out, line) - continue - } - if !isJSONLikeLine(line) { - out = append(out, line) - continue - } - if i%stride == 0 || strings.Contains(trim, "\"error\"") { - out = append(out, line) - } - } - - if len(out) >= len(lines) { - return input, 0 - } - out = append(out, "[json-sampler: sampled JSON lines]") - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func isJSONLikeLine(line string) bool { - trim := strings.TrimSpace(line) - if trim == "" { - return false - } - if strings.HasPrefix(trim, "{") || strings.HasPrefix(trim, "[") || strings.HasPrefix(trim, "}") || strings.HasPrefix(trim, "]") { - return true - } - if strings.Contains(trim, "\":") || strings.Contains(trim, "\",") { - return true - } - return false -} diff --git a/internal/filter/latent_collab_collapse.go b/internal/filter/latent_collab_collapse.go deleted file mode 100644 index 1afa0a71d..000000000 --- a/internal/filter/latent_collab_collapse.go +++ /dev/null @@ -1,108 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// LatentCollabFilter approximates latent-space collaboration by collapsing -// semantically equivalent multi-agent turns into compact markers. -type LatentCollabFilter struct { - similarityThreshold float64 -} - -// NewLatentCollabFilter creates a latent-collaboration inspired filter. -func NewLatentCollabFilter() *LatentCollabFilter { - return &LatentCollabFilter{similarityThreshold: 0.62} -} - -// Name returns the filter name. -func (f *LatentCollabFilter) Name() string { return "37_latent_collab" } - -// Apply merges highly similar adjacent agent turns. -func (f *LatentCollabFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - turns := parseRoleTurns(lines) - if len(turns) < 2 { - return input, 0 - } - - thresh := f.similarityThreshold - if mode == ModeAggressive { - thresh = 0.62 - } - - type signature struct { - role string - terms map[string]bool - } - var kept []signature - var out []string - changed := false - - for _, t := range turns { - segment := lines[t.start : t.end+1] - terms := latentTermSet(segment) - if len(terms) == 0 { - out = append(out, segment...) - continue - } - - merged := false - for i := len(kept) - 1; i >= 0; i-- { - if kept[i].role != t.role { - continue - } - if jaccardOverlap(kept[i].terms, terms) >= thresh { - out = append(out, lines[t.start]) - out = append(out, "[latent-collab: merged]") - merged = true - changed = true - break - } - } - if merged { - continue - } - - out = append(out, segment...) - kept = append(kept, signature{role: t.role, terms: terms}) - } - - if !changed { - return input, 0 - } - - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func latentTermSet(lines []string) map[string]bool { - set := make(map[string]bool) - for _, line := range lines { - if strings.TrimSpace(line) == "" { - continue - } - if isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { - for _, t := range ltTokenize(line) { - set[t] = true - } - continue - } - for _, t := range ltTokenize(line) { - if len(t) >= 4 { - set[t] = true - } - } - } - return set -} diff --git a/internal/filter/layers_37_80.go b/internal/filter/layers_37_80.go deleted file mode 100644 index f14238f64..000000000 --- a/internal/filter/layers_37_80.go +++ /dev/null @@ -1,150 +0,0 @@ -package filter - -// Task 37: CRF-based goal-driven selection -type CRFGoalDriven struct{ weights []float64 } - -func NewCRFGoalDriven() *CRFGoalDriven { return &CRFGoalDriven{weights: make([]float64, 10)} } -func (c *CRFGoalDriven) Score(line string) float64 { return 0.5 } - -// Task 38: Layer skip prediction -type LayerSkipPredictor struct{ skipProb map[int]float64 } - -func NewLayerSkipPredictor() *LayerSkipPredictor { - return &LayerSkipPredictor{skipProb: make(map[int]float64)} -} -func (l *LayerSkipPredictor) ShouldSkip(layerID int) bool { return l.skipProb[layerID] > 0.8 } - -// Task 39: Compression budget allocator -type BudgetAllocator struct{ budgets map[int]int } - -func NewBudgetAllocator() *BudgetAllocator { return &BudgetAllocator{budgets: make(map[int]int)} } -func (b *BudgetAllocator) Allocate(layerID, budget int) { b.budgets[layerID] = budget } - -// Task 40: Real-time compression metrics -type RealtimeMetrics struct{ rate float64 } - -func NewRealtimeMetrics() *RealtimeMetrics { return &RealtimeMetrics{} } -func (r *RealtimeMetrics) Update(ratio float64) { r.rate = ratio } - -// Task 41-60: Enhanced layer algorithms -type EnhancedEntropy struct{} - -func (e *EnhancedEntropy) Calculate(data []byte) float64 { return 0.5 } - -type BeamSearchPerplexity struct{ width int } - -func NewBeamSearchPerplexity(w int) *BeamSearchPerplexity { return &BeamSearchPerplexity{width: w} } - -type MultiLangAST struct{ parsers map[string]interface{} } - -func NewMultiLangAST() *MultiLangAST { return &MultiLangAST{parsers: make(map[string]interface{})} } - -type EmbeddingContrastive struct{} //nolint:unused // placeholder for future model integration - -func NewEmbeddingContrastive() *EmbeddingContrastive { return &EmbeddingContrastive{} } - -type VariableNGram struct{ minN, maxN int } - -func NewVariableNGram(min, max int) *VariableNGram { return &VariableNGram{minN: min, maxN: max} } - -type TrainedEvaluatorHeads struct{} //nolint:unused // placeholder for future model integration - -func NewTrainedEvaluatorHeads() *TrainedEvaluatorHeads { return &TrainedEvaluatorHeads{} } - -type CodeGist struct{ embeddings map[string][]float64 } - -func NewCodeGist() *CodeGist { return &CodeGist{embeddings: make(map[string][]float64)} } - -type ConfigurableHierarchical struct{ depth int } - -func NewConfigurableHierarchical(d int) *ConfigurableHierarchical { - return &ConfigurableHierarchical{depth: d} -} - -type SoftBudget struct{ limit, overflow int } - -func NewSoftBudget(l, o int) *SoftBudget { return &SoftBudget{limit: l, overflow: o} } - -type EmbeddingCompaction struct{} //nolint:unused // placeholder for future transformer integration - -func NewEmbeddingCompaction() *EmbeddingCompaction { return &EmbeddingCompaction{} } - -// Task 61-80: New research layers -type MarginalInfoGain struct{} - -func (m *MarginalInfoGain) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type MinHashDedup struct{ hashes []uint64 } - -func NewMinHashDedup() *MinHashDedup { return &MinHashDedup{hashes: make([]uint64, 0)} } -func (m *MinHashDedup) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type CoTCompressor struct{} - -func (c *CoTCompressor) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type CodingAgentContext struct{} - -func (c *CodingAgentContext) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type PerceptionCompress struct{} - -func (p *PerceptionCompress) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type LightThinker struct{} - -func (l *LightThinker) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type ThinkSwitcher struct{ routes map[string]Filter } - -func NewThinkSwitcher() *ThinkSwitcher { return &ThinkSwitcher{routes: make(map[string]Filter)} } -func (t *ThinkSwitcher) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type GMSA struct{} - -func (g *GMSA) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type CARL struct{} - -func (c *CARL) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type SlimInfer struct{} - -func (s *SlimInfer) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type SSDP struct{} - -func (s *SSDP) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type DiffAdapt struct{} - -func (d *DiffAdapt) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type EPiC struct{} - -func (e *EPiC) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type TDD struct{} - -func (t *TDD) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type TOON struct{} - -func (t *TOON) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type EnhancedPhotonFilter struct{} - -func (e *EnhancedPhotonFilter) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type S2MAD struct{} - -func (s *S2MAD) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type LightMem struct{} - -func (l *LightMem) Apply(input string, mode Mode) (string, int) { return input, 0 } - -type PathShorten struct{ aliases map[string]string } - -func NewPathShorten() *PathShorten { return &PathShorten{aliases: make(map[string]string)} } -func (p *PathShorten) Apply(input string, mode Mode) (string, int) { return input, 0 } diff --git a/internal/filter/log_fold_compress.go b/internal/filter/log_fold_compress.go deleted file mode 100644 index 8a3a3100c..000000000 --- a/internal/filter/log_fold_compress.go +++ /dev/null @@ -1,143 +0,0 @@ -package filter - -import ( - "regexp" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// LogCrunchFilter folds repetitive INFO/DEBUG logs while preserving warnings/errors and state transitions. -type LogCrunchFilter struct { - normalizeTimestamps bool -} - -func NewLogCrunchFilter() *LogCrunchFilter { - return &LogCrunchFilter{normalizeTimestamps: true} -} - -func (f *LogCrunchFilter) Name() string { return "46_log_crunch" } - -var ( - stackFramePattern = regexp.MustCompile(`(?:^\s+at\s+|^\s+File\s+"|^\s+in\s+\w|Traceback|goroutine\s+\d+)`) - stackIndentPattern = regexp.MustCompile(`^(\s{2,}|\t)`) - timestampPattern = regexp.MustCompile(`\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?`) -) - -func (f *LogCrunchFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - if len(lines) < 20 { - return input, 0 - } - - if f.normalizeTimestamps { - lines = normalizeTimestamps(lines) - } - - out := compressLogLines(lines, mode) - if len(out) == len(lines) { - return input, 0 - } - - out = append(out, "[log-crunch: repetitive logs folded]") - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func compressLogLines(lines []string, mode Mode) []string { - out := make([]string, 0, len(lines)) - inTrace := false - runNorm := "" - runLines := []string{} - runCount := 0 - - flushRun := func() { - if len(runLines) == 0 { - return - } - if runCount >= 3 { - out = append(out, runLines[0]) - out = append(out, " [... repeated "+string(rune(runCount-2+48))+" more times ...]") - out = append(out, runLines[len(runLines)-1]) - } else { - out = append(out, runLines...) - } - runLines = nil - runCount = 0 - runNorm = "" - } - - for i := 0; i < len(lines); i++ { - line := lines[i] - trim := strings.TrimSpace(line) - if trim == "" { - continue - } - - // Detect stack trace start - if !inTrace && stackFramePattern.MatchString(line) { - flushRun() - trace := []string{line} - // Collect continuation lines - var lastJ int - for j := i + 1; j < len(lines); j++ { - if stackIndentPattern.MatchString(lines[j]) || stackFramePattern.MatchString(lines[j]) { - trace = append(trace, lines[j]) - lastJ = j - } else { - break - } - } - i = lastJ - out = append(out, trace...) - continue - } - - // Always keep errors/warnings - if isErrorLine(line) || isWarningLine(line) { - flushRun() - out = append(out, line) - continue - } - - // Collapse repetitive INFO/DEBUG - norm := normalizeLogLine(line) - if norm == runNorm { - runLines = append(runLines, line) - runCount++ - } else { - flushRun() - runNorm = norm - runLines = []string{line} - runCount = 1 - } - } - - flushRun() - return out -} - -func normalizeTimestamps(lines []string) []string { - result := make([]string, len(lines)) - for i, line := range lines { - result[i] = timestampPattern.ReplaceAllString(line, "[+T]") - } - return result -} - -func normalizeLogLine(line string) string { - lower := strings.ToLower(strings.TrimSpace(line)) - lower = strings.ReplaceAll(lower, "\t", " ") - parts := strings.Fields(lower) - if len(parts) > 8 { - parts = parts[:8] - } - return strings.Join(parts, " ") -} diff --git a/internal/filter/marginal_info_gain.go b/internal/filter/marginal_info_gain.go deleted file mode 100644 index 84eb704e7..000000000 --- a/internal/filter/marginal_info_gain.go +++ /dev/null @@ -1,230 +0,0 @@ -package filter - -import ( - "math" - "sort" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "COMI: Coarse-to-fine Context Compression via Marginal Information Gain" -// 2026 — scores each line by how much NEW information it contributes to the -// already-retained set, rather than scoring lines in isolation. -// -// Key insight: lines that repeat already-covered terms add zero marginal gain, -// so they can be dropped even if individually "important." -// -// Algorithm: -// 1. Build global TF map; identify discriminative terms (frequent but not ubiquitous) -// 2. Assign each line a term-set covering its discriminative terms -// 3. Greedy selection: rank lines by marginal_gain / token_cost, apply structural bonus -// 4. Fill token budget top-down; anchor first and last non-empty lines -type MarginalInfoGainFilter struct { - targetRatio float64 - minTermFreq int - stopWords map[string]bool -} - -// NewMarginalInfoGainFilter creates a new COMI-style marginal information gain filter. -func NewMarginalInfoGainFilter() *MarginalInfoGainFilter { - return &MarginalInfoGainFilter{ - targetRatio: 0.55, - minTermFreq: 2, - stopWords: migStopWords(), - } -} - -// Name returns the filter name. -func (f *MarginalInfoGainFilter) Name() string { return "21_marginal_info_gain" } - -// Apply selects lines that maximize marginal information gain within a token budget. -func (f *MarginalInfoGainFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 5 { - return input, 0 - } - - ratio := f.targetRatio - if mode == ModeAggressive { - ratio *= 0.7 - } - - budget := int(math.Ceil(float64(core.EstimateTokens(input)) * ratio)) - - globalFreq := f.buildTermFreq(lines) - lineTerms := f.buildLineTermSets(lines, globalFreq) - kept := f.greedySelect(lines, lineTerms, budget) - - var result []string - for i, line := range lines { - if kept[i] { - result = append(result, line) - } - } - if len(result) == 0 { - return input, 0 - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func (f *MarginalInfoGainFilter) buildTermFreq(lines []string) map[string]int { - freq := make(map[string]int) - for _, line := range lines { - for _, t := range f.tokenizeLine(line) { - freq[t]++ - } - } - return freq -} - -func (f *MarginalInfoGainFilter) buildLineTermSets(lines []string, freq map[string]int) []map[string]bool { - n := len(lines) - sets := make([]map[string]bool, n) - for i, line := range lines { - set := make(map[string]bool) - for _, t := range f.tokenizeLine(line) { - tf := freq[t] - // Keep discriminative terms: appear ≥ minTermFreq times but in fewer than half the lines - if (tf >= f.minTermFreq && tf < n/2) || tf == 1 { - set[t] = true - } - } - sets[i] = set - } - return sets -} - -type migCandidate struct { - idx int - score float64 // marginal_gain * structural_bonus / token_cost -} - -func (f *MarginalInfoGainFilter) greedySelect(lines []string, lineTerms []map[string]bool, budget int) map[int]bool { - kept := make(map[int]bool) - covered := make(map[string]bool) - used := 0 - - // Anchor: first non-empty line - for i, line := range lines { - if strings.TrimSpace(line) != "" { - kept[i] = true - used += core.EstimateTokens(line) - for t := range lineTerms[i] { - covered[t] = true - } - break - } - } - // Anchor: last non-empty line - for i := len(lines) - 1; i >= 0; i-- { - if strings.TrimSpace(lines[i]) != "" && !kept[i] { - kept[i] = true - used += core.EstimateTokens(lines[i]) - for t := range lineTerms[i] { - covered[t] = true - } - break - } - } - - // Score remaining candidates - candidates := make([]migCandidate, 0, len(lines)) - for i, line := range lines { - if kept[i] || strings.TrimSpace(line) == "" { - continue - } - toks := core.EstimateTokens(line) - if toks == 0 { - toks = 1 - } - gain := marginalGain(lineTerms[i], covered) - bonus := structuralBonus(line) - candidates = append(candidates, migCandidate{idx: i, score: (gain * bonus) / float64(toks)}) - } - - sort.Slice(candidates, func(a, b int) bool { - return candidates[a].score > candidates[b].score - }) - - for _, c := range candidates { - if used >= budget { - break - } - toks := core.EstimateTokens(lines[c.idx]) - kept[c.idx] = true - used += toks - for t := range lineTerms[c.idx] { - covered[t] = true - } - } - return kept -} - -func marginalGain(terms map[string]bool, covered map[string]bool) float64 { - gain := 0.0 - for t := range terms { - if !covered[t] { - gain++ - } - } - return gain + 0.1 // small floor so zero-gain lines can still win via structural bonus -} - -func structuralBonus(line string) float64 { - if isErrorLine(line) || isWarningLine(line) { - return 3.0 - } - if isHeadingLine(line) { - return 1.8 - } - if isCodeLine(line) { - return 1.2 - } - return 1.0 -} - -func (f *MarginalInfoGainFilter) tokenizeLine(line string) []string { - var terms []string - var word strings.Builder - for _, ch := range strings.ToLower(line) { - if (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '_' { - word.WriteRune(ch) - } else if word.Len() > 0 { - w := word.String() - if len(w) > 2 && !f.stopWords[w] { - terms = append(terms, w) - } - word.Reset() - } - } - if word.Len() > 2 { - if w := word.String(); !f.stopWords[w] { - terms = append(terms, w) - } - } - return terms -} - -func migStopWords() map[string]bool { - sw := map[string]bool{} - for _, w := range []string{ - "the", "and", "for", "that", "this", "with", "from", "are", "was", - "were", "has", "have", "had", "not", "but", "its", "can", "will", - "all", "any", "one", "more", "also", "when", "then", "than", "too", - "use", "used", "using", "new", "get", "set", "add", "let", "var", - } { - sw[w] = true - } - return sw -} diff --git a/internal/filter/memory_augment_compress.go b/internal/filter/memory_augment_compress.go deleted file mode 100644 index 42b349bcb..000000000 --- a/internal/filter/memory_augment_compress.go +++ /dev/null @@ -1,76 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// LightMemFilter reuses previously seen high-signal facts with short references. -type LightMemFilter struct{} - -// NewLightMemFilter creates the lightweight memory-augmentation filter. -func NewLightMemFilter() *LightMemFilter { return &LightMemFilter{} } - -// Name returns the filter name. -func (f *LightMemFilter) Name() string { return "43_lightmem" } - -// Apply detects repeated facts and replaces duplicates with lightweight references. -func (f *LightMemFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 8 { - return input, 0 - } - - seen := make(map[string]int, 16) - out := make([]string, 0, len(lines)) - changed := false - memID := 1 - for _, line := range lines { - norm := lightMemNormalize(line) - if norm == "" { - out = append(out, line) - continue - } - if id, ok := seen[norm]; ok { - out = append(out, "[lightmem: reuse #"+itoa(id)+"]") - changed = true - continue - } - seen[norm] = memID - memID++ - out = append(out, line) - } - - if !changed { - return input, 0 - } - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func lightMemNormalize(line string) string { - trimmed := strings.ToLower(strings.TrimSpace(line)) - if trimmed == "" { - return "" - } - if !isErrorLine(trimmed) && !isWarningLine(trimmed) && !isCodeLine(line) && !strings.Contains(trimmed, ":") && !strings.Contains(trimmed, "file") && !strings.Contains(trimmed, "path") { - return "" - } - toks := ltTokenize(trimmed) - if len(toks) < 3 { - return "" - } - if len(toks) > 12 { - toks = toks[:12] - } - return strings.Join(toks, " ") -} diff --git a/internal/filter/multi_agent_debate_collapse.go b/internal/filter/multi_agent_debate_collapse.go deleted file mode 100644 index 78c65fcdf..000000000 --- a/internal/filter/multi_agent_debate_collapse.go +++ /dev/null @@ -1,154 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "S2-MAD: Semantic-Similarity Multi-Agent Debate Compression" -// NAACL 2025 -// -// S2MADFilter detects agreement phrases in multi-agent debate or review outputs -// ("I agree with", "As X mentioned", "Building on that", "This is correct") and -// collapses those agreement passages into compact markers while preserving the -// novel arguments in each agent turn. -// -// Multi-agent debate outputs are common in: -// - LLM self-critique and revision loops -// - Peer-review style agent pipelines -// - RAG reranker debate outputs -// -// The filter operates in two stages: -// 1. Passage scoring: each line is checked for agreement/acknowledgement markers. -// Lines with such markers score near 0; lines with novel claims score near 1. -// 2. Agreement run collapsing: consecutive agreement-heavy lines are merged into -// a single "[agreement: N lines]" marker, preserving surrounding novel content. -type S2MADFilter struct { - agreementThreshold float64 // agreement-marker density to trigger collapsing - minRunLength int // minimum run of agreement lines to collapse -} - -// NewS2MADFilter creates a new S2-MAD multi-agent debate compression filter. -func NewS2MADFilter() *S2MADFilter { - return &S2MADFilter{ - agreementThreshold: 0.5, - minRunLength: 2, - } -} - -// Name returns the filter name. -func (f *S2MADFilter) Name() string { return "35_s2_mad" } - -// Apply collapses agreement passages and preserves novel arguments. -func (f *S2MADFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 4 { - return input, 0 - } - - if !s2madLooksLikeDebate(lines) { - return input, 0 - } - - minRun := f.minRunLength - if mode == ModeAggressive { - minRun = 1 - } - - // Mark agreement lines - isAgreement := make([]bool, len(lines)) - for i, line := range lines { - isAgreement[i] = s2madIsAgreementLine(line) - } - - // Collapse consecutive agreement runs of length ≥ minRun - var result []string - i := 0 - changed := false - for i < len(lines) { - if isAgreement[i] && strings.TrimSpace(lines[i]) != "" { - // Measure run length - j := i - for j < len(lines) && isAgreement[j] { - j++ - } - runLen := j - i - if runLen >= minRun { - result = append(result, "[agreement: "+itoa(runLen)+" lines collapsed]") - i = j - changed = true - continue - } - } - result = append(result, lines[i]) - i++ - } - - if !changed { - return input, 0 - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// agreementPhrases are markers that indicate a line is acknowledging/agreeing. -var s2madAgreementPhrases = []string{ - "i agree", "i agree with", "agreed,", "i concur", - "as you mentioned", "as mentioned", "as stated", - "as noted", "as pointed out", "as highlighted", - "building on that", "building on this", - "you are correct", "that is correct", "this is correct", - "that's right", "that's a good point", "good point", - "this aligns with", "this is consistent with", - "echoing the", "supporting the view", - "i think you're right", "i think that's right", - "same as above", "similar to what", - "to add to that", "adding to what", -} - -// s2madIsAgreementLine returns true if the line is primarily an agreement expression. -func s2madIsAgreementLine(line string) bool { - lower := strings.ToLower(strings.TrimSpace(line)) - if lower == "" { - return false - } - for _, phrase := range s2madAgreementPhrases { - if strings.Contains(lower, phrase) { - return true - } - } - return false -} - -// s2madLooksLikeDebate returns true if the input resembles debate/review output. -func s2madLooksLikeDebate(lines []string) bool { - agreementCount := 0 - speakerCount := 0 - speakerMarkers := []string{ - "agent", "model", "reviewer", "critic", "expert", - "assistant", "debater", "participant", - } - for _, line := range lines { - lower := strings.ToLower(strings.TrimSpace(line)) - if s2madIsAgreementLine(line) { - agreementCount++ - } - for _, m := range speakerMarkers { - if strings.HasPrefix(lower, m) && (strings.Contains(lower, ":") || strings.Contains(lower, " 1") || strings.Contains(lower, " 2")) { - speakerCount++ - break - } - } - } - return agreementCount >= 2 || speakerCount >= 2 -} diff --git a/internal/filter/near_duplicate_collapse.go b/internal/filter/near_duplicate_collapse.go deleted file mode 100644 index d8e16ecd5..000000000 --- a/internal/filter/near_duplicate_collapse.go +++ /dev/null @@ -1,161 +0,0 @@ -package filter - -import ( - "fmt" - "sort" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "DART: Stop Looking for Important Tokens, Duplication Matters More" -// EMNLP 2025 — Kim et al., KAIST -// -// Key finding: aggressively collapsing near-duplicate content consistently -// outperforms importance-based selection across benchmarks, because LLMs -// are hurt more by seeing the same information N times than by losing one -// "important" token. -// -// NearDedupFilter groups near-duplicate lines (within a single output) -// using SimHash fingerprints and Hamming distance, then collapses each -// cluster to its most informative representative with a count annotation. -// -// Typical wins: repeated cargo/clippy warnings, stacked log lines with -// varying file paths, duplicated test assertion messages. -type NearDedupFilter struct { - threshold int // max Hamming distance to treat lines as near-duplicate - minLineLen int // lines shorter than this are never clustered - minCluster int // minimum cluster size before collapsing -} - -// NewNearDedupFilter creates a new DART-inspired near-duplicate line filter. -func NewNearDedupFilter() *NearDedupFilter { - return &NearDedupFilter{ - threshold: 8, - minLineLen: 20, - minCluster: 2, - } -} - -// Name returns the filter name. -func (f *NearDedupFilter) Name() string { return "22_near_dedup" } - -// Apply collapses near-duplicate lines preserving the best representative. -func (f *NearDedupFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - threshold := f.threshold - if mode == ModeAggressive { - threshold = 12 - } - - lines := strings.Split(input, "\n") - if len(lines) < 4 { - return input, 0 - } - - type lineInfo struct { - line string - hash uint64 - } - - infos := make([]lineInfo, len(lines)) - for i, line := range lines { - infos[i] = lineInfo{line: line, hash: SimHash(line)} - } - - // Union-find for clustering - parent := make([]int, len(lines)) - for i := range parent { - parent[i] = i - } - var find func(int) int - find = func(x int) int { - if parent[x] != x { - parent[x] = find(parent[x]) - } - return parent[x] - } - union := func(x, y int) { - px, py := find(x), find(y) - if px != py { - parent[px] = py - } - } - - // Only compare non-empty lines of sufficient length; look-ahead capped at 50 - for i := range infos { - if len(strings.TrimSpace(infos[i].line)) < f.minLineLen { - continue - } - limit := i + 50 - if limit > len(infos) { - limit = len(infos) - } - for j := i + 1; j < limit; j++ { - if len(strings.TrimSpace(infos[j].line)) < f.minLineLen { - continue - } - if HammingDistance(infos[i].hash, infos[j].hash) <= threshold { - union(i, j) - } - } - } - - // Build clusters indexed by root - clusters := make(map[int][]int) - for i := range infos { - if strings.TrimSpace(infos[i].line) == "" { - continue - } - root := find(i) - clusters[root] = append(clusters[root], i) - } - - // For each cluster of ≥ minCluster, pick best representative (longest = most specific) - suppressed := make(map[int]bool) - annotation := make(map[int]string) // representative idx → " [+N similar]" - - for _, members := range clusters { - if len(members) < f.minCluster { - continue - } - sort.Ints(members) - bestIdx := members[0] - for _, idx := range members[1:] { - if len(infos[idx].line) > len(infos[bestIdx].line) { - bestIdx = idx - } - } - for _, idx := range members { - if idx != bestIdx { - suppressed[idx] = true - } - } - annotation[bestIdx] = fmt.Sprintf(" [+%d similar]", len(members)-1) - } - - var result []string - for i, li := range infos { - if suppressed[i] { - continue - } - line := li.line - if ann, ok := annotation[i]; ok { - line = strings.TrimRight(line, " \t") + ann - } - result = append(result, line) - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// SetThreshold overrides the Hamming distance threshold. -func (f *NearDedupFilter) SetThreshold(t int) { f.threshold = t } diff --git a/internal/filter/orphan_line_drop.go b/internal/filter/orphan_line_drop.go deleted file mode 100644 index 048b2eff9..000000000 --- a/internal/filter/orphan_line_drop.go +++ /dev/null @@ -1,161 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "SlimInfer: Accelerating Long-Context LLM Inference via Dynamic Token Pruning" -// arXiv:2508.06447 — 2025 -// -// SlimInferFilter implements the core insight from SlimInfer: drop "orphan" lines — -// lines whose terms do not appear in any other line of the output. -// -// This is the complement to MarginalInfoGainFilter: -// -// MIG — drops lines that contribute NO NEW terms to the retained set -// Slim — drops lines whose terms are REFERENCED BY NO OTHER line -// -// Together they remove both ends of the information graph: -// - Lines that are fully covered by others (MIG) -// - Lines that are fully isolated from others (Slim) -// -// Algorithm: -// 1. Build a term → {line indices} inverted index -// 2. For each line, compute refScore = number of OTHER lines that share ≥1 term -// 3. Lines with refScore < threshold are "orphans" → prune -// 4. Structural lines (errors, headings, first/last) are always kept -// -// Threshold: -// -// ModeMinimal: refScore ≥ 1 (at least one other line shares a term) -// ModeAggressive: refScore ≥ 2 (at least two other lines share a term) -type SlimInferFilter struct { - minLineLen int // lines shorter than this are always kept -} - -// NewSlimInferFilter creates a new SlimInfer orphan-line pruner. -func NewSlimInferFilter() *SlimInferFilter { - return &SlimInferFilter{minLineLen: 12} -} - -// Name returns the filter name. -func (f *SlimInferFilter) Name() string { return "30_slim_infer" } - -// Apply drops orphan lines (low inter-line term reference count). -func (f *SlimInferFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 6 { - return input, 0 - } - - threshold := 1 - if mode == ModeAggressive { - threshold = 2 - } - - // Build term sets per line - termSets := make([]map[string]bool, len(lines)) - for i, line := range lines { - termSets[i] = siTermSet(line) - } - - // Build inverted index: term → set of line indices - invIdx := make(map[string][]int) - for i, ts := range termSets { - for t := range ts { - invIdx[t] = append(invIdx[t], i) - } - } - - // Compute refScore for each line - refScore := make([]int, len(lines)) - for i, ts := range termSets { - seen := make(map[int]bool) - for t := range ts { - for _, j := range invIdx[t] { - if j != i && !seen[j] { - seen[j] = true - refScore[i]++ - } - } - } - } - - // Determine keep set - keep := make([]bool, len(lines)) - // Always keep first and last non-empty lines - for i, line := range lines { - if strings.TrimSpace(line) != "" { - keep[i] = true - break - } - } - for i := len(lines) - 1; i >= 0; i-- { - if strings.TrimSpace(lines[i]) != "" { - keep[i] = true - break - } - } - - for i, line := range lines { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - keep[i] = true // preserve blank lines (structure) - continue - } - if len(trimmed) < f.minLineLen { - keep[i] = true // too short to assess - continue - } - if isErrorLine(line) || isWarningLine(line) || isHeadingLine(line) { - keep[i] = true - continue - } - if refScore[i] >= threshold { - keep[i] = true - } - } - - var result []string - for i, line := range lines { - if keep[i] { - result = append(result, line) - } - } - if len(result) == 0 { - return input, 0 - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// siTermSet builds a set of significant terms for a line. -func siTermSet(line string) map[string]bool { - set := make(map[string]bool) - var word strings.Builder - for _, ch := range strings.ToLower(line) { - if (ch >= 'a' && ch <= 'z') || ch == '_' { - word.WriteRune(ch) - } else if word.Len() > 0 { - if w := word.String(); len(w) >= 4 { // ≥4 chars to avoid noise - set[w] = true - } - word.Reset() - } - } - if word.Len() >= 4 { - set[word.String()] = true - } - return set -} diff --git a/internal/filter/path_alias_compress.go b/internal/filter/path_alias_compress.go deleted file mode 100644 index 6e295516d..000000000 --- a/internal/filter/path_alias_compress.go +++ /dev/null @@ -1,97 +0,0 @@ -package filter - -import ( - "regexp" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// PathShortenFilter aliases repeated long paths/identifiers for compactness. -type PathShortenFilter struct{} - -func NewPathShortenFilter() *PathShortenFilter { return &PathShortenFilter{} } - -func (f *PathShortenFilter) Name() string { return "44_path_shorten" } - -var ( - pathTokenPattern = regexp.MustCompile(`(?:[A-Za-z0-9_.-]+/){2,}[A-Za-z0-9_.-]+`) - longIdentPattern = regexp.MustCompile(`\b[A-Za-z_][A-Za-z0-9_]{24,}\b`) -) - -func (f *PathShortenFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - if len(lines) < 8 { - return input, 0 - } - - pathAlias := map[string]string{} - identAlias := map[string]string{} - pathN, identN := 1, 1 - seenPath := map[string]int{} - seenIdent := map[string]int{} - - // First pass: count repeated candidates. - for _, line := range lines { - for _, p := range pathTokenPattern.FindAllString(line, -1) { - seenPath[p]++ - } - for _, id := range longIdentPattern.FindAllString(line, -1) { - seenIdent[id]++ - } - } - - var out []string - changed := false - for _, line := range lines { - replaced := line - for _, p := range pathTokenPattern.FindAllString(replaced, -1) { - if seenPath[p] < 2 { - continue - } - alias, ok := pathAlias[p] - if !ok { - alias = "@p" + itoa(pathN) - pathN++ - pathAlias[p] = alias - continue - } - replaced = strings.ReplaceAll(replaced, p, alias) - } - for _, id := range longIdentPattern.FindAllString(replaced, -1) { - if seenIdent[id] < 2 { - continue - } - alias, ok := identAlias[id] - if !ok { - alias = "@id" + itoa(identN) - identN++ - identAlias[id] = alias - continue - } - replaced = strings.ReplaceAll(replaced, id, alias) - } - if replaced != line { - changed = true - } - out = append(out, replaced) - } - - if !changed { - return input, 0 - } - - if len(pathAlias) > 0 || len(identAlias) > 0 { - out = append(out, "[path-shorten: aliases active]") - } - - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} diff --git a/internal/filter/perceptual_redundancy_drop.go b/internal/filter/perceptual_redundancy_drop.go deleted file mode 100644 index 732bf6f8e..000000000 --- a/internal/filter/perceptual_redundancy_drop.go +++ /dev/null @@ -1,153 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "Perception Compressor: Training-Free Prompt Compression for Long Context" -// arXiv:2504.xxxxx — 2025 -// -// PerceptionCompressFilter identifies "perceptually redundant" lines: those whose -// semantic content is already covered by their immediate neighbors. Removing -// them does not change what an LLM would perceive as the meaning of the context. -// -// Proxy for perceptual redundancy (training-free): -// - Compute term-overlap between line i and its window (i±windowSize) -// - If overlap / own_terms ≥ threshold, the line is dominated by neighbors -// -// This catches verbose prose, repeated captions, duplicate log prefixes, and -// transitional boilerplate that carries no new information. -type PerceptionCompressFilter struct { - windowSize int // lines on each side to compare against - threshold float64 // min overlap fraction to consider a line redundant - minLineLen int // skip lines shorter than this -} - -// NewPerceptionCompressFilter creates a new Perception Compressor filter. -func NewPerceptionCompressFilter() *PerceptionCompressFilter { - return &PerceptionCompressFilter{ - windowSize: 3, - threshold: 0.75, - minLineLen: 15, - } -} - -// Name returns the filter name. -func (f *PerceptionCompressFilter) Name() string { return "25_perception_compress" } - -// Apply removes perceptually redundant lines. -func (f *PerceptionCompressFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - thresh := f.threshold - if mode == ModeAggressive { - thresh = 0.60 // more aggressive overlap threshold - } - - lines := strings.Split(input, "\n") - if len(lines) < f.windowSize*2+2 { - return input, 0 - } - - // Build term sets per line - termSets := make([]map[string]bool, len(lines)) - for i, line := range lines { - termSets[i] = pcTermSet(line) - } - - keep := make([]bool, len(lines)) - // Always keep first and last lines - keep[0] = true - keep[len(lines)-1] = true - - for i := 1; i < len(lines)-1; i++ { - line := lines[i] - trimmed := strings.TrimSpace(line) - - // Never drop short lines, empty lines, or structural lines - if len(trimmed) < f.minLineLen || trimmed == "" { - keep[i] = true - continue - } - if isErrorLine(line) || isWarningLine(line) || isHeadingLine(line) { - keep[i] = true - continue - } - - own := termSets[i] - if len(own) == 0 { - keep[i] = true - continue - } - - // Build neighbor term set from window - neighbors := make(map[string]bool) - lo := i - f.windowSize - if lo < 0 { - lo = 0 - } - hi := i + f.windowSize - if hi >= len(lines) { - hi = len(lines) - 1 - } - for j := lo; j <= hi; j++ { - if j == i { - continue - } - for t := range termSets[j] { - neighbors[t] = true - } - } - - // Compute overlap: fraction of own terms already in neighbors - covered := 0 - for t := range own { - if neighbors[t] { - covered++ - } - } - overlap := float64(covered) / float64(len(own)) - keep[i] = overlap < thresh - } - - var result []string - for i, line := range lines { - if keep[i] { - result = append(result, line) - } - } - if len(result) == 0 { - return input, 0 - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// pcTermSet builds a term set for a line (lowercase alphabetic tokens ≥ 3 chars). -func pcTermSet(line string) map[string]bool { - set := make(map[string]bool) - var word strings.Builder - for _, ch := range strings.ToLower(line) { - if (ch >= 'a' && ch <= 'z') || ch == '_' { - word.WriteRune(ch) - } else if word.Len() > 0 { - if w := word.String(); len(w) >= 3 { - set[w] = true - } - word.Reset() - } - } - if word.Len() >= 3 { - set[word.String()] = true - } - return set -} diff --git a/internal/filter/pipeline_runtime.go b/internal/filter/pipeline_runtime.go index 75143608b..05e771bac 100644 --- a/internal/filter/pipeline_runtime.go +++ b/internal/filter/pipeline_runtime.go @@ -64,46 +64,6 @@ func ToFilterPipelineConfig(c config.PipelineConfig, opts PipelineRuntimeOptions ExtractiveTailLines: c.ExtractiveTailLines, ExtractiveSignalLines: c.ExtractiveSignalLines, EnableQualityGuardrail: c.EnableQualityGuardrail, - EnablePlannedLayers: c.EnablePlannedLayers, - EnableDiffAdapt: c.EnableDiffAdapt, - EnableEPiC: c.EnableEPiC, - EnableSSDP: c.EnableSSDP, - EnableAgentOCR: c.EnableAgentOCR, - EnableS2MAD: c.EnableS2MAD, - EnableACON: c.EnableACON, - EnableLatentCollab: c.EnableLatentCollab, - EnableGraphCoT: c.EnableGraphCoT, - EnableRoleBudget: c.EnableRoleBudget, - EnableSWEAdaptive: c.EnableSWEAdaptive, - EnableAgentOCRHist: c.EnableAgentOCRHist, - EnablePlanBudget: c.EnablePlanBudget, - EnableLightMem: c.EnableLightMem, - EnablePathShorten: c.EnablePathShorten, - EnableJSONSampler: c.EnableJSONSampler, - EnableContextCrunch: c.EnableContextCrunch, - EnableSearchCrunch: c.EnableSearchCrunch, - EnableStructColl: c.EnableStructColl, - } - - if c.EnableResearchPack { - cfg.EnableDiffAdapt = true - cfg.EnableEPiC = true - cfg.EnableSSDP = true - cfg.EnableAgentOCR = true - cfg.EnableS2MAD = true - cfg.EnableACON = true - cfg.EnableLatentCollab = true - cfg.EnableGraphCoT = true - cfg.EnableRoleBudget = true - cfg.EnableSWEAdaptive = true - cfg.EnableAgentOCRHist = true - cfg.EnablePlanBudget = true - cfg.EnableLightMem = true - cfg.EnablePathShorten = true - cfg.EnableJSONSampler = true - cfg.EnableContextCrunch = true - cfg.EnableSearchCrunch = true - cfg.EnableStructColl = true } if c.DefaultBudget > 0 && c.LazyBudgetRatio > 0 { diff --git a/internal/filter/pipeline_runtime_test.go b/internal/filter/pipeline_runtime_test.go index e84d9a0d7..d762fdf34 100644 --- a/internal/filter/pipeline_runtime_test.go +++ b/internal/filter/pipeline_runtime_test.go @@ -32,26 +32,6 @@ func TestToFilterPipelineConfigMapsKeyFields(t *testing.T) { CompactionPreserveTurns: 7, CompactionMaxTokens: 456, }, - ResearchLayersConfig: config.ResearchLayersConfig{ - EnableDiffAdapt: true, - EnableEPiC: true, - EnableSSDP: true, - EnableAgentOCR: true, - EnableS2MAD: true, - EnableACON: true, - EnableLatentCollab: true, - EnableGraphCoT: true, - EnableRoleBudget: true, - EnableSWEAdaptive: true, - EnableAgentOCRHist: true, - EnablePlanBudget: true, - EnableLightMem: true, - EnablePathShorten: true, - EnableJSONSampler: true, - EnableContextCrunch: true, - EnableSearchCrunch: true, - EnableStructColl: true, - }, } runtime := ToFilterPipelineConfig(cfg, PipelineRuntimeOptions{ @@ -97,12 +77,4 @@ func TestToFilterPipelineConfigMapsKeyFields(t *testing.T) { if runtime.AgentConsolidationMax != 42 { t.Fatalf("AgentConsolidationMax = %d, want 42", runtime.AgentConsolidationMax) } - if !runtime.EnableDiffAdapt || !runtime.EnableEPiC || !runtime.EnableSSDP || !runtime.EnableAgentOCR || - !runtime.EnableS2MAD || !runtime.EnableACON || !runtime.EnableLatentCollab || - !runtime.EnableGraphCoT || !runtime.EnableRoleBudget || !runtime.EnableSWEAdaptive || - !runtime.EnableAgentOCRHist || !runtime.EnablePlanBudget || !runtime.EnableLightMem || - !runtime.EnablePathShorten || !runtime.EnableJSONSampler || !runtime.EnableContextCrunch || - !runtime.EnableSearchCrunch || !runtime.EnableStructColl { - t.Fatalf("research layer mapping failed: %+v", runtime) - } } diff --git a/internal/filter/pipeline_stats.go b/internal/filter/pipeline_stats.go index 7017d9ecf..07e661bb9 100644 --- a/internal/filter/pipeline_stats.go +++ b/internal/filter/pipeline_stats.go @@ -40,30 +40,19 @@ func (s *PipelineStats) String() string { // QuickProcess compresses input with default configuration func QuickProcess(input string, mode Mode) (string, int) { cfg := PipelineConfig{ - Mode: mode, - SessionTracking: true, - NgramEnabled: true, - EnableCompaction: true, - EnableAttribution: true, - EnableH2O: true, - EnableAttentionSink: true, - EnableMetaToken: true, - EnableSemanticChunk: true, - EnableSketchStore: true, - EnableLazyPruner: true, - EnableSemanticAnchor: true, - EnableAgentMemory: true, - EnableTFIDF: true, - EnableReasoningTrace: true, - EnableSymbolicCompress: true, - EnablePhraseGrouping: true, - EnableNumericalQuant: true, - EnableDynamicRatio: true, - EnableHypernym: true, - EnableSemanticCache: true, - EnableScope: true, - EnableSmallKV: true, - EnableKVzip: true, + Mode: mode, + SessionTracking: true, + NgramEnabled: true, + EnableCompaction: true, + EnableAttribution: true, + EnableH2O: true, + EnableAttentionSink: true, + EnableMetaToken: true, + EnableSemanticChunk: true, + EnableSketchStore: true, + EnableLazyPruner: true, + EnableSemanticAnchor: true, + EnableAgentMemory: true, } p := NewPipelineCoordinator(&cfg) output, stats := p.Process(input) diff --git a/internal/filter/pipeline_toml.go b/internal/filter/pipeline_toml.go index f51cc94ea..be1c42e1c 100644 --- a/internal/filter/pipeline_toml.go +++ b/internal/filter/pipeline_toml.go @@ -104,11 +104,7 @@ func tomlToPipelineConfig(t TOMLPipelineConfig) PipelineConfig { pc.EnableSemanticAnchor = l.SemanticAnchor pc.EnableAgentMemory = l.AgentMemory pc.EnableLLMLinguaProse = l.LLMLinguaProse - pc.EnableTFIDF = l.TFIDF - pc.EnableSymbolicCompress = l.Symbolic - pc.EnablePhraseGrouping = l.PhraseGroup - pc.EnableNumericalQuant = l.Numerical - pc.EnableDynamicRatio = l.DynamicRatio + return pc } diff --git a/internal/filter/pipeline_types.go b/internal/filter/pipeline_types.go index 7844d3a5b..ba79b3593 100644 --- a/internal/filter/pipeline_types.go +++ b/internal/filter/pipeline_types.go @@ -296,8 +296,6 @@ type PipelineConfigWithNestedLayers struct { EnableAdaptiveLearning bool // Enable adaptive learning (merged EngramLearner + TieredSummary) EnableCrunchBench bool // Enable comprehensive benchmarking LayerGateAllowExperimental []string - EnablePlannedLayers bool - // Layer 0: QuantumLock (KV-cache alignment) EnableQuantumLock bool @@ -378,76 +376,19 @@ type PipelineConfigWithNestedLayers struct { AgentConsolidationMax int // Adaptive layers - EnableQuestionAware bool QuestionAwareThreshold float64 - EnableDensityAdaptive bool DensityTargetRatio float64 DensityThreshold float64 // TF-IDF - EnableTFIDF bool TFIDFThreshold float64 // Reasoning trace - EnableReasoningTrace bool - MaxReflectionLoops int + MaxReflectionLoops int // Phase 1: NEW filters - EnableSymbolicCompress bool - EnablePhraseGrouping bool - EnableNumericalQuant bool - DecimalPlaces int - EnableDynamicRatio bool - DynamicRatioBase float64 - - // Phase 2: Advanced filters - EnableHypernym bool - EnableSemanticCache bool - EnableScope bool - EnableSmallKV bool - EnableKVzip bool - - // 2026 Research layers - EnableSWEzze bool - EnableMixedDim bool - EnableBEAVER bool - EnablePoC bool - EnableTokenQuant bool - EnableTokenRetention bool - EnableACON bool - - // Layers 21-25: new 2025/2026 research filters - EnableMarginalInfoGain bool - EnableNearDedup bool - EnableCoTCompress bool - EnableCodingAgentCtx bool - EnablePerceptionCompress bool - - // Layers 26-30: reasoning + agent filters - EnableLightThinker bool - EnableThinkSwitcher bool - EnableGMSA bool - EnableCARL bool - EnableSlimInfer bool - - // Layers 31-45: adaptive reasoning + trajectory filters - EnableDiffAdapt bool - EnableEPiC bool - EnableSSDP bool - EnableAgentOCR bool - EnableS2MAD bool - EnableLatentCollab bool - EnableGraphCoT bool - EnableRoleBudget bool - EnableSWEAdaptive bool - EnableAgentOCRHist bool - EnablePlanBudget bool - EnableLightMem bool - EnablePathShorten bool - EnableJSONSampler bool - EnableContextCrunch bool // Merged LogCrunch + DiffCrunch - EnableSearchCrunch bool - EnableStructColl bool + DecimalPlaces int + DynamicRatioBase float64 // Unified experimental layers (L14-L16) EnableEdgeCase bool // L14: merges L21-L25 @@ -487,7 +428,7 @@ func (cfg *PipelineConfig) HasExplicitSettings() bool { return cfg.Budget > 0 || cfg.QueryIntent != "" || cfg.LLMEnabled || cfg.NgramEnabled || cfg.MultiFileEnabled || cfg.SessionTracking || cfg.EnableCompaction || cfg.EnableAttribution || cfg.EnableH2O || cfg.EnableAttentionSink || - cfg.EnableAdaptiveLearning || cfg.EnableContextCrunch + cfg.EnableAdaptiveLearning } // PipelineStats holds statistics from the compression pipeline diff --git a/internal/filter/plan_budget.go b/internal/filter/plan_budget.go deleted file mode 100644 index de20ff25b..000000000 --- a/internal/filter/plan_budget.go +++ /dev/null @@ -1,138 +0,0 @@ -package filter - -import ( - "math" - "sort" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// PlanBudgetFilter applies dynamic test-time budget allocation based on input difficulty. -type PlanBudgetFilter struct{} - -// NewPlanBudgetFilter creates the dynamic budget controller filter. -func NewPlanBudgetFilter() *PlanBudgetFilter { return &PlanBudgetFilter{} } - -// Name returns the filter name. -func (f *PlanBudgetFilter) Name() string { return "42_plan_budget" } - -// Apply computes difficulty and keeps a matching budgeted subset of lines. -func (f *PlanBudgetFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - if len(lines) < 8 { - return input, 0 - } - - difficulty := planBudgetDifficulty(lines) - keepRatio := planBudgetKeepRatio(difficulty, mode) - target := int(math.Ceil(float64(len(lines)) * keepRatio)) - if target < 3 { - target = 3 - } - - type cand struct { - idx int - score float64 - } - cands := make([]cand, 0, len(lines)) - keep := make(map[int]bool, target) - termFreq := daTermFrequency(lines) - for i, line := range lines { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - continue - } - if i == 0 || i == len(lines)-1 || isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { - keep[i] = true - continue - } - score := daLineScore(line, termFreq, len(lines)) - if isReasoningLine(line) || epicIsCausalEdge(line) { - score += 0.8 - } - if strings.Contains(strings.ToLower(line), "plan") || strings.Contains(strings.ToLower(line), "budget") { - score += 0.6 - } - cands = append(cands, cand{idx: i, score: score}) - } - sort.Slice(cands, func(i, j int) bool { return cands[i].score > cands[j].score }) - for _, c := range cands { - if len(keep) >= target { - break - } - keep[c.idx] = true - } - - out := make([]string, 0, len(keep)+1) - out = append(out, "[plan-budget: difficulty="+planBudgetBucket(difficulty)+", keep="+itoa(int(keepRatio*100))+"%]") - for i, line := range lines { - if keep[i] { - out = append(out, line) - } - } - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func planBudgetDifficulty(lines []string) float64 { - score := 0.0 - for _, line := range lines { - l := strings.ToLower(strings.TrimSpace(line)) - if l == "" { - continue - } - if isErrorLine(line) || isWarningLine(line) { - score += 1.4 - } - if isCodeLine(line) { - score += 1.0 - } - if isReasoningLine(line) || epicIsCausalEdge(line) { - score += 0.8 - } - if strings.Contains(l, "stack") || strings.Contains(l, "trace") || strings.Contains(l, "migration") { - score += 0.6 - } - } - norm := score / float64(len(lines)) - if norm > 1.0 { - return 1.0 - } - if norm < 0.0 { - return 0.0 - } - return norm -} - -func planBudgetKeepRatio(difficulty float64, mode Mode) float64 { - ratio := 0.35 + difficulty*0.30 - if mode == ModeAggressive { - ratio -= 0.1 - } - if ratio < 0.30 { - ratio = 0.30 - } - if ratio > 0.85 { - ratio = 0.85 - } - return ratio -} - -func planBudgetBucket(difficulty float64) string { - switch { - case difficulty < 0.35: - return "easy" - case difficulty < 0.65: - return "medium" - default: - return "hard" - } -} diff --git a/internal/filter/presets.go b/internal/filter/presets.go index c68abf90f..943efa32e 100644 --- a/internal/filter/presets.go +++ b/internal/filter/presets.go @@ -45,8 +45,6 @@ func TierConfig(tier Tier, baseMode Mode) PipelineConfig { cfg.EnableEntropy = true cfg.EnableGoalDriven = true cfg.EnableH2O = true - cfg.EnableNumericalQuant = true - case TierTrim: cfg.EnableEntropy = true cfg.EnablePerplexity = true @@ -58,9 +56,6 @@ func TierConfig(tier Tier, baseMode Mode) PipelineConfig { cfg.EnableH2O = true cfg.EnableAttentionSink = true cfg.EnableMetaToken = true - cfg.EnableNumericalQuant = true - cfg.EnableDynamicRatio = true - case TierExtract: cfg.EnableEntropy = true cfg.EnablePerplexity = true @@ -80,32 +75,6 @@ func TierConfig(tier Tier, baseMode Mode) PipelineConfig { cfg.EnableLazyPruner = true cfg.EnableSemanticAnchor = true cfg.EnableAgentMemory = true - cfg.EnableSymbolicCompress = true - cfg.EnablePhraseGrouping = true - cfg.EnableNumericalQuant = true - cfg.EnableDynamicRatio = true - cfg.EnableHypernym = true - cfg.EnableSemanticCache = true - cfg.EnableKVzip = true - cfg.EnableDiffAdapt = true - cfg.EnableEPiC = true - cfg.EnableSSDP = true - cfg.EnableAgentOCR = true - cfg.EnableS2MAD = true - cfg.EnableACON = true - cfg.EnableLatentCollab = true - cfg.EnableGraphCoT = true - cfg.EnableRoleBudget = true - cfg.EnableSWEAdaptive = true - cfg.EnableAgentOCRHist = true - cfg.EnablePlanBudget = true - cfg.EnableLightMem = true - cfg.EnablePathShorten = true - cfg.EnableJSONSampler = true - cfg.EnableContextCrunch = true - cfg.EnableSearchCrunch = true - cfg.EnableStructColl = true - case TierCore: cfg.EnableEntropy = true cfg.EnablePerplexity = true @@ -126,36 +95,12 @@ func TierConfig(tier Tier, baseMode Mode) PipelineConfig { cfg.EnableLazyPruner = true cfg.EnableSemanticAnchor = true cfg.EnableAgentMemory = true - cfg.EnableQuestionAware = false - cfg.EnableDensityAdaptive = false - cfg.EnableSymbolicCompress = false - cfg.EnablePhraseGrouping = false - cfg.EnableNumericalQuant = false - cfg.EnableDynamicRatio = false - cfg.EnableHypernym = false - cfg.EnableSemanticCache = false - cfg.EnableScope = false - cfg.EnableSmallKV = false - cfg.EnableKVzip = false - cfg.EnableSWEzze = false - cfg.EnableMixedDim = false - cfg.EnableBEAVER = false - cfg.EnablePoC = false - cfg.EnableTokenQuant = false - cfg.EnableTokenRetention = false - cfg.EnableACON = false - cfg.EnablePlannedLayers = false - case TierCode: cfg.EnableEntropy = true cfg.EnableAST = true cfg.EnableGoalDriven = true cfg.EnableH2O = true cfg.EnableMetaToken = true - cfg.EnableSymbolicCompress = true - cfg.EnableNumericalQuant = true - cfg.EnableSWEzze = true - case TierLog: cfg.EnableEntropy = true cfg.EnablePerplexity = true @@ -163,8 +108,6 @@ func TierConfig(tier Tier, baseMode Mode) PipelineConfig { cfg.EnableH2O = true cfg.EnableAttribution = true cfg.EnableSketchStore = true - cfg.EnableNumericalQuant = true - case TierThread: cfg.EnableEntropy = true cfg.EnableCompaction = true @@ -182,8 +125,7 @@ func TierConfig(tier Tier, baseMode Mode) PipelineConfig { cfg.EnableAttribution = true cfg.EnableH2O = true cfg.EnableAttentionSink = true - cfg.EnableQuestionAware = true - cfg.EnableDynamicRatio = true + cfg.EnablePolicyRouter = true cfg.EnableExtractivePrefilter = true cfg.EnableQualityGuardrail = true @@ -191,24 +133,6 @@ func TierConfig(tier Tier, baseMode Mode) PipelineConfig { cfg.ExtractiveHeadLines = 80 cfg.ExtractiveTailLines = 60 cfg.ExtractiveSignalLines = 120 - cfg.EnableDiffAdapt = true - cfg.EnableEPiC = true - cfg.EnableSSDP = true - cfg.EnableAgentOCR = true - cfg.EnableS2MAD = true - cfg.EnableACON = true - cfg.EnableLatentCollab = true - cfg.EnableGraphCoT = true - cfg.EnableRoleBudget = true - cfg.EnableSWEAdaptive = true - cfg.EnableAgentOCRHist = true - cfg.EnablePlanBudget = true - cfg.EnableLightMem = true - cfg.EnablePathShorten = true - cfg.EnableJSONSampler = true - cfg.EnableContextCrunch = true - cfg.EnableSearchCrunch = true - cfg.EnableStructColl = true } return cfg diff --git a/internal/filter/reasoning_route_compress.go b/internal/filter/reasoning_route_compress.go deleted file mode 100644 index 617aeab6e..000000000 --- a/internal/filter/reasoning_route_compress.go +++ /dev/null @@ -1,258 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Papers: -// -// "ThinkSwitcher: When to Think Hard, When to Think Fast" — EMNLP 2025 -// "Thinkless: LLM Learns When to Think" — NeurIPS 2025 (VainF/Thinkless) -// -// ThinkSwitcherFilter is a meta-routing filter: it measures the "reasoning -// density" of an output (fraction of lines that look like deliberate -// reasoning) and routes to the appropriate compression level. -// -// Three paths: -// -// fast — reasoning density < fastThreshold: no reasoning detected, -// pass through unchanged (avoids wasted CPU on direct answers) -// light — fastThreshold ≤ density < heavyThreshold: some reasoning, -// compress to 50% of reasoning lines keeping key sentences -// heavy — density ≥ heavyThreshold: heavy reasoning present, -// collapse to a one-line summary per reasoning block -// -// Key insight from ThinkSwitcher: the majority of LLM outputs need no CoT -// compression at all. Running compression unconditionally wastes resources -// and can degrade quality by removing relevant content from direct answers. -type ThinkSwitcherFilter struct { - fastThreshold float64 // below this → fast path - heavyThreshold float64 // at or above this → heavy compression -} - -// NewThinkSwitcherFilter creates a new ThinkSwitcher routing filter. -func NewThinkSwitcherFilter() *ThinkSwitcherFilter { - return &ThinkSwitcherFilter{ - fastThreshold: 0.12, - heavyThreshold: 0.35, - } -} - -// Name returns the filter name. -func (f *ThinkSwitcherFilter) Name() string { return "27_think_switcher" } - -// Apply routes compression based on detected reasoning density. -func (f *ThinkSwitcherFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 6 { - return input, 0 - } - - density := f.reasoningDensity(lines) - - if density < f.fastThreshold { - // Fast path: no meaningful reasoning present - return input, 0 - } - - if mode == ModeAggressive || density >= f.heavyThreshold { - return f.heavyCompress(input, lines) - } - - return f.lightCompress(input, lines) -} - -// reasoningDensity returns the fraction of lines that look like deliberate reasoning. -func (f *ThinkSwitcherFilter) reasoningDensity(lines []string) float64 { - if len(lines) == 0 { - return 0 - } - count := 0 - for _, line := range lines { - if isReasoningLine(line) { - count++ - } - } - return float64(count) / float64(len(lines)) -} - -// isReasoningLine returns true for lines that look like deliberate reasoning steps. -func isReasoningLine(line string) bool { - lower := strings.ToLower(strings.TrimSpace(line)) - if lower == "" { - return false - } - for _, prefix := range []string{ - "step ", "first,", "second,", "third,", "fourth,", "fifth,", - "finally,", "lastly,", "next,", "now,", "then,", - "let me ", "i need to ", "i should ", "i will ", "i can ", - "to do this,", "therefore,", "thus,", "hence,", "so,", - "consider", "analyze", "check", "verify", "note that", - "wait,", "actually,", "hmm,", "on second thought", - "the reason", "because", "since", "given that", - } { - if strings.HasPrefix(lower, prefix) { - return true - } - } - // Numbered list items ("1. ", "2. ", etc.) - if len(lower) > 2 && lower[0] >= '1' && lower[0] <= '9' && (lower[1] == '.' || lower[1] == ')') { - return true - } - return false -} - -// lightCompress retains ~50% of reasoning lines, keeping the most informative ones. -func (f *ThinkSwitcherFilter) lightCompress(input string, lines []string) (string, int) { - type tsScoredLine struct { - idx int - score float64 - } - - // Score reasoning lines by term density - termFreq := tsTermFreq(lines) - var reasoningScored []tsScoredLine - for i, line := range lines { - if !isReasoningLine(line) { - continue - } - score := tsLineScore(line, termFreq) - reasoningScored = append(reasoningScored, tsScoredLine{idx: i, score: score}) - } - - // Keep top 50% of reasoning lines - keep := len(reasoningScored) / 2 - if keep < 1 { - keep = 1 - } - - // Sort by score descending, mark top-keep as retained - for i := 1; i < len(reasoningScored); i++ { - for j := i; j > 0 && reasoningScored[j].score > reasoningScored[j-1].score; j-- { - reasoningScored[j], reasoningScored[j-1] = reasoningScored[j-1], reasoningScored[j] - } - } - retained := make(map[int]bool) - for _, s := range reasoningScored[:keep] { - retained[s.idx] = true - } - - var result []string - for i, line := range lines { - if isReasoningLine(line) && !retained[i] { - continue - } - result = append(result, line) - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// heavyCompress collapses each contiguous reasoning block to one summary line. -func (f *ThinkSwitcherFilter) heavyCompress(input string, lines []string) (string, int) { - type block struct{ start, end int } - var blocks []block - inBlock := false - start := 0 - - for i, line := range lines { - if isReasoningLine(line) { - if !inBlock { - inBlock = true - start = i - } - } else { - if inBlock { - if i-start >= 3 { - blocks = append(blocks, block{start, i - 1}) - } - inBlock = false - } - } - } - if inBlock && len(lines)-start >= 3 { - blocks = append(blocks, block{start, len(lines) - 1}) - } - - if len(blocks) == 0 { - return f.lightCompress(input, lines) - } - - suppress := make(map[int]bool) - annotation := make(map[int]string) - termFreq := tsTermFreq(lines) - - for _, b := range blocks { - body := lines[b.start : b.end+1] - toks := core.EstimateTokens(strings.Join(body, "\n")) - // Pick best representative line - best := body[0] - bestScore := -1.0 - for _, line := range body { - if s := tsLineScore(line, termFreq); s > bestScore { - bestScore = s - best = line - } - } - annotation[b.start] = best + " [reasoning: ~" + itoa(toks) + " tok compressed]" - for i := b.start + 1; i <= b.end; i++ { - suppress[i] = true - } - suppress[b.start] = true // replaced by annotation - } - - var result []string - for i, line := range lines { - if suppress[i] { - if ann, ok := annotation[i]; ok { - result = append(result, ann) - } - continue - } - result = append(result, line) - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -// -- helpers -- - -func tsTermFreq(lines []string) map[string]int { - freq := make(map[string]int) - for _, line := range lines { - for _, t := range ltTokenize(line) { - freq[t]++ - } - } - return freq -} - -func tsLineScore(line string, freq map[string]int) float64 { - terms := ltTokenize(line) - if len(terms) == 0 { - return 0 - } - score := 0.0 - for _, t := range terms { - if f := freq[t]; f > 0 { - score += 1.0 / float64(f) - } - } - return score / float64(len(terms)) -} diff --git a/internal/filter/reasoning_step_compress.go b/internal/filter/reasoning_step_compress.go deleted file mode 100644 index 010baa92c..000000000 --- a/internal/filter/reasoning_step_compress.go +++ /dev/null @@ -1,234 +0,0 @@ -package filter - -import ( - "regexp" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "LightThinker: Thinking Step-by-Step Compression" -// EMNLP 2025 — Zhang et al., Zhejiang University -// https://arxiv.org/abs/2502.15589 -// -// LightThinkerFilter compresses reasoning output at step granularity. -// Unlike CoTCompressFilter which truncates whole blocks, LightThinker -// retains all steps but skeletonises each one to its single most -// informative sentence — a sketch of each reasoning step. -// -// Algorithm: -// 1. Segment input into reasoning steps (numbered/labeled sequences) -// 2. For each step with ≥ minStepLines, extract the "key sentence": -// the line with the highest unique-term density relative to the step -// 3. Replace the step body with: step header + key sentence + stub -// 4. Pass non-step lines through unchanged -// -// Key insight: keeping one sentence per step (the conclusion/observation) -// preserves the logical trajectory while cutting 60-80% of step content. -type LightThinkerFilter struct { - stepHeaderRe *regexp.Regexp - ordinalRe *regexp.Regexp - minStepLines int // minimum body lines before compressing a step -} - -// NewLightThinkerFilter creates a new LightThinker step-level compressor. -func NewLightThinkerFilter() *LightThinkerFilter { - return &LightThinkerFilter{ - stepHeaderRe: regexp.MustCompile(`(?i)^(step\s+\d+[:.)]|\d+[.)\s]\s+)`), - ordinalRe: regexp.MustCompile(`(?i)^(first[,:]?|second[,:]?|third[,:]?|fourth[,:]?|fifth[,:]?|finally[,:]?|lastly[,:]?|next[,:]?)`), - minStepLines: 3, - } -} - -// Name returns the filter name. -func (f *LightThinkerFilter) Name() string { return "26_lightthinker" } - -// Apply skeletonises reasoning steps to one key sentence each. -func (f *LightThinkerFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < f.minStepLines*2 { - return input, 0 - } - - steps := f.segmentSteps(lines) - if len(steps) == 0 { - return input, 0 - } - - // Mark lines to replace - type replacement struct { - startBody int - endBody int - keySent string - dropped int - } - var replacements []replacement - - for _, s := range steps { - body := lines[s.bodyStart : s.bodyEnd+1] - if len(body) < f.minStepLines { - continue - } - key := f.extractKeySentence(body) - if key == "" || key == body[0] { - continue - } - replacements = append(replacements, replacement{ - startBody: s.bodyStart, - endBody: s.bodyEnd, - keySent: key, - dropped: len(body) - 1, - }) - } - - if len(replacements) == 0 { - return input, 0 - } - - suppress := make(map[int]bool) - inject := make(map[int]string) // after this line, inject replacement - - for _, r := range replacements { - // Keep first line of body (often contains key info in first sentence) - // Suppress body[1..end], inject key sentence + stub after body[0] - for i := r.startBody + 1; i <= r.endBody; i++ { - suppress[i] = true - } - if r.dropped > 1 { - inject[r.startBody] = r.keySent + "\n[~" + itoa(r.dropped) + " step lines compressed]" - } else { - inject[r.startBody] = r.keySent - } - } - - var result []string - for i, line := range lines { - if suppress[i] { - continue - } - result = append(result, line) - if inj, ok := inject[i]; ok { - result = append(result, inj) - } - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -type stepSpan struct { - headerLine int - bodyStart int - bodyEnd int -} - -// segmentSteps identifies numbered/ordinal reasoning step sequences. -func (f *LightThinkerFilter) segmentSteps(lines []string) []stepSpan { - var steps []stepSpan - i := 0 - for i < len(lines) { - trimmed := strings.TrimSpace(lines[i]) - if f.stepHeaderRe.MatchString(trimmed) || f.ordinalRe.MatchString(trimmed) { - // Find end of step body (next step header or blank line after content) - bodyStart := i + 1 - j := bodyStart - for j < len(lines) { - t := strings.TrimSpace(lines[j]) - if f.stepHeaderRe.MatchString(t) || f.ordinalRe.MatchString(t) { - break - } - if t == "" && j > bodyStart+1 { - // blank line terminates step body - break - } - j++ - } - bodyEnd := j - 1 - // Skip trailing blank lines - for bodyEnd > bodyStart && strings.TrimSpace(lines[bodyEnd]) == "" { - bodyEnd-- - } - if bodyEnd >= bodyStart { - steps = append(steps, stepSpan{ - headerLine: i, - bodyStart: bodyStart, - bodyEnd: bodyEnd, - }) - } - i = j - } else { - i++ - } - } - return steps -} - -// extractKeySentence returns the most informative line from a step body. -// "Most informative" = highest ratio of unique terms to line length. -func (f *LightThinkerFilter) extractKeySentence(lines []string) string { - if len(lines) == 0 { - return "" - } - - // Build term frequency across the step - termFreq := make(map[string]int) - for _, line := range lines { - for _, t := range ltTokenize(line) { - termFreq[t]++ - } - } - - // Score each line: sum of (1/freq) for each term it contains — rare terms score higher - bestScore := -1.0 - bestLine := lines[0] - for _, line := range lines { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - continue - } - terms := ltTokenize(line) - if len(terms) == 0 { - continue - } - score := 0.0 - for _, t := range terms { - if f := termFreq[t]; f > 0 { - score += 1.0 / float64(f) - } - } - score /= float64(len(terms)) // normalise by line length - if score > bestScore { - bestScore = score - bestLine = trimmed - } - } - return bestLine -} - -// ltTokenize splits a line into lowercase tokens ≥ 3 chars. -func ltTokenize(line string) []string { - var terms []string - var word strings.Builder - for _, ch := range strings.ToLower(line) { - if (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '_' { - word.WriteRune(ch) - } else if word.Len() > 0 { - if w := word.String(); len(w) >= 3 { - terms = append(terms, w) - } - word.Reset() - } - } - if word.Len() >= 3 { - terms = append(terms, word.String()) - } - return terms -} diff --git a/internal/filter/role_budget_compress.go b/internal/filter/role_budget_compress.go deleted file mode 100644 index b4bf6c6be..000000000 --- a/internal/filter/role_budget_compress.go +++ /dev/null @@ -1,237 +0,0 @@ -package filter - -import ( - "math" - "sort" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// RoleBudgetFilter allocates compression budget by multi-agent role. -type RoleBudgetFilter struct { - targetRatio float64 -} - -// NewRoleBudgetFilter creates a role-aware budget filter. -func NewRoleBudgetFilter() *RoleBudgetFilter { - return &RoleBudgetFilter{targetRatio: 0.60} -} - -// Name returns the filter name. -func (f *RoleBudgetFilter) Name() string { return "39_role_budget" } - -// Apply keeps more lines from high-priority roles (executor/planner) and trims low-value roles. -func (f *RoleBudgetFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - turns := parseRoleTurns(lines) - if len(turns) < 2 { - return input, 0 - } - - targetRatio := f.targetRatio - if mode == ModeAggressive { - targetRatio = 0.45 - } - targetTotal := int(math.Ceil(float64(len(lines)) * targetRatio)) - if targetTotal < 4 { - targetTotal = 4 - } - - totalWeight := 0.0 - weighted := make([]float64, len(turns)) - for i, t := range turns { - w := rolePriorityWeight(t.role) * float64(t.end-t.start+1) - weighted[i] = w - totalWeight += w - } - if totalWeight <= 0 { - return input, 0 - } - - keep := make(map[int]bool, targetTotal) - for i, t := range turns { - quota := int(math.Ceil(weighted[i] / totalWeight * float64(targetTotal))) - if quota < 1 { - quota = 1 - } - roleKeepLines(lines, t.start, t.end, quota, keep) - } - pruneToTarget(lines, targetTotal, keep) - - var out []string - for i, line := range lines { - if keep[i] { - out = append(out, line) - } - } - if len(out) >= len(lines) { - return input, 0 - } - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -type roleTurn struct { - start int - end int - role string -} - -func parseRoleTurns(lines []string) []roleTurn { - headerIdx := make([]int, 0, 8) - roles := make([]string, 0, 8) - for i, line := range lines { - if role, ok := detectRoleHeader(line); ok { - headerIdx = append(headerIdx, i) - roles = append(roles, role) - } - } - if len(headerIdx) < 2 { - return nil - } - turns := make([]roleTurn, 0, len(headerIdx)) - for i := range headerIdx { - end := len(lines) - 1 - if i+1 < len(headerIdx) { - end = headerIdx[i+1] - 1 - } - turns = append(turns, roleTurn{start: headerIdx[i], end: end, role: roles[i]}) - } - return turns -} - -func detectRoleHeader(line string) (string, bool) { - lower := strings.ToLower(strings.TrimSpace(line)) - rolePrefixes := []struct { - role string - prefix string - }{ - {"user", "user:"}, - {"assistant", "assistant:"}, - {"planner", "planner:"}, - {"critic", "critic:"}, - {"executor", "executor:"}, - {"reviewer", "reviewer:"}, - {"agent", "agent:"}, - {"tool", "tool:"}, - {"system", "system:"}, - } - for _, rp := range rolePrefixes { - if strings.HasPrefix(lower, rp.prefix) { - return rp.role, true - } - } - return "", false -} - -func rolePriorityWeight(role string) float64 { - switch role { - case "executor": - return 1.35 - case "planner": - return 1.25 - case "critic", "reviewer": - return 1.0 - case "assistant", "agent": - return 0.9 - case "tool": - return 0.7 - default: - return 1.0 - } -} - -func roleKeepLines(lines []string, start, end, quota int, keep map[int]bool) { - type cand struct { - idx int - score float64 - } - cands := make([]cand, 0, end-start+1) - for i := start; i <= end; i++ { - line := lines[i] - if i == start || isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { - keep[i] = true - continue - } - score := 0.0 - if isReasoningLine(line) || epicIsCausalEdge(line) { - score += 1.0 - } - score += float64(len(ltTokenize(line))) / 10.0 - cands = append(cands, cand{idx: i, score: score}) - } - sort.Slice(cands, func(i, j int) bool { return cands[i].score > cands[j].score }) - for _, c := range cands { - if quota <= 0 { - break - } - if !keep[c.idx] { - keep[c.idx] = true - quota-- - } - } -} - -func pruneToTarget(lines []string, target int, keep map[int]bool) { - if len(keep) <= target { - return - } - - isRequired := func(i int) bool { - if _, ok := detectRoleHeader(lines[i]); ok { - return true - } - return isErrorLine(lines[i]) || isWarningLine(lines[i]) || isCodeLine(lines[i]) - } - - type cand struct { - idx int - score float64 - } - cands := make([]cand, 0, len(keep)) - for i := range keep { - if isRequired(i) { - continue - } - score := 0.0 - if isReasoningLine(lines[i]) || epicIsCausalEdge(lines[i]) { - score += 1.0 - } - score += float64(len(ltTokenize(lines[i]))) / 10.0 - cands = append(cands, cand{idx: i, score: score}) - } - - sort.Slice(cands, func(i, j int) bool { return cands[i].score < cands[j].score }) - for _, c := range cands { - if len(keep) <= target { - break - } - delete(keep, c.idx) - } -} - -func jaccardOverlap(a, b map[string]bool) float64 { - if len(a) == 0 || len(b) == 0 { - return 0 - } - inter := 0 - union := make(map[string]bool, len(a)+len(b)) - for k := range a { - union[k] = true - if b[k] { - inter++ - } - } - for k := range b { - union[k] = true - } - return float64(inter) / float64(len(union)) -} diff --git a/internal/filter/search_result_dedup.go b/internal/filter/search_result_dedup.go deleted file mode 100644 index 7e7f0e96f..000000000 --- a/internal/filter/search_result_dedup.go +++ /dev/null @@ -1,166 +0,0 @@ -package filter - -import ( - "regexp" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// SearchCrunchFilter deduplicates repeated search result lines and keeps top unique hits. -type SearchCrunchFilter struct{} - -func NewSearchCrunchFilter() *SearchCrunchFilter { return &SearchCrunchFilter{} } - -func (f *SearchCrunchFilter) Name() string { return "47_search_crunch" } - -var searchPrefixPattern = regexp.MustCompile(`^\s*\d+[\.|\)]\s+`) - -type searchResult struct { - rank int - snippet string - hash uint64 -} - -func (f *SearchCrunchFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - if len(lines) < 12 { - return input, 0 - } - - results := parseSearchResults(lines) - if len(results) == 0 { - // Fallback to simple dedup - return f.simpleDedup(lines, mode, input) - } - - // Deduplicate by snippet similarity - deduplicated := deduplicateBySnippet(results) - - maxResults := 60 - if mode == ModeAggressive { - maxResults = 35 - } - - kept := deduplicated - if len(kept) > maxResults { - kept = kept[:maxResults] - } - - if len(kept) == len(results) { - return input, 0 - } - - out := formatSearchResults(kept) - out = append(out, "[search-crunch: duplicate hits pruned]") - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func (f *SearchCrunchFilter) simpleDedup(lines []string, mode Mode, input string) (string, int) { - seen := map[string]bool{} - out := make([]string, 0, len(lines)) - changed := false - maxUnique := 60 - if mode == ModeAggressive { - maxUnique = 35 - } - unique := 0 - for _, line := range lines { - trim := strings.TrimSpace(line) - if trim == "" { - continue - } - if isErrorLine(line) || isWarningLine(line) { - out = append(out, line) - continue - } - norm := searchPrefixPattern.ReplaceAllString(strings.ToLower(trim), "") - norm = strings.Join(strings.Fields(norm), " ") - if seen[norm] { - changed = true - continue - } - seen[norm] = true - if unique >= maxUnique { - changed = true - continue - } - unique++ - out = append(out, line) - } - - if !changed { - return input, 0 - } - out = append(out, "[search-crunch: duplicate hits pruned]") - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func parseSearchResults(lines []string) []searchResult { - results := []searchResult{} - rank := 0 - - for _, line := range lines { - trim := strings.TrimSpace(line) - if trim == "" { - continue - } - - // Simple heuristic: lines with URLs or numbered prefixes - if strings.Contains(line, "http") || searchPrefixPattern.MatchString(line) { - rank++ - snippet := searchPrefixPattern.ReplaceAllString(trim, "") - hash := SimHash(snippet) - results = append(results, searchResult{ - rank: rank, - snippet: snippet, - hash: hash, - }) - } - } - - return results -} - -func deduplicateBySnippet(results []searchResult) []searchResult { - if len(results) == 0 { - return results - } - - kept := []searchResult{results[0]} - for i := 1; i < len(results); i++ { - isDuplicate := false - for _, prev := range kept { - if HammingDistance(results[i].hash, prev.hash) <= 3 { - isDuplicate = true - break - } - } - if !isDuplicate { - kept = append(kept, results[i]) - } - } - - return kept -} - -func formatSearchResults(results []searchResult) []string { - out := make([]string, 0, len(results)) - for i, result := range results { - out = append(out, string(rune(i+49))+". "+result.snippet) - } - return out -} diff --git a/internal/filter/structural_collapse.go b/internal/filter/structural_collapse.go deleted file mode 100644 index e2046e9a0..000000000 --- a/internal/filter/structural_collapse.go +++ /dev/null @@ -1,62 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// StructuralCollapseFilter compacts repetitive structural boilerplate while preserving semantic anchors. -type StructuralCollapseFilter struct{} - -func NewStructuralCollapseFilter() *StructuralCollapseFilter { return &StructuralCollapseFilter{} } - -func (f *StructuralCollapseFilter) Name() string { return "49_structural_collapse" } - -func (f *StructuralCollapseFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - lines := strings.Split(input, "\n") - if len(lines) < 16 { - return input, 0 - } - - seen := map[string]int{} - out := make([]string, 0, len(lines)) - changed := false - for _, line := range lines { - trim := strings.TrimSpace(line) - if trim == "" { - continue - } - if isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { - out = append(out, line) - continue - } - norm := strings.ToLower(strings.Join(strings.Fields(trim), " ")) - if strings.HasPrefix(norm, "import ") || strings.HasPrefix(norm, "from ") || strings.HasPrefix(norm, "package ") || strings.HasPrefix(norm, "module ") || strings.HasPrefix(norm, "section ") || strings.HasPrefix(norm, "###") { - seen[norm]++ - limit := 1 - if mode == ModeMinimal { - limit = 2 - } - if seen[norm] > limit { - changed = true - continue - } - } - out = append(out, line) - } - - if !changed { - return input, 0 - } - out = append(out, "[structural-collapse: repeated boilerplate pruned]") - output := strings.Join(out, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} diff --git a/internal/filter/swe_adaptive_loop.go b/internal/filter/swe_adaptive_loop.go deleted file mode 100644 index efba0f53a..000000000 --- a/internal/filter/swe_adaptive_loop.go +++ /dev/null @@ -1,125 +0,0 @@ -package filter - -import ( - "math" - "sort" - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// SWEAdaptiveLoopFilter adds a lightweight self-adaptive prune loop inspired by -// SWE-Pruner style iterative scoring and retention feedback. -type SWEAdaptiveLoopFilter struct { - passesMinimal int - passesAggressive int - baseKeepRatio float64 -} - -// NewSWEAdaptiveLoopFilter creates the adaptive loop filter. -func NewSWEAdaptiveLoopFilter() *SWEAdaptiveLoopFilter { - return &SWEAdaptiveLoopFilter{ - passesMinimal: 2, - passesAggressive: 3, - baseKeepRatio: 0.80, - } -} - -// Name returns the filter name. -func (f *SWEAdaptiveLoopFilter) Name() string { return "40_swe_adaptive_loop" } - -// Apply runs a small iterative pruning loop with progressively tighter budgets. -func (f *SWEAdaptiveLoopFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - if len(lines) < 10 { - return input, 0 - } - - passes := f.passesMinimal - if mode == ModeAggressive { - passes = f.passesAggressive - } - - current := lines - for pass := 0; pass < passes; pass++ { - ratio := f.baseKeepRatio - float64(pass)*0.12 - if mode == ModeAggressive { - ratio -= 0.08 - } - if ratio < 0.35 { - ratio = 0.35 - } - next := swePrunePass(current, ratio) - if len(next) == len(current) { - break - } - current = next - } - - output := strings.Join(current, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -func swePrunePass(lines []string, keepRatio float64) []string { - if len(lines) == 0 { - return lines - } - target := int(math.Ceil(float64(len(lines)) * keepRatio)) - if target < 4 { - target = 4 - } - - type cand struct { - idx int - score float64 - } - cands := make([]cand, 0, len(lines)) - keep := make(map[int]bool, target) - - termFreq := daTermFrequency(lines) - for i, line := range lines { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - continue - } - if i == 0 || i == len(lines)-1 || isErrorLine(line) || isWarningLine(line) || isCodeLine(line) { - keep[i] = true - continue - } - score := daLineScore(line, termFreq, len(lines)) - if isReasoningLine(line) || epicIsCausalEdge(line) { - score += 1.0 - } - if _, ok := detectRoleHeader(line); ok { - score += 0.6 - } - cands = append(cands, cand{idx: i, score: score}) - } - - sort.Slice(cands, func(i, j int) bool { return cands[i].score > cands[j].score }) - for _, c := range cands { - if len(keep) >= target { - break - } - keep[c.idx] = true - } - - out := make([]string, 0, len(keep)) - for i, line := range lines { - if keep[i] { - out = append(out, line) - } - } - if len(out) == 0 { - return lines - } - return out -} diff --git a/internal/filter/tests_81_100_stub.go b/internal/filter/tests_81_100_stub.go deleted file mode 100644 index dd2764a42..000000000 --- a/internal/filter/tests_81_100_stub.go +++ /dev/null @@ -1,4 +0,0 @@ -package filter - -// Test stubs - full implementation in separate test files -// Tests 81-100 cover all layers with proper interface implementations diff --git a/internal/filter/tier_config.go b/internal/filter/tier_config.go index a888c14a1..fe7a60362 100644 --- a/internal/filter/tier_config.go +++ b/internal/filter/tier_config.go @@ -231,36 +231,8 @@ func enableTierLayers(cfg *PipelineConfig, tier AutoTier) { cfg.EnableAgentMemory = true case AutoTierAdvanced: - cfg.EnableMarginalInfoGain = true - cfg.EnableNearDedup = true - cfg.EnableCoTCompress = true - cfg.EnableCodingAgentCtx = true - cfg.EnablePerceptionCompress = true - cfg.EnableLightThinker = true - cfg.EnableThinkSwitcher = true - cfg.EnableGMSA = true - cfg.EnableCARL = true - cfg.EnableSlimInfer = true - cfg.EnableDiffAdapt = true - cfg.EnableEPiC = true - cfg.EnableSSDP = true - cfg.EnableAgentOCR = true - cfg.EnableS2MAD = true - cfg.EnableACON = true - cfg.EnableLatentCollab = true - cfg.EnableGraphCoT = true - cfg.EnableRoleBudget = true - cfg.EnableSWEAdaptive = true case AutoTierSpecialized: - cfg.EnableAgentOCRHist = true - cfg.EnablePlanBudget = true - cfg.EnableLightMem = true - cfg.EnablePathShorten = true - cfg.EnableJSONSampler = true - cfg.EnableContextCrunch = true - cfg.EnableSearchCrunch = true - cfg.EnableStructColl = true cfg.EnableAdaptiveLearning = true } } diff --git a/internal/filter/tree_search_diverge_prune.go b/internal/filter/tree_search_diverge_prune.go deleted file mode 100644 index 4d6c961ae..000000000 --- a/internal/filter/tree_search_diverge_prune.go +++ /dev/null @@ -1,189 +0,0 @@ -package filter - -import ( - "strings" - - "github.com/GrayCodeAI/tok/internal/core" -) - -// Paper: "SSDP / Chopping Trees: Pruning Tree-of-Thought Branches for Efficient LLM Inference" -// NeurIPSW 2025 -// -// SSDPFilter detects branching tree-of-thought (ToT) structures in text and prunes -// redundant or divergent branches, keeping only the most informative path. -// -// Branch detection: sequences starting with markers like: -// - "Option A:", "Option B:", "Alternative:", "Approach 1:", "Approach 2:" -// - "Case 1:", "Case 2:", "Path A:", "Scenario A:" -// -// Pruning strategy: -// 1. Similarity pruning: if two branches share >60% vocabulary, drop the shorter one. -// 2. Divergence pruning: if a branch's content strongly contradicts the final -// conclusion (detected by negation+key-term overlap), drop it. -// 3. In aggressive mode: keep only the branch with the highest anchor score -// (error/heading density) — the branch most likely to be the final answer. -type SSDPFilter struct { - simThreshold float64 // vocabulary overlap threshold for similarity pruning -} - -// NewSSDPFilter creates a new SSDP tree-of-thought branch pruner. -func NewSSDPFilter() *SSDPFilter { - return &SSDPFilter{ - simThreshold: 0.60, - } -} - -// Name returns the filter name. -func (f *SSDPFilter) Name() string { return "33_ssdp" } - -// Apply detects ToT branch blocks and prunes redundant ones. -func (f *SSDPFilter) Apply(input string, mode Mode) (string, int) { - if mode == ModeNone { - return input, 0 - } - - lines := strings.Split(input, "\n") - - branches := f.detectBranches(lines) - if len(branches) < 2 { - return input, 0 - } - - simThresh := f.simThreshold - if mode == ModeAggressive { - simThresh = 0.45 - } - - suppress := make(map[int]bool) - - if mode == ModeAggressive && len(branches) > 1 { - // Keep only highest-anchor branch - bestIdx := 0 - bestScore := -1.0 - for i, b := range branches { - score := ssdpBranchAnchorScore(lines[b.start : b.end+1]) - if score > bestScore { - bestScore = score - bestIdx = i - } - } - for i, b := range branches { - if i != bestIdx { - for j := b.start; j <= b.end; j++ { - suppress[j] = true - } - } - } - } else { - // Similarity pruning: suppress shorter of similar-pair branches - for i := 0; i < len(branches); i++ { - if suppress[branches[i].start] { - continue - } - for j := i + 1; j < len(branches); j++ { - if suppress[branches[j].start] { - continue - } - termA := ssdpBranchTerms(lines[branches[i].start : branches[i].end+1]) - termB := ssdpBranchTerms(lines[branches[j].start : branches[j].end+1]) - if gmsaOverlap(termA, termB) >= simThresh { - // Suppress the shorter branch - lenA := branches[i].end - branches[i].start - lenB := branches[j].end - branches[j].start - victim := j - if lenA < lenB { - victim = i - } - for k := branches[victim].start; k <= branches[victim].end; k++ { - suppress[k] = true - } - } - } - } - } - - if len(suppress) == 0 { - return input, 0 - } - - var result []string - for i, line := range lines { - if !suppress[i] { - result = append(result, line) - } - } - - output := strings.Join(result, "\n") - saved := core.EstimateTokens(input) - core.EstimateTokens(output) - if saved < 0 { - saved = 0 - } - return output, saved -} - -type ssdpBranch struct{ start, end int } - -// branchMarkers are the header patterns that signal a ToT branch. -var ssdpBranchMarkers = []string{ - "option a", "option b", "option c", "option d", - "approach 1", "approach 2", "approach 3", - "alternative 1", "alternative 2", - "case 1", "case 2", "case 3", - "path a", "path b", - "scenario a", "scenario b", - "method 1", "method 2", - "solution 1", "solution 2", -} - -// detectBranches finds branch-header lines and extends each branch to the next header or end. -func (f *SSDPFilter) detectBranches(lines []string) []ssdpBranch { - var headers []int - for i, line := range lines { - lower := strings.ToLower(strings.TrimSpace(line)) - for _, marker := range ssdpBranchMarkers { - if strings.HasPrefix(lower, marker) { - headers = append(headers, i) - break - } - } - } - - if len(headers) < 2 { - return nil - } - - var branches []ssdpBranch - for k := 0; k < len(headers); k++ { - end := len(lines) - 1 - if k+1 < len(headers) { - end = headers[k+1] - 1 - } - branches = append(branches, ssdpBranch{headers[k], end}) - } - return branches -} - -func ssdpBranchTerms(lines []string) map[string]bool { - set := make(map[string]bool) - for _, line := range lines { - for _, t := range ltTokenize(line) { - set[t] = true - } - } - return set -} - -func ssdpBranchAnchorScore(lines []string) float64 { - if len(lines) == 0 { - return 0 - } - score := 0.0 - for _, line := range lines { - if isErrorLine(line) || isWarningLine(line) { - score += 2.0 - } else if isHeadingLine(line) { - score += 1.0 - } - } - return score / float64(len(lines)) -}