Skip to content
8 changes: 6 additions & 2 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
{
"name": "Python 3",
"image": "mcr.microsoft.com/devcontainers/python:3.13-bullseye",
"image": "mcr.microsoft.com/devcontainers/python:3.14-bookworm",
"features": {
"ghcr.io/va-h/devcontainers-features/uv:1": {},
"ghcr.io/devcontainers/features/azure-cli:1.2.8": {}
"ghcr.io/devcontainers/features/docker-in-docker:3": {},
"ghcr.io/devcontainers/features/azure-cli:1.2.9": {},
"ghcr.io/devcontainers/features/copilot-cli:1": {}
},
"postCreateCommand": "bash ./devsetup.sh",
"workspaceFolder": "/workspaces/agent-framework/python/",
"customizations": {
"vscode": {
"extensions": [
"GitHub.copilot",
"GitHub.vscode-github-actions",
"ms-python.python",
"ms-windows-ai-studio.windows-ai-studio",
"littlefoxteam.vscode-python-test-adapter"
Expand Down
8 changes: 8 additions & 0 deletions python/packages/core/agent_framework/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
CharacterEstimatorTokenizer,
CompactionProvider,
CompactionStrategy,
ContextWindowCompactionStrategy,
SelectiveToolCallCompactionStrategy,
SlidingWindowStrategy,
SummarizationStrategy,
Expand Down Expand Up @@ -79,6 +80,10 @@
tool_calls_present,
)
from ._feature_stage import ExperimentalFeature, ReleaseCandidateFeature
from ._harness._agent import (
DEFAULT_HARNESS_INSTRUCTIONS,
create_harness_agent,
)
from ._harness._memory import (
DEFAULT_MEMORY_SOURCE_ID,
MemoryContextProvider,
Expand Down Expand Up @@ -297,6 +302,7 @@
"AGENT_FRAMEWORK_USER_AGENT",
"APP_INFO",
"COMPACTION_STATE_KEY",
"DEFAULT_HARNESS_INSTRUCTIONS",
"DEFAULT_MAX_ITERATIONS",
"DEFAULT_MEMORY_SOURCE_ID",
"DEFAULT_MODE_SOURCE_ID",
Expand Down Expand Up @@ -352,6 +358,7 @@
"CompactionStrategy",
"Content",
"ContextProvider",
"ContextWindowCompactionStrategy",
"ContinuationToken",
"ConversationSplit",
"ConversationSplitter",
Expand Down Expand Up @@ -499,6 +506,7 @@
"apply_compaction",
"chat_middleware",
"create_edge_runner",
"create_harness_agent",
"detect_media_type_from_base64",
"evaluate_agent",
"evaluate_workflow",
Expand Down
116 changes: 116 additions & 0 deletions python/packages/core/agent_framework/_compaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,121 @@ async def after_run(
# whether excluded messages are loaded on the next turn.


class ContextWindowCompactionStrategy:
    """Token-budget compaction derived from a model's context window size.

    Computes an input budget from the model's context window and output token
    limits (``max_context_window_tokens - max_output_tokens``), then applies a
    two-phase compaction pipeline:

    1. **Tool result eviction** — collapses older tool-call groups into summaries
       when included tokens exceed ``tool_eviction_threshold`` of the input budget.
    2. **Truncation** — removes oldest non-system groups when included tokens
       exceed ``truncation_threshold`` of the input budget.

    The class uses two independent :class:`TokenBudgetComposedStrategy`
    instances — one per phase — so each fires only when its own threshold
    is exceeded.

    Examples:
        .. code-block:: python

            from agent_framework import ContextWindowCompactionStrategy, CompactionProvider

            strategy = ContextWindowCompactionStrategy(
                max_context_window_tokens=128_000,
                max_output_tokens=16_384,
            )
            provider = CompactionProvider(before_strategy=strategy)
    """

    DEFAULT_TOOL_EVICTION_THRESHOLD: float = 0.5
    """Default fraction of input budget at which tool result eviction triggers."""

    DEFAULT_TRUNCATION_THRESHOLD: float = 0.8
    """Default fraction of input budget at which truncation triggers."""

    def __init__(
        self,
        *,
        max_context_window_tokens: int,
        max_output_tokens: int,
        tokenizer: TokenizerProtocol | None = None,
        tool_eviction_threshold: float = DEFAULT_TOOL_EVICTION_THRESHOLD,
        truncation_threshold: float = DEFAULT_TRUNCATION_THRESHOLD,
        keep_last_tool_call_groups: int = 4,
    ) -> None:
        """Create a context-window compaction strategy.

        Keyword Args:
            max_context_window_tokens: The model's maximum context window size
                in tokens (e.g. 128,000). Must be positive.
            max_output_tokens: The model's maximum output tokens per response
                (e.g. 16,384). Must be >= 0 and strictly smaller than
                ``max_context_window_tokens`` so the input budget is positive.
            tokenizer: Token counter for measuring message sizes. Defaults to
                :class:`CharacterEstimatorTokenizer` when ``None`` is passed.
            tool_eviction_threshold: Fraction of input budget (0.0, 1.0] at
                which tool result eviction triggers. Defaults to 0.5.
            truncation_threshold: Fraction of input budget (0.0, 1.0] at which
                truncation triggers. Must be ≥ ``tool_eviction_threshold``.
                Defaults to 0.8.
            keep_last_tool_call_groups: Number of most recent tool-call groups
                to retain verbatim during tool eviction. Older groups are
                collapsed into summaries. Must be >= 0. Defaults to 4.

        Raises:
            ValueError: If the token limits or thresholds are out of range or
                inconsistent, or if ``keep_last_tool_call_groups`` is negative.
        """
        # Validate everything up front so a bad argument fails before any
        # sub-strategy is constructed.
        if max_context_window_tokens <= 0:
            raise ValueError("max_context_window_tokens must be positive.")
        if max_output_tokens < 0 or max_output_tokens >= max_context_window_tokens:
            raise ValueError("max_output_tokens must be >= 0 and < max_context_window_tokens.")
        if not (0.0 < tool_eviction_threshold <= 1.0):
            raise ValueError("tool_eviction_threshold must be in (0.0, 1.0].")
        if not (0.0 < truncation_threshold <= 1.0):
            raise ValueError("truncation_threshold must be in (0.0, 1.0].")
        if truncation_threshold < tool_eviction_threshold:
            raise ValueError("truncation_threshold must be >= tool_eviction_threshold.")
        if keep_last_tool_call_groups < 0:
            raise ValueError("keep_last_tool_call_groups must be >= 0.")

        # Compare against None explicitly: a caller-supplied tokenizer that
        # happens to be falsy (e.g. defines __len__ or __bool__) must not be
        # silently swapped for the default.
        resolved_tokenizer = CharacterEstimatorTokenizer() if tokenizer is None else tokenizer

        # Tokens left for input once the response allowance is reserved.
        input_budget = max_context_window_tokens - max_output_tokens
        # int() truncates toward zero, so each trigger point never exceeds
        # its fraction of the budget.
        tool_eviction_tokens = int(input_budget * tool_eviction_threshold)
        truncation_tokens = int(input_budget * truncation_threshold)

        self.max_context_window_tokens = max_context_window_tokens
        self.max_output_tokens = max_output_tokens
        self.input_budget_tokens = input_budget
        self.tool_eviction_threshold = tool_eviction_threshold
        self.truncation_threshold = truncation_threshold

        # Phase 1: tool-result eviction, budgeted at the lower threshold.
        self._tool_eviction = TokenBudgetComposedStrategy(
            token_budget=tool_eviction_tokens,
            tokenizer=resolved_tokenizer,
            strategies=[
                ToolResultCompactionStrategy(keep_last_tool_call_groups=keep_last_tool_call_groups),
            ],
        )
        # Phase 2: truncation, budgeted at the higher threshold. The wrapped
        # TruncationStrategy is given the truncation budget as ``max_n`` and
        # the tool-eviction budget as ``compact_to``.
        self._truncation = TokenBudgetComposedStrategy(
            token_budget=truncation_tokens,
            tokenizer=resolved_tokenizer,
            strategies=[
                TruncationStrategy(
                    max_n=truncation_tokens,
                    compact_to=tool_eviction_tokens,
                    tokenizer=resolved_tokenizer,
                ),
            ],
        )

    async def __call__(self, messages: list[Message]) -> bool:
        """Apply the two-phase compaction pipeline.

        Both phases are always awaited, in order: tool eviction first, then
        truncation on the same ``messages`` list.

        Args:
            messages: The messages to compact; passed to both phases.

        Returns:
            True if compaction changed message inclusion; otherwise False.
        """
        evicted = await self._tool_eviction(messages)
        truncated = await self._truncation(messages)
        return truncated or evicted


__all__ = [
"COMPACTION_STATE_KEY",
"EXCLUDED_KEY",
Expand All @@ -1293,6 +1408,7 @@ async def after_run(
"CharacterEstimatorTokenizer",
"CompactionProvider",
"CompactionStrategy",
"ContextWindowCompactionStrategy",
"GroupKind",
"SelectiveToolCallCompactionStrategy",
"SlidingWindowStrategy",
Expand Down
Loading
Loading