Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The format is intentionally lightweight and human-readable. Group entries by rel
- Added an opt-in `auto_update` policy block plus `foundrygate-auto-update` so controlled deployments can gate helper-driven updates without enabling silent self-updates
- Added `GET /api/operator-events` plus operator-event metrics for update checks and helper-driven auto-update attempts
- Added dashboard cards and tables for operator-side update checks and apply attempts
- Added provider-health rollout guardrails so helper-driven auto-updates can block when gateway health is already degraded

## v0.6.0 - 2026-03-12

Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,8 @@ Supported fields in `auto_update`:

- `enabled`
- `allow_major`
- `require_healthy_providers`
- `max_unhealthy_providers`
- `apply_command`

Example:
Expand All @@ -551,6 +553,8 @@ Example:
auto_update:
enabled: true
allow_major: false
require_healthy_providers: true
max_unhealthy_providers: 0
apply_command: "foundrygate-update"
```

Expand All @@ -559,6 +563,7 @@ What the current runtime does with it:
- exposes eligibility in `GET /api/update` under `auto_update`
- shows the same state in the dashboard
- lets `foundrygate-auto-update --apply` run only when the current release state is eligible
- can block helper-driven rollout when provider health is already degraded

What it still does not do:

Expand Down
2 changes: 2 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,8 @@ update_check:
auto_update:
enabled: false
allow_major: false
require_healthy_providers: true
max_unhealthy_providers: 0
apply_command: "foundrygate-update"


Expand Down
1 change: 1 addition & 0 deletions docs/PUBLISHING.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ If you want scheduled update application:

- keep `auto_update.enabled: true` explicit in `config.yaml`
- keep `allow_major: false` unless you are ready to absorb breaking changes automatically
- keep `require_healthy_providers: true` unless you are intentionally allowing rollouts while the gateway is degraded
- prefer the reviewed examples in [examples/foundrygate-auto-update.service](./examples/foundrygate-auto-update.service) and [examples/foundrygate-auto-update.timer](./examples/foundrygate-auto-update.timer)
- use the cron example in [examples/foundrygate-auto-update.cron](./examples/foundrygate-auto-update.cron) only when `systemd` timers are not practical

Expand Down
2 changes: 2 additions & 0 deletions docs/TROUBLESHOOTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,6 @@ If `foundrygate-auto-update --apply` refuses to run, inspect the `auto_update` b

- `auto_update.enabled: false`
- the latest release is a major upgrade while `allow_major: false`
- no provider is healthy while `require_healthy_providers: true`
- the number of unhealthy providers exceeds `max_unhealthy_providers` while `require_healthy_providers: true`
- the release lookup itself is unavailable
14 changes: 14 additions & 0 deletions foundrygate/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,16 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]:
if not isinstance(allow_major, bool):
raise ConfigError("'auto_update.allow_major' must be a boolean")

require_healthy_providers = raw.get("require_healthy_providers", True)
if not isinstance(require_healthy_providers, bool):
raise ConfigError("'auto_update.require_healthy_providers' must be a boolean")

max_unhealthy_providers = raw.get("max_unhealthy_providers", 0)
if isinstance(max_unhealthy_providers, bool) or not isinstance(max_unhealthy_providers, int):
raise ConfigError("'auto_update.max_unhealthy_providers' must be a non-negative integer")
if max_unhealthy_providers < 0:
raise ConfigError("'auto_update.max_unhealthy_providers' must be non-negative")

apply_command = raw.get("apply_command", "foundrygate-update")
if not isinstance(apply_command, str) or not apply_command.strip():
raise ConfigError("'auto_update.apply_command' must be a non-empty string")
Expand All @@ -900,6 +910,8 @@ def _normalize_auto_update(data: dict[str, Any]) -> dict[str, Any]:
normalized["auto_update"] = {
"enabled": enabled,
"allow_major": allow_major,
"require_healthy_providers": require_healthy_providers,
"max_unhealthy_providers": max_unhealthy_providers,
"apply_command": apply_command.strip(),
}
return normalized
Expand Down Expand Up @@ -989,6 +1001,8 @@ def auto_update(self) -> dict:
{
"enabled": False,
"allow_major": False,
"require_healthy_providers": True,
"max_unhealthy_providers": 0,
"apply_command": "foundrygate-update",
},
)
Expand Down
26 changes: 18 additions & 8 deletions foundrygate/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from .metrics import MetricsStore, calc_cost
from .providers import ProviderBackend, ProviderError
from .router import Router, RoutingDecision
from .updates import UpdateChecker
from .updates import UpdateChecker, apply_auto_update_guardrails

logger = logging.getLogger("foundrygate")

Expand Down Expand Up @@ -248,6 +248,17 @@ def _build_capability_coverage() -> dict[str, dict[str, Any]]:
return dict(sorted(coverage.items()))


def _health_summary() -> dict[str, int]:
    """Summarize provider health as total, healthy, and unhealthy counts.

    Reads the module-level ``_providers`` mapping and inspects each provider's
    ``health.healthy`` flag.

    Returns:
        A dict with the keys ``providers_total``, ``providers_healthy`` and
        ``providers_unhealthy``.
    """
    total = len(_providers)
    # Each provider is either healthy or not, so the unhealthy count is the
    # remainder after counting the healthy ones.
    healthy = sum(bool(entry.health.healthy) for entry in _providers.values())
    return {
        "providers_total": total,
        "providers_healthy": healthy,
        "providers_unhealthy": total - healthy,
    }


def _estimate_request_dimensions(body: dict[str, Any]) -> dict[str, int | str]:
"""Return lightweight request-dimension estimates for debugging and routing preview."""
messages = body.get("messages", [])
Expand Down Expand Up @@ -675,13 +686,7 @@ async def health():
}
return {
"status": "ok",
"summary": {
"providers_total": len(providers),
"providers_healthy": sum(1 for provider in providers.values() if provider["healthy"]),
"providers_unhealthy": sum(
1 for provider in providers.values() if not provider["healthy"]
),
},
"summary": _health_summary(),
"coverage": _build_capability_coverage(),
"providers": providers,
}
Expand Down Expand Up @@ -821,6 +826,11 @@ async def update_status(request: Request, force: bool = False):
"""Return cached or fresh release update metadata."""
headers = _collect_routing_headers(request)
status = await _update_checker.get_status(force=force)
status.auto_update = apply_auto_update_guardrails(
status.auto_update or {},
providers_healthy=_health_summary()["providers_healthy"],
providers_unhealthy=_health_summary()["providers_unhealthy"],
)
operator_action, client_tag = _collect_operator_context(headers)
auto_update = status.auto_update or {}
_metrics.log_operator_event(
Expand Down
40 changes: 40 additions & 0 deletions foundrygate/updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,38 @@ def alert_level_for_update(update_type: str, *, available: bool, status: str) ->
return "warning"


def apply_auto_update_guardrails(
    auto_update: dict[str, Any],
    *,
    providers_healthy: int,
    providers_unhealthy: int,
) -> dict[str, Any]:
    """Apply provider-health guardrails to one auto-update eligibility result.

    Args:
        auto_update: Eligibility mapping. The keys ``enabled``, ``eligible``,
            ``require_healthy_providers`` and ``max_unhealthy_providers`` are
            read. The mapping itself is never mutated; a falsy value is
            treated as an empty mapping.
        providers_healthy: Number of providers currently reported healthy.
        providers_unhealthy: Number of providers currently reported unhealthy.

    Returns:
        A shallow copy of ``auto_update``. When a guardrail trips, the copy
        has ``eligible`` set to ``False`` and ``blocked_reason`` set to a
        human-readable explanation; otherwise it is returned unchanged.
    """
    result = dict(auto_update or {})

    # Guardrails only ever turn an eligible result into a blocked one, so
    # there is nothing to do when updates are disabled or already blocked.
    if not result.get("enabled") or not result.get("eligible"):
        return result

    # An explicit None is treated like a missing key (guardrail on), so a
    # blank value cannot silently disable the safety check.
    require_healthy = result.get("require_healthy_providers")
    if require_healthy is None:
        require_healthy = True
    if not require_healthy:
        return result

    # Parse the budget only once the guardrail is known to be active; an
    # explicit None falls back to the default budget of zero.
    unhealthy_budget = result.get("max_unhealthy_providers")
    unhealthy_budget = 0 if unhealthy_budget is None else int(unhealthy_budget)

    if providers_healthy <= 0:
        result["eligible"] = False
        result["blocked_reason"] = "No healthy providers available"
    elif providers_unhealthy > unhealthy_budget:
        result["eligible"] = False
        result["blocked_reason"] = (
            f"Too many unhealthy providers ({providers_unhealthy} > {unhealthy_budget})"
        )

    return result


@dataclass
class UpdateStatus:
"""Structured update-check result."""
Expand Down Expand Up @@ -133,6 +165,10 @@ def __init__(
self.auto_update = {
"enabled": bool((auto_update or {}).get("enabled", False)),
"allow_major": bool((auto_update or {}).get("allow_major", False)),
"require_healthy_providers": bool(
(auto_update or {}).get("require_healthy_providers", True)
),
"max_unhealthy_providers": int((auto_update or {}).get("max_unhealthy_providers", 0)),
"apply_command": str((auto_update or {}).get("apply_command", "foundrygate-update")),
}
self._cached = UpdateStatus(
Expand Down Expand Up @@ -187,6 +223,10 @@ def _auto_update_status(
"strategy": "script",
"allowed_update_types": allowed_types,
"allow_major": allow_major,
"require_healthy_providers": bool(
self.auto_update.get("require_healthy_providers", True)
),
"max_unhealthy_providers": int(self.auto_update.get("max_unhealthy_providers", 0)),
"eligible": eligible,
"blocked_reason": blocked_reason,
"apply_command": apply_command,
Expand Down
2 changes: 2 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,6 @@ def test_auto_update_defaults_are_exposed():
cfg = load_config(Path(__file__).parent.parent / "config.yaml")
assert cfg.auto_update["enabled"] is False
assert cfg.auto_update["allow_major"] is False
assert cfg.auto_update["require_healthy_providers"] is True
assert cfg.auto_update["max_unhealthy_providers"] == 0
assert cfg.auto_update["apply_command"] == "foundrygate-update"
51 changes: 51 additions & 0 deletions tests/test_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from foundrygate.updates import (
UpdateChecker,
alert_level_for_update,
apply_auto_update_guardrails,
classify_update,
is_update_available,
)
Expand Down Expand Up @@ -57,6 +58,56 @@ def test_alert_level_maps_update_type_and_status():
assert alert_level_for_update("unknown", available=False, status="unavailable") == "warning"


def test_auto_update_guardrails_block_when_too_many_providers_are_unhealthy():
    """One unhealthy provider against a zero budget must block the update."""
    eligibility = {
        "enabled": True,
        "eligible": True,
        "require_healthy_providers": True,
        "max_unhealthy_providers": 0,
        "blocked_reason": "",
    }

    outcome = apply_auto_update_guardrails(
        eligibility, providers_healthy=1, providers_unhealthy=1
    )

    assert outcome["eligible"] is False
    assert outcome["blocked_reason"] == "Too many unhealthy providers (1 > 0)"


def test_auto_update_guardrails_allow_updates_when_health_budget_is_met():
    """One unhealthy provider within a budget of one leaves the update eligible."""
    eligibility = {
        "enabled": True,
        "eligible": True,
        "require_healthy_providers": True,
        "max_unhealthy_providers": 1,
        "blocked_reason": "",
    }

    outcome = apply_auto_update_guardrails(
        eligibility, providers_healthy=2, providers_unhealthy=1
    )

    assert outcome["eligible"] is True


def test_auto_update_guardrails_block_when_no_provider_is_healthy():
    """Zero healthy providers must block even when the unhealthy budget is met."""
    eligibility = {
        "enabled": True,
        "eligible": True,
        "require_healthy_providers": True,
        "max_unhealthy_providers": 2,
        "blocked_reason": "",
    }

    outcome = apply_auto_update_guardrails(
        eligibility, providers_healthy=0, providers_unhealthy=2
    )

    assert outcome["eligible"] is False
    assert outcome["blocked_reason"] == "No healthy providers available"


@pytest.mark.asyncio
async def test_update_checker_reports_latest_release():
checker = UpdateChecker(
Expand Down
Loading