From 97f6617484befe68d7d5f8fee5013977d4df39a8 Mon Sep 17 00:00:00 2001 From: Dazlarus Date: Tue, 16 Jun 2026 15:41:34 -0500 Subject: [PATCH 1/2] test: add pytest test suite for rotator_library core modules Add the project's first test suite covering four self-contained modules in the resilience library: - request_sanitizer.sanitize_request_payload (14 tests) Parameter stripping for dimensions/thinking fields by model type - core.utils.normalize_usage_for_response (13 tests) Normalization of exclusive vs inclusive reasoning_tokens convention - session_tracking.SessionTracker (20 tests) Session inference via conversation fingerprints, tool call IDs, first-user-text matching, TTL pruning, and disk persistence - cooldown_manager.CooldownManager (11 tests) Per-provider cooldown tracking with async-safe access, including expiry, extension, and concurrent read/write scenarios Infrastructure added: - pytest.ini with asyncio auto mode - tests/conftest.py with sys.path setup for both standalone and package-qualified imports - .github/workflows/tests.yml CI workflow (Python 3.12/3.13 on Ubuntu/Windows) - CONTRIBUTING.md updated with testing instructions --- .github/workflows/tests.yml | 53 +++++++ CONTRIBUTING.md | 24 +++ pytest.ini | 9 ++ tests/conftest.py | 27 ++++ tests/test_cooldown_manager.py | 127 +++++++++++++++ tests/test_normalize_usage.py | 173 ++++++++++++++++++++ tests/test_request_sanitizer.py | 129 +++++++++++++++ tests/test_session_tracking.py | 272 ++++++++++++++++++++++++++++++++ 8 files changed, 814 insertions(+) create mode 100644 .github/workflows/tests.yml create mode 100644 pytest.ini create mode 100644 tests/conftest.py create mode 100644 tests/test_cooldown_manager.py create mode 100644 tests/test_normalize_usage.py create mode 100644 tests/test_request_sanitizer.py create mode 100644 tests/test_session_tracking.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 000000000..ee9c81e2a --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: LGPL-3.0-only +# Copyright (c) 2026 Mirrowel + +name: Tests + +on: + push: + branches: + - main + - dev + paths: + - 'src/rotator_library/**' + - 'tests/**' + - 'pytest.ini' + - 'requirements.txt' + pull_request: + paths: + - 'src/rotator_library/**' + - 'tests/**' + - 'pytest.ini' + - 'requirements.txt' + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ['3.12', '3.13'] + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + pip install pytest pytest-asyncio + # Install rotator_library dependencies (skip editable install line) + grep -v -- '-e src/rotator_library' requirements.txt > temp_requirements.txt + pip install -r temp_requirements.txt + pip install -e src/rotator_library + + - name: Run tests + shell: bash + run: pytest -v diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8ecee43a1..f59b1e3e7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,6 +60,29 @@ All new source files **must** include the appropriate license header: - Use type hints where practical - Keep functions focused and well-documented +### Running Tests + +The project uses `pytest` for testing. The test suite covers self-contained modules +in the resilience library (`rotator_library`). + +```bash +# Install test dependencies +pip install pytest pytest-asyncio + +# Run the full test suite +pytest + +# Run a specific test module +pytest tests/test_request_sanitizer.py + +# Run with verbose output +pytest -v +``` + +When adding new features or fixing bugs, include tests in the `tests/` directory. +Follow the existing test file naming convention (`test_*.py`) and use descriptive +class names to group related tests. + ## Pull Requests 1. Fork the repository and create a feature branch @@ -67,6 +90,7 @@ All new source files **must** include the appropriate license header: 3. Reference related issues in commits: `feat(providers): add X provider (#123)` 4. Open a PR with a clear description of what changed and why 5. Ensure your changes include necessary documentation updates +6. Add or update tests for any changed behavior ## Reporting Issues diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..e7209f300 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: LGPL-3.0-only +# Copyright (c) 2026 Mirrowel + +[pytest] +testpaths = tests +asyncio_mode = auto +python_files = test_*.py +python_classes = Test* +python_functions = test_* diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..bf4c010b4 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-3.0-only +# Copyright (c) 2026 Mirrowel + +""" +Pytest configuration and shared fixtures for the rotator_library test suite. + +This conftest adds the source directories to sys.path so that both +standalone modules (request_sanitizer, session_tracking, etc.) and +package-qualified modules (rotator_library.core.utils) can be imported +without requiring a full editable install of every dependency. +""" + +import sys +from pathlib import Path + +# Resolve the repository root (tests/ -> parent) +_REPO_ROOT = Path(__file__).resolve().parent.parent +_SRC_ROOT = _REPO_ROOT / "src" +_LIB_ROOT = _SRC_ROOT / "rotator_library" + +# Prepend source paths so that: +# - `import rotator_library.X` works (via _SRC_ROOT) +# - `from request_sanitizer import ...` works (via _LIB_ROOT) +for _path in (_SRC_ROOT, _LIB_ROOT): + _str = str(_path) + if _str not in sys.path: + sys.path.insert(0, _str) diff --git a/tests/test_cooldown_manager.py b/tests/test_cooldown_manager.py new file mode 100644 index 000000000..fa9cea39a --- /dev/null +++ b/tests/test_cooldown_manager.py @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: LGPL-3.0-only +# Copyright (c) 2026 Mirrowel + +""" +Tests for cooldown_manager.CooldownManager. + +CooldownManager tracks per-provider cooldown periods to handle +IP-based rate limiting. When a 429 is received, all requests to +that provider are paused for a configurable duration. +""" + +import asyncio +import time +from unittest.mock import patch + +import pytest + +from cooldown_manager import CooldownManager + + +class TestCooldownBasics: + """Basic functionality tests.""" + + @pytest.mark.asyncio + async def test_provider_not_cooling_down_initially(self): + cm = CooldownManager() + assert await cm.is_cooling_down("openai") is False + + @pytest.mark.asyncio + async def test_start_cooldown_makes_provider_cooling(self): + cm = CooldownManager() + await cm.start_cooldown("openai", 60) + assert await cm.is_cooling_down("openai") is True + + @pytest.mark.asyncio + async def test_different_providers_independent(self): + cm = CooldownManager() + await cm.start_cooldown("openai", 60) + assert await cm.is_cooling_down("openai") is True + assert await cm.is_cooling_down("anthropic") is False + + +class TestCooldownRemaining: + """Tests for remaining time calculations.""" + + @pytest.mark.asyncio + async def test_remaining_cooldown_positive(self): + cm = CooldownManager() + await cm.start_cooldown("openai", 100) + remaining = await cm.get_cooldown_remaining("openai") + # Should be close to 100 but slightly less due to execution time + assert 90 <= remaining <= 100 + + @pytest.mark.asyncio + async def test_remaining_cooldown_zero_if_not_cooling(self): + cm = CooldownManager() + remaining = await cm.get_cooldown_remaining("openai") + assert remaining == 0 + + @pytest.mark.asyncio + async def test_remaining_cooldown_after_expiry(self): + cm = CooldownManager() + await cm.start_cooldown("openai", 0) + # With duration=0, the cooldown is set to exactly now + # By the time we check, time has advanced + await asyncio.sleep(0.01) + remaining = await cm.get_cooldown_remaining("openai") + assert remaining == 0 + + @pytest.mark.asyncio + async def test_get_remaining_cooldown_alias(self): + """get_remaining_cooldown is a backward-compatible alias.""" + cm = CooldownManager() + await cm.start_cooldown("openai", 50) + r1 = await cm.get_cooldown_remaining("openai") + r2 = await cm.get_remaining_cooldown("openai") + assert abs(r1 - r2) < 1 # Should be nearly identical + + +class TestCooldownExpiry: + """Tests for cooldown expiration behavior.""" + + @pytest.mark.asyncio + async def test_cooldown_expires(self): + cm = CooldownManager() + await cm.start_cooldown("openai", 1) + assert await cm.is_cooling_down("openai") is True + await asyncio.sleep(1.1) + assert await cm.is_cooling_down("openai") is False + + @pytest.mark.asyncio + async def test_cooldown_extends_on_new_start(self): + cm = CooldownManager() + await cm.start_cooldown("openai", 5) + r1 = await cm.get_cooldown_remaining("openai") + + await cm.start_cooldown("openai", 100) + r2 = await cm.get_cooldown_remaining("openai") + + assert r2 > r1 + + +class TestCooldownConcurrency: + """Tests for concurrent access safety.""" + + @pytest.mark.asyncio + async def test_concurrent_start_and_check(self): + cm = CooldownManager() + # Multiple concurrent operations should not cause issues + await asyncio.gather( + cm.start_cooldown("openai", 10), + cm.start_cooldown("anthropic", 20), + cm.is_cooling_down("openai"), + cm.is_cooling_down("anthropic"), + cm.is_cooling_down("gemini"), + ) + assert await cm.is_cooling_down("openai") is True + assert await cm.is_cooling_down("anthropic") is True + assert await cm.is_cooling_down("gemini") is False + + @pytest.mark.asyncio + async def test_concurrent_reads(self): + cm = CooldownManager() + await cm.start_cooldown("openai", 10) + # Many concurrent reads + results = await asyncio.gather(*[cm.is_cooling_down("openai") for _ in range(100)]) + assert all(results) diff --git a/tests/test_normalize_usage.py b/tests/test_normalize_usage.py new file mode 100644 index 000000000..9bb21f326 --- /dev/null +++ b/tests/test_normalize_usage.py @@ -0,0 +1,173 @@ +# SPDX-License-Identifier: LGPL-3.0-only +# Copyright (c) 2026 Mirrowel + +""" +Tests for rotator_library.core.utils.normalize_usage_for_response. + +This function normalizes provider usage data so that reasoning_tokens +are inclusive of completion_tokens (the OpenAI convention), fixing +providers like Mistral that report them exclusively. +""" + +import pytest + +from rotator_library.core.utils import normalize_usage_for_response + + +# ============================================================================ +# Dict-based usage tests +# ============================================================================ + + +class TestNormalizeUsageDict: + """Tests using dict-style usage objects (as from streamed chunks).""" + + def test_no_change_when_no_reasoning_tokens(self): + usage = { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150, + } + normalize_usage_for_response(usage, "openai/gpt-4") + assert usage["completion_tokens"] == 50 + assert usage["total_tokens"] == 150 + + def test_no_change_when_reasoning_is_inclusive(self): + """When completion_tokens >= reasoning_tokens, the convention is already correct.""" + usage = { + "prompt_tokens": 100, + "completion_tokens": 80, + "total_tokens": 180, + "completion_tokens_details": {"reasoning_tokens": 30}, + } + normalize_usage_for_response(usage, "openai/o3") + assert usage["completion_tokens"] == 80 + assert usage["total_tokens"] == 180 + + def test_normalizes_exclusive_reasoning(self): + """When reasoning_tokens > completion_tokens, add them together.""" + usage = { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120, + "completion_tokens_details": {"reasoning_tokens": 50}, + } + normalize_usage_for_response(usage, "mistral/mistral-large") + assert usage["completion_tokens"] == 70 # 20 + 50 + assert usage["total_tokens"] == 170 # 100 + 70 + + def test_handles_missing_completion_tokens(self): + usage = { + "prompt_tokens": 100, + "total_tokens": 100, + "completion_tokens_details": {"reasoning_tokens": 30}, + } + normalize_usage_for_response(usage, "mistral/mistral-large") + assert usage["completion_tokens"] == 30 # 0 + 30 + assert usage["total_tokens"] == 130 # 100 + 30 + + def test_handles_missing_prompt_tokens(self): + usage = { + "completion_tokens": 10, + "total_tokens": 10, + "completion_tokens_details": {"reasoning_tokens": 40}, + } + normalize_usage_for_response(usage, "mistral/mistral-large") + assert usage["completion_tokens"] == 50 + assert usage["total_tokens"] == 50 # 0 + 50 + + def test_zero_reasoning_tokens_no_change(self): + usage = { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150, + "completion_tokens_details": {"reasoning_tokens": 0}, + } + normalize_usage_for_response(usage, "openai/gpt-4") + assert usage["completion_tokens"] == 50 + + def test_none_usage_does_nothing(self): + """Passing None should be a no-op.""" + normalize_usage_for_response(None, "openai/gpt-4") # should not raise + + def test_empty_dict_does_nothing(self): + normalize_usage_for_response({}, "openai/gpt-4") # should not raise + # No assertions needed - just verifying it doesn't raise + + def test_details_without_reasoning_key(self): + """completion_tokens_details without reasoning_tokens should be a no-op.""" + usage = { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150, + "completion_tokens_details": {}, + } + normalize_usage_for_response(usage, "openai/gpt-4") + assert usage["completion_tokens"] == 50 + + +# ============================================================================ +# Object/attribute-based usage tests +# ============================================================================ + + +class _MockUsageDetails: + """Minimal mock for a pydantic-style usage details object.""" + + def __init__(self, reasoning_tokens=0): + self.reasoning_tokens = reasoning_tokens + + +class _MockUsage: + """Minimal mock for a pydantic-style usage object.""" + + def __init__(self, prompt=0, completion=0, reasoning=0): + self.prompt_tokens = prompt + self.completion_tokens = completion + self.total_tokens = prompt + completion + self.completion_tokens_details = _MockUsageDetails(reasoning) + + +class TestNormalizeUsageObject: + """Tests using attribute-style usage objects (as from non-streamed responses).""" + + def test_no_change_when_no_reasoning(self): + usage = _MockUsage(prompt=100, completion=50) + normalize_usage_for_response(usage, "openai/gpt-4") + assert usage.completion_tokens == 50 + assert usage.total_tokens == 150 + + def test_normalizes_exclusive_reasoning(self): + usage = _MockUsage(prompt=100, completion=20, reasoning=50) + normalize_usage_for_response(usage, "mistral/mistral-large") + assert usage.completion_tokens == 70 + assert usage.total_tokens == 170 + + def test_no_change_when_inclusive(self): + usage = _MockUsage(prompt=100, completion=80, reasoning=30) + normalize_usage_for_response(usage, "openai/o3") + assert usage.completion_tokens == 80 + assert usage.total_tokens == 180 + + def test_object_with_none_details(self): + usage = _MockUsage(prompt=100, completion=50) + usage.completion_tokens_details = None + normalize_usage_for_response(usage, "openai/gpt-4") + assert usage.completion_tokens == 50 + + +# ============================================================================ +# Edge cases +# ============================================================================ + + +class TestNormalizeUsageEdgeCases: + def test_details_as_dict_on_object(self): + """An object with completion_tokens_details as a dict should also work.""" + usage = _MockUsage(prompt=100, completion=20) + usage.completion_tokens_details = {"reasoning_tokens": 50} + + normalize_usage_for_response(usage, "mistral/mistral-large") + + assert usage.completion_tokens == 70 + assert usage.total_tokens == 170 diff --git a/tests/test_request_sanitizer.py b/tests/test_request_sanitizer.py new file mode 100644 index 000000000..8ae6f5442 --- /dev/null +++ b/tests/test_request_sanitizer.py @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: LGPL-3.0-only +# Copyright (c) 2026 Mirrowel + +""" +Tests for request_sanitizer.sanitize_request_payload. + +This module is a pure function that strips unsupported parameters +from LLM request payloads based on the target model name. +""" + +import pytest + +from request_sanitizer import sanitize_request_payload + + +class TestSanitizeDimensions: + """Tests for the 'dimensions' parameter sanitization.""" + + def test_dimensions_removed_for_non_openai_embedding_model(self): + payload = { + "model": "anthropic/claude-3-opus", + "dimensions": 256, + } + result = sanitize_request_payload(payload, "anthropic/claude-3-opus") + assert "dimensions" not in result + + def test_dimensions_kept_for_openai_text_embedding_3(self): + payload = { + "model": "openai/text-embedding-3-small", + "dimensions": 256, + } + result = sanitize_request_payload(payload, "openai/text-embedding-3-small") + assert result["dimensions"] == 256 + + def test_dimensions_kept_for_openai_text_embedding_3_large(self): + payload = { + "dimensions": 1024, + } + result = sanitize_request_payload(payload, "openai/text-embedding-3-large") + assert result["dimensions"] == 1024 + + def test_dimensions_removed_for_gemini_model(self): + payload = { + "model": "gemini/gemini-2.5-pro", + "dimensions": 512, + } + result = sanitize_request_payload(payload, "gemini/gemini-2.5-pro") + assert "dimensions" not in result + + def test_no_dimensions_key_leaves_payload_unchanged(self): + payload = {"model": "openai/gpt-4", "messages": []} + result = sanitize_request_payload(payload, "openai/gpt-4") + assert result == payload + + +class TestSanitizeThinking: + """Tests for the 'thinking' parameter sanitization.""" + + _ENABLED_BUDGET_NEG1 = {"type": "enabled", "budget_tokens": -1} + + def test_thinking_removed_for_non_gemini_model(self): + payload = { + "model": "openai/gpt-4", + "thinking": self._ENABLED_BUDGET_NEG1, + } + result = sanitize_request_payload(payload, "openai/gpt-4") + assert "thinking" not in result + + def test_thinking_kept_for_gemini_25_pro(self): + payload = { + "thinking": self._ENABLED_BUDGET_NEG1, + } + result = sanitize_request_payload(payload, "gemini/gemini-2.5-pro") + assert result["thinking"] == self._ENABLED_BUDGET_NEG1 + + def test_thinking_kept_for_gemini_25_flash(self): + payload = { + "thinking": self._ENABLED_BUDGET_NEG1, + } + result = sanitize_request_payload(payload, "gemini/gemini-2.5-flash") + assert result["thinking"] == self._ENABLED_BUDGET_NEG1 + + def test_thinking_with_other_value_is_kept(self): + """Only the exact sentinel dict {type: enabled, budget_tokens: -1} is stripped.""" + thinking = {"type": "enabled", "budget_tokens": 1000} + payload = {"thinking": thinking} + result = sanitize_request_payload(payload, "openai/gpt-4") + assert result["thinking"] == thinking + + def test_thinking_disabled_is_kept(self): + thinking = {"type": "disabled"} + payload = {"thinking": thinking} + result = sanitize_request_payload(payload, "openai/gpt-4") + assert result["thinking"] == thinking + + +class TestSanitizeGeneralBehavior: + """General behavior tests.""" + + def test_empty_payload(self): + result = sanitize_request_payload({}, "openai/gpt-4") + assert result == {} + + def test_unrelated_keys_preserved(self): + payload = { + "model": "openai/gpt-4", + "messages": [{"role": "user", "content": "hello"}], + "temperature": 0.7, + "max_tokens": 100, + } + result = sanitize_request_payload(payload, "openai/gpt-4") + assert result == payload + + def test_returns_same_dict_object(self): + """The function should modify and return the same dict object (in-place).""" + payload = {"dimensions": 128} + result = sanitize_request_payload(payload, "some/model") + assert result is payload + + def test_both_dimensions_and_thinking_removed(self): + payload = { + "dimensions": 256, + "thinking": {"type": "enabled", "budget_tokens": -1}, + "messages": [], + } + result = sanitize_request_payload(payload, "anthropic/claude-3-opus") + assert "dimensions" not in result + assert "thinking" not in result + assert "messages" in result diff --git a/tests/test_session_tracking.py b/tests/test_session_tracking.py new file mode 100644 index 000000000..17fbcb8ef --- /dev/null +++ b/tests/test_session_tracking.py @@ -0,0 +1,272 @@ +# SPDX-License-Identifier: LGPL-3.0-only +# Copyright (c) 2026 Mirrowel + +""" +Tests for session_tracking.SessionTracker. + +SessionTracker infers stable session IDs from request payloads so that +sequential rotation can keep using the same credential for the same +conversation. + +These tests exercise: +- Explicit session/conversation IDs (strong fingerprints) +- Tool call ID matching (strong fingerprints) +- First-user-text matching (weak fingerprints) +- TTL expiry and pruning +- Multiple concurrent sessions +- Persistence (save/load) +""" + +import json +import time +from pathlib import Path +from unittest.mock import patch + +import pytest + +from session_tracking import SessionTracker + + +# ============================================================================ +# Explicit ID tests +# ============================================================================ + + +class TestExplicitSessionIds: + """Tests for explicit session/conversation/thread IDs.""" + + @pytest.mark.parametrize( + "key", + [ + "session_id", + "conversation_id", + "conversationId", + "thread_id", + "threadId", + "chat_id", + "chatId", + ], + ) + def test_explicit_id_creates_session(self, key): + tracker = SessionTracker() + request = {key: "abc-123", "messages": []} + sid = tracker.infer_session_id(request) + assert sid is not None + + def test_same_explicit_id_returns_same_session(self): + tracker = SessionTracker() + req = {"session_id": "conv-1", "messages": []} + sid1 = tracker.infer_session_id(req) + sid2 = tracker.infer_session_id(req) + assert sid1 == sid2 + + def test_different_explicit_ids_create_different_sessions(self): + tracker = SessionTracker() + sid1 = tracker.infer_session_id({"session_id": "conv-1", "messages": []}) + sid2 = tracker.infer_session_id({"session_id": "conv-2", "messages": []}) + assert sid1 != sid2 + + def test_empty_explicit_id_ignored(self): + tracker = SessionTracker() + # Empty string should be falsy → no explicit fingerprint + sid = tracker.infer_session_id({"session_id": "", "messages": []}) + # With no messages, there's nothing to fingerprint + assert sid is None + + +# ============================================================================ +# Conversation fingerprint tests +# ============================================================================ + + +class TestConversationFingerprints: + """Tests for message-based conversation fingerprinting.""" + + def test_same_conversation_returns_same_session(self): + tracker = SessionTracker() + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + sid1 = tracker.infer_session_id({"messages": messages}) + sid2 = tracker.infer_session_id({"messages": messages}) + assert sid1 == sid2 + + def test_different_conversations_different_sessions(self): + tracker = SessionTracker() + msgs1 = [{"role": "user", "content": "What is Python?"}] + msgs2 = [{"role": "user", "content": "What is Rust?"}] + sid1 = tracker.infer_session_id({"messages": msgs1}) + sid2 = tracker.infer_session_id({"messages": msgs2}) + assert sid1 != sid2 + + def test_tool_call_ids_create_strong_fingerprint(self): + tracker = SessionTracker() + messages = [ + {"role": "user", "content": "Use the tool"}, + { + "role": "assistant", + "content": None, + "tool_calls": [{"id": "call_001", "function": {"name": "get_weather"}}], + }, + { + "role": "tool", + "tool_call_id": "call_001", + "content": "Sunny, 72F", + }, + ] + sid1 = tracker.infer_session_id({"messages": messages}) + sid2 = tracker.infer_session_id({"messages": messages}) + assert sid1 == sid2 + + def test_empty_messages_returns_none(self): + tracker = SessionTracker() + assert tracker.infer_session_id({"messages": []}) is None + + def test_no_messages_key_returns_none(self): + tracker = SessionTracker() + assert tracker.infer_session_id({}) is None + + def test_multi_content_user_message(self): + """User content as a list of parts should be handled.""" + tracker = SessionTracker() + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "data:..."}}, + ], + } + ] + sid1 = tracker.infer_session_id({"messages": messages}) + sid2 = tracker.infer_session_id({"messages": messages}) + assert sid1 == sid2 + + +# ============================================================================ +# Weak fingerprint (first user text) tests +# ============================================================================ + + +class TestWeakFingerprints: + """Tests for first-user-text matching.""" + + def test_same_first_user_text_weak_match(self): + tracker = SessionTracker() + msgs1 = [{"role": "user", "content": "Tell me a joke"}] + msgs2 = [ + {"role": "user", "content": "Tell me a joke"}, + {"role": "assistant", "content": "Why did the chicken..."}, + {"role": "user", "content": "Another one"}, + ] + sid1 = tracker.infer_session_id({"messages": msgs1}) + sid2 = tracker.infer_session_id({"messages": msgs2}) + # The weak fingerprint should bridge them + assert sid1 == sid2 + + +# ============================================================================ +# TTL and pruning tests +# ============================================================================ + + +class TestTTLAndPruning: + """Tests for TTL expiry and record pruning.""" + + def test_expired_record_is_pruned(self): + tracker = SessionTracker(ttl_seconds=1) + tracker.infer_session_id({"session_id": "conv-old"}) + # Wait for expiry + time.sleep(1.1) + # After expiry, a new request with same ID should get a new session + sid_new = tracker.infer_session_id({"session_id": "conv-old"}) + # Session ID should still be returned (creates new record) + assert sid_new is not None + + def test_different_ttls(self): + tracker = SessionTracker(ttl_seconds=60) + sid1 = tracker.infer_session_id({"session_id": "long"}) + sid2 = tracker.infer_session_id({"session_id": "long"}) + assert sid1 == sid2 + + +# ============================================================================ +# Persistence tests +# ============================================================================ + + +class TestPersistence: + """Tests for disk persistence.""" + + def test_save_and_load(self, tmp_path: Path): + persist_file = tmp_path / "sessions.json" + + tracker1 = SessionTracker( + ttl_seconds=3600, persist_to_disk=True, persistence_path=persist_file + ) + sid = tracker1.infer_session_id({"session_id": "persisted-conv"}) + + # Create a new tracker that loads from disk + tracker2 = SessionTracker( + ttl_seconds=3600, persist_to_disk=True, persistence_path=persist_file + ) + sid2 = tracker2.infer_session_id({"session_id": "persisted-conv"}) + assert sid == sid2 + + def test_persistence_file_created(self, tmp_path: Path): + persist_file = tmp_path / "sessions.json" + tracker = SessionTracker( + ttl_seconds=3600, persist_to_disk=True, persistence_path=persist_file + ) + tracker.infer_session_id({"session_id": "test"}) + assert persist_file.exists() + + def test_load_corrupt_file_silent(self, tmp_path: Path): + """Corrupted persistence file should be silently ignored.""" + persist_file = tmp_path / "sessions.json" + persist_file.write_text("NOT JSON {{{", encoding="utf-8") + + # Should not raise + tracker = SessionTracker( + ttl_seconds=3600, persist_to_disk=True, persistence_path=persist_file + ) + # Should still work normally (empty records) + sid = tracker.infer_session_id({"session_id": "new-conv"}) + assert sid is not None + + +# ============================================================================ +# Edge cases +# ============================================================================ + + +class TestEdgeCases: + def test_none_request_data(self): + tracker = SessionTracker() + # infer_session_id expects a dict; None should not crash but return None + # (it accesses .get on dict, so None would raise — that's acceptable behavior) + # Test with empty dict instead + assert tracker.infer_session_id({}) is None + + def test_ttl_minimum_clamped(self): + """TTL should be clamped to at least 1 second.""" + tracker = SessionTracker(ttl_seconds=0) + assert tracker.ttl_seconds == 1 + + def test_conversation_growth_extends_session(self): + """Adding messages to an existing conversation should preserve session.""" + tracker = SessionTracker() + msgs_v1 = [{"role": "user", "content": "Hello"}] + sid1 = tracker.infer_session_id({"messages": msgs_v1}) + + msgs_v2 = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi!"}, + {"role": "user", "content": "How are you?"}, + ] + sid2 = tracker.infer_session_id({"messages": msgs_v2}) + + # The strong conversation fingerprint changed, but the weak + # first-user-text fingerprint should bridge them + assert sid1 == sid2 From 423c390cd0b37e00d22176c3a3d00f1d373578e0 Mon Sep 17 00:00:00 2001 From: Dazlarus Date: Tue, 16 Jun 2026 17:34:53 -0500 Subject: [PATCH 2/2] fix: address review feedback on test suite - TTL expiry test now verifies session ID actually changes (sid_new != sid_old) - Cooldown expiry test patches clock instead of real asyncio.sleep - CI workflow includes itself in paths filter - CI cleans up temp_requirements.txt after install Addresses feedback from CodeRabbit, Greptile, and Copilot reviews. --- .github/workflows/tests.yml | 3 +++ tests/test_cooldown_manager.py | 5 +++-- tests/test_session_tracking.py | 13 +++++++------ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ee9c81e2a..ea9e0fbfe 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,12 +13,14 @@ on: - 'tests/**' - 'pytest.ini' - 'requirements.txt' + - '.github/workflows/tests.yml' pull_request: paths: - 'src/rotator_library/**' - 'tests/**' - 'pytest.ini' - 'requirements.txt' + - '.github/workflows/tests.yml' jobs: test: @@ -47,6 +49,7 @@ jobs: grep -v -- '-e src/rotator_library' requirements.txt > temp_requirements.txt pip install -r temp_requirements.txt pip install -e src/rotator_library + rm -f temp_requirements.txt - name: Run tests shell: bash diff --git a/tests/test_cooldown_manager.py b/tests/test_cooldown_manager.py index fa9cea39a..6bcb6dca0 100644 --- a/tests/test_cooldown_manager.py +++ b/tests/test_cooldown_manager.py @@ -85,8 +85,9 @@ async def test_cooldown_expires(self): cm = CooldownManager() await cm.start_cooldown("openai", 1) assert await cm.is_cooling_down("openai") is True - await asyncio.sleep(1.1) - assert await cm.is_cooling_down("openai") is False + # Patch the clock to advance past expiry without real sleeping + with patch("cooldown_manager.time.time", return_value=time.time() + 1.1): + assert await cm.is_cooling_down("openai") is False @pytest.mark.asyncio async def test_cooldown_extends_on_new_start(self): diff --git a/tests/test_session_tracking.py b/tests/test_session_tracking.py index 17fbcb8ef..e761fbe41 100644 --- a/tests/test_session_tracking.py +++ b/tests/test_session_tracking.py @@ -176,13 +176,14 @@ class TestTTLAndPruning: def test_expired_record_is_pruned(self): tracker = SessionTracker(ttl_seconds=1) - tracker.infer_session_id({"session_id": "conv-old"}) - # Wait for expiry - time.sleep(1.1) - # After expiry, a new request with same ID should get a new session - sid_new = tracker.infer_session_id({"session_id": "conv-old"}) - # Session ID should still be returned (creates new record) + sid_old = tracker.infer_session_id({"session_id": "conv-old"}) + # Advance past TTL without real sleeping + with patch("session_tracking.time.time", return_value=time.time() + 2): + # After expiry, a new request with same ID should get a new session + sid_new = tracker.infer_session_id({"session_id": "conv-old"}) + # The new session must differ from the expired one (proves pruning) assert sid_new is not None + assert sid_new != sid_old def test_different_ttls(self): tracker = SessionTracker(ttl_seconds=60)