From 4dfff2c038e532fd813b5cdb270b822fed8f363d Mon Sep 17 00:00:00 2001 From: bala Date: Wed, 25 Mar 2026 13:45:07 +0000 Subject: [PATCH 01/12] Add gh-copilot support - Backup --- pyproject.toml | 3 + src/microbots/MicroBot.py | 11 ++ src/microbots/constants.py | 1 + src/microbots/llm/copilot_api.py | 123 +++++++++++++++++ test/bot/test_writing_bot_copilot.py | 130 ++++++++++++++++++ test/llm/test_copilot_api.py | 196 +++++++++++++++++++++++++++ 6 files changed, 464 insertions(+) create mode 100644 src/microbots/llm/copilot_api.py create mode 100644 test/bot/test_writing_bot_copilot.py create mode 100644 test/llm/test_copilot_api.py diff --git a/pyproject.toml b/pyproject.toml index becf06a5..ea425fa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,9 @@ classifiers = [ ] requires-python = ">=3.11" +[project.optional-dependencies] +ghcp = ["github-copilot-sdk"] + [tool.setuptools.dynamic] dependencies = { file = ["requirements.txt"] } diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py index a8c9b7a1..8561a65c 100644 --- a/src/microbots/MicroBot.py +++ b/src/microbots/MicroBot.py @@ -362,6 +362,17 @@ def _create_llm(self): system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name, token_provider=self.token_provider, ) + elif self.model_provider == ModelProvider.GITHUB_COPILOT: + try: + from microbots.llm.copilot_api import CopilotApi + except ImportError: + raise ValueError( + "GitHub Copilot provider requires the ghcp extra. 
" + "Install with: pip install microbots[ghcp]" + ) + self.llm = CopilotApi( + system_prompt=system_prompt_with_tools, model_name=self.deployment_name + ) # No Else case required as model provider is already validated using _validate_model_and_provider def _validate_model_and_provider(self, model): diff --git a/src/microbots/constants.py b/src/microbots/constants.py index c4a7ff98..5989eb0f 100644 --- a/src/microbots/constants.py +++ b/src/microbots/constants.py @@ -6,6 +6,7 @@ class ModelProvider(StrEnum): OPENAI = "azure-openai" OLLAMA_LOCAL = "ollama-local" ANTHROPIC = "anthropic" + GITHUB_COPILOT = "github-copilot" class ModelEnum(StrEnum): diff --git a/src/microbots/llm/copilot_api.py b/src/microbots/llm/copilot_api.py new file mode 100644 index 00000000..a3de9910 --- /dev/null +++ b/src/microbots/llm/copilot_api.py @@ -0,0 +1,123 @@ +import asyncio +import json +import os +import threading +from dataclasses import asdict +from logging import getLogger + +from copilot import CopilotClient, PermissionHandler +from copilot.types import SubprocessConfig +from microbots.llm.llm import LLMAskResponse, LLMInterface + +logger = getLogger(__name__) + + +class CopilotApi(LLMInterface): + + def __init__(self, system_prompt, model_name, max_retries=3, github_token=None): + self.system_prompt = system_prompt + self.model_name = model_name + self.messages = [] + + self.max_retries = max_retries + self.retries = 0 + + # Resolve GitHub token: explicit > GITHUB_TOKEN env var > logged-in user + self._github_token = github_token or os.environ.get("GITHUB_TOKEN") + + # Persistent event loop in a daemon thread for async-sync bridging. + # The Copilot SDK is async-native; MicroBot's LLMInterface is sync. 
def ask(self, message) -> LLMAskResponse:
    """Send *message* to the Copilot session and return the validated reply.

    The user message is recorded in ``self.messages`` and sent through the
    background event loop. The reply is unwrapped from a Markdown code fence
    when one is present and then handed to ``self._validate_llm_response``.
    The loop repeats until the validator reports success, after which the
    validated reply is stored in ``self.messages`` as a JSON-encoded
    assistant entry.

    Parameters
    ----------
    message :
        Prompt text forwarded to the session.

    Returns
    -------
    LLMAskResponse
        The object produced by ``_validate_llm_response`` once it reports a
        valid reply.

    Notes
    -----
    ``_validate_llm_response`` is defined outside this block. Presumably it
    counts attempts against ``self.max_retries`` and raises once they are
    exhausted, which is what ends the loop on persistent failure
    -- TODO confirm against ``LLMInterface``.
    """
    # Local import: the module's import block does not bring ``re`` in.
    # Doing it once here keeps it out of the retry loop.
    import re

    # A JSON object wrapped in a ``` or ```json fence.
    fenced_json = re.compile(r'```(?:json)?\s*(\{.*?\})\s*```', re.DOTALL)

    self.retries = 0
    self.messages.append({"role": "user", "content": message})

    prompt = message
    valid = False
    while not valid:
        response_text = self._run_async(self._send_and_collect(prompt))
        logger.debug(
            "Raw Copilot response (first 500 chars): %s",
            response_text[:500],
        )

        fenced = fenced_json.search(response_text)
        if fenced:
            response_text = fenced.group(1)

        history_len = len(self.messages)
        valid, askResponse = self._validate_llm_response(
            response=response_text
        )

        # The Copilot session keeps its own conversation state, so re-sending
        # the original prompt would only repeat the request that just failed.
        # If the validator recorded a corrective user message, forward that
        # instead so the model learns what was wrong with its last reply.
        if not valid and len(self.messages) > history_len:
            feedback = self.messages[-1]
            if (
                isinstance(feedback, dict)
                and feedback.get("role") == "user"
                and feedback.get("content")
            ):
                prompt = feedback["content"]

    self.messages.append(
        {"role": "assistant", "content": json.dumps(asdict(askResponse))}
    )
    return askResponse
def close(self):
    """Stop the Copilot client and shut down the background event loop.

    The call is idempotent and is a no-op on an instance whose constructor
    never got as far as setting ``_closed``.

    Shutdown is best-effort but complete: the session disconnect and the
    client stop are attempted independently, each bounded by a timeout, so a
    failing or hanging disconnect can no longer prevent the client from
    being stopped. The loop thread is always asked to stop afterwards.

    Raises
    ------
    Exception
        The first error raised by ``session.disconnect()`` or
        ``client.stop()`` is re-raised once all cleanup steps have run.
        A step that exceeds the timeout surfaces as ``TimeoutError``.
    """
    if getattr(self, "_closed", True):
        return
    self._closed = True

    shutdown_timeout = 10.0  # seconds allowed per asynchronous cleanup step
    first_error = None

    def _finish(coro):
        """Run *coro* on the background loop, remembering the first failure."""
        nonlocal first_error
        if not self._loop.is_running():
            # Nothing would ever execute the coroutine; dispose of it rather
            # than block forever waiting for a result.
            dispose = getattr(coro, "close", None)
            if dispose is not None:
                dispose()
            return
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        try:
            future.result(timeout=shutdown_timeout)
        except Exception as exc:
            future.cancel()
            if first_error is None:
                first_error = exc

    try:
        if self._session:
            _finish(self._session.disconnect())
        _finish(self._client.stop())
    finally:
        self._loop.call_soon_threadsafe(self._loop.stop)
        self._thread.join(timeout=5)
        # Release the loop's resources once its thread has really exited.
        if not self._thread.is_alive() and not self._loop.is_running():
            self._loop.close()

    if first_error is not None:
        raise first_error
+ +Prerequisites: + - GitHub Copilot CLI installed and in PATH (`copilot --version`) + - Authenticated via `copilot` login or GITHUB_TOKEN / GH_TOKEN env var + - Active GitHub Copilot subscription + - Install the ghcp extra: `pip install microbots[ghcp]` + - Docker daemon running + +Usage: +------ + # Run the integration test: + pytest test/bot/test_writing_bot_copilot.py -v -m "integration" +""" + +import os +import shutil +import subprocess +import sys + +import pytest + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) +) + +import logging +logging.basicConfig(level=logging.INFO) + +from microbots import WritingBot, BotRunResult + + +def _copilot_cli_available(): + """Check if the Copilot CLI is installed and accessible.""" + return shutil.which("copilot") is not None + + +def _copilot_sdk_installed(): + """Check if the github-copilot-sdk package is installed.""" + try: + import copilot # noqa: F401 + return True + except ImportError: + return False + + +def _copilot_auth_available(): + """Check if GitHub authentication is available for Copilot.""" + if os.environ.get("GITHUB_TOKEN"): + return True + # Check if gh CLI is authenticated + if shutil.which("gh"): + try: + result = subprocess.run( + ["gh", "auth", "status"], + capture_output=True, timeout=10, + ) + return result.returncode == 0 + except Exception: + pass + return False + + +skip_no_copilot_cli = pytest.mark.skipif( + not _copilot_cli_available(), + reason="GitHub Copilot CLI not installed (copilot not in PATH)", +) + +skip_no_copilot_sdk = pytest.mark.skipif( + not _copilot_sdk_installed(), + reason="github-copilot-sdk not installed (pip install microbots[ghcp])", +) + +skip_no_copilot_auth = pytest.mark.skipif( + not _copilot_auth_available(), + reason="No GitHub auth available (set GITHUB_TOKEN or run 'gh auth login')", +) + + +@skip_no_copilot_cli +@skip_no_copilot_sdk +@skip_no_copilot_auth +@pytest.mark.integration +@pytest.mark.slow +def 
test_writing_bot_copilot_fixes_syntax_error(test_repo, issue_1): + """ + End-to-end test: WritingBot with GitHub Copilot fixes a syntax error. + + The test-repo contains `tests/missing_colon.py` with a SyntaxError + (missing colon on a function definition). The WritingBot should: + 1. Read the error description + 2. Find the faulty file + 3. Fix the syntax error (add the missing colon) + 4. Verify the fix by running the script + + After the bot completes, `verify_function` confirms the fix by + executing the script and asserting returncode == 0. + """ + issue_text = issue_1[0] + verify_function = issue_1[1] + + model = "github-copilot/gpt-4.1" + + writing_bot = WritingBot( + model=model, + folder_to_mount=str(test_repo), + ) + + response: BotRunResult = writing_bot.run( + issue_text, + max_iterations=25, + timeout_in_seconds=300, + ) + + print( + f"Status: {response.status}, " + f"Result: {response.result}, " + f"Error: {response.error}" + ) + + assert response.status is True, ( + f"WritingBot did not complete the task. Error: {response.error}" + ) + + # Verify the fix actually works: run the script, expect exit code 0 + verify_function(test_repo) diff --git a/test/llm/test_copilot_api.py b/test/llm/test_copilot_api.py new file mode 100644 index 00000000..d268771d --- /dev/null +++ b/test/llm/test_copilot_api.py @@ -0,0 +1,196 @@ +""" +Unit tests for CopilotApi class +""" +import pytest +import json +import sys +import os +import asyncio +import importlib +from unittest.mock import Mock, patch, MagicMock, AsyncMock +from dataclasses import asdict + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) + +# Mock the copilot module before importing CopilotApi, since github-copilot-sdk +# is an optional dependency that won't be installed in the test environment. 
+_mock_copilot = MagicMock() +_mock_copilot.PermissionHandler = MagicMock() +_mock_copilot.PermissionHandler.approve_all = MagicMock() +_mock_copilot.CopilotClient = MagicMock +_mock_copilot_types = MagicMock() +_mock_copilot_types.SubprocessConfig = MagicMock +sys.modules["copilot"] = _mock_copilot +sys.modules["copilot.types"] = _mock_copilot_types + +# Now safe to import — the module-level `from copilot import ...` will resolve +# against our mock. +if "microbots.llm.copilot_api" in sys.modules: + importlib.reload(sys.modules["microbots.llm.copilot_api"]) +from microbots.llm.copilot_api import CopilotApi +from microbots.llm.llm import LLMAskResponse, LLMInterface, llm_output_format_str + + +@pytest.fixture +def mock_copilot_session(): + """Create a mock Copilot session with send_and_wait.""" + session = AsyncMock() + session.disconnect = AsyncMock() + + # Default response from send_and_wait + default_response = Mock() + default_response.data.content = json.dumps({ + "task_done": False, + "thoughts": "Thinking about the task", + "command": "ls -la" + }) + session.send_and_wait = AsyncMock(return_value=default_response) + return session + + +@pytest.fixture +def mock_copilot_client(mock_copilot_session): + """Create a mock CopilotClient.""" + client = AsyncMock() + client.start = AsyncMock() + client.stop = AsyncMock() + client.create_session = AsyncMock(return_value=mock_copilot_session) + return client + + +@pytest.fixture +def copilot_api(mock_copilot_client): + """Create a CopilotApi instance with mocked client.""" + with patch("microbots.llm.copilot_api.CopilotClient", return_value=mock_copilot_client): + api = CopilotApi( + system_prompt="You are a test assistant", + model_name="gpt-4.1", + ) + yield api + api.close() + + +@pytest.mark.unit +class TestCopilotApiInitialization: + """Tests for CopilotApi initialization.""" + + def test_init_stores_params(self, copilot_api): + assert copilot_api.system_prompt == "You are a test assistant" + assert 
copilot_api.model_name == "gpt-4.1" + assert copilot_api.max_retries == 3 + assert copilot_api.retries == 0 + assert copilot_api.messages == [] + + def test_init_starts_client_and_creates_session(self, mock_copilot_client): + with patch("microbots.llm.copilot_api.CopilotClient", return_value=mock_copilot_client): + api = CopilotApi(system_prompt="test", model_name="gpt-4.1") + try: + mock_copilot_client.start.assert_awaited_once() + mock_copilot_client.create_session.assert_awaited_once() + + call_kwargs = mock_copilot_client.create_session.call_args[1] + assert call_kwargs["model"] == "gpt-4.1" + assert call_kwargs["infinite_sessions"] == {"enabled": False} + assert "system_message" in call_kwargs + assert call_kwargs["system_message"]["content"] == "test" + finally: + api.close() + + def test_implements_llm_interface(self, copilot_api): + assert isinstance(copilot_api, LLMInterface) + + +@pytest.mark.unit +class TestCopilotApiAsk: + """Tests for CopilotApi.ask() method.""" + + def test_ask_returns_valid_response(self, copilot_api): + response = copilot_api.ask("What files are in the directory?") + + assert isinstance(response, LLMAskResponse) + assert response.task_done is False + assert response.thoughts == "Thinking about the task" + assert response.command == "ls -la" + + def test_ask_appends_to_messages(self, copilot_api): + copilot_api.ask("test message") + + assert len(copilot_api.messages) == 2 + assert copilot_api.messages[0]["role"] == "user" + assert copilot_api.messages[0]["content"] == "test message" + assert copilot_api.messages[1]["role"] == "assistant" + + def test_ask_handles_task_done(self, copilot_api, mock_copilot_session): + """Test ask when LLM signals task completion.""" + done_response = Mock() + done_response.data.content = json.dumps({ + "task_done": True, + "thoughts": "Task is complete", + "command": "" + }) + mock_copilot_session.send_and_wait = AsyncMock(return_value=done_response) + + response = copilot_api.ask("done?") + assert 
response.task_done is True + assert response.command == "" + + def test_ask_handles_markdown_wrapped_json(self, copilot_api, mock_copilot_session): + """Test that JSON wrapped in markdown code blocks is extracted.""" + md_response = Mock() + md_response.data.content = '```json\n{"task_done": false, "thoughts": "extracted", "command": "pwd"}\n```' + mock_copilot_session.send_and_wait = AsyncMock(return_value=md_response) + + response = copilot_api.ask("test") + assert response.thoughts == "extracted" + assert response.command == "pwd" + + +@pytest.mark.unit +class TestCopilotApiClearHistory: + """Tests for CopilotApi.clear_history() method.""" + + def test_clear_history_resets_messages(self, copilot_api): + copilot_api.messages = [{"role": "user", "content": "test"}] + result = copilot_api.clear_history() + + assert result is True + assert copilot_api.messages == [] + + def test_clear_history_recreates_session(self, copilot_api, mock_copilot_session, mock_copilot_client): + copilot_api.clear_history() + + mock_copilot_session.disconnect.assert_awaited() + # create_session called once at init, once on clear_history + assert mock_copilot_client.create_session.await_count == 2 + + +@pytest.mark.unit +class TestCopilotApiClose: + """Tests for CopilotApi.close() method.""" + + def test_close_stops_client(self, mock_copilot_client, mock_copilot_session): + with patch("microbots.llm.copilot_api.CopilotClient", return_value=mock_copilot_client): + api = CopilotApi(system_prompt="test", model_name="gpt-4.1") + api.close() + + mock_copilot_session.disconnect.assert_awaited() + mock_copilot_client.stop.assert_awaited() + + +@pytest.mark.unit +class TestCopilotApiImportError: + """Test that a helpful error is raised when ghcp extra is not installed.""" + + def test_microbot_raises_helpful_error_without_ghcp(self): + """MicroBot._create_llm() should raise ValueError when copilot SDK is missing.""" + from microbots.constants import ModelProvider + + with 
patch("microbots.MicroBot.ModelProvider", ModelProvider): + # Simulate ImportError when trying to import CopilotApi + with patch.dict("sys.modules", {"microbots.llm.copilot_api": None}): + from microbots.MicroBot import MicroBot + with pytest.raises(ValueError, match="pip install microbots\\[ghcp\\]"): + MicroBot( + model="github-copilot/gpt-4.1", + system_prompt="test", + ) From 88080aa492b40e31bf0d968aa30f99e112b71ba4 Mon Sep 17 00:00:00 2001 From: bala Date: Fri, 27 Mar 2026 12:35:25 +0000 Subject: [PATCH 02/12] Backup - Yet to verify the implementation --- src/microbots/__init__.py | 3 +- src/microbots/bot/CopilotBot.py | 598 ++++++++++++++++++ src/microbots/bot/__init__.py | 3 +- src/microbots/utils/multi_agent_log_parser.py | 460 ++++++++++---- test/bot/test_copilot_bot.py | 393 ++++++++++++ 5 files changed, 1336 insertions(+), 121 deletions(-) create mode 100644 src/microbots/bot/CopilotBot.py create mode 100644 test/bot/test_copilot_bot.py diff --git a/src/microbots/__init__.py b/src/microbots/__init__.py index cc344c05..7415ffda 100644 --- a/src/microbots/__init__.py +++ b/src/microbots/__init__.py @@ -1,4 +1,4 @@ -from microbots.bot import ReadingBot, WritingBot, LogAnalysisBot, BrowsingBot, AgentBoss +from microbots.bot import ReadingBot, WritingBot, LogAnalysisBot, BrowsingBot, AgentBoss, CopilotBot from microbots.MicroBot import MicroBot, BotRunResult __all__ = [ "ReadingBot", @@ -6,6 +6,7 @@ "LogAnalysisBot", "BrowsingBot", "AgentBoss", + "CopilotBot", "MicroBot", "BotRunResult" ] diff --git a/src/microbots/bot/CopilotBot.py b/src/microbots/bot/CopilotBot.py new file mode 100644 index 00000000..8bc869a4 --- /dev/null +++ b/src/microbots/bot/CopilotBot.py @@ -0,0 +1,598 @@ +""" +CopilotBot — A wrapper around the GitHub Copilot SDK. + +Unlike MicroBot (which manages its own LLM ↔ shell agent loop), CopilotBot +delegates the **entire agent loop to the Copilot runtime**. 
Copilot handles +planning, tool invocation (file edits, shell commands, web requests, etc.) +and multi-turn reasoning autonomously. + +CopilotBot reuses the Microbots infrastructure: + - Docker sandbox (LocalDockerEnvironment) for isolated execution + - Mount system for folder access control + - ToolAbstract lifecycle (install → verify → setup) for additional tools + - copilot-cli is installed **inside** the container and run in headless + server mode; the SDK on the host connects to it via TCP. + +Architecture: + + Host Docker Container + ───── ──────────────── + CopilotBot copilot-cli --headless --port

+ │ │ + ├── Copilot SDK ──TCP──────────┘ + │ (ExternalServerConfig) + │ + ├── additional tools + │ (define_tool → SDK session) + │ + └── BotRunResult + +Prerequisites: + - pip install microbots[ghcp] (github-copilot-sdk) + - Docker daemon running + - GitHub authentication (GITHUB_TOKEN / COPILOT_GITHUB_TOKEN or copilot login) +""" + +import asyncio +import os +import time +import threading +from logging import getLogger +from typing import Optional + +from microbots.constants import ( + DOCKER_WORKING_DIR, + PermissionLabels, +) +from microbots.environment.local_docker.LocalDockerEnvironment import ( + LocalDockerEnvironment, +) +from microbots.extras.mount import Mount, MountType +from microbots.MicroBot import BotRunResult +from microbots.tools.tool import ToolAbstract +from microbots.utils.network import get_free_port + +logger = getLogger(" CopilotBot ") + +# Default model when none is specified (just the deployment name, no provider prefix) +_DEFAULT_MODEL = "gpt-4.1" + +# Time (seconds) to wait for copilot-cli to start inside the container +_CLI_STARTUP_TIMEOUT = 60 + +# copilot-cli port inside the container +_CONTAINER_CLI_PORT = 4321 + + +class CopilotBot: + """Wrapper around the GitHub Copilot SDK with a sandboxed Docker environment. + + The Copilot runtime manages the agent loop (planning, tool calls, + multi-turn reasoning). CopilotBot sets up the sandbox, installs + copilot-cli inside it, connects the SDK, and exposes a simple + ``run(task)`` interface. + + Parameters + ---------- + model : str + Copilot model name (e.g. ``"gpt-4.1"``, ``"claude-sonnet-4.5"``). + Unlike MicroBot, no ``/`` prefix is needed. + folder_to_mount : str + Absolute host path to mount into the sandbox. + permission : PermissionLabels + Mount permission — READ_ONLY or READ_WRITE. Defaults to READ_WRITE. + environment : Optional[LocalDockerEnvironment] + Pre-created environment. One is created automatically when *None*. 
def __init__(
    self,
    model: str = _DEFAULT_MODEL,
    folder_to_mount: Optional[str] = None,
    permission: PermissionLabels = PermissionLabels.READ_WRITE,
    environment: Optional[LocalDockerEnvironment] = None,
    additional_tools: Optional[list[ToolAbstract]] = None,
    github_token: Optional[str] = None,
):
    """Prepare the sandbox, start copilot-cli in it and connect the SDK.

    Steps, in order: import the optional SDK, resolve the GitHub token,
    describe the mount, create the Docker environment when none was
    supplied, install and verify the additional tools, install and start
    copilot-cli in the container, start the background event loop, and
    start the SDK client pointed at ``localhost:<cli host port>``.

    Raises
    ------
    ImportError
        When ``github-copilot-sdk`` is not installed.
    RuntimeError, TimeoutError
        Propagated from the copilot-cli install and start-up helpers.
    """
    try:
        from copilot import CopilotClient, ExternalServerConfig
        from copilot.session import PermissionHandler
    except ImportError as exc:
        # Chain the original error so the missing module name stays visible.
        raise ImportError(
            "CopilotBot requires the github-copilot-sdk package. "
            "Install with: pip install microbots[ghcp]"
        ) from exc

    self.model = model
    self.additional_tools = additional_tools or []
    # Precedence: explicit argument, then the Copilot-specific variable,
    # then the generic GitHub token variables.
    self.github_token = (
        github_token
        or os.environ.get("COPILOT_GITHUB_TOKEN")
        or os.environ.get("GITHUB_TOKEN")
        or os.environ.get("GH_TOKEN")
    )

    # ── Mount setup ─────────────────────────────────────────────
    self.folder_to_mount: Optional[Mount] = None
    if folder_to_mount:
        # normpath drops a trailing separator, which would otherwise make
        # basename() return "" and leave the sandbox path without a folder.
        folder_name = os.path.basename(os.path.normpath(folder_to_mount))
        sandbox_path = f"/{DOCKER_WORKING_DIR}/{folder_name}"
        self.folder_to_mount = Mount(folder_to_mount, sandbox_path, permission)

    # ── Docker environment ──────────────────────────────────────
    self.environment = environment
    if not self.environment:
        self._create_environment()

    # ── Install additional tools inside the container ───────────
    for tool in self.additional_tools:
        tool.install_tool(self.environment)
        tool.verify_tool_installation(self.environment)

    # ── Install & start copilot-cli inside the container ────────
    # _create_environment() has already reserved a host port and published
    # the container's CLI port on it. Reserving a different port here would
    # leave _cli_host_port pointing at a port Docker never mapped, so a new
    # one is only picked when none has been assigned yet.
    # NOTE(review): with a caller-supplied environment nothing in this
    # method publishes the CLI port -- confirm the caller maps it.
    if getattr(self, "_cli_host_port", None) is None:
        self._cli_host_port = get_free_port()
    self._install_copilot_cli()
    self._start_copilot_cli_server()

    # ── Background event loop for async SDK calls ───────────────
    # The SDK is driven through coroutines while this class exposes a
    # blocking API, so a daemon thread runs a dedicated loop for it.
    self._loop = asyncio.new_event_loop()
    self._thread = threading.Thread(target=self._loop.run_forever, daemon=True)
    self._thread.start()

    # ── Connect SDK to in-container CLI ─────────────────────────
    self._client = CopilotClient(
        ExternalServerConfig(url=f"localhost:{self._cli_host_port}")
    )
    self._run_async(self._client.start())
    # Kept on the instance because the import is local to this method.
    self._PermissionHandler = PermissionHandler

    logger.info(
        "✅ CopilotBot initialised — model=%s, cli_port=%d",
        self.model,
        self._cli_host_port,
    )
def stop(self):
    """Tear down the SDK client, the background loop and the environment.

    The call is idempotent and safe on a partially constructed instance:
    attributes that the constructor never reached are simply skipped.
    Failures while stopping the SDK client or the loop are logged at DEBUG
    level and do not prevent the Docker environment from being stopped.
    """
    if getattr(self, "_stopped", False):
        return
    self._stopped = True

    loop = getattr(self, "_loop", None)
    client = getattr(self, "_client", None)
    worker = getattr(self, "_thread", None)

    # Stop the SDK client; the timeout keeps a wedged loop from blocking here.
    if loop is not None and client is not None:
        try:
            if loop.is_running():
                future = asyncio.run_coroutine_threadsafe(client.stop(), loop)
                future.result(timeout=10)
        except Exception:
            logger.debug("CopilotBot: SDK client did not stop cleanly", exc_info=True)

    # Shut down the background event loop and release its resources.
    if loop is not None:
        try:
            loop.call_soon_threadsafe(loop.stop)
            if worker is not None:
                worker.join(timeout=5)
            if not loop.is_running():
                loop.close()
        except Exception:
            logger.debug("CopilotBot: event loop shutdown failed", exc_info=True)

    environment = getattr(self, "environment", None)
    if environment:
        environment.stop()
        self.environment = None
    logger.info("🛑 CopilotBot stopped")
folder_to_mount=self.folder_to_mount, + ) + # Expose additional port mapping for copilot-cli + self._map_cli_port() + + def _map_cli_port(self): + """Add a second port mapping for the copilot-cli headless server. + + Docker port mappings are static after container creation, so we use + ``socat`` inside the container to forward the CLI port through the + existing shell_server port range, OR we use ``docker exec`` via iptables. + + The simplest reliable approach: install socat and forward from a known + port that's already exposed, or use ``docker port``. + + Actually, the cleanest approach: stop the container, recreate it with + the additional port. Since we control environment creation this is safe. + """ + # The environment was just created by us, so recreating with an extra port + # is acceptable. We stop the existing container and create a new one + # with both ports mapped. + if not self.environment.container: + return + + container = self.environment.container + image = self.environment.image + port = self.environment.port + container_port = self.environment.container_port + + # Gather existing volume config from the running container + import docker + + container.stop() + container.remove() + + # Re-create with both ports + volumes_config = {self.environment.working_dir: {"bind": DOCKER_WORKING_DIR, "mode": "rw"}} + if self.folder_to_mount: + mode_map = {"READ_ONLY": "ro", "READ_WRITE": "rw"} + if self.folder_to_mount.permission == PermissionLabels.READ_ONLY: + volumes_config[self.folder_to_mount.host_path_info.abs_path] = { + "bind": f"/ro/{os.path.basename(self.folder_to_mount.sandbox_path)}", + "mode": mode_map[self.folder_to_mount.permission], + } + else: + volumes_config[self.folder_to_mount.host_path_info.abs_path] = { + "bind": self.folder_to_mount.sandbox_path, + "mode": mode_map[self.folder_to_mount.permission], + } + + port_mapping = { + f"{container_port}/tcp": port, + f"{_CONTAINER_CLI_PORT}/tcp": self._cli_host_port, + } + + client = 
def _install_copilot_cli(self):
    """Provision Node.js and the ``@github/copilot`` npm package in the sandbox.

    Every provisioning command goes through ``self.environment.execute``
    with a 300-second timeout, and the first non-zero return code aborts
    the installation. ``copilot --version`` is then run as a final check.

    Raises
    ------
    RuntimeError
        When a provisioning command or the version check returns a
        non-zero code.
    """
    logger.info("📦 Installing copilot-cli inside container...")

    sandbox = self.environment
    provisioning_steps = (
        # curl and CA certificates are needed to fetch the NodeSource script.
        "apt-get update -qq && apt-get install -y -qq curl ca-certificates > /dev/null 2>&1",
        # Node.js 22.x repository (the original author notes copilot-cli needs Node 22+).
        "curl -fsSL https://deb.nodesource.com/setup_22.x | bash - > /dev/null 2>&1",
        "apt-get install -y -qq nodejs > /dev/null 2>&1",
        # Global install so the ``copilot`` executable lands on PATH.
        "npm install -g @github/copilot > /dev/null 2>&1",
    )

    for step in provisioning_steps:
        outcome = sandbox.execute(step, timeout=300)
        if outcome.return_code == 0:
            continue
        raise RuntimeError(
            f"Failed to install copilot-cli: {step}\n"
            f"stdout: {outcome.stdout}\nstderr: {outcome.stderr}"
        )

    version_check = sandbox.execute("copilot --version")
    if version_check.return_code != 0:
        raise RuntimeError(
            f"copilot-cli installation verification failed: {version_check.stderr}"
        )
    logger.info("✅ copilot-cli installed: %s", version_check.stdout.strip())
async def _execute_session(
    self,
    task: str,
    system_content: str,
    sdk_tools: list,
    timeout: int,
    streaming: bool,
) -> str:
    """Create a Copilot session, send *task* and return the final message.

    Parameters
    ----------
    task : str
        Prompt passed to ``session.send_and_wait``.
    system_content : str
        System message; omitted from the session options when empty.
    sdk_tools : list
        Custom tool definitions; omitted from the session options when empty.
    timeout : int
        Overall budget in seconds. It bounds ``send_and_wait`` and any
        follow-up wait for the idle event together.
    streaming : bool
        Forwarded to the session as its ``streaming`` option.

    Returns
    -------
    str
        The content returned by ``send_and_wait`` when it has any, else the
        last assistant message seen by the event handler, else a fixed
        placeholder sentence.
    """
    from copilot.generated.session_events import SessionEventType

    session_kwargs = {
        "model": self.model,
        "on_permission_request": self._PermissionHandler.approve_all,
        "streaming": streaming,
    }
    if system_content:
        session_kwargs["system_message"] = {"content": system_content}
    if sdk_tools:
        session_kwargs["tools"] = sdk_tools

    loop = asyncio.get_running_loop()
    deadline = loop.time() + float(timeout)

    session = await self._client.create_session(**session_kwargs)
    try:
        collected_text = []
        done_event = asyncio.Event()

        def _on_event(event):
            if event.type == SessionEventType.ASSISTANT_MESSAGE:
                if event.data and event.data.content:
                    collected_text.append(event.data.content)
            elif event.type == SessionEventType.ASSISTANT_MESSAGE_DELTA:
                if event.data and event.data.delta_content:
                    logger.debug("📝 %s", event.data.delta_content)
            elif event.type == SessionEventType.SESSION_IDLE:
                done_event.set()

        session.on(_on_event)

        # Send the task prompt and wait for completion.
        response = await session.send_and_wait(task, timeout=float(timeout))
        if response and response.data and response.data.content:
            return response.data.content

        # Nothing usable came back directly: give the event handler whatever
        # is left of the budget instead of a second full timeout.
        if not collected_text:
            remaining = deadline - loop.time()
            if remaining > 0:
                try:
                    await asyncio.wait_for(done_event.wait(), timeout=remaining)
                except asyncio.TimeoutError:
                    pass

        if collected_text:
            return collected_text[-1]  # the last assistant message
        return "Agent completed without producing a final message."
    finally:
        # Always release the session: the early return above and any
        # exception used to skip the disconnect and leak it.
        try:
            await session.disconnect()
        except Exception:
            logger.warning("CopilotBot: session disconnect failed", exc_info=True)
" + f"You can access files using paths relative to or absolute from that directory." + ) + + for tool in self.additional_tools: + if tool.usage_instructions_to_llm: + parts.append(tool.usage_instructions_to_llm) + + return "\n\n".join(parts) + + def _build_sdk_tools(self) -> list: + """Convert Microbots additional tools into Copilot SDK tool definitions. + + Only tools that implement ``is_invoked`` / have an ``invoke`` method + (ExternalTools) can be meaningfully wrapped. Internal tools that run + via shell commands are already accessible to Copilot's built-in shell + tool and don't need explicit registration. + """ + from microbots.tools.external_tool import ExternalTool + + sdk_tools = [] + for tool in self.additional_tools: + if isinstance(tool, ExternalTool) and hasattr(tool, "invoke"): + sdk_tool = self._wrap_external_tool(tool) + if sdk_tool: + sdk_tools.append(sdk_tool) + return sdk_tools + + def _wrap_external_tool(self, tool: ToolAbstract): + """Wrap a Microbots ExternalTool as a Copilot SDK define_tool.""" + try: + from copilot.tools import Tool as CopilotTool, ToolInvocation, ToolResult + except ImportError: + return None + + bot_ref = self # Capture reference for the handler closure + + async def handler(invocation: ToolInvocation) -> ToolResult: + command = invocation.arguments.get("command", "") + try: + cmd_return = tool.invoke(command, bot_ref) + output = cmd_return.stdout if cmd_return.return_code == 0 else ( + f"COMMAND FAILED (rc={cmd_return.return_code})\n" + f"stdout: {cmd_return.stdout}\nstderr: {cmd_return.stderr}" + ) + return ToolResult( + text_result_for_llm=output, + result_type="success" if cmd_return.return_code == 0 else "failure", + ) + except Exception as e: + return ToolResult( + text_result_for_llm=f"Tool error: {e}", + result_type="failure", + ) + + return CopilotTool( + name=tool.name, + description=tool.description, + parameters={ + "type": "object", + "properties": { + "command": { + "type": "string", + "description": f"The 
command to invoke the {tool.name} tool", + }, + }, + "required": ["command"], + }, + handler=handler, + ) + + # ────────────────────────────────────────────────────────────────── + # Private — mount helpers + # ────────────────────────────────────────────────────────────────── + + def _mount_additional(self, mount: Mount): + """Copy an additional folder into the running container.""" + if mount.mount_type != MountType.COPY: + raise ValueError( + "Only COPY mount type is supported for additional mounts" + ) + if not self.environment.copy_to_container( + mount.host_path_info.abs_path, mount.sandbox_path + ): + raise ValueError( + f"Failed to copy additional mount: " + f"{mount.host_path_info.abs_path} -> {mount.sandbox_path}" + ) diff --git a/src/microbots/bot/__init__.py b/src/microbots/bot/__init__.py index fd6cd62e..25e61b50 100644 --- a/src/microbots/bot/__init__.py +++ b/src/microbots/bot/__init__.py @@ -3,5 +3,6 @@ from .WritingBot import WritingBot from .BrowsingBot import BrowsingBot from .AgentBoss import AgentBoss +from .CopilotBot import CopilotBot -__all__ = ["LogAnalysisBot", "ReadingBot", "WritingBot", "BrowsingBot", "AgentBoss"] +__all__ = ["LogAnalysisBot", "ReadingBot", "WritingBot", "BrowsingBot", "AgentBoss", "CopilotBot"] diff --git a/src/microbots/utils/multi_agent_log_parser.py b/src/microbots/utils/multi_agent_log_parser.py index ebdf9fb9..0183e617 100644 --- a/src/microbots/utils/multi_agent_log_parser.py +++ b/src/microbots/utils/multi_agent_log_parser.py @@ -3,19 +3,21 @@ Parse microbots info.log files into markdown trajectory files. Usage: - python multi_agent_log_parser.py _info.log [output_dir] + python multi_agent_log_parser.py [output_dir] [--single-file] -Creates: - _trajectory/ +Creates either: + _trajectory/ main_agent.md sub_agent_1.md sub_agent_2.md ... +Or with --single-file: + _trajectory.md -The info.log file should be named as _info.log. -A directory _trajectory will be created with all the markdown files. 
+The log file name (minus _info.log or .log suffix) determines the output name. """ +import argparse import re import os import sys @@ -26,6 +28,18 @@ # ─────────────────────────── Data Classes ─────────────────────────── +@dataclass +class SetupInfo: + """Captured setup information before the agent starts working.""" + container_id: str = "" + image: str = "" + host_port: str = "" + working_dir: str = "" + volume_mappings: List[str] = field(default_factory=list) + tools_installed: List[str] = field(default_factory=list) + files_copied: List[str] = field(default_factory=list) + + @dataclass class Step: """Represents a single step in an agent's execution.""" @@ -39,7 +53,6 @@ class Step: is_sub_agent_call: bool = False sub_agent_task: str = "" sub_agent_index: int = -1 # index into the test case's sub_agents list - is_final: bool = False # True if this represents LLM final thoughts @dataclass @@ -51,6 +64,7 @@ class Agent: final_thoughts: str = "" completed: bool = False max_iterations_reached: bool = False + error_message: str = "" @dataclass @@ -59,12 +73,21 @@ class TestCase: name: str = "" main_agent: Optional[Agent] = None sub_agents: List[Agent] = field(default_factory=list) + setup: SetupInfo = field(default_factory=SetupInfo) # ─────────────────────────── Log Parsing ─────────────────────────── -# Regex for parsing log line timestamps +# Format: TIMESTAMP MODULE LEVEL CONTENT +# e.g. "2026-03-26 12:45:20,277 microbots.environment.local_docker.LocalDockerEnvironment INFO ..." +# e.g. "2026-03-26 12:46:35,819 MicroBot INFO ℹ️ TASK STARTED : ..." +# e.g. "2026-03-26 12:49:30,653 🤖 MicroBot-Sub INFO Sub-agent completed..." 
LOG_LINE_RE = re.compile( + r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+(.*?)\s+(INFO|ERROR|WARNING|DEBUG)\s(.*)$' +) + +# Legacy format: TIMESTAMP [LEVEL] CONTENT +LOG_LINE_LEGACY_RE = re.compile( r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) \[(INFO|ERROR|WARNING|DEBUG)\] (.*)$' ) @@ -73,8 +96,11 @@ def parse_log_entries(log_path: str) -> List[dict]: """ Parse a log file into a list of entries. Multi-line log entries (continuation lines without timestamps) are joined. + Supports both the current log format (TIMESTAMP MODULE LEVEL CONTENT) and + the legacy format (TIMESTAMP [LEVEL] CONTENT). - Returns a list of dicts: {'timestamp': str, 'level': str, 'content': str, 'line_num': int} + Returns a list of dicts: + {'timestamp': str, 'level': str, 'module': str, 'content': str, 'line_num': int} """ entries = [] current_entry = None @@ -82,24 +108,36 @@ def parse_log_entries(log_path: str) -> List[dict]: with open(log_path, 'r', encoding='utf-8', errors='replace') as f: for line_num, raw_line in enumerate(f, 1): line = raw_line.rstrip('\n') + + # Try current format first, then legacy match = LOG_LINE_RE.match(line) if match: - # Save previous entry if current_entry is not None: entries.append(current_entry) current_entry = { 'timestamp': match.group(1), - 'level': match.group(2), - 'content': match.group(3), + 'module': match.group(2).strip(), + 'level': match.group(3), + 'content': match.group(4), 'line_num': line_num, } else: - # Continuation of previous entry - if current_entry is not None: - current_entry['content'] += '\n' + line - # else: lines before any log entry (skip) + legacy = LOG_LINE_LEGACY_RE.match(line) + if legacy: + if current_entry is not None: + entries.append(current_entry) + current_entry = { + 'timestamp': legacy.group(1), + 'module': '', + 'level': legacy.group(2), + 'content': legacy.group(3), + 'line_num': line_num, + } + else: + # Continuation of previous entry + if current_entry is not None: + current_entry['content'] += '\n' + line - 
# Don't forget the last entry if current_entry is not None: entries.append(current_entry) @@ -111,33 +149,26 @@ def parse_log_entries(log_path: str) -> List[dict]: def extract_task_from_microbot_sub(command: str) -> str: """Extract the --task argument from a microbot_sub command.""" - # Normalize escaped quotes: \" -> " normalized = command.replace('\\"', '"').replace('\\n', '\n') - # Try to find --task "..." followed by " --iterations or end match = re.search(r'--task\s+"(.*?)"\s+--(?:iterations|timeout)', normalized, re.DOTALL) if match: return match.group(1).strip() - # Try to find --task "..." at end of command match = re.search(r'--task\s+"(.*?)"\s*$', normalized, re.DOTALL) if match: return match.group(1).strip() - # Try single quotes match = re.search(r"--task\s+'(.*?)'\s+--(?:iterations|timeout)", normalized, re.DOTALL) if match: return match.group(1).strip() - # Fallback: grab everything after --task " until the last " before --iterations match = re.search(r'--task\s+"(.+)', normalized, re.DOTALL) if match: text = match.group(1) - # Try to find closing quote before --iterations or --timeout iter_match = re.search(r'"\s+--(?:iterations|timeout)', text) if iter_match: return text[:iter_match.start()].strip() - # Try the last quote quote_end = text.rfind('"') if quote_end > 0: return text[:quote_end].strip() @@ -145,6 +176,50 @@ def extract_task_from_microbot_sub(command: str) -> str: return command +def _extract_setup_info(entries: List[dict]) -> SetupInfo: + """Extract environment setup information from log entries before the first TASK STARTED.""" + setup = SetupInfo() + for entry in entries: + content = entry['content'] + if 'TASK STARTED' in content: + break + + # Container start + m = re.search(r'Started container (\w+) with image (\S+) on host port (\d+)', content) + if m: + setup.container_id = m.group(1) + setup.image = m.group(2) + setup.host_port = m.group(3) + continue + + # Working directory + m = re.search(r'Created working directory at (\S+)', 
content) + if m: + setup.working_dir = m.group(1) + continue + + # Volume mapping + if 'Volume mapping:' in content: + setup.volume_mappings.append(content.split('Volume mapping:', 1)[1].strip()) + continue + + # Tool installed + m = re.search(r'Successfully (?:installed|set up|setup) (?:external )?tool:\s*(\S+)', content) + if m: + tool_name = m.group(1) + if tool_name not in setup.tools_installed: + setup.tools_installed.append(tool_name) + continue + + # Files copied to container + m = re.search(r'Successfully copied (.+?) to container:(.+)', content) + if m: + setup.files_copied.append(f"{m.group(1).strip()} → {m.group(2).strip()}") + continue + + return setup + + def build_test_cases(entries: List[dict]) -> List[TestCase]: """ Walk through log entries and build a list of TestCase objects, @@ -153,11 +228,10 @@ def build_test_cases(entries: List[dict]) -> List[TestCase]: test_cases = [] current_test: Optional[TestCase] = None - # Agent tracking - agent_stack: List[Agent] = [] # stack: [main_agent, sub_agent, ...] 
+ agent_stack: List[Agent] = [] current_step: Optional[Step] = None - pending_sub_agent_step: Optional[Step] = None # main agent step that called microbot_sub - current_field: Optional[str] = None # track what we're collecting multi-line for + pending_sub_agent_step: Optional[Step] = None + current_field: Optional[str] = None def current_agent() -> Optional[Agent]: return agent_stack[-1] if agent_stack else None @@ -191,22 +265,18 @@ def finalize_test_case(): continue # ── Task started ── - if 'ℹ️ TASK STARTED' in content: + if 'TASK STARTED' in content: task_text = content.split('TASK STARTED', 1)[1].lstrip(' :').strip() new_agent = Agent(task=task_text) if not current_test: - # No test case context yet, create one from filename current_test = TestCase(name="unknown") if not current_test.main_agent: - # First agent = main agent new_agent.is_main = True current_test.main_agent = new_agent agent_stack = [new_agent] else: - # Sub-agent - # Use the task from the microbot_sub command if available if pending_sub_agent_step and pending_sub_agent_step.sub_agent_task: new_agent.task = pending_sub_agent_step.sub_agent_task elif task_text: @@ -215,7 +285,6 @@ def finalize_test_case(): sub_idx = len(current_test.sub_agents) current_test.sub_agents.append(new_agent) - # Link the parent step to this sub-agent if pending_sub_agent_step: pending_sub_agent_step.sub_agent_index = sub_idx pending_sub_agent_step = None @@ -227,16 +296,15 @@ def finalize_test_case(): continue # ── Task completed ── - if '🔚 TASK COMPLETED' in content: + if 'TASK COMPLETED' in content: agent = current_agent() if agent: agent.completed = True - current_field = None # Stop accumulating text + current_field = None continue # ── Sub-agent completed message ── if 'Sub-agent completed successfully with output:' in content: - # Pop sub-agent from stack if len(agent_stack) > 1: agent_stack.pop() current_step = None @@ -249,13 +317,22 @@ def finalize_test_case(): if agent and not agent.is_main: 
agent.max_iterations_reached = True agent.completed = False - # Pop sub-agent from stack + agent.error_message = content if len(agent_stack) > 1: agent_stack.pop() current_step = None current_field = None continue + # ── Failed to parse sub-agent command ── + if level == 'ERROR' and 'Failed to parse microbot_sub command' in content: + if current_step: + current_step.is_blocked = True + current_step.blocked_reason = content + pending_sub_agent_step = None + current_field = None + continue + # ── Max iterations reached ── if level == 'ERROR' and 'Max iterations' in content: agent = current_agent() @@ -275,8 +352,8 @@ def finalize_test_case(): continue # ── LLM final thoughts ── - if '💭 LLM final thoughts:' in content: - text = content.split('💭 LLM final thoughts:', 1)[1].strip() + if 'LLM final thoughts:' in content: + text = content.split('LLM final thoughts:', 1)[1].strip() agent = current_agent() if agent: agent.final_thoughts = text @@ -284,17 +361,16 @@ def finalize_test_case(): continue # ── LLM thoughts ── - if '💭 LLM thoughts:' in content: - text = content.split('💭 LLM thoughts:', 1)[1].strip() + if 'LLM thoughts:' in content and 'final' not in content.split('LLM thoughts:')[0].lower(): + text = content.split('LLM thoughts:', 1)[1].strip() if current_step: current_step.thought = text current_field = 'thought' continue # ── LLM tool call ── - if '➡️ LLM tool call :' in content: - cmd = content.split('➡️ LLM tool call :', 1)[1].strip() - # Remove surrounding quotes if present + if 'LLM tool call' in content and ':' in content.split('LLM tool call')[1]: + cmd = content.split('LLM tool call', 1)[1].split(':', 1)[1].strip() if cmd.startswith('"') and cmd.endswith('"'): cmd = cmd[1:-1] if current_step: @@ -307,22 +383,29 @@ def finalize_test_case(): continue # ── Command output ── - if '⬅️ Command output:' in content: - text = content.split('⬅️ Command output:', 1)[1].strip() + if 'Command output:' in content: + text = content.split('Command output:', 1)[1].strip() 
if current_step: current_step.output = text current_field = 'output' continue # ── Dangerous command blocked ── - if '⚠️ Dangerous command detected' in content: + if 'Dangerous command detected' in content: if current_step: current_step.is_blocked = True - current_step.blocked_reason = content + # Parse REASON/ALTERNATIVE from multi-line content + lines = content.split('\n') + current_step.blocked_reason = lines[0] + for bline in lines[1:]: + if bline.startswith('REASON:'): + current_step.blocked_reason = bline + elif bline.startswith('ALTERNATIVE:'): + current_step.blocked_alternative = bline current_field = 'blocked' continue - # ── REASON / ALTERNATIVE for blocked commands ── + # ── REASON / ALTERNATIVE for blocked commands (separate entries) ── if current_field == 'blocked' and current_step: if content.startswith('REASON:'): current_step.blocked_reason = content @@ -332,7 +415,10 @@ def finalize_test_case(): # ── Invoking MicroBotSubAgent ── if 'Invoking MicroBotSubAgent with task:' in content: - # This is just a log message; the sub-agent TASK STARTED follows + continue + + # ── Memory tool operations ── + if 'Memory file created:' in content or 'Memory file updated:' in content: continue # ── Multi-line continuation for known fields ── @@ -366,20 +452,13 @@ def finalize_test_case(): agent.final_thoughts = content continue - # Finalize last test case finalize_test_case() - return test_cases # ─────────────────────────── Markdown Generation ─────────────────────────── -def escape_md(text: str) -> str: - """Escape text for markdown display (minimal escaping for code blocks).""" - return text - - def truncate_text(text: str, max_lines: int = 200) -> str: """Truncate text if it exceeds max_lines.""" lines = text.split('\n') @@ -388,7 +467,34 @@ def truncate_text(text: str, max_lines: int = 200) -> str: return text -def generate_step_md(step: Step, sub_agent_filename: str = "") -> str: +def generate_setup_md(setup: SetupInfo) -> str: + """Generate markdown for the 
setup/environment section.""" + if not setup.container_id and not setup.tools_installed: + return "" + + md = "## 🔧 Environment Setup\n\n" + + if setup.container_id: + md += f"- **Container:** `{setup.container_id}` (image: `{setup.image}`, port: {setup.host_port})\n" + if setup.working_dir: + md += f"- **Working directory:** `{setup.working_dir}`\n" + for vol in setup.volume_mappings: + md += f"- **Volume:** {vol}\n" + + if setup.tools_installed: + md += f"- **Tools:** {', '.join(setup.tools_installed)}\n" + + if setup.files_copied: + md += "\n

\nFiles copied to container\n\n" + for fc in setup.files_copied: + md += f"- {fc}\n" + md += "\n
\n" + + md += "\n---\n\n" + return md + + +def generate_step_md(step: Step, sub_agent_filename: str = "", heading_level: int = 3) -> str: """Generate markdown for a single step as a collapsible details section.""" status = "🚫 Blocked" if step.is_blocked else "" if step.is_sub_agent_call: @@ -396,39 +502,36 @@ def generate_step_md(step: Step, sub_agent_filename: str = "") -> str: summary = f"Step {step.number}" if status: - summary += f" - {status}" + summary += f" — {status}" - # Build brief description from the thought (first sentence) if step.thought: first_line = step.thought.split('\n')[0] if len(first_line) > 120: first_line = first_line[:117] + "..." summary += f": {first_line}" + h = '#' * heading_level + md = f"
\n{summary}\n\n" - # Thought section if step.thought: - md += "### 💭 Thought\n\n" + md += f"{h} 💭 Thought\n\n" md += f"{step.thought}\n\n" - # Blocked command warning if step.is_blocked: - md += "### ⚠️ Command Blocked\n\n" + md += f"{h} ⚠️ Command Blocked\n\n" if step.blocked_reason: md += f"> {step.blocked_reason}\n" if step.blocked_alternative: md += f"> {step.blocked_alternative}\n" md += "\n" - # Command section if step.command: - md += "### ➡️ Command\n\n" + md += f"{h} ➡️ Command\n\n" if step.is_sub_agent_call: md += "**Sub-agent invocation:**\n\n" if sub_agent_filename: md += f"📎 **[View Sub-Agent Trajectory]({sub_agent_filename})**\n\n" - # Show the task if step.sub_agent_task: md += "
\nSub-agent task description\n\n" md += f"```\n{step.sub_agent_task}\n```\n\n" @@ -436,9 +539,8 @@ def generate_step_md(step: Step, sub_agent_filename: str = "") -> str: else: md += f"```bash\n{step.command}\n```\n\n" - # Output section if step.output: - md += "### ⬅️ Output\n\n" + md += f"{h} ⬅️ Output\n\n" output_text = truncate_text(step.output) md += f"```\n{output_text}\n```\n\n" @@ -446,9 +548,19 @@ def generate_step_md(step: Step, sub_agent_filename: str = "") -> str: return md +def _agent_status_str(agent: Agent) -> str: + if agent.completed: + return "✅ Completed" + if agent.max_iterations_reached: + return "❌ Failed (max iterations / timeout)" + return "❓ Unknown" + + def generate_main_agent_md(test_case: TestCase) -> str: """Generate the main agent markdown file content.""" - md = f"# 🤖 Main Agent Trajectory: {test_case.name}\n\n" + md = f"# 🤖 Agent Trajectory: {test_case.name}\n\n" + + md += generate_setup_md(test_case.setup) if test_case.main_agent and test_case.main_agent.task: md += "## Task\n\n" @@ -471,7 +583,6 @@ def generate_main_agent_md(test_case: TestCase) -> str: md += "---\n\n" - # Summary if agent.completed: md += "## ✅ Task Completed\n\n" if agent.final_thoughts: @@ -480,7 +591,6 @@ def generate_main_agent_md(test_case: TestCase) -> str: md += "## ❌ Max Iterations Reached\n\n" md += "The agent did not complete the task within the maximum allowed iterations.\n\n" - # Sub-agent index if test_case.sub_agents: md += "## 📋 Sub-Agents\n\n" md += "| # | Task | Status | Link |\n" @@ -490,7 +600,7 @@ def generate_main_agent_md(test_case: TestCase) -> str: first_line = clean.split('\n')[0] task_summary = first_line[:80] + "..." 
if len(first_line) > 80 else first_line task_summary = task_summary.replace('|', '\\|') - status = "✅ Completed" if sub.completed else "❌ Failed" + status = _agent_status_str(sub) link = f"[sub_agent_{i + 1}.md](sub_agent_{i + 1}.md)" md += f"| {i + 1} | {task_summary} | {status} | {link} |\n" md += "\n" @@ -501,22 +611,18 @@ def generate_main_agent_md(test_case: TestCase) -> str: def clean_task_text(task: str) -> str: """Clean up a task string: remove microbot_sub prefix, escaped quotes, etc.""" text = task.strip() - # Remove microbot_sub --task "..." wrapper if present if text.startswith('microbot_sub'): match = re.search(r'--task\s+["\'](.+)', text, re.DOTALL) if match: text = match.group(1) - # Remove trailing quote + flags text = re.sub(r'["\']\s*--(?:iterations|timeout).*$', '', text, flags=re.DOTALL) text = text.strip().strip('"').strip("'").strip() - # Unescape - text = text.replace('\\"', '"').replace('\\n', '\n').replace("\\'" , "'") + text = text.replace('\\"', '"').replace('\\n', '\n').replace("\\'", "'") return text def generate_sub_agent_md(sub_agent: Agent, index: int, test_case_name: str) -> str: """Generate a sub-agent markdown file content.""" - # Clean and use the first line of the task as heading clean_task = clean_task_text(sub_agent.task) task_heading = clean_task.split('\n')[0] if clean_task else f"Sub-Agent {index + 1}" if len(task_heading) > 150: @@ -539,14 +645,114 @@ def generate_sub_agent_md(sub_agent: Agent, index: int, test_case_name: str) -> md += "---\n\n" - # Summary if sub_agent.completed: md += "## ✅ Task Completed\n\n" if sub_agent.final_thoughts: md += f"{sub_agent.final_thoughts}\n\n" elif sub_agent.max_iterations_reached: md += "## ❌ Max Iterations Reached\n\n" - md += "The sub-agent did not complete the task within the maximum allowed iterations.\n\n" + if sub_agent.error_message: + md += f"> {sub_agent.error_message}\n\n" + else: + md += "The sub-agent did not complete the task within the maximum allowed iterations.\n\n" + + 
return md + + +# ─────────────────────────── Single-File Mode ─────────────────────────── + + +def generate_single_file_md(test_case: TestCase) -> str: + """Generate a single markdown file containing the main agent and all sub-agents.""" + md = f"# 🤖 Agent Trajectory: {test_case.name}\n\n" + + md += generate_setup_md(test_case.setup) + + # Table of contents + if test_case.sub_agents: + md += "## 📑 Table of Contents\n\n" + md += "- [Main Agent](#main-agent)\n" + for i, sub in enumerate(test_case.sub_agents): + clean = clean_task_text(sub.task) + first_line = clean.split('\n')[0][:60] + md += f"- [Sub-Agent {i + 1}: {first_line}](#sub-agent-{i + 1})\n" + md += "\n---\n\n" + + # Main agent section + md += "## Main Agent\n\n" + + if test_case.main_agent and test_case.main_agent.task: + md += "### Task\n\n" + task_text = test_case.main_agent.task + if len(task_text) > 500: + md += f"
\nFull task description\n\n{task_text}\n\n
\n\n" + else: + md += f"{task_text}\n\n" + + md += "---\n\n" + md += "### Steps\n\n" + + if test_case.main_agent: + agent = test_case.main_agent + for step in agent.steps: + sub_ref = "" + if step.is_sub_agent_call and step.sub_agent_index >= 0: + sub_ref = f"#sub-agent-{step.sub_agent_index + 1}" + md += generate_step_md(step, sub_agent_filename=sub_ref, heading_level=4) + + md += "---\n\n" + + if agent.completed: + md += "### ✅ Task Completed\n\n" + if agent.final_thoughts: + md += f"{agent.final_thoughts}\n\n" + elif agent.max_iterations_reached: + md += "### ❌ Max Iterations Reached\n\n" + + # Sub-agent summary table + if test_case.sub_agents: + md += "### 📋 Sub-Agents Summary\n\n" + md += "| # | Task | Status |\n" + md += "|---|------|--------|\n" + for i, sub in enumerate(test_case.sub_agents): + clean = clean_task_text(sub.task) + first_line = clean.split('\n')[0] + task_summary = first_line[:80] + "..." if len(first_line) > 80 else first_line + task_summary = task_summary.replace('|', '\\|') + status = _agent_status_str(sub) + md += f"| [{i + 1}](#sub-agent-{i + 1}) | {task_summary} | {status} |\n" + md += "\n" + + # Sub-agent sections + for i, sub in enumerate(test_case.sub_agents): + clean_task = clean_task_text(sub.task) + task_heading = clean_task.split('\n')[0] if clean_task else f"Sub-Agent {i + 1}" + if len(task_heading) > 120: + task_heading = task_heading[:117] + "..." + + md += f"\n---\n\n## Sub-Agent {i + 1}\n\n" + md += f"**{task_heading}**\n\n" + + if clean_task and '\n' in clean_task: + md += "
\nFull task description\n\n" + md += f"```\n{clean_task}\n```\n\n" + md += "
\n\n" + + md += "### Steps\n\n" + + for step in sub.steps: + md += generate_step_md(step, heading_level=4) + + md += "---\n\n" + + if sub.completed: + md += "### ✅ Task Completed\n\n" + if sub.final_thoughts: + md += f"{sub.final_thoughts}\n\n" + elif sub.max_iterations_reached: + md += "### ❌ Max Iterations Reached\n\n" + if sub.error_message: + md += f"> {sub.error_message}\n\n" return md @@ -554,19 +760,19 @@ def generate_sub_agent_md(sub_agent: Agent, index: int, test_case_name: str) -> # ─────────────────────────── Main ─────────────────────────── -def parse_and_generate(log_path: str, output_base_dir: str = None): +def parse_and_generate(log_path: str, output_base_dir: str = None, single_file: bool = False): """ Parse an info.log file and generate markdown trajectory files. Args: log_path: Path to the info.log file output_base_dir: Base directory for output. If None, uses the log file's directory. + single_file: If True, generate a single markdown file instead of a directory. """ if not os.path.isfile(log_path): print(f"Error: Log file not found: {log_path}") sys.exit(1) - # Derive test case name from filename basename = os.path.basename(log_path) if basename.endswith('_info.log'): default_test_name = basename[:-len('_info.log')] @@ -580,44 +786,59 @@ def parse_and_generate(log_path: str, output_base_dir: str = None): print(f"Parsing log file: {log_path}") - # Parse entries = parse_log_entries(log_path) print(f" Parsed {len(entries)} log entries") + # Extract setup info before building test cases + setup = _extract_setup_info(entries) + test_cases = build_test_cases(entries) print(f" Found {len(test_cases)} test case(s)") if not test_cases: - # If no test case boundaries found, create a single test case print(" No test case boundaries found, treating entire log as one test case") - tc = TestCase(name=default_test_name) - # Re-parse with a dummy test case test_cases = _build_single_test_case(entries, default_test_name) - for tc in test_cases: - # Create output 
directory - trajectory_dir = os.path.join(output_base_dir, f"{tc.name}_trajectory") - os.makedirs(trajectory_dir, exist_ok=True) - print(f"\n Test case: {tc.name}") - print(f" Output directory: {trajectory_dir}") - - # Generate main agent markdown - main_md = generate_main_agent_md(tc) - main_path = os.path.join(trajectory_dir, "main_agent.md") - with open(main_path, 'w', encoding='utf-8') as f: - f.write(main_md) - main_steps = len(tc.main_agent.steps) if tc.main_agent else 0 - print(f" Created: main_agent.md ({main_steps} steps)") + # Attach setup info to first test case + if test_cases: + test_cases[0].setup = setup - # Generate sub-agent markdowns - for i, sub in enumerate(tc.sub_agents): - sub_md = generate_sub_agent_md(sub, i, tc.name) - sub_path = os.path.join(trajectory_dir, f"sub_agent_{i + 1}.md") - with open(sub_path, 'w', encoding='utf-8') as f: - f.write(sub_md) - print(f" Created: sub_agent_{i + 1}.md ({len(sub.steps)} steps)") + for tc in test_cases: + tc_name = tc.name if tc.name != "unknown" else default_test_name + tc.name = tc_name - print(f"\nDone! 
Generated trajectory files for {len(test_cases)} test case(s).") + main_steps = len(tc.main_agent.steps) if tc.main_agent else 0 + sub_count = len(tc.sub_agents) + print(f"\n Test case: {tc_name}") + print(f" Main agent: {main_steps} steps, {sub_count} sub-agent(s)") + + if single_file: + # Single file mode + md = generate_single_file_md(tc) + out_path = os.path.join(output_base_dir, f"{tc_name}_trajectory.md") + with open(out_path, 'w', encoding='utf-8') as f: + f.write(md) + print(f" Created: {out_path}") + else: + # Multi-file mode + trajectory_dir = os.path.join(output_base_dir, f"{tc_name}_trajectory") + os.makedirs(trajectory_dir, exist_ok=True) + print(f" Output directory: {trajectory_dir}") + + main_md = generate_main_agent_md(tc) + main_path = os.path.join(trajectory_dir, "main_agent.md") + with open(main_path, 'w', encoding='utf-8') as f: + f.write(main_md) + print(f" Created: main_agent.md ({main_steps} steps)") + + for i, sub in enumerate(tc.sub_agents): + sub_md = generate_sub_agent_md(sub, i, tc_name) + sub_path = os.path.join(trajectory_dir, f"sub_agent_{i + 1}.md") + with open(sub_path, 'w', encoding='utf-8') as f: + f.write(sub_md) + print(f" Created: sub_agent_{i + 1}.md ({len(sub.steps)} steps)") + + print(f"\nDone! 
Generated trajectory for {len(test_cases)} test case(s).") return test_cases @@ -628,6 +849,7 @@ def _build_single_test_case(entries: List[dict], name: str) -> List[TestCase]: """ fake_boundary = { 'timestamp': '2000-01-01 00:00:00,000', + 'module': '', 'level': 'INFO', 'content': f'Test directory set up at: /fake/{name}', 'line_num': 0, @@ -636,17 +858,17 @@ def _build_single_test_case(entries: List[dict], name: str) -> List[TestCase]: def main(): - if len(sys.argv) < 2: - print("Usage: python multi_agent_log_parser.py _info.log [output_dir]") - print("\nParses an info.log file and generates markdown trajectory files.") - print("The log file should be named as _info.log.") - print("A directory _trajectory will be created with all markdown files.") - sys.exit(1) - - log_path = sys.argv[1] - output_dir = sys.argv[2] if len(sys.argv) > 2 else None - - parse_and_generate(log_path, output_dir) + parser = argparse.ArgumentParser( + description="Parse microbots info.log files into markdown trajectory files." + ) + parser.add_argument("log_file", help="Path to the info.log file to parse") + parser.add_argument("output_dir", nargs="?", default=None, + help="Output directory (default: same directory as log file)") + parser.add_argument("--single-file", action="store_true", + help="Generate a single markdown file instead of a directory with separate files") + + args = parser.parse_args() + parse_and_generate(args.log_file, args.output_dir, args.single_file) if __name__ == '__main__': diff --git a/test/bot/test_copilot_bot.py b/test/bot/test_copilot_bot.py new file mode 100644 index 00000000..94f1658c --- /dev/null +++ b/test/bot/test_copilot_bot.py @@ -0,0 +1,393 @@ +""" +Unit and integration tests for CopilotBot. + +Unit tests mock the copilot SDK and Docker environment to verify the +wiring and lifecycle. Integration tests (marked ``@pytest.mark.integration``) +require a real Docker daemon, copilot-cli, and GitHub authentication. 
+""" + +import importlib +import os +import shutil +import subprocess +import sys +from unittest.mock import AsyncMock, MagicMock, Mock, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) +) + +# --------------------------------------------------------------------------- +# Mock the copilot SDK before importing CopilotBot (optional dependency) +# --------------------------------------------------------------------------- +_mock_copilot = MagicMock() +_mock_copilot.CopilotClient = MagicMock +_mock_copilot.ExternalServerConfig = MagicMock + +_mock_permission = MagicMock() +_mock_permission.PermissionHandler = MagicMock() +_mock_permission.PermissionHandler.approve_all = MagicMock() +_mock_permission.PermissionRequestResult = MagicMock + +_mock_events = MagicMock() +_mock_events.SessionEventType = MagicMock() +_mock_events.SessionEventType.ASSISTANT_MESSAGE = "assistant.message" +_mock_events.SessionEventType.ASSISTANT_MESSAGE_DELTA = "assistant.message_delta" +_mock_events.SessionEventType.SESSION_IDLE = "session.idle" + +_mock_tools = MagicMock() +_mock_tools.Tool = MagicMock +_mock_tools.ToolInvocation = MagicMock +_mock_tools.ToolResult = MagicMock +_mock_tools.define_tool = MagicMock + +sys.modules.setdefault("copilot", _mock_copilot) +sys.modules.setdefault("copilot.session", _mock_permission) +sys.modules.setdefault("copilot.generated.session_events", _mock_events) +sys.modules.setdefault("copilot.tools", _mock_tools) +sys.modules.setdefault("copilot.types", MagicMock()) + +# Reload to pick up mock +if "microbots.bot.CopilotBot" in sys.modules: + importlib.reload(sys.modules["microbots.bot.CopilotBot"]) + +from microbots.MicroBot import BotRunResult + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _copilot_cli_available(): + return shutil.which("copilot") is not None 
+ + +def _copilot_sdk_installed(): + try: + import copilot # noqa: F401 + return not isinstance(copilot, MagicMock) + except ImportError: + return False + + +def _copilot_auth_available(): + if os.environ.get("GITHUB_TOKEN") or os.environ.get("COPILOT_GITHUB_TOKEN"): + return True + if shutil.which("gh"): + try: + result = subprocess.run( + ["gh", "auth", "status"], capture_output=True, timeout=10 + ) + return result.returncode == 0 + except Exception: + pass + return False + + +# --------------------------------------------------------------------------- +# Unit test fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def mock_environment(): + """Create a mock LocalDockerEnvironment.""" + env = MagicMock() + env.port = 9000 + env.container_port = 8080 + env.container = MagicMock() + env.container.id = "abc123def456" + env.image = "kavyasree261002/shell_server:latest" + env.working_dir = "/tmp/mock_workdir" + env.folder_to_mount = None + env.overlay_mount = False + + # Make execute return success by default + success_return = MagicMock() + success_return.return_code = 0 + success_return.stdout = "copilot version 1.0.0" + success_return.stderr = "" + env.execute = MagicMock(return_value=success_return) + env.copy_to_container = MagicMock(return_value=True) + env.stop = MagicMock() + return env + + +@pytest.fixture +def mock_copilot_session(): + """Mock Copilot SDK session.""" + session = AsyncMock() + session.disconnect = AsyncMock() + + response = Mock() + response.data = Mock() + response.data.content = "Task completed successfully." 
+ session.send_and_wait = AsyncMock(return_value=response) + session.on = MagicMock() + return session + + +@pytest.fixture +def mock_copilot_client(mock_copilot_session): + """Mock CopilotClient.""" + client = AsyncMock() + client.start = AsyncMock() + client.stop = AsyncMock() + client.create_session = AsyncMock(return_value=mock_copilot_session) + return client + + +@pytest.fixture +def copilot_bot(mock_environment, mock_copilot_client): + """Create a CopilotBot with all external dependencies mocked.""" + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322, 4323]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test_token_123", + ) + yield bot + # Stop the event loop thread properly before teardown + try: + bot._loop.call_soon_threadsafe(bot._loop.stop) + bot._thread.join(timeout=2) + except Exception: + pass + bot.environment = None # Prevent stop() from trying env.stop() again + + +# --------------------------------------------------------------------------- +# Unit tests +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotInit: + """Tests for CopilotBot initialisation.""" + + def test_stores_model(self, copilot_bot): + assert copilot_bot.model == "gpt-4.1" + + def test_stores_github_token(self, copilot_bot): + assert copilot_bot.github_token == "ghp_test_token_123" + + def test_environment_assigned(self, copilot_bot, mock_environment): + 
assert copilot_bot.environment is mock_environment + + def test_additional_tools_default_empty(self, copilot_bot): + assert copilot_bot.additional_tools == [] + + def test_import_error_without_sdk(self): + """CopilotBot raises ImportError when copilot SDK is not installed.""" + # Temporarily remove the mock so the import fails + saved = sys.modules.get("copilot") + try: + sys.modules["copilot"] = None # Force ImportError on import + # Need to reload the module + if "microbots.bot.CopilotBot" in sys.modules: + importlib.reload(sys.modules["microbots.bot.CopilotBot"]) + from microbots.bot.CopilotBot import CopilotBot as CB + with pytest.raises(ImportError, match="github-copilot-sdk"): + CB(model="gpt-4.1") + finally: + sys.modules["copilot"] = saved + if "microbots.bot.CopilotBot" in sys.modules: + importlib.reload(sys.modules["microbots.bot.CopilotBot"]) + + +@pytest.mark.unit +class TestCopilotBotRun: + """Tests for CopilotBot.run().""" + + def test_run_returns_bot_run_result(self, copilot_bot): + result = copilot_bot.run("Fix the bug in main.py") + assert isinstance(result, BotRunResult) + + def test_run_success(self, copilot_bot): + result = copilot_bot.run("Fix the bug in main.py") + assert result.status is True + assert result.error is None + assert result.result is not None + + def test_run_calls_tool_setup(self, copilot_bot, mock_environment): + mock_tool = MagicMock() + mock_tool.usage_instructions_to_llm = "Use tool X" + mock_tool.install_commands = [] + mock_tool.verify_commands = [] + copilot_bot.additional_tools = [mock_tool] + + copilot_bot.run("test task") + mock_tool.setup_tool.assert_called_once_with(mock_environment) + + def test_run_handles_exception(self, copilot_bot): + """Run returns failure BotRunResult on exceptions.""" + with patch.object(copilot_bot, "_run_async", side_effect=RuntimeError("boom")): + result = copilot_bot.run("test") + assert result.status is False + assert "boom" in result.error + + +@pytest.mark.unit +class 
TestCopilotBotSystemMessage: + """Tests for system message construction.""" + + def test_system_message_empty_no_mount_no_tools(self, copilot_bot): + msg = copilot_bot._build_system_message() + assert msg == "" + + def test_system_message_includes_mount_path(self, mock_environment, mock_copilot_client): + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322, 4323]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + patch("microbots.bot.CopilotBot.CopilotBot._map_cli_port"), + patch("microbots.bot.CopilotBot.CopilotBot._create_environment"), + ): + from microbots.bot.CopilotBot import CopilotBot + from microbots.extras.mount import Mount + mount = Mount("/tmp/test_repo", "/workdir/test_repo", "READ_WRITE") + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + bot.folder_to_mount = mount + msg = bot._build_system_message() + assert "/workdir/test_repo" in msg + bot.stop() + + def test_system_message_includes_tool_instructions(self, copilot_bot): + mock_tool = MagicMock() + mock_tool.usage_instructions_to_llm = "# Use browser command" + copilot_bot.additional_tools = [mock_tool] + + msg = copilot_bot._build_system_message() + assert "browser" in msg + + +@pytest.mark.unit +class TestCopilotBotStop: + """Tests for CopilotBot.stop().""" + + def test_stop_cleans_environment(self, copilot_bot, mock_environment): + copilot_bot.stop() + mock_environment.stop.assert_called_once() + + def test_stop_idempotent(self, copilot_bot, mock_environment): + copilot_bot.stop() + copilot_bot.stop() # Should not raise + + 
+@pytest.mark.unit +class TestCopilotBotCLIInstall: + """Tests for copilot-cli installation logic.""" + + def test_install_cli_calls_execute(self, mock_environment): + from microbots.bot.CopilotBot import CopilotBot + + with ( + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322]), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=AsyncMock()), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + # _install_copilot_cli was called during __init__ + # Verify that execute was called with npm install command + calls = [str(c) for c in mock_environment.execute.call_args_list] + npm_calls = [c for c in calls if "npm install" in c or "copilot" in c] + assert len(npm_calls) > 0, "Expected copilot-cli install commands" + bot.stop() + + def test_install_cli_raises_on_failure(self, mock_environment): + from microbots.bot.CopilotBot import CopilotBot + + fail_return = MagicMock() + fail_return.return_code = 1 + fail_return.stdout = "" + fail_return.stderr = "npm ERR! 
not found" + mock_environment.execute = MagicMock(return_value=fail_return) + + with ( + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322]), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=AsyncMock()), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + with pytest.raises(RuntimeError, match="Failed to install copilot-cli"): + CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + + +# --------------------------------------------------------------------------- +# Integration tests — require real Docker + copilot-cli + auth +# --------------------------------------------------------------------------- + +_skip_no_copilot_cli = pytest.mark.skipif( + not _copilot_cli_available(), + reason="GitHub Copilot CLI not installed (copilot not in PATH)", +) + +_skip_no_copilot_sdk = pytest.mark.skipif( + not _copilot_sdk_installed(), + reason="github-copilot-sdk not installed (pip install microbots[ghcp])", +) + +_skip_no_copilot_auth = pytest.mark.skipif( + not _copilot_auth_available(), + reason="No GitHub auth available (set GITHUB_TOKEN or run 'gh auth login')", +) + + +@_skip_no_copilot_cli +@_skip_no_copilot_sdk +@_skip_no_copilot_auth +@pytest.mark.integration +@pytest.mark.slow +class TestCopilotBotIntegration: + """End-to-end integration tests with real Copilot SDK.""" + + def test_simple_task(self, test_repo, issue_1): + """CopilotBot can fix a simple syntax error.""" + from microbots.bot.CopilotBot import CopilotBot + + issue_text = issue_1[0] + verify_function = issue_1[1] + + bot = CopilotBot( + model="gpt-4.1", + folder_to_mount=str(test_repo), + permission="READ_WRITE", + ) + + try: + result = bot.run( + issue_text, + timeout_in_seconds=300, + ) + assert result.status is True, f"CopilotBot failed: {result.error}" + 
verify_function(test_repo) + finally: + bot.stop() From 96839ff387aa2cdcc009722af296dfa9f0aa074e Mon Sep 17 00:00:00 2001 From: bala Date: Mon, 30 Mar 2026 13:19:22 +0000 Subject: [PATCH 03/12] Backup. First level walkthrough done. Need complete testing --- src/microbots/bot/CopilotBot.py | 207 +++++------------- src/microbots/environment/Environment.py | 29 +++ .../local_docker/LocalDockerEnvironment.py | 82 +++++++ test/bot/test_copilot_bot.py | 27 ++- 4 files changed, 194 insertions(+), 151 deletions(-) diff --git a/src/microbots/bot/CopilotBot.py b/src/microbots/bot/CopilotBot.py index 8bc869a4..dbe6359e 100644 --- a/src/microbots/bot/CopilotBot.py +++ b/src/microbots/bot/CopilotBot.py @@ -49,6 +49,7 @@ ) from microbots.extras.mount import Mount, MountType from microbots.MicroBot import BotRunResult +from microbots.tools.external_tool import ExternalTool from microbots.tools.tool import ToolAbstract from microbots.utils.network import get_free_port @@ -130,10 +131,21 @@ def __init__( if not self.environment: self._create_environment() + # ── Validate tools — ExternalTool is not supported ────────── + for tool in self.additional_tools: + if isinstance(tool, ExternalTool): + raise ValueError( + f"CopilotBot does not support ExternalTool '{tool.name}'. " + f"copilot-cli runs inside the Docker container, so only " + f"internal (container-side) tools are allowed." + ) + # ── Install additional tools inside the container ─────────── for tool in self.additional_tools: + logger.info("🔧 Installing additional tool '%s'...", tool.name) tool.install_tool(self.environment) tool.verify_tool_installation(self.environment) + logger.info("✅ Tool '%s' installed and verified", tool.name) # ── Install & start copilot-cli inside the container ──────── self._cli_host_port = get_free_port() @@ -192,8 +204,11 @@ def run( status=True on success with the agent's final message in *result*, or status=False with an error description. 
""" + logger.info("🚀 Starting CopilotBot run — task: %.120s...", task) + # Setup additional tools (env vars, files, setup_commands) for tool in self.additional_tools: + logger.info("⚙️ Setting up tool '%s'", tool.name) tool.setup_tool(self.environment) # Mount additional folders @@ -203,19 +218,16 @@ def run( # Build system message with tool instructions system_content = self._build_system_message() - # Build SDK custom tools from additional_tools - sdk_tools = self._build_sdk_tools() - try: result_text = self._run_async( self._execute_session( task=task, system_content=system_content, - sdk_tools=sdk_tools, timeout=timeout_in_seconds, streaming=streaming, ) ) + logger.info("✅ CopilotBot run completed successfully") return BotRunResult(status=True, result=result_text, error=None) except Exception as e: logger.exception("❌ CopilotBot run failed: %s", e) @@ -261,91 +273,10 @@ def __del__(self): def _create_environment(self): free_port = get_free_port() - # Also map the copilot-cli headless port - self._cli_host_port = get_free_port() self.environment = LocalDockerEnvironment( port=free_port, folder_to_mount=self.folder_to_mount, ) - # Expose additional port mapping for copilot-cli - self._map_cli_port() - - def _map_cli_port(self): - """Add a second port mapping for the copilot-cli headless server. - - Docker port mappings are static after container creation, so we use - ``socat`` inside the container to forward the CLI port through the - existing shell_server port range, OR we use ``docker exec`` via iptables. - - The simplest reliable approach: install socat and forward from a known - port that's already exposed, or use ``docker port``. - - Actually, the cleanest approach: stop the container, recreate it with - the additional port. Since we control environment creation this is safe. - """ - # The environment was just created by us, so recreating with an extra port - # is acceptable. We stop the existing container and create a new one - # with both ports mapped. 
- if not self.environment.container: - return - - container = self.environment.container - image = self.environment.image - port = self.environment.port - container_port = self.environment.container_port - - # Gather existing volume config from the running container - import docker - - container.stop() - container.remove() - - # Re-create with both ports - volumes_config = {self.environment.working_dir: {"bind": DOCKER_WORKING_DIR, "mode": "rw"}} - if self.folder_to_mount: - mode_map = {"READ_ONLY": "ro", "READ_WRITE": "rw"} - if self.folder_to_mount.permission == PermissionLabels.READ_ONLY: - volumes_config[self.folder_to_mount.host_path_info.abs_path] = { - "bind": f"/ro/{os.path.basename(self.folder_to_mount.sandbox_path)}", - "mode": mode_map[self.folder_to_mount.permission], - } - else: - volumes_config[self.folder_to_mount.host_path_info.abs_path] = { - "bind": self.folder_to_mount.sandbox_path, - "mode": mode_map[self.folder_to_mount.permission], - } - - port_mapping = { - f"{container_port}/tcp": port, - f"{_CONTAINER_CLI_PORT}/tcp": self._cli_host_port, - } - - client = docker.from_env() - self.environment.container = client.containers.run( - image, - volumes=volumes_config, - ports=port_mapping, - detach=True, - working_dir="/app", - privileged=True, - environment={"BOT_PORT": str(container_port)}, - ) - logger.info( - "🚀 Recreated container with CLI port mapping: host %d → container %d", - self._cli_host_port, - _CONTAINER_CLI_PORT, - ) - time.sleep(2) - - # Re-setup overlay if needed - if self.folder_to_mount and self.folder_to_mount.permission == PermissionLabels.READ_ONLY: - self.environment._setup_overlay_mount() - - # cd into mounted folder - if self.folder_to_mount: - self.environment.execute(f"cd {self.folder_to_mount.sandbox_path}") - else: - self.environment.execute("cd /") def _install_copilot_cli(self): """Install copilot-cli inside the Docker container.""" @@ -405,6 +336,13 @@ def _start_copilot_cli_server(self): f"Failed to start copilot-cli 
server: {result.stderr}" ) + # Expose the CLI port from the environment to the host + if not self.environment.expose_port(_CONTAINER_CLI_PORT, self._cli_host_port): + raise RuntimeError( + f"Failed to expose copilot-cli port {_CONTAINER_CLI_PORT} " + f"on host port {self._cli_host_port}" + ) + # Wait for the server to be ready self._wait_for_cli_ready() logger.info( @@ -445,7 +383,6 @@ async def _execute_session( self, task: str, system_content: str, - sdk_tools: list, timeout: int, streaming: bool, ) -> str: @@ -456,14 +393,17 @@ async def _execute_session( "model": self.model, "on_permission_request": self._PermissionHandler.approve_all, "streaming": streaming, + "hooks": { + "on_pre_tool_use": self._on_pre_tool_use, + "on_post_tool_use": self._on_post_tool_use, + }, } if system_content: session_kwargs["system_message"] = {"content": system_content} - if sdk_tools: - session_kwargs["tools"] = sdk_tools - + logger.info("📡 Creating Copilot session (model=%s, streaming=%s)", self.model, streaming) + logger.debug("Session kwargs: %s", session_kwargs) session = await self._client.create_session(**session_kwargs) collected_text = [] @@ -473,19 +413,27 @@ def _on_event(event): if event.type == SessionEventType.ASSISTANT_MESSAGE: if event.data and event.data.content: collected_text.append(event.data.content) + logger.info("💬 Assistant message received (%d chars)", len(event.data.content)) elif event.type == SessionEventType.ASSISTANT_MESSAGE_DELTA: if event.data and event.data.delta_content: logger.debug("📝 %s", event.data.delta_content) elif event.type == SessionEventType.SESSION_IDLE: + logger.info("⏹️ Session idle — agent finished processing") done_event.set() + else: + logger.debug("📨 Session event: %s", event.type) session.on(_on_event) # Send the task prompt and wait for completion + logger.info("📤 Sending task to Copilot agent...") + logger.debug("Task content: %s", task) response = await session.send_and_wait(task, timeout=float(timeout)) # If send_and_wait 
returned a full response, use it if response and response.data and response.data.content: + logger.info("✅ Received response from send_and_wait with %d chars", len(response.data.content)) + logger.info("Response content: %s", response.data.content) return response.data.content # Otherwise wait for the collected events @@ -493,7 +441,7 @@ def _on_event(event): try: await asyncio.wait_for(done_event.wait(), timeout=float(timeout)) except asyncio.TimeoutError: - pass + logger.warning("⏱️ Timed out waiting for session idle after %ds", timeout) await session.disconnect() @@ -518,66 +466,27 @@ def _build_system_message(self) -> str: return "\n\n".join(parts) - def _build_sdk_tools(self) -> list: - """Convert Microbots additional tools into Copilot SDK tool definitions. - - Only tools that implement ``is_invoked`` / have an ``invoke`` method - (ExternalTools) can be meaningfully wrapped. Internal tools that run - via shell commands are already accessible to Copilot's built-in shell - tool and don't need explicit registration. 
- """ - from microbots.tools.external_tool import ExternalTool - - sdk_tools = [] - for tool in self.additional_tools: - if isinstance(tool, ExternalTool) and hasattr(tool, "invoke"): - sdk_tool = self._wrap_external_tool(tool) - if sdk_tool: - sdk_tools.append(sdk_tool) - return sdk_tools - - def _wrap_external_tool(self, tool: ToolAbstract): - """Wrap a Microbots ExternalTool as a Copilot SDK define_tool.""" - try: - from copilot.tools import Tool as CopilotTool, ToolInvocation, ToolResult - except ImportError: - return None - - bot_ref = self # Capture reference for the handler closure - - async def handler(invocation: ToolInvocation) -> ToolResult: - command = invocation.arguments.get("command", "") - try: - cmd_return = tool.invoke(command, bot_ref) - output = cmd_return.stdout if cmd_return.return_code == 0 else ( - f"COMMAND FAILED (rc={cmd_return.return_code})\n" - f"stdout: {cmd_return.stdout}\nstderr: {cmd_return.stderr}" - ) - return ToolResult( - text_result_for_llm=output, - result_type="success" if cmd_return.return_code == 0 else "failure", - ) - except Exception as e: - return ToolResult( - text_result_for_llm=f"Tool error: {e}", - result_type="failure", - ) + # ────────────────────────────────────────────────────────────────── + # Private — SDK hooks for tool-use logging + # ────────────────────────────────────────────────────────────────── - return CopilotTool( - name=tool.name, - description=tool.description, - parameters={ - "type": "object", - "properties": { - "command": { - "type": "string", - "description": f"The command to invoke the {tool.name} tool", - }, - }, - "required": ["command"], - }, - handler=handler, - ) + async def _on_pre_tool_use(self, input_data, invocation): + """Hook called before each tool execution — log the call.""" + tool_name = input_data.get("toolName", "unknown") + tool_args = input_data.get("toolArgs", {}) + logger.info("➡️ Tool call: %s — args: %s", tool_name, tool_args) + return {"permissionDecision": "allow"} + 
+ async def _on_post_tool_use(self, input_data, invocation): + """Hook called after each tool execution — log the result.""" + tool_name = input_data.get("toolName", "unknown") + result = input_data.get("toolResult", "") + # Truncate long results for readable logs + result_str = str(result) + if len(result_str) > 500: + result_str = result_str[:500] + "... (truncated)" + logger.info("⬅️ Tool result: %s — output: %s", tool_name, result_str) + return {} # ────────────────────────────────────────────────────────────────── # Private — mount helpers diff --git a/src/microbots/environment/Environment.py b/src/microbots/environment/Environment.py index f2fdabd9..5a2f2c0b 100644 --- a/src/microbots/environment/Environment.py +++ b/src/microbots/environment/Environment.py @@ -33,3 +33,32 @@ def copy_from_container(self, src_path: str, dest_path: str) -> bool: f"{self.__class__.__name__} does not support copying files from container. " f"This is an optional feature - only implement if needed for your use case." ) + + def expose_port(self, container_port: int, host_port: int) -> bool: + """Expose an additional port from the running environment. + + Makes a service listening on *container_port* inside the environment + reachable at *host_port* on the host. How this is achieved is up to + the implementation (e.g. socat, iptables, native platform API). + + Parameters + ---------- + container_port : int + The port the service is listening on **inside** the environment. + host_port : int + The port on the **host** that should forward to *container_port*. + + Returns + ------- + bool + True if the port was exposed successfully, False otherwise. + + Raises + ------ + NotImplementedError + If the environment does not support dynamic port exposure. + """ + raise NotImplementedError( + f"{self.__class__.__name__} does not support exposing additional ports. " + f"This is an optional feature - only implement if needed for your use case." 
+ ) diff --git a/src/microbots/environment/local_docker/LocalDockerEnvironment.py b/src/microbots/environment/local_docker/LocalDockerEnvironment.py index 874b80db..080e2465 100644 --- a/src/microbots/environment/local_docker/LocalDockerEnvironment.py +++ b/src/microbots/environment/local_docker/LocalDockerEnvironment.py @@ -153,8 +153,90 @@ def _teardown_overlay_mount(self): except Exception as e: logger.error("❌ Failed to teardown overlay mount: %s", e) + def expose_port(self, container_port: int, host_port: int) -> bool: + """Expose an additional port from the running container using ``socat``. + + Docker does not allow adding port mappings to an already-running + container. Instead we install ``socat`` on the **host** and run it + as a background process that forwards ``host_port`` → the container's + IP on ``container_port``. + + The socat process is tracked so it can be cleaned up in :meth:`stop`. + """ + if not self.container: + logger.error("❌ No active container to expose port from") + return False + + try: + # Resolve the container's IP on the Docker bridge network + self.container.reload() + networks = self.container.attrs["NetworkSettings"]["Networks"] + container_ip = next(iter(networks.values()))["IPAddress"] + if not container_ip: + logger.error("❌ Could not determine container IP address") + return False + + # Launch a host-side socat forwarder in the background + proc = subprocess.Popen( + [ + "socat", + f"TCP-LISTEN:{host_port},fork,reuseaddr", + f"TCP:{container_ip}:{container_port}", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + ) + + # Give socat a moment to bind, then verify it's still alive + time.sleep(0.5) + if proc.poll() is not None: + stderr = proc.stderr.read().decode() if proc.stderr else "" + logger.error( + "❌ socat exited immediately (rc=%d): %s", + proc.returncode, + stderr, + ) + return False + + # Track the process for cleanup + if not hasattr(self, "_socat_procs"): + self._socat_procs: list[subprocess.Popen] = [] 
+ self._socat_procs.append(proc) + + logger.info( + "✅ Exposed container port %d on host port %d (via socat, container IP %s)", + container_port, + host_port, + container_ip, + ) + return True + + except FileNotFoundError: + logger.error( + "❌ 'socat' is not installed on the host. " + "Install it with: apt-get install socat" + ) + return False + except Exception as e: + logger.exception("❌ Failed to expose port: %s", e) + return False + + def _cleanup_socat(self): + """Terminate any socat forwarder processes we spawned.""" + for proc in getattr(self, "_socat_procs", []): + try: + proc.terminate() + proc.wait(timeout=3) + except Exception: + try: + proc.kill() + except Exception: + pass + self._socat_procs = [] + def stop(self): """Stop and remove the container""" + self._cleanup_socat() if self.container: if self.overlay_mount: self._teardown_overlay_mount() diff --git a/test/bot/test_copilot_bot.py b/test/bot/test_copilot_bot.py index 94f1658c..2534704a 100644 --- a/test/bot/test_copilot_bot.py +++ b/test/bot/test_copilot_bot.py @@ -186,6 +186,31 @@ def test_environment_assigned(self, copilot_bot, mock_environment): def test_additional_tools_default_empty(self, copilot_bot): assert copilot_bot.additional_tools == [] + def test_rejects_external_tool(self, mock_environment, mock_copilot_client): + """CopilotBot raises ValueError if an ExternalTool is passed.""" + from microbots.tools.external_tool import ExternalTool + + ext_tool = MagicMock(spec=ExternalTool) + ext_tool.name = "my_external" + + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + 
patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + with pytest.raises(ValueError, match="does not support ExternalTool"): + CopilotBot( + model="gpt-4.1", + environment=mock_environment, + additional_tools=[ext_tool], + github_token="ghp_test", + ) + def test_import_error_without_sdk(self): """CopilotBot raises ImportError when copilot SDK is not installed.""" # Temporarily remove the mock so the import fails @@ -253,8 +278,6 @@ def test_system_message_includes_mount_path(self, mock_environment, mock_copilot patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), patch("copilot.CopilotClient", return_value=mock_copilot_client), patch("copilot.ExternalServerConfig", return_value=MagicMock()), - patch("microbots.bot.CopilotBot.CopilotBot._map_cli_port"), - patch("microbots.bot.CopilotBot.CopilotBot._create_environment"), ): from microbots.bot.CopilotBot import CopilotBot from microbots.extras.mount import Mount From 1e49e0516b85ab997801f0b4e418473b56f1170d Mon Sep 17 00:00:00 2001 From: bala Date: Thu, 2 Apr 2026 12:53:35 +0000 Subject: [PATCH 04/12] Backup --- src/microbots/bot/CopilotBot.py | 38 ++++---- src/microbots/environment/Environment.py | 24 ++--- .../local_docker/LocalDockerEnvironment.py | 88 ++----------------- test/bot/test_copilot_bot.py | 25 +++--- test/swe-bench-test/run_swe_bench.py | 34 ++++++- 5 files changed, 81 insertions(+), 128 deletions(-) diff --git a/src/microbots/bot/CopilotBot.py b/src/microbots/bot/CopilotBot.py index dbe6359e..46f39d31 100644 --- a/src/microbots/bot/CopilotBot.py +++ b/src/microbots/bot/CopilotBot.py @@ -51,7 +51,7 @@ from microbots.MicroBot import BotRunResult from microbots.tools.external_tool import ExternalTool from microbots.tools.tool import ToolAbstract -from microbots.utils.network import get_free_port +from microbots.utils.network import get_free_port # still used for _create_environment logger = getLogger(" CopilotBot ") @@ -104,7 
+104,7 @@ def __init__( ): try: from copilot import CopilotClient, ExternalServerConfig - from copilot.session import PermissionHandler + from copilot.types import PermissionHandler except ImportError: raise ImportError( "CopilotBot requires the github-copilot-sdk package. " @@ -148,7 +148,6 @@ def __init__( logger.info("✅ Tool '%s' installed and verified", tool.name) # ── Install & start copilot-cli inside the container ──────── - self._cli_host_port = get_free_port() self._install_copilot_cli() self._start_copilot_cli_server() @@ -158,16 +157,18 @@ def __init__( self._thread.start() # ── Connect SDK to in-container CLI ───────────────────────── + container_ip = self.environment.get_ipv4_address() self._client = CopilotClient( - ExternalServerConfig(url=f"localhost:{self._cli_host_port}") + ExternalServerConfig(url=f"{container_ip}:{_CONTAINER_CLI_PORT}") ) self._run_async(self._client.start()) self._PermissionHandler = PermissionHandler logger.info( - "✅ CopilotBot initialised — model=%s, cli_port=%d", + "✅ CopilotBot initialised — model=%s, cli=%s:%d", self.model, - self._cli_host_port, + container_ip, + _CONTAINER_CLI_PORT, ) # ────────────────────────────────────────────────────────────────── @@ -284,6 +285,8 @@ def _install_copilot_cli(self): # Install Node.js (required for copilot-cli via npm) install_commands = [ + # Remove stale third-party repos that may have expired GPG keys + "rm -f /etc/apt/sources.list.d/yarn.list", # Install Node.js 22.x (copilot-cli requires Node 22+) "apt-get update -qq && apt-get install -y -qq curl ca-certificates > /dev/null 2>&1", "curl -fsSL https://deb.nodesource.com/setup_22.x | bash - > /dev/null 2>&1", @@ -311,9 +314,10 @@ def _install_copilot_cli(self): def _start_copilot_cli_server(self): """Start copilot-cli in headless server mode inside the container. - The CLI listens on ``_CONTAINER_CLI_PORT`` which is mapped to - ``self._cli_host_port`` on the host. 
Authentication is handled - via the GITHUB_TOKEN environment variable injected into the container. + The CLI listens on ``_CONTAINER_CLI_PORT`` inside the container. + The host connects directly to the container's bridge-network IP. + Authentication is handled via the GITHUB_TOKEN environment variable + injected into the container. """ # Inject the GitHub token into the container for authentication if self.github_token: @@ -336,30 +340,23 @@ def _start_copilot_cli_server(self): f"Failed to start copilot-cli server: {result.stderr}" ) - # Expose the CLI port from the environment to the host - if not self.environment.expose_port(_CONTAINER_CLI_PORT, self._cli_host_port): - raise RuntimeError( - f"Failed to expose copilot-cli port {_CONTAINER_CLI_PORT} " - f"on host port {self._cli_host_port}" - ) - # Wait for the server to be ready self._wait_for_cli_ready() logger.info( - "✅ copilot-cli headless server running on container port %d (host port %d)", + "✅ copilot-cli headless server running on container port %d", _CONTAINER_CLI_PORT, - self._cli_host_port, ) def _wait_for_cli_ready(self): """Poll until the copilot-cli server is accepting connections.""" import socket as _socket + container_ip = self.environment.get_ipv4_address() deadline = time.time() + _CLI_STARTUP_TIMEOUT while time.time() < deadline: try: sock = _socket.create_connection( - ("localhost", self._cli_host_port), timeout=2 + (container_ip, _CONTAINER_CLI_PORT), timeout=2 ) sock.close() return @@ -367,7 +364,7 @@ def _wait_for_cli_ready(self): time.sleep(1) raise TimeoutError( f"copilot-cli did not become ready within {_CLI_STARTUP_TIMEOUT}s " - f"on host port {self._cli_host_port}" + f"on {container_ip}:{_CONTAINER_CLI_PORT}" ) # ────────────────────────────────────────────────────────────────── @@ -483,6 +480,7 @@ async def _on_post_tool_use(self, input_data, invocation): result = input_data.get("toolResult", "") # Truncate long results for readable logs result_str = str(result) + logger.debug("Tool 
'%s'\nexecution result: %s", tool_name, result_str) if len(result_str) > 500: result_str = result_str[:500] + "... (truncated)" logger.info("⬅️ Tool result: %s — output: %s", tool_name, result_str) diff --git a/src/microbots/environment/Environment.py b/src/microbots/environment/Environment.py index 5a2f2c0b..83df8716 100644 --- a/src/microbots/environment/Environment.py +++ b/src/microbots/environment/Environment.py @@ -34,31 +34,23 @@ def copy_from_container(self, src_path: str, dest_path: str) -> bool: f"This is an optional feature - only implement if needed for your use case." ) - def expose_port(self, container_port: int, host_port: int) -> bool: - """Expose an additional port from the running environment. + def get_ipv4_address(self) -> str: + """Return the IPv4 address of the running environment. - Makes a service listening on *container_port* inside the environment - reachable at *host_port* on the host. How this is achieved is up to - the implementation (e.g. socat, iptables, native platform API). - - Parameters - ---------- - container_port : int - The port the service is listening on **inside** the environment. - host_port : int - The port on the **host** that should forward to *container_port*. + This allows host-side code to connect directly to services + running inside the environment without port forwarding. Returns ------- - bool - True if the port was exposed successfully, False otherwise. + str + The IPv4 address of the environment. Raises ------ NotImplementedError - If the environment does not support dynamic port exposure. + If the environment does not support retrieving its IP address. """ raise NotImplementedError( - f"{self.__class__.__name__} does not support exposing additional ports. " + f"{self.__class__.__name__} does not support retrieving its IP address. " f"This is an optional feature - only implement if needed for your use case." 
) diff --git a/src/microbots/environment/local_docker/LocalDockerEnvironment.py b/src/microbots/environment/local_docker/LocalDockerEnvironment.py index 080e2465..d91e98ea 100644 --- a/src/microbots/environment/local_docker/LocalDockerEnvironment.py +++ b/src/microbots/environment/local_docker/LocalDockerEnvironment.py @@ -153,90 +153,20 @@ def _teardown_overlay_mount(self): except Exception as e: logger.error("❌ Failed to teardown overlay mount: %s", e) - def expose_port(self, container_port: int, host_port: int) -> bool: - """Expose an additional port from the running container using ``socat``. - - Docker does not allow adding port mappings to an already-running - container. Instead we install ``socat`` on the **host** and run it - as a background process that forwards ``host_port`` → the container's - IP on ``container_port``. - - The socat process is tracked so it can be cleaned up in :meth:`stop`. - """ + def get_ipv4_address(self) -> str: + """Return the container's IPv4 address on the Docker bridge network.""" if not self.container: - logger.error("❌ No active container to expose port from") - return False - - try: - # Resolve the container's IP on the Docker bridge network - self.container.reload() - networks = self.container.attrs["NetworkSettings"]["Networks"] - container_ip = next(iter(networks.values()))["IPAddress"] - if not container_ip: - logger.error("❌ Could not determine container IP address") - return False - - # Launch a host-side socat forwarder in the background - proc = subprocess.Popen( - [ - "socat", - f"TCP-LISTEN:{host_port},fork,reuseaddr", - f"TCP:{container_ip}:{container_port}", - ], - stdout=subprocess.DEVNULL, - stderr=subprocess.PIPE, - ) + raise RuntimeError("No active container to get IP address from") - # Give socat a moment to bind, then verify it's still alive - time.sleep(0.5) - if proc.poll() is not None: - stderr = proc.stderr.read().decode() if proc.stderr else "" - logger.error( - "❌ socat exited immediately (rc=%d): %s", 
- proc.returncode, - stderr, - ) - return False - - # Track the process for cleanup - if not hasattr(self, "_socat_procs"): - self._socat_procs: list[subprocess.Popen] = [] - self._socat_procs.append(proc) - - logger.info( - "✅ Exposed container port %d on host port %d (via socat, container IP %s)", - container_port, - host_port, - container_ip, - ) - return True - - except FileNotFoundError: - logger.error( - "❌ 'socat' is not installed on the host. " - "Install it with: apt-get install socat" - ) - return False - except Exception as e: - logger.exception("❌ Failed to expose port: %s", e) - return False - - def _cleanup_socat(self): - """Terminate any socat forwarder processes we spawned.""" - for proc in getattr(self, "_socat_procs", []): - try: - proc.terminate() - proc.wait(timeout=3) - except Exception: - try: - proc.kill() - except Exception: - pass - self._socat_procs = [] + self.container.reload() + networks = self.container.attrs["NetworkSettings"]["Networks"] + container_ip = next(iter(networks.values()))["IPAddress"] + if not container_ip: + raise RuntimeError("Could not determine container IP address") + return container_ip def stop(self): """Stop and remove the container""" - self._cleanup_socat() if self.container: if self.overlay_mount: self._teardown_overlay_mount() diff --git a/test/bot/test_copilot_bot.py b/test/bot/test_copilot_bot.py index 2534704a..2cff19b0 100644 --- a/test/bot/test_copilot_bot.py +++ b/test/bot/test_copilot_bot.py @@ -26,10 +26,8 @@ _mock_copilot.CopilotClient = MagicMock _mock_copilot.ExternalServerConfig = MagicMock -_mock_permission = MagicMock() -_mock_permission.PermissionHandler = MagicMock() -_mock_permission.PermissionHandler.approve_all = MagicMock() -_mock_permission.PermissionRequestResult = MagicMock +_mock_session = MagicMock() +_mock_session.PermissionRequestResult = MagicMock _mock_events = MagicMock() _mock_events.SessionEventType = MagicMock() @@ -43,11 +41,15 @@ _mock_tools.ToolResult = MagicMock 
_mock_tools.define_tool = MagicMock +_mock_types = MagicMock() +_mock_types.PermissionHandler = MagicMock() +_mock_types.PermissionHandler.approve_all = MagicMock() + sys.modules.setdefault("copilot", _mock_copilot) -sys.modules.setdefault("copilot.session", _mock_permission) +sys.modules.setdefault("copilot.session", _mock_session) sys.modules.setdefault("copilot.generated.session_events", _mock_events) sys.modules.setdefault("copilot.tools", _mock_tools) -sys.modules.setdefault("copilot.types", MagicMock()) +sys.modules.setdefault("copilot.types", _mock_types) # Reload to pick up mock if "microbots.bot.CopilotBot" in sys.modules: @@ -111,6 +113,7 @@ def mock_environment(): env.execute = MagicMock(return_value=success_return) env.copy_to_container = MagicMock(return_value=True) env.stop = MagicMock() + env.get_ipv4_address = MagicMock(return_value="172.17.0.2") return env @@ -143,7 +146,7 @@ def copilot_bot(mock_environment, mock_copilot_client): """Create a CopilotBot with all external dependencies mocked.""" with ( patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), - patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322, 4323]), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), @@ -195,7 +198,7 @@ def test_rejects_external_tool(self, mock_environment, mock_copilot_client): with ( patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), - patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322]), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), 
patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), @@ -272,7 +275,7 @@ def test_system_message_empty_no_mount_no_tools(self, copilot_bot): def test_system_message_includes_mount_path(self, mock_environment, mock_copilot_client): with ( patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), - patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322, 4323]), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), @@ -322,7 +325,7 @@ def test_install_cli_calls_execute(self, mock_environment): from microbots.bot.CopilotBot import CopilotBot with ( - patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322]), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), patch("copilot.CopilotClient", return_value=AsyncMock()), @@ -350,7 +353,7 @@ def test_install_cli_raises_on_failure(self, mock_environment): mock_environment.execute = MagicMock(return_value=fail_return) with ( - patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000, 4322]), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), patch("copilot.CopilotClient", return_value=AsyncMock()), diff --git a/test/swe-bench-test/run_swe_bench.py b/test/swe-bench-test/run_swe_bench.py index ee3da525..5a1aefdd 100644 --- a/test/swe-bench-test/run_swe_bench.py +++ b/test/swe-bench-test/run_swe_bench.py @@ -11,7 +11,7 @@ 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) ) -from microbots import 
AgentBoss +from microbots import AgentBoss, CopilotBot logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -94,6 +94,23 @@ def run_agent(dataset): ) +def run_copilot_agent(dataset): + bot = CopilotBot( + model="gpt-4.1", + folder_to_mount=str(TEST_DIR / dataset['instance_id']), + permission="READ_WRITE", + ) + try: + result = bot.run( + task=dataset['problem_statement'] + "\n\nHint: " + dataset['hints_text'], + timeout_in_seconds=3600 * 4, # 4 hours + ) + if not result.status: + logger.error(f"CopilotBot failed on {dataset['instance_id']}: {result.error}") + finally: + bot.stop() + + def generate_prediction(dataset): repo_path = TEST_DIR / dataset['instance_id'] diff_output = subprocess.run( @@ -146,5 +163,18 @@ def test_swe_bench(): verify_fix() +def test_swe_bench_copilot(): + datasets = load_dataset(SWE_BENCH_SUITE, split="test") + + for instance in selected_dataset: + dataset = datasets.filter(lambda x: x['instance_id'] == instance)[0] + logger.info(f"DATASET: {pprint(dataset)}") + setup_test_directory(dataset) + run_copilot_agent(dataset) + generate_prediction(dataset) + + verify_fix() + + if __name__ == "__main__": - test_swe_bench() \ No newline at end of file + test_swe_bench_copilot() \ No newline at end of file From 4509c981c7e58979820ef0e4bca3f7396b5acbe0 Mon Sep 17 00:00:00 2001 From: bala Date: Thu, 2 Apr 2026 16:03:02 +0000 Subject: [PATCH 05/12] Backup --- src/microbots/bot/CopilotBot.py | 2 ++ src/microbots/llm/copilot_api.py | 5 +-- src/microbots/utils/copilot_auth.py | 40 ++++++++++++++++++++++ test/swe-bench-test/run_swe_bench.py | 51 ++++++++++++++++++++++++++-- 4 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 src/microbots/utils/copilot_auth.py diff --git a/src/microbots/bot/CopilotBot.py b/src/microbots/bot/CopilotBot.py index 46f39d31..dd3c84aa 100644 --- a/src/microbots/bot/CopilotBot.py +++ b/src/microbots/bot/CopilotBot.py @@ -51,6 +51,7 @@ from microbots.MicroBot import BotRunResult from 
microbots.tools.external_tool import ExternalTool from microbots.tools.tool import ToolAbstract +from microbots.utils.copilot_auth import get_copilot_token from microbots.utils.network import get_free_port # still used for _create_environment logger = getLogger(" CopilotBot ") @@ -118,6 +119,7 @@ def __init__( or os.environ.get("COPILOT_GITHUB_TOKEN") or os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + or get_copilot_token() ) # ── Mount setup ───────────────────────────────────────────── diff --git a/src/microbots/llm/copilot_api.py b/src/microbots/llm/copilot_api.py index a3de9910..33c9a6fb 100644 --- a/src/microbots/llm/copilot_api.py +++ b/src/microbots/llm/copilot_api.py @@ -8,6 +8,7 @@ from copilot import CopilotClient, PermissionHandler from copilot.types import SubprocessConfig from microbots.llm.llm import LLMAskResponse, LLMInterface +from microbots.utils.copilot_auth import get_copilot_token logger = getLogger(__name__) @@ -22,8 +23,8 @@ def __init__(self, system_prompt, model_name, max_retries=3, github_token=None): self.max_retries = max_retries self.retries = 0 - # Resolve GitHub token: explicit > GITHUB_TOKEN env var > logged-in user - self._github_token = github_token or os.environ.get("GITHUB_TOKEN") + # Resolve GitHub token: explicit > env var > ~/.copilot/config.json > SDK default + self._github_token = github_token or os.environ.get("GITHUB_TOKEN") or get_copilot_token() # Persistent event loop in a daemon thread for async-sync bridging. # The Copilot SDK is async-native; MicroBot's LLMInterface is sync. 
diff --git a/src/microbots/utils/copilot_auth.py b/src/microbots/utils/copilot_auth.py new file mode 100644 index 00000000..4d3aeebd --- /dev/null +++ b/src/microbots/utils/copilot_auth.py @@ -0,0 +1,40 @@ +"""Utility to read GitHub Copilot CLI credentials from ~/.copilot/config.json.""" + +import json +from logging import getLogger +from pathlib import Path +from typing import Optional + +logger = getLogger(__name__) + +COPILOT_CONFIG_PATH = Path.home() / ".copilot" / "config.json" + + +def get_copilot_token(config_path: Path = COPILOT_CONFIG_PATH) -> Optional[str]: + """Extract the OAuth token from the Copilot CLI config file. + + The Copilot CLI stores credentials in ``~/.copilot/config.json`` after + ``copilot auth login``. This function reads the first available token + from the ``copilot_tokens`` map. + + Returns ``None`` if the file doesn't exist or contains no tokens. + """ + if not config_path.is_file(): + logger.debug("Copilot config not found at %s", config_path) + return None + + try: + data = json.loads(config_path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError) as exc: + logger.warning("Failed to read Copilot config at %s: %s", config_path, exc) + return None + + tokens = data.get("copilot_tokens", {}) + if not tokens: + logger.debug("No copilot_tokens found in %s", config_path) + return None + + # Return the first available token + token = next(iter(tokens.values())) + logger.debug("Resolved Copilot token from %s", config_path) + return token diff --git a/test/swe-bench-test/run_swe_bench.py b/test/swe-bench-test/run_swe_bench.py index 5a1aefdd..ea6c01d8 100644 --- a/test/swe-bench-test/run_swe_bench.py +++ b/test/swe-bench-test/run_swe_bench.py @@ -13,8 +13,53 @@ from microbots import AgentBoss, CopilotBot +LOG_DIR = Path(__file__).parent.resolve() / "logs" +LOG_DIR.mkdir(parents=True, exist_ok=True) + +LOG_FORMAT = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s") + logger = logging.getLogger(__name__) 
-logging.basicConfig(level=logging.INFO) +logger.setLevel(logging.DEBUG) + +# Console output (always active) +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +console_handler.setFormatter(LOG_FORMAT) +logger.addHandler(console_handler) + +# Track per-instance file handlers so they can be swapped between test cases +_active_file_handlers: list[logging.Handler] = [] + + +def setup_instance_logging(instance_id: str): + """Create per-instance log directory and swap file handlers.""" + root = logging.getLogger() + + # Remove previous instance file handlers + for h in _active_file_handlers: + root.removeHandler(h) + h.close() + _active_file_handlers.clear() + + instance_log_dir = LOG_DIR / instance_id + instance_log_dir.mkdir(parents=True, exist_ok=True) + + # Info log file + info_handler = logging.FileHandler(instance_log_dir / "info.log") + info_handler.setLevel(logging.INFO) + info_handler.setFormatter(LOG_FORMAT) + + # Debug log file + debug_handler = logging.FileHandler(instance_log_dir / "debug.log") + debug_handler.setLevel(logging.DEBUG) + debug_handler.setFormatter(LOG_FORMAT) + + root.setLevel(logging.DEBUG) + root.addHandler(info_handler) + root.addHandler(debug_handler) + _active_file_handlers.extend([info_handler, debug_handler]) + + logger.info("Logging for instance %s -> %s", instance_id, instance_log_dir) # Verification method # `pip install swebench` @@ -96,7 +141,7 @@ def run_agent(dataset): def run_copilot_agent(dataset): bot = CopilotBot( - model="gpt-4.1", + model="gpt-5.4", folder_to_mount=str(TEST_DIR / dataset['instance_id']), permission="READ_WRITE", ) @@ -153,6 +198,7 @@ def test_swe_bench(): datasets = load_dataset(SWE_BENCH_SUITE, split="test") for instance in selected_dataset: + setup_instance_logging(instance) dataset = datasets.filter(lambda x: x['instance_id'] == instance)[0] logger.info(f"DATASET: {pprint(dataset)}") setup_test_directory(dataset) @@ -167,6 +213,7 @@ def test_swe_bench_copilot(): datasets 
= load_dataset(SWE_BENCH_SUITE, split="test") for instance in selected_dataset: + setup_instance_logging(instance) dataset = datasets.filter(lambda x: x['instance_id'] == instance)[0] logger.info(f"DATASET: {pprint(dataset)}") setup_test_directory(dataset) From 5528f6d836146cedf54ea3d64a08974b3f00a1f1 Mon Sep 17 00:00:00 2001 From: bala Date: Mon, 6 Apr 2026 10:17:34 +0000 Subject: [PATCH 06/12] Combine two for loops --- src/microbots/bot/CopilotBot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/microbots/bot/CopilotBot.py b/src/microbots/bot/CopilotBot.py index dd3c84aa..c31c4e1d 100644 --- a/src/microbots/bot/CopilotBot.py +++ b/src/microbots/bot/CopilotBot.py @@ -134,6 +134,8 @@ def __init__( self._create_environment() # ── Validate tools — ExternalTool is not supported ────────── + # __ And ___ + # ── Install additional tools inside the container ─────────── for tool in self.additional_tools: if isinstance(tool, ExternalTool): raise ValueError( @@ -142,8 +144,6 @@ def __init__( f"internal (container-side) tools are allowed." 
) - # ── Install additional tools inside the container ─────────── - for tool in self.additional_tools: logger.info("🔧 Installing additional tool '%s'...", tool.name) tool.install_tool(self.environment) tool.verify_tool_installation(self.environment) From 27b1d9050c13350d551323f66fe96a53beb530e8 Mon Sep 17 00:00:00 2001 From: bala Date: Mon, 6 Apr 2026 16:38:55 +0000 Subject: [PATCH 07/12] Update byok for copilot sdk --- src/microbots/bot/CopilotBot.py | 231 ++++++++++++++++++++-- test/bot/test_copilot_bot.py | 328 +++++++++++++++++++++++++++++++- 2 files changed, 545 insertions(+), 14 deletions(-) diff --git a/src/microbots/bot/CopilotBot.py b/src/microbots/bot/CopilotBot.py index c31c4e1d..d6514edb 100644 --- a/src/microbots/bot/CopilotBot.py +++ b/src/microbots/bot/CopilotBot.py @@ -37,6 +37,7 @@ import os import time import threading +from collections.abc import Callable from logging import getLogger from typing import Optional @@ -65,6 +66,177 @@ # copilot-cli port inside the container _CONTAINER_CLI_PORT = 4321 +# Environment variable names for BYOK configuration +_BYOK_ENV_PROVIDER_TYPE = "COPILOT_BYOK_PROVIDER_TYPE" +_BYOK_ENV_BASE_URL = "COPILOT_BYOK_BASE_URL" +_BYOK_ENV_API_KEY = "COPILOT_BYOK_API_KEY" +_BYOK_ENV_BEARER_TOKEN = "COPILOT_BYOK_BEARER_TOKEN" +_BYOK_ENV_WIRE_API = "COPILOT_BYOK_WIRE_API" +_BYOK_ENV_AZURE_API_VERSION = "COPILOT_BYOK_AZURE_API_VERSION" +_BYOK_ENV_MODEL = "COPILOT_BYOK_MODEL" + + +def resolve_auth_config( + model: str = _DEFAULT_MODEL, + github_token: Optional[str] = None, + api_key: Optional[str] = None, + bearer_token: Optional[str] = None, + base_url: Optional[str] = None, + provider_type: Optional[str] = None, + wire_api: Optional[str] = None, + azure_api_version: Optional[str] = None, + token_provider: Optional[Callable[[], str]] = None, +) -> tuple[str, Optional[str], Optional[dict]]: + """Resolve authentication and provider configuration for CopilotBot. 
+ + Determines whether to use BYOK (Bring Your Own Key) or native GitHub + Copilot authentication, and builds the appropriate provider config. + + Priority order: + 1. Explicit ``api_key`` or ``bearer_token`` with ``base_url`` → BYOK + 2. Environment variables (``COPILOT_BYOK_*``) → BYOK + 3. ``token_provider`` (e.g. Azure AD token provider) → BYOK with bearer token + 4. GitHub token → native Copilot authentication + + Parameters + ---------- + model : str + Model name (e.g. ``"gpt-4.1"``, ``"claude-sonnet-4.5"``). + github_token : Optional[str] + GitHub token for native Copilot auth. + api_key : Optional[str] + API key for BYOK provider. + bearer_token : Optional[str] + Bearer token for BYOK (takes precedence over ``api_key``). + base_url : Optional[str] + API endpoint URL for BYOK provider. + provider_type : Optional[str] + Provider type: ``"openai"``, ``"azure"``, or ``"anthropic"``. + wire_api : Optional[str] + API format: ``"completions"`` or ``"responses"``. + azure_api_version : Optional[str] + Azure API version (only for ``type: "azure"``). + token_provider : Optional[Callable[[], str]] + Callable that returns a bearer token string (e.g. Azure AD + token provider). The token is fetched once at config resolution + time. For long-running sessions, create a new session with a + refreshed token. + + Returns + ------- + tuple[str, Optional[str], Optional[dict]] + ``(model, github_token, provider_config)`` where + ``provider_config`` is *None* for native Copilot auth or a dict + suitable for the ``provider`` kwarg of ``create_session``. + + Raises + ------ + ValueError + If BYOK is requested but ``base_url`` is missing, or if + ``token_provider`` is not a valid callable. + """ + + # ── 1. Explicit api_key / bearer_token ─────────────────────────── + if api_key or bearer_token: + if not base_url: + raise ValueError( + "BYOK requires a base_url when api_key or bearer_token is provided." 
+ ) + provider = _build_provider_config( + provider_type=provider_type or "openai", + base_url=base_url, + api_key=api_key, + bearer_token=bearer_token, + wire_api=wire_api, + azure_api_version=azure_api_version, + ) + logger.info("🔑 BYOK auth resolved via explicit credentials (type=%s)", provider["type"]) + return model, None, provider + + # ── 2. Environment variables ───────────────────────────────────── + env_base_url = os.environ.get(_BYOK_ENV_BASE_URL) + env_api_key = os.environ.get(_BYOK_ENV_API_KEY) + env_bearer_token = os.environ.get(_BYOK_ENV_BEARER_TOKEN) + + if env_base_url and (env_api_key or env_bearer_token): + env_model = os.environ.get(_BYOK_ENV_MODEL, model) + provider = _build_provider_config( + provider_type=os.environ.get(_BYOK_ENV_PROVIDER_TYPE, "openai"), + base_url=env_base_url, + api_key=env_api_key, + bearer_token=env_bearer_token, + wire_api=os.environ.get(_BYOK_ENV_WIRE_API), + azure_api_version=os.environ.get(_BYOK_ENV_AZURE_API_VERSION), + ) + logger.info("🔑 BYOK auth resolved via environment variables (type=%s)", provider["type"]) + return env_model, None, provider + + # ── 3. Token provider (e.g. Azure AD) ──────────────────────────── + if token_provider: + if not callable(token_provider): + raise ValueError("token_provider must be a callable that returns a string token.") + resolved_url = base_url or env_base_url + if not resolved_url: + raise ValueError( + "BYOK with token_provider requires a base_url (pass it directly " + "or set COPILOT_BYOK_BASE_URL)." 
+ ) + try: + token = token_provider() + except Exception as e: + raise ValueError(f"token_provider failed during validation: {e}") from e + if not isinstance(token, str) or not token: + raise ValueError("token_provider must return a non-empty string token.") + + provider = _build_provider_config( + provider_type=provider_type or os.environ.get(_BYOK_ENV_PROVIDER_TYPE, "openai"), + base_url=resolved_url, + bearer_token=token, + wire_api=wire_api or os.environ.get(_BYOK_ENV_WIRE_API), + azure_api_version=azure_api_version or os.environ.get(_BYOK_ENV_AZURE_API_VERSION), + ) + logger.info("🔑 BYOK auth resolved via token_provider (type=%s)", provider["type"]) + return model, None, provider + + # ── 4. Native GitHub Copilot auth ──────────────────────────────── + resolved_github_token = ( + github_token + or os.environ.get("COPILOT_GITHUB_TOKEN") + or os.environ.get("GITHUB_TOKEN") + or os.environ.get("GH_TOKEN") + or get_copilot_token() + ) + logger.info("🔑 Using native GitHub Copilot authentication") + return model, resolved_github_token, None + + +def _build_provider_config( + provider_type: str, + base_url: str, + api_key: Optional[str] = None, + bearer_token: Optional[str] = None, + wire_api: Optional[str] = None, + azure_api_version: Optional[str] = None, +) -> dict: + """Build the ``provider`` dict accepted by ``create_session``.""" + config: dict = { + "type": provider_type, + "base_url": base_url, + } + # bearer_token takes precedence over api_key per SDK docs + if bearer_token: + config["bearer_token"] = bearer_token + elif api_key: + config["api_key"] = api_key + + if wire_api: + config["wire_api"] = wire_api + + if provider_type == "azure" and azure_api_version: + config["azure"] = {"api_version": azure_api_version} + + return config + class CopilotBot: """Wrapper around the GitHub Copilot SDK with a sandboxed Docker environment. @@ -91,7 +263,27 @@ class CopilotBot: and, where possible, they are registered as SDK custom tools. 
github_token : Optional[str] Explicit GitHub token. Falls back to ``GITHUB_TOKEN`` / - ``COPILOT_GITHUB_TOKEN`` env vars. + ``COPILOT_GITHUB_TOKEN`` env vars. Used only when BYOK is not + configured. + api_key : Optional[str] + API key for BYOK provider. When provided with ``base_url``, + bypasses GitHub Copilot auth and uses the key directly. + bearer_token : Optional[str] + Bearer token for BYOK provider. Takes precedence over ``api_key``. + base_url : Optional[str] + API endpoint URL for BYOK (e.g. + ``"https://api.openai.com/v1"``). + provider_type : Optional[str] + BYOK provider type: ``"openai"``, ``"azure"``, or + ``"anthropic"``. Defaults to ``"openai"``. + wire_api : Optional[str] + API format: ``"completions"`` (default) or ``"responses"`` + (for GPT-5 series). + azure_api_version : Optional[str] + Azure API version string (only for ``provider_type="azure"``). + token_provider : Optional[Callable[[], str]] + A callable returning a bearer token (e.g. Azure AD token + provider). Requires ``base_url``. 
""" def __init__( @@ -102,6 +294,13 @@ def __init__( environment: Optional[LocalDockerEnvironment] = None, additional_tools: Optional[list[ToolAbstract]] = None, github_token: Optional[str] = None, + api_key: Optional[str] = None, + bearer_token: Optional[str] = None, + base_url: Optional[str] = None, + provider_type: Optional[str] = None, + wire_api: Optional[str] = None, + azure_api_version: Optional[str] = None, + token_provider: Optional[Callable[[], str]] = None, ): try: from copilot import CopilotClient, ExternalServerConfig @@ -112,14 +311,19 @@ def __init__( "Install with: pip install microbots[ghcp]" ) - self.model = model self.additional_tools = additional_tools or [] - self.github_token = ( - github_token - or os.environ.get("COPILOT_GITHUB_TOKEN") - or os.environ.get("GITHUB_TOKEN") - or os.environ.get("GH_TOKEN") - or get_copilot_token() + + # ── Resolve auth: BYOK vs native GitHub Copilot ───────────── + self.model, self.github_token, self._provider_config = resolve_auth_config( + model=model, + github_token=github_token, + api_key=api_key, + bearer_token=bearer_token, + base_url=base_url, + provider_type=provider_type, + wire_api=wire_api, + azure_api_version=azure_api_version, + token_provider=token_provider, ) # ── Mount setup ───────────────────────────────────────────── @@ -321,8 +525,10 @@ def _start_copilot_cli_server(self): Authentication is handled via the GITHUB_TOKEN environment variable injected into the container. """ - # Inject the GitHub token into the container for authentication - if self.github_token: + # Inject the GitHub token into the container for native Copilot auth. + # When BYOK is active, authentication is handled via the provider + # config passed to create_session — no container-side token needed. 
+ if self.github_token and not self._provider_config: self.environment.execute( f'export GITHUB_TOKEN="{self.github_token}"' ) @@ -398,10 +604,13 @@ async def _execute_session( }, } + if self._provider_config: + session_kwargs["provider"] = self._provider_config + if system_content: session_kwargs["system_message"] = {"content": system_content} - logger.info("📡 Creating Copilot session (model=%s, streaming=%s)", self.model, streaming) + logger.info("📡 Creating Copilot session (model=%s, streaming=%s, byok=%s)", self.model, streaming, self._provider_config is not None) logger.debug("Session kwargs: %s", session_kwargs) session = await self._client.create_session(**session_kwargs) diff --git a/test/bot/test_copilot_bot.py b/test/bot/test_copilot_bot.py index 2cff19b0..a11611ed 100644 --- a/test/bot/test_copilot_bot.py +++ b/test/bot/test_copilot_bot.py @@ -58,6 +58,20 @@ from microbots.MicroBot import BotRunResult +def _restore_real_copilot_modules(): + """Remove mock copilot modules from sys.modules and reload CopilotBot. + + This allows integration tests to use the real copilot SDK instead of + the mocks injected at module level for unit tests. 
+ """ + mock_keys = [k for k in sys.modules if k == "copilot" or k.startswith("copilot.")] + for key in mock_keys: + del sys.modules[key] + # Also force CopilotBot to re-import the real SDK on next import + if "microbots.bot.CopilotBot" in sys.modules: + del sys.modules["microbots.bot.CopilotBot"] + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -68,9 +82,10 @@ def _copilot_cli_available(): def _copilot_sdk_installed(): try: - import copilot # noqa: F401 - return not isinstance(copilot, MagicMock) - except ImportError: + from importlib.metadata import version + version("github-copilot-sdk") + return True + except Exception: return False @@ -367,6 +382,248 @@ def test_install_cli_raises_on_failure(self, mock_environment): ) +# --------------------------------------------------------------------------- +# Unit tests — resolve_auth_config and BYOK +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestResolveAuthConfig: + """Tests for the standalone resolve_auth_config function.""" + + def test_explicit_api_key_returns_byok_provider(self): + from microbots.bot.CopilotBot import resolve_auth_config + + model, gh_token, provider = resolve_auth_config( + model="gpt-4.1", + api_key="sk-test-key", + base_url="https://api.openai.com/v1", + ) + assert model == "gpt-4.1" + assert gh_token is None + assert provider is not None + assert provider["type"] == "openai" + assert provider["base_url"] == "https://api.openai.com/v1" + assert provider["api_key"] == "sk-test-key" + assert "bearer_token" not in provider + + def test_explicit_bearer_token_takes_precedence_over_api_key(self): + from microbots.bot.CopilotBot import resolve_auth_config + + _, _, provider = resolve_auth_config( + model="gpt-4.1", + api_key="sk-key", + bearer_token="my-bearer", + base_url="https://api.openai.com/v1", + ) + assert 
provider["bearer_token"] == "my-bearer" + assert "api_key" not in provider + + def test_explicit_api_key_without_base_url_raises(self): + from microbots.bot.CopilotBot import resolve_auth_config + + with pytest.raises(ValueError, match="base_url"): + resolve_auth_config(model="gpt-4.1", api_key="sk-test") + + def test_azure_provider_type_includes_api_version(self): + from microbots.bot.CopilotBot import resolve_auth_config + + _, _, provider = resolve_auth_config( + model="gpt-4.1", + api_key="azure-key", + base_url="https://my-resource.openai.azure.com", + provider_type="azure", + azure_api_version="2024-10-21", + ) + assert provider["type"] == "azure" + assert provider["azure"] == {"api_version": "2024-10-21"} + + def test_wire_api_included_when_set(self): + from microbots.bot.CopilotBot import resolve_auth_config + + _, _, provider = resolve_auth_config( + model="gpt-5", + api_key="key", + base_url="https://endpoint.com/v1", + wire_api="responses", + ) + assert provider["wire_api"] == "responses" + + def test_env_vars_resolve_byok(self, monkeypatch): + from microbots.bot.CopilotBot import resolve_auth_config + + monkeypatch.setenv("COPILOT_BYOK_BASE_URL", "https://env-endpoint.com/v1") + monkeypatch.setenv("COPILOT_BYOK_API_KEY", "env-key") + monkeypatch.setenv("COPILOT_BYOK_PROVIDER_TYPE", "anthropic") + monkeypatch.setenv("COPILOT_BYOK_MODEL", "claude-sonnet-4.5") + + model, gh_token, provider = resolve_auth_config(model="gpt-4.1") + assert model == "claude-sonnet-4.5" + assert gh_token is None + assert provider["type"] == "anthropic" + assert provider["base_url"] == "https://env-endpoint.com/v1" + assert provider["api_key"] == "env-key" + + def test_env_vars_bearer_token(self, monkeypatch): + from microbots.bot.CopilotBot import resolve_auth_config + + monkeypatch.setenv("COPILOT_BYOK_BASE_URL", "https://endpoint.com/v1") + monkeypatch.setenv("COPILOT_BYOK_BEARER_TOKEN", "env-bearer") + + _, _, provider = resolve_auth_config(model="gpt-4.1") + assert 
provider["bearer_token"] == "env-bearer" + assert "api_key" not in provider + + def test_env_vars_ignored_when_explicit_key_provided(self, monkeypatch): + from microbots.bot.CopilotBot import resolve_auth_config + + monkeypatch.setenv("COPILOT_BYOK_BASE_URL", "https://env-endpoint.com/v1") + monkeypatch.setenv("COPILOT_BYOK_API_KEY", "env-key") + + _, _, provider = resolve_auth_config( + model="gpt-4.1", + api_key="explicit-key", + base_url="https://explicit.com/v1", + ) + assert provider["api_key"] == "explicit-key" + assert provider["base_url"] == "https://explicit.com/v1" + + def test_token_provider_returns_byok_with_bearer(self): + from microbots.bot.CopilotBot import resolve_auth_config + + _, _, provider = resolve_auth_config( + model="gpt-4.1", + base_url="https://azure.endpoint.com/v1", + token_provider=lambda: "ad-token-123", + ) + assert provider["bearer_token"] == "ad-token-123" + assert "api_key" not in provider + + def test_token_provider_without_base_url_raises(self): + from microbots.bot.CopilotBot import resolve_auth_config + + with pytest.raises(ValueError, match="base_url"): + resolve_auth_config( + model="gpt-4.1", + token_provider=lambda: "token", + ) + + def test_token_provider_not_callable_raises(self): + from microbots.bot.CopilotBot import resolve_auth_config + + with pytest.raises(ValueError, match="callable"): + resolve_auth_config( + model="gpt-4.1", + base_url="https://endpoint.com/v1", + token_provider="not-a-callable", + ) + + def test_token_provider_returning_empty_raises(self): + from microbots.bot.CopilotBot import resolve_auth_config + + with pytest.raises(ValueError, match="non-empty"): + resolve_auth_config( + model="gpt-4.1", + base_url="https://endpoint.com/v1", + token_provider=lambda: "", + ) + + def test_token_provider_exception_raises(self): + from microbots.bot.CopilotBot import resolve_auth_config + + def bad_provider(): + raise RuntimeError("auth failed") + + with pytest.raises(ValueError, match="auth failed"): + 
resolve_auth_config( + model="gpt-4.1", + base_url="https://endpoint.com/v1", + token_provider=bad_provider, + ) + + def test_fallback_to_github_token(self): + from microbots.bot.CopilotBot import resolve_auth_config + + model, gh_token, provider = resolve_auth_config( + model="gpt-4.1", + github_token="ghp_test123", + ) + assert model == "gpt-4.1" + assert gh_token == "ghp_test123" + assert provider is None + + def test_default_provider_type_is_openai(self): + from microbots.bot.CopilotBot import resolve_auth_config + + _, _, provider = resolve_auth_config( + model="m", api_key="k", base_url="https://x.com/v1" + ) + assert provider["type"] == "openai" + + def test_anthropic_provider_type(self): + from microbots.bot.CopilotBot import resolve_auth_config + + _, _, provider = resolve_auth_config( + model="claude-sonnet-4.5", + api_key="ant-key", + base_url="https://api.anthropic.com", + provider_type="anthropic", + ) + assert provider["type"] == "anthropic" + + +@pytest.mark.unit +class TestCopilotBotBYOKInit: + """Tests for CopilotBot initialisation with BYOK parameters.""" + + def test_byok_api_key_sets_provider_config(self, mock_environment, mock_copilot_client): + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + api_key="sk-byok-key", + base_url="https://api.openai.com/v1", + ) + assert bot._provider_config is not None + assert bot._provider_config["api_key"] == "sk-byok-key" + 
assert bot.github_token is None + bot.stop() + + def test_byok_token_provider_sets_provider_config(self, mock_environment, mock_copilot_client): + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + base_url="https://azure.endpoint.com/v1", + token_provider=lambda: "ad-token-xyz", + ) + assert bot._provider_config is not None + assert bot._provider_config["bearer_token"] == "ad-token-xyz" + assert bot.github_token is None + bot.stop() + + def test_native_auth_has_no_provider_config(self, copilot_bot): + assert copilot_bot._provider_config is None + assert copilot_bot.github_token == "ghp_test_token_123" + + # --------------------------------------------------------------------------- # Integration tests — require real Docker + copilot-cli + auth # --------------------------------------------------------------------------- @@ -397,6 +654,7 @@ class TestCopilotBotIntegration: def test_simple_task(self, test_repo, issue_1): """CopilotBot can fix a simple syntax error.""" + _restore_real_copilot_modules() from microbots.bot.CopilotBot import CopilotBot issue_text = issue_1[0] @@ -417,3 +675,67 @@ def test_simple_task(self, test_repo, issue_1): verify_function(test_repo) finally: bot.stop() + + +# --------------------------------------------------------------------------- +# BYOK helpers +# --------------------------------------------------------------------------- + +def 
_byok_openai_available(): + """Check if OpenAI BYOK credentials are configured via env vars.""" + return bool( + os.environ.get("OPEN_AI_KEY") + and os.environ.get("OPEN_AI_END_POINT") + ) + + +_skip_no_byok_openai = pytest.mark.skipif( + not _byok_openai_available(), + reason="OpenAI BYOK not configured (set OPEN_AI_KEY and OPEN_AI_END_POINT)", +) + + +@_skip_no_copilot_cli +@_skip_no_copilot_sdk +@_skip_no_byok_openai +@pytest.mark.integration +@pytest.mark.slow +class TestCopilotBotBYOKOpenAIIntegration: + """End-to-end integration tests for CopilotBot with OpenAI BYOK.""" + + def test_byok_openai_simple_task(self, test_repo, issue_1): + """CopilotBot can fix a simple syntax error using OpenAI BYOK credentials.""" + _restore_real_copilot_modules() + from microbots.bot.CopilotBot import CopilotBot + + issue_text = issue_1[0] + verify_function = issue_1[1] + + api_key = os.environ["OPEN_AI_KEY"] + base_url = os.environ["OPEN_AI_END_POINT"] + model = os.getenv( + "AZURE_OPENAI_DEPLOYMENT_NAME", "mini-swe-agent-gpt5" + ) + + bot = CopilotBot( + model=model, + folder_to_mount=str(test_repo), + permission="READ_WRITE", + api_key=api_key, + base_url=base_url, + provider_type="openai", + ) + + try: + assert bot._provider_config is not None + assert bot._provider_config["type"] == "openai" + assert bot.github_token is None + + result = bot.run( + issue_text, + timeout_in_seconds=300, + ) + assert result.status is True, f"CopilotBot BYOK run failed: {result.error}" + verify_function(test_repo) + finally: + bot.stop() From 536cfa64a97a9a2645c646454a08150b9e73d27d Mon Sep 17 00:00:00 2001 From: bala Date: Mon, 6 Apr 2026 16:46:54 +0000 Subject: [PATCH 08/12] Update optional dependency section and remove copilot_api and test_copilot_api --- pyproject.toml | 4 +- src/microbots/llm/copilot_api.py | 124 ------------------- test/llm/test_copilot_api.py | 196 ------------------------------- 3 files changed, 1 insertion(+), 323 deletions(-) delete mode 100644 
src/microbots/llm/copilot_api.py delete mode 100644 test/llm/test_copilot_api.py diff --git a/pyproject.toml b/pyproject.toml index ea425fa7..fd5f0efd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ requires-python = ">=3.11" [project.optional-dependencies] ghcp = ["github-copilot-sdk"] +azure_ad = ["azure-identity>=1.15.0"] [tool.setuptools.dynamic] dependencies = { file = ["requirements.txt"] } @@ -33,9 +34,6 @@ dependencies = { file = ["requirements.txt"] } [tool.setuptools.packages.find] where = ["src"] -[project.optional-dependencies] -azure_ad = ["azure-identity>=1.15.0"] - [project.urls] "Source Repo" = "https://github.com/microsoft/minions" Issues = "https://github.com/microsoft/minions/issues" \ No newline at end of file diff --git a/src/microbots/llm/copilot_api.py b/src/microbots/llm/copilot_api.py deleted file mode 100644 index 33c9a6fb..00000000 --- a/src/microbots/llm/copilot_api.py +++ /dev/null @@ -1,124 +0,0 @@ -import asyncio -import json -import os -import threading -from dataclasses import asdict -from logging import getLogger - -from copilot import CopilotClient, PermissionHandler -from copilot.types import SubprocessConfig -from microbots.llm.llm import LLMAskResponse, LLMInterface -from microbots.utils.copilot_auth import get_copilot_token - -logger = getLogger(__name__) - - -class CopilotApi(LLMInterface): - - def __init__(self, system_prompt, model_name, max_retries=3, github_token=None): - self.system_prompt = system_prompt - self.model_name = model_name - self.messages = [] - - self.max_retries = max_retries - self.retries = 0 - - # Resolve GitHub token: explicit > env var > ~/.copilot/config.json > SDK default - self._github_token = github_token or os.environ.get("GITHUB_TOKEN") or get_copilot_token() - - # Persistent event loop in a daemon thread for async-sync bridging. - # The Copilot SDK is async-native; MicroBot's LLMInterface is sync. 
- self._loop = asyncio.new_event_loop() - self._thread = threading.Thread( - target=self._loop.run_forever, daemon=True - ) - self._thread.start() - self._closed = False - - config = SubprocessConfig(github_token=self._github_token) if self._github_token else SubprocessConfig() - self._client = CopilotClient(config) - self._session = None - self._run_async(self._start()) - - async def _start(self): - await self._client.start() - await self._create_session() - - async def _create_session(self): - self._session = await self._client.create_session( - model=self.model_name, - on_permission_request=PermissionHandler.approve_all, - system_message={"content": self.system_prompt}, - infinite_sessions={"enabled": False}, - ) - - def _run_async(self, coro): - """Submit an async coroutine to the background loop and block until done.""" - future = asyncio.run_coroutine_threadsafe(coro, self._loop) - return future.result() - - async def _send_and_collect(self, message): - """Send a message and wait for the assistant's complete response.""" - response_event = await self._session.send_and_wait(message, timeout=300.0) - if response_event and response_event.data and response_event.data.content: - return response_event.data.content - return "" - - def ask(self, message) -> LLMAskResponse: - self.retries = 0 - - self.messages.append({"role": "user", "content": message}) - - valid = False - while not valid: - response_text = self._run_async(self._send_and_collect(message)) - logger.debug( - "Raw Copilot response (first 500 chars): %s", - response_text[:500], - ) - - # Try to extract JSON if wrapped in markdown code blocks - import re - json_match = re.search( - r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL - ) - if json_match: - response_text = json_match.group(1) - - valid, askResponse = self._validate_llm_response( - response=response_text - ) - - self.messages.append( - {"role": "assistant", "content": json.dumps(asdict(askResponse))} - ) - return askResponse - - def 
clear_history(self): - self.messages = [] - self._run_async(self._recreate_session()) - return True - - async def _recreate_session(self): - if self._session: - await self._session.disconnect() - await self._create_session() - - def close(self): - """Stop the Copilot client and shut down the background event loop.""" - if self._closed: - return - self._closed = True - try: - if self._session: - self._run_async(self._session.disconnect()) - self._run_async(self._client.stop()) - finally: - self._loop.call_soon_threadsafe(self._loop.stop) - self._thread.join(timeout=5) - - def __del__(self): - try: - self.close() - except Exception: - pass diff --git a/test/llm/test_copilot_api.py b/test/llm/test_copilot_api.py deleted file mode 100644 index d268771d..00000000 --- a/test/llm/test_copilot_api.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -Unit tests for CopilotApi class -""" -import pytest -import json -import sys -import os -import asyncio -import importlib -from unittest.mock import Mock, patch, MagicMock, AsyncMock -from dataclasses import asdict - -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) - -# Mock the copilot module before importing CopilotApi, since github-copilot-sdk -# is an optional dependency that won't be installed in the test environment. -_mock_copilot = MagicMock() -_mock_copilot.PermissionHandler = MagicMock() -_mock_copilot.PermissionHandler.approve_all = MagicMock() -_mock_copilot.CopilotClient = MagicMock -_mock_copilot_types = MagicMock() -_mock_copilot_types.SubprocessConfig = MagicMock -sys.modules["copilot"] = _mock_copilot -sys.modules["copilot.types"] = _mock_copilot_types - -# Now safe to import — the module-level `from copilot import ...` will resolve -# against our mock. 
-if "microbots.llm.copilot_api" in sys.modules: - importlib.reload(sys.modules["microbots.llm.copilot_api"]) -from microbots.llm.copilot_api import CopilotApi -from microbots.llm.llm import LLMAskResponse, LLMInterface, llm_output_format_str - - -@pytest.fixture -def mock_copilot_session(): - """Create a mock Copilot session with send_and_wait.""" - session = AsyncMock() - session.disconnect = AsyncMock() - - # Default response from send_and_wait - default_response = Mock() - default_response.data.content = json.dumps({ - "task_done": False, - "thoughts": "Thinking about the task", - "command": "ls -la" - }) - session.send_and_wait = AsyncMock(return_value=default_response) - return session - - -@pytest.fixture -def mock_copilot_client(mock_copilot_session): - """Create a mock CopilotClient.""" - client = AsyncMock() - client.start = AsyncMock() - client.stop = AsyncMock() - client.create_session = AsyncMock(return_value=mock_copilot_session) - return client - - -@pytest.fixture -def copilot_api(mock_copilot_client): - """Create a CopilotApi instance with mocked client.""" - with patch("microbots.llm.copilot_api.CopilotClient", return_value=mock_copilot_client): - api = CopilotApi( - system_prompt="You are a test assistant", - model_name="gpt-4.1", - ) - yield api - api.close() - - -@pytest.mark.unit -class TestCopilotApiInitialization: - """Tests for CopilotApi initialization.""" - - def test_init_stores_params(self, copilot_api): - assert copilot_api.system_prompt == "You are a test assistant" - assert copilot_api.model_name == "gpt-4.1" - assert copilot_api.max_retries == 3 - assert copilot_api.retries == 0 - assert copilot_api.messages == [] - - def test_init_starts_client_and_creates_session(self, mock_copilot_client): - with patch("microbots.llm.copilot_api.CopilotClient", return_value=mock_copilot_client): - api = CopilotApi(system_prompt="test", model_name="gpt-4.1") - try: - mock_copilot_client.start.assert_awaited_once() - 
mock_copilot_client.create_session.assert_awaited_once() - - call_kwargs = mock_copilot_client.create_session.call_args[1] - assert call_kwargs["model"] == "gpt-4.1" - assert call_kwargs["infinite_sessions"] == {"enabled": False} - assert "system_message" in call_kwargs - assert call_kwargs["system_message"]["content"] == "test" - finally: - api.close() - - def test_implements_llm_interface(self, copilot_api): - assert isinstance(copilot_api, LLMInterface) - - -@pytest.mark.unit -class TestCopilotApiAsk: - """Tests for CopilotApi.ask() method.""" - - def test_ask_returns_valid_response(self, copilot_api): - response = copilot_api.ask("What files are in the directory?") - - assert isinstance(response, LLMAskResponse) - assert response.task_done is False - assert response.thoughts == "Thinking about the task" - assert response.command == "ls -la" - - def test_ask_appends_to_messages(self, copilot_api): - copilot_api.ask("test message") - - assert len(copilot_api.messages) == 2 - assert copilot_api.messages[0]["role"] == "user" - assert copilot_api.messages[0]["content"] == "test message" - assert copilot_api.messages[1]["role"] == "assistant" - - def test_ask_handles_task_done(self, copilot_api, mock_copilot_session): - """Test ask when LLM signals task completion.""" - done_response = Mock() - done_response.data.content = json.dumps({ - "task_done": True, - "thoughts": "Task is complete", - "command": "" - }) - mock_copilot_session.send_and_wait = AsyncMock(return_value=done_response) - - response = copilot_api.ask("done?") - assert response.task_done is True - assert response.command == "" - - def test_ask_handles_markdown_wrapped_json(self, copilot_api, mock_copilot_session): - """Test that JSON wrapped in markdown code blocks is extracted.""" - md_response = Mock() - md_response.data.content = '```json\n{"task_done": false, "thoughts": "extracted", "command": "pwd"}\n```' - mock_copilot_session.send_and_wait = AsyncMock(return_value=md_response) - - response = 
copilot_api.ask("test") - assert response.thoughts == "extracted" - assert response.command == "pwd" - - -@pytest.mark.unit -class TestCopilotApiClearHistory: - """Tests for CopilotApi.clear_history() method.""" - - def test_clear_history_resets_messages(self, copilot_api): - copilot_api.messages = [{"role": "user", "content": "test"}] - result = copilot_api.clear_history() - - assert result is True - assert copilot_api.messages == [] - - def test_clear_history_recreates_session(self, copilot_api, mock_copilot_session, mock_copilot_client): - copilot_api.clear_history() - - mock_copilot_session.disconnect.assert_awaited() - # create_session called once at init, once on clear_history - assert mock_copilot_client.create_session.await_count == 2 - - -@pytest.mark.unit -class TestCopilotApiClose: - """Tests for CopilotApi.close() method.""" - - def test_close_stops_client(self, mock_copilot_client, mock_copilot_session): - with patch("microbots.llm.copilot_api.CopilotClient", return_value=mock_copilot_client): - api = CopilotApi(system_prompt="test", model_name="gpt-4.1") - api.close() - - mock_copilot_session.disconnect.assert_awaited() - mock_copilot_client.stop.assert_awaited() - - -@pytest.mark.unit -class TestCopilotApiImportError: - """Test that a helpful error is raised when ghcp extra is not installed.""" - - def test_microbot_raises_helpful_error_without_ghcp(self): - """MicroBot._create_llm() should raise ValueError when copilot SDK is missing.""" - from microbots.constants import ModelProvider - - with patch("microbots.MicroBot.ModelProvider", ModelProvider): - # Simulate ImportError when trying to import CopilotApi - with patch.dict("sys.modules", {"microbots.llm.copilot_api": None}): - from microbots.MicroBot import MicroBot - with pytest.raises(ValueError, match="pip install microbots\\[ghcp\\]"): - MicroBot( - model="github-copilot/gpt-4.1", - system_prompt="test", - ) From 0ace21896bf2c22f7d9022b1ed050d867c631a0c Mon Sep 17 00:00:00 2001 From: 
"copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:05:38 +0000 Subject: [PATCH 09/12] Add unit tests for uncovered lines in CopilotBot and copilot_auth Agent-Logs-Url: https://github.com/microsoft/microbots/sessions/9a80811f-626c-49f6-8ef8-a764f2f692da Co-authored-by: 0xba1a <2942888+0xba1a@users.noreply.github.com> --- test/bot/test_copilot_auth.py | 67 ++++ test/bot/test_copilot_bot.py | 700 ++++++++++++++++++++++++++++++++++ 2 files changed, 767 insertions(+) create mode 100644 test/bot/test_copilot_auth.py diff --git a/test/bot/test_copilot_auth.py b/test/bot/test_copilot_auth.py new file mode 100644 index 00000000..57dd8714 --- /dev/null +++ b/test/bot/test_copilot_auth.py @@ -0,0 +1,67 @@ +"""Unit tests for microbots.utils.copilot_auth.get_copilot_token.""" + +import json +import os +import sys +from pathlib import Path + +import pytest + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) +) + +from microbots.utils.copilot_auth import get_copilot_token + + +# --------------------------------------------------------------------------- +# Unit tests +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestGetCopilotToken: + """Tests for get_copilot_token().""" + + def test_returns_none_when_file_missing(self, tmp_path): + """Returns None when the config file does not exist.""" + missing = tmp_path / "nonexistent.json" + assert get_copilot_token(config_path=missing) is None + + def test_returns_none_on_invalid_json(self, tmp_path): + """Returns None and logs a warning when the file contains invalid JSON.""" + bad_file = tmp_path / "config.json" + bad_file.write_text("this is not json", encoding="utf-8") + assert get_copilot_token(config_path=bad_file) is None + + def test_returns_none_when_no_copilot_tokens_key(self, tmp_path): + """Returns None when the JSON has no 'copilot_tokens' key.""" + cfg = tmp_path / "config.json" + 
cfg.write_text(json.dumps({"other_key": "value"}), encoding="utf-8") + assert get_copilot_token(config_path=cfg) is None + + def test_returns_none_when_copilot_tokens_empty(self, tmp_path): + """Returns None when 'copilot_tokens' is an empty dict.""" + cfg = tmp_path / "config.json" + cfg.write_text(json.dumps({"copilot_tokens": {}}), encoding="utf-8") + assert get_copilot_token(config_path=cfg) is None + + def test_returns_first_token(self, tmp_path): + """Returns the first token value from 'copilot_tokens'.""" + cfg = tmp_path / "config.json" + cfg.write_text( + json.dumps({"copilot_tokens": {"host1": "token-abc", "host2": "token-xyz"}}), + encoding="utf-8", + ) + token = get_copilot_token(config_path=cfg) + assert token == "token-abc" + + def test_returns_none_on_os_error(self, tmp_path): + """Returns None when the file cannot be read (OSError).""" + cfg = tmp_path / "config.json" + cfg.write_text("{}", encoding="utf-8") + cfg.chmod(0o000) # remove read permission + try: + result = get_copilot_token(config_path=cfg) + assert result is None + finally: + cfg.chmod(0o644) # restore permissions for cleanup diff --git a/test/bot/test_copilot_bot.py b/test/bot/test_copilot_bot.py index a11611ed..5804d4cf 100644 --- a/test/bot/test_copilot_bot.py +++ b/test/bot/test_copilot_bot.py @@ -624,6 +624,706 @@ def test_native_auth_has_no_provider_config(self, copilot_bot): assert copilot_bot.github_token == "ghp_test_token_123" +# --------------------------------------------------------------------------- +# Helper context manager shared by several new test classes +# --------------------------------------------------------------------------- + +def _standard_init_patches(mock_environment, mock_copilot_client): + """Return a combined context manager for standard CopilotBot init patches.""" + return ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + 
patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ) + + +# --------------------------------------------------------------------------- +# Unit tests — __init__ with folder_to_mount and auto-created environment +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotInitFolderMount: + """Tests for __init__ paths: folder_to_mount string and auto environment.""" + + def test_folder_to_mount_creates_mount_object(self, mock_environment, mock_copilot_client): + """When folder_to_mount string is provided, a Mount is stored.""" + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + folder_to_mount="/tmp/test_repo", + ) + assert bot.folder_to_mount is not None + assert "test_repo" in bot.folder_to_mount.sandbox_path + bot.stop() + + def test_auto_creates_environment_when_none(self, mock_environment, mock_copilot_client): + """When environment=None, LocalDockerEnvironment is instantiated.""" + mock_environment.get_ipv4_address = MagicMock(return_value="172.17.0.2") + with ( + patch( + 
"microbots.bot.CopilotBot.LocalDockerEnvironment", + return_value=mock_environment, + ) as mock_lde, + patch("microbots.bot.CopilotBot.get_free_port", return_value=9000), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + github_token="ghp_test", + ) + mock_lde.assert_called_once() + assert bot.environment is mock_environment + bot.stop() + + +# --------------------------------------------------------------------------- +# Unit tests — non-ExternalTool installation in __init__ +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotNonExternalToolInstall: + """Tests that non-ExternalTool tools are installed during __init__.""" + + def test_non_external_tool_install_and_verify_called( + self, mock_environment, mock_copilot_client + ): + """install_tool and verify_tool_installation are called for regular tools.""" + from microbots.tools.tool import ToolAbstract + + mock_tool = MagicMock(spec=ToolAbstract) + mock_tool.name = "my_tool" + mock_tool.usage_instructions_to_llm = "Use my_tool" + + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from 
microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + additional_tools=[mock_tool], + ) + mock_tool.install_tool.assert_called_once_with(mock_environment) + mock_tool.verify_tool_installation.assert_called_once_with(mock_environment) + bot.stop() + + +# --------------------------------------------------------------------------- +# Unit tests — _install_copilot_cli verification failure +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotCLIVerification: + """Tests that copilot-cli verification failure raises RuntimeError.""" + + def test_install_cli_verify_fails_raises(self, mock_environment): + """RuntimeError raised when install commands succeed but 'copilot --version' fails.""" + from microbots.bot.CopilotBot import CopilotBot + + success_ret = MagicMock() + success_ret.return_code = 0 + success_ret.stdout = "" + success_ret.stderr = "" + + fail_ret = MagicMock() + fail_ret.return_code = 1 + fail_ret.stdout = "" + fail_ret.stderr = "command not found: copilot" + + def side_effect(cmd, **kwargs): + if "copilot --version" in cmd: + return fail_ret + return success_ret + + mock_environment.execute = MagicMock(side_effect=side_effect) + + with ( + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=AsyncMock()), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + with pytest.raises(RuntimeError, match="verification failed"): + CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + + +# --------------------------------------------------------------------------- +# Unit tests — _start_copilot_cli_server +# 
--------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotStartServer: + """Tests for _start_copilot_cli_server().""" + + def _make_bot_for_server_test(self, mock_environment, mock_copilot_client, github_token=None, provider_config=None): + """Build a CopilotBot with _start_copilot_cli_server NOT patched.""" + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + if github_token: + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token=github_token, + ) + elif provider_config: + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + api_key="sk-key", + base_url="https://api.openai.com/v1", + ) + else: + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + ) + return bot + + def test_start_server_injects_github_token(self, mock_environment, mock_copilot_client): + """Token injection calls when github_token is set without provider_config.""" + bot = self._make_bot_for_server_test( + mock_environment, mock_copilot_client, github_token="ghp_server_test" + ) + execute_args = [str(c) for c in mock_environment.execute.call_args_list] + assert any("GITHUB_TOKEN" in a for a in execute_args) + assert any("COPILOT_GITHUB_TOKEN" in a for a in execute_args) + bot.stop() + + def test_start_server_skips_token_injection_for_byok( + self, mock_environment, mock_copilot_client + ): + """No token injection when BYOK provider_config is active.""" + bot = self._make_bot_for_server_test( + mock_environment, mock_copilot_client, 
provider_config=True + ) + execute_args = [str(c) for c in mock_environment.execute.call_args_list] + assert not any("GITHUB_TOKEN" in a for a in execute_args) + bot.stop() + + def test_start_server_raises_on_execute_failure(self, mock_environment, mock_copilot_client): + """RuntimeError raised when start_cmd execute fails.""" + fail_ret = MagicMock() + fail_ret.return_code = 1 + fail_ret.stderr = "failed to start" + + success_ret = MagicMock() + success_ret.return_code = 0 + success_ret.stdout = "" + success_ret.stderr = "" + + def side_effect(cmd, **kwargs): + if "copilot --headless" in cmd: + return fail_ret + return success_ret + + mock_environment.execute = MagicMock(side_effect=side_effect) + + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + with pytest.raises(RuntimeError, match="Failed to start copilot-cli server"): + CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + + +# --------------------------------------------------------------------------- +# Unit tests — _wait_for_cli_ready +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotWaitReady: + """Tests for _wait_for_cli_ready() called directly on a minimal instance.""" + + def _make_minimal_bot(self): + """Return a bare CopilotBot instance with only environment set.""" + from microbots.bot.CopilotBot import CopilotBot + + bot = object.__new__(CopilotBot) + mock_env = MagicMock() + mock_env.get_ipv4_address = MagicMock(return_value="127.0.0.1") + 
bot.environment = mock_env + return bot + + def test_wait_for_cli_ready_success(self): + """Returns immediately when socket connection succeeds.""" + bot = self._make_minimal_bot() + mock_sock = MagicMock() + with patch("socket.create_connection", return_value=mock_sock) as mock_conn: + bot._wait_for_cli_ready() + mock_conn.assert_called_once() + mock_sock.close.assert_called_once() + + def test_wait_for_cli_ready_timeout(self): + """Raises TimeoutError when connections always fail past the deadline.""" + bot = self._make_minimal_bot() + with ( + patch("socket.create_connection", side_effect=ConnectionRefusedError()), + patch("microbots.bot.CopilotBot.time") as mock_time, + ): + # First call sets deadline (0 + _CLI_STARTUP_TIMEOUT), second exceeds it + mock_time.time.side_effect = [0, 9999] + mock_time.sleep = MagicMock() + with pytest.raises(TimeoutError, match="copilot-cli did not become ready"): + bot._wait_for_cli_ready() + + def test_wait_for_cli_ready_oserror_retries(self): + """OSError is caught and retried like ConnectionRefusedError.""" + bot = self._make_minimal_bot() + mock_sock = MagicMock() + # First attempt raises OSError, second attempt succeeds + with patch("socket.create_connection", side_effect=[OSError("network error"), mock_sock]): + with patch("microbots.bot.CopilotBot.time") as mock_time: + mock_time.time.side_effect = [0, 1, 2] + mock_time.sleep = MagicMock() + bot._wait_for_cli_ready() + mock_sock.close.assert_called_once() + + +# --------------------------------------------------------------------------- +# Unit tests — run() with additional_mounts +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotRunMounts: + """Tests for run() with additional_mounts parameter.""" + + def test_run_with_additional_mounts_calls_mount_additional(self, copilot_bot): + """_mount_additional is called for each mount in additional_mounts.""" + from microbots.extras.mount import Mount, MountType 
+ + mock_mount = MagicMock(spec=Mount) + mock_mount.mount_type = MountType.COPY + mock_mount.host_path_info = MagicMock() + mock_mount.host_path_info.abs_path = "/tmp/extra" + mock_mount.sandbox_path = "/workdir/extra" + + copilot_bot.environment.copy_to_container = MagicMock(return_value=True) + + with patch.object(copilot_bot, "_mount_additional") as mock_ma: + copilot_bot.run("test task", additional_mounts=[mock_mount]) + mock_ma.assert_called_once_with(mock_mount) + + +# --------------------------------------------------------------------------- +# Unit tests — _execute_session +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotExecuteSession: + """Tests for _execute_session() paths.""" + + def test_execute_session_includes_provider_config(self, mock_environment, mock_copilot_client): + """provider is added to session kwargs when _provider_config is set.""" + import asyncio + + session = AsyncMock() + response = Mock() + response.data = Mock() + response.data.content = "done" + session.send_and_wait = AsyncMock(return_value=response) + session.on = MagicMock() + session.disconnect = AsyncMock() + mock_copilot_client.create_session = AsyncMock(return_value=session) + + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + api_key="sk-key", + base_url="https://api.openai.com/v1", + ) + result = asyncio.run( + 
bot._execute_session( + task="do something", + system_content="", + timeout=30, + streaming=False, + ) + ) + _, call_kwargs = mock_copilot_client.create_session.call_args + assert "provider" in call_kwargs + assert result == "done" + bot.stop() + + def test_execute_session_includes_system_message(self, mock_environment, mock_copilot_client): + """system_message is added to session kwargs when system_content is non-empty.""" + import asyncio + + session = AsyncMock() + response = Mock() + response.data = Mock() + response.data.content = "done" + session.send_and_wait = AsyncMock(return_value=response) + session.on = MagicMock() + session.disconnect = AsyncMock() + mock_copilot_client.create_session = AsyncMock(return_value=session) + + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + asyncio.run( + bot._execute_session( + task="do something", + system_content="You are a helper.", + timeout=30, + streaming=False, + ) + ) + _, call_kwargs = mock_copilot_client.create_session.call_args + assert "system_message" in call_kwargs + assert call_kwargs["system_message"]["content"] == "You are a helper." 
+ bot.stop() + + def test_execute_session_returns_collected_event_text( + self, mock_environment, mock_copilot_client + ): + """Returns last collected text when send_and_wait returns no content.""" + import asyncio + + session = AsyncMock() + # send_and_wait returns response with no content + empty_response = Mock() + empty_response.data = Mock() + empty_response.data.content = "" + session.send_and_wait = AsyncMock(return_value=empty_response) + session.disconnect = AsyncMock() + + captured = [] + + def capture_on(callback): + captured.append(callback) + + session.on = MagicMock(side_effect=capture_on) + mock_copilot_client.create_session = AsyncMock(return_value=session) + + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + from copilot.generated.session_events import SessionEventType + + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + + # Simulate an ASSISTANT_MESSAGE event arriving before send_and_wait returns + async def _send_and_wait_with_event(task, timeout): + if captured: + msg_event = Mock() + msg_event.type = SessionEventType.ASSISTANT_MESSAGE + msg_event.data = Mock() + msg_event.data.content = "from event" + captured[0](msg_event) + return empty_response + + session.send_and_wait = _send_and_wait_with_event + + result = asyncio.run( + bot._execute_session( + task="do something", + system_content="", + timeout=30, + streaming=False, + ) + ) + assert result == "from event" + bot.stop() + 
+ def test_execute_session_returns_fallback_when_no_content( + self, mock_environment, mock_copilot_client + ): + """Returns fallback message when no text is collected at all.""" + import asyncio + + session = AsyncMock() + empty_response = Mock() + empty_response.data = Mock() + empty_response.data.content = "" + session.send_and_wait = AsyncMock(return_value=empty_response) + session.on = MagicMock() + session.disconnect = AsyncMock() + mock_copilot_client.create_session = AsyncMock(return_value=session) + + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + result = asyncio.run( + bot._execute_session( + task="do something", + system_content="", + timeout=1, + streaming=False, + ) + ) + assert "without producing" in result + bot.stop() + + def test_execute_session_on_event_handlers(self, mock_environment, mock_copilot_client): + """_on_event handles ASSISTANT_MESSAGE_DELTA, SESSION_IDLE, and unknown types.""" + import asyncio + + session = AsyncMock() + empty_response = Mock() + empty_response.data = Mock() + empty_response.data.content = "" + session.disconnect = AsyncMock() + + captured = [] + + def capture_on(callback): + captured.append(callback) + + session.on = MagicMock(side_effect=capture_on) + + async def _send_and_wait_with_events(task, timeout): + if captured: + cb = captured[0] + # ASSISTANT_MESSAGE_DELTA with delta_content + delta_event = 
Mock() + delta_event.type = "assistant.message_delta" + delta_event.data = Mock() + delta_event.data.delta_content = "partial" + cb(delta_event) + + # SESSION_IDLE + idle_event = Mock() + idle_event.type = "session.idle" + cb(idle_event) + + # Unknown event type + unknown_event = Mock() + unknown_event.type = "some.other.event" + cb(unknown_event) + + # ASSISTANT_MESSAGE with no content (data.content is empty) + msg_empty = Mock() + msg_empty.type = "assistant.message" + msg_empty.data = Mock() + msg_empty.data.content = "" + cb(msg_empty) + + return empty_response + + session.send_and_wait = _send_and_wait_with_events + mock_copilot_client.create_session = AsyncMock(return_value=session) + + with ( + patch("microbots.bot.CopilotBot.LocalDockerEnvironment", return_value=mock_environment), + patch("microbots.bot.CopilotBot.get_free_port", side_effect=[9000]), + patch("microbots.bot.CopilotBot.CopilotBot._install_copilot_cli"), + patch("microbots.bot.CopilotBot.CopilotBot._start_copilot_cli_server"), + patch("microbots.bot.CopilotBot.CopilotBot._wait_for_cli_ready"), + patch("copilot.CopilotClient", return_value=mock_copilot_client), + patch("copilot.ExternalServerConfig", return_value=MagicMock()), + ): + from microbots.bot.CopilotBot import CopilotBot + bot = CopilotBot( + model="gpt-4.1", + environment=mock_environment, + github_token="ghp_test", + ) + # Should not raise — covers all branches of _on_event + asyncio.run( + bot._execute_session( + task="do something", + system_content="", + timeout=5, + streaming=False, + ) + ) + bot.stop() + + +# --------------------------------------------------------------------------- +# Unit tests — _on_pre_tool_use and _on_post_tool_use hooks +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotToolUseHooks: + """Tests for _on_pre_tool_use() and _on_post_tool_use() async hooks.""" + + def test_on_pre_tool_use_returns_allow(self, copilot_bot): + import asyncio 
+ + result = asyncio.run( + copilot_bot._on_pre_tool_use( + {"toolName": "bash", "toolArgs": {"command": "ls"}}, + None, + ) + ) + assert result == {"permissionDecision": "allow"} + + def test_on_pre_tool_use_missing_keys(self, copilot_bot): + import asyncio + + result = asyncio.run(copilot_bot._on_pre_tool_use({}, None)) + assert result == {"permissionDecision": "allow"} + + def test_on_post_tool_use_returns_empty_dict(self, copilot_bot): + import asyncio + + result = asyncio.run( + copilot_bot._on_post_tool_use( + {"toolName": "bash", "toolResult": "output here"}, + None, + ) + ) + assert result == {} + + def test_on_post_tool_use_truncates_long_result(self, copilot_bot): + import asyncio + + long_result = "x" * 600 + # Should not raise even with a very long result string + result = asyncio.run( + copilot_bot._on_post_tool_use( + {"toolName": "bash", "toolResult": long_result}, + None, + ) + ) + assert result == {} + + def test_on_post_tool_use_missing_keys(self, copilot_bot): + import asyncio + + result = asyncio.run(copilot_bot._on_post_tool_use({}, None)) + assert result == {} + + +# --------------------------------------------------------------------------- +# Unit tests — _mount_additional +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestCopilotBotMountAdditional: + """Tests for _mount_additional().""" + + def test_mount_additional_non_copy_raises(self, copilot_bot): + """ValueError raised for non-COPY mount type.""" + from microbots.extras.mount import Mount, MountType + + mock_mount = MagicMock() + mock_mount.mount_type = MountType.MOUNT # not COPY + with pytest.raises(ValueError, match="Only COPY mount type"): + copilot_bot._mount_additional(mock_mount) + + def test_mount_additional_copy_fails_raises(self, copilot_bot): + """ValueError raised when copy_to_container returns False.""" + from microbots.extras.mount import MountType + + mock_mount = MagicMock() + mock_mount.mount_type = 
MountType.COPY + mock_mount.host_path_info = MagicMock() + mock_mount.host_path_info.abs_path = "/host/path" + mock_mount.sandbox_path = "/workdir/path" + + copilot_bot.environment.copy_to_container = MagicMock(return_value=False) + with pytest.raises(ValueError, match="Failed to copy additional mount"): + copilot_bot._mount_additional(mock_mount) + + def test_mount_additional_copy_succeeds(self, copilot_bot): + """No error raised when copy_to_container succeeds.""" + from microbots.extras.mount import MountType + + mock_mount = MagicMock() + mock_mount.mount_type = MountType.COPY + mock_mount.host_path_info = MagicMock() + mock_mount.host_path_info.abs_path = "/host/path" + mock_mount.sandbox_path = "/workdir/path" + + copilot_bot.environment.copy_to_container = MagicMock(return_value=True) + copilot_bot._mount_additional(mock_mount) # should not raise + + # --------------------------------------------------------------------------- # Integration tests — require real Docker + copilot-cli + auth # --------------------------------------------------------------------------- From cf039f51d3782e84c22ecca585425805171f6176 Mon Sep 17 00:00:00 2001 From: bala Date: Thu, 9 Apr 2026 14:32:10 +0000 Subject: [PATCH 10/12] Remove copilot_api_llm related code, update test cases and add documentation to use CopilotBot --- docs/copilot-bot.md | 336 ++++++++++++++++++ mkdocs.yml | 3 + src/microbots/MicroBot.py | 11 - src/microbots/bot/CopilotBot.py | 4 +- src/microbots/constants.py | 1 - .../test_local_docker_environment.py | 51 +++ 6 files changed, 392 insertions(+), 14 deletions(-) create mode 100644 docs/copilot-bot.md diff --git a/docs/copilot-bot.md b/docs/copilot-bot.md new file mode 100644 index 00000000..ca379fc3 --- /dev/null +++ b/docs/copilot-bot.md @@ -0,0 +1,336 @@ +# CopilotBot + +CopilotBot is a Microbots agent that delegates the entire agent loop to the **GitHub Copilot runtime**. Unlike other Microbots bots (ReadingBot, WritingBot, etc.) 
where Microbots manages the LLM ↔ tool loop, CopilotBot lets the Copilot runtime handle planning, tool invocation, file edits, shell commands, and multi-turn reasoning — all within a secure Docker sandbox. + +## Prerequisites + +- **Docker** — a running Docker daemon +- **Python 3.11+** +- **One of the following** for authentication: + - A GitHub Copilot subscription (for native Copilot auth), **or** + - API credentials for any OpenAI-compatible, Azure OpenAI, or Anthropic endpoint (BYOK — no Copilot subscription needed) + +## Installation + +```bash +pip install microbots[ghcp] +``` + +This installs the `github-copilot-sdk` package alongside Microbots. + +!!! note + You do **not** need to install `copilot-cli` on your host machine. Microbots automatically installs and runs it inside the Docker container during initialization. + +## Quick Start + +```python +from microbots.bot.CopilotBot import CopilotBot + +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/your/project", + github_token="ghp_your_github_token", +) + +result = bot.run("Fix the failing unit tests and make sure all tests pass.") + +print(result.status) # True if successful +print(result.result) # The agent's final response +print(result.error) # Error message if status is False + +bot.stop() +``` + +## Authentication Methods + +CopilotBot supports multiple authentication methods. Only the first method (native GitHub token auth) requires a GitHub Copilot subscription; the BYOK methods do not. + +### 1. GitHub Token (Native Copilot Auth) + +Pass a GitHub token directly or let Microbots discover it from the environment. 
+ +```python +# Option A: Pass explicitly +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/project", + github_token="ghp_your_token", +) + +# Option B: Set environment variable (auto-discovered) +# export GITHUB_TOKEN="ghp_your_token" +# — or — +# export COPILOT_GITHUB_TOKEN="ghp_your_token" +# — or — +# export GH_TOKEN="ghp_your_token" + +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/project", +) +``` + +If no token is provided and no environment variable is set, Microbots will attempt to retrieve a token from a local GitHub Copilot login (e.g. via `gh copilot`). + +**Token discovery order:** explicit `github_token` → `COPILOT_GITHUB_TOKEN` → `GITHUB_TOKEN` → `GH_TOKEN` → local Copilot login. + +!!! note + The local Copilot login fallback requires `copilot-cli` to be installed on your **host** machine and a valid login session in your home directory (e.g. via `copilot login`). If `copilot-cli` is not installed or no login is found, this step is skipped. + +### 2. BYOK — API Key (No Copilot Subscription Required) + +Use your own API key and endpoint. This works with any OpenAI-compatible API, Anthropic, or Azure OpenAI — no GitHub Copilot subscription needed. 
+ +#### OpenAI + +```python +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/project", + api_key="sk-your-openai-key", + base_url="https://api.openai.com/v1", + provider_type="openai", # default, can be omitted +) +``` + +#### Anthropic + +```python +bot = CopilotBot( + model="claude-sonnet-4.5", + folder_to_mount="/path/to/project", + api_key="sk-ant-your-key", + base_url="https://api.anthropic.com", + provider_type="anthropic", +) +``` + +#### Azure OpenAI + +```python +bot = CopilotBot( + model="my-gpt4-deployment", + folder_to_mount="/path/to/project", + api_key="your-azure-api-key", + base_url="https://your-resource.openai.azure.com", + provider_type="azure", + azure_api_version="2024-10-21", +) +``` + +#### Using `wire_api` for newer models + +For models that use the Responses API (e.g. GPT-5 series), set `wire_api="responses"`: + +```python +bot = CopilotBot( + model="gpt-5", + folder_to_mount="/path/to/project", + api_key="sk-your-key", + base_url="https://api.openai.com/v1", + wire_api="responses", +) +``` + +### 3. BYOK — Bearer Token + +If your provider uses bearer token authentication instead of an API key: + +```python +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/project", + bearer_token="your-bearer-token", + base_url="https://your-endpoint.com/v1", +) +``` + +!!! note + When both `api_key` and `bearer_token` are provided, `bearer_token` takes precedence. + +### 4. BYOK — Token Provider (e.g. 
Azure AD) + +For environments that use dynamic token authentication (such as Azure AD managed identity), pass a callable that returns a fresh token: + +```python +from azure.identity import DefaultAzureCredential + +credential = DefaultAzureCredential() + +def get_token(): + return credential.get_token("https://cognitiveservices.azure.com/.default").token + +bot = CopilotBot( + model="my-gpt4-deployment", + folder_to_mount="/path/to/project", + base_url="https://your-resource.openai.azure.com", + provider_type="azure", + azure_api_version="2024-10-21", + token_provider=get_token, +) +``` + +The `token_provider` must be a callable that returns a non-empty string. It is called once at initialization time. + +### 5. BYOK — Environment Variables + +Configure BYOK entirely through environment variables without changing any code: + +```bash +export COPILOT_BYOK_BASE_URL="https://api.openai.com/v1" +export COPILOT_BYOK_API_KEY="sk-your-key" +export COPILOT_BYOK_PROVIDER_TYPE="openai" # optional, defaults to "openai" +export COPILOT_BYOK_MODEL="gpt-4.1" # optional, overrides the model param +export COPILOT_BYOK_WIRE_API="completions" # optional +export COPILOT_BYOK_AZURE_API_VERSION="2024-10-21" # optional, for Azure only +``` + +Then create the bot without any auth parameters: + +```python +bot = CopilotBot( + folder_to_mount="/path/to/project", +) +``` + +You can also use `COPILOT_BYOK_BEARER_TOKEN` instead of `COPILOT_BYOK_API_KEY` for bearer-token authentication. 
+ +## Authentication Priority + +When multiple auth methods are configured simultaneously, CopilotBot resolves them in this order: + +| Priority | Method | Condition | +|----------|--------|-----------| +| 1 | Explicit API key / bearer token | `api_key` or `bearer_token` parameter is set | +| 2 | Environment variables | `COPILOT_BYOK_BASE_URL` + `COPILOT_BYOK_API_KEY` or `COPILOT_BYOK_BEARER_TOKEN` | +| 3 | Token provider | `token_provider` parameter is set | +| 4 | Native GitHub Copilot | `github_token` or `GITHUB_TOKEN` / `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` env vars | + +## Parameters + +### Constructor + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `model` | `str` | `"gpt-4.1"` | Model name (e.g. `"gpt-4.1"`, `"claude-sonnet-4.5"`). No provider prefix needed. | +| `folder_to_mount` | `str` | `None` | Absolute path to the folder to mount into the sandbox. | +| `permission` | `PermissionLabels` | `READ_WRITE` | Mount permission — `READ_ONLY` or `READ_WRITE`. | +| `environment` | `LocalDockerEnvironment` | `None` | Pre-created Docker environment. Auto-created if not provided. | +| `additional_tools` | `list[ToolAbstract]` | `[]` | Extra tools to install in the sandbox. | +| `github_token` | `str` | `None` | GitHub token for native Copilot auth. | +| `api_key` | `str` | `None` | API key for BYOK. | +| `bearer_token` | `str` | `None` | Bearer token for BYOK. | +| `base_url` | `str` | `None` | API endpoint URL for BYOK. | +| `provider_type` | `str` | `"openai"` | BYOK provider: `"openai"`, `"azure"`, or `"anthropic"`. | +| `wire_api` | `str` | `None` | API format: `"completions"` or `"responses"`. | +| `azure_api_version` | `str` | `None` | Azure API version (for `provider_type="azure"` only). | +| `token_provider` | `Callable[[], str]` | `None` | Callable returning a bearer token string. 
| + +### `run()` method + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `task` | `str` | *(required)* | Natural-language description of the task. | +| `additional_mounts` | `list[Mount]` | `None` | Extra folders to copy into the container. | +| `timeout_in_seconds` | `int` | `600` | Maximum wall-clock time for the agent run. | +| `streaming` | `bool` | `False` | Enable streaming delta events (logged at DEBUG level). | + +### Return value — `BotRunResult` + +| Field | Type | Description | +|-------|------|-------------| +| `status` | `bool` | `True` if the agent completed successfully. | +| `result` | `str` or `None` | The agent's final response text. | +| `error` | `str` or `None` | Error description if `status` is `False`. | + +## Examples + +### Read-only code analysis + +```python +from microbots.bot.CopilotBot import CopilotBot +from microbots.constants import PermissionLabels + +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/project", + permission=PermissionLabels.READ_ONLY, + github_token="ghp_your_token", +) + +result = bot.run("Analyze the codebase and list all public API endpoints.") +print(result.result) +bot.stop() +``` + +### Fix a bug with BYOK (OpenAI) + +```python +from microbots.bot.CopilotBot import CopilotBot + +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/project", + api_key="sk-your-openai-key", + base_url="https://api.openai.com/v1", +) + +result = bot.run( + "The login form crashes when email contains a '+'. 
Fix the validation logic.", + timeout_in_seconds=300, +) +print(result.result) +bot.stop() +``` + +### Using additional tools + +```python +from microbots.bot.CopilotBot import CopilotBot +from microbots.tools.internal_tool import InternalTool + +my_tool = InternalTool(tool_definition_path="path/to/tool.yaml") + +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/project", + github_token="ghp_your_token", + additional_tools=[my_tool], +) + +result = bot.run("Use the custom tool to lint and then fix all issues.") +bot.stop() +``` + +!!! warning + `ExternalTool` is **not supported** with CopilotBot. Only tools that run inside the Docker container (internal tools) can be used. + +### Mounting additional folders at runtime + +```python +from microbots.bot.CopilotBot import CopilotBot +from microbots.extras.mount import Mount + +bot = CopilotBot( + model="gpt-4.1", + folder_to_mount="/path/to/project", + github_token="ghp_your_token", +) + +extra = Mount("/path/to/test-data", "/workdir/test-data", "READ_ONLY") +result = bot.run( + "Run the integration tests using the data in /workdir/test-data.", + additional_mounts=[extra], +) +bot.stop() +``` + +## Cleanup + +Always call `bot.stop()` when you are done. This tears down the SDK client, the CLI server, and the Docker container: + +```python +bot.stop() +``` + +`stop()` is idempotent — calling it multiple times is safe. It is also called automatically when the object is garbage-collected, but explicit cleanup is recommended. 
diff --git a/mkdocs.yml b/mkdocs.yml index d7e50a8e..9fa483b6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -41,6 +41,9 @@ markdown_extensions: nav: - Getting Started: - Home: index.md + - Guides: + - CopilotBot: copilot-bot.md + - Authentication: authentication.md - Blogs: - blog/index.md - "Microbots : Safety First Agentic Workflow": blog/microbots-safety-first-ai-agent.md diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py index 8561a65c..a8c9b7a1 100644 --- a/src/microbots/MicroBot.py +++ b/src/microbots/MicroBot.py @@ -362,17 +362,6 @@ def _create_llm(self): system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name, token_provider=self.token_provider, ) - elif self.model_provider == ModelProvider.GITHUB_COPILOT: - try: - from microbots.llm.copilot_api import CopilotApi - except ImportError: - raise ValueError( - "GitHub Copilot provider requires the ghcp extra. " - "Install with: pip install microbots[ghcp]" - ) - self.llm = CopilotApi( - system_prompt=system_prompt_with_tools, model_name=self.deployment_name - ) # No Else case required as model provider is already validated using _validate_model_and_provider def _validate_model_and_provider(self, model): diff --git a/src/microbots/bot/CopilotBot.py b/src/microbots/bot/CopilotBot.py index d6514edb..bb8e656a 100644 --- a/src/microbots/bot/CopilotBot.py +++ b/src/microbots/bot/CopilotBot.py @@ -530,10 +530,10 @@ def _start_copilot_cli_server(self): # config passed to create_session — no container-side token needed. 
if self.github_token and not self._provider_config: self.environment.execute( - f'export GITHUB_TOKEN="{self.github_token}"' + f'export GITHUB_TOKEN="{self.github_token}"', sensitive=True ) self.environment.execute( - f'export COPILOT_GITHUB_TOKEN="{self.github_token}"' + f'export COPILOT_GITHUB_TOKEN="{self.github_token}"', sensitive=True ) # Start copilot in headless mode in the background diff --git a/src/microbots/constants.py b/src/microbots/constants.py index 5989eb0f..c4a7ff98 100644 --- a/src/microbots/constants.py +++ b/src/microbots/constants.py @@ -6,7 +6,6 @@ class ModelProvider(StrEnum): OPENAI = "azure-openai" OLLAMA_LOCAL = "ollama-local" ANTHROPIC = "anthropic" - GITHUB_COPILOT = "github-copilot" class ModelEnum(StrEnum): diff --git a/test/environment/local_docker/test_local_docker_environment.py b/test/environment/local_docker/test_local_docker_environment.py index 266f9f2e..e67f4956 100644 --- a/test/environment/local_docker/test_local_docker_environment.py +++ b/test/environment/local_docker/test_local_docker_environment.py @@ -492,3 +492,54 @@ def test_execute_redacts_command_when_sensitive(self, caplog): assert "" in caplog.text assert "SECRET_TOKEN" not in caplog.text assert "abc123" not in caplog.text + + +@pytest.mark.unit +class TestGetIpv4Address: + """Unit tests for LocalDockerEnvironment.get_ipv4_address""" + + def _make_env(self): + """Create a bare LocalDockerEnvironment without calling __init__""" + env = LocalDockerEnvironment.__new__(LocalDockerEnvironment) + env.deleted = True + env.container = None + return env + + def test_returns_ip_address_from_container_networks(self): + """get_ipv4_address returns the IP from the first Docker network""" + env = self._make_env() + env.container = Mock() + env.container.attrs = { + "NetworkSettings": { + "Networks": { + "bridge": {"IPAddress": "172.17.0.2"} + } + } + } + + result = env.get_ipv4_address() + + env.container.reload.assert_called_once() + assert result == "172.17.0.2" + + def 
test_raises_runtime_error_when_no_container(self): + """get_ipv4_address raises RuntimeError when container is None""" + env = self._make_env() + + with pytest.raises(RuntimeError, match="No active container"): + env.get_ipv4_address() + + def test_raises_runtime_error_when_ip_is_empty(self): + """get_ipv4_address raises RuntimeError when IP address is empty""" + env = self._make_env() + env.container = Mock() + env.container.attrs = { + "NetworkSettings": { + "Networks": { + "bridge": {"IPAddress": ""} + } + } + } + + with pytest.raises(RuntimeError, match="Could not determine container IP address"): + env.get_ipv4_address() From b1b2cfdded0b30f54fd2c42c557908de18ccdbb0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 15:40:09 +0000 Subject: [PATCH 11/12] Add unit tests for new functionality in multi_agent_log_parser.py Agent-Logs-Url: https://github.com/microsoft/microbots/sessions/feb7ae20-c185-4fed-8144-4f5a68ab122b Co-authored-by: 0xba1a <2942888+0xba1a@users.noreply.github.com> --- test/utils/__init__.py | 0 test/utils/test_multi_agent_log_parser.py | 518 ++++++++++++++++++++++ 2 files changed, 518 insertions(+) create mode 100644 test/utils/__init__.py create mode 100644 test/utils/test_multi_agent_log_parser.py diff --git a/test/utils/__init__.py b/test/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/utils/test_multi_agent_log_parser.py b/test/utils/test_multi_agent_log_parser.py new file mode 100644 index 00000000..3c7f850a --- /dev/null +++ b/test/utils/test_multi_agent_log_parser.py @@ -0,0 +1,518 @@ +""" +Unit tests for the new functionality added to multi_agent_log_parser.py: + +- SetupInfo dataclass +- _extract_setup_info() function +- parse_log_entries() with legacy format support +- Agent.error_message field +- TestCase.setup field +""" +import os +import sys + +import pytest + +sys.path.insert( + 0, 
os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) +) + +from microbots.utils.multi_agent_log_parser import ( + Agent, + SetupInfo, + TestCase, + _agent_status_str, + _extract_setup_info, + build_test_cases, + extract_task_from_microbot_sub, + generate_setup_md, + parse_log_entries, + truncate_text, +) + + +# --------------------------------------------------------------------------- +# Unit tests — SetupInfo dataclass +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestSetupInfo: + """Tests for the new SetupInfo dataclass.""" + + def test_default_values(self): + """SetupInfo has sensible empty defaults.""" + s = SetupInfo() + assert s.container_id == "" + assert s.image == "" + assert s.host_port == "" + assert s.working_dir == "" + assert s.volume_mappings == [] + assert s.tools_installed == [] + assert s.files_copied == [] + + def test_explicit_construction(self): + """SetupInfo can be created with explicit values.""" + s = SetupInfo( + container_id="abc123", + image="ubuntu:22.04", + host_port="8080", + working_dir="/workspace", + volume_mappings=["/host:/container"], + tools_installed=["git"], + files_copied=["file.py → /workspace/file.py"], + ) + assert s.container_id == "abc123" + assert s.image == "ubuntu:22.04" + assert s.host_port == "8080" + assert s.working_dir == "/workspace" + assert s.volume_mappings == ["/host:/container"] + assert s.tools_installed == ["git"] + assert s.files_copied == ["file.py → /workspace/file.py"] + + +# --------------------------------------------------------------------------- +# Unit tests — Agent.error_message field +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAgentErrorMessage: + """Tests for the new error_message field on Agent.""" + + def test_default_error_message_is_empty(self): + """Agent.error_message defaults to empty string.""" + a = Agent() + assert a.error_message == "" + 
+ def test_can_set_error_message(self): + """Agent.error_message can be set.""" + a = Agent(error_message="Something went wrong") + assert a.error_message == "Something went wrong" + + +# --------------------------------------------------------------------------- +# Unit tests — TestCase.setup field +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestTestCaseSetup: + """Tests for the new setup field on TestCase.""" + + def test_default_setup_is_setup_info(self): + """TestCase.setup defaults to a fresh SetupInfo instance.""" + tc = TestCase() + assert isinstance(tc.setup, SetupInfo) + assert tc.setup.container_id == "" + + def test_setup_field_is_independent_per_instance(self): + """Each TestCase gets its own SetupInfo instance (no shared mutable default).""" + tc1 = TestCase() + tc2 = TestCase() + tc1.setup.container_id = "id1" + assert tc2.setup.container_id == "" + + +# --------------------------------------------------------------------------- +# Unit tests — parse_log_entries (legacy format support) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestParseLogEntriesLegacyFormat: + """Tests for parse_log_entries() with the new legacy-format support.""" + + def test_parses_current_format(self, tmp_path): + """parse_log_entries handles the current TIMESTAMP MODULE LEVEL CONTENT format.""" + log = tmp_path / "test.log" + log.write_text( + "2026-03-26 12:45:20,277 microbots.MicroBot INFO Hello world\n", + encoding="utf-8", + ) + entries = parse_log_entries(str(log)) + assert len(entries) == 1 + assert entries[0]["content"] == "Hello world" + assert entries[0]["level"] == "INFO" + assert entries[0]["module"] == "microbots.MicroBot" + + def test_parses_legacy_format(self, tmp_path): + """parse_log_entries handles the legacy TIMESTAMP [LEVEL] CONTENT format.""" + log = tmp_path / "test.log" + log.write_text( + "2026-03-26 12:45:20,277 [INFO] Legacy 
format message\n", + encoding="utf-8", + ) + entries = parse_log_entries(str(log)) + assert len(entries) == 1 + assert entries[0]["content"] == "Legacy format message" + assert entries[0]["level"] == "INFO" + assert entries[0]["module"] == "" + + def test_continuation_lines_joined(self, tmp_path): + """Lines without timestamps are joined to the previous entry.""" + log = tmp_path / "test.log" + log.write_text( + "2026-03-26 12:45:20,277 microbots.MicroBot INFO First line\n" + " continuation here\n", + encoding="utf-8", + ) + entries = parse_log_entries(str(log)) + assert len(entries) == 1 + assert "continuation here" in entries[0]["content"] + + def test_multiple_entries_both_formats(self, tmp_path): + """Mix of current and legacy format entries are all parsed.""" + log = tmp_path / "test.log" + log.write_text( + "2026-03-26 12:45:20,277 microbots.MicroBot INFO Current format\n" + "2026-03-26 12:45:21,000 [DEBUG] Legacy format\n", + encoding="utf-8", + ) + entries = parse_log_entries(str(log)) + assert len(entries) == 2 + assert entries[0]["content"] == "Current format" + assert entries[1]["content"] == "Legacy format" + assert entries[1]["module"] == "" + + def test_multiple_current_format_entries(self, tmp_path): + """Multiple sequential current-format entries are all captured.""" + log = tmp_path / "multi.log" + log.write_text( + "2026-03-26 12:45:20,277 microbots.MicroBot INFO First entry\n" + "2026-03-26 12:45:21,000 microbots.MicroBot INFO Second entry\n" + "2026-03-26 12:45:22,000 microbots.MicroBot INFO Third entry\n", + encoding="utf-8", + ) + entries = parse_log_entries(str(log)) + assert len(entries) == 3 + assert entries[0]["content"] == "First entry" + assert entries[1]["content"] == "Second entry" + assert entries[2]["content"] == "Third entry" + + def test_empty_log_returns_empty_list(self, tmp_path): + """An empty log file returns an empty list.""" + log = tmp_path / "empty.log" + log.write_text("", encoding="utf-8") + entries = 
parse_log_entries(str(log)) + assert entries == [] + + +# --------------------------------------------------------------------------- +# Unit tests — _extract_setup_info +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestExtractSetupInfo: + """Tests for the new _extract_setup_info() function.""" + + def _make_entry(self, content, level="INFO"): + return { + "timestamp": "2026-03-26 12:00:00,000", + "module": "test", + "level": level, + "content": content, + "line_num": 1, + } + + def test_extracts_container_info(self): + """Parses container ID, image, and host port from log entries.""" + entries = [ + self._make_entry( + "Started container abc123 with image ubuntu:22.04 on host port 8080" + ), + ] + setup = _extract_setup_info(entries) + assert setup.container_id == "abc123" + assert setup.image == "ubuntu:22.04" + assert setup.host_port == "8080" + + def test_extracts_working_directory(self): + """Parses working directory from log entries.""" + entries = [ + self._make_entry("Created working directory at /tmp/workspace"), + ] + setup = _extract_setup_info(entries) + assert setup.working_dir == "/tmp/workspace" + + def test_extracts_volume_mapping(self): + """Parses volume mappings from log entries.""" + entries = [ + self._make_entry("Volume mapping: /host/path:/container/path"), + ] + setup = _extract_setup_info(entries) + assert "/host/path:/container/path" in setup.volume_mappings + + def test_extracts_tools_installed(self): + """Parses installed tools from log entries.""" + entries = [ + self._make_entry("Successfully installed tool: git"), + self._make_entry("Successfully set up tool: docker"), + ] + setup = _extract_setup_info(entries) + assert "git" in setup.tools_installed + assert "docker" in setup.tools_installed + + def test_no_duplicate_tools(self): + """Same tool name is not added twice.""" + entries = [ + self._make_entry("Successfully installed tool: git"), + self._make_entry("Successfully 
installed tool: git"), + ] + setup = _extract_setup_info(entries) + assert setup.tools_installed.count("git") == 1 + + def test_extracts_files_copied(self): + """Parses copied files from log entries.""" + entries = [ + self._make_entry("Successfully copied repo to container: /workspace/repo"), + ] + setup = _extract_setup_info(entries) + assert len(setup.files_copied) == 1 + assert "repo" in setup.files_copied[0] + + def test_stops_at_task_started(self): + """Stops parsing setup info when TASK STARTED is encountered.""" + entries = [ + self._make_entry( + "Started container ctn1 with image img1 on host port 9000" + ), + self._make_entry("ℹ️ TASK STARTED : some task"), + # This entry comes AFTER task started and should be ignored + self._make_entry("Volume mapping: /should/not/be/included"), + ] + setup = _extract_setup_info(entries) + assert setup.container_id == "ctn1" + assert setup.volume_mappings == [] + + def test_empty_entries_returns_empty_setup(self): + """Returns a default SetupInfo when entries list is empty.""" + setup = _extract_setup_info([]) + assert setup.container_id == "" + assert setup.working_dir == "" + + def test_no_matching_entries_returns_empty_setup(self): + """Returns empty SetupInfo when no setup patterns match.""" + entries = [ + self._make_entry("Just some random log message"), + self._make_entry("Another random message"), + ] + setup = _extract_setup_info(entries) + assert setup.container_id == "" + assert setup.image == "" + assert setup.working_dir == "" + assert setup.volume_mappings == [] + + +# --------------------------------------------------------------------------- +# Unit tests — extract_task_from_microbot_sub +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestExtractTaskFromMicrobotSub: + """Tests for the new extract_task_from_microbot_sub() function.""" + + def test_extracts_task_with_iterations_flag(self): + """Extracts --task value when followed by 
--iterations.""" + cmd = 'microbot_sub --task "Do the thing" --iterations 10' + result = extract_task_from_microbot_sub(cmd) + assert result == "Do the thing" + + def test_extracts_task_with_timeout_flag(self): + """Extracts --task value when followed by --timeout.""" + cmd = 'microbot_sub --task "Run tests" --timeout 300' + result = extract_task_from_microbot_sub(cmd) + assert result == "Run tests" + + def test_extracts_task_at_end_of_command(self): + """Extracts --task value at the end of the command string.""" + cmd = 'microbot_sub --task "Final task"' + result = extract_task_from_microbot_sub(cmd) + assert result == "Final task" + + def test_falls_back_to_full_command_when_no_task(self): + """Returns the full command string when no --task flag is found.""" + cmd = "microbot_sub --some-other-arg value" + result = extract_task_from_microbot_sub(cmd) + assert result == cmd + + def test_handles_escaped_quotes(self): + """Handles escaped quotes in the command string.""" + cmd = r'microbot_sub --task "Task with \"quotes\"" --iterations 5' + result = extract_task_from_microbot_sub(cmd) + assert "Task with" in result + + def test_handles_multiline_task(self): + """Handles multi-line task descriptions.""" + cmd = 'microbot_sub --task "Line one\\nLine two" --iterations 5' + result = extract_task_from_microbot_sub(cmd) + assert "Line one" in result + + +# --------------------------------------------------------------------------- +# Unit tests — build_test_cases (new fields and new code paths) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestBuildTestCases: + """Tests for new code paths added to build_test_cases().""" + + def _make_entry(self, content, level="INFO"): + return { + "timestamp": "2026-03-26 12:00:00,000", + "module": "MicroBot", + "level": level, + "content": content, + "line_num": 1, + } + + def test_empty_entries_returns_empty_list(self): + """build_test_cases returns empty list for empty 
input.""" + assert build_test_cases([]) == [] + + def test_task_started_creates_agent(self): + """TASK STARTED creates a main agent with the task text.""" + entries = [ + self._make_entry("ℹ️ TASK STARTED : Do the work"), + self._make_entry("TASK COMPLETED successfully"), + ] + test_cases = build_test_cases(entries) + assert len(test_cases) == 1 + assert test_cases[0].main_agent is not None + assert test_cases[0].main_agent.completed is True + + def test_task_completed_sets_completed_flag(self): + """TASK COMPLETED sets agent.completed = True and clears current_field.""" + entries = [ + self._make_entry("ℹ️ TASK STARTED : Some task"), + self._make_entry("TASK COMPLETED"), + ] + test_cases = build_test_cases(entries) + assert test_cases[0].main_agent.completed is True + + def test_sub_agent_failed_sets_error_message(self): + """ERROR Sub-agent failed sets error_message on the sub-agent.""" + entries = [ + self._make_entry("ℹ️ TASK STARTED : Main task"), + self._make_entry("ℹ️ TASK STARTED : Sub task"), + self._make_entry("Sub-agent failed: timed out", level="ERROR"), + ] + test_cases = build_test_cases(entries) + assert len(test_cases) == 1 + assert len(test_cases[0].sub_agents) == 1 + assert test_cases[0].sub_agents[0].error_message == "Sub-agent failed: timed out" + assert test_cases[0].sub_agents[0].max_iterations_reached is True + + def test_failed_to_parse_microbot_sub_sets_blocked(self): + """ERROR Failed to parse microbot_sub command sets current_step as blocked.""" + from microbots.utils.multi_agent_log_parser import Step + entries = [ + self._make_entry("ℹ️ TASK STARTED : Main task"), + self._make_entry("LLM tool call: microbot_sub: bad command"), + self._make_entry( + "Failed to parse microbot_sub command: invalid syntax", level="ERROR" + ), + ] + test_cases = build_test_cases(entries) + # Should not raise and should produce a test case + assert len(test_cases) >= 1 + + +# --------------------------------------------------------------------------- +# Unit 
tests — truncate_text (new function) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestTruncateText: + """Tests for the new truncate_text() helper.""" + + def test_short_text_unchanged(self): + """Text within the line limit is returned as-is.""" + text = "Line one\nLine two\nLine three" + assert truncate_text(text, max_lines=10) == text + + def test_long_text_truncated(self): + """Text exceeding max_lines is truncated with a notice.""" + lines = [f"line {i}" for i in range(250)] + text = "\n".join(lines) + result = truncate_text(text, max_lines=200) + assert "truncated" in result + assert "50 more lines" in result + + def test_exact_limit_not_truncated(self): + """Text at exactly max_lines is NOT truncated.""" + lines = [f"line {i}" for i in range(200)] + text = "\n".join(lines) + result = truncate_text(text, max_lines=200) + assert "truncated" not in result + + +# --------------------------------------------------------------------------- +# Unit tests — generate_setup_md (new function) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestGenerateSetupMd: + """Tests for the new generate_setup_md() function.""" + + def test_empty_setup_returns_empty_string(self): + """Returns empty string when no container_id and no tools_installed.""" + setup = SetupInfo() + assert generate_setup_md(setup) == "" + + def test_setup_with_container_id_returns_markdown(self): + """Returns markdown when container_id is set.""" + setup = SetupInfo(container_id="abc123", image="ubuntu:22.04", host_port="8080") + md = generate_setup_md(setup) + assert "abc123" in md + assert "ubuntu:22.04" in md + assert "8080" in md + + def test_setup_with_working_dir(self): + """Includes working directory in output.""" + setup = SetupInfo(container_id="ctn1", working_dir="/workspace") + md = generate_setup_md(setup) + assert "/workspace" in md + + def 
test_setup_with_volume_mappings(self): + """Includes volume mappings in output.""" + setup = SetupInfo(container_id="ctn1", volume_mappings=["/host:/container"]) + md = generate_setup_md(setup) + assert "/host:/container" in md + + def test_setup_with_tools_only(self): + """Returns markdown when only tools_installed is set (no container_id).""" + setup = SetupInfo(tools_installed=["git", "docker"]) + md = generate_setup_md(setup) + assert "git" in md + assert "docker" in md + + def test_setup_with_files_copied(self): + """Includes files_copied section when files were copied.""" + setup = SetupInfo( + container_id="ctn1", + files_copied=["repo.py → /workspace/repo.py"], + ) + md = generate_setup_md(setup) + assert "Files copied" in md + assert "repo.py" in md + + +# --------------------------------------------------------------------------- +# Unit tests — _agent_status_str (new function) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAgentStatusStr: + """Tests for the new _agent_status_str() helper.""" + + def test_completed_agent_returns_completed(self): + """Returns completed string for completed agent.""" + agent = Agent(completed=True) + assert "Completed" in _agent_status_str(agent) + + def test_max_iterations_agent_returns_failed(self): + """Returns failed string for agent that hit max iterations.""" + agent = Agent(max_iterations_reached=True) + result = _agent_status_str(agent) + assert "Failed" in result + + def test_unknown_agent_returns_unknown(self): + """Returns unknown string for agent with no terminal state.""" + agent = Agent() + assert "Unknown" in _agent_status_str(agent) From bf8b15f5e2eddc38a525d2eb4f105f3c3963635c Mon Sep 17 00:00:00 2001 From: bala Date: Fri, 10 Apr 2026 05:47:51 +0000 Subject: [PATCH 12/12] Revert "Add unit tests for new functionality in multi_agent_log_parser.py" This reverts commit b1b2cfdded0b30f54fd2c42c557908de18ccdbb0. 
--- test/utils/__init__.py | 0 test/utils/test_multi_agent_log_parser.py | 518 ---------------------- 2 files changed, 518 deletions(-) delete mode 100644 test/utils/__init__.py delete mode 100644 test/utils/test_multi_agent_log_parser.py diff --git a/test/utils/__init__.py b/test/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/test/utils/test_multi_agent_log_parser.py b/test/utils/test_multi_agent_log_parser.py deleted file mode 100644 index 3c7f850a..00000000 --- a/test/utils/test_multi_agent_log_parser.py +++ /dev/null @@ -1,518 +0,0 @@ -""" -Unit tests for the new functionality added to multi_agent_log_parser.py: - -- SetupInfo dataclass -- _extract_setup_info() function -- parse_log_entries() with legacy format support -- Agent.error_message field -- TestCase.setup field -""" -import os -import sys - -import pytest - -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) -) - -from microbots.utils.multi_agent_log_parser import ( - Agent, - SetupInfo, - TestCase, - _agent_status_str, - _extract_setup_info, - build_test_cases, - extract_task_from_microbot_sub, - generate_setup_md, - parse_log_entries, - truncate_text, -) - - -# --------------------------------------------------------------------------- -# Unit tests — SetupInfo dataclass -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestSetupInfo: - """Tests for the new SetupInfo dataclass.""" - - def test_default_values(self): - """SetupInfo has sensible empty defaults.""" - s = SetupInfo() - assert s.container_id == "" - assert s.image == "" - assert s.host_port == "" - assert s.working_dir == "" - assert s.volume_mappings == [] - assert s.tools_installed == [] - assert s.files_copied == [] - - def test_explicit_construction(self): - """SetupInfo can be created with explicit values.""" - s = SetupInfo( - container_id="abc123", - image="ubuntu:22.04", - host_port="8080", - 
working_dir="/workspace", - volume_mappings=["/host:/container"], - tools_installed=["git"], - files_copied=["file.py → /workspace/file.py"], - ) - assert s.container_id == "abc123" - assert s.image == "ubuntu:22.04" - assert s.host_port == "8080" - assert s.working_dir == "/workspace" - assert s.volume_mappings == ["/host:/container"] - assert s.tools_installed == ["git"] - assert s.files_copied == ["file.py → /workspace/file.py"] - - -# --------------------------------------------------------------------------- -# Unit tests — Agent.error_message field -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAgentErrorMessage: - """Tests for the new error_message field on Agent.""" - - def test_default_error_message_is_empty(self): - """Agent.error_message defaults to empty string.""" - a = Agent() - assert a.error_message == "" - - def test_can_set_error_message(self): - """Agent.error_message can be set.""" - a = Agent(error_message="Something went wrong") - assert a.error_message == "Something went wrong" - - -# --------------------------------------------------------------------------- -# Unit tests — TestCase.setup field -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestTestCaseSetup: - """Tests for the new setup field on TestCase.""" - - def test_default_setup_is_setup_info(self): - """TestCase.setup defaults to a fresh SetupInfo instance.""" - tc = TestCase() - assert isinstance(tc.setup, SetupInfo) - assert tc.setup.container_id == "" - - def test_setup_field_is_independent_per_instance(self): - """Each TestCase gets its own SetupInfo instance (no shared mutable default).""" - tc1 = TestCase() - tc2 = TestCase() - tc1.setup.container_id = "id1" - assert tc2.setup.container_id == "" - - -# --------------------------------------------------------------------------- -# Unit tests — parse_log_entries (legacy format support) -# 
--------------------------------------------------------------------------- - -@pytest.mark.unit -class TestParseLogEntriesLegacyFormat: - """Tests for parse_log_entries() with the new legacy-format support.""" - - def test_parses_current_format(self, tmp_path): - """parse_log_entries handles the current TIMESTAMP MODULE LEVEL CONTENT format.""" - log = tmp_path / "test.log" - log.write_text( - "2026-03-26 12:45:20,277 microbots.MicroBot INFO Hello world\n", - encoding="utf-8", - ) - entries = parse_log_entries(str(log)) - assert len(entries) == 1 - assert entries[0]["content"] == "Hello world" - assert entries[0]["level"] == "INFO" - assert entries[0]["module"] == "microbots.MicroBot" - - def test_parses_legacy_format(self, tmp_path): - """parse_log_entries handles the legacy TIMESTAMP [LEVEL] CONTENT format.""" - log = tmp_path / "test.log" - log.write_text( - "2026-03-26 12:45:20,277 [INFO] Legacy format message\n", - encoding="utf-8", - ) - entries = parse_log_entries(str(log)) - assert len(entries) == 1 - assert entries[0]["content"] == "Legacy format message" - assert entries[0]["level"] == "INFO" - assert entries[0]["module"] == "" - - def test_continuation_lines_joined(self, tmp_path): - """Lines without timestamps are joined to the previous entry.""" - log = tmp_path / "test.log" - log.write_text( - "2026-03-26 12:45:20,277 microbots.MicroBot INFO First line\n" - " continuation here\n", - encoding="utf-8", - ) - entries = parse_log_entries(str(log)) - assert len(entries) == 1 - assert "continuation here" in entries[0]["content"] - - def test_multiple_entries_both_formats(self, tmp_path): - """Mix of current and legacy format entries are all parsed.""" - log = tmp_path / "test.log" - log.write_text( - "2026-03-26 12:45:20,277 microbots.MicroBot INFO Current format\n" - "2026-03-26 12:45:21,000 [DEBUG] Legacy format\n", - encoding="utf-8", - ) - entries = parse_log_entries(str(log)) - assert len(entries) == 2 - assert entries[0]["content"] == "Current 
format" - assert entries[1]["content"] == "Legacy format" - assert entries[1]["module"] == "" - - def test_multiple_current_format_entries(self, tmp_path): - """Multiple sequential current-format entries are all captured.""" - log = tmp_path / "multi.log" - log.write_text( - "2026-03-26 12:45:20,277 microbots.MicroBot INFO First entry\n" - "2026-03-26 12:45:21,000 microbots.MicroBot INFO Second entry\n" - "2026-03-26 12:45:22,000 microbots.MicroBot INFO Third entry\n", - encoding="utf-8", - ) - entries = parse_log_entries(str(log)) - assert len(entries) == 3 - assert entries[0]["content"] == "First entry" - assert entries[1]["content"] == "Second entry" - assert entries[2]["content"] == "Third entry" - - def test_empty_log_returns_empty_list(self, tmp_path): - """An empty log file returns an empty list.""" - log = tmp_path / "empty.log" - log.write_text("", encoding="utf-8") - entries = parse_log_entries(str(log)) - assert entries == [] - - -# --------------------------------------------------------------------------- -# Unit tests — _extract_setup_info -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestExtractSetupInfo: - """Tests for the new _extract_setup_info() function.""" - - def _make_entry(self, content, level="INFO"): - return { - "timestamp": "2026-03-26 12:00:00,000", - "module": "test", - "level": level, - "content": content, - "line_num": 1, - } - - def test_extracts_container_info(self): - """Parses container ID, image, and host port from log entries.""" - entries = [ - self._make_entry( - "Started container abc123 with image ubuntu:22.04 on host port 8080" - ), - ] - setup = _extract_setup_info(entries) - assert setup.container_id == "abc123" - assert setup.image == "ubuntu:22.04" - assert setup.host_port == "8080" - - def test_extracts_working_directory(self): - """Parses working directory from log entries.""" - entries = [ - self._make_entry("Created working directory at /tmp/workspace"), 
- ] - setup = _extract_setup_info(entries) - assert setup.working_dir == "/tmp/workspace" - - def test_extracts_volume_mapping(self): - """Parses volume mappings from log entries.""" - entries = [ - self._make_entry("Volume mapping: /host/path:/container/path"), - ] - setup = _extract_setup_info(entries) - assert "/host/path:/container/path" in setup.volume_mappings - - def test_extracts_tools_installed(self): - """Parses installed tools from log entries.""" - entries = [ - self._make_entry("Successfully installed tool: git"), - self._make_entry("Successfully set up tool: docker"), - ] - setup = _extract_setup_info(entries) - assert "git" in setup.tools_installed - assert "docker" in setup.tools_installed - - def test_no_duplicate_tools(self): - """Same tool name is not added twice.""" - entries = [ - self._make_entry("Successfully installed tool: git"), - self._make_entry("Successfully installed tool: git"), - ] - setup = _extract_setup_info(entries) - assert setup.tools_installed.count("git") == 1 - - def test_extracts_files_copied(self): - """Parses copied files from log entries.""" - entries = [ - self._make_entry("Successfully copied repo to container: /workspace/repo"), - ] - setup = _extract_setup_info(entries) - assert len(setup.files_copied) == 1 - assert "repo" in setup.files_copied[0] - - def test_stops_at_task_started(self): - """Stops parsing setup info when TASK STARTED is encountered.""" - entries = [ - self._make_entry( - "Started container ctn1 with image img1 on host port 9000" - ), - self._make_entry("ℹ️ TASK STARTED : some task"), - # This entry comes AFTER task started and should be ignored - self._make_entry("Volume mapping: /should/not/be/included"), - ] - setup = _extract_setup_info(entries) - assert setup.container_id == "ctn1" - assert setup.volume_mappings == [] - - def test_empty_entries_returns_empty_setup(self): - """Returns a default SetupInfo when entries list is empty.""" - setup = _extract_setup_info([]) - assert setup.container_id 
== "" - assert setup.working_dir == "" - - def test_no_matching_entries_returns_empty_setup(self): - """Returns empty SetupInfo when no setup patterns match.""" - entries = [ - self._make_entry("Just some random log message"), - self._make_entry("Another random message"), - ] - setup = _extract_setup_info(entries) - assert setup.container_id == "" - assert setup.image == "" - assert setup.working_dir == "" - assert setup.volume_mappings == [] - - -# --------------------------------------------------------------------------- -# Unit tests — extract_task_from_microbot_sub -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestExtractTaskFromMicrobotSub: - """Tests for the new extract_task_from_microbot_sub() function.""" - - def test_extracts_task_with_iterations_flag(self): - """Extracts --task value when followed by --iterations.""" - cmd = 'microbot_sub --task "Do the thing" --iterations 10' - result = extract_task_from_microbot_sub(cmd) - assert result == "Do the thing" - - def test_extracts_task_with_timeout_flag(self): - """Extracts --task value when followed by --timeout.""" - cmd = 'microbot_sub --task "Run tests" --timeout 300' - result = extract_task_from_microbot_sub(cmd) - assert result == "Run tests" - - def test_extracts_task_at_end_of_command(self): - """Extracts --task value at the end of the command string.""" - cmd = 'microbot_sub --task "Final task"' - result = extract_task_from_microbot_sub(cmd) - assert result == "Final task" - - def test_falls_back_to_full_command_when_no_task(self): - """Returns the full command string when no --task flag is found.""" - cmd = "microbot_sub --some-other-arg value" - result = extract_task_from_microbot_sub(cmd) - assert result == cmd - - def test_handles_escaped_quotes(self): - """Handles escaped quotes in the command string.""" - cmd = r'microbot_sub --task "Task with \"quotes\"" --iterations 5' - result = extract_task_from_microbot_sub(cmd) - assert "Task 
with" in result - - def test_handles_multiline_task(self): - """Handles multi-line task descriptions.""" - cmd = 'microbot_sub --task "Line one\\nLine two" --iterations 5' - result = extract_task_from_microbot_sub(cmd) - assert "Line one" in result - - -# --------------------------------------------------------------------------- -# Unit tests — build_test_cases (new fields and new code paths) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestBuildTestCases: - """Tests for new code paths added to build_test_cases().""" - - def _make_entry(self, content, level="INFO"): - return { - "timestamp": "2026-03-26 12:00:00,000", - "module": "MicroBot", - "level": level, - "content": content, - "line_num": 1, - } - - def test_empty_entries_returns_empty_list(self): - """build_test_cases returns empty list for empty input.""" - assert build_test_cases([]) == [] - - def test_task_started_creates_agent(self): - """TASK STARTED creates a main agent with the task text.""" - entries = [ - self._make_entry("ℹ️ TASK STARTED : Do the work"), - self._make_entry("TASK COMPLETED successfully"), - ] - test_cases = build_test_cases(entries) - assert len(test_cases) == 1 - assert test_cases[0].main_agent is not None - assert test_cases[0].main_agent.completed is True - - def test_task_completed_sets_completed_flag(self): - """TASK COMPLETED sets agent.completed = True and clears current_field.""" - entries = [ - self._make_entry("ℹ️ TASK STARTED : Some task"), - self._make_entry("TASK COMPLETED"), - ] - test_cases = build_test_cases(entries) - assert test_cases[0].main_agent.completed is True - - def test_sub_agent_failed_sets_error_message(self): - """ERROR Sub-agent failed sets error_message on the sub-agent.""" - entries = [ - self._make_entry("ℹ️ TASK STARTED : Main task"), - self._make_entry("ℹ️ TASK STARTED : Sub task"), - self._make_entry("Sub-agent failed: timed out", level="ERROR"), - ] - test_cases = 
build_test_cases(entries) - assert len(test_cases) == 1 - assert len(test_cases[0].sub_agents) == 1 - assert test_cases[0].sub_agents[0].error_message == "Sub-agent failed: timed out" - assert test_cases[0].sub_agents[0].max_iterations_reached is True - - def test_failed_to_parse_microbot_sub_sets_blocked(self): - """ERROR Failed to parse microbot_sub command sets current_step as blocked.""" - from microbots.utils.multi_agent_log_parser import Step - entries = [ - self._make_entry("ℹ️ TASK STARTED : Main task"), - self._make_entry("LLM tool call: microbot_sub: bad command"), - self._make_entry( - "Failed to parse microbot_sub command: invalid syntax", level="ERROR" - ), - ] - test_cases = build_test_cases(entries) - # Should not raise and should produce a test case - assert len(test_cases) >= 1 - - -# --------------------------------------------------------------------------- -# Unit tests — truncate_text (new function) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestTruncateText: - """Tests for the new truncate_text() helper.""" - - def test_short_text_unchanged(self): - """Text within the line limit is returned as-is.""" - text = "Line one\nLine two\nLine three" - assert truncate_text(text, max_lines=10) == text - - def test_long_text_truncated(self): - """Text exceeding max_lines is truncated with a notice.""" - lines = [f"line {i}" for i in range(250)] - text = "\n".join(lines) - result = truncate_text(text, max_lines=200) - assert "truncated" in result - assert "50 more lines" in result - - def test_exact_limit_not_truncated(self): - """Text at exactly max_lines is NOT truncated.""" - lines = [f"line {i}" for i in range(200)] - text = "\n".join(lines) - result = truncate_text(text, max_lines=200) - assert "truncated" not in result - - -# --------------------------------------------------------------------------- -# Unit tests — generate_setup_md (new function) -# 
--------------------------------------------------------------------------- - -@pytest.mark.unit -class TestGenerateSetupMd: - """Tests for the new generate_setup_md() function.""" - - def test_empty_setup_returns_empty_string(self): - """Returns empty string when no container_id and no tools_installed.""" - setup = SetupInfo() - assert generate_setup_md(setup) == "" - - def test_setup_with_container_id_returns_markdown(self): - """Returns markdown when container_id is set.""" - setup = SetupInfo(container_id="abc123", image="ubuntu:22.04", host_port="8080") - md = generate_setup_md(setup) - assert "abc123" in md - assert "ubuntu:22.04" in md - assert "8080" in md - - def test_setup_with_working_dir(self): - """Includes working directory in output.""" - setup = SetupInfo(container_id="ctn1", working_dir="/workspace") - md = generate_setup_md(setup) - assert "/workspace" in md - - def test_setup_with_volume_mappings(self): - """Includes volume mappings in output.""" - setup = SetupInfo(container_id="ctn1", volume_mappings=["/host:/container"]) - md = generate_setup_md(setup) - assert "/host:/container" in md - - def test_setup_with_tools_only(self): - """Returns markdown when only tools_installed is set (no container_id).""" - setup = SetupInfo(tools_installed=["git", "docker"]) - md = generate_setup_md(setup) - assert "git" in md - assert "docker" in md - - def test_setup_with_files_copied(self): - """Includes files_copied section when files were copied.""" - setup = SetupInfo( - container_id="ctn1", - files_copied=["repo.py → /workspace/repo.py"], - ) - md = generate_setup_md(setup) - assert "Files copied" in md - assert "repo.py" in md - - -# --------------------------------------------------------------------------- -# Unit tests — _agent_status_str (new function) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAgentStatusStr: - """Tests for the new _agent_status_str() helper.""" - - def 
test_completed_agent_returns_completed(self): - """Returns completed string for completed agent.""" - agent = Agent(completed=True) - assert "Completed" in _agent_status_str(agent) - - def test_max_iterations_agent_returns_failed(self): - """Returns failed string for agent that hit max iterations.""" - agent = Agent(max_iterations_reached=True) - result = _agent_status_str(agent) - assert "Failed" in result - - def test_unknown_agent_returns_unknown(self): - """Returns unknown string for agent with no terminal state.""" - agent = Agent() - assert "Unknown" in _agent_status_str(agent)