Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
# Runtime artifacts
.supervisor.log
.supervisor.lock
.supervisor_snapshot/
.logger_runs/
.log/
.mle_log.jsonl
gym_log.json

# Python/editor cruft
__pycache__/
*.egg-info/
dist/
build/
.DS_Store
*.pyc
.DS_Store

# gym-environment
.claudeignore
.copilotignore
.cursorignore
.cursorrules
.geminiignore
.github
.gitignore
AGENTS.md
CLAUDE.md
GEMINI.md
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ aicodinggym configure --user-id USER_ID [--workspace-dir DIR]

#### `aicodinggym swe fetch PROBLEM_ID`

Fetch a problem and clone the repo locally.
Fetch a problem and clone the repo locally. After a successful `swe fetch`, `mle download`, or `cr fetch`, the CLI downloads agent instruction files from [AICodingGym/gym-environment](https://github.com/AICodingGym/gym-environment) via the GitHub Contents API. By default it uses the **`test` branch**. Override with environment variables:

- `AICODINGGYM_GYM_ENV_REPO` — `owner/repo` (default: `AICodingGym/gym-environment`)
- `AICODINGGYM_GYM_ENV_REF` — branch, tag, or commit SHA sent as the Contents API `?ref=` query parameter (default: `test` when unset)

```
aicodinggym swe fetch PROBLEM_ID [--user-id ID] [--workspace-dir DIR]
Expand Down
45 changes: 43 additions & 2 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,44 @@
"""AI Coding Gym CLI."""
"""AI Coding Gym CLI.

__version__ = "0.3.0"
Imports are lazy so tooling that loads this file without package context
(e.g. some pytest collection paths) does not fail on relative imports.
"""

from __future__ import annotations

import importlib
import importlib.metadata
from typing import TYPE_CHECKING, Any

# Resolve the version from the installed distribution's metadata instead of
# hard-coding it in this file.
try:
__version__ = importlib.metadata.version("aicodinggym-cli")
except importlib.metadata.PackageNotFoundError: # pragma: no cover - dev without install
# No installed "aicodinggym-cli" distribution was found; use a placeholder.
__version__ = "0.0.0"

# Public names of the package. Everything except __version__ is resolved on
# first access by the module-level __getattr__ defined below.
__all__ = [
"__version__",
"ExperimentLog",
"LogEntry",
"capture_mle_provenance",
"log_entry",
"print_summary",
"set_log_path",
"gym_logger",
]


def __getattr__(name: str) -> Any:
    """Resolve the package's public names lazily on first attribute access.

    ``gym_logger`` yields the ``aicodinggym.gym_logger`` module itself. The
    other supported names are looked up as attributes of the submodule that
    defines them. Any other name raises ``AttributeError``.
    """
    if name == "gym_logger":
        return importlib.import_module("aicodinggym.gym_logger")

    # Public attribute -> module that provides it.
    providers = {
        "ExperimentLog": "aicodinggym.experiment_log",
        "LogEntry": "aicodinggym.experiment_log",
        "capture_mle_provenance": "aicodinggym.experiment_log",
        "log_entry": "aicodinggym.gym_logger",
        "print_summary": "aicodinggym.gym_logger",
        "set_log_path": "aicodinggym.gym_logger",
    }
    module_path = providers.get(name)
    if module_path is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return getattr(importlib.import_module(module_path), name)


if TYPE_CHECKING:
from .experiment_log import ExperimentLog, LogEntry, capture_mle_provenance
from .gym_logger import log_entry, print_summary, set_log_path
85 changes: 78 additions & 7 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,21 @@ def fetch_problem(user_id: str, problem_id: str) -> dict:


def submit_notification(problem_id: str, user_id: str, commit_hash: str,
                        branch: str, commit_message: str, timestamp: str,
                        tool: str | None = None,
                        tool_version: str | None = None,
                        ai_model: str | None = None) -> dict:
    """Report a SWE submission to the backend.

    Posts the commit details to the ``submissions`` endpoint together with
    the optional tool/model attribution fields. Attribution values left as
    ``None`` are still included in the payload.

    Returns:
        The result of the ``_post`` call.
    """
    commit_details = {
        "problem_id": problem_id,
        "user_id": user_id,
        "commit_hash": commit_hash,
        "branch": branch,
        "commit_message": commit_message,
        "timestamp": timestamp,
    }
    attribution = {
        "tool": tool,
        "tool_version": tool_version,
        "ai_model": ai_model,
    }
    return _post("submissions", {**commit_details, **attribution})


Expand All @@ -93,18 +99,73 @@ def fetch_pr(user_id: str, problem_id: str) -> dict:
return _post("code-review-fetch", {"user_id": user_id, "problem_id": problem_id})


def cr_submit_review(user_id: str, problem_id: str, review: str,
                     tool: str | None = None,
                     tool_version: str | None = None,
                     ai_model: str | None = None) -> dict:
    """Send a code review to the ``code-review-submit`` endpoint.

    The review text is posted with the user and problem identifiers and the
    optional tool/model attribution fields (sent even when ``None``).

    Returns:
        The result of the ``_post`` call.
    """
    body = dict(
        user_id=user_id,
        problem_id=problem_id,
        review=review,
        tool=tool,
        tool_version=tool_version,
        ai_model=ai_model,
    )
    return _post("code-review-submit", body)


def notify_mle_progress(user_id: str, problem_slug: str, best_percentile: float,
                        tool: str | None = None,
                        tool_version: str | None = None,
                        ai_model: str | None = None) -> dict:
    """Record an MLE-bench result on the user's progress endpoint.

    Intended to be called once a grade has been returned. It posts the
    problem slug, the best percentile and the tool/model attribution to
    ``users/<user_id>/progress``. The status is always sent as ``"solved"``.

    Returns:
        The result of the ``_post`` call.
    """
    endpoint = f"users/{user_id}/progress"
    return _post(endpoint, {
        "problemSlug": problem_slug,
        "status": "solved",
        "bestPercentile": best_percentile,
        "tool": tool,
        "tool_version": tool_version,
        "ai_model": ai_model,
    })


def mlebench_download_info(user_id: str, competition_id: str, dest_path: str) -> None:
    """Download the dataset for an MLE-bench competition directly to dest_path.

    Uses a long read timeout: large zips can take many minutes between chunks
    over slow links, and a short read timeout would abort mid-stream. The read
    timeout defaults to 7200 seconds and can be overridden with the
    ``AICODINGGYM_DOWNLOAD_READ_TIMEOUT`` environment variable (whole
    seconds). Unset, non-numeric or non-positive values fall back to the
    default. The connect timeout is fixed at 120 seconds.

    Args:
        user_id: Not used by this function; kept so the signature is unchanged.
        competition_id: Competition whose dataset is requested.
        dest_path: File path the response body is written to (binary mode).

    Raises:
        APIError: If the request cannot be made, the server answers with an
            HTTP error status, or the transfer fails while streaming.
    """
    try:
        read_s = int(os.environ.get("AICODINGGYM_DOWNLOAD_READ_TIMEOUT", "0"))
    except ValueError:
        # A malformed override must not crash the download; use the default.
        read_s = 0
    if read_s <= 0:
        read_s = 7200  # seconds between reads; large zips need headroom

    url = f"{API_BASE}/competitions/{competition_id}/download"
    try:
        resp = requests.get(
            url,
            stream=True,
            timeout=(120, read_s),
        )
        resp.raise_for_status()
    except requests.ConnectionError as e:
        raise APIError(
            f"Cannot connect to {API_BASE}.\n"
            "Check your internet connection and try again."
        ) from e
    except requests.Timeout as e:
        raise APIError(f"Download from {url} timed out.") from e
    except requests.HTTPError as e:
        # Prefer the structured "detail" field; fall back to the raw body.
        body = ""
        try:
            body = e.response.json().get("detail", e.response.text)
        except Exception:
            body = e.response.text
        raise APIError(f"API error (HTTP {e.response.status_code}): {body}") from e
    except requests.RequestException as e:
        raise APIError(f"Request failed: {e}") from e

    # With stream=True the body is only read here, so read timeouts and
    # dropped connections surface during iteration, not in requests.get().
    try:
        with open(dest_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=8192):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)
    except requests.RequestException as e:
        raise APIError(f"Download failed: {e}") from e
    finally:
        # Always release the streamed connection.
        resp.close()
Expand All @@ -122,15 +183,25 @@ def mlebench_download_file(url: str, dest_path: str, timeout: int = 300) -> None
raise APIError(f"Download failed: {e}")


def mlebench_submit_csv(user_id: str, competition_id: str, csv_path: str) -> dict:
def mlebench_submit_csv(user_id: str, competition_id: str, csv_path: str,
tool: str | None = None,
tool_version: str | None = None,
ai_model: str | None = None) -> dict:
"""Upload a prediction CSV for an MLE-bench competition."""
try:
csv_name = Path(csv_path).name
with open(csv_path, "rb") as f:
compressed = gzip.compress(f.read())
form = {
"user_id": user_id,
"competition_id": competition_id,
"tool": tool or "",
"tool_version": tool_version or "",
"ai_model": ai_model or "",
}
resp = requests.post(
f"{API_BASE}/competitions/{competition_id}/submit",
data={"user_id": user_id, "competition_id": competition_id},
data=form,
files={"file": (csv_name + ".gz", compressed, "application/gzip")},
timeout=120,
)
Expand Down
Loading