From 7ab705460ecb9ae079d5100c730eec93019b9641 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Thu, 30 Apr 2026 19:58:36 -0700 Subject: [PATCH 1/2] Vibe-xample --- examples/mlx.yaml | 9 ++ examples/mlx/example/eval.py | 133 +++++++++++++++++++++++++ examples/mlx/example/reference.py | 29 ++++++ examples/mlx/example/submission.py | 6 ++ examples/mlx/example/task.yml | 32 ++++++ instructions.txt | 73 ++++++++++++++ src/envs.txt | 3 + src/kernelbot/api/main.py | 2 +- src/kernelbot/cogs/admin_cog.py | 5 +- src/kernelbot/main.py | 3 +- src/libkernelbot/consts.py | 7 +- src/libkernelbot/launchers/__init__.py | 3 +- src/libkernelbot/launchers/local.py | 30 ++++++ src/libkernelbot/run_eval.py | 16 ++- 14 files changed, 341 insertions(+), 10 deletions(-) create mode 100644 examples/mlx.yaml create mode 100644 examples/mlx/example/eval.py create mode 100644 examples/mlx/example/reference.py create mode 100644 examples/mlx/example/submission.py create mode 100644 examples/mlx/example/task.yml create mode 100644 instructions.txt create mode 100644 src/envs.txt create mode 100644 src/libkernelbot/launchers/local.py diff --git a/examples/mlx.yaml b/examples/mlx.yaml new file mode 100644 index 00000000..3afe2c2d --- /dev/null +++ b/examples/mlx.yaml @@ -0,0 +1,9 @@ +name: MLX Problem Set +deadline: "2026-05-01 03:59" +description: "Test MLX" +problems: + - directory: mlx/example + name: example_mlx + deadline: "2026-05-01 03:59" + gpus: + - M4_Max diff --git a/examples/mlx/example/eval.py b/examples/mlx/example/eval.py new file mode 100644 index 00000000..06ca6937 --- /dev/null +++ b/examples/mlx/example/eval.py @@ -0,0 +1,133 @@ +import math +import os +import re +import sys +import time +from pathlib import Path + +import mlx.core as mx + +from reference import check_implementation, generate_input +from submission import custom_kernel + +WARMUP_ITERS = 10 +BENCH_ITERS = 100 + + +class PopcornOutput: + def __init__(self, fd: int): + self.file = os.fdopen(fd, "w") + + def 
__enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.file.close() + + def log(self, key, value): + print(f"{key}: {value}", file=self.file, flush=True) + + +def get_test_cases(file_name): + content = Path(file_name).read_text() + tests = [] + pattern = r"\s*([a-zA-Z_]+):\s*([a-zA-Z_]+|[+-]?[0-9]+)\s*" + for line in content.splitlines(): + if not line.strip(): + continue + case = {} + for part in line.split(";"): + m = re.fullmatch(pattern, part) + if not m: + print(f"invalid test case: '{line}'", file=sys.stderr) + sys.exit(113) + key, val = m[1], m[2] + try: + val = int(val) + except ValueError: + pass + case[key] = val + tests.append(case) + return tests + + +def run_testing(logger, tests): + passed = True + logger.log("test-count", len(tests)) + for idx, test in enumerate(tests): + logger.log(f"test.{idx}.spec", test) + data = generate_input(**test) + output = custom_kernel(data) + mx.eval(output) + error = check_implementation(data, output) + if error: + logger.log(f"test.{idx}.status", "fail") + logger.log(f"test.{idx}.error", error) + passed = False + else: + logger.log(f"test.{idx}.status", "pass") + logger.log("check", "pass" if passed else "fail") + return 0 if passed else 112 + + +def run_benchmarking(logger, tests): + # warmup + data = generate_input(**tests[0]) + for _ in range(WARMUP_ITERS): + mx.eval(custom_kernel(data)) + + passed = True + logger.log("benchmark-count", len(tests)) + for idx, test in enumerate(tests): + logger.log(f"benchmark.{idx}.spec", test) + data = generate_input(**test) + mx.eval(data) + + output = custom_kernel(data) + mx.eval(output) + error = check_implementation(data, output) + if error: + logger.log(f"benchmark.{idx}.status", "fail") + logger.log(f"benchmark.{idx}.error", error) + passed = False + continue + + durations = [] + for i in range(BENCH_ITERS): + start = time.perf_counter_ns() + mx.eval(custom_kernel(data)) + durations.append(time.perf_counter_ns() - start) + if i > 1: + avg = 
sum(durations) / len(durations) + std = math.sqrt(sum((d - avg) ** 2 for d in durations) / (len(durations) - 1)) + if std / math.sqrt(len(durations)) / avg < 0.01: + break + + avg = sum(durations) / len(durations) + logger.log(f"benchmark.{idx}.runs", len(durations)) + logger.log(f"benchmark.{idx}.mean", avg) + + logger.log("check", "pass" if passed else "fail") + return 0 if passed else 112 + + +def main(): + fd = os.getenv("POPCORN_FD") + if not fd: + return 111 + if len(sys.argv) < 3: + return 2 + + mode = sys.argv[1] + tests = get_test_cases(sys.argv[2]) + + with PopcornOutput(int(fd)) as logger: + if mode == "test": + return run_testing(logger, tests) + if mode in ("benchmark", "leaderboard"): + return run_benchmarking(logger, tests) + return 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/examples/mlx/example/reference.py b/examples/mlx/example/reference.py new file mode 100644 index 00000000..dc487dd4 --- /dev/null +++ b/examples/mlx/example/reference.py @@ -0,0 +1,29 @@ +import mlx.core as mx + + +ATOL = 1e-3 +RTOL = 1e-3 + + +def generate_input(size, seed=42): + mx.random.seed(seed) + A = mx.random.normal(shape=(size, size)).astype(mx.float16) + B = mx.random.normal(shape=(size, size)).astype(mx.float16) + mx.eval(A, B) + return A, B + + +def reference_kernel(data): + A, B = data + return A + B + + +def check_implementation(data, output): + expected = reference_kernel(data) + mx.eval(expected) + if output.shape != expected.shape: + return f"shape mismatch: expected {expected.shape}, got {output.shape}" + if not mx.allclose(output, expected, atol=ATOL, rtol=RTOL).item(): + max_diff = mx.max(mx.abs(output - expected)).item() + return f"mismatch found! 
max diff: {max_diff}" + return "" diff --git a/examples/mlx/example/submission.py b/examples/mlx/example/submission.py new file mode 100644 index 00000000..9573201c --- /dev/null +++ b/examples/mlx/example/submission.py @@ -0,0 +1,6 @@ +import mlx.core as mx + + +def custom_kernel(data): + A, B = data + return A + B diff --git a/examples/mlx/example/task.yml b/examples/mlx/example/task.yml new file mode 100644 index 00000000..1645eeda --- /dev/null +++ b/examples/mlx/example/task.yml @@ -0,0 +1,32 @@ +files: + - {"name": "submission.py", "source": "@SUBMISSION@"} + - {"name": "reference.py", "source": "reference.py"} + - {"name": "eval.py", "source": "eval.py"} + +lang: "py" + +description: | + Implement a float16 vector addition kernel using MLX. + + Input: tuple(mx.array, mx.array) with arrays of shape (N, N) and type mx.float16. + Output: mx.array of shape (N, N) and type mx.float16 + +config: + main: "eval.py" + +tests: + - {"size": 128, "seed": 5236} + - {"size": 256, "seed": 5531} + - {"size": 512, "seed": 9173} + +benchmarks: + - {"size": 1024, "seed": 31232} + - {"size": 4096, "seed": 2146} + - {"size": 16384, "seed": 54352} + +test_timeout: 180 +benchmark_timeout: 180 +ranked_timeout: 180 + +gpus: + - M4_Max diff --git a/instructions.txt b/instructions.txt new file mode 100644 index 00000000..74f50f36 --- /dev/null +++ b/instructions.txt @@ -0,0 +1,73 @@ +## Changes Summary + +### New files +- src/libkernelbot/launchers/local.py — LocalLauncher that runs submissions directly on the host machine via run_config(). Blocks CUDA submissions. + +### Modified files — Adding Metal/MLX support + +1. src/libkernelbot/consts.py + - Added MetalGPU enum (M4_Max) + - Registered it in _GPU_LOOKUP under "Local" runner + - Added M4_Max: None to GPU_TO_SM + +2. src/libkernelbot/launchers/__init__.py — Exports LocalLauncher + +3. src/kernelbot/main.py — Registers LocalLauncher() in create_backend() + +4. 
src/kernelbot/cogs/admin_cog.py — Added MetalGPU to Discord GPU dropdowns + +### Modified files — Bug fixes for macOS compatibility + +5. src/libkernelbot/run_eval.py — Three fixes in make_system_info(): + - Added MPS/Metal detection via torch.backends.mps + - Catch FileNotFoundError for nvidia-smi/rocm-smi (don't exist on macOS) + - Catch FileNotFoundError for /proc/cpuinfo (doesn't exist on macOS) + +6. src/kernelbot/api/main.py — Replace / with _ in auto-derived dev leaderboard names so nested directories don't break API routing + +--- + +## Manual Test Steps + +# 1. Start Postgres (if not already running) +brew services start postgresql@14 + +# 2. Create DB and run migrations +export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot" +createdb kernelbot # skip if already exists +cd /path/to/kernelbot +uv run yoyo apply --database "$DATABASE_URL" src/migrations/ + +# 3. Create test user +psql "$DATABASE_URL" -c "INSERT INTO leaderboard.user_info (id, user_name, cli_id, cli_valid) +VALUES ('999999', 'testuser', 'test-cli-id-123', true) +ON CONFLICT (id) DO UPDATE SET cli_id = 'test-cli-id-123', cli_valid = true;" + +# 4. Install mlx +uv pip install mlx + +# 5. Start the API server +cd src/kernelbot +export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot" +export ADMIN_TOKEN="your-admin-token" +export PROBLEM_DEV_DIR="/path/to/kernelbot/examples" +export GITHUB_TOKEN="dummy" +export GITHUB_REPO="dummy/dummy" +export DISABLE_SSL=1 +uv run python main.py --api-only + +# 6. (In another terminal) Create the dev leaderboard +curl -X POST "http://localhost:8000/admin/leaderboards" \ + -H "Authorization: Bearer your-admin-token" \ + -H "Content-Type: application/json" \ + -d '{"directory": "mlx/example"}' + +# 7. Submit a test +curl -X POST "http://localhost:8000/mlx_example-dev/M4_Max/test" \ + -H "X-Popcorn-Cli-Id: test-cli-id-123" \ + -F "file=@examples/mlx/example/submission.py" + +# 8. 
Submit a benchmark +curl -X POST "http://localhost:8000/mlx_example-dev/M4_Max/benchmark" \ + -H "X-Popcorn-Cli-Id: test-cli-id-123" \ + -F "file=@examples/mlx/example/submission.py" diff --git a/src/envs.txt b/src/envs.txt new file mode 100644 index 00000000..2da48879 --- /dev/null +++ b/src/envs.txt @@ -0,0 +1,3 @@ +export DATABASE_URL="postgresql://$(whoami)@localhost:5432/kernelbot" +export ADMIN_TOKEN="your-admin-token" +export PROBLEM_DEV_DIR="/Users/jackkhuu/Desktop/oss/reference-kernels/problems" diff --git a/src/kernelbot/api/main.py b/src/kernelbot/api/main.py index 3417efa1..05b1847d 100644 --- a/src/kernelbot/api/main.py +++ b/src/kernelbot/api/main.py @@ -644,7 +644,7 @@ async def create_dev_leaderboard( definition = make_task_definition(directory_path) # Auto-derive name and deadline like admin_cog.leaderboard_create_local - leaderboard_name = f"{directory}-dev" + leaderboard_name = f"{directory.replace('/', '_')}-dev" deadline_value = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=365) # GPUs must be specified in task.yml diff --git a/src/kernelbot/cogs/admin_cog.py b/src/kernelbot/cogs/admin_cog.py index 8b21747a..e39f3c2b 100644 --- a/src/kernelbot/cogs/admin_cog.py +++ b/src/kernelbot/cogs/admin_cog.py @@ -19,7 +19,7 @@ ) from kernelbot.env import env from kernelbot.ui.misc import ConfirmationView, DeleteConfirmationModal, GPUSelectionView -from libkernelbot.consts import GitHubGPU, ModalGPU +from libkernelbot.consts import GitHubGPU, MetalGPU, ModalGPU from libkernelbot.leaderboard_db import LeaderboardDoesNotExist, LeaderboardItem, SubmissionItem from libkernelbot.task import LeaderboardDefinition, make_task_definition from libkernelbot.utils import ( @@ -208,6 +208,7 @@ async def unban_user(self, interaction: discord.Interaction, user_id: str): @app_commands.choices( gpu=[app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in GitHubGPU] + [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in ModalGPU] + 
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in MetalGPU] ) @with_error_handling async def leaderboard_create_local( @@ -386,7 +387,7 @@ async def create_leaderboard_in_db( if gpu is None: # Ask the user to select GPUs view = GPUSelectionView( - [gpu.name for gpu in GitHubGPU] + [gpu.name for gpu in ModalGPU] + [gpu.name for gpu in GitHubGPU] + [gpu.name for gpu in ModalGPU] + [gpu.name for gpu in MetalGPU] ) await send_discord_message( diff --git a/src/kernelbot/main.py b/src/kernelbot/main.py index 517fad02..f8c61eec 100644 --- a/src/kernelbot/main.py +++ b/src/kernelbot/main.py @@ -16,7 +16,7 @@ from libkernelbot import consts from libkernelbot.backend import KernelBackend from libkernelbot.background_submission_manager import BackgroundSubmissionManager -from libkernelbot.launchers import GitHubLauncher, ModalLauncher +from libkernelbot.launchers import GitHubLauncher, LocalLauncher, ModalLauncher from libkernelbot.utils import setup_logging logger = setup_logging(__name__) @@ -29,6 +29,7 @@ def create_backend(debug_mode: bool = False) -> KernelBackend: backend.register_launcher( GitHubLauncher(env.GITHUB_REPO, env.GITHUB_TOKEN, env.GITHUB_WORKFLOW_BRANCH) ) + backend.register_launcher(LocalLauncher()) return backend diff --git a/src/libkernelbot/consts.py b/src/libkernelbot/consts.py index 55113e76..7faa7cee 100644 --- a/src/libkernelbot/consts.py +++ b/src/libkernelbot/consts.py @@ -35,6 +35,10 @@ class ModalGPU(Enum): L4x4 = "L4x4" +class MetalGPU(Enum): + M4_Max = "M4_Max" + + @dataclasses.dataclass class GPU: name: str @@ -52,7 +56,7 @@ def _make_gpu_lookup(runner_map: dict[str, Type[Enum]]): return lookup -_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU}) +_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU, "Local": MetalGPU}) def get_gpu_by_name(name: str) -> GPU: @@ -132,6 +136,7 @@ class RankCriterion(Enum): "MI300x8": None, "MI250": None, "MI355X": None, + "M4_Max": None, } diff --git 
a/src/libkernelbot/launchers/__init__.py b/src/libkernelbot/launchers/__init__.py index df47476f..799fb2a8 100644 --- a/src/libkernelbot/launchers/__init__.py +++ b/src/libkernelbot/launchers/__init__.py @@ -1,5 +1,6 @@ from .github import GitHubLauncher from .launcher import Launcher +from .local import LocalLauncher from .modal import ModalLauncher -__all__ = [Launcher, GitHubLauncher, ModalLauncher] +__all__ = ["Launcher", "GitHubLauncher", "LocalLauncher", "ModalLauncher"] diff --git a/src/libkernelbot/launchers/local.py b/src/libkernelbot/launchers/local.py new file mode 100644 index 00000000..e002ef1d --- /dev/null +++ b/src/libkernelbot/launchers/local.py @@ -0,0 +1,30 @@ +import asyncio + +from libkernelbot.consts import GPU, MetalGPU +from libkernelbot.report import RunProgressReporter +from libkernelbot.run_eval import FullResult, run_config +from libkernelbot.utils import setup_logging + +from .launcher import Launcher + +logger = setup_logging(__name__) + + +class LocalLauncher(Launcher): + def __init__(self): + super().__init__("Local", gpus=MetalGPU) + + async def run_submission( + self, config: dict, gpu_type: GPU, status: RunProgressReporter + ) -> FullResult: + if config["lang"] == "cu": + raise NotImplementedError("CUDA is not supported on Metal GPUs") + + logger.info(f"Starting local run for {gpu_type.name}") + await status.push(f"⏳ Running locally on {gpu_type.name}...") + + loop = asyncio.get_running_loop() # sync run_config goes to the default executor + result = await loop.run_in_executor(None, run_config, config) + + await status.update(f"✅ Local run on {gpu_type.name} complete") + return result diff --git a/src/libkernelbot/run_eval.py b/src/libkernelbot/run_eval.py index aec59f95..48104a25 100644 --- a/src/libkernelbot/run_eval.py +++ b/src/libkernelbot/run_eval.py @@ -590,6 +590,15 @@ def make_system_info() -> SystemInfo: # noqa: C901 info.runtime = "ROCm" elif torch.version.cuda is not None: info.runtime = "CUDA" + elif hasattr(torch.backends, "mps") and 
torch.backends.mps.is_available(): + info.runtime = "MPS" + info.device_count = 1 + try: + info.gpu = subprocess.check_output( + ["sysctl", "-n", "machdep.cpu.brand_string"], encoding="utf-8" + ).strip() + except (subprocess.CalledProcessError, FileNotFoundError): + info.gpu = "Apple Silicon" except ImportError: # get GPU info manually try: @@ -598,7 +607,7 @@ def make_system_info() -> SystemInfo: # noqa: C901 ) info.device_count = info.gpu.count("\n") info.runtime = "CUDA" - except subprocess.CalledProcessError: + except (subprocess.CalledProcessError, FileNotFoundError): # try again for HIP try: rocm_info = json.loads( @@ -611,7 +620,7 @@ def make_system_info() -> SystemInfo: # noqa: C901 info.device_count = len(rocm_info) info.runtime = "ROCm" - except subprocess.CalledProcessError: + except (subprocess.CalledProcessError, FileNotFoundError): # OK, no GPU info available pass @@ -628,8 +637,7 @@ def make_system_info() -> SystemInfo: # noqa: C901 # ¯\_(ツ)_/¯ info.cpu = cpu_info_dict.get("vendor_id", "") - except PermissionError: - # nothing we can do here; we're not getting CPU info + except (PermissionError, FileNotFoundError): pass import platform From a85a79bd0e53d9424337f5b5b5fc5f0691b89d45 Mon Sep 17 00:00:00 2001 From: Jack-Khuu Date: Fri, 1 May 2026 15:26:16 -0700 Subject: [PATCH 2/2] untested selfhost as runner --- src/kernelbot/main.py | 12 ++++++++---- src/libkernelbot/consts.py | 5 +++++ src/libkernelbot/launchers/github.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/kernelbot/main.py b/src/kernelbot/main.py index f8c61eec..c5a98580 100644 --- a/src/kernelbot/main.py +++ b/src/kernelbot/main.py @@ -26,10 +26,14 @@ def create_backend(debug_mode: bool = False) -> KernelBackend: """Create and configure a KernelBackend with launchers.""" backend = KernelBackend(env=env, debug_mode=debug_mode) backend.register_launcher(ModalLauncher(consts.MODAL_CUDA_INCLUDE_DIRS)) - backend.register_launcher( - 
GitHubLauncher(env.GITHUB_REPO, env.GITHUB_TOKEN, env.GITHUB_WORKFLOW_BRANCH) - ) - backend.register_launcher(LocalLauncher()) + github_launcher = GitHubLauncher(env.GITHUB_REPO, env.GITHUB_TOKEN, env.GITHUB_WORKFLOW_BRANCH) + backend.register_launcher(github_launcher) + + if os.environ.get("METAL_LAUNCHER") == "arc": + for gpu in consts.MetalGPU: + backend.launcher_map[gpu.value] = github_launcher + else: + backend.register_launcher(LocalLauncher()) return backend diff --git a/src/libkernelbot/consts.py b/src/libkernelbot/consts.py index 7faa7cee..7dd1ea4c 100644 --- a/src/libkernelbot/consts.py +++ b/src/libkernelbot/consts.py @@ -172,5 +172,10 @@ class RankCriterion(Enum): numpy """ +METAL_REQUIREMENTS = """ +mlx +numpy +""" + # A buffer for timeouts to account for github setup time TIMEOUT_BUFFER_MINUTES = 2 diff --git a/src/libkernelbot/launchers/github.py b/src/libkernelbot/launchers/github.py index 289f7c66..404cf7ea 100644 --- a/src/libkernelbot/launchers/github.py +++ b/src/libkernelbot/launchers/github.py @@ -24,9 +24,11 @@ AMD_REQUIREMENTS, DEFAULT_GITHUB_TIMEOUT_MINUTES, GPU, + METAL_REQUIREMENTS, NVIDIA_REQUIREMENTS, TIMEOUT_BUFFER_MINUTES, GitHubGPU, + MetalGPU, SubmissionMode, ) from libkernelbot.report import RunProgressReporter @@ -115,6 +117,11 @@ async def run_submission( # noqa: C901 runner_name = None gpu_vendor = "NVIDIA" requirements = NVIDIA_REQUIREMENTS + elif gpu_type.value in [g.value for g in MetalGPU]: + selected_workflow = "metal_workflow.yml" + runner_name = "arc-metal-runner-set" + gpu_vendor = "Apple" + requirements = METAL_REQUIREMENTS else: raise ValueError(f"Invalid GPU type: {gpu_type.value}") @@ -122,6 +129,8 @@ async def run_submission( # noqa: C901 if lang == "cu" and gpu_vendor == "AMD": # TODO implement HIP raise NotImplementedError("Cannot use CUDA runs with AMD GPUs") + if lang == "cu" and gpu_vendor == "Apple": + raise NotImplementedError("CUDA is not supported on Metal GPUs") lang_name = {"py": "Python", "cu": 
"CUDA"}[lang] @@ -311,6 +320,8 @@ async def trigger(self, inputs: dict) -> bool: # noqa: C901 expected_run_name = f"NVIDIA Job - {run_id}" elif self.workflow_file == "helion_workflow.yml": expected_run_name = f"Helion Job - {run_id}" + elif self.workflow_file == "metal_workflow.yml": + expected_run_name = f"Metal Job - {run_id}" else: raise ValueError(f"Unknown workflow file: {self.workflow_file}")