diff --git a/demos/integration_with_OpenHands/.env.example b/demos/integration_with_OpenHands/.env.example new file mode 100644 index 0000000000..5570f03116 --- /dev/null +++ b/demos/integration_with_OpenHands/.env.example @@ -0,0 +1,5 @@ +# Hugging Face token for gated models +HF_TOKEN= + +# Optional: Custom model (overrides default) +MODEL_ID= diff --git a/demos/integration_with_OpenHands/.gitignore b/demos/integration_with_OpenHands/.gitignore new file mode 100644 index 0000000000..e10ffa5042 --- /dev/null +++ b/demos/integration_with_OpenHands/.gitignore @@ -0,0 +1,10 @@ +# Environment variables (local overrides) +.env + +# Screenshots (generated locally) +screenshots/*.png +screenshots/*.jpg +screenshots/*.jpeg + +# Log files +*.log diff --git a/demos/integration_with_OpenHands/.openhands/.jwt_secret b/demos/integration_with_OpenHands/.openhands/.jwt_secret new file mode 100644 index 0000000000..d64a44f117 --- /dev/null +++ b/demos/integration_with_OpenHands/.openhands/.jwt_secret @@ -0,0 +1 @@ +9bd25fb8eb154921bfda8900088fe83b \ No newline at end of file diff --git a/demos/integration_with_OpenHands/ADVANCED_DEPLOYMENT.md b/demos/integration_with_OpenHands/ADVANCED_DEPLOYMENT.md new file mode 100644 index 0000000000..4c12c390aa --- /dev/null +++ b/demos/integration_with_OpenHands/ADVANCED_DEPLOYMENT.md @@ -0,0 +1,453 @@ +# Advanced Deployment Guide + +This document contains detailed deployment and implementation reference material for the OpenHands + OVMS integration. For the Quick Start guide, see [README.md](README.md). + +--- + +## Request Flow + +1. User creates an agent task in the OpenHands web UI +2. OpenHands sends `POST /v3/chat/completions` requests to OVMS +3. OVMS routes requests through the MediaPipe LLM graph +4. OpenVINO inference engine processes the model +5. OVMS returns the completion (possibly with tool calls) +6. 
OpenHands parses the response and continues the agent loop + +--- + +## Why OpenHands Requires Additional Configuration + +Unlike simple chat UIs, OpenHands has specific requirements: + +- **Model prefix:** OpenHands expects `openai/<model_name>` format in `LLM_MODEL` +- **API key placeholder:** A non-empty `LLM_API_KEY` is required even though OVMS doesn't authenticate +- **Stable networking:** Container-to-container communication on a shared Docker network +- **Docker socket access:** OpenHands creates runtime sandbox containers for code execution +- **Resource limits:** Sandbox memory limits prevent runaway agent processes + +--- + +## OVMS `--source_model` Workflow + +OVMS provides native model retrieval and preparation through the `--source_model` parameter: + +```bash +docker run --rm -v ${HOME}/ovms-openhands/models:/models \ + openvino/model_server:latest \ + --source_model OpenVINO/Qwen3-8b-int8-ov \ + --model_repository_path /models \ + --model_name qwen3-8b-int8-ov \ + --task text_generation \ + --target_device CPU +``` + +This command downloads the model from Hugging Face, converts to OpenVINO IR format if needed, generates the MediaPipe LLM graph, and stores artifacts in the specified model repository. + +--- + +## Model Workspace Layout + +After running the `--source_model` workflow, the model directory contains: + +```text +${HOME}/ovms-openhands/models/ +└── qwen3-8b-int8-ov/ + ├── openvino_model.xml # OpenVINO model structure + ├── openvino_model.bin # Model weights + ├── graph.pbtxt # MediaPipe LLM graph configuration + └── ....... +``` + +This external storage keeps the Git repository lightweight and allows model reuse across OVMS deployments. + +--- + +## Manual Deployment Workflow + +You can deploy using Docker commands directly without the helper scripts. This approach is useful for debugging and customization. + +**Repository not required:** These commands can be executed from any directory on a Linux system with Docker installed. 
The OpenHands state directory (`.openhands`) will be created relative to your current working directory. + +### Step 1: Set environment variables + +```bash +# Model configuration +export MODEL_ID="OpenVINO/Qwen3-8b-int8-ov" +export LOCAL_NAME="qwen3-8b-int8-ov" +export TARGET_DEVICE="CPU" +export TOOL_PARSER="hermes3" +export MODEL_CACHE_DIR="${HOME}/ovms-openhands/models" +export HF_TOKEN="${HF_TOKEN:-}" +``` + +### Step 2: Create the model cache directory + +```bash +mkdir -p "$MODEL_CACHE_DIR" +``` + +> **Note:** OVMS runs as a non-root user inside the container. The mounted model cache directory must be writable by the OVMS container user. If OVMS fails during startup with permission errors when creating model directories, verify permissions on the model cache directory. + +### Step 3: Deploy OVMS + +```bash +# Create the Docker network +docker network create ovms-net 2>/dev/null || true + +# Run OVMS container +docker run -d \ + --name ovms-llm \ + --network ovms-net \ + --publish 8000:8000 \ + --publish 9000:9000 \ + --device /dev/dri:/dev/dri \ + --volume "$MODEL_CACHE_DIR:/models:rw" \ + --env HF_TOKEN="${HF_TOKEN:-}" \ + --restart unless-stopped \ + openvino/model_server:latest \ + --model_repository_path /models \ + --source_model "$MODEL_ID" \ + --model_name "$LOCAL_NAME" \ + --task text_generation \ + --target_device "$TARGET_DEVICE" \ + --port 9000 \ + --rest_port 8000 \ + --tool_parser "$TOOL_PARSER" +``` + +This command downloads the model from Hugging Face (if not cached), converts to OpenVINO IR format (if needed), generates the MediaPipe LLM graph, and starts the OVMS server with the OpenAI-compatible REST API. 
+ +### Step 4: Deploy OpenHands + +```bash +# Run OpenHands container +docker run -d \ + --name openhands \ + --network ovms-net \ + --publish 3000:3000 \ + --add-host host.docker.internal:host-gateway \ + --volume /var/run/docker.sock:/var/run/docker.sock \ + --volume "$(pwd)/.openhands:/.openhands" \ + --env LLM_BASE_URL="http://ovms-llm:8000/v3" \ + --env LLM_MODEL="openai/${LOCAL_NAME}" \ + --env LLM_API_KEY="unused" \ + --env LLM_TEMPERATURE="0.0" \ + --env LLM_MAX_OUTPUT_TOKENS="500" \ + --env LLM_MAX_INPUT_TOKENS="4096" \ + --env LLM_TIMEOUT="120000" \ + --env SANDBOX_DOCKER_ARGS="--memory=1536m --memory-swap=1536m" \ + --restart unless-stopped \ + ghcr.io/all-hands-ai/openhands:latest +``` + +The `--add-host` mapping allows the OpenHands container to reach host services if needed. It is optional for basic OVMS communication. + +### Step 5: Wait for OVMS to be ready + +OVMS needs time to download and initialize the model. Check the status: + +```bash +# One-shot check; re-run until the model reports AVAILABLE +curl -sf http://localhost:8000/v1/config | grep AVAILABLE +``` + +Or check container logs: + +```bash +docker logs ovms-llm +``` + +--- + +## Understanding `docker-compose.yml` + +The `docker-compose.yml` file documents the service architecture. You can achieve the same result with the manual Docker commands above. + +### Service: ovms-llm + +```yaml +ovms-llm: + image: openvino/model_server:latest + container_name: ovms-llm +``` + +The `container_name` provides a stable hostname for OpenHands to reach OVMS. + +**Device mapping:** +```yaml +devices: + - /dev/dri:/dev/dri +``` + +Provides GPU device access. For CPU-only deployments, this can be removed. + +**Port publishing:** +```yaml +ports: + - "8000:8000" # REST API + - "9000:9000" # gRPC API +``` + +Exposes the OpenAI-compatible REST API (8000) and gRPC API (9000). 
+ +**Volume mount:** +```yaml +volumes: + - ${MODEL_CACHE_DIR:-./docker/models}:/models:rw +``` + +Mounts the model cache directory where OVMS materializes models via `--source_model`. The script sets `MODEL_CACHE_DIR` to `${HOME}/ovms-openhands/models` by default; the compose file fallback is `./docker/models` if the variable is unset. + +**Environment:** +```yaml +environment: + HF_TOKEN: ${HF_TOKEN:-} +``` + +Passes the Hugging Face token for gated models. + +**Command:** +```yaml +command: + - --model_repository_path /models + - --source_model ${MODEL_ID} + - --model_name ${LOCAL_NAME} + - --task text_generation + - --target_device ${TARGET_DEVICE} + - --port "9000" + - --rest_port "8000" + - --tool_parser ${TOOL_PARSER} +``` + +Configures OVMS to use `/models` as the repository, download from Hugging Face, serve under the local name, use the text-generation pipeline, run on the specified device, and enable tool parsing. + +### Service: openhands + +```yaml +openhands: + image: ghcr.io/all-hands-ai/openhands:latest + container_name: openhands + depends_on: + - ovms-llm +``` + +`depends_on` ensures OVMS starts first (though it does not wait for health). + +**Port publishing:** +```yaml +ports: + - "3000:3000" # Web UI +``` + +**Volume mounts:** +```yaml +volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ./.openhands:/.openhands +``` + +Docker socket allows OpenHands to create runtime sandbox containers. The `.openhands` directory persists settings locally. + +**Environment variables:** +```yaml +environment: + LLM_BASE_URL: http://ovms-llm:8000/v3 + LLM_MODEL: openai/${LOCAL_NAME} + LLM_API_KEY: unused +``` + +Points OpenHands to the OVMS endpoint. Note the `openai/` prefix required by OpenHands. + +**Extra hosts:** +```yaml +extra_hosts: + - host.docker.internal:host-gateway +``` + +Allows the OpenHands container to reach the host machine. Optional for basic OVMS communication but may be needed for certain agent workflows. 
+ +**Generation parameters:** +```yaml +LLM_TEMPERATURE: "0.0" +LLM_MAX_OUTPUT_TOKENS: "500" +LLM_MAX_INPUT_TOKENS: "4096" +LLM_TIMEOUT: "120000" +``` + +Temperature set to 0.0 for deterministic responses. Conservative output limits prevent runaway loops. Increased timeout accommodates CPU inference latency. + +**Sandbox limits:** +```yaml +SANDBOX_DOCKER_ARGS: --memory=1536m --memory-swap=1536m +``` + +Limits memory for OpenHands runtime sandboxes. Adjust based on available host RAM. + +### Network + +```yaml +networks: + ovms-net: + name: ovms-net + driver: bridge +``` + +Creates a shared Docker network for container-to-container communication. OpenHands reaches OVMS via `http://ovms-llm:8000`. + +--- + +## Understanding `deploy_model_ovms.sh` + +The `scripts/deploy_model_ovms.sh` script automates the manual workflow documented above. All steps it performs can be done manually. + +### What the Script Does + +**1. Validates prerequisites** + +Checks for Docker and docker compose availability, warns if `HF_TOKEN` is not set for gated models, and validates the target device (`CPU` or `GPU`). + +**2. Normalizes the model name** + +```bash +# "OpenVINO/Qwen3-8b-int8-ov" → "qwen3-8b-int8-ov" +basename "$MODEL_ID" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' +``` + +**3. Resolves the tool parser** + +Maps model family to parser (e.g., Qwen → `hermes3`, Llama3 → `llama3`, Mistral → `mistral`). + +**4. Creates the model cache directory** + +```bash +mkdir -p "$MODEL_CACHE_DIR" # Defaults to ${HOME}/ovms-openhands/models +``` + +**5. Exports runtime configuration** + +```bash +export MODEL_ID LOCAL_NAME TARGET_DEVICE TOOL_PARSER MODEL_CACHE_DIR HF_TOKEN +``` + +These variables are consumed by docker-compose.yml via environment variable substitution. + +**6. Deploys OVMS and OpenHands** + +```bash +docker compose -f "$COMPOSE_FILE" up -d +``` + +**7. Waits for health** + +Polls `http://localhost:8000/v1/config` until the model reports `AVAILABLE` (up to 3 minutes). + +**8. 
Prints the manual equivalent** + +Shows the manual Docker commands equivalent to what the script just performed. + +### Script Usage + +```bash +./scripts/deploy_model_ovms.sh <model_id> [OPTIONS] +``` + +**Arguments:** +- `model_id`: Hugging Face model ID (e.g., `OpenVINO/Qwen3-8b-int8-ov`) + +**Options:** +- `--device DEVICE`: Target device (`CPU` or `GPU`, default: `CPU`) +- `--parser PARSER`: Override the automatically resolved tool parser +- `--cache-dir DIR`: Model cache directory (default: `${HOME}/ovms-openhands/models`) +- `--compose-file FILE`: Path to docker-compose.yml +- `--skip-wait`: Skip health check and return immediately after deploy + +**Environment variable overrides:** +- `HF_TOKEN`: Hugging Face token for gated models +- `LOCAL_NAME`: Override the auto-normalized model name +- `MODEL_CACHE_DIR`: Override model cache directory +- `TARGET_DEVICE`: Override target device +- `TOOL_PARSER`: Override tool parser + +--- + +## Debugging OVMS + +### Viewing OVMS Logs + +View OVMS logs from the Docker container: + +```bash +docker logs ovms-llm +``` + +Follow logs in real time while reproducing an issue: + +```bash +docker logs -f ovms-llm +``` + +### Running OVMS with TRACE Logging + +OVMS supports configurable logging levels. 
The following command runs a standalone OVMS deployment with the default logging level, as a baseline: + +```bash +ovms \ + --rest_port 9001 \ + --model_repository_path ./models \ + --source_model OpenVINO/Qwen3-8b-int8-ov \ + --task text_generation \ + --target_device CPU \ + --model_name qwen3-8b-int8-ov \ + --tool_parser hermes3 +``` + +Enable TRACE logging for detailed diagnostics: + +```bash +ovms \ + --rest_port 9001 \ + --model_repository_path ./models \ + --source_model OpenVINO/Qwen3-8b-int8-ov \ + --task text_generation \ + --target_device CPU \ + --model_name qwen3-8b-int8-ov \ + --tool_parser hermes3 \ + --log_level TRACE +``` + +TRACE logging provides detailed information helpful for diagnosing issues related to model loading, request processing, inference, and tool-calling behavior. + +### Enabling TRACE Logging with Docker Compose + +Enable the same logging level by modifying the OVMS command in `docker-compose.yml`. Add the `--log_level` argument: + +```yaml +command: + - --model_repository_path /models + - --source_model ${MODEL_ID} + - --model_name ${LOCAL_NAME} + - --task text_generation + - --target_device ${TARGET_DEVICE} + - --port "9000" + - --rest_port "8000" + - --tool_parser ${TOOL_PARSER} + - --log_level TRACE +``` + +### Restarting After Configuration Changes + +Recreate the OVMS container for configuration changes to take effect (`docker compose restart` does not apply changes made to `docker-compose.yml`): + +```bash +docker compose up -d ovms-llm +``` + +### Viewing TRACE Logs + +Follow the logs after restarting: + +```bash +docker logs -f ovms-llm +``` + +Observe detailed OVMS logs while reproducing an issue. 
diff --git a/demos/integration_with_OpenHands/INTEGRATION_PLAN.md b/demos/integration_with_OpenHands/INTEGRATION_PLAN.md new file mode 100644 index 0000000000..d0b2cef352 --- /dev/null +++ b/demos/integration_with_OpenHands/INTEGRATION_PLAN.md @@ -0,0 +1,506 @@ +# OpenHands Integration Plan + +## High-Level Goal + +The goal of this integration is to provide an upstream-quality OVMS demo showing +how to run OpenHands with an OpenVINO Model Server backend. The final demo should +let a user: + +- deploy a suitable text-generation model with OVMS; +- connect OpenHands to the OVMS OpenAI-compatible REST endpoint; +- verify the connection with a direct API request and an OpenHands agent task; +- understand the model, context-window, tool-calling, networking, and resource + requirements that affect agent behavior; and +- reproduce the setup primarily through clear documentation. + +## Core Philosophy: Documentation-First + +This demo is **documentation-first**. The README.md is the authoritative source +of truth and must contain all instructions necessary for a user to understand and +reproduce the setup manually. + +**Helper artifacts are provided for convenience only, not as dependencies:** + +- **docker-compose.yml** — A reference configuration showing the service + architecture. The README explains each section and maps it to manual Docker + commands so users understand every step independently of Compose. + +- **scripts/deploy_model_ovms.sh** — A convenience helper that automates + repetitive tasks (model selection, tool parser configuration, health checks). + The README documents exactly what the script does internally. + +**Long-term direction:** These helper artifacts may be removed if OVMS +maintainers prefer a pure documentation-based demo. The README must remain +complete and useful without them, but the artifacts are provided as helpful +references for users who prefer them. 
+ +**Model storage:** Models, OpenVINO IR files, and downloaded artifacts are stored +externally to the Git repository. The recommended workspace is: + +```text +${HOME}/ovms-openhands/ +└── models/ + └── <model_name>/ + ├── openvino_model.xml + ├── openvino_model.bin + └── graph.pbtxt +``` + +The repository should never contain model files. When using the OVMS `--source_model` +workflow, OVMS handles model retrieval and graph generation automatically. The +README explains where files are created and how they are mounted. + +## Architecture of `integration_with_OpenWebUI` + +The OpenWebUI demo is primarily a documentation integration rather than a +custom software component. Its directory contains one comprehensive `README.md` +and screenshots demonstrating configuration and results. + +Its core architecture is: + +```text +User + | + v +Open WebUI + | + | OpenAI-compatible HTTP requests + | Base URL: http://localhost:8000/v3 + v +OpenVINO Model Server + | + v +OpenVINO-format generative models +``` + +Key characteristics: + +- OVMS and the client remain independent components connected through standard + REST APIs. +- Models are prepared with OVMS-native `--pull` and `--add_to_config` commands. +- Linux instructions use the published OVMS Docker image; Windows instructions + use the OVMS binary. +- Each capability begins with model deployment and a direct `curl` verification, + followed by client configuration and a demonstrated user workflow. +- Client-specific behavior is configured through its UI or supported settings, + without patching OpenWebUI or OVMS source code. +- The README is organized as a guided recipe with prerequisites, numbered setup + steps, references, notes, and screenshots. +- The demo expands from basic chat into optional OVMS-backed capabilities such + as RAG, image generation, VLM, tools, web search, memory, code execution, and + audio. 
+ +This establishes the preferred philosophy for the OpenHands demo: use existing +published components, keep the API boundary explicit, make configuration +reproducible, verify each layer independently, and document the workflow +visually. + +## Architecture of the Prototype Repository + +The `openhands-openvino-integration` repository is a local development, +validation, and benchmarking environment. It contains more machinery than an +OVMS demo should ultimately require. + +Its functional runtime architecture is: + +```text +User or benchmark runner + | + v +OpenHands web application and conversation API + | + | OpenAI-compatible chat completions + | LLM_BASE_URL=http://ovms-llm:8000/v3 + | LLM_MODEL=openai/<model_name> + v +OVMS container + | + | Text-generation pipeline and model-specific tool parser + v +OpenVINO model + +OpenHands also creates separate runtime sandbox containers for agent actions. +``` + +The main components are: + +- `docker-compose.yml`, which defines OVMS and OpenHands services on a shared + Docker network, publishes ports `8000`, `9000`, and `3000`, passes OpenHands + LLM settings through environment variables, mounts the Docker socket for + runtime sandboxes, and persists OpenHands settings. +- OVMS configuration and model graph files under `configs/`, used by some of + the prototype's deployment paths. +- deployment helpers that either download/convert models and generate a + MediaPipe graph or use the newer OVMS-native `--source_model` workflow; +- an OpenHands startup helper that handles persisted settings, networking, and + cleanup of old runtime containers; +- direct endpoint validation scripts; and +- a Python benchmark harness that drives the OpenHands conversation API, + measures readiness and completion, scores responses, collects Docker + telemetry and logs, and writes reproducibility artifacts. + +The prototype demonstrates that OpenHands can route requests to OVMS through +the `/v3/chat/completions` endpoint. 
It also records practical findings: + +- OpenHands requires the provider prefix in model configuration, for example + `openai/`, while OVMS receives the served model name. +- A non-empty placeholder API key is needed by the client even though OVMS does + not authenticate the request. +- A shared Docker network and the OVMS service name are more stable than + container IP addresses. +- Persisted OpenHands settings can override environment variables. +- Agent prompts need a large context window and a capable coding/instruction + model; small models may connect successfully but fail at useful agent tasks. +- Each OpenHands conversation may create a resource-consuming runtime sandbox. +- Tool-parser selection, generation limits, timeouts, and host memory are + important parts of a usable deployment. + +The repository also contains historical experiments and configurations for +different models and deployment methods. Some checked-in names, paths, device +choices, and compatibility conclusions do not describe one single canonical +configuration. They should therefore be treated as evidence and lessons, not +copied verbatim into the OVMS demo. + +## Initial Similarities and Differences + +### Similarities + +- Both integrations connect an independent user application to OVMS over an + OpenAI-compatible HTTP API. +- Both use the OVMS `/v3` base path and require an explicitly configured model + name. +- Both can run with published Docker images and avoid changes to either + application's source code. +- Both benefit from validating OVMS directly before debugging the client. +- Both need model-specific deployment parameters and enough host resources for + the selected model. +- Both are best presented as reproducible setup and configuration recipes. 
+ +### Differences + +- OpenWebUI is a general model interface, while OpenHands is an autonomous + coding-agent application that adds long system prompts, iterative inference, + tool/action behavior, and runtime sandbox containers. +- The OpenWebUI demo is documentation-first and has no orchestration or + benchmark code. The prototype is an engineering workbench with Compose, + deployment scripts, tests, telemetry, benchmarks, and extensive investigation + notes. +- OpenWebUI supports many independent generative use cases. The initial + OpenHands demo should focus narrowly on reliable text-generation and coding + agent behavior. +- OpenHands has client-specific configuration requirements, including the + `openai/` model prefix, a placeholder API key, Docker socket access, persistent + settings, and stable container networking. +- Successful API connectivity is sufficient for a basic OpenWebUI chat demo, + but it is not sufficient for OpenHands: model quality, context capacity, + structured tool output, latency, and sandbox resources determine whether an + agent task actually succeeds. +- The prototype includes legacy and experimental model-serving paths. The OVMS + demo should prefer current OVMS-native model preparation and a small number of + clearly documented configuration choices. + +## Updated Migration Direction + +The upstream demo follows a **documentation-first approach**: + +- **README.md** is the authoritative source of truth and primary deliverable (Section 1-4 implemented) +- **docker-compose.yml** is a reference configuration showing the service architecture +- **scripts/deploy_model_ovms.sh** is a convenience helper that automates repetitive tasks +- Models are stored externally to the Git repository +- Benchmarking, telemetry, and experimental tooling remain in the standalone prototype repository + +**Key principle:** Users should be able to understand and reproduce the setup by +following the README alone. 
Helper artifacts are provided for convenience and +reference. + +**Long-term vision:** The helper artifacts (Compose and script) may be removed +if OVMS maintainers prefer a pure documentation-based demo. The README must +remain complete and useful without them, but the artifacts are helpful references +for users who prefer them. + +**Current status:** README Sections 1-4 (Overview, Architecture, Prerequisites, Preparing the Model) have been implemented. The documentation now provides a complete foundation for understanding the integration and preparing models. Remaining sections (Quick Start, Verification, Troubleshooting, References) will be implemented in the next phase. + +## Agreed Target Directory Structure + +This is the current architectural target for the upstream demo. + +```text +demos/ +└── integration_with_OpenHands/ + ├── README.md # Primary documentation (authoritative) + ├── docker-compose.yml # Reference configuration (convenience) + ├── scripts/ + │ └── deploy_model_ovms.sh # Convenience helper (not required) + └── screenshots/ # Visual verification guide +``` + +**Model storage (external to Git repository):** +```text +${HOME}/ovms-openhands/ +└── models/ + └── <model_name>/ # Downloaded/pulled OpenVINO models + ├── openvino_model.xml + ├── openvino_model.bin + └── graph.pbtxt +``` + +**What is NOT included in upstream demo:** +- Benchmarking code +- Telemetry collection +- Experimental configurations +- Model caches or artifacts in the repository +- Prototype-specific development tooling +- `.env.example` or `.gitignore` (environment variables are used directly; models are external) + +## Component Roles + +### README.md (Authoritative) + +The README is the primary deliverable and must be complete. 
It explains: + +- Architecture overview with diagrams +- How OVMS and OpenHands interact over the OpenAI API +- Prerequisites (Docker, HF_TOKEN, hardware) +- Model preparation options (OVMS `--source_model` workflow) +- Manual deployment steps (Docker commands without Compose) +- docker-compose.yml reference (what each section does) +- deploy_model_ovms.sh reference (what the script automates) +- Verification workflow (curl tests, OpenHands agent task) +- Troubleshooting common issues +- Supported model families and tool parser requirements + +**Key principle:** A user should be able to set up the integration by reading +the README alone, without using the helper artifacts. + +### docker-compose.yml (Static Reference Configuration) + +Provided as a static reference implementation showing the complete service +architecture. The file: + +- Uses Docker Compose environment variable substitution for runtime configuration +- Consumes environment variables exported by the helper script or set manually +- Relies on OVMS `--source_model` workflow for model download and graph generation +- Does NOT require runtime patching or placeholder replacement +- Serves as a reference implementation of the manual Docker commands + +The README: +- Explains each service (ovms-llm, openhands) +- Maps Compose sections to equivalent Docker CLI commands +- Documents the required environment variables and their purposes +- Explains volume mounts and networking +- Shows how to achieve the same result without Compose + +**Status:** Reference and convenience artifact. May be removed in favor of pure +documentation without reducing the value of the README. 
+ +### scripts/deploy_model_ovms.sh (Convenience Helper) + +Automates repetitive tasks for user convenience: + +- Parses model and deployment arguments (model_id, device, parser) +- Validates prerequisites (Docker, docker compose, HF_TOKEN) +- Normalizes model names for filesystem safety +- Resolves tool parsers based on model family +- Prepares external model workspace (`${HOME}/ovms-openhands/models`) +- Exports runtime environment variables for docker-compose.yml +- Launches `docker compose up -d` with the static compose file +- Waits for OVMS health via `/v1/config` polling +- Prints diagnostics and equivalent manual workflow + +The script: +- Does NOT patch or modify docker-compose.yml at runtime +- Does NOT rewrite configuration files +- Is a thin wrapper around the manual README workflow +- Serves as a reference for users who prefer automation + +The README documents exactly what this script does so users understand the +automation or can perform steps manually. + +**Status:** Convenience and reference artifact. May be removed in favor of pure +documentation without reducing the value of the README. + +### Prototype Repository Retains + +The standalone prototype repository continues to host: + +- Benchmark harness and comparison tools +- Telemetry collection and log parsing +- Historical experiments and configurations +- Development/validation workflows + +## Recommended User Workflow + +The README should present two equivalent paths to the same result: + +### Path A: Using Helper Artifacts (Convenience) + +1. **Configure prerequisites** — Set `HF_TOKEN` in environment +2. **Run deployment script** — `./scripts/deploy_model_ovms.sh <model_id>` +3. **Wait for successful deployment** — Script confirms OVMS health +4. **Verify OVMS** — Direct `curl` test to `/v3/chat/completions` +5. **Use OpenHands** — Open web UI, create agent task + +### Path B: Manual Setup (Documentation-Driven) + +1. **Configure prerequisites** — Set `HF_TOKEN` in environment +2. 
**Prepare model workspace** — Create external model directory +3. **Deploy OVMS** — Use Docker CLI or modified Compose +4. **Verify OVMS** — Direct `curl` test +5. **Configure OpenHands** — Set LLM_BASE_URL and LLM_MODEL +6. **Use OpenHands** — Open web UI, create agent task + +**Both paths achieve the same result.** The helper artifacts automate repetitive +steps but are not required. The README must document both approaches. + +## Implementation Roadmap + +### Phase 1: Project Skeleton +- [x] Create upstream demo directory structure. +- [x] Create README skeleton. +- [ ] Add placeholder for screenshots (to be captured during validation). + +### Phase 2: Helper Artifacts (Reference) +- [x] Implement docker-compose.yml as static reference configuration. +- [x] Implement scripts/deploy_model_ovms.sh as convenience helper. + +### Phase 3: Documentation (Primary Deliverable) +- [x] Expand README with architecture overview and diagrams. +- [ ] Document manual setup workflow (Docker CLI commands). +- [ ] Document docker-compose.yml reference (what each section does). +- [x] Document deploy_model_ovms.sh reference (what the script does) - conceptual overview in Section 4 +- [x] Ensure README can standalone without helpers - reinforced in Overview and Section 4 +- [ ] Add verification workflow (curl tests, OpenHands agent task). +- [ ] Add troubleshooting section. +- [x] Document model families and tool parser requirements - included in Section 4 +- [ ] Capture screenshots for visual verification. + +### Phase 4: Validation +- [ ] Test manual setup workflow (README-only, no helpers). +- [ ] Test helper artifact workflow (Compose + script). +- [ ] Fresh clone validation. +- [ ] OVMS API verification. +- [ ] OpenHands agent task verification. +- [ ] Final documentation review for completeness. + +## Migration Log + +This section tracks significant decisions and changes to the integration plan. 
+ +### Step 0: Architecture Inspection and Planning + +- Inspected `model_server/demos/integration_with_OpenWebUI`. +- Inspected the structure, runtime configuration, documentation, deployment + helpers, validation utilities, and benchmark design in + `openhands-openvino-integration`. +- Identified the OpenWebUI demo as the canonical documentation-first pattern. +- Identified the prototype's validated runtime boundary and operational lessons, + while noting that its historical configurations should not be migrated as a + single package. +- Created this planning document. No implementation code or configuration was + added. + +### Step 1: Architecture Re-evaluation (Initial Approach) + +- Reviewed `deploy_model_ovms.sh` and identified its encapsulation of non-trivial + operational knowledge (tool parser resolution, model normalization, health-wait + logic). +- Agreed on hybrid architecture: `docker-compose.yml` as scaffolding + + `deploy_model_ovms.sh` as deployment helper. +- Documented component responsibilities and recommended user workflow. + +### Step 2: Philosophy Re-Alignment (OVMS Mentor Feedback) + +**Major directional change based on OVMS maintainer feedback:** + +- Shifted to **documentation-first philosophy** — README.md is the authoritative + source of truth, not the helper scripts. +- **Downgraded helper artifacts:** + - `docker-compose.yml` → Reference configuration only + - `deploy_model_ovms.sh` → Convenience helper only +- **Established external model storage:** Models stored in user-local directory + (e.g., `${HOME}/ovms-openhands/models`), never in the Git repository. +- **Clarified long-term direction:** Helper artifacts may be removed; README must + be complete without them. +- **Updated repository structure:** Simplified to README, reference configs, + convenience script, and screenshots only. +- **No migration of:** Benchmarking code, telemetry, experiments, or model caches. 
+ +**Implementation impact:** +- README must document manual setup workflow equivalent to using helpers. +- docker-compose.yml is reference material, not a required component. +- deploy_model_ovms.sh is optional automation, not the primary interface. +- Users should be able to succeed by following README documentation alone. + +### Step 3: Architectural Simplification + +Simplified the deployment workflow by removing runtime docker-compose patching: + +**Design changes:** +- `docker-compose.yml` is now a static reference configuration. +- Runtime configuration uses Docker Compose environment variable substitution. +- The helper script exports environment variables rather than patching YAML. +- OVMS `--source_model` handles model download and graph generation. + +**Rationale:** +- Better aligns with the documentation-first philosophy. +- Keeps helper artifacts as optional convenience implementations. +- Makes the manual workflow more transparent (no hidden patching logic). +- Leverages Docker Compose native capabilities instead of custom logic. + +**Implementation:** +- Replaced compose placeholders with `${VAR}` environment variable references. +- Updated `deploy_model_ovms.sh` to export runtime configuration. +- Removed any in-place file modification or patching logic. +- Script now prints the manual equivalent for user transparency. + +### Phase 1: Project Skeleton (Completed) + +Architectural milestone: +- Created upstream demo directory structure at `model_server/demos/integration_with_OpenHands/`. +- Established documentation-first philosophy and component roles. +- Added `README.md` with skeleton section headings. +- Prepared `scripts/` and `screenshots/` directories for later implementation. +- Set foundation for Phase 2 (helper artifacts) and Phase 3 (documentation expansion). 
+ +### Phase 2: Helper Artifacts (Completed) + +Implementation milestone: +- Implemented `docker-compose.yml` as static reference configuration using environment + variable substitution for runtime configuration. +- Implemented `scripts/deploy_model_ovms.sh` as optional convenience helper that exports + runtime configuration and launches the static compose file. +- Adopted OVMS `--source_model` workflow for automatic model download and graph generation. +- Removed runtime docker-compose patching in favor of Docker Compose native substitution. +- Established external model storage at `${HOME}/ovms-openhands/models`. +- Set foundation for Phase 3 (README documentation). + +### Phase 3: README Documentation - Sections 1-4 (Completed) + +Documentation milestone: +- Implemented README Section 1 (Overview) with introduction to OpenHands, OVMS backend suitability, + and documentation-first philosophy statement. +- Implemented README Section 2 (Architecture) with ASCII diagram, component descriptions, + request flow explanation, and OpenHands-specific configuration requirements. +- Implemented README Section 3 (Prerequisites) with system requirements, network/port usage table, + and external model storage location documentation. +- Implemented README Section 4 (Preparing the Model) with model selection guidance, tool parser + explanation, `--source_model` workflow documentation, and conceptual overview of helper script behavior. + +**Design principles preserved:** +- README.md remains the authoritative source of truth; helper artifacts are documented as optional. +- No new mandatory dependencies or unsupported workflows were introduced. +- Helper scripts are presented as convenience tools rather than required setup mechanisms. +- All commands, environment variables, and workflows are consistent with existing implementation. 
+ +**Consistency with implementation:** +- Environment variables (MODEL_ID, LOCAL_NAME, TARGET_DEVICE, TOOL_PARSER, MODEL_CACHE_DIR, HF_TOKEN) + match docker-compose.yml and deploy_model_ovms.sh. +- Port documentation (8000, 9000, 3000) matches the compose configuration. +- Model storage location (${HOME}/ovms-openhands/models) matches the helper script defaults. +- Tool parser mappings (Qwen3→hermes3, Qwen3-Coder→qwen3coder, Llama3→llama3, Mistral→mistral, Phi4→phi4) match the script resolution logic. +- OVMS `--source_model` workflow is documented as the recommended approach. + +**Foundation for next phase:** +- README structure is now substantially expanded beyond skeleton. +- Architecture, prerequisites, and model preparation are fully documented. +- Remaining sections (Quick Start, Verification, Troubleshooting, References) are ready for implementation. diff --git a/demos/integration_with_OpenHands/README.md b/demos/integration_with_OpenHands/README.md new file mode 100644 index 0000000000..db37ccdff1 --- /dev/null +++ b/demos/integration_with_OpenHands/README.md @@ -0,0 +1,272 @@ +# OpenHands Integration with OpenVINO Model Server {#ovms_demos_integration_with_openhands} + +## Description + +[OpenHands](https://github.com/All-Hands-AI/OpenHands) is an open-source software engineering agent that automates coding tasks through iterative LLM inference, tool execution, and runtime sandbox environments. Unlike simple chat interfaces, OpenHands maintains long-running conversations, creates code execution sandboxes, and performs multi-step problem solving. + +This demo integrates OpenHands with [OpenVINO Model Server](https://github.com/openvinotoolkit/model_server) using OVMS's OpenAI-compatible REST API. It demonstrates how to deploy OVMS as a backend for OpenHands, enabling agent workflows on local hardware with OpenVINO-optimized models. + +This README covers the recommended deployment workflow. For manual Docker deployment and implementation details, see [ADVANCED_DEPLOYMENT.md](ADVANCED_DEPLOYMENT.md). 
+ +## Architecture + +``` +User + | + v +OpenHands Container + | (creates runtime sandbox containers for code execution) + | + | OpenAI-compatible HTTP requests + | POST /v3/chat/completions + | LLM_BASE_URL=http://ovms-llm:8000/v3 + | LLM_MODEL=openai/ + v +OpenVINO Model Server + | (MediaPipe LLM graph + tool parser) + v +OpenVINO model +``` + +OpenHands maintains conversation state and creates isolated Docker containers for code execution. It requires an OpenAI-compatible LLM endpoint with models that have sufficient context capacity and coding capability. + +OVMS serves generative models through an OpenAI-compatible REST API, handling model retrieval, OpenVINO conversion, and graph generation. It applies model-specific tool parsers for structured output and runs on CPU or GPU with OpenVINO optimization. + +For detailed request flow and configuration requirements, see [ADVANCED_DEPLOYMENT.md](ADVANCED_DEPLOYMENT.md). + +--- + +## Prerequisites + +- **Host architecture:** x86_64 +- **Operating system:** Linux (Docker-based deployment) +- **Docker Engine:** Installed and running +- **Docker Compose:** Plugin v2 or standalone +- **Memory:** Minimum 8GB RAM; 16GB+ recommended for agent workflows +- **Hugging Face account:** For model access (gated models may require token) + +### Network and Port Usage + +| Port | Component | Purpose | +|------|-----------|-----------------------------| +| 8000 | OVMS | OpenAI-compatible REST API | +| 9000 | OVMS | gRPC API (not used here) | +| 3000 | OpenHands | Web UI | + +Ensure these ports are available on your host. + +--- + +## Preparing the Model + +### Choosing a Compatible Model + +OpenHands requires models with instruction-following capability, coding proficiency, sufficient context window (4096+ tokens), and tool calling support. 
+ +| Model Family | Tool Parser | Notes | +|--------------|-------------|----------------------------------------| +| Qwen 3 Coder | `qwen3coder`| Strong coding performance, various sizes | +| Qwen 3 | `hermes3` | General instruction following | +| Llama 3 | `llama3` | Good general instruction following | +| Mistral | `mistral` | Efficient inference | + +> **Note:** Examples use `OpenVINO/Qwen3-8b-int8-ov`. Other compatible models may also be used. + +### Tool Parser Selection + +Tool parsers enable structured output for function calling. When OpenHands executes a tool (running code, reading files), it expects the LLM to return structured JSON specifying the tool name and arguments. The tool parser converts model outputs into this format. + +Without the correct tool parser, the model does not generate tool calls in the expected format, causing tool call extraction to fail. + +For details on OVMS model retrieval and workspace layout, see [ADVANCED_DEPLOYMENT.md](ADVANCED_DEPLOYMENT.md). + +--- + +## Deployment + +This demo uses `docker-compose.yml` and `scripts/deploy_model_ovms.sh` to automate deployment. Clone the repository and navigate to the demo directory before proceeding. + +**Prerequisites:** Docker Engine, Docker Compose, 8GB+ RAM, and `HF_TOKEN` for gated models. + +1. **Clone the repository:** + ```bash + git clone https://github.com/openvinotoolkit/model_server.git + cd model_server/demos/integration_with_OpenHands + ``` + +2. **Set your Hugging Face token** (required for gated models like Llama, Mistral): + ```bash + export HF_TOKEN="your_token_here" + ``` + +3. **Run the deployment script:** + ```bash + ./scripts/deploy_model_ovms.sh OpenVINO/Qwen3-8b-int8-ov + ``` + + The script validates your environment, prepares the model, and launches both containers. See [ADVANCED_DEPLOYMENT.md](ADVANCED_DEPLOYMENT.md) for details on what the script does. + +4. 
**Verify the deployment** (see next section) + +**Optional parameters:** +```bash +# Specify device, parser, or cache directory +./scripts/deploy_model_ovms.sh OpenVINO/Qwen3-8b-int8-ov \ + --device CPU \ + --parser hermes3 \ + --cache-dir ~/custom-models + +# Skip health check for faster feedback +./scripts/deploy_model_ovms.sh OpenVINO/Qwen3-8b-int8-ov --skip-wait +``` + +For manual Docker deployment, see [ADVANCED_DEPLOYMENT.md](ADVANCED_DEPLOYMENT.md). + +--- + +## Verifying the Deployment + +Verify the integration in two stages: first OVMS directly, then OpenHands. + +### Stage 1: Verify OVMS + +**Check health:** +```bash +curl -s http://localhost:8000/v1/config | jq . +``` + +The response should list the model with `"state": "AVAILABLE"` under `model_version_status`. + +**Test a completion request:** +```bash +curl -X POST http://localhost:8000/v3/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "qwen3-8b-int8-ov", + "messages": [{"role": "user", "content": "Say hello"}], + "max_tokens": 10 + }' +``` + +If OVMS fails to respond, check `docker logs ovms-llm`, verify the model downloaded at `$MODEL_CACHE_DIR`, and ensure `HF_TOKEN` is set if needed. + +### Stage 2: Verify OpenHands + +1. **Open the web UI** at `http://localhost:3000` + ![OpenHands UI]() + +2. **Configure the OVMS-backed model:** + - Click **Settings** → **LLM** + - Enable **Advanced** mode if needed + - Set **Custom Model:** `openai/qwen3-8b-int8-ov` + - Set **Base URL:** `http://ovms-llm:8000/v3` + - Set **API Key:** `unused` + - Click **Save** + + ![OpenHands LLM Configuration](screenshots/Pasted%20image.png) + +3. **Create an agent task:** + ``` + Create a Python function that calculates the factorial of a number. + ``` + +4. 
**Verify behavior:** + - OpenHands creates a runtime sandbox container + - The agent writes and tests code + - OVMS logs show incoming `/v3/chat/completions` requests + +**Common issues:** +- API errors: Verify `LLM_BASE_URL` and `LLM_MODEL` match OVMS configuration +- Slow responses: CPU inference is slower than GPU; consider `LLM_TIMEOUT` setting +- Task failures: The model may lack coding capability; try a larger model + +--- + +## Troubleshooting + +### OVMS Container Issues + +**OVMS exits immediately after starting** + +Check `docker logs ovms-llm`. Possible causes: +- Invalid `HF_TOKEN` for gated model +- Invalid model ID +- Device not available (change `TARGET_DEVICE` to `CPU`) +- Volume mount error (ensure `MODEL_CACHE_DIR` exists) +- Permission denied on `/models` (directory must be writable by OVMS container user) + +**Model status is not `AVAILABLE`** + +Check `curl -s http://localhost:8000/v1/config`. Possible causes: +- Model still downloading (wait longer for large models) +- Out of memory (check host RAM; model may be too large) +- Tool parser mismatch (verify `TOOL_PARSER` matches model family) + +**Connection refused** + +Possible causes: +- OVMS container not running (`docker ps`) +- Wrong port (verify `8000:8000` mapping) +- Firewall blocking port 8000 + +### OpenHands Container Issues + +**API errors** + +Check `docker logs openhands`. Possible causes: +- `LLM_BASE_URL` incorrect (should be `http://ovms-llm:8000/v3`) +- `LLM_MODEL` format wrong (should be `openai/<model_name>`, e.g. `openai/qwen3-8b-int8-ov`) +- OVMS not ready (verify model is `AVAILABLE`) + +**Fails to create runtime sandboxes** + +Check `docker logs openhands | grep -i sandbox`. Possible causes: +- Docker socket not mounted +- Permission denied on Docker socket +- Memory limit too low (increase `SANDBOX_DOCKER_ARGS`) + +### Performance Issues + +**Slow responses** + +CPU inference is inherently slower than GPU. First-token latency is higher for CPU-optimized models. Smaller models are faster. 
Check resource usage with `docker stats`. + +**Agent tasks fail or produce poor results** + +Possible causes: +- Model lacks coding capability (try a model optimized for code) +- Context window too small (increase `LLM_MAX_INPUT_TOKENS`) +- Output limit too low (increase `LLM_MAX_OUTPUT_TOKENS`) +- Temperature too low (try `0.1` or `0.2`) + +### Network Issues + +**Containers cannot communicate** + +Check `docker network inspect ovms-net`. Verify both containers use `ovms-net` and that OpenHands expects the `ovms-llm` hostname. + +### Getting Help + +- [OpenHands documentation](https://docs.all-hands.dev/) +- [OVMS documentation](https://github.com/openvinotoolkit/model_server) + + +## References + +- [OpenHands Project](https://github.com/All-Hands-AI/OpenHands) +- [OpenHands Documentation](https://docs.all-hands.dev/) +- [OpenVINO Model Server](https://github.com/openvinotoolkit/model_server) +- [OVMS Documentation](https://github.com/openvinotoolkit/model_server/tree/main/docs) +- [Hugging Face Models](https://huggingface.co/models) +- [OpenAI API Specification](https://platform.openai.com/docs/api-reference) + +### Related OVMS Demos + +- [integration_with_OpenWebUI](../integration_with_OpenWebUI/) — General model interface integration +- [llm_standalone_flow](../llm_standalone_flow/) — Standalone LLM deployment + +### Model Documentation + +- [Qwen Models](https://huggingface.co/Qwen) +- [Llama Models](https://huggingface.co/meta-llama) +- [Mistral Models](https://huggingface.co/mistralai) diff --git a/demos/integration_with_OpenHands/docker-compose.yml b/demos/integration_with_OpenHands/docker-compose.yml new file mode 100644 index 0000000000..f26b6819bf --- /dev/null +++ b/demos/integration_with_OpenHands/docker-compose.yml @@ -0,0 +1,85 @@ +version: "3.8" + +services: + ovms-llm: + image: openvino/model_server:latest + container_name: ovms-llm + # GPU device access for hardware acceleration. + # For CPU-only deployments, this device mapping can be removed. 
+ devices: + - /dev/dri:/dev/dri + ports: + - "8000:8000" # REST API + - "9000:9000" # gRPC API + volumes: + # Model cache directory - OVMS will materialize models here on --source_model pull + # Falls back to ./docker/models when MODEL_CACHE_DIR is unset (deploy_model_ovms.sh sets it to ${HOME}/ovms-openhands/models) + - ${MODEL_CACHE_DIR:-./docker/models}:/models:rw + environment: + # Hugging Face token for gated models. + HF_TOKEN: ${HF_TOKEN:-} + command: + - --model_repository_path + - /models + - --source_model + - ${MODEL_ID} + - --model_name + - ${LOCAL_NAME} + - --task + - text_generation + - --target_device + - ${TARGET_DEVICE} + - --port + - "9000" + - --rest_port + - "8000" + - --tool_parser + - ${TOOL_PARSER} + networks: + - ovms-net + restart: unless-stopped + + openhands: + image: ghcr.io/all-hands-ai/openhands:latest + container_name: openhands + depends_on: + - ovms-llm + ports: + - "3000:3000" # Web UI + extra_hosts: + # Allows OpenHands container to reach host services if needed + - host.docker.internal:host-gateway + environment: + # OVMS OpenAI-compatible endpoint + LLM_BASE_URL: http://ovms-llm:8000/v3 + # Model identifier - must include 'openai/' prefix for OpenHands. + LLM_MODEL: openai/${LOCAL_NAME} + # OpenHands requires a non-empty API key even though OVMS doesn't authenticate + LLM_API_KEY: unused + # Generation parameters + LLM_TEMPERATURE: "0.0" + # Generation circuit breaker - prevents runaway agent loops. + # Increase for complex tasks, but keep conservative for stability. + LLM_MAX_OUTPUT_TOKENS: "5000" + # Context window limit - adjust based on model's actual capacity. + # Some modern models support 32k+ tokens. + LLM_MAX_INPUT_TOKENS: "4096" + # Request timeout in milliseconds. Increased from default to accommodate + # larger models, CPU inference, and first-token latency. + LLM_TIMEOUT: "120000" + # Memory limit for OpenHands runtime sandboxes (agent code execution). + # Adjust based on available host RAM. 
+ SANDBOX_DOCKER_ARGS: --memory=1536m --memory-swap=1536m + volumes: + # Docker socket for OpenHands to create runtime sandbox containers + - /var/run/docker.sock:/var/run/docker.sock + # Persistent OpenHands settings - repo-local for transparency + - ./.openhands:/.openhands + networks: + - ovms-net + restart: unless-stopped + +networks: + ovms-net: + name: ovms-net + driver: bridge diff --git a/demos/integration_with_OpenHands/screenshots/.gitkeep b/demos/integration_with_OpenHands/screenshots/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/demos/integration_with_OpenHands/scripts/deploy_model_ovms.sh b/demos/integration_with_OpenHands/scripts/deploy_model_ovms.sh new file mode 100755 index 0000000000..612106030d --- /dev/null +++ b/demos/integration_with_OpenHands/scripts/deploy_model_ovms.sh @@ -0,0 +1,399 @@ +#!/usr/bin/env bash +# +# deploy_model_ovms.sh +# +# Convenience helper for deploying OpenVINO Model Server with OpenHands configuration. +# +# This script automates the runtime environment setup and OVMS deployment documented +# in the README.md. It is optional - users can achieve the same result by following +# the manual workflow documented in the README. 
#
# Usage:
#   ./scripts/deploy_model_ovms.sh <model_id> [OPTIONS]
#
# Arguments:
#   model_id    Hugging Face model ID (e.g., "OpenVINO/qwen3-0.6b-int8-ov")
#
# Options:
#   --device DEVICE       Target device: CPU or GPU (default: CPU)
#   --parser PARSER       Tool parser, e.g. hermes3, qwen3coder, llama3, mistral,
#                         phi4 (default: auto-resolved from model_id; "none"
#                         when no known model family matches)
#   --cache-dir DIR       Model cache directory (default: ${HOME}/ovms-openhands/models)
#   --compose-file FILE   Path to docker-compose.yml (default: <demo_root>/docker-compose.yml)
#   --skip-wait           Skip health check and return immediately after deploy
#
# Example:
#   ./scripts/deploy_model_ovms.sh OpenVINO/qwen3-0.6b-int8-ov --device CPU
#
# Environment Variables:
#   HF_TOKEN          Hugging Face token for gated models (required for some models)
#   LOCAL_NAME        Override the local model name (default: auto-normalized from model_id)
#   MODEL_CACHE_DIR   Override model cache directory
#   TARGET_DEVICE     Override target device
#   TOOL_PARSER       Override tool parser
#
# The script exports environment variables consumed by docker-compose.yml:
#   MODEL_ID, LOCAL_NAME, TARGET_DEVICE, TOOL_PARSER, MODEL_CACHE_DIR, HF_TOKEN

set -euo pipefail

################################################################################
# Constants and Directory Resolution
################################################################################

# Resolve paths relative to this script so it works from any working directory.
# Assignment is kept separate from `readonly` so a failing command substitution
# is not masked by the exit status of `readonly`.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DEMO_ROOT="$(dirname "$SCRIPT_DIR")"
DEFAULT_COMPOSE_FILE="${DEMO_ROOT}/docker-compose.yml"
DEFAULT_MODEL_CACHE_DIR="${HOME}/ovms-openhands/models"

# Names and ports below mirror the values hard-coded in docker-compose.yml.
OVMS_CONTAINER_NAME="ovms-llm"
OPENHANDS_CONTAINER_NAME="openhands"
DOCKER_NETWORK="ovms-net"
OVMS_REST_PORT="8000"

readonly SCRIPT_DIR DEMO_ROOT DEFAULT_COMPOSE_FILE DEFAULT_MODEL_CACHE_DIR
readonly OVMS_CONTAINER_NAME OPENHANDS_CONTAINER_NAME DOCKER_NETWORK OVMS_REST_PORT

# Tool parser mapping: model family patterns to parser names.
# Keys are matched as case-sensitive substrings of the model ID, which is why
# each family is listed in both capitalizations. Some keys are prefixes of
# others ("Qwen3" vs "Qwen3-Coder"), so lookups must prefer the longest match.
declare -rA TOOL_PARSERS=(
  ["Qwen3"]="hermes3"
  ["qwen3"]="hermes3"
  ["Qwen3-Coder"]="qwen3coder"
  ["qwen3-coder"]="qwen3coder"
  ["Llama3"]="llama3"
  ["llama3"]="llama3"
  ["Mistral"]="mistral"
  ["mistral"]="mistral"
  ["Phi4"]="phi4"
  ["phi4"]="phi4"
)

################################################################################
# Argument Parsing
################################################################################

#######################################
# Print the usage text from this file's leading comment block, then exit.
# Only the contiguous run of comment lines at the top of the file is printed
# (the shebang is skipped); section banners and inline comments further down
# are not part of the help text.
# Outputs: usage text on stdout
# Returns: does not return; exits 0
#######################################
print_usage() {
  awk '
    NR == 1 && /^#!/ { next }
    /^#/ { sub(/^# ?/, ""); print; next }
    { exit }
  ' "${BASH_SOURCE[0]}"
  exit 0
}

#######################################
# Abort with a readable error when an option is missing its value.
# Without this check, reading "$2" under `set -u` dies with a bare
# "unbound variable" message.
# Arguments: $1 - option name, $2 - number of arguments still unparsed
#######################################
require_option_value() {
  local option="$1"
  local remaining="$2"

  if (( remaining < 2 )); then
    echo "ERROR: Option ${option} requires a value" >&2
    echo "Use --help for usage information." >&2
    exit 1
  fi
}

#######################################
# Parse the command line into global configuration variables.
# Globals written: MODEL_ID, TARGET_DEVICE, TOOL_PARSER, MODEL_CACHE_DIR,
#                  COMPOSE_FILE, SKIP_WAIT
# Globals read:    TARGET_DEVICE, TOOL_PARSER, MODEL_CACHE_DIR (environment
#                  overrides), DEFAULT_MODEL_CACHE_DIR, DEFAULT_COMPOSE_FILE
# Arguments: the script's positional parameters; the first is the model ID
#######################################
parse_args() {
  if [[ $# -eq 0 ]] || [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then
    print_usage
  fi

  # The first argument is positional; an option here means it was omitted.
  if [[ "$1" == -* ]]; then
    echo "ERROR: Expected a model ID as the first argument, got: $1" >&2
    echo "Use --help for usage information." >&2
    exit 1
  fi

  MODEL_ID="$1"
  shift

  # Initialize from environment or defaults
  TARGET_DEVICE="${TARGET_DEVICE:-CPU}"
  TOOL_PARSER="${TOOL_PARSER:-}"
  MODEL_CACHE_DIR="${MODEL_CACHE_DIR:-${DEFAULT_MODEL_CACHE_DIR}}"
  COMPOSE_FILE="${DEFAULT_COMPOSE_FILE}"
  SKIP_WAIT=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --device)
        require_option_value "$1" "$#"
        TARGET_DEVICE="$2"
        shift 2
        ;;
      --parser)
        require_option_value "$1" "$#"
        TOOL_PARSER="$2"
        shift 2
        ;;
      --cache-dir)
        require_option_value "$1" "$#"
        MODEL_CACHE_DIR="$2"
        shift 2
        ;;
      --compose-file)
        require_option_value "$1" "$#"
        COMPOSE_FILE="$2"
        shift 2
        ;;
      --skip-wait)
        SKIP_WAIT=true
        shift
        ;;
      -h|--help)
        print_usage
        ;;
      *)
        echo "ERROR: Unknown option: $1" >&2
        echo "Use --help for usage information." >&2
        exit 1
        ;;
    esac
  done
}

################################################################################
# Validation Functions
################################################################################

#######################################
# Check that the required tooling and the compose file are present.
# Globals read: MODEL_ID, COMPOSE_FILE, SKIP_WAIT, HF_TOKEN
# Outputs: one message per problem on stderr
# Returns: the number of errors found (0 when everything is available)
#######################################
validate_prerequisites() {
  local errors=0

  # Check Docker, and the Compose plugin only when docker itself exists
  if ! command -v docker &>/dev/null; then
    echo "ERROR: Docker is not installed or not in PATH" >&2
    errors=$((errors + 1))
  elif ! docker compose version &>/dev/null; then
    echo "ERROR: docker compose plugin is not available" >&2
    echo "Install Docker Compose v2 or use 'docker-compose' standalone" >&2
    errors=$((errors + 1))
  fi

  # The health check polls OVMS with curl; without it the wait can never succeed.
  if [[ "${SKIP_WAIT:-false}" == "false" ]] && ! command -v curl &>/dev/null; then
    echo "ERROR: curl is required for the health check (or pass --skip-wait)" >&2
    errors=$((errors + 1))
  fi

  # Check HF_TOKEN for gated models (warning only)
  if [[ -z "${HF_TOKEN:-}" ]]; then
    if [[ "$MODEL_ID" =~ meta-llama|Llama|mistralai ]]; then
      echo "WARNING: HF_TOKEN is not set. This model may require authentication." >&2
      echo "Set HF_TOKEN environment variable for gated models." >&2
    fi
  fi

  # Validate compose file
  if [[ ! -f "$COMPOSE_FILE" ]]; then
    echo "ERROR: docker-compose.yml not found: $COMPOSE_FILE" >&2
    errors=$((errors + 1))
  fi

  return "$errors"
}

#######################################
# Reject any target device other than CPU or GPU.
# Arguments: $1 - device name
# Returns: 0 for a supported device; exits 1 otherwise
#######################################
validate_device() {
  local device="$1"

  case "$device" in
    CPU|GPU)
      # Valid device types
      ;;
    *)
      echo "ERROR: Invalid device: $device" >&2
      echo "Supported devices: CPU, GPU" >&2
      exit 1
      ;;
  esac
}

################################################################################
# Model Name Normalization and Tool Parser Resolution
################################################################################

#######################################
# Convert a Hugging Face model ID to a filesystem-safe local name:
# the last path component, lowercased, with spaces replaced by dashes.
# e.g., "OpenVINO/qwen3-0.6b-int8-ov" → "qwen3-0.6b-int8-ov"
# Arguments: $1 - model ID
# Outputs: normalized name on stdout
#######################################
normalize_model_name() {
  local model_id="$1"

  basename -- "$model_id" | tr '[:upper:]' '[:lower:]' | tr ' ' '-'
}

#######################################
# Choose the tool parser for a model.
# An explicit override always wins. Otherwise the model ID is matched against
# the TOOL_PARSERS keys and the LONGEST matching key is used. Associative
# arrays iterate in no defined order, so returning the first hit would let
# "Qwen3" shadow "Qwen3-Coder" unpredictably.
# Globals read: TOOL_PARSERS
# Arguments: $1 - model ID, $2 - optional parser override
# Outputs: parser name on stdout; "none" when no family matches
#######################################
resolve_tool_parser() {
  local model_id="$1"
  local override="${2:-}"
  local pattern
  local best_pattern=""

  # If override provided, use it
  if [[ -n "$override" ]]; then
    echo "$override"
    return
  fi

  for pattern in "${!TOOL_PARSERS[@]}"; do
    if [[ "$model_id" == *"$pattern"* ]] && (( ${#pattern} > ${#best_pattern} )); then
      best_pattern="$pattern"
    fi
  done

  if [[ -n "$best_pattern" ]]; then
    echo "${TOOL_PARSERS[$best_pattern]}"
    return
  fi

  # Default: no tool parser
  echo "none"
}

################################################################################
# Workspace Preparation
################################################################################

#######################################
# Ensure the model cache directory exists.
# OVMS itself handles model download and graph generation via --source_model.
# Arguments: $1 - cache directory path
#######################################
prepare_model_workspace() {
  local cache_dir="$1"

  # Create cache directory if it doesn't exist
  if [[ ! -d "$cache_dir" ]]; then
    echo "Creating model cache directory: $cache_dir"
    mkdir -p -- "$cache_dir"
  fi

  echo "Model cache ready: $cache_dir"
}

################################################################################
# Runtime Configuration Export
################################################################################

#######################################
# Export every variable consumed by docker-compose.yml and echo a summary.
# The Hugging Face token is reported only as set / not set so the secret never
# reaches the terminal or logs.
# Globals exported: MODEL_ID, LOCAL_NAME, TARGET_DEVICE, TOOL_PARSER,
#                   MODEL_CACHE_DIR, HF_TOKEN
#######################################
export_runtime_configuration() {
  local token_state="<not set>"

  export MODEL_ID
  export LOCAL_NAME
  export TARGET_DEVICE
  export TOOL_PARSER
  export MODEL_CACHE_DIR
  export HF_TOKEN="${HF_TOKEN:-}"

  if [[ -n "$HF_TOKEN" ]]; then
    token_state="<set>"
  fi

  echo "Runtime configuration:"
  echo "  MODEL_ID: $MODEL_ID"
  echo "  LOCAL_NAME: $LOCAL_NAME"
  echo "  TARGET_DEVICE: $TARGET_DEVICE"
  echo "  TOOL_PARSER: $TOOL_PARSER"
  echo "  MODEL_CACHE_DIR: $MODEL_CACHE_DIR"
  echo "  HF_TOKEN: $token_state"
}

################################################################################
# Docker Compose Deployment
################################################################################

#######################################
# Stop and remove a container if one with the exact given name exists.
# The container list is captured before matching: piping `docker ps` into
# `grep -q` under `set -o pipefail` can report a false "not found" when grep
# exits early and docker receives SIGPIPE.
# Arguments: $1 - container name, $2 - human-readable label for the message
# Returns: 1 when the container list cannot be obtained
#######################################
remove_existing_container() {
  local name="$1"
  local label="$2"
  local existing

  if ! existing="$(docker ps -a --format '{{.Names}}')"; then
    echo "ERROR: Unable to list Docker containers (is the Docker daemon running?)" >&2
    return 1
  fi

  if grep -qxF -- "$name" <<<"$existing"; then
    echo "Stopping existing ${label} container: $name"
    # Best effort: the container may already be stopped or concurrently removed.
    docker stop "$name" >/dev/null 2>&1 || true
    docker rm "$name" >/dev/null 2>&1 || true
  fi
}

#######################################
# Replace any previous deployment and start the stack with Docker Compose.
# Globals read: OVMS_CONTAINER_NAME, OPENHANDS_CONTAINER_NAME
# Arguments: $1 - path to the compose file
#######################################
deploy_ovms() {
  local compose_file="$1"

  echo "Deploying OVMS and OpenHands via Docker Compose..."

  # Stop existing containers if running
  remove_existing_container "$OVMS_CONTAINER_NAME" "OVMS" || return 1
  remove_existing_container "$OPENHANDS_CONTAINER_NAME" "OpenHands" || return 1

  # Deploy via docker compose
  docker compose -f "$compose_file" up -d
}

################################################################################
# Health Check Polling
################################################################################

#######################################
# Poll the OVMS config endpoint until the model reports AVAILABLE.
# Polls every few seconds so readiness is noticed promptly. The overall budget
# stays at the three hours previously allowed (18 attempts 600 s apart), which
# leaves room for a first-time model download.
# Globals read: OVMS_REST_PORT, OVMS_CONTAINER_NAME
# Environment:  OVMS_HEALTH_TIMEOUT  - total wait in seconds (default 10800)
#               OVMS_HEALTH_INTERVAL - seconds between polls (default 10)
# Returns: 0 once the model is available, 1 on timeout or invalid settings
#######################################
wait_for_health() {
  local timeout_seconds="${OVMS_HEALTH_TIMEOUT:-10800}"
  local poll_interval="${OVMS_HEALTH_INTERVAL:-10}"
  local attempt=0
  local deadline
  local started
  local status

  if ! [[ "$timeout_seconds" =~ ^[1-9][0-9]*$ && "$poll_interval" =~ ^[1-9][0-9]*$ ]]; then
    echo "ERROR: OVMS_HEALTH_TIMEOUT and OVMS_HEALTH_INTERVAL must be positive integers" >&2
    return 1
  fi

  echo "Waiting for OVMS LLM graph to initialize (this may take 30-60 seconds, longer on first model download)..."

  # Initial sleep to let container start
  sleep 10

  # SECONDS is bash's wall-clock counter, so time spent inside curl counts too.
  started=$SECONDS
  deadline=$((SECONDS + timeout_seconds))

  while (( SECONDS < deadline )); do
    attempt=$((attempt + 1))
    # A refused connection is expected while the server starts; treat as "not ready".
    status="$(curl -sf "http://localhost:${OVMS_REST_PORT}/v1/config" 2>/dev/null || true)"

    if [[ "$status" == *'"AVAILABLE"'* ]]; then
      echo "✓ OVMS is ready. Model status: AVAILABLE"
      return 0
    fi

    echo "  Attempt ${attempt}: model not available yet ($((SECONDS - started))s of ${timeout_seconds}s elapsed)..."
    sleep "$poll_interval"
  done

  echo "ERROR: OVMS failed to become ready within expected time." >&2
  echo "Check container logs: docker logs $OVMS_CONTAINER_NAME" >&2
  return 1
}

################################################################################
# Diagnostics and Manual Equivalent
################################################################################

#######################################
# Print copy-pasteable commands that reproduce this deployment by hand.
# The token is printed as a literal ${HF_TOKEN:-} reference, never its value.
# Globals read: MODEL_ID, LOCAL_NAME, TARGET_DEVICE, TOOL_PARSER,
#               MODEL_CACHE_DIR, COMPOSE_FILE, OVMS_REST_PORT
#######################################
print_manual_equivalent() {
  cat <<EOF

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Manual Equivalent (README documents this workflow)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

The same deployment can be achieved manually with:

  # Set environment variables
  export MODEL_ID="$MODEL_ID"
  export LOCAL_NAME="$LOCAL_NAME"
  export TARGET_DEVICE="$TARGET_DEVICE"
  export TOOL_PARSER="$TOOL_PARSER"
  export MODEL_CACHE_DIR="$MODEL_CACHE_DIR"
  export HF_TOKEN="\${HF_TOKEN:-}"

  # Deploy via Docker Compose
  docker compose -f "$COMPOSE_FILE" up -d

  # Wait for OVMS to become ready
  curl -sf http://localhost:${OVMS_REST_PORT}/v1/config | grep AVAILABLE

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

EOF
}

#######################################
# Print troubleshooting pointers after a failed deployment.
# Globals read: OVMS_CONTAINER_NAME
# Arguments: $1 - exit code; nothing is printed when it is 0
#######################################
cleanup_on_error() {
  local exit_code="$1"

  if [[ $exit_code -ne 0 ]]; then
    echo ""
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo "Deployment failed. For troubleshooting, see:"
    echo "  - Container logs: docker logs $OVMS_CONTAINER_NAME"
    echo "  - README.md troubleshooting section"
    echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo ""
  fi
}

################################################################################
# Main Orchestration
################################################################################

#######################################
# Entry point: parse, validate, prepare, deploy, then optionally wait for
# OVMS to become healthy and print a summary.
# Arguments: the script's positional parameters
#######################################
main() {
  parse_args "$@"
  # Exit explicitly rather than relying on `set -e` and the raw error count.
  validate_prerequisites || exit 1
  validate_device "$TARGET_DEVICE"

  # Normalize model name if not overridden
  LOCAL_NAME="${LOCAL_NAME:-$(normalize_model_name "$MODEL_ID")}"

  # Resolve tool parser if not overridden
  TOOL_PARSER="$(resolve_tool_parser "$MODEL_ID" "$TOOL_PARSER")"

  # Prepare workspace
  prepare_model_workspace "$MODEL_CACHE_DIR"

  # Export runtime configuration
  export_runtime_configuration

  # Deploy
  deploy_ovms "$COMPOSE_FILE"

  # Health check (unless skipped)
  if [[ "$SKIP_WAIT" == "false" ]]; then
    if ! wait_for_health; then
      cleanup_on_error 1
      exit 1
    fi
  else
    echo "Skipping health check (--skip-wait specified)"
  fi

  # Print manual equivalent
  print_manual_equivalent

  # Success summary
  echo "✓ Deployment complete!"
  echo ""
  echo "Services running:"
  echo "  - OVMS: http://localhost:${OVMS_REST_PORT}/v3"
  echo "  - OpenHands: http://localhost:3000"
  echo ""
  echo "Next steps (from README.md):"
  echo "  1. Verify OVMS: curl http://localhost:${OVMS_REST_PORT}/v3/models"
  echo "  2. Open OpenHands: http://localhost:3000"
  echo "  3. Create an agent task to test the integration"
  echo ""
}

# Execute main function with all arguments
main "$@"