From b2ba5463aca28f1aae305ca82ae8835d09946ddb Mon Sep 17 00:00:00 2001 From: Marcus Messer <12846590+m-messer@users.noreply.github.com> Date: Mon, 25 May 2026 13:21:49 +0100 Subject: [PATCH 01/12] Added GitHub Actions release workflow and updated build workflow to trigger on version tags (#18) --- .github/workflows/build.yml | 2 ++ .github/workflows/release.yml | 68 +++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f1c3fc4..707aed9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,6 +6,8 @@ on: - "README.md" branches: - main + tags: + - 'v*.*.*' pull_request: workflow_dispatch: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..a8efb59 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,68 @@ +name: Release + +on: + push: + branches: + - prod + workflow_dispatch: + inputs: + version: + description: 'Version to release (e.g. 1.2.3, leave empty to auto-bump patch)' + required: false + type: string + +jobs: + release: + name: Create Release + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check for existing tag on HEAD + id: idempotency + run: | + EXISTING=$(git describe --exact-match --tags HEAD 2>/dev/null || true) + if echo "$EXISTING" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "HEAD is already tagged as $EXISTING — skipping release." + echo "skip=true" >> $GITHUB_OUTPUT + else + echo "skip=false" >> $GITHUB_OUTPUT + fi + + - name: Determine version + if: steps.idempotency.outputs.skip == 'false' + id: version + run: | + if [ -n "${{ inputs.version }}" ]; then + NEW_VERSION="v${{ inputs.version }}" + else + LATEST=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0") + MAJOR=$(echo "${LATEST#v}" | cut -d. 
-f1) + MINOR=$(echo "${LATEST#v}" | cut -d. -f2) + PATCH=$(echo "${LATEST#v}" | cut -d. -f3) + NEW_VERSION="v${MAJOR}.${MINOR}.$((PATCH + 1))" + fi + echo "version=${NEW_VERSION}" >> $GITHUB_OUTPUT + echo "Releasing ${NEW_VERSION}" + + - name: Create and push tag + if: steps.idempotency.outputs.skip == 'false' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git tag ${{ steps.version.outputs.version }} + git push origin ${{ steps.version.outputs.version }} + + - name: Create GitHub Release + if: steps.idempotency.outputs.skip == 'false' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh release create ${{ steps.version.outputs.version }} \ + --generate-notes \ + --title "Release ${{ steps.version.outputs.version }}" From 6d30bf5788b7c4f53a622bd5f1bc89bc4c373358 Mon Sep 17 00:00:00 2001 From: Marcus Messer <12846590+m-messer@users.noreply.github.com> Date: Mon, 25 May 2026 13:30:17 +0100 Subject: [PATCH 02/12] Feature/release workflow (#20) * Added GitHub Actions release workflow and updated build workflow to trigger on version tags * Triggered evaluation-function-base release from release workflow --- .github/workflows/release.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a8efb59..5e878c6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -66,3 +66,12 @@ jobs: gh release create ${{ steps.version.outputs.version }} \ --generate-notes \ --title "Release ${{ steps.version.outputs.version }}" + + - name: Trigger evaluation-function-base release + if: steps.idempotency.outputs.skip == 'false' + uses: peter-evans/repository-dispatch@v3 + with: + token: ${{ secrets.EVALUATION_FUNCTION_BASE_BUILD_TRIGGER_TOKEN }} + repository: ${{ github.repository_owner }}/evaluation-function-base + event-type: release + client-payload: '{"shimmy_version": "${{ 
steps.version.outputs.version }}"}' From 65a0f61c30316d95cb6761557ea4e8f70568b0b7 Mon Sep 17 00:00:00 2001 From: Marcus Messer <12846590+m-messer@users.noreply.github.com> Date: Mon, 25 May 2026 14:45:49 +0100 Subject: [PATCH 03/12] Updated GitHub Actions build workflow to enable `latest` image tagging only for tag refs (#22) --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 707aed9..116b698 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -123,7 +123,7 @@ jobs: type=ref,event=branch type=ref,event=tag type=ref,event=pr - type=raw,value=latest,enable={{is_default_branch}} + type=raw,value=latest,enable=${{ github.ref_type == 'tag' }} type=edge,branch=main images: | ghcr.io/${{ github.repository }} From 6940711203098478aecc777ede4887c797be1c4a Mon Sep 17 00:00:00 2001 From: Marcus Messer <12846590+m-messer@users.noreply.github.com> Date: Wed, 27 May 2026 10:55:53 +0100 Subject: [PATCH 04/12] =?UTF-8?q?Add=20=C2=B5Ed=20OpenAPI=20validation=20m?= =?UTF-8?q?iddleware=20(#17)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added OpenAPI request/response validation middleware and integrated OpenAPI specification * Add embedded µEd OpenAPI specification Co-Authored-By: Claude Sonnet 4.6 * Move µEd OpenAPI spec into runtime/schema Relocates the spec from api/ into runtime/schema/ alongside the existing JSON schema files, and renames it to mued_v0.1.0.yml to make the version explicit. Removes the api/ package; embed is now owned by runtime/schema. Co-Authored-By: Claude Sonnet 4.6 * Ignore .idea/ directory Co-Authored-By: Claude Sonnet 4.6 * Make OpenAPI response validation strict for µEd routes Previously, responses that failed spec validation were only logged as warnings and forwarded anyway. Now a failed µEd response validation returns 500 to the caller. 
The legacy / route is unaffected — it has no matching path in the spec so the middleware passes it through unchanged. Co-Authored-By: Claude Sonnet 4.6 * Update Go version to 1.25 in Dockerfile for builder stage * Support OpenAPI 3.1.0 spec in router validation Pass IsOpenAPI31OrLater and AllowExtraSiblingFields options to the legacy router so description/summary siblings on $ref objects (valid in 3.1.0) don't fail validation. Also propagate errors from OpenAPIMiddleware and NewHttpServer instead of ignoring them. Co-Authored-By: Claude Sonnet 4.6 * Refactor error responses and improve OpenAPI middleware robustness Use `writeJSONError` helper for consistent JSON error responses in µEd handler. Enhance OpenAPI response validation to prevent buffer drainage during snapshot handling. * Add health status response to µEd handler based on test results * Update µEd test assertion to verify "status" field instead of "tests_passed" field --------- Co-authored-by: Claude Sonnet 4.6 --- .gitignore | 2 +- Dockerfile | 2 +- go.mod | 13 +- go.sum | 22 + handler/mued.go | 27 +- handler/mued_test.go | 2 +- internal/server/module.go | 2 + internal/server/openapi.go | 89 ++ internal/server/openapi_test.go | 161 +++ internal/server/server.go | 22 +- runtime/schema/mued_v0.1.0.yml | 2050 +++++++++++++++++++++++++++++++ runtime/schema/openapi.go | 6 + 12 files changed, 2380 insertions(+), 18 deletions(-) create mode 100644 internal/server/openapi.go create mode 100644 internal/server/openapi_test.go create mode 100644 runtime/schema/mued_v0.1.0.yml create mode 100644 runtime/schema/openapi.go diff --git a/.gitignore b/.gitignore index ca54b99..83e00e5 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,4 @@ lcov.info go.work # Local .env files -*.local \ No newline at end of file +*.local.idea/ diff --git a/Dockerfile b/Dockerfile index fb3c00a..84a9a90 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=$BUILDPLATFORM golang:1.24 as builder +FROM 
--platform=$BUILDPLATFORM golang:1.25 as builder WORKDIR /app diff --git a/go.mod b/go.mod index 1f343f2..10caf84 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/lambda-feedback/shimmy -go 1.24.5 +go 1.25 require ( github.com/aws/aws-lambda-go v1.46.0 @@ -23,12 +23,23 @@ require ( github.com/Microsoft/go-winio v0.6.2 // indirect github.com/StackExchange/wmi v1.2.1 // indirect github.com/deckarep/golang-set/v2 v2.6.0 // indirect + github.com/getkin/kin-openapi v0.138.0 // indirect github.com/go-ole/go-ole v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/holiman/uint256 v1.2.4 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect + github.com/oasdiff/yaml v0.0.9 // indirect + github.com/oasdiff/yaml3 v0.0.12 // indirect + github.com/perimeterx/marshmallow v1.1.5 // indirect + github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect + github.com/woodsbury/decimal128 v1.3.0 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect golang.org/x/crypto v0.24.0 // indirect diff --git a/go.sum b/go.sum index b65c019..014f78c 100644 --- a/go.sum +++ b/go.sum @@ -31,6 +31,8 @@ github.com/ethereum/go-ethereum v1.14.5 h1:szuFzO1MhJmweXjoM5nSAeDvjNUH3vIQoMzzQ github.com/ethereum/go-ethereum v1.14.5/go.mod h1:VEDGGhSxY7IEjn98hJRFXl/uFvpRgbIIf2PpXiyGGgc= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod 
h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/getkin/kin-openapi v0.138.0 h1:ebfE0JAmF6AqHrNBy1KO3Fs68K9tPs48HalvLPo7Rv4= +github.com/getkin/kin-openapi v0.138.0/go.mod h1:vUYWaKyMqj7PfTybelXtLuLN9tReS12vxnzMRK+z2GY= github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps= github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= @@ -38,6 +40,10 @@ github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3Bop github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 h1:TQcrn6Wq+sKGkpyPvppOz99zsMBaUOKXq6HSv655U1c= github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= @@ -48,6 +54,8 @@ github.com/holiman/uint256 v1.2.4 h1:jUc4Nk8fm9jZabQuqr2JzednajVmBpC+oiTiXZJEApU github.com/holiman/uint256 v1.2.4/go.mod h1:EOMSn4q6Nyt9P6efbI3bueV4e1b3dGlUCXeiRV4ng7E= github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk= github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod 
h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs= github.com/knadh/koanf/maps v0.1.1/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI= github.com/knadh/koanf/parsers/json v0.1.0 h1:dzSZl5pf5bBcW0Acnu20Djleto19T0CfHcvZ14NJ6fU= @@ -60,18 +68,28 @@ github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA= github.com/knadh/koanf/v2 v2.1.0 h1:eh4QmHHBuU8BybfIJ8mB8K8gsGCD/AUQTdwGq/GzId8= github.com/knadh/koanf/v2 v2.1.0/go.mod h1:4mnTRbZCK+ALuBXHZMjDfG9y714L7TykVnZkXbMU3Es= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mmcloughlin/addchain v0.4.0 h1:SobOdjm2xLj1KkXN5/n0xTIWyZA2+s99UCY1iPfkHRY= github.com/mmcloughlin/addchain v0.4.0/go.mod h1:A86O+tHqZLMNO4w6ZZ4FlVQEadcoqkyU72HC5wJ4RlU= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY= github.com/nxadm/tail v1.4.11/go.mod h1:OTaG3NK980DZzxbRq6lEuzgU+mug70nY11sMd4JXXHc= +github.com/oasdiff/yaml v0.0.9 h1:zQOvd2UKoozsSsAknnWoDJlSK4lC0mpmjfDsfqNwX48= +github.com/oasdiff/yaml v0.0.9/go.mod h1:8lvhgJG4xiKPj3HN5lDow4jZHPlx1i7dIwzkdAo6oAM= +github.com/oasdiff/yaml3 v0.0.12 
h1:75urAtPeDg2/iDEWwzNrLOWxI9N/dCh81nTTJtokt2M= +github.com/oasdiff/yaml3 v0.0.12/go.mod h1:y5+oSEHCPT/DGrS++Wc/479ERge0zTFxaF8PbGKcg2o= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4= +github.com/perimeterx/marshmallow v1.1.5 h1:a2LALqQ1BlHM8PZblsDdidgv1mWi1DgC2UmX50IvK2s= +github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw= github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -80,6 +98,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible h1:Bn1aCHHRnjv4Bl16T8rcaFjYSrGrIZvpiGO6P3Q4GpU= github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -96,6 +116,8 @@ github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+F github.com/tklauser/numcpus v0.6.1/go.mod 
h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= +github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIjVWss0= +github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= diff --git a/handler/mued.go b/handler/mued.go index 6897369..4c48786 100644 --- a/handler/mued.go +++ b/handler/mued.go @@ -37,6 +37,12 @@ func NewMuEdHandler(params MuEdHandlerParams) *MuEdHandler { } } +func writeJSONError(w http.ResponseWriter, msg string, status int) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + json.NewEncoder(w).Encode(map[string]any{"error": map[string]any{"message": msg}}) //nolint:errcheck +} + func (h *MuEdHandler) checkAuth(w http.ResponseWriter, r *http.Request) bool { if h.config.Auth.Key != "" && r.Header.Get("api-key") != h.config.Auth.Key { h.log.Debug("unauthorized request", zap.String("path", r.URL.Path)) @@ -59,13 +65,13 @@ func (h *MuEdHandler) ServeEvaluate(w http.ResponseWriter, r *http.Request) { body, err := io.ReadAll(r.Body) if err != nil { - http.Error(w, "failed to read body", http.StatusBadRequest) + writeJSONError(w, "failed to read body", http.StatusBadRequest) return } var muEdReq runtime.MuEdEvaluateRequest if err := json.Unmarshal(body, &muEdReq); err != nil { - http.Error(w, "invalid request body", http.StatusBadRequest) + writeJSONError(w, "invalid request body", http.StatusBadRequest) return } @@ -78,13 +84,13 @@ func (h *MuEdHandler) 
ServeEvaluate(w http.ResponseWriter, r *http.Request) { legacyBody, err = runtime.MuEdBuildLegacyEvaluateRequest(muEdReq) } if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) + writeJSONError(w, err.Error(), http.StatusBadRequest) return } legacyBodyBytes, err := json.Marshal(legacyBody) if err != nil { - http.Error(w, "failed to build request", http.StatusInternalServerError) + writeJSONError(w, "failed to build request", http.StatusInternalServerError) return } @@ -118,13 +124,13 @@ func (h *MuEdHandler) ServeEvaluate(w http.ResponseWriter, r *http.Request) { var respBody map[string]any if err := json.Unmarshal(resp.Body, &respBody); err != nil { - http.Error(w, "failed to parse response", http.StatusInternalServerError) + writeJSONError(w, "failed to parse response", http.StatusInternalServerError) return } result, ok := respBody["result"].(map[string]any) if !ok { - http.Error(w, "invalid response from evaluation function", http.StatusInternalServerError) + writeJSONError(w, "invalid response from evaluation function", http.StatusInternalServerError) return } @@ -160,13 +166,18 @@ func (h *MuEdHandler) ServeHealth(w http.ResponseWriter, r *http.Request) { return } - result, ok := resp["result"] + result, ok := resp["result"].(map[string]any) if !ok { http.Error(w, "invalid health response", http.StatusInternalServerError) return } + status := "DEGRADED" + if testsPassed, _ := result["tests_passed"].(bool); testsPassed { + status = "OK" + } + w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(result) //nolint:errcheck + json.NewEncoder(w).Encode(map[string]any{"status": status}) //nolint:errcheck } diff --git a/handler/mued_test.go b/handler/mued_test.go index bcc905f..4b16850 100644 --- a/handler/mued_test.go +++ b/handler/mued_test.go @@ -280,7 +280,7 @@ func TestMuEdServeHealth_Success(t *testing.T) { var result map[string]any require.NoError(t, json.Unmarshal(raw, &result)) - 
assert.Equal(t, true, result["tests_passed"]) + assert.Equal(t, "OK", result["status"]) mockRuntime.AssertExpectations(t) } diff --git a/internal/server/module.go b/internal/server/module.go index f41556b..7644bed 100644 --- a/internal/server/module.go +++ b/internal/server/module.go @@ -6,6 +6,8 @@ func Module(config HttpConfig) fx.Option { return fx.Module("server", // provide config fx.Supply(config), + // provide openapi spec + fx.Provide(LoadOpenAPISpec), // provide server fx.Provide(NewLifecycleServer), // invoke server diff --git a/internal/server/openapi.go b/internal/server/openapi.go new file mode 100644 index 0000000..3ef2727 --- /dev/null +++ b/internal/server/openapi.go @@ -0,0 +1,89 @@ +package server + +import ( + "bytes" + "fmt" + "io" + "net/http" + "net/http/httptest" + + "github.com/getkin/kin-openapi/openapi3" + "github.com/getkin/kin-openapi/openapi3filter" + "github.com/getkin/kin-openapi/routers/legacy" + "go.uber.org/zap" + + "github.com/lambda-feedback/shimmy/runtime/schema" +) + +func LoadOpenAPISpec() (*openapi3.T, error) { + loader := openapi3.NewLoader() + loader.IsExternalRefsAllowed = true + spec, err := loader.LoadFromData(schema.OpenAPISpec) + if err != nil { + return nil, fmt.Errorf("loading OpenAPI spec: %w", err) + } + // Skip validation for OpenAPI 3.1.0 — the legacy router validates on NewRouter. 
+ return spec, nil +} + +func OpenAPIMiddleware(spec *openapi3.T, log *zap.Logger) (func(http.Handler) http.Handler, error) { + router, err := legacy.NewRouter(spec, + openapi3.IsOpenAPI31OrLater(), + openapi3.AllowExtraSiblingFields("description", "summary"), + ) + if err != nil { + return nil, fmt.Errorf("creating OpenAPI router: %w", err) + } + opts := &openapi3filter.Options{AuthenticationFunc: openapi3filter.NoopAuthenticationFunc} + + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + route, pathParams, err := router.FindRoute(r) + if err != nil { + // Not a µEd route — pass through unvalidated + next.ServeHTTP(w, r) + return + } + + // Validate request + reqInput := &openapi3filter.RequestValidationInput{ + Request: r, + PathParams: pathParams, + Route: route, + Options: opts, + } + if err := openapi3filter.ValidateRequest(r.Context(), reqInput); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + // Capture response for validation + rec := httptest.NewRecorder() + next.ServeHTTP(rec, r) + + // Snapshot body before validation — ValidateResponse drains the buffer. 
+ bodyBytes := rec.Body.Bytes() + + // Validate response (lenient — log only) + respInput := &openapi3filter.ResponseValidationInput{ + RequestValidationInput: reqInput, + Status: rec.Code, + Header: rec.Header(), + Body: io.NopCloser(bytes.NewReader(bodyBytes)), + Options: opts, + } + if err := openapi3filter.ValidateResponse(r.Context(), respInput); err != nil { + log.Error("response failed OpenAPI validation", zap.Error(err)) + http.Error(w, "invalid response format", http.StatusInternalServerError) + return + } + + // Forward captured response + for k, v := range rec.Header() { + w.Header()[k] = v + } + w.WriteHeader(rec.Code) + w.Write(bodyBytes) //nolint:errcheck + }) + }, nil +} diff --git a/internal/server/openapi_test.go b/internal/server/openapi_test.go new file mode 100644 index 0000000..554f354 --- /dev/null +++ b/internal/server/openapi_test.go @@ -0,0 +1,161 @@ +package server + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func TestLoadOpenAPISpec(t *testing.T) { + spec, err := LoadOpenAPISpec() + require.NoError(t, err) + assert.NotNil(t, spec) +} + +func TestOpenAPIMiddleware_Init(t *testing.T) { + spec, err := LoadOpenAPISpec() + require.NoError(t, err) + + middleware, err := OpenAPIMiddleware(spec, zap.NewNop()) + require.NoError(t, err) + assert.NotNil(t, middleware) +} + +func TestOpenAPIMiddleware_UnknownRoute_PassesThrough(t *testing.T) { + middleware := mustMiddleware(t) + + called := false + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.WriteHeader(http.StatusOK) + }) + + req := httptest.NewRequest(http.MethodGet, "/not-a-mued-route", nil) + w := httptest.NewRecorder() + middleware(next).ServeHTTP(w, req) + + assert.True(t, called, "next handler should be called for unknown route") + assert.Equal(t, http.StatusOK, w.Code) +} + +func 
TestOpenAPIMiddleware_ValidRequest_ReachesHandler(t *testing.T) { + middleware := mustMiddleware(t) + + body := mustJSON(t, map[string]any{ + "submission": map[string]any{ + "type": "TEXT", + "content": map[string]any{"text": "hello"}, + }, + }) + + called := false + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(`[]`)) //nolint:errcheck + }) + + req := httptest.NewRequest(http.MethodPost, "/evaluate", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + middleware(next).ServeHTTP(w, req) + + assert.True(t, called, "next handler should be called for valid request") + assert.Equal(t, http.StatusOK, w.Code) +} + +func TestOpenAPIMiddleware_MissingRequiredField_Returns400(t *testing.T) { + middleware := mustMiddleware(t) + + // POST /evaluate requires "submission" + body := mustJSON(t, map[string]any{}) + + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Error("next handler must not be called for invalid request") + }) + + req := httptest.NewRequest(http.MethodPost, "/evaluate", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + middleware(next).ServeHTTP(w, req) + + assert.Equal(t, http.StatusBadRequest, w.Code) +} + +func TestOpenAPIMiddleware_InvalidResponseBody_Returns500(t *testing.T) { + middleware := mustMiddleware(t) + + body := mustJSON(t, map[string]any{ + "submission": map[string]any{ + "type": "TEXT", + "content": map[string]any{"text": "hello"}, + }, + }) + + // handler returns an object, but spec requires an array for POST /evaluate 200 + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"unexpected": "object"}`)) //nolint:errcheck + }) + + req := 
httptest.NewRequest(http.MethodPost, "/evaluate", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + middleware(next).ServeHTTP(w, req) + + assert.Equal(t, http.StatusInternalServerError, w.Code) +} + +func TestOpenAPIMiddleware_ValidHealthRequest_ReachesHandler(t *testing.T) { + middleware := mustMiddleware(t) + + called := false + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write(mustJSON(t, map[string]any{ //nolint:errcheck + "status": "OK", + "capabilities": map[string]any{ + "supportsEvaluate": true, + "supportsPreSubmissionFeedback": false, + "supportsFormativeFeedback": true, + "supportsSummativeFeedback": true, + "supportsDataPolicy": "NOT_SUPPORTED", + }, + })) + }) + + req := httptest.NewRequest(http.MethodGet, "/evaluate/health", nil) + w := httptest.NewRecorder() + middleware(next).ServeHTTP(w, req) + + assert.True(t, called, "next handler should be called for valid health request") + assert.Equal(t, http.StatusOK, w.Code) +} + +// mustMiddleware loads the real spec and returns the initialised middleware, failing the test on error. +func mustMiddleware(t *testing.T) func(http.Handler) http.Handler { + t.Helper() + spec, err := LoadOpenAPISpec() + require.NoError(t, err) + middleware, err := OpenAPIMiddleware(spec, zap.NewNop()) + require.NoError(t, err) + return middleware +} + +// mustJSON marshals v to JSON, failing the test on error. 
+func mustJSON(t *testing.T, v any) []byte { + t.Helper() + b, err := json.Marshal(v) + require.NoError(t, err) + return b +} \ No newline at end of file diff --git a/internal/server/server.go b/internal/server/server.go index 883be2f..6a94ea5 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -6,6 +6,7 @@ import ( "net" "net/http" + "github.com/getkin/kin-openapi/openapi3" "go.uber.org/fx" "go.uber.org/zap" "golang.org/x/net/http2" @@ -18,6 +19,7 @@ type HttpServerParams struct { Context context.Context Config HttpConfig + Spec *openapi3.T Handlers []*HttpHandler `group:"handlers"` Logger *zap.Logger @@ -31,7 +33,7 @@ type HttpServer struct { log *zap.Logger } -func NewHttpServer(params HttpServerParams) *HttpServer { +func NewHttpServer(params HttpServerParams) (*HttpServer, error) { mux := http.NewServeMux() for _, handler := range params.Handlers { @@ -39,8 +41,13 @@ func NewHttpServer(params HttpServerParams) *HttpServer { } var handler http.Handler = NormalizePath(mux) + openAPIMiddleware, err := OpenAPIMiddleware(params.Spec, params.Logger) + if err != nil { + return nil, fmt.Errorf("initialising OpenAPI middleware: %w", err) + } + handler = openAPIMiddleware(handler) if params.Config.H2c { - handler = h2c.NewHandler(NormalizePath(mux), &http2.Server{}) + handler = h2c.NewHandler(handler, &http2.Server{}) } server := &http.Server{ @@ -54,11 +61,14 @@ func NewHttpServer(params HttpServerParams) *HttpServer { port: params.Config.Port, server: server, log: params.Logger, - } + }, nil } -func NewLifecycleServer(params HttpServerParams, lc fx.Lifecycle) *HttpServer { - server := NewHttpServer(params) +func NewLifecycleServer(params HttpServerParams, lc fx.Lifecycle) (*HttpServer, error) { + server, err := NewHttpServer(params) + if err != nil { + return nil, err + } lc.Append(fx.Hook{ OnStart: func(ctx context.Context) error { go server.Serve(ctx) @@ -68,7 +78,7 @@ func NewLifecycleServer(params HttpServerParams, lc fx.Lifecycle) 
*HttpServer { return server.Shutdown(ctx) }, }) - return server + return server, nil } func (s *HttpServer) Serve(context.Context) error { diff --git a/runtime/schema/mued_v0.1.0.yml b/runtime/schema/mued_v0.1.0.yml new file mode 100644 index 0000000..c5ca3c8 --- /dev/null +++ b/runtime/schema/mued_v0.1.0.yml @@ -0,0 +1,2050 @@ +openapi: 3.1.0 +info: + title: µEd API - Educational Microservices + version: 0.1.0 + contact: + name: µEd API Maintainers + description: | + The µEd API ("microservices for education") is a specification for interoperable educational services.

Currently defined endpoints:
- **Evaluate Task**: automatic feedback and grading for student submissions.
- **Chat**: conversational interactions around tasks, submissions, or general + learning questions. +tags: + - name: evaluate + description: Endpoints for evaluating student submissions and generating feedback. + - name: chat + description: Conversational endpoints for educational dialogue. +paths: + /evaluate: + post: + summary: Evaluate a submission and generate feedback + operationId: evaluateSubmission + description: | + Generates a list of feedback items for a given student submission. The request can optionally include the task context, user information, criteria to evaluate on, pre-submission feedback options, configuration, and a callback URL for asynchronous result delivery. + tags: + - evaluate + parameters: + - $ref: '#/components/parameters/Authorization' + - $ref: '#/components/parameters/X-Request-Id' + - $ref: '#/components/parameters/X-Api-Version' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateRequest' + examples: + simpleTextSubmission: + summary: Simple text submission without user or criteria + value: + submission: + submissionId: sub-123 + taskId: task-42 + type: TEXT + format: plain + content: + text: Explain what polymorphism is in object-oriented programming. + submittedAt: '2025-12-16T09:30:00Z' + version: 1 + configuration: null + preSubmissionFeedbackExample: + summary: Pre-submission feedback (non-final) + value: + submission: + submissionId: sub-777 + taskId: task-42 + type: TEXT + format: plain + content: + text: My short answer... + preSubmissionFeedback: + enabled: true + configuration: + llm: + model: gpt-5.2 + temperature: 0.4 + asyncCallbackExample: + summary: Asynchronous processing via callback URL + value: + submission: + submissionId: sub-async-001 + taskId: task-42 + type: TEXT + format: plain + content: + text: Detailed essay answer that may require longer processing. 
+ submittedAt: '2025-12-16T09:45:00Z' + version: 1 + callbackUrl: https://learning-platform.example.com/hooks/evaluate-result + withTaskAndExtras: + summary: With task context, user, criteria and configuration + value: + task: + taskId: task-12 + title: Explain polymorphism + content: + text: Define polymorphism and give at least one example in Java. + learningObjectives: + - Explain the concept of polymorphism. + - Provide an example of subtype polymorphism in Java. + referenceSolution: + text: | + Polymorphism allows the same method call to result in different behavior depending on the object's runtime type. For example, a variable of type Shape can reference a Circle or Rectangle, and calling draw() will invoke the appropriate implementation. + context: + constraints: Answer in 3-6 sentences. + language: en + submission: + submissionId: sub-456 + taskId: task-12 + type: TEXT + format: plain + content: + text: | + Polymorphism means that an object can take many forms, for example subclasses implementing methods differently. + submittedAt: '2025-12-16T10:00:00Z' + version: 2 + user: + userId: user-789 + type: LEARNER + detailPreference: DETAILED + tonePreference: FRIENDLY + languagePreference: en + criteria: + - criterionId: crit-1 + name: Correctness + context: The explanation of polymorphism is conceptually correct. + maxPoints: 10 + - criterionId: crit-2 + name: Clarity + context: The explanation is clear, well-structured, and easy to understand. 
+ maxPoints: 5 + preSubmissionFeedback: + enabled: false + configuration: + llm: + model: gpt-5.2 + temperature: 0.2 + maxTokens: 800 + credentials: + type: JWT + key: Some-Key + enforceRubricStrictness: true + codeSubmissionExample: + summary: Code submission (Python) + value: + submission: + submissionId: sub-code-001 + taskId: task-python-101 + type: CODE + format: python + content: + code: | + def fibonacci(n): + if n <= 1: + return n + return fibonacci(n-1) + fibonacci(n-2) + + # Test the function + for i in range(10): + print(fibonacci(i)) + submittedAt: '2025-12-16T11:00:00Z' + version: 1 + codeMultiFileExample: + summary: Code submission with multiple files + value: + submission: + submissionId: sub-code-002 + taskId: task-java-201 + type: CODE + format: java + content: + files: + - path: src/Main.java + content: | + public class Main { + public static void main(String[] args) { + Calculator calc = new Calculator(); + System.out.println(calc.add(2, 3)); + } + } + - path: src/Calculator.java + content: | + public class Calculator { + public int add(int a, int b) { + return a + b; + } + } + entryPoint: src/Main.java + submittedAt: '2025-12-16T11:30:00Z' + version: 1 + codeSympyExample: + summary: Math submission (SymPy/Python) + value: + submission: + submissionId: sub-math-002 + taskId: task-algebra-101 + type: CODE + format: sympy + content: + expression: solve(x**2 - 4, x) + imports: + - from sympy import symbols, solve + - x = symbols('x') + submittedAt: '2025-12-16T12:30:00Z' + version: 1 + mathInlineLatexExample: + summary: Inline math submission (Inline LaTeX) + value: + submission: + submissionId: sub-math-001 + taskId: task-calculus-101 + type: MATH + format: latex + content: + expression: \int_{0}^{\infty} e^{-x^2} dx = \frac{\sqrt{\pi}}{2} + submittedAt: '2025-12-16T12:00:00Z' + version: 1 + mathMathMLExample: + summary: Math submission (MathML) + value: + submission: + submissionId: sub-math-003 + taskId: task-geometry-101 + type: MATH + format: 
mathml + content: + expression: | + + + a + = + + + b2 + + + c2 + + + + + submittedAt: '2025-12-16T13:00:00Z' + version: 1 + modelUmlExample: + summary: Model submission (UML class diagram - PlantUML) + value: + submission: + submissionId: sub-model-001 + taskId: task-oop-design-101 + type: MODEL + format: uml + content: + model: | + @startuml + abstract class Animal { + +name: String + +speak(): String + } + + class Dog extends Animal { + +speak(): String + } + + class Cat extends Animal { + +speak(): String + } + @enduml + notation: plantuml + diagramType: class + submittedAt: '2025-12-16T14:00:00Z' + version: 1 + modelErExample: + summary: Model submission (ER diagram - JSON structure) + value: + submission: + submissionId: sub-model-002 + taskId: task-database-101 + type: MODEL + format: er + content: + model: + entities: + - name: Student + attributes: + - name: student_id + type: INTEGER + primaryKey: true + - name: name + type: VARCHAR(100) + - name: email + type: VARCHAR(255) + - name: Course + attributes: + - name: course_id + type: INTEGER + primaryKey: true + - name: title + type: VARCHAR(200) + relationships: + - name: enrolls_in + from: Student + to: Course + cardinality: many-to-many + notation: json + submittedAt: '2025-12-16T14:30:00Z' + version: 1 + modelBpmnExample: + summary: Model submission (BPMN process) + value: + submission: + submissionId: sub-model-003 + taskId: task-process-101 + type: MODEL + format: bpmn + content: + model: | + + + + + + + + + + + notation: bpmn-xml + submittedAt: '2025-12-16T15:00:00Z' + version: 1 + textMarkdownExample: + summary: Text submission (Markdown with formatting) + value: + submission: + submissionId: sub-text-002 + taskId: task-essay-101 + type: TEXT + format: markdown + content: + markdown: | + # Introduction to Polymorphism + + Polymorphism is a fundamental concept in **object-oriented programming** that allows objects to be treated as instances of their parent class. + + ## Key Points + + 1. 
**Subtype polymorphism**: Different classes can be used interchangeably + 2. **Method overriding**: Subclasses provide specific implementations + submittedAt: '2025-12-16T15:30:00Z' + version: 1 + responses: + '200': + description: Successfully generated feedback. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Feedback' + examples: + exampleResponse: + summary: Example feedback response + value: + - feedbackId: fb-1 + title: Clarify your definition + message: Your explanation of polymorphism is generally correct, but it would help to distinguish between subtype polymorphism and parametric polymorphism. + suggestedAction: Add one or two concrete examples of polymorphism in Java, e.g., method overriding. + awardedPoints: 2.5 + criterion: + criterionId: crit-1 + name: Correctness + context: The solution produces correct results for the specified problem. + maxPoints: 10 + target: + artefactType: TEXT + format: plain + locator: + type: span + startIndex: 0 + endIndex: 120 + - feedbackId: fb-2 + title: Overall structure + message: The overall structure of your answer is clear and easy to follow. + '202': + $ref: '#/components/responses/202-Accepted' + '400': + $ref: '#/components/responses/400-BadRequest' + '403': + $ref: '#/components/responses/403-Forbidden' + '406': + $ref: '#/components/responses/406-VersionNotSupported' + '500': + $ref: '#/components/responses/500-InternalError' + '501': + $ref: '#/components/responses/501-NotImplemented' + /evaluate/health: + get: + summary: Health and capabilities of the evaluate service + operationId: getEvaluateHealth + description: | + Returns health information and capabilities of the evaluate service. 
Clients can use this endpoint to discover whether the service supports optional features such as pre-submission feedback, formative feedback, and summative feedback. + tags: + - evaluate + parameters: + - $ref: '#/components/parameters/X-Request-Id' + - $ref: '#/components/parameters/X-Api-Version' + responses: + '200': + description: Evaluate service is reachable and reporting capabilities. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateHealthResponse' + examples: + exampleHealth: + summary: Example healthy service with capabilities + value: + status: OK + message: Service healthy + version: 1.0.0 + capabilities: + supportsEvaluate: true + supportsPreSubmissionFeedback: false + supportsFormativeFeedback: true + supportsSummativeFeedback: true + supportsDataPolicy: PARTIAL + supportedArtefactProfiles: + - type: TEXT + supportedFormats: + - plain + - markdown + - type: CODE + supportedFormats: + - python + - java + - javascript + - type: MATH + supportedFormats: + - latex + - mathml + supportedLanguages: + - en + - de + supportedVersions: + - 0.1.0 + '406': + $ref: '#/components/responses/406-VersionNotSupported' + '501': + description: The server does not implement the health endpoint for evaluate. + $ref: '#/components/responses/501-NotImplemented' + '503': + $ref: '#/components/responses/503-ServiceUnavailable' + /chat: + post: + summary: Chat about tasks, submissions, or learning topics + operationId: chat + description: | + Conversational endpoint for educational chat use cases. A conversation can be grounded in a specific course, task or submission and may use user information to adapt tone and detail. 
Typical use cases include: asking follow-up questions on feedback, requesting hints, or clarifying concepts. + tags: + - chat + parameters: + - $ref: '#/components/parameters/Authorization' + - $ref: '#/components/parameters/X-Request-Id' + - $ref: '#/components/parameters/X-Api-Version' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ChatRequest' + examples: + minimalChat: + summary: Minimal chat request + value: + messages: + - role: USER + content: Can you explain polymorphism? + configuration: null + chatWithLlmConfig: + summary: Chat request with optional LLM configuration + value: + conversationId: conv-1001 + user: + userId: user-456 + type: LEARNER + detailPreference: MEDIUM + tonePreference: FRIENDLY + languagePreference: en + messages: + - role: USER + content: Give me a hint for my answer about polymorphism. + context: + task: + taskId: task-12 + title: Explain polymorphism + content: + text: Define polymorphism and give at least one example in Java. + configuration: + type: Java Assistant + llm: + model: gpt-5.2 + temperature: 0.7 + stream: false + credentials: + type: JWT + key: Some-Key + chatWithContext: + summary: Chat request with complex educational context + value: + messages: + - role: USER + content: What should I do for this part? + user: + userId: user-321 + type: LEARNER + detailPreference: DETAILED + tonePreference: NEUTRAL + languagePreference: en + taskProgress: + currentQuestionId: question-321 + timeSpentOnQuestion: 30 minutes + currentPart: + partId: part-1 + timeSpentOnPart: 10 minutes + submission: + type: TEXT + content: + text: outputs= ["Woof!", "Meow!"] + feedback: + - feedbackId: fb-101 + message: Incomplete answer, explain why these outputs occur. 
+ context: + module: + moduleId: module-456 + title: Introduction to Object-Oriented Programming (OOP) + set: + setId: set-789 + title: Fundamentals + question: + questionId: question-321 + title: Understanding Polymorphism + content: | + Answer the questions for the following example of polymorphism in Python. + ```python class Animal: + def speak(self): + pass + + class Dog(Animal): + def speak(self): + return "Woof!" + + class Cat(Animal): + def speak(self): + return "Meow!" + + animals = [Dog(), Cat()] for animal in animals: + print(animal.speak()) + ``` + estimatedTime: 15-25 minutes + parts: + - partId: part-1 + content: | + Looking at the code example, identify which method is being overridden and explain how this demonstrates polymorphism. What output would the code produce and why? + - partId: part-2 + content: | + Write a new class called `Bird` that inherits from `Animal` and overrides the `speak()` method to return "Tweet!". Then add an instance of `Bird` to the animals list. + referenceSolution: + code: | + class Bird(Animal): + def speak(self): + return "Tweet!" + + animals = [Dog(), Cat(), Bird()] + for animal in animals: + print(animal.speak()) + configuration: + type: CS Assistant + llm: + model: gpt-5.2 + temperature: 0.5 + responses: + '200': + description: Successful chat response. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ChatResponse' + examples: + minimalChatResponse: + summary: Minimal chat response + value: + output: + role: ASSISTANT + content: | + Polymorphism is a core concept in object-oriented programming that allows objects of different classes to be treated as objects of a common superclass. It enables a single interface to represent different underlying forms (data types). 
For example, in Java, you can have a superclass `Animal` with a method `speak()`, and subclasses like `Dog` and `Cat` that provide their own implementations of `speak()`. When you call `speak()` on an `Animal` reference, the appropriate subclass method is invoked based on the actual object type at runtime. + metadata: null + chatWithLlmConfigResponse: + summary: Chat response with LLM configuration + value: + output: + role: ASSISTANT + content: | + Here's a hint for your answer about polymorphism: Focus on how polymorphism allows methods to do different things based on the object that it is acting upon, even when accessed through a common interface. You might want to mention method overriding and how it enables this behavior in object-oriented programming. + metadata: + responseTimeMs: 1800 + type: Java Assistant + createdAt: '2025-12-10T11:15:00Z' + llm: + model: gpt-5.2 + temperature: 0.7 + outputTokens: 78 + chatWithContextResponse: + summary: Chat response with context + value: + output: + role: ASSISTANT + content: | + In the provided code example, the `speak()` method is being overridden in the `Dog` and `Cat` subclasses of the `Animal` superclass. This demonstrates polymorphism because when we call `speak()` on each animal in the `animals` list, the method that gets executed depends on the actual object type (either `Dog` or `Cat`), not the reference type (`Animal`). The output of the code would be: + ``` Woof! Meow! ``` + This happens because each subclass provides its own implementation of the `speak()` method, and at runtime, the correct method is called based on the object's type. 
+ metadata: + responseTimeMs: 2000 + type: CS Assistant + createdAt: '2025-12-10T11:15:00Z' + llm: + model: gpt-5.2 + temperature: 0.5 + outputTokens: 143 + '400': + $ref: '#/components/responses/400-BadRequest-2' + '403': + $ref: '#/components/responses/403-Forbidden' + '406': + $ref: '#/components/responses/406-VersionNotSupported' + '500': + $ref: '#/components/responses/500-InternalError-2' + '501': + $ref: '#/components/responses/501-NotImplemented-2' + /chat/health: + get: + summary: Health and capabilities of the chat service + operationId: getChatHealth + description: | + Returns health information and capabilities of the chat service. Clients can use this endpoint to discover whether the service supports optional features such as user preferences or streaming responses. + tags: + - chat + parameters: + - $ref: '#/components/parameters/X-Request-Id' + - $ref: '#/components/parameters/X-Api-Version' + responses: + '200': + description: Chat service is reachable and reporting capabilities. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ChatHealthResponse' + examples: + exampleHealth: + summary: Example healthy service with capabilities + value: + status: OK + statusMessage: Service healthy + version: 1.0.0 + capabilities: + supportsChat: true + supportsUserPreferences: true + supportsStreaming: true + supportsDataPolicy: NOT_SUPPORTED + supportedLanguages: + - en + - de + supportedModels: + - gpt-4o + - llama-3 + supportedVersions: + - 0.1.0 + '406': + $ref: '#/components/responses/406-VersionNotSupported' + '501': + description: The server does not implement the health endpoint for chat. 
+ $ref: '#/components/responses/501-NotImplemented-2' + '503': + $ref: '#/components/responses/503-ServiceUnavailable-2' +components: + parameters: + Authorization: + in: header + name: Authorization + schema: + type: string + required: false + description: Optional authorization header. + X-Request-Id: + in: header + name: X-Request-Id + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + in: header + name: X-Api-Version + description: | + The µEd API version the client is targeting (e.g. "0.1.0"). If omitted, the server will use the latest version it supports. If the requested version cannot be served, the server returns 406 Version Not Supported. + required: false + schema: + type: string + example: 0.1.0 + schemas: + Task: + type: object + description: | + Task context including the content, learning objectives, optional reference solution, optional context information, and optional metadata. + required: + - title + properties: + taskId: + type: string + description: Optional unique identifier for the task. + title: + type: string + description: Short title or label for the task. + content: + type: + - object + - 'null' + description: Optional content shown to the learner (structure is task-specific). + additionalProperties: true + context: + type: + - object + - 'null' + description: Optional educational context (e.g., course material). + additionalProperties: true + learningObjectives: + type: + - array + - 'null' + description: Optional list of learning objectives addressed by this task. + items: + type: string + referenceSolution: + type: + - object + - 'null' + description: Optional reference or example solution (structure is task-specific). + additionalProperties: true + metadata: + type: + - object + - 'null' + description: Optional metadata such as difficulty, topic, tags, etc. + additionalProperties: true + ArtefactType: + type: string + description: | + High-level type of artefact. 
Use the 'format' field to specify the exact format (e.g., programming language for CODE, notation for MATH). + enum: + - TEXT + - CODE + - MODEL + - MATH + - OTHER + Submission: + type: object + description: | + A student's submission for a task. The structure of 'content' is intentionally generic and task-specific. + required: + - type + - content + properties: + submissionId: + type: string + description: Optional unique identifier of the submission. + taskId: + type: + - string + - 'null' + description: Optional identifier of the task this submission belongs to. + type: + $ref: '#/components/schemas/ArtefactType' + format: + type: + - string + - 'null' + description: | + Optional format specifier providing additional detail about the artefact. For TEXT: plain, markdown, html, rich_text, etc. For CODE: programming language (e.g., python, java, javascript, wolfram, matlab). For MATH: latex, mathml, sympy, wolfram, asciimath, etc. For MODEL: uml, er, bpmn, petri_net, state_machine, etc. Use lowercase values and snake_case. Services should document which formats they support. + content: + type: object + additionalProperties: true + description: | + Logical representation of the submission content. The expected structure depends on the artefact type: - TEXT: { text: string } or { markdown: string } - CODE: { code: string } or { files: [{ path: string, content: string }], entryPoint?: string } - MATH: { expression: string } - MODEL: { model: string | object, notation?: string } + submittedAt: + type: + - string + - 'null' + format: date-time + description: Optional timestamp when the submission was created. + version: + type: + - integer + - 'null' + format: int32 + description: Optional version number (e.g., resubmissions). + UserType: + type: string + description: Type of user interacting with the API. + enum: + - LEARNER + - TEACHER + - EDU_ADMIN + - SYS_ADMIN + - OTHER + Detail: + type: string + description: Level of detail preferred in responses or feedback.
+ enum: + - BRIEF + - MEDIUM + - DETAILED + Tone: + type: string + description: Preferred tone for responses or feedback. + enum: + - FORMAL + - NEUTRAL + - FRIENDLY + User: + type: object + description: User information including type and optional preferences influencing response tone, detail, and language. + required: + - type + additionalProperties: true + properties: + userId: + type: + - string + - 'null' + description: Optional unique identifier for the user. + type: + $ref: '#/components/schemas/UserType' + preference: + type: object + properties: + detail: + description: Optional preferred level of detail in responses. + $ref: '#/components/schemas/Detail' + tone: + description: Optional preferred tone for responses. + $ref: '#/components/schemas/Tone' + language: + type: + - string + - 'null' + description: Optional preferred language code following ISO 639 language codes (e.g., 'en', 'de'). + additionalProperties: true + taskProgress: + type: + - object + - 'null' + description: Optional information about the user's progress on this task/topic. 
+ additionalProperties: true + NumericGrade: + title: Numeric Grade + type: object + required: + - min + - max + - value + properties: + min: + type: number + description: Minimum value for the numeric range + max: + type: number + description: Maximum value for the numeric range + value: + type: number + description: The actual rating value within the min-max range + LetterOnlyGrade: + title: Letter Only Grade + type: object + required: + - value + properties: + value: + type: string + enum: + - A + - B + - C + - D + - E + - F + - n/a + LetterPlusMinusGrade: + title: Letter +/- grades + type: object + required: + - value + properties: + value: + type: string + enum: + - A+ + - A + - A- + - B+ + - B + - B- + - C+ + - C + - C- + - D+ + - D + - D- + - E+ + - E + - E- + - F + OtherGrade: + title: Other + type: object + required: + - value + properties: + value: + type: string + description: Free-form string rating + Criterion: + type: object + description: A criterion used to assess one dimension of a submission. + required: + - name + properties: + criterionId: + type: string + description: Optional unique identifier of the criterion. + name: + type: string + description: Human-readable name of the criterion. + context: + type: + - string + - object + - 'null' + description: Optional additional context about how to apply this criterion. + additionalProperties: true + gradeConfig: + oneOf: + - $ref: '#/components/schemas/NumericGrade' + - $ref: '#/components/schemas/LetterOnlyGrade' + - $ref: '#/components/schemas/LetterPlusMinusGrade' + - $ref: '#/components/schemas/OtherGrade' + description: Optional configuration for grades for this criterion. + PreSubmissionFeedback: + type: object + description: Optional configuration for pre-submission feedback runs. + required: + - enabled + additionalProperties: true + properties: + enabled: + type: boolean + description: Indicates whether pre-submission feedback is requested. 
+ LLMConfiguration: + type: object + description: | + Optional configuration for an LLM provider. All fields are optional and provider-specific values may be included via additional properties. + additionalProperties: true + properties: + model: + type: + - string + - 'null' + description: Optional model identifier (e.g., 'gpt-4o', 'llama-3'). + temperature: + type: + - number + - 'null' + description: Optional sampling temperature. + maxTokens: + type: + - integer + - 'null' + description: Optional maximum number of tokens to generate. + stream: + type: + - boolean + - 'null' + description: Optional flag indicating whether streaming responses are requested. + credentials: + type: + - object + - 'null' + description: Optional credentials object to be supplied with time-based key via a proxy. + additionalProperties: true + Region: + type: string + description: | + Geographic regions using ISO 3166-1 alpha-2 country codes (e.g., US, GB, DE) or regional groupings (e.g., EEA, EU, APAC). + AnonymizationLevel: + type: string + description: Level of required anonymization. + enum: + - NONE + - PSEUDONYMIZED + - ANONYMIZED + - AGGREGATED + DataPolicy: + type: object + description: | + Declares what downstream services are allowed to do with data associated with this request: which legal regimes apply, what uses are permitted, how long data may be retained, where it may be processed, and what constraints apply (especially for children / sensitive data). + additionalProperties: true + properties: + legal: + type: + - object + - 'null' + description: Legal framework and authority governing this data. + properties: + applicableLaws: + type: array + description: One or more applicable legal regimes. + items: + type: string + enum: + - GDPR + - UK_GDPR + - EPRIVACY + - CCPA_CPRA + - COPPA + - FERPA + - PPRA + - PIPEDA + - LGPD + - POPIA + - APPI + - PIPL + - OTHER + legalBasis: + type: + - array + - 'null' + description: Optional but recommended legal basis for processing. 
+ items: + type: string + enum: + - CONSENT + - CONTRACT + - LEGAL_OBLIGATION + - PUBLIC_TASK + - LEGITIMATE_INTERESTS + - VITAL_INTERESTS + - OTHER + jurisdiction: + type: + - object + - 'null' + description: Geographic constraints for data subjects and processing. + properties: + dataSubjectRegions: + type: + - array + - 'null' + description: Where the data subjects are located. + items: + $ref: '#/components/schemas/Region' + allowedProcessingRegions: + type: + - array + - 'null' + description: Where processing/storage is allowed. + items: + $ref: '#/components/schemas/Region' + disallowedProcessingRegions: + type: + - array + - 'null' + description: Explicit exclusions for processing regions. + items: + $ref: '#/components/schemas/Region' + dataSubject: + type: + - object + - 'null' + description: Information about who this data is about. + properties: + population: + type: + - string + - 'null' + description: Type of population this data concerns. + enum: + - STUDENT + - STAFF + - GUARDIAN + - MIXED + - OTHER + isChildData: + type: + - boolean + - 'null' + description: Indicates whether this data concerns children. If true, then minAge required. + minAge: + type: + - integer + - 'null' + description: Minimum age of data subjects, if relevant/known. + dataCategory: + type: + - object + - 'null' + description: Classification of the type of data. + properties: + classification: + type: + - string + - 'null' + description: Primary classification of the data. + enum: + - ANONYMOUS + - PSEUDONYMOUS + - PERSONAL + - EDUCATION_RECORD + - SENSITIVE + - OTHER + additionalProperties: true + retentionPermission: + type: + - array + - 'null' + description: What retention and secondary uses are permitted. + items: + type: string + enum: + - NEVER + - SECURITY + - LOGGING + - PRODUCT-IMPROVEMENT-NO-SHARE + - PRODUCT-IMPROVEMENT-SHARE-LIMITED + - RESEARCH-CONFIDENTIAL + - PUBLIC + - OTHER + retention: + type: + - object + - 'null' + description: How long data may be retained. 
+ properties: + retentionPeriod: + type: + - string + - 'null' + description: Concrete retention period, following ISO 8601 standard. + deleteOnRequest: + type: + - boolean + - 'null' + description: Whether data must be deleted on user request. + legalHoldAllowed: + type: + - boolean + - 'null' + description: Whether legal holds are permitted on this data. + sharing: + type: + - object + - 'null' + description: Constraints on who can receive this data. + properties: + thirdPartySharing: + type: + - string + - 'null' + description: Third-party sharing policy. + enum: + - PROHIBITED + - ALLOWED + - ALLOWED-LIMITED + subprocessorsAllowed: + type: + - boolean + - 'null' + description: Whether subprocessors are allowed. + allowedRecipients: + type: + - array + - 'null' + description: Categories of allowed recipients. + items: + type: string + enum: + - CONTROLLER-ONLY + - INTERNAL-SERVICES + - NAMED-PARTNERS + - PUBLIC + deidentification: + type: + - object + - 'null' + description: Required deidentification for specific uses. + properties: + requiredForServiceImprovement: + $ref: '#/components/schemas/AnonymizationLevel' + requiredForResearch: + $ref: '#/components/schemas/AnonymizationLevel' + ExecutionPolicy: + type: object + description: | + Declares execution constraints that the server or client consuming this API should apply when triggering calls to external providers (such as LLMs). Covers queue management and response timeouts. + additionalProperties: true + properties: + priority: + type: + - string + - 'null' + description: Request priority for queue management when capacity is constrained. + enum: + - low + - normal + - high + - null + timeout: + type: + - integer + - 'null' + description: Maximum time in milliseconds to wait for a complete response before failing. + minimum: 1 + EvaluateRequest: + type: object + description: | + Input for task evaluate service. 
The submission is mandatory; task, user, criteria, pre-submission feedback options, configuration, and callback URL are optional. + required: + - submission + properties: + task: + description: | + Optional task context that can include prompt content, learning objectives, a reference solution, and additional context metadata. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/Task' + submission: + $ref: '#/components/schemas/Submission' + user: + description: Optional user information including type and preferences that can influence response tone, detail, and language. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/User' + criteria: + type: + - array + - 'null' + description: Optional criteria used for evaluation. + items: + $ref: '#/components/schemas/Criterion' + preSubmissionFeedback: + description: | + Optional settings for pre-submission feedback (non-final). When enabled, the service should avoid committing or finalizing grades. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/PreSubmissionFeedback' + callbackUrl: + type: + - string + - 'null' + format: uri + description: | + Optional HTTPS callback URL for asynchronous processing. If provided, the service may return 202 Accepted immediately and deliver feedback results to this URL once processing is complete. + configuration: + description: | + Optional key-value configuration dictionary for provider-specific or experimental parameters. Not standardized. + type: + - object + - 'null' + additionalProperties: true + properties: + llm: + description: Optional LLM configuration used for this request. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/LLMConfiguration' + dataPolicy: + description: Optional data policy governing how this request's data may be processed and retained.
+ type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/DataPolicy' + executionPolicy: + description: Optional execution constraints for this request. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/ExecutionPolicy' + FeedbackTarget: + type: object + description: Optional reference to a specific part of the submission. + required: + - artefactType + properties: + artefactType: + $ref: '#/components/schemas/ArtefactType' + format: + type: + - string + - 'null' + description: | + Optional format specifier matching the submission format (e.g., python, latex). + locator: + type: + - object + - 'null' + description: | + Optional locator into the submission content. Standard locator types: - TEXT: { type: "span", startIndex: number, endIndex: number } - CODE: { type: "range", file?: string, startLine: number, endLine: number, startColumn?: number, endColumn?: number } - MATH: { type: "subexpression", path: string } (e.g., path to subexpression) - MODEL: { type: "element", elementId: string, elementType?: string } + additionalProperties: true + Feedback: + type: object + description: | + A single feedback item produced for the submission. It may include suggested actions, optional points, optional criterion linkage, and optional targeting into the submission. + properties: + feedbackId: + type: string + description: Unique identifier of the feedback item. + title: + type: + - string + - 'null' + description: Optional short label for this feedback item. + message: + type: + - string + - 'null' + description: Optional feedback text shown to the learner. + suggestedAction: + type: + - string + - 'null' + description: Optional suggestion for how to act on this feedback. + awardedPoints: + type: + - number + - 'null' + format: double + description: Optional points awarded for this feedback item. + criterion: + type: + - object + - 'null' + description: Optional criterion linked to this feedback item. 
+ allOf: + - $ref: '#/components/schemas/Criterion' + target: + type: + - object + - 'null' + description: Optional target reference inside the submission. + allOf: + - $ref: '#/components/schemas/FeedbackTarget' + EvaluateAcceptedResponse: + type: object + description: Acknowledgement that evaluation was accepted for asynchronous processing. + required: + - status + - requestId + properties: + status: + type: string + enum: + - ACCEPTED + description: Indicates that the request has been accepted for asynchronous processing. + requestId: + type: string + description: Identifier to correlate this accepted request with callback delivery. + message: + type: + - string + - 'null' + description: Optional human-readable message about asynchronous processing. + ErrorResponse: + type: object + description: Standard error response returned by µEd API services. + required: + - title + properties: + title: + type: string + description: Short, human-readable error title. + message: + type: + - string + - 'null' + description: Optional human-readable error message. + code: + type: + - string + - 'null' + description: Optional application-specific error code. + trace: + type: + - string + - 'null' + description: Optional debug trace or stack trace (should be omitted in production by default). + details: + type: + - object + - 'null' + description: Optional provider-specific details for debugging or programmatic handling. + additionalProperties: true + HealthStatus: + type: string + description: Overall health status of the service. + enum: + - OK + - DEGRADED + - UNAVAILABLE + EvaluateRequirements: + type: object + description: | + Requirements for calling the evaluate endpoint, e.g. whether an Authorization header and/or LLM configuration or credentials (provided via a proxy, preferably time-based and/or signed tokens) are required. 
+ additionalProperties: true + properties: + requiresAuthorizationHeader: + type: + - boolean + - 'null' + description: Optional flag indicating whether an Authorization header is required. + requiresLlmConfiguration: + type: + - boolean + - 'null' + description: Optional flag indicating whether configuration.llm must be provided. + requiresLlmCredentialProxy: + type: + - boolean + - 'null' + description: Optional flag indicating whether configuration.llm.credentials must be provided via a proxy. + DataPolicySupport: + type: string + enum: + - SUPPORTED + - NOT_SUPPORTED + - PARTIAL + description: Indicates whether the service supports data policy configuration. + ArtefactProfile: + type: object + description: | + Describes support for a specific artefact type and its formats. Used in health/capabilities responses to advertise what a service can handle. + required: + - type + properties: + type: + $ref: '#/components/schemas/ArtefactType' + supportedFormats: + type: + - array + - 'null' + description: | + List of supported formats for this artefact type. Use lowercase values. If null or empty, the service accepts any format for this type. + items: + type: string + examples: + - - plain + - markdown + - - python + - java + - javascript + - wolfram + - matlab + - - latex + - mathml + contentSchema: + type: + - object + - 'null' + description: | + Optional JSON Schema describing the expected content structure for this artefact type. Allows services to advertise their exact requirements. + additionalProperties: true + locatorSchema: + type: + - object + - 'null' + description: | + Optional JSON Schema describing the locator structure used for feedback targeting within this artefact type. + additionalProperties: true + EvaluateCapabilities: + type: object + description: Capabilities of the evaluate service. 
+ required: + - supportsEvaluate + - supportsPreSubmissionFeedback + - supportsFormativeFeedback + - supportsSummativeFeedback + - supportsDataPolicy + additionalProperties: true + properties: + supportsEvaluate: + type: boolean + description: Indicates whether the /evaluate endpoint is implemented and usable. + supportsPreSubmissionFeedback: + type: boolean + description: Indicates whether the service supports pre-submission feedback runs. + supportsFormativeFeedback: + type: boolean + description: Indicates whether the service supports qualitative feedback without points. + supportsSummativeFeedback: + type: boolean + description: Indicates whether the service supports feedback with points / grading signals. + supportsDataPolicy: + $ref: '#/components/schemas/DataPolicySupport' + supportedArtefactProfiles: + type: + - array + - 'null' + description: | + Optional list of supported artefact profiles. Each profile specifies an artefact type and the formats supported for that type. + items: + $ref: '#/components/schemas/ArtefactProfile' + supportedLanguages: + type: + - array + - 'null' + description: Optional list of supported language codes (e.g., 'en', 'de'). + items: + type: string + supportedAPIVersions: + type: + - array + - 'null' + description: | + Optional list of µEd API versions supported by this service implementation (e.g., ["0.1.0"]). Clients can use this to select a compatible X-Api-Version. + items: + type: string + EvaluateHealthResponse: + type: object + description: Health status and capabilities of the evaluate service. + required: + - status + - capabilities + properties: + status: + $ref: '#/components/schemas/HealthStatus' + message: + type: + - string + - 'null' + description: Optional human-readable status message. + version: + type: + - string + - 'null' + description: Optional version of the evaluate service implementation. 
+ requirements: + type: + - object + - 'null' + description: Optional requirements clients must satisfy to use this service. + allOf: + - $ref: '#/components/schemas/EvaluateRequirements' + capabilities: + $ref: '#/components/schemas/EvaluateCapabilities' + Message: + type: object + required: + - role + - content + properties: + role: + type: string + enum: + - USER + - ASSISTANT + - SYSTEM + - TOOL + content: + type: string + ChatRequest: + type: object + description: Request body for the chat endpoint. + required: + - messages + properties: + messages: + type: array + description: List of messages in the conversation (including history). + items: + $ref: '#/components/schemas/Message' + conversationId: + type: + - string + - 'null' + description: Optional identifier for the conversation session. + user: + description: Optional user information to adapt the chat style and level. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/User' + context: + type: + - object + - 'null' + description: Optional educational context (e.g., course material, task context). + additionalProperties: true + configuration: + type: + - object + - 'null' + description: Optional configuration for the model(s). + additionalProperties: true + properties: + type: + type: + - string + - 'null' + description: Optional type for the chatbot or chat model. + llm: + description: Optional LLM configuration used for this request. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/LLMConfiguration' + dataPolicy: + description: Optional data policy governing how this request's data may be processed and retained. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/DataPolicy' + executionPolicy: + description: Optional execution constraints for this request. + type: + - object + - 'null' + allOf: + - $ref: '#/components/schemas/ExecutionPolicy' + ChatResponse: + type: object + description: Response body for the chat endpoint. 
+ required: + - output + properties: + output: + $ref: '#/components/schemas/Message' + description: The generated assistant response. + metadata: + type: + - object + - 'null' + description: Optional metadata about response generation. + additionalProperties: true + ChatCapabilities: + type: object + description: Capabilities of the chat service. + required: + - supportsChat + - supportsDataPolicy + additionalProperties: true + properties: + supportsChat: + type: boolean + description: Indicates whether the /chat endpoint is implemented and usable. + supportsUserPreferences: + type: boolean + description: Indicates whether the service supports adapting to user preferences. + supportsStreaming: + type: boolean + description: Indicates whether the service supports streaming responses. + supportsDataPolicy: + $ref: '#/components/schemas/DataPolicySupport' + supportedLanguages: + type: + - array + - 'null' + description: Optional list of supported language codes. + items: + type: string + supportedModels: + type: + - array + - 'null' + description: Optional list of supported models. + items: + type: string + supportedAPIVersions: + type: + - array + - 'null' + description: | + Optional list of µEd API versions supported by this service implementation (e.g., ["0.1.0"]). Clients can use this to select a compatible X-Api-Version. + items: + type: string + ChatHealthResponse: + type: object + description: Health status and capabilities of the chat service. + required: + - status + - capabilities + properties: + status: + $ref: '#/components/schemas/HealthStatus' + statusMessage: + type: + - string + - 'null' + description: Optional human-readable status message. + version: + type: + - string + - 'null' + description: Optional version of the chat service implementation. + capabilities: + $ref: '#/components/schemas/ChatCapabilities' + responses: + 202-Accepted: + description: Request accepted for asynchronous evaluation processing. 
+ headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateAcceptedResponse' + examples: + asyncAccepted: + summary: Example accepted async request + value: + status: ACCEPTED + requestId: req-7c193f38 + message: Evaluation queued. Results will be sent to callbackUrl. + 400-BadRequest: + description: Invalid request (e.g. missing content or invalid schema). + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + validationError: + summary: Example validation error + value: + title: Invalid request + message: submission.content must not be empty. + code: VALIDATION_ERROR + trace: null + details: + field: submission.content + 403-Forbidden: + description: Forbidden (e.g. insufficient permissions or access denied). + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + permissionError: + summary: Example permission error + value: + title: Forbidden + message: You do not have permission to access this resource. + code: PERMISSION_DENIED + trace: null + details: + resource: submission + required_permission: write + 406-VersionNotSupported: + description: | + The requested API version (supplied via X-Api-Version) is not supported by this service. 
+ headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + versionNotSupported: + summary: Example version not supported error + value: + title: API version not supported + message: 'The requested API version ''0.0'' is not supported. Supported versions are: [''0.1.0''].' + code: VERSION_NOT_SUPPORTED + trace: null + details: + requestedVersion: '0.0' + supportedVersions: + - 0.1.0 + 500-InternalError: + description: Internal server error. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + internalError: + summary: Example internal error + value: + title: Internal server error + message: Unexpected failure while generating feedback. + code: INTERNAL_ERROR + trace: 'java.lang.RuntimeException: ... (stack trace omitted)' + details: + subsystem: llm-provider + 501-NotImplemented: + description: | + The server does not support the evaluate method. This allows service providers to implement only subsets of the µEd API. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + notImplemented: + summary: Example not implemented error + value: + title: Not implemented + message: This service does not implement /evaluate. 
+ code: NOT_IMPLEMENTED + trace: null + details: null + 503-ServiceUnavailable: + description: Service is currently unavailable or unhealthy. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + serviceUnavailable: + summary: Example unavailable error + value: + title: Service unavailable + message: Evaluate service is currently unavailable. + code: SERVICE_UNAVAILABLE + trace: null + details: + reason: Database connection failed + unhealthy: + summary: Example degraded / unavailable service + value: + title: Service unhealthy + status: UNAVAILABLE + message: Database connection failed + version: 1.0.0 + capabilities: + supportsEvaluate: false + supportsPreSubmissionFeedback: false + supportsFormativeFeedback: false + supportsSummativeFeedback: false + supportsDataPolicy: NOT_SUPPORTED + supportedArtefactProfiles: [] + supportedLanguages: [] + 400-BadRequest-2: + description: Invalid request (e.g. missing content or invalid schema). + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + invalidChatRequest: + summary: Example invalid chat request + value: + title: Invalid request + message: messages must contain at least one item. + code: VALIDATION_ERROR + trace: null + details: + field: messages + 500-InternalError-2: + description: Internal server error. + headers: + X-Request-Id: + description: Request id for tracing this request across services. 
+ schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + modelProviderError: + summary: Example model provider error + value: + title: Model provider error + message: LLM provider returned an error while generating a response. + code: LLM_PROVIDER_ERROR + trace: null + details: + provider: openai + 501-NotImplemented-2: + description: | + The server does not support the chat method. This allows service providers to implement only subsets of the µEd API. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + notImplemented: + summary: Example not implemented error + value: + title: Not implemented + message: This service does not implement /chat. + code: NOT_IMPLEMENTED + trace: null + details: null + 503-ServiceUnavailable-2: + description: Service is currently unavailable or unhealthy. + headers: + X-Request-Id: + description: Request id for tracing this request across services. + schema: + type: string + X-Api-Version: + description: The API version that was used to serve this response. + schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + serviceUnavailable: + summary: Example unavailable error + value: + title: Service unavailable + message: Chat service is currently unavailable. 
+ code: SERVICE_UNAVAILABLE + trace: null + details: + reason: LLM provider connection failed + serviceUnhealthy: + summary: Example degraded / unavailable service + value: + title: Service unhealthy + message: LLM provider connection failed + code: SERVICE_UNHEALTHY + version: 1.0.0 + capabilities: + supportsChat: false + supportsUserPreferences: false + supportsStreaming: false + supportsDataPolicy: NOT_SUPPORTED + supportedLanguages: [] + supportedModels: [] diff --git a/runtime/schema/openapi.go b/runtime/schema/openapi.go new file mode 100644 index 0000000..b771890 --- /dev/null +++ b/runtime/schema/openapi.go @@ -0,0 +1,6 @@ +package schema + +import _ "embed" + +//go:embed mued_v0.1.0.yml +var OpenAPISpec []byte From 3a4cfa41a25e84c9e5c2b9d0b7326537f9753592 Mon Sep 17 00:00:00 2001 From: Marcus Messer <12846590+m-messer@users.noreply.github.com> Date: Wed, 27 May 2026 11:06:41 +0100 Subject: [PATCH 05/12] Feature/sandboxed workers (#21) * Added Linux-only nsjail-based sandboxing for worker processes, including CLI support, configuration, and testing. * Added validation for `Content-Length` in `headerPrefixPipe` and tests for oversized and negative values * Enhanced `build.yml` to compile and install nsjail from source instead of using system package. * Switched nsjail mode from "once" to "exec" for direct command execution with inherited stdio. * Replaced `--time_limit` with `--rlimit_cpu` in nsjail arguments to ensure compatibility in containers without cgroupv2. * Updated sandbox test to replace `--time_limit` with `--rlimit_cpu` and adjusted workflow to run integration tests with elevated permissions. 
--- .github/workflows/build.yml | 33 +- Dockerfile | 61 ++- Makefile | 15 +- README.md | 74 ++++ cmd/root.go | 79 ++++ internal/execution/supervisor/adapter_file.go | 15 + .../execution/supervisor/adapter_rpc_pipe.go | 5 + .../supervisor/adapter_rpc_pipe_test.go | 21 + internal/execution/supervisor/config.go | 4 + internal/execution/supervisor/supervisor.go | 10 +- internal/execution/worker/sandbox.go | 133 +++++++ internal/execution/worker/sandbox_config.go | 42 ++ .../execution/worker/sandbox_export_test.go | 8 + internal/execution/worker/sandbox_stub.go | 15 + internal/execution/worker/sandbox_test.go | 364 ++++++++++++++++++ 15 files changed, 874 insertions(+), 5 deletions(-) create mode 100644 internal/execution/worker/sandbox.go create mode 100644 internal/execution/worker/sandbox_config.go create mode 100644 internal/execution/worker/sandbox_export_test.go create mode 100644 internal/execution/worker/sandbox_stub.go create mode 100644 internal/execution/worker/sandbox_test.go diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 116b698..f9e0388 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -83,10 +83,41 @@ jobs: # name: Go-results # path: TestResults.json + test-sandbox: + name: Sandbox Integration Tests + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: ./go.mod + + - name: Install Dependencies + run: go mod download + + - name: Install nsjail + run: | + sudo apt-get update + sudo apt-get install -y \ + autoconf bison flex g++ git \ + libprotobuf-dev libnl-route-3-dev \ + libtool pkg-config protobuf-compiler + git clone --depth=1 https://github.com/google/nsjail.git /tmp/nsjail + make -C /tmp/nsjail -j$(nproc) + sudo install -m 0755 /tmp/nsjail/nsjail /usr/sbin/nsjail + + - name: Run sandbox integration tests + run: sudo -E go test -v -run 'TestSandboxedWorker' 
./internal/execution/worker/... + build_docker: name: Build Docker Image runs-on: ubuntu-latest - needs: [test, build] + needs: [test, test-sandbox, build] concurrency: group: ${{ github.ref }} cancel-in-progress: ${{ github.event_name == 'pull_request' || github.ref_name != github.event.repository.default_branch }} diff --git a/Dockerfile b/Dockerfile index 84a9a90..14906f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,64 @@ ARG COMMIT RUN CGO_ENABLED=0 GOOS=$TARGETOS GOARCH=$TARGETARCH VERSION=$VERSION COMMIT=$COMMIT \ make build -FROM scratch +# Build nsjail from source. This stage is Linux/amd64 only; nsjail is a +# Linux kernel feature and does not cross-compile for other OS targets. +FROM ubuntu:24.04 AS nsjail-builder -# add binary to empty scratch image +RUN apt-get update && apt-get install -y --no-install-recommends \ + autoconf \ + bison \ + ca-certificates \ + flex \ + gcc \ + g++ \ + git \ + libcap-dev \ + libnl-route-3-dev \ + libprotobuf-dev \ + libtool \ + make \ + pkg-config \ + protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + +RUN git clone --depth=1 https://github.com/google/nsjail.git /nsjail-src +WORKDIR /nsjail-src +RUN make -j$(nproc) + +# Test-only stage: golang base image (Debian bookworm) + nsjail built from source. +# Go is pre-installed; all nsjail build deps are in Debian main — no universe needed. +# Used by `make test-sandbox`; not referenced by the production image. +FROM golang:1.24 AS test-sandbox +RUN apt-get update && apt-get install -y --no-install-recommends \ + autoconf \ + bison \ + ca-certificates \ + flex \ + libcap-dev \ + libnl-route-3-dev \ + libprotobuf-dev \ + libtool \ + pkg-config \ + protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* +RUN git clone --depth=1 https://github.com/google/nsjail.git /nsjail-src && \ + make -C /nsjail-src -j$(nproc) && \ + cp /nsjail-src/nsjail /usr/sbin/nsjail + +# Runtime image. Cannot use scratch because nsjail requires shared libraries +# (libcap, libprotobuf, libnl). 
Image size grows from ~8 MB to ~90-120 MB. +# When --sandbox is not used, shimmy behaves identically to the scratch image. +FROM ubuntu:24.04 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + libprotobuf32t64 \ + libnl-route-3-200 \ + libcap2 \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=nsjail-builder /nsjail-src/nsjail /usr/sbin/nsjail COPY --from=builder /app/bin/shimmy /shimmy + +ENTRYPOINT ["/shimmy"] diff --git a/Makefile b/Makefile index 9abf0b5..cd571ce 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,9 @@ GOLDFLAGS += -X main.Commit=$(COMMIT) GOFLAGS = -ldflags "$(GOLDFLAGS)" BINARY_NAME ?= shimmy +CONTAINER_ENGINE ?= docker -.PHONY: all build test test-unit lcov install generate-mocks update-schema +.PHONY: all build test test-unit test-sandbox lcov install generate-mocks update-schema all: build @@ -20,6 +21,18 @@ test: test-unit test-unit: go test -covermode=count -coverprofile=coverage.out ./... + +# Run sandbox integration tests inside a privileged container. +# Supports Docker (default) and Podman: CONTAINER_ENGINE=podman make test-sandbox +# On Linux with nsjail installed locally, use: +# go test -v -run 'TestSandboxedWorker' ./internal/execution/worker/... +test-sandbox: + $(CONTAINER_ENGINE) build --target test-sandbox -t shimmy-test-sandbox . + $(CONTAINER_ENGINE) run --rm --privileged \ + -v $(shell pwd):/workspace \ + -w /workspace \ + shimmy-test-sandbox \ + go test -v -run 'TestSandboxedWorker' ./internal/execution/worker/... 
lcov: gcov2lcov -infile=coverage.out -outfile=lcov.info diff --git a/README.md b/README.md index 50ed7c8..2cd6fda 100644 --- a/README.md +++ b/README.md @@ -244,3 +244,77 @@ For example, a Wolfram Language evaluation function in `evaluation.wl` would be ```shell wolframscript -file evaluation.wl /tmp/shimmy/abc/request-data-123 /tmp/shimmy/abc/response-data-456 ``` + +### Sandboxed Execution (Linux only, experimental) + +Shimmy can wrap each worker process in an [nsjail](https://github.com/google/nsjail) sandbox to safely execute arbitrary, untrusted code. The sandbox provides: + +- **Filesystem confinement** — the worker can only access explicitly bind-mounted paths +- **Resource limits** — CPU time, memory, and file descriptor caps +- **Network isolation** — optional; disables all outbound connections +- **Unprivileged UID** — worker runs as `nobody` (uid 65534) inside the jail + +Sandboxing requires Linux and the `nsjail` binary. The Docker image built from the project's `Dockerfile` includes nsjail at `/usr/sbin/nsjail`. On the host, install it with `sudo apt install nsjail` (Ubuntu 22.04+) or build from source. 
+ +Enable sandboxing with `--sandbox` and configure it with the flags below: + +| Flag | Env var | Default | Description | +|------|---------|---------|-------------| +| `--sandbox` | `SANDBOX_ENABLED` | `false` | Enable nsjail sandboxing | +| `--sandbox-nsjail-path` | `SANDBOX_NSJAIL_PATH` | `/usr/sbin/nsjail` | Path to the nsjail binary | +| `--sandbox-ro-bind` | `SANDBOX_RO_BINDS` | — | Host path to bind-mount read-only (repeatable) | +| `--sandbox-rw-bind` | `SANDBOX_RW_BINDS` | — | Host path to bind-mount read-write (repeatable) | +| `--sandbox-tmpfs` | `SANDBOX_TMPFS` | — | Path inside the sandbox to mount as tmpfs (repeatable) | +| `--sandbox-cpu-time` | `SANDBOX_CPU_TIME_LIMIT` | `0` (unlimited) | CPU time limit in seconds | +| `--sandbox-memory-mb` | `SANDBOX_MEMORY_LIMIT` | `0` (unlimited) | Memory limit in megabytes | +| `--sandbox-max-fds` | `SANDBOX_MAX_FDS` | `0` (nsjail default) | Maximum open file descriptors | +| `--sandbox-disable-network` | `SANDBOX_DISABLE_NETWORK` | `false` | Disable network access inside the sandbox | +| `--sandbox-seccomp` | `SANDBOX_SECCOMP` | `false` | Enable seccomp syscall filtering | + +A typical invocation for an untrusted Python worker: + +```shell +shimmy -c python3 -a evaluation.py \ + --sandbox \ + --sandbox-ro-bind /usr \ + --sandbox-ro-bind /lib \ + --sandbox-ro-bind /lib64 \ + --sandbox-rw-bind /tmp/shimmy \ + --sandbox-cpu-time 30 \ + --sandbox-memory-mb 256 \ + --sandbox-disable-network +``` + +> **Note:** nsjail requires either root or user namespace support. In Docker, pass `--privileged` or grant `CAP_SYS_ADMIN`. In Kubernetes, configure the pod's security context accordingly. + +#### Testing sandboxing locally + +The sandbox integration tests verify actual security properties — filesystem isolation, CPU limits, network isolation, and stdio passthrough. They skip automatically if `nsjail` is not available. 
+ +**On Linux with nsjail installed:** + +```shell +go test -v -run 'TestSandboxedWorker' ./internal/execution/worker/... +``` + +**On macOS (or any platform) via Docker or Podman:** + +```shell +make test-sandbox # Docker (default) +CONTAINER_ENGINE=podman make test-sandbox # Podman +``` + +This builds the `test-sandbox` Dockerfile stage (a Go toolchain image with nsjail compiled from the same upstream source as the production image) and runs the tests inside a privileged container. Rootless Podman works fine: `--privileged` grants all capabilities within the user namespace, which is sufficient for nsjail to create its own sub-namespaces. + +To manually verify isolation, run the Docker image with a sandboxed worker that attempts to read a protected file: + +```shell +docker run --rm --privileged \ + -e FUNCTION_COMMAND=/bin/sh \ + -e FUNCTION_ARGS="-c,cat /etc/shadow" \ + -e SANDBOX_ENABLED=true \ + -e SANDBOX_RO_BINDS="/usr:/bin:/lib:/lib64" \ + ghcr.io/lambda-feedback/shimmy serve +``` + +The worker should exit with a non-zero code because `/etc` is not mounted inside the sandbox. 
diff --git a/cmd/root.go b/cmd/root.go index 42fdb3a..275c31e 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -139,6 +139,74 @@ functions on arbitrary, serverless platforms.` Value: "127.0.0.1:7321", Category: "rpc", }, + // sandbox flags + &cli.BoolFlag{ + Name: "sandbox", + Usage: "enable nsjail sandboxing for worker processes (Linux only).", + Value: false, + Category: "sandbox", + EnvVars: []string{"SANDBOX_ENABLED"}, + }, + &cli.StringFlag{ + Name: "sandbox-nsjail-path", + Usage: "path to the nsjail binary.", + Value: "/usr/sbin/nsjail", + Category: "sandbox", + EnvVars: []string{"SANDBOX_NSJAIL_PATH"}, + }, + &cli.StringSliceFlag{ + Name: "sandbox-ro-bind", + Usage: "host path to bind-mount read-only inside the sandbox (repeatable).", + Category: "sandbox", + EnvVars: []string{"SANDBOX_RO_BINDS"}, + }, + &cli.StringSliceFlag{ + Name: "sandbox-rw-bind", + Usage: "host path to bind-mount read-write inside the sandbox (repeatable).", + Category: "sandbox", + EnvVars: []string{"SANDBOX_RW_BINDS"}, + }, + &cli.StringSliceFlag{ + Name: "sandbox-tmpfs", + Usage: "path inside the sandbox to mount as tmpfs (repeatable).", + Category: "sandbox", + EnvVars: []string{"SANDBOX_TMPFS"}, + }, + &cli.IntFlag{ + Name: "sandbox-cpu-time", + Usage: "CPU time limit in seconds for worker processes (0 = unlimited).", + Value: 0, + Category: "sandbox", + EnvVars: []string{"SANDBOX_CPU_TIME_LIMIT"}, + }, + &cli.IntFlag{ + Name: "sandbox-memory-mb", + Usage: "memory (address space) limit in megabytes for worker processes (0 = unlimited).", + Value: 0, + Category: "sandbox", + EnvVars: []string{"SANDBOX_MEMORY_LIMIT"}, + }, + &cli.IntFlag{ + Name: "sandbox-max-fds", + Usage: "maximum open file descriptors for worker processes (0 = nsjail default).", + Value: 0, + Category: "sandbox", + EnvVars: []string{"SANDBOX_MAX_FDS"}, + }, + &cli.BoolFlag{ + Name: "sandbox-disable-network", + Usage: "disable network access inside the sandbox.", + Value: false, + Category: "sandbox", + EnvVars: 
[]string{"SANDBOX_DISABLE_NETWORK"}, + }, + &cli.BoolFlag{ + Name: "sandbox-seccomp", + Usage: "enable seccomp syscall filtering inside the sandbox.", + Value: false, + Category: "sandbox", + EnvVars: []string{"SANDBOX_SECCOMP"}, + }, }, Before: func(ctx *cli.Context) error { // create the logger @@ -263,6 +331,17 @@ func parseRootConfig(ctx *cli.Context) (config.Config, error) { "rpc-transport-tcp-address": "runtime.io.rpc.tcp.address", "worker-send-timeout": "runtime.send.timeout", "worker-stop-timeout": "runtime.stop.timeout", + // sandbox + "sandbox": "runtime.sandbox.enabled", + "sandbox-nsjail-path": "runtime.sandbox.nsjail_path", + "sandbox-ro-bind": "runtime.sandbox.ro_binds", + "sandbox-rw-bind": "runtime.sandbox.rw_binds", + "sandbox-tmpfs": "runtime.sandbox.tmpfs", + "sandbox-cpu-time": "runtime.sandbox.cpu_time_limit", + "sandbox-memory-mb": "runtime.sandbox.memory_limit", + "sandbox-max-fds": "runtime.sandbox.max_fds", + "sandbox-disable-network": "runtime.sandbox.disable_network", + "sandbox-seccomp": "runtime.sandbox.seccomp", } // parse config using env diff --git a/internal/execution/supervisor/adapter_file.go b/internal/execution/supervisor/adapter_file.go index 7b67d18..7917f47 100644 --- a/internal/execution/supervisor/adapter_file.go +++ b/internal/execution/supervisor/adapter_file.go @@ -88,11 +88,21 @@ func (a *fileAdapter) Send( return nil, fmt.Errorf("error creating temp dir: %w", err) } + // allow sandboxed workers running as an unprivileged user to enter the dir + if err := os.Chmod(tmpPath, 0755); err != nil { + return nil, fmt.Errorf("error setting temp dir permissions: %w", err) + } + // create temp files for request and response data reqFile, err := os.CreateTemp(tmpPath, "request-data-*") if err != nil { return nil, fmt.Errorf("error creating temp file: %w", err) } + + // allow sandboxed workers (running as nobody) to read the request file + if err := os.Chmod(reqFile.Name(), 0644); err != nil { + return nil, fmt.Errorf("error 
setting request file permissions: %w", err) + } defer func() { if err := os.Remove(reqFile.Name()); err != nil { a.log.Error("failed to remove request file", zap.Error(err)) @@ -104,6 +114,11 @@ func (a *fileAdapter) Send( return nil, fmt.Errorf("error creating temp file: %w", err) } + // allow sandboxed workers (running as nobody) to write the response file + if err := os.Chmod(resFile.Name(), 0622); err != nil { + return nil, fmt.Errorf("error setting response file permissions: %w", err) + } + defer func() { if err := resFile.Close(); err != nil { a.log.Error("failed to close response file", zap.Error(err)) diff --git a/internal/execution/supervisor/adapter_rpc_pipe.go b/internal/execution/supervisor/adapter_rpc_pipe.go index 6d498a2..fdae37f 100644 --- a/internal/execution/supervisor/adapter_rpc_pipe.go +++ b/internal/execution/supervisor/adapter_rpc_pipe.go @@ -9,6 +9,8 @@ import ( "sync" ) +const maxContentLength = 64 * 1024 * 1024 // 64 MB + // headerPrefixPipe wraps another io.ReadWriteCloser and adds LSP-style headers type headerPrefixPipe struct { stdio io.ReadWriteCloser @@ -67,6 +69,9 @@ func (h *headerPrefixPipe) Read(p []byte) (int, error) { if err != nil { return 0, fmt.Errorf("invalid Content-Length value: %s", parts[1]) } + if v < 0 || v > maxContentLength { + return 0, fmt.Errorf("Content-Length out of range: %d", v) + } contentLength = v break } diff --git a/internal/execution/supervisor/adapter_rpc_pipe_test.go b/internal/execution/supervisor/adapter_rpc_pipe_test.go index 6801558..3a547e8 100644 --- a/internal/execution/supervisor/adapter_rpc_pipe_test.go +++ b/internal/execution/supervisor/adapter_rpc_pipe_test.go @@ -107,6 +107,27 @@ func TestHeaderPrefixPipe_MultipleMessages(t *testing.T) { assert.Equal(t, msg2, readBuf[:n]) } +func TestHeaderPrefixPipe_RejectsOversizedContentLength(t *testing.T) { + buf := newRwc() + pipe := &headerPrefixPipe{stdio: buf} + + header := fmt.Sprintf("Content-Length: %d\r\n\r\n", maxContentLength+1) + 
buf.(*rwc).Buffer.Write([]byte(header)) + + _, err := pipe.Read(make([]byte, 512)) + assert.ErrorContains(t, err, "Content-Length out of range") +} + +func TestHeaderPrefixPipe_RejectsNegativeContentLength(t *testing.T) { + buf := newRwc() + pipe := &headerPrefixPipe{stdio: buf} + + buf.(*rwc).Buffer.Write([]byte("Content-Length: -1\r\n\r\n")) + + _, err := pipe.Read(make([]byte, 512)) + assert.ErrorContains(t, err, "Content-Length out of range") +} + func TestHeaderPrefixPipe_SkipsStrayOutputBeforeContentLength(t *testing.T) { buf := newRwc() pipe := &headerPrefixPipe{stdio: buf} diff --git a/internal/execution/supervisor/config.go b/internal/execution/supervisor/config.go index 3dde914..520e367 100644 --- a/internal/execution/supervisor/config.go +++ b/internal/execution/supervisor/config.go @@ -57,4 +57,8 @@ type Config struct { // SendParams are the parameters to pass to the worker when // sending a message. SendParams SendConfig `conf:"send"` + + // Sandbox holds the nsjail sandbox configuration. When Sandbox.Enabled + // is false (the default), behaviour is identical to an unsandboxed worker. 
+ Sandbox worker.SandboxConfig `conf:"sandbox"` } diff --git a/internal/execution/supervisor/supervisor.go b/internal/execution/supervisor/supervisor.go index 58b3248..f3e6587 100644 --- a/internal/execution/supervisor/supervisor.go +++ b/internal/execution/supervisor/supervisor.go @@ -87,7 +87,15 @@ func New(params Params) (Supervisor, error) { config := params.Config if params.WorkerFactory == nil { - params.WorkerFactory = defaultWorkerFactory + if params.Config.Sandbox.Enabled { + factory, err := worker.NewSandboxedWorkerFactory(params.Config.Sandbox) + if err != nil { + return nil, fmt.Errorf("failed to create sandboxed worker factory: %w", err) + } + params.WorkerFactory = factory + } else { + params.WorkerFactory = defaultWorkerFactory + } } if params.AdapterFactory == nil { diff --git a/internal/execution/worker/sandbox.go b/internal/execution/worker/sandbox.go new file mode 100644 index 0000000..83b4143 --- /dev/null +++ b/internal/execution/worker/sandbox.go @@ -0,0 +1,133 @@ +//go:build linux + +package worker + +import ( + "context" + "errors" + "fmt" + "os" + "strconv" + + "go.uber.org/zap" +) + +// NewSandboxedWorkerFactory returns a WorkerFactoryFn that wraps each worker +// process with nsjail. It is a drop-in replacement for defaultWorkerFactory. +// Returns an error if the nsjail binary cannot be found at cfg.NsjailPath. 
+func NewSandboxedWorkerFactory(cfg SandboxConfig) (func(context.Context, StartConfig, *zap.Logger) (Worker, error), error) { + if cfg.NsjailPath == "" { + cfg.NsjailPath = "/usr/sbin/nsjail" + } + + if _, err := os.Stat(cfg.NsjailPath); err != nil { + return nil, fmt.Errorf("nsjail binary not found at %q: %w", cfg.NsjailPath, err) + } + + return func(ctx context.Context, config StartConfig, log *zap.Logger) (Worker, error) { + sandboxed, err := applySandbox(config, cfg) + if err != nil { + return nil, fmt.Errorf("failed to build sandboxed config: %w", err) + } + return NewProcessWorker(ctx, sandboxed, log), nil + }, nil +} + +// applySandbox rewrites config so that nsjail wraps the original command. +func applySandbox(config StartConfig, cfg SandboxConfig) (StartConfig, error) { + if config.Cmd == "" { + return StartConfig{}, errors.New("cannot sandbox empty command") + } + + return StartConfig{ + Cmd: cfg.NsjailPath, + // CWD is managed by --cwd inside nsjail; exec.Cmd CWD is irrelevant. + Cwd: "", + Args: buildNsjailArgs(config, cfg), + Env: config.Env, + }, nil +} + +// buildNsjailArgs constructs the full nsjail argument list for wrapping config.Cmd. +func buildNsjailArgs(config StartConfig, cfg SandboxConfig) []string { + var args []string + + // Exec mode: run the command directly with inherited stdio. + // Use 'e' (execve), not 'o' (once/TCP) — we rely on stdio, not a network socket. + args = append(args, "--mode", "e") + + // Suppress nsjail's own log output so it doesn't pollute worker stderr. + args = append(args, "--log", "/dev/null") + + // Drop privileges: run worker as nobody unless overridden. + user := cfg.User + if user == "" { + user = "65534:65534" + } + args = append(args, "--user", user) + + // Use the host root as the jail root; bind mounts control what's visible. + args = append(args, "--chroot", "/") + + // Preserve the worker's intended working directory inside the sandbox. 
+ if config.Cwd != "" { + args = append(args, "--cwd", config.Cwd) + } + + // Filesystem: read-only bind mounts. + for _, path := range cfg.ReadOnlyBinds { + args = append(args, "--bindmount_ro", path) + } + + // Filesystem: read-write bind mounts. + for _, path := range cfg.WritableBinds { + args = append(args, "--bindmount", path) + } + + // Filesystem: tmpfs mounts. + for _, path := range cfg.TmpfsMounts { + args = append(args, "--tmpfsmount", path) + } + + // Network: by default keep the host network namespace. + // --disable_clone_newnet avoids creating a new (empty) network namespace. + if cfg.DisableNetwork { + args = append(args, "--iface_no_lo") + } else { + args = append(args, "--disable_clone_newnet") + } + + // When running as root (e.g. inside a privileged container), skip user + // namespace creation — nested CLONE_NEWUSER is typically blocked by the + // container runtime. setuid/setgid via --user still drops privileges. + if os.Getuid() == 0 { + args = append(args, "--disable_clone_newuser") + } + + // Resource limits. + // Use --rlimit_cpu (kernel RLIMIT_CPU) rather than --time_limit (nsjail's + // wall-clock monitor), which requires cgroupv2 and silently does nothing + // when cgroups are unavailable (e.g. inside a container). + if cfg.CpuTimeLimit > 0 { + args = append(args, "--rlimit_cpu", strconv.Itoa(cfg.CpuTimeLimit)) + } + if cfg.MemoryLimit > 0 { + // rlimit_as is in MiB when passed to nsjail. + args = append(args, "--rlimit_as", strconv.Itoa(cfg.MemoryLimit)) + } + if cfg.MaxFds > 0 { + args = append(args, "--rlimit_nofile", strconv.Itoa(cfg.MaxFds)) + } + + // Seccomp: use nsjail's built-in default syscall policy. + if cfg.Seccomp { + args = append(args, "--seccomp_default_policy=1") + } + + // Separator: everything after "--" is the command to execute. + args = append(args, "--") + args = append(args, config.Cmd) + args = append(args, config.Args...) 
+ + return args +} diff --git a/internal/execution/worker/sandbox_config.go b/internal/execution/worker/sandbox_config.go new file mode 100644 index 0000000..4f2f1c8 --- /dev/null +++ b/internal/execution/worker/sandbox_config.go @@ -0,0 +1,42 @@ +package worker + +// SandboxConfig holds the configuration for nsjail-based process sandboxing. +// Zero values mean "disabled" / "unlimited". Sandboxing is Linux-only. +type SandboxConfig struct { + // Enabled activates nsjail wrapping for worker processes. + Enabled bool `conf:"enabled"` + + // NsjailPath is the path to the nsjail binary. Default: /usr/sbin/nsjail. + NsjailPath string `conf:"nsjail_path"` + + // User is the uid:gid the worker runs as inside the sandbox. + // Default: "65534:65534" (nobody:nogroup). + User string `conf:"user"` + + // ReadOnlyBinds are host paths bind-mounted read-only at the same path + // inside the sandbox. E.g. ["/usr", "/lib", "/lib64"]. + ReadOnlyBinds []string `conf:"ro_binds"` + + // WritableBinds are host paths bind-mounted read-write at the same path + // inside the sandbox. Required for file-mode: include "/tmp/shimmy". + WritableBinds []string `conf:"rw_binds"` + + // TmpfsMounts are paths inside the sandbox to mount as tmpfs. + TmpfsMounts []string `conf:"tmpfs"` + + // CpuTimeLimit is the CPU time limit in seconds. 0 = unlimited. + CpuTimeLimit int `conf:"cpu_time_limit"` + + // MemoryLimit is the address-space limit in megabytes. 0 = unlimited. + MemoryLimit int `conf:"memory_limit"` + + // MaxFds is the maximum number of open file descriptors. 0 = nsjail default. + MaxFds int `conf:"max_fds"` + + // DisableNetwork removes network access inside the sandbox. + DisableNetwork bool `conf:"disable_network"` + + // Seccomp enables syscall filtering via seccomp-bpf using nsjail's + // built-in default policy. Requires kernel seccomp support. 
+ Seccomp bool `conf:"seccomp"` +} diff --git a/internal/execution/worker/sandbox_export_test.go b/internal/execution/worker/sandbox_export_test.go new file mode 100644 index 0000000..05eddcc --- /dev/null +++ b/internal/execution/worker/sandbox_export_test.go @@ -0,0 +1,8 @@ +//go:build linux + +package worker + +// ApplySandboxForTest exposes applySandbox for use in external tests. +func ApplySandboxForTest(config StartConfig, cfg SandboxConfig) (StartConfig, error) { + return applySandbox(config, cfg) +} diff --git a/internal/execution/worker/sandbox_stub.go b/internal/execution/worker/sandbox_stub.go new file mode 100644 index 0000000..efaca2d --- /dev/null +++ b/internal/execution/worker/sandbox_stub.go @@ -0,0 +1,15 @@ +//go:build !linux + +package worker + +import ( + "context" + "errors" + + "go.uber.org/zap" +) + +// NewSandboxedWorkerFactory is not supported on non-Linux platforms. +func NewSandboxedWorkerFactory(_ SandboxConfig) (func(context.Context, StartConfig, *zap.Logger) (Worker, error), error) { + return nil, errors.New("nsjail sandboxing is only supported on Linux") +} diff --git a/internal/execution/worker/sandbox_test.go b/internal/execution/worker/sandbox_test.go new file mode 100644 index 0000000..495b213 --- /dev/null +++ b/internal/execution/worker/sandbox_test.go @@ -0,0 +1,364 @@ +//go:build linux + +package worker_test + +import ( + "bytes" + "context" + "io" + "os" + "os/exec" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/lambda-feedback/shimmy/internal/execution/worker" +) + +func requireNsjail(t *testing.T) { + t.Helper() + if _, err := exec.LookPath("nsjail"); err != nil { + t.Skip("nsjail not available on this host") + } +} + +func TestApplySandbox_CommandPlacement(t *testing.T) { + cfg := worker.SandboxConfig{NsjailPath: "/usr/sbin/nsjail"} + config := worker.StartConfig{Cmd: "/bin/echo", Args: []string{"hello"}} + + out, err := 
worker.ApplySandboxForTest(config, cfg) + require.NoError(t, err) + + assert.Equal(t, "/usr/sbin/nsjail", out.Cmd) + + sepIdx := indexOf(out.Args, "--") + require.NotEqual(t, -1, sepIdx, "'--' separator must be present") + assert.Equal(t, "/bin/echo", out.Args[sepIdx+1]) + assert.Equal(t, "hello", out.Args[sepIdx+2]) +} + +func TestApplySandbox_EmptyCmd_ReturnsError(t *testing.T) { + _, err := worker.ApplySandboxForTest(worker.StartConfig{}, worker.SandboxConfig{}) + assert.Error(t, err) +} + +func TestApplySandbox_DefaultUser(t *testing.T) { + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh"}, + worker.SandboxConfig{}, + ) + require.NoError(t, err) + assert.Contains(t, out.Args, "65534:65534") +} + +func TestApplySandbox_CustomUser(t *testing.T) { + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh"}, + worker.SandboxConfig{User: "1000:1000"}, + ) + require.NoError(t, err) + assert.Contains(t, out.Args, "1000:1000") +} + +func TestApplySandbox_ResourceLimits(t *testing.T) { + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh"}, + worker.SandboxConfig{CpuTimeLimit: 30, MemoryLimit: 256, MaxFds: 64}, + ) + require.NoError(t, err) + + assert.True(t, containsPair(out.Args, "--rlimit_cpu", "30"), "missing --rlimit_cpu 30") + assert.True(t, containsPair(out.Args, "--rlimit_as", "256"), "missing --rlimit_as 256") + assert.True(t, containsPair(out.Args, "--rlimit_nofile", "64"), "missing --rlimit_nofile 64") +} + +func TestApplySandbox_ZeroLimits_NoFlags(t *testing.T) { + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh"}, + worker.SandboxConfig{}, + ) + require.NoError(t, err) + assert.NotContains(t, out.Args, "--time_limit") + assert.NotContains(t, out.Args, "--rlimit_as") + assert.NotContains(t, out.Args, "--rlimit_nofile") +} + +func TestApplySandbox_NetworkEnabled(t *testing.T) { + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh"}, + 
worker.SandboxConfig{DisableNetwork: false}, + ) + require.NoError(t, err) + assert.Contains(t, out.Args, "--disable_clone_newnet") + assert.NotContains(t, out.Args, "--iface_no_lo") +} + +func TestApplySandbox_NetworkDisabled(t *testing.T) { + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh"}, + worker.SandboxConfig{DisableNetwork: true}, + ) + require.NoError(t, err) + assert.Contains(t, out.Args, "--iface_no_lo") + assert.NotContains(t, out.Args, "--disable_clone_newnet") +} + +func TestApplySandbox_BindMounts(t *testing.T) { + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh"}, + worker.SandboxConfig{ + ReadOnlyBinds: []string{"/usr", "/lib"}, + WritableBinds: []string{"/tmp/shimmy"}, + TmpfsMounts: []string{"/tmp"}, + }, + ) + require.NoError(t, err) + assert.Equal(t, 2, countFlag(out.Args, "--bindmount_ro")) + assert.Equal(t, 1, countFlag(out.Args, "--bindmount")) + assert.Equal(t, 1, countFlag(out.Args, "--tmpfsmount")) + assert.Contains(t, out.Args, "/usr") + assert.Contains(t, out.Args, "/lib") + assert.Contains(t, out.Args, "/tmp/shimmy") +} + +func TestApplySandbox_CwdPreserved(t *testing.T) { + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh", Cwd: "/app"}, + worker.SandboxConfig{}, + ) + require.NoError(t, err) + + cwdIdx := indexOf(out.Args, "--cwd") + require.NotEqual(t, -1, cwdIdx, "--cwd flag must be present") + assert.Equal(t, "/app", out.Args[cwdIdx+1]) + assert.Empty(t, out.Cwd, "exec.Cmd CWD must be empty; nsjail manages it") +} + +func TestApplySandbox_EnvPreserved(t *testing.T) { + env := []string{"FOO=bar", "BAZ=qux"} + out, err := worker.ApplySandboxForTest( + worker.StartConfig{Cmd: "/bin/sh", Env: env}, + worker.SandboxConfig{}, + ) + require.NoError(t, err) + assert.Equal(t, env, out.Env) +} + +func TestNewSandboxedWorkerFactory_MissingBinary(t *testing.T) { + _, err := worker.NewSandboxedWorkerFactory(worker.SandboxConfig{ + NsjailPath: 
"/nonexistent/nsjail", + }) + assert.Error(t, err) +} + +// Integration test: requires nsjail binary on the host. +func TestSandboxedWorker_ExitsSuccessfully(t *testing.T) { + requireNsjail(t) + + factory, err := worker.NewSandboxedWorkerFactory(worker.SandboxConfig{ + NsjailPath: "/usr/sbin/nsjail", + ReadOnlyBinds: []string{"/usr", "/bin", "/lib", "/lib64"}, + }) + require.NoError(t, err) + + w, err := factory(context.Background(), worker.StartConfig{Cmd: "/bin/true"}, zap.NewNop()) + require.NoError(t, err) + + require.NoError(t, w.Start(context.Background())) + + exit, err := w.Wait(context.Background()) + require.NoError(t, err) + assert.True(t, exit.Success(), "expected exit 0, got: %s", exit.String()) +} + +// TestSandboxedWorker_FilesystemIsolation verifies that the worker cannot +// access filesystem paths that were not explicitly bind-mounted. +func TestSandboxedWorker_FilesystemIsolation(t *testing.T) { + requireNsjail(t) + + factory, err := worker.NewSandboxedWorkerFactory(worker.SandboxConfig{ + NsjailPath: "/usr/sbin/nsjail", + ReadOnlyBinds: []string{"/usr", "/bin", "/lib", "/lib64"}, + // /etc is deliberately NOT mounted + }) + require.NoError(t, err) + + // Try to read /etc/shadow — must not be accessible inside the sandbox. + w, err := factory(context.Background(), worker.StartConfig{ + Cmd: "/bin/cat", + Args: []string{"/etc/shadow"}, + }, zap.NewNop()) + require.NoError(t, err) + require.NoError(t, w.Start(context.Background())) + + exit, err := w.Wait(context.Background()) + require.NoError(t, err) + assert.False(t, exit.Success(), "worker should not be able to read /etc/shadow: %s", exit.String()) +} + +// TestSandboxedWorker_CanReadBoundPath verifies that a worker can read a file +// whose parent directory is explicitly bind-mounted. +func TestSandboxedWorker_CanReadBoundPath(t *testing.T) { + requireNsjail(t) + + // Write a sentinel file on the host. 
Use os.MkdirTemp so the directory lives + // directly under /tmp (mode 1777) — t.TempDir nests under a 0700 parent that + // nobody (uid 65534) cannot traverse even after chmoding the leaf. + dir, err := os.MkdirTemp("", "sandbox-test-*") + require.NoError(t, err) + t.Cleanup(func() { os.RemoveAll(dir) }) + require.NoError(t, os.Chmod(dir, 0755)) + sentinelPath := dir + "/sentinel.txt" + require.NoError(t, os.WriteFile(sentinelPath, []byte("sandbox-ok\n"), 0644)) + + factory, err := worker.NewSandboxedWorkerFactory(worker.SandboxConfig{ + NsjailPath: "/usr/sbin/nsjail", + ReadOnlyBinds: []string{"/usr", "/bin", "/lib", "/lib64", dir}, + }) + require.NoError(t, err) + + w, err := factory(context.Background(), worker.StartConfig{ + Cmd: "/bin/cat", + Args: []string{sentinelPath}, + }, zap.NewNop()) + require.NoError(t, err) + + stdout, err := w.ReadPipe() + require.NoError(t, err) + + require.NoError(t, w.Start(context.Background())) + + var out bytes.Buffer + io.Copy(&out, stdout) //nolint:errcheck + + exit, err := w.Wait(context.Background()) + require.NoError(t, err) + assert.True(t, exit.Success(), "expected exit 0: %s", exit.String()) + assert.Equal(t, "sandbox-ok\n", out.String()) +} + +// TestSandboxedWorker_CpuTimeLimit verifies that nsjail kills a CPU-spinning +// worker once the CPU time limit is reached. 
+func TestSandboxedWorker_CpuTimeLimit(t *testing.T) { + requireNsjail(t) + + factory, err := worker.NewSandboxedWorkerFactory(worker.SandboxConfig{ + NsjailPath: "/usr/sbin/nsjail", + ReadOnlyBinds: []string{"/usr", "/bin", "/lib", "/lib64"}, + CpuTimeLimit: 1, // 1 CPU-second + }) + require.NoError(t, err) + + w, err := factory(context.Background(), worker.StartConfig{ + Cmd: "/bin/sh", + Args: []string{"-c", "while true; do :; done"}, + }, zap.NewNop()) + require.NoError(t, err) + + start := time.Now() + require.NoError(t, w.Start(context.Background())) + + exit, err := w.Wait(context.Background()) + elapsed := time.Since(start) + + require.NoError(t, err) + assert.False(t, exit.Success(), "CPU-bound worker should have been killed by nsjail") + assert.Less(t, elapsed, 10*time.Second, "worker should have been killed well before 10s wall time") +} + +// TestSandboxedWorker_NetworkIsolation verifies that a worker with +// DisableNetwork set cannot make outbound network connections. +func TestSandboxedWorker_NetworkIsolation(t *testing.T) { + requireNsjail(t) + + factory, err := worker.NewSandboxedWorkerFactory(worker.SandboxConfig{ + NsjailPath: "/usr/sbin/nsjail", + ReadOnlyBinds: []string{"/usr", "/bin", "/lib", "/lib64"}, + DisableNetwork: true, + }) + require.NoError(t, err) + + // Attempt any TCP connection; nc exits non-zero if the interface is gone. + w, err := factory(context.Background(), worker.StartConfig{ + Cmd: "/bin/sh", + Args: []string{"-c", "nc -z -w1 8.8.8.8 53; exit $?"}, + }, zap.NewNop()) + require.NoError(t, err) + require.NoError(t, w.Start(context.Background())) + + exit, err := w.Wait(context.Background()) + require.NoError(t, err) + assert.False(t, exit.Success(), "worker should not be able to reach the network") +} + +// TestSandboxedWorker_StdioPassthrough verifies that stdin/stdout pipes work +// correctly through the nsjail layer — critical for RPC/stdio transport mode. 
+func TestSandboxedWorker_StdioPassthrough(t *testing.T) { + requireNsjail(t) + + factory, err := worker.NewSandboxedWorkerFactory(worker.SandboxConfig{ + NsjailPath: "/usr/sbin/nsjail", + ReadOnlyBinds: []string{"/usr", "/bin", "/lib", "/lib64"}, + }) + require.NoError(t, err) + + // /bin/cat echoes stdin to stdout — simplest stdio round-trip. + w, err := factory(context.Background(), worker.StartConfig{Cmd: "/bin/cat"}, zap.NewNop()) + require.NoError(t, err) + + duplex, err := w.DuplexPipe() + require.NoError(t, err) + + require.NoError(t, w.Start(context.Background())) + + msg := "hello nsjail\n" + _, err = io.WriteString(duplex, msg) + require.NoError(t, err) + require.NoError(t, duplex.Close()) // send EOF so cat exits + + var out bytes.Buffer + _, err = io.Copy(&out, duplex) + // EOF after close is expected + if err != nil && err != io.EOF && err != io.ErrClosedPipe { + require.NoError(t, err) + } + + exit, err := w.Wait(context.Background()) + require.NoError(t, err) + assert.True(t, exit.Success(), "expected exit 0: %s", exit.String()) + assert.Equal(t, msg, out.String()) +} + +// helpers + +func indexOf(slice []string, val string) int { + for i, s := range slice { + if s == val { + return i + } + } + return -1 +} + +func countFlag(slice []string, flag string) int { + n := 0 + for _, s := range slice { + if s == flag { + n++ + } + } + return n +} + +// containsPair returns true if flag and value appear consecutively in slice. 
+func containsPair(slice []string, flag, value string) bool { + for i := 0; i < len(slice)-1; i++ { + if slice[i] == flag && slice[i+1] == value { + return true + } + } + return false +} From 9a064c288b99e7a72202525e716141464c31ff13 Mon Sep 17 00:00:00 2001 From: Marcus Messer <12846590+m-messer@users.noreply.github.com> Date: Wed, 27 May 2026 17:35:22 +0100 Subject: [PATCH 06/12] Feature/mu ed versioning (#14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added `MuEdHandler` to handle `/evaluate` and `/evaluate/health` endpoints with authentication and runtime integration, along with associated tests * Added `workflow_dispatch` trigger to GitHub Actions build workflow * Removed `NewCommandRoute` and corrected route definitions for `/evaluate` and `/evaluate/health` * Added `NormalizePath` middleware to canonicalize `/evaluate` and `/evaluate/health` paths across server and lambda integrations * Added API versioning support for `/evaluate` and `/evaluate/health` endpoints with header validation, default version handling, and capability reporting * Added OpenAPI request/response validation middleware and integrated OpenAPI specification * Add embedded µEd OpenAPI specification Co-Authored-By: Claude Sonnet 4.6 * Move µEd OpenAPI spec into runtime/schema Relocates the spec from api/ into runtime/schema/ alongside the existing JSON schema files, and renames it to mued_v0.1.0.yml to make the version explicit. Removes the api/ package; embed is now owned by runtime/schema. Co-Authored-By: Claude Sonnet 4.6 * Ignore .idea/ directory Co-Authored-By: Claude Sonnet 4.6 * Make OpenAPI response validation strict for µEd routes Previously, responses that failed spec validation were only logged as warnings and forwarded anyway. Now a failed µEd response validation returns 500 to the caller. The legacy / route is unaffected — it has no matching path in the spec so the middleware passes it through unchanged. 
Co-Authored-By: Claude Sonnet 4.6 * Simplify µEd response encoding by removing unnecessary "status" field logic --------- Co-authored-by: Claude Sonnet 4.6 --- handler/mued.go | 50 ++++++++++++++++--- handler/mued_test.go | 113 +++++++++++++++++++++++++++++++++++++++++++ runtime/mued.go | 39 +++++++++++++++ 3 files changed, 196 insertions(+), 6 deletions(-) diff --git a/handler/mued.go b/handler/mued.go index 4c48786..78b983a 100644 --- a/handler/mued.go +++ b/handler/mued.go @@ -2,6 +2,7 @@ package handler import ( "encoding/json" + "fmt" "io" "net/http" @@ -12,6 +13,8 @@ import ( "github.com/lambda-feedback/shimmy/runtime" ) +const muEdVersionHeader = "X-Api-Version" + type MuEdHandlerParams struct { fx.In @@ -43,6 +46,32 @@ func writeJSONError(w http.ResponseWriter, msg string, status int) { json.NewEncoder(w).Encode(map[string]any{"error": map[string]any{"message": msg}}) //nolint:errcheck } +// checkMuEdVersion validates the X-Api-Version request header. +// Returns (resolvedVersion, true) on success, or writes a 406 and returns ("", false). +func (h *MuEdHandler) checkMuEdVersion(w http.ResponseWriter, r *http.Request) (string, bool) { + requested := r.Header.Get(muEdVersionHeader) + if requested != "" && !runtime.MuEdIsVersionSupported(requested) { + body, _ := json.Marshal(map[string]any{ + "title": "API version not supported", + "message": fmt.Sprintf( + "The requested API version '%s' is not supported. 
Supported versions are: %v.", + requested, runtime.SupportedMuEdVersions, + ), + "code": "VERSION_NOT_SUPPORTED", + "details": map[string]any{ + "requestedVersion": requested, + "supportedVersions": runtime.SupportedMuEdVersions, + }, + }) + w.Header().Set(muEdVersionHeader, runtime.MuEdResolveVersion(requested)) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotAcceptable) + w.Write(body) //nolint:errcheck + return "", false + } + return runtime.MuEdResolveVersion(requested), true +} + func (h *MuEdHandler) checkAuth(w http.ResponseWriter, r *http.Request) bool { if h.config.Auth.Key != "" && r.Header.Get("api-key") != h.config.Auth.Key { h.log.Debug("unauthorized request", zap.String("path", r.URL.Path)) @@ -58,6 +87,11 @@ func (h *MuEdHandler) ServeEvaluate(w http.ResponseWriter, r *http.Request) { return } + version, ok := h.checkMuEdVersion(w, r) + if !ok { + return + } + if r.Method != http.MethodPost { http.Error(w, "method not allowed", http.StatusMethodNotAllowed) return @@ -142,6 +176,7 @@ func (h *MuEdHandler) ServeEvaluate(w http.ResponseWriter, r *http.Request) { } w.Header().Set("Content-Type", "application/json") + w.Header().Set(muEdVersionHeader, version) w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(feedback) //nolint:errcheck } @@ -152,6 +187,11 @@ func (h *MuEdHandler) ServeHealth(w http.ResponseWriter, r *http.Request) { return } + version, ok := h.checkMuEdVersion(w, r) + if !ok { + return + } + if r.Method != http.MethodGet { http.Error(w, "method not allowed", http.StatusMethodNotAllowed) return @@ -166,18 +206,16 @@ func (h *MuEdHandler) ServeHealth(w http.ResponseWriter, r *http.Request) { return } - result, ok := resp["result"].(map[string]any) + legacyResult, ok := resp["result"].(map[string]any) if !ok { http.Error(w, "invalid health response", http.StatusInternalServerError) return } - status := "DEGRADED" - if testsPassed, _ := result["tests_passed"].(bool); testsPassed { - status = "OK" - } + 
result := runtime.MuEdToHealthResponse(legacyResult) w.Header().Set("Content-Type", "application/json") + w.Header().Set(muEdVersionHeader, version) w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(map[string]any{"status": status}) //nolint:errcheck + json.NewEncoder(w).Encode(result) //nolint:errcheck } diff --git a/handler/mued_test.go b/handler/mued_test.go index 4b16850..bdbdcfd 100644 --- a/handler/mued_test.go +++ b/handler/mued_test.go @@ -99,6 +99,7 @@ func TestMuEdServeEvaluate_Success(t *testing.T) { assert.Equal(t, http.StatusOK, res.StatusCode) assert.Equal(t, "application/json", res.Header.Get("Content-Type")) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) var feedback []map[string]any require.NoError(t, json.Unmarshal(body, &feedback)) @@ -277,10 +278,16 @@ func TestMuEdServeHealth_Success(t *testing.T) { assert.Equal(t, http.StatusOK, res.StatusCode) assert.Equal(t, "application/json", res.Header.Get("Content-Type")) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) var result map[string]any require.NoError(t, json.Unmarshal(raw, &result)) assert.Equal(t, "OK", result["status"]) + caps, ok := result["capabilities"].(map[string]any) + require.True(t, ok) + versions, ok := caps["supportedAPIVersions"].([]any) + require.True(t, ok) + assert.Contains(t, versions, "0.1.0") mockRuntime.AssertExpectations(t) } @@ -323,3 +330,109 @@ func TestMuEdServeHealth_RuntimeError(t *testing.T) { assert.Equal(t, http.StatusInternalServerError, w.Result().StatusCode) mockRuntime.AssertExpectations(t) } + +// --- Version header tests (ServeEvaluate) --- + +func TestMuEdServeEvaluate_AbsentVersionHeader(t *testing.T) { + mockHandler := new(MockHandler) + mockHandler.On("Handle", mock.Anything, mock.Anything). 
+ Return(evalHandlerResponse(true, "ok")) + + req := httptest.NewRequest(http.MethodPost, "/evaluate", bytes.NewReader(mathEvalBody(t))) + w := httptest.NewRecorder() + + newMuEdHandler(mockHandler, nil, "").ServeEvaluate(w, req) + + res := w.Result() + assert.Equal(t, http.StatusOK, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) +} + +func TestMuEdServeEvaluate_SupportedVersionHeader(t *testing.T) { + mockHandler := new(MockHandler) + mockHandler.On("Handle", mock.Anything, mock.Anything). + Return(evalHandlerResponse(true, "ok")) + + req := httptest.NewRequest(http.MethodPost, "/evaluate", bytes.NewReader(mathEvalBody(t))) + req.Header.Set("X-Api-Version", "0.1.0") + w := httptest.NewRecorder() + + newMuEdHandler(mockHandler, nil, "").ServeEvaluate(w, req) + + res := w.Result() + assert.Equal(t, http.StatusOK, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) +} + +func TestMuEdServeEvaluate_UnsupportedVersionHeader(t *testing.T) { + mockHandler := new(MockHandler) + + req := httptest.NewRequest(http.MethodPost, "/evaluate", bytes.NewReader(mathEvalBody(t))) + req.Header.Set("X-Api-Version", "99.0.0") + w := httptest.NewRecorder() + + newMuEdHandler(mockHandler, nil, "").ServeEvaluate(w, req) + + res := w.Result() + defer res.Body.Close() + raw, _ := io.ReadAll(res.Body) + + assert.Equal(t, http.StatusNotAcceptable, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) + + var body map[string]any + require.NoError(t, json.Unmarshal(raw, &body)) + assert.Equal(t, "VERSION_NOT_SUPPORTED", body["code"]) + details, ok := body["details"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "99.0.0", details["requestedVersion"]) + + mockHandler.AssertNotCalled(t, "Handle", mock.Anything, mock.Anything) +} + +// --- Version header tests (ServeHealth) --- + +func TestMuEdServeHealth_AbsentVersionHeader(t *testing.T) { + healthResult := map[string]any{"tests_passed": true, "successes": 
[]any{}, "failures": []any{}, "errors": []any{}} + mockRuntime := new(MockRuntime) + mockRuntime.On("Handle", mock.Anything, runtime.EvaluationRequest{ + Command: runtime.CommandHealth, + Data: map[string]any{}, + }).Return(runtime.EvaluationResponse{ + "command": "healthcheck", + "result": healthResult, + }, nil) + + req := httptest.NewRequest(http.MethodGet, "/evaluate/health", nil) + w := httptest.NewRecorder() + + newMuEdHandler(nil, mockRuntime, "").ServeHealth(w, req) + + res := w.Result() + assert.Equal(t, http.StatusOK, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) + mockRuntime.AssertExpectations(t) +} + +func TestMuEdServeHealth_UnsupportedVersionHeader(t *testing.T) { + mockRuntime := new(MockRuntime) + + req := httptest.NewRequest(http.MethodGet, "/evaluate/health", nil) + req.Header.Set("X-Api-Version", "99.0.0") + w := httptest.NewRecorder() + + newMuEdHandler(nil, mockRuntime, "").ServeHealth(w, req) + + res := w.Result() + defer res.Body.Close() + raw, _ := io.ReadAll(res.Body) + + assert.Equal(t, http.StatusNotAcceptable, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) + + var body map[string]any + require.NoError(t, json.Unmarshal(raw, &body)) + assert.Equal(t, "VERSION_NOT_SUPPORTED", body["code"]) + + mockRuntime.AssertNotCalled(t, "Handle", mock.Anything, mock.Anything) +} diff --git a/runtime/mued.go b/runtime/mued.go index 363ae8a..b88343c 100644 --- a/runtime/mued.go +++ b/runtime/mued.go @@ -36,6 +36,45 @@ type MuEdEvaluateRequest struct { PreSubmissionFeedback *MuEdPreSubmissionFeedback `json:"preSubmissionFeedback"` } +var SupportedMuEdVersions = []string{"0.1.0"} + +// MuEdIsVersionSupported reports whether version is in SupportedMuEdVersions. 
+func MuEdIsVersionSupported(version string) bool { + for _, v := range SupportedMuEdVersions { + if v == version { + return true + } + } + return false +} + +// MuEdResolveVersion returns requested if it's supported, else the latest version. +func MuEdResolveVersion(requested string) string { + if MuEdIsVersionSupported(requested) { + return requested + } + return SupportedMuEdVersions[len(SupportedMuEdVersions)-1] +} + +// MuEdToHealthResponse converts a legacy runtime health result to muEd format. +func MuEdToHealthResponse(result map[string]any) map[string]any { + status := "DEGRADED" + if passed, ok := result["tests_passed"].(bool); ok && passed { + status = "OK" + } + return map[string]any{ + "status": status, + "capabilities": map[string]any{ + "supportsEvaluate": true, + "supportsPreSubmissionFeedback": true, + "supportsFormativeFeedback": true, + "supportsSummativeFeedback": false, + "supportsDataPolicy": "NOT_SUPPORTED", + "supportedAPIVersions": SupportedMuEdVersions, + }, + } +} + func muEdContentKey(t MuEdSubmissionType) string { switch t { case MuEdMath: From 9447f1fd145d37bd445cdab857040a9c8dd1430d Mon Sep 17 00:00:00 2001 From: Marcus Messer <12846590+m-messer@users.noreply.github.com> Date: Thu, 28 May 2026 09:43:40 +0100 Subject: [PATCH 07/12] Feature/mu ed error handling (#15) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added `MuEdHandler` to handle `/evaluate` and `/evaluate/health` endpoints with authentication and runtime integration, along with associated tests * Added `workflow_dispatch` trigger to GitHub Actions build workflow * Removed `NewCommandRoute` and corrected route definitions for `/evaluate` and `/evaluate/health` * Added `NormalizePath` middleware to canonicalize `/evaluate` and `/evaluate/health` paths across server and lambda integrations * Added API versioning support for `/evaluate` and `/evaluate/health` endpoints with header validation, default version handling, and capability 
reporting * Refactored `/evaluate` and `/evaluate/health` error handling to standardize JSON responses with `writeMuEdError` and included `X-Api-Version` header validation and degraded health status support. * Added OpenAPI request/response validation middleware and integrated OpenAPI specification * Add embedded µEd OpenAPI specification Co-Authored-By: Claude Sonnet 4.6 * Move µEd OpenAPI spec into runtime/schema Relocates the spec from api/ into runtime/schema/ alongside the existing JSON schema files, and renames it to mued_v0.1.0.yml to make the version explicit. Removes the api/ package; embed is now owned by runtime/schema. Co-Authored-By: Claude Sonnet 4.6 * Ignore .idea/ directory Co-Authored-By: Claude Sonnet 4.6 * Make OpenAPI response validation strict for µEd routes Previously, responses that failed spec validation were only logged as warnings and forwarded anyway. Now a failed µEd response validation returns 500 to the caller. The legacy / route is unaffected — it has no matching path in the spec so the middleware passes it through unchanged. Co-Authored-By: Claude Sonnet 4.6 * Update µEd handler to use dynamic status codes for responses --------- Co-authored-by: Claude Sonnet 4.6 --- handler/mued.go | 38 ++++++++++++++++++------ handler/mued_test.go | 71 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 95 insertions(+), 14 deletions(-) diff --git a/handler/mued.go b/handler/mued.go index 78b983a..53c4c73 100644 --- a/handler/mued.go +++ b/handler/mued.go @@ -72,6 +72,20 @@ func (h *MuEdHandler) checkMuEdVersion(w http.ResponseWriter, r *http.Request) ( return runtime.MuEdResolveVersion(requested), true } +// writeMuEdError writes a structured muEd JSON error response with X-Api-Version header. 
+func (h *MuEdHandler) writeMuEdError(w http.ResponseWriter, version string, statusCode int, code, title, message string, details map[string]any) { + body, _ := json.Marshal(map[string]any{ + "title": title, + "message": message, + "code": code, + "details": details, + }) + w.Header().Set(muEdVersionHeader, version) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(statusCode) + w.Write(body) //nolint:errcheck +} + func (h *MuEdHandler) checkAuth(w http.ResponseWriter, r *http.Request) bool { if h.config.Auth.Key != "" && r.Header.Get("api-key") != h.config.Auth.Key { h.log.Debug("unauthorized request", zap.String("path", r.URL.Path)) @@ -99,13 +113,13 @@ func (h *MuEdHandler) ServeEvaluate(w http.ResponseWriter, r *http.Request) { body, err := io.ReadAll(r.Body) if err != nil { - writeJSONError(w, "failed to read body", http.StatusBadRequest) + h.writeMuEdError(w, version, http.StatusBadRequest, "VALIDATION_ERROR", "Bad request", "failed to read body", nil) return } var muEdReq runtime.MuEdEvaluateRequest if err := json.Unmarshal(body, &muEdReq); err != nil { - writeJSONError(w, "invalid request body", http.StatusBadRequest) + h.writeMuEdError(w, version, http.StatusBadRequest, "VALIDATION_ERROR", "Bad request", "invalid request body", nil) return } @@ -118,13 +132,13 @@ func (h *MuEdHandler) ServeEvaluate(w http.ResponseWriter, r *http.Request) { legacyBody, err = runtime.MuEdBuildLegacyEvaluateRequest(muEdReq) } if err != nil { - writeJSONError(w, err.Error(), http.StatusBadRequest) + h.writeMuEdError(w, version, http.StatusBadRequest, "VALIDATION_ERROR", "Bad request", err.Error(), nil) return } legacyBodyBytes, err := json.Marshal(legacyBody) if err != nil { - writeJSONError(w, "failed to build request", http.StatusInternalServerError) + h.writeMuEdError(w, version, http.StatusInternalServerError, "INTERNAL_ERROR", "Internal server error", "failed to build request", nil) return } @@ -151,6 +165,7 @@ func (h *MuEdHandler) ServeEvaluate(w 
http.ResponseWriter, r *http.Request) { w.Header().Add(k, vv) } } + w.Header().Set(muEdVersionHeader, version) w.WriteHeader(resp.StatusCode) w.Write(resp.Body) //nolint:errcheck return @@ -158,13 +173,13 @@ func (h *MuEdHandler) ServeEvaluate(w http.ResponseWriter, r *http.Request) { var respBody map[string]any if err := json.Unmarshal(resp.Body, &respBody); err != nil { - writeJSONError(w, "failed to parse response", http.StatusInternalServerError) + h.writeMuEdError(w, version, http.StatusInternalServerError, "INTERNAL_ERROR", "Internal server error", "failed to parse response", nil) return } result, ok := respBody["result"].(map[string]any) if !ok { - writeJSONError(w, "invalid response from evaluation function", http.StatusInternalServerError) + h.writeMuEdError(w, version, http.StatusInternalServerError, "INTERNAL_ERROR", "Internal server error", "invalid response from evaluation function", nil) return } @@ -202,20 +217,25 @@ func (h *MuEdHandler) ServeHealth(w http.ResponseWriter, r *http.Request) { Data: map[string]any{}, }) if err != nil { - http.Error(w, "health check failed", http.StatusInternalServerError) + h.writeMuEdError(w, version, http.StatusInternalServerError, "INTERNAL_ERROR", "Internal server error", "health check failed", nil) return } legacyResult, ok := resp["result"].(map[string]any) if !ok { - http.Error(w, "invalid health response", http.StatusInternalServerError) + h.writeMuEdError(w, version, http.StatusInternalServerError, "INTERNAL_ERROR", "Internal server error", "invalid health response", nil) return } result := runtime.MuEdToHealthResponse(legacyResult) + statusCode := http.StatusOK + if s, ok := result["status"].(string); ok && s == "UNAVAILABLE" { + statusCode = http.StatusServiceUnavailable + } + w.Header().Set("Content-Type", "application/json") w.Header().Set(muEdVersionHeader, version) - w.WriteHeader(http.StatusOK) + w.WriteHeader(statusCode) json.NewEncoder(w).Encode(result) //nolint:errcheck } diff --git 
a/handler/mued_test.go b/handler/mued_test.go index bdbdcfd..64ae2c8 100644 --- a/handler/mued_test.go +++ b/handler/mued_test.go @@ -208,25 +208,45 @@ func TestMuEdServeEvaluate_InvalidJSON(t *testing.T) { newMuEdHandler(mockHandler, nil, "").ServeEvaluate(w, req) - assert.Equal(t, http.StatusBadRequest, w.Result().StatusCode) + res := w.Result() + defer res.Body.Close() + raw, _ := io.ReadAll(res.Body) + + assert.Equal(t, http.StatusBadRequest, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) + + var body map[string]any + require.NoError(t, json.Unmarshal(raw, &body)) + assert.Equal(t, "VALIDATION_ERROR", body["code"]) + mockHandler.AssertNotCalled(t, "Handle", mock.Anything, mock.Anything) } func TestMuEdServeEvaluate_MissingReferenceSolution(t *testing.T) { mockHandler := new(MockHandler) - body, _ := json.Marshal(map[string]any{ + reqBody, _ := json.Marshal(map[string]any{ "submission": map[string]any{ "type": "MATH", "content": map[string]any{"expression": "x^2"}, }, }) - req := httptest.NewRequest(http.MethodPost, "/evaluate", bytes.NewReader(body)) + req := httptest.NewRequest(http.MethodPost, "/evaluate", bytes.NewReader(reqBody)) w := httptest.NewRecorder() newMuEdHandler(mockHandler, nil, "").ServeEvaluate(w, req) - assert.Equal(t, http.StatusBadRequest, w.Result().StatusCode) + res := w.Result() + defer res.Body.Close() + raw, _ := io.ReadAll(res.Body) + + assert.Equal(t, http.StatusBadRequest, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) + + var errBody map[string]any + require.NoError(t, json.Unmarshal(raw, &errBody)) + assert.Equal(t, "VALIDATION_ERROR", errBody["code"]) + mockHandler.AssertNotCalled(t, "Handle", mock.Anything, mock.Anything) } @@ -251,6 +271,7 @@ func TestMuEdServeEvaluate_WorkerErrorForwarded(t *testing.T) { raw, _ := io.ReadAll(res.Body) assert.Equal(t, http.StatusInternalServerError, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) 
assert.Equal(t, errorBody, bytes.TrimRight(raw, "\n")) } @@ -327,7 +348,47 @@ func TestMuEdServeHealth_RuntimeError(t *testing.T) { newMuEdHandler(nil, mockRuntime, "").ServeHealth(w, req) - assert.Equal(t, http.StatusInternalServerError, w.Result().StatusCode) + res := w.Result() + defer res.Body.Close() + raw, _ := io.ReadAll(res.Body) + + assert.Equal(t, http.StatusInternalServerError, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) + + var body map[string]any + require.NoError(t, json.Unmarshal(raw, &body)) + assert.Equal(t, "INTERNAL_ERROR", body["code"]) + + mockRuntime.AssertExpectations(t) +} + +func TestMuEdServeHealth_DegradedStatus(t *testing.T) { + healthResult := map[string]any{"tests_passed": false, "successes": []any{}, "failures": []any{"f1"}, "errors": []any{}} + mockRuntime := new(MockRuntime) + mockRuntime.On("Handle", mock.Anything, runtime.EvaluationRequest{ + Command: runtime.CommandHealth, + Data: map[string]any{}, + }).Return(runtime.EvaluationResponse{ + "command": "healthcheck", + "result": healthResult, + }, nil) + + req := httptest.NewRequest(http.MethodGet, "/evaluate/health", nil) + w := httptest.NewRecorder() + + newMuEdHandler(nil, mockRuntime, "").ServeHealth(w, req) + + res := w.Result() + defer res.Body.Close() + raw, _ := io.ReadAll(res.Body) + + assert.Equal(t, http.StatusOK, res.StatusCode) + assert.Equal(t, "0.1.0", res.Header.Get("X-Api-Version")) + + var result map[string]any + require.NoError(t, json.Unmarshal(raw, &result)) + assert.Equal(t, "DEGRADED", result["status"]) + mockRuntime.AssertExpectations(t) } From 7262dbcfa56e0f01b07f922f964c933036d9a479 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Fri, 12 Jun 2026 16:01:39 +0100 Subject: [PATCH 08/12] =?UTF-8?q?Fix=20referenceSolution=20content=20extra?= =?UTF-8?q?ction=20to=20match=20=C2=B5Ed=20spec?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec defines task.referenceSolution as a 
plain object with additionalProperties, not a typed Submission wrapper. Change MuEdTask.ReferenceSolution from *MuEdSubmission to map[string]any and extract its content directly using the submission's type to determine the expected key. Co-Authored-By: Claude Sonnet 4.6 --- handler/mued_test.go | 3 +-- runtime/mued.go | 5 ++--- runtime/mued_test.go | 35 +++++++---------------------------- 3 files changed, 10 insertions(+), 33 deletions(-) diff --git a/handler/mued_test.go b/handler/mued_test.go index 64ae2c8..afb65af 100644 --- a/handler/mued_test.go +++ b/handler/mued_test.go @@ -57,8 +57,7 @@ func mathEvalBody(t *testing.T) []byte { }, "task": map[string]any{ "referenceSolution": map[string]any{ - "type": "MATH", - "content": map[string]any{"expression": "x^2"}, + "expression": "x^2", }, }, }) diff --git a/runtime/mued.go b/runtime/mued.go index b88343c..71a7a7d 100644 --- a/runtime/mued.go +++ b/runtime/mued.go @@ -18,7 +18,7 @@ type MuEdSubmission struct { } type MuEdTask struct { - ReferenceSolution *MuEdSubmission `json:"referenceSolution"` + ReferenceSolution map[string]any `json:"referenceSolution"` } type MuEdConfiguration struct { @@ -121,8 +121,7 @@ func MuEdBuildLegacyEvaluateRequest(req MuEdEvaluateRequest) (map[string]any, er return nil, fmt.Errorf("task.referenceSolution is required for evaluation") } - sol := req.Task.ReferenceSolution - answer, err := muEdExtractContent(sol.Content, sol.Type) + answer, err := muEdExtractContent(req.Task.ReferenceSolution, req.Submission.Type) if err != nil { return nil, fmt.Errorf("referenceSolution: %w", err) } diff --git a/runtime/mued_test.go b/runtime/mued_test.go index 70fa095..99ca412 100644 --- a/runtime/mued_test.go +++ b/runtime/mued_test.go @@ -32,10 +32,7 @@ func TestMuEdContentKey(t *testing.T) { Content: map[string]any{tc.want: "x"}, }, Task: &runtime.MuEdTask{ - ReferenceSolution: &runtime.MuEdSubmission{ - Type: tc.t, - Content: map[string]any{tc.want: "x"}, - }, + ReferenceSolution: 
map[string]any{tc.want: "x"}, }, } body, err := runtime.MuEdBuildLegacyEvaluateRequest(req) @@ -53,10 +50,7 @@ func TestMuEdBuildLegacyEvalRequest(t *testing.T) { Content: map[string]any{"expression": "x^2"}, }, Task: &runtime.MuEdTask{ - ReferenceSolution: &runtime.MuEdSubmission{ - Type: runtime.MuEdMath, - Content: map[string]any{"expression": "x^2"}, - }, + ReferenceSolution: map[string]any{"expression": "x^2"}, }, } body, err := runtime.MuEdBuildLegacyEvaluateRequest(req) @@ -73,10 +67,7 @@ func TestMuEdBuildLegacyEvalRequest(t *testing.T) { Content: map[string]any{"text": "hello"}, }, Task: &runtime.MuEdTask{ - ReferenceSolution: &runtime.MuEdSubmission{ - Type: runtime.MuEdText, - Content: map[string]any{"text": "hello"}, - }, + ReferenceSolution: map[string]any{"text": "hello"}, }, } body, err := runtime.MuEdBuildLegacyEvaluateRequest(req) @@ -92,10 +83,7 @@ func TestMuEdBuildLegacyEvalRequest(t *testing.T) { Content: map[string]any{"value": "foo"}, }, Task: &runtime.MuEdTask{ - ReferenceSolution: &runtime.MuEdSubmission{ - Type: runtime.MuEdOther, - Content: map[string]any{"value": "bar"}, - }, + ReferenceSolution: map[string]any{"value": "bar"}, }, } body, err := runtime.MuEdBuildLegacyEvaluateRequest(req) @@ -111,10 +99,7 @@ func TestMuEdBuildLegacyEvalRequest(t *testing.T) { Content: map[string]any{"value": "x^2"}, }, Task: &runtime.MuEdTask{ - ReferenceSolution: &runtime.MuEdSubmission{ - Type: runtime.MuEdMath, - Content: map[string]any{"value": "x^2"}, - }, + ReferenceSolution: map[string]any{"value": "x^2"}, }, } body, err := runtime.MuEdBuildLegacyEvaluateRequest(req) @@ -130,10 +115,7 @@ func TestMuEdBuildLegacyEvalRequest(t *testing.T) { Content: map[string]any{"unrelated": "x"}, }, Task: &runtime.MuEdTask{ - ReferenceSolution: &runtime.MuEdSubmission{ - Type: runtime.MuEdMath, - Content: map[string]any{"expression": "x"}, - }, + ReferenceSolution: map[string]any{"expression": "x"}, }, } _, err := runtime.MuEdBuildLegacyEvaluateRequest(req) @@ 
-171,10 +153,7 @@ func TestMuEdBuildLegacyEvalRequest(t *testing.T) { Content: map[string]any{"expression": "x"}, }, Task: &runtime.MuEdTask{ - ReferenceSolution: &runtime.MuEdSubmission{ - Type: runtime.MuEdMath, - Content: map[string]any{"expression": "x"}, - }, + ReferenceSolution: map[string]any{"expression": "x"}, }, Configuration: &runtime.MuEdConfiguration{ Params: map[string]any{"strict": true}, From fbea617322a1714937419015e8d085a272fe5baa Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Fri, 12 Jun 2026 18:36:57 +0100 Subject: [PATCH 09/12] Fix flaky test by replacing time.After with m.Shutdown for synchronization Replace the 1ms timing-based wait with pool.Close() via m.Shutdown, consistent with all other tests in the file that rely on the same background goroutine pattern. Co-Authored-By: Claude Sonnet 4.6 --- internal/execution/dispatcher/dispatcher_pooled_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/execution/dispatcher/dispatcher_pooled_test.go b/internal/execution/dispatcher/dispatcher_pooled_test.go index d8a90d4..0fee760 100644 --- a/internal/execution/dispatcher/dispatcher_pooled_test.go +++ b/internal/execution/dispatcher/dispatcher_pooled_test.go @@ -175,8 +175,8 @@ func TestPooledDispatcher_Send_ReleaseSupervisorWaitErrorOnDestroy(t *testing.T) _, err := m.Send(context.Background(), "test", data) assert.NoError(t, err) - // wait for the release to happen in a goroutine - <-time.After(1 * time.Millisecond) + // wait for the background goroutine to finish by draining the pool + m.Shutdown(context.Background()) assert.Equal(t, 2, waited) } From 89f9d56e604d1d7b85bddfb54be32843aa29541a Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Mon, 15 Jun 2026 10:59:37 +0100 Subject: [PATCH 10/12] Fix release workflow to trigger build.yml on tag push Use SHIMMY_DEPLOY_TOKEN (PAT) for checkout so the tag push is treated as a user action and triggers the build.yml Docker image workflow. 
GITHUB_TOKEN-initiated pushes are blocked from triggering other workflows. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5e878c6..2a48682 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,6 +22,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + token: ${{ secrets.SHIMMY_DEPLOY_TOKEN }} - name: Check for existing tag on HEAD id: idempotency From f40252f54c0dfe5aa777f32a36c09e60808fb815 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Mon, 15 Jun 2026 11:11:06 +0100 Subject: [PATCH 11/12] Use SHIMMY_DEPLOY_TOKEN for evaluation-function-base dispatch Replace expired EVALUATION_FUNCTION_BASE_BUILD_TRIGGER_TOKEN with SHIMMY_DEPLOY_TOKEN which has access to all org repos. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a48682..d4a0a24 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -72,7 +72,7 @@ jobs: if: steps.idempotency.outputs.skip == 'false' uses: peter-evans/repository-dispatch@v3 with: - token: ${{ secrets.EVALUATION_FUNCTION_BASE_BUILD_TRIGGER_TOKEN }} + token: ${{ secrets.SHIMMY_DEPLOY_TOKEN }} repository: ${{ github.repository_owner }}/evaluation-function-base event-type: release client-payload: '{"shimmy_version": "${{ steps.version.outputs.version }}"}' From c1ac1a642d82d963b81b2ac94820b4f691b56b07 Mon Sep 17 00:00:00 2001 From: Marcus Messer Date: Mon, 15 Jun 2026 11:46:01 +0100 Subject: [PATCH 12/12] Replace peter-evans/repository-dispatch with gh api calls Fine-grained PATs are incompatible with the action. Use gh api directly with SHIMMY_DEPLOY_TOKEN for both release and trigger-build dispatches. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 11 ++++++----- .github/workflows/release.yml | 14 ++++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f9e0388..28ca2e0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -188,8 +188,9 @@ jobs: id-token: write steps: - name: Repository Dispatch - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ secrets.EVALUATION_FUNCTION_BASE_BUILD_TRIGGER_TOKEN }} - repository: ${{ github.repository_owner }}/evaluation-function-base - event-type: trigger-build + env: + GH_TOKEN: ${{ secrets.SHIMMY_DEPLOY_TOKEN }} + run: | + gh api repos/${{ github.repository_owner }}/evaluation-function-base/dispatches \ + --method POST \ + -f event_type=trigger-build diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d4a0a24..ac45a18 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -70,9 +70,11 @@ jobs: - name: Trigger evaluation-function-base release if: steps.idempotency.outputs.skip == 'false' - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ secrets.SHIMMY_DEPLOY_TOKEN }} - repository: ${{ github.repository_owner }}/evaluation-function-base - event-type: release - client-payload: '{"shimmy_version": "${{ steps.version.outputs.version }}"}' + # gh api sends JSON text given to -F as a string; key[subkey] makes client_payload an object. + env: + GH_TOKEN: ${{ secrets.SHIMMY_DEPLOY_TOKEN }} + run: | + gh api repos/${{ github.repository_owner }}/evaluation-function-base/dispatches \ + --method POST \ + -f event_type=release \ + -f 'client_payload[shimmy_version]=${{ steps.version.outputs.version }}'