diff --git a/.github/workflows/e2e-suite.yaml b/.github/workflows/e2e-suite.yaml
new file mode 100644
index 0000000..dce5b8a
--- /dev/null
+++ b/.github/workflows/e2e-suite.yaml
@@ -0,0 +1,109 @@
+# Reusable e2e workflow (workflow_call): shared setup (build image, kind, deploy
+# fluence base), then run ONE test suite — a directory under test/e2e/. The
+# suite's tests are DISCOVERED (every NN-*.sh, run in sorted order); adding a test
+# is just dropping a file in the directory, no workflow edit. If the suite needs
+# special preparation it provides a setup.sh in its directory, which is run before
+# the tests (the gang suite has none; the quantum suite installs the qpu add-on).
+name: e2e-suite
+on:
+  workflow_call:
+    inputs:
+      suite:
+        description: "test suite directory name under test/e2e/ (e.g. gang, quantum)"
+        required: true
+        type: string
+
+env:
+  IMAGE: vanessa/fluence:test
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build fluence image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          push: false
+          load: true
+          tags: ${{ env.IMAGE }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Create k8s Kind Cluster
+        uses: helm/kind-action@v1.10.0
+        with:
+          version: v0.32.0              # required for gang
+          node_image: kindest/node:v1.36.1
+          config: ./deploy/kind-config.yaml
+
+      - name: Free Disk Space (Ubuntu)
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \
+                      /opt/hostedtoolcache/CodeQL
+          sudo apt-get clean   # clean package caches
+          df -h                # show disk space after cleanup
+
+      - name: Load docker images
+        run: |
+          kind_cluster="$(kind get clusters)"
+          kind load --name "$kind_cluster" docker-image "$IMAGE"   # IMAGE: workflow-level env
+
+      - name: Deploy fluence (base)
+        run: |
+          kubectl apply -f deploy/fluence-test.yaml
+          kubectl rollout status -n kube-system deployment/fluence --timeout=180s
+          POD=""
+          for i in $(seq 1 60); do   # poll up to 60 times, 2s apart
+            POD=$(kubectl -n kube-system get pods -l app=fluence \
+              -o go-template='{{range .items}}{{if not .metadata.deletionTimestamp}}{{$name := .metadata.name}}{{range .status.conditions}}{{if and (eq .type "Ready") (eq .status "True")}}{{$name}}{{"\n"}}{{end}}{{end}}{{end}}{{end}}' 2>/dev/null | head -1 || true)
+            [ -n "$POD" ] && break   # stop at the first Ready pod with no deletionTimestamp
+            sleep 2
+          done
+          [ -n "$POD" ] || { echo "ERROR: no Ready non-terminating fluence pod"; kubectl -n kube-system get pods -l app=fluence -o wide; exit 1; }
+          echo "Using pod: $POD"
+          sleep 5   # brief pause before exec into the freshly Ready pod
+          kubectl -n kube-system exec "$POD" -- /bin/bash -c "cat /tmp/fluence-graph-*.json" || true   # best-effort dump; failure ignored
+          kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{": cpu="}{.status.allocatable.cpu}{" mem="}{.status.allocatable.memory}{"\n"}{end}'
+
+      # Per-suite special setup, if the suite directory provides one.
+      - name: Suite setup (${{ inputs.suite }})
+        env:
+          SUITE: ${{ inputs.suite }}   # via env: never splice inputs into the script text
+        run: |
+          s="test/e2e/$SUITE/setup.sh"
+          [ -f "$s" ] || { echo "no setup.sh for suite '$SUITE' — skipping"; exit 0; }
+          echo "running $s"
+          bash "$s"
+
+      # Discover and run every NN-*.sh in the suite directory, in sorted order.
+      - name: Run suite (${{ inputs.suite }})
+        env:
+          SUITE: ${{ inputs.suite }}
+        run: |
+          dir="test/e2e/$SUITE"
+          [ -d "$dir" ] || { echo "ERROR: no such suite dir: $dir"; exit 1; }
+          shopt -s nullglob
+          tests=("$dir"/[0-9]*.sh)   # pathname expansion already yields sorted names
+          [ ${#tests[@]} -gt 0 ] || { echo "ERROR: no NN-*.sh tests in $dir"; exit 1; }
+          echo "discovered ${#tests[@]} test(s) in $dir:"
+          printf '  %s\n' "${tests[@]}"
+          for t in "${tests[@]}"; do
+            echo "::group::$t"
+            bash "$t"
+            echo "::endgroup::"
+          done
+
+      - name: Dump diagnostics on failure
+        if: failure()
+        run: |
+          kubectl get pods -A -o wide || true   # best-effort: still reach the log dumps
+          kubectl logs -n kube-system deployment/fluence || true
+          kubectl logs -n kube-system deployment/fluence-webhook || true
\ No newline at end of file
diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml
index a6c1266..4b405f6 100644
--- a/.github/workflows/e2e-tests.yaml
+++ b/.github/workflows/e2e-tests.yaml
@@ -8,140 +8,15 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
-env:
-  KIND_VERSION: v0.32.0
-  IMAGE: vanessa/fluence:test
-
 jobs:
+  # Fan out the suites as parallel jobs, each a call into the reusable workflow.
+  # The shared setup (build, kind, deploy) lives once in e2e-suite.yaml; the
+  # matrix runs gang and quantum concurrently.
   e2e:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build fluence image
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          file: ./Dockerfile
-          push: false
-          load: true
-          tags: ${{ env.IMAGE }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          
-      - name: Create k8s Kind Cluster
-        uses: helm/kind-action@v1.10.0
-        with:
-          version: v0.32.0              # required for gang
-          node_image: kindest/node:v1.36.1
-          config: ./deploy/kind-config.yaml
-          
-      - name: Free Disk Space (Ubuntu)
-        run: |
-          echo "=== Disk space before cleanup ==="
-          df -h
-          
-          # Remove large software runtimes and tools
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache/CodeQL
-          
-          # Clean package caches
-          sudo apt-get clean          
-          echo "=== Disk space after cleanup ==="
-          df -h
-
-      - name: Load docker images
-        run: |
-          kind get clusters
-          cluster=$(kind get clusters)
-          kind load --name $cluster docker-image vanessa/fluence:test
-
-      - name: Deploy fluence (base)
-        run: |
-          kubectl apply -f deploy/fluence-test.yaml
-          kubectl rollout status -n kube-system deployment/fluence --timeout=180s
-          # rollout status can return while the OLD ReplicaSet's pod is still
-          # Running (terminating). Selecting by phase=Running alone can grab that
-          # stale pod, which then 404s on exec/logs. Wait until exactly one
-          # fluence pod remains, and require it to be Ready and not terminating.
-          POD=""
-          for i in $(seq 1 60); do
-            # names of pods that are Ready AND have no deletionTimestamp (not terminating)
-            POD=$(kubectl -n kube-system get pods -l app=fluence \
-              -o go-template='{{range .items}}{{if not .metadata.deletionTimestamp}}{{$name := .metadata.name}}{{range .status.conditions}}{{if and (eq .type "Ready") (eq .status "True")}}{{$name}}{{"\n"}}{{end}}{{end}}{{end}}{{end}}' 2>/dev/null | head -1 || true)
-            [ -n "$POD" ] && break
-            sleep 2
-          done
-          [ -n "$POD" ] || { echo "ERROR: no Ready non-terminating fluence pod found"; kubectl -n kube-system get pods -l app=fluence -o wide; exit 1; }
-          echo "Using pod: $POD"
-          # Brief sleep to let the container runtime stabilize before exec
-          sleep 5
-          kubectl -n kube-system exec "$POD" -- ls /tmp/
-          kubectl -n kube-system logs "$POD"
-          kubectl -n kube-system exec "$POD" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
-          kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{": cpu="}{.status.allocatable.cpu}{" mem="}{.status.allocatable.memory}{"\n"}{end}'
-         
-      - name: E2E - classical gang
-        run: bash test/e2e/01-classical-gang.sh
-
-      - name: Deploy quantum add-on
-        run: |
-          # Includes the device plugin and oriented to testing container
-          kubectl apply -f deploy/fluence-resources-test.yaml
-          kubectl rollout restart -n kube-system deployment/fluence
-          kubectl rollout status  -n kube-system deployment/fluence --timeout=60s
-          for i in $(seq 1 60); do
-            kubectl get nodes -o jsonpath='{range .items[*]}{.status.allocatable}{"\n"}{end}'
-            kubectl get nodes -o jsonpath='{range .items[*]}{.status.allocatable}{"\n"}{end}' | grep -q 'fluxion.flux-framework.org/qpu' && break
-            sleep 1
-          done
-          # After a rollout restart BOTH the old and new pods are briefly Running.
-          # Select only a Ready pod with no deletionTimestamp (i.e. the new one,
-          # not the terminating old one) so exec/logs don't 404.
-          POD=""
-          for i in $(seq 1 60); do
-            POD=$(kubectl -n kube-system get pods -l app=fluence \
-              -o go-template='{{range .items}}{{if not .metadata.deletionTimestamp}}{{$name := .metadata.name}}{{range .status.conditions}}{{if and (eq .type "Ready") (eq .status "True")}}{{$name}}{{"\n"}}{{end}}{{end}}{{end}}{{end}}' 2>/dev/null | head -1 || true)
-            [ -n "$POD" ] && break
-            sleep 2
-          done
-          [ -n "$POD" ] || { echo "ERROR: no Ready non-terminating fluence pod found after restart"; kubectl -n kube-system get pods -l app=fluence -o wide; exit 1; }
-          echo "Using pod: $POD"
-          # Brief sleep to let the container runtime stabilize before exec
-          sleep 5
-          kubectl -n kube-system exec "$POD" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
-
-      - name: Wait for webhook
-        run: |
-
-          # wait for the deployment AND for the caBundle to be populated on the webhook config
-          kubectl -n kube-system rollout status deployment/fluence-webhook --timeout=120s
-          for i in $(seq 1 30); do
-            cab=$(kubectl get mutatingwebhookconfiguration fluence-webhook \
-                  -o jsonpath='{.webhooks[0].clientConfig.caBundle}' 2>/dev/null)
-            [ -n "$cab" ] && break
-            sleep 2
-          done
-          # let TLS serving settle after caBundle patch
-          sleep 3 
-
-      - name: E2E - quantum placement
-        run: bash test/e2e/02-quantum-placement.sh
-
-      #- name: E2E - restart recovery (no double-book)
-      #  run: bash test/e2e/03-restart-recovery.sh
-
-      - name: E2E - sidecar ungate
-        run: bash test/e2e/04-sidecar-ungate.sh
-
-      - name: Dump diagnostics on failure
-        if: failure()
-        run: |
-          kubectl get pods -A -o wide
-          kubectl logs -n kube-system deployment/fluence
+    strategy:
+      fail-fast: false        # one suite failing should not cancel the other
+      matrix:
+        suite: [gang, quantum]
+    uses: ./.github/workflows/e2e-suite.yaml
+    with:
+      suite: ${{ matrix.suite }}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 1160cb4..5912c5a 100644
--- a/Makefile
+++ b/Makefile
@@ -55,13 +55,16 @@ test-image-deploy: test-image
 	kubectl patch podgroup training -n default --type=merge -p '{"metadata":{"finalizers":null}}' || true
 	kubectl delete deployments --all
 	kubectl delete pods --all
-	kubectl delete -f deploy/fluence-test.yaml
+	kubectl delete -f deploy/fluence-test.yaml || true
 	kubectl delete pods --all
 
+.PHONY: test-deploy-recreate
+test-deploy-recreate: test-image-deploy ## Run test-image-deploy, then apply deploy/fluence-pull-test.yaml
+	kubectl apply -f deploy/fluence-pull-test.yaml
 
 .PHONY: deploy
 deploy: ## Install RBAC + scheduler into kube-system
 	kubectl apply -f deploy/fluence.yaml
 
 .PHONY: help
 help:
diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go
index ea2669a..1a6709d 100644
--- a/cmd/webhook/main.go
+++ b/cmd/webhook/main.go
@@ -12,9 +12,11 @@ package main
 import (
 	"context"
 	"crypto/tls"
+	"flag"
 	"log"
 	"net/http"
 	"os"
+	"strings"
 	"time"
 
 	"github.com/converged-computing/fluence/pkg/cluster"
@@ -38,6 +40,29 @@ func main() {
 	cfgName := env("WEBHOOK_CONFIG", "fluence-webhook")
 	addr := env("WEBHOOK_ADDR", ":8443")
 
+	// Handler selection. By default ALL registered handlers are enabled. The
+	// operator may restrict the active set with --handlers (comma-separated) or
+	// the FLUENCE_HANDLERS env var, e.g. --handlers=fluxion,gang to run without
+	// quantum. An empty value means all enabled. Unknown names are warned about
+	// but not fatal (so config survives a handler being renamed/removed).
+	handlersFlag := flag.String("handlers", env("FLUENCE_HANDLERS", ""),
+		"comma-separated handlers in dispatch order (default: fluxion,quantum,gang). e.g. fluxion,gang disables quantum")
+	flag.Parse()
+
+	var requested []string
+	if *handlersFlag != "" {
+		for _, n := range strings.Split(*handlersFlag, ",") {
+			if n = strings.TrimSpace(n); n != "" {
+				requested = append(requested, n)
+			}
+		}
+	}
+	active, unknown := webhook.SetActiveHandlers(requested)
+	for _, n := range unknown {
+		log.Printf("[fluence-webhook] WARNING: unknown handler %q — ignoring", n)
+	}
+	log.Printf("[fluence-webhook] active handlers (in dispatch order): %v", active)
+
 	dnsNames := []string{
 		svc + "." + ns + ".svc",
 		svc + "." + ns + ".svc.cluster.local",
@@ -87,7 +112,6 @@ func main() {
 	mutator := &webhook.Mutator{
 		AttributeKeys: attrKeys,
 		Clientset:     client,
-		SidecarImage:  env("FLUENCE_SIDECAR_IMAGE", ""),
 	}
 	log.Printf("[fluence-webhook] env contract injected into fluxion pods: %v", mutator.EnvVarNames())
 
diff --git a/deploy/fluence-pull-test.yaml b/deploy/fluence-pull-test.yaml
new file mode 100644
index 0000000..94c2425
--- /dev/null
+++ b/deploy/fluence-pull-test.yaml
@@ -0,0 +1,286 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: fluence
+  namespace: kube-system
+---
+# Bind the built-in scheduler roles so fluence (a full kube-scheduler build) has
+# every list/watch the scheduling framework needs (nodes, pods, PV/PVC, CSI,
+# storageclasses, resourceclaims/slices, volumeattachments, events, etc.).
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-as-kube-scheduler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:kube-scheduler
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-as-volume-scheduler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:volume-scheduler
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+# Delegated authentication: read the auth configmap in kube-system. This is the
+# fix for the "extension-apiserver-authentication ... forbidden" errors.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: fluence-extension-apiserver-authentication-reader
+  namespace: kube-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: extension-apiserver-authentication-reader
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+# Extras the built-in scheduler role does not grant: the alpha PodGroup/Workload
+# API (gang), and leader-election leases under our scheduler name.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: fluence-extra
+rules:
+  - apiGroups: ["scheduling.k8s.io"]
+    resources: ["podgroups", "workloads", "podgroups/status", "workloads/status"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]
+    verbs: ["create", "get", "update", "list", "watch"]
+  # PreBind stamps the allocated backend onto the pod as an annotation; the
+  # built-in system:kube-scheduler role only allows patching pods/status, not
+  # the pod object, so grant it here.
+  - apiGroups: [""]
+    resources: ["pods"]
+    # create/delete: the webhook creates the one-off quantum submitter pod
+    # (ensureSubmitterPod) and the scheduler reaps it during gang cleanup.
+    verbs: ["get", "list", "watch", "create", "patch", "update", "delete"]
+  # The webhook self-manages its TLS by patching its own config's caBundle.
+  - apiGroups: ["admissionregistration.k8s.io"]
+    resources: ["mutatingwebhookconfigurations"]
+    verbs: ["get", "list", "watch", "patch"]
+  # The webhook creates per-namespace sidecar RBAC on demand when a leader
+  # pod is admitted, so users do not need to apply RBAC manually.
+  - apiGroups: [""]
+    resources: ["serviceaccounts"]
+    verbs: ["get", "create"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["get", "create"]
+  - apiGroups: ["rbac.authorization.k8s.io"]
+    resources: ["roles", "rolebindings"]
+    verbs: ["get", "create"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-extra
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: fluence-extra
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fluence-scheduler-config
+  namespace: kube-system
+data:
+  scheduler-config.yaml: |
+    apiVersion: kubescheduler.config.k8s.io/v1
+    kind: KubeSchedulerConfiguration
+    leaderElection:
+      leaderElect: false
+    profiles:
+      - schedulerName: fluence
+        plugins:
+          # multiPoint wires Fluence into every extension point its Go type
+          # implements: PreFilter, Filter, and PreBind (which stamps the backend
+          # annotation). Listing points individually risks omitting one — that is
+          # exactly what left PreBind unwired and the backend annotation unset.
+          multiPoint:
+            enabled: [{name: Fluence}]
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fluence
+  namespace: kube-system
+  labels: {app: fluence}
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: fluence}
+  template:
+    metadata:
+      labels: {app: fluence}
+    spec:
+      serviceAccountName: fluence
+      containers:
+        - name: fluence
+          image: vanessa/fluence:test
+          # Always pull from the registry (this is the pull-test manifest, not the kind-load one)
+          imagePullPolicy: Always
+          command:
+            - /bin/fluence
+            - --config=/etc/fluence/scheduler-config.yaml
+            # fluence is its own scheduler binary, so it needs the gang gates set
+            # here (the cluster-level kube-scheduler gates don't apply to it).
+            # Without these its PodGroup/GangScheduling plugin is inactive, pods
+            # schedule with no gang semantics, and PodGroup status stays Pending.
+            - --feature-gates=GenericWorkload=true,GangScheduling=true
+            # Re-attempt unschedulable pods more often than the 5m default. In the
+            # contention experiment a gang that loses the initial race for nodes is
+            # marked Unschedulable; this is how soon it is re-tried after capacity
+            # frees (the event-driven QueueingHint is best-effort; this is the
+            # backstop that bounds worst-case requeue latency). 30s keeps contended
+            # gangs draining promptly without thrashing the queue.
+            - --pod-max-in-unschedulable-pods-duration=30s
+            - --v=4
+          env:
+            # Path to the resources config (e.g. quantum backends). Unset/empty
+            # file -> classical-only graph. Supplied by the quantum add-on.
+            - name: FLUENCE_RESOURCES
+              value: /etc/fluence/resources.yaml
+          volumeMounts:
+            - name: config
+              mountPath: /etc/fluence
+      volumes:
+        - name: config
+          projected:
+            sources:
+              - configMap: {name: fluence-scheduler-config}
+              - configMap: {name: fluence-resources, optional: true}
+---
+# Mutating webhook: injects scheduler-chosen values into pods at creation time
+# (currently a downward-API FLUXION_BACKEND env for quantum pods). It self-manages
+# TLS — generates a CA + serving cert at startup and patches the caBundle below —
+# so no cert-manager and no committed keys. failurePolicy Ignore keeps a webhook
+# outage from blocking pod creation cluster-wide.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fluence-webhook
+  namespace: kube-system
+  labels: {app: fluence-webhook}
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: fluence-webhook}
+  template:
+    metadata:
+      labels: {app: fluence-webhook}
+    spec:
+      serviceAccountName: fluence
+      containers:
+        - name: webhook
+          image: vanessa/fluence:test
+          # Always pull from the registry (this is the pull-test manifest, not the kind-load one)
+          imagePullPolicy: Always
+          command: ["/bin/fluence-webhook"]
+          env:
+            # Use busybox as sidecar image in tests — avoids pulling the real
+            # sidecar image which is large and not cached in CI.
+            - name: FLUENCE_SIDECAR_IMAGE
+              value: "busybox:latest"
+          ports:
+            - containerPort: 8443
+          readinessProbe:
+            httpGet: {path: /healthz, port: 8443, scheme: HTTPS}
+            initialDelaySeconds: 2
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: fluence-webhook
+  namespace: kube-system
+spec:
+  selector: {app: fluence-webhook}
+  ports:
+    - port: 443
+      targetPort: 8443
+---
+apiVersion: admissionregistration.k8s.io/v1
+kind: MutatingWebhookConfiguration
+metadata:
+  name: fluence-webhook
+webhooks:
+  - name: pods.fluence.flux-framework.org
+    admissionReviewVersions: ["v1"]
+    sideEffects: None
+    failurePolicy: Ignore        # never block pod creation if the webhook is down
+    # caBundle is filled in at runtime by the webhook patching this object.
+    clientConfig:
+      service:
+        name: fluence-webhook
+        namespace: kube-system
+        path: /mutate
+        port: 443
+    rules:
+      - apiGroups: [""]
+        apiVersions: ["v1"]
+        operations: ["CREATE"]
+        resources: ["pods"]
+        scope: Namespaced
+    # Don't intercept system pods (and avoid bootstrap coupling).
+    namespaceSelector:
+      matchExpressions:
+        - key: kubernetes.io/metadata.name
+          operator: NotIn
+          values: ["kube-system"]
+---
+# fluence-sidecar.yaml
+#
+# RBAC and supporting resources for the Fluence quantum sidecar.
+#
+# The sidecar runs inside a leader pod and needs:
+#   - patch/annotate on pods in its own namespace (to ungate workers and
+#     propagate the task ARN annotation)
+#
+# The sidecar ServiceAccount is namespace-scoped — it only has permissions
+# in the namespace where the workflow runs. The webhook sets
+# spec.serviceAccountName on the leader pod to fluence-sidecar.
+#
+# The fluence Python package is staged into user containers by an init
+# container (Model C): the webhook injects an init container from the
+# sidecar image that copies the package + sitecustomize into a shared
+# volume on the user container's PYTHONPATH. No ConfigMap, no user install.
+#
+# Apply with (these resources are inlined in this manifest):
+#   kubectl apply -f deploy/fluence-pull-test.yaml
+
+
+---
+# PriorityClass for classical pods paired with quantum work.
+# Applied to worker pods by the webhook when they are gated.
+# When ungated, high priority triggers preemption of lower-priority work
+# so workers get nodes immediately as the QPU result arrives.
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: fluence-quantum-classical
+  labels:
+    app: fluence
+value: 1000000
+globalDefault: false
+preemptionPolicy: PreemptLowerPriority
+description: "High priority for classical pods paired with quantum work. Set by Fluence webhook."
diff --git a/deploy/fluence-test.yaml b/deploy/fluence-test.yaml
index 6d1dace..ab61a91 100644
--- a/deploy/fluence-test.yaml
+++ b/deploy/fluence-test.yaml
@@ -67,7 +67,9 @@ rules:
   # the pod object, so grant it here.
   - apiGroups: [""]
     resources: ["pods"]
-    verbs: ["get", "list", "watch", "patch", "update"]
+    # create/delete: the webhook creates the one-off quantum submitter pod
+    # (ensureSubmitterPod) and the scheduler reaps it during gang cleanup.
+    verbs: ["get", "list", "watch", "create", "patch", "update", "delete"]
   # The webhook self-manages its TLS by patching its own config's caBundle.
   - apiGroups: ["admissionregistration.k8s.io"]
     resources: ["mutatingwebhookconfigurations"]
@@ -146,6 +148,13 @@ spec:
             # Without these its PodGroup/GangScheduling plugin is inactive, pods
             # schedule with no gang semantics, and PodGroup status stays Pending.
             - --feature-gates=GenericWorkload=true,GangScheduling=true
+            # Re-attempt unschedulable pods more often than the 5m default. In the
+            # contention experiment a gang that loses the initial race for nodes is
+            # marked Unschedulable; this is how soon it is re-tried after capacity
+            # frees (the event-driven QueueingHint is best-effort; this is the
+            # backstop that bounds worst-case requeue latency). 30s keeps contended
+            # gangs draining promptly without thrashing the queue.
+            - --pod-max-in-unschedulable-pods-duration=30s
             - --v=4
           env:
             # Path to the resources config (e.g. quantum backends). Unset/empty
diff --git a/deploy/fluence.yaml b/deploy/fluence.yaml
index b856268..7d71386 100644
--- a/deploy/fluence.yaml
+++ b/deploy/fluence.yaml
@@ -67,7 +67,9 @@ rules:
   # the pod object, so grant it here.
   - apiGroups: [""]
     resources: ["pods"]
-    verbs: ["get", "list", "watch", "patch", "update"]
+    # create/delete: the webhook creates the one-off quantum submitter pod
+    # (ensureSubmitterPod) and the scheduler reaps it during gang cleanup.
+    verbs: ["get", "list", "watch", "create", "patch", "update", "delete"]
   # The webhook self-manages its TLS by patching its own config's caBundle.
   - apiGroups: ["admissionregistration.k8s.io"]
     resources: ["mutatingwebhookconfigurations"]
diff --git a/deploy/kind-config.yaml b/deploy/kind-config.yaml
index c94e070..ec310bc 100644
--- a/deploy/kind-config.yaml
+++ b/deploy/kind-config.yaml
@@ -32,4 +32,4 @@ nodes:
             - name: feature-gates
               value: "GenericWorkload=true"
   - role: worker
-  - role: worker
+  - role: worker
\ No newline at end of file
diff --git a/docs/handlers.md b/docs/handlers.md
new file mode 100644
index 0000000..1da169a
--- /dev/null
+++ b/docs/handlers.md
@@ -0,0 +1,83 @@
+# Webhook handlers & sidecar architecture
+
+Fluence's value is not creating gangs (Kubernetes 1.36 native gang scheduling
+already does that). It is **customizing the gang on the fly based on the
+resources a pod requests** — e.g. a quantum leader/worker workload becomes a
+size-1 leader gang plus a size-(N-1) worker gang, with the leader running a
+sidecar that ungates its workers when the quantum task is ready.
+
+## Handlers
+
+Each handler is an interface implementation (`pkg/webhook/handler.go`):
+
+```go
+type Handler interface {
+    Name() string
+    Applies(ctx, m MutatorAPI, pod) bool
+    Mutate(ctx, m MutatorAPI, pod) []spec.Op
+}
+```
+
+Handlers self-register by name (`init()` -> `webhook.Register`); a blank import
+of the handlers package makes them AVAILABLE. The core never names a handler.
+
+**Ordering = the active list.** There is no per-handler priority. The active
+handler list is BOTH the selection and the dispatch order:
+
+```go
+var DefaultHandlerOrder = []string{"fluxion", "quantum", "gang"}
+```
+
+Dispatch walks this list in order. `gang` is last because it is last in the
+list — the fallback that applies common defaults (honor `group-size`, else
+owner-derived N) only if no earlier handler already shaped the gang. A
+custom-resource handler is inserted into the list before `gang` to shape its own
+gang first. To change the order, or disable a handler, pass a different list.
+
+## Enabling/disabling handlers
+
+By default ALL registered handlers are enabled. Restrict the active set on the
+webhook command:
+
+```
+fluence-webhook --handlers=fluxion,gang        # run without quantum
+FLUENCE_HANDLERS=fluxion,quantum,gang fluence-webhook
+```
+
+Empty = the default list. The list is the order: `--handlers=gang,fluxion` runs
+gang first; omitting a name disables it. Unknown names are warned and dropped.
+
+(The handler set lives in the WEBHOOK, which mutates pods. `cmd/fluence` is the
+scheduler plugin and runs no handlers.)
+
+## Sidecar interface
+
+The coordination sidecar is a handler-owned capability, not a core one. Handlers
+that need a sidecar use `handlers.Sidecar`:
+
+```go
+type Sidecar interface {
+    EnsureRBAC(ctx, namespace)
+    InterceptorOps(pod) []spec.Op
+    ContainerOps(pod, observe bool) []spec.Op
+}
+```
+
+The default `coreSidecar` delegates to the core's staging primitives. The quantum
+handler uses it today; a custom handler can supply its own implementation
+(different image, env, gating) without touching the core or other handlers. The
+core's `MutatorAPI` keeps the staging primitives only so the default
+implementation can delegate — handlers do not call them directly.
+
+## Group size resolution (the default gang handler)
+
+`minCount` (the atomic-schedule count) resolves as:
+
+1. explicit `fluence.flux-framework.org/group-size` annotation — honored verbatim
+   (the override; e.g. a quantum split sets it directly);
+2. else the owning indexed Job's `parallelism` (== MiniCluster size N);
+3. else 1, logged.
+
+This is a common default available to every gang; handler-specific annotations
+(quantum role, expected-workers, etc.) live in their handlers and are not
+required by the core.
diff --git a/examples/quantum-pod.yaml b/examples/quantum-pod.yaml
index a619df9..b5dfbc9 100644
--- a/examples/quantum-pod.yaml
+++ b/examples/quantum-pod.yaml
@@ -2,7 +2,7 @@
 # via resources (the fluence device plugin advertises fluxion.flux-framework.org/qpu
 # on every node, so NodeResourcesFit is satisfied). Fluence's PreFilter matches
 # the request against the resource graph and picks a backend, the webhook injects
-# QRMI_BACKEND (the allocated backend) automatically, and note we can add other
+# FLUXION_BACKEND (the allocated backend) automatically, and note we can add other
 # envars here in the future. I chose a webhook because I think this is going to
 # be a requirement, and the pod is immutable after creation. 
 # Then the container submits via qrmi-go (the separate qrmi-sampler image).
@@ -27,4 +27,4 @@ spec:
         requests:
           fluxion.flux-framework.org/qpu: "1"
         limits:
-          fluxion.flux-framework.org/qpu: "1"
\ No newline at end of file
+          fluxion.flux-framework.org/qpu: "1"
diff --git a/examples/test/e2e/gang/multi-gang-contention.yaml b/examples/test/e2e/gang/multi-gang-contention.yaml
new file mode 100644
index 0000000..14b0fd8
--- /dev/null
+++ b/examples/test/e2e/gang/multi-gang-contention.yaml
@@ -0,0 +1,40 @@
+# Two gangs that cannot both place: fluxion allocates one core per slot, so two 2-pod
+# gangs need 4 cores, but the cluster graph holds only ~3 (3 workers, ~1 core each).
+# One gang places entirely; the loser stays FULLY pending (all-or-nothing), never partial.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gang-a
+spec:
+  replicas: 2  # two pods, matching the group-size annotation below
+  selector: {matchLabels: {app: gang-a}}
+  template:
+    metadata:
+      labels: {app: gang-a, fluence.flux-framework.org/group: gang-a}
+      annotations: {fluence.flux-framework.org/group-size: "2"}  # explicit gang size
+    spec:
+      schedulerName: fluence
+      containers:
+        - name: w
+          image: busybox
+          command: ["sleep", "3600"]  # long-running: the pod keeps holding its core
+          resources: {requests: {cpu: "1"}}  # one full core per pod
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gang-b
+spec:
+  replicas: 2  # two pods, matching the group-size annotation below
+  selector: {matchLabels: {app: gang-b}}
+  template:
+    metadata:
+      labels: {app: gang-b, fluence.flux-framework.org/group: gang-b}
+      annotations: {fluence.flux-framework.org/group-size: "2"}  # explicit gang size
+    spec:
+      schedulerName: fluence
+      containers:
+        - name: w
+          image: busybox
+          command: ["sleep", "3600"]  # long-running: the pod keeps holding its core
+          resources: {requests: {cpu: "1"}}  # one full core per pod
diff --git a/examples/test/e2e/gang/multi-gang-requeue.yaml b/examples/test/e2e/gang/multi-gang-requeue.yaml
new file mode 100644
index 0000000..a8e8636
--- /dev/null
+++ b/examples/test/e2e/gang/multi-gang-requeue.yaml
@@ -0,0 +1,48 @@
+# Requeue-on-capacity + gang-atomicity test (test/e2e/gang/09).
+# gang-win: a 2-pod gang that runs a SHORT job and COMPLETES (pods -> Succeeded),
+#           freeing its nodes.
+# gang-wait: a 2-pod gang needing the same nodes; loses the initial race and sits
+#            Unschedulable. When gang-win completes, gang-wait must be re-attempted
+#            (via the shortened unschedulable-recheck timeout) and place atomically.
+# On a 3-worker (~3-core) cluster the two 2-pod gangs (4 cores) cannot co-run.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: gang-win
+spec:
+  completions: 2
+  parallelism: 2  # both pods run at once; matches the group-size annotation below
+  completionMode: Indexed
+  template:
+    metadata:
+      labels: {fluence.flux-framework.org/group: gang-win}
+      annotations: {fluence.flux-framework.org/group-size: "2"}  # explicit gang size
+    spec:
+      schedulerName: fluence
+      restartPolicy: Never
+      containers:
+        - name: w
+          image: busybox
+          command: ["sh","-c","sleep 30"]   # completes, frees nodes
+          resources: {requests: {cpu: "1"}}  # one full core per pod
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: gang-wait
+spec:
+  completions: 2
+  parallelism: 2  # both pods run at once; matches the group-size annotation below
+  completionMode: Indexed
+  template:
+    metadata:
+      labels: {fluence.flux-framework.org/group: gang-wait}
+      annotations: {fluence.flux-framework.org/group-size: "2"}  # explicit gang size
+    spec:
+      schedulerName: fluence
+      restartPolicy: Never
+      containers:
+        - name: w
+          image: busybox
+          command: ["sh","-c","sleep 10"]  # short job; expected to start only after gang-win frees capacity
+          resources: {requests: {cpu: "1"}}  # one full core per pod
\ No newline at end of file
diff --git a/examples/test/e2e/gang/multi-gang.yaml b/examples/test/e2e/gang/multi-gang.yaml
new file mode 100644
index 0000000..9bfa67c
--- /dev/null
+++ b/examples/test/e2e/gang/multi-gang.yaml
@@ -0,0 +1,25 @@
+# Multi-pod gang via the WEBHOOK path (the path the experiments use).
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gang3
+spec:
+  replicas: 2  # two pods, matching the group-size annotation below
+  selector:
+    matchLabels: {app: gang3}
+  template:
+    metadata:
+      labels:
+        app: gang3
+        fluence.flux-framework.org/group: gang3  # gang membership label
+      annotations:
+        fluence.flux-framework.org/group-size: "2"  # explicit gang size
+    spec:
+      schedulerName: fluence
+      containers:
+        - name: worker
+          image: busybox
+          command: ["sleep", "3600"]
+          resources:
+            requests:
+              cpu: "1"  # one full core per pod
diff --git a/examples/single-podgroup.yaml b/examples/test/e2e/gang/single-podgroup.yaml
similarity index 100%
rename from examples/single-podgroup.yaml
rename to examples/test/e2e/gang/single-podgroup.yaml
diff --git a/examples/test/e2e/quantum/quantum-gang-pods.yaml b/examples/test/e2e/quantum/quantum-gang-pods.yaml
new file mode 100644
index 0000000..b345398
--- /dev/null
+++ b/examples/test/e2e/quantum/quantum-gang-pods.yaml
@@ -0,0 +1,49 @@
+# Gang + submitter quantum workload for the e2e (no leader/worker).
+#
+# Two pods, identical, both requesting the quantum resource, in group "qgang".
+# The user authors NO roles and NO submitter — the webhook treats this as a gang
+# of full size N=2 (group-size makes N deterministic for raw pods, which have no
+# owning Job/Deployment to derive it from), gates every pod, and ADDITIONALLY
+# creates the one-off submitter pod "qgang-submitter" (its own group-of-one) that
+# runs the real submit and ungates the gang. busybox stands in for the quantum
+# app; the interceptor staging fails soft (no python), which is fine for the
+# structural assertions in 02/03/04.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: qgang-0
+  labels:
+    app: qgang
+    fluence.flux-framework.org/group: qgang  # gang membership label
+  annotations:
+    fluence.flux-framework.org/group-size: "2"  # raw pod: no owner to derive N from
+spec:
+  schedulerName: fluence
+  restartPolicy: Never
+  containers:
+    - name: app
+      image: busybox  # stand-in for the quantum app
+      command: ["sh", "-c", "echo gang member; sleep 600"]
+      resources:
+        requests: {fluxion.flux-framework.org/qpu: "1"}
+        limits:   {fluxion.flux-framework.org/qpu: "1"}
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: qgang-1
+  labels:
+    app: qgang
+    fluence.flux-framework.org/group: qgang  # gang membership label
+  annotations:
+    fluence.flux-framework.org/group-size: "2"  # raw pod: no owner to derive N from
+spec:
+  schedulerName: fluence
+  restartPolicy: Never
+  containers:
+    - name: app
+      image: busybox  # stand-in for the quantum app
+      command: ["sh", "-c", "echo gang member; sleep 600"]
+      resources:
+        requests: {fluxion.flux-framework.org/qpu: "1"}
+        limits:   {fluxion.flux-framework.org/qpu: "1"}
\ No newline at end of file
diff --git a/examples/test/e2e/quantum-pod-mock.yaml b/examples/test/e2e/quantum/quantum-pod-mock.yaml
similarity index 100%
rename from examples/test/e2e/quantum-pod-mock.yaml
rename to examples/test/e2e/quantum/quantum-pod-mock.yaml
diff --git a/examples/test/e2e/sidecar-mock-pods.yaml b/examples/test/e2e/sidecar-mock-pods.yaml
deleted file mode 100644
index fb223a7..0000000
--- a/examples/test/e2e/sidecar-mock-pods.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
----
-# Leader pod — first admitted, webhook creates PodGroup, injects sidecar, creates RBAC
-# User only needs schedulerName: fluence and the quantum-group label.
-# No PodGroup object needed — Fluence creates it.
-apiVersion: v1
-kind: Pod
-metadata:
-  name: sidecar-test-leader
-  labels:
-    app: fluence-sidecar-test
-    fluence.flux-framework.org/group: sidecar-test-group
-spec:
-  schedulerName: fluence
-  restartPolicy: Never
-  containers:
-    - name: mock-quantum-app
-      image: busybox
-      command:
-        - sh
-        - -c
-        - |
-          echo "mock-quantum-app: running"
-          echo "arn:aws:braket:us-east-1:123456:quantum-task/mock-abc123" \
-            > /tmp/task-arn
-          echo "mock-quantum-app: task ARN written"
-          sleep 3600
-      resources:
-        requests:
-          fluxion.flux-framework.org/qpu: "1"
-        limits:
-          fluxion.flux-framework.org/qpu: "1"
-
----
-# Worker pod — classical (no QPU). Gated by the webhook because it is a
-# non-leader member of a group whose leader is a quantum pod.
-apiVersion: v1
-kind: Pod
-metadata:
-  name: sidecar-test-worker
-  labels:
-    app: fluence-sidecar-test
-    fluence.flux-framework.org/group: sidecar-test-group
-spec:
-  schedulerName: fluence
-  restartPolicy: Never
-  containers:
-    - name: classical-worker
-      image: busybox
-      command:
-        - sh
-        - -c
-        - |
-          echo "classical-worker: started"
-          echo "TASK_ARN=$BRAKET_TASK_ARN"
-          sleep 10
-      env:
-        - name: FLUENCE_QUANTUM_JOB_ID
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.annotations['fluence.flux-framework.org/quantum-job-id']
-      resources:
-        requests:
-          cpu: "100m"
-          memory: "128Mi"
diff --git a/pkg/fluence/fluence.go b/pkg/fluence/fluence.go
index a1a10e1..fd3b080 100644
--- a/pkg/fluence/fluence.go
+++ b/pkg/fluence/fluence.go
@@ -77,14 +77,61 @@ type Fluence struct {
 	mu sync.Mutex
 	// placement maps a group key to its allocation (nodes, backend, jobids).
 	placement map[string]groupAlloc
+	// excludedNodes maps a group key to the set of nodes that are GENUINELY
+	// INCOMPATIBLE with that group (PostFilter saw UnschedulableAndUnresolvable
+	// from another plugin: a taint, affinity, or constraint Fluxion's graph does
+	// not model). PreFilter feeds them back as an RFC 31 negated-hostlist
+	// constraint so the re-match is steered onto other nodes. Nodes that were
+	// merely BUSY are deliberately NOT recorded here (excluding them would turn
+	// transient contention into permanent group failure). The set only grows for a
+	// group, so the exclusion-driven re-match is finite, and it is cleared on
+	// teardown. Guarded by mu.
+	excludedNodes map[string]map[string]bool
 }
 
 var (
-	_ fwk.PreFilterPlugin = (*Fluence)(nil)
-	_ fwk.FilterPlugin    = (*Fluence)(nil)
-	_ fwk.PreBindPlugin   = (*Fluence)(nil)
+	_ fwk.PreFilterPlugin  = (*Fluence)(nil)
+	_ fwk.FilterPlugin     = (*Fluence)(nil)
+	_ fwk.PostFilterPlugin = (*Fluence)(nil)
+	_ fwk.ReservePlugin    = (*Fluence)(nil)
+	_ fwk.PreBindPlugin    = (*Fluence)(nil)
 )
 
+// schedulableNodes returns only the nodes a normal pod could actually be placed
+// on, so the Fluxion graph never offers a node that Kubernetes will then reject
+// in Filter. Two kinds are dropped:
+//
+//   - cordoned nodes (spec.unschedulable), and
+//   - nodes carrying a NoSchedule/NoExecute taint (e.g. the control-plane's
+//     node-role.kubernetes.io/control-plane:NoSchedule).
+//
+// Without this, Fluxion can place a gang slot on the control-plane (it looks like
+// a valid virtual=false compute node to the graph), the pod is then rejected by
+// TaintToleration with UnschedulableAndUnresolvable, and PostFilter abandons the
+// whole allocation — on a small cluster that strands the gang permanently. We do
+// not attempt to honor specific tolerations here: gang workloads in this setup do
+// not tolerate node taints, so any NoSchedule/NoExecute taint means "not for us".
+func schedulableNodes(nodes []corev1.Node) []corev1.Node {
+	out := make([]corev1.Node, 0, len(nodes)) // capacity is an upper bound; input order is preserved
+	for _, n := range nodes {
+		if n.Spec.Unschedulable { // cordoned
+			continue
+		}
+		tainted := false // set by any hard taint; only the effect is inspected, so PreferNoSchedule never sets it
+		for _, t := range n.Spec.Taints {
+			if t.Effect == corev1.TaintEffectNoSchedule || t.Effect == corev1.TaintEffectNoExecute {
+				tainted = true
+				break
+			}
+		}
+		if tainted {
+			continue
+		}
+		out = append(out, n)
+	}
+	return out
+}
+
 // New builds the plugin: discover cluster nodes, optionally inject quantum
 // resources, write the JGF graph, initialize the Fluxion matcher, and register
 // the delete handlers that cancel allocations when their owning object is gone.
@@ -129,7 +176,7 @@ func New(ctx context.Context, _ runtime.Object, h fwk.Handle) (fwk.Plugin, error
 		}
 	}
 
-	jgfBytes, err := cluster.BuildGraph(nodeList.Items, opts)
+	jgfBytes, err := cluster.BuildGraph(schedulableNodes(nodeList.Items), opts)
 	if err != nil {
 		return nil, fmt.Errorf("build resource graph: %w", err)
 	}
@@ -161,10 +208,11 @@ func New(ctx context.Context, _ runtime.Object, h fwk.Handle) (fwk.Plugin, error
 	fluxion.Init(tmp.Name(), os.Getenv("FLUENCE_MATCH_POLICY"), "")
 
 	f := &Fluence{
-		handle:       h,
-		matcher:      fluxion,
-		knownDevices: knownDevices,
-		placement:    map[string]groupAlloc{},
+		handle:        h,
+		matcher:       fluxion,
+		knownDevices:  knownDevices,
+		placement:     map[string]groupAlloc{},
+		excludedNodes: map[string]map[string]bool{},
 	}
 	f.registerCancelHandlers()
 	// Periodic + startup reconcile of completed Fluence-created PodGroups, so a
@@ -251,7 +299,15 @@ func (f *Fluence) PreFilter(
 		return nil, fwk.AsStatus(err)
 	}
 
-	specs, err := placement.JobspecsForGroup(group, pods, f.knownDevices)
+	f.mu.Lock()
+	excluded := make([]string, 0, len(f.excludedNodes[group]))
+	for n := range f.excludedNodes[group] {
+		excluded = append(excluded, n)
+	}
+	f.mu.Unlock()
+	sort.Strings(excluded) // deterministic constraint for stable matching/logs
+
+	specs, err := placement.JobspecsForGroup(group, pods, f.knownDevices, excluded)
 	if err != nil {
 		return nil, fwk.AsStatus(err)
 	}
@@ -390,6 +446,103 @@ func (f *Fluence) Filter(
 	return fwk.NewStatus(fwk.Unschedulable, "node not in fluxion allocation for this group")
 }
 
+// PostFilter runs when a pod could not be scheduled after Filter — for a Fluence
+// group, this means the cached Fluxion allocation's nodes did not all survive the
+// other scheduler plugins' Filter checks. Without intervention the group would
+// retry forever against the same cached allocation while the Fluxion reservation
+// leaked, because PreFilter short-circuits on the cache and nothing else releases
+// it on a scheduling failure.
+//
+// We always abandon the failed allocation here (cancel the Fluxion jobids, drop
+// the cached placement) so the next PreFilter re-matches fresh. The careful part
+// is WHICH nodes we then permanently exclude from the group's future matches,
+// because a group reaches PostFilter for two very different reasons and they must
+// be handled oppositely (see fwk.Code docs):
+//
+//   - UnschedulableAndUnresolvable: the node genuinely cannot host this pod and
+//     re-trying it is pointless (a taint the pod does not tolerate, node affinity
+//     mismatch, a constraint Fluxion's graph does not model). EXCLUDE it; the
+//     next PreFilter feeds the exclusion set back as an RFC 31 negated-hostlist
+//     constraint so Fluxion is steered onto other nodes.
+//
+//   - Unschedulable (plain): the node could host the pod, just not at this
+//     instant (it is momentarily full). This is TRANSIENT. Do NOT exclude it —
+//     excluding a merely-busy node converts ordinary contention into permanent
+//     group failure, and in a saturated cluster (a gang that needs the whole node
+//     set) it strands the gang forever even though it would fit once a node frees.
+//
+// So contention excludes nothing and the group recovers by waiting/retrying;
+// only durable incompatibility accumulates in excludedNodes (cleared on group
+// teardown), which keeps the exclusion-driven re-match finite and correct.
+func (f *Fluence) PostFilter(
+	ctx context.Context, // unused
+	state fwk.CycleState, // unused
+	pod *corev1.Pod,
+	filteredNodeStatusMap fwk.NodeToStatusReader, // may be nil; then nothing is excluded
+) (*fwk.PostFilterResult, *fwk.Status) {
+	group := groupKey(pod)
+
+	f.mu.Lock()
+	alloc, ok := f.placement[group]
+	if !ok {
+		// No cached allocation for this group — nothing of ours to reconcile.
+		// (Another plugin's PostFilter, or a non-group pod.)
+		f.mu.Unlock()
+		return nil, fwk.NewStatus(fwk.Unschedulable)
+	}
+	// Exclude ONLY nodes that are genuinely incompatible with this pod, never
+	// nodes that were merely busy this cycle. The framework gives us a per-node
+	// status: UnschedulableAndUnresolvable means the node cannot host the pod and
+	// re-trying it is pointless (a taint the pod does not tolerate, node affinity
+	// mismatch, a constraint Fluxion's graph does not model) -> exclude it so the
+	// re-match is steered elsewhere. A plain Unschedulable means the node could
+	// host the pod but not right now (it is momentarily full) -> do NOT exclude
+	// it; it must stay eligible so the group can land there once capacity frees.
+	//
+	// This is the whole point: a group enters PostFilter for many reasons, and
+	// "the cluster is just full at this instant" is the common one. Permanently
+	// banning the busy nodes (the old whole-allocation exclusion) turned transient
+	// contention into permanent group failure — exactly backwards. Now contention
+	// excludes nothing; the group simply abandons this cycle's reservation and
+	// retries the same nodes when they free.
+	if f.excludedNodes[group] == nil { // lazily created; the (possibly empty) set exists from here on
+		f.excludedNodes[group] = map[string]bool{}
+	}
+	var incompatible, busy []string // collected for the log line only
+	for _, n := range alloc.place.Nodes {
+		var code fwk.Code // stays at its zero value when no status is reported for n
+		if filteredNodeStatusMap != nil {
+			if st := filteredNodeStatusMap.Get(n); st != nil {
+				code = st.Code()
+			}
+		}
+		if code == fwk.UnschedulableAndUnresolvable {
+			f.excludedNodes[group][n] = true
+			incompatible = append(incompatible, n)
+		} else {
+			// plain Unschedulable, Success, or unknown/nil -> transient, keep.
+			busy = append(busy, n)
+		}
+	}
+	excludedCount := len(f.excludedNodes[group]) // snapshot taken under mu for the log below
+	jobids := alloc.jobids                       // captured so the cancel can run after mu is released
+	delete(f.placement, group)                   // drop the cache so the next PreFilter re-matches
+	f.mu.Unlock()
+
+	// Release the Fluxion reservation for the abandoned allocation so the graph
+	// does not leak it while the group retries.
+	f.cancelJobids(jobids)
+
+	log.Printf("[fluence] group %s unschedulable: abandoning allocation (jobids %v); "+
+		"incompatible(excluded)=%v busy(retryable, NOT excluded)=%v; %d node(s) excluded total",
+		group, jobids, incompatible, busy, excludedCount)
+
+	// Returning Unschedulable (no nominated node) lets the pod be requeued; the
+	// next PreFilter re-matches (with any incompatible nodes excluded, but busy
+	// nodes still in play). Fluxion, not PostFilter preemption, chooses placement.
+	return nil, fwk.NewStatus(fwk.Unschedulable)
+}
+
 // PreBindPreFlight runs before PreBind. It returns Success when we have a cached
 // allocation for the pod's group (so PreBind can record the jobid, and stamp the
 // backend for a quantum pod), and Skip otherwise.
@@ -408,12 +561,59 @@ func (f *Fluence) PreBindPreFlight(
 	return nil, fwk.NewStatus(fwk.Success)
 }
 
+// Reserve stamps the chosen backend (and matched attributes) onto the pod as
+// early as possible — at reservation, in the scheduling cycle — rather than in
+// PreBind. The webhook injects FLUXION_BACKEND (and FLUXION_<ATTR>) as a
+// downward-API env sourced from these annotations; downward-API env is resolved
+// by the kubelet when the container starts and is NOT updated afterward, so the
+// annotation must be present well before the container starts. PreBind runs in
+// the (asynchronous) binding cycle, milliseconds before Bind, which races the
+// kubelet — Reserve runs earlier and synchronously, giving the annotation time
+// to propagate so the value reliably surfaces in the container.
+func (f *Fluence) Reserve(
+	ctx context.Context,
+	state fwk.CycleState, // unused
+	pod *corev1.Pod,
+	nodeName string, // unused: the stamp depends on the group's allocation, not on the node
+) *fwk.Status {
+	if err := f.stampBackend(ctx, pod); err != nil { // a failed patch fails the reservation
+		return fwk.AsStatus(fmt.Errorf("stamp backend annotations: %w", err))
+	}
+	return fwk.NewStatus(fwk.Success)
+}
+
+// Unreserve is a no-op: a stale backend annotation from a reservation that was
+// later rejected is harmless (it is overwritten on the next attempt and the
+// value is correct for the allocation that produced it), and clearing it would
+// cost an extra API call. Required to satisfy fwk.ReservePlugin.
+func (f *Fluence) Unreserve(ctx context.Context, state fwk.CycleState, pod *corev1.Pod, nodeName string) { // intentionally empty
+}
+
+// stampBackend writes the allocated backend name and matched attributes onto the
+// pod (idempotent merge patch). No-op when there is no cached allocation or the
+// allocation carries no backend (classical, non-quantum gangs).
+func (f *Fluence) stampBackend(ctx context.Context, pod *corev1.Pod) error {
+	f.mu.Lock()
+	alloc, ok := f.placement[groupKey(pod)]
+	f.mu.Unlock() // released before the API call below, so the patch never runs under mu
+	if !ok || alloc.place.Backend == "" {
+		return nil
+	}
+	ann := map[string]string{placement.BackendAnnotation: alloc.place.Backend}
+	for k, v := range alloc.place.BackendAttributes { // one prefixed annotation per matched attribute
+		ann[placement.AttributeAnnotationPrefix+k] = v
+	}
+	log.Printf("[fluence] group %s -> backend %q attrs %v (reserve-stamped, nodes %v)",
+		groupKey(pod), alloc.place.Backend, alloc.place.BackendAttributes, alloc.place.Nodes)
+	return f.patchPodAnnotations(ctx, pod.Namespace, pod.Name, ann)
+}
+
 // PreBind records, in the commit phase, the durable state for this group:
-//   - the Fluxion jobid onto the owning object (the PodGroup for a gang, else the
-//     pod) so the allocation can be cancelled when that object is deleted;
-//   - for a quantum group, the allocated backend onto the pod, which the webhook-
-//     injected downward-API env surfaces as QRMI_BACKEND (container env is
-//     immutable post-creation, so the value must travel via an annotation).
+// the Fluxion jobid onto the owning object (the PodGroup for a gang, else the
+// pod) so the allocation can be cancelled when that object is deleted. The
+// backend annotation is stamped earlier, in Reserve (see stampBackend), because
+// the webhook-injected downward-API env (FLUXION_BACKEND) must be present before
+// the container starts; PreBind is too late and races the kubelet.
 func (f *Fluence) PreBind(
 	ctx context.Context,
 	state fwk.CycleState,
@@ -430,20 +630,10 @@ func (f *Fluence) PreBind(
 	if err := f.recordJobIDs(ctx, pod, alloc.jobids); err != nil {
 		return fwk.AsStatus(fmt.Errorf("record jobids: %w", err))
 	}
-	if alloc.place.Backend != "" {
-		// Stamp the backend name and all matched attributes in one patch. The
-		// webhook injects a normalized env per annotation so the workload reads
-		// exactly what it matched (backend + region/qubits/...).
-		ann := map[string]string{placement.BackendAnnotation: alloc.place.Backend}
-		for k, v := range alloc.place.BackendAttributes {
-			ann[placement.AttributeAnnotationPrefix+k] = v
-		}
-		log.Printf("[fluence] group %s -> backend %q attrs %v (nodes %v, jobids %v)",
-			groupKey(pod), alloc.place.Backend, alloc.place.BackendAttributes,
-			alloc.place.Nodes, alloc.jobids)
-		if err := f.patchPodAnnotations(ctx, pod.Namespace, pod.Name, ann); err != nil {
-			return fwk.AsStatus(fmt.Errorf("stamp backend annotations: %w", err))
-		}
+	// Backstop: if Reserve was skipped for any reason, ensure the backend is
+	// stamped before bind anyway (idempotent).
+	if err := f.stampBackend(ctx, pod); err != nil {
+		return fwk.AsStatus(fmt.Errorf("stamp backend annotations: %w", err))
 	}
 	return fwk.NewStatus(fwk.Success)
 }
@@ -637,6 +827,20 @@ func (f *Fluence) reconcileGroup(ctx context.Context, namespace, group string) {
 	}
 	log.Printf("fluence: reconciled completed gang %s/%s — deleted Fluence-created PodGroup, allocation freed",
 		namespace, group)
+
+	// Gang+submitter cleanup: the one-off quantum submitter pod and its
+	// group-of-one PodGroup (<group>-submitter) are not owned by the user's
+	// workload, so reap them alongside the gang. The submitter pod also carries
+	// an ownerReference to this gang PodGroup (so its deletion cascades via GC);
+	// this explicit delete is the backstop and also removes the submitter's own
+	// PodGroup. Skip when this group is itself a submitter group, to avoid
+	// recursing on <group>-submitter-submitter.
+	if !strings.HasSuffix(group, submitterGroupSuffix) {
+		sg := group + submitterGroupSuffix
+		_ = f.handle.ClientSet().SchedulingV1alpha2().PodGroups(namespace).Delete(ctx, sg, metav1.DeleteOptions{})
+		_ = f.handle.ClientSet().CoreV1().Pods(namespace).Delete(ctx, sg, metav1.DeleteOptions{})
+		log.Printf("fluence: reaped submitter %s/%s for gang %s", namespace, sg, group)
+	}
 }
 
 // reconcileGraceForEmpty is how long a Fluence-created PodGroup with no live
@@ -648,6 +852,12 @@ const reconcileGraceForEmpty = 2 * time.Minute
 // package (the scheduler must not depend on the webhook). Kept in sync with it.
 const webhookGroupLabel = "fluence.flux-framework.org/group"
 
+// submitterGroupSuffix mirrors handlers.SubmitterGroupSuffix: the one-off quantum
+// submitter for gang <g> is named <g>-submitter (both the pod and its PodGroup).
+// Duplicated here to avoid importing the webhook handlers package into the
+// scheduler plugin; keep the two in sync.
+const submitterGroupSuffix = "-submitter"
+
 // onPodGroupDeleted frees the gang's allocation when its PodGroup is deleted.
 func (f *Fluence) onPodGroupDeleted(obj interface{}) {
 	pg, ok := obj.(*schedv1a2.PodGroup)
@@ -718,6 +928,7 @@ func (f *Fluence) cancelGroup(key string, ann map[string]string) {
 
 	f.mu.Lock()
 	delete(f.placement, key)
+	delete(f.excludedNodes, key) // drop accumulated exclusions so a future group reusing the name starts clean
 	f.mu.Unlock()
 }
 
diff --git a/pkg/fluence/fluence_test.go b/pkg/fluence/fluence_test.go
index 998e1a7..5228f97 100644
--- a/pkg/fluence/fluence_test.go
+++ b/pkg/fluence/fluence_test.go
@@ -1,6 +1,7 @@
 package fluence
 
 import (
+	"context"
 	"errors"
 	"testing"
 
@@ -12,6 +13,7 @@ import (
 	schedv1a2 "k8s.io/api/scheduling/v1alpha2"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/tools/cache"
+	fwk "k8s.io/kube-scheduler/framework"
 )
 
 // fakeMatcher records Cancel calls so cancel behavior can be asserted without
@@ -46,7 +48,11 @@ func (m *fakeMatcher) Cancel(jobid uint64) error {
 }
 
 func newTestFluence(m matcher) *Fluence {
-	return &Fluence{matcher: m, placement: map[string]groupAlloc{}}
+	return &Fluence{
+		matcher:       m,
+		placement:     map[string]groupAlloc{},
+		excludedNodes: map[string]map[string]bool{},
+	}
 }
 
 func ann(jobid string) map[string]string {
@@ -345,3 +351,205 @@ func twoSpecs() []*jobspec.Jobspec {
 		{Version: 9999},
 	}
 }
+
+// --- PostFilter allocation reconciliation -----------------------------------
+
+// fakeNodeStatus is a minimal fwk.NodeToStatusReader for PostFilter tests: it
+// maps node name -> status code so a test can mark some nodes incompatible
+// (UnschedulableAndUnresolvable) and others merely busy (Unschedulable).
+type fakeNodeStatus map[string]fwk.Code
+
+func (s fakeNodeStatus) Get(node string) *fwk.Status {
+	if c, ok := s[node]; ok {
+		return fwk.NewStatus(c)
+	}
+	return nil // node absent from the map: no status recorded for it
+}
+func (s fakeNodeStatus) NodesForStatusCode(fwk.NodeInfoLister, fwk.Code) ([]fwk.NodeInfo, error) {
+	return nil, nil // stub: present only to satisfy the interface
+}
+
+// PostFilter abandons the failed allocation (cancel jobids, drop cache) and
+// excludes ONLY genuinely-incompatible nodes (UnschedulableAndUnresolvable).
+// A node that was merely busy (plain Unschedulable) MUST stay eligible.
+func TestPostFilterExcludesOnlyIncompatibleNodes(t *testing.T) {
+	m := &fakeMatcher{}
+	f := newTestFluence(m)
+	key := "default/training"
+	f.placement[key] = groupAlloc{ // seed the cached allocation that PostFilter will abandon
+		place:  placement.Placement{Nodes: []string{"node-a", "node-b", "node-c"}},
+		jobids: []uint64{11, 12},
+	}
+	pod := groupedPod("default", "training-0", "training", nil)
+
+	// node-a incompatible (taint); node-b busy; node-c survived Filter.
+	status := fakeNodeStatus{
+		"node-a": fwk.UnschedulableAndUnresolvable,
+		"node-b": fwk.Unschedulable,
+		"node-c": fwk.Success,
+	}
+
+	_, st := f.PostFilter(context.Background(), nil, pod, status) // nil CycleState: PostFilter never reads it
+	if st == nil || st.Code() != fwk.Unschedulable {
+		t.Fatalf("expected Unschedulable status, got %v", st)
+	}
+	if _, still := f.placement[key]; still {
+		t.Fatal("placement cache should be deleted after PostFilter")
+	}
+	if len(m.cancelled) != 2 { // one cancel per seeded jobid (11 and 12)
+		t.Fatalf("expected both jobids cancelled, got %v", m.cancelled)
+	}
+	excl := f.excludedNodes[key]
+	if !excl["node-a"] {
+		t.Fatalf("incompatible node-a should be excluded, set=%v", excl)
+	}
+	if excl["node-b"] || excl["node-c"] {
+		t.Fatalf("busy/ok nodes must NOT be excluded (would strand a saturated gang), set=%v", excl)
+	}
+	if len(excl) != 1 {
+		t.Fatalf("expected exactly 1 excluded node, got %v", excl)
+	}
+}
+
+// A group blocked purely by contention (every node merely busy) excludes NOTHING
+// so it can retry the same nodes once they free — the saturated-cluster property.
+func TestPostFilterContentionExcludesNothing(t *testing.T) {
+	m := &fakeMatcher{}
+	f := newTestFluence(m)
+	key := "default/training"
+	f.placement[key] = groupAlloc{
+		place:  placement.Placement{Nodes: []string{"node-a", "node-b"}},
+		jobids: []uint64{1},
+	}
+	pod := groupedPod("default", "training-0", "training", nil)
+	status := fakeNodeStatus{"node-a": fwk.Unschedulable, "node-b": fwk.Unschedulable}
+
+	f.PostFilter(context.Background(), nil, pod, status) // return values are not asserted in this test
+
+	if len(f.excludedNodes[key]) != 0 { // length check: an empty set for the key is acceptable
+		t.Fatalf("a purely-busy group must exclude no nodes, got %v", f.excludedNodes[key])
+	}
+	if _, still := f.placement[key]; still {
+		t.Fatal("placement cache should be deleted even when nothing is excluded")
+	}
+	if len(m.cancelled) != 1 {
+		t.Fatalf("expected the jobid cancelled, got %v", m.cancelled)
+	}
+}
+
+// A nil status map (e.g. all nodes filtered out upstream) must be safe and
+// exclude nothing rather than panic or ban the whole allocation.
+func TestPostFilterNilStatusMapExcludesNothing(t *testing.T) {
+	m := &fakeMatcher{}
+	f := newTestFluence(m)
+	key := "default/training"
+	f.placement[key] = groupAlloc{place: placement.Placement{Nodes: []string{"node-a", "node-b"}}, jobids: []uint64{7}}
+	pod := groupedPod("default", "training-0", "training", nil)
+
+	_, st := f.PostFilter(context.Background(), nil, pod, nil) // nil reader: no node can be classified incompatible
+	if st == nil || st.Code() != fwk.Unschedulable {
+		t.Fatalf("expected Unschedulable, got %v", st)
+	}
+	if len(f.excludedNodes[key]) != 0 {
+		t.Fatalf("nil status map must exclude nothing, got %v", f.excludedNodes[key])
+	}
+}
+
+// Incompatible nodes accumulate across attempts; busy ones never do.
+func TestPostFilterAccumulatesIncompatibleAcrossAttempts(t *testing.T) {
+	m := &fakeMatcher{}
+	f := newTestFluence(m)
+	key := "default/training"
+	pod := groupedPod("default", "training-0", "training", nil)
+
+	f.placement[key] = groupAlloc{place: placement.Placement{Nodes: []string{"node-a", "node-b"}}, jobids: []uint64{1}} // attempt 1
+	f.PostFilter(context.Background(), nil, pod, fakeNodeStatus{"node-a": fwk.UnschedulableAndUnresolvable, "node-b": fwk.Unschedulable})
+	f.placement[key] = groupAlloc{place: placement.Placement{Nodes: []string{"node-c", "node-d"}}, jobids: []uint64{2}} // attempt 2: re-seed, standing in for a fresh re-match
+	f.PostFilter(context.Background(), nil, pod, fakeNodeStatus{"node-c": fwk.UnschedulableAndUnresolvable, "node-d": fwk.Unschedulable})
+
+	excl := f.excludedNodes[key]
+	for _, n := range []string{"node-a", "node-c"} {
+		if !excl[n] {
+			t.Fatalf("incompatible %s should accumulate, got %v", n, excl)
+		}
+	}
+	if excl["node-b"] || excl["node-d"] {
+		t.Fatalf("busy nodes must never accumulate, got %v", excl)
+	}
+	if len(excl) != 2 {
+		t.Fatalf("exclusion set should be the 2 incompatible nodes, got %v", excl)
+	}
+}
+
+// PostFilter on a group with no cached allocation (not ours, or already cleared)
+// is a safe no-op: no panic, no cancel, returns Unschedulable.
+func TestPostFilterUnknownGroupNoop(t *testing.T) {
+	m := &fakeMatcher{}
+	f := newTestFluence(m) // placement starts empty, so the pod's group has no cached allocation
+	pod := groupedPod("default", "stranger-0", "stranger", nil)
+
+	_, status := f.PostFilter(context.Background(), nil, pod, nil)
+	if status == nil || status.Code() != fwk.Unschedulable {
+		t.Fatalf("expected Unschedulable, got %v", status)
+	}
+	if len(m.cancelled) != 0 {
+		t.Fatalf("nothing should be cancelled for an unknown group, got %v", m.cancelled)
+	}
+	if len(f.excludedNodes) != 0 { // the early return must happen before any per-group set is created
+		t.Fatalf("no exclusion set should be created for an unknown group, got %v", f.excludedNodes)
+	}
+}
+
+// Teardown (cancelGroup) must clear the exclusion set so a future group reusing
+// the same key does not inherit stale exclusions.
+func TestCancelGroupClearsExclusions(t *testing.T) {
+	m := &fakeMatcher{}
+	f := newTestFluence(m)
+	key := "default/training"
+	f.placement[key] = groupAlloc{jobids: []uint64{9}}
+	f.excludedNodes[key] = map[string]bool{"node-a": true} // pre-existing exclusion that teardown must drop
+
+	f.cancelGroup(key, ann("9"))
+
+	if _, still := f.excludedNodes[key]; still { // the key itself must be gone, not just emptied
+		t.Fatal("exclusion set should be cleared on teardown")
+	}
+}
+
+// schedulableNodes must drop control-plane (NoSchedule taint), NoExecute-tainted,
+// and cordoned nodes, keeping only nodes a normal gang pod can actually land on.
+// This keeps the Fluxion graph from offering nodes Kubernetes will reject in
+// Filter (PostFilter then abandons the whole allocation, which can strand the gang).
+func TestSchedulableNodesDropsTaintedAndCordoned(t *testing.T) {
+	node := func(name string, unsched bool, effects ...corev1.TaintEffect) corev1.Node { // fixture builder: one taint per effect
+		n := corev1.Node{}
+		n.Name = name
+		n.Spec.Unschedulable = unsched
+		for _, e := range effects {
+			n.Spec.Taints = append(n.Spec.Taints, corev1.Taint{Key: "k", Effect: e})
+		}
+		return n
+	}
+	in := []corev1.Node{
+		node("worker-1", false),
+		node("worker-2", false),
+		node("control-plane", false, corev1.TaintEffectNoSchedule),
+		node("draining", false, corev1.TaintEffectNoExecute),
+		node("cordoned", true),
+		node("prefer-only", false, corev1.TaintEffectPreferNoSchedule), // soft taint: keep
+	}
+	got := schedulableNodes(in)
+	gotNames := map[string]bool{}
+	for _, n := range got {
+		gotNames[n.Name] = true
+	}
+	want := []string{"worker-1", "worker-2", "prefer-only"}
+	if len(got) != len(want) { // with unique names, equal length plus membership below means the sets match
+		t.Fatalf("expected %d schedulable nodes %v, got %d %v", len(want), want, len(got), gotNames)
+	}
+	for _, w := range want {
+		if !gotNames[w] {
+			t.Fatalf("expected %s kept, got set %v", w, gotNames)
+		}
+	}
+}
diff --git a/pkg/placement/placement.go b/pkg/placement/placement.go
index 554f319..c7f76de 100644
--- a/pkg/placement/placement.go
+++ b/pkg/placement/placement.go
@@ -214,14 +214,36 @@ func withEntries(counts map[string]int) []jobspec.Resource {
 // allocation (duration 0 runs to graph end) plus an RFC 31 property constraint
 // selecting the eligible node set. properties is the AND-set of composed
 // key=value property strings a matched node must carry.
-func systemAttributes(properties []string) map[string]interface{} {
+func systemAttributes(properties []string, excludeNodes []string) map[string]interface{} {
+	// Base property constraint (the eligible-node property AND-set).
+	constraints := map[string]interface{}{
+		"properties": properties,
+	}
+	// When a group has had a placement rejected by other scheduler plugins
+	// (taints, affinity, volume topology that Fluxion's graph does not model),
+	// PostFilter accumulates the rejected hostnames and we AND in an RFC 31
+	// negated hostlist so the re-match is forced onto untried nodes. RFC 31 is
+	// JsonLogic-style ({operator:[values]}, one operator per object), so to AND
+	// two operators we nest them under an explicit `and`. We only do this when
+	// there is something to exclude, so the no-exclusion jobspec is byte-for-byte
+	// what it was before (and existing tests/behavior are unchanged).
+	if len(excludeNodes) > 0 {
+		constraints = map[string]interface{}{
+			"and": []interface{}{
+				map[string]interface{}{"properties": properties},
+				map[string]interface{}{
+					"not": []interface{}{
+						map[string]interface{}{"hostlist": excludeNodes},
+					},
+				},
+			},
+		}
+	}
 	return map[string]interface{}{
 		"system": map[string]interface{}{
 			// duration 0 => hold the allocation until we explicitly Cancel.
-			"duration": 0,
-			"constraints": map[string]interface{}{
-				"properties": properties,
-			},
+			"duration":    0,
+			"constraints": constraints,
 		},
 	}
 }
@@ -229,7 +251,7 @@ func systemAttributes(properties []string) map[string]interface{} {
 // computeJobspec builds the physical-compute jobspec for a group: one slot per
 // pod holding the compute resources, constrained to virtual=false nodes. This is
 // the only jobspec for a group that requests no virtual devices.
-func computeJobspec(groupName string, slots int, compute map[string]int) *jobspec.Jobspec {
+func computeJobspec(groupName string, slots int, compute map[string]int, excludeNodes []string) *jobspec.Jobspec {
 	return &jobspec.Jobspec{
 		Version: 9999,
 		Resources: []jobspec.Resource{{
@@ -238,7 +260,7 @@ func computeJobspec(groupName string, slots int, compute map[string]int) *jobspe
 			Label: "default",
 			With:  withEntries(compute),
 		}},
-		Attributes: systemAttributes([]string{VirtualPropertyFalse}),
+		Attributes: systemAttributes([]string{VirtualPropertyFalse}, excludeNodes),
 		Tasks: []jobspec.Task{{
 			Command: []string{groupName},
 			Slot:    "default",
@@ -272,7 +294,7 @@ func deviceJobspec(groupName, deviceType string, count int, extraProps []string)
 			Label: "device",
 			With:  []jobspec.Resource{{Type: "node", Count: count}},
 		}},
-		Attributes: systemAttributes(props),
+		Attributes: systemAttributes(props, nil),
 		Tasks: []jobspec.Task{{
 			Command: []string{groupName},
 			Slot:    "device",
@@ -299,6 +321,7 @@ func JobspecsForGroup(
 	groupName string,
 	pods []corev1.Pod,
 	knownDevices map[string]bool,
+	excludeNodes []string,
 ) ([]*jobspec.Jobspec, error) {
 	if len(pods) == 0 {
 		return nil, fmt.Errorf("pod group %q has no pods", groupName)
@@ -321,7 +344,7 @@ func JobspecsForGroup(
 		}
 	}
 
-	specs := []*jobspec.Jobspec{computeJobspec(groupName, len(pods), compute)}
+	specs := []*jobspec.Jobspec{computeJobspec(groupName, len(pods), compute, excludeNodes)}
 
 	// Deterministic device order for stable output.
 	deviceTypes := make([]string, 0, len(devices))
diff --git a/pkg/placement/placement_test.go b/pkg/placement/placement_test.go
index 33786c8..fe68917 100644
--- a/pkg/placement/placement_test.go
+++ b/pkg/placement/placement_test.go
@@ -64,7 +64,7 @@ func TestClassicalSingleMatch(t *testing.T) {
 		podWith("p0", corev1.ResourceList{corev1.ResourceCPU: qty(4), "nvidia.com/gpu": qty(1)}),
 		podWith("p1", corev1.ResourceList{corev1.ResourceCPU: qty(4), "nvidia.com/gpu": qty(1)}),
 	}
-	specs, err := JobspecsForGroup("grp", pods, nil)
+	specs, err := JobspecsForGroup("grp", pods, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -101,7 +101,7 @@ func TestGroupDeviceMatchWhenLeaderNotFirst(t *testing.T) {
 	})
 	// Leader deliberately placed last.
 	pods := []corev1.Pod{worker, worker, leader}
-	specs, err := JobspecsForGroup("qgrp", pods, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("qgrp", pods, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -132,7 +132,7 @@ func qpuPodWithRequires(name string, requires map[string]string) corev1.Pod {
 // constraints, nothing extra (over-constraining would break unconstrained runs).
 func TestNoRequireAnnotationsAddsNoConstraints(t *testing.T) {
 	p := qpuPodWithRequires("q", nil)
-	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -145,7 +145,7 @@ func TestNoRequireAnnotationsAddsNoConstraints(t *testing.T) {
 // Exactly one require- constraint.
 func TestSingleRequireConstraint(t *testing.T) {
 	p := qpuPodWithRequires("q", map[string]string{"qrmi_type": "braket-gate"})
-	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -169,7 +169,7 @@ func TestMultipleRequireConstraintsAreDeduped(t *testing.T) {
 	// a worker that happens to repeat one of the same require- annotations
 	worker := qpuPodWithRequires("w0", map[string]string{"vendor": "amazon"})
 	specs, err := JobspecsForGroup("g", []corev1.Pod{leader, worker},
-		map[string]bool{"qpu": true})
+		map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -211,7 +211,7 @@ func TestRequireAnnotationConstrainsDevice(t *testing.T) {
 	leader.Annotations[RequireAnnotationPrefix+"vendor"] = "amazon"
 
 	specs, err := JobspecsForGroup("qgrp", []corev1.Pod{leader},
-		map[string]bool{"qpu": true})
+		map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -232,7 +232,7 @@ func TestDeviceProducesSecondMatch(t *testing.T) {
 		FluxionResourcePrefix + "qpu": qty(1),
 	})
 	known := map[string]bool{"qpu": true}
-	specs, err := JobspecsForGroup("qgrp", []corev1.Pod{p}, known)
+	specs, err := JobspecsForGroup("qgrp", []corev1.Pod{p}, known, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -274,7 +274,7 @@ func TestDeviceProducesSecondMatch(t *testing.T) {
 // node), so there are two matches: compute (core=1, virtual=false) and device.
 func TestDeviceOnlyStillForcesCompute(t *testing.T) {
 	p := podWith("q", corev1.ResourceList{FluxionResourcePrefix + "qpu": qty(1)})
-	specs, err := JobspecsForGroup("qonly", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("qonly", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -289,7 +289,7 @@ func TestDeviceOnlyStillForcesCompute(t *testing.T) {
 // Requesting a device type the graph does not model is a hard error.
 func TestUnknownDeviceErrors(t *testing.T) {
 	p := podWith("q", corev1.ResourceList{FluxionResourcePrefix + "fpga": qty(1)})
-	_, err := JobspecsForGroup("grp", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	_, err := JobspecsForGroup("grp", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err == nil {
 		t.Fatal("expected an error for an unmodeled device type")
 	}
@@ -301,7 +301,7 @@ func TestHoldDurationZero(t *testing.T) {
 		corev1.ResourceCPU:            qty(1),
 		FluxionResourcePrefix + "qpu": qty(1),
 	})
-	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -366,3 +366,76 @@ func TestPlacementUnmarkedNodeIsCompute(t *testing.T) {
 		t.Fatalf("unmarked node should not be a backend, got %q", p.Backend)
 	}
 }
+
+// When excludeNodes is non-empty, the compute jobspec's constraint must AND the
+// base properties with an RFC 31 negated hostlist, so a re-match avoids the
+// rejected nodes. When empty, the constraint must be the plain properties form
+// (byte-for-byte the pre-exclusion behavior).
+func TestExcludeNodesAddsNegatedHostlist(t *testing.T) {
+	p := podWith("p", corev1.ResourceList{corev1.ResourceCPU: qty(1)})
+
+	// no exclusion -> plain properties, no `and`/`not`
+	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, nil, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	cons := computeConstraints(t, specs[0])
+	if _, hasAnd := cons["and"]; hasAnd {
+		t.Fatalf("no-exclusion constraint must not use `and`: %#v", cons)
+	}
+	if _, hasProps := cons["properties"]; !hasProps {
+		t.Fatalf("no-exclusion constraint must have plain properties: %#v", cons)
+	}
+
+	// with exclusion -> and[ properties, not[ hostlist ] ]
+	specs, err = JobspecsForGroup("g", []corev1.Pod{p}, nil, []string{"node-b", "node-c"})
+	if err != nil {
+		t.Fatal(err)
+	}
+	cons = computeConstraints(t, specs[0])
+	andTerms, ok := cons["and"].([]interface{})
+	if !ok || len(andTerms) != 2 {
+		t.Fatalf("exclusion constraint must be `and` of 2 terms: %#v", cons)
+	}
+	// find the not/hostlist term and check it names exactly the excluded nodes
+	foundHostlist := false
+	for _, term := range andTerms {
+		tm, _ := term.(map[string]interface{})
+		notTerm, ok := tm["not"].([]interface{})
+		if !ok || len(notTerm) == 0 {
+			continue
+		}
+		inner, _ := notTerm[0].(map[string]interface{})
+		hl, ok := inner["hostlist"].([]string)
+		if !ok {
+			// json round-trip may make it []interface{}; accept both
+			if hlAny, ok2 := inner["hostlist"].([]interface{}); ok2 {
+				if len(hlAny) == 2 && hlAny[0] == "node-b" && hlAny[1] == "node-c" {
+					foundHostlist = true
+				}
+			}
+			continue
+		}
+		if len(hl) == 2 && hl[0] == "node-b" && hl[1] == "node-c" { // contents, not just the count
+			foundHostlist = true
+		}
+	}
+	if !foundHostlist {
+		t.Fatalf("exclusion constraint must contain not[hostlist[node-b node-c]]: %#v", cons)
+	}
+}
+
+// computeConstraints digs out attributes.system.constraints from the compute
+// jobspec (the first spec; device specs do not carry node exclusions).
+func computeConstraints(t *testing.T, spec *jobspec.Jobspec) map[string]interface{} {
+	t.Helper()
+	sys, ok := spec.Attributes["system"].(map[string]interface{})
+	if !ok { // missing key or unexpected type both fail the calling test
+		t.Fatalf("no system attributes: %#v", spec.Attributes)
+	}
+	cons, ok := sys["constraints"].(map[string]interface{})
+	if !ok {
+		t.Fatalf("no constraints: %#v", sys)
+	}
+	return cons
+}
diff --git a/pkg/webhook/handler.go b/pkg/webhook/handler.go
index 82a1227..61b97b1 100644
--- a/pkg/webhook/handler.go
+++ b/pkg/webhook/handler.go
@@ -25,34 +25,32 @@ type MutatorAPI interface {
 	// InjectedEnv is the FLUXION_* env contract the scheduler/webhook supplies.
 	InjectedEnv() []corev1.EnvVar
 
-	// PodGroup operations (gang scheduling). Group identity is the value of the
-	// group label, which the core treats as an opaque string.
-	PodGroupLeader(ctx context.Context, namespace, group string) string
-	EnsurePodGroup(ctx context.Context, namespace, group, leaderPod string)
-	RecordLeader(ctx context.Context, namespace, group, leaderPod string)
-
-	// EnsureSidecarRBAC provisions the per-namespace ServiceAccount/Role/Binding
-	// the sidecar needs.
-	EnsureSidecarRBAC(ctx context.Context, namespace string)
-
-	// InterceptorOps stages the fluence package into the quantum container via an
-	// init container + shared volume on PYTHONPATH (Model C). SidecarContainerOps
-	// adds the sidecar container (observe=true => observe-only telemetry mode).
-	InterceptorOps(pod *corev1.Pod) []spec.Op
-	SidecarContainerOps(pod *corev1.Pod, observe bool) []spec.Op
+	// EnsurePodGroup creates the group's PodGroup with the given gang minCount if
+	// it does not already exist (idempotent). Group identity is the opaque value
+	// of the group label. creatorPod is recorded only as the PodGroup's creator
+	// reference; the core ascribes no role semantics to it.
+	EnsurePodGroup(ctx context.Context, namespace, group, creatorPod string, minCount int32)
 }
 
 // Handler inspects a pod and, when it applies, contributes JSON patch ops. A pod
 // flows through every registered handler whose Applies returns true; their ops
 // are concatenated. Applies is fully general — it receives the pod and the
-// MutatorAPI, so a handler may consult cluster state (e.g. resolve a group's
-// leader) in deciding whether it applies.
+// MutatorAPI, so a handler may consult cluster state in deciding whether it
+// applies.
 type Handler interface {
 	Name() string
 	Applies(ctx context.Context, m MutatorAPI, pod *corev1.Pod) bool
 	Mutate(ctx context.Context, m MutatorAPI, pod *corev1.Pod) []spec.Op
 }
 
+// DefaultHandlerOrder is the active set AND the dispatch order when the operator
+// passes no --handlers flag. Order matters: specific handlers run before the
+// generic gang fallback, so "gang" is LAST — it applies default gang sizing
+// (group-size annotation or owner-derived N) only if no earlier handler already
+// shaped the gang. To change the order or disable a handler, pass a different
+// list (e.g. --handlers=fluxion,gang drops quantum).
+var DefaultHandlerOrder = []string{"fluxion", "quantum", "gang"}
+
 // ── registration ────────────────────────────────────────────────────────────────
 //
 // Handlers self-register via Register() from their package's init(). The core
@@ -60,15 +58,57 @@ type Handler interface {
 // webhook server wiring) is what populates the registry. This keeps the core
 // domain-agnostic: adding or removing a handler does not touch core code.
 
-var registry []Handler
+// available maps a handler's Name() to the handler. Populated by Register() from
+// each handler package's init(). This is the set of handlers that EXIST; which
+// ones actually run, and in what order, is decided by activeOrder.
+var available = map[string]Handler{}
+
+// activeOrder is the ordered list of handler names to dispatch. It is BOTH the
+// selection (names not present are disabled) and the order (dispatch follows the
+// slice). Defaults to DefaultHandlerOrder; overridden by SetActiveHandlers.
+var activeOrder = append([]string(nil), DefaultHandlerOrder...)
 
-// Register adds a handler to the global set. Called from handler packages'
-// init(). Order of registration is the order handlers run.
+// Register adds a handler to the available set under its Name(). Called from
+// handler packages' init().
 func Register(h Handler) {
-	registry = append(registry, h)
+	available[h.Name()] = h
+}
+
+// SetActiveHandlers sets the active, ordered handler list (the --handlers value).
+// Empty/nil restores DefaultHandlerOrder. Names with no registered handler are
+// dropped and returned as `unknown` so the caller can warn. Order is preserved
+// as given. `active` is a copy, so mutating it cannot alter the dispatch order.
+func SetActiveHandlers(names []string) (active, unknown []string) {
+	if len(names) == 0 {
+		activeOrder = append([]string(nil), DefaultHandlerOrder...)
+		return append([]string(nil), activeOrder...), nil
+	}
+	var ordered []string
+	for _, n := range names {
+		if _, ok := available[n]; ok {
+			ordered = append(ordered, n)
+		} else {
+			unknown = append(unknown, n)
+		}
+	}
+	activeOrder = ordered
+	return append([]string(nil), activeOrder...), unknown
+}
+
+// ActiveHandlerNames returns the active dispatch order (for logging at startup).
+func ActiveHandlerNames() []string {
+	return append([]string(nil), activeOrder...)
 }
 
-// registered returns the registered handlers (the live registry).
+// registered returns the active handlers, resolved from activeOrder, in order.
+// Names in the order with no registered handler are skipped (already warned at
+// SetActiveHandlers time).
 func registered() []Handler {
-	return registry
+	out := make([]Handler, 0, len(activeOrder))
+	for _, n := range activeOrder {
+		if h, ok := available[n]; ok {
+			out = append(out, h)
+		}
+	}
+	return out
 }
diff --git a/pkg/webhook/handlers/dependency.go b/pkg/webhook/handlers/dependency.go
new file mode 100644
index 0000000..d25d598
--- /dev/null
+++ b/pkg/webhook/handlers/dependency.go
@@ -0,0 +1,131 @@
+package handlers
+
+import (
+	"github.com/converged-computing/fluence/pkg/webhook/spec"
+
+	corev1 "k8s.io/api/core/v1"
+)
+
+// Dependency is Fluence's GENERAL "this set of pods must wait for a producer to
+// be ready" primitive. It is deliberately NOT quantum-specific: quantum is the
+// first resource type to use it (a gang waits for a quantum submission to reach
+// the device queue), but the same primitive applies to any resource type whose
+// readiness is produced out-of-band — a license server, a data stage-in job, a
+// warmed cache, another gang, etc.
+//
+// A Dependency has three parts, each carried as a pod annotation so the
+// relationship lives at the GROUP level (not duplicated as bespoke per-resource
+// fields) and is readable by both the webhook (at admission) and the scheduler
+// (in its reconcile loop):
+//
+//   - Kind:     what KIND of readiness this is (the resource type's name). The
+//     producer side knows how to satisfy this kind; the consumer side
+//     only knows it must wait. Quantum's kind is "quantum-submit".
+//   - Producer: the identity of the thing that will signal ready. For quantum it
+//     is the submitter's (base) group; generally it is whatever the
+//     kind's handler records as the satisfier.
+//   - Gate:     the scheduling gate held on the dependent (consumer) pods until
+//     the producer signals ready. Removing the gate is the "ungate"
+//     and is performed by whatever observes the producer's readiness
+//     (the quantum sidecar for kind=quantum-submit; the scheduler's
+//     reconcile loop for kinds whose readiness is in-cluster, e.g.
+//     "another gang is Running").
+//
+// The webhook PRODUCES a Dependency (gates the consumers, stamps the
+// annotations); REMOVING the gate is owned by the observer best placed to see
+// the producer's readiness. That split — declare here, observe elsewhere — is
+// what keeps the primitive general: a new resource type adds a Kind and an
+// observer and reuses the gating/annotation machinery unchanged.
+type Dependency struct {
+	Kind     string // resource-type readiness kind, e.g. "quantum-submit"
+	Producer string // identity of the readiness producer (e.g. the base group)
+	Gate     string // scheduling gate held on dependents until ready
+}
+
+// Dependency annotation keys (stamped on the dependent pods). Generic — no
+// quantum in the names, so any resource type reuses them.
+const (
+	// DependsOnKindAnnotation names the readiness kind the dependent waits for.
+	DependsOnKindAnnotation = "fluence.flux-framework.org/depends-on-kind"
+	// DependsOnProducerAnnotation names the producer expected to signal ready.
+	DependsOnProducerAnnotation = "fluence.flux-framework.org/depends-on-producer"
+	// DependsOnGateAnnotation records which scheduling gate encodes the wait, so
+	// an observer knows exactly which gate to remove when the producer is ready.
+	DependsOnGateAnnotation = "fluence.flux-framework.org/depends-on-gate"
+)
+
+// applyOps gates the dependent pod (via gateWithName) and stamps the dependency
+// annotations, creating /metadata/annotations at most once per pod.
+func (d Dependency) applyOps(pod *corev1.Pod) []spec.Op {
+	ops, view := gateWithName(pod, d.Gate), &corev1.Pod{ObjectMeta: pod.ObjectMeta} // view: scratch metadata; the caller's pod is not mutated
+	for _, kv := range [][2]string{{DependsOnKindAnnotation, d.Kind}, {DependsOnProducerAnnotation, d.Producer}, {DependsOnGateAnnotation, d.Gate}} {
+		ops = append(ops, annotateOp(view, kv[0], kv[1])...)
+		if kv[1] != "" && view.Annotations == nil {
+			view.Annotations = map[string]string{} // map now exists in the patch: add later keys by path, since a repeated whole-map "add" would replace it
+		}
+	}
+	return ops
+}
+
+// DependencyOf reads a dependent pod's declared Dependency, or ok=false if it
+// carries none. The scheduler's reconcile loop and the sidecar use this to learn
+// what a gated pod is waiting for without hardcoding a kind.
+func DependencyOf(pod *corev1.Pod) (Dependency, bool) {
+	kind := spec.Annotation(pod, DependsOnKindAnnotation)
+	if kind == "" { // the kind annotation alone decides whether a dependency is declared
+		return Dependency{}, false
+	}
+	return Dependency{
+		Kind:     kind,
+		Producer: spec.Annotation(pod, DependsOnProducerAnnotation),
+		Gate:     spec.Annotation(pod, DependsOnGateAnnotation),
+	}, true
+}
+
+// annotateOp adds a single metadata annotation (creating the annotations map if
+// the pod has none). The key is JSON-Pointer-escaped so slashes are handled.
+func annotateOp(pod *corev1.Pod, key, value string) []spec.Op {
+	if value == "" { // empty values are never stamped
+		return nil
+	}
+	if pod.Annotations == nil { // pod is not mutated here, so a second call on the same pod emits this whole-map add again
+		return []spec.Op{{
+			Op:    "add",
+			Path:  "/metadata/annotations",
+			Value: map[string]string{key: value},
+		}}
+	}
+	return []spec.Op{{
+		Op:    "add",
+		Path:  "/metadata/annotations/" + escapeJSONPointer(key),
+		Value: value,
+	}}
+}
+
+// gateWithName adds a named scheduling gate (idempotent) and raises priority for
+// the held pod, generalizing the quantum gating to ANY gate name so the
+// dependency primitive is not tied to the quantum gate.
+func gateWithName(pod *corev1.Pod, gateName string) []spec.Op {
+	for _, g := range pod.Spec.SchedulingGates {
+		if g.Name == gateName {
+			return nil // already gated: no ops at all, so the priority ops below are skipped too
+		}
+	}
+	var ops []spec.Op
+	gate := corev1.PodSchedulingGate{Name: gateName}
+	if len(pod.Spec.SchedulingGates) == 0 { // no gates yet: set the whole list; otherwise append to it
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/schedulingGates", Value: []corev1.PodSchedulingGate{gate}})
+	} else {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/schedulingGates/-", Value: gate})
+	}
+	// Gated dependents schedule reliably once ungated only if they outrank other
+	// pending work; priorityClassName is immutable post-creation so it must be
+	// set now. Don't override a user's explicit class. spec.priority is cleared
+	// to null so the priority admission controller recomputes it from the class
+	// (add-null is valid whether the field is absent, 0, or set).
+	if pod.Spec.PriorityClassName == "" { // NOTE(review): the class applied is QuantumClassicalPriorityClass for every gate name
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/priorityClassName", Value: QuantumClassicalPriorityClass})
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/priority", EmitNull: true})
+	}
+	return ops
+}
diff --git a/pkg/webhook/handlers/gang.go b/pkg/webhook/handlers/gang.go
index a6c6126..0469c11 100644
--- a/pkg/webhook/handlers/gang.go
+++ b/pkg/webhook/handlers/gang.go
@@ -2,11 +2,14 @@ package handlers
 
 import (
 	"context"
+	"log"
+	"strconv"
 
 	"github.com/converged-computing/fluence/pkg/webhook"
 	"github.com/converged-computing/fluence/pkg/webhook/spec"
 
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 func init() {
@@ -14,7 +17,7 @@ func init() {
 }
 
 // gangHandler gang-schedules pods that carry the group label: it creates a
-// Fluence-owned PodGroup (first pod admitted becomes the recorded leader) and
+// Fluence-owned PodGroup and
 // links every pod to it via spec.schedulingGroup.podGroupName, which is the
 // field the scheduler gangs by. It knows nothing about quantum — a purely
 // classical gang is fully handled here, with no sidecar.
@@ -28,15 +31,76 @@ func (h *gangHandler) Applies(ctx context.Context, m webhook.MutatorAPI, pod *co
 
 func (h *gangHandler) Mutate(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) []spec.Op {
 	g := webhook.GroupName(pod)
-	// First pod admitted in the group creates the PodGroup and is recorded as
-	// the admission-order leader. All pods are linked to the group.
-	if m.PodGroupLeader(ctx, pod.Namespace, g) == "" {
-		m.EnsurePodGroup(ctx, pod.Namespace, g, pod.Name)
-		m.RecordLeader(ctx, pod.Namespace, g, pod.Name)
-	}
+	// Ensure the group's PodGroup exists with the resolved gang size, and link
+	// this pod to it. EnsurePodGroup is idempotent (no-ops if the PodGroup
+	// already exists — e.g. created by an earlier, more specific handler), so we
+	// call it unconditionally. The gang handler knows nothing about quantum or
+	// submitters; that is the quantum handler's concern.
+	// minCount = full gang size N (group-size annotation, else owner-derived);
+	// see resolveMinCount.
+	m.EnsurePodGroup(ctx, pod.Namespace, g, pod.Name, resolveMinCount(ctx, m, pod))
 	return schedulingGroupOps(pod, g)
 }
 
+// resolveMinCount determines the gang's atomic-schedule size N:
+//  1. explicit group-size annotation -> honor it verbatim. This is the override
+//     for when minCount must differ from the parent's replica count (e.g. the
+//     quantum leader/worker split, where the gang's N is expressed directly).
+//  2. otherwise derive from the OWNING object: a Flux Operator MiniCluster pod
+//     is owned by an indexed Job whose parallelism == completions == size == N.
+//     (The operator sets Parallelism = Completions = MiniCluster.Spec.Size.)
+//  3. otherwise default to 1, logged — never silently size a multi-pod gang to 1.
+//
+// The leader/worker (quantum) split is orthogonal and unchanged: it is driven by
+// QuantumResource in the quantum handler. minCount is always the
+// FULL gang N regardless of which pods get gated.
+func resolveMinCount(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) int32 {
+	// 1. explicit override; bitSize 32 rejects values that an int32 cast would wrap
+	if pod.Annotations != nil {
+		if n := pod.Annotations[webhook.GroupSizeAnnotation]; n != "" {
+			if v, err := strconv.ParseInt(n, 10, 32); err == nil && v > 0 {
+				return int32(v)
+			}
+		}
+	}
+	// 2. derive from the owning Job's parallelism
+	if n := ownerJobN(ctx, m, pod); n > 0 {
+		return n
+	}
+	// 3. no usable signal (annotation absent or invalid, no Job size): a single-pod
+	// gang. Log so a missing size on a multi-pod workload is visible, not silent.
+	log.Printf("[fluence-webhook] group %s: no group-size annotation and no owning Job parallelism; defaulting minCount=1", webhook.GroupName(pod))
+	return 1
+}
+
+// ownerJobN returns the parallelism (== size N) of the indexed Job that owns the
+// pod, or 0 if there is no such owner. The Flux Operator sets a MiniCluster's
+// Job Parallelism == Completions == size, so this is the full gang size N.
+// Shared by the gang handler (classical: minCount = N) and the quantum handler
+// (split: leader group = 1, worker group = N-1).
+func ownerJobN(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) int32 {
+	c := m.Client()
+	if c == nil { // no API client: the owner's size cannot be looked up
+		return 0
+	}
+	for _, ref := range pod.OwnerReferences {
+		if ref.Kind != "Job" { // only Job owners are consulted, matched by Kind alone
+			continue
+		}
+		job, err := c.BatchV1().Jobs(pod.Namespace).Get(ctx, ref.Name, metav1.GetOptions{})
+		if err != nil { // a failed lookup is reported as "no owner signal"; the error is not logged here
+			return 0
+		}
+		if job.Spec.Parallelism != nil && *job.Spec.Parallelism > 0 { // preferred source of N
+			return *job.Spec.Parallelism
+		}
+		if job.Spec.Completions != nil && *job.Spec.Completions > 0 { // fallback when parallelism is unset or zero
+			return *job.Spec.Completions
+		}
+	}
+	return 0
+}
+
 // schedulingGroupOps links a pod to its PodGroup via the native 1.36 field
 // spec.schedulingGroup.podGroupName. Idempotent if already linked.
 func schedulingGroupOps(pod *corev1.Pod, group string) []spec.Op {
diff --git a/pkg/webhook/handlers/gang_test.go b/pkg/webhook/handlers/gang_test.go
new file mode 100644
index 0000000..ac027f8
--- /dev/null
+++ b/pkg/webhook/handlers/gang_test.go
@@ -0,0 +1,153 @@
+/*
+Copyright 2024 Lawrence Livermore National Security, LLC
+ (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+SPDX-License-Identifier: Apache-2.0
+*/
+
+// Tests for gang PodGroup minCount: the whole gang (full N) must schedule
+// atomically. Regression guard for the bug where every PodGroup was created
+// with minCount=1, so a multi-pod gang was "satisfied" by a single pod and the
+// rest were stranded (partial placement).
+package handlers
+
+import (
+	"context"
+	"testing"
+
+	"strconv"
+
+	"github.com/converged-computing/fluence/pkg/webhook"
+
+	corev1 "k8s.io/api/core/v1"
+
+	batchv1 "k8s.io/api/batch/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+// minCountOf runs the webhook mutation for a pod against an empty fake clientset
+// and returns the minCount of the PodGroup the webhook created.
+func minCountOf(t *testing.T, pod *corev1.Pod) int32 {
+	t.Helper()
+	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset()}
+	m.Mutate(context.Background(), pod)
+	pg, err := m.Clientset.SchedulingV1alpha2().
+		PodGroups(pod.Namespace).Get(context.Background(), webhook.GroupName(pod), metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("PodGroup not created: %v", err)
+	}
+	if pg.Spec.SchedulingPolicy.Gang == nil {
+		t.Fatal("PodGroup has no gang scheduling policy")
+	}
+	return pg.Spec.SchedulingPolicy.Gang.MinCount
+}
+
+// minCountWithClient seeds a fake clientset with objs (e.g. the owning Job), runs the webhook, and returns the PodGroup's minCount.
+func minCountWithClient(t *testing.T, pod *corev1.Pod, objs ...interface{}) int32 {
+	t.Helper()
+	cs := fake.NewSimpleClientset(toRuntime(objs)...)
+	(&webhook.Mutator{Clientset: cs}).Mutate(context.Background(), pod)
+	pg, err := cs.SchedulingV1alpha2().PodGroups(pod.Namespace).Get(context.Background(), webhook.GroupName(pod), metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("PodGroup not created: %v", err)
+	}
+	if pg.Spec.SchedulingPolicy.Gang == nil { // fail the test cleanly instead of a nil dereference below
+		t.Fatal("PodGroup has no gang scheduling policy")
+	}
+	return pg.Spec.SchedulingPolicy.Gang.MinCount
+}
+
+func jobWithParallelism(ns, name string, n int32) *batchv1.Job {
+	return &batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &n, Completions: &n},
+	}
+}
+
+func ownedBy(pod *corev1.Pod, kind, name string) {
+	pod.OwnerReferences = append(pod.OwnerReferences,
+		metav1.OwnerReference{Kind: kind, Name: name})
+}
+
+// No annotation, but the pod is owned by an indexed Job with parallelism N
+// (the Flux Operator MiniCluster case: Parallelism == Completions == size == N).
+// minCount must come from the Job.
+func TestGangMinCountDerivedFromOwningJob(t *testing.T) {
+	pod := cpuPod("fluence")
+	pod.Namespace = "default"
+	pod.Labels = map[string]string{webhook.GroupLabel: "mc-gang"}
+	ownedBy(pod, "Job", "mc-gang-job")
+	got := minCountWithClient(t, pod, jobWithParallelism("default", "mc-gang-job", 4))
+	if got != 4 {
+		t.Errorf("owner-derived: minCount=%d, want 4 (from Job parallelism)", got)
+	}
+}
+
+// The explicit annotation OVERRIDES the owning Job's parallelism (the override
+// exists precisely because minCount may differ from the parent replica count).
+func TestGangMinCountAnnotationOverridesOwner(t *testing.T) {
+	pod := cpuPod("fluence")
+	pod.Namespace = "default"
+	pod.Labels = map[string]string{webhook.GroupLabel: "ovr-gang"}
+	pod.Annotations = map[string]string{webhook.GroupSizeAnnotation: "2"}
+	ownedBy(pod, "Job", "ovr-gang-job")
+	got := minCountWithClient(t, pod, jobWithParallelism("default", "ovr-gang-job", 8))
+	if got != 2 {
+		t.Errorf("annotation override: minCount=%d, want 2 (annotation wins over Job=8)", got)
+	}
+}
+
+func atoi32(s string) int32 { v, _ := strconv.Atoi(s); return int32(v) } // test helper: a parse failure yields 0
+
+func toRuntime(objs []interface{}) []runtime.Object {
+	out := make([]runtime.Object, 0, len(objs))
+	for _, o := range objs {
+		if ro, ok := o.(runtime.Object); ok { // values that are not runtime.Objects are silently dropped
+			out = append(out, ro)
+		}
+	}
+	return out
+}
+
+// A classical gang of size N must get minCount = N so the whole group schedules
+// atomically (this is the core multi-gang fix).
+func TestGangMinCountEqualsGroupSize(t *testing.T) {
+	for _, n := range []string{"2", "4", "8"} {
+		pod := cpuPod("fluence")
+		pod.Namespace = "default"
+		pod.Labels = map[string]string{webhook.GroupLabel: "g-" + n}
+		pod.Annotations = map[string]string{webhook.GroupSizeAnnotation: n}
+		got := minCountOf(t, pod)
+		want := atoi32(n)
+		if got != want {
+			t.Errorf("group-size=%s: minCount=%d, want %d", n, got, want)
+		}
+	}
+}
+
+// No group-size annotation and no resolvable owning Job -> minCount falls back to 1.
+func TestGangMinCountDefaultsToOne(t *testing.T) {
+	pod := cpuPod("fluence")
+	pod.Namespace = "default"
+	pod.Labels = map[string]string{webhook.GroupLabel: "g-default"}
+	if got := minCountOf(t, pod); got != 1 {
+		t.Errorf("absent group-size: minCount=%d, want 1", got)
+	}
+}
+
+// group-size is the authoritative gang minCount: a workload that sets it to N
+// gets minCount=N (the whole gang schedules atomically), regardless of any owner
+// replica count. In the gang+submitter model the full workload IS the gang —
+// there is no N-1 worker split.
+func TestGangMinCountHonorsGroupSize(t *testing.T) {
+	pod := cpuPod("fluence")
+	pod.Namespace = "default"
+	pod.Labels = map[string]string{webhook.GroupLabel: "q-gang"}
+	pod.Annotations = map[string]string{
+		webhook.GroupSizeAnnotation: "4", // full gang size
+	}
+	if got := minCountOf(t, pod); got != 4 {
+		t.Errorf("group-size gang: minCount=%d, want 4 (full N)", got)
+	}
+}
diff --git a/pkg/webhook/handlers/handlers_test.go b/pkg/webhook/handlers/handlers_test.go
index 04d0e02..4931a8a 100644
--- a/pkg/webhook/handlers/handlers_test.go
+++ b/pkg/webhook/handlers/handlers_test.go
@@ -9,10 +9,7 @@ import (
 	"github.com/converged-computing/fluence/pkg/webhook/spec"
 
 	corev1 "k8s.io/api/core/v1"
-	schedulingv1alpha2 "k8s.io/api/scheduling/v1alpha2"
 	"k8s.io/apimachinery/pkg/api/resource"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/kubernetes/fake"
 )
 
 // ── fixtures ────────────────────────────────────────────────────────────────────
@@ -87,12 +84,12 @@ func hasSidecarOp(ops []spec.Op) bool {
 	for _, op := range ops {
 		switch v := op.Value.(type) {
 		case corev1.Container:
-			if v.Name == "fluence-sidecar" {
+			if v.Name == SidecarContainerName {
 				return true
 			}
 		case []corev1.Container:
 			for _, c := range v {
-				if c.Name == "fluence-sidecar" {
+				if c.Name == SidecarContainerName {
 					return true
 				}
 			}
@@ -127,238 +124,6 @@ func TestMutateSkipsNonFluxion(t *testing.T) {
 	}
 }
 
-// ── quantum handler: submitter ──────────────────────────────────────────────────
-
-func TestSingleQuantumGetsInterceptorNoSidecar(t *testing.T) {
-	m := &webhook.Mutator{AttributeKeys: []string{"region"}}
-	ops := m.Mutate(context.Background(), qpuPod("fluence"))
-	names := opEnvNames(ops)
-	if !contains(names, "FLUXION_BACKEND") {
-		t.Errorf("want FLUXION_BACKEND, got %v", names)
-	}
-	if !contains(names, "PYTHONPATH") || !contains(names, "FLUENCE_POD_UID") {
-		t.Errorf("want interceptor env (PYTHONPATH, FLUENCE_POD_UID), got %v", names)
-	}
-	if hasSidecarOp(ops) {
-		t.Error("standalone quantum pod should not get a sidecar")
-	}
-	if hasGateOp(ops) {
-		t.Error("standalone quantum pod should not be gated")
-	}
-}
-
-func TestObserveLabelInjectsSidecar(t *testing.T) {
-	m := &webhook.Mutator{}
-	pod := qpuPod("fluence")
-	pod.Labels = map[string]string{ObserveLabel: "true"}
-	ops := m.Mutate(context.Background(), pod)
-	if !hasSidecarOp(ops) {
-		t.Error("observe-labeled quantum pod should get the sidecar")
-	}
-	if hasGateOp(ops) {
-		t.Error("observe-only pod should not be gated")
-	}
-}
-
-// ── quantum handler: worker gating ──────────────────────────────────────────────
-
-func quantumGroupFixture(ns, group, leaderName string) *fake.Clientset {
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: group, Namespace: ns,
-			Annotations: map[string]string{webhook.LeaderAnnotation: leaderName},
-		},
-	}
-	leaderPod := qpuPod("fluence")
-	leaderPod.Name = leaderName
-	leaderPod.Namespace = ns
-	leaderPod.Labels = map[string]string{webhook.GroupLabel: group}
-	return fake.NewSimpleClientset(pg, leaderPod)
-}
-
-func TestClassicalWorkerInQuantumGroupIsGated(t *testing.T) {
-	ns, group, leader := "default", "qaoa", "qaoa-leader"
-	m := &webhook.Mutator{Clientset: quantumGroupFixture(ns, group, leader)}
-
-	worker := cpuPod("fluence")
-	worker.Name = "qaoa-worker-0"
-	worker.Namespace = ns
-	worker.Labels = map[string]string{webhook.GroupLabel: group}
-
-	ops := m.Mutate(context.Background(), worker)
-	if !hasGateOp(ops) {
-		t.Errorf("classical worker in a quantum group should be gated; ops=%v", ops)
-	}
-	if hasSidecarOp(ops) {
-		t.Error("worker should not get a sidecar")
-	}
-}
-
-func TestClassicalGangWorkerNotGated(t *testing.T) {
-	ns, group, leader := "default", "classical", "classical-leader"
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns,
-			Annotations: map[string]string{webhook.LeaderAnnotation: leader}},
-	}
-	leaderPod := cpuPod("fluence")
-	leaderPod.Name = leader
-	leaderPod.Namespace = ns
-	leaderPod.Labels = map[string]string{webhook.GroupLabel: group}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg, leaderPod)}
-
-	worker := cpuPod("fluence")
-	worker.Name = "classical-worker-0"
-	worker.Namespace = ns
-	worker.Labels = map[string]string{webhook.GroupLabel: group}
-
-	if hasGateOp(m.Mutate(context.Background(), worker)) {
-		t.Error("worker in a classical gang must NOT be gated (would deadlock)")
-	}
-}
-
-// Pod-template gang: every pod requests QPU; only the recorded leader gets the
-// sidecar, the rest are gated workers (role by admission order, not request).
-func TestPodTemplateGangSecondPodIsWorker(t *testing.T) {
-	ns, group, leader := "default", "qaoa", "qaoa-abc123"
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns,
-			Annotations: map[string]string{webhook.LeaderAnnotation: leader}},
-	}
-	leaderPod := qpuPod("fluence")
-	leaderPod.Name = leader
-	leaderPod.Namespace = ns
-	leaderPod.Labels = map[string]string{webhook.GroupLabel: group}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg, leaderPod)}
-
-	second := qpuPod("fluence") // identical spec, requests QPU
-	second.Name = "qaoa-def456"
-	second.Namespace = ns
-	second.Labels = map[string]string{webhook.GroupLabel: group}
-
-	ops := m.Mutate(context.Background(), second)
-	if !hasGateOp(ops) {
-		t.Error("second pod in a pod-template gang must be gated as a worker")
-	}
-	if hasSidecarOp(ops) {
-		t.Error("second pod must NOT get a sidecar (it is a worker)")
-	}
-}
-
-// ── quantum handler: explicit role annotation ──────────────────────────────────
-//
-// These cover the fluence.flux-framework.org/role annotation, which makes the
-// leader/worker split EXPLICIT rather than inferred by admission order. When the
-// annotation is present it is authoritative; the same value is echoed to the
-// container as FLUENCE_ROLE so the app reads the role Fluence used.
-
-// roledQPUPod is a QPU-requesting pod in a group with an explicit role.
-func roledQPUPod(ns, group, name, role string) *corev1.Pod {
-	p := qpuPod("fluence")
-	p.Name = name
-	p.Namespace = ns
-	p.Labels = map[string]string{webhook.GroupLabel: group}
-	p.Annotations = map[string]string{webhook.RoleAnnotation: role}
-	return p
-}
-
-// An explicitly-declared leader gets the sidecar and is NOT gated — even though
-// no leader is recorded on the PodGroup (admission order never consulted).
-func TestExplicitLeaderGetsSidecarNotGated(t *testing.T) {
-	ns, group := "default", "qaoa"
-	// fixture with NO LeaderAnnotation recorded — proves we don't rely on it.
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns},
-	}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg)}
-
-	leader := roledQPUPod(ns, group, "qaoa-leader", RoleLeader)
-	ops := m.Mutate(context.Background(), leader)
-	if hasGateOp(ops) {
-		t.Error("explicit leader must NOT be gated")
-	}
-	if !hasSidecarOp(ops) {
-		t.Error("explicit leader must get the sidecar")
-	}
-	if !contains(opEnvNames(ops), "FLUENCE_ROLE") {
-		t.Error("leader must get FLUENCE_ROLE injected for the app to read")
-	}
-}
-
-// An explicitly-declared worker is gated and gets no sidecar — even if it
-// requests the QPU resource itself and even if it (wrongly) appears as the
-// recorded leader. The annotation overrides both.
-func TestExplicitWorkerIsGatedRegardlessOfAdmission(t *testing.T) {
-	ns, group := "default", "qaoa"
-	// Adversarial fixture: record THIS worker's own name as the admission-order
-	// leader. The explicit role:worker must still win and gate it.
-	worker := roledQPUPod(ns, group, "qaoa-worker-0", RoleWorker)
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns,
-			Annotations: map[string]string{webhook.LeaderAnnotation: worker.Name}},
-	}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg)}
-
-	ops := m.Mutate(context.Background(), worker)
-	if !hasGateOp(ops) {
-		t.Error("explicit worker must be gated even if mis-recorded as the admission-order leader")
-	}
-	if hasSidecarOp(ops) {
-		t.Error("explicit worker must NOT get a sidecar")
-	}
-	if !contains(opEnvNames(ops), "FLUENCE_ROLE") {
-		t.Error("worker must get FLUENCE_ROLE injected so the app knows it is a worker")
-	}
-}
-
-// A heterogeneous gang declared with explicit roles resolves to exactly one
-// leader (sidecar, ungated) and the rest workers (gated) — independent of the
-// order in which the webhook admits the pods. This is the property a
-// leader/worker quantum gang needs and that admission order cannot guarantee.
-func TestExplicitRolesResolveRegardlessOfOrder(t *testing.T) {
-	ns, group := "default", "qaoa"
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns}, // no recorded leader
-	}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg)}
-
-	pods := []*corev1.Pod{
-		roledQPUPod(ns, group, "w0", RoleWorker),
-		roledQPUPod(ns, group, "leader", RoleLeader),
-		roledQPUPod(ns, group, "w1", RoleWorker),
-	}
-	var leaders, workers int
-	for _, p := range pods { // any admission order
-		ops := m.Mutate(context.Background(), p)
-		switch {
-		case hasSidecarOp(ops) && !hasGateOp(ops):
-			leaders++
-		case hasGateOp(ops) && !hasSidecarOp(ops):
-			workers++
-		default:
-			t.Fatalf("pod %s resolved to neither a clean leader nor worker", p.Name)
-		}
-	}
-	if leaders != 1 || workers != 2 {
-		t.Fatalf("want 1 leader + 2 workers, got %d leaders / %d workers", leaders, workers)
-	}
-}
-
-// Backwards compatibility: with NO role annotation, the leader is still chosen
-// by admission order (the recorded PodGroup leader), exactly as before.
-func TestNoRoleAnnotationFallsBackToAdmissionOrder(t *testing.T) {
-	ns, group, leader := "default", "qaoa", "qaoa-leader"
-	m := &webhook.Mutator{Clientset: quantumGroupFixture(ns, group, leader)}
-
-	// a second pod with no role annotation, not the recorded leader -> worker
-	second := qpuPod("fluence")
-	second.Name = "qaoa-second"
-	second.Namespace = ns
-	second.Labels = map[string]string{webhook.GroupLabel: group}
-	if !hasGateOp(m.Mutate(context.Background(), second)) {
-		t.Error("without a role annotation, a non-leader group member must be gated by admission order")
-	}
-}
-
 // ── gang handler: scheduling group linkage ──────────────────────────────────────
 
 func TestGangStampsSchedulingGroup(t *testing.T) {
diff --git a/pkg/webhook/handlers/quantum.go b/pkg/webhook/handlers/quantum.go
index 97fbfa6..47e1714 100644
--- a/pkg/webhook/handlers/quantum.go
+++ b/pkg/webhook/handlers/quantum.go
@@ -4,11 +4,16 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"os"
+	"strconv"
+	"strings"
 
 	"github.com/converged-computing/fluence/pkg/webhook"
 	"github.com/converged-computing/fluence/pkg/webhook/spec"
 
 	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -16,188 +21,560 @@ func init() {
 	webhook.Register(&quantumHandler{})
 }
 
-// Quantum-specific policy. The webhook core knows NONE of these — they live
-// only here, in the quantum handler.
+// Quantum-specific policy. The webhook core knows NONE of these — they live only
+// here, in the quantum handler.
+//
+// Model (no leader/worker): a workload requesting the quantum resource (Job,
+// Deployment, or loose pods — the trigger is the resource, not the kind) becomes
+// a GANG of full size N: one PodGroup, every pod fully gated and raised to a
+// preempting priority, each staged with the interceptor in FAUX mode (the submit
+// is a no-op). Fluence ALSO creates a separate one-off SUBMITTER pod — a
+// group-of-one running the SAME application container plus the real sidecar —
+// which submits the quantum task for real, tags it, stamps the resulting job-id
+// onto the gang, and ungates the gang. There is no leader among the user's pods;
+// the submitter is the only submitting pod and Fluence owns it.
 const (
-	// QuantumResource is the Fluxion resource a pod requests when it wants
-	// Fluence to schedule quantum work. Requesting it is the trigger for sidecar
-	// + interceptor injection.
+	// QuantumResource is the Fluxion resource a pod requests to ask Fluence to
+	// schedule quantum work. Requesting it is the sole trigger for this handler.
 	QuantumResource = "fluxion.flux-framework.org/qpu"
 
-	// QuantumGate holds a classical worker until the leader's quantum task is
-	// ready (the sidecar removes it).
+	// QuantumGate holds a gang pod unscheduled until the submitter's task is
+	// ready (the submitter's sidecar removes it).
 	QuantumGate = "quantum.braket/ready"
 
-	// ObserveLabel opts a standalone quantum pod into observe-only telemetry:
-	// the sidecar is injected and polls queue position but ungates nothing.
+	// ObserveLabel opts a STANDALONE quantum pod (a group of one) into
+	// observe-only telemetry: the sidecar is injected and polls queue position
+	// but ungates nothing.
 	ObserveLabel = "fluence.flux-framework.org/observe"
 
-	// Role values for webhook.RoleAnnotation.
-	RoleLeader = "leader"
-	RoleWorker = "worker"
+	// DependencyKindQuantumSubmit is the readiness Kind for the quantum resource
+	// type: gang pods wait for a quantum submission to reach the device queue.
+	// First concrete instance of the general Dependency primitive (dependency.go).
+	DependencyKindQuantumSubmit = "quantum-submit"
+
+	// SubmitterAnnotation marks the Fluence-created submitter pod so its own
+	// admission is recognized (real sidecar, real submit, not gated) instead of
+	// being treated as another gang member.
+	SubmitterAnnotation = "fluence.flux-framework.org/submitter"
+
+	// GangGroupAnnotation, set on the submitter at creation, names the gang group
+	// the submitter must ungate. Surfaced to its sidecar as FLUENCE_GANG_GROUP.
+	GangGroupAnnotation = "fluence.flux-framework.org/gang-group"
+
+	// SubmitterGroupSuffix: the submitter is its own group-of-one named
+	// <group>-submitter (a distinct PodGroup, minCount 1, so it schedules alone
+	// and never deadlocks against the gated gang).
+	SubmitterGroupSuffix = "-submitter"
+
+	// GangGroupEnv tells the submitter's sidecar which gang group label to list
+	// and ungate when the task is ready.
+	GangGroupEnv = "FLUENCE_GANG_GROUP"
 )
 
-// quantumHandler coordinates quantum-classical workflows. It applies to a pod
-// in either role:
-//   - the quantum submitter (requests QuantumResource): inject the interceptor,
-//     plus the sidecar when there is coordination to do (group leader, or
-//     observe-only telemetry requested);
-//   - a classical worker (a non-leader member of a group whose leader is a
-//     quantum pod): gate it until the leader's task is ready.
-//
-// This is the only place in the webhook that knows about quantum resources,
-// gates, or observe semantics.
+// quantumHandler creates, for a quantum workload, a fully-gated faux-submitting
+// gang plus a one-off real submitter (see the package-level model comment). It
+// is the only place in the webhook that knows about quantum resources, gates,
+// submitters, or observe semantics.
 type quantumHandler struct{}
 
 func (h *quantumHandler) Name() string { return "quantum" }
 
+// Applies to any pod requesting the quantum resource. Gang members run the same
+// image as the submitter and request it; the submitter (a copy) requests it; a
+// standalone quantum pod requests it. Nothing without the resource needs quantum
+// handling, so this is the single, unambiguous trigger.
 func (h *quantumHandler) Applies(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) bool {
-	if spec.PodRequestsResource(pod, QuantumResource) {
-		return true
+	return spec.PodRequestsResource(pod, QuantumResource)
+}
+
+func (h *quantumHandler) Mutate(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) []spec.Op {
+	// The Fluence-created submitter: real interceptor + real sidecar, its own
+	// group-of-one, NOT gated. Recognized by the marker set at creation.
+	if spec.Annotation(pod, SubmitterAnnotation) == "true" {
+		return h.mutateSubmitter(ctx, m, pod)
+	}
+
+	g := resolveGroup(pod)
+	observe := spec.Label(pod, ObserveLabel) == "true"
+	n := resolveGangSize(ctx, m, pod, g)
+
+	// Standalone quantum pod (a group of one): it performs its own real submit.
+	// No gang, no gating, no faux, no separate submitter. The sidecar is added
+	// only for observe-only telemetry.
+	if g == "" || n <= 1 {
+		ops := interceptorOps(pod)
+		if observe {
+			sc := sidecarFor(m)
+			sc.EnsureRBAC(ctx, pod.Namespace)
+			ops = append(ops, sc.ContainerOps(pod, true, nil)...)
+		}
+		log.Printf("[fluence-webhook] quantum standalone %s/%s (observe=%v)", pod.Namespace, pod.Name, observe)
+		return ops
 	}
-	// An explicitly-declared worker applies (so it gets gated) even if it
-	// doesn't request the quantum resource and the leader isn't recorded yet —
-	// this removes the admission-order race for explicitly-roled gangs.
-	if webhook.Role(pod) == RoleWorker && webhook.GroupName(pod) != "" {
-		return true
+
+	// Gang member: full gang of N in one PodGroup, fully gated + preempting
+	// priority + faux interceptor. Fluence also ensures the one-off submitter
+	// (idempotent) that does the real submit and ungates this gang.
+	m.EnsurePodGroup(ctx, pod.Namespace, g, pod.Name, n)
+	ensureSubmitterPod(ctx, m, pod, g)
+
+	ops := linkGroupOps(pod, g)
+	// Express the wait as the GENERAL dependency primitive: this gang pod depends
+	// on the quantum submission produced by <group>-submitter, held by the quantum
+	// gate. applyOps gates the pod, raises priority, and stamps depends-on-*.
+	dep := Dependency{Kind: DependencyKindQuantumSubmit, Producer: g + SubmitterGroupSuffix, Gate: QuantumGate}
+	ops = append(ops, dep.applyOps(pod)...)
+	// Same interceptor as the submitter, but FAUX mode so the gang pod never
+	// resubmits; it receives the real task id via FLUENCE_QUANTUM_JOB_ID.
+	ops = append(ops, interceptorOps(pod)...)
+	ops = append(ops, fauxSubmitEnvOps(pod)...)
+	log.Printf("[fluence-webhook] quantum gang member %s/%s — group %s minCount=%d, gated+faux",
+		pod.Namespace, pod.Name, g, n)
+	return ops
+}
+
+// mutateSubmitter wires the Fluence-created submitter pod: its own PodGroup of
+// one, the real interceptor (tag mode), RBAC, and the sidecar container told
+// which gang group to ungate (FLUENCE_GANG_GROUP). The submitter is never gated.
+func (h *quantumHandler) mutateSubmitter(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) []spec.Op {
+	sg := webhook.GroupName(pod) // the submitter's own group: <gang>-submitter
+	gang := spec.Annotation(pod, GangGroupAnnotation)
+	if sg != "" {
+		m.EnsurePodGroup(ctx, pod.Namespace, sg, pod.Name, 1)
 	}
-	return h.isWorkerOfQuantumGroup(ctx, m, pod)
+	sc := sidecarFor(m)
+	ops := sc.InterceptorOps(pod)
+	sc.EnsureRBAC(ctx, pod.Namespace)
+	extra := []corev1.EnvVar{{Name: GangGroupEnv, Value: gang}}
+	ops = append(ops, sc.ContainerOps(pod, false, extra)...)
+	log.Printf("[fluence-webhook] quantum submitter %s/%s — group %s (ungates gang %q)",
+		pod.Namespace, pod.Name, sg, gang)
+	return ops
 }
 
-// isWorkerOfQuantumGroup reports whether pod is a non-leader member of a group
-// whose recorded leader is a quantum (QuantumResource-requesting) pod. Workers
-// are classical and do not request the resource themselves, so their role is a
-// property of group membership, resolved against cluster state.
-func (h *quantumHandler) isWorkerOfQuantumGroup(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) bool {
-	g := webhook.GroupName(pod)
-	if g == "" || m.Client() == nil {
-		return false
+// resolveGroup returns the gang group identity: the explicit group label, else
+// the owning controller's name (Job/ReplicaSet/StatefulSet — a Deployment's pods
+// are owned by a ReplicaSet), else "" (a loose quantum pod with no group, which
+// is treated as a standalone group of one).
+func resolveGroup(pod *corev1.Pod) string {
+	if g := webhook.GroupName(pod); g != "" {
+		return g
+	}
+	for _, ref := range pod.OwnerReferences {
+		switch ref.Kind {
+		case "Job", "ReplicaSet", "StatefulSet":
+			return ref.Name
+		}
 	}
-	leader := m.PodGroupLeader(ctx, pod.Namespace, g)
-	if leader == "" || leader == pod.Name {
-		return false
+	return ""
+}
+
+// resolveGangSize returns the full gang size N: the explicit group-size
+// annotation (authoritative override; must parse as a positive int32 — larger
+// values are ignored rather than silently truncated), else the owner's replica
+// count (Job, then ReplicaSet), else a count of pods already carrying the group
+// label (best-effort, admission-order dependent; prefer the annotation), else 1.
+func resolveGangSize(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod, group string) int32 {
+	if pod.Annotations != nil {
+		if v, err := strconv.ParseInt(pod.Annotations[webhook.GroupSizeAnnotation], 10, 32); err == nil && v > 0 {
+			return int32(v) // safe: ParseInt with bitSize 32 already range-checked v
+		}
+	}
+	if n := ownerJobN(ctx, m, pod); n > 0 {
+		return n
+	}
+	if n := ownerReplicaSetN(ctx, m, pod); n > 0 {
+		return n
+	}
+	if group != "" {
+		if n := countGroupPods(ctx, m, pod.Namespace, group); n > 0 {
+			return n
+		}
 	}
-	lp, err := m.Client().CoreV1().Pods(pod.Namespace).Get(ctx, leader, metav1.GetOptions{})
+	return 1
+}
+
+// ownerReplicaSetN reports the replica count of the pod's owning ReplicaSet (the
+// Deployment case: Deployment -> ReplicaSet -> Pod); 0 when none can be read.
+func ownerReplicaSetN(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) int32 {
+	client := m.Client()
+	if client == nil {
+		return 0
+	}
+	for _, owner := range pod.OwnerReferences {
+		// Only ReplicaSet owners are looked up; every other kind is skipped.
+		if owner.Kind == "ReplicaSet" {
+			rs, err := client.AppsV1().ReplicaSets(pod.Namespace).Get(ctx, owner.Name, metav1.GetOptions{})
+			if err != nil {
+				return 0
+			}
+			if n := rs.Spec.Replicas; n != nil && *n > 0 {
+				return *n
+			}
+		}
+	}
+	return 0
+}
+
+// countGroupPods counts pods already carrying the group label (best-effort gang
+// size for loose grouped pods that have neither a group-size annotation nor an
+// owning controller). Admission-order dependent — prefer the group-size
+// annotation when the exact size must be guaranteed.
+func countGroupPods(ctx context.Context, m webhook.MutatorAPI, namespace, group string) int32 {
+	c := m.Client()
+	if c == nil {
+		return 0
+	}
+	list, err := c.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
+		LabelSelector: webhook.GroupLabel + "=" + group,
+	})
 	if err != nil {
-		return false
+		return 0
 	}
-	return spec.PodRequestsResource(lp, QuantumResource)
+	return int32(len(list.Items))
 }
 
-func (h *quantumHandler) Mutate(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) []spec.Op {
-	g := webhook.GroupName(pod)
-
-	// Determine role. An explicit role annotation is AUTHORITATIVE: the workload
-	// declares which pod leads and which wait, and Fluence honors it directly —
-	// no admission-order race, and the same value is echoed to the app as
-	// FLUENCE_ROLE so the webhook's notion of leader and the application's notion
-	// cannot disagree. When the annotation is absent, fall back to the legacy
-	// behavior: role is decided by admission order (the first pod admitted in the
-	// group, recorded on the PodGroup by the gang handler). The admission-order
-	// path suits a homogeneous pod-template gang where every pod is identical;
-	// the explicit annotation suits a heterogeneous leader/worker gang.
-	role := webhook.Role(pod)
-	var isWorker bool
-	switch role {
-	case RoleWorker:
-		isWorker = true
-	case RoleLeader:
-		isWorker = false
-	default:
-		if g != "" {
-			leader := m.PodGroupLeader(ctx, pod.Namespace, g)
-			isWorker = leader != "" && leader != pod.Name
-		}
-	}
-
-	if g != "" && isWorker {
-		log.Printf("[fluence-webhook] quantum worker %s/%s (role=%q) — gating",
-			pod.Namespace, pod.Name, role)
-		ops := gateOps(pod)
-		ops = append(ops, roleEnvOps(pod, RoleWorker)...)
-		return ops
+// SubmitterPodSuffix names the Fluence-created submitter for a group:
+// <group>-submitter. It also serves as the submitter's own PodGroup name.
+const SubmitterPodSuffix = SubmitterGroupSuffix
+
+// ensureSubmitterPod creates the one-off quantum submitter pod for a group
+// (idempotent create-if-absent — a client side-effect of admission, like
+// EnsurePodGroup/EnsureSidecarRBAC; NOT a separate controller). It is built from
+// the admitted gang pod so it runs the SAME application + credentials, is its own
+// group-of-one (<group>-submitter), is marked the submitter (so its admission
+// gets the real sidecar and is not gated), and records the gang group it must
+// ungate. An ownerReference to the gang's PodGroup cascades GC: when the gang
+// PodGroup is deleted (gang completed/deleted), the submitter is collected too.
+func ensureSubmitterPod(ctx context.Context, m webhook.MutatorAPI, gangPod *corev1.Pod, group string) {
+	c := m.Client()
+	if c == nil {
+		return
+	}
+	name := group + SubmitterGroupSuffix
+	if _, err := c.CoreV1().Pods(gangPod.Namespace).Get(ctx, name, metav1.GetOptions{}); err == nil {
+		return // already created (idempotent)
 	}
+	// Clean copy of the user's application: same containers (image, env, creds,
+	// the quantum resource request) and app volumes — none of the gang's gating
+	// or faux wiring.
+	src := gangPod.DeepCopy()
+	submitter := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: gangPod.Namespace,
+			Labels:    map[string]string{webhook.GroupLabel: name},
+			Annotations: map[string]string{
+				SubmitterAnnotation: "true",
+				GangGroupAnnotation: group,
+			},
+		},
+		Spec: corev1.PodSpec{
+			SchedulerName: webhook.SchedulerName,
+			RestartPolicy: corev1.RestartPolicyNever,
+			Containers:    src.Spec.Containers,
+			Volumes:       src.Spec.Volumes,
+		},
+	}
+	// Cascade GC: own the submitter by the gang's PodGroup (created moments ago by
+	// the caller). Best-effort — only set when the PodGroup UID is known (it is on
+	// a real cluster; the fake client in tests may leave it empty, in which case
+	// we skip the ref rather than emit an invalid one).
+	if pg, err := c.SchedulingV1alpha2().PodGroups(gangPod.Namespace).Get(ctx, group, metav1.GetOptions{}); err == nil && pg.UID != "" {
+		submitter.OwnerReferences = []metav1.OwnerReference{{
+			APIVersion: "scheduling.k8s.io/v1alpha2",
+			Kind:       "PodGroup",
+			Name:       group,
+			UID:        pg.UID,
+		}}
+	}
+	if _, err := c.CoreV1().Pods(gangPod.Namespace).Create(ctx, submitter, metav1.CreateOptions{}); err != nil {
+		log.Printf("[fluence-webhook] submitter pod %s/%s: %v", gangPod.Namespace, name, err)
+	} else {
+		log.Printf("[fluence-webhook] created submitter pod %s/%s for gang %s", gangPod.Namespace, name, group)
+	}
+}
 
-	// Submitter/leader role: recorded or declared group leader, or a standalone
-	// quantum pod. Always gets the interceptor (so its task is tagged). It gets
-	// the SIDECAR only when there is coordination to do: it is a group leader
-	// (workers to ungate), or observe-only telemetry is requested.
-	isLeader := g != ""
-	observe := spec.Label(pod, ObserveLabel) == "true"
+// linkGroupOps returns the patch ops that (1) put the group label on the gang pod
+// when it does not already name this group and (2) point
+// spec.schedulingGroup.podGroupName at the group. No ops when both already hold.
+func linkGroupOps(pod *corev1.Pod, group string) []spec.Op {
+	var patch []spec.Op
+	if webhook.GroupName(pod) != group {
+		labelOp := spec.Op{Op: "add", Path: "/metadata/labels",
+			Value: map[string]string{webhook.GroupLabel: group}}
+		if pod.Labels != nil {
+			labelOp.Path = "/metadata/labels/" + escapeJSONPointer(webhook.GroupLabel)
+			labelOp.Value = group
+		}
+		patch = append(patch, labelOp)
+	}
+	sg := pod.Spec.SchedulingGroup
+	linked := sg != nil && sg.PodGroupName != nil && *sg.PodGroupName == group
+	if !linked {
+		patch = append(patch, spec.Op{Op: "add", Path: "/spec/schedulingGroup",
+			Value: map[string]string{"podGroupName": group}})
+	}
+	return patch
+}
 
-	log.Printf("[fluence-webhook] quantum pod %s/%s — interceptor (leader=%v role=%q observe=%v)",
-		pod.Namespace, pod.Name, isLeader, role, observe)
+// escapeJSONPointer encodes one JSON Pointer path segment: "~" becomes "~0" and
+// "/" becomes "~1". A single pass, so the "~" of an emitted "~1" is not re-escaped.
+func escapeJSONPointer(s string) string {
+	replacer := strings.NewReplacer("~", "~0", "/", "~1")
+	return replacer.Replace(s)
+}
 
-	ops := m.InterceptorOps(pod)
-	ops = append(ops, roleEnvOps(pod, RoleLeader)...)
-	if isLeader || observe {
-		m.EnsureSidecarRBAC(ctx, pod.Namespace)
-		ops = append(ops, m.SidecarContainerOps(pod, observe)...)
+const QuantumClassicalPriorityClass = "fluence-quantum-classical"
+
+// ── faux-submit (gang-pod submit dedup) ─────────────────────────────────────────
+//
+// Quantum-specific, and delivered through the SAME Python interceptor as the
+// submitter — not a second mechanism. The submitter's interceptor tags the
+// submit; a gang pod's interceptor (same staged code) no-ops the submit. Which
+// behavior runs is selected at runtime by FLUENCE_FAUX_SUBMIT, set here on each
+// gang pod. Gang pods run the submitter's image and may call submit, but by
+// ungate time the task already exists, so resubmitting would duplicate it N times.
+
+const (
+	// FauxSubmitEnv selects the interceptor's no-op (faux) mode on workers.
+	// install_interceptor (see python/fluence/providers/braket.py) reads it and
+	// patches the vendor submit to return the existing task instead of submitting.
+	FauxSubmitEnv = "FLUENCE_FAUX_SUBMIT"
+
+	// QuantumJobIDAnnotation is the vendor-neutral task id the ungating sidecar
+	// stamps on each worker (mirrors python/fluence/ungate.py JOB_ID_ANNOTATION),
+	// BEFORE removing the gate. Surfaced into FLUENCE_QUANTUM_JOB_ID via the
+	// downward API so the faux interceptor can return a handle to that task.
+	QuantumJobIDAnnotation = "fluence.flux-framework.org/quantum-job-id"
+
+	// QuantumJobIDEnv is the env the faux interceptor reads for the existing
+	// task's id.
+	QuantumJobIDEnv = "FLUENCE_QUANTUM_JOB_ID"
+)
+
+// fauxSubmitEnvOps sets, on each non-sidecar worker container, the faux-mode
+// marker (FLUENCE_FAUX_SUBMIT=true) and the existing task's id
+// (FLUENCE_QUANTUM_JOB_ID, downward API from the annotation the ungating sidecar
+// stamps). The interceptor is staged separately via the shared sidecar
+// InterceptorOps path — these env vars only switch its mode and hand it the id.
+func fauxSubmitEnvOps(pod *corev1.Pod) []spec.Op {
+	faux := corev1.EnvVar{Name: FauxSubmitEnv, Value: "true"}
+	jobID := spec.AnnotationEnv(QuantumJobIDEnv, QuantumJobIDAnnotation)
+	var ops []spec.Op
+	for i, c := range pod.Spec.Containers {
+		if c.Name == SidecarContainerName {
+			continue
+		}
+		if !spec.HasEnv(c, FauxSubmitEnv) {
+			if len(c.Env) == 0 {
+				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env", i), Value: []corev1.EnvVar{faux}})
+				pod.Spec.Containers[i].Env = []corev1.EnvVar{faux}
+			} else {
+				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: faux})
+				pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, faux)
+			}
+		}
+		if !spec.HasEnv(c, QuantumJobIDEnv) {
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: jobID})
+			pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, jobID)
+		}
 	}
 	return ops
 }
 
-// roleEnvOps injects FLUENCE_ROLE into every (non-sidecar) container so the
-// application reads its gang role from the same source of truth the webhook
-// used. effectiveRole is what the webhook decided (leader/worker), used only
-// when the pod carries no explicit role annotation; when the annotation is
-// present we source the value from it via the downward API so the two always
-// agree. Unlike InterceptorOps, this is NOT limited to Fluxion-resource
-// containers — worker containers do not request the quantum resource but still
-// need to know they are workers.
-func roleEnvOps(pod *corev1.Pod, effectiveRole string) []spec.Op {
-	var value corev1.EnvVar
-	if webhook.Role(pod) != "" {
-		value = spec.AnnotationEnv("FLUENCE_ROLE", webhook.RoleAnnotation)
-	} else {
-		value = corev1.EnvVar{Name: "FLUENCE_ROLE", Value: effectiveRole}
+// Sidecar implementation — quantum-owned, NOT core.
+//
+// The fluence coordination sidecar (its container, name, RBAC, image, and the
+// Python interceptor staging) is specific to the quantum integration: it polls a
+// vendor queue and ungates workers. None of this belongs on the webhook core,
+// which stays domain-agnostic and only exposes generic primitives (Client,
+// InjectedEnv, EnsurePodGroup). The core invokes each handler's generic Mutate;
+// a handler does its own create/edit side-effects (here: RBAC, ConfigMaps,
+// container injection) through the generic client.
+//
+// These are package-level functions (not methods on the core *Mutator) operating
+// on the generic webhook.MutatorAPI. coreSidecar (see sidecar.go) delegates to
+// them; a future non-quantum handler that needs a different sidecar supplies its
+// own Sidecar implementation and its own container name/image.
+
+const (
+	// SidecarContainerName is the injected sidecar container's name. Owned here
+	// (not a global core const) because the container is quantum-specific.
+	SidecarContainerName = "fluence-sidecar"
+
+	// SidecarServiceAccount is the ServiceAccount (and Role/RoleBinding) name the
+	// sidecar uses to patch pods and read PodGroups.
+	SidecarServiceAccount = "fluence-sidecar"
+
+	// defaultSidecarImage is used when FLUENCE_SIDECAR_IMAGE is not set. Owned by
+	// the quantum integration; the deployment may override it via the env var.
+	defaultSidecarImage = "ghcr.io/converged-computing/fluence-sidecar:latest"
+
+	// StageVolumeName / StageMountPath: the shared emptyDir the init container
+	// stages the fluence Python package into, mounted into workload containers
+	// and set as PYTHONPATH when a container has none (Model C delivery).
+	StageVolumeName = "fluence-pkg"
+	StageMountPath  = "/opt/fluence-staged"
+)
+
+// sidecarImage resolves the sidecar image: the FLUENCE_SIDECAR_IMAGE override
+// (deployment config) or the quantum default. Read here so image config is owned
+// by the integration that uses it, not the core.
+func sidecarImage() string {
+	if v := os.Getenv("FLUENCE_SIDECAR_IMAGE"); v != "" {
+		return v
+	}
+	return defaultSidecarImage
+}
+
+// ensureSidecarRBAC provisions the per-namespace ServiceAccount/Role/RoleBinding
+// the sidecar uses to patch pods and read PodGroups. Idempotent (create-if-absent).
+func ensureSidecarRBAC(ctx context.Context, m webhook.MutatorAPI, namespace string) {
+	c := m.Client()
+	if c == nil {
+		return
+	}
+	lbl := map[string]string{"app": SidecarServiceAccount}
+
+	if _, err := c.CoreV1().ServiceAccounts(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
+		sa := &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl}}
+		if _, err := c.CoreV1().ServiceAccounts(namespace).Create(ctx, sa, metav1.CreateOptions{}); err != nil {
+			log.Printf("[fluence-webhook] could not create ServiceAccount %s/%s: %v", namespace, SidecarServiceAccount, err)
+		}
+	}
+	if _, err := c.RbacV1().Roles(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
+		role := &rbacv1.Role{
+			ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl},
+			Rules: []rbacv1.PolicyRule{
+				{APIGroups: []string{""}, Resources: []string{"pods"}, Verbs: []string{"get", "list", "patch", "update"}},
+				{APIGroups: []string{"scheduling.k8s.io"}, Resources: []string{"podgroups"}, Verbs: []string{"get", "list"}},
+			},
+		}
+		if _, err := c.RbacV1().Roles(namespace).Create(ctx, role, metav1.CreateOptions{}); err != nil {
+			log.Printf("[fluence-webhook] could not create Role %s/%s: %v", namespace, SidecarServiceAccount, err)
+		}
+	}
+	if _, err := c.RbacV1().RoleBindings(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
+		rb := &rbacv1.RoleBinding{
+			ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl},
+			Subjects:   []rbacv1.Subject{{Kind: "ServiceAccount", Name: SidecarServiceAccount, Namespace: namespace}},
+			RoleRef:    rbacv1.RoleRef{APIGroup: "rbac.authorization.k8s.io", Kind: "Role", Name: SidecarServiceAccount},
+		}
+		if _, err := c.RbacV1().RoleBindings(namespace).Create(ctx, rb, metav1.CreateOptions{}); err != nil {
+			log.Printf("[fluence-webhook] could not create RoleBinding %s/%s: %v", namespace, SidecarServiceAccount, err)
+		}
 	}
+}
+
+// interceptorOps stages the fluence Python package (Model C): an init container
+// copies it into a shared emptyDir, mounted into every workload container
+// (skipping the sidecar) with PYTHONPATH + FLUENCE_POD_UID, so Python auto-imports
+// the interceptor via sitecustomize. Broad mounting is safe (fail-soft when the
+// vendor SDK is absent) and is required so a quantum WORKER — which runs the same
+// image but does not request the resource — also gets the (faux-mode) interceptor.
+func interceptorOps(pod *corev1.Pod) []spec.Op {
 	var ops []spec.Op
+
+	vol := corev1.Volume{Name: StageVolumeName, VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}}
+	if len(pod.Spec.Volumes) == 0 {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/volumes", Value: []corev1.Volume{vol}})
+	} else {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/volumes/-", Value: vol})
+	}
+
+	initc := corev1.Container{
+		Name:            "fluence-stage",
+		Image:           sidecarImage(),
+		ImagePullPolicy: corev1.PullAlways,
+		Command: []string{"sh", "-c",
+			fmt.Sprintf("python -m fluence.stage %s || echo '[fluence] staging skipped (interceptor unavailable)'", StageMountPath)},
+		VolumeMounts: []corev1.VolumeMount{{Name: StageVolumeName, MountPath: StageMountPath}},
+	}
+	if len(pod.Spec.InitContainers) == 0 {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/initContainers", Value: []corev1.Container{initc}})
+	} else {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/initContainers/-", Value: initc})
+	}
+
+	mount := corev1.VolumeMount{Name: StageVolumeName, MountPath: StageMountPath, ReadOnly: true}
+	pythonpath := corev1.EnvVar{Name: "PYTHONPATH", Value: StageMountPath}
+	uid := spec.FieldEnv("FLUENCE_POD_UID", "metadata.uid")
 	for i, c := range pod.Spec.Containers {
-		if c.Name == "fluence-sidecar" || spec.HasEnv(c, "FLUENCE_ROLE") {
+		if c.Name == SidecarContainerName {
 			continue
 		}
-		if len(c.Env) == 0 {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env", i), Value: []corev1.EnvVar{value}})
+		if len(c.VolumeMounts) == 0 {
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/volumeMounts", i), Value: []corev1.VolumeMount{mount}})
 		} else {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: value})
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/volumeMounts/-", i), Value: mount})
+		}
+		if !spec.HasEnv(c, "PYTHONPATH") {
+			if len(c.Env) == 0 {
+				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env", i), Value: []corev1.EnvVar{pythonpath}})
+				pod.Spec.Containers[i].Env = []corev1.EnvVar{pythonpath}
+			} else {
+				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: pythonpath})
+				pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, pythonpath)
+			}
+		}
+		if !spec.HasEnv(c, "FLUENCE_POD_UID") {
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: uid})
+			pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, uid)
 		}
-		pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, value)
 	}
 	return ops
 }
 
-// gateOps adds the quantum scheduling gate (idempotent).
-const QuantumClassicalPriorityClass = "fluence-quantum-classical"
-
-func gateOps(pod *corev1.Pod) []spec.Op {
-	for _, g := range pod.Spec.SchedulingGates {
-		if g.Name == QuantumGate {
-			return nil
+// sidecarContainerOps adds the fluence sidecar container (pod identity env, the
+// generic FLUXION_* contract from InjectedEnv, the observe flag, handler-supplied
+// extraEnv, and the workload's secret/configMap-sourced credentials) and sets the
+// sidecar ServiceAccount. observe=true selects observe-only telemetry mode.
+func sidecarContainerOps(m webhook.MutatorAPI, pod *corev1.Pod, observe bool, extraEnv []corev1.EnvVar) []spec.Op {
+	var ops []spec.Op
+	env := []corev1.EnvVar{
+		spec.FieldEnv("FLUENCE_POD_UID", "metadata.uid"),
+		spec.FieldEnv("FLUENCE_POD_NAME", "metadata.name"),
+		spec.FieldEnv("FLUENCE_NAMESPACE", "metadata.namespace"),
+		spec.FieldEnv("FLUENCE_GROUP", "metadata.labels['"+webhook.GroupLabel+"']"),
+	}
+	env = append(env, m.InjectedEnv()...)
+	if observe {
+		env = append(env, corev1.EnvVar{Name: "FLUENCE_OBSERVE", Value: "true"})
+	}
+	env = append(env, extraEnv...)
+	// Copy the workload container's secret/configMap-sourced env onto the sidecar
+	// so it can talk to the same backend (domain-agnostic: we propagate whatever
+	// the workload pulls from a secret/configMap; existing FLUENCE_/FLUXION_ names
+	// are not overwritten).
+	if len(pod.Spec.Containers) > 0 {
+		have := map[string]bool{}
+		for _, e := range env {
+			have[e.Name] = true
+		}
+		for _, e := range pod.Spec.Containers[0].Env {
+			if have[e.Name] || e.ValueFrom == nil {
+				continue
+			}
+			if e.ValueFrom.SecretKeyRef != nil || e.ValueFrom.ConfigMapKeyRef != nil {
+				env = append(env, e)
+			}
 		}
 	}
-	var ops []spec.Op
-	gate := corev1.PodSchedulingGate{Name: QuantumGate}
-	if len(pod.Spec.SchedulingGates) == 0 {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/schedulingGates", Value: []corev1.PodSchedulingGate{gate}})
+	sidecar := corev1.Container{
+		Name: SidecarContainerName, Image: sidecarImage(), ImagePullPolicy: corev1.PullAlways,
+		Env: env,
+		Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{
+			corev1.ResourceCPU: resource.MustParse("100m"), corev1.ResourceMemory: resource.MustParse("256Mi"),
+		}},
+	}
+	if len(pod.Spec.Containers) == 0 {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/containers", Value: []corev1.Container{sidecar}})
 	} else {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/schedulingGates/-", Value: gate})
-	}
-	// Give gated classical workers a raised priority so they schedule reliably
-	// once ungated. priorityClassName is immutable post-creation, so it MUST be
-	// set here at admission, not at ungate time. Only set it if the pod doesn't
-	// already declare one (don't overwrite a user's class).
-	if pod.Spec.PriorityClassName == "" {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/priorityClassName", Value: QuantumClassicalPriorityClass})
-		// Clear spec.priority so the priority admission controller recomputes it
-		// from the class. The controller errors only when spec.priority is
-		// non-nil AND differs from the class value; setting it to null avoids
-		// that in every case. We use add-with-null (not remove): a JSON Patch
-		// "remove" of an absent path is a hard error, and whether the API has
-		// already defaulted spec.priority differs across clusters/k8s versions
-		// (it broke in CI but not on GKE, or vice versa). add-null is valid
-		// whether the field is absent, 0, or set.
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/priority", EmitNull: true})
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/containers/-", Value: sidecar})
+	}
+	if pod.Spec.ServiceAccountName == "" || pod.Spec.ServiceAccountName == "default" {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/serviceAccountName", Value: SidecarServiceAccount})
 	}
 	return ops
 }
diff --git a/pkg/webhook/handlers/quantum_test.go b/pkg/webhook/handlers/quantum_test.go
new file mode 100644
index 0000000..613724d
--- /dev/null
+++ b/pkg/webhook/handlers/quantum_test.go
@@ -0,0 +1,448 @@
+/*
+Copyright 2024 Lawrence Livermore National Security, LLC
+ (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+SPDX-License-Identifier: Apache-2.0
+*/
+
+// quantum_test.go — all tests for the quantum handler: the gang + submitter
+// model, faux-submit, the sidecar wiring, the Dependency primitive, and the
+// standalone/observe paths. Shared fixtures (qpuPod, cpuPod, op helpers) live in
+// handlers_test.go.
+package handlers
+
+import (
+	"context"
+	"testing"
+
+	"github.com/converged-computing/fluence/pkg/webhook"
+	"github.com/converged-computing/fluence/pkg/webhook/spec"
+
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+// ── standalone / observe ────────────────────────────────────────────────────────
+
+func TestSingleQuantumGetsInterceptorNoSidecar(t *testing.T) {
+	m := &webhook.Mutator{AttributeKeys: []string{"region"}}
+	ops := m.Mutate(context.Background(), qpuPod("fluence"))
+	names := opEnvNames(ops)
+	if !contains(names, "FLUXION_BACKEND") {
+		t.Errorf("want FLUXION_BACKEND, got %v", names)
+	}
+	if !contains(names, "PYTHONPATH") || !contains(names, "FLUENCE_POD_UID") {
+		t.Errorf("want interceptor env (PYTHONPATH, FLUENCE_POD_UID), got %v", names)
+	}
+	if hasSidecarOp(ops) {
+		t.Error("standalone quantum pod should not get a sidecar")
+	}
+	if hasGateOp(ops) {
+		t.Error("standalone quantum pod should not be gated")
+	}
+}
+
+func TestObserveLabelInjectsSidecar(t *testing.T) {
+	m := &webhook.Mutator{}
+	pod := qpuPod("fluence")
+	pod.Labels = map[string]string{ObserveLabel: "true"}
+	ops := m.Mutate(context.Background(), pod)
+	if !hasSidecarOp(ops) {
+		t.Error("observe-labeled quantum pod should get the sidecar")
+	}
+	if hasGateOp(ops) {
+		t.Error("observe-only pod should not be gated")
+	}
+}
+
+// ── gang + submitter ────────────────────────────────────────────────────────────
+
+// gangQPUPod is a quantum workload pod (requests the resource) in a group,
+// owned by a Job of parallelism N — the common real shape (a MiniCluster /
+// indexed Job). No role annotation: the new model has no leader/worker.
+func gangQPUPod(ns, group, name, job string) *corev1.Pod {
+	p := qpuPod("fluence")
+	p.Name = name
+	p.Namespace = ns
+	p.Labels = map[string]string{webhook.GroupLabel: group}
+	p.OwnerReferences = []metav1.OwnerReference{{Kind: "Job", Name: job}}
+	return p
+}
+
+// mincount returns the gang minCount of the named PodGroup, or ok=false.
+func mincount(t *testing.T, cs *fake.Clientset, ns, group string) (int32, bool) {
+	t.Helper()
+	pg, err := cs.SchedulingV1alpha2().PodGroups(ns).Get(context.Background(), group, metav1.GetOptions{})
+	if err != nil || pg.Spec.SchedulingPolicy.Gang == nil {
+		return 0, false
+	}
+	return pg.Spec.SchedulingPolicy.Gang.MinCount, true
+}
+
+// A quantum gang member (owned by Job parallelism=3) is gated + faux, its gang
+// PodGroup is minCount 3 (full N — no N-1 split), and Fluence creates the
+// separate <group>-submitter pod. It gets NO sidecar (it is gated).
+func TestQuantumGangGatedFauxAndSubmitterCreated(t *testing.T) {
+	ns, group, job := "default", "qg", "qg-job"
+	par := int32(3)
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), gangQPUPod(ns, group, "qg-0", job))
+
+	if !hasGateOp(ops) {
+		t.Error("gang member must be gated")
+	}
+	if hasSidecarOp(ops) {
+		t.Error("gang member (gated) must NOT get a sidecar")
+	}
+	if e, ok := envOp(ops, FauxSubmitEnv); !ok || e.Value != "true" {
+		t.Errorf("gang member must get %s=true", FauxSubmitEnv)
+	}
+	if mc, ok := mincount(t, cs, ns, group); !ok || mc != 3 {
+		t.Errorf("gang PodGroup minCount=%d (ok=%v), want 3 (full N, no split)", mc, ok)
+	}
+	// No <group>-workers subgroup in the new model.
+	if _, ok := mincount(t, cs, ns, group+"-workers"); ok {
+		t.Error("there must be no -workers subgroup in the gang+submitter model")
+	}
+	// Fluence created the submitter.
+	sub, err := cs.CoreV1().Pods(ns).Get(context.Background(), group+SubmitterGroupSuffix, metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("submitter pod not created: %v", err)
+	}
+	if sub.Annotations[SubmitterAnnotation] != "true" {
+		t.Error("submitter must carry the submitter marker")
+	}
+	if sub.Annotations[GangGroupAnnotation] != group {
+		t.Errorf("submitter gang-group=%q, want %q", sub.Annotations[GangGroupAnnotation], group)
+	}
+	if len(sub.Spec.SchedulingGates) != 0 {
+		t.Error("submitter must NOT be gated")
+	}
+}
+
+// The submitter pod, on its own admission, is wired as the real coordinator: its
+// own PodGroup minCount 1, the real sidecar (not faux), not gated, and told which
+// gang to ungate via FLUENCE_GANG_GROUP.
+func TestSubmitterWiredAsRealSidecar(t *testing.T) {
+	ns, group, job := "default", "qg2", "qg2-job"
+	par := int32(2)
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	// First a gang member, which creates the submitter.
+	m.Mutate(context.Background(), gangQPUPod(ns, group, "qg2-0", job))
+	sub, err := cs.CoreV1().Pods(ns).Get(context.Background(), group+SubmitterGroupSuffix, metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("submitter not created: %v", err)
+	}
+
+	ops := m.Mutate(context.Background(), sub)
+	if !hasSidecarOp(ops) {
+		t.Error("submitter must get the real sidecar")
+	}
+	if hasGateOp(ops) {
+		t.Error("submitter must NOT be gated")
+	}
+	if _, ok := envOp(ops, FauxSubmitEnv); ok {
+		t.Error("submitter must NOT be in faux mode")
+	}
+	// FLUENCE_GANG_GROUP is on the sidecar container itself.
+	var sidecar *corev1.Container
+	for _, op := range ops {
+		if c, ok := op.Value.(corev1.Container); ok && c.Name == SidecarContainerName {
+			cc := c
+			sidecar = &cc
+		}
+	}
+	if sidecar == nil {
+		t.Fatal("no sidecar container on submitter")
+	}
+	var gotGang bool
+	for _, e := range sidecar.Env {
+		if e.Name == GangGroupEnv && e.Value == group {
+			gotGang = true
+		}
+	}
+	if !gotGang {
+		t.Errorf("submitter sidecar must get %s=%q", GangGroupEnv, group)
+	}
+	if mc, ok := mincount(t, cs, ns, group+SubmitterGroupSuffix); !ok || mc != 1 {
+		t.Errorf("submitter PodGroup minCount=%d (ok=%v), want 1", mc, ok)
+	}
+}
+
+// A standalone quantum pod (no group, no owner → group of one) does its own real
+// submit: interceptor staged, but no gating, no faux, and no separate submitter.
+func TestStandaloneQuantumIsRealNoSubmitter(t *testing.T) {
+	ns := "default"
+	cs := fake.NewSimpleClientset()
+	m := &webhook.Mutator{Clientset: cs}
+
+	pod := qpuPod("fluence")
+	pod.Name = "solo"
+	pod.Namespace = ns
+
+	ops := m.Mutate(context.Background(), pod)
+	if hasGateOp(ops) {
+		t.Error("standalone quantum pod must not be gated")
+	}
+	if _, ok := envOp(ops, FauxSubmitEnv); ok {
+		t.Error("standalone quantum pod must not be faux")
+	}
+	pods, _ := cs.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{})
+	if len(pods.Items) != 0 {
+		t.Error("standalone quantum pod must not spawn a submitter")
+	}
+}
+
+// ── faux-submit + dependency ────────────────────────────────────────────────────
+
+// envOp returns the env var with the given name carried by the ops, if present
+// (covers both single-EnvVar and []EnvVar op shapes).
+func envOp(ops []spec.Op, name string) (corev1.EnvVar, bool) {
+	for _, op := range ops {
+		switch v := op.Value.(type) {
+		case corev1.EnvVar:
+			if v.Name == name {
+				return v, true
+			}
+		case []corev1.EnvVar:
+			for _, e := range v {
+				if e.Name == name {
+					return e, true
+				}
+			}
+		}
+	}
+	return corev1.EnvVar{}, false
+}
+
+// annotationOps collects all annotation key=value pairs the ops would stamp.
+func annotationOps(ops []spec.Op) map[string]string {
+	out := map[string]string{}
+	for _, op := range ops {
+		// whole-map add: /metadata/annotations
+		if op.Path == "/metadata/annotations" {
+			if m, ok := op.Value.(map[string]string); ok {
+				for k, v := range m {
+					out[k] = v
+				}
+			}
+			continue
+		}
+		// single-key add: /metadata/annotations/<escaped-key> -> string value
+		const pfx = "/metadata/annotations/"
+		if len(op.Path) > len(pfx) && op.Path[:len(pfx)] == pfx {
+			if s, ok := op.Value.(string); ok {
+				key := unescapeJSONPointer(op.Path[len(pfx):])
+				out[key] = s
+			}
+		}
+	}
+	return out
+}
+
+// unescapeJSONPointer reverses escapeJSONPointer for assertion readability.
+func unescapeJSONPointer(s string) string {
+	// single byte-wise pass: ~1 -> / and ~0 -> ~; every other byte is copied as-is
+	out := make([]byte, 0, len(s))
+	for i := 0; i < len(s); i++ {
+		if s[i] == '~' && i+1 < len(s) {
+			switch s[i+1] {
+			case '1':
+				out = append(out, '/')
+				i++
+				continue
+			case '0':
+				out = append(out, '~')
+				i++
+				continue
+			}
+		}
+		out = append(out, s[i]) // raw byte: string(s[i]) would re-encode bytes >= 0x80
+	}
+	return string(out)
+}
+
+// A quantum worker (no group-size of its own) is expressed as a general
+// Dependency: gated, stamped with depends-on-{kind,producer,gate}, and the
+// producer is the submitter group (<group> + SubmitterGroupSuffix).
+func TestQuantumWorkerIsGeneralDependency(t *testing.T) {
+	ns, group, job := "default", "depq", "depq-job"
+	par := int32(3)
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), gangQPUPod(ns, group, "depq-0", job))
+
+	if !hasGateOp(ops) {
+		t.Errorf("worker not gated by the dependency (ops: %+v)", ops)
+	}
+	ann := annotationOps(ops)
+	if ann[DependsOnKindAnnotation] != DependencyKindQuantumSubmit {
+		t.Errorf("depends-on-kind=%q, want %q", ann[DependsOnKindAnnotation], DependencyKindQuantumSubmit)
+	}
+	if ann[DependsOnProducerAnnotation] != group+SubmitterGroupSuffix {
+		t.Errorf("depends-on-producer=%q, want %q (the submitter group)", ann[DependsOnProducerAnnotation], group+SubmitterGroupSuffix)
+	}
+	if ann[DependsOnGateAnnotation] != QuantumGate {
+		t.Errorf("depends-on-gate=%q, want %q", ann[DependsOnGateAnnotation], QuantumGate)
+	}
+}
+
+// DependencyOf round-trips the stamped annotations back into a Dependency, so a
+// scheduler/sidecar observer can read what a gated pod waits for.
+func TestDependencyOfRoundTrip(t *testing.T) {
+	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{
+		DependsOnKindAnnotation:     DependencyKindQuantumSubmit,
+		DependsOnProducerAnnotation: "grp",
+		DependsOnGateAnnotation:     QuantumGate,
+	}}}
+	d, ok := DependencyOf(pod)
+	if !ok || d.Kind != DependencyKindQuantumSubmit || d.Producer != "grp" || d.Gate != QuantumGate {
+		t.Errorf("DependencyOf=%+v ok=%v, want quantum-submit/grp/%s", d, ok, QuantumGate)
+	}
+	if _, ok := DependencyOf(&corev1.Pod{}); ok {
+		t.Errorf("DependencyOf on a pod with no dependency should be ok=false")
+	}
+}
+
+// The worker is staged with the SAME interceptor as the submitter (PYTHONPATH +
+// FLUENCE_POD_UID), put into faux mode (FLUENCE_FAUX_SUBMIT=true), and handed the
+// existing task id via the FLUENCE_QUANTUM_JOB_ID downward-API env. One
+// mechanism, two modes — no separate ConfigMap shim. The user sets nothing.
+func TestQuantumWorkerStagedWithFauxSubmit(t *testing.T) {
+	ns, group, job := "default", "fauxq", "fauxq-job"
+	par := int32(2)
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), gangQPUPod(ns, group, "fauxq-0", job))
+
+	// Same interceptor staging as the submitter (PYTHONPATH set on the worker).
+	if _, ok := envOp(ops, "PYTHONPATH"); !ok {
+		t.Errorf("worker not staged with the interceptor (no PYTHONPATH); ops: %+v", ops)
+	}
+
+	// Faux mode selected.
+	if e, ok := envOp(ops, FauxSubmitEnv); !ok || e.Value != "true" {
+		t.Errorf("worker missing %s=true (got %+v, ok=%v)", FauxSubmitEnv, e, ok)
+	}
+
+	// Existing task id sourced from the annotation the ungating sidecar stamps.
+	e, ok := envOp(ops, QuantumJobIDEnv)
+	if !ok {
+		t.Fatalf("worker missing %s env", QuantumJobIDEnv)
+	}
+	if e.ValueFrom == nil || e.ValueFrom.FieldRef == nil ||
+		e.ValueFrom.FieldRef.FieldPath != "metadata.annotations['"+QuantumJobIDAnnotation+"']" {
+		t.Errorf("%s should be a downward-API ref to %s, got %+v", QuantumJobIDEnv, QuantumJobIDAnnotation, e)
+	}
+}
+
+// Classical override below the replica count: group-size=2 on a gang owned by a
+// Job(parallelism=5) must yield minCount=2 (the override), not 5. With a cluster
+// sized to 2, the gang reaches quorum and runs; if the override were dropped the
+// gang would wait forever for 5 (the e2e hang that fails CI).
+func TestClassicalOverrideBelowReplicaCount(t *testing.T) {
+	ns, group, job := "default", "ovr2", "ovr2-job"
+	pod := cpuPod("fluence")
+	pod.Namespace = ns
+	pod.Labels = map[string]string{webhook.GroupLabel: group}
+	pod.Annotations = map[string]string{webhook.GroupSizeAnnotation: "2"}
+	ownedBy(pod, "Job", job)
+
+	got := minCountWithClient(t, pod, jobWithParallelism(ns, job, 5))
+	if got != 2 {
+		t.Errorf("override below replicas: minCount=%d, want 2 (override wins over Job=5)", got)
+	}
+}
+
+// ── sidecar wiring ──────────────────────────────────────────────────────────────
+
+// The sidecar inherits the workload's secret/configMap-sourced credentials so it
+// can talk to the same backend, but NOT plain-value env. (Moved from the core
+// webhook package: sidecar construction is now quantum-owned.)
+func TestSidecarInheritsWorkloadSecretEnv(t *testing.T) {
+	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset()}
+	pod := &corev1.Pod{
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{{
+				Name: "gang",
+				Env: []corev1.EnvVar{
+					{Name: "GANG_ROLE", Value: "leader"}, // plain value: NOT copied
+					{Name: "AWS_ACCESS_KEY_ID", ValueFrom: &corev1.EnvVarSource{
+						SecretKeyRef: &corev1.SecretKeySelector{
+							LocalObjectReference: corev1.LocalObjectReference{Name: "aws-braket-credentials"},
+							Key:                  "AWS_ACCESS_KEY_ID",
+						}}},
+				},
+			}},
+		},
+	}
+	ops := sidecarContainerOps(m, pod, false, nil)
+	var sidecar *corev1.Container
+	for _, op := range ops {
+		if c, ok := op.Value.(corev1.Container); ok && c.Name == SidecarContainerName {
+			sidecar = &c
+		}
+	}
+	if sidecar == nil {
+		t.Fatal("no sidecar container added")
+	}
+	var gotSecret, gotPlain bool
+	for _, e := range sidecar.Env {
+		if e.Name == "AWS_ACCESS_KEY_ID" && e.ValueFrom != nil && e.ValueFrom.SecretKeyRef != nil {
+			gotSecret = true
+		}
+		if e.Name == "GANG_ROLE" {
+			gotPlain = true
+		}
+	}
+	if !gotSecret {
+		t.Error("sidecar should inherit the workload's secret-sourced AWS creds")
+	}
+	if gotPlain {
+		t.Error("sidecar should NOT copy plain-value workload env like GANG_ROLE")
+	}
+}
+
+// A plain quantum workload pod (no role, owned by a Job of N>1) is gated as a
+// faux gang member AND triggers creation of the one-off submitter. The user
+// authors no submitter and no roles.
+func TestGangMemberTriggersSubmitter(t *testing.T) {
+	ns, group, job := "default", "qauto", "qauto-job"
+	par := int32(2)
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	workload := gangQPUPod(ns, group, "qauto-0", job)
+	ops := m.Mutate(context.Background(), workload)
+
+	if !hasGateOp(ops) {
+		t.Error("gang member must be gated")
+	}
+	if _, ok := envOp(ops, FauxSubmitEnv); !ok {
+		t.Error("gang member must get FLUENCE_FAUX_SUBMIT")
+	}
+	sub, err := cs.CoreV1().Pods(ns).Get(context.Background(), group+SubmitterGroupSuffix, metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("submitter pod not created: %v", err)
+	}
+	if !spec.PodRequestsResource(sub, QuantumResource) {
+		t.Error("submitter must request the quantum resource (it runs the real submit)")
+	}
+}
diff --git a/pkg/webhook/handlers/registry_test.go b/pkg/webhook/handlers/registry_test.go
new file mode 100644
index 0000000..346d786
--- /dev/null
+++ b/pkg/webhook/handlers/registry_test.go
@@ -0,0 +1,82 @@
+/*
+Copyright 2024 Lawrence Livermore National Security, LLC
+ (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+SPDX-License-Identifier: Apache-2.0
+*/
+
+// Registry behavior: dispatch order comes from the active handler list (not a
+// per-handler Order), and the list both selects and orders handlers.
+package handlers
+
+import (
+	"context"
+	"testing"
+
+	"github.com/converged-computing/fluence/pkg/webhook"
+	"github.com/converged-computing/fluence/pkg/webhook/spec"
+
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+// The default active order ships gang LAST so it only applies default gang
+// sizing when no earlier handler shaped the gang.
+func TestDefaultOrderGangLast(t *testing.T) {
+	defer webhook.SetActiveHandlers(nil)
+	active, _ := webhook.SetActiveHandlers(nil) // restore + read default
+	if len(active) == 0 {
+		t.Fatal("no active handlers")
+	}
+	if active[len(active)-1] != "gang" {
+		t.Errorf("gang must be last in default order; got %v", active)
+	}
+	// default order is exactly fluxion, quantum, gang
+	want := []string{"fluxion", "quantum", "gang"}
+	if len(active) != len(want) {
+		t.Fatalf("default order = %v, want %v", active, want)
+	}
+	for i := range want {
+		if active[i] != want[i] {
+			t.Errorf("default order = %v, want %v", active, want)
+			break
+		}
+	}
+}
+
+// The active list IS the order: passing a custom order reorders dispatch, and
+// unknown names are reported, not silently kept.
+func TestActiveListSetsOrderAndReportsUnknown(t *testing.T) {
+	defer webhook.SetActiveHandlers(nil)
+	active, unknown := webhook.SetActiveHandlers([]string{"gang", "fluxion", "bogus"})
+	if len(active) != 2 || active[0] != "gang" || active[1] != "fluxion" {
+		t.Errorf("active = %v, want [gang fluxion] in that order", active)
+	}
+	if len(unknown) != 1 || unknown[0] != "bogus" {
+		t.Errorf("unknown = %v, want [bogus]", unknown)
+	}
+}
+
+// Dropping a handler from the list disables it: a quantum pod with quantum
+// omitted gets no interceptor ops (only fluxion/gang act).
+func TestOmittedHandlerDoesNotDispatch(t *testing.T) {
+	defer webhook.SetActiveHandlers(nil)
+	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset()}
+
+	webhook.SetActiveHandlers(nil) // default: quantum present
+	if !hasInterceptor(m.Mutate(context.Background(), qpuPod("fluence"))) {
+		t.Fatal("with quantum active, expected interceptor (init container) ops")
+	}
+
+	webhook.SetActiveHandlers([]string{"fluxion", "gang"}) // quantum omitted
+	if hasInterceptor(m.Mutate(context.Background(), qpuPod("fluence"))) {
+		t.Error("with quantum omitted, interceptor ops must NOT be present")
+	}
+}
+
+func hasInterceptor(ops []spec.Op) bool {
+	for _, op := range ops {
+		if op.Path == "/spec/initContainers" || op.Path == "/spec/initContainers/-" {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/webhook/handlers/sidecar.go b/pkg/webhook/handlers/sidecar.go
new file mode 100644
index 0000000..d105a7c
--- /dev/null
+++ b/pkg/webhook/handlers/sidecar.go
@@ -0,0 +1,57 @@
+package handlers
+
+import (
+	"context"
+
+	"github.com/converged-computing/fluence/pkg/webhook"
+	"github.com/converged-computing/fluence/pkg/webhook/spec"
+
+	corev1 "k8s.io/api/core/v1"
+)
+
+// Sidecar is the capability a handler uses to attach a coordination sidecar to a
+// pod. It is NOT part of the webhook core's MutatorAPI: only handlers that need
+// a sidecar (today, quantum) depend on it, and a handler may supply its own
+// implementation to customize delivery. The default implementation
+// (coreSidecar) forwards to this package's own helpers (ensureSidecarRBAC,
+// interceptorOps, sidecarContainerOps), not to sidecar logic in the core.
+//
+// This is the seam for a general sidecar interface that can be used across
+// handlers and customized per handler. A future custom-resource handler can
+// implement Sidecar differently (different image, env, gating) without
+// touching the core or other handlers.
+type Sidecar interface {
+	// EnsureRBAC provisions the per-namespace ServiceAccount/Role/Binding the
+	// sidecar needs to read/patch pods and podgroups.
+	EnsureRBAC(ctx context.Context, namespace string)
+	// InterceptorOps stages the in-pod interceptor (Model C) into the workload
+	// containers (init container + shared volume on PYTHONPATH).
+	InterceptorOps(pod *corev1.Pod) []spec.Op
+	// ContainerOps adds the sidecar container. observe=true selects observe-only
+	// telemetry mode (no ungating). extraEnv carries handler-computed,
+	// domain-specific env (e.g. the quantum handler's FLUENCE_EXPECTED_WORKERS =
+	// N-1 and FLUENCE_WORKER_GROUP_BASE) so the core never has to know about
+	// leader/worker concepts — the handler that owns the split owns those values.
+	ContainerOps(pod *corev1.Pod, observe bool, extraEnv []corev1.EnvVar) []spec.Op
+}
+
+// coreSidecar is the default Sidecar implementation. Every method forwards to
+// the quantum-owned sidecar helpers (see sidecar_impl.go), which need nothing
+// beyond the generic MutatorAPI (Client, InjectedEnv). No sidecar logic is left
+// in the webhook core; a custom handler can plug in its own Sidecar instead.
+type coreSidecar struct{ m webhook.MutatorAPI }
+
+func (cs coreSidecar) EnsureRBAC(ctx context.Context, namespace string) {
+	ensureSidecarRBAC(ctx, cs.m, namespace)
+}
+func (coreSidecar) InterceptorOps(pod *corev1.Pod) []spec.Op {
+	return interceptorOps(pod)
+}
+func (cs coreSidecar) ContainerOps(pod *corev1.Pod, observe bool, extraEnv []corev1.EnvVar) []spec.Op {
+	return sidecarContainerOps(cs.m, pod, observe, extraEnv)
+}
+
+// sidecarFor is the single place that picks a handler's Sidecar (today: coreSidecar).
+func sidecarFor(m webhook.MutatorAPI) Sidecar {
+	return coreSidecar{m: m}
+}
diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go
index 20a7288..b39bec1 100644
--- a/pkg/webhook/webhook.go
+++ b/pkg/webhook/webhook.go
@@ -1,11 +1,11 @@
 // Package webhook is fluence's mutating admission webhook.
 //
 // The core here is domain-agnostic plumbing: it owns the Mutator, the handler
-// dispatcher, per-namespace PodGroup/RBAC provisioning, the Model C package
-// staging (init container + shared volume on PYTHONPATH), the HTTP entrypoint,
-// and self-managed TLS. It knows nothing about quantum, Braket, gate names, or
-// observe labels — that policy lives entirely in the handlers (pkg/webhook/
-// handlers), which self-register via Register().
+// dispatcher, per-namespace PodGroup provisioning, the HTTP entrypoint, and
+// self-managed TLS. It knows nothing about quantum, Braket, gate names, sidecars,
+// RBAC, or interceptor staging — that policy and machinery live entirely in the
+// handlers (pkg/webhook/handlers), which self-register via Register() and perform
+// their own create/edit side-effects through the generic MutatorAPI.
 //
 // The webhook self-manages TLS via a self-signed CA patched into the
 // MutatingWebhookConfiguration caBundle at startup.
@@ -32,9 +32,7 @@ import (
 
 	admissionv1 "k8s.io/api/admission/v1"
 	corev1 "k8s.io/api/core/v1"
-	rbacv1 "k8s.io/api/rbac/v1"
 	schedulingv1alpha2 "k8s.io/api/scheduling/v1alpha2"
-	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/kubernetes"
@@ -52,34 +50,12 @@ const (
 	// meaning to it (a handler decides what a group means).
 	GroupLabel = "fluence.flux-framework.org/group"
 
-	// LeaderAnnotation records the admission-order leader on a PodGroup.
-	LeaderAnnotation = "fluence.flux-framework.org/leader"
-
-	// RoleAnnotation, set by the workload on each pod, explicitly declares the
-	// pod's gang role ("leader" or "worker"). When present it is AUTHORITATIVE:
-	// the quantum handler gates workers and gives the leader the sidecar based
-	// on this value, instead of inferring the leader by admission order. The
-	// same value is injected into the container env as FLUENCE_ROLE so the
-	// application reads its role from the same source of truth Fluence used.
-	// When absent, role falls back to admission order (backwards compatible).
-	RoleAnnotation = "fluence.flux-framework.org/role"
-
-	// ExpectedWorkersAnnotation, set by the workload on the leader pod, tells the
-	// sidecar how many gated workers to wait for before ungating. The count is
-	// known at admission (the workload declares it) even though worker names are
-	// not, so it travels as a static sidecar env var. The core treats it as an
-	// opaque string and ascribes no meaning to it beyond propagation.
-	ExpectedWorkersAnnotation = "fluence.flux-framework.org/expected-workers"
-
-	// Sidecar/staging infrastructure (generic — not quantum-specific).
-	SidecarImage          = "ghcr.io/converged-computing/fluence-sidecar:latest"
-	SidecarServiceAccount = "fluence-sidecar"
-
-	// StageVolumeName / StageMountPath: the shared emptyDir the init container
-	// stages the fluence Python package into, mounted into the user container and
-	// prepended to PYTHONPATH (Model C delivery).
-	StageVolumeName = "fluence-pkg"
-	StageMountPath  = "/opt/fluence-staged"
+	// GroupSizeAnnotation is the gang member count N, set by the workload on each
+	// pod. It is the authoritative override for the PodGroup gang minCount when
+	// the size cannot (or should not) be derived from the owning controller — and
+	// for loose grouped pods where counting at admission is unreliable. The core
+	// treats it as an opaque integer string.
+	GroupSizeAnnotation = "fluence.flux-framework.org/group-size"
 )
 
 // ── Mutator ─────────────────────────────────────────────────────────────────────
@@ -87,31 +63,14 @@ const (
 type Mutator struct {
 	AttributeKeys []string
 	Clientset     kubernetes.Interface
-	SidecarImage  string
 }
 
 // compile-time check that *Mutator satisfies the handler capability interface.
 var _ MutatorAPI = (*Mutator)(nil)
 
-func (m *Mutator) sidecarImage() string {
-	if m.SidecarImage != "" {
-		return m.SidecarImage
-	}
-	return SidecarImage
-}
-
 // GroupName returns the value of GroupLabel on the pod, or "".
 func GroupName(pod *corev1.Pod) string { return spec.Label(pod, GroupLabel) }
 
-// Role returns the explicit gang role declared on the pod via RoleAnnotation
-// ("leader"/"worker"), or "" if unset (caller falls back to admission order).
-func Role(pod *corev1.Pod) string { return spec.Annotation(pod, RoleAnnotation) }
-
-func resourceQuantity(s string) *resource.Quantity {
-	q := resource.MustParse(s)
-	return &q
-}
-
 // ── MutatorAPI: capabilities exposed to handlers ────────────────────────────────
 
 // Client implements MutatorAPI: returns the Kubernetes client (nil in tests).
@@ -138,29 +97,13 @@ func (m *Mutator) EnvVarNames() []string {
 	return names
 }
 
-// PodGroupLeader returns the recorded admission-order leader for the group, or
-// "". Retries briefly to absorb the concurrent leader/worker admission race.
-func (m *Mutator) PodGroupLeader(ctx context.Context, namespace, group string) string {
-	if m.Clientset == nil || group == "" {
-		return ""
-	}
-	for i := 0; i < 3; i++ {
-		pg, err := m.Clientset.SchedulingV1alpha2().PodGroups(namespace).Get(ctx, group, metav1.GetOptions{})
-		if err != nil {
-			return ""
-		}
-		if pg.Annotations != nil && pg.Annotations[LeaderAnnotation] != "" {
-			return pg.Annotations[LeaderAnnotation]
-		}
-		if i < 2 {
-			time.Sleep(100 * time.Millisecond)
-		}
+// EnsurePodGroup creates a Fluence-owned PodGroup with gang minCount = the full
+// gang size N (the whole group schedules atomically) if absent. minCount<=0
+// falls back to 1.
+func (m *Mutator) EnsurePodGroup(ctx context.Context, namespace, group, leaderPod string, minCount int32) {
+	if minCount <= 0 {
+		minCount = 1
 	}
-	return ""
-}
-
-// EnsurePodGroup creates a Fluence-owned PodGroup (minCount:1) if absent.
-func (m *Mutator) EnsurePodGroup(ctx context.Context, namespace, group, leaderPod string) {
 	if m.Clientset == nil {
 		return
 	}
@@ -179,205 +122,17 @@ func (m *Mutator) EnsurePodGroup(ctx context.Context, namespace, group, leaderPo
 		},
 		Spec: schedulingv1alpha2.PodGroupSpec{
 			SchedulingPolicy: schedulingv1alpha2.PodGroupSchedulingPolicy{
-				Gang: &schedulingv1alpha2.GangSchedulingPolicy{MinCount: 1},
+				Gang: &schedulingv1alpha2.GangSchedulingPolicy{MinCount: minCount},
 			},
 		},
 	}
 	if _, err := m.Clientset.SchedulingV1alpha2().PodGroups(namespace).Create(ctx, pg, metav1.CreateOptions{}); err != nil {
 		log.Printf("[fluence-webhook] could not create PodGroup %s/%s: %v", namespace, group, err)
 	} else {
-		log.Printf("[fluence-webhook] created PodGroup %s/%s (minCount=1)", namespace, group)
-	}
-}
-
-// RecordLeader records leaderPod as the group's admission-order leader.
-func (m *Mutator) RecordLeader(ctx context.Context, namespace, group, leaderPod string) {
-	if m.Clientset == nil || group == "" {
-		return
-	}
-	patch := fmt.Sprintf(`{"metadata":{"annotations":{%q:%q}}}`, LeaderAnnotation, leaderPod)
-	if _, err := m.Clientset.SchedulingV1alpha2().PodGroups(namespace).Patch(
-		ctx, group, types.MergePatchType, []byte(patch), metav1.PatchOptions{}); err != nil {
-		log.Printf("[fluence-webhook] could not record leader on PodGroup %s/%s: %v", namespace, group, err)
-	}
-}
-
-// EnsureSidecarRBAC provisions the per-namespace ServiceAccount/Role/RoleBinding
-// the sidecar uses to patch pods and read PodGroups.
-func (m *Mutator) EnsureSidecarRBAC(ctx context.Context, namespace string) {
-	if m.Clientset == nil {
-		return
-	}
-	lbl := map[string]string{"app": "fluence-sidecar"}
-
-	if _, err := m.Clientset.CoreV1().ServiceAccounts(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
-		sa := &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl}}
-		if _, err := m.Clientset.CoreV1().ServiceAccounts(namespace).Create(ctx, sa, metav1.CreateOptions{}); err != nil {
-			log.Printf("[fluence-webhook] could not create ServiceAccount %s/%s: %v", namespace, SidecarServiceAccount, err)
-		}
-	}
-	if _, err := m.Clientset.RbacV1().Roles(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
-		role := &rbacv1.Role{
-			ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl},
-			Rules: []rbacv1.PolicyRule{
-				{APIGroups: []string{""}, Resources: []string{"pods"}, Verbs: []string{"get", "list", "patch", "update"}},
-				{APIGroups: []string{"scheduling.k8s.io"}, Resources: []string{"podgroups"}, Verbs: []string{"get", "list"}},
-			},
-		}
-		if _, err := m.Clientset.RbacV1().Roles(namespace).Create(ctx, role, metav1.CreateOptions{}); err != nil {
-			log.Printf("[fluence-webhook] could not create Role %s/%s: %v", namespace, SidecarServiceAccount, err)
-		}
-	}
-	if _, err := m.Clientset.RbacV1().RoleBindings(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
-		rb := &rbacv1.RoleBinding{
-			ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl},
-			Subjects:   []rbacv1.Subject{{Kind: "ServiceAccount", Name: SidecarServiceAccount, Namespace: namespace}},
-			RoleRef:    rbacv1.RoleRef{APIGroup: "rbac.authorization.k8s.io", Kind: "Role", Name: SidecarServiceAccount},
-		}
-		if _, err := m.Clientset.RbacV1().RoleBindings(namespace).Create(ctx, rb, metav1.CreateOptions{}); err != nil {
-			log.Printf("[fluence-webhook] could not create RoleBinding %s/%s: %v", namespace, SidecarServiceAccount, err)
-		}
+		log.Printf("[fluence-webhook] created PodGroup %s/%s (minCount=%d)", namespace, group, minCount)
 	}
 }
 
-// InterceptorOps implements Model C delivery. It injects an init container (the
-// sidecar image) that stages the fluence Python package into a shared emptyDir,
-// mounts that volume into every Fluxion-resource container, and prepends it to
-// PYTHONPATH plus sets FLUENCE_POD_UID. Python auto-imports the staged
-// sitecustomize on startup, which runs the interceptor — no user code changes,
-// no PYTHONSTARTUP (which only fires interactively), no vendor SDK on our side.
-func (m *Mutator) InterceptorOps(pod *corev1.Pod) []spec.Op {
-	var ops []spec.Op
-
-	// Shared volume.
-	vol := corev1.Volume{Name: StageVolumeName, VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}}
-	if len(pod.Spec.Volumes) == 0 {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/volumes", Value: []corev1.Volume{vol}})
-	} else {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/volumes/-", Value: vol})
-	}
-
-	// Init container that stages the package into the shared volume.
-	//
-	// Fail-soft: the interceptor is best-effort, so its delivery must be too. We
-	// wrap the stage command so a failure (bad image, missing python, package
-	// problem) leaves the shared volume empty and exits 0 rather than blocking
-	// the user's pod with Init:Error. An empty staged dir simply means the
-	// interceptor does not run — the user application is unaffected. (This also
-	// lets CI use a minimal placeholder sidecar image for placement-only tests.)
-	initc := corev1.Container{
-		Name:            "fluence-stage",
-		Image:           m.sidecarImage(),
-		ImagePullPolicy: corev1.PullAlways,
-		Command: []string{"sh", "-c",
-			fmt.Sprintf("python -m fluence.stage %s || echo '[fluence] staging skipped (interceptor unavailable)'", StageMountPath)},
-		VolumeMounts: []corev1.VolumeMount{{Name: StageVolumeName, MountPath: StageMountPath}},
-	}
-	if len(pod.Spec.InitContainers) == 0 {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/initContainers", Value: []corev1.Container{initc}})
-	} else {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/initContainers/-", Value: initc})
-	}
-
-	// Mount the staged volume + set PYTHONPATH and FLUENCE_POD_UID on each
-	// Fluxion-resource container.
-	mount := corev1.VolumeMount{Name: StageVolumeName, MountPath: StageMountPath, ReadOnly: true}
-	pythonpath := corev1.EnvVar{Name: "PYTHONPATH", Value: StageMountPath}
-	uid := spec.FieldEnv("FLUENCE_POD_UID", "metadata.uid")
-	for i, c := range pod.Spec.Containers {
-		if !spec.RequestsFluxionResource(c) {
-			continue
-		}
-		if len(c.VolumeMounts) == 0 {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/volumeMounts", i), Value: []corev1.VolumeMount{mount}})
-		} else {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/volumeMounts/-", i), Value: mount})
-		}
-		if !spec.HasEnv(c, "PYTHONPATH") {
-			if len(c.Env) == 0 {
-				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env", i), Value: []corev1.EnvVar{pythonpath}})
-				pod.Spec.Containers[i].Env = []corev1.EnvVar{pythonpath}
-			} else {
-				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: pythonpath})
-				pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, pythonpath)
-			}
-		}
-		if !spec.HasEnv(c, "FLUENCE_POD_UID") {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: uid})
-			pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, uid)
-		}
-	}
-	return ops
-}
-
-// SidecarContainerOps adds the fluence-sidecar container and sets its
-// ServiceAccount. observe=true selects observe-only telemetry mode.
-func (m *Mutator) SidecarContainerOps(pod *corev1.Pod, observe bool) []spec.Op {
-	var ops []spec.Op
-	// The sidecar resolves its vendor provider at runtime from the backend the
-	// scheduler chose. It gets the same FLUXION_* contract as the workload
-	// containers (FLUXION_BACKEND + attribute vars like FLUXION_VENDOR), sourced
-	// via the downward API from the scheduler's annotations — so the values
-	// resolve once the scheduler writes them, after admission.
-	env := []corev1.EnvVar{
-		spec.FieldEnv("FLUENCE_POD_UID", "metadata.uid"),
-		spec.FieldEnv("FLUENCE_POD_NAME", "metadata.name"),
-		spec.FieldEnv("FLUENCE_NAMESPACE", "metadata.namespace"),
-		spec.FieldEnv("FLUENCE_GROUP", "metadata.labels['"+GroupLabel+"']"),
-	}
-	env = append(env, m.InjectedEnv()...)
-	if observe {
-		env = append(env, corev1.EnvVar{Name: "FLUENCE_OBSERVE", Value: "true"})
-	}
-	// The gang size is known at admission (the leader carries it), even though
-	// the worker NAMES are not yet. Propagate the expected worker count to the
-	// sidecar as a static env var so it can wait until it has discovered that
-	// many gated workers before ungating, rather than ungating a partial set.
-	// Read from a generic annotation so the core stays domain-agnostic; the
-	// workload manifest sets it (e.g. from its own N_WORKERS).
-	if pod.Annotations != nil {
-		if n := pod.Annotations[ExpectedWorkersAnnotation]; n != "" {
-			env = append(env, corev1.EnvVar{Name: "FLUENCE_EXPECTED_WORKERS", Value: n})
-		}
-	}
-	// The sidecar talks to the same backend the workload does (e.g. to find the
-	// task and read its queue position), so it needs the same credentials. Copy
-	// the workload container's secret/configmap-sourced env onto the sidecar.
-	// This stays domain-agnostic: we don't know or name the provider's creds, we
-	// just propagate whatever the workload pulls from a secret/configMap (e.g.
-	// AWS_*, IBM tokens). Existing FLUENCE_/FLUXION_ names are not overwritten.
-	if len(pod.Spec.Containers) > 0 {
-		have := map[string]bool{}
-		for _, e := range env {
-			have[e.Name] = true
-		}
-		for _, e := range pod.Spec.Containers[0].Env {
-			if have[e.Name] || e.ValueFrom == nil {
-				continue
-			}
-			if e.ValueFrom.SecretKeyRef != nil || e.ValueFrom.ConfigMapKeyRef != nil {
-				env = append(env, e)
-			}
-		}
-	}
-	sidecar := corev1.Container{
-		Name: "fluence-sidecar", Image: m.sidecarImage(), ImagePullPolicy: corev1.PullAlways,
-		Env: env,
-		Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{
-			corev1.ResourceCPU: *resourceQuantity("100m"), corev1.ResourceMemory: *resourceQuantity("256Mi"),
-		}},
-	}
-	if len(pod.Spec.Containers) == 0 {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/containers", Value: []corev1.Container{sidecar}})
-	} else {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/containers/-", Value: sidecar})
-	}
-	if pod.Spec.ServiceAccountName == "" || pod.Spec.ServiceAccountName == "default" {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/serviceAccountName", Value: SidecarServiceAccount})
-	}
-	return ops
-}
-
 // ── Dispatcher ──────────────────────────────────────────────────────────────────
 
 // Mutate dispatches the pod to every registered handler and concatenates the
diff --git a/pkg/webhook/webhook_test.go b/pkg/webhook/webhook_test.go
index 26983d4..9af6c9c 100644
--- a/pkg/webhook/webhook_test.go
+++ b/pkg/webhook/webhook_test.go
@@ -2,8 +2,6 @@ package webhook
 
 import (
 	"testing"
-
-	corev1 "k8s.io/api/core/v1"
 )
 
 // EnvVarNames returns the FLUXION_* contract names (used by the scheduler plugin
@@ -22,47 +20,3 @@ func TestEnvVarNames(t *testing.T) {
 		}
 	}
 }
-
-func TestSidecarInheritsWorkloadSecretEnv(t *testing.T) {
-	m := &Mutator{}
-	pod := &corev1.Pod{
-		Spec: corev1.PodSpec{
-			Containers: []corev1.Container{{
-				Name: "gang",
-				Env: []corev1.EnvVar{
-					{Name: "GANG_ROLE", Value: "leader"}, // plain value: NOT copied
-					{Name: "AWS_ACCESS_KEY_ID", ValueFrom: &corev1.EnvVarSource{
-						SecretKeyRef: &corev1.SecretKeySelector{
-							LocalObjectReference: corev1.LocalObjectReference{Name: "aws-braket-credentials"},
-							Key:                  "AWS_ACCESS_KEY_ID",
-						}}},
-				},
-			}},
-		},
-	}
-	ops := m.SidecarContainerOps(pod, false)
-	var sidecar *corev1.Container
-	for _, op := range ops {
-		if c, ok := op.Value.(corev1.Container); ok && c.Name == "fluence-sidecar" {
-			sidecar = &c
-		}
-	}
-	if sidecar == nil {
-		t.Fatal("no sidecar container added")
-	}
-	var gotSecret, gotPlain bool
-	for _, e := range sidecar.Env {
-		if e.Name == "AWS_ACCESS_KEY_ID" && e.ValueFrom != nil && e.ValueFrom.SecretKeyRef != nil {
-			gotSecret = true
-		}
-		if e.Name == "GANG_ROLE" {
-			gotPlain = true
-		}
-	}
-	if !gotSecret {
-		t.Error("sidecar should inherit the workload's secret-sourced AWS creds")
-	}
-	if gotPlain {
-		t.Error("sidecar should NOT copy plain-value workload env like GANG_ROLE")
-	}
-}
diff --git a/python/fluence/providers/base.py b/python/fluence/providers/base.py
index dca4429..561bca2 100644
--- a/python/fluence/providers/base.py
+++ b/python/fluence/providers/base.py
@@ -80,7 +80,7 @@ def find_my_task(self, pod_uid: str, backend: str, timeout: int) -> "Task | None
         raise NotImplementedError
 
     def is_ready_to_ungate(self, task: "Task") -> bool:
-        """True when workers should be ungated — queue position == 1 or the task
+        """True when the gang should be ungated — queue position == 1 or the task
         is already RUNNING/terminal. Always implementable."""
         raise NotImplementedError
 
@@ -134,4 +134,4 @@ def resolve_from_env() -> "Provider | None":
     for k, v in os.environ.items():
         if k.startswith("FLUXION_"):
             attrs[k[len("FLUXION_"):].lower()] = v
-    return resolve(attrs)
+    return resolve(attrs)
diff --git a/python/fluence/providers/braket.py b/python/fluence/providers/braket.py
index 23bd9fc..33f1683 100644
--- a/python/fluence/providers/braket.py
+++ b/python/fluence/providers/braket.py
@@ -49,8 +49,26 @@ def install_interceptor(self, pod_uid: str) -> bool:
             return False  # braket SDK not in this container — fail-soft
 
         original_run = AwsDevice.run
+        faux = os.environ.get("FLUENCE_FAUX_SUBMIT", "").lower() == "true"
 
         def patched_run(self, task_specification, *args, **kwargs):
+            # Two modes of the ONE interceptor:
+            #   faux (worker): the one-off submitter already submitted this task
+            #     before the worker was ungated, so submitting again would
+            #     duplicate it N times. Return a handle to the EXISTING task (by
+            #     ARN, handed over via FLUENCE_QUANTUM_JOB_ID) without submitting.
+            #   tag (submitter): stamp the pod-uid tag so the sidecar can find the
+            #     task in the queue, then submit for real.
+            if faux:
+                arn = os.environ.get("FLUENCE_QUANTUM_JOB_ID", "")
+                if arn:
+                    from braket.aws import AwsQuantumTask
+                    log(f"faux-submit: returning existing task {arn} "
+                        f"(no resubmission)")
+                    return AwsQuantumTask(arn=arn)
+                log("faux-submit: no job id; suppressing submit "
+                    "(worker consumes results by id)")
+                return None
             if pod_uid:
                 tags = kwargs.get("tags", {})
                 tags[TAG_KEY] = pod_uid
diff --git a/python/fluence/sidecar.py b/python/fluence/sidecar.py
index 098574b..d0724e5 100644
--- a/python/fluence/sidecar.py
+++ b/python/fluence/sidecar.py
@@ -1,18 +1,19 @@
 """
 fluence.sidecar — provider-agnostic quantum coordination sidecar main loop.
 
-Injected by the Fluence webhook into the quantum-submitting pod. Resolves its
-vendor at runtime from the backend annotation, discovers the task the user
-application submitted (tagged by the interceptor), polls readiness, and either
-ungates gated workers (gang mode) or just logs the queue-position series
-(observe-only mode).
+Injected by the Fluence webhook into the one-off SUBMITTER pod (gang + submitter
+model — there is no leader/worker split). Resolves its vendor at runtime from the
+backend annotation, discovers the task the user application submitted (tagged by
+the interceptor), polls readiness, and either ungates the gated GANG group (gang
+mode) or just logs the queue-position series (observe-only mode).
 
 Entry point: `fluence-sidecar` console script (see pyproject.toml) -> main().
 
 Environment (injected by the Fluence webhook):
   FLUENCE_POD_UID                 UID of this pod (matches interceptor tag)
   FLUENCE_NAMESPACE               Kubernetes namespace
-  FLUENCE_GATED_PODS              comma-separated gated worker names
+  FLUENCE_GANG_GROUP              group label of the gated gang to ungate
+  FLUENCE_GATED_PODS              optional explicit comma-separated gang pod names
   FLUENCE_OBSERVE                 "true" for observe-only telemetry mode
   FLUXION_BACKEND / FLUXION_VENDOR  scheduler-chosen backend / vendor
   FLUENCE_TASK_DISCOVERY_TIMEOUT  seconds to wait for discovery (default 300)
@@ -30,6 +31,7 @@
 from fluence.ungate import ungate_pods, gated_pods_from_env, namespace_from_env, wait_for_gated_pods
 
 
+
 def _poll(provider, task, poll_interval, ungate):
     mode = "gang" if ungate else "observe-only"
     log(f"{mode} mode: polling queue position")
@@ -52,18 +54,22 @@ def main():
     pod_uid = os.environ.get("FLUENCE_POD_UID", "")
     pod_name = os.environ.get("FLUENCE_POD_NAME", "")
     group = os.environ.get("FLUENCE_GROUP", "")
+    # Gang + submitter model: this sidecar runs in the one-off SUBMITTER pod
+    # (its own group-of-one, <gang>-submitter). The gated workload it must ungate
+    # is the GANG group, named by FLUENCE_GANG_GROUP (set by the webhook). There
+    # is no leader/worker split and no -workers subgroup.
+    gang_group = os.environ.get("FLUENCE_GANG_GROUP", "")
     backend = os.environ.get("FLUXION_BACKEND", "")
     observe = os.environ.get("FLUENCE_OBSERVE", "").lower() == "true"
     discovery_timeout = int(os.environ.get("FLUENCE_TASK_DISCOVERY_TIMEOUT", 300))
     poll_interval = int(os.environ.get("FLUENCE_POLL_INTERVAL", 30))
-    expected_workers = int(os.environ.get("FLUENCE_EXPECTED_WORKERS", 0))
     ungate_timeout = int(os.environ.get("FLUENCE_UNGATE_TIMEOUT", 120))
 
     namespace = namespace_from_env()
 
-    log("starting fluence quantum sidecar")
+    log("starting fluence quantum submitter sidecar")
     log(f"  pod_uid={pod_uid} namespace={namespace} group={group} "
-        f"backend={backend} observe={observe} expected_workers={expected_workers}")
+        f"gang_group={gang_group} backend={backend} observe={observe}")
 
     provider = resolve_from_env()
     if provider is None:
@@ -75,8 +81,9 @@ def main():
     if task is None:
         log("ERROR: could not discover quantum task")
         if not observe:
-            ungate_pods(wait_for_gated_pods(namespace, group, expected_workers,
-                                            exclude=pod_name, timeout=ungate_timeout),
+            # Fail open: ungate the gang so it is not stranded forever.
+            ungate_pods(wait_for_gated_pods(namespace, gang_group, exclude=pod_name,
+                                            timeout=ungate_timeout),
                         "", namespace)
         sys.exit(1)
 
@@ -89,19 +96,18 @@ def main():
         log("observe-only run complete")
         return
 
-    # Wait until all expected gated workers are present (gang is submitted
-    # together), then ungate them. expected_workers is N-1, propagated by the
-    # webhook from the leader at admission; if unset we ungate whatever is found.
+    # Ungate the gang: discover the gated pods in the gang group and remove their
+    # gate, stamping the job-id so each can fetch results by id. The gang pods are
+    # created up front (Job/Deployment), so they are present by submit time.
     gated_pods = gated_pods_from_env() or wait_for_gated_pods(
-        namespace, group, expected_workers, exclude=pod_name,
-        timeout=ungate_timeout)
-    log(f"ungating {len(gated_pods)} worker(s): {gated_pods}")
+        namespace, gang_group, exclude=pod_name, timeout=ungate_timeout)
+    log(f"ungating {len(gated_pods)} gang pod(s): {gated_pods}")
     n_ok = ungate_pods(gated_pods, job_id, namespace)
     if n_ok == len(gated_pods):
-        log(f"done — {n_ok} worker(s) ungated")
+        log(f"done — {n_ok} gang pod(s) ungated")
     else:
-        log(f"WARNING: ungated only {n_ok}/{len(gated_pods)} worker(s) — see errors above")
+        log(f"WARNING: ungated only {n_ok}/{len(gated_pods)} gang pod(s) — see errors above")
 
 
 if __name__ == "__main__":
     main()
diff --git a/python/fluence/ungate.py b/python/fluence/ungate.py
index 1019ead..a40e662 100644
--- a/python/fluence/ungate.py
+++ b/python/fluence/ungate.py
@@ -84,10 +84,10 @@ def gated_pods_from_env():
 def discover_gated_pods(namespace, group, exclude=""):
     """
     Find the names of pods in the same group that still carry the quantum
-    scheduling gate (i.e. the workers this sidecar's leader must ungate).
+    scheduling gate (i.e. the gang pods this submitter must ungate).
 
-    The leader's sidecar is created before the workers are admitted, so the gated
-    set cannot be known at admission time and must be discovered at runtime. We
+    The submitter is created alongside the gang, so the gated set is discovered
+    at runtime rather than known at admission. We
     list pods by the group label and keep those with the QUANTUM_GATE_NAME gate
-    still present, excluding the leader pod itself.
+    still present, excluding the submitter pod itself.
     """
@@ -114,31 +114,24 @@ def discover_gated_pods(namespace, group, exclude=""):
     return names
 
 
-def wait_for_gated_pods(namespace, group, expected, exclude="", timeout=120,
-                        interval=3):
+def wait_for_gated_pods(namespace, group, exclude="", timeout=120, interval=3):
     """
-    Wait until at least `expected` gated workers have been discovered in the
-    group, or `timeout` seconds elapse. The gang is submitted together, so all
-    workers appear quickly; the timeout is a backstop against a crashed/never-
-    admitted worker so the sidecar never hangs. Returns the discovered list
-    (which may be short of `expected` if the timeout fired).
+    Wait until at least one gated gang pod is discovered in the group (the gang
+    is created up front, so its pods appear quickly), then return all currently
+    gated pods. The timeout is a backstop so the submitter never hangs if the
+    gang never appears. Returns the discovered list (possibly empty on timeout).
     """
     deadline = time.time() + timeout
     found = []
     while time.time() < deadline:
         found = discover_gated_pods(namespace, group, exclude=exclude)
-        if expected and len(found) >= expected:
-            log(f"all {expected} gated worker(s) present")
+        if found:
             return found
-        if not expected:
-            # No expected count known — return whatever is present now.
-            return found
-        log(f"waiting for gated workers: {len(found)}/{expected}")
+        log("waiting for gated gang pods to appear")
         time.sleep(interval)
-    log(f"WARNING: timed out waiting for gated workers "
-        f"({len(found)}/{expected}); ungating what is present")
+    log("WARNING: timed out waiting for gated gang pods; none found")
     return found
 
 
 def namespace_from_env():
     return os.environ.get("FLUENCE_NAMESPACE", "default")
diff --git a/test/e2e/02-quantum-placement.sh b/test/e2e/02-quantum-placement.sh
deleted file mode 100644
index 17897a3..0000000
--- a/test/e2e/02-quantum-placement.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env bash
-# Quantum placement: a qpu pod is matched to a backend and the webhook injects QRMI_BACKEND.
-set -euo pipefail
-HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
-ANN="fluence.flux-framework.org/backend"
-
-log "TEST 2: quantum placement and backend handoff"
-kubectl apply -f examples/test/e2e/quantum-pod-mock.yaml
-
-wait_pod_phase sampler-mock Running 120 || fail "sampler-mock did not reach Running"
-
-# fluence must have stamped the chosen backend annotation.
-backend="$(kubectl get pod sampler-mock -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
-[ -n "$backend" ] || (show_webhook sampler-mock && fail "backend annotation ($ANN) was not set by fluence")
-log "fluence chose backend: $backend"
-
-# The webhook must have surfaced it as QRMI_BACKEND inside the container.
-out="$(kubectl logs sampler-mock || true)"
-echo "$out" | grep -q "BACKEND=${backend}" \
-  || (show_webhook sampler-mock && fail "QRMI_BACKEND in container ('$out') does not match annotation ($backend)")
-
-log "PASS: qpu pod scheduled, backend '$backend' chosen and injected as QRMI_BACKEND"
-kubectl delete -f examples/test/e2e/quantum-pod-mock.yaml --wait=false || true
diff --git a/test/e2e/03-restart-recovery.sh b/test/e2e/03-restart-recovery.sh
index 20c1be9..c26980f 100644
--- a/test/e2e/03-restart-recovery.sh
+++ b/test/e2e/03-restart-recovery.sh
@@ -9,7 +9,7 @@ ANN="fluence.flux-framework.org/backend"
 log "TEST 3: restart does not double-book an exclusive backend"
 
 # 1. Schedule the first qpu pod and capture its backend.
-kubectl apply -f examples/test/e2e/quantum-pod-mock.yaml
+kubectl apply -f examples/test/e2e/quantum/quantum-pod-mock.yaml
 wait_pod_phase sampler-mock "$NS" Running 120 || fail "sampler-mock did not reach Running"
 backend="$(kubectl get pod sampler-mock -n "$NS" -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
 [ -n "$backend" ] || fail "first pod has no backend annotation"
@@ -26,7 +26,7 @@ wait_pod_phase sampler-mock "$NS" Running 30 || fail "first pod not Running afte
 
 # 4. A second pod requesting the same exclusive qpu must NOT get the same backend.
 #    If recovery worked, the backend is occupied and the second pod stays Pending.
-kubectl apply -f examples/test/e2e/quantum-pod-mock-2.yaml
+kubectl apply -f examples/test/e2e/quantum/quantum-pod-mock-2.yaml
 if assert_stays_pending sampler-mock-2 "$NS" 45; then
   log "PASS: second qpu pod stayed Pending; backend '$backend' was not double-booked"
 else
@@ -38,5 +38,5 @@ else
   fi
 fi
 
-kubectl delete -f examples/test/e2e/quantum-pod-mock-2.yaml --wait=false || true
-kubectl delete -f examples/test/e2e/quantum-pod-mock.yaml --wait=false || true
+kubectl delete -f examples/test/e2e/quantum/quantum-pod-mock-2.yaml --wait=false || true
+kubectl delete -f examples/test/e2e/quantum/quantum-pod-mock.yaml --wait=false || true
diff --git a/test/e2e/04-sidecar-ungate.sh b/test/e2e/04-sidecar-ungate.sh
deleted file mode 100644
index 9ffefc8..0000000
--- a/test/e2e/04-sidecar-ungate.sh
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env bash
-# Sidecar webhook test.
-#
-# Verifies that when a PodGroup of size > 1 with QPU resources is submitted:
-#   1. The webhook creates fluence-sidecar RBAC in the namespace automatically
-#   2. The leader pod gets the sidecar container injected
-#   3. The worker pod gets the quantum.braket/ready scheduling gate added
-#   4. The worker pod gets fluence-quantum-classical priority class set
-#
-# Does NOT test the sidecar itself (task discovery, interceptor,
-# queue position polling). Those require real AWS credentials and are covered
-# by sidecars/providers/braket/test/integration.sh which is run locally.
-set -euo pipefail
-HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
-
-log "TEST 4: sidecar webhook — RBAC creation, gate injection, sidecar injection"
-
-kubectl apply -f examples/test/e2e/sidecar-mock-pods.yaml
-
-# Give webhook time to process the leader pod admission
-sleep 3
-
-# Print webhook logs — always show these so we can see what happened
-log "--- webhook logs ---"
-kubectl logs -n kube-system deployment/fluence-webhook --tail=50 || true
-log "--- end webhook logs ---"
-
-# 1. Webhook should have created fluence-sidecar ServiceAccount
-log "checking webhook created fluence-sidecar ServiceAccount..."
-for i in $(seq 1 30); do
-  kubectl get serviceaccount fluence-sidecar -n default > /dev/null 2>&1 && break
-  sleep 2
-done
-kubectl get serviceaccount fluence-sidecar -n default \
-  || fail "webhook did not create fluence-sidecar ServiceAccount"
-log "  fluence-sidecar ServiceAccount created"
-
-# 2. Webhook should have created fluence-sidecar Role
-kubectl get role fluence-sidecar -n default \
-  || fail "webhook did not create fluence-sidecar Role"
-log "  fluence-sidecar Role created"
-
-# 3. Webhook should have created fluence-sidecar RoleBinding
-kubectl get rolebinding fluence-sidecar -n default \
-  || fail "webhook did not create fluence-sidecar RoleBinding"
-log "  fluence-sidecar RoleBinding created"
-
-# 4. Leader pod should have the fluence-stage init container injected (Model C:
-#    it stages the fluence Python package into a shared volume on PYTHONPATH).
-log "checking webhook injected the fluence-stage init container..."
-wait_pod_phase sidecar-test-leader Running 120 \
-  || { kubectl describe pod sidecar-test-leader; fail "sidecar-test-leader did not reach Running"; }
-initc=$(kubectl get pod sidecar-test-leader \
-  -o jsonpath='{.spec.initContainers[*].name}')
-echo "$initc" | grep -q "fluence-stage" \
-  || fail "fluence-stage init container not injected (initContainers: $initc)"
-log "  fluence-stage init container injected"
-
-# 5. Leader pod should have the sidecar container injected
-log "checking sidecar injected into leader pod..."
-containers=$(kubectl get pod sidecar-test-leader \
-  -o jsonpath='{.spec.containers[*].name}')
-echo "$containers" | grep -q "fluence-sidecar" \
-  || fail "fluence-sidecar container not injected into leader (containers: $containers)"
-log "  fluence-sidecar container injected into leader"
-
-# 6. Worker pod should have scheduling gate added by webhook
-gate=$(kubectl get pod sidecar-test-worker \
-  -o jsonpath='{.spec.schedulingGates[0].name}')
-[ "$gate" = "quantum.braket/ready" ] \
-  || fail "worker pod does not have quantum.braket/ready gate (got: $gate)"
-log "  quantum.braket/ready gate set on worker"
-
-# 7. Worker pod should have the fluence-quantum-classical priority class set by
-#    the webhook at admission (so it schedules reliably once ungated).
-pc=$(kubectl get pod sidecar-test-worker -o jsonpath='{.spec.priorityClassName}')
-[ "$pc" = "fluence-quantum-classical" ] \
-  || fail "worker pod missing fluence-quantum-classical priority class (got: $pc)"
-log "  fluence-quantum-classical priority class set on worker"
-
-log "PASS: webhook correctly created RBAC, injected sidecar, gated worker"
-log "NOTE: fluence-quantum-classical priority is set by the webhook at admission (immutable post-creation)"
-log "NOTE: braket sidecar integration test (SDK intercept, tag discovery,"
-log "      queue polling) is in sidecars/providers/braket/test/integration.sh"
-
-# Only clean up pods and PodGroup — RBAC is namespace infrastructure
-# that persists for future quantum workflows in this namespace
-kubectl delete -f examples/test/e2e/sidecar-mock-pods.yaml
diff --git a/test/e2e/01-classical-gang.sh b/test/e2e/gang/01-classical-gang.sh
old mode 100644
new mode 100755
similarity index 71%
rename from test/e2e/01-classical-gang.sh
rename to test/e2e/gang/01-classical-gang.sh
index d2018ac..1ebfc64
--- a/test/e2e/01-classical-gang.sh
+++ b/test/e2e/gang/01-classical-gang.sh
@@ -1,10 +1,10 @@
 #!/usr/bin/env bash
 # Classical gang scheduling: a PodGroup of 2 must be placed all-or-nothing on real nodes.
 set -euo pipefail
-HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
 
 log "TEST 1: classical gang scheduling"
-kubectl apply -f examples/single-podgroup.yaml
+kubectl apply -f examples/test/e2e/gang/single-podgroup.yaml
 
 # All pods in the 'training' deployment must reach Running (scheduled + started).
 # Wait for the pod to EXIST before waiting for Ready — kubectl wait errors out
@@ -25,5 +25,9 @@ count="$(kubectl get pods -l app=training --no-headers | wc -l | tr -d ' ')"
 [ "$count" = "1" ] || fail "expected 2 training pods, got $count"
 
 log "PASS: classical gang placed all $count pods via fluence"
-kubectl delete -f examples/single-podgroup.yaml --wait=false || true
+kubectl delete -f examples/test/e2e/gang/single-podgroup.yaml --wait=false || true
 kubectl patch podgroup training --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+# Wait for the pods to actually be gone before the next test runs — otherwise a
+# terminating 'training' pod (same name/labels reused by other scenarios) can be
+# misread as the next test's placement.
+kubectl wait --for=delete pod -l app=training --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/gang/02-postfilter-rematch.sh b/test/e2e/gang/02-postfilter-rematch.sh
new file mode 100755
index 0000000..f74c87b
--- /dev/null
+++ b/test/e2e/gang/02-postfilter-rematch.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# PostFilter re-match: when another scheduler plugin (TaintToleration) rejects a
+# node Fluxion allocated, Fluence must abandon that allocation, exclude the node,
+# and re-match onto an untainted node. Safety: the gang's RUNNING pod must NEVER
+# bind to the tainted node.
+#
+# This test is self-isolating: it uses its own workload name (pf-rematch) and
+# labels, distinct from the other e2e scenarios, and ensures a clean slate first,
+# so a pod left over (terminating) from a previous test can never be mistaken for
+# this test's placement. It also ignores terminating pods when asserting.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+NAME=pf-rematch
+SEL="app=${NAME}"
+
+log "TEST 5: PostFilter abandons a taint-rejected allocation and re-matches"
+
+# --- clean slate: no leftover pods from earlier tests under our name ----------
+kubectl delete deployment "$NAME" --ignore-not-found >/dev/null 2>&1 || true
+kubectl delete podgroup "$NAME" --ignore-not-found >/dev/null 2>&1 || true
+kubectl patch podgroup "$NAME" --type=merge \
+  -p '{"metadata":{"finalizers":null}}' >/dev/null 2>&1 || true
+kubectl wait --for=delete pod -l "$SEL" --timeout=60s >/dev/null 2>&1 || true
+# Defensive: a prior test's workload left running would occupy the only
+# untainted worker and make this test fail with a (correct) fluxion
+# allocate -1 for lack of capacity. Ensure none lingers.
+kubectl delete deployment training --ignore-not-found --wait=false >/dev/null 2>&1 || true
+kubectl wait --for=delete pod -l app=training --timeout=60s >/dev/null 2>&1 || true
+
+TAINTED="$(kubectl get nodes -l '!node-role.kubernetes.io/control-plane' \
+  -o jsonpath='{.items[0].metadata.name}')"
+[ -n "$TAINTED" ] || fail "no worker node found to taint"
+log "tainting node $TAINTED with fluence-e2e=blocked:NoSchedule"
+kubectl taint nodes "$TAINTED" fluence-e2e=blocked:NoSchedule --overwrite
+
+cleanup() {
+  kubectl taint nodes "$TAINTED" fluence-e2e- 2>/dev/null || true
+  kubectl delete deployment "$NAME" --ignore-not-found --wait=false 2>/dev/null || true
+  kubectl delete podgroup "$NAME" --ignore-not-found --wait=false 2>/dev/null || true
+  kubectl patch podgroup "$NAME" --type=merge \
+    -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# --- our own workload (distinct name/labels; does NOT tolerate the taint) ------
+kubectl apply -f - <<YAML
+apiVersion: scheduling.k8s.io/v1alpha2
+kind: PodGroup
+metadata:
+  name: ${NAME}
+spec:
+  schedulingPolicy:
+    gang:
+      minCount: 1
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ${NAME}
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: ${NAME}}
+  template:
+    metadata:
+      labels: {app: ${NAME}}
+    spec:
+      schedulerName: fluence
+      schedulingGroup:
+        podGroupName: ${NAME}
+      containers:
+        - name: worker
+          image: busybox
+          command: ["sleep", "3600"]
+          resources:
+            requests:
+              cpu: "1"
+YAML
+
+log "waiting for the gang to schedule (must avoid the tainted node)"
+wait_pods_ready "$SEL" 1 180 \
+  || fail "gang never became Ready — PostFilter re-match did not recover (likely stuck on the taint-rejected allocation)"
+
+# SAFETY: among NON-terminating (Running, no deletionTimestamp) pods, none may be
+# on the tainted node. Terminating leftovers are ignored by construction (we use
+# a unique name and cleaned the slate), but we still filter defensively.
+checked=0
+while read -r name node deleted; do
+  [ -z "$name" ] && continue
+  # custom-columns prints "<none>" for empty fields, so an empty deletionTimestamp
+  # shows as "<none>", NOT "". Treat "<none>" as empty for both columns.
+  if [ "$deleted" != "<none>" ] && [ -n "$deleted" ]; then continue; fi   # skip terminating
+  if [ "$node" = "<none>" ] || [ -z "$node" ]; then continue; fi          # skip not-yet-bound
+  checked=$((checked+1))
+  if [ "$node" = "$TAINTED" ]; then
+    fail "SAFETY VIOLATION: running pod $name is bound to the tainted node $TAINTED"
+  fi
+  log "$name correctly placed on $node (not the tainted $TAINTED)"
+done < <(kubectl get pods -l "$SEL" \
+  -o custom-columns='N:.metadata.name,NODE:.spec.nodeName,DEL:.metadata.deletionTimestamp' \
+  --no-headers)
+
+[ "$checked" -ge 1 ] || fail "no running ${NAME} pod found to check"
+
+# Informational: did PostFilter actually fire (Fluxion picked the tainted node
+# first and we re-matched), or did Fluxion place on the good node directly?
+POD="$(kubectl -n kube-system get pods -l app=fluence \
+  -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
+if [ -n "$POD" ] && kubectl -n kube-system logs "$POD" 2>/dev/null \
+     | grep -q "unschedulable: abandoning allocation"; then
+  log "observed PostFilter abandonment in scheduler log (re-match path exercised)"
+else
+  log "note: Fluxion placed on the untainted node directly this run (PostFilter not needed)"
+fi
+
+log "PASS: gang scheduled on an untainted node; no running pod on the tainted node"
diff --git a/test/e2e/gang/03-multi-gang.sh b/test/e2e/gang/03-multi-gang.sh
new file mode 100755
index 0000000..9f01ae5
--- /dev/null
+++ b/test/e2e/gang/03-multi-gang.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# Multi-pod gang scheduling on real nodes. Guards the two failures that the
+# single-pod 01 test could NOT catch (and that shipped a minCount=1 bug):
+#   A) a multi-pod gang must place ALL of them (minCount must equal the gang size, not 1)
+#   B) under contention, a gang that cannot fully fit stays ENTIRELY pending —
+#      never partially placed (no stranded pods holding nodes).
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+# ---- A) all-or-nothing placement of a 2-pod gang -------------------------------
+log "TEST 6A: multi-pod gang (2) places all-or-nothing"
+kubectl apply -f examples/test/e2e/gang/multi-gang.yaml
+
+# the webhook must have created the PodGroup with minCount = 2 (the bug set it to 1)
+log "checking PodGroup minCount == 2 (set by webhook from group-size)"
+for i in $(seq 1 30); do
+  mc="$(kubectl get podgroup gang3 -o jsonpath='{.spec.schedulingPolicy.gang.minCount}' 2>/dev/null || true)"
+  [ -n "$mc" ] && break; sleep 2
+done
+[ "$mc" = "2" ] || fail "PodGroup gang3 minCount=$mc, want 2 (minCount=1 bug -> partial gangs)"
+
+log "waiting for all 2 gang pods to be Ready"
+wait_pods_ready "app=gang3" 2 180 || fail "gang3 did not place all 2 pods (gang scheduling failed)"
+
+count="$(kubectl get pods -l app=gang3 --field-selector=status.phase=Running --no-headers | wc -l | tr -d ' ')"
+[ "$count" = "2" ] || fail "expected 2 Running gang3 pods, got $count (partial placement)"
+for p in $(kubectl get pods -l app=gang3 -o name); do
+  pod="${p#pod/}"
+  sched="$(kubectl get pod "$pod" -o jsonpath='{.spec.schedulerName}')"
+  [ "$sched" = "fluence" ] || fail "$pod not scheduled by fluence (got: $sched)"
+done
+log "PASS 6A: 2-pod gang placed atomically by fluence (minCount=2)"
+
+kubectl delete -f examples/test/e2e/gang/multi-gang.yaml --wait=false || true
+kubectl patch podgroup gang3 --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+kubectl wait --for=delete pod -l app=gang3 --timeout=60s 2>/dev/null || true
+
+# ---- B) contention: the gang that can't fully fit stays ENTIRELY pending --------
+log "TEST 6B: contention — a gang that cannot fully fit must NOT partially place"
+kubectl apply -f examples/test/e2e/gang/multi-gang-contention.yaml
+
+# wait until the cluster settles. Three possible outcomes:
+#   - one gang fully Running, other fully Pending  -> contention; assert no partial
+#   - BOTH fully Running                            -> runner big enough, no contention to test (skip)
+#   - any partial (1 of 2 in a gang scheduled)      -> the bug, fail
+log "waiting for gangs to settle"
+winner=""; loser=""; both=""
+for i in $(seq 1 90); do
+  ra="$(kubectl get pods -l app=gang-a --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l | tr -d ' ')"
+  rb="$(kubectl get pods -l app=gang-b --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l | tr -d ' ')"
+  if [ "$ra" = "2" ] && [ "$rb" = "2" ]; then both=1; break; fi
+  if [ "$ra" = "2" ] && [ "$rb" = "0" ]; then winner=gang-a; loser=gang-b; break; fi
+  if [ "$rb" = "2" ] && [ "$ra" = "0" ]; then winner=gang-b; loser=gang-a; break; fi
+  sleep 2
+done
+
+if [ -n "$both" ]; then
+  log "SKIP 6B: cluster placed both gangs (>=4 schedulable cores) — no contention on this runner"
+else
+  [ -n "$winner" ] || fail "no clean settle: gang-a=$ra gang-b=$rb running (possible PARTIAL placement)"
+  log "winner=$winner (2 running), loser=$loser (expected 0 running)"
+  # the loser must have ZERO pods scheduled to a node — the all-or-nothing guarantee.
+  # A single scheduled loser pod = partial placement = the bug.
+  scheduled_loser="$(kubectl get pods -l app=$loser -o jsonpath='{range .items[*]}{.spec.nodeName}{"\n"}{end}' | grep -c . || true)"
+  [ "$scheduled_loser" = "0" ] || fail "$loser has $scheduled_loser pod(s) on a node — PARTIAL placement (gang violated)"
+  log "PASS 6B: $loser stayed entirely pending — no partial placement under contention"
+fi
+
+kubectl delete -f examples/test/e2e/gang/multi-gang-contention.yaml --wait=false || true
+for g in gang-a gang-b; do
+  kubectl patch podgroup $g --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l app=gang-a --timeout=60s 2>/dev/null || true
+kubectl wait --for=delete pod -l app=gang-b --timeout=60s 2>/dev/null || true
+log "PASS: multi-gang all-or-nothing verified"
diff --git a/test/e2e/gang/04-requeue-on-capacity.sh b/test/e2e/gang/04-requeue-on-capacity.sh
new file mode 100755
index 0000000..f41aa71
--- /dev/null
+++ b/test/e2e/gang/04-requeue-on-capacity.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# Requeue-on-capacity + gang atomicity under contention.
+#
+# Two 2-pod gangs contend for a cluster that can only run one at a time. This
+# guards two invariants that the GKE contention runs exposed:
+#   1. ALL-OR-NOTHING: each gang places ALL its pods or NONE — never a partial
+#      (e.g. 1-of-2 scheduled). The winner must be a clean 2/2; the loser a clean
+#      0/2 while it waits.
+#   2. REQUEUE: when the winner completes and frees its nodes, the loser is
+#      re-attempted on its own (no manual nudge) and then ALSO places atomically
+#      (2/2), driven by the shortened --pod-max-in-unschedulable-pods-duration.
+#
+# SCOPE / LIMITATION: this is a 3-node kind cluster with small (1-core) pods. It
+# verifies the INVARIANTS on a minimal contention case. It does NOT reproduce the
+# GKE-scale dynamics where the bug was first seen — one-pod-per-node (~80-core)
+# saturation and ~20 simultaneous mixed-size gangs draining in sequence. That
+# scale behavior is validated on the real cluster, not in CI; a pass here means
+# the invariants hold on the simple case, not that large-scale draining is proven.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+# running-pod count for a gang (job-name label set by the Job controller)
+running() { kubectl get pods -l job-name="$1" --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l | tr -d ' '; }
+# count of a gang's pods actually bound to a node (Running OR already Succeeded)
+on_nodes() { kubectl get pods -l job-name="$1" -o jsonpath='{range .items[*]}{.spec.nodeName}{"\n"}{end}' 2>/dev/null | grep -c . || true; }
+
+log "TEST 9: contended gangs stay all-or-nothing, loser requeues when capacity frees"
+kubectl apply -f examples/test/e2e/gang/multi-gang-requeue.yaml
+
+# ---- 1. one gang wins CLEANLY (2/2); the other places NOTHING (0/2) ------------
+log "waiting for a clean 2/0 split (one whole gang runs, the other entirely waits)"
+winner=""; loser=""
+for i in $(seq 1 60); do
+  rw="$(running gang-win)"; ra="$(running gang-wait)"
+  if [ "$rw" = "2" ] && [ "$ra" = "0" ]; then winner=gang-win;  loser=gang-wait; break; fi
+  if [ "$ra" = "2" ] && [ "$rw" = "0" ]; then winner=gang-wait; loser=gang-win;  break; fi
+  # a 1/x or x/1 state that persists is a PARTIAL gang — fail fast on it
+  if [ "$rw" = "1" ] || [ "$ra" = "1" ]; then
+    sleep 6  # allow a transient mid-bind moment to resolve
+    rw="$(running gang-win)"; ra="$(running gang-wait)"
+    { [ "$rw" = "1" ] || [ "$ra" = "1" ]; } && \
+      fail "PARTIAL gang: gang-win=$rw gang-wait=$ra running (all-or-nothing violated)"
+  fi
+  sleep 2
+done
+[ -n "$winner" ] || fail "no clean 2/0 split (gang-win=$(running gang-win) gang-wait=$(running gang-wait))"
+log "  winner=$winner (2/2 running), loser=$loser"
+
+# loser must have ZERO pods on any node — not even one (that would be a partial)
+sl="$(on_nodes "$loser")"
+[ "$sl" = "0" ] || fail "$loser has $sl pod(s) bound while it should be entirely pending — PARTIAL placement"
+log "  $loser entirely pending (0 pods bound) — all-or-nothing holds"
+
+# ---- 2. winner completes -> loser is requeued AND places atomically ------------
+log "waiting for winner=$winner to complete and free its nodes"
+kubectl wait --for=condition=complete job/$winner --timeout=120s || fail "$winner did not complete"
+log "  $winner completed; capacity freed"
+
+# The loser must now place ALL its pods (2/2), on its own, within a window above
+# the 30s recheck flush but below the 5m default — proving the shortened timeout
+# is in effect AND that the requeued gang is still atomic (not a partial).
+log "asserting $loser requeues and places ATOMICALLY (2/2) within ~75s"
+ok=""
+for i in $(seq 1 38); do   # ~75s
+  rl="$(running $loser)"
+  dl="$(kubectl get pods -l job-name=$loser --field-selector=status.phase=Succeeded --no-headers 2>/dev/null | wc -l | tr -d ' ')"
+  # both pods accounted for (running and/or already completed) = atomic placement
+  [ "$((rl + dl))" = "2" ] && { ok=1; break; }
+  # a lone 1/2 that lingers = partial placement of the requeued gang
+  if [ "$((rl + dl))" = "1" ]; then
+    sleep 6
+    rl="$(running $loser)"; dl="$(kubectl get pods -l job-name=$loser --field-selector=status.phase=Succeeded --no-headers 2>/dev/null | wc -l | tr -d ' ')"
+    [ "$((rl + dl))" = "1" ] && fail "$loser placed 1 of 2 pods — PARTIAL placement of the requeued gang"
+  fi
+  sleep 2
+done
+[ -n "$ok" ] || fail "$loser did NOT place both pods within 75s of capacity freeing — \
+either the shortened --pod-max-in-unschedulable-pods-duration is not taking effect \
+(gang stuck) or the requeued gang did not assemble"
+log "PASS 9: $loser requeued and placed atomically (2/2) after $winner freed capacity"
+
+kubectl delete -f examples/test/e2e/gang/multi-gang-requeue.yaml --wait=false || true
+for g in gang-win gang-wait; do
+  kubectl patch podgroup $g --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l job-name=gang-win  --timeout=60s 2>/dev/null || true
+kubectl wait --for=delete pod -l job-name=gang-wait --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/lib.sh b/test/e2e/lib.sh
index cad6a2e..13390c9 100644
--- a/test/e2e/lib.sh
+++ b/test/e2e/lib.sh
@@ -44,7 +44,7 @@ wait_fluence_ready() {
 
 show_webhook() {
   pod=$1
-  echo "FAIL: QRMI_BACKEND mismatch"
+  echo "FAIL: FLUXION_BACKEND mismatch"
   kubectl get pod $pod -o jsonpath='{.spec.containers[0].env}'; echo
   kubectl get pod $pod -o jsonpath='{.metadata.annotations}'; echo
   kubectl -n kube-system logs deploy/fluence-webhook --tail=50
diff --git a/test/e2e/quantum/01-quantum-placement.sh b/test/e2e/quantum/01-quantum-placement.sh
new file mode 100755
index 0000000..8f5c475
--- /dev/null
+++ b/test/e2e/quantum/01-quantum-placement.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# Quantum placement: a qpu pod is matched to a backend and the webhook injects FLUXION_BACKEND.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+ANN="fluence.flux-framework.org/backend"
+
+log "TEST 2: quantum placement and backend handoff"
+kubectl apply -f examples/test/e2e/quantum/quantum-pod-mock.yaml
+
+wait_pod_phase sampler-mock Running 120 || fail "sampler-mock did not reach Running"
+
+# fluence must have stamped the chosen backend annotation.
+backend="$(kubectl get pod sampler-mock -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
+[ -n "$backend" ] || (show_webhook sampler-mock && fail "backend annotation ($ANN) was not set by fluence")
+log "fluence chose backend: $backend"
+
+# The webhook must have surfaced it as FLUXION_BACKEND inside the container.
+out="$(kubectl logs sampler-mock || true)"
+if ! echo "$out" | grep -q "BACKEND=${backend}"; then
+  # Diagnostic (CI has no interactive shell): show whether the env var is ABSENT
+  # (not injected -> webhook issue) or PRESENT-BUT-EMPTY (annotation not resolved
+  # at container start -> delivery/timing issue), and what the container actually got.
+  log "--- diagnostic: container env spec ---"
+  kubectl get pod sampler-mock -o jsonpath='{.spec.containers[0].env}' ; echo
+  log "--- diagnostic: live value via exec ---"
+  kubectl exec sampler-mock -- sh -c 'echo "FLUXION_BACKEND=[$FLUXION_BACKEND]"' 2>&1 || true
+  log "--- diagnostic: backend annotation on pod ---"
+  kubectl get pod sampler-mock -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" ; echo
+  show_webhook sampler-mock
+  fail "FLUXION_BACKEND in container ('$out') does not match annotation ($backend)"
+fi
+
+log "PASS: qpu pod scheduled, backend '$backend' chosen and injected as FLUXION_BACKEND"
+kubectl delete -f examples/test/e2e/quantum/quantum-pod-mock.yaml --wait=false || true
diff --git a/test/e2e/quantum/02-sidecar-ungate.sh b/test/e2e/quantum/02-sidecar-ungate.sh
new file mode 100755
index 0000000..88f047b
--- /dev/null
+++ b/test/e2e/quantum/02-sidecar-ungate.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# Gang + submitter webhook test (no leader/worker).
+#
+# When a quantum workload (a gang of N pods all requesting QPU, no roles) is
+# submitted, the webhook must:
+#   1. create the fluence-sidecar RBAC in the namespace automatically
+#   2. gate every gang pod with quantum.braket/ready
+#   3. raise every gang pod to the fluence-quantum-classical priority class
+#   4. ADDITIONALLY create the one-off submitter pod <group>-submitter
+#   5. inject the fluence-stage init container + the sidecar container into the
+#      submitter (Model C staging + the real coordinator)
+#
+# Does NOT test the sidecar runtime (task discovery, interceptor, queue polling)
+# — that needs real AWS creds (sidecars/providers/braket/test/integration.sh).
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+GROUP=qgang
+SUBMITTER=${GROUP}-submitter
+
+log "TEST 4: gang+submitter webhook — RBAC, gating, priority, submitter creation"
+kubectl apply -f examples/test/e2e/quantum/quantum-gang-pods.yaml
+sleep 3
+
+log "--- webhook logs ---"
+kubectl logs -n kube-system deployment/fluence-webhook --tail=50 || true
+log "--- end webhook logs ---"
+
+# 1. RBAC created by the webhook (idempotent, per-namespace).
+log "checking webhook created fluence-sidecar RBAC..."
+for i in $(seq 1 30); do
+  kubectl get serviceaccount fluence-sidecar -n default >/dev/null 2>&1 && break
+  sleep 2
+done
+kubectl get serviceaccount fluence-sidecar -n default || fail "no fluence-sidecar ServiceAccount"
+kubectl get role            fluence-sidecar -n default || fail "no fluence-sidecar Role"
+kubectl get rolebinding     fluence-sidecar -n default || fail "no fluence-sidecar RoleBinding"
+log "  RBAC present"
+
+# 2 + 3. Every gang pod is gated and at the preempting priority class.
+for p in ${GROUP}-0 ${GROUP}-1; do
+  gate="$(kubectl get pod "$p" -o jsonpath='{.spec.schedulingGates[0].name}' 2>/dev/null || true)"
+  [ "$gate" = "quantum.braket/ready" ] || fail "$p not gated (gate=$gate)"
+  pc="$(kubectl get pod "$p" -o jsonpath='{.spec.priorityClassName}' 2>/dev/null || true)"
+  [ "$pc" = "fluence-quantum-classical" ] || fail "$p priorityClass=$pc, want fluence-quantum-classical"
+done
+log "  gang pods gated + fluence-quantum-classical priority"
+
+# 4. Fluence created the submitter pod.
+log "checking webhook created the submitter pod $SUBMITTER..."
+for i in $(seq 1 30); do
+  kubectl get pod "$SUBMITTER" -n default >/dev/null 2>&1 && break
+  sleep 2
+done
+kubectl get pod "$SUBMITTER" -n default || fail "webhook did not create submitter pod $SUBMITTER"
+sub_marker="$(kubectl get pod "$SUBMITTER" -o jsonpath='{.metadata.annotations.fluence\.flux-framework\.org/submitter}' 2>/dev/null || true)"
+[ "$sub_marker" = "true" ] || fail "submitter missing the submitter marker"
+log "  submitter pod created"
+
+# 5. Submitter has the staging init container + the sidecar container, and is NOT gated.
+wait_pod_phase "$SUBMITTER" Running 120 \
+  || { kubectl describe pod "$SUBMITTER"; fail "$SUBMITTER did not reach Running"; }
+initc="$(kubectl get pod "$SUBMITTER" -o jsonpath='{.spec.initContainers[*].name}')"
+echo "$initc" | grep -q fluence-stage || fail "fluence-stage init container not injected (init: $initc)"
+conts="$(kubectl get pod "$SUBMITTER" -o jsonpath='{.spec.containers[*].name}')"
+echo "$conts" | grep -q fluence-sidecar || fail "fluence-sidecar container not injected (containers: $conts)"
+sgate="$(kubectl get pod "$SUBMITTER" -o jsonpath='{.spec.schedulingGates[0].name}' 2>/dev/null || true)"
+[ -z "$sgate" ] || fail "submitter must NOT be gated (gate=$sgate)"
+log "  submitter has fluence-stage + fluence-sidecar, not gated"
+
+log "PASS: webhook gated the gang, set priority, created RBAC + the submitter"
+log "NOTE: priority is set at admission (immutable post-creation)"
+log "NOTE: braket sidecar runtime (SDK intercept, tag discovery, queue polling)"
+log "      is in sidecars/providers/braket/test/integration.sh"
+
+# Clean up pods + PodGroups; RBAC is namespace infra and persists.
+kubectl delete -f examples/test/e2e/quantum/quantum-gang-pods.yaml --wait=false || true
+kubectl delete pod "$SUBMITTER" --wait=false 2>/dev/null || true
+for g in "$GROUP" "$SUBMITTER"; do
+  kubectl patch podgroup "$g" --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l app="$GROUP" --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/quantum/03-gang-submitter.sh b/test/e2e/quantum/03-gang-submitter.sh
new file mode 100755
index 0000000..46905ca
--- /dev/null
+++ b/test/e2e/quantum/03-gang-submitter.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Gang + submitter structure (replaces the old leader/worker split).
+#
+# The structural guarantee the ungate path depends on: a quantum gang of size N
+# is ONE fully-gated PodGroup <group> (minCount N), and Fluence creates a
+# SEPARATE submitter pod in its OWN group-of-one <group>-submitter (minCount 1,
+# not gated) that does the real submit and ungates the gang. There is no
+# <group>-workers subgroup and no leader among the user's pods. (The runtime
+# ungate is covered by the braket integration test; here we prove the shape.)
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+GROUP=qgang
+SUBMITTER=${GROUP}-submitter
+
+log "TEST 7: gang(N, gated) + separate submitter(1) structure"
+kubectl apply -f examples/test/e2e/quantum/quantum-gang-pods.yaml
+
+# Gang PodGroup <group> exists with minCount N=2 (full gang, no split).
+# Poll up to 30 times, 2s apart; a failed get leaves gc empty and retries.
+log "checking gang group '$GROUP' minCount == 2 (full N)"
+for i in $(seq 1 30); do
+  gc="$(kubectl get podgroup "$GROUP" -o jsonpath='{.spec.schedulingPolicy.gang.minCount}' 2>/dev/null || true)"
+  [ -n "$gc" ] && break; sleep 2
+done
+[ "$gc" = "2" ] || fail "gang group $GROUP minCount=$gc, want 2 (full N)"
+
+# There must be NO <group>-workers subgroup (the old split is gone).
+if kubectl get podgroup "${GROUP}-workers" >/dev/null 2>&1; then
+  fail "found ${GROUP}-workers PodGroup — the obsolete leader/worker split must not exist"
+fi
+log "  gang group minCount=2, no -workers subgroup"
+
+# Submitter PodGroup <group>-submitter exists with minCount 1 (schedules alone).
+log "checking submitter group '$SUBMITTER' minCount == 1"
+for i in $(seq 1 30); do
+  sc="$(kubectl get podgroup "$SUBMITTER" -o jsonpath='{.spec.schedulingPolicy.gang.minCount}' 2>/dev/null || true)"
+  [ -n "$sc" ] && break; sleep 2
+done
+[ "$sc" = "1" ] || fail "submitter group $SUBMITTER minCount=$sc, want 1"
+
+# The submitter PodGroup being present does not show the pod itself exists yet,
+# so poll for the pod before reading its metadata; otherwise the reads below
+# return empty and the test fails on a creation race rather than a real defect.
+for i in $(seq 1 30); do kubectl get pod "$SUBMITTER" >/dev/null 2>&1 && break; sleep 2; done
+
+# Submitter pod records the gang group it ungates, and is its own group.
+gg="$(kubectl get pod "$SUBMITTER" -o jsonpath='{.metadata.annotations.fluence\.flux-framework\.org/gang-group}' 2>/dev/null || true)"
+[ "$gg" = "$GROUP" ] || fail "submitter gang-group annotation=$gg, want $GROUP"
+sl="$(kubectl get pod "$SUBMITTER" -o jsonpath='{.metadata.labels.fluence\.flux-framework\.org/group}' 2>/dev/null || true)"
+[ "$sl" = "$SUBMITTER" ] || fail "submitter group label=$sl, want $SUBMITTER"
+log "  submitter group minCount=1, ungates gang '$GROUP'"
+
+# Gang pods stay in <group> (NOT relinked) and are gated.
+for p in ${GROUP}-0 ${GROUP}-1; do
+  g="$(kubectl get pod "$p" -o jsonpath='{.metadata.labels.fluence\.flux-framework\.org/group}' 2>/dev/null || true)"
+  [ "$g" = "$GROUP" ] || fail "$p group label=$g, want $GROUP (gang pods must not be relinked)"
+  gate="$(kubectl get pod "$p" -o jsonpath='{.spec.schedulingGates[0].name}' 2>/dev/null || true)"
+  [ "$gate" = "quantum.braket/ready" ] || fail "$p not gated (gate=$gate)"
+done
+log "  gang pods remain in '$GROUP' and are gated"
+
+log "PASS 7: gang(N=2, gated) + submitter(1, ungates gang), no leader/worker split"
+# Best-effort teardown: every step tolerates failure so cleanup never fails the test.
+kubectl delete -f examples/test/e2e/quantum/quantum-gang-pods.yaml --wait=false || true
+kubectl delete pod "$SUBMITTER" --wait=false 2>/dev/null || true
+# Null out the finalizers on both PodGroups (errors ignored).
+for g in "$GROUP" "$SUBMITTER"; do
+  kubectl patch podgroup "$g" --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l app="$GROUP" --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/quantum/04-gang-env-contract.sh b/test/e2e/quantum/04-gang-env-contract.sh
new file mode 100755
index 0000000..19f2439
--- /dev/null
+++ b/test/e2e/quantum/04-gang-env-contract.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# Env-contract e2e (gang + submitter): verify the webhook injects, at admission,
+# the env the runtime depends on — IN-CLUSTER, on the real pod specs, with no
+# Braket/AWS and WITHOUT requiring scheduling. Guards the seam that, if broken,
+# makes a gang schedule then hang or double-submit.
+#
+# Spec layer only (these are downward-API valueFrom refs whose VALUES resolve at
+# placement, but whose PRESENCE is deterministic at admission), so no scheduling,
+# no qpu capacity, no logs — it cannot flake on capacity. Contract:
+#   gang pod (faux):  FLUENCE_FAUX_SUBMIT, FLUENCE_QUANTUM_JOB_ID, PYTHONPATH, FLUXION_BACKEND
+#   submitter:        FLUENCE_GANG_GROUP on the sidecar (real submit, ungates the gang)
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+GROUP=qgang
+SUBMITTER=${GROUP}-submitter
+
+log "TEST 8: gang+submitter env contract — spec layer"
+kubectl apply -f examples/test/e2e/quantum/quantum-gang-pods.yaml
+
+# does container $2 of pod $1 have an env entry named $3 ? (spec-level only)
+# Succeeds only on an exact (grep -x) name match; a kubectl error also counts as "no".
+has_env() {
+  kubectl get pod "$1" -o jsonpath="{.spec.containers[?(@.name=='$2')].env[*].name}" \
+    2>/dev/null | tr ' ' '\n' | grep -qx "$3"
+}
+
+log "checking the webhook wired the faux contract onto a gang pod"
+# Wait up to 15 polls (2s apart) for the first var to appear, then assert all four.
+for i in $(seq 1 15); do has_env "${GROUP}-0" app FLUENCE_FAUX_SUBMIT && break; sleep 2; done
+for v in FLUENCE_FAUX_SUBMIT FLUENCE_QUANTUM_JOB_ID PYTHONPATH FLUXION_BACKEND; do
+  # On a miss, dump the pod's container spec for debugging. The dump is best-effort
+  # (|| true) so a kubectl error cannot abort the script before fail reports the var.
+  has_env "${GROUP}-0" app "$v" \
+    || { kubectl get pod "${GROUP}-0" -o yaml | sed -n '/containers:/,/status:/p' || true; \
+         fail "gang pod 'app' container missing env '$v'"; }
+  log "  gang pod has env: $v"
+done
+
+# The submitter's sidecar must know which gang to ungate.
+# Poll for the pod first (30 tries, 2s apart); a failed read below yields an empty gg.
+log "checking the submitter sidecar has FLUENCE_GANG_GROUP=$GROUP"
+for i in $(seq 1 30); do kubectl get pod "$SUBMITTER" >/dev/null 2>&1 && break; sleep 2; done
+gg="$(kubectl get pod "$SUBMITTER" \
+  -o jsonpath="{.spec.containers[?(@.name=='fluence-sidecar')].env[?(@.name=='FLUENCE_GANG_GROUP')].value}" \
+  2>/dev/null || true)"
+[ "$gg" = "$GROUP" ] || fail "submitter sidecar FLUENCE_GANG_GROUP=$gg, want $GROUP"
+log "  submitter sidecar has FLUENCE_GANG_GROUP=$gg"
+
+# And the submitter must NOT be in faux mode (it does the real submit).
+# NOTE(review): this inspects a container named 'app' on the submitter — confirm
+# the submitter actually has one, otherwise this check passes vacuously.
+if has_env "$SUBMITTER" app FLUENCE_FAUX_SUBMIT; then
+  fail "submitter must NOT carry FLUENCE_FAUX_SUBMIT (it submits for real)"
+fi
+log "  submitter is not faux"
+
+log "PASS 8: webhook injects the gang(faux) + submitter(real) env contract at admission"
+
+# Best-effort teardown: every step tolerates failure so cleanup never fails the test.
+kubectl delete -f examples/test/e2e/quantum/quantum-gang-pods.yaml --wait=false || true
+kubectl delete pod "$SUBMITTER" --wait=false 2>/dev/null || true
+for g in "$GROUP" "$SUBMITTER"; do
+  kubectl patch podgroup "$g" --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l app="$GROUP" --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/quantum/setup.sh b/test/e2e/quantum/setup.sh
new file mode 100644
index 0000000..cf35020
--- /dev/null
+++ b/test/e2e/quantum/setup.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Quantum suite setup (run by the e2e-suite workflow before the NN-*.sh tests).
+#
+# Installs the qpu add-on so nodes advertise fluxion.flux-framework.org/qpu —
+# without it every quantum pod stays Pending (fluence matches in its own graph,
+# but the default NodeResourcesFit plugin rejects each node because the extended
+# resource is not in allocatable, so the match is rolled back). The base deploy
+# (deploy/fluence-test.yaml) does NOT include this; it is quantum-only.
+#
+# Also points the webhook-injected sidecar/stage image at the CI-loaded image:
+# the default sidecar image (ghcr.io/.../fluence-sidecar:latest) is not loaded in
+# kind, so the submitter's containers could not pull. The fluence-stage init is
+# fail-soft (no python in this image -> it logs and exits 0), which is fine for
+# the structural assertions; the submitter still schedules and runs.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+# Honor an IMAGE already set in the environment; otherwise use the default tag.
+IMAGE="${IMAGE:-vanessa/fluence:test}"
+
+log "quantum setup: installing the qpu add-on (resources ConfigMap + device plugin)"
+kubectl apply -f deploy/fluence-resources-test.yaml
+
+# Run the device plugin from the CI-loaded image (its manifest ships a registry
+# image that kind has not pulled). Container name is 'deviceplugin'.
+kubectl -n kube-system set image daemonset/fluence-deviceplugin deviceplugin="$IMAGE"
+# Switching the pull policy is best-effort: a failure is reported through log and
+# setup continues, instead of being discarded silently.
+kubectl -n kube-system patch daemonset/fluence-deviceplugin --type=json \
+  -p '[{"op":"replace","path":"/spec/template/spec/containers/0/imagePullPolicy","value":"IfNotPresent"}]' \
+  2>/dev/null || log "  WARN: could not patch imagePullPolicy on daemonset/fluence-deviceplugin; continuing"
+
+# Injected sidecar + stage init must use a present image too (see header).
+kubectl -n kube-system set env deployment/fluence-webhook FLUENCE_SIDECAR_IMAGE="$IMAGE"
+kubectl -n kube-system rollout status deployment/fluence-webhook --timeout=180s
+
+# Scheduler re-reads the resources config now that the ConfigMap exists.
+kubectl -n kube-system rollout restart deployment/fluence
+kubectl -n kube-system rollout status  deployment/fluence --timeout=180s
+
+log "waiting for the device plugin DaemonSet to be Ready"
+kubectl -n kube-system rollout status daemonset/fluence-deviceplugin --timeout=180s
+
+# Block until at least one node advertises the qpu extended resource, so the
+# tests do not race the kubelet's device registration.
+# Polls up to 60 times, 3s apart; ok stays 0 if the resource never appears.
+log "waiting for nodes to advertise fluxion.flux-framework.org/qpu"
+ok=0
+for i in $(seq 1 60); do
+  if kubectl get nodes -o jsonpath='{.items[*].status.allocatable}' 2>/dev/null \
+       | grep -q 'fluxion.flux-framework.org/qpu'; then
+    ok=1; break
+  fi
+  sleep 3
+done
+[ "$ok" = 1 ] || fail "no node advertised fluxion.flux-framework.org/qpu after the add-on (device plugin not registering)"
+log "qpu advertised on at least one node"
+
+log "quantum setup complete: qpu add-on installed, scheduler restarted, sidecar image=$IMAGE"