diff --git a/cmd/atecontroller/internal/controllers/workerpool_apply.go b/cmd/atecontroller/internal/controllers/workerpool_apply.go index e9ce1f7a..9ce6a9c4 100644 --- a/cmd/atecontroller/internal/controllers/workerpool_apply.go +++ b/cmd/atecontroller/internal/controllers/workerpool_apply.go @@ -59,7 +59,7 @@ func buildDeploymentApplyConfig(wp *atev1alpha1.WorkerPool) *appsv1ac.Deployment WithPath(ateompath.BasePath). WithType(corev1.HostPathDirectoryOrCreate))) - applyWorkerPoolPodTemplate(podSpecAC, wp.Spec.Template) + applyWorkerPoolPodTemplate(podSpecAC, containerAC, wp.Spec.Template) podSpecAC.WithContainers(containerAC) return appsv1ac.Deployment(deploymentName(wp.Name), wp.Namespace). @@ -83,12 +83,15 @@ func buildDeploymentApplyConfig(wp *atev1alpha1.WorkerPool) *appsv1ac.Deployment func applyWorkerPoolPodTemplate( podSpecAC *corev1ac.PodSpecApplyConfiguration, + containerAC *corev1ac.ContainerApplyConfiguration, tmpl *atev1alpha1.WorkerPoolPodTemplate, ) { podSpecAC.NodeSelector = map[string]string{} podSpecAC.Tolerations = []corev1ac.TolerationApplyConfiguration{} podSpecAC.WithPriorityClassName("") podSpecAC.WithAffinity(corev1ac.Affinity()) + resourcesAC := corev1ac.ResourceRequirements() + containerAC.WithResources(resourcesAC) if tmpl == nil { return @@ -103,6 +106,15 @@ func applyWorkerPoolPodTemplate( if tmpl.NodeAffinity != nil { podSpecAC.WithAffinity(corev1ac.Affinity().WithNodeAffinity(nodeAffinityToApply(tmpl.NodeAffinity))) } + + if tmpl.Resources != nil { + if tmpl.Resources.Requests != nil { + resourcesAC.WithRequests(tmpl.Resources.Requests) + } + if tmpl.Resources.Limits != nil { + resourcesAC.WithLimits(tmpl.Resources.Limits) + } + } } func tolerationApplyValues(tolerations []*corev1ac.TolerationApplyConfiguration) []corev1ac.TolerationApplyConfiguration { diff --git a/cmd/atecontroller/internal/controllers/workerpool_apply_test.go b/cmd/atecontroller/internal/controllers/workerpool_apply_test.go index 5d1ccda8..7e8de95d 100644 --- a/cmd/atecontroller/internal/controllers/workerpool_apply_test.go +++ b/cmd/atecontroller/internal/controllers/workerpool_apply_test.go @@ -19,6 +19,7 @@ import ( "github.com/google/go-cmp/cmp" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" appsv1ac "k8s.io/client-go/applyconfigurations/apps/v1" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" @@ -131,6 +132,32 @@ func TestBuildDeploymentApplyConfig(t *testing.T) { podSpecAC.WithPriorityClassName("interactive-workerpool") }), }, + { + name: "with resources", + wp: testWorkerPoolApplyConfig(&atev1alpha1.WorkerPoolPodTemplate{ + Resources: &corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }), + want: expectedDeploymentApplyConfig(func(podSpecAC *corev1ac.PodSpecApplyConfiguration) { + podSpecAC.Containers[0].WithResources(corev1ac.ResourceRequirements(). + WithRequests(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }). + WithLimits(corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + })) + }), + }, { name: "with combined scheduling fields", wp: testWorkerPoolApplyConfig(&atev1alpha1.WorkerPoolPodTemplate{ @@ -220,7 +247,8 @@ func expectedDeploymentApplyConfig(mutatePodSpec func(*corev1ac.PodSpecApplyConf WithFieldPath("metadata.uid")))). WithVolumeMounts(corev1ac.VolumeMount(). WithName("run-ateom"). - WithMountPath(ateompath.BasePath))) + WithMountPath(ateompath.BasePath)). + WithResources(corev1ac.ResourceRequirements())) podSpecAC.NodeSelector = map[string]string{} podSpecAC.Tolerations = []corev1ac.TolerationApplyConfiguration{} diff --git a/cmd/atecontroller/internal/controllers/workerpool_controller_test.go b/cmd/atecontroller/internal/controllers/workerpool_controller_test.go index df1e4992..6a95b91f 100644 --- a/cmd/atecontroller/internal/controllers/workerpool_controller_test.go +++ b/cmd/atecontroller/internal/controllers/workerpool_controller_test.go @@ -24,6 +24,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" k8errors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -361,6 +362,16 @@ func sampleWorkerPoolPodTemplate() *atev1alpha1.WorkerPoolPodTemplate { }}, }, }, + Resources: &corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, } } @@ -394,7 +405,10 @@ func TestWorkerPoolPodTemplatePropagation(t *testing.T) { if podSpec.Affinity == nil || podSpec.Affinity.NodeAffinity == nil { return false, nil } - return len(container.Resources.Requests) == 0 && len(container.Resources.Limits) == 0, nil + return container.Resources.Requests.Cpu().String() == "500m" && + container.Resources.Requests.Memory().String() == "1Gi" && + container.Resources.Limits.Cpu().String() == "1" && + container.Resources.Limits.Memory().String() == "2Gi", nil }) } @@ -428,7 +442,7 @@ func TestWorkerPoolPodTemplateUpdate(t *testing.T) { } podSpec := dep.Spec.Template.Spec return podSpec.NodeSelector["workload"] == "updated" && - len(podSpec.Containers[0].Resources.Requests) == 0, nil + podSpec.Containers[0].Resources.Requests.Cpu().String() == "500m", nil }) } @@ -486,7 +500,7 @@ func TestWorkerPoolPodTemplateClearAll(t *testing.T) { podSpec.PriorityClassName == "substrate-workers" && podSpec.Affinity != nil && podSpec.Affinity.NodeAffinity != nil && - len(container.Resources.Requests) == 0, nil + container.Resources.Requests.Cpu().String() == "500m", nil }) if err := k8sClient.Get(testCtx, types.NamespacedName{Name: wp.Name, Namespace: wp.Namespace}, wp); err != nil { diff --git a/docs/api-guide.md b/docs/api-guide.md index 57ad9368..510dcd3f 100644 --- a/docs/api-guide.md +++ b/docs/api-guide.md @@ -24,6 +24,7 @@ The `WorkerPool` defines the pool of physical "warm" compute capacity. It manage | `tolerations` | `[]Toleration` | `spec.tolerations` (max 16) | | `priorityClassName` | `string` | `spec.priorityClassName` | | `nodeAffinity` | `NodeAffinity` | `spec.affinity.nodeAffinity` | +| `resources` | `ResourceRequirements` | `spec.containers[].resources` | ### Example @@ -68,6 +69,13 @@ spec: - key: workload operator: In values: [substrate] + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: "1" + memory: 2Gi ``` --- diff --git a/manifests/ate-install/generated/ate.dev_workerpools.yaml b/manifests/ate-install/generated/ate.dev_workerpools.yaml index 5714891a..408a7c9f 100644 --- a/manifests/ate-install/generated/ate.dev_workerpools.yaml +++ b/manifests/ate-install/generated/ate.dev_workerpools.yaml @@ -307,6 +307,66 @@ spec: priorityClassName: description: PriorityClassName for the worker pods. type: string + resources: + description: Resources are the compute resources allocated for + each worker pod. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This field depends on the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object tolerations: description: Tolerations for the worker pods. items: diff --git a/pkg/api/v1alpha1/workerpool_types.go b/pkg/api/v1alpha1/workerpool_types.go index cb081827..44dd25da 100644 --- a/pkg/api/v1alpha1/workerpool_types.go +++ b/pkg/api/v1alpha1/workerpool_types.go @@ -44,6 +44,11 @@ type WorkerPoolPodTemplate struct { // // +optional NodeAffinity *corev1.NodeAffinity `json:"nodeAffinity,omitempty"` + + // Resources are the compute resources allocated for each worker pod. + // + // +optional + Resources *corev1.ResourceRequirements `json:"resources,omitempty"` } type WorkerPoolSpec struct { diff --git a/pkg/api/v1alpha1/workerpool_validation_test.go b/pkg/api/v1alpha1/workerpool_validation_test.go index 5c213bc1..ab6dc0ab 100644 --- a/pkg/api/v1alpha1/workerpool_validation_test.go +++ b/pkg/api/v1alpha1/workerpool_validation_test.go @@ -20,6 +20,7 @@ import ( "testing" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -70,6 +71,16 @@ func TestWorkerPoolValidation(t *testing.T) { Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoSchedule, }}, + Resources: &corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, } }, wantErr: false, diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 65cd9cdb..dec84c92 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -428,6 +428,11 @@ func (in *WorkerPoolPodTemplate) DeepCopyInto(out *WorkerPoolPodTemplate) { *out = new(corev1.NodeAffinity) (*in).DeepCopyInto(*out) } + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(corev1.ResourceRequirements) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkerPoolPodTemplate.