diff --git a/api/nvidia.com/resource/gpu/v1alpha1/multinode.go b/api/nvidia.com/resource/gpu/v1alpha1/multinode.go index c48eb04b..23593eb3 100644 --- a/api/nvidia.com/resource/gpu/v1alpha1/multinode.go +++ b/api/nvidia.com/resource/gpu/v1alpha1/multinode.go @@ -43,8 +43,11 @@ type MultiNodeEnvironmentList struct { Items []MultiNodeEnvironment `json:"items"` } +// +kubebuilder:validation:XValidation:rule="(has(self.resourceClaimName) ? !has(self.deviceClassName) : has(self.deviceClassName))",message="Exactly one of 'resourceClaimName' or 'deviceClassName' must be set." + // MultiNodeEnvironmentSpec provides the spec for a MultiNodeEnvironment. type MultiNodeEnvironmentSpec struct { NumNodes int `json:"numNodes"` - ResourceClaimName string `json:"resourceClaimName"` + ResourceClaimName string `json:"resourceClaimName,omitempty"` + DeviceClassName string `json:"deviceClassName,omitempty"` } diff --git a/cmd/nvidia-dra-controller/mnenv.go b/cmd/nvidia-dra-controller/mnenv.go index 77132495..5568e677 100644 --- a/cmd/nvidia-dra-controller/mnenv.go +++ b/cmd/nvidia-dra-controller/mnenv.go @@ -172,13 +172,15 @@ func (m *MultiNodeEnvironmentManager) onMultiNodeEnvironmentAdd(obj any) error { Controller: ptr.To(true), } - dc, err := m.createDeviceClass("", ownerReference) + dc, err := m.createDeviceClass(mne.Spec.DeviceClassName, ownerReference) if err != nil { return fmt.Errorf("error creating DeviceClass '%s': %w", "", err) } - if _, err := m.createResourceClaim(mne.Namespace, mne.Spec.ResourceClaimName, dc.Name, ownerReference); err != nil { - return fmt.Errorf("error creating ResourceClaim '%s/%s': %w", mne.Namespace, mne.Spec.ResourceClaimName, err) + if mne.Spec.ResourceClaimName != "" { + if _, err := m.createResourceClaim(mne.Namespace, mne.Spec.ResourceClaimName, dc.Name, ownerReference); err != nil { + return fmt.Errorf("error creating ResourceClaim '%s/%s': %w", mne.Namespace, mne.Spec.ResourceClaimName, err) + } } return nil diff --git a/deployments/helm/k8s-dra-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml b/deployments/helm/k8s-dra-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml index cb4fac04..b29f5490 100644 --- a/deployments/helm/k8s-dra-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml +++ b/deployments/helm/k8s-dra-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml @@ -40,14 +40,19 @@ spec: spec: description: MultiNodeEnvironmentSpec provides the spec for a MultiNodeEnvironment. properties: + deviceClassName: + type: string numNodes: type: integer resourceClaimName: type: string required: - numNodes - - resourceClaimName type: object + x-kubernetes-validations: + - message: Exactly one of 'resourceClaimName' or 'deviceClassName' must + be set. + rule: '(has(self.resourceClaimName) ? !has(self.deviceClassName) : has(self.deviceClassName))' type: object served: true storage: true diff --git a/deployments/helm/k8s-dra-driver/templates/deviceclass-imex.yaml b/deployments/helm/k8s-dra-driver/templates/deviceclass-imex.yaml deleted file mode 100644 index bb26adbb..00000000 --- a/deployments/helm/k8s-dra-driver/templates/deviceclass-imex.yaml +++ /dev/null @@ -1,11 +0,0 @@ -{{- if include "k8s-dra-driver.listHas" (list $.Values.deviceClasses "imex") }} ---- -apiVersion: resource.k8s.io/v1beta1 -kind: DeviceClass -metadata: - name: imex.nvidia.com -spec: - selectors: - - cel: - expression: "device.driver == 'gpu.nvidia.com' && device.attributes['gpu.nvidia.com'].type == 'imex-channel'" -{{- end }}