Skip to content

Commit

Permalink
WIP: Move to ResourceClaimTemplates instead of global ResourceClaims
Browse files Browse the repository at this point in the history
Signed-off-by: Kevin Klues <[email protected]>
  • Loading branch information
klueska committed Feb 9, 2025
1 parent 65f7709 commit 6ebdfce
Show file tree
Hide file tree
Showing 12 changed files with 217 additions and 408 deletions.
18 changes: 9 additions & 9 deletions api/nvidia.com/resource/v1beta1/computedomain.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ type ComputeDomainList struct {
}

// +kubebuilder:validation:XValidation:rule="self == oldSelf", message="A computeDomain.spec is immutable"
// +kubebuilder:validation:XValidation:rule="self.mode == 'Immediate' || (self.mode == 'Delayed' && size(self.resourceClaims) == 1)",message="When 'mode' is 'Delayed', 'resourceClaims' must have exactly one entry."
// +kubebuilder:validation:XValidation:rule="self.mode == 'Immediate' || (self.mode == 'Delayed' && size(self.resourceClaimTemplates) == 1)",message="When 'mode' is 'Delayed', 'resourceClaimTemplates' must have exactly one entry."
// +kubebuilder:validation:XValidation:rule="self.mode == 'Immediate' || (self.mode == 'Delayed' && !has(self.nodeSelector))",message="When 'mode' is 'Delayed', 'NodeSelector' must not be set."
// +kubebuilder:validation:XValidation:rule="self.mode == 'Immediate' || (self.mode == 'Delayed' && !has(self.topologyAlignment))",message="When 'mode' is 'Delayed', 'TopologyAlignment' must not be set."
// +kubebuilder:validation:XValidation:rule="self.mode == 'Immediate' || (self.mode == 'Delayed' && (!has(self.nodeAffinity) || !has(self.nodeAffinity.preferred)))",message="When mode is 'Delayed', 'nodeAffinity.preferred' must not be set; only 'nodeAffinity.required' is allowed."
Expand All @@ -65,16 +65,16 @@ type ComputeDomainList struct {
type ComputeDomainSpec struct {
// +kubebuilder:validation:Enum=Immediate;Delayed
// +kubebuilder:default=Immediate
Mode string `json:"mode"`
NumNodes int `json:"numNodes"`
ResourceClaims []ComputeDomainResourceClaim `json:"resourceClaims"`
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
NodeAffinity *ComputeDomainNodeAffinity `json:"nodeAffinity,omitempty"`
TopologyAlignment *ComputeDomainTopologyAlignment `json:"topologyAlignment,omitempty"`
TopologyAntiAlignment *ComputeDomainTopologyAlignment `json:"topologyAntiAlignment,omitempty"`
Mode string `json:"mode"`
NumNodes int `json:"numNodes"`
ResourceClaimTemplates []ComputeDomainResourceClaimTemplate `json:"resourceClaimTemplates"`
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
NodeAffinity *ComputeDomainNodeAffinity `json:"nodeAffinity,omitempty"`
TopologyAlignment *ComputeDomainTopologyAlignment `json:"topologyAlignment,omitempty"`
TopologyAntiAlignment *ComputeDomainTopologyAlignment `json:"topologyAntiAlignment,omitempty"`
}

type ComputeDomainResourceClaim struct {
type ComputeDomainResourceClaimTemplate struct {
Name string `json:"name"`
}

Expand Down
14 changes: 7 additions & 7 deletions api/nvidia.com/resource/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 28 additions & 16 deletions cmd/compute-domain-controller/channels.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ import (
"k8s.io/dynamic-resource-allocation/resourceslice"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"

nvapi "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1"
)

const (
ResourceSliceComputeDomainChannelStart = 1 // Channel 0 is reserved, and advertised by the node itself
ResourceSliceComputeDomainChannelLimit = 128 // There is a limit of 128 per ResourceSlice
DriverComputeDomainChannelLimit = 128 // The actual limit is 2048, but keep things to a single slice for now
ResourceSliceComputeDomainChannelStart = 1 // Channel 0 is reserved, and advertised by the node itself
ResourceSliceComputeDomainChannelLimit = 128 // There is a limit of 128 per ResourceSlice
DriverComputeDomainChannelLimit = 2048 // This limit is imposed by the underlying GPU driver
)

type ComputeDomainChannelManager struct {
Expand Down Expand Up @@ -99,47 +101,57 @@ func (m *ComputeDomainChannelManager) Stop() error {
}

// CreateOrUpdatePool creates or updates a pool of ComputeDomain channels for the given ComputeDomain.
func (m *ComputeDomainChannelManager) CreateOrUpdatePool(computeDomainName string, nodeSelector *v1.NodeSelector) error {
func (m *ComputeDomainChannelManager) CreateOrUpdatePool(cd *nvapi.ComputeDomain, nodeSelector *v1.NodeSelector) error {
var slices []resourceslice.Slice
for i := m.resourceSliceComputeDomainChannelStart; i < m.driverComputeDomainChannelLimit; i += m.resourceSliceComputeDomainChannelLimit {
slice := m.generatePoolSlice(computeDomainName, i, m.resourceSliceComputeDomainChannelLimit)
slices = append(slices, slice)
for i := m.resourceSliceComputeDomainChannelStart; i < (len(cd.Spec.ResourceClaimTemplates) + m.resourceSliceComputeDomainChannelStart); i++ {
remainingCopies := cd.Spec.NumNodes
for j := 0; remainingCopies > 0; j++ {
count := m.resourceSliceComputeDomainChannelLimit
if remainingCopies < m.resourceSliceComputeDomainChannelLimit {
count = remainingCopies
}

slice := m.generatePoolSlice(string(cd.UID), i, j, count)
slices = append(slices, slice)

remainingCopies -= count
}
}

pool := resourceslice.Pool{
NodeSelector: nodeSelector,
Slices: slices,
}

m.driverResources.Pools[computeDomainName] = pool
m.driverResources.Pools[string(cd.UID)] = pool
m.controller.Update(m.driverResources)

return nil
}

// DeletePool deletes a pool of ComputeDomain channels for the given ComputeDomain.
func (m *ComputeDomainChannelManager) DeletePool(computeDomainName string) error {
delete(m.driverResources.Pools, computeDomainName)
func (m *ComputeDomainChannelManager) DeletePool(cdUID string) error {
delete(m.driverResources.Pools, cdUID)
m.controller.Update(m.driverResources)
return nil
}

// generatePoolSlice generates the contents of a single ResourceSlice of ComputeDomain channels in the given range.
func (m *ComputeDomainChannelManager) generatePoolSlice(computeDomainName string, startChannel, numChannels int) resourceslice.Slice {
// generatePoolSlice generates the contents of a single ResourceSlice of ComputeDomain channels.
func (m *ComputeDomainChannelManager) generatePoolSlice(cdUID string, channel, sliceIndex, count int) resourceslice.Slice {
var devices []resourceapi.Device
for i := startChannel; i < (startChannel + numChannels); i++ {
for i := 0; i < count; i++ {
d := resourceapi.Device{
Name: fmt.Sprintf("channel-%d", i),
Name: fmt.Sprintf("channel-%d-%d-%d", channel, sliceIndex, i),
Basic: &resourceapi.BasicDevice{
Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
"type": {
StringValue: ptr.To("channel"),
},
"domain": {
StringValue: ptr.To(computeDomainName),
StringValue: ptr.To(cdUID),
},
"id": {
IntValue: ptr.To(int64(i)),
IntValue: ptr.To(int64(channel)),
},
},
},
Expand Down
68 changes: 17 additions & 51 deletions cmd/compute-domain-controller/computedomain.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,13 @@ const (
computeDomainLabelKey = "resource.nvidia.com/computeDomain"
computeDomainFinalizer = computeDomainLabelKey

computeDomainDefaultChannelDeviceClass = "compute-domain-default-channel.nvidia.com"
computeDomainChannelDeviceClass = "compute-domain-channel.nvidia.com"
computeDomainDaemonDeviceClass = "compute-domain-daemon.nvidia.com"

computeDomainResourceClaimTemplateTargetLabelKey = "resource.nvidia.com/computeDomainTarget"
computeDomainResourceClaimTemplateTargetDaemon = "Daemon"
computeDomainResourceClaimTemplateTargetWorkload = "Workload"
)

type ComputeDomainManager struct {
Expand All @@ -51,10 +56,9 @@ type ComputeDomainManager struct {
factory nvinformers.SharedInformerFactory
informer cache.SharedIndexInformer

deploymentManager *DeploymentManager
deviceClassManager *DeviceClassManager
resourceClaimManager *ResourceClaimManager
computeDomainChannelManager *ComputeDomainChannelManager
deploymentManager *DeploymentManager
resourceClaimTemplateManager *WorkloadResourceClaimTemplateManager
computeDomainChannelManager *ComputeDomainChannelManager
}

// NewComputeDomainManager creates a new ComputeDomainManager.
Expand All @@ -68,8 +72,7 @@ func NewComputeDomainManager(config *ManagerConfig) *ComputeDomainManager {
informer: informer,
}
m.deploymentManager = NewDeploymentManager(config, m.Get)
m.deviceClassManager = NewDeviceClassManager(config)
m.resourceClaimManager = NewResourceClaimManager(config)
m.resourceClaimTemplateManager = NewWorkloadResourceClaimTemplateManager(config)
m.computeDomainChannelManager = NewComputeDomainChannelManager(config)

return m
Expand Down Expand Up @@ -121,11 +124,7 @@ func (m *ComputeDomainManager) Start(ctx context.Context) (rerr error) {
return fmt.Errorf("error starting Deployment manager: %w", err)
}

if err := m.deviceClassManager.Start(ctx); err != nil {
return fmt.Errorf("error creating DeviceClass manager: %w", err)
}

if err := m.resourceClaimManager.Start(ctx); err != nil {
if err := m.resourceClaimTemplateManager.Start(ctx); err != nil {
return fmt.Errorf("error creating ResourceClaim manager: %w", err)
}

Expand All @@ -140,12 +139,9 @@ func (m *ComputeDomainManager) Stop() error {
if err := m.deploymentManager.Stop(); err != nil {
return fmt.Errorf("error stopping Deployment manager: %w", err)
}
if err := m.resourceClaimManager.Stop(); err != nil {
if err := m.resourceClaimTemplateManager.Stop(); err != nil {
return fmt.Errorf("error stopping ResourceClaim manager: %w", err)
}
if err := m.deviceClassManager.Stop(); err != nil {
return fmt.Errorf("error stopping DeviceClass manager: %w", err)
}
if err := m.computeDomainChannelManager.Stop(); err != nil {
return fmt.Errorf("error stopping ComputeDomain channel manager: %w", err)
}
Expand Down Expand Up @@ -231,14 +227,10 @@ func (m *ComputeDomainManager) onAddOrUpdate(ctx context.Context, obj any) error
return fmt.Errorf("error deleting ComputeDomain channel pool: %w", err)
}

if err := m.resourceClaimManager.Delete(ctx, string(cd.UID)); err != nil {
if err := m.resourceClaimTemplateManager.Delete(ctx, string(cd.UID)); err != nil {
return fmt.Errorf("error deleting ResourceClaim: %w", err)
}

if err := m.deviceClassManager.Delete(ctx, string(cd.UID)); err != nil {
return fmt.Errorf("error deleting DeviceClass: %w", err)
}

if err := m.deploymentManager.Delete(ctx, string(cd.UID)); err != nil {
return fmt.Errorf("error deleting Deployment: %w", err)
}
Expand All @@ -249,14 +241,10 @@ func (m *ComputeDomainManager) onAddOrUpdate(ctx context.Context, obj any) error
// has been deleted, and (2) track the allocation of channels in the
// ComputeDomain status and wait for that list to become empty.
if true {
if err := m.resourceClaimManager.RemoveFinalizer(ctx, string(cd.UID)); err != nil {
if err := m.resourceClaimTemplateManager.RemoveFinalizer(ctx, string(cd.UID)); err != nil {
return fmt.Errorf("error deleting ResourceClaim: %w", err)
}

if err := m.deviceClassManager.RemoveFinalizer(ctx, string(cd.UID)); err != nil {
return fmt.Errorf("error deleting DeviceClass: %w", err)
}

if err := m.deploymentManager.RemoveFinalizer(ctx, string(cd.UID)); err != nil {
return fmt.Errorf("error deleting Deployment: %w", err)
}
Expand All @@ -277,14 +265,9 @@ func (m *ComputeDomainManager) onAddOrUpdate(ctx context.Context, obj any) error
return fmt.Errorf("error creating Deployment: %w", err)
}

dc, err := m.deviceClassManager.Create(ctx, cd)
if err != nil {
return fmt.Errorf("error creating DeviceClass: %w", err)
}

for _, rc := range cd.Spec.ResourceClaims {
if _, err := m.resourceClaimManager.Create(ctx, cd.Namespace, rc.Name, dc.Name, cd); err != nil {
return fmt.Errorf("error creating ResourceClaim '%s/%s': %w", cd.Namespace, rc.Name, err)
for i, rc := range cd.Spec.ResourceClaimTemplates {
if _, err := m.resourceClaimTemplateManager.Create(ctx, cd.Namespace, rc.Name, i+1, cd); err != nil {
return fmt.Errorf("error creating ResourceClaim '%s/%s' for channel %d: %w", cd.Namespace, rc.Name, i+1, err)
}
}

Expand All @@ -298,12 +281,6 @@ func (m *ComputeDomainManager) onAddOrUpdate(ctx context.Context, obj any) error
}
}

if cd.Spec.Mode == nvapi.ComputeDomainModeDelayed {
if err := m.createOrUpdatePoolDelayedMode(ctx, cd); err != nil {
return fmt.Errorf("error creating or updating pool: %w", err)
}
}

return nil
}

Expand All @@ -327,20 +304,9 @@ func (m *ComputeDomainManager) createOrUpdatePoolImmediateMode(ctx context.Conte
},
}

if err := m.computeDomainChannelManager.CreateOrUpdatePool(string(cd.UID), &nodeSelector); err != nil {
if err := m.computeDomainChannelManager.CreateOrUpdatePool(cd, &nodeSelector); err != nil {
return fmt.Errorf("failed to create or update ComputeDomain channel pool: %w", err)
}

return nil
}

func (m *ComputeDomainManager) createOrUpdatePoolDelayedMode(ctx context.Context, cd *nvapi.ComputeDomain) error {
var nodeSelector *corev1.NodeSelector
if cd.Spec.NodeAffinity != nil && cd.Spec.NodeAffinity.Required != nil {
nodeSelector = cd.Spec.NodeAffinity.Required
}
if err := m.computeDomainChannelManager.CreateOrUpdatePool(string(cd.UID), nodeSelector); err != nil {
return fmt.Errorf("failed to create or update ComputeDomain channel pool: %w", err)
}
return nil
}
14 changes: 4 additions & 10 deletions cmd/compute-domain-controller/deploymentresourceclaimtemplate.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ import (
)

const (
ComputeDomainDaemonDeviceClass = "compute-domain-daemon.nvidia.com"
ComputeDomainDefaultChannelDeviceClass = "compute-domain-default-channel.nvidia.com"
DeploymentResourceClaimTemplateTemplatePath = "/templates/compute-domain-daemon-claim-template.tmpl.yaml"
)

Expand Down Expand Up @@ -159,23 +157,19 @@ func (m *DeploymentResourceClaimTemplateManager) Create(ctx context.Context, nam

templateData := DeploymentResourceClaimTemplateTemplateData{
Namespace: namespace,
GenerateName: fmt.Sprintf("%s-claim-template-", cd.Name),
GenerateName: fmt.Sprintf("%s-daemon-claim-template-", cd.Name),
Finalizer: computeDomainFinalizer,
ComputeDomainLabelKey: computeDomainLabelKey,
ComputeDomainLabelValue: cd.UID,
TargetLabelKey: computeDomainResourceClaimTemplateTargetLabelKey,
TargetLabelValue: computeDomainResourceClaimTemplateTargetDaemon,
DaemonDeviceClassName: ComputeDomainDaemonDeviceClass,
DaemonDeviceClassName: computeDomainDaemonDeviceClass,
DriverName: DriverName,
DaemonConfig: daemonConfig,
}

// TODO: Add the commented conditional once we have a way for workloads to
// directly consume the node-advertised channel 0 via a global
// ResourceClaim in delayed mode.
//if cd.Spec.Mode == nvapi.ComputeDomainModeImmediate {
if true {
templateData.ChannelDeviceClassName = ComputeDomainDefaultChannelDeviceClass
if cd.Spec.Mode == nvapi.ComputeDomainModeImmediate {
templateData.ChannelDeviceClassName = computeDomainDefaultChannelDeviceClass
templateData.ChannelConfig = channelConfig
}

Expand Down
Loading

0 comments on commit 6ebdfce

Please sign in to comment.