From b28e45aee6fa5a14978dfbfbb74799c1f0af1e3d Mon Sep 17 00:00:00 2001 From: Kirill Sibirev Date: Thu, 12 Sep 2024 15:20:38 +0200 Subject: [PATCH] Add tolerations and nodeselectors to jobs (#342) * Add tolerations and nodeselectors to jobs * Fix tolerations passing --- api/v1/ytsaurus_types.go | 3 ++ api/v1/zz_generated.deepcopy.go | 14 ++++++++ .../bases/cluster.ytsaurus.tech_ytsaurus.yaml | 34 +++++++++++++++++++ docs/api.md | 2 ++ pkg/components/chyt.go | 15 ++++++-- pkg/components/helpers.go | 14 ++++++++ pkg/components/init_job.go | 16 +++++++-- pkg/components/init_job_test.go | 1 + pkg/components/master.go | 7 +++- pkg/components/query_tracker.go | 5 ++- pkg/components/queue_agent.go | 5 ++- pkg/components/scheduler.go | 10 ++++-- pkg/components/spyt.go | 10 ++++-- pkg/components/strawberry_controller.go | 10 ++++-- pkg/components/ui.go | 5 ++- pkg/components/yql_agent.go | 5 ++- pkg/components/ytsaurus_client.go | 5 ++- .../crds/ytsaurus.cluster.ytsaurus.tech.yaml | 34 +++++++++++++++++++ 18 files changed, 177 insertions(+), 18 deletions(-) diff --git a/api/v1/ytsaurus_types.go b/api/v1/ytsaurus_types.go index 750854e8..702e4777 100644 --- a/api/v1/ytsaurus_types.go +++ b/api/v1/ytsaurus_types.go @@ -619,6 +619,9 @@ type YtsaurusSpec struct { // If UpdateSelector is not empty EnableFullUpdate is ignored. UpdateSelector UpdateSelector `json:"updateSelector"` + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + Bootstrap *BootstrapSpec `json:"bootstrap,omitempty"` Discovery DiscoverySpec `json:"discovery,omitempty"` diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 83063fb4..ec571566 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -1784,6 +1784,20 @@ func (in *YtsaurusSpec) DeepCopyInto(out *YtsaurusSpec) { *out = new(OauthServiceSpec) (*in).DeepCopyInto(*out) } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]corev1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Bootstrap != nil { in, out := &in.Bootstrap, &out.Bootstrap *out = new(BootstrapSpec) diff --git a/config/crd/bases/cluster.ytsaurus.tech_ytsaurus.yaml b/config/crd/bases/cluster.ytsaurus.tech_ytsaurus.yaml index 7aa84e64..c8d18a16 100644 --- a/config/crd/bases/cluster.ytsaurus.tech_ytsaurus.yaml +++ b/config/crd/bases/cluster.ytsaurus.tech_ytsaurus.yaml @@ -14887,6 +14887,10 @@ spec: type: object x-kubernetes-map-type: atomic type: object + nodeSelector: + additionalProperties: + type: string + type: object oauthService: properties: host: @@ -34371,6 +34375,36 @@ spec: - portCount type: object type: array + tolerations: + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the trip + properties: + effect: + description: Effect indicates the taint effect to match. Empty + means match all taint effects. + type: string + key: + description: Key is the taint key that the toleration applies + to. + type: string + operator: + description: Operator represents a key's relationship to the + value. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of + format: int64 + type: integer + value: + description: Value is the taint value the toleration matches + to. + type: string + type: object + type: array ui: properties: description: diff --git a/docs/api.md b/docs/api.md index a451f19a..1eb1e2a0 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1836,6 +1836,8 @@ _Appears in:_ | `isManaged` _boolean_ | | true | | | `enableFullUpdate` _boolean_ | | true | | | `updateSelector` _[UpdateSelector](#updateselector)_ | UpdateSelector is an experimental field. Behaviour may change.
If UpdateSelector is not empty EnableFullUpdate is ignored. | | Enum: [ Nothing StatelessOnly MasterOnly TabletNodesOnly ExecNodesOnly Everything]
| +| `nodeSelector` _object (keys:string, values:string)_ | | | | +| `tolerations` _[Toleration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#toleration-v1-core) array_ | | | | | `bootstrap` _[BootstrapSpec](#bootstrapspec)_ | | | | | `discovery` _[DiscoverySpec](#discoveryspec)_ | | | | | `primaryMasters` _[MastersSpec](#mastersspec)_ | | | | diff --git a/pkg/components/chyt.go b/pkg/components/chyt.go index f527cd3e..83012f0b 100644 --- a/pkg/components/chyt.go +++ b/pkg/components/chyt.go @@ -54,7 +54,10 @@ func NewChyt( "user", consts.ClientConfigFileName, ytsaurus.Spec.CoreImage, - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + ytsaurus.Spec.Tolerations, + ytsaurus.Spec.NodeSelector, + ), initEnvironment: NewInitJob( &l, chyt.APIProxy(), @@ -63,7 +66,10 @@ func NewChyt( "release", consts.ClientConfigFileName, chyt.GetResource().Spec.Image, - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + ytsaurus.Spec.Tolerations, + ytsaurus.Spec.NodeSelector, + ), initChPublicJob: NewInitJob( &l, chyt.APIProxy(), @@ -72,7 +78,10 @@ func NewChyt( "ch-public", consts.ClientConfigFileName, chyt.GetResource().Spec.Image, - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + ytsaurus.Spec.Tolerations, + ytsaurus.Spec.NodeSelector, + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/pkg/components/helpers.go b/pkg/components/helpers.go index 795ee3c6..e7a8635f 100644 --- a/pkg/components/helpers.go +++ b/pkg/components/helpers.go @@ -234,3 +234,17 @@ func getImageWithDefault(componentImage *string, defaultImage string) string { } return defaultImage } + +func getTolerationsWithDefault(componentTolerations, defaultTolerations []corev1.Toleration) []corev1.Toleration { + if len(componentTolerations) != 0 { + return componentTolerations + } + return defaultTolerations +} + +func getNodeSelectorWithDefault(componentNodeSelector, defaultNodeSelector map[string]string) map[string]string { + if len(componentNodeSelector) != 0 { + return componentNodeSelector + } + return defaultNodeSelector +} diff --git a/pkg/components/init_job.go b/pkg/components/init_job.go index 46d82176..67d367fe 100644 --- a/pkg/components/init_job.go +++ b/pkg/components/init_job.go @@ -45,7 +45,9 @@ type InitJob struct { configHelper *ConfigHelper initCompletedCondition string - image string + image string + tolerations []corev1.Toleration + nodeSelector map[string]string builtJob *batchv1.Job } @@ -56,7 +58,10 @@ func NewInitJob( conditionsManager apiproxy.ConditionManager, imagePullSecrets []corev1.LocalObjectReference, name, configFileName, image string, - generator ytconfig.YsonGeneratorFunc) *InitJob { + generator ytconfig.YsonGeneratorFunc, + tolerations []corev1.Toleration, + nodeSelector map[string]string, +) *InitJob { return &InitJob{ baseComponent: baseComponent{ labeller: labeller, @@ -66,10 +71,13 @@ func NewInitJob( imagePullSecrets: imagePullSecrets, initCompletedCondition: fmt.Sprintf("%s%sInitJobCompleted", name, labeller.GetFullComponentName()), image: image, + tolerations: tolerations, + nodeSelector: nodeSelector, initJob: resources.NewJob( labeller.GetInitJobName(name), labeller, - apiProxy), + apiProxy, + ), configHelper: NewConfigHelper( labeller, apiProxy, @@ -120,6 +128,8 @@ func (j *InitJob) Build() *batchv1.Job { createConfigVolume(consts.ConfigVolumeName, j.configHelper.GetConfigMapName(), &defaultMode), }, RestartPolicy: corev1.RestartPolicyOnFailure, + Tolerations: j.tolerations, + NodeSelector: j.nodeSelector, }, } j.builtJob = job diff --git a/pkg/components/init_job_test.go b/pkg/components/init_job_test.go index aaa5aa07..3ec20e55 100644 --- a/pkg/components/init_job_test.go +++ b/pkg/components/init_job_test.go @@ -89,6 +89,7 @@ func newTestJob(ytsaurus *apiproxy.Ytsaurus) *InitJob { consts.ClientConfigFileName, "dummy-image", func() ([]byte, error) { return []byte("dummy-cfg"), nil }, + nil, nil, ) } diff --git a/pkg/components/master.go b/pkg/components/master.go index d5013b3e..30d11fee 100644 --- a/pkg/components/master.go +++ b/pkg/components/master.go @@ -70,7 +70,10 @@ func NewMaster(cfgen *ytconfig.Generator, ytsaurus *apiproxy.Ytsaurus) *Master { "default", consts.ClientConfigFileName, getImageWithDefault(resource.Spec.PrimaryMasters.InstanceSpec.Image, resource.Spec.CoreImage), - cfgen.GetNativeClientConfig) + cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.PrimaryMasters.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.PrimaryMasters.NodeSelector, resource.Spec.NodeSelector), + ) exitReadOnlyJob := NewInitJob( &l, @@ -81,6 +84,8 @@ func NewMaster(cfgen *ytconfig.Generator, ytsaurus *apiproxy.Ytsaurus) *Master { consts.ClientConfigFileName, getImageWithDefault(resource.Spec.PrimaryMasters.InstanceSpec.Image, resource.Spec.CoreImage), cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.PrimaryMasters.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.PrimaryMasters.NodeSelector, resource.Spec.NodeSelector), ) return &Master{ diff --git a/pkg/components/query_tracker.go b/pkg/components/query_tracker.go index 7371e571..c58545d6 100644 --- a/pkg/components/query_tracker.go +++ b/pkg/components/query_tracker.go @@ -80,7 +80,10 @@ func NewQueryTracker( "qt-state", consts.ClientConfigFileName, getImageWithDefault(resource.Spec.QueryTrackers.InstanceSpec.Image, resource.Spec.CoreImage), - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.QueryTrackers.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.QueryTrackers.NodeSelector, resource.Spec.NodeSelector), + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/pkg/components/queue_agent.go b/pkg/components/queue_agent.go index b95ab87b..8881ea5f 100644 --- a/pkg/components/queue_agent.go +++ b/pkg/components/queue_agent.go @@ -83,7 +83,10 @@ func NewQueueAgent( "qa-state", consts.ClientConfigFileName, getImageWithDefault(resource.Spec.QueueAgents.InstanceSpec.Image, resource.Spec.CoreImage), - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.QueueAgents.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.QueueAgents.NodeSelector, resource.Spec.NodeSelector), + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/pkg/components/scheduler.go b/pkg/components/scheduler.go index e392b8d1..24bbf420 100644 --- a/pkg/components/scheduler.go +++ b/pkg/components/scheduler.go @@ -80,7 +80,10 @@ func NewScheduler( "user", consts.ClientConfigFileName, getImageWithDefault(resource.Spec.Schedulers.InstanceSpec.Image, resource.Spec.CoreImage), - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.Schedulers.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.Schedulers.NodeSelector, resource.Spec.NodeSelector), + ), initOpArchive: NewInitJob( &l, ytsaurus.APIProxy(), @@ -89,7 +92,10 @@ func NewScheduler( "op-archive", consts.ClientConfigFileName, getImageWithDefault(resource.Spec.Schedulers.InstanceSpec.Image, resource.Spec.CoreImage), - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.Schedulers.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.Schedulers.NodeSelector, resource.Spec.NodeSelector), + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/pkg/components/spyt.go b/pkg/components/spyt.go index ebcbecf9..8aa8a56b 100644 --- a/pkg/components/spyt.go +++ b/pkg/components/spyt.go @@ -52,7 +52,10 @@ func NewSpyt( "user", consts.ClientConfigFileName, ytsaurus.Spec.CoreImage, - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + ytsaurus.Spec.Tolerations, + ytsaurus.Spec.NodeSelector, + ), initEnvironment: NewInitJob( &l, spyt.APIProxy(), @@ -61,7 +64,10 @@ func NewSpyt( "spyt-environment", consts.ClientConfigFileName, spyt.GetResource().Spec.Image, - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + ytsaurus.Spec.Tolerations, + ytsaurus.Spec.NodeSelector, + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/pkg/components/strawberry_controller.go b/pkg/components/strawberry_controller.go index 0206eced..6e710dfa 100644 --- a/pkg/components/strawberry_controller.go +++ b/pkg/components/strawberry_controller.go @@ -84,7 +84,10 @@ func NewStrawberryController( "user", consts.ClientConfigFileName, getImageWithDefault(resource.Spec.StrawberryController.Image, resource.Spec.CoreImage), - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.StrawberryController.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.StrawberryController.NodeSelector, resource.Spec.NodeSelector), + ), initChytClusterJob: NewInitJob( &l, ytsaurus.APIProxy(), @@ -93,7 +96,10 @@ func NewStrawberryController( "cluster", ChytInitClusterJobConfigFileName, getImageWithDefault(resource.Spec.StrawberryController.Image, resource.Spec.CoreImage), - cfgen.GetChytInitClusterConfig), + cfgen.GetChytInitClusterConfig, + getTolerationsWithDefault(resource.Spec.StrawberryController.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.StrawberryController.NodeSelector, resource.Spec.NodeSelector), + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/pkg/components/ui.go b/pkg/components/ui.go index 9609bc77..02565b58 100644 --- a/pkg/components/ui.go +++ b/pkg/components/ui.go @@ -84,7 +84,10 @@ func NewUI(cfgen *ytconfig.Generator, ytsaurus *apiproxy.Ytsaurus, master Compon "default", consts.ClientConfigFileName, getImageWithDefault(resource.Spec.UI.Image, resource.Spec.CoreImage), - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.UI.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.UI.NodeSelector, resource.Spec.NodeSelector), + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/pkg/components/yql_agent.go b/pkg/components/yql_agent.go index 6c150615..6755592e 100644 --- a/pkg/components/yql_agent.go +++ b/pkg/components/yql_agent.go @@ -69,7 +69,10 @@ func NewYQLAgent(cfgen *ytconfig.Generator, ytsaurus *apiproxy.Ytsaurus, master "yql-agent-environment", consts.ClientConfigFileName, getImageWithDefault(resource.Spec.YQLAgents.Image, resource.Spec.CoreImage), - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + getTolerationsWithDefault(resource.Spec.YQLAgents.Tolerations, resource.Spec.Tolerations), + getNodeSelectorWithDefault(resource.Spec.YQLAgents.NodeSelector, resource.Spec.NodeSelector), + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/pkg/components/ytsaurus_client.go b/pkg/components/ytsaurus_client.go index ebc7309a..fa4f5832 100644 --- a/pkg/components/ytsaurus_client.go +++ b/pkg/components/ytsaurus_client.go @@ -63,7 +63,10 @@ func NewYtsaurusClient( "user", consts.ClientConfigFileName, resource.Spec.CoreImage, - cfgen.GetNativeClientConfig), + cfgen.GetNativeClientConfig, + resource.Spec.Tolerations, + resource.Spec.NodeSelector, + ), secret: resources.NewStringSecret( l.GetSecretName(), &l, diff --git a/ytop-chart/templates/crds/ytsaurus.cluster.ytsaurus.tech.yaml b/ytop-chart/templates/crds/ytsaurus.cluster.ytsaurus.tech.yaml index 110d068b..a2c37e16 100644 --- a/ytop-chart/templates/crds/ytsaurus.cluster.ytsaurus.tech.yaml +++ b/ytop-chart/templates/crds/ytsaurus.cluster.ytsaurus.tech.yaml @@ -14898,6 +14898,10 @@ spec: type: object x-kubernetes-map-type: atomic type: object + nodeSelector: + additionalProperties: + type: string + type: object oauthService: properties: host: @@ -34382,6 +34386,36 @@ spec: - portCount type: object type: array + tolerations: + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the trip + properties: + effect: + description: Effect indicates the taint effect to match. Empty + means match all taint effects. + type: string + key: + description: Key is the taint key that the toleration applies + to. + type: string + operator: + description: Operator represents a key's relationship to the + value. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of + format: int64 + type: integer + value: + description: Value is the taint value the toleration matches + to. + type: string + type: object + type: array ui: properties: description: