diff --git a/charts/spiderpool/README.md b/charts/spiderpool/README.md index f4f68e275f..a95db4f81e 100644 --- a/charts/spiderpool/README.md +++ b/charts/spiderpool/README.md @@ -300,74 +300,77 @@ helm install spiderpool spiderpool/spiderpool --wait --namespace kube-system \ ### spiderpoolController parameters -| Name | Description | Value | -| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | -| `spiderpoolController.name` | the spiderpoolController name | `spiderpool-controller` | -| `spiderpoolController.replicas` | the replicas number of spiderpoolController pod | `1` | -| `spiderpoolController.binName` | the binName name of spiderpoolController | `/usr/bin/spiderpool-controller` | -| `spiderpoolController.hostnetwork` | enable hostnetwork mode of spiderpoolController pod. Notice, if no CNI available before spiderpool installation, must enable this | `true` | -| `spiderpoolController.image.registry` | the image registry of spiderpoolController | `ghcr.io` | -| `spiderpoolController.image.repository` | the image repository of spiderpoolController | `spidernet-io/spiderpool/spiderpool-controller` | -| `spiderpoolController.image.pullPolicy` | the image pullPolicy of spiderpoolController | `IfNotPresent` | -| `spiderpoolController.image.digest` | the image digest of spiderpoolController, which takes preference over tag | `""` | -| `spiderpoolController.image.tag` | the image tag of spiderpoolController, overrides the image tag whose default is the chart appVersion. | `""` | -| `spiderpoolController.image.imagePullSecrets` | the image imagePullSecrets of spiderpoolController | `[]` | -| `spiderpoolController.serviceAccount.create` | create the service account for the spiderpoolController | `true` | -| `spiderpoolController.serviceAccount.annotations` | the annotations of spiderpoolController service account | `{}` | -| `spiderpoolController.service.annotations` | the annotations for spiderpoolController service | `{}` | -| `spiderpoolController.service.type` | the type for spiderpoolController service | `ClusterIP` | -| `spiderpoolController.priorityClassName` | the priority Class Name for spiderpoolController | `system-node-critical` | -| `spiderpoolController.affinity` | the affinity of spiderpoolController | `{}` | -| `spiderpoolController.extraArgs` | the additional arguments of spiderpoolController container | `[]` | -| `spiderpoolController.extraEnv` | the additional environment variables of spiderpoolController container | `[]` | -| `spiderpoolController.extraVolumes` | the additional volumes of spiderpoolController container | `[]` | -| `spiderpoolController.extraVolumeMounts` | the additional hostPath mounts of spiderpoolController container | `[]` | -| `spiderpoolController.podAnnotations` | the additional annotations of spiderpoolController pod | `{}` | -| `spiderpoolController.podLabels` | the additional label of spiderpoolController pod | `{}` | -| `spiderpoolController.securityContext` | the security Context of spiderpoolController pod | `{}` | -| `spiderpoolController.resources.limits.cpu` | the cpu limit of spiderpoolController pod | `500m` | -| `spiderpoolController.resources.limits.memory` | the memory limit of spiderpoolController pod | `1024Mi` | -| `spiderpoolController.resources.requests.cpu` | the cpu requests of spiderpoolController pod | `100m` | -| 
`spiderpoolController.resources.requests.memory` | the memory requests of spiderpoolController pod | `128Mi` | -| `spiderpoolController.podDisruptionBudget.enabled` | enable podDisruptionBudget for spiderpoolController pod | `false` | -| `spiderpoolController.podDisruptionBudget.minAvailable` | minimum number/percentage of pods that should remain scheduled. | `1` | -| `spiderpoolController.httpPort` | the http Port for spiderpoolController, for health checking and http service | `5720` | -| `spiderpoolController.healthChecking.startupProbe.failureThreshold` | the failure threshold of startup probe for spiderpoolController health checking | `30` | -| `spiderpoolController.healthChecking.startupProbe.periodSeconds` | the period seconds of startup probe for spiderpoolController health checking | `2` | -| `spiderpoolController.healthChecking.livenessProbe.failureThreshold` | the failure threshold of startup probe for spiderpoolController health checking | `6` | -| `spiderpoolController.healthChecking.livenessProbe.periodSeconds` | the period seconds of startup probe for spiderpoolController health checking | `10` | -| `spiderpoolController.healthChecking.readinessProbe.failureThreshold` | the failure threshold of startup probe for spiderpoolController health checking | `3` | -| `spiderpoolController.healthChecking.readinessProbe.periodSeconds` | the period seconds of startup probe for spiderpoolController health checking | `10` | -| `spiderpoolController.webhookPort` | the http port for spiderpoolController webhook | `5722` | -| `spiderpoolController.prometheus.enabled` | enable spiderpool Controller to collect metrics | `false` | -| `spiderpoolController.prometheus.enabledDebugMetric` | enable spiderpool Controller to collect debug level metrics | `false` | -| `spiderpoolController.prometheus.port` | the metrics port of spiderpool Controller | `5721` | -| `spiderpoolController.prometheus.serviceMonitor.install` | install serviceMonitor for spiderpool agent. This requires the prometheus CRDs to be available | `false` | -| `spiderpoolController.prometheus.serviceMonitor.namespace` | the serviceMonitor namespace. Default to the namespace of helm instance | `""` | -| `spiderpoolController.prometheus.serviceMonitor.annotations` | the additional annotations of spiderpoolController serviceMonitor | `{}` | -| `spiderpoolController.prometheus.serviceMonitor.labels` | the additional label of spiderpoolController serviceMonitor | `{}` | -| `spiderpoolController.prometheus.serviceMonitor.interval` | represents the interval of spiderpoolController serviceMonitor's scraping action | `10s` | -| `spiderpoolController.prometheus.prometheusRule.install` | install prometheusRule for spiderpool agent. This requires the prometheus CRDs to be available | `false` | -| `spiderpoolController.prometheus.prometheusRule.namespace` | the prometheusRule namespace. 
Default to the namespace of helm instance | `""` | -| `spiderpoolController.prometheus.prometheusRule.annotations` | the additional annotations of spiderpoolController prometheusRule | `{}` | -| `spiderpoolController.prometheus.prometheusRule.labels` | the additional label of spiderpoolController prometheusRule | `{}` | -| `spiderpoolController.prometheus.prometheusRule.enableWarningIPGCFailureCounts` | the additional rule of spiderpoolController prometheusRule | `true` | -| `spiderpoolController.debug.logLevel` | the log level of spiderpool Controller [debug, info, warn, error, fatal, panic] | `info` | -| `spiderpoolController.debug.gopsPort` | the gops port of spiderpool Controller | `5724` | -| `spiderpoolController.tls.method` | the method for generating TLS certificates. [ provided , certmanager , auto] | `auto` | -| `spiderpoolController.tls.secretName` | the secret name for storing TLS certificates | `spiderpool-controller-server-certs` | -| `spiderpoolController.tls.certmanager.certValidityDuration` | generated certificates validity duration in days for 'certmanager' method | `36500` | -| `spiderpoolController.tls.certmanager.issuerName` | issuer name of cert manager 'certmanager'. If not specified, a CA issuer will be created. | `""` | -| `spiderpoolController.tls.certmanager.extraDnsNames` | extra DNS names added to certificate when it's auto generated | `[]` | -| `spiderpoolController.tls.certmanager.extraIPAddresses` | extra IP addresses added to certificate when it's auto generated | `[]` | -| `spiderpoolController.tls.provided.tlsCert` | encoded tls certificate for provided method | `""` | -| `spiderpoolController.tls.provided.tlsKey` | encoded tls key for provided method | `""` | -| `spiderpoolController.tls.provided.tlsCa` | encoded tls CA for provided method | `""` | -| `spiderpoolController.tls.auto.caExpiration` | ca expiration for auto method | `73000` | -| `spiderpoolController.tls.auto.certExpiration` | server cert expiration for auto method | `73000` | -| `spiderpoolController.tls.auto.extraIpAddresses` | extra IP addresses of server certificate for auto method | `[]` | -| `spiderpoolController.tls.auto.extraDnsNames` | extra DNS names of server cert for auto method | `[]` | -| `spiderpoolController.cleanup.enable` | clean up resources when helm uninstall | `true` | +| Name | Description | Value | +| ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------- | +| `spiderpoolController.name` | the spiderpoolController name | `spiderpool-controller` | +| `spiderpoolController.replicas` | the replicas number of spiderpoolController pod | `1` | +| `spiderpoolController.binName` | the binName name of spiderpoolController | `/usr/bin/spiderpool-controller` | +| `spiderpoolController.hostnetwork` | enable hostnetwork mode of spiderpoolController pod. 
Notice, if no CNI available before spiderpool installation, must enable this | `true` | +| `spiderpoolController.image.registry` | the image registry of spiderpoolController | `ghcr.io` | +| `spiderpoolController.image.repository` | the image repository of spiderpoolController | `spidernet-io/spiderpool/spiderpool-controller` | +| `spiderpoolController.image.pullPolicy` | the image pullPolicy of spiderpoolController | `IfNotPresent` | +| `spiderpoolController.image.digest` | the image digest of spiderpoolController, which takes preference over tag | `""` | +| `spiderpoolController.image.tag` | the image tag of spiderpoolController, overrides the image tag whose default is the chart appVersion. | `""` | +| `spiderpoolController.image.imagePullSecrets` | the image imagePullSecrets of spiderpoolController | `[]` | +| `spiderpoolController.serviceAccount.create` | create the service account for the spiderpoolController | `true` | +| `spiderpoolController.serviceAccount.annotations` | the annotations of spiderpoolController service account | `{}` | +| `spiderpoolController.service.annotations` | the annotations for spiderpoolController service | `{}` | +| `spiderpoolController.service.type` | the type for spiderpoolController service | `ClusterIP` | +| `spiderpoolController.priorityClassName` | the priority Class Name for spiderpoolController | `system-node-critical` | +| `spiderpoolController.affinity` | the affinity of spiderpoolController | `{}` | +| `spiderpoolController.extraArgs` | the additional arguments of spiderpoolController container | `[]` | +| `spiderpoolController.extraEnv` | the additional environment variables of spiderpoolController container | `[]` | +| `spiderpoolController.extraVolumes` | the additional volumes of spiderpoolController container | `[]` | +| `spiderpoolController.extraVolumeMounts` | the additional hostPath mounts of spiderpoolController container | `[]` | +| `spiderpoolController.podAnnotations` | the additional annotations of spiderpoolController pod | `{}` | +| `spiderpoolController.podLabels` | the additional label of spiderpoolController pod | `{}` | +| `spiderpoolController.securityContext` | the security Context of spiderpoolController pod | `{}` | +| `spiderpoolController.resources.limits.cpu` | the cpu limit of spiderpoolController pod | `500m` | +| `spiderpoolController.resources.limits.memory` | the memory limit of spiderpoolController pod | `1024Mi` | +| `spiderpoolController.resources.requests.cpu` | the cpu requests of spiderpoolController pod | `100m` | +| `spiderpoolController.resources.requests.memory` | the memory requests of spiderpoolController pod | `128Mi` | +| `spiderpoolController.podDisruptionBudget.enabled` | enable podDisruptionBudget for spiderpoolController pod | `false` | +| `spiderpoolController.podDisruptionBudget.minAvailable` | minimum number/percentage of pods that should remain scheduled. 
| `1` | +| `spiderpoolController.httpPort` | the http Port for spiderpoolController, for health checking and http service | `5720` | +| `spiderpoolController.healthChecking.startupProbe.failureThreshold` | the failure threshold of startup probe for spiderpoolController health checking | `30` | +| `spiderpoolController.healthChecking.startupProbe.periodSeconds` | the period seconds of startup probe for spiderpoolController health checking | `2` | +| `spiderpoolController.healthChecking.livenessProbe.failureThreshold` | the failure threshold of startup probe for spiderpoolController health checking | `6` | +| `spiderpoolController.healthChecking.livenessProbe.periodSeconds` | the period seconds of startup probe for spiderpoolController health checking | `10` | +| `spiderpoolController.healthChecking.readinessProbe.failureThreshold` | the failure threshold of startup probe for spiderpoolController health checking | `3` | +| `spiderpoolController.healthChecking.readinessProbe.periodSeconds` | the period seconds of startup probe for spiderpoolController health checking | `10` | +| `spiderpoolController.webhookPort` | the http port for spiderpoolController webhook | `5722` | +| `spiderpoolController.podResourceInject.enabled` | enable pod resource inject | `false` | +| `spiderpoolController.podResourceInject.namespacesExclude` | exclude the namespaces of the pod resource inject | `["kube-system","spiderpool","metallb-system","istio-system"]` | +| `spiderpoolController.podResourceInject.namespacesInclude` | include the namespaces of the pod resource inject, empty means all namespaces but exclude the namespaces in namespacesExclude, not empty means only include the namespaces in namespacesInclude | `[]` | +| `spiderpoolController.prometheus.enabled` | enable spiderpool Controller to collect metrics | `false` | +| `spiderpoolController.prometheus.enabledDebugMetric` | enable spiderpool Controller to collect debug level metrics | `false` | +| `spiderpoolController.prometheus.port` | the metrics port of spiderpool Controller | `5721` | +| `spiderpoolController.prometheus.serviceMonitor.install` | install serviceMonitor for spiderpool agent. This requires the prometheus CRDs to be available | `false` | +| `spiderpoolController.prometheus.serviceMonitor.namespace` | the serviceMonitor namespace. Default to the namespace of helm instance | `""` | +| `spiderpoolController.prometheus.serviceMonitor.annotations` | the additional annotations of spiderpoolController serviceMonitor | `{}` | +| `spiderpoolController.prometheus.serviceMonitor.labels` | the additional label of spiderpoolController serviceMonitor | `{}` | +| `spiderpoolController.prometheus.serviceMonitor.interval` | represents the interval of spiderpoolController serviceMonitor's scraping action | `10s` | +| `spiderpoolController.prometheus.prometheusRule.install` | install prometheusRule for spiderpool agent. This requires the prometheus CRDs to be available | `false` | +| `spiderpoolController.prometheus.prometheusRule.namespace` | the prometheusRule namespace. 
Default to the namespace of helm instance | `""` | +| `spiderpoolController.prometheus.prometheusRule.annotations` | the additional annotations of spiderpoolController prometheusRule | `{}` | +| `spiderpoolController.prometheus.prometheusRule.labels` | the additional label of spiderpoolController prometheusRule | `{}` | +| `spiderpoolController.prometheus.prometheusRule.enableWarningIPGCFailureCounts` | the additional rule of spiderpoolController prometheusRule | `true` | +| `spiderpoolController.debug.logLevel` | the log level of spiderpool Controller [debug, info, warn, error, fatal, panic] | `info` | +| `spiderpoolController.debug.gopsPort` | the gops port of spiderpool Controller | `5724` | +| `spiderpoolController.tls.method` | the method for generating TLS certificates. [ provided , certmanager , auto] | `auto` | +| `spiderpoolController.tls.secretName` | the secret name for storing TLS certificates | `spiderpool-controller-server-certs` | +| `spiderpoolController.tls.certmanager.certValidityDuration` | generated certificates validity duration in days for 'certmanager' method | `36500` | +| `spiderpoolController.tls.certmanager.issuerName` | issuer name of cert manager 'certmanager'. If not specified, a CA issuer will be created. | `""` | +| `spiderpoolController.tls.certmanager.extraDnsNames` | extra DNS names added to certificate when it's auto generated | `[]` | +| `spiderpoolController.tls.certmanager.extraIPAddresses` | extra IP addresses added to certificate when it's auto generated | `[]` | +| `spiderpoolController.tls.provided.tlsCert` | encoded tls certificate for provided method | `""` | +| `spiderpoolController.tls.provided.tlsKey` | encoded tls key for provided method | `""` | +| `spiderpoolController.tls.provided.tlsCa` | encoded tls CA for provided method | `""` | +| `spiderpoolController.tls.auto.caExpiration` | ca expiration for auto method | `73000` | +| `spiderpoolController.tls.auto.certExpiration` | server cert expiration for auto method | `73000` | +| `spiderpoolController.tls.auto.extraIpAddresses` | extra IP addresses of server certificate for auto method | `[]` | +| `spiderpoolController.tls.auto.extraDnsNames` | extra DNS names of server cert for auto method | `[]` | +| `spiderpoolController.cleanup.enable` | clean up resources when helm uninstall | `true` | ### spiderpoolInit parameters diff --git a/charts/spiderpool/templates/configmap.yaml b/charts/spiderpool/templates/configmap.yaml index 39feec91ea..2202605c5c 100644 --- a/charts/spiderpool/templates/configmap.yaml +++ b/charts/spiderpool/templates/configmap.yaml @@ -27,6 +27,10 @@ data: clusterSubnetDefaultFlexibleIPNumber: 0 {{- end }} tuneSysctlConfig: {{ .Values.spiderpoolAgent.tuneSysctlConfig }} + podResourceInject: + enabled: {{ .Values.spiderpoolController.podResourceInject.enabled }} + namespacesExclude: {{ toJson .Values.spiderpoolController.podResourceInject.namespacesExclude }} + namespacesInclude: {{ toJson .Values.spiderpoolController.podResourceInject.namespacesInclude }} {{- if .Values.multus.multusCNI.install }} --- kind: ConfigMap diff --git a/charts/spiderpool/templates/deployment.yaml b/charts/spiderpool/templates/deployment.yaml index d78ce1b080..45cbc4be28 100644 --- a/charts/spiderpool/templates/deployment.yaml +++ b/charts/spiderpool/templates/deployment.yaml @@ -187,6 +187,8 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + - name: SPIDERPOOL_CONTROLLER_DEPLOYMENT_NAME + value: {{ .Values.spiderpoolController.name | quote }} {{- with 
.Values.spiderpoolController.extraEnv }} {{- toYaml . | nindent 8 }} {{- end }} diff --git a/charts/spiderpool/templates/role.yaml b/charts/spiderpool/templates/role.yaml index ca4ecec499..031b9240a8 100644 --- a/charts/spiderpool/templates/role.yaml +++ b/charts/spiderpool/templates/role.yaml @@ -49,6 +49,7 @@ rules: - delete - get - list + - update - watch - apiGroups: - apiextensions.k8s.io diff --git a/charts/spiderpool/values.yaml b/charts/spiderpool/values.yaml index 06443c87a3..3bea1d4d69 100644 --- a/charts/spiderpool/values.yaml +++ b/charts/spiderpool/values.yaml @@ -659,6 +659,20 @@ spiderpoolController: ## @param spiderpoolController.webhookPort the http port for spiderpoolController webhook webhookPort: 5722 + podResourceInject: + ## @param spiderpoolController.podResourceInject.enabled enable pod resource inject + enabled: false + + ## @param spiderpoolController.podResourceInject.namespacesExclude exclude the namespaces of the pod resource inject + namespacesExclude: + - kube-system + - spiderpool + - metallb-system + - istio-system + + ## @param spiderpoolController.podResourceInject.namespacesInclude include the namespaces of the pod resource inject, empty means all namespaces but exclude the namespaces in namespacesExclude, not empty means only include the namespaces in namespacesInclude + namespacesInclude: [] + prometheus: ## @param spiderpoolController.prometheus.enabled enable spiderpool Controller to collect metrics enabled: false diff --git a/cmd/spiderpool-controller/cmd/config.go b/cmd/spiderpool-controller/cmd/config.go index 276c5cebe0..84f11d1cae 100644 --- a/cmd/spiderpool-controller/cmd/config.go +++ b/cmd/spiderpool-controller/cmd/config.go @@ -99,6 +99,7 @@ var envInfo = []envConf{ {"SPIDERPOOL_MULTUS_CONFIG_INFORMER_RESYNC_PERIOD", "60", false, nil, nil, &controllerContext.Cfg.MultusConfigInformerResyncPeriod}, {"SPIDERPOOL_CILIUM_CONFIGMAP_NAMESPACE_NAME", "kube-system/cilium-config", false, &controllerContext.Cfg.CiliumConfigName, nil, nil}, + {"SPIDERPOOL_CONTROLLER_DEPLOYMENT_NAME", "spiderpool-controller", true, &controllerContext.Cfg.ControllerDeploymentName, nil, nil}, {"SPIDERPOOL_IPPOOL_INFORMER_RESYNC_PERIOD", "300", false, nil, nil, &controllerContext.Cfg.IPPoolInformerResyncPeriod}, {"SPIDERPOOL_IPPOOL_INFORMER_WORKERS", "3", true, nil, nil, &controllerContext.Cfg.IPPoolInformerWorkers}, {"SPIDERPOOL_AUTO_IPPOOL_HANDLER_MAX_WORKQUEUE_LENGTH", "10000", true, nil, nil, &controllerContext.Cfg.IPPoolInformerMaxWorkQueueLength}, @@ -128,16 +129,18 @@ type Config struct { GopsListenPort string PyroscopeAddress string DefaultCniConfDir string - // CiliumConfigName is formatted by namespace and name,default is kube-system/cilium-config + // CiliumConfigName is formatted by namespace and name + // default is kube-system/cilium-config CiliumConfigName string - ControllerPodNamespace string - ControllerPodName string - DefaultCoordinatorName string - LeaseDuration int - LeaseRenewDeadline int - LeaseRetryPeriod int - LeaseRetryGap int + ControllerDeploymentName string + ControllerPodNamespace string + ControllerPodName string + DefaultCoordinatorName string + LeaseDuration int + LeaseRenewDeadline int + LeaseRetryPeriod int + LeaseRetryGap int IPPoolMaxAllocatedIPs int diff --git a/cmd/spiderpool-controller/cmd/crd_manager.go b/cmd/spiderpool-controller/cmd/crd_manager.go index c998ca4e7c..9244af4a29 100644 --- a/cmd/spiderpool-controller/cmd/crd_manager.go +++ b/cmd/spiderpool-controller/cmd/crd_manager.go @@ -11,6 +11,7 @@ import ( 
"github.com/go-logr/logr" multusv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" calicov1 "github.com/tigera/operator/pkg/apis/crd.projectcalico.org/v1" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -35,6 +36,7 @@ func init() { utilruntime.Must(multusv1.AddToScheme(scheme)) utilruntime.Must(apiextensionsv1.AddToScheme(scheme)) utilruntime.Must(kubevirtv1.AddToScheme(scheme)) + utilruntime.Must(admissionregistrationv1.AddToScheme(scheme)) } func newCRDManager() (ctrl.Manager, error) { diff --git a/cmd/spiderpool-controller/cmd/daemon.go b/cmd/spiderpool-controller/cmd/daemon.go index 647d097c49..e534dbf06f 100644 --- a/cmd/spiderpool-controller/cmd/daemon.go +++ b/cmd/spiderpool-controller/cmd/daemon.go @@ -265,6 +265,22 @@ func initControllerServiceManagers(ctx context.Context) { } controllerContext.PodManager = podManager + if controllerContext.Cfg.PodResourceInjectConfig.Enabled { + logger.Debug("Begin to init Pod MutatingWebhook") + if err := podmanager.InitPodWebhook(controllerContext.ClientSet.AdmissionregistrationV1(), + controllerContext.CRDManager, controllerContext.Cfg.ControllerDeploymentName, + controllerContext.Cfg.PodResourceInjectConfig.NamespacesExclude, + controllerContext.Cfg.PodResourceInjectConfig.NamespacesInclude); err != nil { + logger.Fatal(err.Error()) + } + } else { + logger.Debug("InjectPodNetworkResource is disabled, try to remove the pod part in the MutatingWebhook") + if err := podmanager.RemovePodMutatingWebhook(controllerContext.ClientSet.AdmissionregistrationV1(), + controllerContext.Cfg.ControllerDeploymentName); err != nil { + logger.Error(err.Error()) + } + } + logger.Info("Begin to initialize StatefulSet manager") statefulSetManager, err := statefulsetmanager.NewStatefulSetManager( controllerContext.CRDManager.GetClient(), diff --git a/docs/reference/spiderpool-controller.md b/docs/reference/spiderpool-controller.md index 847ff3f572..12ecdf254d 100644 --- a/docs/reference/spiderpool-controller.md +++ b/docs/reference/spiderpool-controller.md @@ -32,6 +32,7 @@ Run the spiderpool controller daemon. | SPIDERPOOL_CNI_CONFIG_DIR | /etc/cni/net.d | The host path of the cni config directory. | | SPIDERPOOL_CILIUM_CONFIGMAP_NAMESPACE_NAME | kube-system/cilium-config. | The cilium's configMap, default is kube-system/cilium-config. | | SPIDERPOOL_COORDINATOR_DEFAULT_NAME | default | the name of default spidercoordinator CR | +| SPIDERPOOL_CONTROLLER_DEPLOYMENT_NAME | spiderpool-controller | The deployment name of spiderpool-controller. 
| ## spiderpool-controller shutdown diff --git a/docs/usage/install/ai/get-started-macvlan-zh_CN.md b/docs/usage/install/ai/get-started-macvlan-zh_CN.md index 426be7471a..98a543cfe1 100644 --- a/docs/usage/install/ai/get-started-macvlan-zh_CN.md +++ b/docs/usage/install/ai/get-started-macvlan-zh_CN.md @@ -53,8 +53,8 @@ 对于 Mellanox 网卡,可下载 [NVIDIA OFED 官方驱动](https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/)进行主机安装,执行如下安装命令: ```shell - $ mount /root/MLNX_OFED_LINUX-24.01-0.3.3.1-ubuntu22.04-x86_64.iso /mnt - $ /mnt/mlnxofedinstall --all + mount /root/MLNX_OFED_LINUX-24.01-0.3.3.1-ubuntu22.04-x86_64.iso /mnt + /mnt/mlnxofedinstall --all ``` 对于 Mellanox 网卡,也可基于容器化安装驱动,实现对集群主机上所有 Mellanox 网卡批量安装驱动,运行如下命令,注意的是,该运行过程中需要访问因特网获取一些安装包。当所有的 ofed pod 进入 ready 状态,表示主机上已经完成了 OFED driver 安装。 @@ -131,7 +131,7 @@ gdrdrv 24576 0 ``` -4. 确认主机上的 RDMA 子系统为 shared 模式,这是 macvlan 场景下提供 RDMA 设备给容器的要求。 +4. 确认主机上的 RDMA 子系统为 shared 模式,这是 macvlan 场景下提供 RDMA 设备给容器的要求。 ``` # Check the current operating mode (the Linux RDMA subsystem operates in shared mode by default): @@ -144,10 +144,10 @@ 1. 使用 helm 安装 Spiderpool,并启用 rdmaSharedDevicePlugin 组件 ```shell - $ helm repo add spiderpool https://spidernet-io.github.io/spiderpool - $ helm repo update spiderpool - $ kubectl create namespace spiderpool - $ helm install spiderpool spiderpool/spiderpool -n spiderpool --set rdma.rdmaSharedDevicePlugin.install=true + helm repo add spiderpool https://spidernet-io.github.io/spiderpool + helm repo update spiderpool + kubectl create namespace spiderpool + helm install spiderpool spiderpool/spiderpool -n spiderpool --set rdma.rdmaSharedDevicePlugin.install=true ``` > 如果您是中国用户,可以指定参数 `--set global.imageRegistryOverride=ghcr.m.daocloud.io` 来使用国内的镜像源。 @@ -223,10 +223,10 @@ metadata: name: gpu1-net11 spec: - gateway: 172.16.11.254 - subnet: 172.16.11.0/16 - ips: - - 172.16.11.1-172.16.11.200 + gateway: 172.16.11.254 + subnet: 172.16.11.0/16 + ips: + - 172.16.11.1-172.16.11.200 --- apiVersion: spiderpool.spidernet.io/v2beta1 kind: SpiderMultusConfig @@ -234,11 +234,11 @@ name: gpu1-macvlan namespace: spiderpool spec: - cniType: macvlan - macvlan: - master: ["enp11s0f0np0"] - ippools: - ipv4: ["gpu1-net11"] + cniType: macvlan + macvlan: + master: ["enp11s0f0np0"] + ippools: + ipv4: ["gpu1-net11"] EOF ``` @@ -247,6 +247,8 @@ 1. 
在指定节点上创建一组 DaemonSet 应用 如下例子,通过 annotations `v1.multus-cni.io/default-network` 指定使用 calico 的缺省网卡,用于进行控制面通信,annotations `k8s.v1.cni.cncf.io/networks` 接入 8 个 GPU 亲和网卡的网卡,用于 RDMA 通信,并配置 8 种 RDMA resources 资源 + > 注:可自动为应用注入 RDMA 网络资源,参考 [基于 Webhook 自动注入 RDMA 资源](#基于-webhook-自动注入网络资源) + ```shell $ helm repo add spiderchart https://spidernet-io.github.io/charts $ helm repo update @@ -261,39 +263,39 @@ # just run daemonset in nodes 'worker1' and 'worker2' affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/hostname - operator: In - values: - - worker1 - - worker2 + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - worker1 + - worker2 # macvlan interfaces extraAnnotations: k8s.v1.cni.cncf.io/networks: |- - [{"name":"gpu1-macvlan","namespace":"spiderpool"}, - {"name":"gpu2-macvlan","namespace":"spiderpool"}, - {"name":"gpu3-macvlan","namespace":"spiderpool"}, - {"name":"gpu4-macvlan","namespace":"spiderpool"}, - {"name":"gpu5-macvlan","namespace":"spiderpool"}, - {"name":"gpu6-macvlan","namespace":"spiderpool"}, - {"name":"gpu7-macvlan","namespace":"spiderpool"}, - {"name":"gpu8-macvlan","namespace":"spiderpool"}] + [{"name":"gpu1-macvlan","namespace":"spiderpool"}, + {"name":"gpu2-macvlan","namespace":"spiderpool"}, + {"name":"gpu3-macvlan","namespace":"spiderpool"}, + {"name":"gpu4-macvlan","namespace":"spiderpool"}, + {"name":"gpu5-macvlan","namespace":"spiderpool"}, + {"name":"gpu6-macvlan","namespace":"spiderpool"}, + {"name":"gpu7-macvlan","namespace":"spiderpool"}, + {"name":"gpu8-macvlan","namespace":"spiderpool"}] # macvlan resource resources: limits: - spidernet.io/shared_cx5_gpu1: 1 - spidernet.io/shared_cx5_gpu2: 1 - spidernet.io/shared_cx5_gpu3: 1 - spidernet.io/shared_cx5_gpu4: 1 - spidernet.io/shared_cx5_gpu5: 1 - spidernet.io/shared_cx5_gpu6: 1 - spidernet.io/shared_cx5_gpu7: 1 - spidernet.io/shared_cx5_gpu8: 1 - #nvidia.com/gpu: 1 + spidernet.io/shared_cx5_gpu1: 1 + spidernet.io/shared_cx5_gpu2: 1 + spidernet.io/shared_cx5_gpu3: 1 + spidernet.io/shared_cx5_gpu4: 1 + spidernet.io/shared_cx5_gpu5: 1 + spidernet.io/shared_cx5_gpu6: 1 + spidernet.io/shared_cx5_gpu7: 1 + spidernet.io/shared_cx5_gpu8: 1 + #nvidia.com/gpu: 1 EOF $ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml @@ -410,3 +412,106 @@ # Successfully access the RDMA service of the other Pod $ ib_read_lat 172.91.0.115 ``` + +## 基于 Webhook 自动注入网络资源 + +Spiderpool 为了简化 AI 应用配置多网卡的复杂度,支持通过 labels(`cni.spidernet.io/rdma-resource-inject`) 对一组网卡配置分类。用户只需要为 Pod 添加相同的注解。这样 Spiderpool 会通过 webhook 自动为 Pod 注入所有具有相同 label 的对应的网卡和网络资源。 + + > 该功能仅支持 [ macvlan,ipvlan,sriov,ib-sriov, ipoib ] 这几种 cniType 的网卡配置。 + +1. 安装 Spiderpool 时,指定开启 webhook 自动注入网络资源功能: + + ```shell + helm install spiderpool spiderchart/spiderpool --set spiderpoolController.podResourceInject.enabled=true + ``` + + > - 默认关闭 webhook 自动注入网络资源功能,需要用户手动开启。 + > - 您可以通过 `spiderpoolController.podResourceInject.namespacesExclude` 指定不注入的命名空间,通过 `spiderpoolController.podResourceInject.namespacesInclude` 指定注入的命名空间。 + > - 安装 Spiderpool 后,您可以通过更新 spiderpool-config configMap 中 podResourceInject 字段更新配置。 + +2. 创建 SpiderMultusConfig 时指定 labels,并配置 RDMA 相关配置: + + ```shell + $ cat < - `cni.spidernet.io/rdma-resource-inject: gpu-macvlan` 固定的 key,value 为用户自定义。具有相同 `Label` 和 `Value` 的一组网卡配置要求 `cniType` 必须一致。 + > - `enableRdma`, `rdmaResourceName` 和 `ippools` 必须配置,否则 Pod 无法成功注入网络资源。 + +3. 
创建应用时添加注解: `cni.spidernet.io/rdma-resource-inject: gpu-macvlan`,这样 Spiderpool 自动为 Pod 添加 8 个 GPU 亲和网卡的网卡,用于 RDMA 通信,并配置 8 种 RDMA resources 资源: + + > 注意:使用 webhook 自动注入网络资源功能时,不能为应用添加其他网络配置注解(如 `k8s.v1.cni.cncf.io/networks` 和 `ipam.spidernet.io/ippools`等),否则会影响资源自动注入功能。 + + ```shell + $ helm repo add spiderchart https://spidernet-io.github.io/charts + $ helm repo update + $ helm search repo rdma-tools + + # run daemonset on worker1 and worker2 + $ cat < values.yaml + # for china user , it could add these to use a domestic registry + #image: + # registry: ghcr.m.daocloud.io + + # just run daemonset in nodes 'worker1' and 'worker2' + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - worker1 + - worker2 + + # macvlan interfaces + extraAnnotations: + cni.spidernet.io/rdma-resource-inject: gpu-macvlan + EOF + + $ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml + ``` + + 当 Pod 成功 Running,检查 Pod 是否成功注入 8 个 RDMA 网卡的 annotations 和 8 种 RDMA 资源。 + + ```shell + # Pod multus annotations + k8s.v1.cni.cncf.io/networks: |- + [{"name":"gpu1-macvlan","namespace":"spiderpool"}, + {"name":"gpu2-macvlan","namespace":"spiderpool"}, + {"name":"gpu3-macvlan","namespace":"spiderpool"}, + {"name":"gpu4-macvlan","namespace":"spiderpool"}, + {"name":"gpu5-macvlan","namespace":"spiderpool"}, + {"name":"gpu6-macvlan","namespace":"spiderpool"}, + {"name":"gpu7-macvlan","namespace":"spiderpool"}, + {"name":"gpu8-macvlan","namespace":"spiderpool"}] + # macvlan resource + resources: + requests: + spidernet.io/shared_cx5_gpu1: 1 + spidernet.io/shared_cx5_gpu2: 1 + spidernet.io/shared_cx5_gpu3: 1 + spidernet.io/shared_cx5_gpu4: 1 + spidernet.io/shared_cx5_gpu5: 1 + spidernet.io/shared_cx5_gpu6: 1 + spidernet.io/shared_cx5_gpu7: 1 + spidernet.io/shared_cx5_gpu8: 1 + #nvidia.com/gpu: 1 + ``` diff --git a/docs/usage/install/ai/get-started-macvlan.md b/docs/usage/install/ai/get-started-macvlan.md index 1c86c32510..9bf9c008e9 100644 --- a/docs/usage/install/ai/get-started-macvlan.md +++ b/docs/usage/install/ai/get-started-macvlan.md @@ -53,8 +53,8 @@ The network planning for the cluster is as follows: For Mellanox network cards, you can download [the NVIDIA OFED official driver](https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/) and install it on the host using the following installation command: ```shell - $ mount /root/MLNX_OFED_LINUX-24.01-0.3.3.1-ubuntu22.04-x86_64.iso /mnt - $ /mnt/mlnxofedinstall --all + mount /root/MLNX_OFED_LINUX-24.01-0.3.3.1-ubuntu22.04-x86_64.iso /mnt + /mnt/mlnxofedinstall --all ``` For Mellanox network cards, you can also perform a containerized installation to batch install drivers on all Mellanox network cards in the cluster hosts. Run the following command. Note that this process requires internet access to fetch some installation packages. When all the OFED pods enter the ready state, it indicates that the OFED driver installation on the hosts is complete: @@ -145,10 +145,10 @@ The network planning for the cluster is as follows: 1. 
Use Helm to install Spiderpool and enable the rdmaSharedDevicePlugin: ```shell - $ helm repo add spiderpool https://spidernet-io.github.io/spiderpool - $ helm repo update spiderpool - $ kubectl create namespace spiderpool - $ helm install spiderpool spiderpool/spiderpool -n spiderpool --set rdma.rdmaSharedDevicePlugin.install=true + helm repo add spiderpool https://spidernet-io.github.io/spiderpool + helm repo update spiderpool + kubectl create namespace spiderpool + helm install spiderpool spiderpool/spiderpool -n spiderpool --set rdma.rdmaSharedDevicePlugin.install=true ``` > If you are a user in China, you can specify the helm option `--set global.imageRegistryOverride=ghcr.m.daocloud.io` to use a domestic image source. @@ -224,10 +224,10 @@ The network planning for the cluster is as follows: metadata: name: gpu1-net11 spec: - gateway: 172.16.11.254 - subnet: 172.16.11.0/16 - ips: - - 172.16.11.1-172.16.11.200 + gateway: 172.16.11.254 + subnet: 172.16.11.0/16 + ips: + - 172.16.11.1-172.16.11.200 --- apiVersion: spiderpool.spidernet.io/v2beta1 kind: SpiderMultusConfig @@ -235,11 +235,11 @@ The network planning for the cluster is as follows: name: gpu1-macvlan namespace: spiderpool spec: - cniType: macvlan - macvlan: - master: ["enp11s0f0np0"] - ippools: - ipv4: ["gpu1-net11"] + cniType: macvlan + macvlan: + master: ["enp11s0f0np0"] + ippools: + ipv4: ["gpu1-net11"] EOF ``` @@ -248,6 +248,8 @@ The network planning for the cluster is as follows: 1. Create a DaemonSet application on specified nodes. In the following example, the annotation field `v1.multus-cni.io/default-network` specifies the use of the default Calico network card for control plane communication. The annotation field `k8s.v1.cni.cncf.io/networks` connects to the 8 network cards affinitized to the GPU for RDMA communication, and configures 8 types of RDMA resources. 
+ > NOTICE: It support auto inject RDMA resources for application, see [Auto inject RDMA Resources](#auto-inject-rdma-resources-base-on-webhook) + ```shell $ helm repo add spiderchart https://spidernet-io.github.io/charts $ helm repo update @@ -262,42 +264,38 @@ The network planning for the cluster is as follows: # just run daemonset in nodes 'worker1' and 'worker2' affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/hostname - operator: In - values: - - worker1 - - worker2 + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - worker1 + - worker2 # interfaces extraAnnotations: - k8s.v1.cni.cncf.io/networks: |- - [{"name":"gpu1-macvlan","namespace":"spiderpool"}, - {"name":"gpu2-macvlan","namespace":"spiderpool"}, - {"name":"gpu3-macvlan","namespace":"spiderpool"}, - {"name":"gpu4-macvlan","namespace":"spiderpool"}, - {"name":"gpu5-macvlan","namespace":"spiderpool"}, - {"name":"gpu6-macvlan","namespace":"spiderpool"}, - {"name":"gpu7-macvlan","namespace":"spiderpool"}, - {"name":"gpu8-macvlan","namespace":"spiderpool"}] - - # resource + k8s.v1.cni.cncf.io/networks: |- + [{"name":"gpu1-macvlan","namespace":"spiderpool"}, + {"name":"gpu2-macvlan","namespace":"spiderpool"}, + {"name":"gpu3-macvlan","namespace":"spiderpool"}, + {"name":"gpu4-macvlan","namespace":"spiderpool"}, + {"name":"gpu5-macvlan","namespace":"spiderpool"}, + {"name":"gpu6-macvlan","namespace":"spiderpool"}, + {"name":"gpu7-macvlan","namespace":"spiderpool"}, + {"name":"gpu8-macvlan","namespace":"spiderpool"}] + # macvlan resource resources: - limits: - spidernet.io/shared_cx5_gpu1: 1 - spidernet.io/shared_cx5_gpu2: 1 - spidernet.io/shared_cx5_gpu3: 1 - spidernet.io/shared_cx5_gpu4: 1 - spidernet.io/shared_cx5_gpu5: 1 - spidernet.io/shared_cx5_gpu6: 1 - spidernet.io/shared_cx5_gpu7: 1 - spidernet.io/shared_cx5_gpu8: 1 - #nvidia.com/gpu: 1 - EOF - - $ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml + requests: + spidernet.io/shared_cx5_gpu1: 1 + spidernet.io/shared_cx5_gpu2: 1 + spidernet.io/shared_cx5_gpu3: 1 + spidernet.io/shared_cx5_gpu4: 1 + spidernet.io/shared_cx5_gpu5: 1 + spidernet.io/shared_cx5_gpu6: 1 + spidernet.io/shared_cx5_gpu7: 1 + spidernet.io/shared_cx5_gpu8: 1 + #nvidia.com/gpu: 1 ``` During the creation of the network namespace for the container, Spiderpool will perform connectivity tests on the gateway of the macvlan interface. @@ -412,3 +410,105 @@ The network planning for the cluster is as follows: # Successfully access the RDMA service of the other Pod $ ib_read_lat 172.91.0.115 ``` + +## Auto Inject RDMA Resources base on webhook + +To simplify the complexity of configuring multiple network cards for AI applications, Spiderpool supports categorizing a group of network card configurations through labels (cni.spidernet.io/rdma-resource-inject). Users only need to add the same annotation to the Pod. This way, Spiderpool will automatically inject all corresponding network cards and network resources with the same label into the Pod through a webhook. + + > This feature only supports network card configurations with cniType of [ macvlan,ipvlan,sriov,ib-sriov, ipoib ]. + +1. 
When installing Spiderpool, specify to enable the webhook automatic injection of network resources feature: + + ```shell + helm install spiderpool spiderchart/spiderpool --set spiderpoolController.podResourceInject.enabled=true + ``` + + > - By default, the webhook automatic injection of network resources feature is disabled and needs to be manually enabled by the user. + > - You can specify namespaces to exclude from injection using `spiderpoolController.podResourceInject.namespacesExclude`, and specify namespaces to include for injection using `spiderpoolController.podResourceInject.namespacesInclude`. + > - After installing Spiderpool, you can update the configuration by modifying the podResourceInject field in the spiderpool-config configMap. + +2. Create SpiderMultusConfig and specify labels, and configure RDMA-related settings: + + ```shell + $ cat < - `cni.spidernet.io/rdma-resource-inject: gpu-macvlan` is a fixed key, and the value is user-defined. A group of network card configurations with the same Label and Value must have the same cniType. + > - `enableRdma`, `rdmaResourceName` and `ippools` must be configured, otherwise the Pod will fail to inject network resources successfully. + +3. Add the annotation `cni.spidernet.io/rdma-resource-inject: gpu-macvlan` to the Pod, so that Spiderpool automatically adds 8 GPU-affinity network cards for RDMA communication and configures 8 types of RDMA resources: + + > Note: When using the webhook automatic injection of network resources feature, do not add other network configuration annotations (such as `k8s.v1.cni.cncf.io/networks` and `ipam.spidernet.io/ippools`) to the Pod, otherwise it will affect the automatic injection of resources. + + ```shell + $ helm repo add spiderchart https://spidernet-io.github.io/charts + $ helm repo update + $ helm search repo rdma-tools + + # run daemonset on worker1 and worker2 + $ cat < values.yaml + # for china user , it could add these to use a domestic registry + #image: + # registry: ghcr.m.daocloud.io + + # just run daemonset in nodes 'worker1' and 'worker2' + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - worker1 + - worker2 + # macvlan interfaces + extraAnnotations: + cni.spidernet.io/rdma-resource-inject: gpu-macvlan + EOF + + $ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml + ``` + + When the Pod is successfully Running, check if 8 RDMA network card annotations and 8 types of RDMA resources are successfully injected into the Pod. 
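+
+    For example, you can inspect the Pod with commands like the following (the namespace and Pod name are placeholders, replace them with your own):
+
+    ```shell
+    # print the multus networks annotation injected by the webhook
+    kubectl get pod <rdma-tools-pod> -n <namespace> -o yaml | grep -A 10 "k8s.v1.cni.cncf.io/networks"
+
+    # print the RDMA resource requests injected into the first container
+    kubectl get pod <rdma-tools-pod> -n <namespace> -o jsonpath='{.spec.containers[0].resources.requests}'
+    ```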
+ + ```shell + # Pod multus annotations + k8s.v1.cni.cncf.io/networks: |- + [{"name":"gpu1-macvlan","namespace":"spiderpool"}, + {"name":"gpu2-macvlan","namespace":"spiderpool"}, + {"name":"gpu3-macvlan","namespace":"spiderpool"}, + {"name":"gpu4-macvlan","namespace":"spiderpool"}, + {"name":"gpu5-macvlan","namespace":"spiderpool"}, + {"name":"gpu6-macvlan","namespace":"spiderpool"}, + {"name":"gpu7-macvlan","namespace":"spiderpool"}, + {"name":"gpu8-macvlan","namespace":"spiderpool"}] + # macvlan resource + resources: + requests: + spidernet.io/shared_cx5_gpu1: 1 + spidernet.io/shared_cx5_gpu2: 1 + spidernet.io/shared_cx5_gpu3: 1 + spidernet.io/shared_cx5_gpu4: 1 + spidernet.io/shared_cx5_gpu5: 1 + spidernet.io/shared_cx5_gpu6: 1 + spidernet.io/shared_cx5_gpu7: 1 + spidernet.io/shared_cx5_gpu8: 1 + #nvidia.com/gpu: 1 + ``` diff --git a/docs/usage/install/ai/get-started-sriov-zh_CN.md b/docs/usage/install/ai/get-started-sriov-zh_CN.md index ea571a6ade..7aeef987fd 100644 --- a/docs/usage/install/ai/get-started-sriov-zh_CN.md +++ b/docs/usage/install/ai/get-started-sriov-zh_CN.md @@ -14,7 +14,7 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb 2. 独占模式,容器中只会看到分配给自身 VF 的 RDMA 设备,不会看见 PF 和 其它 VF 的 RDMA 设备。 -- 在不同的网络场景下,使用了不同的 CNI +- 在不同的网络场景下,使用了不同的 CNI 1. Infiniband 网络场景下,使用 [IB-SRIOV CNI](https://github.com/k8snetworkplumbingwg/ib-sriov-cni) 给 POD 提供 SR-IOV 网卡。 @@ -22,7 +22,7 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb ## 方案 -本文将以如下典型的 AI 集群拓扑为例,介绍如何搭建 Spiderpool +本文将以如下典型的 AI 集群拓扑为例,介绍如何搭建 Spiderpool ![AI Cluster](../../../images/ai-cluster.png) 图1 AI 集群拓扑 @@ -62,8 +62,8 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb 对于 Mellanox 网卡,可下载 [NVIDIA OFED 官方驱动](https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/) 进行主机安装,执行如下安装命令 ``` - $ mount /root/MLNX_OFED_LINUX-24.01-0.3.3.1-ubuntu22.04-x86_64.iso /mnt - $ /mnt/mlnxofedinstall --all + mount /root/MLNX_OFED_LINUX-24.01-0.3.3.1-ubuntu22.04-x86_64.iso /mnt + /mnt/mlnxofedinstall --all ``` 对于 Mellanox 网卡,也可基于容器化安装,实现对集群主机上所有 Mellanox 网卡批量安装驱动,运行如下命令,注意的是,该运行过程中需要访问因特网获取一些安装包。当所有的 ofed pod 进入 ready 状态,表示主机上已经完成了 OFED driver 安装 @@ -94,7 +94,7 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb ....... ``` - 确认网卡的工作模式,如下输出表示网卡工作在 Ethernet 模式下,可实现 RoCE 通信 + 确认网卡的工作模式,如下输出表示网卡工作在 Ethernet 模式下,可实现 RoCE 通信 ``` $ ibstat mlx5_0 | grep "Link layer" @@ -125,7 +125,7 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb ``` 3. 开启 [GPUDirect RMDA](https://docs.nvidia.com/cuda/gpudirect-rdma/) 功能 - + 在安装或使用 [gpu-operator](https://github.com/NVIDIA/gpu-operator) 过程中 a. 开启 helm 安装选项: `--set driver.rdma.enabled=true --set driver.rdma.useHostMofed=true`,gpu-operator 会安装 [nvidia-peermem](https://network.nvidia.com/products/GPUDirect-RDMA/) 内核模块,启用 GPUDirect RMDA 功能,加速 GPU 和 RDMA 网卡之间的转发性能。可在主机上输入如下命令,确认安装成功的内核模块 @@ -165,10 +165,10 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb 1. 
使用 helm 安装 Spiderpool,并启用 SR-IOV 组件 ``` - $ helm repo add spiderpool https://spidernet-io.github.io/spiderpool - $ helm repo update spiderpool - $ kubectl create namespace spiderpool - $ helm install spiderpool spiderpool/spiderpool -n spiderpool --set sriov.install=true + helm repo add spiderpool https://spidernet-io.github.io/spiderpool + helm repo update spiderpool + kubectl create namespace spiderpool + helm install spiderpool spiderpool/spiderpool -n spiderpool --set sriov.install=true ``` > 如果您是中国用户,可以指定参数 `--set global.imageRegistryOverride=ghcr.m.daocloud.io` 来使用国内的镜像源。 @@ -190,14 +190,14 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb 2. 配置 SR-IOV operator, 在每个主机上创建出 VF 设备 使用如下命令,查询主机上网卡设备的 PCIE 信息。确认如下输出的设备号 [15b3:1017] 出现在 [sriov-network-operator 支持网卡型号范围](https://github.com/k8snetworkplumbingwg/sriov-network-operator/blob/master/deployment/sriov-network-operator-chart/templates/configmap.yaml) - + ``` $ lspci -nn | grep Mellanox 86:00.0 Infiniband controller [0207]: Mellanox Technologies MT27800 Family [ConnectX-5] [15b3:1017] 86:00.1 Infiniband controller [0207]: Mellanox Technologies MT27800 Family [ConnectX-5] [15b3:1017] .... ``` - + SRIOV VF 数量决定了一个网卡能同时为多少个 POD 提供网卡,不同型号的网卡的有不同的最大 VF 数量上限,Mellanox 的 ConnectX 网卡常见型号的最大 VF 上限是 127 。 如下示例,设置每个节点上的 GPU1 和 GPU2 的网卡,每个网卡配置出 12 个 VF 设备。请参考如下,为主机上每个亲和 GPU 的网卡配置 SriovNetworkNodePolicy,这样,将有 8 个 SRIOV resource 以供使用。 @@ -246,7 +246,7 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb isRdma: true EOF ``` - + 创建 SriovNetworkNodePolicy 配置后,每个节点上将会启动 sriov-device-plugin ,负责上报 VF 设备资源 ``` @@ -307,17 +307,17 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb (1) 对于 Infiniband 网络,请为所有的 GPU 亲和的 SR-IOV 网卡配置 [IB-SRIOV CNI](https://github.com/k8snetworkplumbingwg/ib-sriov-cni) 配置,并创建对应的 IP 地址池 。 如下例子,配置了 GPU1 亲和的网卡和 IP 地址池 - ``` + ```shell $ cat < 注:支持自动为应用注入 RDMA 网络资源,参考 [基于 Webhook 自动为应用注入 RDMA 网络资源](#基于-webhook-自动注入-rdma-网络资源) + ``` $ helm repo add spiderchart https://spidernet-io.github.io/charts $ helm repo update @@ -382,45 +384,45 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb # just run daemonset in nodes 'worker1' and 'worker2' affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/hostname - operator: In - values: - - worker1 - - worker2 + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - worker1 + - worker2 # sriov interfaces extraAnnotations: k8s.v1.cni.cncf.io/networks: |- - [{"name":"gpu1-sriov","namespace":"spiderpool"}, - {"name":"gpu2-sriov","namespace":"spiderpool"}, - {"name":"gpu3-sriov","namespace":"spiderpool"}, - {"name":"gpu4-sriov","namespace":"spiderpool"}, - {"name":"gpu5-sriov","namespace":"spiderpool"}, - {"name":"gpu6-sriov","namespace":"spiderpool"}, - {"name":"gpu7-sriov","namespace":"spiderpool"}, - {"name":"gpu8-sriov","namespace":"spiderpool"}] + [{"name":"gpu1-sriov","namespace":"spiderpool"}, + {"name":"gpu2-sriov","namespace":"spiderpool"}, + {"name":"gpu3-sriov","namespace":"spiderpool"}, + {"name":"gpu4-sriov","namespace":"spiderpool"}, + {"name":"gpu5-sriov","namespace":"spiderpool"}, + {"name":"gpu6-sriov","namespace":"spiderpool"}, + {"name":"gpu7-sriov","namespace":"spiderpool"}, + {"name":"gpu8-sriov","namespace":"spiderpool"}] # sriov resource resources: limits: - spidernet.io/gpu1sriov: 1 - 
spidernet.io/gpu2sriov: 1 - spidernet.io/gpu3sriov: 1 - spidernet.io/gpu4sriov: 1 - spidernet.io/gpu5sriov: 1 - spidernet.io/gpu6sriov: 1 - spidernet.io/gpu7sriov: 1 - spidernet.io/gpu8sriov: 1 - #nvidia.com/gpu: 1 + spidernet.io/gpu1sriov: 1 + spidernet.io/gpu2sriov: 1 + spidernet.io/gpu3sriov: 1 + spidernet.io/gpu4sriov: 1 + spidernet.io/gpu5sriov: 1 + spidernet.io/gpu6sriov: 1 + spidernet.io/gpu7sriov: 1 + spidernet.io/gpu8sriov: 1 + #nvidia.com/gpu: 1 EOF $ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml ``` - + 在容器的网络命名空间创建过程中,Spiderpool 会对 sriov 接口上的网关进行连通性测试,如果如上应用的所有 POD 都启动成功,说明了每个节点上的 VF 设备的连通性成功,可进行正常的 RDMA 通信。 2. 查看容器的网络命名空间状态 @@ -533,7 +535,7 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb $ ib_read_lat 172.91.0.115 ``` -## (可选)Infiniband 网络下对接 UFM +## (可选)Infiniband 网络下对接 UFM 对于使用了 Infiniband 网络的集群,如果网络中有 [UFM 管理平台](https://www.nvidia.com/en-us/networking/infiniband/ufm/),可使用 [ib-kubernetes](https://github.com/Mellanox/ib-kubernetes) 插件,它以 daemonset 形式运行,监控所有使用 SRIOV 网卡的容器,把 VF 设备的 Pkey 和 GUID 上报给 UFM 。 @@ -576,12 +578,12 @@ Spiderpool 使用了 [sriov-network-operator](https://github.com/k8snetworkplumb 3. 在 kubernetes 集群上安装 ib-kubernetes ``` - $ git clone https://github.com/Mellanox/ib-kubernetes.git && cd ib-kubernetes - $ $ kubectl create -f deployment/ib-kubernetes-configmap.yaml - $ kubectl create -f deployment/ib-kubernetes.yaml + git clone https://github.com/Mellanox/ib-kubernetes.git && cd ib-kubernetes + $ kubectl create -f deployment/ib-kubernetes-configmap.yaml + kubectl create -f deployment/ib-kubernetes.yaml ``` -4. 在 Infiniband 网络下,创建 Spiderpool 的 SpiderMultusConfig 时,可配置 pkey,使用该配置创建的 POD 将生效 pkey 配置,且被 ib-kubernetes 同步给 UFM +4. 在 Infiniband 网络下,创建 Spiderpool 的 SpiderMultusConfig 时,可配置 pkey,使用该配置创建的 POD 将生效 pkey 配置,且被 ib-kubernetes 同步给 UFM ``` $ cat < Note: Each node in an Infiniband Kubernetes deployment may be associated with up to 128 PKeys due to kernel limitation + +## 基于 Webhook 自动注入 RDMA 网络资源 + +Spiderpool 为了简化 AI 应用配置多网卡的复杂度,支持通过 labels(`cni.spidernet.io/rdma-resource-inject`) 对一组网卡配置分类。用户只需要为 Pod 添加相同的注解。这样 Spiderpool 会通过 webhook 自动为 Pod 注入所有具有相同 label 的对应的网卡和网络资源。 + + > 该功能仅支持 [ macvlan,ipvlan,sriov,ib-sriov, ipoib ] 这几种 cniType 的网卡配置。 + +1. 使用 webhook 自动注入 RDMA 网络资源,需要安装 Spiderpool 时指定开启 webhook 自动注入网络资源功能: + + ```shell + helm install spiderpool spiderchart/spiderpool --set spiderpoolController.podResourceInject.enabled=true + ``` + + > - 默认关闭 webhook 自动注入网络资源功能,需要用户手动开启。 + > - 您可以通过 `spiderpoolController.podResourceInject.namespacesExclude` 指定不注入的命名空间,通过 `spiderpoolController.podResourceInject.namespacesInclude` 指定注入的命名空间。 + > - 安装 Spiderpool 后,您可以通过更新 spiderpool-config configMap 中 podResourceInject 字段更新配置。 + +2. 创建 SpiderMultusConfig 时指定 labels,并配置 RDMA 相关配置: + + (1) 对于 Infiniband 网络,请为所有的 GPU 亲和的 SR-IOV 网卡配置 [IB-SRIOV CNI](https://github.com/k8snetworkplumbingwg/ib-sriov-cni) 配置,并创建对应的 IP 地址池 。 如下例子,配置了 GPU1 亲和的网卡和 IP 地址池 + + ```shell + $ cat < - `cni.spidernet.io/rdma-resource-inject: gpu-ibsriov` 固定的 key,value 为用户自定义。具有相同 Label 和 Value 的一组网卡配置要求 cniType 必须一致。 + > - `resourceName` 和 `ippools` 必须配置,否则 Pod 无法成功注入网络资源。 + + (2) 对于 Ethernet 网络,请为所有的 GPU 亲和的 SR-IOV 网卡配置 [SR-IOV CNI](https://github.com/k8snetworkplumbingwg/sriov-cni) 配置,并创建对应的 IP 地址池 。 如下例子,配置了 GPU1 亲和的网卡和 IP 地址池 + + ``` + $ cat < - `cni.spidernet.io/rdma-resource-inject: gpu-sriov` 固定的 key,value 为用户自定义。具有相同 Label 和 Value 的一组网卡配置要求 cniType 必须一致。 + > - `resourceName` 和 `ippools` 必须配置,否则 Pod 无法成功注入网络资源。 + +3. 
创建应用时,添加注解: `cni.spidernet.io/rdma-resource-inject: gpu-sriov`,这样 Spiderpool 自动为 Pod 添加 8 个 GPU 亲和网卡的网卡,用于 RDMA 通信,并配置 8 种 RDMA resources 资源: + + > 注意:使用 webhook 自动注入网络资源功能时,不能为应用添加其他网络配置注解(如 `k8s.v1.cni.cncf.io/networks` 和 `ipam.spidernet.io/ippools`等),否则会影响资源自动注入功能。 + + ```shell + $ helm repo add spiderchart https://spidernet-io.github.io/charts + $ helm repo update + $ helm search repo rdma-tools + + # run daemonset on worker1 and worker2 + $ cat < values.yaml + # for china user , it could add these to use a domestic registry + #image: + # registry: ghcr.m.daocloud.io + + # just run daemonset in nodes 'worker1' and 'worker2' + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - worker1 + - worker2 + + # macvlan interfaces + extraAnnotations: + cni.spidernet.io/rdma-resource-inject: gpu-sriov + EOF + + $ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml + ``` + + 当 Pod 成功 Running,检查 Pod 是否成功注入 8 个 RDMA 网卡的 annotations 和 8 种 RDMA 资源。 + + ```shell + # pod annotations + extraAnnotations: + k8s.v1.cni.cncf.io/networks: |- + [{"name":"gpu1-sriov","namespace":"spiderpool"}, + {"name":"gpu2-sriov","namespace":"spiderpool"}, + {"name":"gpu3-sriov","namespace":"spiderpool"}, + {"name":"gpu4-sriov","namespace":"spiderpool"}, + {"name":"gpu5-sriov","namespace":"spiderpool"}, + {"name":"gpu6-sriov","namespace":"spiderpool"}, + {"name":"gpu7-sriov","namespace":"spiderpool"}, + {"name":"gpu8-sriov","namespace":"spiderpool"}] + + # sriov resource + resources: + limits: + spidernet.io/gpu1sriov: 1 + spidernet.io/gpu2sriov: 1 + spidernet.io/gpu3sriov: 1 + spidernet.io/gpu4sriov: 1 + spidernet.io/gpu5sriov: 1 + spidernet.io/gpu6sriov: 1 + spidernet.io/gpu7sriov: 1 + spidernet.io/gpu8sriov: 1 + #nvidia.com/gpu: 1 + ``` diff --git a/docs/usage/install/ai/get-started-sriov.md b/docs/usage/install/ai/get-started-sriov.md index af4ad0bd8c..ac6b5e1ee8 100644 --- a/docs/usage/install/ai/get-started-sriov.md +++ b/docs/usage/install/ai/get-started-sriov.md @@ -61,8 +61,8 @@ The network planning for the cluster is as follows: For Mellanox network cards, you can download [the NVIDIA OFED official driver](https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/) and install it on the host using the following installation command: ``` - $ mount /root/MLNX_OFED_LINUX-24.01-0.3.3.1-ubuntu22.04-x86_64.iso /mnt - $ /mnt/mlnxofedinstall --all + mount /root/MLNX_OFED_LINUX-24.01-0.3.3.1-ubuntu22.04-x86_64.iso /mnt + /mnt/mlnxofedinstall --all ``` For Mellanox network cards, you can also perform a containerized installation to batch install drivers on all Mellanox network cards in the cluster hosts. Run the following command. Note that this process requires internet access to fetch some installation packages. When all the OFED pods enter the ready state, it indicates that the OFED driver installation on the hosts is complete: @@ -125,9 +125,9 @@ The network planning for the cluster is as follows: 3. Enable [GPUDirect RDMA](https://docs.nvidia.com/cuda/gpudirect-rdma/) - The installation of the [gpu-operator](https://github.com/NVIDIA/gpu-operator): + The installation of the [gpu-operator](https://github.com/NVIDIA/gpu-operator): - a. Enable the Helm installation options: `--set driver.rdma.enabled=true --set driver.rdma.useHostMofed=true`. The gpu-operator will install [the nvidia-peermem](https://network.nvidia.com/products/GPUDirect-RDMA/) kernel module, + a. 
Enable the Helm installation options: `--set driver.rdma.enabled=true --set driver.rdma.useHostMofed=true`. The gpu-operator will install [the nvidia-peermem](https://network.nvidia.com/products/GPUDirect-RDMA/) kernel module, enabling GPUDirect RDMA functionality to accelerate data transfer performance between the GPU and RDMA network cards. Enter the following command on the host to confirm the successful installation of the kernel module: ``` @@ -165,10 +165,10 @@ The network planning for the cluster is as follows: 1. Use Helm to install Spiderpool and enable the SR-IOV component: ``` - $ helm repo add spiderpool https://spidernet-io.github.io/spiderpool - $ helm repo update spiderpool - $ kubectl create namespace spiderpool - $ helm install spiderpool spiderpool/spiderpool -n spiderpool --set sriov.install=true + helm repo add spiderpool https://spidernet-io.github.io/spiderpool + helm repo update spiderpool + kubectl create namespace spiderpool + helm install spiderpool spiderpool/spiderpool -n spiderpool --set sriov.install=true ``` > If you are a user in China, you can specify the helm option `--set global.imageRegistryOverride=ghcr.m.daocloud.io` to use a domestic image source. @@ -189,7 +189,7 @@ The network planning for the cluster is as follows: 2. Configure the SR-IOV Operator to Create VF Devices on Each Host - Use the following command to query the PCIe information of the network card devices on the host. Confirm that the device ID [15b3:1017] appears + Use the following command to query the PCIe information of the network card devices on the host. Confirm that the device ID [15b3:1017] appears in [the supported network card models list of the sriov-network-operator](https://github.com/k8snetworkplumbingwg/sriov-network-operator/blob/master/deployment/sriov-network-operator-chart/templates/configmap.yaml). ``` @@ -265,8 +265,8 @@ The network planning for the cluster is as follows: sriov-network-config-daemon-n629x 1/1 Running 0 1m ....... ``` - - Once the SriovNetworkNodePolicy configuration is created, the SR-IOV operator will sequentially evict PODs on each node, configure the + + Once the SriovNetworkNodePolicy configuration is created, the SR-IOV operator will sequentially evict PODs on each node, configure the VF settings in the network card driver, and then reboot the host. Consequently, you will observe the nodes in the cluster sequentially entering the SchedulingDisabled state and being rebooted. ``` @@ -276,7 +276,7 @@ The network planning for the cluster is as follows: ai-10-1-16-2 Ready,SchedulingDisabled worker 2d15h v1.28.9 ....... ``` - + It may take several minutes for all nodes to complete the VF configuration process. You can monitor the sriovnetworknodestates status to see if it has entered the Succeeded state, indicating that the configuration is complete. ``` @@ -317,10 +317,10 @@ The network planning for the cluster is as follows: metadata: name: gpu1-net11 spec: - gateway: 172.16.11.254 - subnet: 172.16.11.0/16 - ips: - - 172.16.11.1-172.16.11.200 + gateway: 172.16.11.254 + subnet: 172.16.11.0/16 + ips: + - 172.16.11.1-172.16.11.200 --- apiVersion: spiderpool.spidernet.io/v2beta1 kind: SpiderMultusConfig @@ -328,27 +328,27 @@ The network planning for the cluster is as follows: name: gpu1-sriov namespace: spiderpool spec: - cniType: ib-sriov - ibsriov: - resourceName: spidernet.io/gpu1sriov - ippools: - ipv4: ["gpu1-net91"] + cniType: ib-sriov + ibsriov: + resourceName: spidernet.io/gpu1sriov + ippools: + ipv4: ["gpu1-net91"] EOF ``` b. 
For Ethernet Networks, configure [the SR-IOV CNI](https://github.com/k8snetworkplumbingwg/sriov-cni) for all GPU-affinitized SR-IOV network cards and create the corresponding IP address pool. The following example configures the network card and IP address pool for GPU1 - ``` + ``` $ cat < NOTICE: It support auto inject RDMA resources for application, see [Auto inject RDMA Resources](#auto-inject-rdma-resources-base-on-webhook) + ```shell $ helm repo add spiderchart https://spidernet-io.github.io/charts $ helm repo update @@ -384,45 +386,45 @@ The network planning for the cluster is as follows: # just run daemonset in nodes 'worker1' and 'worker2' affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/hostname - operator: In - values: - - worker1 - - worker2 + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - worker1 + - worker2 # sriov interfaces extraAnnotations: k8s.v1.cni.cncf.io/networks: |- - [{"name":"gpu1-sriov","namespace":"spiderpool"}, - {"name":"gpu2-sriov","namespace":"spiderpool"}, - {"name":"gpu3-sriov","namespace":"spiderpool"}, - {"name":"gpu4-sriov","namespace":"spiderpool"}, - {"name":"gpu5-sriov","namespace":"spiderpool"}, - {"name":"gpu6-sriov","namespace":"spiderpool"}, - {"name":"gpu7-sriov","namespace":"spiderpool"}, - {"name":"gpu8-sriov","namespace":"spiderpool"}] + [{"name":"gpu1-sriov","namespace":"spiderpool"}, + {"name":"gpu2-sriov","namespace":"spiderpool"}, + {"name":"gpu3-sriov","namespace":"spiderpool"}, + {"name":"gpu4-sriov","namespace":"spiderpool"}, + {"name":"gpu5-sriov","namespace":"spiderpool"}, + {"name":"gpu6-sriov","namespace":"spiderpool"}, + {"name":"gpu7-sriov","namespace":"spiderpool"}, + {"name":"gpu8-sriov","namespace":"spiderpool"}] # sriov resource resources: limits: - spidernet.io/gpu1sriov: 1 - spidernet.io/gpu2sriov: 1 - spidernet.io/gpu3sriov: 1 - spidernet.io/gpu4sriov: 1 - spidernet.io/gpu5sriov: 1 - spidernet.io/gpu6sriov: 1 - spidernet.io/gpu7sriov: 1 - spidernet.io/gpu8sriov: 1 - #nvidia.com/gpu: 1 + spidernet.io/gpu1sriov: 1 + spidernet.io/gpu2sriov: 1 + spidernet.io/gpu3sriov: 1 + spidernet.io/gpu4sriov: 1 + spidernet.io/gpu5sriov: 1 + spidernet.io/gpu6sriov: 1 + spidernet.io/gpu7sriov: 1 + spidernet.io/gpu8sriov: 1 + #nvidia.com/gpu: 1 EOF $ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml ``` - During the creation of the network namespace for the container, Spiderpool will perform connectivity tests on the gateway of the SR-IOV interface. + During the creation of the network namespace for the container, Spiderpool will perform connectivity tests on the gateway of the SR-IOV interface. If all PODs of the above application start successfully, it indicates successful connectivity of the VF devices on each node, allowing normal RDMA communication. 2. Check the network namespace status of the container. @@ -524,7 +526,7 @@ The network planning for the cluster is as follows: # Start an RDMA service $ ib_read_lat ``` - + Open another terminal, enter another Pod, and access the service: ``` @@ -578,9 +580,9 @@ For clusters using Infiniband networks, if there is a [UFM management platform]( 3. 
Install ib-kubernetes on the Kubernetes cluster ``` - $ git clone https://github.com/Mellanox/ib-kubernetes.git && cd ib-kubernetes - $ $ kubectl create -f deployment/ib-kubernetes-configmap.yaml - $ kubectl create -f deployment/ib-kubernetes.yaml + git clone https://github.com/Mellanox/ib-kubernetes.git && cd ib-kubernetes + kubectl create -f deployment/ib-kubernetes-configmap.yaml + kubectl create -f deployment/ib-kubernetes.yaml ``` 4. On Infiniband networks, when creating Spiderpool's SpiderMultusConfig, you can configure the Pkey. Pods created with this configuration will use the Pkey settings and be synchronized with UFM by ib-kubernetes. @@ -593,11 +595,157 @@ For clusters using Infiniband networks, if there is a [UFM management platform]( name: ib-sriov namespace: spiderpool spec: - cniType: ib-sriov - ibsriov: - pkey: 1000 - ... + cniType: ib-sriov + ibsriov: + pkey: 1000 + ... EOF ``` - + > Note: Each node in an Infiniband Kubernetes deployment may be associated with up to 128 PKeys due to a kernel limitation. + +## Auto Inject RDMA Resources base on webhook + +To reduce the complexity of configuring multiple network cards for AI applications, Spiderpool supports grouping a set of network card configurations through the label (cni.spidernet.io/rdma-resource-inject). Users only need to add an annotation with the same value to the Pod. This way, Spiderpool will automatically inject all the network cards and network resources carrying that label into the Pod through a webhook. + + > This feature only supports network card configurations whose cniType is one of [macvlan, ipvlan, sriov, ib-sriov, ipoib]. + +1. Install Spiderpool with the webhook automatic injection of network resources feature enabled: + + ```shell + helm install spiderpool spiderpool/spiderpool --set spiderpoolController.podResourceInject.enabled=true + ``` + + > - By default, the webhook automatic injection of network resources feature is disabled and needs to be manually enabled by the user. + > - You can specify namespaces to exclude from injection using `spiderpoolController.podResourceInject.namespacesExclude`, and specify namespaces to include for injection using `spiderpoolController.podResourceInject.namespacesInclude`. + > - After installing Spiderpool, you can update the configuration by modifying the podResourceInject field in the spiderpool-config configMap. + +2. Create SpiderMultusConfigs, specify the label, and configure the RDMA-related settings: + + a. For Infiniband Networks, configure [the IB-SRIOV CNI](https://github.com/k8snetworkplumbingwg/ib-sriov-cni) for all GPU-affinitized SR-IOV network cards and create the corresponding IP address pool. The following example configures the network card and IP address pool for GPU1 + + ```shell + $ cat < - `cni.spidernet.io/rdma-resource-inject: gpu-ibsriov` is a fixed key, and the value is user-defined. A group of network card configurations with the same `Label` and `Value` must have the same `cniType`. + > - `resourceName` and `ippools` must be configured, otherwise the Pod will fail to inject network resources successfully. + + b. For Ethernet Networks, configure [the SR-IOV CNI](https://github.com/k8snetworkplumbingwg/sriov-cni) for all GPU-affinitized SR-IOV network cards and create the corresponding IP address pool. The following example configures the network card and IP address pool for GPU1 + + ```shell + $ cat < - `cni.spidernet.io/rdma-resource-inject: gpu-sriov` is a fixed key, and the value is user-defined.
A group of network card configurations with the same `Label` and `Value` must have the same `cniType`. + > - `resourceName` and `ippools` must be configured, otherwise the Pod will fail to inject network resources successfully. + +3. Add the annotation `cni.spidernet.io/rdma-resource-inject: gpu-sriov` to the Pod, so that Spiderpool automatically adds 8 GPU-affinity network cards for RDMA communication and configures 8 types of RDMA resources: + + > Note: When using the webhook automatic injection of network resources feature, do not add other network configuration annotations (such as `k8s.v1.cni.cncf.io/networks` and `ipam.spidernet.io/ippools`) to the Pod, otherwise it will affect the automatic injection of resources. + + ```shell + $ helm repo add spiderchart https://spidernet-io.github.io/charts + $ helm repo update + $ helm search repo rdma-tools + + # run daemonset on worker1 and worker2 + $ cat << EOF > values.yaml + # users in China can add the following to use a domestic registry + #image: + # registry: ghcr.m.daocloud.io + + # just run daemonset in nodes 'worker1' and 'worker2' + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - worker1 + - worker2 + # sriov interfaces + extraAnnotations: + cni.spidernet.io/rdma-resource-inject: gpu-sriov + EOF + + $ helm install rdma-tools spiderchart/rdma-tools -f ./values.yaml + ``` + + When the Pod is Running, check whether the annotations for the 8 RDMA network cards and the 8 RDMA resources were successfully injected into the Pod. + + ```shell + # Pod multus annotations + k8s.v1.cni.cncf.io/networks: |- + [{"name":"gpu1-sriov","namespace":"spiderpool"}, + {"name":"gpu2-sriov","namespace":"spiderpool"}, + {"name":"gpu3-sriov","namespace":"spiderpool"}, + {"name":"gpu4-sriov","namespace":"spiderpool"}, + {"name":"gpu5-sriov","namespace":"spiderpool"}, + {"name":"gpu6-sriov","namespace":"spiderpool"}, + {"name":"gpu7-sriov","namespace":"spiderpool"}, + {"name":"gpu8-sriov","namespace":"spiderpool"}] + # sriov resource + resources: + requests: + spidernet.io/gpu1sriov: 1 + spidernet.io/gpu2sriov: 1 + spidernet.io/gpu3sriov: 1 + spidernet.io/gpu4sriov: 1 + spidernet.io/gpu5sriov: 1 + spidernet.io/gpu6sriov: 1 + spidernet.io/gpu7sriov: 1 + spidernet.io/gpu8sriov: 1 + #nvidia.com/gpu: 1 + ``` diff --git a/images/spiderpool-agent/Dockerfile index 39f1d04e94..afe1b5436c 100644 --- a/images/spiderpool-agent/Dockerfile +++ b/images/spiderpool-agent/Dockerfile @@ -5,7 +5,7 @@ ARG BASE_IMAGE=ghcr.io/spidernet-io/spiderpool/spiderpool-base:1f8330482d25b58d2 ARG GOLANG_IMAGE=docker.io/library/golang:1.23.2@sha256:adee809c2d0009a4199a11a1b2618990b244c6515149fe609e2788ddf164bd10 #======= build bin ========== -FROM --platform=${BUILDPLATFORM} ${GOLANG_IMAGE} as builder +FROM --platform=${BUILDPLATFORM} ${GOLANG_IMAGE} AS builder ARG TARGETOS ARG TARGETARCH @@ -60,7 +60,7 @@ ARG VERSION ENV VERSION=${VERSION} RUN groupadd -f spidernet \ - && echo ". /etc/profile.d/bash_completion.sh" >> /etc/bash.bashrc + && echo ". 
/etc/profile.d/bash_completion.sh" >> /etc/bash.bashrc COPY --from=builder /tmp/install/${TARGETOS}/${TARGETARCH}/bin/* /usr/bin/ COPY --from=builder /tmp/install/${TARGETOS}/${TARGETARCH}/bash-completion/* /etc/bash_completion.d/ diff --git a/images/spiderpool-base/Dockerfile b/images/spiderpool-base/Dockerfile index 2bd69b4a89..3db7d32881 100644 --- a/images/spiderpool-base/Dockerfile +++ b/images/spiderpool-base/Dockerfile @@ -9,7 +9,7 @@ ARG UBUNTU_IMAGE=docker.io/library/ubuntu:20.04@sha256:bea6d19168bbfd6af8d77c2cc #========= build gops =============== -FROM --platform=${BUILDPLATFORM} ${GOLANG_IMAGE} as gops-cni-builder +FROM --platform=${BUILDPLATFORM} ${GOLANG_IMAGE} AS gops-cni-builder COPY /build-gops.sh /build-gops.sh #use alibaba debian source @@ -25,7 +25,7 @@ RUN chmod +x /build-gops.sh && /build-gops.sh #========== root image ============== -FROM ${UBUNTU_IMAGE} as rootfs +FROM ${UBUNTU_IMAGE} AS rootfs COPY /install-others.sh /install-others.sh COPY /configure-iptables-wrapper.sh /configure-iptables-wrapper.sh COPY /iptables-wrapper /usr/sbin/iptables-wrapper diff --git a/images/spiderpool-controller/Dockerfile b/images/spiderpool-controller/Dockerfile index 43c1ef771b..7a7c9cb52b 100644 --- a/images/spiderpool-controller/Dockerfile +++ b/images/spiderpool-controller/Dockerfile @@ -5,7 +5,7 @@ ARG BASE_IMAGE=ghcr.io/spidernet-io/spiderpool/spiderpool-base:1f8330482d25b58d2 ARG GOLANG_IMAGE=docker.io/library/golang:1.23.2@sha256:adee809c2d0009a4199a11a1b2618990b244c6515149fe609e2788ddf164bd10 #======= build bin ========== -FROM --platform=${BUILDPLATFORM} ${GOLANG_IMAGE} as builder +FROM --platform=${BUILDPLATFORM} ${GOLANG_IMAGE} AS builder ARG TARGETOS ARG TARGETARCH @@ -55,7 +55,7 @@ ARG VERSION ENV VERSION=${VERSION} RUN groupadd -f spidernet \ - && echo ". /etc/profile.d/bash_completion.sh" >> /etc/bash.bashrc + && echo ". /etc/profile.d/bash_completion.sh" >> /etc/bash.bashrc COPY --from=builder /tmp/install/${TARGETOS}/${TARGETARCH}/bin/* /usr/bin/ COPY --from=builder /tmp/install/${TARGETOS}/${TARGETARCH}/bash-completion/* /etc/bash_completion.d/ diff --git a/images/spiderpool-plugins/Dockerfile b/images/spiderpool-plugins/Dockerfile index 97f23c9f72..2709b3864b 100644 --- a/images/spiderpool-plugins/Dockerfile +++ b/images/spiderpool-plugins/Dockerfile @@ -4,7 +4,7 @@ ARG GOLANG_IMAGE=docker.io/library/golang:1.23.2@sha256:adee809c2d0009a4199a11a1b2618990b244c6515149fe609e2788ddf164bd10 #======= build plugins ========== -FROM --platform=${BUILDPLATFORM} ${GOLANG_IMAGE} as builder +FROM --platform=${BUILDPLATFORM} ${GOLANG_IMAGE} AS builder ARG TARGETOS ARG TARGETARCH @@ -44,7 +44,7 @@ RUN git clone https://github.com/Mellanox/ipoib-cni.git WORKDIR /src/rdma-cni RUN git checkout ${RDMA_VERSION} && make TARGET_ARCH=${TARGETARCH} \ - TARGET_OS=${TARGETOS} build + TARGET_OS=${TARGETOS} build WORKDIR /src/ovs-cni RUN mkdir -p build && GOOS=${TARGETOS} GOARCH=${TARGETARCH} CGO_ENABLED=0 go build -v -ldflags="-s -w" -tags no_openssl -mod vendor -o build/ovs ./cmd/plugin diff --git a/pkg/constant/k8s.go b/pkg/constant/k8s.go index 1e06cc774c..9b88acde5e 100644 --- a/pkg/constant/k8s.go +++ b/pkg/constant/k8s.go @@ -51,7 +51,10 @@ const ( ) const ( - AnnotationPre = "ipam.spidernet.io" + // DEPRETED, Maintain backward compatibility, don't remove it. 
+ // and all new annotations use spidernet.io + AnnotationPre = "ipam.spidernet.io" + CNIAnnotationPre = "cni.spidernet.io" AnnoPodIPPool = AnnotationPre + "/ippool" AnnoPodIPPools = AnnotationPre + "/ippools" @@ -100,6 +103,10 @@ const ( //dra DraAnnotationPre = "dra.spidernet.io" AnnoDraCdiVersion = AnnotationPre + "/cdi-version" + + // webhook + PodMutatingWebhookName = "pods.spiderpool.spidernet.io" + AnnoPodResourceInject = CNIAnnotationPre + "/rdma-resource-inject" ) const ( diff --git a/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1/rbac.go b/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1/rbac.go index dffe4b4535..e086256096 100644 --- a/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1/rbac.go +++ b/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1/rbac.go @@ -15,7 +15,7 @@ // +kubebuilder:rbac:groups="",resources=namespaces;endpoints;pods;pods/status;configmaps,verbs=get;list;watch;update;patch;delete;deletecollection // +kubebuilder:rbac:groups=k8s.cni.cncf.io,resources=network-attachment-definitions,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=kubevirt.io,resources=virtualmachines;virtualmachineinstances,verbs=get;list -// +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=mutatingwebhookconfigurations;validatingwebhookconfigurations,verbs=get;list;watch;delete +// +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=mutatingwebhookconfigurations;validatingwebhookconfigurations,verbs=get;list;watch;delete;update // +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=apps.kruise.io,resources=clonesets;statefulsets,verbs=get;list;watch // +kubebuilder:rbac:groups=crd.projectcalico.org,resources=ippools,verbs=get;list;watch diff --git a/pkg/multuscniconfig/utils.go b/pkg/multuscniconfig/utils.go index afa3ece680..5aad1b5140 100644 --- a/pkg/multuscniconfig/utils.go +++ b/pkg/multuscniconfig/utils.go @@ -30,6 +30,7 @@ import ( coordinatorcmd "github.com/spidernet-io/spiderpool/cmd/coordinator/cmd" spiderpoolcmd "github.com/spidernet-io/spiderpool/cmd/spiderpool/cmd" + "github.com/spidernet-io/spiderpool/pkg/constant" spiderpoolv2beta1 "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1" ) @@ -220,3 +221,28 @@ func ParsePodNetworkObjectName(podnetwork string) (string, string, string, error return netNsName, networkName, netIfName, nil } + +// resourceName returns the appropriate resource name based on the CNI type and configuration +// of the given SpiderMultusConfig. 
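+// Illustrative mapping (example values): a config whose CniType is sriov with
+// SriovConfig.ResourceName "spidernet.io/gpu1sriov" yields "spidernet.io/gpu1sriov",
+// while a macvlan or ipvlan config with EnableRdma=false yields "", and such
+// configs are skipped by the resource injection logic.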
+func ResourceName(smc *spiderpoolv2beta1.SpiderMultusConfig) string { + switch *smc.Spec.CniType { + case constant.MacvlanCNI: + // For Macvlan CNI, return RDMA resource name if RDMA is enabled + if smc.Spec.MacvlanConfig != nil && smc.Spec.MacvlanConfig.EnableRdma { + return smc.Spec.MacvlanConfig.RdmaResourceName + } + case constant.IPVlanCNI: + if smc.Spec.IPVlanConfig != nil && smc.Spec.IPVlanConfig.EnableRdma { + return smc.Spec.IPVlanConfig.RdmaResourceName + } + case constant.SriovCNI: + if smc.Spec.SriovConfig != nil { + return smc.Spec.SriovConfig.ResourceName + } + case constant.IBSriovCNI: + if smc.Spec.IbSriovConfig != nil { + return smc.Spec.IbSriovConfig.ResourceName + } + } + return "" +} diff --git a/pkg/podmanager/pod_manager.go b/pkg/podmanager/pod_manager.go index 016e864b38..600454e125 100644 --- a/pkg/podmanager/pod_manager.go +++ b/pkg/podmanager/pod_manager.go @@ -7,6 +7,7 @@ import ( "context" "fmt" + crdclientset "github.com/spidernet-io/spiderpool/pkg/k8s/client/clientset/versioned" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -27,8 +28,9 @@ type PodManager interface { } type podManager struct { - client client.Client - apiReader client.Reader + client client.Client + apiReader client.Reader + SpiderClient crdclientset.Interface } func NewPodManager(client client.Client, apiReader client.Reader) (PodManager, error) { diff --git a/pkg/podmanager/pod_webhook.go b/pkg/podmanager/pod_webhook.go new file mode 100644 index 0000000000..1a1221e51d --- /dev/null +++ b/pkg/podmanager/pod_webhook.go @@ -0,0 +1,124 @@ +// Copyright 2022 Authors of spidernet-io +// SPDX-License-Identifier: Apache-2.0 +package podmanager + +import ( + "context" + + "github.com/spidernet-io/spiderpool/pkg/constant" + crdclientset "github.com/spidernet-io/spiderpool/pkg/k8s/client/clientset/versioned" + "github.com/spidernet-io/spiderpool/pkg/logutils" + "go.uber.org/zap" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + admissionregistrationv1 "k8s.io/client-go/kubernetes/typed/admissionregistration/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" +) + +var PodWebhookExcludeNamespaces = []string{ + metav1.NamespaceSystem, + metav1.NamespacePublic, + constant.Spiderpool, + "metallb-system", + "istio-system", + // more system namespaces to be added +} + +type PodWebhook interface { + admission.CustomDefaulter + admission.CustomValidator +} + +type podWebhook struct { + spiderClient crdclientset.Interface +} + +// InitPodWebhook initializes the pod webhook. +// It sets up the mutating webhook for pods and registers it with the manager. +// Parameters: +// - client: The Kubernetes client +// - mgr: The controller manager +// - mutatingWebhookName: The name of the mutating webhook +// +// Returns an error if initialization fails. 
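+// A hypothetical call site (names assumed, not taken from the actual controller setup):
+//
+//	cs := kubernetes.NewForConfigOrDie(ctrl.GetConfigOrDie())
+//	err := InitPodWebhook(cs.AdmissionregistrationV1(), mgr, "spiderpool-controller",
+//		conf.PodResourceInjectConfig.NamespacesExclude, conf.PodResourceInjectConfig.NamespacesInclude)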
+func InitPodWebhook( + admissionClient admissionregistrationv1.AdmissionregistrationV1Interface, + mgr ctrl.Manager, + mutatingWebhookName string, + webhookNamespaceExclude []string, + webhookNamespaceInclude []string) error { + spiderClient, err := crdclientset.NewForConfig(ctrl.GetConfigOrDie()) + if err != nil { + return err + } + + pw := &podWebhook{ + spiderClient: spiderClient, + } + + if len(webhookNamespaceExclude) != 0 { + PodWebhookExcludeNamespaces = webhookNamespaceExclude + } + + if err = AddPodMutatingWebhook(admissionClient, mutatingWebhookName, webhookNamespaceInclude); err != nil { + return err + } + + // setup mutating webhook for pods + if err = ctrl.NewWebhookManagedBy(mgr). + For(&corev1.Pod{}). + WithDefaulter(pw). + Complete(); err != nil { + return err + } + return nil +} + +// Default implements the defaulting webhook for pods. +// It injects network resources into the pod if it has the appropriate annotation. +// Parameters: +// - ctx: The context +// - obj: The runtime object (expected to be a Pod) +// +// Returns an error if defaulting fails. +func (pw *podWebhook) Default(ctx context.Context, obj runtime.Object) error { + logger := logutils.FromContext(ctx) + pod := obj.(*corev1.Pod) + mutateLogger := logger.Named("PodMutating").With( + zap.String("Pod", pod.GenerateName)) + mutateLogger.Sugar().Debugf("Request Pod: %+v", *pod) + + _, ok := pod.Annotations[constant.AnnoPodResourceInject] + if !ok { + return nil + } + + mutateLogger.Sugar().Debugf("Pod %s/%s is annotated with %s, start injecting network resources", pod.Namespace, pod.GenerateName, constant.AnnoPodResourceInject) + err := podNetworkMutatingWebhook(pw.spiderClient, pod) + if err != nil { + mutateLogger.Sugar().Errorf("Failed to inject network resources for pod %s/%s: %v", pod.Namespace, pod.GenerateName, err) + return err + } + mutateLogger.Sugar().Debugf("Pod %s/%s network resources injected, Pod: %v", pod.Namespace, pod.GenerateName, pod) + return nil +} + +// ValidateCreate implements the validation webhook for pod creation. +// Currently, it performs no validation and always returns nil. +func (pw *podWebhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { + return nil, nil +} + +// ValidateUpdate implements the validation webhook for pod updates. +// Currently, it performs no validation and always returns nil. +func (pw *podWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { + return nil, nil +} + +// ValidateDelete implements the validation webhook for pod deletion. +// Currently, it performs no validation and always returns nil. 
+func (pw *podWebhook) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { + return nil, nil +} diff --git a/pkg/podmanager/utils.go b/pkg/podmanager/utils.go index eaf4712295..b0b551c17e 100644 --- a/pkg/podmanager/utils.go +++ b/pkg/podmanager/utils.go @@ -4,12 +4,23 @@ package podmanager import ( + "context" + "fmt" + + crdclientset "github.com/spidernet-io/spiderpool/pkg/k8s/client/clientset/versioned" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + k8s_resource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + admissionClientv1 "k8s.io/client-go/kubernetes/typed/admissionregistration/v1" + "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" kubevirtv1 "kubevirt.io/api/core/v1" "github.com/spidernet-io/spiderpool/pkg/constant" + "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1" + "github.com/spidernet-io/spiderpool/pkg/multuscniconfig" ) func IsPodAlive(pod *corev1.Pod) bool { @@ -53,3 +64,317 @@ func IsStaticIPPod(enableStatefulSet, enableKubevirtStaticIP bool, pod *corev1.P return false } + +// podNetworkMutatingWebhook handles the mutating webhook for pod networks. +// It checks if the pod has the required label for mutation, retrieves the corresponding +// SpiderMultusConfigs, and injects the network configuration into the pod. +// +// Parameters: +// - apiReader: A client.Reader interface for accessing Kubernetes API objects +// - pod: A pointer to the corev1.Pod object to be mutated +// +// Returns: +// - An error if any step in the process fails, nil otherwise +func podNetworkMutatingWebhook(spiderClient crdclientset.Interface, pod *corev1.Pod) error { + multusLabelValue, ok := pod.Annotations[constant.AnnoPodResourceInject] + if !ok { + return nil + } + + labelSelector := metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: constant.AnnoPodResourceInject, + Operator: metav1.LabelSelectorOpIn, + Values: []string{multusLabelValue}, + }, + }, + } + + selector, err := metav1.LabelSelectorAsSelector(&labelSelector) + if err != nil { + return fmt.Errorf("failed to create label selector: %v", err) + } + + multusConfigs, err := spiderClient.SpiderpoolV2beta1().SpiderMultusConfigs("").List(context.TODO(), metav1.ListOptions{ + LabelSelector: selector.String(), + }) + if err != nil { + return err + } + + if len(multusConfigs.Items) == 0 { + return fmt.Errorf("No spidermultusconfig with label %v:%v found", constant.AnnoPodResourceInject, multusLabelValue) + } + + return InjectPodNetwork(pod, *multusConfigs) +} + +// injectPodNetwork injects network configurations into the pod based on the provided SpiderMultusConfigs. +// It checks for CNI type consistency, updates the pod's network attachment annotations, +// and prepares a map of resources to be injected. 
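+// For example (names assumed), two matching configs default/conf-a and default/conf-b
+// leave the pod annotated with
+//
+//	k8s.v1.cni.cncf.io/networks: default/conf-a,default/conf-b
+//
+// and one unit of each config's RDMA resource is added to the first container's
+// limits, unless some container already sets that resource.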
+// +// Parameters: +// - pod: A pointer to the corev1.Pod object to be updated +// - multusConfigs: A list of SpiderMultusConfig objects to be applied to the pod +// +// Returns: +// - An error if there's an inconsistency in CNI types, nil otherwise +func InjectPodNetwork(pod *corev1.Pod, multusConfigs v2beta1.SpiderMultusConfigList) error { + var cniType string + resourcesMap := make(map[string]bool, len(multusConfigs.Items)) + for _, mc := range multusConfigs.Items { + // Check the consistency of CNI type + if cniType != "" && cniType != *mc.Spec.CniType { + return fmt.Errorf("spidermultusconfig %s/%s cniType %s is not consistent with %s", mc.Namespace, mc.Name, *mc.Spec.CniType, cniType) + } else { + // If it's the first time setting, or consistent with the previous + // type, update cniType + cniType = *mc.Spec.CniType + } + + if err := doValidateRdmaResouceAndIPPools(mc); err != nil { + return err + } + + // Update the pod's network attachment + if networks, ok := pod.Annotations[constant.MultusNetworkAttachmentAnnot]; !ok { + pod.Annotations[constant.MultusNetworkAttachmentAnnot] = fmt.Sprintf("%s/%s", mc.Namespace, mc.Name) + } else { + pod.Annotations[constant.MultusNetworkAttachmentAnnot] = networks + "," + fmt.Sprintf("%s/%s", mc.Namespace, mc.Name) + } + + resourceName := multuscniconfig.ResourceName(&mc) + if resourceName == "" { + continue + } + + if _, ok := resourcesMap[resourceName]; !ok { + resourcesMap[resourceName] = false + } + } + InjectRdmaResourceToPod(resourcesMap, pod) + return nil +} + +// injectRdmaResourceToPod injects RDMA resources into the pod's containers. +// It checks each container for existing resource requests/limits and updates +// the resourceMap accordingly. If a resource is not found in any container, +// it is injected into the first container's resource requests. +// +// Parameters: +// - resourceMap: A map of resource names to boolean values indicating if they've been found +// - pod: A pointer to the corev1.Pod object to be updated +func InjectRdmaResourceToPod(resourceMap map[string]bool, pod *corev1.Pod) { + for _, c := range pod.Spec.Containers { + for resource := range resourceMap { + if resourceMap[resource] { + // the resource has found in pod, skip + continue + } + + // try to find the resource in container resources.limits + if _, ok := c.Resources.Limits[corev1.ResourceName(resource)]; ok { + resourceMap[resource] = true + } + } + } + + for resource, found := range resourceMap { + if found { + continue + } + if pod.Spec.Containers[0].Resources.Limits == nil { + pod.Spec.Containers[0].Resources.Limits = make(corev1.ResourceList) + } + pod.Spec.Containers[0].Resources.Limits[corev1.ResourceName(resource)] = k8s_resource.MustParse("1") + } +} + +// InitPodMutatingWebhook initializes a mutating webhook for pods based on a template webhook. +// It sets up the webhook configuration including name, admission review versions, failure policy, +// object selector, client config, and rules for pod creation and update operations. 
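+// With the default excludes, the generated namespaceSelector is roughly (illustrative):
+//
+//	matchExpressions:
+//	- key: kubernetes.io/metadata.name
+//	  operator: NotIn
+//	  values: [kube-system, kube-public, spiderpool, metallb-system, istio-system]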
+// +// Parameters: +// - from: An admissionregistrationv1.MutatingWebhook object to use as a template +// +// Returns: +// - A new admissionregistrationv1.MutatingWebhook object configured for pod mutation +func InitPodMutatingWebhook(from admissionregistrationv1.MutatingWebhook, webhookNamespaceInclude []string) admissionregistrationv1.MutatingWebhook { + wb := admissionregistrationv1.MutatingWebhook{ + Name: constant.PodMutatingWebhookName, + AdmissionReviewVersions: from.AdmissionReviewVersions, + FailurePolicy: ptr.To(admissionregistrationv1.Fail), + NamespaceSelector: &metav1.LabelSelector{}, + ClientConfig: admissionregistrationv1.WebhookClientConfig{ + CABundle: from.ClientConfig.CABundle, + }, + Rules: []admissionregistrationv1.RuleWithOperations{ + { + Operations: []admissionregistrationv1.OperationType{ + admissionregistrationv1.Create, + admissionregistrationv1.Update, + }, + Rule: admissionregistrationv1.Rule{ + APIGroups: []string{""}, + APIVersions: []string{"v1"}, + Resources: []string{"pods"}, + }, + }, + }, + SideEffects: ptr.To(admissionregistrationv1.SideEffectClassNone), + } + + if from.ClientConfig.Service != nil { + wb.ClientConfig.Service = &admissionregistrationv1.ServiceReference{ + Name: from.ClientConfig.Service.Name, + Namespace: from.ClientConfig.Service.Namespace, + Port: from.ClientConfig.Service.Port, + // format: /mutate--- + Path: ptr.To("/mutate--v1-pod"), + } + } + + if len(PodWebhookExcludeNamespaces) != 0 { + wb.NamespaceSelector.MatchExpressions = []metav1.LabelSelectorRequirement{ + { + Key: corev1.LabelMetadataName, + Operator: metav1.LabelSelectorOpNotIn, + Values: PodWebhookExcludeNamespaces, + }, + } + } + + if len(webhookNamespaceInclude) != 0 { + wb.NamespaceSelector.MatchExpressions = append(wb.NamespaceSelector.MatchExpressions, metav1.LabelSelectorRequirement{ + Key: corev1.LabelMetadataName, + Operator: metav1.LabelSelectorOpIn, + Values: webhookNamespaceInclude, + }) + } + return wb +} + +// addPodMutatingWebhook updates the MutatingWebhookConfiguration for pods. +// It retrieves the existing configuration, adds a new webhook for pods, +// and updates the configuration in the Kubernetes API server. +func AddPodMutatingWebhook(admissionClient admissionClientv1.AdmissionregistrationV1Interface, mutatingWebhookName string, webhookNamespaceInclude []string) error { + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + mwc, err := admissionClient.MutatingWebhookConfigurations().Get(context.TODO(), mutatingWebhookName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get MutatingWebhookConfiguration: %v", err) + } + + if len(mwc.Webhooks) == 0 { + return fmt.Errorf("no any mutating webhook found in MutatingWebhookConfiguration %s", mutatingWebhookName) + } + + for _, wb := range mwc.Webhooks { + // if the webhook already exists, do nothing + if wb.Name == constant.PodMutatingWebhookName { + return nil + } + } + podWebhook := InitPodMutatingWebhook(*mwc.Webhooks[0].DeepCopy(), webhookNamespaceInclude) + mwc.Webhooks = append(mwc.Webhooks, podWebhook) + + _, updateErr := admissionClient.MutatingWebhookConfigurations().Update(context.TODO(), mwc, metav1.UpdateOptions{}) + return updateErr + }) + if retryErr != nil { + return fmt.Errorf("update MutatingWebhookConfiguration %s failed: %v", mutatingWebhookName, retryErr) + } + + return nil +} + +// RemovePodMutatingWebhook removes the mutating webhook for pods. 
+// It retrieves the existing configuration, removes the webhook for pods, +// and updates the configuration in the Kubernetes API server. +func RemovePodMutatingWebhook(admissionClient admissionClientv1.AdmissionregistrationV1Interface, mutatingWebhookName string) error { + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + mwc, err := admissionClient.MutatingWebhookConfigurations().Get(context.TODO(), mutatingWebhookName, metav1.GetOptions{}) + if err != nil { + return err + } + + var newWebhooks []admissionregistrationv1.MutatingWebhook + for _, wb := range mwc.Webhooks { + if wb.Name != constant.PodMutatingWebhookName { + newWebhooks = append(newWebhooks, wb) + } + } + + if len(newWebhooks) == len(mwc.Webhooks) { + return nil + } + + mwc.Webhooks = newWebhooks + _, err = admissionClient.MutatingWebhookConfigurations().Update(context.TODO(), mwc, metav1.UpdateOptions{}) + if err != nil { + return err + } + return nil + }) + if retryErr != nil { + return fmt.Errorf("removes the mutating webhook for pods: %v", retryErr) + } + return nil +} + +func doValidateRdmaResouceAndIPPools(mc v2beta1.SpiderMultusConfig) error { + doValidateIPPools := func(name, namespace string, ippools *v2beta1.SpiderpoolPools) error { + if ippools == nil { + return fmt.Errorf("no any ippools config for spidermultusconfig %s/%s", namespace, name) + } + + if len(ippools.IPv4IPPool)+len(ippools.IPv6IPPool) == 0 { + return fmt.Errorf("no any ippools config for spidermultusconfig %s/%s", namespace, name) + } + return nil + } + + spec := mc.Spec + switch *spec.CniType { + case constant.MacvlanCNI: + if !spec.MacvlanConfig.EnableRdma { + return fmt.Errorf("spidermultusconfig %s/%s not enable RDMA", mc.Namespace, mc.Name) + } + + if spec.MacvlanConfig.RdmaResourceName == "" { + return fmt.Errorf("rdmaResourceName can not empty for spidermultusconfig %s/%s", mc.Namespace, mc.Name) + } + + return doValidateIPPools(mc.Name, mc.Namespace, spec.MacvlanConfig.SpiderpoolConfigPools) + case constant.IPVlanCNI: + if !spec.IPVlanConfig.EnableRdma { + return fmt.Errorf("spidermultusconfig %s/%s not enable RDMA", mc.Namespace, mc.Name) + } + + if spec.IPVlanConfig.RdmaResourceName == "" { + return fmt.Errorf("rdmaResourceName can not empty for spidermultusconfig %s/%s", mc.Namespace, mc.Name) + } + + return doValidateIPPools(mc.Name, mc.Namespace, spec.IPVlanConfig.SpiderpoolConfigPools) + case constant.SriovCNI: + if !spec.SriovConfig.EnableRdma { + return fmt.Errorf("spidermultusconfig %s/%s not enable RDMA", mc.Namespace, mc.Name) + } + + if spec.SriovConfig.ResourceName == "" { + return fmt.Errorf("resourceName can not empty for spidermultusconfig %s/%s", mc.Namespace, mc.Name) + } + + return doValidateIPPools(mc.Name, mc.Namespace, spec.SriovConfig.SpiderpoolConfigPools) + case constant.IBSriovCNI: + if spec.IbSriovConfig.ResourceName == "" { + return fmt.Errorf("resourceName can not empty for spidermultusconfig %s/%s", mc.Namespace, mc.Name) + } + + return doValidateIPPools(mc.Name, mc.Namespace, spec.IbSriovConfig.SpiderpoolConfigPools) + case constant.IPoIBCNI: + return doValidateIPPools(mc.Name, mc.Namespace, spec.IpoibConfig.SpiderpoolConfigPools) + default: + return fmt.Errorf("RDMA resource injection does not support cniType: %s", *spec.CniType) + } +} diff --git a/pkg/podmanager/utils_test.go b/pkg/podmanager/utils_test.go index b91c7f8c50..7909e4afad 100644 --- a/pkg/podmanager/utils_test.go +++ b/pkg/podmanager/utils_test.go @@ -4,10 +4,17 @@ package podmanager_test import ( + "context" + . 
"github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/spidernet-io/spiderpool/pkg/constant" + "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" "k8s.io/utils/ptr" "github.com/spidernet-io/spiderpool/pkg/podmanager" @@ -75,4 +82,506 @@ var _ = Describe("PodManager utils", Label("pod_manager_utils_test"), func() { Expect(isAlive).To(BeTrue()) }) }) + + Describe("Test injectPodNetwork", Label("inject_pod_network_test"), func() { + var pod *corev1.Pod + var multusConfigs v2beta1.SpiderMultusConfigList + + BeforeEach(func() { + pod = &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: make(map[string]string), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{}, + Limits: corev1.ResourceList{}, + }, + }, + }, + }, + } + }) + + It("should successfully inject network configuration", func() { + multusConfigs = v2beta1.SpiderMultusConfigList{ + Items: []v2beta1.SpiderMultusConfig{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config1", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource1", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config2", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource2", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + }, + } + err := podmanager.InjectPodNetwork(pod, multusConfigs) + Expect(err).NotTo(HaveOccurred()) + Expect(pod.Annotations[constant.MultusNetworkAttachmentAnnot]).To(Equal("default/config1,default/config2")) + + Expect(pod.Spec.Containers[0].Resources.Limits).To(HaveKey(corev1.ResourceName("spidernet.io/rdma-resource1"))) + Expect(pod.Spec.Containers[0].Resources.Limits).To(HaveKey(corev1.ResourceName("spidernet.io/rdma-resource2"))) + }) + + It("should return an error when no ippools configured", func() { + multusConfigs = v2beta1.SpiderMultusConfigList{ + Items: []v2beta1.SpiderMultusConfig{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config1", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource1", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config2", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource2", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + }, + } + err := podmanager.InjectPodNetwork(pod, multusConfigs) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("no any ippools config")) + }) + + It("should return an error when not 
disable rdma", func() { + multusConfigs = v2beta1.SpiderMultusConfigList{ + Items: []v2beta1.SpiderMultusConfig{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config1", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: false, + RdmaResourceName: "spidernet.io/rdma-resource1", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config2", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource2", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + }, + } + err := podmanager.InjectPodNetwork(pod, multusConfigs) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("not enable RDMA")) + }) + + It("should preserve existing resources in the Pod", func() { + // Set some pre-existing resources + pod.Spec.Containers[0].Resources.Limits = corev1.ResourceList{ + corev1.ResourceName("spidernet.io/rdma-resource1"): resource.MustParse("1"), + corev1.ResourceName("existing-resource"): resource.MustParse("10"), + } + + multusConfigs = v2beta1.SpiderMultusConfigList{ + Items: []v2beta1.SpiderMultusConfig{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config1", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource1", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config2", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource2", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + }, + } + + err := podmanager.InjectPodNetwork(pod, multusConfigs) + Expect(err).NotTo(HaveOccurred()) + Expect(pod.Annotations[constant.MultusNetworkAttachmentAnnot]).To(Equal("default/config1,default/config2")) + + // Verify that existing resources are preserved + Expect(pod.Spec.Containers[0].Resources.Limits).To(HaveKey(corev1.ResourceName("spidernet.io/rdma-resource1"))) + Expect(pod.Spec.Containers[0].Resources.Limits).To(HaveKey(corev1.ResourceName("spidernet.io/rdma-resource2"))) + Expect(pod.Spec.Containers[0].Resources.Limits).To(HaveKey(corev1.ResourceName("existing-resource"))) + Expect(pod.Spec.Containers[0].Resources.Limits[corev1.ResourceName("existing-resource")]).To(Equal(resource.MustParse("10"))) + }) + + It("should return an error when CNI types are inconsistent", func() { + multusConfigs = v2beta1.SpiderMultusConfigList{ + Items: []v2beta1.SpiderMultusConfig{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config1", + Namespace: "default", + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("macvlan"), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource1", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "config2", + Namespace: "default", + }, 
+ Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To("ipvlan"), + IPVlanConfig: &v2beta1.SpiderIPvlanCniConfig{ + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma-resource2", + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test1"}, + }, + }, + }, + }, + }, + } + + err := podmanager.InjectPodNetwork(pod, multusConfigs) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cniType ipvlan is not consistent with macvlan")) + }) + }) + + Describe("Utils", func() { + Context("initPodMutatingWebhook", func() { + It("should properly initialize pod mutating webhook with full configuration", func() { + // Prepare test data + testCABundle := []byte("test-ca-bundle") + fromWebhook := admissionregistrationv1.MutatingWebhook{ + AdmissionReviewVersions: []string{"v1", "v1beta1"}, + ClientConfig: admissionregistrationv1.WebhookClientConfig{ + CABundle: testCABundle, + Service: &admissionregistrationv1.ServiceReference{ + Name: "test-service", + Namespace: "test-namespace", + Port: ptr.To(int32(443)), + }, + }, + } + + // Call the function under test + podWebhookNamespaceInclude := []string{ + "test", + } + result := podmanager.InitPodMutatingWebhook(fromWebhook, podWebhookNamespaceInclude) + + // Verify results + Expect(result.Name).To(Equal(constant.PodMutatingWebhookName)) + Expect(result.AdmissionReviewVersions).To(Equal(fromWebhook.AdmissionReviewVersions)) + Expect(*result.FailurePolicy).To(Equal(admissionregistrationv1.Fail)) + + // Verify NamespaceSelector + Expect(result.NamespaceSelector).NotTo(BeNil()) + Expect(result.NamespaceSelector.MatchExpressions).To(HaveLen(2)) + Expect(result.NamespaceSelector.MatchExpressions[0].Key).To(Equal(corev1.LabelMetadataName)) + Expect(result.NamespaceSelector.MatchExpressions[0].Operator).To(Equal(metav1.LabelSelectorOpNotIn)) + Expect(result.NamespaceSelector.MatchExpressions[1].Key).To(Equal(corev1.LabelMetadataName)) + Expect(result.NamespaceSelector.MatchExpressions[1].Operator).To(Equal(metav1.LabelSelectorOpIn)) + + // Verify ClientConfig + Expect(result.ClientConfig.CABundle).To(Equal(testCABundle)) + Expect(result.ClientConfig.Service).NotTo(BeNil()) + Expect(result.ClientConfig.Service.Name).To(Equal("test-service")) + Expect(result.ClientConfig.Service.Namespace).To(Equal("test-namespace")) + Expect(*result.ClientConfig.Service.Port).To(Equal(int32(443))) + Expect(*result.ClientConfig.Service.Path).To(Equal("/mutate--v1-pod")) + + // Verify Rules + Expect(result.Rules).To(HaveLen(1)) + Expect(result.Rules[0].Operations).To(ConsistOf( + admissionregistrationv1.Create, + admissionregistrationv1.Update, + )) + Expect(result.Rules[0].Rule.APIGroups).To(Equal([]string{""})) + Expect(result.Rules[0].Rule.APIVersions).To(Equal([]string{"v1"})) + Expect(result.Rules[0].Rule.Resources).To(Equal([]string{"pods"})) + + // Verify SideEffects + Expect(*result.SideEffects).To(Equal(admissionregistrationv1.SideEffectClassNone)) + }) + + It("should properly initialize webhook without Service configuration", func() { + // Prepare test data + fromWebhook := admissionregistrationv1.MutatingWebhook{ + AdmissionReviewVersions: []string{"v1"}, + ClientConfig: admissionregistrationv1.WebhookClientConfig{ + CABundle: []byte("test-ca-bundle"), + }, + } + + // Call the function under test + result := podmanager.InitPodMutatingWebhook(fromWebhook, []string{}) + + // Verify results + Expect(result.ClientConfig.Service).To(BeNil()) + Expect(result.Name).To(Equal(constant.PodMutatingWebhookName)) + }) + }) + }) + + 
Describe("AddPodMutatingWebhook", func() { + var ( + fakeClient *fake.Clientset + webhookName string + existingConfig *admissionregistrationv1.MutatingWebhookConfiguration + podWebhookNamespaceInclude []string + ) + + BeforeEach(func() { + // Initialize test variables + fakeClient = fake.NewSimpleClientset() + webhookName = "test-webhook-config" + podWebhookNamespaceInclude = []string{ + "test", + } + + // Create a basic webhook configuration + existingConfig = &admissionregistrationv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookName, + }, + Webhooks: []admissionregistrationv1.MutatingWebhook{ + { + Name: "existing-webhook", + ClientConfig: admissionregistrationv1.WebhookClientConfig{ + CABundle: []byte("test-ca-bundle"), + Service: &admissionregistrationv1.ServiceReference{ + Name: "webhook-service", + Namespace: "default", + Port: ptr.To(int32(443)), + }, + }, + AdmissionReviewVersions: []string{"v1"}, + }, + }, + } + }) + + Context("when adding pod mutating webhook", func() { + It("should successfully add webhook when it doesn't exist", func() { + // Create initial webhook configuration + _, err := fakeClient.AdmissionregistrationV1().MutatingWebhookConfigurations().Create( + context.TODO(), existingConfig, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Call the function under test + err = podmanager.AddPodMutatingWebhook(fakeClient.AdmissionregistrationV1(), webhookName, podWebhookNamespaceInclude) + Expect(err).NotTo(HaveOccurred()) + + // Verify the webhook was added + updatedConfig, err := fakeClient.AdmissionregistrationV1().MutatingWebhookConfigurations().Get( + context.TODO(), webhookName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(updatedConfig.Webhooks).To(HaveLen(2)) + Expect(updatedConfig.Webhooks[1].Name).To(Equal(constant.PodMutatingWebhookName)) + }) + + It("should not add webhook when it already exists", func() { + // Add pod webhook to initial configuration + podWebhook := podmanager.InitPodMutatingWebhook(existingConfig.Webhooks[0], podWebhookNamespaceInclude) + existingConfig.Webhooks = append(existingConfig.Webhooks, podWebhook) + + // Create webhook configuration with pod webhook + _, err := fakeClient.AdmissionregistrationV1().MutatingWebhookConfigurations().Create( + context.TODO(), existingConfig, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Call the function under test + err = podmanager.AddPodMutatingWebhook(fakeClient.AdmissionregistrationV1(), webhookName, podWebhookNamespaceInclude) + Expect(err).NotTo(HaveOccurred()) + + // Verify no additional webhook was added + updatedConfig, err := fakeClient.AdmissionregistrationV1().MutatingWebhookConfigurations().Get( + context.TODO(), webhookName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(updatedConfig.Webhooks).To(HaveLen(2)) + }) + + It("should return error when webhook configuration doesn't exist", func() { + // Call the function under test without creating webhook configuration + err := podmanager.AddPodMutatingWebhook(fakeClient.AdmissionregistrationV1(), webhookName, podWebhookNamespaceInclude) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to get MutatingWebhookConfiguration")) + }) + + It("should return error when webhook configuration is empty", func() { + // Create empty webhook configuration + emptyConfig := &admissionregistrationv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookName, + }, + } + _, err := 
fakeClient.AdmissionregistrationV1().MutatingWebhookConfigurations().Create( + context.TODO(), emptyConfig, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Call the function under test + err = podmanager.AddPodMutatingWebhook(fakeClient.AdmissionregistrationV1(), webhookName, podWebhookNamespaceInclude) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("no any mutating webhook found")) + }) + }) + }) + + var _ = Describe("RemovePodMutatingWebhook", func() { + var ( + // Mock admission client + fakeClient *fake.Clientset + // Test webhook name + webhookName string + ) + + BeforeEach(func() { + // Initialize test variables + // Initialize test variables + fakeClient = fake.NewSimpleClientset() + webhookName = "test-webhook-config" + }) + + Context("when removing pod mutating webhook", func() { + It("should successfully remove the webhook if it exists", func() { + // Prepare existing webhook configuration + existingWebhooks := []admissionregistrationv1.MutatingWebhook{ + {Name: constant.PodMutatingWebhookName}, + {Name: "other-webhook"}, + } + + mwc := &admissionregistrationv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookName, + }, + Webhooks: existingWebhooks, + } + + // Setup mock behavior + _, err := fakeClient.AdmissionregistrationV1().MutatingWebhookConfigurations().Create( + context.TODO(), mwc, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Execute test + err = podmanager.RemovePodMutatingWebhook(fakeClient.AdmissionregistrationV1(), webhookName) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should return nil if webhook doesn't exist", func() { + // Prepare existing webhook configuration + existingWebhooks := []admissionregistrationv1.MutatingWebhook{ + {Name: "other-webhook"}, + } + + mwc := &admissionregistrationv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: webhookName, + }, + Webhooks: existingWebhooks, + } + + // Setup mock behavior + _, err := fakeClient.AdmissionregistrationV1().MutatingWebhookConfigurations().Create( + context.TODO(), mwc, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Execute test + err = podmanager.RemovePodMutatingWebhook(fakeClient.AdmissionregistrationV1(), webhookName) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should return error when getting webhook configuration fails", func() { + err := podmanager.RemovePodMutatingWebhook(fakeClient.AdmissionregistrationV1(), webhookName) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("not found")) + }) + }) + }) }) diff --git a/pkg/types/k8s.go b/pkg/types/k8s.go index 8cb7c2d9fb..f034de56ba 100644 --- a/pkg/types/k8s.go +++ b/pkg/types/k8s.go @@ -109,13 +109,20 @@ type AutoPoolProperty struct { } type SpiderpoolConfigmapConfig struct { - IpamUnixSocketPath string `yaml:"ipamUnixSocketPath"` - EnableIPv4 bool `yaml:"enableIPv4"` - EnableIPv6 bool `yaml:"enableIPv6"` - TuneSysctlConfig bool `yaml:"tuneSysctlConfig"` - EnableStatefulSet bool `yaml:"enableStatefulSet"` - EnableKubevirtStaticIP bool `yaml:"enableKubevirtStaticIP"` - EnableSpiderSubnet bool `yaml:"enableSpiderSubnet"` - EnableAutoPoolForApplication bool `yaml:"enableAutoPoolForApplication"` - ClusterSubnetAutoPoolDefaultRedundantIPNumber int `yaml:"clusterSubnetAutoPoolDefaultRedundantIPNumber"` + IpamUnixSocketPath string `yaml:"ipamUnixSocketPath"` + EnableIPv4 bool `yaml:"enableIPv4"` + EnableIPv6 bool `yaml:"enableIPv6"` + TuneSysctlConfig bool `yaml:"tuneSysctlConfig"` + 
EnableStatefulSet bool `yaml:"enableStatefulSet"` + EnableKubevirtStaticIP bool `yaml:"enableKubevirtStaticIP"` + EnableSpiderSubnet bool `yaml:"enableSpiderSubnet"` + EnableAutoPoolForApplication bool `yaml:"enableAutoPoolForApplication"` + ClusterSubnetAutoPoolDefaultRedundantIPNumber int `yaml:"clusterSubnetAutoPoolDefaultRedundantIPNumber"` + PodResourceInjectConfig PodResourceInjectConfig `yaml:"podResourceInject"` +} + +type PodResourceInjectConfig struct { + Enabled bool `yaml:"enabled"` + NamespacesExclude []string `yaml:"namespacesExclude"` + NamespacesInclude []string `yaml:"namespacesInclude"` } diff --git a/test/Makefile b/test/Makefile index 0a9f2b1943..9b02267078 100644 --- a/test/Makefile +++ b/test/Makefile @@ -333,6 +333,7 @@ setup_spiderpool: HELM_OPTION+=" --set clusterDefaultPool.ipv4IPRanges={$${ipv4_ip_range}} --set clusterDefaultPool.ipv6IPRanges={$${ipv6_ip_range}}" ; \ HELM_OPTION+=" --set ipam.enableIPv4=true --set ipam.enableIPv6=true" ; \ fi ; \ + HELM_OPTION+=" --set spiderpoolController.podResourceInject.enabled=true " ; \ HELM_OPTION+=" --set spiderpoolAgent.prometheus.enabled=true --set spiderpoolController.prometheus.enabled=true " ; \ HELM_OPTION+=" --set spiderpoolAgent.prometheus.enabledDebugMetric=true --set spiderpoolController.prometheus.enabledDebugMetric=true " ; \ if [ -n "$(PYROSCOPE_LOCAL_PORT)" ] ; then \ @@ -357,6 +358,7 @@ setup_spiderpool: --set spiderpoolController.image.registry="" \ --set spiderpoolController.image.repository=$(SPIDERPOOL_CONTROLLER_IMAGE_NAME) \ --set spiderpoolController.image.tag=$(E2E_SPIDERPOOL_TAG) \ + --set spiderpoolController.enablePodNetworkResourceInject=true \ --set spiderpoolInit.image.registry="" \ --set spiderpoolInit.image.repository=$(SPIDERPOOL_CONTROLLER_IMAGE_NAME) \ --set spiderpoolInit.image.tag=$(E2E_SPIDERPOOL_TAG) \ @@ -429,6 +431,7 @@ helm_upgrade_spiderpool: --set spiderpoolController.image.registry="" \ --set spiderpoolController.image.repository=$(SPIDERPOOL_CONTROLLER_IMAGE_NAME) \ --set spiderpoolController.image.tag=$(E2E_SPIDERPOOL_TAG) \ + --set spiderpoolController.podResourceInject.enabled=true \ --set spiderpoolInit.image.registry="" \ --set spiderpoolInit.image.repository=$(SPIDERPOOL_CONTROLLER_IMAGE_NAME) \ --set spiderpoolInit.image.tag=$(E2E_SPIDERPOOL_TAG) \ diff --git a/test/doc/podwebhook.md b/test/doc/podwebhook.md new file mode 100644 index 0000000000..e3c9ed63e9 --- /dev/null +++ b/test/doc/podwebhook.md @@ -0,0 +1,5 @@ +# E2E Cases for Pod Webhook + +| Case ID | Title | Priority | Smoke | Status | Other | +| ------- | --------------------------------------------------------------------------------- | -------- | ----- | ------ | ----- | +| H00001 | test pod webhook auto inject resource to pod | p1 | true | done | | diff --git a/test/e2e/podwebhook/podwebhook_suite_test.go b/test/e2e/podwebhook/podwebhook_suite_test.go new file mode 100644 index 0000000000..bcae769fc7 --- /dev/null +++ b/test/e2e/podwebhook/podwebhook_suite_test.go @@ -0,0 +1,29 @@ +// Copyright 2024 Authors of spidernet-io +// SPDX-License-Identifier: Apache-2.0 + +package podwebhook_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + e2e "github.com/spidernet-io/e2eframework/framework" + spiderpool "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1" + "k8s.io/apimachinery/pkg/runtime" +) + +func TestPodwebhook(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Podwebhook Suite") +} + +var frame *e2e.Framework + +var _ = BeforeSuite(func() { + defer GinkgoRecover() + var e error + frame, e = e2e.NewFramework(GinkgoT(), []func(*runtime.Scheme) error{spiderpool.AddToScheme}) + Expect(e).NotTo(HaveOccurred()) +}) diff --git a/test/e2e/podwebhook/podwebhook_test.go b/test/e2e/podwebhook/podwebhook_test.go new file mode 100644 index 0000000000..3a999dfdc4 --- /dev/null +++ b/test/e2e/podwebhook/podwebhook_test.go @@ -0,0 +1,112 @@ +// Copyright 2024 Authors of spidernet-io +// SPDX-License-Identifier: Apache-2.0 + +package podwebhook_test + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/spidernet-io/spiderpool/pkg/constant" + "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1" + "github.com/spidernet-io/spiderpool/test/e2e/common" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" +) + +var _ = Describe("Podwebhook", func() { + var namespace string + + BeforeEach(func() { + // create namespace + namespace = "ns-" + common.GenerateString(10, true) + err := frame.CreateNamespaceUntilDefaultServiceAccountReady(namespace, common.ServiceAccountReadyTimeout) + Expect(err).NotTo(HaveOccurred()) + + DeferCleanup(func() { + if CurrentSpecReport().Failed() { + GinkgoWriter.Println("If the use case fails, the cleanup step will be skipped") + return + } + + err := frame.DeleteNamespace(namespace) + Expect(err).NotTo(HaveOccurred(), "Failed to delete namespace %v") + }) + }) + + Context("Test inject pod network resources", func() { + It("Test inject pod network resources", Label("H00001"), func() { + // Define multus cni NetworkAttachmentDefinition and create + createNad := func(name string) *v2beta1.SpiderMultusConfig { + return &v2beta1.SpiderMultusConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{ + constant.AnnoPodResourceInject: "macvlan-rdma", + }, + }, + Spec: v2beta1.MultusCNIConfigSpec{ + CniType: ptr.To(constant.MacvlanCNI), + MacvlanConfig: &v2beta1.SpiderMacvlanCniConfig{ + Master: []string{common.NIC1}, + EnableRdma: true, + RdmaResourceName: "spidernet.io/rdma_resource" + "_" + name, + SpiderpoolConfigPools: &v2beta1.SpiderpoolPools{ + IPv4IPPool: []string{"test-ipv4-pool"}, + }, + }, + }, + } + } + + By("Create spiderMultusConfig: nad1 for testing") + Expect(frame.CreateSpiderMultusInstance(createNad("nad1"))).NotTo(HaveOccurred()) + By("Create spiderMultusConfig: nad2 for testing") + Expect(frame.CreateSpiderMultusInstance(createNad("nad2"))).NotTo(HaveOccurred()) + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: namespace, + Annotations: map[string]string{ + constant.AnnoPodResourceInject: "macvlan-rdma", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "samplepod", + Image: "alpine", + ImagePullPolicy: "IfNotPresent", + Command: []string{"/bin/ash", "-c", "while true; do echo 'HTTP/1.1 200 OK Hello, World!' 
| nc -l -p 80; done"}, + Ports: []corev1.ContainerPort{ + { + Name: "samplepod", + ContainerPort: 80, + }, + }, + }, + }, + }, + } + + By("Create Pod for testing network resources inject") + err := frame.CreatePod(pod) + Expect(err).NotTo(HaveOccurred()) + + By("Check pod network annotations and resources") + p, err := frame.GetPod(pod.Name, pod.Namespace) + Expect(err).NotTo(HaveOccurred(), "failed to get pod: %v", err) + + GinkgoWriter.Printf("Pod annotations: %v\n", p.Annotations) + GinkgoWriter.Printf("Pod resources: %v\n", p.Spec.Containers[0].Resources.Limits) + Expect(p.Annotations[constant.MultusNetworkAttachmentAnnot]).To(Equal(fmt.Sprintf("%s/%s,%s/%s", namespace, "nad1", namespace, "nad2"))) + Expect(p.Spec.Containers[0].Resources.Requests).To(HaveKey(corev1.ResourceName("spidernet.io/rdma_resource_nad1"))) + Expect(p.Spec.Containers[0].Resources.Requests).To(HaveKey(corev1.ResourceName("spidernet.io/rdma_resource_nad2"))) + }) + }) +}) diff --git a/test/e2e/reliability/reliability_test.go b/test/e2e/reliability/reliability_test.go index 58e5608630..0ff199a090 100644 --- a/test/e2e/reliability/reliability_test.go +++ b/test/e2e/reliability/reliability_test.go @@ -62,7 +62,7 @@ var _ = Describe("test reliability", Label("reliability"), Serial, func() { // Define a set of daemonSets with Pods on each node to verify that the components on each node can provide services for the Pods. dsName := "ds" + tools.RandomName() - dsYaml := common.GenerateExampleDaemonSetYaml(dsName, namespace) + dsYaml := common.GenerateExampleDaemonSetYaml(dsName, "kube-public") podIppoolAnnoStr := common.GeneratePodIPPoolAnnotations(frame, common.NIC1, globalDefaultV4IppoolList, globalDefaultV6IppoolList) dsYaml.Spec.Template.Annotations = map[string]string{constant.AnnoPodIPPool: podIppoolAnnoStr} @@ -126,7 +126,7 @@ var _ = Describe("test reliability", Label("reliability"), Serial, func() { return err } - if err := frame.DeleteDaemonSet(dsName, namespace); err != nil { + if err := frame.DeleteDaemonSet(dsName, "kube-public"); err != nil { return err }