Skip to content

Commit

Permalink
Merge pull request #1010 from elezar/add-imex-init-container
Browse files Browse the repository at this point in the history
Add init container to handle imex nodes config mount
  • Loading branch information
elezar authored Oct 28, 2024
2 parents 6decc15 + 7fc6642 commit c09799f
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 29 deletions.
1 change: 0 additions & 1 deletion api/config/v1/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error) {
if c.IsSet("imex-required") {
config.Imex.Required = c.Bool("imex-required")
}
updateFromCLIFlag(&config.Imex.NodesConfigFile, c, "imex-nodes-config-file")

// If nvidiaDevRoot (the path to the device nodes on the host) is not set,
// we default to using the driver root on the host.
Expand Down
4 changes: 0 additions & 4 deletions api/config/v1/imex.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ type Imex struct {
// If it is not required its injection is skipped if the device nodes do not exist or if its
// existence cannot be queried.
Required bool `json:"required,omitempty" yaml:"required,omitempty"`
// NodesConfigFile defines the location to the IMEX nodes config file.
// Such a nodes config file contains the IP addresses of nodes that are part of the IMEX domain.
// Note that this is the absolute path to the file in the device plugin container.
NodesConfigFile *string `json:"nodesConfigFile,omitempty" yaml:"nodesConfigFile,omitempty"`
}

// AssertChannelIDsIsValid checks whether the specified list of channel IDs is valid.
Expand Down
6 changes: 0 additions & 6 deletions cmd/gpu-feature-discovery/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,6 @@ func main() {
Value: "/etc/kubernetes/node-feature-discovery/features.d/gfd",
EnvVars: []string{"GFD_OUTPUT_FILE"},
},
&cli.StringFlag{
Name: "imex-nodes-config-file",
Usage: "Path to the IMEX nodes config file. This file contains a list of IP addresses of the nodes in the IMEX domain.",
Value: "/etc/nvidia-imex/nodes_config.cfg",
EnvVars: []string{"GFD_IMEX_NODES_CONFIG_FILE"},
},
&cli.StringFlag{
Name: "machine-type-file",
Value: "/sys/class/dmi/id/product_name",
Expand Down
40 changes: 33 additions & 7 deletions deployments/helm/nvidia-device-plugin/templates/daemonset-gfd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,35 @@ spec:
{{- end }}
{{- if $options.hasConfigMap }}
shareProcessNamespace: true
{{- end }}
initContainers:
- image: {{ include "nvidia-device-plugin.fullimage" . }}
name: gpu-feature-discovery-imex-init
command: ["/bin/bash", "-c"]
args:
- |
IMEX_NODES_CONFIG_FILE=/etc/nvidia-imex/nodes_config.cfg
if [[ -f /config/${IMEX_NODES_CONFIG_FILE} ]]; then
echo "Removing cached IMEX nodes config"
rm -f /config/${IMEX_NODES_CONFIG_FILE}
fi
if [[ ! -f /driver-root/${IMEX_NODES_CONFIG_FILE} ]]; then
echo "No IMEX nodes config path detected; Skipping"
exit 0
fi
echo "Copying IMEX nodes config"
mkdir -p $(dirname /config/${IMEX_NODES_CONFIG_FILE})
cp /driver-root/${IMEX_NODES_CONFIG_FILE} /config/${IMEX_NODES_CONFIG_FILE}
volumeMounts:
- name: config
mountPath: /config
- name: driver-root
mountPath: /driver-root/etc
subPath: etc
readOnly: true
{{- if $options.hasConfigMap }}
- image: {{ include "nvidia-device-plugin.fullimage" . }}
name: gpu-feature-discovery-init
command: ["config-manager"]
Expand Down Expand Up @@ -182,14 +210,12 @@ spec:
mountPath: "/etc/kubernetes/node-feature-discovery/features.d"
- name: host-sys
mountPath: "/sys"
- name: nvidia-imex-dir
mountPath: "/etc/nvidia-imex"
{{- if $options.hasConfigMap }}
- name: available-configs
mountPath: /available-configs
{{- end }}
- name: config
mountPath: /config
{{- end }}
{{- with .Values.resources }}
resources:
{{- toYaml . | nindent 10 }}
Expand All @@ -201,17 +227,17 @@ spec:
- name: host-sys
hostPath:
path: "/sys"
- name: nvidia-imex-dir
type: DirectoryOrCreate
- name: driver-root
hostPath:
path: {{ clean ( join "/" ( list "/" .Values.nvidiaDriverRoot "/etc/nvidia-imex" ) ) | quote }}
path: {{ clean ( join "/" ( list "/" .Values.nvidiaDriverRoot ) ) | quote }}
type: Directory
{{- if $options.hasConfigMap }}
- name: available-configs
configMap:
name: {{ $configMapName }}
{{- end }}
- name: config
emptyDir: {}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
32 changes: 21 additions & 11 deletions internal/lm/fabric.go → internal/lm/imex.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,16 @@ import (
"github.com/NVIDIA/k8s-device-plugin/internal/resource"
)

func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) {
if config.Imex.NodesConfigFile == nil || *config.Imex.NodesConfigFile == "" {
// No imex config file, return empty labels
return empty{}, nil
}

nodesConfigFiles := []string{*config.Imex.NodesConfigFile}
if root := config.Flags.Plugin.ContainerDriverRoot; root != nil && *root != "" {
nodesConfigFiles = append(nodesConfigFiles, filepath.Join(*root, *config.Imex.NodesConfigFile))
}
// ImexNodesConfigFilePath is the location of the IMEX nodes config file, which
// lists the IP addresses of the nodes that belong to the IMEX domain.
const ImexNodesConfigFilePath = "/etc/nvidia-imex/nodes_config.cfg"

func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) {
var errs error
for _, configFilePath := range nodesConfigFiles {
for _, root := range imexNodesConfigFilePathSearchRoots(config) {
configFilePath := filepath.Join(root, ImexNodesConfigFilePath)
imexLabeler, err := imexLabelerForConfigFile(configFilePath, devices)
if err != nil {
errs = errors.Join(errs, err)
Expand All @@ -64,6 +61,19 @@ func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, er
return empty{}, nil
}

// imexNodesConfigFilePathSearchRoots returns the list of roots under which the
// IMEX nodes config file is searched for.
//
// The roots "/" and "/config" are always returned, in that order. If the config
// specifies a non-empty container driver root, it is appended as the last root.
// A nil config, nil plugin flags, or an unset or empty driver root yields only
// the default roots.
func imexNodesConfigFilePathSearchRoots(config *spec.Config) []string {
	// By default, search / and /config for config files.
	roots := []string{"/", "/config"}

	if config == nil || config.Flags.Plugin == nil {
		return roots
	}

	driverRoot := config.Flags.Plugin.ContainerDriverRoot
	// An empty driver root resolves to the same file as the "/" root once it is
	// joined with the config file path, so adding it would only repeat the
	// first lookup (and duplicate any error it produces).
	if driverRoot == nil || *driverRoot == "" {
		return roots
	}

	// If a driver root is specified, it is also searched.
	return append(roots, *driverRoot)
}

func imexLabelerForConfigFile(configFilePath string, devices []resource.Device) (Labeler, error) {
imexConfigFile, err := os.Open(configFilePath)
if os.IsNotExist(err) {
Expand Down
File renamed without changes.

0 comments on commit c09799f

Please sign in to comment.