diff --git a/cmd/k8s-kata-manager/main.go b/cmd/k8s-kata-manager/main.go index c2464cf1..3031c222 100644 --- a/cmd/k8s-kata-manager/main.go +++ b/cmd/k8s-kata-manager/main.go @@ -25,7 +25,6 @@ import ( "path/filepath" "strconv" "syscall" - "time" "github.com/pelletier/go-toml" "github.com/urfave/cli/v2" @@ -42,12 +41,14 @@ import ( "github.com/NVIDIA/k8s-kata-manager/internal/oras" "github.com/NVIDIA/k8s-kata-manager/internal/runtime" containerd "github.com/NVIDIA/k8s-kata-manager/internal/runtime/containerd" + "github.com/NVIDIA/k8s-kata-manager/internal/runtime/crio" "github.com/NVIDIA/k8s-kata-manager/internal/version" ) const ( defaultContainerdConfigFilePath = "/etc/containerd/config.toml" defaultContainerdSocketFilePath = "/run/containerd/containerd.sock" + defaultCrioConfigFilePath = "/etc/crio/crio.conf" cdiRoot = "/var/run/cdi" ) @@ -74,6 +75,8 @@ type worker struct { ContainerdSocket string LoadKernelModules bool CDIEnabled bool + Runtime string + CrioConfig string } // newWorker returns a new worker struct @@ -146,6 +149,20 @@ func main() { Destination: &worker.CDIEnabled, EnvVars: []string{"CDI_ENABLED"}, }, + &cli.StringFlag{ + Name: "runtime", + Usage: "Runtime name", + Value: "", + Destination: &worker.Runtime, + EnvVars: []string{"RUNTIME"}, + }, + &cli.StringFlag{ + Name: "crio-config", + Usage: "Path to the CRI-O config file", + Value: defaultCrioConfigFilePath, + Destination: &worker.CrioConfig, + EnvVars: []string{"CRIO_CONFIG"}, + }, } c.Before = func(c *cli.Context) error { @@ -245,9 +262,7 @@ func (w *worker) Run(c *cli.Context) error { return fmt.Errorf("failed to generate CDI spec: %w", err) } } - - options := runtime.Options{Path: w.ContainerdConfig, RuntimeType: "io.containerd.kata.v2", PodAnnotations: []string{"io.katacontainers.*"}} - ctrdConfig, err := containerd.Setup(&options) + runtimeConfig, err := w.getRuntimeConfig() if err != nil { klog.Errorf("error creating containerd.config client : %s", err) return err @@ -292,7 +307,7 @@ func (w *worker) Run(c *cli.Context) error { return fmt.Errorf("error transforming kata configuration file: %w", err) } - err = ctrdConfig.AddRuntime( + err = runtimeConfig.AddRuntime( rc.Name, kataConfigPath, false, @@ -302,23 +317,21 @@ func (w *worker) Run(c *cli.Context) error { } } - - n, err := ctrdConfig.Save(w.ContainerdConfig) + n, err := runtimeConfig.Save() if err != nil { return fmt.Errorf("unable to flush config: %w", err) } - if n == 0 { - klog.Infof("Removed empty config from %v", w.ContainerdConfig) + klog.Infof("Removed empty config") } else { - klog.Infof("Wrote updated config to %v", w.ContainerdConfig) + klog.Infof("Wrote updated config") } - klog.Infof("Restarting containerd") - if err := restartContainerd(w.ContainerdSocket); err != nil { - return fmt.Errorf("unable to restart containerd: %w", err) + klog.Infof("Restarting runtime") + if err := runtimeConfig.Restart(); err != nil { + return fmt.Errorf("unable to restart runtime service: %w", err) } - klog.Info("containerd successfully restarted") + klog.Info("runtime successfully restarted") if err := waitForSignal(); err != nil { return fmt.Errorf("unable to wait for signal: %w", err) @@ -331,7 +344,25 @@ func (w *worker) Run(c *cli.Context) error { return nil } -// CleanUp reverts the containerd config to remove the nvidia-container-runtime +func (w *worker) getRuntimeConfig() (runtime.Runtime, error) { + var ctrdConfig runtime.Runtime + var err error + klog.Infof("Vishesh RUNTIME %s", w.Runtime) + if w.Runtime == "crio" { + options := runtime.Options{Path: w.CrioConfig, RuntimeType: "vm", PodAnnotations: []string{"io.katacontainers.*"}} + ctrdConfig, err = crio.Setup(&options) + } else if w.Runtime == "containerd" { + options := runtime.Options{Path: w.ContainerdConfig, RuntimeType: "io.containerd.kata.v2", PodAnnotations: []string{"io.katacontainers.*"}, Socket: w.ContainerdSocket} + ctrdConfig, err = containerd.Setup(&options) + } + if err != nil { + klog.Errorf("error creating containerd.config client : %s", err) + return nil, err + } + return ctrdConfig, nil +} + +// CleanUp reverts the runtime config added by kata manager func (w *worker) CleanUp() error { ctrdConfig, err := containerd.New( containerd.WithPath(w.ContainerdConfig), @@ -346,7 +377,7 @@ func (w *worker) CleanUp() error { return fmt.Errorf("unable to revert config for runtime class '%v': %w", rc, err) } } - n, err := ctrdConfig.Save(w.ContainerdConfig) + n, err := ctrdConfig.Save() if err != nil { return fmt.Errorf("unable to flush config: %w", err) } @@ -356,7 +387,7 @@ func (w *worker) CleanUp() error { } else { klog.Infof("Wrote updated config to %v", w.ContainerdConfig) } - if err := restartContainerd(w.ContainerdSocket); err != nil { + if err := ctrdConfig.Restart(); err != nil { return fmt.Errorf("unable to restart containerd: %w", err) } return nil @@ -385,48 +416,6 @@ func initialize() error { return nil } -func restartContainerd(containerdSocket string) error { - - // Create a channel to receive signals - sigs := make(chan os.Signal, 1) - signal.Notify(sigs, syscall.SIGTERM, syscall.SIGHUP) - - // Set up a timer to ignore the signal for 5 seconds - ignoreTimer := time.NewTimer(5 * time.Second) - - // Create a channel to signal when the function has finished executing - done := make(chan error) - - // Start the function in a goroutine - go func() { - // Execute your function here - err := containerd.RestartContainerd(containerdSocket) - if err != nil { - klog.Errorf("error restarting containerd: %v", err) - done <- err - } - // Since we are restarintg Containerd we need to - // Ignore the SIGTERM signal for 5 seconds - <-ignoreTimer.C - // Signal that the function has finished executing - done <- nil - }() - - // Wait for the function to finish executing or for the signal to be received - select { - case err := <-done: - if err != nil { - return err - } - case s := <-sigs: - fmt.Printf("Received signal %v", s) - // Reset the timer to ignore the signal for another 5 seconds - ignoreTimer.Reset(5 * time.Second) - } - - return nil -} - func transformKataConfig(path string) error { config, err := toml.LoadFile(path) if err != nil { diff --git a/internal/cdi/nvpci-interface_mock.go b/internal/cdi/nvpci-interface_mock.go index 6a01a266..799e6133 100644 --- a/internal/cdi/nvpci-interface_mock.go +++ b/internal/cdi/nvpci-interface_mock.go @@ -4,8 +4,9 @@ package cdi import ( - "github.com/NVIDIA/go-nvlib/pkg/nvpci" "sync" + + "github.com/NVIDIA/go-nvlib/pkg/nvpci" ) // Ensure, that nvpciInterfaceMock does implement nvpciInterface. diff --git a/internal/runtime/containerd/containerd.go b/internal/runtime/containerd/containerd.go index 16b85869..b398464b 100644 --- a/internal/runtime/containerd/containerd.go +++ b/internal/runtime/containerd/containerd.go @@ -19,8 +19,12 @@ package containerd import ( "fmt" "os" + "os/signal" + "syscall" + "time" "github.com/pelletier/go-toml" + "k8s.io/klog/v2" "github.com/NVIDIA/k8s-kata-manager/internal/runtime" ) @@ -31,6 +35,8 @@ type Config struct { RuntimeType string UseDefaultRuntimeName bool PodAnnotations []string + Path string + Socket string } func Setup(o *runtime.Options) (runtime.Runtime, error) { @@ -38,6 +44,7 @@ func Setup(o *runtime.Options) (runtime.Runtime, error) { WithPath(o.Path), WithPodAnnotations(o.PodAnnotations...), WithRuntimeType(o.RuntimeType), + WithSocket(o.Socket), ) return ctrdConfig, err } @@ -134,29 +141,29 @@ func (c *Config) RemoveRuntime(name string) error { } // Save writes the config to the specified path -func (c *Config) Save(path string) (int64, error) { +func (c *Config) Save() (int64, error) { config := c.Tree output, err := config.ToTomlString() if err != nil { return 0, fmt.Errorf("unable to convert to TOML: %w", err) } - if path == "" { + if c.Path == "" { os.Stdout.WriteString(fmt.Sprintf("%s\n", output)) return int64(len(output)), nil } if len(output) == 0 { - err := os.Remove(path) + err := os.Remove(c.Path) if err != nil { return 0, fmt.Errorf("unable to remove empty file: %w", err) } return 0, nil } - f, err := os.Create(path) + f, err := os.Create(c.Path) if err != nil { - return 0, fmt.Errorf("unable to open '%s' for writing: %w", path, err) + return 0, fmt.Errorf("unable to open '%s' for writing: %w", c.Path, err) } defer f.Close() @@ -167,3 +174,45 @@ func (c *Config) Save(path string) (int64, error) { return int64(n), err } + +func (c *Config) Restart() error { + + // Create a channel to receive signals + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGTERM, syscall.SIGHUP) + + // Set up a timer to ignore the signal for 5 seconds + ignoreTimer := time.NewTimer(5 * time.Second) + + // Create a channel to signal when the function has finished executing + done := make(chan error) + + // Start the function in a goroutine + go func() { + // Execute your function here + err := RestartContainerd(c.Socket) + if err != nil { + klog.Errorf("error restarting containerd: %v", err) + done <- err + } + // Since we are restarintg Containerd we need to + // Ignore the SIGTERM signal for 5 seconds + <-ignoreTimer.C + // Signal that the function has finished executing + done <- nil + }() + + // Wait for the function to finish executing or for the signal to be received + select { + case err := <-done: + if err != nil { + return err + } + case s := <-sigs: + fmt.Printf("Received signal %v", s) + // Reset the timer to ignore the signal for another 5 seconds + ignoreTimer.Reset(5 * time.Second) + } + + return nil +} diff --git a/internal/runtime/containerd/option.go b/internal/runtime/containerd/option.go index 7cfb0b14..6f366692 100644 --- a/internal/runtime/containerd/option.go +++ b/internal/runtime/containerd/option.go @@ -33,6 +33,7 @@ type builder struct { runtimeType string useLegacyConfig bool podAnnotations []string + socket string } // Option defines a function that can be used to configure the config builder @@ -66,6 +67,13 @@ func WithPodAnnotations(podAnnotations ...string) Option { } } +// WithSocket sets the socket for the config builder +func WithSocket(socket string) Option { + return func(b *builder) { + b.socket = socket + } +} + func (b *builder) build() (*Config, error) { if b.path == "" { return &Config{}, fmt.Errorf("config path is empty") @@ -82,6 +90,8 @@ func (b *builder) build() (*Config, error) { config.RuntimeType = b.runtimeType config.UseDefaultRuntimeName = !b.useLegacyConfig config.PodAnnotations = b.podAnnotations + config.Path = b.path + config.Socket = b.socket return config, nil } diff --git a/internal/runtime/crio/crio.go b/internal/runtime/crio/crio.go new file mode 100644 index 00000000..20db031a --- /dev/null +++ b/internal/runtime/crio/crio.go @@ -0,0 +1,178 @@ +/** +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package crio + +import ( + "fmt" + "os" + "os/exec" + + "github.com/pelletier/go-toml" + "k8s.io/klog/v2" + + "github.com/NVIDIA/k8s-kata-manager/internal/runtime" +) + +// Config represents the crio config +type Config struct { + *toml.Tree + RuntimeType string + UseDefaultRuntimeName bool + PodAnnotations []string + Path string +} + +func Setup(o *runtime.Options) (runtime.Runtime, error) { + ctrdConfig, err := New( + WithPath(o.Path), + WithPodAnnotations(o.PodAnnotations...), + WithRuntimeType(o.RuntimeType), + ) + return ctrdConfig, err +} + +// New creates a crio config with the specified options +func New(opts ...Option) (*Config, error) { + b := &builder{} + for _, opt := range opts { + opt(b) + } + + return b.build() +} + +// AddRuntime adds a runtime to the crio config +func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { + if c == nil { + return fmt.Errorf("config is nil") + } + + config := *c.Tree + + // By default we extract the runtime options from the runc settings; if this does not exist we get the options from the default runtime specified in the config. + runtimeNamesForConfig := []string{"runc"} + if name, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok && name != "" { + runtimeNamesForConfig = append(runtimeNamesForConfig, name) + } + for _, r := range runtimeNamesForConfig { + if options, ok := config.GetPath([]string{"crio", "runtime", "runtimes", r}).(*toml.Tree); ok { + options, _ = toml.Load(options.String()) + config.SetPath([]string{"crio", "runtime", "runtimes", name}, options) + break + } + } + + config.SetPath([]string{"crio", "runtime", "runtimes", name, "runtime_path"}, path) + config.SetPath([]string{"crio", "runtime", "runtimes", name, "runtime_type"}, "vm") + config.SetPath([]string{"crio", "runtime", "runtimes", name, "privileged_without_host_devices"}, "true") + + if setAsDefault { + config.SetPath([]string{"crio", "runtime", "default_runtime"}, name) + } + + *c.Tree = config + return nil +} + +// DefaultRuntime returns the default runtime for the crio config +func (c *Config) DefaultRuntime() string { + if c == nil || c.Tree == nil { + return "" + } + if runtime, ok := c.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok { + return runtime + } + return "" +} + +// RemoveRuntime removes a runtime from the crio config +func (c *Config) RemoveRuntime(name string) error { + if c == nil { + return nil + } + + config := *c.Tree + if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok { + if runtime == name { + err := config.DeletePath([]string{"crio", "runtime", "default_runtime"}) + if err != nil { + return err + } + + } + } + + runtimeClassPath := []string{"crio", "runtime", "runtimes", name} + err := config.DeletePath(runtimeClassPath) + if err != nil { + return err + } + for i := 0; i < len(runtimeClassPath); i++ { + remainingPath := runtimeClassPath[:len(runtimeClassPath)-i] + if entry, ok := config.GetPath(remainingPath).(*toml.Tree); ok { + if len(entry.Keys()) != 0 { + break + } + err := config.DeletePath(remainingPath) + if err != nil { + return err + } + } + } + + *c.Tree = config + return nil +} + +// Save writes the config to the specified path +func (c *Config) Save() (int64, error) { + config := c.Tree + output, err := config.Marshal() + if err != nil { + return 0, fmt.Errorf("unable to convert to TOML: %v", err) + } + f, err := os.Create(c.Path) + if err != nil { + return 0, fmt.Errorf("unable to open '%s' for writing: %w", c.Path, err) + } + defer f.Close() + + n, err := f.Write(output) + if err != nil { + return 0, fmt.Errorf("unable to write output: %w", err) + } + + return int64(n), err +} + +func (c *Config) Restart() error { + var args []string + args = append(args, "chroot", "/host", "systemctl", "restart", "crio") + + klog.Infof("Restarting crio using systemd") + + //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection + cmd := exec.Command(args[0], args[1:]...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err := cmd.Run() + if err != nil { + return fmt.Errorf("error restarting crio using systemd: %v", err) + } + + return nil +} diff --git a/internal/runtime/crio/option.go b/internal/runtime/crio/option.go new file mode 100644 index 00000000..555d7bda --- /dev/null +++ b/internal/runtime/crio/option.go @@ -0,0 +1,116 @@ +/** +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package crio + +import ( + "fmt" + "os" + + "github.com/pelletier/go-toml" + "k8s.io/klog/v2" +) + +const ( + defaultRuntimeType = "oci" +) + +type builder struct { + path string + runtimeType string + useLegacyConfig bool + podAnnotations []string +} + +// Option defines a function that can be used to configure the config builder +type Option func(*builder) + +// WithPath sets the path for the config builder +func WithPath(path string) Option { + return func(b *builder) { + b.path = path + } +} + +// WithRuntimeType sets the runtime type for the config builder +func WithRuntimeType(runtimeType string) Option { + return func(b *builder) { + b.runtimeType = runtimeType + } +} + +// WithUseLegacyConfig sets the useLegacyConfig flag for the config builder +func WithUseLegacyConfig(useLegacyConfig bool) Option { + return func(b *builder) { + b.useLegacyConfig = useLegacyConfig + } +} + +// WithPodAnnotations sets the container annotations for the config builder +func WithPodAnnotations(podAnnotations ...string) Option { + return func(b *builder) { + b.podAnnotations = podAnnotations + } +} + +func (b *builder) build() (*Config, error) { + if b.path == "" { + return &Config{}, fmt.Errorf("config path is empty") + } + + if b.runtimeType == "" { + b.runtimeType = defaultRuntimeType + } + + config, err := loadConfig(b.path) + if err != nil { + return &Config{}, fmt.Errorf("failed to load config: %w", err) + } + config.RuntimeType = b.runtimeType + config.UseDefaultRuntimeName = !b.useLegacyConfig + config.PodAnnotations = b.podAnnotations + config.Path = b.path + + return config, nil +} + +// loadConfig loads the crio config from disk +func loadConfig(config string) (*Config, error) { + klog.Infof("Loading config: %v", config) + + info, err := os.Stat(config) + if os.IsExist(err) && info.IsDir() { + return nil, fmt.Errorf("config file is a directory") + } + + configFile := config + if os.IsNotExist(err) { + configFile = "/dev/null" + klog.Infof("Config file does not exist, creating new one") + } + + tomlConfig, err := toml.LoadFile(configFile) + if err != nil { + return nil, err + } + + klog.Infof("Successfully loaded config") + + cfg := Config{ + Tree: tomlConfig, + } + return &cfg, nil +} diff --git a/internal/runtime/types.go b/internal/runtime/types.go index 4c79c957..509a532b 100644 --- a/internal/runtime/types.go +++ b/internal/runtime/types.go @@ -20,11 +20,13 @@ type Runtime interface { AddRuntime(name string, path string, setAsDefault bool) error DefaultRuntime() string RemoveRuntime(name string) error - Save(path string) (int64, error) + Save() (int64, error) + Restart() error } type Options struct { PodAnnotations []string Path string RuntimeType string + Socket string }