diff --git a/go.mod b/go.mod index efaf1154..9741ff92 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ toolchain go1.22.5 require ( github.com/NVIDIA/go-nvlib v0.7.0 - github.com/NVIDIA/nvidia-container-toolkit v1.16.0 + github.com/NVIDIA/nvidia-container-toolkit v1.16.2 github.com/opencontainers/image-spec v1.1.0 github.com/pelletier/go-toml v1.9.5 github.com/sirupsen/logrus v1.9.3 @@ -54,7 +54,7 @@ require ( github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect - golang.org/x/mod v0.19.0 // indirect + golang.org/x/mod v0.20.0 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.7.0 // indirect diff --git a/go.sum b/go.sum index 0509af03..72232884 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/NVIDIA/go-nvlib v0.7.0 h1:Z/J7skMdLbTiHvomKVsGYsttfQMZj5FwNYIFXhZ4i/c github.com/NVIDIA/go-nvlib v0.7.0/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY= github.com/NVIDIA/go-nvml v0.12.4-0 h1:4tkbB3pT1O77JGr0gQ6uD8FrsUPqP1A/EOEm2wI1TUg= github.com/NVIDIA/go-nvml v0.12.4-0/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ= -github.com/NVIDIA/nvidia-container-toolkit v1.16.0 h1:NZyKfW0s8nfghoBSJJUth7OZB5ZzRGYbn3RaiTDYdHM= -github.com/NVIDIA/nvidia-container-toolkit v1.16.0/go.mod h1:jJXYvHEdqqpDcRXvolaiFCBsgLxvCwmJWSBZM3zQPY8= +github.com/NVIDIA/nvidia-container-toolkit v1.16.2 h1:udrrtB8JrAs2KkKQ4njgSb/anUOC1b9tP5LjUtbjE+k= +github.com/NVIDIA/nvidia-container-toolkit v1.16.2/go.mod h1:2heVmOldqyMBVXMn0A1Cpjze7VwQTQAGzP8AJ12/HLs= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= @@ -142,8 +142,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= -golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/graphics.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/graphics.go index b5c248d4..e87f85c3 100644 --- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/graphics.go +++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/discover/graphics.go @@ -146,6 +146,27 @@ func newGraphicsLibrariesDiscoverer(logger logger.Interface, driver *root.Driver } } +// Mounts discovers the required libraries and filters out libnvidia-allocator.so. +// The library libnvidia-allocator.so is already handled by either the *.RM_VERSION +// injection or by libnvidia-container. We therefore filter it out here as a +// workaround for the case where libnvidia-container will re-mount this in the +// container, which causes issues with shared mount propagation. +func (d graphicsDriverLibraries) Mounts() ([]Mount, error) { + mounts, err := d.Discover.Mounts() + if err != nil { + return nil, fmt.Errorf("failed to get library mounts: %v", err) + } + + var filtered []Mount + for _, mount := range mounts { + if d.isDriverLibrary(filepath.Base(mount.Path), "libnvidia-allocator.so") { + continue + } + filtered = append(filtered, mount) + } + return filtered, nil +} + // Create necessary library symlinks for graphics drivers func (d graphicsDriverLibraries) Hooks() ([]Hook, error) { mounts, err := d.Discover.Mounts() diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/dgpu.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/dgpu.go index 00982a62..b79f6bd4 100644 --- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/dgpu.go +++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/dgpu.go @@ -21,24 +21,29 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps" ) // NewForDevice creates a discoverer for the specified Device. func NewForDevice(d device.Device, opts ...Option) (discover.Discover, error) { - o := &options{} - for _, opt := range opts { - opt(o) - } - - if o.logger == nil { - o.logger = logger.New() - } + o := new(opts...) return o.newNvmlDGPUDiscoverer(&toRequiredInfo{d}) } // NewForDevice creates a discoverer for the specified device and its associated MIG device. func NewForMigDevice(d device.Device, mig device.MigDevice, opts ...Option) (discover.Discover, error) { + o := new(opts...) + + return o.newNvmlMigDiscoverer( + &toRequiredMigInfo{ + MigDevice: mig, + parent: &toRequiredInfo{d}, + }, + ) +} + +func new(opts ...Option) *options { o := &options{} for _, opt := range opts { opt(o) @@ -48,10 +53,15 @@ func NewForMigDevice(d device.Device, mig device.MigDevice, opts ...Option) (dis o.logger = logger.New() } - return o.newNvmlMigDiscoverer( - &toRequiredMigInfo{ - MigDevice: mig, - parent: &toRequiredInfo{d}, - }, - ) + if o.migCaps == nil { + migCaps, err := nvcaps.NewMigCaps() + if err != nil { + o.logger.Debugf("ignoring error getting MIG capability device paths: %v", err) + o.migCapsError = err + } else { + o.migCaps = migCaps + } + } + + return o } diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/nvml.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/nvml.go index e4b67641..f24f4d55 100644 --- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/nvml.go +++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/nvml.go @@ -78,24 +78,23 @@ type requiredMigInfo interface { } func (o *options) newNvmlMigDiscoverer(d requiredMigInfo) (discover.Discover, error) { - gpu, gi, ci, err := d.getPlacementInfo() - if err != nil { - return nil, fmt.Errorf("error getting placement info: %w", err) + if o.migCaps == nil || o.migCapsError != nil { + return nil, fmt.Errorf("error getting MIG capability device paths: %v", o.migCapsError) } - migCaps, err := nvcaps.NewMigCaps() + gpu, gi, ci, err := d.getPlacementInfo() if err != nil { - return nil, fmt.Errorf("error getting MIG capability device paths: %v", err) + return nil, fmt.Errorf("error getting placement info: %w", err) } giCap := nvcaps.NewGPUInstanceCap(gpu, gi) - giCapDevicePath, err := migCaps.GetCapDevicePath(giCap) + giCapDevicePath, err := o.migCaps.GetCapDevicePath(giCap) if err != nil { return nil, fmt.Errorf("failed to get GI cap device path: %v", err) } ciCap := nvcaps.NewComputeInstanceCap(gpu, gi, ci) - ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap) + ciCapDevicePath, err := o.migCaps.GetCapDevicePath(ciCap) if err != nil { return nil, fmt.Errorf("failed to get CI cap device path: %v", err) } @@ -145,9 +144,9 @@ type toRequiredMigInfo struct { } func (d *toRequiredMigInfo) getPlacementInfo() (int, int, int, error) { - gpu, ret := d.parent.GetMinorNumber() - if ret != nvml.SUCCESS { - return 0, 0, 0, fmt.Errorf("error getting GPU minor: %v", ret) + gpu, err := d.parent.GetMinorNumber() + if err != nil { + return 0, 0, 0, fmt.Errorf("error getting GPU minor: %w", err) } gi, ret := d.GetGpuInstanceId() diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/options.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/options.go index cea58c6d..41e4d7a9 100644 --- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/options.go +++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/dgpu/options.go @@ -18,12 +18,18 @@ package dgpu import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps" ) type options struct { logger logger.Interface devRoot string nvidiaCDIHookPath string + + // migCaps stores the MIG capabilities for the system. + // If MIG is not available, this is nil. + migCaps nvcaps.MigCaps + migCapsError error } type Option func(*options) @@ -48,3 +54,10 @@ func WithNVIDIACDIHookPath(path string) Option { l.nvidiaCDIHookPath = path } } + +// WithMIGCaps sets the MIG capabilities. +func WithMIGCaps(migCaps nvcaps.MigCaps) Option { + return func(l *options) { + l.migCaps = migCaps + } +} diff --git a/vendor/golang.org/x/mod/LICENSE b/vendor/golang.org/x/mod/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/mod/LICENSE +++ b/vendor/golang.org/x/mod/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/modules.txt b/vendor/modules.txt index ff146374..6ab76ee0 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -14,7 +14,7 @@ github.com/NVIDIA/go-nvlib/pkg/pciids ## explicit; go 1.20 github.com/NVIDIA/go-nvml/pkg/dl github.com/NVIDIA/go-nvml/pkg/nvml -# github.com/NVIDIA/nvidia-container-toolkit v1.16.0 +# github.com/NVIDIA/nvidia-container-toolkit v1.16.2 ## explicit; go 1.20 github.com/NVIDIA/nvidia-container-toolkit/internal/config/image github.com/NVIDIA/nvidia-container-toolkit/internal/discover @@ -161,7 +161,7 @@ github.com/x448/float16 # github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 ## explicit; go 1.15 github.com/xrash/smetrics -# golang.org/x/mod v0.19.0 +# golang.org/x/mod v0.20.0 ## explicit; go 1.18 golang.org/x/mod/semver # golang.org/x/net v0.26.0