-
Notifications
You must be signed in to change notification settings - Fork 287
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
datadog-agent and datadog-agent-nvml: various fixes to work with latest
- add libpcap - symlink so build avoids downloading libpcap and uses system lib - regen dep bump patch - build with python 3.12 as integration dependencies require it ERROR: Package 'datadog-slurm' requires a different Python: 3.11.11 not in '>=3.12' - compile datadog-agent-nvml with python 3.12 - patch to disable gpu monitor as causing test failures, upstream issue tracked DataDog/datadog-agent#32419 Signed-off-by: James Rawlings <[email protected]>
- Loading branch information
Showing
4 changed files
with
132 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
diff --git a/cmd/system-probe/modules/gpu.go b/cmd/system-probe/modules/gpu.go | ||
index 27f3f0c..f1ceba6 100644 | ||
--- a/cmd/system-probe/modules/gpu.go | ||
+++ b/cmd/system-probe/modules/gpu.go | ||
@@ -8,20 +8,13 @@ | ||
package modules | ||
|
||
import ( | ||
- "fmt" | ||
- "net/http" | ||
- "time" | ||
- | ||
- "github.com/NVIDIA/go-nvml/pkg/nvml" | ||
- "go.uber.org/atomic" | ||
- | ||
+ "errors" | ||
"github.com/DataDog/datadog-agent/cmd/system-probe/api/module" | ||
"github.com/DataDog/datadog-agent/cmd/system-probe/config" | ||
sysconfigtypes "github.com/DataDog/datadog-agent/cmd/system-probe/config/types" | ||
"github.com/DataDog/datadog-agent/cmd/system-probe/utils" | ||
- "github.com/DataDog/datadog-agent/pkg/gpu" | ||
gpuconfig "github.com/DataDog/datadog-agent/pkg/gpu/config" | ||
- "github.com/DataDog/datadog-agent/pkg/util/log" | ||
+ "net/http" | ||
) | ||
|
||
var _ module.Module = &GPUMonitoringModule{} | ||
@@ -32,53 +25,21 @@ var GPUMonitoring = module.Factory{ | ||
Name: config.GPUMonitoringModule, | ||
ConfigNamespaces: gpuMonitoringConfigNamespaces, | ||
Fn: func(_ *sysconfigtypes.Config, deps module.FactoryDependencies) (module.Module, error) { | ||
- | ||
- c := gpuconfig.NewConfig() | ||
- probeDeps := gpu.ProbeDependencies{ | ||
- Telemetry: deps.Telemetry, | ||
- //if the config parameter doesn't exist or is empty string, the default value is used as defined in go-nvml library | ||
- //(https://github.com/NVIDIA/go-nvml/blob/main/pkg/nvml/lib.go#L30) | ||
- NvmlLib: nvml.New(nvml.WithLibraryPath(c.NVMLLibraryPath)), | ||
- } | ||
- | ||
- ret := probeDeps.NvmlLib.Init() | ||
- if ret != nvml.SUCCESS && ret != nvml.ERROR_ALREADY_INITIALIZED { | ||
- return nil, fmt.Errorf("unable to initialize NVML library: %v", ret) | ||
- } | ||
- | ||
- t, err := gpu.NewProbe(c, probeDeps) | ||
- if err != nil { | ||
- return nil, fmt.Errorf("unable to start GPU monitoring: %w", err) | ||
- } | ||
- | ||
- return &GPUMonitoringModule{ | ||
- Probe: t, | ||
- lastCheck: atomic.NewInt64(0), | ||
- }, nil | ||
+ return nil, errors.New("GPU monitoring disabled at build time") | ||
}, | ||
NeedsEBPF: func() bool { | ||
- return true | ||
+ return false | ||
}, | ||
} | ||
|
||
// GPUMonitoringModule is a module for GPU monitoring | ||
type GPUMonitoringModule struct { | ||
- *gpu.Probe | ||
- lastCheck *atomic.Int64 | ||
} | ||
|
||
// Register registers the GPU monitoring module | ||
func (t *GPUMonitoringModule) Register(httpMux *module.Router) error { | ||
httpMux.HandleFunc("/check", func(w http.ResponseWriter, _ *http.Request) { | ||
- t.lastCheck.Store(time.Now().Unix()) | ||
- stats, err := t.Probe.GetAndFlush() | ||
- if err != nil { | ||
- log.Errorf("Error getting GPU stats: %v", err) | ||
- w.WriteHeader(500) | ||
- return | ||
- } | ||
- | ||
- utils.WriteAsJSON(w, stats) | ||
+ utils.WriteAsJSON(w, map[string]interface{}{}) | ||
}) | ||
|
||
return nil | ||
@@ -86,12 +47,9 @@ func (t *GPUMonitoringModule) Register(httpMux *module.Router) error { | ||
|
||
// GetStats returns the last check time | ||
func (t *GPUMonitoringModule) GetStats() map[string]interface{} { | ||
- return map[string]interface{}{ | ||
- "last_check": t.lastCheck.Load(), | ||
- } | ||
+ return map[string]interface{}{} | ||
} | ||
|
||
// Close closes the GPU monitoring module | ||
func (t *GPUMonitoringModule) Close() { | ||
- t.Probe.Close() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,13 @@ | ||
diff --git a/agent_requirements.in b/agent_requirements.in | ||
index b4c724713e..0713f9b365 100644 | ||
index 859d088..11f529c 100644 | ||
--- a/agent_requirements.in | ||
+++ b/agent_requirements.in | ||
@@ -66,7 +66,7 @@ semver==3.0.2 | ||
@@ -65,7 +65,7 @@ securesystemslib[crypto,pynacl]==0.28.0 | ||
semver==3.0.2 | ||
service-identity[idna]==24.1.0 | ||
simplejson==3.19.3 | ||
six==1.16.0 | ||
-snowflake-connector-python==3.12.1 | ||
+snowflake-connector-python==3.12.3; python_version > '3.0' | ||
supervisor==4.2.5 | ||
tuf==4.0.0 | ||
uptime==3.0.1 | ||
uptime==3.0.1 |