diff --git a/datadog-agent-nvml.yaml b/datadog-agent-nvml.yaml index c1334a732d3..a8c625e9ced 100644 --- a/datadog-agent-nvml.yaml +++ b/datadog-agent-nvml.yaml @@ -1,7 +1,7 @@ package: name: datadog-agent-nvml version: 1.0.9 - epoch: 3 + epoch: 4 description: "Checks NVIDIA Management Library (NVML) exposed metrics through the Datadog Agent and can correlate them with the exposed Kubernetes devices" copyright: - license: Apache-2.0 @@ -18,14 +18,15 @@ environment: - busybox - datadog-agent - datadog-agent-core-integrations - - python-${{vars.python_version}}-dev # strictly requires python3.11 + - py${{vars.python_version}}-grpcio + - python-${{vars.python_version}}-dev - rsync vars: dd_conf: /etc/datadog-agent/conf.d dd_home: / # agent being run by root expects /. dd_shared: /opt/datadog-agent/embedded - python_version: "3.11" + python_version: "3.12" pipeline: # This integration wheel comes from the integrations-extras repository @@ -35,6 +36,11 @@ pipeline: tag: nvml-${{package.version}} expected-commit: d38c5cdb4ab4d07f4432afb25e0ccd70341efb51 + - runs: | + # Use the system Python grpcio package to avoid compiling grpcio from source. + # Relax the exact pin (==) in requirements.in to a minimum (>=) so that a system grpcio of an equal or newer version satisfies it. + sed -i 's/grpcio==\(.*\)/grpcio>=\1/' ./nvml/requirements.in + - runs: | # Create and activate a virtual environment. 
python -m venv .venv diff --git a/datadog-agent.yaml b/datadog-agent.yaml index a72444a8528..aeb2e0d04fa 100644 --- a/datadog-agent.yaml +++ b/datadog-agent.yaml @@ -13,11 +13,12 @@ package: - datadog-agent-core-integrations - findutils - grep + - libpcap - libseccomp - shadow vars: - py-version: "3.11" + py-version: "3.12" destd: /opt/datadog-agent var-transforms: @@ -62,6 +63,7 @@ environment: - krb5-dev - libbpf-dev - libedit-dev + - libpcap-dev - libzip - linux-headers - ninja @@ -69,7 +71,7 @@ environment: - procps-dev - py${{vars.py-version}}-pip - py${{vars.py-version}}-semver - - python-${{vars.py-version}}-dev # strictly requires python3.11 + - python-${{vars.py-version}}-dev - systemd-dev - util-linux-misc # unshare - wget # Required for downloading clang-12 and kernel headers from debian @@ -77,12 +79,11 @@ environment: # CGo allows Go programs to call C code CGO_ENABLED: "1" # -Os optimizes the code for size and add the directory to rtlinkers includes - CGO_CFLAGS: "-Os -I${{targets.destdir}}/usr/include/" + CGO_CFLAGS: "-Os -I/usr/include/" # Pass options to the linker. - CGO_LDFLAGS: "-L${{targets.destdir}}/usr/lib/" + CGO_LDFLAGS: "-L/usr/lib/" # disables generation of debugging information - # omits the symbol table and debug information, further reducing the size of the binary. - GOFLAGS: "-ldflags=-w -ldflags=-s" + GOFLAGS: "-ldflags=-w" # The version of linux-headers to fetch kernel headers for LINUX_HEADERS_VERSION: "5.10.0-0.deb10.29" # The version of linux to fetch kernel headers for @@ -95,6 +96,13 @@ pipeline: tag: ${{package.version}} expected-commit: 646618687e4f9351b5fe19cce678c9cd4b011e74 + # disable GPU support for the agent as it causes test failures at runtime + # error: agent: undefined symbol: nvmlVgpuTypeGetCapabilities + # upstream issue to track https://github.com/DataDog/datadog-agent/issues/32419 + - uses: patch + with: + patches: /home/build/disable-gpu.patch + # Install `invoke` (build) dependencies. 
We ultimately package with venv so # these won't leak into the package. - runs: | @@ -135,6 +143,11 @@ pipeline: wget "https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.1/clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz" -O /tmp/clang.tar.xz -o /dev/null echo "6b3cc55d3ef413be79785c4dc02828ab3bd6b887872b143e3091692fc6acefe7 /tmp/clang.tar.xz" | sha256sum --check + # Symlink the system libpcap.a to /home/build/dev/lib/libpcap.a; otherwise the build will attempt to download libpcap. + - runs: | + mkdir -p /home/build/dev/lib + ln -s /usr/lib/libpcap.a /home/build/dev/lib/libpcap.a + - runs: | wget -O common.deb http://deb.debian.org/debian-security/pool/updates/main/l/linux-5.10/linux-headers-${LINUX_HEADERS_VERSION}-common_${LINUX_KERNEL_VERSION}_all.deb dpkg -x common.deb /tmp/common @@ -165,7 +178,6 @@ pipeline: - runs: | invoke -e rtloader.make \ - --python-runtimes=3 \ --install-prefix="${{targets.destdir}}/usr" \ --cmake-options="\ -DCMAKE_INSTALL_LIBDIR=lib \ @@ -181,7 +193,6 @@ pipeline: --bundle system-probe \ --bundle security-agent \ --exclude-rtloader \ - --python-runtimes 3 \ --no-development \ --bundle-ebpf \ --embedded-path /usr/lib @@ -287,7 +298,7 @@ subpackages: with: repository: https://github.com/DataDog/integrations-core branch: ${{vars.datadog-major-minor-x}} # 7.59.x - expected-commit: cff91adb18fe879fd875d62d285a67deb69040fe # needs to be updated with each new release + expected-commit: 3189af0e0ae840c9a4bab3131662c7fd6b0de7fb # needs to be updated with each new release - uses: patch with: patches: /home/build/int-core-datadog_checks_dev-pyproject-toml.patch /home/build/int-core-mysql-hatch-toml.patch /home/build/int-core-singlestore-hatch-toml.patch /home/build/int-core-agent_requirements-in.patch /home/build/int-core-snowflake-pyproject-toml.patch @@ -299,7 +310,7 @@ subpackages: python${{vars.py-version}} -m venv .venv # Install locked dependencies - .venv/bin/pip install --require-hashes --only-binary=:all: --no-deps -r 
.deps/resolved/linux-${{build.arch}}_py3.txt + .venv/bin/pip install --require-hashes --only-binary=:all: --no-deps -r .deps/resolved/linux-${{build.arch}}_${{vars.py-version}}.txt excludes="datadog_checks_base datadog_checks_dev datadog_checks_tests_helper docker_daemon esxi teleport" checks=$(invoke -r /home/build agent.collect-integrations /home/integrations/ 3 linux --excluded "$excludes") diff --git a/datadog-agent/disable-gpu.patch b/datadog-agent/disable-gpu.patch new file mode 100644 index 00000000000..c9cfdac6428 --- /dev/null +++ b/datadog-agent/disable-gpu.patch @@ -0,0 +1,98 @@ +diff --git a/cmd/system-probe/modules/gpu.go b/cmd/system-probe/modules/gpu.go +index 27f3f0c..f1ceba6 100644 +--- a/cmd/system-probe/modules/gpu.go ++++ b/cmd/system-probe/modules/gpu.go +@@ -8,20 +8,13 @@ + package modules + + import ( +- "fmt" +- "net/http" +- "time" +- +- "github.com/NVIDIA/go-nvml/pkg/nvml" +- "go.uber.org/atomic" +- ++ "errors" + "github.com/DataDog/datadog-agent/cmd/system-probe/api/module" + "github.com/DataDog/datadog-agent/cmd/system-probe/config" + sysconfigtypes "github.com/DataDog/datadog-agent/cmd/system-probe/config/types" + "github.com/DataDog/datadog-agent/cmd/system-probe/utils" +- "github.com/DataDog/datadog-agent/pkg/gpu" + gpuconfig "github.com/DataDog/datadog-agent/pkg/gpu/config" +- "github.com/DataDog/datadog-agent/pkg/util/log" ++ "net/http" + ) + + var _ module.Module = &GPUMonitoringModule{} +@@ -32,53 +25,21 @@ var GPUMonitoring = module.Factory{ + Name: config.GPUMonitoringModule, + ConfigNamespaces: gpuMonitoringConfigNamespaces, + Fn: func(_ *sysconfigtypes.Config, deps module.FactoryDependencies) (module.Module, error) { +- +- c := gpuconfig.NewConfig() +- probeDeps := gpu.ProbeDependencies{ +- Telemetry: deps.Telemetry, +- //if the config parameter doesn't exist or is empty string, the default value is used as defined in go-nvml library +- //(https://github.com/NVIDIA/go-nvml/blob/main/pkg/nvml/lib.go#L30) +- NvmlLib: 
nvml.New(nvml.WithLibraryPath(c.NVMLLibraryPath)), +- } +- +- ret := probeDeps.NvmlLib.Init() +- if ret != nvml.SUCCESS && ret != nvml.ERROR_ALREADY_INITIALIZED { +- return nil, fmt.Errorf("unable to initialize NVML library: %v", ret) +- } +- +- t, err := gpu.NewProbe(c, probeDeps) +- if err != nil { +- return nil, fmt.Errorf("unable to start GPU monitoring: %w", err) +- } +- +- return &GPUMonitoringModule{ +- Probe: t, +- lastCheck: atomic.NewInt64(0), +- }, nil ++ return nil, errors.New("GPU monitoring disabled at build time") + }, + NeedsEBPF: func() bool { +- return true ++ return false + }, + } + + // GPUMonitoringModule is a module for GPU monitoring + type GPUMonitoringModule struct { +- *gpu.Probe +- lastCheck *atomic.Int64 + } + + // Register registers the GPU monitoring module + func (t *GPUMonitoringModule) Register(httpMux *module.Router) error { + httpMux.HandleFunc("/check", func(w http.ResponseWriter, _ *http.Request) { +- t.lastCheck.Store(time.Now().Unix()) +- stats, err := t.Probe.GetAndFlush() +- if err != nil { +- log.Errorf("Error getting GPU stats: %v", err) +- w.WriteHeader(500) +- return +- } +- +- utils.WriteAsJSON(w, stats) ++ utils.WriteAsJSON(w, map[string]interface{}{}) + }) + + return nil +@@ -86,12 +47,9 @@ func (t *GPUMonitoringModule) Register(httpMux *module.Router) error { + + // GetStats returns the last check time + func (t *GPUMonitoringModule) GetStats() map[string]interface{} { +- return map[string]interface{}{ +- "last_check": t.lastCheck.Load(), +- } ++ return map[string]interface{}{} + } + + // Close closes the GPU monitoring module + func (t *GPUMonitoringModule) Close() { +- t.Probe.Close() + } diff --git a/datadog-agent/int-core-agent_requirements-in.patch b/datadog-agent/int-core-agent_requirements-in.patch index 82016d47ec8..2c404c59820 100644 --- a/datadog-agent/int-core-agent_requirements-in.patch +++ b/datadog-agent/int-core-agent_requirements-in.patch @@ -1,13 +1,13 @@ diff --git a/agent_requirements.in 
b/agent_requirements.in -index b4c724713e..0713f9b365 100644 +index 859d088..11f529c 100644 --- a/agent_requirements.in +++ b/agent_requirements.in -@@ -66,7 +66,7 @@ semver==3.0.2 +@@ -65,7 +65,7 @@ securesystemslib[crypto,pynacl]==0.28.0 + semver==3.0.2 service-identity[idna]==24.1.0 simplejson==3.19.3 - six==1.16.0 -snowflake-connector-python==3.12.1 +snowflake-connector-python==3.12.3; python_version > '3.0' supervisor==4.2.5 tuf==4.0.0 - uptime==3.0.1 + uptime==3.0.1 \ No newline at end of file