From 5a2d2ae837dffdf67bf7d020b641f2bdddfaf29a Mon Sep 17 00:00:00 2001 From: Sebastian Woehrl Date: Wed, 15 May 2024 08:39:28 +0200 Subject: [PATCH] Add option to enable pprof endpoints (#813) ### Description There have been reports of memory leaks in the operator (#700). This PR adds an option to enable the [go pprof](https://pkg.go.dev/net/http/pprof) endpoints. With them users can get heap and allocation profiles that hopefully help in tracking down the leak. ### Issues Resolved Fixes #626 ### Check List - [x] Commits are signed per the DCO using --signoff - [-] Unittest added for the new/changed functionality and all unit tests are successful - [x] Customer-visible features documented - [x] No linter warnings (`make lint`) If CRDs are changed: - [-] CRD YAMLs updated (`make manifests`) and also copied into the helm chart - [-] Changes to CRDs documented By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/OpenSearch/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
Signed-off-by: Sebastian Woehrl --- ...ch-operator-controller-manager-deployment.yaml | 2 ++ charts/opensearch-operator/values.yaml | 3 +++ docs/userguide/main.md | 11 +++++++++++ opensearch-operator/main.go | 15 +++++++++++++++ 4 files changed, 31 insertions(+) diff --git a/charts/opensearch-operator/templates/opensearch-operator-controller-manager-deployment.yaml b/charts/opensearch-operator/templates/opensearch-operator-controller-manager-deployment.yaml index 2d1fafbd..08f089f4 100755 --- a/charts/opensearch-operator/templates/opensearch-operator-controller-manager-deployment.yaml +++ b/charts/opensearch-operator/templates/opensearch-operator-controller-manager-deployment.yaml @@ -63,6 +63,8 @@ spec: value: {{ .Values.manager.dnsBase }} - name: PARALLEL_RECOVERY_ENABLED value: "{{ .Values.manager.parallelRecoveryEnabled }}" + - name: PPROF_ENDPOINTS_ENABLED + value: "{{ .Values.manager.pprofEndpointsEnabled }}" {{- if .Values.manager.extraEnv }} {{- toYaml .Values.manager.extraEnv | nindent 8 }} {{- end }} diff --git a/charts/opensearch-operator/values.yaml b/charts/opensearch-operator/values.yaml index 4afa24a5..96bdc268 100644 --- a/charts/opensearch-operator/values.yaml +++ b/charts/opensearch-operator/values.yaml @@ -39,6 +39,9 @@ manager: # Set this to false to disable the experimental parallel recovery in case you are experiencing problems parallelRecoveryEnabled: true + # Set this to true to enable the standard go pprof endpoints on port 6060 (https://pkg.go.dev/net/http/pprof) + # Should only be used for debugging purposes + pprofEndpointsEnabled: false image: repository: opensearchproject/opensearch-operator diff --git a/docs/userguide/main.md b/docs/userguide/main.md index e554abc4..7f55780e 100644 --- a/docs/userguide/main.md +++ b/docs/userguide/main.md @@ -100,6 +100,17 @@ manager: # value: somevalue ``` +### Pprof endpoints + +There have been situations reported where the operator is leaking memory. 
To help diagnose these situations the standard go [pprof](https://pkg.go.dev/net/http/pprof) endpoints can be enabled by adding the following to your `values.yaml`: + +```yaml +manager: + pprofEndpointsEnabled: true +``` + +To access the endpoints you will need to use a port-forward, as for security reasons the endpoints are only exposed on localhost inside the pod: `kubectl port-forward deployment/opensearch-operator-controller-manager 6060`. Then from another terminal you can use the [go pprof tool](https://pkg.go.dev/net/http/pprof#hdr-Usage_examples), e.g.: `go tool pprof http://localhost:6060/debug/pprof/heap`. + ## Configuring OpenSearch The main job of the operator is to deploy and manage OpenSearch clusters. As such it offers a wide range of options to configure clusters. diff --git a/opensearch-operator/main.go b/opensearch-operator/main.go index 3c898fbe..0b81ace3 100644 --- a/opensearch-operator/main.go +++ b/opensearch-operator/main.go @@ -37,6 +37,9 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "net/http" + _ "net/http/pprof" ) var ( @@ -87,6 +90,11 @@ func main() { setupLog.Info("Enabled debug logging via environment variable OPERATOR_DEV_LOGGING") } } + pprofEndpoints, err := strconv.ParseBool(os.Getenv("PPROF_ENDPOINTS_ENABLED")) + if err != nil { + // by default do not enable endpoints + pprofEndpoints = false + } ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) @@ -188,6 +196,13 @@ func main() { os.Exit(1) } + if pprofEndpoints { + go func() { + listenError := http.ListenAndServe("localhost:6060", nil) + setupLog.Error(listenError, "Failed to start pprof endpoint listener") + }() + } + setupLog.Info("Starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager")