Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Functional Test Setup #1586

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .chloggen/fix-operator-install-operations.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix
# The name of the component, or a single word describing the area of concern, (e.g. agent, clusterReceiver, gateway, operator, chart, other)
component: operator
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Additional fixes for issues where sometimes certificates and Instrumentation opentelemetry.io/v1alpha1 are installed too early
# One or more tracking issues related to the change
issues: [1586]
# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ kind: Certificate
metadata:
annotations:
helm.sh/hook: post-install,post-upgrade
helm.sh/hook-weight: "1"
helm.sh/hook-weight: "3"
labels:
helm.sh/chart: operator-0.71.2
app.kubernetes.io/name: operator
Expand Down Expand Up @@ -34,7 +34,7 @@ kind: Issuer
metadata:
annotations:
helm.sh/hook: post-install,post-upgrade
helm.sh/hook-weight: "1"
helm.sh/hook-weight: "2"
labels:
helm.sh/chart: operator-0.71.2
app.kubernetes.io/name: operator
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
---
# Source: splunk-otel-collector/templates/operator/job-startupapicheck.yaml
apiVersion: batch/v1
kind: Job
metadata:
name: default-splunk-otel-collector-operator-startupapicheck
namespace: default
labels:
app.kubernetes.io/name: splunk-otel-collector
helm.sh/chart: splunk-otel-collector-0.113.0
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/instance: default
app.kubernetes.io/version: "0.113.0"
app: splunk-otel-collector
component: otel-operator
chart: splunk-otel-collector-0.113.0
release: default
heritage: Helm
app.kubernetes.io/component: otel-operator
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "4"
spec:
template:
spec:
containers:
- name: startupapicheck
image: registry.access.redhat.com/ubi9/ubi:latest
env:
- name: MANAGER_METRICS_SERVICE_CLUSTERIP
value: "default-splunk-otel-collector-operator"
- name: MANAGER_METRICS_SERVICE_PORT
value: "8443"
- name: WEBHOOK_SERVICE_CLUSTERIP
value: "default-splunk-otel-collector-operator-webhook"
- name: WEBHOOK_SERVICE_PORT
value: "443"
command:
- sh
- -c
- |
operator_service_checked=false
operator_webhook_service_checked=false

for i in $(seq 1 300); do
# Checks for the Operator and Operator Webhook service availability using curl
# The services are ready when curl receives an HTTP 400 error response.

if [ "$operator_service_checked" = false ]; then
curl_output_operator=$(curl -s -o /dev/null -w "%{http_code}" "$MANAGER_METRICS_SERVICE_CLUSTERIP:$MANAGER_METRICS_SERVICE_PORT")
if [ "$curl_output_operator" = "400" ]; then
operator_service_checked=true
fi
fi

if [ "$operator_webhook_service_checked" = false ]; then
curl_output_webhook=$(curl -s -o /dev/null -w "%{http_code}" "$WEBHOOK_SERVICE_CLUSTERIP:$WEBHOOK_SERVICE_PORT")
if [ "$curl_output_webhook" = "400" ]; then
operator_webhook_service_checked=true
fi
fi

echo "Attempt $i: Operator Service=${operator_service_checked}, Operator Webhook Service=${operator_webhook_service_checked}"
sleep 1

if [ "$operator_service_checked" = true ] && [ "$operator_webhook_service_checked" = true ]; then
echo "All checks passed."
exit 0
fi
done
echo "Timeout reached. Not all checks completed successfully."
exit 1
restartPolicy: Never
41 changes: 32 additions & 9 deletions functional_tests/functional_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"bytes"
"context"
"fmt"
"k8s.io/cli-runtime/pkg/genericclioptions"
"k8s.io/client-go/discovery"
"os"
"path/filepath"
"regexp"
Expand Down Expand Up @@ -94,6 +96,20 @@ type sinks struct {
tracesConsumer *consumertest.TracesSink
}

type cacheInvalidatingRESTClientGetter struct {
genericclioptions.RESTClientGetter
log func(string, ...interface{})
}

func (c *cacheInvalidatingRESTClientGetter) ToDiscoveryClient() (discovery.CachedDiscoveryInterface, error) {
discoveryClient, err := c.RESTClientGetter.ToDiscoveryClient()
if err == nil {
c.log("Clearing discovery cache")
discoveryClient.Invalidate()
}
return discoveryClient, err
}

func setupOnce(t *testing.T) *sinks {
setupRun.Do(func() {
// create an API server
Expand Down Expand Up @@ -184,24 +200,31 @@ func deployChartsAndApps(t *testing.T) {
err = yaml.Unmarshal(buf.Bytes(), &values)
require.NoError(t, err)

// When using the Helm sdk, there can be cache invalidation issues we can get around with this,
// Helm cli users don't face this issue. Deploying CRD instances via Helm templates
// and CR instances via a post-install hook will fail without this.
// Related issue: https://github.com/helm/helm/issues/6452
// Draft of upstream fix: https://github.com/jvoravong/helm/pull/1/files
actionConfig := new(action.Configuration)
if err := actionConfig.Init(kube.GetConfig(testKubeConfig, "", "default"), "default", os.Getenv("HELM_DRIVER"), func(format string, v ...interface{}) {
wrappedGetter := &cacheInvalidatingRESTClientGetter{
RESTClientGetter: kube.GetConfig(testKubeConfig, "", "default"),
log: func(format string, args ...interface{}) {
fmt.Printf(format+"\n", args...)
},
}

if err := actionConfig.Init(wrappedGetter, "default", os.Getenv("HELM_DRIVER"), func(format string, v ...interface{}) {
t.Logf(format+"\n", v...)
}); err != nil {
require.NoError(t, err)
}

install := action.NewInstall(actionConfig)
install.Namespace = "default"
install.ReleaseName = "sock"
install.Timeout = 5 * time.Minute

_, err = install.Run(chart, values)
if err != nil {
t.Logf("error reported during helm install: %v\n", err)
retryUpgrade := action.NewUpgrade(actionConfig)
retryUpgrade.Namespace = "default"
retryUpgrade.Install = true
_, err = retryUpgrade.Run("sock", chart, values)
require.NoError(t, err)
}

waitForAllDeploymentsToStart(t, client)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
{{/*Create the startupapicheck job image name.*/}}
{{- define "splunk-otel-collector.operator.instrumentation-job.image" -}}
{{- printf "%s:%s" .Values.instrumentation.jobs.startupapicheck.repository .Values.instrumentation.jobs.startupapicheck.tag | trimSuffix ":" -}}
{{- end -}}

{{/*
Helper to ensure the correct usage of the Splunk OpenTelemetry Collector Operator.
- Checks for a valid endpoint for exporting telemetry data.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{{- if and .Values.operator.enabled .Values.instrumentation.jobs.startupapicheck.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
name: {{ template "splunk-otel-collector.fullname" . }}-operator-startupapicheck
namespace: {{ template "splunk-otel-collector.namespace" . }}
labels:
{{- include "splunk-otel-collector.commonLabels" . | nindent 4 }}
app: {{ template "splunk-otel-collector.name" . }}
component: otel-operator
chart: {{ template "splunk-otel-collector.chart" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
app.kubernetes.io/component: otel-operator
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "4"
spec:
template:
spec:
containers:
- name: startupapicheck
image: {{ template "splunk-otel-collector.operator.instrumentation-job.image" . }}
env:
- name: MANAGER_METRICS_SERVICE_CLUSTERIP
value: "{{ template "splunk-otel-collector.fullname" . }}-operator"
- name: MANAGER_METRICS_SERVICE_PORT
value: "8443"
- name: WEBHOOK_SERVICE_CLUSTERIP
value: "{{ template "splunk-otel-collector.fullname" . }}-operator-webhook"
- name: WEBHOOK_SERVICE_PORT
value: "443"
command:
- sh
- -c
- |
operator_service_checked=false
operator_webhook_service_checked=false

for i in $(seq 1 300); do
# Checks for the Operator and Operator Webhook service availability using curl
# The services are ready when curl receives an HTTP 400 error response.

if [ "$operator_service_checked" = false ]; then
curl_output_operator=$(curl -s -o /dev/null -w "%{http_code}" "$MANAGER_METRICS_SERVICE_CLUSTERIP:$MANAGER_METRICS_SERVICE_PORT")
if [ "$curl_output_operator" = "400" ]; then
operator_service_checked=true
fi
fi

if [ "$operator_webhook_service_checked" = false ]; then
curl_output_webhook=$(curl -s -o /dev/null -w "%{http_code}" "$WEBHOOK_SERVICE_CLUSTERIP:$WEBHOOK_SERVICE_PORT")
if [ "$curl_output_webhook" = "400" ]; then
operator_webhook_service_checked=true
fi
fi

echo "Attempt $i: Operator Service=${operator_service_checked}, Operator Webhook Service=${operator_webhook_service_checked}"
sleep 1

if [ "$operator_service_checked" = true ] && [ "$operator_webhook_service_checked" = true ]; then
echo "All checks passed."
exit 0
fi
done
echo "Timeout reached. Not all checks completed successfully."
exit 1
restartPolicy: Never
{{ end }}
21 changes: 21 additions & 0 deletions helm-charts/splunk-otel-collector/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,27 @@
}
},
"required": ["repository", "tag"]
},
"jobs": {
"type": "object",
"additionalProperties": false,
"properties": {
"startupapicheck": {
"type": "object",
"additionalProperties": false,
"properties": {
"enabled": {
"type": "boolean"
},
"repository": {
"type": "string"
},
"tag": {
"type": "string"
}
}
}
}
}
}
},
Expand Down
19 changes: 13 additions & 6 deletions helm-charts/splunk-otel-collector/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,6 @@ image:
# The policy that specifies when the user wants the Universal Base images to be pulled
pullPolicy: IfNotPresent


################################################################################
# Extra system configuration
################################################################################
Expand Down Expand Up @@ -1178,13 +1177,14 @@ operator:
enabled: false
admissionWebhooks:
certManager:
# Annotate the certificate and issuer to ensure they are created after the cert-manager CRDs have been installed.
certificateAnnotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "1"
# Annotate the issuer and certificate to ensure they are created after the cert-manager CRDs
# have been installed and cert-manager is ready.
issuerAnnotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "1"
"helm.sh/hook-weight": "2"
certificateAnnotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "3"
# Collector deployment via the operator is not supported at this time.
# The collector image repository is specified here to meet operator subchart constraints.
manager:
Expand Down Expand Up @@ -1271,6 +1271,13 @@ instrumentation:
# - name: NGINX_ENV_VAR
# value: nginx_value
# Auto-instrumentation Libraries (End)
# Jobs related to installing, uninstalling, or managing Operator Instrumentation.
jobs:
# Enables waiting for the operator to be available and ready before deploying instrumentation as a hook post-install step.
startupapicheck:
enabled: true
repository: registry.access.redhat.com/ubi9/ubi
tag: latest

# The cert-manager is a CNCF application deployed as a subchart and used for supporting operators that require TLS certificates.
# Full list of Helm value configurations: https://artifacthub.io/packages/helm/cert-manager/cert-manager?modal=values
Expand Down
Loading