Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: allow reconcile on storage size changes #1651

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cicd-scripts/run-e2e-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ printf "\n baseDomain: ${base_domain}" >>${OPTIONSFILE}
printf "\n kubeconfig: ${kubeconfig_hub_path}" >>${OPTIONSFILE}
printf "\n kubecontext: ${kubecontext}" >>${OPTIONSFILE}



Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed?

if command -v ginkgo &>/dev/null; then
GINKGO_CMD=ginkgo
else
Expand Down
65 changes: 46 additions & 19 deletions cicd-scripts/setup-e2e-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ AGENT_NS="open-cluster-management-agent"
HUB_NS="open-cluster-management-hub"
OBSERVABILITY_NS="open-cluster-management-observability"
IMAGE_REPO="quay.io/stolostron"
export MANAGED_CLUSTER="local-cluster" # registration-operator needs this
export LOCAL_CLUSTER="local-cluster" # registration-operator needs this
export MANAGED_CLUSTER="managed-cluster" # registration-operator needs this

SED_COMMAND=${SED}' -i-e -e'

Expand All @@ -45,7 +46,7 @@ deploy_hub_spoke_core() {
if [[ ! -d "_repo_ocm" ]]; then
git clone --depth 1 --branch $OCM_BRANCH https://github.com/stolostron/ocm.git ./_repo_ocm
fi
${SED_COMMAND} "s~clusterName: cluster1$~clusterName: ${MANAGED_CLUSTER}~g" ./_repo_ocm/deploy/klusterlet/config/samples/operator_open-cluster-management_klusterlets.cr.yaml
${SED_COMMAND} "s~clusterName: cluster1$~clusterName: ${LOCAL_CLUSTER}~g" ./_repo_ocm/deploy/klusterlet/config/samples/operator_open-cluster-management_klusterlets.cr.yaml

make deploy-hub cluster-ip deploy-spoke-operator apply-spoke-cr -C ./_repo_ocm

Expand All @@ -57,28 +58,53 @@ deploy_hub_spoke_core() {
kubectl -n "${HUB_NS}" rollout status deploy cluster-manager-registration-controller --timeout=120s
kubectl -n "${HUB_NS}" rollout status deploy cluster-manager-registration-webhook --timeout=120s
kubectl -n "${HUB_NS}" rollout status deploy cluster-manager-work-webhook --timeout=120s

}

#######################################
# Deploys the spoke operator and the klusterlet CR for ${MANAGED_CLUSTER}
# from a dedicated checkout of stolostron/ocm (./_repo_ocm_2).
# Globals:
#   ROOTDIR          (read) directory the checkout is created in
#   LATEST_SNAPSHOT  (read) tag applied to every exported OCM image
#   MANAGED_CLUSTER  (read) cluster name written into the klusterlet sample CR
#   SED_COMMAND      (read) sed invocation including its in-place flags
#   OCM_BRANCH, IMAGE_NAME, REGISTRATION_OPERATOR_IMAGE, REGISTRATION_IMAGE,
#   WORK_IMAGE, PLACEMENT_IMAGE, ADDON_MANAGER_IMAGE (exported)
# Arguments: none
# Returns: exits with status 1 when ROOTDIR cannot be entered.
#######################################
deploy_managed_cluster() {
  local image_repo="quay.io/stolostron"
  local ocm_dir="./_repo_ocm_2"
  local klusterlet_cr="${ocm_dir}/deploy/klusterlet/config/samples/operator_open-cluster-management_klusterlets.cr.yaml"

  # Every path below is relative to ROOTDIR, so continuing after a failed cd
  # would clone and patch files in whatever directory we happen to be in.
  cd "${ROOTDIR}" || {
    echo "cannot change directory to '${ROOTDIR}'" >&2
    exit 1
  }

  # Exported because the make targets below read them from the environment.
  export OCM_BRANCH=main
  export IMAGE_NAME="${image_repo}/registration-operator:${LATEST_SNAPSHOT}"
  export REGISTRATION_OPERATOR_IMAGE="${image_repo}/registration-operator:${LATEST_SNAPSHOT}"
  export REGISTRATION_IMAGE="${image_repo}/registration:${LATEST_SNAPSHOT}"
  export WORK_IMAGE="${image_repo}/work:${LATEST_SNAPSHOT}"
  export PLACEMENT_IMAGE="${image_repo}/placement:${LATEST_SNAPSHOT}"
  export ADDON_MANAGER_IMAGE="${image_repo}/addon-manager:${LATEST_SNAPSHOT}"

  # Reuse an existing checkout so repeated runs do not clone again.
  if [[ ! -d "${ocm_dir}" ]]; then
    git clone --depth 1 --branch "${OCM_BRANCH}" https://github.com/stolostron/ocm.git "${ocm_dir}"
  fi

  # SED_COMMAND holds the command plus its flags and must be word-split.
  # shellcheck disable=SC2086
  ${SED_COMMAND} "s~clusterName: cluster1\$~clusterName: ${MANAGED_CLUSTER}~g" "${klusterlet_cr}"

  make cluster-ip deploy-spoke-operator apply-spoke-cr -C "${ocm_dir}"
}


# approve the CSR for cluster join request
approve_csr_joinrequest() {
echo "wait for CSR for cluster join reqest is created..."
for i in {1..60}; do
# TODO(morvencao): remove the hard-coded cluster label
csrs=$(kubectl get csr -lopen-cluster-management.io/cluster-name=${MANAGED_CLUSTER})
if [[ -n ${csrs} ]]; then
csrnames=$(kubectl get csr -lopen-cluster-management.io/cluster-name=${MANAGED_CLUSTER} -o jsonpath={.items..metadata.name})
for csrname in ${csrnames}; do
echo "approve CSR: ${csrname}"
kubectl certificate approve ${csrname}
done
break
fi
if [[ ${i} -eq 60 ]]; then
echo "timeout wait for CSR is created."
exit 1
fi
echo "retrying in 10s..."
sleep 10
clusters=($LOCAL_CLUSTER $MANAGED_CLUSTER)
for cluster in "${clusters[@]}"; do
echo "Processing CSR for cluster: $cluster"
for i in {1..60}; do
# TODO(morvencao): remove the hard-coded cluster label
csrs=$(kubectl get csr -lopen-cluster-management.io/cluster-name=${cluster})
if [[ -n ${csrs} ]]; then
csrnames=$(kubectl get csr -lopen-cluster-management.io/cluster-name=${cluster} -o jsonpath={.items..metadata.name})
for csrname in ${csrnames}; do
echo "approve CSR: ${csrname}"
kubectl certificate approve ${csrname}
done
break
fi
if [[ ${i} -eq 60 ]]; then
echo "timeout wait for CSR is created."
exit 1
fi
echo "retrying in 10s..."
sleep 10
done
done

for i in {1..20}; do
Expand Down Expand Up @@ -234,6 +260,7 @@ wait_for_deployment_ready() {
# function execute is the main routine to do the actual work
execute() {
deploy_hub_spoke_core
deploy_managed_cluster
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The managed cluster tests are great, but they should go in a separate PR.

approve_csr_joinrequest
deploy_mco_operator
deploy_grafana_test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func (r *MultiClusterObservabilityReconciler) Reconcile(ctx context.Context, req

// handle storagesize changes
result, err := r.HandleStorageSizeChange(instance)
if result != nil {
if err != nil {
return *result, err
}

Expand Down Expand Up @@ -372,13 +372,13 @@ func (r *MultiClusterObservabilityReconciler) Reconcile(ctx context.Context, req

// create an Observatorium CR
result, err = GenerateObservatoriumCR(r.Client, r.Scheme, instance)
if result != nil {
if err != nil {
return *result, err
}

// generate grafana datasource to point to observatorium api gateway
result, err = GenerateGrafanaDataSource(r.Client, r.Scheme, instance)
if result != nil {
if err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the fix? Only returning if we have a non-nil error?

return *result, err
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
package multiclusterobservability

import (
"bytes"
"context"
"k8s.io/apimachinery/pkg/api/equality"

// The import of crypto/md5 below is not for cryptographic use. It is used to hash the contents of files to track
// changes and thus it's not a security issue.
Expand Down Expand Up @@ -162,6 +162,9 @@ func GenerateObservatoriumCR(
oldSpec := observatoriumCRFound.Spec
newSpec := observatoriumCR.Spec

log.Info("Coleen Old observatorium CR spec", "oldSpec", oldSpec.Thanos)
log.Info("Coleen New observatorium CR spec", "newSpec", newSpec.Thanos)

// keep the tenant id unchanged and ensure the new spec has the same tenant ID as the old spec to prevent Observatorium
// from updating
for i, newTenant := range newSpec.API.Tenants {
Expand All @@ -170,12 +173,13 @@ func GenerateObservatoriumCR(
}
}

oldSpecBytes, _ := yaml.Marshal(oldSpec)
newSpecBytes, _ := yaml.Marshal(newSpec)
if bytes.Equal(newSpecBytes, oldSpecBytes) &&
if !equality.Semantic.DeepDerivative(oldSpec, newSpec) &&
labels[obsCRConfigHashLabelName] == observatoriumCRFound.Labels[obsCRConfigHashLabelName] {
log.Info("Coleen Observatorium CR spec and hash are the same, skipping update")
log.Info("Coleen found labels", "labels", observatoriumCRFound.Labels)
return nil, nil
}
log.Info("Coleen Observatorium CR spec and hash are different, updating")

log.Info("Updating observatorium CR",
"observatorium", observatoriumCR.Name,
Expand Down
2 changes: 2 additions & 0 deletions operators/multiclusterobservability/pkg/config/test
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
024-10-24T15:26:45.177Z INFO controller_multiclustermonitoring Coleen Old observatorium CR spec {"oldSpec": {"image":"registry.redhat.io/rhacm2/thanos-rhel9@sha256:caf27fd686b2ae1d6cb0ebe02cf195a2e7caece3ac8bda0c0e88f92678da6bd2","imagePullPolicy":"IfNotPresent","compact":{"replicas":1,"volumeClaimTemplate":{"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"200Gi"}},"storageClassName":"gp3-csi"}},"retentionResolutionRaw":"365d","retentionResolution5m":"365d","retentionResolution1h":"365d","enableDownsampling":true,"resources":{"requests":{"cpu":"100m","memory":"512Mi"}},"serviceMonitor":true,"deleteDelay":"48h"},"receiveController":{"image":"registry.redhat.io/rhacm2/thanos-receive-controller-rhel9@sha256:739292da294b1b536cfea4d4cb63bc39f95cb917695026b1660f9022a73d58ee","imagePullPolicy":"IfNotPresent","version":"master-2022-04-01-b58820f","resources":{"requests":{"cpu":"4m","memory":"32Mi"}},"serviceMonitor":true},"receivers":{"replicas":3,"volumeClaimTemplate":{"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"100Gi"}},"storageClassName":"gp3-csi"}},"replicationFactor":3,"resources":{"requests":{"cpu":"300m","memory":"512Mi"}},"serviceMonitor":true,"retention":"24h"},"queryFrontend":{"replicas":2,"resources":{"requests":{"cpu":"100m","memory":"256Mi"}},"serviceMonitor":true,"cache":{"image":"registry.redhat.io/rhacm2/memcached-rhel9@sha256:9600034019ce695e89aecb6ac255d591b201fc40a7f3d9796f42d043f0340623","imagePullPolicy":"IfNotPresent","version":"1.6.3-alpine","exporterImage":"registry.redhat.io/rhacm2/memcached-exporter-rhel9@sha256:2c6e39ecbeed3dc4d30225b9a2ed0840c6d41c032ee08d63c7f8f092f3ee81e6","exporterImagePullPolicy":"IfNotPresent","exporterVersion":"v0.9.0","replicas":3,"memoryLimitMb":1024,"maxItemSize":"1m","connectionLimit":1024,"resources":{"requests":{"cpu":"45m","memory":"128Mi"}},"exporterResources":{"requests":{"cpu":"5m","memory":"50Mi"}},"serviceMonitor":true}},"store":{"volumeClaimTemplate":{"
spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"10Gi"}},"storageClassName":"gp3-csi"}},"shards":3,"cache":{"image":"registry.redhat.io/rhacm2/memcached-rhel9@sha256:9600034019ce695e89aecb6ac255d591b201fc40a7f3d9796f42d043f0340623","imagePullPolicy":"IfNotPresent","version":"1.6.3-alpine","exporterImage":"registry.redhat.io/rhacm2/memcached-exporter-rhel9@sha256:2c6e39ecbeed3dc4d30225b9a2ed0840c6d41c032ee08d63c7f8f092f3ee81e6","exporterImagePullPolicy":"IfNotPresent","exporterVersion":"v0.9.0","replicas":3,"memoryLimitMb":1024,"maxItemSize":"1m","connectionLimit":1024,"resources":{"requests":{"cpu":"45m","memory":"128Mi"}},"exporterResources":{"requests":{"cpu":"5m","memory":"50Mi"}},"serviceMonitor":true},"resources":{"requests":{"cpu":"100m","memory":"1Gi"}},"serviceMonitor":true},"rule":{"replicas":3,"volumeClaimTemplate":{"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"2Gi"}},"storageClassName":"gp3-csi"}},"rulesConfig":[{"name":"thanos-ruler-default-rules","key":"default_rules.yaml"}],"extraVolumeMounts":[{"type":"configMap","mountPath":"/etc/thanos/configmaps/alertmanager-ca-bundle","name":"alertmanager-ca-bundle","key":"service-ca.crt"}],"alertmanagerConfigFile":{"name":"thanos-ruler-config","key":"config.yaml"},"reloaderImage":"registry.redhat.io/openshift4/ose-configmap-reloader@sha256:fa5b2e42a27e3ab5f0a0d2a103b5602b81d1b29eb788b54cc60ad547744ad053","reloaderImagePullPolicy":"IfNotPresent","resources":{"requests":{"cpu":"50m","memory":"512Mi"}},"reloaderResources":{"requests":{"cpu":"4m","memory":"25Mi"}},"serviceMonitor":true,"blockDuration":"2h","retention":"24h","evalInterval":"300s"},"query":{"replicas":2,"resources":{"requests":{"cpu":"300m","memory":"1Gi"}},"serviceMonitor":true,"lookbackDelta":"600s"}}}
2024-10-24T15:26:45.177Z INFO controller_multiclustermonitoring Coleen New observatorium CR spec {"newSpec": {"image":"registry.redhat.io/rhacm2/thanos-rhel9@sha256:caf27fd686b2ae1d6cb0ebe02cf195a2e7caece3ac8bda0c0e88f92678da6bd2","imagePullPolicy":"IfNotPresent","compact":{"replicas":1,"volumeClaimTemplate":{"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"250Gi"}},"storageClassName":"gp3-csi"}},"retentionResolutionRaw":"365d","retentionResolution5m":"365d","retentionResolution1h":"365d","enableDownsampling":true,"resources":{"requests":{"cpu":"500m","memory":"1Gi"}},"serviceMonitor":true,"deleteDelay":"48h"},"receiveController":{"image":"registry.redhat.io/rhacm2/thanos-receive-controller-rhel9@sha256:739292da294b1b536cfea4d4cb63bc39f95cb917695026b1660f9022a73d58ee","imagePullPolicy":"IfNotPresent","version":"master-2022-04-01-b58820f","resources":{"requests":{"cpu":"4m","memory":"32Mi"}},"serviceMonitor":true},"receivers":{"replicas":3,"volumeClaimTemplate":{"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"100Gi"}},"storageClassName":"gp3-csi"}},"replicationFactor":3,"resources":{"requests":{"cpu":"300m","memory":"512Mi"}},"serviceMonitor":true,"retention":"24h"},"queryFrontend":{"replicas":2,"resources":{"requests":{"cpu":"100m","memory":"256Mi"}},"serviceMonitor":true,"cache":{"image":"registry.redhat.io/rhacm2/memcached-rhel9@sha256:9600034019ce695e89aecb6ac255d591b201fc40a7f3d9796f42d043f0340623","imagePullPolicy":"IfNotPresent","version":"1.6.3-alpine","exporterImage":"registry.redhat.io/rhacm2/memcached-exporter-rhel9@sha256:2c6e39ecbeed3dc4d30225b9a2ed0840c6d41c032ee08d63c7f8f092f3ee81e6","exporterImagePullPolicy":"IfNotPresent","exporterVersion":"v0.9.0","replicas":3,"memoryLimitMb":1024,"maxItemSize":"1m","connectionLimit":1024,"resources":{"requests":{"cpu":"45m","memory":"128Mi"}},"exporterResources":{"requests":{"cpu":"5m","memory":"50Mi"}},"serviceMonitor":true}},"store":{"volumeClaimTemplate":{"s
pec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"10Gi"}},"storageClassName":"gp3-csi"}},"shards":3,"cache":{"image":"registry.redhat.io/rhacm2/memcached-rhel9@sha256:9600034019ce695e89aecb6ac255d591b201fc40a7f3d9796f42d043f0340623","imagePullPolicy":"IfNotPresent","version":"1.6.3-alpine","exporterImage":"registry.redhat.io/rhacm2/memcached-exporter-rhel9@sha256:2c6e39ecbeed3dc4d30225b9a2ed0840c6d41c032ee08d63c7f8f092f3ee81e6","exporterImagePullPolicy":"IfNotPresent","exporterVersion":"v0.9.0","replicas":3,"memoryLimitMb":1024,"maxItemSize":"1m","connectionLimit":1024,"resources":{"requests":{"cpu":"45m","memory":"128Mi"}},"exporterResources":{"requests":{"cpu":"5m","memory":"50Mi"}},"serviceMonitor":true},"resources":{"requests":{"cpu":"100m","memory":"1Gi"}},"serviceMonitor":true},"rule":{"replicas":3,"volumeClaimTemplate":{"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"2Gi"}},"storageClassName":"gp3-csi"}},"rulesConfig":[{"name":"thanos-ruler-default-rules","key":"default_rules.yaml"}],"extraVolumeMounts":[{"type":"configMap","mountPath":"/etc/thanos/configmaps/alertmanager-ca-bundle","name":"alertmanager-ca-bundle","key":"service-ca.crt"}],"alertmanagerConfigFile":{"name":"thanos-ruler-config","key":"config.yaml"},"reloaderImage":"registry.redhat.io/openshift4/ose-configmap-reloader@sha256:fa5b2e42a27e3ab5f0a0d2a103b5602b81d1b29eb788b54cc60ad547744ad053","reloaderImagePullPolicy":"IfNotPresent","resources":{"requests":{"cpu":"50m","memory":"512Mi"}},"reloaderResources":{"requests":{"cpu":"4m","memory":"25Mi"}},"serviceMonitor":true,"blockDuration":"2h","retention":"24h","evalInterval":"300s"},"query":{"replicas":2,"resources":{"requests":{"cpu":"300m","memory":"1Gi"}},"serviceMonitor":true,"lookbackDelta":"600s"}}}
17 changes: 17 additions & 0 deletions tests/run-in-kind/kind/kind-managed.config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# kind cluster definition with a single control-plane node.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
# Container ports published on the host.
# NOTE(review): 80/443 are remapped to host ports 81/444, presumably so this
# cluster can run next to another kind cluster on the same host — confirm.
extraPortMappings:
# HTTP: container 80 -> host 81, all interfaces.
- containerPort: 80
hostPort: 81
listenAddress: "0.0.0.0"
# HTTPS: container 443 -> host 444, all interfaces.
- containerPort: 443
hostPort: 444
listenAddress: "0.0.0.0"
# Container 6443 -> host 32807, all interfaces.
- containerPort: 6443
hostPort: 32807
listenAddress: "0.0.0.0"
# Container 31001 -> host 31002, bound to loopback only.
- containerPort: 31001
hostPort: 31002
listenAddress: "127.0.0.1"
Loading