From 1e7d9a2a231d3800f4683a927f82cfba159a37d5 Mon Sep 17 00:00:00 2001 From: Yuedong Wu <57584831+lunarwhite@users.noreply.github.com> Date: Wed, 22 Jan 2025 10:31:32 +0800 Subject: [PATCH] refactor cert-manager-custom-api-ingress CI steps (#60303) --- ci-operator/step-registry/cert-manager/OWNERS | 8 +- .../cert-manager/clusterissuer/OWNERS | 9 +- .../cert-manager-clusterissuer-commands.sh | 178 +++++----- ...rt-manager-clusterissuer-ref.metadata.json | 8 +- .../cert-manager-clusterissuer-ref.yaml | 32 +- .../clusterissuer/hypershift/OWNERS | 12 +- ...nager-clusterissuer-hypershift-commands.sh | 34 +- ...clusterissuer-hypershift-ref.metadata.json | 12 +- ...-manager-clusterissuer-hypershift-ref.yaml | 10 +- .../custom-aggregated-cert/OWNERS | 9 +- .../custom-aggregated-cert/hypershift/OWNERS | 12 +- ...tom-aggregated-cert-hypershift-commands.sh | 17 +- ...gregated-cert-hypershift-ref.metadata.json | 12 +- ...custom-aggregated-cert-hypershift-ref.yaml | 13 +- .../custom-api-ingress-cert/OWNERS | 9 +- ...ustom-api-ingress-cert-chain.metadata.json | 8 +- ...manager-custom-api-ingress-cert-chain.yaml | 5 +- .../cert-manager/custom-apiserver-cert/OWNERS | 9 +- ...-manager-custom-apiserver-cert-commands.sh | 326 ++++++++---------- ...er-custom-apiserver-cert-ref.metadata.json | 8 +- ...ert-manager-custom-apiserver-cert-ref.yaml | 32 +- .../cert-manager/custom-ingress-cert/OWNERS | 9 +- ...rt-manager-custom-ingress-cert-commands.sh | 254 +++++++------- ...ager-custom-ingress-cert-ref.metadata.json | 8 +- .../cert-manager-custom-ingress-cert-ref.yaml | 32 +- .../step-registry/cert-manager/install/OWNERS | 9 +- .../cert-manager-install-ref.metadata.json | 8 +- 27 files changed, 520 insertions(+), 563 deletions(-) mode change 100644 => 120000 ci-operator/step-registry/cert-manager/clusterissuer/OWNERS mode change 100644 => 120000 ci-operator/step-registry/cert-manager/custom-aggregated-cert/OWNERS mode change 100644 => 120000 ci-operator/step-registry/cert-manager/custom-api-ingress-cert/OWNERS mode change 100644 => 120000 ci-operator/step-registry/cert-manager/custom-apiserver-cert/OWNERS mode change 100644 => 120000 ci-operator/step-registry/cert-manager/custom-ingress-cert/OWNERS mode change 100644 => 120000 ci-operator/step-registry/cert-manager/install/OWNERS diff --git a/ci-operator/step-registry/cert-manager/OWNERS b/ci-operator/step-registry/cert-manager/OWNERS index 134ab6afb3d5..e9b8738fadfd 100644 --- a/ci-operator/step-registry/cert-manager/OWNERS +++ b/ci-operator/step-registry/cert-manager/OWNERS @@ -1,8 +1,10 @@ approvers: -- jhou1 -- liangxia - xingxingxia - lunarwhite - swghosh - TrilokGeer - +reviewers: +- xingxingxia +- lunarwhite +- swghosh +- TrilokGeer diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/OWNERS b/ci-operator/step-registry/cert-manager/clusterissuer/OWNERS deleted file mode 100644 index 134ab6afb3d5..000000000000 --- a/ci-operator/step-registry/cert-manager/clusterissuer/OWNERS +++ /dev/null @@ -1,8 +0,0 @@ -approvers: -- jhou1 -- liangxia -- xingxingxia -- lunarwhite -- swghosh -- TrilokGeer - diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/OWNERS b/ci-operator/step-registry/cert-manager/clusterissuer/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/cert-manager/clusterissuer/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-commands.sh b/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-commands.sh index 201a63d75e32..eb0e8be05797 100755 --- a/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-commands.sh +++ b/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-commands.sh @@ -4,67 +4,70 @@ set -e set -u set -o pipefail -if [ -f "${SHARED_DIR}/proxy-conf.sh" ] ; then - source "${SHARED_DIR}/proxy-conf.sh" - echo "proxy: ${SHARED_DIR}/proxy-conf.sh" -fi - -timestamp() { +function timestamp() { date -u --rfc-3339=seconds } -# Define common variables -SUB="openshift-cert-manager-operator" -OPERATOR_NAMESPACE="cert-manager-operator" -OPERAND_NAMESPACE="cert-manager" -CLUSTERISSUER_NAME="cluster-certs-clusterissuer" # This clusterissuer is consumed by the 'cert-manager-custom-apiserver-cert' and 'cert-manager-custom-ingress-cert' refs. -INTERVAL=10 - -function wait_cert_manager_rollout() { - local OLD_POD=$1 - local MAX_RETRY=12 - local COUNTER=0 - - echo "# Waiting for the pod to finish rollout." - while :; - do - echo "Checking the cert-manager controller pod status for the #${COUNTER}-th time ..." - NEW_POD_OUTPUT=$(oc get po -l app.kubernetes.io/name=cert-manager -n $OPERAND_NAMESPACE) - if [[ ! "$NEW_POD_OUTPUT" =~ $OLD_POD ]] && [[ "$NEW_POD_OUTPUT" == *1/1*Running* ]]; then - echo "[$(timestamp)] Finished the cert-manager controller pod rollout." - break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "[$(timestamp)] The cert-manager controller pod didn't finish rollout after $((MAX_RETRY * INTERVAL)) seconds. Dumping status:" - oc get po -n $OPERAND_NAMESPACE - exit 1 - fi - sleep $INTERVAL +function run_command() { + local cmd="$1" + echo "Running Command: ${cmd}" + eval "${cmd}" +} + +function set_proxy () { + if test -s "${SHARED_DIR}/proxy-conf.sh" ; then + echo "Setting proxy configuration..." + source "${SHARED_DIR}/proxy-conf.sh" + else + echo "No proxy settings found. Skipping proxy configuration..." + fi +} + +function wait_for_state() { + local object="$1" + local state="$2" + local timeout="$3" + local namespace="${4:-}" + local selector="${5:-}" + + echo "Waiting for '${object}' in namespace '${namespace}' with selector '${selector}' to exist..." + for _ in {1..30}; do + oc get ${object} --selector="${selector}" -n=${namespace} |& grep -ivE "(no resources found|not found)" && break || sleep 5 done + + echo "Waiting for '${object}' in namespace '${namespace}' with selector '${selector}' to become '${state}'..." + oc wait --for=${state} --timeout=${timeout} ${object} --selector="${selector}" -n="${namespace}" + return $? } -function configure_cloud_credentials() { - local MANIFEST=$1 - local SECRET_NAME=$2 +function check_cm_operator() { + echo "Checking the persence of the cert-manager Operator as prerequisite..." + if ! oc wait deployment/cert-manager-operator-controller-manager -n cert-manager-operator --for=condition=Available --timeout=0; then + echo "The cert-manager Operator is not installed or unavailable. Skipping rest of steps..." + exit 0 + fi +} - echo -e "# Creating a credentialsrequest object for '$SECRET_NAME'." - oc create -f - <<< "${MANIFEST}" +function configure_cloud_credentials() { + local manifest="$1" + local secret_name="$2" - OLD_CONTROLLER_POD="$(oc get po -l app.kubernetes.io/name=cert-manager -n cert-manager -o=jsonpath='{.items[*].metadata.name}')" + echo "Creating a CredentialsRequest object for '$secret_name'..." + oc apply -f - <<< "${manifest}" - # Patch the cloud credential secret to the subscription, so that it can be used as ambient credentials for dns01 challenge validation. - oc -n $OPERATOR_NAMESPACE patch subscription $SUB --type=merge -p '{"spec":{"config":{"env":[{"name":"CLOUD_CREDENTIALS_SECRET_NAME","value":"'"${SECRET_NAME}"'"}]}}}' + echo "Patching the generated secret to the Subscription as ambient credentials for DNS01 challenge validation..." + local json_path='{"spec":{"config":{"env":[{"name":"CLOUD_CREDENTIALS_SECRET_NAME","value":"'"${secret_name}"'"}]}}}' + oc patch subscription openshift-cert-manager-operator --type=merge -p "$json_path" -n cert-manager-operator - # Override dns nameservers for dns01 self-check, in case that the target hosted zone in dns01 solver overlaps with the cluster's default private hosted zone. - oc patch certmanager cluster --type=merge -p='{"spec":{"controllerConfig":{"overrideArgs":["--dns01-recursive-nameservers=1.1.1.1:53,8.8.4.4:53", "--dns01-recursive-nameservers-only"]}}}' + echo "Configuring the DNS nameservers for DNS01 recursive self-check..." + json_path='{"spec":{"controllerConfig":{"overrideArgs":["--dns01-recursive-nameservers=1.1.1.1:53,8.8.4.4:53", "--dns01-recursive-nameservers-only"]}}}' + oc patch certmanager cluster --type=merge -p "$json_path" - # Wait for the cert-manager controller pod to finish rollout - wait_cert_manager_rollout "$OLD_CONTROLLER_POD" + wait_for_state "deployment/cert-manager" "condition=Available" "2m" "cert-manager" } function create_aws_route53_clusterissuer() { - AWS_CREDREQUEST=$(cat <<EOF + aws_credential_request=$(cat <<EOF apiVersion: cloudcredential.openshift.io/v1 kind: CredentialsRequest metadata: @@ -95,17 +98,16 @@ spec: - cert-manager EOF ) - configure_cloud_credentials "${AWS_CREDREQUEST}" "aws-creds" + configure_cloud_credentials "${aws_credential_request}" "aws-creds" - # retrieve configs to be used in the ClusterIssuer spec - BASE_DOMAIN=$(oc get dns cluster -o=jsonpath='{.spec.baseDomain}') - TARGET_DNS_DOMAIN=$(cut -d '.' -f 1 --complement <<< "$BASE_DOMAIN") - PUBLIC_ZONE_ID=$(oc get dns cluster -o=jsonpath='{.spec.publicZone.id}') + echo "Retrieving configs to be used in the ClusterIssuer spec..." + base_domain=$(oc get dns cluster -o=jsonpath='{.spec.baseDomain}') + target_dns_domain=$(cut -d '.' -f 1 --complement <<< "$base_domain") + public_zone_id=$(oc get dns cluster -o=jsonpath='{.spec.publicZone.id}') + region=$(oc get infrastructure cluster -o=jsonpath='{.status.platformStatus.aws.region}') - # hostedZoneID must be specified when alternative Api FQDN is used, otherwise `oc get challenge -o wide` later will be stuck - # in "failed to determine Route 53 hosted zone ID: zone not found in Route 53 for domain _acme-challenge.alt-api.BASE_DOMAIN." - echo "# Creating a clusterissuer with the ACME DNS01 AWS Route53 solver." - oc create -f - << EOF + echo "Creating an ACME DNS01 ClusterIssuer configured with AWS Route53..." + oc apply -f - << EOF apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: @@ -118,16 +120,16 @@ spec: solvers: - selector: dnsZones: - - "$TARGET_DNS_DOMAIN" + - "$target_dns_domain" dns01: route53: - region: us-east-2 - hostedZoneID: "$PUBLIC_ZONE_ID" + region: $region + hostedZoneID: $public_zone_id EOF } function create_gcp_clouddns_clusterissuer() { - GCP_CREDREQUEST=$(cat <<EOF + gcp_credential_request=$(cat <<EOF apiVersion: cloudcredential.openshift.io/v1 kind: CredentialsRequest metadata: @@ -146,13 +148,13 @@ spec: - cert-manager EOF ) - configure_cloud_credentials "${GCP_CREDREQUEST}" "gcp-credentials" + configure_cloud_credentials "${gcp_credential_request}" "gcp-credentials" - # retrieve configs to be used in the ClusterIssuer spec - PROJECT_ID=$(oc get infrastructure cluster -o=jsonpath='{.status.platformStatus.gcp.projectID}') + echo "Retrieving configs to be used in the ClusterIssuer spec..." + project_id=$(oc get infrastructure cluster -o=jsonpath='{.status.platformStatus.gcp.projectID}') - echo "# Creating a clusterissuer with the ACME DNS01 Google CloudDNS solver." - oc create -f - << EOF + echo "Creating an ACME DNS01 ClusterIssuer configured with Google CloudDNS..." + oc apply -f - << EOF apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: @@ -165,18 +167,25 @@ spec: solvers: - dns01: cloudDNS: - project: $PROJECT_ID + project: $project_id EOF } -echo "# Checking if the cert-manager operator is already installed." -INSTALLED_CSV=$(oc get subscription $SUB -n $OPERATOR_NAMESPACE -o=jsonpath='{.status.installedCSV}' || true) -if [ -z "${INSTALLED_CSV}" ]; then - echo "The cert-manager operator is not installed. Please ensure the 'cert-manager-install' ref is executed first." - exit 1 -fi +function is_clusterisser_ready() { + if wait_for_state "clusterissuer/$CLUSTERISSUER_NAME" "condition=Ready" "2m"; then + echo "ClusterIssuer is ready" + else + echo "Timed out after 2m. Dumping resources for debugging..." + run_command "oc describe clusterissuer $CLUSTERISSUER_NAME" + exit 1 + fi +} -echo -e "# Creating the clusterissuer based on the CLUSTER_TYPE '${CLUSTER_TYPE}'." +timestamp +set_proxy +check_cm_operator + +echo "Creating the ClusterIssuer based on CLUSTER_TYPE '${CLUSTER_TYPE}'..." case "${CLUSTER_TYPE}" in aws|aws-arm64) create_aws_route53_clusterissuer @@ -185,26 +194,11 @@ gcp|gcp-arm64) create_gcp_clouddns_clusterissuer ;; *) - echo "Cluster type '${CLUSTER_TYPE}' is not supported currently." >&2 + echo "Cluster type '${CLUSTER_TYPE}' unsupported, exiting..." >&2 exit 1 ;; esac -echo "# Waiting for the clusterissuer to be ready." -MAX_RETRY=12 -COUNTER=0 -while :; -do - echo "Checking the clusterissuer $CLUSTERISSUER_NAME status for the #${COUNTER}-th time ..." - if [[ "$(oc get --no-headers clusterissuer $CLUSTERISSUER_NAME)" =~ True ]]; then - echo "[$(timestamp)] The clusterissuer has become ready." - break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "[$(timestamp)] The clusterissuer status is still not ready after $((MAX_RETRY * INTERVAL)) seconds. Dumping status:" - oc get clusterissuer $CLUSTERISSUER_NAME -o=jsonpath='{.status}' - exit 1 - fi - sleep $INTERVAL -done +is_clusterisser_ready + +echo "[$(timestamp)] Succeeded in creating a ClusterIssuer configured with Let's Encrypt ACME DNS01 type!" diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-ref.metadata.json b/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-ref.metadata.json index 826b22846f14..ddd17d2ca9b7 100644 --- a/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-ref.metadata.json +++ b/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-ref.metadata.json @@ -2,8 +2,12 @@ "path": "cert-manager/clusterissuer/cert-manager-clusterissuer-ref.yaml", "owners": { "approvers": [ - "jhou1", - "liangxia", + "xingxingxia", + "lunarwhite", + "swghosh", + "TrilokGeer" + ], + "reviewers": [ "xingxingxia", "lunarwhite", "swghosh", diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-ref.yaml b/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-ref.yaml index 8541ea1aaa4b..ce6b0a40fcfd 100644 --- a/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-ref.yaml +++ b/ci-operator/step-registry/cert-manager/clusterissuer/cert-manager-clusterissuer-ref.yaml @@ -1,19 +1,15 @@ ref: - as: cert-manager-clusterissuer - from_image: - namespace: ci - name: verification-tests - tag: latest - grace_period: 20m - commands: cert-manager-clusterissuer-commands.sh - cli: latest - resources: - limits: - cpu: 500m - memory: 500Mi - requests: - cpu: 300m - memory: 200Mi - documentation: |- - Prepare the cert-manager resource ClusterIssuer to issue custom certificates for Apiserver and Ingress. - + as: cert-manager-clusterissuer + from: upi-installer + cli: latest + commands: cert-manager-clusterissuer-commands.sh + resources: + requests: + cpu: 100m + memory: 100Mi + env: + - name: CLUSTERISSUER_NAME + documentation: The name of the cert-manager ClusterIssuer to create in this step. + default: "letsencrypt-prodoction-ci" + documentation: |- + Create and configure a cert-manager ClusterIssuer resource to issue custom certificates. diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/OWNERS b/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/OWNERS index 6da3779f3236..6b8ffc0a93ef 100644 --- a/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/OWNERS +++ b/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/OWNERS @@ -1,10 +1,14 @@ approvers: -- jhou1 -- liangxia - xingxingxia - lunarwhite - swghosh - TrilokGeer -- fxierh +- LiangquanLi930 +- heliubj18 reviewers: -- fxierh \ No newline at end of file +- xingxingxia +- lunarwhite +- swghosh +- TrilokGeer +- LiangquanLi930 +- heliubj18 diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-commands.sh b/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-commands.sh index 3a7019a63ac0..995cc99bfc0a 100755 --- a/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-commands.sh +++ b/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-commands.sh @@ -2,6 +2,14 @@ set -euo pipefail +function check_cm_operator() { + echo "Checking the persence of the cert-manager Operator as prerequisite..." + if ! oc wait deployment/cert-manager-operator-controller-manager -n cert-manager-operator --for=condition=Available --timeout=0; then + echo "The cert-manager Operator is not installed or unavailable. Skipping rest of steps..." + exit 0 + fi +} + function create_azure_dns_clusterissuer() { # Create secret containing Azure client secret ( @@ -10,7 +18,7 @@ function create_azure_dns_clusterissuer() { ) # Create ClusterIssuer with Azure DNS resolver - oc create -f - << EOF + oc apply -f - << EOF apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: @@ -19,9 +27,9 @@ spec: acme: server: https://acme-v02.api.letsencrypt.org/directory privateKeySecretRef: - name: $PRIVATE_KEY_SECRET_NAME + name: acme-dns01-account-key solvers: - # For ingress + # For Ingress - selector: dnsZones: - $HYPERSHIFT_BASE_DOMAIN @@ -83,29 +91,21 @@ HYPERSHIFT_EXTERNAL_DNS_DOMAIN="$(cut -d '.' -f 1 --complement <<< "$KAS_ROUTE_H # CM configurations CLIENT_SECRET_KEY="client-secret" CLIENT_SECRET_NAME="azuredns-config" -CLUSTERISSUER_NAME="cluster-certs-clusterissuer" # referenced by the 'cert-manager-custom-apiserver-cert' and 'cert-manager-custom-ingress-cert' steps -OPERATOR_NAMESPACE="cert-manager-operator" OPERAND_NAMESPACE="cert-manager" -PRIVATE_KEY_SECRET_NAME="acme-dns01-account-key" -SUB="openshift-cert-manager-operator" # Check if CM is installed -INSTALLED_CSV="$(oc get subscription "$SUB" -n "$OPERATOR_NAMESPACE" -o=jsonpath='{.status.installedCSV}')" -if [[ -z "${INSTALLED_CSV}" ]]; then - echo "CM not installed. Invoke cert-manager-install first." >&2 - exit 1 -fi +check_cm_operator -# Creat clusterissuer -case "${CLUSTER_TYPE,,}" in +# Creat ClusterIssuer +case "${CLUSTER_TYPE}" in *azure*) create_azure_dns_clusterissuer ;; *) - echo "Cluster type ${CLUSTER_TYPE} unsupported, exiting" >&2 + echo "Cluster type '${CLUSTER_TYPE}' unsupported, exiting..." >&2 exit 1 ;; esac -# Wait for the clusterissuer to be ready -oc wait ClusterIssuer "$CLUSTERISSUER_NAME" --for=condition=Ready=True --timeout=180s +# Wait for the ClusterIssuer to be ready +oc wait clusterissuer "$CLUSTERISSUER_NAME" --for=condition=Ready=True --timeout=180s diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.metadata.json b/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.metadata.json index 7287accaca22..b923162338e1 100644 --- a/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.metadata.json +++ b/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.metadata.json @@ -2,16 +2,20 @@ "path": "cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.yaml", "owners": { "approvers": [ - "jhou1", - "liangxia", "xingxingxia", "lunarwhite", "swghosh", "TrilokGeer", - "fxierh" + "LiangquanLi930", + "heliubj18" ], "reviewers": [ - "fxierh" + "xingxingxia", + "lunarwhite", + "swghosh", + "TrilokGeer", + "LiangquanLi930", + "heliubj18" ] } } \ No newline at end of file diff --git a/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.yaml b/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.yaml index 5aa173ddc283..0406e2d03196 100644 --- a/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.yaml +++ b/ci-operator/step-registry/cert-manager/clusterissuer/hypershift/cert-manager-clusterissuer-hypershift-ref.yaml @@ -1,6 +1,6 @@ ref: as: cert-manager-clusterissuer-hypershift - from: tools + from: upi-installer timeout: 10m grace_period: 2m commands: cert-manager-clusterissuer-hypershift-commands.sh @@ -8,6 +8,10 @@ ref: requests: cpu: 100m memory: 100Mi + env: + - name: CLUSTERISSUER_NAME + documentation: The name of the cert-manager ClusterIssuer to create in this step. + default: "letsencrypt-prodoction-ci-hypershift" documentation: |- - Sets up a cert-manager ClusterIssuer to issue custom certificates for the API server and Ingress for hypershift. - Runs against Hypershift hosted clusters. + Create and configure a cert-manager ClusterIssuer resource to issue custom certificates. + Run against Hypershift hosted clusters. diff --git a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/OWNERS b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/OWNERS deleted file mode 100644 index 70109d2c7c7c..000000000000 --- a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/OWNERS +++ /dev/null @@ -1,8 +0,0 @@ -approvers: -- jhou1 -- liangxia -- xingxingxia -- lunarwhite -- swghosh -- TrilokGeer -- fxierh diff --git a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/OWNERS b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/OWNERS b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/OWNERS index 7c12ad6d26b5..6b8ffc0a93ef 100644 --- a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/OWNERS +++ b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/OWNERS @@ -1,10 +1,14 @@ approvers: -- jhou1 -- liangxia - xingxingxia - lunarwhite - swghosh - TrilokGeer -- fxierh +- LiangquanLi930 +- heliubj18 reviewers: -- fxierh +- xingxingxia +- lunarwhite +- swghosh +- TrilokGeer +- LiangquanLi930 +- heliubj18 diff --git a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-commands.sh b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-commands.sh index 17350c076782..4172ea62df87 100644 --- a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-commands.sh +++ b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-commands.sh @@ -34,8 +34,16 @@ function wait_for_hc_readiness() { wait "${pids_to_wait[@]}" } +function check_clusterissuer() { + echo "Checking the persence of ClusterIssuer '$CLUSTERISSUER_NAME' as prerequisite..." + if ! oc wait clusterissuer/$CLUSTERISSUER_NAME --for=condition=Ready --timeout=0; then + echo "ClusterIssuer is not created or not ready to use. Skipping rest of steps..." + exit 0 + fi +} + function create_aggregated_cert() { - oc create -f - << EOF + oc apply -f - << EOF apiVersion: cert-manager.io/v1 kind: Certificate metadata: @@ -134,8 +142,7 @@ fi export PS4='[$(date "+%Y-%m-%d %H:%M:%S")] ' # Check clusterissuer readiness -CLUSTERISSUER_NAME=cluster-certs-clusterissuer -oc wait clusterissuer "$CLUSTERISSUER_NAME" --for=condition=Ready=True --timeout=0 +check_clusterissuer # Get CP service hostnames KAS_ROUTE_HOSTNAME="$(mgmt oc get hc -A -o jsonpath='{.items[0].spec.services[?(@.service=="APIServer")].servicePublishingStrategy.route.hostname}')" @@ -151,8 +158,8 @@ fi HYPERSHIFT_EXTERNAL_DNS_DOMAIN="$(cut -d '.' -f 1 --complement <<< "$KAS_ROUTE_HOSTNAME")" # Create aggregated cert -AGGREGATED_CERT_NAME=custom-ingress-cert -AGGREGATED_CERT_SECRET_NAME=cert-manager-managed-ingress-cert-tls +AGGREGATED_CERT_NAME=custom-aggregated-cert +AGGREGATED_CERT_SECRET_NAME=cert-manager-managed-aggregated-cert-tls INGRESS_DOMAIN=$(oc get ingress.config cluster -o jsonpath='{.spec.domain}') HC_NAME="$(cut -d '.' -f 2 <<< "$INGRESS_DOMAIN")" HCP_NS="clusters-$HC_NAME" diff --git a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.metadata.json b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.metadata.json index 7cc2f6e1bbab..53645245077c 100644 --- a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.metadata.json +++ b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.metadata.json @@ -2,16 +2,20 @@ "path": "cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.yaml", "owners": { "approvers": [ - "jhou1", - "liangxia", "xingxingxia", "lunarwhite", "swghosh", "TrilokGeer", - "fxierh" + "LiangquanLi930", + "heliubj18" ], "reviewers": [ - "fxierh" + "xingxingxia", + "lunarwhite", + "swghosh", + "TrilokGeer", + "LiangquanLi930", + "heliubj18" ] } } \ No newline at end of file diff --git a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.yaml b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.yaml index abf05d541f87..5a708ee58947 100644 --- a/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.yaml +++ b/ci-operator/step-registry/cert-manager/custom-aggregated-cert/hypershift/cert-manager-custom-aggregated-cert-hypershift-ref.yaml @@ -1,9 +1,6 @@ ref: as: cert-manager-custom-aggregated-cert-hypershift - from_image: - namespace: ocp - name: "4.16" - tag: upi-installer + from: upi-installer timeout: 45m grace_period: 10m commands: cert-manager-custom-aggregated-cert-hypershift-commands.sh @@ -11,6 +8,10 @@ ref: requests: cpu: 100m memory: 100Mi + env: + - name: CLUSTERISSUER_NAME + documentation: The name of the cert-manager ClusterIssuer to use for the external certificates issuance. (Prerequsite is that the ClusterIssuer is created and ready.) + default: "letsencrypt-prodoction-ci-hypershift" documentation: |- - Creates and configures a common certificate for KAS, OAuth and ingress. - Runs against Hypershift hosted clusters. + Issue and configure public trusted certificates for KAS, OAuth and Ingress by using cert-manager. + Run against Hypershift hosted clusters. diff --git a/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/OWNERS b/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/OWNERS deleted file mode 100644 index 134ab6afb3d5..000000000000 --- a/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/OWNERS +++ /dev/null @@ -1,8 +0,0 @@ -approvers: -- jhou1 -- liangxia -- xingxingxia -- lunarwhite -- swghosh -- TrilokGeer - diff --git a/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/OWNERS b/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.metadata.json b/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.metadata.json index 261b66149f49..a3db44c53591 100644 --- a/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.metadata.json +++ b/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.metadata.json @@ -2,8 +2,12 @@ "path": "cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.yaml", "owners": { "approvers": [ - "jhou1", - "liangxia", + "xingxingxia", + "lunarwhite", + "swghosh", + "TrilokGeer" + ], + "reviewers": [ "xingxingxia", "lunarwhite", "swghosh", diff --git a/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.yaml b/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.yaml index 57e4a72711a6..da64032cbe6a 100644 --- a/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.yaml +++ b/ci-operator/step-registry/cert-manager/custom-api-ingress-cert/cert-manager-custom-api-ingress-cert-chain.yaml @@ -3,8 +3,7 @@ chain: steps: - ref: cert-manager-install - ref: cert-manager-clusterissuer - - ref: cert-manager-custom-apiserver-cert - ref: cert-manager-custom-ingress-cert + - ref: cert-manager-custom-apiserver-cert documentation: |- - Manage Apiserver serving certificate and Ingress default certificate using cert-manager Operator - + Manage default Ingress Controller and API Server serving certificates using cert-manager. diff --git a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/OWNERS b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/OWNERS deleted file mode 100644 index 134ab6afb3d5..000000000000 --- a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/OWNERS +++ /dev/null @@ -1,8 +0,0 @@ -approvers: -- jhou1 -- liangxia -- xingxingxia -- lunarwhite -- swghosh -- TrilokGeer - diff --git a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/OWNERS b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-commands.sh b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-commands.sh index 7c4faad409aa..44e5147d0aff 100644 --- a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-commands.sh +++ b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-commands.sh @@ -4,27 +4,57 @@ set -e set -u set -o pipefail -if [ -f "${SHARED_DIR}/proxy-conf.sh" ] ; then - source "${SHARED_DIR}/proxy-conf.sh" - echo "proxy: ${SHARED_DIR}/proxy-conf.sh" -fi - -CLUSTERISSUER_NAME=cluster-certs-clusterissuer -if [[ ! "$(oc get --no-headers clusterissuer $CLUSTERISSUER_NAME)" =~ True ]]; then - echo "The prerequsite clusterissuer $CLUSTERISSUER_NAME is not ready. Please ensure the cert-manager-clusterissuer ref is executed first." - exit 1 -fi - -TMP_DIR=/tmp/cert-manager-api-commands-tmp-dir -mkdir -p $TMP_DIR -cd $TMP_DIR - -# Apiserver uses port 6443 in convention. Therefore we configure "port: 6443" for the alternative Apiserver FQDN (NEW_API_FQDN) too. -oc create -f - << EOF +function timestamp() { + date -u --rfc-3339=seconds +} + +function run_command() { + local cmd="$1" + echo "Running Command: ${cmd}" + eval "${cmd}" +} + +function set_proxy () { + if test -s "${SHARED_DIR}/proxy-conf.sh" ; then + echo "Setting proxy configuration..." + source "${SHARED_DIR}/proxy-conf.sh" + else + echo "No proxy settings found. Skipping proxy configuration..." + fi +} + +function wait_for_state() { + local object="$1" + local state="$2" + local timeout="$3" + local namespace="${4:-}" + local selector="${5:-}" + + echo "Waiting for '${object}' in namespace '${namespace}' with selector '${selector}' to exist..." + for _ in {1..30}; do + oc get ${object} --selector="${selector}" -n=${namespace} |& grep -ivE "(no resources found|not found)" && break || sleep 5 + done + + echo "Waiting for '${object}' in namespace '${namespace}' with selector '${selector}' to become '${state}'..." + oc wait --for=${state} --timeout=${timeout} ${object} --selector="${selector}" -n="${namespace}" +} + +function check_clusterissuer() { + echo "Checking the persence of ClusterIssuer '$CLUSTERISSUER_NAME' as prerequisite..." + if ! oc wait clusterissuer/$CLUSTERISSUER_NAME --for=condition=Ready --timeout=0; then + echo "ClusterIssuer is not created or not ready to use. Skipping rest of steps..." + exit 0 + fi +} + +function configure_alt_apiserver_endpoint() { + echo "Creating a LoadBalancer service for the alternative API Server endpoint..." + # API Server uses port 6443 in convention. Thus we configure "port: 6443" for the alternative API Server FQDN (NEW_API_FQDN) as well. + oc apply -f - << EOF apiVersion: v1 kind: Service metadata: - name: cert-manager-managed-alt-apiserver + name: alt-apiserver-endpoint namespace: openshift-kube-apiserver spec: ports: @@ -37,78 +67,54 @@ spec: type: LoadBalancer EOF -# Wait for the LoadBalancer service status to become ready -MAX_RETRY=20 -INTERVAL=10 -COUNTER=0 -while :; -do - echo "Checking the LoadBalancer service's status for the #${COUNTER}-th time ..." - EXTERNAL_IP_OUTPUT=$(oc get service cert-manager-managed-alt-apiserver -n openshift-kube-apiserver -o jsonpath='{.status.loadBalancer.ingress}') - if grep -q '"ip"' <<< "$EXTERNAL_IP_OUTPUT"; then - EXTERNAL_IP=$(oc get service cert-manager-managed-alt-apiserver -n openshift-kube-apiserver -o jsonpath='{.status.loadBalancer.ingress[0].ip}') - RECORD_TYPE=A - break - elif grep -q '"hostname"' <<< "$EXTERNAL_IP_OUTPUT"; then - EXTERNAL_IP=$(oc get service cert-manager-managed-alt-apiserver -n openshift-kube-apiserver -o jsonpath='{.status.loadBalancer.ingress[0].hostname}') - RECORD_TYPE=CNAME - break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "The LoadBalancer service's status does not show either ip or hostname after $((MAX_RETRY * INTERVAL)) seconds. Dumping status:" - oc get service cert-manager-managed-alt-apiserver -n openshift-kube-apiserver -o jsonpath='{.status}' - exit 1 + echo "Retrieving the created LoadBalancer ingress's Hostname or IP..." + for _ in {1..30}; do + EXTERNAL_IP=$(oc get service alt-apiserver-endpoint -n openshift-kube-apiserver -o=jsonpath='{.status.loadBalancer.ingress[0].hostname}') + if [[ -n "${EXTERNAL_IP}" ]]; then + RECORD_TYPE=CNAME + break + fi + + EXTERNAL_IP=$(oc get service alt-apiserver-endpoint -n openshift-kube-apiserver -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') + if [[ -n "${EXTERNAL_IP}" ]]; then + RECORD_TYPE=A + break + fi + + sleep 5 + done + if [[ -z "${EXTERNAL_IP}" ]]; then + echo "Timed out wait for Hostname or IP to be created. Skipping rest of steps..." + exit 0 fi - sleep $INTERVAL -done - -BASE_DOMAIN=$(oc get dns cluster -o=jsonpath='{.spec.baseDomain}') -ORIGINAL_API_FQDN=$(oc whoami --show-server | sed -e 's|https://||' -e 's/:6443//') -NEW_API_FQDN=alt-api.${BASE_DOMAIN} -oc create -f - << EOF + echo "Creating DNSRecord for the alternative API Server endpoint..." + oc apply -f - << EOF apiVersion: ingress.operator.openshift.io/v1 kind: DNSRecord metadata: - name: cert-manager-managed-alt-apiserver + name: alt-apiserver-endpoint namespace: openshift-ingress-operator spec: dnsManagementPolicy: Managed dnsName: "${NEW_API_FQDN}." recordTTL: 30 - recordType: ${RECORD_TYPE} + recordType: $RECORD_TYPE targets: - ${EXTERNAL_IP} EOF -# Wait for the dnsrecord status to become ready -MAX_RETRY=12 -INTERVAL=10 -COUNTER=0 -while :; -do - echo "Checking the cert-manager-managed-alt-apiserver dnsrecord status for the #${COUNTER}-th time ..." - DNSRECORD_STATUS="$(oc get dnsrecord cert-manager-managed-alt-apiserver -n openshift-ingress-operator '-o=jsonpath={.status.zones[*].conditions[?(@.type=="Published")].status}')" - if [[ ! "$DNSRECORD_STATUS" =~ False ]]; then - break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "The cert-manager-managed-alt-apiserver dnsrecord status is still not ready after $((MAX_RETRY * INTERVAL)) seconds. Dumping status:" - oc get dnsrecord cert-manager-managed-alt-apiserver -n openshift-ingress-operator -o jsonpath='{.status}' - exit 1 - fi - sleep $INTERVAL -done + echo "Waiting for the DNSRecord to become Published..." + oc wait dnsrecord alt-apiserver-endpoint -n openshift-ingress-operator --for=jsonpath='{.status.zones[0].conditions[?(@.type=="Published")].status}'=True --timeout=2m +} -CERT_NAME=alt-api-cert -oc create -f - << EOF +function create_apiserver_certificate() { + oc apply -f - << EOF apiVersion: cert-manager.io/v1 kind: Certificate metadata: name: $CERT_NAME - namespace: openshift-config + namespace: $CERT_NAMESPACE spec: commonName: "$NEW_API_FQDN" dnsNames: @@ -118,117 +124,85 @@ spec: issuerRef: kind: ClusterIssuer name: $CLUSTERISSUER_NAME - secretName: cert-manager-managed-alt-api-tls -# privateKey: -# rotationPolicy: Always # Venafi required this + secretName: $CERT_SECRET_NAME + privateKey: + rotationPolicy: Always duration: 2h renewBefore: 1h30m EOF -# Wait for the certificate status to become ready -MAX_RETRY=15 -INTERVAL=20 -COUNTER=0 -while :; -do - echo "Checking the $CERT_NAME certificate status for the #${COUNTER}-th time ..." - if [[ "$(oc get --no-headers certificate $CERT_NAME -n openshift-config)" =~ True ]]; then - break + if wait_for_state "certificate/$CERT_NAME" "condition=Ready" "5m" "$CERT_NAMESPACE"; then + echo "Certificate is ready" + else + echo "Timed out after 5m. Dumping resources for debugging..." + run_command "oc describe certificate $CERT_NAME -n $CERT_NAMESPACE" + exit 1 fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "The $CERT_NAME certificate status is still not ready after $((MAX_RETRY * INTERVAL)) seconds." - echo "Dumping the certificate status:" - oc get certificate $CERT_NAME -n openshift-config -o jsonpath='{.status}' - echo "Dumping the challenge status:" - oc get challenge -n openshift-config -o wide +} + +function configure_apiserver_default_cert() { + echo "Patching the issued TLS secret to API Server's spec..." + local json_path='{"spec":{"servingCerts": {"namedCertificates": [{"names": ["'"$NEW_API_FQDN"'"], "servingCertificate": {"name": "'"$CERT_SECRET_NAME"'"}}]}}}' + oc patch apiserver cluster --type=merge -p "$json_path" + + echo "[$(timestamp)] Waiting for the Kube API Server ClusterOperator to finish rollout..." + oc wait co kube-apiserver --for=condition=Progressing=True --timeout=5m + oc wait co kube-apiserver --for=condition=Progressing=False --timeout=20m + echo "[$(timestamp)] Rollout progress completed" +} + +function extract_ca_from_secret() { + echo "Extracting the CA certificate from the issued TLS secret to local folder..." + oc extract secret/"$CERT_SECRET_NAME" -n $CERT_NAMESPACE + CA_FILE=$( [ -f ca.crt ] && echo "ca.crt" || echo "tls.crt" ) +} + +function validate_serving_cert() { + echo "Validating the serving certificate of '$NEW_API_URL'..." + output=$(curl -I -v --cacert $CA_FILE --connect-timeout 30 "$NEW_API_FQDN" 2>&1) + if [ $? -eq 0 ]; then + echo "The certificate is served by API Server as expected" + else + echo "Failed curl validation. Curl output: '$output'" exit 1 fi - sleep $INTERVAL -done +} -# The CA_FILE will be used later to update KUBECONFIG -oc extract secret/cert-manager-managed-alt-api-tls -n openshift-config -CA_FILE=ca.crt -if [ ! -f ca.crt ]; then - CA_FILE=tls.crt -fi +function update_kubeconfig_ca() { + echo "Backing up the old KUBECONFIG file..." + run_command "cp -f $KUBECONFIG $KUBECONFIG.old" -oc patch apiserver cluster --type=merge -p " -spec: - servingCerts: - namedCertificates: - - names: - - $NEW_API_FQDN - servingCertificate: - name: cert-manager-managed-alt-api-tls -" - -# Wait for the clusteroperator kube-apiserver to start rollout -# Note, if $NEW_API_FQDN is $ORIGINAL_API_FQDN other than an alternative FQDN, all oc commands afterwards need to add the --insecure-skip-tls-verify flag before the KUBECONFIG is updated later -MAX_RETRY=20 -INTERVAL=10 -COUNTER=0 -while :; -do - echo "Checking if clusteroperator kube-apiserver rollout has started for the #${COUNTER}-th time ..." - if [ "$(oc get clusteroperator kube-apiserver -o jsonpath='{.status.conditions[?(@.type=="Progressing")].status}')" == True ]; then - echo "The clusteroperator kube-apiserver Progressing status becomes True, indicates rollout has started." && break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "The clusteroperator kube-apiserver rollout did not start after $((MAX_RETRY * INTERVAL)) seconds. Dumping status:" - oc get clusteroperator kube-apiserver -o=jsonpath='{.status.conditions[?(@.type=="Progressing")]}' - exit 1 - fi - sleep $INTERVAL -done - -MAX_RETRY=50 # kube-apiserver rollout needs long time -INTERVAL=30 -COUNTER=0 -while :; -do - echo "Checking if clusteroperator kube-apiserver rollout finished for the #${COUNTER}-th time ..." - if [ "$(oc get --no-headers clusteroperator kube-apiserver | awk '{print $3 $4 $5}')" == TrueFalseFalse ]; then - echo 'The clusteroperator kube-apiserver status becomes "True False False", indicates rollout finished.' && break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "The clusteroperator kube-apiserver status is not ready after $((MAX_RETRY * INTERVAL)) seconds. Dumping status:" - oc get clusteroperator kube-apiserver -o=jsonpath='{.status}' - exit 1 - fi - sleep $INTERVAL -done - -echo "Validating the cert-manager customized Apiserver serving certificate." -MAX_RETRY=12 -INTERVAL=10 -COUNTER=0 -while :; -do - CURL_OUTPUT=$(curl -IsS -v --cacert $CA_FILE --connect-timeout 30 "https://$NEW_API_FQDN:6443" 2>&1 || true) - if [[ "$CURL_OUTPUT" =~ "HTTP/2 403" ]]; then - echo "The customized certificate is serving as expected." && break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo -e "Timeout after $((MAX_RETRY * INTERVAL)) seconds waiting for curl validation succeeded. Dumping the curl output:\n${CURL_OUTPUT}." - exit 1 - fi - sleep $INTERVAL -done - -# Update KUBECONFIG WRT CA of Apiserver certificate -cp "$KUBECONFIG" "$KUBECONFIG".before-custom-api.bak -oc config view --minify --raw --kubeconfig "$KUBECONFIG".before-custom-api.bak > "$KUBECONFIG" -grep certificate-authority-data "$KUBECONFIG" | awk '{print $2}' | base64 -d > origin-ca.crt -cat $CA_FILE >> origin-ca.crt -NEW_CA_DATA=$(base64 -w0 origin-ca.crt) -sed -i "s/certificate-authority-data:.*$/certificate-authority-data: $NEW_CA_DATA/" "$KUBECONFIG" -sed -i "s/$ORIGINAL_API_FQDN/$NEW_API_FQDN/" "$KUBECONFIG" # In case NEW_API_FQDN != ORIGINAL_API_FQDN -echo "[$(date -u --rfc-3339=seconds)] The KUBECONFIG content is updated with CA of new Apiserver certificate." - -echo "Validating the updated KUBECONFIG using any oc command." -oc get po -n openshift-kube-apiserver -L revision -l apiserver + echo "Appending the CA data of KUBECONFIG with the new CA certificate..." + CA_DATA=$(grep certificate-authority-data "$KUBECONFIG".old | awk '{print $2}' | base64 -d) + cat "$CA_FILE" >> <(echo "$CA_DATA") + NEW_CA_DATA=$(echo "$CA_DATA" | base64 -w0) + sed -i "s/certificate-authority-data:.*$/certificate-authority-data: $NEW_CA_DATA/" "$KUBECONFIG" + + echo "Validating the updated KUBECONFIG using any of oc command" + run_command "oc get pod -n openshift-kube-apiserver -L revision -l apiserver" +} + +timestamp +set_proxy +check_clusterissuer + +CERT_NAME=custom-apiserver-cert +CERT_NAMESPACE=openshift-config +CERT_SECRET_NAME=cert-manager-managed-apiserver-cert-tls +BASE_DOMAIN=$(oc get dns cluster -o=jsonpath='{.spec.baseDomain}') +NEW_API_FQDN=alt-api.${BASE_DOMAIN} +NEW_API_URL=https://${NEW_API_FQDN}:6443 + +configure_alt_apiserver_endpoint +create_apiserver_certificate +configure_apiserver_default_cert + +TMP_DIR=/tmp/cert-manager-custom-apiserver-cert +mkdir -p "$TMP_DIR" +cd "$TMP_DIR" + +extract_ca_from_secret +validate_serving_cert +update_kubeconfig_ca + +echo "[$(timestamp)] Succeeded in adding cert-manager managed certificates to the alternative API Server as named certificate!" diff --git a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.metadata.json b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.metadata.json index 22cda1296513..77b76b061ef8 100644 --- a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.metadata.json +++ b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.metadata.json @@ -2,8 +2,12 @@ "path": "cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.yaml", "owners": { "approvers": [ - "jhou1", - "liangxia", + "xingxingxia", + "lunarwhite", + "swghosh", + "TrilokGeer" + ], + "reviewers": [ "xingxingxia", "lunarwhite", "swghosh", diff --git a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.yaml b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.yaml index 4febe663e217..30d74f0599ba 100644 --- a/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.yaml +++ b/ci-operator/step-registry/cert-manager/custom-apiserver-cert/cert-manager-custom-apiserver-cert-ref.yaml @@ -1,18 +1,16 @@ ref: - as: cert-manager-custom-apiserver-cert - from_image: - namespace: ci - name: verification-tests - tag: latest - grace_period: 20m - commands: cert-manager-custom-apiserver-cert-commands.sh - cli: latest - resources: - limits: - cpu: 500m - memory: 500Mi - requests: - cpu: 300m - memory: 200Mi - documentation: |- - Manage OpenShift cluster's Apiserver certificate with cert-manager. + as: cert-manager-custom-apiserver-cert + from: upi-installer + cli: latest + commands: cert-manager-custom-apiserver-cert-commands.sh + resources: + requests: + cpu: 100m + memory: 100Mi + env: + - name: CLUSTERISSUER_NAME + documentation: The name of the cert-manager ClusterIssuer to use for the external certificates issuance. (Prerequsite is that the ClusterIssuer is created and ready.) + default: "letsencrypt-prodoction-ci" + documentation: |- + Create cert-manager Certificate resources issued from the given ClusterIssuer. + Add cert-manager managed certificates to an alternative API Server endpoint as named certificates. diff --git a/ci-operator/step-registry/cert-manager/custom-ingress-cert/OWNERS b/ci-operator/step-registry/cert-manager/custom-ingress-cert/OWNERS deleted file mode 100644 index 134ab6afb3d5..000000000000 --- a/ci-operator/step-registry/cert-manager/custom-ingress-cert/OWNERS +++ /dev/null @@ -1,8 +0,0 @@ -approvers: -- jhou1 -- liangxia -- xingxingxia -- lunarwhite -- swghosh -- TrilokGeer - diff --git a/ci-operator/step-registry/cert-manager/custom-ingress-cert/OWNERS b/ci-operator/step-registry/cert-manager/custom-ingress-cert/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/cert-manager/custom-ingress-cert/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-commands.sh b/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-commands.sh index ec37b3ca840e..29a83ec3c654 100644 --- a/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-commands.sh +++ b/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-commands.sh @@ -4,154 +4,144 @@ set -e set -u set -o pipefail -if [ -f "${SHARED_DIR}/proxy-conf.sh" ] ; then - source "${SHARED_DIR}/proxy-conf.sh" - echo "proxy: ${SHARED_DIR}/proxy-conf.sh" -fi - -CLUSTERISSUER_NAME=cluster-certs-clusterissuer -if [[ ! "$(oc get --no-headers clusterissuer $CLUSTERISSUER_NAME)" =~ True ]]; then - echo "The prerequsite clusterissuer $CLUSTERISSUER_NAME is not ready. Please ensure the cert-manager-clusterissuer ref is executed first." - exit 1 -fi - -TMP_DIR=/tmp/cert-manager-ingress-commands-tmp-dir -mkdir -p $TMP_DIR -cd $TMP_DIR - -INGRESS_DOMAIN=$(oc get ingress.config cluster -o jsonpath='{.spec.domain}') -CERT_NAME=custom-ingress-cert -oc create -f - << EOF +function timestamp() { + date -u --rfc-3339=seconds +} + +function run_command() { + local cmd="$1" + echo "Running Command: ${cmd}" + eval "${cmd}" +} + +function set_proxy () { + if test -s "${SHARED_DIR}/proxy-conf.sh" ; then + echo "Setting proxy configuration..." + source "${SHARED_DIR}/proxy-conf.sh" + else + echo "No proxy settings found. Skipping proxy configuration..." + fi +} + +function wait_for_state() { + local object="$1" + local state="$2" + local timeout="$3" + local namespace="${4:-}" + local selector="${5:-}" + + echo "Waiting for '${object}' in namespace '${namespace}' with selector '${selector}' to exist..." + for _ in {1..30}; do + oc get ${object} --selector="${selector}" -n=${namespace} |& grep -ivE "(no resources found|not found)" && break || sleep 5 + done + + echo "Waiting for '${object}' in namespace '${namespace}' with selector '${selector}' to become '${state}'..." + oc wait --for=${state} --timeout=${timeout} ${object} --selector="${selector}" -n="${namespace}" + return $? +} + +function check_clusterissuer() { + echo "Checking the persence of ClusterIssuer '$CLUSTERISSUER_NAME' as prerequisite..." + if ! oc wait clusterissuer/$CLUSTERISSUER_NAME --for=condition=Ready --timeout=0; then + echo "ClusterIssuer is not created or not ready to use. Skipping rest of steps..." + exit 0 + fi +} + +function create_ingress_certificate () { + echo "Creating the wildcard certificate for the Ingress Controller..." + oc apply -f - << EOF apiVersion: cert-manager.io/v1 kind: Certificate metadata: name: $CERT_NAME - namespace: openshift-ingress + namespace: $CERT_NAMESPACE spec: - commonName: "*.$INGRESS_DOMAIN" + commonName: "*.${INGRESS_DOMAIN}" dnsNames: - - "*.$INGRESS_DOMAIN" + - "*.${INGRESS_DOMAIN}" usages: - server auth issuerRef: kind: ClusterIssuer name: $CLUSTERISSUER_NAME - secretName: cert-manager-managed-ingress-cert-tls -# privateKey: -# rotationPolicy: Always # Venafi need this + secretName: $CERT_SECRET_NAME + privateKey: + rotationPolicy: Always duration: 2h renewBefore: 1h30m EOF -# Wait for the certificate status to become ready -MAX_RETRY=30 -INTERVAL=10 -COUNTER=0 -while :; -do - echo "Checking the $CERT_NAME certificate status for the #${COUNTER}-th time ..." - if [[ "$(oc get --no-headers certificate $CERT_NAME -n openshift-ingress)" =~ True ]]; then - break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "The $CERT_NAME certificate status is still not ready after $((MAX_RETRY * INTERVAL)) seconds." - echo "Dumping the certificate status:" - oc get certificate $CERT_NAME -n openshift-ingress -o jsonpath='{.status}' - echo "Dumping the challenge status:" - oc get challenge -n openshift-ingress -o wide - exit 1 - fi - sleep $INTERVAL -done - -# TODO in future: check whether needed to oc patch proxy when the certificate is not issued by the trusted Let's Encrypt product env - -OLD_PROGRESSING_TIME="$(oc get co ingress '-o=jsonpath={.status.conditions[?(@.type=="Progressing")].lastTransitionTime}')" -oc patch ingresscontroller.operator default --type=merge -p '{"spec":{"defaultCertificate": {"name": "cert-manager-managed-ingress-cert-tls"}}}' -n openshift-ingress-operator -# Wait for the ingress pods to finish rollout -MAX_RETRY=12 -INTERVAL=10 -COUNTER=0 -while :; -do - echo "Checking if clusteroperator ingress rollout finished for the #${COUNTER}-th time ..." - NEW_PROGRESSING="$(oc get co ingress '-o=jsonpath={.status.conditions[?(@.type=="Progressing")]}')" - if [[ "$NEW_PROGRESSING" =~ '"status":"False"' ]] && [[ ! "$NEW_PROGRESSING" =~ lastTransitionTime\":\"$OLD_PROGRESSING_TIME ]]; then - echo "The ingress pods finished rollout." && break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "The ingress pods still do not finish rollout after $((MAX_RETRY * INTERVAL)) seconds. Dumping status:" - oc get co ingress -o=jsonpath='{.status}' - oc get po -n openshift-ingress - exit 1 - fi - sleep $INTERVAL -done - -echo "Creating a namespace from test usage." -TEST_NAMESPACE=test-ingress-cert -oc create ns $TEST_NAMESPACE - -echo "Creating the hello-openshift app and exposing a route from it." -oc new-app -n $TEST_NAMESPACE quay.io/openshifttest/hello-openshift@sha256:4200f438cf2e9446f6bcff9d67ceea1f69ed07a2f83363b7fb52529f7ddd8a83 -# Wait for the hello-openshift pod to be running -MAX_RETRY=12 -INTERVAL=10 -COUNTER=0 -while :; -do - echo "Checking the hello-openshift pod status for the #${COUNTER}-th time ..." - if [ "$(oc get pods -n $TEST_NAMESPACE -l deployment='hello-openshift' -o=jsonpath='{.items[*].status.phase}')" == "Running" ]; then - echo "The hello-openshift pod is up and running." - break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo "The hello-openshift pod is not running after $((MAX_RETRY * INTERVAL)) seconds. Dumping status:" - oc get pods -n $TEST_NAMESPACE -l deployment='hello-openshift' + if wait_for_state "certificate/$CERT_NAME" "condition=Ready" "5m" "$CERT_NAMESPACE"; then + echo "Certificate is ready" + else + echo "Timed out after 5m. Dumping resources for debugging..." + run_command "oc describe certificate $CERT_NAME -n $CERT_NAMESPACE" exit 1 fi - sleep $INTERVAL -done -oc create route edge -n $TEST_NAMESPACE --service hello-openshift -TEST_ROUTE=$(oc get route -n $TEST_NAMESPACE hello-openshift -o=jsonpath='{.status.ingress[?(@.routerName=="default")].host}') -echo "The exposed route's hostname is $TEST_ROUTE" - -echo "Validating the cert-manager customized default ingress certificate" -# The CA_FILE will be used later to update KUBECONFIG -oc extract secret/cert-manager-managed-ingress-cert-tls -n openshift-ingress -CA_FILE=ca.crt -if [ ! -f ca.crt ]; then - CA_FILE=tls.crt -fi - -MAX_RETRY=12 -INTERVAL=10 -COUNTER=0 -while :; -do - CURL_OUTPUT=$(curl -IsS -v --cacert $CA_FILE --connect-timeout 30 "https://$TEST_ROUTE" 2>&1 || true) - if [[ "$CURL_OUTPUT" =~ HTTP/1.1\ 200\ OK ]]; then - echo "The customized certificate is serving as expected." && break - fi - ((++COUNTER)) - if [[ $COUNTER -eq $MAX_RETRY ]]; then - echo -e "Timeout after $((MAX_RETRY * INTERVAL)) seconds waiting for curl validation succeeded. Dumping the curl output:\n${CURL_OUTPUT}." +} + +function configure_ingress_default_cert() { + echo "Patching the issued TLS secret to Ingress Controller's spec..." + local json_path='{"spec":{"defaultCertificate": {"name": "'"$CERT_SECRET_NAME"'"}}}' + oc patch ingresscontroller default --type=merge -p "$json_path" -n openshift-ingress-operator + + echo "[$(timestamp)] Waiting for the Ingress ClusterOperator to finish rollout..." + oc wait co ingress --for=condition=Progressing=True --timeout=2m + oc wait co ingress --for=condition=Progressing=False --timeout=5m + echo "[$(timestamp)] Rollout progress completed" +} + +function extract_ca_from_secret() { + echo "Extracting the CA certificate from the issued TLS secret to local folder..." + oc extract secret/"$CERT_SECRET_NAME" -n $CERT_NAMESPACE + CA_FILE=$( [ -f ca.crt ] && echo "ca.crt" || echo "tls.crt" ) +} + +function validate_serving_cert() { + echo "Validating the serving certificate of '$CONSOLE_URL'..." + output=$(curl -I -v --cacert $CA_FILE --connect-timeout 30 "$CONSOLE_URL" 2>&1) + if [ $? -eq 0 ]; then + echo "The certificate is served by Ingress Controller as expected" + else + echo "Failed curl validation. Curl output: '$output'" exit 1 fi - sleep $INTERVAL -done - -echo "Deleting the namespace as curl validation finished." -oc delete ns $TEST_NAMESPACE - -# Update KUBECONFIG WRT CA of ingress certificate otherwise oc login command will fail -cp "$KUBECONFIG" "$KUBECONFIG".before-custom-ingress.bak -oc config view --minify --raw --kubeconfig "$KUBECONFIG".before-custom-ingress.bak > "$KUBECONFIG" -grep certificate-authority-data "$KUBECONFIG" | awk '{print $2}' | base64 -d > origin-ca.crt -cat $CA_FILE >> origin-ca.crt -NEW_CA_DATA=$(base64 -w0 origin-ca.crt) -sed -i "s/certificate-authority-data:.*$/certificate-authority-data: $NEW_CA_DATA/" "$KUBECONFIG" -echo "[$(date -u --rfc-3339=seconds)] The KUBECONFIG content is updated with CA of new default ingress certificate." +} + +function update_kubeconfig_ca() { + echo "Backing up the old KUBECONFIG file..." + run_command "cp -f $KUBECONFIG $KUBECONFIG.old" + + echo "Appending the CA data of KUBECONFIG with the new CA certificate..." + CA_DATA=$(grep certificate-authority-data "$KUBECONFIG".old | awk '{print $2}' | base64 -d) + cat "$CA_FILE" >> <(echo "$CA_DATA") + NEW_CA_DATA=$(echo "$CA_DATA" | base64 -w0) + sed -i "s/certificate-authority-data:.*$/certificate-authority-data: $NEW_CA_DATA/" "$KUBECONFIG" + + echo "Validating the updated KUBECONFIG using any of oc command..." + run_command "oc get node" +} + +timestamp +set_proxy +check_clusterissuer + +CERT_NAME=custom-ingress-cert +CERT_NAMESPACE=openshift-ingress +CERT_SECRET_NAME=cert-manager-managed-ingress-cert-tls +INGRESS_DOMAIN=$(oc get ingress.config cluster -o=jsonpath='{.spec.domain}') +CONSOLE_URL=$(oc whoami --show-console) + +create_ingress_certificate +configure_ingress_default_cert + +TMP_DIR=/tmp/cert-manager-custom-ingress-cert +mkdir -p "$TMP_DIR" +cd "$TMP_DIR" + +extract_ca_from_secret +validate_serving_cert +update_kubeconfig_ca + +echo "[$(timestamp)] Succeeded in replacing the default Ingress Controller serving certificates with cert-manager managed ones!" diff --git a/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.metadata.json b/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.metadata.json index ee3e9c03886d..cc5f7fc9d725 100644 --- a/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.metadata.json +++ b/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.metadata.json @@ -2,8 +2,12 @@ "path": "cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.yaml", "owners": { "approvers": [ - "jhou1", - "liangxia", + "xingxingxia", + "lunarwhite", + "swghosh", + "TrilokGeer" + ], + "reviewers": [ "xingxingxia", "lunarwhite", "swghosh", diff --git a/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.yaml b/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.yaml index e3a0d3c80ae0..5b6688611f40 100644 --- a/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.yaml +++ b/ci-operator/step-registry/cert-manager/custom-ingress-cert/cert-manager-custom-ingress-cert-ref.yaml @@ -1,18 +1,16 @@ ref: - as: cert-manager-custom-ingress-cert - from_image: - namespace: ci - name: verification-tests - tag: latest - grace_period: 20m - commands: cert-manager-custom-ingress-cert-commands.sh - cli: latest - resources: - limits: - cpu: 500m - memory: 500Mi - requests: - cpu: 300m - memory: 200Mi - documentation: |- - Manage OpenShift cluster's default ingress certificate with cert-manager. + as: cert-manager-custom-ingress-cert + from: upi-installer + cli: latest + commands: cert-manager-custom-ingress-cert-commands.sh + resources: + requests: + cpu: 100m + memory: 100Mi + env: + - name: CLUSTERISSUER_NAME + documentation: The name of the cert-manager ClusterIssuer to use for the external certificates issuance. (Prerequsite is that the ClusterIssuer is created and ready.) + default: "letsencrypt-prodoction-ci" + documentation: |- + Create cert-manager Certificate resources issued from the given ClusterIssuer. + Replace default Ingress certificates with cert-manager managed certificates. diff --git a/ci-operator/step-registry/cert-manager/install/OWNERS b/ci-operator/step-registry/cert-manager/install/OWNERS deleted file mode 100644 index 134ab6afb3d5..000000000000 --- a/ci-operator/step-registry/cert-manager/install/OWNERS +++ /dev/null @@ -1,8 +0,0 @@ -approvers: -- jhou1 -- liangxia -- xingxingxia -- lunarwhite -- swghosh -- TrilokGeer - diff --git a/ci-operator/step-registry/cert-manager/install/OWNERS b/ci-operator/step-registry/cert-manager/install/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/cert-manager/install/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/cert-manager/install/cert-manager-install-ref.metadata.json b/ci-operator/step-registry/cert-manager/install/cert-manager-install-ref.metadata.json index ecdce4c39075..383351d84eeb 100644 --- a/ci-operator/step-registry/cert-manager/install/cert-manager-install-ref.metadata.json +++ b/ci-operator/step-registry/cert-manager/install/cert-manager-install-ref.metadata.json @@ -2,8 +2,12 @@ "path": "cert-manager/install/cert-manager-install-ref.yaml", "owners": { "approvers": [ - "jhou1", - "liangxia", + "xingxingxia", + "lunarwhite", + "swghosh", + "TrilokGeer" + ], + "reviewers": [ "xingxingxia", "lunarwhite", "swghosh",