From 2d87adf51d10990302cdbe034bd6ff8e3e80e136 Mon Sep 17 00:00:00 2001
From: Loren Yu
Date: Fri, 3 Jan 2025 10:28:48 -0800
Subject: [PATCH] Add workflow that scans orphaned PR environments (#819)

- Add bin/orphaned-pr-environments
- Add scan-orphaned-pr-environments.yml workflow
- Pulled out get_app_names into reusable function in bin/util.sh
- Comment out default system notifications config
---
 .../scan-orphaned-pr-environments.yml         | 70 +++++++++++++++++++
 .../workflows/send-system-notification.yml    |  9 ++-
 bin/infra-deploy-status-check-configs         |  9 +--
 bin/orphaned-pr-environments                  | 58 +++++++++++++++
 bin/util.sh                                   |  9 +++
 docs/infra/system-notifications.md            |  7 ++
 infra/README.md                               |  3 +-
 infra/project-config/system-notifications.tf  | 41 ++++++++---
 8 files changed, 189 insertions(+), 17 deletions(-)
 create mode 100644 .github/workflows/scan-orphaned-pr-environments.yml
 create mode 100755 bin/orphaned-pr-environments
 create mode 100644 bin/util.sh
 create mode 100644 docs/infra/system-notifications.md

diff --git a/.github/workflows/scan-orphaned-pr-environments.yml b/.github/workflows/scan-orphaned-pr-environments.yml
new file mode 100644
index 00000000..cb67e156
--- /dev/null
+++ b/.github/workflows/scan-orphaned-pr-environments.yml
@@ -0,0 +1,70 @@
+# This workflow scans for orphaned PR environments
+name: Scan orphaned PR environments
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Run every day at 07:30 UTC (3:30am ET, 12:30am PT) after engineers are likely done with work
+    - cron: "30 7 * * *"
+
+jobs:
+  get-app-names:
+    name: Get app names
+    runs-on: ubuntu-latest
+    outputs:
+      app_names: ${{ steps.get-app-names.outputs.app_names }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Get app names
+        id: get-app-names
+        run: |
+          source bin/util.sh
+          app_names="$(get_app_names)"
+          # turn app_names into a json list using jq
+          app_names="$(echo "${app_names}" | jq -R -s -c 'split("\n")[:-1]')"
+          echo "App names retrieved: ${app_names}"
+          echo "app_names=${app_names}" >> "$GITHUB_OUTPUT"
+        shell: bash
+  scan:
+    name: Scan
+    runs-on: ubuntu-latest
+    needs: get-app-names
+
+    strategy:
+      # Keep scanning the remaining apps even if one app has orphaned environments
+      fail-fast: false
+      matrix:
+        app_name: ${{ fromJson(needs.get-app-names.outputs.app_names) }}
+
+    permissions:
+      contents: read
+      id-token: write
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Terraform
+        uses: ./.github/actions/setup-terraform
+
+      - name: Configure AWS credentials
+        uses: ./.github/actions/configure-aws-credentials
+        with:
+          app_name: ${{ matrix.app_name }}
+          environment: dev
+
+      - name: List PR workspaces
+        run: |
+          ./bin/orphaned-pr-environments ${{ matrix.app_name }}
+        env:
+          GH_TOKEN: ${{ github.token }}
+          TF_IN_AUTOMATION: "true"
+
+  notify:
+    name: Notify
+    needs: scan
+    if: failure()
+    uses: ./.github/workflows/send-system-notification.yml
+    with:
+      channel: "workflow-failures"
+      message: "🧹 [Orphaned PR environments for ${{ github.repository }}](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})"
+    secrets: inherit
diff --git a/.github/workflows/send-system-notification.yml b/.github/workflows/send-system-notification.yml
index 0dc400ba..f3f4e683 100644
--- a/.github/workflows/send-system-notification.yml
+++ b/.github/workflows/send-system-notification.yml
@@ -29,6 +29,9 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
+      - name: Set up Terraform
+        uses: ./.github/actions/setup-terraform
+
       - name: Get channel configuration
         id: get-channel-type
         run: |
@@ -51,6 +54,10 @@ jobs:
             slack_token_secret_name="$(echo "${channel_config}" | jq -r ".slack_token_secret_name")"
             echo "Slack token secret name: ${slack_token_secret_name}"
             echo "SLACK_TOKEN_SECRET_NAME=${slack_token_secret_name}" >> "$GITHUB_ENV"
+
+            # Convert each Markdown link in message [text](url) to Slack format <url|text>
+            echo "Convert message from Markdown to Slack format"
+            echo "SLACK_MESSAGE=$(echo "${{ inputs.message }}" | sed -E 's/\[([^]]+)\]\(([^)]+)\)/<\2|\1>/g')" >> "$GITHUB_ENV"
           fi
         shell: bash
 
@@ -62,4 +69,4 @@ jobs:
           token: ${{ secrets[env.SLACK_TOKEN_SECRET_NAME] }}
           payload: |
             channel: ${{ secrets[env.CHANNEL_ID_SECRET_NAME] }}
-            text: ${{ inputs.message }}
+            text: ${{ env.SLACK_MESSAGE }}
diff --git a/bin/infra-deploy-status-check-configs b/bin/infra-deploy-status-check-configs
index 9dadc1c7..11e5f33a 100755
--- a/bin/infra-deploy-status-check-configs
+++ b/bin/infra-deploy-status-check-configs
@@ -47,6 +47,8 @@
 # -----------------------------------------------------------------------------
 set -euo pipefail
 
+source bin/util.sh
+
 # Return the names of Terraform backend configuration files in (without the ".s3.tfbackend" suffix)
 # for the root module given by "infra/${root_module_subdir}".
 #
@@ -85,13 +87,6 @@ function get_root_module_configs() {
   done
 }
 
-# Retrieve the names of the applications in the repo by listing the directories in the "infra" directory
-# and filtering out the directories that are not applications.
-# Returns: A list of application names.
-function get_app_names() {
-  find "infra" -maxdepth 1 -type d -not -name "infra" -not -name "accounts" -not -name "modules" -not -name "networks" -not -name "project-config" -not -name "test" -exec basename {} \;
-}
-
 function get_account_layer_configs() {
   local configs
   configs=$(get_root_module_configs "accounts")
diff --git a/bin/orphaned-pr-environments b/bin/orphaned-pr-environments
new file mode 100755
index 00000000..4489e1c9
--- /dev/null
+++ b/bin/orphaned-pr-environments
@@ -0,0 +1,58 @@
+#!/bin/bash
+# -----------------------------------------------------------------------------
+# This script checks for orphaned PR environments by listing all PR workspaces
+# and checking if the associated PR is still open. If the PR is closed or merged
+# the resources in the workspace should have been destroyed and the workspace
+# deleted, so existing workspaces for such PRs are considered orphaned.
+#
+# Positional parameters:
+#   app_name (required) - the name of the subdirectory of /infra that holds the
+#     application's infrastructure code.
+#
+# Exits with 1 if any orphaned PR environments are found, 0 otherwise.
+# -----------------------------------------------------------------------------
+set -euo pipefail
+
+app_name="$1"
+
+echo "::group::Initialize Terraform"
+echo terraform -chdir="infra/${app_name}/service" init -input=false -reconfigure -backend-config="dev.s3.tfbackend"
+terraform -chdir="infra/${app_name}/service" init -input=false -reconfigure -backend-config="dev.s3.tfbackend"
+echo "::endgroup::"
+
+echo "::group::List PRs with PR environments"
+echo terraform -chdir="infra/${app_name}/service" workspace list
+workspaces="$(terraform -chdir="infra/${app_name}/service" workspace list)"
+# grep exits non-zero when there are no PR workspaces; "|| true" keeps that
+# from aborting the script under "set -euo pipefail".
+pr_nums="$(echo "${workspaces}" | { grep -o 'p-[0-9]\+' || true; } | sed 's/p-//')"
+echo "PRs"
+echo "${pr_nums}"
+echo "::endgroup::"
+
+echo "::group::Check status of each PR"
+closed_prs=()
+for pr_num in $pr_nums; do
+  pr_status="$(gh pr view "$pr_num" --json state --jq ".state")"
+  echo "PR ${pr_num}: ${pr_status}"
+
+  # gh reports OPEN, CLOSED or MERGED; an environment should only exist while
+  # the PR is open.
+  if [ "$pr_status" != "OPEN" ]; then
+    closed_prs+=("$pr_num")
+  fi
+done
+echo "::endgroup::"
+
+# Write the summary to /dev/null when run outside of GitHub Actions
+summary_file="${GITHUB_STEP_SUMMARY:-/dev/null}"
+
+# if closed_prs is not empty exit with 1 otherwise exit with 0
+if [ ${#closed_prs[@]} -gt 0 ]; then
+  echo "Found orphaned PR environments for the following PRs: ${closed_prs[*]}"
+  echo "Found orphaned PR environments for the following PRs: ${closed_prs[*]}" >> "${summary_file}"
+  exit 1
+fi
+
+echo "No orphaned PR environments"
+echo "No orphaned PR environments" >> "${summary_file}"
diff --git a/bin/util.sh b/bin/util.sh
new file mode 100644
index 00000000..017bad13
--- /dev/null
+++ b/bin/util.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Utility functions
+
+# Retrieve the names of the applications in the repo by listing the directories in the "infra" directory
+# and filtering out the directories that are not applications.
+# Returns: A list of application names.
+function get_app_names() {
+  find "infra" -maxdepth 1 -type d -not -name "infra" -not -name "accounts" -not -name "modules" -not -name "networks" -not -name "project-config" -not -name "test" -exec basename {} \;
+}
diff --git a/docs/infra/system-notifications.md b/docs/infra/system-notifications.md
new file mode 100644
index 00000000..1ba86193
--- /dev/null
+++ b/docs/infra/system-notifications.md
@@ -0,0 +1,7 @@
+# System Notifications
+
+The project sends notifications as part of CI/CD workflows to notify the team about system events such as deployments and workflow failures.
+
+## System notifications configuration
+
+The configuration for system notifications is defined in the application's [project-config module](/infra/project-config/). The [system-notifications.tf](/infra/project-config/system-notifications.tf) file defines one or more notification channels that CI/CD workflows can send notifications to. Each channel can use a different notification type. Currently, Slack is the only supported notification type.
diff --git a/infra/README.md b/infra/README.md
index 0d104cac..3e730306 100644
--- a/infra/README.md
+++ b/infra/README.md
@@ -74,7 +74,8 @@ To set up this project for the first time (i.e., it has never been deployed to t
 3. [Set up infrastructure developer tools](/docs/infra/set-up-infrastructure-tools.md)
 4. [Set up AWS account](/docs/infra/set-up-aws-account.md)
 5. [Set up the virtual network (VPC)](/docs/infra/set-up-network.md)
-6. For each application:
+6. Optionally [set up system notifications for CI/CD workflows](/docs/infra/system-notifications.md)
+7. For each application:
    1. [Set up application build repository](/docs/infra/set-up-app-build-repository.md)
    2. [Set up application database](/docs/infra/set-up-database.md)
    3. [Set up application environment](/docs/infra/set-up-app-env.md)
diff --git a/infra/project-config/system-notifications.tf b/infra/project-config/system-notifications.tf
index f1b57613..5d82121a 100644
--- a/infra/project-config/system-notifications.tf
+++ b/infra/project-config/system-notifications.tf
@@ -1,16 +1,41 @@
 locals {
-  topics = {
-    "workflows" = {
-    }
-  }
+  # Configuration for system notifications
+  # used by CI/CD workflows to send notifications for deployments,
+  # failed workflows, etc.
   system_notifications_config = {
+
+    # The `channels` map defines notification channels. Each key represents a
+    # notification channel, and each value is the channel's configuration.
+    #
+    # Each channel configuration includes the following attributes:
+    # - type: The type of notification channel (e.g., "slack" or "teams").
+    #   Currently, only "slack" is supported.
+    #
+    # If the `type` attribute is missing or null, notifications sent to that
+    # channel will be ignored (no-op).
+    #
+    # For channels with `type` set to "slack", the configuration must also
+    # include the following attributes:
+    # - channel_id_secret_name: The name of the secret in GitHub that contains
+    #   the Slack channel ID.
+    # - slack_token_secret_name: The name of the secret in GitHub that contains
+    #   the Slack bot token.
+    #
+    # Example:
+    #   channels = {
+    #     alerts = {
+    #       type = "slack"
+    #       channel_id_secret_name = "SYSTEM_NOTIFICATIONS_SLACK_CHANNEL_ID"
+    #       slack_token_secret_name = "SYSTEM_NOTIFICATIONS_SLACK_BOT_TOKEN"
+    #     }
+    #   }
     channels = {
       workflow-failures = {
-        "type" = "slack" # or "teams"
-        # Name of the secret in GitHub
-        "channel_id_secret_name"  = "SYSTEM_NOTIFICATIONS_SLACK_CHANNEL_ID"
-        "slack_token_secret_name" = "SYSTEM_NOTIFICATIONS_SLACK_BOT_TOKEN"
+        # Uncomment if you want to send workflow failure notifications to Slack
+        # "type" = "slack"
+        # "channel_id_secret_name"  = "SYSTEM_NOTIFICATIONS_SLACK_CHANNEL_ID"
+        # "slack_token_secret_name" = "SYSTEM_NOTIFICATIONS_SLACK_BOT_TOKEN"
       }
     }
   }
 }