From a9e3b70f830caed64bbd14672c3f6f0a05d17afa Mon Sep 17 00:00:00 2001
From: Curtis Vogt <curtis.vogt@gmail.com>
Date: Mon, 6 Jan 2025 11:46:01 -0600
Subject: [PATCH] Create GHA extract shell scripts action (#1)

---
 .editorconfig                            |  12 +++
 .github/workflows/integration-tests.yaml |  50 ++++++++++
 .github/workflows/shell.yaml             |  37 +++++++
 .github/workflows/unit-tests.yaml        |  27 ++++++
 .github/workflows/yaml.yaml              |  18 ++++
 .gitignore                               |   1 +
 .yamllint.yaml                           |   8 ++
 README.md                                |  67 ++++++++++++-
 action.yaml                              |  32 +++++++
 gha_extract_shell_scripts.py             | 117 +++++++++++++++++++++++
 requirements.txt                         |   1 +
 test/github-actions-demo.yaml            |  20 ++++
 test/nested-env.yaml                     |  18 ++++
 test/test_reference.py                   |  79 +++++++++++++++
 14 files changed, 485 insertions(+), 2 deletions(-)
 create mode 100644 .editorconfig
 create mode 100644 .github/workflows/integration-tests.yaml
 create mode 100644 .github/workflows/shell.yaml
 create mode 100644 .github/workflows/unit-tests.yaml
 create mode 100644 .github/workflows/yaml.yaml
 create mode 100644 .gitignore
 create mode 100644 .yamllint.yaml
 create mode 100644 action.yaml
 create mode 100755 gha_extract_shell_scripts.py
 create mode 100644 requirements.txt
 create mode 100644 test/github-actions-demo.yaml
 create mode 100644 test/nested-env.yaml
 create mode 100644 test/test_reference.py

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..28491b7
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,12 @@
+# https://editorconfig.org/
+
+# https://manpages.debian.org/testing/shfmt/shfmt.1.en.html#EXAMPLES
+[*.sh]
+indent_style = space
+indent_size = 4
+shell_variant      = bash  # --language-variant
+binary_next_line   = false
+switch_case_indent = true  # --case-indent
+space_redirects    = false
+keep_padding       = false
+function_next_line = false # --func-next-line
diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml
new file mode 100644
index 0000000..6458c08
--- /dev/null
+++ b/.github/workflows/integration-tests.yaml
@@ -0,0 +1,50 @@
+---
+name: Integration Tests
+on:
+  pull_request:
+    paths:
+      - "action.yaml"
+      - "gha_extract_shell_scripts.py"
+      - ".github/workflows/integration-tests.yaml"
+
+jobs:
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run action
+        id: self
+        uses: ./
+      - name: Target step
+        run: |
+          echo "${{ env.greeting }}, $name"
+        env:
+          greeting: Hello
+          name: Integration Tests
+      - name: Test extracted
+        run: |
+          if [[ -f "$output_file" ]]; then
+              echo "Output:"
+              cat -n "$output_file"
+              echo "Expected:"
+              cat -n <<<"$expected"
+          else
+              find "${output_dir:?}"
+              exit 1
+          fi
+          diff --color=always "${output_file:?}" <(echo "${expected:?}")
+        env:
+          output_dir: ${{ steps.self.outputs.output-dir }}
+          output_file: ${{ steps.self.outputs.output-dir }}/integration-tests.yaml/job=Test/step=Target_step.sh
+          expected: |-
+            #!/usr/bin/env bash
+            set -e
+            # shellcheck disable=SC2016,SC2034
+            greeting='Hello'
+            # shellcheck disable=SC2016,SC2034
+            name='Integration Tests'
+            # ---
+            echo ":env.greeting:, $name"
diff --git a/.github/workflows/shell.yaml b/.github/workflows/shell.yaml
new file mode 100644
index 0000000..e3721af
--- /dev/null
+++ b/.github/workflows/shell.yaml
@@ -0,0 +1,37 @@
+---
+name: Shell
+on:
+  pull_request:
+    paths:
+      - "**.sh"
+      - ".github/workflows/*"
+      - "action.yaml"
+      - "gha_extract_shell_scripts.py"
+
+jobs:
+  lint-format:
+    name: Lint & Format
+    # These permissions are needed to:
+    # - Checkout the Git repo (`contents: read`)
+    # - Post comments on PRs (`pull-requests: write`):
+    #   https://github.com/luizm/action-sh-checker#secrets
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Extract workflow shell scripts
+        id: extract
+        uses: ./
+      - uses: luizm/action-sh-checker@c6edb3de93e904488b413636d96c6a56e3ad671a  # v0.8.0
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        with:
+          sh_checker_comment: true
+      # Support investigating linting/formatting errors
+      - uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: workflow-scripts
+          path: ${{ steps.extract.outputs.output-dir }}
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
new file mode 100644
index 0000000..3291282
--- /dev/null
+++ b/.github/workflows/unit-tests.yaml
@@ -0,0 +1,27 @@
+---
+name: Unit Tests
+on:
+  pull_request:
+    paths:
+      - "**/*.py"
+      - ".github/workflows/unit-tests.yaml"
+
+jobs:
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - name: Test with unittest
+        run: |
+          python test/test_reference.py
diff --git a/.github/workflows/yaml.yaml b/.github/workflows/yaml.yaml
new file mode 100644
index 0000000..91d390c
--- /dev/null
+++ b/.github/workflows/yaml.yaml
@@ -0,0 +1,18 @@
+---
+# https://yamllint.readthedocs.io/en/stable/integration.html#integration-with-github-actions
+name: YAML
+on:
+  pull_request:
+    paths:
+      - "**/*.yaml"
+      - "**/*.yml"
+jobs:
+  lint:
+    name: Lint
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install yamllint
+        run: pip install yamllint
+      - name: Lint YAML files
+        run: yamllint . --format=github
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/.yamllint.yaml b/.yamllint.yaml
new file mode 100644
index 0000000..da01502
--- /dev/null
+++ b/.yamllint.yaml
@@ -0,0 +1,8 @@
+---
+rules:
+  indentation:
+    spaces: 2
+    indent-sequences: true
+  document-start:
+    present: true
+  new-line-at-end-of-file: enable
diff --git a/README.md b/README.md
index 8ef4fa1..5822df4 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,65 @@
-# inline-workflow-shell-scripts
-Extracts inline shell scripts within GitHub Action workflows
+# GHA Extract Shell Scripts
+
+Processes the GitHub Action workflows contained within `.github/workflows` and extracts all steps which contain an embedded shell script for the purpose of running linting and formatting. Each workflow step containing a shell script will be written out to a file to make it easy to use existing tooling such as `shellcheck` and `shfmt`.
+
+## Example
+
+```yaml
+---
+name: Shell
+on:
+  pull_request:
+    paths:
+      - "**.sh"
+      - ".github/workflows/*"
+
+jobs:
+  lint-format:
+    name: Lint & Format
+    # These permissions are needed to:
+    # - Checkout the Git repo (`contents: read`)
+    # - Post comments on PRs: https://github.com/luizm/action-sh-checker#secrets
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Extract workflow shell scripts
+        id: extract
+        uses: beacon-biosignals/gha-extract-shell-scripts@v1
+      - uses: luizm/action-sh-checker@c6edb3de93e904488b413636d96c6a56e3ad671a  # v0.8.0
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        with:
+          sh_checker_comment: true
+      # Support investigating linting/formatting errors
+      - uses: actions/upload-artifact@v4
+        if: ${{ failure() }}
+        with:
+          name: workflow-scripts
+          path: ${{ steps.extract.outputs.output-dir }}
+```
+
+## Inputs
+
+The `gha-extract-shell-scripts` action supports the following inputs:
+
+| Name                 | Description | Required | Example |
+|:---------------------|:------------|:---------|:--------|
+| `output-dir`         | Allows the user to specify the name of the directory containing the extracted workflow shell script steps. Defaults to `workflow_scripts`. | No | `workflow_scripts` |
+| `shellcheck-disable` | Ignore all the specified errors within the extracted shell scripts. | No | `SC2016,SC2050` |
+
+## Outputs
+
+| Name         | Description | Example |
+|:-------------|:------------|:--------|
+| `output-dir` | The name of the directory containing the various extracted workflow shell script steps. | `workflow_scripts` |
+
+## Permissions
+
+The following [job permissions](https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs) are required to run this action:
+
+```yaml
+permissions: {}
+```
diff --git a/action.yaml b/action.yaml
new file mode 100644
index 0000000..b813ef3
--- /dev/null
+++ b/action.yaml
@@ -0,0 +1,45 @@
+---
+name: GHA Extract Shell Scripts
+description: >-
+  Extracts the inline shell scripts of GitHub Actions workflow steps into
+  files so they can be linted and formatted.
+inputs:
+  output-dir:
+    description: Directory in which the extracted shell scripts are written.
+    default: "workflow_scripts"
+  shellcheck-disable:
+    description: Comma-separated ShellCheck codes to disable in every script.
+    default: ""
+outputs:
+  output-dir:
+    description: Directory containing the extracted shell scripts.
+    value: ${{ inputs.output-dir }}
+runs:
+  using: composite
+  steps:
+    - name: Install dependencies
+      id: venv
+      shell: bash
+      run: |
+        # Create the virtual environment in the runner's temporary directory
+        # rather than in the caller's workspace.
+        venv="$(mktemp -d "${RUNNER_TEMP:?}/venv.XXXXXX")"
+        python -m venv "$venv"
+        "$venv/bin/python" -m pip install -r "${GITHUB_ACTION_PATH}/requirements.txt"
+        # Each `run` step is a new shell, so an activated venv would not carry
+        # over; hand the interpreter path to the next step explicitly.
+        echo "python=$venv/bin/python" >>"$GITHUB_OUTPUT"
+    - name: Extract shell scripts
+      shell: bash
+      run: |
+        args=()
+        if [[ -n "$disable" ]]; then
+            args+=(--disable "$disable")
+        fi
+        args+=("$input_dir" "$output_dir")
+        "${python:?}" "${GITHUB_ACTION_PATH}/gha_extract_shell_scripts.py" "${args[@]}"
+      env:
+        python: ${{ steps.venv.outputs.python }}
+        disable: ${{ inputs.shellcheck-disable }}
+        input_dir: .github/workflows
+        output_dir: ${{ inputs.output-dir }}
diff --git a/gha_extract_shell_scripts.py b/gha_extract_shell_scripts.py
new file mode 100755
index 0000000..3f91acf
--- /dev/null
+++ b/gha_extract_shell_scripts.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+
+# Reads shell scripts from `run` steps in GitHub Actions workflows and outputs
+# them as files so that tools like `shfmt` or ShellCheck can operate on them.
+#
+# Arguments:
+# - Path to the directory containing the workflow files to read.
+# - Path to output directory where shell scripts will be written.
+# - `--disable` (optional): comma-separated ShellCheck codes to disable in
+#   every extracted script, e.g. `SC2016,SC2050`.
+
+import os
+import re
+import sys
+
+import argparse
+from pathlib import Path
+
+import yaml
+
+
+def list_str(values):
+    """Split a comma-separated string, dropping blank entries."""
+    return [item.strip() for item in values.split(",") if item.strip()]
+
+
+def sanitize(path):
+    """Return `path` with the characters listed below replaced or removed."""
+    # Needed filename replacements to satisfy both GHA artifacts and shellcheck.
+    replacements = {
+        " ": "_",
+        "/": "-",
+        '"': "",
+        "(": "",
+        ")": "",
+        "&": "",
+        "$": "",
+    }
+    # `str()` because YAML may load a name such as `name: 2024` as a non-string.
+    return str(path).translate(str.maketrans(replacements))
+
+
+# Replace any GHA placeholders, e.g. ${{ matrix.version }}.
+def sanitize_gha_expression(string):
+    """Rewrite every `${{ expr }}` in `string` as `:expr:`."""
+    return re.sub(r"\${{\s*(.*?)\s*}}", r":\1:", string)
+
+
+def _mapping(value):
+    """Return `value` if it is a mapping, otherwise an empty one.
+
+    YAML loads an empty key (e.g. `env:`) as `None`, which would otherwise
+    break the `.get()` chains and `|` merges below.
+    """
+    return value if isinstance(value, dict) else {}
+
+
+def _default_shell(section, fallback=None):
+    """Return `defaults.run.shell` of a workflow or job, else `fallback`."""
+    run_defaults = _mapping(_mapping(section.get("defaults")).get("run"))
+    return run_defaults.get("shell") or fallback
+
+
+def process_workflow_file(workflow_path: Path, output_dir: Path, ignored_errors=None):
+    """Write every `run` step of a workflow out as a standalone shell script.
+
+    Scripts are written to
+    `<output_dir>/<workflow file>/job=<job name>/step=<step name>.sh`. Steps
+    without a `run` key, or with a shell other than `bash`/`sh`, are skipped.
+
+    Args:
+        workflow_path: Workflow YAML file to read.
+        output_dir: Directory below which the scripts are written.
+        ignored_errors: Optional list of ShellCheck codes to disable for the
+            whole of each script.
+    """
+    with workflow_path.open(encoding="utf-8") as f:
+        # An empty file loads as `None`.
+        workflow = _mapping(yaml.safe_load(f))
+    workflow_file = workflow_path.name
+    # GHA allows workflow names to be defined as empty (e.g. `name:`)
+    workflow_name = sanitize(workflow.get("name") or workflow_path.stem)
+    workflow_default_shell = _default_shell(workflow)
+    workflow_env = _mapping(workflow.get("env"))
+    count = 0
+    print(f"Processing {workflow_path} ({workflow_name})")
+    for job_key, job in _mapping(workflow.get("jobs")).items():
+        job = _mapping(job)
+        # GHA allows job names to be defined as empty (e.g. `name:`)
+        job_name = sanitize(job.get("name") or job_key)
+        job_default_shell = _default_shell(job, workflow_default_shell)
+        job_env = workflow_env | _mapping(job.get("env"))
+        for i, step in enumerate(job.get("steps") or []):
+            run = step.get("run")
+            if not run:
+                continue
+            run = sanitize_gha_expression(run)
+            shell = step.get("shell") or job_default_shell
+            if shell and shell not in ["bash", "sh"]:
+                print(f"Skipping command with unknown shell '{shell}'")
+                continue
+            env = job_env | _mapping(step.get("env"))
+            # GHA allows step names to be defined as empty (e.g. `name:`)
+            step_name = sanitize(step.get("name") or str(i + 1))
+            script_path = (
+                output_dir / workflow_file / f"job={job_name}" / f"step={step_name}.sh"
+            )
+            script_path.parent.mkdir(parents=True, exist_ok=True)
+            with script_path.open("w", encoding="utf-8") as f:
+                # Default shell is bash.
+                f.write(f"#!/usr/bin/env {shell or 'bash'}\n")
+                # Ignore failure with GitHub expression variables such as:
+                # - SC2050: `[[ "${{ github.ref }}" == "refs/heads/main" ]]`
+                if ignored_errors:
+                    f.write(f"# shellcheck disable={','.join(ignored_errors)}\n")
+                    # Add a no-op command to ensure that additional shellcheck
+                    # disable directives aren't applied globally
+                    # https://github.com/koalaman/shellcheck/issues/657#issuecomment-213038218
+                    f.write("true\n")
+                # Whether or not it was explicitly set determines the arguments.
+                # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell
+                if not shell or shell == "sh":
+                    f.write("set -e\n")
+                elif shell == "bash":
+                    f.write("set -eo pipefail\n")
+                for k, v in env.items():
+                    f.write("# shellcheck disable=SC2016,SC2034\n")
+                    v = sanitize_gha_expression(str(v)).replace("'", "'\\''")
+                    f.write(f"{k}='{v}'\n")
+                f.write("# ---\n")
+                f.write(run)
+                if not run.endswith("\n"):
+                    f.write("\n")
+            count += 1
+    print(f"Produced {count} files")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input_dir", type=Path)
+    parser.add_argument("output_dir", type=Path)
+    parser.add_argument("--disable", type=list_str)
+    args = parser.parse_args()
+
+    print(f"Outputting scripts to {args.output_dir}")
+    args.output_dir.mkdir(parents=True, exist_ok=True)
+    # Sorted so the processing order (and log output) is deterministic.
+    for file in sorted(os.listdir(args.input_dir)):
+        if file.endswith((".yaml", ".yml")):
+            process_workflow_file(
+                args.input_dir / file, args.output_dir, args.disable
+            )
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8392d54
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+PyYAML==6.0.2
diff --git a/test/github-actions-demo.yaml b/test/github-actions-demo.yaml
new file mode 100644
index 0000000..ff07ef7
--- /dev/null
+++ b/test/github-actions-demo.yaml
@@ -0,0 +1,20 @@
+# https://docs.github.com/en/actions/writing-workflows/quickstart
+---
+name: GitHub Actions Demo
+run-name: ${{ github.actor }} is testing out GitHub Actions 🚀
+on: [push]
+jobs:
+  Explore-GitHub-Actions:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
+      - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
+      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
+      - name: Check out repository code
+        uses: actions/checkout@v4
+      - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
+      - run: echo "🖥️ The workflow is now ready to test your code on the runner."
+      - name: List files in the repository
+        run: |
+          ls ${{ github.workspace }}
+      - run: echo "🍏 This job's status is ${{ job.status }}."
diff --git a/test/nested-env.yaml b/test/nested-env.yaml
new file mode 100644
index 0000000..4164f41
--- /dev/null
+++ b/test/nested-env.yaml
@@ -0,0 +1,18 @@
+---
+name: Nested Env
+on: [push]
+env:
+  foo: "1"
+  bar: "1"
+  baz: "1"
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    env:
+      bar: "2"
+      baz: "2"
+    steps:
+      - name: Print variables
+        run: echo -e "foo=$foo\nbar=$bar\nbaz=$baz"
+        env:
+          baz: "3"
diff --git a/test/test_reference.py b/test/test_reference.py
new file mode 100644
index 0000000..e3083e3
--- /dev/null
+++ b/test/test_reference.py
@@ -0,0 +1,79 @@
+import inspect
+from pathlib import Path
+import os
+import shutil
+import sys
+import textwrap
+import unittest
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.dirname(TEST_DIR))
+
+from gha_extract_shell_scripts import process_workflow_file
+
+def clean(string):
+    string = textwrap.dedent(string).rstrip()
+    return [l + "\n" for l in string.split("\n")]
+
+
+class TestReferenceWorkflows(unittest.TestCase):
+    def setUp(self):
+        self.maxDiff = None
+
+    def tearDown(self):
+        shutil.rmtree("workflow_scripts", ignore_errors=True)  # may not exist
+
+    def test_github_actions_demo(self):
+        output_dir = Path("workflow_scripts")
+        process_workflow_file(Path(f"{TEST_DIR}/github-actions-demo.yaml"), output_dir)
+        self.assertTrue(os.path.isdir(f"{output_dir}"))
+        self.assertTrue(os.path.isdir(f"{output_dir}/github-actions-demo.yaml"))
+        self.assertTrue(os.path.isdir(f"{output_dir}/github-actions-demo.yaml/job=Explore-GitHub-Actions"))
+
+        self.assertTrue(os.path.isfile(f"{output_dir}/github-actions-demo.yaml/job=Explore-GitHub-Actions/step=1.sh"))
+        with open(f"{output_dir}/github-actions-demo.yaml/job=Explore-GitHub-Actions/step=1.sh") as f:
+            expected = """\
+                #!/usr/bin/env bash
+                set -e
+                # ---
+                echo "🎉 The job was automatically triggered by a :github.event_name: event."
+                """
+            self.assertListEqual(list(f), clean(expected))
+
+        self.assertFalse(os.path.isfile(f"{output_dir}/github-actions-demo.yaml/job=Explore-GitHub-Actions/step=4.sh"))
+        self.assertTrue(os.path.isfile(f"{output_dir}/github-actions-demo.yaml/job=Explore-GitHub-Actions/step=List_files_in_the_repository.sh"))
+        with open(f"{output_dir}/github-actions-demo.yaml/job=Explore-GitHub-Actions/step=List_files_in_the_repository.sh") as f:
+            expected = """\
+                #!/usr/bin/env bash
+                set -e
+                # ---
+                ls :github.workspace:
+                """
+            self.assertListEqual(list(f), clean(expected))
+
+    def test_nested_env(self):
+        output_dir = Path("workflow_scripts")
+        process_workflow_file(Path(f"{TEST_DIR}/nested-env.yaml"), output_dir)
+        self.assertTrue(os.path.isdir(f"{output_dir}"))
+        self.assertTrue(os.path.isdir(f"{output_dir}/nested-env.yaml"))
+        self.assertTrue(os.path.isdir(f"{output_dir}/nested-env.yaml/job=test"))
+
+        self.assertTrue(os.path.isfile(f"{output_dir}/nested-env.yaml/job=test/step=Print_variables.sh"))
+        with open(f"{output_dir}/nested-env.yaml/job=test/step=Print_variables.sh") as f:
+            expected = """\
+                #!/usr/bin/env bash
+                set -e
+                # shellcheck disable=SC2016,SC2034
+                foo='1'
+                # shellcheck disable=SC2016,SC2034
+                bar='2'
+                # shellcheck disable=SC2016,SC2034
+                baz='3'
+                # ---
+                echo -e "foo=$foo\\nbar=$bar\\nbaz=$baz"
+                """
+            self.assertListEqual(list(f), clean(expected))
+
+
+if __name__ == '__main__':
+    unittest.main()