# .github/workflows/gpu-bench.yml

# The regression check itself only runs for merge queue events; on ordinary pull request
# events the job is skipped, so it shows up as a skipped status check beforehand
name: GPU benchmark regression test

# Triggers: pull request activity targeting `dev`, plus merge queue events
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
    branches:
      - dev
  merge_group:

# Runs are grouped by workflow name plus head ref (falling back to the run ID when
# `github.head_ref` is empty); a newer run in a group cancels the one in progress
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  # Benchmark the change against `dev` and open an issue on regression
  gpu-benchmark:
    name: Run benchmarks on GPU
    # Skipped for `pull_request` events unless the action is `enqueued`; runs for any other event
    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
    runs-on:
      - self-hosted
      - gpu-bench
    steps:
      # Check out `lurk-lab/ci-workflows`; the two local-path actions used next presumably come from this checkout
      - uses: actions/checkout@v4
        with:
          repository: lurk-lab/ci-workflows
      # Run the local `gpu-setup` action with the CUDA framework selected
      - uses: ./.github/actions/gpu-setup
        with:
          gpu-framework: 'cuda'
      # Run the local `ci-env` action
      # NOTE(review): its effect is defined outside this file - confirm in the ci-workflows repo
      - uses: ./.github/actions/ci-env
      # Second checkout with no inputs, i.e. `actions/checkout` defaults (the repository and ref that triggered the run)
      - uses: actions/checkout@v4
      # Install dependencies
      # Stable Rust toolchain plus a build cache
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      # `just` is the command runner invoked by the bench steps further down
      - uses: taiki-e/install-action@v2
        with:
          tool: just@1.22
      # Install `cargo-criterion` and `criterion-table`; the latter renders BENCHMARKS.md in a later step
      - name: Install criterion
        run: |
          cargo install cargo-criterion
          cargo install criterion-table
      # Export settings for later steps through $GITHUB_ENV (`tee` also echoes each line to the step log):
      #   ARECIBO_BENCH_OUTPUT, ARECIBO_BENCH_NUM_CONS - not referenced again in this workflow;
      #                 presumably read by the bench harness (TODO confirm)
      #   BASE_COMMIT - base SHA of the merge group; used in the base bench output file name
      #   GPU_NAME    - last line of `nvidia-smi`'s GPU name listing
      #   GPU_ID      - last whitespace-separated word of GPU_NAME
      - name: Set bench output format and base SHA
        run: |
          echo "ARECIBO_BENCH_OUTPUT=commit-comment" | tee -a $GITHUB_ENV
          echo "ARECIBO_BENCH_NUM_CONS=16384,1038732" | tee -a $GITHUB_ENV
          echo "BASE_COMMIT=${{ github.event.merge_group.base_sha }}" | tee -a $GITHUB_ENV
          GPU_NAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits | tail -n1)
          echo "GPU_ID=$(echo $GPU_NAME | awk '{ print $NF }')" | tee -a $GITHUB_ENV
          echo "GPU_NAME=$GPU_NAME" | tee -a $GITHUB_ENV
      # Check out the `dev` branch into `dev/` to serve as the comparison baseline
      - uses: actions/checkout@v4
        with:
          path: dev
          ref: dev
      # Bench the baseline with the PR branch's justfile so both sides use the same parameters
      - name: Run GPU bench on base branch
        working-directory: ${{ github.workspace }}/dev
        run: |
          # Copy justfile to dev, overwriting existing config with that of PR branch
          cp ../benches/justfile .
          # Run benchmark
          just gpu-bench-ci supernova-ci
          # Copy bench output to PR branch
          cp supernova-ci-${{ env.BASE_COMMIT }}.json ..
      # Bench the PR branch and copy its result into the workspace root, next to the baseline's
      - name: Run GPU bench on PR branch
        working-directory: ${{ github.workspace }}/benches
        run: |
          just gpu-bench-ci supernova-ci
          cp supernova-ci-${{ github.sha }}.json ..
      # Stage `tables.toml` in the workspace root and insert host details before its `"""` line
      - name: copy the benchmark template and prepare it with data
        working-directory: ${{ github.workspace }}
        run: |
          cp .github/tables.toml .

          # Host details: CPU model, vCPU count, total RAM in whole GB
          CPU_MODEL=$(grep '^model name' /proc/cpuinfo | head -1 | awk -F ': ' '{ print $2 }')
          NUM_VCPUS=$(nproc --all)
          TOTAL_RAM=$(grep MemTotal /proc/meminfo | awk '{$2=$2/(1024^2); print int($2), "GB RAM";}')
          WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"

          # Insert each non-empty value on its own line ahead of the `"""` line
          [[ -n "${{ env.GPU_NAME }}" ]] && sed -i "/^\"\"\"$/i ${{ env.GPU_NAME }}" tables.toml
          [[ -n "$CPU_MODEL" ]] && sed -i "/^\"\"\"$/i $CPU_MODEL" tables.toml
          [[ -n "$NUM_VCPUS" ]] && sed -i "/^\"\"\"$/i $NUM_VCPUS vCPUs" tables.toml
          [[ -n "$TOTAL_RAM" ]] && sed -i "/^\"\"\"$/i $TOTAL_RAM" tables.toml
          sed -i "/^\"\"\"$/i Workflow run: $WORKFLOW_URL" tables.toml

          # Make the run URL available to later steps
          echo "WORKFLOW_URL=$WORKFLOW_URL" | tee -a $GITHUB_ENV
      # Create a `criterion-table` and write in commit comment
      # Both result files are fed to `criterion-table`, base first, so the table compares the PR
      # against the base; with only one input there is nothing to compare and no "slower" entry
      # can ever be reported to the regression check
      - name: Run `criterion-table`
        run: |
          cat supernova-ci-${{ env.BASE_COMMIT }}.json supernova-ci-${{ github.sha }}.json | criterion-table > BENCHMARKS.md
      # Post the rendered table as a comment on the commit
      - name: Write bench on commit comment
        uses: peter-evans/commit-comment@v3
        with:
          body-path: BENCHMARKS.md
      # Check for a slowdown >= `$ARECIBO_BENCH_NOISE_THRESHOLD` (fallback is 30%/1.3x). If so, open an issue but don't block merge
      # Since we are parsing for slowdowns, we simply add 1 to the noise threshold decimal to get the regression factor
      # The step exits non-zero on a regression; `continue-on-error` keeps that from failing the job,
      # while the failure stays visible as `steps.regression-check.outcome`
      - name: Check for perf regression
        id: regression-check
        run: |
          # Collect the factor of every `<N>x slower` entry in the table
          REGRESSIONS=$(grep -o '[0-9.]*x slower' BENCHMARKS.md | cut -d 'x' -f1)
          echo $REGRESSIONS

          if [ ! -z "${{ env.ARECIBO_BENCH_NOISE_THRESHOLD}}" ]; then
            REGRESSION_FACTOR=$(echo "${{ env.ARECIBO_BENCH_NOISE_THRESHOLD }}+1" | bc)
          else
            REGRESSION_FACTOR=1.3
          fi

          # Export the threshold as a percentage before the loop: the loop exits on the first
          # regression, and NOISE_THRESHOLD is read by the issue step in exactly that case
          echo "NOISE_THRESHOLD=$(echo "($REGRESSION_FACTOR-1)*100" | bc)" | tee -a $GITHUB_ENV

          for r in $REGRESSIONS
          do
            if (( $(echo "$r >= $REGRESSION_FACTOR" | bc -l) ))
            then
              exit 1
            fi
          done
        continue-on-error: true
      # Not possible to use ${{ github.event.number }} with the `merge_group` trigger
      # Only needed for the regression issue, so skip it unless the regression check failed
      - name: Get PR number from merge branch
        if: steps.regression-check.outcome == 'failure'
        run: |
          echo "PR_NUMBER=$(echo ${{ github.event.merge_group.head_ref }} | sed -e 's/.*pr-\(.*\)-.*/\1/')" | tee -a $GITHUB_ENV
      # Open an issue from the `.github/PERF_REGRESSION.md` template, but only when the regression
      # check failed (`outcome` is the step result before `continue-on-error` is applied)
      - uses: JasonEtco/create-an-issue@v2
        if: steps.regression-check.outcome == 'failure'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ env.PR_NUMBER }}
          GIT_SHA: ${{ github.sha }}
          WORKFLOW_URL: ${{ env.WORKFLOW_URL }}
          NOISE_THRESHOLD: ${{ env.NOISE_THRESHOLD }}
        with:
          filename: .github/PERF_REGRESSION.md