From c10048a08044fc0d5052146ff198c55f917b547d Mon Sep 17 00:00:00 2001
From: Vivek Miglani
Date: Mon, 21 Oct 2024 09:22:54 -0700
Subject: [PATCH] Add auto retries for Captum OSS GitHub Actions (#1408)

Summary:
We frequently see sporadic failures in Captum GitHub Actions test
workflows, often related to package downloads, HTTP errors, conda
environment setup, etc. We add auto-retries to automatically retry failed
workflows rather than needing to do this manually.

The retry workflow and the jobs that dispatch it request only the token
permissions they need, and the run id is passed to the shell through the
environment rather than being expanded into the script.

Differential Revision: D64693773
---
Review note: the blob ids on the index lines predate the review edits to
the added lines.

 .github/workflows/retry.yml                  | 32 ++++++++++++++++++++++++++++++++
 .github/workflows/test-conda-cpu.yml         | 19 +++++++++++++++++++
 .github/workflows/test-pip-cpu-with-mypy.yml | 19 +++++++++++++++++++
 .github/workflows/test-pip-cpu.yml           | 19 +++++++++++++++++++
 .github/workflows/test-pip-gpu.yml           | 19 +++++++++++++++++++
 5 files changed, 108 insertions(+)
 create mode 100644 .github/workflows/retry.yml

diff --git a/.github/workflows/retry.yml b/.github/workflows/retry.yml
new file mode 100644
index 0000000000..8acb101f9f
--- /dev/null
+++ b/.github/workflows/retry.yml
@@ -0,0 +1,32 @@
+# Reruns the failed jobs of an earlier workflow run. Dispatched by the
+# `auto-retry` job of the test workflows, which passes its own run id.
+name: Retry Test
+on:
+  workflow_dispatch:
+    inputs:
+      run_id:
+        description: ID of the workflow run whose failed jobs are rerun
+        required: true
+        type: string
+jobs:
+  rerun-on-failure:
+    # Least privilege instead of write-all: rerunning needs actions: write,
+    # and `gh run watch` also reads check-run annotations.
+    permissions:
+      actions: write
+      checks: read
+    runs-on: ubuntu-latest
+    steps:
+      - name: rerun ${{ inputs.run_id }}
+        env:
+          GH_REPO: ${{ github.repository }}
+          GH_TOKEN: ${{ github.token }}
+          GH_DEBUG: api
+          # Handed over via the environment so the input is never expanded
+          # into the script text itself.
+          RUN_ID: ${{ inputs.run_id }}
+        run: |
+          # Wait for the dispatching run to finish before asking for the
+          # rerun of its failed jobs.
+          gh run watch "$RUN_ID" > /dev/null 2>&1
+          gh run rerun "$RUN_ID" --failed
diff --git a/.github/workflows/test-conda-cpu.yml b/.github/workflows/test-conda-cpu.yml
index 3295edcca8..ab749a1bc9 100644
--- a/.github/workflows/test-conda-cpu.yml
+++ b/.github/workflows/test-conda-cpu.yml
@@ -32,3 +32,22 @@ jobs:

         # Run Tests
         python3 -m pytest -ra --cov=. --cov-report term-missing
+
+  auto-retry:
+    name: Auto retry on failure
+    needs: tests
+    # Only the first attempt schedules a rerun, so a run is retried at most once.
+    if: failure() && fromJSON(github.run_attempt) < 2
+    # Dispatching retry.yml with the job token requires write access to Actions.
+    permissions:
+      actions: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Start rerun workflow
+        env:
+          GH_REPO: ${{ github.repository }}
+          GH_TOKEN: ${{ github.token }}
+          GH_DEBUG: api
+        run: |
+          gh workflow run retry.yml \
+            -F run_id=${{ github.run_id }}
diff --git a/.github/workflows/test-pip-cpu-with-mypy.yml b/.github/workflows/test-pip-cpu-with-mypy.yml
index 7e166261e4..1d4921d40d 100644
--- a/.github/workflows/test-pip-cpu-with-mypy.yml
+++ b/.github/workflows/test-pip-cpu-with-mypy.yml
@@ -25,3 +25,22 @@ jobs:
         ./scripts/run_mypy.sh
         # Run Tests
         python3 -m pytest -ra --cov=. --cov-report term-missing
+
+  auto-retry:
+    name: Auto retry on failure
+    needs: tests
+    # Only the first attempt schedules a rerun, so a run is retried at most once.
+    if: failure() && fromJSON(github.run_attempt) < 2
+    # Dispatching retry.yml with the job token requires write access to Actions.
+    permissions:
+      actions: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Start rerun workflow
+        env:
+          GH_REPO: ${{ github.repository }}
+          GH_TOKEN: ${{ github.token }}
+          GH_DEBUG: api
+        run: |
+          gh workflow run retry.yml \
+            -F run_id=${{ github.run_id }}
diff --git a/.github/workflows/test-pip-cpu.yml b/.github/workflows/test-pip-cpu.yml
index a83f18e05d..849f6ae196 100644
--- a/.github/workflows/test-pip-cpu.yml
+++ b/.github/workflows/test-pip-cpu.yml
@@ -35,3 +35,22 @@ jobs:
         ./scripts/install_via_pip.sh ${{ matrix.pytorch_args }} ${{ matrix.transformers_args }}
         # Run Tests
         python3 -m pytest -ra --cov=. --cov-report term-missing
+
+  auto-retry:
+    name: Auto retry on failure
+    needs: tests
+    # Only the first attempt schedules a rerun, so a run is retried at most once.
+    if: failure() && fromJSON(github.run_attempt) < 2
+    # Dispatching retry.yml with the job token requires write access to Actions.
+    permissions:
+      actions: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Start rerun workflow
+        env:
+          GH_REPO: ${{ github.repository }}
+          GH_TOKEN: ${{ github.token }}
+          GH_DEBUG: api
+        run: |
+          gh workflow run retry.yml \
+            -F run_id=${{ github.run_id }}
diff --git a/.github/workflows/test-pip-gpu.yml b/.github/workflows/test-pip-gpu.yml
index 117f515f48..3b0ff90cf0 100644
--- a/.github/workflows/test-pip-gpu.yml
+++ b/.github/workflows/test-pip-gpu.yml
@@ -30,3 +30,22 @@ jobs:

         # Run Tests
         python3 -m pytest -ra --cov=. --cov-report term-missing
+
+  auto-retry:
+    name: Auto retry on failure
+    needs: tests
+    # Only the first attempt schedules a rerun, so a run is retried at most once.
+    if: failure() && fromJSON(github.run_attempt) < 2
+    # Dispatching retry.yml with the job token requires write access to Actions.
+    permissions:
+      actions: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Start rerun workflow
+        env:
+          GH_REPO: ${{ github.repository }}
+          GH_TOKEN: ${{ github.token }}
+          GH_DEBUG: api
+        run: |
+          gh workflow run retry.yml \
+            -F run_id=${{ github.run_id }}