Extract Full Model Execution Tests into Nightly Tests (#379)
### Ticket
#378

### Problem description
Full model execution tests are only attempted in push/PR-triggered workflows, but not in nightlies. This complicates queries and leaves no single source of truth for the actual full model execution status on tt-torch main.

### What's changed
Duplicate the workflow in run-tests.yml into a self-contained job that can be run in nightly tests. Execution tests are refactored to run in parallel.

### Checklist
- [x] New/Existing tests provide coverage for changes
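Because the new job exposes a `workflow_call` trigger, a scheduled nightly pipeline can reuse it directly. Below is a minimal sketch of such a caller; the caller file name, the cron schedule, and the path to the new workflow file are illustrative assumptions, not part of this commit:

```yaml
# Hypothetical nightly caller (illustration only; file name, schedule, and
# workflow path are assumed, not taken from this commit).
name: Nightly
on:
  schedule:
    - cron: "0 6 * * *"  # assumed nightly time
jobs:
  full-model-execution:
    # Reuses the new job through its workflow_call trigger; secrets such as
    # HF_TOKEN and CODECOV_TOKEN are passed through with `secrets: inherit`.
    uses: ./.github/workflows/run-full-model-execution-tests.yml
    secrets: inherit
```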
1 parent 186ad28, commit 5fbaf8d. Showing 2 changed files with 239 additions and 0 deletions.
@@ -0,0 +1,235 @@
name: Run Full Model Execution Tests

on:
  workflow_dispatch:
  workflow_call:
  workflow_run:
    workflows: [Build] # backref to run-build as dependency
    types: [completed]

jobs:
  tests:
    timeout-minutes: 90 # current onPR time runs ~60m
    strategy:
      fail-fast: false
      matrix:
        build: [
          {
            runs-on: wormhole_b0, name: "autoencoder", tests: "
              tests/models/autoencoder_linear/test_autoencoder_linear.py::test_autoencoder_linear[full-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "distilbert", tests: "
              tests/models/distilbert/test_distilbert.py::test_distilbert[full-distilbert-base-uncased-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "mlpmixer", tests: "
              tests/models/mlpmixer/test_mlpmixer.py::test_mlpmixer[full-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "mnist", tests: "
              tests/models/mnist/test_mnist.py::test_mnist_train[full-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "mobilenet", tests: "
              tests/models/MobileNetV2/test_MobileNetV2.py::test_MobileNetV2[full-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "openpose", tests: "
              tests/models/openpose/test_openpose_v2.py::test_openpose_v2[full-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "perceiver_io", tests: "
              tests/models/perceiver_io/test_perceiver_io.py::test_perceiver_io[full-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "resnet", tests: "
              tests/models/resnet/test_resnet.py::test_resnet[full-eval]
              tests/models/resnet50/test_resnet50.py::test_resnet[full-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "yolov3", tests: "
              tests/models/yolov3/test_yolov3.py::test_yolov3[full-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "albert_masked_lm", tests: "
              tests/models/albert/test_albert_masked_lm.py::test_albert_masked_lm[full-albert/albert-xxlarge-v2-eval]
            "
          },
          {
            runs-on: wormhole_b0, name: "llama_3b", tests: "
              tests/models/llama/test_llama_3b.py::test_llama_3b[full-meta-llama/Llama-3.2-3B-eval]
            "
          },
          # {
          #   runs-on: wormhole_b0, name: "torchvision_image_classification", tests: "
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-mobilenet_v2]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-mobilenet_v3_small]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-mobilenet_v3_large]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-resnet18]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-resnet34]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-resnet50]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-resnet101]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-resnet152]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-resnext50_32x4d]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-resnext101_32x8d]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-resnext101_64x4d]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-wide_resnet50_2]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-wide_resnet101_2]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_y_400mf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_y_800mf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_y_1_6gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_y_3_2gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_y_8gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_y_16gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_y_32gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_x_400mf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_x_800mf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_x_1_6gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_x_3_2gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_x_8gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_x_16gf]
          #     tests/models/torchvision/test_torchvision_image_classification.py::test_torchvision_image_classification[full-eval-regnet_x_32gf]
          #   "
          # },

        ]
    runs-on:
      - ${{ matrix.build.runs-on }}

    name: "test execution (${{ matrix.build.runs-on }}, ${{ matrix.build.name }})"

    container:
      image: ghcr.io/tenstorrent/tt-torch/tt-torch-ci-ubuntu-22-04:latest
      options: --user root --device /dev/tenstorrent/0 --shm-size=4gb
      volumes:
        - /dev/hugepages:/dev/hugepages
        - /dev/hugepages-1G:/dev/hugepages-1G
        - /etc/udev/rules.d:/etc/udev/rules.d
        - /lib/modules:/lib/modules
        - /opt/tt_metal_infra/provisioning/provisioning_env:/opt/tt_metal_infra/provisioning/provisioning_env
        - /mnt/dockercache:/mnt/dockercache
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
          lfs: true

      - name: Fetch job id
        id: fetch-job-id
        uses: tenstorrent/tt-github-actions/.github/actions/job_id@main
        with:
          job_name: "test execution (${{ matrix.build.runs-on }}, ${{ matrix.build.name }})" # reference above tests.name

      - name: Set reusable strings
        id: strings
        shell: bash
        env:
          JOB_ID: ${{ steps.fetch-job-id.outputs.job_id }}

        run: |
          echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT"
          echo "install-dir=$(pwd)/install" >> "$GITHUB_OUTPUT"
          echo "dist-dir=$(pwd)/dist" >> "$GITHUB_OUTPUT"
          echo "test_report_path_models=report_models_$JOB_ID.xml" >> "$GITHUB_OUTPUT"
      - name: Use build artifacts
        uses: actions/download-artifact@v4
        with:
          name: install-artifacts
          path: ${{ steps.strings.outputs.install-dir }}

      - name: 'Untar install directory'
        shell: bash
        working-directory: ${{ steps.strings.outputs.install-dir }}
        run: |
          tar xvf artifact.tar
          mkdir -p ${{ steps.strings.outputs.dist-dir }}
          mv wheels/* ${{ steps.strings.outputs.dist-dir }}
      - name: install tt-torch
        shell: bash
        run: |
          source env/activate
          pip install ${{ steps.strings.outputs.dist-dir }}/*.whl
      - name: Run Full Model Execution Tests
        env:
          HF_HOME: /mnt/dockercache/huggingface
          TORCH_HOME: /mnt/dockercache/torch
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        shell: bash
        run: |
          # Make sure we don't stop on first failure
          set +e
          tests_list=$(echo "${{ matrix.build.tests }}" | xargs -n1 echo)
          total_tests=$(echo "$tests_list" | wc -l)
          failures=0
          counter=0
          source env/activate
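          # Run each test file in its own pytest invocation so one model's failure
          # does not stop the remaining tests; coverage accumulates via --cov-append.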
          for test_case in $tests_list; do
            counter=$((counter + 1))
            pytest -v "$test_case" \
              --junit-xml=${{ steps.strings.outputs.test_report_path_models }} \
              --cov=tt_torch --cov-report term --cov-report xml:coverage.xml --cov-append
            exit_code=$?
            if [ $exit_code -eq 0 ]; then
              echo "[ $counter / $total_tests ] $test_case PASSED"
            else
              echo "[ $counter / $total_tests ] $test_case FAILED"
              failures=$((failures + 1))
            fi
          done
          # If any test failed, exit nonzero to mark the job as failed
          if [ $failures -ne 0 ]; then
            echo "Total failures: $failures"
            exit 1
          fi
      - name: Upload Test Report Models
        uses: actions/upload-artifact@v4
        if: success() || failure()
        with:
          name: test-reports-models-${{ matrix.build.runs-on }}-${{ matrix.build.name }}-${{ steps.fetch-job-id.outputs.job_id }}
          path: ${{ steps.strings.outputs.test_report_path_models }}

      - name: Show Test Report
        uses: mikepenz/action-junit-report@v5
        if: success() || failure()
        with:
          report_paths: ${{ steps.strings.outputs.test_report_path_models }}
          check_name: TT-Torch Tests
          comment: true
          updateComment: false
          detailed_summary: true
          group_suite: true
      - name: Upload coverage reports to Codecov
        if: success() || failure()
        continue-on-error: true
        uses: codecov/codecov-action@v5
        with:
          files: coverage.info,.coverage,coverage.xml
          # disable_search: true
          token: ${{ secrets.CODECOV_TOKEN }}

      - name: Upload test results to Codecov
        if: success() || failure()
        continue-on-error: true
        uses: codecov/test-results-action@v1
        with:
          files: ${{ steps.strings.outputs.test_report_path_models }}
          disable_search: true
          token: ${{ secrets.CODECOV_TOKEN }}