#0: Added a naive batching mechanism for some models in the tests folder #740

Open · wants to merge 47 commits into base: main
Changes shown from 42 commits.

Commits (47):
bd6ec3c
#0: Added a naive batching mechanism for some models in the tests folder
jbedichekTT Feb 4, 2025
1b79c98
Added workflow for testing max batch sizes
jbedichekTT Feb 6, 2025
485a032
Empty workflow placeholder
jbedichekTT Feb 6, 2025
48e330e
Reduce verbosity of tests
jbedichekTT Feb 6, 2025
d616377
Merge branch 'main' into jb/adding-batching-to-tests
jbedichekTT Feb 7, 2025
c4a6a5a
First max batch size search (unoptimized)
jbedichekTT Feb 19, 2025
253f534
Fixed issue with GitHub triggering
jbedichekTT Feb 19, 2025
9a7438d
Workflow troubleshooting
jbedichekTT Feb 19, 2025
1f2b60f
Fixed PYTHONPATH issue
jbedichekTT Feb 19, 2025
2bb52a2
Updated workflow environment var
jbedichekTT Feb 19, 2025
28a6369
Fixing workflow config
jbedichekTT Feb 19, 2025
4748595
Fixing variable scope
jbedichekTT Feb 20, 2025
f8f7112
Adjust bounds of batch search
jbedichekTT Feb 20, 2025
de0e23d
added adaptive search bounds
jbedichekTT Feb 20, 2025
47e2ea0
converting end-to-end test
jbedichekTT Feb 20, 2025
c8c0af6
hand_landmark fix
jbedichekTT Feb 21, 2025
e11510d
syntax fix for search script
jbedichekTT Feb 21, 2025
bf0027d
syntax fix for search script
jbedichekTT Feb 21, 2025
4e31900
Update dependencies to 0.56.0-rc9 (#745)
ayerofieiev-tt Feb 7, 2025
ea160c9
Update requirements.txt
ayerofieiev-tt Feb 8, 2025
b406a8c
Update requirements.txt
ayerofieiev-tt Feb 8, 2025
250cddc
Update requirements.txt
ayerofieiev-tt Feb 8, 2025
fe6bb36
Update update-ttnn-wheel.yaml
ayerofieiev-tt Feb 9, 2025
a757c30
Update requirements.txt
ayerofieiev-tt Feb 21, 2025
f6c0ded
deleted unnecessary files
jbedichekTT Feb 21, 2025
caa813e
reformatting and adding default batch size
jbedichekTT Feb 24, 2025
13964e8
run additional tests
jbedichekTT Feb 24, 2025
036e966
refactoring
jbedichekTT Feb 24, 2025
907751f
removing extraneous imports
jbedichekTT Feb 24, 2025
74dd352
removing extraneous imports
jbedichekTT Feb 24, 2025
bf2cd30
conftest argument fix
jbedichekTT Feb 24, 2025
3ba6ef6
utils typo fix
jbedichekTT Feb 24, 2025
d92db1f
reconfigure test
jbedichekTT Feb 25, 2025
68fbb73
refactoring tests and only searching even batches
jbedichekTT Feb 25, 2025
8ffaeda
search script fix
jbedichekTT Feb 25, 2025
5d710ec
search script grouping fix
jbedichekTT Feb 25, 2025
894b00f
reduced verbosity
jbedichekTT Feb 26, 2025
153b274
further reducing verbosity
jbedichekTT Feb 26, 2025
e3a5db9
modified search to exit on highest even value
jbedichekTT Feb 26, 2025
bce3f43
reduced num iterations
jbedichekTT Feb 26, 2025
1f6a8c2
exclude tests
jbedichekTT Feb 27, 2025
1204887
pruning tests for next iteration
jbedichekTT Feb 27, 2025
8403376
next batch of tests
jbedichekTT Feb 28, 2025
95f3367
bounds reconfig
jbedichekTT Feb 28, 2025
15c386e
arithmetic fix
jbedichekTT Feb 28, 2025
6f70217
reset tests
jbedichekTT Feb 28, 2025
b2493e3
isolating BERT test
jbedichekTT Feb 28, 2025
212 changes: 200 additions & 12 deletions .github/workflows/batch-experiment.yaml
@@ -1,17 +1,205 @@
name: Maximum Batch Size Experiment

on:
workflow_dispatch:
inputs:
branch:
description: "Branch name"
required: true
type: string

# removed: the "say-hello" placeholder job (runs-on: ubuntu-latest, a single "Print Hello" step running echo "hello")
permissions:
actions: read
contents: write
pages: write
id-token: write
pull-requests: write

jobs:
model-tests:
runs-on: ["in-service"]
strategy:
matrix:
group: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
env:
pytest_verbosity: 0
pytest_report_title: "⭐️ Model Tests"
PYTHONPATH: ${{ github.workspace }}
steps:
- uses: actions/checkout@v4
with:
lfs: true
- uses: ./.github/actions/common_repo_setup

- name: Run Model Tests in Parallel
shell: bash
run: |
set +e
num_iterations=1
file_prefix="tests/"

# Assign tests dynamically based on matrix group
TOTAL_GROUPS=24
CURRENT_GROUP=${{ matrix.group }}

mapfile -t test_ids_verbose < <(pytest --collect-only -q -m converted_end_to_end --ignore=tests/models/autoencoder_conv tests/models/ | awk -F '::' '{print $1}' | sort -u)
test_ids=()

# Array of tests to exclude
exclude_tests=(
"models/mnist/test_mnist.py"
"models/MobileNetV2/test_MobileNetV2.py"
"models/openpose/test_openpose_v2.py"
"models/resnet/test_resnet.py"
"models/resnet50/test_resnet50.py"
"models/roberta/test_roberta.py"
"models/unet/test_unet.py"
"models/hand_landmark/test_hand_landmark.py"
"models/squeeze_bert/test_squeeze_bert.py"
"models/llama/test_llama.py"
"models/timm/test_timm_image_classification.py"
"models/torchvision/test_torchvision_image_classification.py"
"models/unet/test_unet.py"
"models/albert/test_albert_question_answering.py"
"models/albert/test_alert_sequence_classification.py"
"models/albert/test_albert_token_classification.py"
"models/albert/test_albert_masked_lm.py"
"models/unet_carvana/test_unet_carvana.py"
"models/autoencoder_linear/test_autoencoder_linear.py"
"models/perceiver_io/test_perceiver_io.py"
)

# Preprocess file paths
for file in "${test_ids_verbose[@]}"; do
if [[ "$file" == models/* ]]; then
# Check if the file is in the exclude_tests array
skip=false
for exclude_test in "${exclude_tests[@]}"; do
if [[ "$file" == "$exclude_test" ]]; then
skip=true
break
fi
done

if ! $skip; then
test_ids+=("$file")
fi
fi
done

TOTAL_TESTS=${#test_ids[@]}
TESTS_PER_GROUP=1

START_INDEX=$(( (CURRENT_GROUP - 1) * TESTS_PER_GROUP ))
END_INDEX=$(( CURRENT_GROUP * TESTS_PER_GROUP ))

if (( END_INDEX > TOTAL_TESTS )); then
END_INDEX=$TOTAL_TESTS
fi

# Slice the test array for the current group
group_test_ids=("${test_ids[@]:START_INDEX:TESTS_PER_GROUP}")
echo "All tests ($TOTAL_TESTS): ${test_ids[@]}"
echo "Running tests in group $CURRENT_GROUP..."
echo "Tests assigned to this group: ${group_test_ids[@]}"

failed_batch_and_test_array=()
max_batch_sizes_array=()
counter=0

# Define function for finding max batch size when uninitialized
find_max_batch_size_uninitialized() {
local test_path=$1
batch_range_lower=$2
batch_range_upper=$3
local batch_range=($batch_range_lower $batch_range_upper)

not_found=1
local min_failed_batch=0
local max_successful_batch=0
local batch_size_to_test=0

while (( not_found )); do
if (( batch_size_to_test == batch_range_upper - 2 )); then
batch_range_upper=$(( batch_range_upper * 2 ))
batch_range[1]=$batch_range_upper # Update the upper bound in the array
echo "Expanding upper bound to: $batch_range_upper"
fi
batch_size_to_test=$(( (batch_range[0] + batch_range[1]) / 2 ))
if (( batch_size_to_test % 2 != 0 )); then
batch_size_to_test=$(( batch_size_to_test - 1 ))  # arithmetic decrement, not string concatenation
fi

echo "Testing with batch size $batch_size_to_test"

python3 -m pytest "$test_path" -s --batch_size $batch_size_to_test --report_nth_iteration $num_iterations
exit_code=$?

if (( exit_code != 0 )); then
batch_range[1]=$batch_size_to_test
min_failed_batch=$batch_size_to_test
else
batch_range[0]=$batch_size_to_test
max_successful_batch=$batch_size_to_test
fi

if (( min_failed_batch - max_successful_batch == 2)); then
not_found=0
fi
done
echo "min failed batch: $min_failed_batch"
echo "Max batch size for $test_path: $max_successful_batch"
max_batch_sizes_array+=("$max_successful_batch")
failed_batch_and_test_array+=("$max_successful_batch $test_path")
}

# Define function for finding max batch size when initialized
find_max_batch_size_initialized() {
test_path=$1
batch_range_lower=$2
batch_range_upper=$3
batch_range=($batch_range_lower $batch_range_upper)

prior_batches_array=($(printf "%s\n" "${max_batch_sizes_array[@]}" | sort -n))
not_found=1
min_failed_batch=0
max_successful_batch=0
first_iter=1

while (( not_found )); do
if (( first_iter )); then
median_index=$(( ${#prior_batches_array[@]} / 2 ))
batch_size_to_test=${prior_batches_array[$median_index]}
first_iter=0
else
batch_size_to_test=$(( (batch_range[0] + batch_range[1]) / 2 ))
fi

echo "Testing with batch size $batch_size_to_test"
python3 -m pytest "$test_path" -s --batch_size $batch_size_to_test --report_nth_iteration $num_iterations
exit_code=$?

if (( exit_code != 0 )); then
batch_range[1]=$batch_size_to_test
min_failed_batch=$batch_size_to_test
else
batch_range[0]=$batch_size_to_test
max_successful_batch=$batch_size_to_test
fi

if (( min_failed_batch - max_successful_batch == 1)); then
not_found=0
fi
done

echo "Max batch size $max_successful_batch found for $test_path"
max_batch_sizes_array+=("$max_successful_batch")
failed_batch_and_test_array+=("$max_successful_batch $test_path")
}

# Main loop to distribute test runs across groups
for t in "${group_test_ids[@]}"; do
if [ -z "$t" ]; then
continue
fi
echo "Running test: $t"
file_path="${file_prefix}${t%%::*}"
l_bound=1
u_bound=256
find_max_batch_size_uninitialized "$file_path" "$l_bound" "$u_bound"
done

echo "Final Max Batches: ${failed_batch_and_test_array[@]}"
exit 0
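Stripped of shell detail, find_max_batch_size_uninitialized is a bisection over even batch sizes with an adaptive ceiling: probe the midpoint, double the upper bound while every probe still passes, and stop once the smallest failing batch and the largest passing batch are exactly two apart. A minimal Python sketch of the same search, where run_test is a hypothetical stand-in for the pytest invocation (the min_failed guard on expansion is a small tightening over the shell version, which can widen the window even after a failure is known):

def find_max_batch_size(run_test, lower=1, upper=256):
    # run_test(batch) -> True on success; hypothetical stand-in for pytest
    min_failed, max_ok = 0, 0
    while True:
        if min_failed == 0 and max_ok >= upper - 2:
            upper *= 2  # every probe so far passed: widen the window
        mid = (lower + upper) // 2
        if mid % 2:
            mid -= 1  # probe even batch sizes only
        if run_test(mid):
            lower = max_ok = mid
        else:
            upper = min_failed = mid
        if min_failed - max_ok == 2:
            return max_ok  # largest even batch observed to pass

# e.g. find_max_batch_size(lambda b: b <= 37) returns 36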
Empty file.
11 changes: 7 additions & 4 deletions .github/workflows/update-ttnn-wheel.yaml
@@ -28,10 +28,13 @@ jobs:
id: update-requirements
run: |
latest_version=${{ steps.fetch_release.outputs.release }}
latest_version_short=$(echo $latest_version | sed 's/-rc/rc/')
sed -i '/^https:\/\/github\.com\/tenstorrent\/tt-metal\/releases\//d' requirements.txt
echo "ttnn@https://github.com/tenstorrent/tt-metal/releases/download/v$latest_version/ttnn-$latest_version_short+any-cp38-cp38-linux_x86_64.whl" >> requirements.txt

# Remove any existing ttnn lines (adjust the regex if needed)
sed -i '/^ttnn @ https:\/\/github\.com\/tenstorrent\/tt-metal\/releases\//d' requirements.txt

# Append the two lines for the different python versions.
echo "ttnn @ https://github.com/tenstorrent/tt-metal/releases/download/v${latest_version}/ttnn-$latest_version_short+any-cp38-cp38-linux_x86_64.whl ; python_version==\"3.8\"" >> requirements.txt
echo "ttnn @ https://github.com/tenstorrent/tt-metal/releases/download/v${latest_version}/ttnn-$latest_version_short+any-cp310-cp310-linux_x86_64.whl ; python_version==\"3.10\"" >> requirements.txt

- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
id: create-pr
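For clarity, the update-requirements step boils down to the following transformation; this is a Python rendering of the sed/echo logic above, not part of the workflow itself:

def ttnn_requirement_lines(version):
    # "0.56.0-rc37" -> "0.56.0rc37", matching sed 's/-rc/rc/'
    short = version.replace("-rc", "rc")
    base = f"https://github.com/tenstorrent/tt-metal/releases/download/v{version}"
    return [
        f'ttnn @ {base}/ttnn-{short}+any-cp38-cp38-linux_x86_64.whl ; python_version=="3.8"',
        f'ttnn @ {base}/ttnn-{short}+any-cp310-cp310-linux_x86_64.whl ; python_version=="3.10"',
    ]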
24 changes: 24 additions & 0 deletions batch-tests-to-run.txt
@@ -0,0 +1,24 @@
tests/models/MobileNetV2/test_MobileNetV2.py
tests/models/albert/test_albert_masked_lm.py
tests/models/albert/test_albert_question_answering.py
tests/models/albert/test_albert_sequence_classification.py
tests/models/albert/test_albert_token_classification.py
tests/models/autoencoder_linear/test_autoencoder_linear.py
tests/models/beit/test_beit_image_classification.py
tests/models/bert/test_bert.py
tests/models/bloom/test_bloom.py
tests/models/distilbert/test_distilbert.py
tests/models/dpr/test_dpr.py
tests/models/llama/test_llama.py
tests/models/mlpmixer/test_mlpmixer.py
tests/models/mnist/test_mnist.py
tests/models/openpose/test_openpose_v2.py
tests/models/perceiver_io/test_perceiver_io.py
tests/models/resnet/test_resnet.py
tests/models/resnet50/test_resnet50.py
tests/models/roberta/test_roberta.py
tests/models/squeeze_bert/test_squeeze_bert.py
tests/models/unet/test_unet.py
tests/models/unet_brain/test_unet_brain.py
tests/models/unet_carvana/test_unet_carvana.py
tests/models/yolov5/test_yolov5.py
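Note that the workflow does not read this file directly; it collects tests with pytest and slices the resulting array by matrix group. With TESTS_PER_GROUP=1 and 24 groups, each group runs exactly one of the 24 tests listed here. A sketch of the slicing arithmetic, mirroring START_INDEX/END_INDEX in the workflow:

def tests_for_group(test_ids, group, per_group=1):
    # group is 1-based, matching the workflow's matrix.group values
    start = (group - 1) * per_group
    return test_ids[start:start + per_group]

# with per_group=1, group N runs the Nth collected test
# (or nothing, if N exceeds the number of collected tests)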
9 changes: 6 additions & 3 deletions requirements.txt
@@ -1,7 +1,10 @@
torch@https://download.pytorch.org/whl/cpu/torch-2.2.1%2Bcpu-cp38-cp38-linux_x86_64.whl
torchvision@https://download.pytorch.org/whl/cpu/torchvision-0.17.1%2Bcpu-cp38-cp38-linux_x86_64.whl
torch==2.2.1+cpu
torchvision==0.17.1+cpu

tabulate==0.9.0
networkx==3.1
graphviz
matplotlib==3.7.1
ttnn@https://github.com/tenstorrent/tt-metal/releases/download/v0.56.0-rc9/ttnn-0.56.0rc9+any-cp38-cp38-linux_x86_64.whl

ttnn @ https://github.com/tenstorrent/tt-metal/releases/download/v0.56.0-rc37/ttnn-0.56.0rc37+any-cp38-cp38-linux_x86_64.whl ; python_version=="3.8"
ttnn @ https://github.com/tenstorrent/tt-metal/releases/download/v0.56.0-rc37/ttnn-0.56.0rc37+any-cp310-cp310-linux_x86_64.whl ; python_version=="3.10"
13 changes: 10 additions & 3 deletions tests/conftest.py
@@ -36,6 +36,7 @@ def pytest_addoption(parser):
help="Run up to the specified iteration count and report metrics based on this iteration.",
)
parser.addoption("--gen_op_accuracy_tests", action="store_true")
parser.addoption("--batch_size", action="store", default=None, help="Batch size for testing")


@pytest.fixture(scope="session")
@@ -70,6 +71,11 @@ def device():
ttnn.close_device(device)


@pytest.fixture(scope="session")
def batch_size(request):
return request.config.getoption("--batch_size")


def get_dispatch_core_type():
# Instead of conditionally returning WORKER or ETH, here we always return ETH
# Without setting this property, we get fewer cores available on N300 than on N150, which might lead to inconsistent and suboptimal results
@@ -169,14 +175,15 @@ def compile_and_run(device, reset_torch_dynamo, request):
gen_graphviz=False,
run_mem_analysis=False,
metrics_path=model_name,
verbose=True,
verbose=False,
gen_op_accuracy_tests=request.config.getoption("--gen_op_accuracy_tests"),
)

for idx in range(int(request.config.getoption("--report_nth_iteration"))):
start = time.perf_counter() * 1000
# Don't need to reset options if inputs don't change because of cache
outputs_after = model_tester.test_model(as_ttnn=True, option=option)
# return
end = time.perf_counter() * 1000
run_time = end - start
if idx == 0:
@@ -196,8 +203,8 @@ def compile_and_run(device, reset_torch_dynamo, request):
model_name, option._aten_fx_graphs, option._out_fx_graphs, option._all_inputs
)

if len(option._out_fx_graphs) > 0:
option._out_fx_graphs[0].print_tabular()
# if len(option._out_fx_graphs) > 0:
Member: restore?

Collaborator Author: Will restore

# option._out_fx_graphs[0].print_tabular()

if model_name not in ["speecht5-tts", "ssd300_vgg16", "retinanet_resnet50_fpn_v2"]:
accuracy = calculate_accuracy(outputs, outputs_after)
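One subtlety in the fixture as written: with action="store" and no type=, --batch_size reaches tests as a raw string (or None), so the tester has to cast it before sizing tensors. A self-contained sketch of a defensive variant; the option and fixture names mirror the diff, while the int cast and the consuming test are illustrative, not part of the PR:

import pytest

# in tests/conftest.py
def pytest_addoption(parser):
    parser.addoption("--batch_size", action="store", default=None,
                     help="Batch size for testing")

@pytest.fixture(scope="session")
def batch_size(request):
    raw = request.config.getoption("--batch_size")
    return int(raw) if raw is not None else None  # suggested cast

# in a test module
def test_uses_batch_size(batch_size):
    effective = batch_size if batch_size is not None else 1  # model default
    assert effective >= 1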
5 changes: 3 additions & 2 deletions tests/models/MobileNetV2/test_MobileNetV2.py
@@ -32,11 +32,12 @@ def _load_inputs(self):
["eval"],
)
@pytest.mark.converted_end_to_end
def test_MobileNetV2(record_property, mode):
def test_MobileNetV2(record_property, mode, batch_size):
model_name = "MobileNetV2"
record_property("model_name", model_name)
record_property("mode", mode)
tester = ThisTester(model_name, mode)

tester = ThisTester(model_name, mode, batch_size)
results = tester.test_model()
if mode == "eval":
# Print the top 5 predictions
10 changes: 7 additions & 3 deletions tests/models/albert/test_albert_masked_lm.py
@@ -3,7 +3,7 @@
from transformers import AutoTokenizer, AlbertForMaskedLM
import torch
import pytest
from tests.utils import ModelTester
from tests.utils import ModelTester, process_batched_logits


class ThisTester(ModelTester):
@@ -40,15 +40,19 @@ def append_fake_loss_function(self, outputs):
"albert/albert-xxlarge-v2",
],
)
def test_albert_masked_lm(record_property, model_name, mode):

# @pytest.mark.converted_end_to_end
def test_albert_masked_lm(record_property, model_name, mode, batch_size):
record_property("model_name", model_name)
record_property("mode", mode)

tester = ThisTester(model_name, mode)
tester = ThisTester(model_name, mode, batch_size)
results = tester.test_model()

if mode == "eval":
# retrieve index of [MASK]

results.logits = process_batched_logits(results.logits, batch_size)
logits = results.logits
mask_token_index = (tester.inputs.input_ids == tester.tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]
predicted_token_id = logits[0, mask_token_index].argmax(axis=-1)
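The albert diff imports process_batched_logits from tests.utils without showing its definition. Judging from the call site, it reduces batched logits back to a single-sample shape so the existing [MASK] indexing keeps working. A hypothetical sketch of such a helper; the real implementation in tests/utils may differ:

import torch

def process_batched_logits(logits: torch.Tensor, batch_size):
    # Hypothetical: when the same input is replicated across the batch,
    # every row carries the same prediction, so keep row 0 and preserve
    # the [1, seq_len, vocab_size] shape single-sample indexing expects.
    if batch_size is None or int(batch_size) <= 1:
        return logits
    return logits[:1]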