diff --git a/.github/workflows/batch-experiment.yaml b/.github/workflows/batch-experiment.yaml index 1e5bed61c..1d9722478 100644 --- a/.github/workflows/batch-experiment.yaml +++ b/.github/workflows/batch-experiment.yaml @@ -1,17 +1,209 @@ -name: Maximum Batch Size Experiment - on: workflow_dispatch: - inputs: - branch: - description: "Branch name" - required: true - type: string -jobs: - say-hello: - runs-on: ubuntu-latest +permissions: + actions: read + contents: write + pages: write + id-token: write + pull-requests: write +jobs: + model-tests: + runs-on: ["in-service"] + strategy: + matrix: + group: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + env: + pytest_verbosity: 0 + pytest_report_title: "⭐️ Model Tests" + PYTHONPATH: ${{ github.workspace }} steps: - - name: Print Hello - run: echo "hello" \ No newline at end of file + - uses: actions/checkout@v4 + with: + lfs: true + - uses: ./.github/actions/common_repo_setup + + - name: Run Model Tests in Parallel + shell: bash + run: | + set +e + num_iterations=1 + file_prefix="tests/" + + # Assign tests dynamically based on matrix group + TOTAL_GROUPS=24 + CURRENT_GROUP=${{ matrix.group }} + + mapfile -t test_ids_verbose < <(pytest --collect-only -q -m converted_end_to_end --ignore=tests/models/autoencoder_conv tests/models/ | awk -F '::' '{print $1}' | sort -u) + test_ids=() + + # Array of tests to exclude + exclude_tests=( + "models/mnist/test_mnist.py" + "models/MobileNetV2/test_MobileNetV2.py" + "models/openpose/test_openpose_v2.py" + "models/resnet/test_resnet.py" + "models/resnet50/test_resnet50.py" + "models/roberta/test_roberta.py" + "models/hand_landmark/test_hand_landmark.py" + "models/squeeze_bert/test_squeeze_bert.py" + "models/llama/test_llama.py" + "models/timm/test_timm_image_classification.py" + "models/torchvision/test_torchvision_image_classification.py" + "models/unet/test_unet.py" + "models/albert/test_albert_question_answering.py" + "models/albert/test_alert_sequence_classification.py" + "models/albert/test_albert_token_classification.py" + "models/albert/test_albert_masked_lm.py" + "models/unet_carvana/test_unet_carvana.py" + "models/unet_brain/test_unet_brain.py" + "models/autoencoder_linear/test_autoencoder_linear.py" + "models/perceiver_io/test_perceiver_io.py" + "models/dpr/test_dpr.py" + "models/bloom/test_bloom.py" + "models/distilbert/test_distilbert.py" + + ) + + # Preprocess file paths + for file in "${test_ids_verbose[@]}"; do + if [[ "$file" == models/* ]]; then + # Check if the file is in the exclude_tests array + skip=false + for exclude_test in "${exclude_tests[@]}"; do + if [[ "$file" == "$exclude_test" ]]; then + skip=true + break + fi + done + + if ! $skip; then + test_ids+=("$file") + fi + fi + done + + TOTAL_TESTS=${#test_ids[@]} + TESTS_PER_GROUP=1 + + START_INDEX=$(( (CURRENT_GROUP - 1) * TESTS_PER_GROUP )) + END_INDEX=$(( CURRENT_GROUP * TESTS_PER_GROUP )) + + if (( END_INDEX > TOTAL_TESTS )); then + END_INDEX=$TOTAL_TESTS + fi + + # Slice the test array for the current group + group_test_ids=("${test_ids[@]:START_INDEX:TESTS_PER_GROUP}") + echo "All tests ($TOTAL_TESTS): ${test_ids[@]}" + echo "Running tests in group $CURRENT_GROUP..." 
+ echo "Tests assigned to this group: ${group_test_ids[@]}" + + failed_batch_and_test_array=() + max_batch_sizes_array=() + counter=0 + + # Define function for finding max batch size when uninitialized + find_max_batch_size_uninitialized() { + local test_path=$1 + batch_range_lower=$2 + batch_range_upper=$3 + local batch_range=($batch_range_lower $batch_range_upper) + + not_found=1 + local min_failed_batch=0 + local max_successful_batch=0 + + while (( not_found )); do + if (( batch_size_to_test == batch_range_upper - 2 )); then + batch_range_upper=$(( batch_range_upper * 2 )) + batch_range[1]=$batch_range_upper # Update the upper bound in the array + echo "Expanding upper bound to: $batch_range_upper" # Optional logging + fi + local batch_size_to_test=$(( (batch_range[0] + batch_range[1]) / 2 )) + if (( batch_size_to_test % 2 != 0)); then + (( batch_size_to_test -= 1 )) + fi + + echo "Testing with batch size $batch_size_to_test" + + python3 -m pytest "$test_path" -s --batch_size $batch_size_to_test --report_nth_iteration $num_iterations + exit_code=$? + + if (( exit_code != 0 )); then + batch_range[1]=$batch_size_to_test + min_failed_batch=$batch_size_to_test + else + batch_range[0]=$batch_size_to_test + max_successful_batch=$batch_size_to_test + fi + + if (( min_failed_batch - max_successful_batch == 2)); then + not_found=0 + fi + done + echo "min failed batch: $min_failed_batch" + echo "Max batch size for $test_path: $max_successful_batch" + max_batch_sizes_array+=("$max_successful_batch") + failed_batch_and_test_array+=("$max_successful_batch $test_path") + } + + # Define function for finding max batch size when initialized + find_max_batch_size_initialized() { + test_path=$1 + batch_range_lower=$2 + batch_range_upper=$3 + batch_range=($batch_range_lower $batch_range_upper) + + prior_batches_array=($(printf "%s\n" "${max_batch_sizes_array[@]}" | sort -n)) + not_found=1 + min_failed_batch=0 + max_successful_batch=0 + first_iter=1 + + while (( not_found )); do + if (( first_iter )); then + median_index=$(( ${#prior_batches_array[@]} / 2 )) + batch_size_to_test=${prior_batches_array[$median_index]} + first_iter=0 + else + batch_size_to_test=$(( (batch_range[0] + batch_range[1]) / 2 )) + fi + + echo "Testing with batch size $batch_size_to_test" + python3 -m pytest "$test_path" -s --batch_size $batch_size_to_test --report_nth_iteration $num_iterations + exit_code=$? 
+ + if (( exit_code != 0 )); then + batch_range[1]=$batch_size_to_test + min_failed_batch=$batch_size_to_test + else + batch_range[0]=$batch_size_to_test + max_successful_batch=$batch_size_to_test + fi + + if (( min_failed_batch - max_successful_batch == 1)); then + not_found=0 + fi + done + + echo "Max batch size $max_successful_batch found for $test_path" + max_batch_sizes_array+=("$max_successful_batch") + failed_batch_and_test_array+=("$max_successful_batch $test_path") + } + + # Main loop to distribute test runs across groups + for t in "${group_test_ids[@]}"; do + if [ -z "$t" ]; then + continue + fi + echo "Running test: $t" + file_path="${file_prefix}${t%%::*}" + l_bound=192 + u_bound=224 + find_max_batch_size_uninitialized "$file_path" "$l_bound" "$u_bound" + done + + echo "Final Max Batches: ${failed_batch_and_test_array[@]}" + exit 0 diff --git a/.github/workflows/empty-workflow.yaml b/.github/workflows/empty-workflow.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/.github/workflows/update-ttnn-wheel.yaml b/.github/workflows/update-ttnn-wheel.yaml index 78321736b..2bf35c141 100644 --- a/.github/workflows/update-ttnn-wheel.yaml +++ b/.github/workflows/update-ttnn-wheel.yaml @@ -28,10 +28,13 @@ jobs: id: update-requirements run: | latest_version=${{ steps.fetch_release.outputs.release }} - latest_version_short=$(echo $latest_version | sed 's/-rc/rc/') - sed -i '/^https:\/\/github\.com\/tenstorrent\/tt-metal\/releases\//d' requirements.txt - echo "ttnn@https://github.com/tenstorrent/tt-metal/releases/download/v$latest_version/ttnn-$latest_version_short+any-cp38-cp38-linux_x86_64.whl" >> requirements.txt - + # Remove any existing ttnn lines (adjust the regex if needed) + sed -i '/^ttnn @ https:\/\/github\.com\/tenstorrent\/tt-metal\/releases\//d' requirements.txt + + # Append the two lines for the different python versions. 
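+          # Each appended line carries a "; python_version==..." environment marker,
+          # so pip only installs the wheel matching the interpreter that processes requirements.txt.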
+ echo "ttnn @ https://github.com/tenstorrent/tt-metal/releases/download/v${latest_version}/ttnn-$latest_version_short+any-cp38-cp38-linux_x86_64.whl ; python_version==\"3.8\"" >> requirements.txt + echo "ttnn @ https://github.com/tenstorrent/tt-metal/releases/download/v${latest_version}/ttnn-$latest_version_short+any-cp310-cp310-linux_x86_64.whl ; python_version==\"3.10\"" >> requirements.txt + - name: Create Pull Request uses: peter-evans/create-pull-request@v7 id: create-pr diff --git a/batch-tests-to-run.txt b/batch-tests-to-run.txt new file mode 100644 index 000000000..4d9361a9c --- /dev/null +++ b/batch-tests-to-run.txt @@ -0,0 +1,24 @@ +tests/models/MobileNetV2/test_MobileNetV2.py +tests/models/albert/test_albert_masked_lm.py +tests/models/albert/test_albert_question_answering.py +tests/models/albert/test_albert_sequence_classification.py +tests/models/albert/test_albert_token_classification.py +tests/models/autoencoder_linear/test_autoencoder_linear.py +tests/models/beit/test_beit_image_classification.py +tests/models/bert/test_bert.py +tests/models/bloom/test_bloom.py +tests/models/distilbert/test_distilbert.py +tests/models/dpr/test_dpr.py +tests/models/llama/test_llama.py +tests/models/mlpmixer/test_mlpmixer.py +tests/models/mnist/test_mnist.py +tests/models/openpose/test_openpose_v2.py +tests/models/perceiver_io/test_perceiver_io.py +tests/models/resnet/test_resnet.py +tests/models/resnet50/test_resnet50.py +tests/models/roberta/test_roberta.py +tests/models/squeeze_bert/test_squeeze_bert.py +tests/models/unet/test_unet.py +tests/models/unet_brain/test_unet_brain.py +tests/models/unet_carvana/test_unet_carvana.py +tests/models/yolov5/test_yolov5.py diff --git a/requirements.txt b/requirements.txt index 84d94a432..8bbfee967 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,10 @@ -torch@https://download.pytorch.org/whl/cpu/torch-2.2.1%2Bcpu-cp38-cp38-linux_x86_64.whl -torchvision@https://download.pytorch.org/whl/cpu/torchvision-0.17.1%2Bcpu-cp38-cp38-linux_x86_64.whl +torch==2.2.1+cpu +torchvision==0.17.1+cpu + tabulate==0.9.0 networkx==3.1 graphviz matplotlib==3.7.1 -ttnn@https://github.com/tenstorrent/tt-metal/releases/download/v0.56.0-rc9/ttnn-0.56.0rc9+any-cp38-cp38-linux_x86_64.whl + +ttnn @ https://github.com/tenstorrent/tt-metal/releases/download/v0.56.0-rc37/ttnn-0.56.0rc37+any-cp38-cp38-linux_x86_64.whl ; python_version=="3.8" +ttnn @ https://github.com/tenstorrent/tt-metal/releases/download/v0.56.0-rc37/ttnn-0.56.0rc37+any-cp310-cp310-linux_x86_64.whl ; python_version=="3.10" diff --git a/tests/conftest.py b/tests/conftest.py index ba2b491d9..0bcb0f8c4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -36,6 +36,7 @@ def pytest_addoption(parser): help="Run up to the specified iteration count and report metrics based on this iteration.", ) parser.addoption("--gen_op_accuracy_tests", action="store_true") + parser.addoption("--batch_size", action="store", default=None, help="Batch size for testing") @pytest.fixture(scope="session") @@ -70,6 +71,11 @@ def device(): ttnn.close_device(device) +@pytest.fixture(scope="session") +def batch_size(request): + return request.config.getoption("--batch_size") + + def get_dispatch_core_type(): # Instead of conditionally returning WORKER or ETH, here we always return ETH # Without setting this property, we get less cores availble on N300 than on N150, which might lead to inconsistent and sub-sufficient results @@ -169,7 +175,7 @@ def compile_and_run(device, reset_torch_dynamo, request): gen_graphviz=False, 
run_mem_analysis=False, metrics_path=model_name, - verbose=True, + verbose=False, gen_op_accuracy_tests=request.config.getoption("--gen_op_accuracy_tests"), ) @@ -177,6 +183,7 @@ def compile_and_run(device, reset_torch_dynamo, request): start = time.perf_counter() * 1000 # Don't need to reset options if inputs don't change because of cache outputs_after = model_tester.test_model(as_ttnn=True, option=option) + # return end = time.perf_counter() * 1000 run_time = end - start if idx == 0: @@ -196,8 +203,8 @@ def compile_and_run(device, reset_torch_dynamo, request): model_name, option._aten_fx_graphs, option._out_fx_graphs, option._all_inputs ) - if len(option._out_fx_graphs) > 0: - option._out_fx_graphs[0].print_tabular() + # if len(option._out_fx_graphs) > 0: + # option._out_fx_graphs[0].print_tabular() if model_name not in ["speecht5-tts", "ssd300_vgg16", "retinanet_resnet50_fpn_v2"]: accuracy = calculate_accuracy(outputs, outputs_after) diff --git a/tests/models/MobileNetV2/test_MobileNetV2.py b/tests/models/MobileNetV2/test_MobileNetV2.py index 95f75fcb7..55424432a 100644 --- a/tests/models/MobileNetV2/test_MobileNetV2.py +++ b/tests/models/MobileNetV2/test_MobileNetV2.py @@ -32,11 +32,12 @@ def _load_inputs(self): ["eval"], ) @pytest.mark.converted_end_to_end -def test_MobileNetV2(record_property, mode): +def test_MobileNetV2(record_property, mode, batch_size): model_name = "MobileNetV2" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": # Print the top 5 predictions diff --git a/tests/models/albert/test_albert_masked_lm.py b/tests/models/albert/test_albert_masked_lm.py index 57951a6d5..6c246f261 100644 --- a/tests/models/albert/test_albert_masked_lm.py +++ b/tests/models/albert/test_albert_masked_lm.py @@ -3,7 +3,7 @@ from transformers import AutoTokenizer, AlbertForMaskedLM import torch import pytest -from tests.utils import ModelTester +from tests.utils import ModelTester, process_batched_logits class ThisTester(ModelTester): @@ -40,15 +40,19 @@ def append_fake_loss_function(self, outputs): "albert/albert-xxlarge-v2", ], ) -def test_albert_masked_lm(record_property, model_name, mode): + +# @pytest.mark.converted_end_to_end +def test_albert_masked_lm(record_property, model_name, mode, batch_size): record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": # retrieve index of [MASK] + + results.logits = process_batched_logits(results.logits, batch_size) logits = results.logits mask_token_index = (tester.inputs.input_ids == tester.tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0] predicted_token_id = logits[0, mask_token_index].argmax(axis=-1) diff --git a/tests/models/albert/test_albert_question_answering.py b/tests/models/albert/test_albert_question_answering.py index 394575426..f45e62d4e 100644 --- a/tests/models/albert/test_albert_question_answering.py +++ b/tests/models/albert/test_albert_question_answering.py @@ -3,7 +3,7 @@ from transformers import AutoTokenizer, AlbertForQuestionAnswering import torch import pytest -from tests.utils import ModelTester +from tests.utils import ModelTester, process_batched_logits class ThisTester(ModelTester): @@ -24,17 +24,16 @@ def _load_inputs(self): ) @pytest.mark.converted_end_to_end @pytest.mark.parametrize("model_name", 
["twmkn9/albert-base-v2-squad2"]) -def test_albert_question_answering(record_property, model_name, mode): +def test_albert_question_answering(record_property, model_name, mode, batch_size): record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": - answer_start_index = results.start_logits.argmax() - answer_end_index = results.end_logits.argmax() - + answer_start_index = process_batched_logits(results.start_logits, batch_size).argmax() + answer_end_index = process_batched_logits(results.end_logits, batch_size).argmax() predict_answer_tokens = tester.inputs.input_ids[0, answer_start_index : answer_end_index + 1] answer = tester.tokenizer.decode(predict_answer_tokens, skip_special_tokens=True) diff --git a/tests/models/albert/test_albert_sequence_classification.py b/tests/models/albert/test_albert_sequence_classification.py index 0c0156d74..0bee949dd 100644 --- a/tests/models/albert/test_albert_sequence_classification.py +++ b/tests/models/albert/test_albert_sequence_classification.py @@ -3,7 +3,7 @@ from transformers import AlbertTokenizer, AlbertForSequenceClassification import torch import pytest -from tests.utils import ModelTester +from tests.utils import ModelTester, process_batched_logits class ThisTester(ModelTester): @@ -23,15 +23,15 @@ def _load_inputs(self): ) @pytest.mark.converted_end_to_end @pytest.mark.parametrize("model_name", ["textattack/albert-base-v2-imdb"]) -def test_albert_sequence_classification(record_property, model_name, mode): +def test_albert_sequence_classification(record_property, model_name, mode, batch_size): record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": - logits = results.logits + logits = process_batched_logits(results.logits, batch_size) predicted_class_id = logits.argmax().item() predicted_label = tester.model.config.id2label[predicted_class_id] diff --git a/tests/models/albert/test_albert_token_classification.py b/tests/models/albert/test_albert_token_classification.py index a2ce7cc9e..a38554144 100644 --- a/tests/models/albert/test_albert_token_classification.py +++ b/tests/models/albert/test_albert_token_classification.py @@ -3,7 +3,7 @@ from transformers import AutoTokenizer, AlbertForTokenClassification import torch import pytest -from tests.utils import ModelTester +from tests.utils import ModelTester, process_batched_logits class ThisTester(ModelTester): @@ -27,15 +27,21 @@ def _load_inputs(self): pytest.param("albert/albert-base-v2", marks=pytest.mark.converted_end_to_end), ], ) -def test_albert_token_classification(record_property, model_name, mode): +def test_albert_token_classification(record_property, model_name, mode, batch_size): record_property("model_name", f"{model_name}-classification") record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": - logits = results.logits + if batch_size is not None: + results.logits = results.logits.squeeze(0) # Temporary fix, not the neatest solution + + logits = process_batched_logits(results.logits, batch_size).unsqueeze(0) + if batch_size is None: + logits = logits.squeeze(0) # Adjust dimensions to account for batch reshaping ^ + predicted_token_class_ids = logits.argmax(-1) 
# Note that tokens are classified rather then input words which means that diff --git a/tests/models/autoencoder_conv/test_autoencoder_conv_v2.py b/tests/models/autoencoder_conv/test_autoencoder_conv_v2.py index 55d482e6f..04a8032a8 100644 --- a/tests/models/autoencoder_conv/test_autoencoder_conv_v2.py +++ b/tests/models/autoencoder_conv/test_autoencoder_conv_v2.py @@ -71,12 +71,12 @@ def _load_inputs(self): "mode", ["train", "eval"], ) -def test_autoencoder_conv_v2(record_property, mode): +def test_autoencoder_conv_v2(record_property, mode, batch_size): model_name = f"Autoencoder (conv)" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": diff --git a/tests/models/autoencoder_linear/test_autoencoder_linear.py b/tests/models/autoencoder_linear/test_autoencoder_linear.py index 536568c07..f9af4035a 100644 --- a/tests/models/autoencoder_linear/test_autoencoder_linear.py +++ b/tests/models/autoencoder_linear/test_autoencoder_linear.py @@ -80,12 +80,12 @@ def _load_inputs(self): "mode", ["train", pytest.param("eval", marks=pytest.mark.converted_end_to_end)], ) -def test_autoencoder_linear(record_property, mode): +def test_autoencoder_linear(record_property, mode, batch_size): model_name = "Autoencoder (linear)" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": diff --git a/tests/models/beit/test_beit_image_classification.py b/tests/models/beit/test_beit_image_classification.py index 24ecb9887..54bf69c31 100644 --- a/tests/models/beit/test_beit_image_classification.py +++ b/tests/models/beit/test_beit_image_classification.py @@ -33,11 +33,11 @@ def get_results_train(self, model, inputs, outputs): @pytest.mark.parametrize("mode", ["train", "eval"]) @pytest.mark.parametrize("model_name", ["microsoft/beit-base-patch16-224", "microsoft/beit-large-patch16-224"]) -def test_beit_image_classification(record_property, model_name, mode): +def test_beit_image_classification(record_property, model_name, mode, batch_size): record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": diff --git a/tests/models/bert/test_bert.py b/tests/models/bert/test_bert.py index 4dddee2ee..c24678b93 100644 --- a/tests/models/bert/test_bert.py +++ b/tests/models/bert/test_bert.py @@ -35,12 +35,12 @@ def _load_inputs(self): ["eval"], ) @pytest.mark.converted_end_to_end -def test_bert(record_property, mode): +def test_bert(record_property, mode, batch_size): model_name = "BERT" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": diff --git a/tests/models/bloom/test_bloom.py b/tests/models/bloom/test_bloom.py index 53c4b997b..5d6bcdd46 100644 --- a/tests/models/bloom/test_bloom.py +++ b/tests/models/bloom/test_bloom.py @@ -33,12 +33,12 @@ def _load_inputs(self): ["eval"], ) @pytest.mark.converted_end_to_end -def test_bloom(record_property, mode): +def test_bloom(record_property, mode, batch_size): model_name = "Bloom" record_property("model_name", model_name) record_property("mode", mode) - tester = 
ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": diff --git a/tests/models/distilbert/test_distilbert.py b/tests/models/distilbert/test_distilbert.py index 5b955bdec..c44aba443 100644 --- a/tests/models/distilbert/test_distilbert.py +++ b/tests/models/distilbert/test_distilbert.py @@ -22,11 +22,11 @@ def _load_inputs(self): ) @pytest.mark.converted_end_to_end @pytest.mark.parametrize("model_name", ["distilbert-base-uncased"]) -def test_distilbert(record_property, model_name, mode): +def test_distilbert(record_property, model_name, mode, batch_size): record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": diff --git a/tests/models/dpr/test_dpr.py b/tests/models/dpr/test_dpr.py index 454c109c2..be7bed735 100644 --- a/tests/models/dpr/test_dpr.py +++ b/tests/models/dpr/test_dpr.py @@ -27,12 +27,12 @@ def _load_inputs(self): ["eval"], ) @pytest.mark.converted_end_to_end -def test_dpr(record_property, mode): +def test_dpr(record_property, mode, batch_size): model_name = "DPR" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": diff --git a/tests/models/hand_landmark/test_hand_landmark.py b/tests/models/hand_landmark/test_hand_landmark.py index 2c14e4e24..9bec75dcb 100644 --- a/tests/models/hand_landmark/test_hand_landmark.py +++ b/tests/models/hand_landmark/test_hand_landmark.py @@ -40,7 +40,7 @@ def set_model_eval(self, model): ) @pytest.mark.usefixtures("manage_dependencies") @pytest.mark.converted_end_to_end -def test_hand_landmark(record_property, mode): +def test_hand_landmark(record_property, mode, batch_size): model_name = "Hand Landmark" record_property("model_name", model_name) record_property("mode", mode) @@ -81,7 +81,7 @@ def test_hand_landmark(record_property, mode): [sys.executable, "-m", "pip", "install", "--force-reinstall", "opencv-python-headless==4.8.0.74"] ) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() record_property("torch_ttnn", (tester, results)) diff --git a/tests/models/llama/test_llama.py b/tests/models/llama/test_llama.py index 630d74e9d..ff06873f6 100644 --- a/tests/models/llama/test_llama.py +++ b/tests/models/llama/test_llama.py @@ -36,13 +36,14 @@ def _load_inputs(self): ["eval"], ) @pytest.mark.converted_end_to_end -def test_llama(record_property, mode): +def test_llama(record_property, mode, batch_size): model_name = "Llama" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() + if mode == "eval": # Helper function to decode output to human-readable text def decode_output(outputs): diff --git a/tests/models/mlpmixer/test_mlpmixer.py b/tests/models/mlpmixer/test_mlpmixer.py index 9ad08962a..bd9044b5e 100644 --- a/tests/models/mlpmixer/test_mlpmixer.py +++ b/tests/models/mlpmixer/test_mlpmixer.py @@ -23,11 +23,11 @@ def _load_inputs(self): "mode", ["train", "eval"], ) -def test_mlpmixer(record_property, mode): +def test_mlpmixer(record_property, mode, batch_size): model_name = "MLPMixer" record_property("model_name", model_name) record_property("mode", mode) 
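+    # batch_size is supplied by the session-scoped pytest fixture added in tests/conftest.py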
- tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() record_property("torch_ttnn", (tester, results)) diff --git a/tests/models/mnist/test_mnist.py b/tests/models/mnist/test_mnist.py index c09334e69..d61dfc84f 100644 --- a/tests/models/mnist/test_mnist.py +++ b/tests/models/mnist/test_mnist.py @@ -54,12 +54,11 @@ def _load_inputs(self): "mode", ["train", pytest.param("eval", marks=pytest.mark.converted_end_to_end)], ) -def test_mnist_train(record_property, mode): +def test_mnist_train(record_property, mode, batch_size): model_name = "Mnist" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() - record_property("torch_ttnn", (tester, results)) diff --git a/tests/models/openpose/test_openpose_v2.py b/tests/models/openpose/test_openpose_v2.py index 6d8de066e..cb9ee7101 100644 --- a/tests/models/openpose/test_openpose_v2.py +++ b/tests/models/openpose/test_openpose_v2.py @@ -47,12 +47,12 @@ def _load_inputs(self): pytest.param("eval", marks=pytest.mark.converted_end_to_end), ], ) -def test_openpose_v2(record_property, mode): +def test_openpose_v2(record_property, mode, batch_size): model_name = "OpenPose V2" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": print(f"Output: {results}") diff --git a/tests/models/perceiver_io/test_perceiver_io.py b/tests/models/perceiver_io/test_perceiver_io.py index 8071ad8c7..6e317d3ed 100644 --- a/tests/models/perceiver_io/test_perceiver_io.py +++ b/tests/models/perceiver_io/test_perceiver_io.py @@ -29,12 +29,12 @@ def _load_inputs(self): ["eval"], ) @pytest.mark.converted_end_to_end -def test_perceiver_io(record_property, mode): +def test_perceiver_io(record_property, mode, batch_size): model_name = "Perceiver IO" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": logits = results.logits diff --git a/tests/models/resnet/test_resnet.py b/tests/models/resnet/test_resnet.py index 639993bd4..aa8689dee 100644 --- a/tests/models/resnet/test_resnet.py +++ b/tests/models/resnet/test_resnet.py @@ -23,12 +23,12 @@ def _load_inputs(self): pytest.param("eval", marks=pytest.mark.converted_end_to_end), ], ) -def test_resnet(record_property, mode): +def test_resnet(record_property, mode, batch_size): model_name = "ResNet18" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() # Check inference result diff --git a/tests/models/resnet50/test_resnet50.py b/tests/models/resnet50/test_resnet50.py index 68425e4b7..36e0b48ee 100644 --- a/tests/models/resnet50/test_resnet50.py +++ b/tests/models/resnet50/test_resnet50.py @@ -36,12 +36,12 @@ def _load_inputs(self): pytest.param("eval", marks=pytest.mark.converted_end_to_end), ], ) -def test_resnet(record_property, mode): +def test_resnet(record_property, mode, batch_size): model_name = "ResNet50" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = 
tester.test_model() if mode == "eval": # Print the top 5 predictions diff --git a/tests/models/roberta/test_roberta.py b/tests/models/roberta/test_roberta.py index a6e242293..1dde90ea3 100644 --- a/tests/models/roberta/test_roberta.py +++ b/tests/models/roberta/test_roberta.py @@ -22,12 +22,12 @@ def _load_inputs(self): ["eval"], ) @pytest.mark.converted_end_to_end -def test_roberta(record_property, mode): +def test_roberta(record_property, mode, batch_size): model_name = "RoBERTa" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": logits = results.logits diff --git a/tests/models/squeeze_bert/test_squeeze_bert.py b/tests/models/squeeze_bert/test_squeeze_bert.py index 0d7e742c0..20aee2af3 100644 --- a/tests/models/squeeze_bert/test_squeeze_bert.py +++ b/tests/models/squeeze_bert/test_squeeze_bert.py @@ -24,12 +24,12 @@ def _load_inputs(self): ["eval"], ) @pytest.mark.converted_end_to_end -def test_squeeze_bert(record_property, mode): +def test_squeeze_bert(record_property, mode, batch_size): model_name = "SqueezeBERT" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() if mode == "eval": logits = results.logits diff --git a/tests/models/unet/test_unet.py b/tests/models/unet/test_unet.py index 656e991eb..bdf9ad7a4 100644 --- a/tests/models/unet/test_unet.py +++ b/tests/models/unet/test_unet.py @@ -45,13 +45,14 @@ def _load_inputs(self): pytest.param("eval", marks=pytest.mark.converted_end_to_end), ], ) -def test_unet(record_property, mode): +def test_unet(record_property, mode, batch_size): model_name = "U-Net" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() + if mode == "eval": results = torch.round(results[0]) diff --git a/tests/models/unet_brain/test_unet_brain.py b/tests/models/unet_brain/test_unet_brain.py index 3dbe5dfc1..8feeca02e 100644 --- a/tests/models/unet_brain/test_unet_brain.py +++ b/tests/models/unet_brain/test_unet_brain.py @@ -55,13 +55,14 @@ def _load_inputs(self): pytest.param("eval", marks=pytest.mark.converted_end_to_end), ], ) -def test_unet_brain(record_property, mode): +def test_unet_brain(record_property, mode, batch_size): model_name = "Unet-brain" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() + if mode == "eval": print(torch.round(results[0])) diff --git a/tests/models/unet_carvana/test_unet_carvana.py b/tests/models/unet_carvana/test_unet_carvana.py index 5df1c22b9..0668e9a1c 100644 --- a/tests/models/unet_carvana/test_unet_carvana.py +++ b/tests/models/unet_carvana/test_unet_carvana.py @@ -31,12 +31,12 @@ def _load_inputs(self): pytest.param("eval", marks=pytest.mark.converted_end_to_end), ], ) -def test_unet_carvana(record_property, mode): +def test_unet_carvana(record_property, mode, batch_size): model_name = "Unet-carvana" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() record_property("torch_ttnn", (tester, results)) diff --git 
a/tests/models/yolov5/test_yolov5.py b/tests/models/yolov5/test_yolov5.py index cec2b4d0b..41a31673a 100644 --- a/tests/models/yolov5/test_yolov5.py +++ b/tests/models/yolov5/test_yolov5.py @@ -8,6 +8,7 @@ import pytest from tests.utils import ModelTester + dependencies = ["ultralytics==8.2.92", "ultralytics-thop==2.0.6"] @@ -98,12 +99,12 @@ def teardown_module(module): ) @pytest.mark.converted_end_to_end @pytest.mark.usefixtures("manage_dependencies") -def test_yolov5(record_property, mode): +def test_yolov5(record_property, mode, batch_size): model_name = "YOLOv5" record_property("model_name", model_name) record_property("mode", mode) - tester = ThisTester(model_name, mode) + tester = ThisTester(model_name, mode, batch_size) results = tester.test_model() record_property("torch_ttnn", (tester, results)) diff --git a/tests/utils.py b/tests/utils.py index 94397ccc3..5b4a9cc1c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,18 +1,23 @@ +import collections.abc import torch import numpy as np import collections import re from typing import List, Dict, Tuple +import transformers class ModelTester: - def __init__(self, model_name, mode): + def __init__(self, model_name, mode, batch_size=None): if mode not in ["train", "eval"]: raise ValueError(f"Current mode is not supported: {mode}") self.model_name = model_name self.mode = mode self.model = self._load_model() self.inputs = self._load_inputs() + self.batch_size = batch_size + self.validate_batch_size() + self.batch_inputs() def _load_model(self): raise NotImplementedError("This method should be implemented in the derived class") @@ -60,9 +65,9 @@ def compile_model(self, model, option): return model def run_model(self, model, inputs): - if isinstance(inputs, collections.Mapping): + if isinstance(inputs, collections.abc.Mapping): return model(**inputs) - elif isinstance(inputs, collections.Sequence): + elif isinstance(inputs, collections.abc.Sequence): return model(*inputs) else: return model(inputs) @@ -143,6 +148,32 @@ def test_model(self, as_ttnn=False, option=None): else: raise ValueError(f"Current mode is not supported: {self.mode}") + def batch_inputs(self): + if self.batch_size is None: + return + if isinstance(self.inputs, dict) or isinstance(self.inputs, transformers.tokenization_utils_base.BatchEncoding): + keys = self.inputs.keys() + for key in keys: + if isinstance(self.inputs[key], torch.Tensor): + self.inputs[key] = self.inputs[key].repeat(self.batch_size, 1) + elif isinstance(self.inputs, torch.Tensor): + if self.inputs.shape[0] == 0: + self.inputs = self.inputs.squeeze(0) + self.inputs = self.inputs.repeat(self.batch_size, *([1] * (self.inputs.dim()))) + self.inputs = self.inputs.squeeze(1) + else: + raise TypeError(f"Unregonized inputs type: {type(self.inputs)}") + + def validate_batch_size(self): + if self.batch_size is None: + return + try: + self.batch_size = int(self.batch_size) + except Exception as e: + raise TypeError( + f"Failed to interpret batch size type {type(self.batch_size).__name__} (Must be an integer or None)" + ) + # Testing utils copied from tt-metal/tests/ttnn/utils_for_testing.py def comp_pcc(golden, calculated, pcc=0.99): @@ -552,3 +583,15 @@ def render_input_args_kwargs(self) -> Tuple[List, Dict, bool]: def render_metric_string_list_to_input_args_kwargs(op_name, input_strings) -> Tuple[List, Dict, bool]: handler = MetricStringListHandler(op_name, input_strings) return handler.render_input_args_kwargs() + + +def process_batched_logits(logits, batch_size): + if batch_size is None: + return logits + 
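+    # Batched run: keep only the first sample's logits so the existing
+    # single-sample checks in each test still apply.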
+    else:
+        if logits.dim() == 3:
+            return logits[0, :, :].squeeze(0)
+        elif logits.dim() == 2:
+            return logits[0, :].squeeze(0)
+        else:
+            raise ValueError(f"Unrecognized logits shape: {tuple(logits.shape)} (expected 2D or 3D including batch)")
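
For readers who find the bash bisection in batch-experiment.yaml hard to follow, below is a minimal Python sketch of the same "uninitialized" search. It assumes a caller supplies a `run_test(batch_size)` callable that returns True when the pytest invocation at that batch size exits cleanly; `run_test` and the default [192, 224] window (taken from the workflow's `l_bound`/`u_bound`) are illustrative stand-ins, not repository code.

```python
# Illustrative sketch only: mirrors find_max_batch_size_uninitialized from the
# workflow, with run_test(batch_size) -> bool standing in for the pytest call.
def find_max_batch_size(run_test, lower=192, upper=224):
    """Largest even batch size that passes, assuming `lower` itself passes."""
    max_ok, min_fail = lower, None
    while True:
        candidate = (lower + upper) // 2
        candidate -= candidate % 2          # probe only even batch sizes
        if run_test(candidate):
            lower = max_ok = candidate      # success: raise the lower bound
        else:
            upper = min_fail = candidate    # failure: lower the upper bound
        if min_fail is None and candidate >= upper - 2:
            upper *= 2                      # nothing has failed yet: widen the window
        if min_fail is not None and min_fail - max_ok <= 2:
            return max_ok                   # bounds are adjacent on the even grid
```

With the workflow defaults (l_bound=192, u_bound=224, even probes only), this converges in a handful of pytest runs per model and doubles the ceiling whenever every probe up to it has passed.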