Skip to content

Commit

Permalink
Merge branch 'main' into dvartanians/fix_yolov4_faster_webdemo
Browse files Browse the repository at this point in the history
  • Loading branch information
mbahnasTT authored Feb 24, 2025
2 parents 19dde59 + d8837b6 commit b5a2167
Show file tree
Hide file tree
Showing 87 changed files with 1,023 additions and 489 deletions.
13 changes: 8 additions & 5 deletions .github/workflows/bisect-dispatch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ on:
required: true
type: string
description: "Timeout (eg: 5m, 1h)"
description:
type: string
default: "Git bisect dispatch"
patch:
required: false
type: string
description: "Commit-ish to cherry-pick for each step"

run-name: ${{ inputs.description }}
run-name: "Bisect on ${{ inputs.runner-label }}"
jobs:
build-artifact:
uses: ./.github/workflows/build-artifact.yaml
Expand Down Expand Up @@ -88,8 +88,11 @@ jobs:
- uses: ./.github/actions/install-python-deps
- name: Run Git Bisect
shell: bash
env:
GIT_COMMITTER_NAME: "GitHub Actions"
GIT_COMMITTER_EMAIL: "[email protected]"
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
./tests/scripts/tt_bisect.sh -t ${{ inputs.timeout }} -f "${{ inputs.command }}" -b ${{ inputs.bad-commit }} -g ${{ inputs.good-commit }}
./tests/scripts/tt_bisect.sh -t ${{ inputs.timeout }} -f "${{ inputs.command }}" -b ${{ inputs.bad-commit }} -g ${{ inputs.good-commit }} -p "${{ inputs.patch }}"
65 changes: 53 additions & 12 deletions .github/workflows/tgg-unit-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@ name: "[internal] TGG unit tests impl"

on:
workflow_call:
inputs:
docker-image:
required: true
type: string
wheel-artifact-name:
required: true
type: string

jobs:
TGG-tests:
Expand All @@ -17,26 +24,60 @@ jobs:
},
]
name: ${{ matrix.test-group.name }}
env:
ARCH_NAME: ${{ matrix.test-group.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
runs-on: ${{ matrix.test-group.runs-on }}
container:
image: ${{ inputs.docker-image }}
env:
TT_METAL_HOME: /work
PYTHONPATH: /work
LD_LIBRARY_PATH: /work/build/lib
LOGURU_LEVEL: INFO
ARCH_NAME: ${{ matrix.test-group.arch }}
volumes:
- ${{ github.workspace }}/docker-job:/work # Subdir to workaround https://github.com/actions/runner/issues/691
- /dev/hugepages-1G:/dev/hugepages-1G
- /mnt/MLPerf:/mnt/MLPerf
options: "--device /dev/tenstorrent"
defaults:
run:
shell: bash
working-directory: /work # https://github.com/actions/runner/issues/878
steps:
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- name: ⬇️ Checkout
uses: actions/checkout@v4
with:
submodules: recursive
path: docker-job # Here be dragons; keep it scoped to our desired volume, yet must be under github.workspace and be sure to clean up at the end
- uses: actions/download-artifact@v4
with:
name: TTMetal_build_any
path: /work
- name: Extract files
run: tar -xvf ttm_any.tar
- uses: ./.github/actions/install-python-deps
- name: ⬇️ Download Wheel
uses: actions/download-artifact@v4
with:
name: ${{ inputs.wheel-artifact-name }}
path: /work
- name: Install Wheel
run: |
WHEEL_FILENAME=$(ls -1 *.whl)
pip3 install $WHEEL_FILENAME
- name: Run unit regression tests
timeout-minutes: 60
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
set -x
pwd
echo $PYTHONPATH
ls -al
${{ matrix.test-group.cmd }}
- name: Cleanup
if: always()
run: |
# We are forced to checkout the repo into a subdir of the host's workdir; this pollutes the host
# with root-owned files. Be sure to clean up after ourselves in case we're on a non-ephemeral runner.
echo "pre rm"
ls -al /__w/tt-metal/tt-metal
rm -rf /__w/tt-metal/tt-metal/docker-job
echo "post rm"
ls -al /__w/tt-metal/tt-metal
5 changes: 5 additions & 0 deletions .github/workflows/tgg-unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ jobs:
build-artifact:
uses: ./.github/workflows/build-artifact.yaml
secrets: inherit
with:
build-wheel: true
TGG-tests:
needs: build-artifact
secrets: inherit
uses: ./.github/workflows/tgg-unit-tests-impl.yaml
with:
docker-image: ${{ needs.build-artifact.outputs.ci-build-docker-image }}
wheel-artifact-name: ${{ needs.build-artifact.outputs.wheel-artifact-name }}
8 changes: 7 additions & 1 deletion tests/scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,13 @@ set_up_chdir() {
return
fi
done
echo "Could not find the 'tt-metal' directory in your PYTHONPATH." 1>&2
for ENTRY in "${ENTRIES[@]}"; do
if [[ -d "$ENTRY/tt_metal" ]]; then
cd "$ENTRY"
return
fi
done
echo "Could not find the 'tt-metal' directory in your PYTHONPATH." 1>&2
exit 1
}

Expand Down
1 change: 1 addition & 0 deletions tests/scripts/t3000/run_t3000_unit_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ run_t3000_ttmetal_tests() {
./build/programming_examples/distributed/distributed_program_dispatch
./build/programming_examples/distributed/distributed_buffer_rw
./build/programming_examples/distributed/distributed_eltwise_add
./build/programming_examples/distributed/distributed_trace_and_events

# Record the end time
end_time=$(date +%s)
Expand Down
21 changes: 19 additions & 2 deletions tests/scripts/tt_bisect.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Flags:
-f | --file : test file to run, also the test that broke
-g | --good : good commit to start bisect
-b | --bad : bad commit to start bisect
-p | --patch : commit-ish to cherry-pick onto each commit before building
-t | --timeout : timeout duration for the test
Example:
./tests/scripts/tt_bisect.sh -f ./build/test/tt_metal/test_add_two_ints -b HEAD -g 1eb7930
If the test involves multiple words you have to do "test_file":
Expand All @@ -19,7 +21,8 @@ source python_env/bin/activate
export PYTHONPATH=$TT_METAL_HOME

timeout_duration=2m
while getopts "f:g:b:t:" opt; do
patch=""
while getopts "f:g:b:t:p:" opt; do
case $opt in
f | file)
test=$OPTARG
Expand All @@ -33,6 +36,9 @@ while getopts "f:g:b:t:" opt; do
t | timeout)
timeout_duration=$OPTARG
;;
p | patch)
patch=$OPTARG
;;
\?)
echo "Invalid option: -$OPTARG" >&2
exit 1
Expand All @@ -48,14 +54,20 @@ fi
echo "Time to find who broke it :)"
echo "Good commit:" $good_commit
echo "Bad commit:" $bad_commit
if ([ ! -z "$patch" ]); then
echo "Cherry-pick commit:" $patch
fi

found=false

git bisect start $bad_commit $good_commit --

while [[ "$found" = "false" ]]; do
git submodule update --recursive
echo "::group::Building `git rev-parse HEAD`"
if ([ ! -z "$patch" ]); then
git cherry-pick $patch
fi
git submodule update --recursive
build_rc=0
./build_metal.sh --build-tests > /dev/null || build_rc=$?
echo "::endgroup::"
Expand All @@ -70,6 +82,11 @@ while [[ "$found" = "false" ]]; do
timeout_rc=0
timeout "$timeout_duration" bash -c "$test" || timeout_rc=$?
echo "Exit code: $timeout_rc"

if ([ ! -z "$patch" ]); then
# Must reset HEAD or git bisect good/bad will retry the merge base and we'll be stuck in a loop
git reset --hard HEAD^
fi
echo "::endgroup::"

if [ $timeout_rc -eq 0 ]; then
Expand Down
3 changes: 2 additions & 1 deletion tests/tt_metal/distributed/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
set(UNIT_TESTS_DISTRIBUTED_SRC
${CMAKE_CURRENT_SOURCE_DIR}/test_distributed.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_mesh_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_mesh_coord.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_mesh_device.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_mesh_device_reshape.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_mesh_workload.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_mesh_sub_device.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_mesh_allocator.cpp
Expand Down
31 changes: 0 additions & 31 deletions tests/tt_metal/distributed/test_distributed.cpp

This file was deleted.

93 changes: 93 additions & 0 deletions tests/tt_metal/distributed/test_mesh_device.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <gtest/gtest.h>
#include <gmock/gmock.h>

#include "mesh_device.hpp"
#include "system_mesh.hpp"

#include "tests/tt_metal/tt_metal/common/multi_device_fixture.hpp"

namespace tt::tt_metal::distributed {
namespace {

using ::testing::IsEmpty;
using ::testing::SizeIs;
using ::tt::tt_metal::distributed::MeshContainer;

// Smoke test: a 1x1 MeshDevice can be created and closed without throwing.
// This test does not use the mesh fixture; it builds its own device.
TEST(MeshDeviceInitTest, Init1x1Mesh) {
    // The returned reference is never read here, so it is marked [[maybe_unused]] to keep
    // the build warning-free while still performing the call.
    // NOTE(review): presumably the call is kept so SystemMesh is instantiated before the
    // mesh device is created -- confirm, or drop the line entirely.
    [[maybe_unused]] auto& sys = SystemMesh::instance();

    auto config = tt::tt_metal::distributed::MeshDeviceConfig{.mesh_shape = MeshShape(1, 1)};

    // Creation and close() are wrapped together so an exception from either one fails the test.
    EXPECT_NO_THROW({
        auto mesh = tt::tt_metal::distributed::MeshDevice::create(
            config, DEFAULT_L1_SMALL_SIZE, DEFAULT_TRACE_REGION_SIZE, 1, tt::tt_metal::DispatchCoreType::WORKER);
        mesh->close();
    });
}

// Fixture alias for the TEST_F cases in this file. They access the mesh through the
// fixture's `mesh_device_` member and expect a 2x4 mesh made of 8 devices.
using MeshDeviceTest = T3000MeshDeviceFixture;

// Queries the system mesh while the fixture is active and checks that it reports a 2x4 shape.
// The body never closes anything itself. NOTE(review): per the test name, teardown is
// presumably left entirely to the fixture -- confirm.
TEST_F(MeshDeviceTest, SystemMeshTearDownWithoutClose) {
    const auto shape = SystemMesh::instance().get_shape();

    // ASSERT (not EXPECT) on the rank: the per-dimension checks below index into the shape.
    ASSERT_EQ(shape.dims(), 2);
    EXPECT_EQ(shape[0], 2);
    EXPECT_EQ(shape[1], 4);
}

// The mesh-level allocator must report the same allocatable DRAM size as the allocator
// of every individual device in the mesh.
TEST_F(MeshDeviceTest, MemoryAllocationStatistics) {
    const auto buffer_type = tt::tt_metal::BufferType::DRAM;
    const auto mesh_stats = mesh_device_->allocator()->get_statistics(buffer_type);

    const auto member_devices = mesh_device_->get_devices();
    for (auto* member : member_devices) {
        const auto member_stats = member->allocator()->get_statistics(buffer_type);
        EXPECT_EQ(mesh_stats.total_allocatable_size_bytes, member_stats.total_allocatable_size_bytes);
    }
}

// The mesh reports the DRAM channels of all its devices combined: 8 devices * 12 channels = 96.
TEST_F(MeshDeviceTest, NumDramChannels) {
    constexpr int kNumDevices = 8;
    constexpr int kDramChannelsPerDevice = 12;

    EXPECT_EQ(mesh_device_->num_dram_channels(), kNumDevices * kDramChannelsPerDevice);
}

// Arranges the same 8 devices into containers of rank 1, 2 and 3 and checks that
// MeshDeviceView::is_mesh_2d() is true only for the rank-2 arrangement.
TEST_F(MeshDeviceTest, ViewIs2D) {
    std::vector<IDevice*> all_devices = mesh_device_->get_devices();

    // Rank 1: a single line of 8 devices.
    MeshContainer<IDevice*> line_container(SimpleMeshShape(8), all_devices);
    MeshDeviceView line_view(line_container);
    EXPECT_FALSE(line_view.is_mesh_2d());

    // Rank 2: a 2x4 grid.
    MeshContainer<IDevice*> grid_container(SimpleMeshShape(2, 4), all_devices);
    MeshDeviceView grid_view(grid_container);
    EXPECT_TRUE(grid_view.is_mesh_2d());

    // Rank 3: a 2x2x2 cube.
    MeshContainer<IDevice*> cube_container(SimpleMeshShape(2, 2, 2), all_devices);
    MeshDeviceView cube_view(cube_container);
    EXPECT_FALSE(cube_view.is_mesh_2d());
}

// Carves a 1x2 submesh out of the 2x4 parent at offset (1, 1) and checks the shape,
// device count and parent/child bookkeeping of both meshes, plus the coordinate mapping
// between them.
TEST_F(MeshDeviceTest, Submesh) {
    // State of the parent before any submesh has been created.
    const auto& parent_shape = mesh_device_->shape();
    EXPECT_EQ(parent_shape.num_rows, 2);
    EXPECT_EQ(parent_shape.num_cols, 4);
    EXPECT_THAT(mesh_device_->get_devices(), SizeIs(8));
    EXPECT_TRUE(mesh_device_->is_parent_mesh());
    EXPECT_THAT(mesh_device_->get_submeshes(), IsEmpty());

    auto child = mesh_device_->create_submesh(MeshShape{1, 2}, MeshOffset{1, 1});

    // The parent now tracks exactly one submesh; the child is a 1x2 non-parent mesh
    // with two devices and no submeshes of its own.
    EXPECT_THAT(mesh_device_->get_submeshes(), SizeIs(1));
    const auto& child_shape = child->shape();
    EXPECT_EQ(child_shape.num_rows, 1);
    EXPECT_EQ(child_shape.num_cols, 2);
    EXPECT_THAT(child->get_devices(), SizeIs(2));
    EXPECT_FALSE(child->is_parent_mesh());
    EXPECT_THAT(child->get_submeshes(), IsEmpty());

    // Verify coordinates are correct: child (0, c) must be the same device as parent (1, 1 + c).
    const auto parent_first_id = mesh_device_->get_device(MeshCoordinate{1, 1})->id();
    const auto child_first_id = child->get_device(MeshCoordinate{0, 0})->id();
    EXPECT_EQ(parent_first_id, child_first_id);

    const auto parent_second_id = mesh_device_->get_device(MeshCoordinate{1, 2})->id();
    const auto child_second_id = child->get_device(MeshCoordinate{0, 1})->id();
    EXPECT_EQ(parent_second_id, child_second_id);

    // (1, 1) lies outside the 1x2 child, so the lookup is expected to yield no device.
    EXPECT_EQ(child->get_device(1, 1), nullptr);
}

} // namespace
} // namespace tt::tt_metal::distributed
Loading

0 comments on commit b5a2167

Please sign in to comment.