diff --git a/.github/workflows/build-and-run-all-tests.yml b/.github/workflows/build-and-run-all-tests.yml index 3ad4e86c..f435f5d4 100644 --- a/.github/workflows/build-and-run-all-tests.yml +++ b/.github/workflows/build-and-run-all-tests.yml @@ -14,7 +14,6 @@ jobs: fail-fast: false matrix: test-group: [ - {arch: grayskull}, {arch: wormhole_b0}, {arch: blackhole}, ] @@ -35,9 +34,6 @@ jobs: fail-fast: false matrix: test-group: [ - {arch: grayskull, card: e75, timeout: 10}, - {arch: grayskull, card: e150, timeout: 10}, - {arch: grayskull, card: e300, timeout: 10}, {arch: wormhole_b0, card: n150, timeout: 5}, {arch: wormhole_b0, card: n300, timeout: 15}, {arch: blackhole, card: p150, timeout: 15}, diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml index b7bfa2ee..1d7513d0 100644 --- a/.github/workflows/build-tests.yml +++ b/.github/workflows/build-tests.yml @@ -21,7 +21,6 @@ on: description: 'The architecture to build for' type: choice options: - - grayskull - wormhole_b0 - blackhole ubuntu-version: diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 4215dd51..500f1414 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -23,7 +23,6 @@ on: description: 'The architecture to build for' type: choice options: - - grayskull - wormhole_b0 - blackhole ubuntu-version: @@ -38,9 +37,6 @@ on: description: 'The card to run tests on' type: choice options: - - e75 - - e150 - - e300 - n150 - n300 timeout: diff --git a/.github/workflows/test-runner.yaml b/.github/workflows/test-runner.yaml index c871c773..f1c0ac6a 100644 --- a/.github/workflows/test-runner.yaml +++ b/.github/workflows/test-runner.yaml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - machine: [ubuntu-20.04, ubuntu-22.04, e75, e150, e300, n150, n300] + machine: [ubuntu-20.04, ubuntu-22.04, n150, n300] name: Check runner runs-on: ${{ matrix.machine }} @@ -64,7 +64,7 @@ jobs: strategy: fail-fast: false matrix: - machine: [ubuntu-20.04, ubuntu-22.04, e75, e150, e300, n150, n300] + machine: [ubuntu-20.04, ubuntu-22.04, n150, n300] image: [tt-umd-ci-ubuntu-22.04, tt-umd-ci-ubuntu-20.04] name: Check runner docker diff --git a/README.md b/README.md index 9325f773..955dc47c 100644 --- a/README.md +++ b/README.md @@ -163,3 +163,7 @@ You can also manually auto format the whole repo using mentioned pre-commit: ```bash pre-commit run --all-files ``` + +# Grayskull End of Life + +Grayskull is no longer actively supported by Tenstorrent. [Last UMD commit](https://github.com/tenstorrent/tt-umd/commit/a5b4719b7d44f0c7c953542803faf6851574329a) supporting Grayskull. \ No newline at end of file diff --git a/tests/api/CMakeLists.txt b/tests/api/CMakeLists.txt index ffc15872..7a25f273 100644 --- a/tests/api/CMakeLists.txt +++ b/tests/api/CMakeLists.txt @@ -2,7 +2,6 @@ set(API_TESTS_SRCS test_chip.cpp test_cluster_descriptor.cpp test_cluster.cpp - test_core_coord_translation_gs.cpp test_core_coord_translation_wh.cpp test_core_coord_translation_bh.cpp test_mockup_device.cpp diff --git a/tests/api/test_core_coord_translation_gs.cpp b/tests/api/test_core_coord_translation_gs.cpp deleted file mode 100644 index 944501fa..00000000 --- a/tests/api/test_core_coord_translation_gs.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/* - * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. - * - * SPDX-License-Identifier: Apache-2.0 - */ -#include "gtest/gtest.h" -#include "umd/device/coordinate_manager.h" -#include "umd/device/grayskull_implementation.h" - -using namespace tt::umd; - -// Tests that all physical coordinates are same as all virtual coordinates -// when there is no harvesting. -TEST(CoordinateManager, CoordinateManagerGrayskullNoHarvesting) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false); - - // We expect full grid size since there is no harvesting. - tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE; - for (size_t x = 0; x < tensix_grid_size.x; x++) { - for (size_t y = 0; y < tensix_grid_size.y; y++) { - CoreCoord logical_coords = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL); - CoreCoord virtual_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL); - CoreCoord physical_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL); - - // Virtual and physical coordinates should be the same. - EXPECT_EQ(physical_coords.x, virtual_coords.x); - EXPECT_EQ(physical_coords.y, virtual_coords.y); - } - } -} - -// Test basic translation to virtual and physical noc coordinates. -// We expect that the top left core will have virtual and physical coordinates (1, 1) and (1, 2) for -// the logical coordinates if the first row is harvested. -TEST(CoordinateManager, CoordinateManagerGrayskullTopLeftCore) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false); - - CoreCoord logical_coords = CoreCoord(0, 0, CoreType::TENSIX, CoordSystem::LOGICAL); - - // Always expect same virtual coordinate for (0, 0) logical coordinate. - CoreCoord virtual_cords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL); - EXPECT_EQ(virtual_cords, CoreCoord(1, 1, CoreType::TENSIX, CoordSystem::VIRTUAL)); - - // This depends on harvesting mask. So expected physical coord is specific to this test and Wormhole arch. - CoreCoord physical_cords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL); - EXPECT_EQ(physical_cords, CoreCoord(1, 1, CoreType::TENSIX, CoordSystem::PHYSICAL)); -} - -// Test basic translation to virtual and physical noc coordinates with harvesting. -// We expect that the top left core will have virtual and physical coordinates (1, 1) and (1, 2) for -// the logical coordinates if the first row is harvested. -TEST(CoordinateManager, CoordinateManagerGrayskullTopLeftCoreHarvesting) { - // This is targeting first row of Tensix cores on NOC layout. - const size_t tensix_harvesting_mask = (1 << 0); - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask}); - - CoreCoord logical_coords = CoreCoord(0, 0, CoreType::TENSIX, CoordSystem::LOGICAL); - - // Always expect same virtual coordinate for (0, 0) logical coordinate. - CoreCoord virtual_cords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL); - EXPECT_EQ(virtual_cords, CoreCoord(1, 1, CoreType::TENSIX, CoordSystem::VIRTUAL)); - - // This depends on harvesting mask. So expected physical coord is specific to this test and Wormhole arch. - CoreCoord physical_cords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL); - EXPECT_EQ(physical_cords, CoreCoord(1, 2, CoreType::TENSIX, CoordSystem::PHYSICAL)); -} - -// Test logical to physical, virtual and translated coordinates. -// We always expect that physical, virtual and translated coordinates are the same. -TEST(CoordinateManager, CoordinateManagerGrayskullTranslatingCoords) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false); - tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE; - - for (size_t x = 0; x < tensix_grid_size.x; x++) { - for (size_t y = 0; y < tensix_grid_size.y; y++) { - CoreCoord logical_coords = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL); - CoreCoord virtual_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL); - CoreCoord physical_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL); - CoreCoord translated_coords = - coordinate_manager->translate_coord_to(logical_coords, CoordSystem::TRANSLATED); - - // Virtual, physical and translated coordinates should be the same. - EXPECT_EQ(physical_coords.x, virtual_coords.x); - EXPECT_EQ(physical_coords.y, virtual_coords.y); - - EXPECT_EQ(physical_coords.x, translated_coords.x); - EXPECT_EQ(physical_coords.y, translated_coords.y); - } - } -} - -// Test logical to physical coordinate translation. -// For the full grid of logical coordinates we expect that there are no duplicates of physical coordinates. -// For the reverse mapping back of physical to logical coordinates we expect that same logical coordinates are returned -// as from original mapping. -TEST(CoordinateManager, CoordinateManagerGrayskullLogicalPhysicalMapping) { - const size_t max_num_harvested_y = 10; - const tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE; - - for (size_t tensix_harvesting_mask = 0; tensix_harvesting_mask < (1 << max_num_harvested_y); - tensix_harvesting_mask++) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask}); - - std::map logical_to_physical; - std::set physical_coords_set; - - size_t num_harvested_y = CoordinateManager::get_num_harvested(tensix_harvesting_mask); - - for (size_t x = 0; x < tensix_grid_size.x; x++) { - for (size_t y = 0; y < tensix_grid_size.y - num_harvested_y; y++) { - CoreCoord logical_coords = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL); - CoreCoord physical_coords = - coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL); - logical_to_physical[logical_coords] = physical_coords; - - // Expect that logical to physical translation is 1-1 mapping. No duplicates for physical coordinates. - EXPECT_EQ(physical_coords_set.count(physical_coords), 0); - physical_coords_set.insert(physical_coords); - } - } - - // Expect that the number of physical coordinates is equal to the number of workers minus the number of - // harvested rows. - EXPECT_EQ(physical_coords_set.size(), tensix_grid_size.x * (tensix_grid_size.y - num_harvested_y)); - - for (auto it : logical_to_physical) { - CoreCoord physical_coords = it.second; - CoreCoord logical_coords = coordinate_manager->translate_coord_to(physical_coords, CoordSystem::LOGICAL); - - // Expect that reverse mapping of physical coordinates gives the same logical coordinates - // using which we got the physical coordinates. - EXPECT_EQ(it.first, logical_coords); - } - } -} - -// Test logical to virtual coordinate translation. -// For the full grid of logical coordinates we expect that there are no duplicates of virtual coordinates. -// For the reverse mapping back of virtual to logical coordinates we expect that same logical coordinates are returned -// as from original mapping. -TEST(CoordinateManager, CoordinateManagerGrayskullLogicalVirtualMapping) { - const size_t max_num_harvested_y = 10; - const tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE; - - for (size_t tensix_harvesting_mask = 0; tensix_harvesting_mask < (1 << max_num_harvested_y); - tensix_harvesting_mask++) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask}); - - std::map logical_to_virtual; - std::set virtual_coords_set; - - size_t num_harvested_y = CoordinateManager::get_num_harvested(tensix_harvesting_mask); - - for (size_t x = 0; x < tensix_grid_size.x; x++) { - for (size_t y = 0; y < tensix_grid_size.y - num_harvested_y; y++) { - CoreCoord logical_coords = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL); - CoreCoord virtual_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL); - logical_to_virtual[logical_coords] = virtual_coords; - - // Expect that logical to virtual translation is 1-1 mapping. No duplicates for virtual coordinates. - EXPECT_EQ(virtual_coords_set.count(virtual_coords), 0); - virtual_coords_set.insert(virtual_coords); - } - } - - for (auto it : logical_to_virtual) { - CoreCoord virtual_coords = it.second; - CoreCoord logical_coords = coordinate_manager->translate_coord_to(virtual_coords, CoordSystem::LOGICAL); - - // Expect that reverse mapping of virtual coordinates gives the same logical coordinates - // using which we got the virtual coordinates. - EXPECT_EQ(it.first, logical_coords); - } - } -} - -// Test that harvested physical coordinates map to the last row of the virtual coordinates. -TEST(CoordinateManager, CoordinateManagerGrayskullPhysicalHarvestedMapping) { - // Harvest first and second NOC layout row. - const size_t tensix_harvesting_mask = (1 << 0) | (1 << 1); - const size_t num_harvested = CoordinateManager::get_num_harvested(tensix_harvesting_mask); - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask}); - - const std::vector tensix_cores = tt::umd::grayskull::TENSIX_CORES; - const tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE; - - size_t virtual_index = (tensix_grid_size.y - num_harvested) * tensix_grid_size.x; - - for (size_t index = 0; index < num_harvested * tensix_grid_size.x; index++) { - const CoreCoord physical_core = - CoreCoord(tensix_cores[index].x, tensix_cores[index].y, CoreType::TENSIX, CoordSystem::PHYSICAL); - const CoreCoord virtual_core = coordinate_manager->translate_coord_to(physical_core, CoordSystem::VIRTUAL); - - EXPECT_EQ(virtual_core.x, tensix_cores[virtual_index].x); - EXPECT_EQ(virtual_core.y, tensix_cores[virtual_index].y); - - virtual_index++; - } -} - -// Test that harvested physical coordinates map to the last row of the virtual coordinates. -TEST(CoordinateManager, CoordinateManagerGrayskullPhysicalTranslatedHarvestedMapping) { - // Harvest first and second NOC layout row. - const size_t tensix_harvesting_mask = (1 << 0) | (1 << 1); - const size_t num_harvested = CoordinateManager::get_num_harvested(tensix_harvesting_mask); - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask}); - - const std::vector tensix_cores = tt::umd::grayskull::TENSIX_CORES; - const tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE; - - size_t virtual_index = (tensix_grid_size.y - num_harvested) * tensix_grid_size.x; - - for (size_t index = 0; index < num_harvested * tensix_grid_size.x; index++) { - const CoreCoord physical_core = - CoreCoord(tensix_cores[index].x, tensix_cores[index].y, CoreType::TENSIX, CoordSystem::PHYSICAL); - const CoreCoord translated_core = - coordinate_manager->translate_coord_to(physical_core, CoordSystem::TRANSLATED); - - const CoreCoord virtual_core = CoreCoord( - tensix_cores[virtual_index].x, tensix_cores[virtual_index].y, CoreType::TENSIX, CoordSystem::VIRTUAL); - const CoreCoord translated_core_from_virtual = - coordinate_manager->translate_coord_to(virtual_core, CoordSystem::TRANSLATED); - - EXPECT_EQ(translated_core, translated_core_from_virtual); - - EXPECT_EQ(physical_core.x, translated_core.x); - EXPECT_EQ(physical_core.y, translated_core.y); - - virtual_index++; - } -} - -// Test mapping of DRAM coordinates from logical to physical. We have no DRAM harvesting on Grayskull, -// so logical coordinates should cover all physical coordinates. -TEST(CoordinateManager, CoordinateManagerGrayskullDRAMNoHarvesting) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false); - - const size_t num_dram_banks = tt::umd::grayskull::NUM_DRAM_BANKS; - const std::vector& dram_cores = tt::umd::grayskull::DRAM_CORES; - - for (size_t dram_bank = 0; dram_bank < num_dram_banks; dram_bank++) { - const CoreCoord dram_logical(dram_bank, 0, CoreType::DRAM, CoordSystem::LOGICAL); - const CoreCoord expected_physical = - CoreCoord(dram_cores[dram_bank].x, dram_cores[dram_bank].y, CoreType::DRAM, CoordSystem::PHYSICAL); - - const CoreCoord dram_physical = coordinate_manager->translate_coord_to(dram_logical, CoordSystem::PHYSICAL); - - EXPECT_EQ(dram_physical, expected_physical); - } -} - -// Test that virtual, physical and translated coordinates are the same for all logical PCIE coordinates. -TEST(CoordinateManager, CoordinateManagerGrayskullPCIETranslation) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false); - const tt_xy_pair pcie_grid_size = tt::umd::grayskull::PCIE_GRID_SIZE; - - for (size_t x = 0; x < pcie_grid_size.x; x++) { - for (size_t y = 0; y < pcie_grid_size.y; y++) { - const CoreCoord pcie_logical = CoreCoord(x, y, CoreType::PCIE, CoordSystem::LOGICAL); - const CoreCoord pcie_virtual = coordinate_manager->translate_coord_to(pcie_logical, CoordSystem::VIRTUAL); - const CoreCoord pcie_physical = coordinate_manager->translate_coord_to(pcie_logical, CoordSystem::PHYSICAL); - const CoreCoord pcie_translated = - coordinate_manager->translate_coord_to(pcie_logical, CoordSystem::TRANSLATED); - - EXPECT_EQ(pcie_virtual.x, pcie_physical.x); - EXPECT_EQ(pcie_virtual.y, pcie_physical.y); - - EXPECT_EQ(pcie_physical.x, pcie_translated.x); - EXPECT_EQ(pcie_physical.y, pcie_translated.y); - } - } -} - -// Test that virtual, physical and translated coordinates are the same for all logical ARC coordinates. -TEST(CoordinateManager, CoordinateManagerGrayskullARCTranslation) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false); - const tt_xy_pair arc_grid_size = tt::umd::grayskull::ARC_GRID_SIZE; - - for (size_t x = 0; x < arc_grid_size.x; x++) { - for (size_t y = 0; y < arc_grid_size.y; y++) { - const CoreCoord arc_logical = CoreCoord(x, y, CoreType::ARC, CoordSystem::LOGICAL); - const CoreCoord arc_virtual = coordinate_manager->translate_coord_to(arc_logical, CoordSystem::VIRTUAL); - const CoreCoord arc_physical = coordinate_manager->translate_coord_to(arc_logical, CoordSystem::PHYSICAL); - const CoreCoord arc_translated = - coordinate_manager->translate_coord_to(arc_logical, CoordSystem::TRANSLATED); - - EXPECT_EQ(arc_virtual.x, arc_physical.x); - EXPECT_EQ(arc_virtual.y, arc_physical.y); - - EXPECT_EQ(arc_physical.x, arc_translated.x); - EXPECT_EQ(arc_physical.y, arc_translated.y); - } - } -} - -// Test that we assert properly if DRAM harvesting mask is non-zero for Grayskull. -TEST(CoordinateManager, CoordinateManagerGrayskullDRAMHarvestingAssert) { - EXPECT_THROW(CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {0, 1}), std::runtime_error); -} - -// Test that we assert properly if ETH harvesting mask is non-zero for Grayskull. -TEST(CoordinateManager, CoordinateManagerGrayskullETHHarvestingAssert) { - EXPECT_THROW( - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {0, 0, 1}), std::runtime_error); -} - -// Test that we properly get harvesting mask that is based on the physical layout of the chip. -TEST(CoordinateManager, CoordinateManagerGrayskullPhysicalLayoutTensixHarvestingMask) { - const size_t max_num_harvested_y = 10; - - for (size_t tensix_harvesting_mask = 0; tensix_harvesting_mask < (1 << max_num_harvested_y); - tensix_harvesting_mask++) { - const HarvestingMasks harvesting_masks = {.tensix_harvesting_mask = tensix_harvesting_mask}; - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, harvesting_masks); - - EXPECT_EQ(coordinate_manager->get_harvesting_masks().tensix_harvesting_mask, tensix_harvesting_mask); - } -} - -// Test whether we properly shuffle the harvesting mask based on the physical layout of the chip. -TEST(CoordinateManager, CoordinateManagerGrayskullHarvestingShuffle) { - for (size_t i = 0; i < tt::umd::grayskull::LOGICAL_HARVESTING_LAYOUT.size(); i++) { - const size_t tensix_harvesting_mask_physical_layout = (1 << tt::umd::grayskull::LOGICAL_HARVESTING_LAYOUT[i]); - const size_t tensix_harvesting_mask = CoordinateManager::shuffle_tensix_harvesting_mask( - tt::ARCH::GRAYSKULL, tensix_harvesting_mask_physical_layout); - - EXPECT_EQ(tensix_harvesting_mask, 1 << i); - } -} - -TEST(CoordinateManager, CoordinateManagerGrayskullTranslationWithoutCoreType) { - std::shared_ptr coordinate_manager = - CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false); - - EXPECT_EQ( - coordinate_manager->translate_coord_to({0, 0}, CoordSystem::PHYSICAL, CoordSystem::PHYSICAL).core_type, - CoreType::ROUTER_ONLY); - EXPECT_EQ( - coordinate_manager->translate_coord_to({0, 0}, CoordSystem::VIRTUAL, CoordSystem::PHYSICAL).core_type, - CoreType::ROUTER_ONLY); - EXPECT_EQ( - coordinate_manager->translate_coord_to({2, 2}, CoordSystem::PHYSICAL, CoordSystem::PHYSICAL).core_type, - CoreType::TENSIX); - // Not allowed for logical coord system. - EXPECT_THROW( - coordinate_manager->translate_coord_to({0, 0}, CoordSystem::LOGICAL, CoordSystem::PHYSICAL), - std::runtime_error); - // Throws if nothing is located at this coordinate. - EXPECT_THROW( - coordinate_manager->translate_coord_to({100, 100}, CoordSystem::PHYSICAL, CoordSystem::PHYSICAL), - std::runtime_error); -} diff --git a/tests/grayskull/CMakeLists.txt b/tests/grayskull/CMakeLists.txt deleted file mode 100644 index 5231b27d..00000000 --- a/tests/grayskull/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -set(UNIT_TESTS_GS_SRCS test_cluster_gs.cpp) - -add_executable(unit_tests_grayskull ${UNIT_TESTS_GS_SRCS}) -target_link_libraries(unit_tests_grayskull PRIVATE test_common) -set_target_properties( - unit_tests_grayskull - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY - ${CMAKE_BINARY_DIR}/test/umd/grayskull - OUTPUT_NAME - unit_tests -) - -add_custom_target(umd_unit_tests DEPENDS unit_tests_grayskull) diff --git a/tests/grayskull/test_cluster_gs.cpp b/tests/grayskull/test_cluster_gs.cpp deleted file mode 100644 index d819da64..00000000 --- a/tests/grayskull/test_cluster_gs.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#include - -#include "gtest/gtest.h" -#include "l1_address_map.h" -#include "tests/test_utils/device_test_utils.hpp" -#include "tests/test_utils/generate_cluster_desc.hpp" -#include "umd/device/cluster.h" -#include "umd/device/grayskull_implementation.h" -#include "umd/device/tt_cluster_descriptor.h" -#include "umd/device/tt_soc_descriptor.h" - -using namespace tt::umd; - -constexpr std::uint32_t DRAM_BARRIER_BASE = 0; - -static void set_barrier_params(Cluster& cluster) { - // Populate address map and NOC parameters that the driver needs for memory barriers. - // Grayskull doesn't have ETH, so we don't need to populate the ETH barrier address. - cluster.set_barrier_address_params({l1_mem::address_map::L1_BARRIER_BASE, 0u, DRAM_BARRIER_BASE}); -} - -TEST(SiliconDriverGS, CreateDestroySequential) { - std::set target_devices = {0}; - uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_device_params default_params; - for (int i = 0; i < 100; i++) { - Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true); - cluster.start_device(default_params); - cluster.close_device(); - } -} - -TEST(SiliconDriverGS, CreateMultipleInstance) { - std::set target_devices = {0}; - uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_device_params default_params; - default_params.init_device = false; - std::unordered_map concurrent_devices = {}; - for (int i = 0; i < 100; i++) { - concurrent_devices.insert({i, new Cluster(num_host_mem_ch_per_mmio_device, false, true)}); - concurrent_devices.at(i)->start_device(default_params); - } - - for (auto& cluster : concurrent_devices) { - cluster.second->close_device(); - delete cluster.second; - } -} - -TEST(SiliconDriverGS, Harvesting) { - std::unordered_map simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}}; - uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); - - for (const auto& chip_id : cluster.get_target_device_ids()) { - auto soc_desc = cluster.get_soc_descriptor(chip_id); - ASSERT_NE(soc_desc.get_harvested_grid_size(CoreType::TENSIX), tt_xy_pair(0, 0)) - << "Expected Driver to have performed harvesting"; - ASSERT_LE(soc_desc.get_cores(CoreType::TENSIX).size(), 96) - << "Expected SOC descriptor with harvesting to have less than or equal to 96 workers for chip " << chip_id; - - // harvesting info stored in soc descriptor is in logical coordinates. - ASSERT_EQ( - soc_desc.harvesting_masks.tensix_harvesting_mask & - simulated_harvesting_masks.at(chip_id).tensix_harvesting_mask, - simulated_harvesting_masks.at(chip_id).tensix_harvesting_mask) - << "Expected first chip to include simulated harvesting mask of 6"; - } - cluster.close_device(); -} - -TEST(SiliconDriverGS, CustomSocDesc) { - std::set target_devices = {0}; - std::unordered_map simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}}; - uint32_t num_host_mem_ch_per_mmio_device = 1; - // Initialize the driver with a 1x1 descriptor and explicitly do not perform harvesting - Cluster cluster = Cluster( - test_utils::GetAbsPath("./tests/soc_descs/grayskull_1x1_arch.yaml"), - target_devices, - num_host_mem_ch_per_mmio_device, - false, - true, - false, - simulated_harvesting_masks); - for (const auto& chip_id : cluster.get_target_device_ids()) { - auto soc_desc = cluster.get_soc_descriptor(chip_id); - ASSERT_NE(soc_desc.get_harvested_grid_size(CoreType::TENSIX), tt_xy_pair(0, 0)) - << "SOC descriptors should not be modified when harvesting is disabled"; - ASSERT_EQ(soc_desc.get_cores(CoreType::TENSIX).size(), 1) - << "Expected 1x1 SOC descriptor to be unmodified by driver"; - } -} - -TEST(SiliconDriverGS, HarvestingRuntime) { - auto get_static_tlb_index = [](tt_xy_pair target) { - int flat_index = target.y * tt::umd::grayskull::GRID_SIZE_X + target.x; - if (flat_index == 0) { - return -1; - } - return flat_index; - }; - - std::set target_devices = {0}; - std::unordered_map simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}}; - uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); - - for (int i = 0; i < target_devices.size(); i++) { - // Iterate over devices and only setup static TLBs for functional worker cores - auto& sdesc = cluster.get_soc_descriptor(i); - for (auto& core : sdesc.get_cores(CoreType::TENSIX)) { - // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE. - cluster.configure_tlb(i, core, get_static_tlb_index(core), l1_mem::address_map::DATA_BUFFER_SPACE_BASE); - } - } - - tt_device_params default_params; - cluster.start_device(default_params); - - std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector dynamic_tlb_vector_to_write = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; - std::vector dynamic_readback_vec = {}; - std::vector readback_vec = {}; - std::vector zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - float timeout_in_seconds = 10; - // Check functionality of Static TLBs by reading adn writing from statically mapped address space - for (int i = 0; i < target_devices.size(); i++) { - std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; - std::uint32_t dynamic_write_address = 0x30000000; - for (int loop = 0; loop < 100; - loop++) { // Write to each core a 100 times at different statically mapped addresses - for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) { - cluster.write_to_device( - vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), i, core, address, ""); - cluster.write_to_device( - vector_to_write.data(), - vector_to_write.size() * sizeof(std::uint32_t), - i, - core, - dynamic_write_address, - "SMALL_READ_WRITE_TLB"); - auto start_time = std::chrono::high_resolution_clock::now(); - while (!(vector_to_write == readback_vec)) { - float wait_duration = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start_time) - .count(); - if (wait_duration > timeout_in_seconds) { - break; - } - test_utils::read_data_from_device(cluster, readback_vec, i, core, address, 40, ""); - test_utils::read_data_from_device( - cluster, dynamic_readback_vec, i, core, dynamic_write_address, 40, "SMALL_READ_WRITE_TLB"); - } - ASSERT_EQ(vector_to_write, readback_vec) - << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; - cluster.write_to_device( - zeros.data(), - zeros.size() * sizeof(std::uint32_t), - i, - core, - address, - "SMALL_READ_WRITE_TLB"); // Clear any written data - cluster.write_to_device( - zeros.data(), - zeros.size() * sizeof(std::uint32_t), - i, - core, - dynamic_write_address, - "SMALL_READ_WRITE_TLB"); // Clear any written data - readback_vec = {}; - dynamic_readback_vec = {}; - } - address += 0x20; // Increment by uint32_t size for each write - dynamic_write_address += 0x20; - } - } - cluster.close_device(); -} - -TEST(SiliconDriverGS, StaticTLB_RW) { - auto get_static_tlb_index = [](tt_xy_pair target) { - int flat_index = target.y * tt::umd::grayskull::GRID_SIZE_X + target.x; - if (flat_index == 0) { - return -1; - } - return flat_index; - }; - std::set target_devices = {0}; - - uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true); - for (int i = 0; i < target_devices.size(); i++) { - // Iterate over devices and only setup static TLBs for worker cores - auto& sdesc = cluster.get_soc_descriptor(i); - for (auto& core : sdesc.get_cores(CoreType::TENSIX)) { - // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE. - cluster.configure_tlb( - i, core, get_static_tlb_index(core), l1_mem::address_map::DATA_BUFFER_SPACE_BASE, TLB_DATA::Posted); - } - } - - tt_device_params default_params; - cluster.start_device(default_params); - - std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector readback_vec = {}; - std::vector zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - float timeout_in_seconds = 10; - // Check functionality of Static TLBs by reading adn writing from statically mapped address space - for (int i = 0; i < target_devices.size(); i++) { - std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; - for (int loop = 0; loop < 100; - loop++) { // Write to each core a 100 times at different statically mapped addresses - for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) { - cluster.write_to_device( - vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), i, core, address, ""); - auto start_time = std::chrono::high_resolution_clock::now(); - while (!(vector_to_write == readback_vec)) { - float wait_duration = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start_time) - .count(); - if (wait_duration > timeout_in_seconds) { - break; - } - test_utils::read_data_from_device(cluster, readback_vec, i, core, address, 40, ""); - } - ASSERT_EQ(vector_to_write, readback_vec) - << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; - cluster.write_to_device( - zeros.data(), - zeros.size() * sizeof(std::uint32_t), - i, - core, - address, - "SMALL_READ_WRITE_TLB"); // Clear any written data - readback_vec = {}; - } - address += 0x20; // Increment by uint32_t size for each write - } - } - cluster.close_device(); -} - -TEST(SiliconDriverGS, DynamicTLB_RW) { - // Don't use any static TLBs in this test. All writes go through a dynamic TLB that needs to be reconfigured for - // each transaction - std::set target_devices = {0}; - - uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true); - cluster.set_fallback_tlb_ordering_mode( - "SMALL_READ_WRITE_TLB", TLB_DATA::Posted); // Explicitly test API to set fallback tlb ordering mode - tt_device_params default_params; - cluster.start_device(default_params); - - std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - std::vector readback_vec = {}; - float timeout_in_seconds = 10; - - for (int i = 0; i < target_devices.size(); i++) { - std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; - for (int loop = 0; loop < 100; - loop++) { // Write to each core a 100 times at different statically mapped addresses - for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) { - cluster.write_to_device( - vector_to_write.data(), - vector_to_write.size() * sizeof(std::uint32_t), - i, - core, - address, - "SMALL_READ_WRITE_TLB"); - auto start_time = std::chrono::high_resolution_clock::now(); - while (!(vector_to_write == readback_vec)) { - float wait_duration = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start_time) - .count(); - if (wait_duration > timeout_in_seconds) { - break; - } - test_utils::read_data_from_device( - cluster, readback_vec, tt_cxy_pair(i, core), address, 40, "SMALL_READ_WRITE_TLB"); - } - - ASSERT_EQ(vector_to_write, readback_vec) - << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; - cluster.write_to_device( - zeros.data(), - zeros.size() * sizeof(std::uint32_t), - i, - core, - address, - "SMALL_READ_WRITE_TLB"); // Clear any written data - readback_vec = {}; - } - address += 0x20; // Increment by uint32_t size for each write - } - } - cluster.close_device(); -} - -TEST(SiliconDriverGS, MultiThreadedDevice) { - // Have 2 threads read and write from a single device concurrently - // All transactions go through a single Dynamic TLB. We want to make sure this is thread/process safe - - std::set target_devices = {0}; - - uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true); - - tt_device_params default_params; - cluster.start_device(default_params); - - std::thread th1 = std::thread([&] { - std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector readback_vec = {}; - float timeout_in_seconds = 10; - std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; - for (int loop = 0; loop < 100; loop++) { - for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) { - cluster.write_to_device( - vector_to_write.data(), - vector_to_write.size() * sizeof(std::uint32_t), - 0, - core, - address, - "SMALL_READ_WRITE_TLB"); - auto start_time = std::chrono::high_resolution_clock::now(); - while (!(vector_to_write == readback_vec)) { - float wait_duration = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start_time) - .count(); - if (wait_duration > timeout_in_seconds) { - break; - } - test_utils::read_data_from_device( - cluster, readback_vec, 0, core, address, 40, "SMALL_READ_WRITE_TLB"); - } - ASSERT_EQ(vector_to_write, readback_vec) - << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; - readback_vec = {}; - } - address += 0x20; - } - }); - - std::thread th2 = std::thread([&] { - std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector readback_vec = {}; - float timeout_in_seconds = 10; - std::uint32_t address = 0x30000000; - for (auto& core_ls : cluster.get_soc_descriptor(0).get_dram_cores()) { - for (int loop = 0; loop < 100; loop++) { - for (auto& core : core_ls) { - cluster.write_to_device( - vector_to_write.data(), - vector_to_write.size() * sizeof(std::uint32_t), - 0, - core, - address, - "SMALL_READ_WRITE_TLB"); - auto start_time = std::chrono::high_resolution_clock::now(); - while (!(vector_to_write == readback_vec)) { - float wait_duration = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start_time) - .count(); - if (wait_duration > timeout_in_seconds) { - break; - } - test_utils::read_data_from_device( - cluster, readback_vec, 0, core, address, 40, "SMALL_READ_WRITE_TLB"); - } - ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y - << "does not match what was written"; - readback_vec = {}; - } - address += 0x20; - } - } - }); - - th1.join(); - th2.join(); - cluster.close_device(); -} - -TEST(SiliconDriverGS, MultiThreadedMemBar) { // this tests takes ~5 mins to run - // Have 2 threads read and write from a single device concurrently - // All (fairly large) transactions go through a static TLB. - // We want to make sure the memory barrier is thread/process safe. - - // Memory barrier flags get sent to address 0 for all channels in this test - - auto get_static_tlb_index = [](tt_xy_pair target) { - int flat_index = target.y * tt::umd::grayskull::GRID_SIZE_X + target.x; - if (flat_index == 0) { - return -1; - } - return flat_index; - }; - - std::set target_devices = {0}; - uint32_t base_addr = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; - uint32_t num_host_mem_ch_per_mmio_device = 1; - - Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true); - - for (int i = 0; i < target_devices.size(); i++) { - // Iterate over devices and only setup static TLBs for functional worker cores - auto& sdesc = cluster.get_soc_descriptor(i); - for (auto& core : sdesc.get_cores(CoreType::TENSIX)) { - // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE. - cluster.configure_tlb(i, core, get_static_tlb_index(core), base_addr); - } - } - - tt_device_params default_params; - cluster.start_device(default_params); - std::vector readback_membar_vec = {}; - for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) { - test_utils::read_data_from_device( - cluster, readback_membar_vec, 0, core, l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); - ASSERT_EQ( - readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all workers - readback_membar_vec = {}; - } - - for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) { - test_utils::read_data_from_device( - cluster, readback_membar_vec, 0, core, l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); - ASSERT_EQ( - readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all workers - readback_membar_vec = {}; - } - - for (int chan = 0; chan < cluster.get_soc_descriptor(0).get_num_dram_channels(); chan++) { - auto core = cluster.get_soc_descriptor(0).get_dram_core_for_channel(chan, 0); - test_utils::read_data_from_device( - cluster, readback_membar_vec, 0, core, DRAM_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); - ASSERT_EQ( - readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all DRAM - readback_membar_vec = {}; - } - // Launch 2 thread accessing different locations of L1 and using memory barrier between write and read - // Ensure now RAW race and membars are thread safe - std::vector vec1(25600); - std::vector vec2(25600); - std::vector zeros(25600, 0); - - for (int i = 0; i < vec1.size(); i++) { - vec1.at(i) = i; - } - for (int i = 0; i < vec2.size(); i++) { - vec2.at(i) = vec1.size() + i; - } - - std::thread th1 = std::thread([&] { - std::uint32_t address = base_addr; - for (int loop = 0; loop < 100; loop++) { - for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) { - std::vector readback_vec = {}; - cluster.write_to_device(vec1.data(), vec1.size() * sizeof(std::uint32_t), 0, core, address, ""); - cluster.l1_membar(0, "", {core}); - test_utils::read_data_from_device(cluster, readback_vec, 0, core, address, 4 * vec1.size(), ""); - ASSERT_EQ(readback_vec, vec1); - cluster.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), 0, core, address, ""); - readback_vec = {}; - } - } - }); - - std::thread th2 = std::thread([&] { - std::uint32_t address = base_addr + vec1.size() * 4; - for (int loop = 0; loop < 100; loop++) { - for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) { - std::vector readback_vec = {}; - cluster.write_to_device(vec2.data(), vec2.size() * sizeof(std::uint32_t), 0, core, address, ""); - cluster.l1_membar(0, "", {core}); - test_utils::read_data_from_device(cluster, readback_vec, 0, core, address, 4 * vec2.size(), ""); - ASSERT_EQ(readback_vec, vec2); - cluster.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), 0, core, address, ""); - readback_vec = {}; - } - } - }); - - th1.join(); - th2.join(); - - for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) { - test_utils::read_data_from_device( - cluster, readback_membar_vec, 0, core, l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); - ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in correct sate workers - readback_membar_vec = {}; - } - - cluster.close_device(); -} - -/** - * Copied from Wormhole unit tests. - */ -TEST(SiliconDriverGS, SysmemTestWithPcie) { - Cluster cluster( - test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), - {0}, - 1, // one "host memory channel", currently a 1G huge page - false, // skip driver allocs - no (don't skip) - true, // clean system resources - yes - true); // perform harvesting - yes - - cluster.start_device(tt_device_params{}); // no special parameters - - const chip_id_t mmio_chip_id = 0; - const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).get_cores(CoreType::PCIE).at(0); - const size_t test_size_bytes = 0x4000; // Arbitrarilly chosen, but small size so the test runs quickly. - - // PCIe core is at (x=0, y=4) on Grayskull NOC0. - ASSERT_EQ(PCIE.x, 0); - ASSERT_EQ(PCIE.y, 4); - - // Bad API: how big is the buffer? How do we know it's big enough? - // Situation today is that there's a 1G hugepage behind it, although this is - // unclear from the API and may change in the future. - uint8_t* sysmem = (uint8_t*)cluster.host_dma_address(0, 0, 0); - ASSERT_NE(sysmem, nullptr); - - uint64_t base_address = cluster.get_pcie_base_addr_from_device(mmio_chip_id); - - // Buffer that we will use to read sysmem into, then write sysmem from. - std::vector buffer(test_size_bytes, 0x0); - - // Step 1: Fill sysmem with random bytes. - test_utils::fill_with_random_bytes(sysmem, test_size_bytes); - - // Step 2: Read sysmem into buffer. - cluster.read_from_device(&buffer[0], mmio_chip_id, PCIE, base_address, buffer.size(), "REG_TLB"); - - // Step 3: Verify that buffer matches sysmem. - ASSERT_EQ(buffer, std::vector(sysmem, sysmem + test_size_bytes)); - - // Step 4: Fill buffer with random bytes. - test_utils::fill_with_random_bytes(&buffer[0], test_size_bytes); - - // Step 5: Write buffer into sysmem, overwriting what was there. - cluster.write_to_device(&buffer[0], buffer.size(), mmio_chip_id, PCIE, base_address, "REG_TLB"); - - // Step 5b: Read back sysmem into a throwaway buffer. The intent is to - // ensure the write has completed before we check sysmem against buffer. - std::vector throwaway(test_size_bytes, 0x0); - cluster.read_from_device(&throwaway[0], mmio_chip_id, PCIE, base_address, throwaway.size(), "REG_TLB"); - - // Step 6: Verify that sysmem matches buffer. - ASSERT_EQ(buffer, std::vector(sysmem, sysmem + test_size_bytes)); -}