diff --git a/.github/workflows/build-and-run-all-tests.yml b/.github/workflows/build-and-run-all-tests.yml
index 3ad4e86c..f435f5d4 100644
--- a/.github/workflows/build-and-run-all-tests.yml
+++ b/.github/workflows/build-and-run-all-tests.yml
@@ -14,7 +14,6 @@ jobs:
       fail-fast: false
       matrix:
         test-group: [
-          {arch: grayskull},
           {arch: wormhole_b0},
           {arch: blackhole},
         ]
@@ -35,9 +34,6 @@ jobs:
       fail-fast: false
       matrix:
         test-group: [
-          {arch: grayskull, card: e75, timeout: 10},
-          {arch: grayskull, card: e150, timeout: 10},
-          {arch: grayskull, card: e300, timeout: 10},
           {arch: wormhole_b0, card: n150, timeout: 5},
           {arch: wormhole_b0, card: n300, timeout: 15},
           {arch: blackhole, card: p150, timeout: 15},
diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml
index b7bfa2ee..1d7513d0 100644
--- a/.github/workflows/build-tests.yml
+++ b/.github/workflows/build-tests.yml
@@ -21,7 +21,6 @@ on:
         description: 'The architecture to build for'
         type: choice
         options:
-          - grayskull
           - wormhole_b0
           - blackhole
       ubuntu-version:
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 4215dd51..500f1414 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -23,7 +23,6 @@ on:
         description: 'The architecture to build for'
         type: choice
         options:
-          - grayskull
           - wormhole_b0
           - blackhole
       ubuntu-version:
@@ -38,9 +37,6 @@ on:
         description: 'The card to run tests on'
         type: choice
         options:
-          - e75
-          - e150
-          - e300
           - n150
           - n300
       timeout:
diff --git a/.github/workflows/test-runner.yaml b/.github/workflows/test-runner.yaml
index c871c773..f1c0ac6a 100644
--- a/.github/workflows/test-runner.yaml
+++ b/.github/workflows/test-runner.yaml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        machine: [ubuntu-20.04, ubuntu-22.04, e75, e150, e300, n150, n300]
+        machine: [ubuntu-20.04, ubuntu-22.04, n150, n300]
 
     name: Check runner
     runs-on: ${{ matrix.machine }}
@@ -64,7 +64,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        machine: [ubuntu-20.04, ubuntu-22.04, e75, e150, e300, n150, n300]
+        machine: [ubuntu-20.04, ubuntu-22.04, n150, n300]
         image: [tt-umd-ci-ubuntu-22.04, tt-umd-ci-ubuntu-20.04]
 
     name: Check runner docker
diff --git a/README.md b/README.md
index 9325f773..955dc47c 100644
--- a/README.md
+++ b/README.md
@@ -163,3 +163,7 @@ You can also manually auto format the whole repo using mentioned pre-commit:
 ```bash
    pre-commit run --all-files
 ```
+
+# Grayskull End of Life
+
+Grayskull is no longer actively supported by Tenstorrent. The last UMD commit that supports Grayskull is [a5b4719](https://github.com/tenstorrent/tt-umd/commit/a5b4719b7d44f0c7c953542803faf6851574329a).
\ No newline at end of file
diff --git a/tests/api/CMakeLists.txt b/tests/api/CMakeLists.txt
index ffc15872..7a25f273 100644
--- a/tests/api/CMakeLists.txt
+++ b/tests/api/CMakeLists.txt
@@ -2,7 +2,6 @@ set(API_TESTS_SRCS
     test_chip.cpp
     test_cluster_descriptor.cpp
     test_cluster.cpp
-    test_core_coord_translation_gs.cpp
     test_core_coord_translation_wh.cpp
     test_core_coord_translation_bh.cpp
     test_mockup_device.cpp
diff --git a/tests/api/test_core_coord_translation_gs.cpp b/tests/api/test_core_coord_translation_gs.cpp
deleted file mode 100644
index 944501fa..00000000
--- a/tests/api/test_core_coord_translation_gs.cpp
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
- *
- * SPDX-License-Identifier: Apache-2.0
- */
-#include "gtest/gtest.h"
-#include "umd/device/coordinate_manager.h"
-#include "umd/device/grayskull_implementation.h"
-
-using namespace tt::umd;
-
-// Tests that all physical coordinates are same as all virtual coordinates
-// when there is no harvesting.
-TEST(CoordinateManager, CoordinateManagerGrayskullNoHarvesting) {
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false);
-
-    // We expect full grid size since there is no harvesting.
-    tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE;
-    for (size_t x = 0; x < tensix_grid_size.x; x++) {
-        for (size_t y = 0; y < tensix_grid_size.y; y++) {
-            CoreCoord logical_coords = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL);
-            CoreCoord virtual_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL);
-            CoreCoord physical_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL);
-
-            // Virtual and physical coordinates should be the same.
-            EXPECT_EQ(physical_coords.x, virtual_coords.x);
-            EXPECT_EQ(physical_coords.y, virtual_coords.y);
-        }
-    }
-}
-
-// Test basic translation to virtual and physical noc coordinates.
-// We expect that the top left core will have virtual and physical coordinates (1, 1) and (1, 2) for
-// the logical coordinates if the first row is harvested.
-TEST(CoordinateManager, CoordinateManagerGrayskullTopLeftCore) {
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false);
-
-    CoreCoord logical_coords = CoreCoord(0, 0, CoreType::TENSIX, CoordSystem::LOGICAL);
-
-    // Always expect same virtual coordinate for (0, 0) logical coordinate.
-    CoreCoord virtual_cords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL);
-    EXPECT_EQ(virtual_cords, CoreCoord(1, 1, CoreType::TENSIX, CoordSystem::VIRTUAL));
-
-    // This depends on harvesting mask. So expected physical coord is specific to this test and Wormhole arch.
-    CoreCoord physical_cords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL);
-    EXPECT_EQ(physical_cords, CoreCoord(1, 1, CoreType::TENSIX, CoordSystem::PHYSICAL));
-}
-
-// Test basic translation to virtual and physical noc coordinates with harvesting.
-// We expect that the top left core will have virtual and physical coordinates (1, 1) and (1, 2) for
-// the logical coordinates if the first row is harvested.
-TEST(CoordinateManager, CoordinateManagerGrayskullTopLeftCoreHarvesting) {
-    // This is targeting first row of Tensix cores on NOC layout.
-    const size_t tensix_harvesting_mask = (1 << 0);
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask});
-
-    CoreCoord logical_coords = CoreCoord(0, 0, CoreType::TENSIX, CoordSystem::LOGICAL);
-
-    // Always expect same virtual coordinate for (0, 0) logical coordinate.
-    CoreCoord virtual_cords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL);
-    EXPECT_EQ(virtual_cords, CoreCoord(1, 1, CoreType::TENSIX, CoordSystem::VIRTUAL));
-
-    // This depends on harvesting mask. So expected physical coord is specific to this test and Wormhole arch.
-    CoreCoord physical_cords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL);
-    EXPECT_EQ(physical_cords, CoreCoord(1, 2, CoreType::TENSIX, CoordSystem::PHYSICAL));
-}
-
-// Test logical to physical, virtual and translated coordinates.
-// We always expect that physical, virtual and translated coordinates are the same.
-TEST(CoordinateManager, CoordinateManagerGrayskullTranslatingCoords) {
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false);
-    tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE;
-
-    for (size_t x = 0; x < tensix_grid_size.x; x++) {
-        for (size_t y = 0; y < tensix_grid_size.y; y++) {
-            CoreCoord logical_coords = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL);
-            CoreCoord virtual_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL);
-            CoreCoord physical_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL);
-            CoreCoord translated_coords =
-                coordinate_manager->translate_coord_to(logical_coords, CoordSystem::TRANSLATED);
-
-            // Virtual, physical and translated coordinates should be the same.
-            EXPECT_EQ(physical_coords.x, virtual_coords.x);
-            EXPECT_EQ(physical_coords.y, virtual_coords.y);
-
-            EXPECT_EQ(physical_coords.x, translated_coords.x);
-            EXPECT_EQ(physical_coords.y, translated_coords.y);
-        }
-    }
-}
-
-// Test logical to physical coordinate translation.
-// For the full grid of logical coordinates we expect that there are no duplicates of physical coordinates.
-// For the reverse mapping back of physical to logical coordinates we expect that same logical coordinates are returned
-// as from original mapping.
-TEST(CoordinateManager, CoordinateManagerGrayskullLogicalPhysicalMapping) {
-    const size_t max_num_harvested_y = 10;
-    const tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE;
-
-    for (size_t tensix_harvesting_mask = 0; tensix_harvesting_mask < (1 << max_num_harvested_y);
-         tensix_harvesting_mask++) {
-        std::shared_ptr<CoordinateManager> coordinate_manager =
-            CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask});
-
-        std::map<CoreCoord, CoreCoord> logical_to_physical;
-        std::set<CoreCoord> physical_coords_set;
-
-        size_t num_harvested_y = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
-
-        for (size_t x = 0; x < tensix_grid_size.x; x++) {
-            for (size_t y = 0; y < tensix_grid_size.y - num_harvested_y; y++) {
-                CoreCoord logical_coords = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL);
-                CoreCoord physical_coords =
-                    coordinate_manager->translate_coord_to(logical_coords, CoordSystem::PHYSICAL);
-                logical_to_physical[logical_coords] = physical_coords;
-
-                // Expect that logical to physical translation is 1-1 mapping. No duplicates for physical coordinates.
-                EXPECT_EQ(physical_coords_set.count(physical_coords), 0);
-                physical_coords_set.insert(physical_coords);
-            }
-        }
-
-        // Expect that the number of physical coordinates is equal to the number of workers minus the number of
-        // harvested rows.
-        EXPECT_EQ(physical_coords_set.size(), tensix_grid_size.x * (tensix_grid_size.y - num_harvested_y));
-
-        for (auto it : logical_to_physical) {
-            CoreCoord physical_coords = it.second;
-            CoreCoord logical_coords = coordinate_manager->translate_coord_to(physical_coords, CoordSystem::LOGICAL);
-
-            // Expect that reverse mapping of physical coordinates gives the same logical coordinates
-            // using which we got the physical coordinates.
-            EXPECT_EQ(it.first, logical_coords);
-        }
-    }
-}
-
-// Test logical to virtual coordinate translation.
-// For the full grid of logical coordinates we expect that there are no duplicates of virtual coordinates.
-// For the reverse mapping back of virtual to logical coordinates we expect that same logical coordinates are returned
-// as from original mapping.
-TEST(CoordinateManager, CoordinateManagerGrayskullLogicalVirtualMapping) {
-    const size_t max_num_harvested_y = 10;
-    const tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE;
-
-    for (size_t tensix_harvesting_mask = 0; tensix_harvesting_mask < (1 << max_num_harvested_y);
-         tensix_harvesting_mask++) {
-        std::shared_ptr<CoordinateManager> coordinate_manager =
-            CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask});
-
-        std::map<CoreCoord, CoreCoord> logical_to_virtual;
-        std::set<CoreCoord> virtual_coords_set;
-
-        size_t num_harvested_y = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
-
-        for (size_t x = 0; x < tensix_grid_size.x; x++) {
-            for (size_t y = 0; y < tensix_grid_size.y - num_harvested_y; y++) {
-                CoreCoord logical_coords = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL);
-                CoreCoord virtual_coords = coordinate_manager->translate_coord_to(logical_coords, CoordSystem::VIRTUAL);
-                logical_to_virtual[logical_coords] = virtual_coords;
-
-                // Expect that logical to virtual translation is 1-1 mapping. No duplicates for virtual coordinates.
-                EXPECT_EQ(virtual_coords_set.count(virtual_coords), 0);
-                virtual_coords_set.insert(virtual_coords);
-            }
-        }
-
-        for (auto it : logical_to_virtual) {
-            CoreCoord virtual_coords = it.second;
-            CoreCoord logical_coords = coordinate_manager->translate_coord_to(virtual_coords, CoordSystem::LOGICAL);
-
-            // Expect that reverse mapping of virtual coordinates gives the same logical coordinates
-            // using which we got the virtual coordinates.
-            EXPECT_EQ(it.first, logical_coords);
-        }
-    }
-}
-
-// Test that harvested physical coordinates map to the last row of the virtual coordinates.
-TEST(CoordinateManager, CoordinateManagerGrayskullPhysicalHarvestedMapping) {
-    // Harvest first and second NOC layout row.
-    const size_t tensix_harvesting_mask = (1 << 0) | (1 << 1);
-    const size_t num_harvested = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask});
-
-    const std::vector<tt_xy_pair> tensix_cores = tt::umd::grayskull::TENSIX_CORES;
-    const tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE;
-
-    size_t virtual_index = (tensix_grid_size.y - num_harvested) * tensix_grid_size.x;
-
-    for (size_t index = 0; index < num_harvested * tensix_grid_size.x; index++) {
-        const CoreCoord physical_core =
-            CoreCoord(tensix_cores[index].x, tensix_cores[index].y, CoreType::TENSIX, CoordSystem::PHYSICAL);
-        const CoreCoord virtual_core = coordinate_manager->translate_coord_to(physical_core, CoordSystem::VIRTUAL);
-
-        EXPECT_EQ(virtual_core.x, tensix_cores[virtual_index].x);
-        EXPECT_EQ(virtual_core.y, tensix_cores[virtual_index].y);
-
-        virtual_index++;
-    }
-}
-
-// Test that harvested physical coordinates map to the last row of the virtual coordinates.
-TEST(CoordinateManager, CoordinateManagerGrayskullPhysicalTranslatedHarvestedMapping) {
-    // Harvest first and second NOC layout row.
-    const size_t tensix_harvesting_mask = (1 << 0) | (1 << 1);
-    const size_t num_harvested = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {tensix_harvesting_mask});
-
-    const std::vector<tt_xy_pair> tensix_cores = tt::umd::grayskull::TENSIX_CORES;
-    const tt_xy_pair tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE;
-
-    size_t virtual_index = (tensix_grid_size.y - num_harvested) * tensix_grid_size.x;
-
-    for (size_t index = 0; index < num_harvested * tensix_grid_size.x; index++) {
-        const CoreCoord physical_core =
-            CoreCoord(tensix_cores[index].x, tensix_cores[index].y, CoreType::TENSIX, CoordSystem::PHYSICAL);
-        const CoreCoord translated_core =
-            coordinate_manager->translate_coord_to(physical_core, CoordSystem::TRANSLATED);
-
-        const CoreCoord virtual_core = CoreCoord(
-            tensix_cores[virtual_index].x, tensix_cores[virtual_index].y, CoreType::TENSIX, CoordSystem::VIRTUAL);
-        const CoreCoord translated_core_from_virtual =
-            coordinate_manager->translate_coord_to(virtual_core, CoordSystem::TRANSLATED);
-
-        EXPECT_EQ(translated_core, translated_core_from_virtual);
-
-        EXPECT_EQ(physical_core.x, translated_core.x);
-        EXPECT_EQ(physical_core.y, translated_core.y);
-
-        virtual_index++;
-    }
-}
-
-// Test mapping of DRAM coordinates from logical to physical. We have no DRAM harvesting on Grayskull,
-// so logical coordinates should cover all physical coordinates.
-TEST(CoordinateManager, CoordinateManagerGrayskullDRAMNoHarvesting) {
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false);
-
-    const size_t num_dram_banks = tt::umd::grayskull::NUM_DRAM_BANKS;
-    const std::vector<tt_xy_pair>& dram_cores = tt::umd::grayskull::DRAM_CORES;
-
-    for (size_t dram_bank = 0; dram_bank < num_dram_banks; dram_bank++) {
-        const CoreCoord dram_logical(dram_bank, 0, CoreType::DRAM, CoordSystem::LOGICAL);
-        const CoreCoord expected_physical =
-            CoreCoord(dram_cores[dram_bank].x, dram_cores[dram_bank].y, CoreType::DRAM, CoordSystem::PHYSICAL);
-
-        const CoreCoord dram_physical = coordinate_manager->translate_coord_to(dram_logical, CoordSystem::PHYSICAL);
-
-        EXPECT_EQ(dram_physical, expected_physical);
-    }
-}
-
-// Test that virtual, physical and translated coordinates are the same for all logical PCIE coordinates.
-TEST(CoordinateManager, CoordinateManagerGrayskullPCIETranslation) {
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false);
-    const tt_xy_pair pcie_grid_size = tt::umd::grayskull::PCIE_GRID_SIZE;
-
-    for (size_t x = 0; x < pcie_grid_size.x; x++) {
-        for (size_t y = 0; y < pcie_grid_size.y; y++) {
-            const CoreCoord pcie_logical = CoreCoord(x, y, CoreType::PCIE, CoordSystem::LOGICAL);
-            const CoreCoord pcie_virtual = coordinate_manager->translate_coord_to(pcie_logical, CoordSystem::VIRTUAL);
-            const CoreCoord pcie_physical = coordinate_manager->translate_coord_to(pcie_logical, CoordSystem::PHYSICAL);
-            const CoreCoord pcie_translated =
-                coordinate_manager->translate_coord_to(pcie_logical, CoordSystem::TRANSLATED);
-
-            EXPECT_EQ(pcie_virtual.x, pcie_physical.x);
-            EXPECT_EQ(pcie_virtual.y, pcie_physical.y);
-
-            EXPECT_EQ(pcie_physical.x, pcie_translated.x);
-            EXPECT_EQ(pcie_physical.y, pcie_translated.y);
-        }
-    }
-}
-
-// Test that virtual, physical and translated coordinates are the same for all logical ARC coordinates.
-TEST(CoordinateManager, CoordinateManagerGrayskullARCTranslation) {
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false);
-    const tt_xy_pair arc_grid_size = tt::umd::grayskull::ARC_GRID_SIZE;
-
-    for (size_t x = 0; x < arc_grid_size.x; x++) {
-        for (size_t y = 0; y < arc_grid_size.y; y++) {
-            const CoreCoord arc_logical = CoreCoord(x, y, CoreType::ARC, CoordSystem::LOGICAL);
-            const CoreCoord arc_virtual = coordinate_manager->translate_coord_to(arc_logical, CoordSystem::VIRTUAL);
-            const CoreCoord arc_physical = coordinate_manager->translate_coord_to(arc_logical, CoordSystem::PHYSICAL);
-            const CoreCoord arc_translated =
-                coordinate_manager->translate_coord_to(arc_logical, CoordSystem::TRANSLATED);
-
-            EXPECT_EQ(arc_virtual.x, arc_physical.x);
-            EXPECT_EQ(arc_virtual.y, arc_physical.y);
-
-            EXPECT_EQ(arc_physical.x, arc_translated.x);
-            EXPECT_EQ(arc_physical.y, arc_translated.y);
-        }
-    }
-}
-
-// Test that we assert properly if DRAM harvesting mask is non-zero for Grayskull.
-TEST(CoordinateManager, CoordinateManagerGrayskullDRAMHarvestingAssert) {
-    EXPECT_THROW(CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {0, 1}), std::runtime_error);
-}
-
-// Test that we assert properly if ETH harvesting mask is non-zero for Grayskull.
-TEST(CoordinateManager, CoordinateManagerGrayskullETHHarvestingAssert) {
-    EXPECT_THROW(
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, {0, 0, 1}), std::runtime_error);
-}
-
-// Test that we properly get harvesting mask that is based on the physical layout of the chip.
-TEST(CoordinateManager, CoordinateManagerGrayskullPhysicalLayoutTensixHarvestingMask) {
-    const size_t max_num_harvested_y = 10;
-
-    for (size_t tensix_harvesting_mask = 0; tensix_harvesting_mask < (1 << max_num_harvested_y);
-         tensix_harvesting_mask++) {
-        const HarvestingMasks harvesting_masks = {.tensix_harvesting_mask = tensix_harvesting_mask};
-        std::shared_ptr<CoordinateManager> coordinate_manager =
-            CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false, harvesting_masks);
-
-        EXPECT_EQ(coordinate_manager->get_harvesting_masks().tensix_harvesting_mask, tensix_harvesting_mask);
-    }
-}
-
-// Test whether we properly shuffle the harvesting mask based on the physical layout of the chip.
-TEST(CoordinateManager, CoordinateManagerGrayskullHarvestingShuffle) {
-    for (size_t i = 0; i < tt::umd::grayskull::LOGICAL_HARVESTING_LAYOUT.size(); i++) {
-        const size_t tensix_harvesting_mask_physical_layout = (1 << tt::umd::grayskull::LOGICAL_HARVESTING_LAYOUT[i]);
-        const size_t tensix_harvesting_mask = CoordinateManager::shuffle_tensix_harvesting_mask(
-            tt::ARCH::GRAYSKULL, tensix_harvesting_mask_physical_layout);
-
-        EXPECT_EQ(tensix_harvesting_mask, 1 << i);
-    }
-}
-
-TEST(CoordinateManager, CoordinateManagerGrayskullTranslationWithoutCoreType) {
-    std::shared_ptr<CoordinateManager> coordinate_manager =
-        CoordinateManager::create_coordinate_manager(tt::ARCH::GRAYSKULL, false);
-
-    EXPECT_EQ(
-        coordinate_manager->translate_coord_to({0, 0}, CoordSystem::PHYSICAL, CoordSystem::PHYSICAL).core_type,
-        CoreType::ROUTER_ONLY);
-    EXPECT_EQ(
-        coordinate_manager->translate_coord_to({0, 0}, CoordSystem::VIRTUAL, CoordSystem::PHYSICAL).core_type,
-        CoreType::ROUTER_ONLY);
-    EXPECT_EQ(
-        coordinate_manager->translate_coord_to({2, 2}, CoordSystem::PHYSICAL, CoordSystem::PHYSICAL).core_type,
-        CoreType::TENSIX);
-    // Not allowed for logical coord system.
-    EXPECT_THROW(
-        coordinate_manager->translate_coord_to({0, 0}, CoordSystem::LOGICAL, CoordSystem::PHYSICAL),
-        std::runtime_error);
-    // Throws if nothing is located at this coordinate.
-    EXPECT_THROW(
-        coordinate_manager->translate_coord_to({100, 100}, CoordSystem::PHYSICAL, CoordSystem::PHYSICAL),
-        std::runtime_error);
-}
diff --git a/tests/grayskull/CMakeLists.txt b/tests/grayskull/CMakeLists.txt
deleted file mode 100644
index 5231b27d..00000000
--- a/tests/grayskull/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-set(UNIT_TESTS_GS_SRCS test_cluster_gs.cpp)
-
-add_executable(unit_tests_grayskull ${UNIT_TESTS_GS_SRCS})
-target_link_libraries(unit_tests_grayskull PRIVATE test_common)
-set_target_properties(
-    unit_tests_grayskull
-    PROPERTIES
-        RUNTIME_OUTPUT_DIRECTORY
-            ${CMAKE_BINARY_DIR}/test/umd/grayskull
-        OUTPUT_NAME
-            unit_tests
-)
-
-add_custom_target(umd_unit_tests DEPENDS unit_tests_grayskull)
diff --git a/tests/grayskull/test_cluster_gs.cpp b/tests/grayskull/test_cluster_gs.cpp
deleted file mode 100644
index d819da64..00000000
--- a/tests/grayskull/test_cluster_gs.cpp
+++ /dev/null
@@ -1,559 +0,0 @@
-// SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include <thread>
-
-#include "gtest/gtest.h"
-#include "l1_address_map.h"
-#include "tests/test_utils/device_test_utils.hpp"
-#include "tests/test_utils/generate_cluster_desc.hpp"
-#include "umd/device/cluster.h"
-#include "umd/device/grayskull_implementation.h"
-#include "umd/device/tt_cluster_descriptor.h"
-#include "umd/device/tt_soc_descriptor.h"
-
-using namespace tt::umd;
-
-constexpr std::uint32_t DRAM_BARRIER_BASE = 0;
-
-static void set_barrier_params(Cluster& cluster) {
-    // Populate address map and NOC parameters that the driver needs for memory barriers.
-    // Grayskull doesn't have ETH, so we don't need to populate the ETH barrier address.
-    cluster.set_barrier_address_params({l1_mem::address_map::L1_BARRIER_BASE, 0u, DRAM_BARRIER_BASE});
-}
-
-TEST(SiliconDriverGS, CreateDestroySequential) {
-    std::set<chip_id_t> target_devices = {0};
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-    tt_device_params default_params;
-    for (int i = 0; i < 100; i++) {
-        Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true);
-        cluster.start_device(default_params);
-        cluster.close_device();
-    }
-}
-
-TEST(SiliconDriverGS, CreateMultipleInstance) {
-    std::set<chip_id_t> target_devices = {0};
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-    tt_device_params default_params;
-    default_params.init_device = false;
-    std::unordered_map<int, Cluster*> concurrent_devices = {};
-    for (int i = 0; i < 100; i++) {
-        concurrent_devices.insert({i, new Cluster(num_host_mem_ch_per_mmio_device, false, true)});
-        concurrent_devices.at(i)->start_device(default_params);
-    }
-
-    for (auto& cluster : concurrent_devices) {
-        cluster.second->close_device();
-        delete cluster.second;
-    }
-}
-
-TEST(SiliconDriverGS, Harvesting) {
-    std::unordered_map<chip_id_t, HarvestingMasks> simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}};
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-    Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks);
-
-    for (const auto& chip_id : cluster.get_target_device_ids()) {
-        auto soc_desc = cluster.get_soc_descriptor(chip_id);
-        ASSERT_NE(soc_desc.get_harvested_grid_size(CoreType::TENSIX), tt_xy_pair(0, 0))
-            << "Expected Driver to have performed harvesting";
-        ASSERT_LE(soc_desc.get_cores(CoreType::TENSIX).size(), 96)
-            << "Expected SOC descriptor with harvesting to have less than or equal to 96 workers for chip " << chip_id;
-
-        // harvesting info stored in soc descriptor is in logical coordinates.
-        ASSERT_EQ(
-            soc_desc.harvesting_masks.tensix_harvesting_mask &
-                simulated_harvesting_masks.at(chip_id).tensix_harvesting_mask,
-            simulated_harvesting_masks.at(chip_id).tensix_harvesting_mask)
-            << "Expected first chip to include simulated harvesting mask of 6";
-    }
-    cluster.close_device();
-}
-
-TEST(SiliconDriverGS, CustomSocDesc) {
-    std::set<chip_id_t> target_devices = {0};
-    std::unordered_map<chip_id_t, HarvestingMasks> simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}};
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-    // Initialize the driver with a 1x1 descriptor and explicitly do not perform harvesting
-    Cluster cluster = Cluster(
-        test_utils::GetAbsPath("./tests/soc_descs/grayskull_1x1_arch.yaml"),
-        target_devices,
-        num_host_mem_ch_per_mmio_device,
-        false,
-        true,
-        false,
-        simulated_harvesting_masks);
-    for (const auto& chip_id : cluster.get_target_device_ids()) {
-        auto soc_desc = cluster.get_soc_descriptor(chip_id);
-        ASSERT_NE(soc_desc.get_harvested_grid_size(CoreType::TENSIX), tt_xy_pair(0, 0))
-            << "SOC descriptors should not be modified when harvesting is disabled";
-        ASSERT_EQ(soc_desc.get_cores(CoreType::TENSIX).size(), 1)
-            << "Expected 1x1 SOC descriptor to be unmodified by driver";
-    }
-}
-
-TEST(SiliconDriverGS, HarvestingRuntime) {
-    auto get_static_tlb_index = [](tt_xy_pair target) {
-        int flat_index = target.y * tt::umd::grayskull::GRID_SIZE_X + target.x;
-        if (flat_index == 0) {
-            return -1;
-        }
-        return flat_index;
-    };
-
-    std::set<chip_id_t> target_devices = {0};
-    std::unordered_map<chip_id_t, HarvestingMasks> simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}};
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-    Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks);
-
-    for (int i = 0; i < target_devices.size(); i++) {
-        // Iterate over devices and only setup static TLBs for functional worker cores
-        auto& sdesc = cluster.get_soc_descriptor(i);
-        for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
-            // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
-            cluster.configure_tlb(i, core, get_static_tlb_index(core), l1_mem::address_map::DATA_BUFFER_SPACE_BASE);
-        }
-    }
-
-    tt_device_params default_params;
-    cluster.start_device(default_params);
-
-    std::vector<uint32_t> vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    std::vector<uint32_t> dynamic_tlb_vector_to_write = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
-    std::vector<uint32_t> dynamic_readback_vec = {};
-    std::vector<uint32_t> readback_vec = {};
-    std::vector<uint32_t> zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-    float timeout_in_seconds = 10;
-    // Check functionality of Static TLBs by reading adn writing from statically mapped address space
-    for (int i = 0; i < target_devices.size(); i++) {
-        std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE;
-        std::uint32_t dynamic_write_address = 0x30000000;
-        for (int loop = 0; loop < 100;
-             loop++) {  // Write to each core a 100 times at different statically mapped addresses
-            for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
-                cluster.write_to_device(
-                    vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), i, core, address, "");
-                cluster.write_to_device(
-                    vector_to_write.data(),
-                    vector_to_write.size() * sizeof(std::uint32_t),
-                    i,
-                    core,
-                    dynamic_write_address,
-                    "SMALL_READ_WRITE_TLB");
-                auto start_time = std::chrono::high_resolution_clock::now();
-                while (!(vector_to_write == readback_vec)) {
-                    float wait_duration = std::chrono::duration_cast<std::chrono::seconds>(
-                                              std::chrono::high_resolution_clock::now() - start_time)
-                                              .count();
-                    if (wait_duration > timeout_in_seconds) {
-                        break;
-                    }
-                    test_utils::read_data_from_device(cluster, readback_vec, i, core, address, 40, "");
-                    test_utils::read_data_from_device(
-                        cluster, dynamic_readback_vec, i, core, dynamic_write_address, 40, "SMALL_READ_WRITE_TLB");
-                }
-                ASSERT_EQ(vector_to_write, readback_vec)
-                    << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written";
-                cluster.write_to_device(
-                    zeros.data(),
-                    zeros.size() * sizeof(std::uint32_t),
-                    i,
-                    core,
-                    address,
-                    "SMALL_READ_WRITE_TLB");  // Clear any written data
-                cluster.write_to_device(
-                    zeros.data(),
-                    zeros.size() * sizeof(std::uint32_t),
-                    i,
-                    core,
-                    dynamic_write_address,
-                    "SMALL_READ_WRITE_TLB");  // Clear any written data
-                readback_vec = {};
-                dynamic_readback_vec = {};
-            }
-            address += 0x20;  // Increment by uint32_t size for each write
-            dynamic_write_address += 0x20;
-        }
-    }
-    cluster.close_device();
-}
-
-TEST(SiliconDriverGS, StaticTLB_RW) {
-    auto get_static_tlb_index = [](tt_xy_pair target) {
-        int flat_index = target.y * tt::umd::grayskull::GRID_SIZE_X + target.x;
-        if (flat_index == 0) {
-            return -1;
-        }
-        return flat_index;
-    };
-    std::set<chip_id_t> target_devices = {0};
-
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-    Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true);
-    for (int i = 0; i < target_devices.size(); i++) {
-        // Iterate over devices and only setup static TLBs for worker cores
-        auto& sdesc = cluster.get_soc_descriptor(i);
-        for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
-            // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
-            cluster.configure_tlb(
-                i, core, get_static_tlb_index(core), l1_mem::address_map::DATA_BUFFER_SPACE_BASE, TLB_DATA::Posted);
-        }
-    }
-
-    tt_device_params default_params;
-    cluster.start_device(default_params);
-
-    std::vector<uint32_t> vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    std::vector<uint32_t> readback_vec = {};
-    std::vector<uint32_t> zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-    float timeout_in_seconds = 10;
-    // Check functionality of Static TLBs by reading adn writing from statically mapped address space
-    for (int i = 0; i < target_devices.size(); i++) {
-        std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE;
-        for (int loop = 0; loop < 100;
-             loop++) {  // Write to each core a 100 times at different statically mapped addresses
-            for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
-                cluster.write_to_device(
-                    vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), i, core, address, "");
-                auto start_time = std::chrono::high_resolution_clock::now();
-                while (!(vector_to_write == readback_vec)) {
-                    float wait_duration = std::chrono::duration_cast<std::chrono::seconds>(
-                                              std::chrono::high_resolution_clock::now() - start_time)
-                                              .count();
-                    if (wait_duration > timeout_in_seconds) {
-                        break;
-                    }
-                    test_utils::read_data_from_device(cluster, readback_vec, i, core, address, 40, "");
-                }
-                ASSERT_EQ(vector_to_write, readback_vec)
-                    << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written";
-                cluster.write_to_device(
-                    zeros.data(),
-                    zeros.size() * sizeof(std::uint32_t),
-                    i,
-                    core,
-                    address,
-                    "SMALL_READ_WRITE_TLB");  // Clear any written data
-                readback_vec = {};
-            }
-            address += 0x20;  // Increment by uint32_t size for each write
-        }
-    }
-    cluster.close_device();
-}
-
-TEST(SiliconDriverGS, DynamicTLB_RW) {
-    // Don't use any static TLBs in this test. All writes go through a dynamic TLB that needs to be reconfigured for
-    // each transaction
-    std::set<chip_id_t> target_devices = {0};
-
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-    Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true);
-    cluster.set_fallback_tlb_ordering_mode(
-        "SMALL_READ_WRITE_TLB", TLB_DATA::Posted);  // Explicitly test API to set fallback tlb ordering mode
-    tt_device_params default_params;
-    cluster.start_device(default_params);
-
-    std::vector<uint32_t> vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    std::vector<uint32_t> zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-    std::vector<uint32_t> readback_vec = {};
-    float timeout_in_seconds = 10;
-
-    for (int i = 0; i < target_devices.size(); i++) {
-        std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE;
-        for (int loop = 0; loop < 100;
-             loop++) {  // Write to each core a 100 times at different statically mapped addresses
-            for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
-                cluster.write_to_device(
-                    vector_to_write.data(),
-                    vector_to_write.size() * sizeof(std::uint32_t),
-                    i,
-                    core,
-                    address,
-                    "SMALL_READ_WRITE_TLB");
-                auto start_time = std::chrono::high_resolution_clock::now();
-                while (!(vector_to_write == readback_vec)) {
-                    float wait_duration = std::chrono::duration_cast<std::chrono::seconds>(
-                                              std::chrono::high_resolution_clock::now() - start_time)
-                                              .count();
-                    if (wait_duration > timeout_in_seconds) {
-                        break;
-                    }
-                    test_utils::read_data_from_device(
-                        cluster, readback_vec, tt_cxy_pair(i, core), address, 40, "SMALL_READ_WRITE_TLB");
-                }
-
-                ASSERT_EQ(vector_to_write, readback_vec)
-                    << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written";
-                cluster.write_to_device(
-                    zeros.data(),
-                    zeros.size() * sizeof(std::uint32_t),
-                    i,
-                    core,
-                    address,
-                    "SMALL_READ_WRITE_TLB");  // Clear any written data
-                readback_vec = {};
-            }
-            address += 0x20;  // Increment by uint32_t size for each write
-        }
-    }
-    cluster.close_device();
-}
-
-TEST(SiliconDriverGS, MultiThreadedDevice) {
-    // Have 2 threads read and write from a single device concurrently
-    // All transactions go through a single Dynamic TLB. We want to make sure this is thread/process safe
-
-    std::set<chip_id_t> target_devices = {0};
-
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-    Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true);
-
-    tt_device_params default_params;
-    cluster.start_device(default_params);
-
-    std::thread th1 = std::thread([&] {
-        std::vector<uint32_t> vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-        std::vector<uint32_t> readback_vec = {};
-        float timeout_in_seconds = 10;
-        std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE;
-        for (int loop = 0; loop < 100; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
-                cluster.write_to_device(
-                    vector_to_write.data(),
-                    vector_to_write.size() * sizeof(std::uint32_t),
-                    0,
-                    core,
-                    address,
-                    "SMALL_READ_WRITE_TLB");
-                auto start_time = std::chrono::high_resolution_clock::now();
-                while (!(vector_to_write == readback_vec)) {
-                    float wait_duration = std::chrono::duration_cast<std::chrono::seconds>(
-                                              std::chrono::high_resolution_clock::now() - start_time)
-                                              .count();
-                    if (wait_duration > timeout_in_seconds) {
-                        break;
-                    }
-                    test_utils::read_data_from_device(
-                        cluster, readback_vec, 0, core, address, 40, "SMALL_READ_WRITE_TLB");
-                }
-                ASSERT_EQ(vector_to_write, readback_vec)
-                    << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written";
-                readback_vec = {};
-            }
-            address += 0x20;
-        }
-    });
-
-    std::thread th2 = std::thread([&] {
-        std::vector<uint32_t> vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-        std::vector<uint32_t> readback_vec = {};
-        float timeout_in_seconds = 10;
-        std::uint32_t address = 0x30000000;
-        for (auto& core_ls : cluster.get_soc_descriptor(0).get_dram_cores()) {
-            for (int loop = 0; loop < 100; loop++) {
-                for (auto& core : core_ls) {
-                    cluster.write_to_device(
-                        vector_to_write.data(),
-                        vector_to_write.size() * sizeof(std::uint32_t),
-                        0,
-                        core,
-                        address,
-                        "SMALL_READ_WRITE_TLB");
-                    auto start_time = std::chrono::high_resolution_clock::now();
-                    while (!(vector_to_write == readback_vec)) {
-                        float wait_duration = std::chrono::duration_cast<std::chrono::seconds>(
-                                                  std::chrono::high_resolution_clock::now() - start_time)
-                                                  .count();
-                        if (wait_duration > timeout_in_seconds) {
-                            break;
-                        }
-                        test_utils::read_data_from_device(
-                            cluster, readback_vec, 0, core, address, 40, "SMALL_READ_WRITE_TLB");
-                    }
-                    ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y
-                                                             << "does not match what was written";
-                    readback_vec = {};
-                }
-                address += 0x20;
-            }
-        }
-    });
-
-    th1.join();
-    th2.join();
-    cluster.close_device();
-}
-
-TEST(SiliconDriverGS, MultiThreadedMemBar) {  // this tests takes ~5 mins to run
-                                              // Have 2 threads read and write from a single device concurrently
-                                              // All (fairly large) transactions go through a static TLB.
-                                              // We want to make sure the memory barrier is thread/process safe.
-
-    // Memory barrier flags get sent to address 0 for all channels in this test
-
-    auto get_static_tlb_index = [](tt_xy_pair target) {
-        int flat_index = target.y * tt::umd::grayskull::GRID_SIZE_X + target.x;
-        if (flat_index == 0) {
-            return -1;
-        }
-        return flat_index;
-    };
-
-    std::set<chip_id_t> target_devices = {0};
-    uint32_t base_addr = l1_mem::address_map::DATA_BUFFER_SPACE_BASE;
-    uint32_t num_host_mem_ch_per_mmio_device = 1;
-
-    Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true);
-
-    for (int i = 0; i < target_devices.size(); i++) {
-        // Iterate over devices and only setup static TLBs for functional worker cores
-        auto& sdesc = cluster.get_soc_descriptor(i);
-        for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
-            // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
-            cluster.configure_tlb(i, core, get_static_tlb_index(core), base_addr);
-        }
-    }
-
-    tt_device_params default_params;
-    cluster.start_device(default_params);
-    std::vector<uint32_t> readback_membar_vec = {};
-    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
-        test_utils::read_data_from_device(
-            cluster, readback_membar_vec, 0, core, l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB");
-        ASSERT_EQ(
-            readback_membar_vec.at(0), 187);  // Ensure that memory barriers were correctly initialized on all workers
-        readback_membar_vec = {};
-    }
-
-    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
-        test_utils::read_data_from_device(
-            cluster, readback_membar_vec, 0, core, l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB");
-        ASSERT_EQ(
-            readback_membar_vec.at(0), 187);  // Ensure that memory barriers were correctly initialized on all workers
-        readback_membar_vec = {};
-    }
-
-    for (int chan = 0; chan < cluster.get_soc_descriptor(0).get_num_dram_channels(); chan++) {
-        auto core = cluster.get_soc_descriptor(0).get_dram_core_for_channel(chan, 0);
-        test_utils::read_data_from_device(
-            cluster, readback_membar_vec, 0, core, DRAM_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB");
-        ASSERT_EQ(
-            readback_membar_vec.at(0), 187);  // Ensure that memory barriers were correctly initialized on all DRAM
-        readback_membar_vec = {};
-    }
-    // Launch 2 thread accessing different locations of L1 and using memory barrier between write and read
-    // Ensure now RAW race and membars are thread safe
-    std::vector<uint32_t> vec1(25600);
-    std::vector<uint32_t> vec2(25600);
-    std::vector<uint32_t> zeros(25600, 0);
-
-    for (int i = 0; i < vec1.size(); i++) {
-        vec1.at(i) = i;
-    }
-    for (int i = 0; i < vec2.size(); i++) {
-        vec2.at(i) = vec1.size() + i;
-    }
-
-    std::thread th1 = std::thread([&] {
-        std::uint32_t address = base_addr;
-        for (int loop = 0; loop < 100; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
-                std::vector<uint32_t> readback_vec = {};
-                cluster.write_to_device(vec1.data(), vec1.size() * sizeof(std::uint32_t), 0, core, address, "");
-                cluster.l1_membar(0, "", {core});
-                test_utils::read_data_from_device(cluster, readback_vec, 0, core, address, 4 * vec1.size(), "");
-                ASSERT_EQ(readback_vec, vec1);
-                cluster.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), 0, core, address, "");
-                readback_vec = {};
-            }
-        }
-    });
-
-    std::thread th2 = std::thread([&] {
-        std::uint32_t address = base_addr + vec1.size() * 4;
-        for (int loop = 0; loop < 100; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
-                std::vector<uint32_t> readback_vec = {};
-                cluster.write_to_device(vec2.data(), vec2.size() * sizeof(std::uint32_t), 0, core, address, "");
-                cluster.l1_membar(0, "", {core});
-                test_utils::read_data_from_device(cluster, readback_vec, 0, core, address, 4 * vec2.size(), "");
-                ASSERT_EQ(readback_vec, vec2);
-                cluster.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), 0, core, address, "");
-                readback_vec = {};
-            }
-        }
-    });
-
-    th1.join();
-    th2.join();
-
-    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
-        test_utils::read_data_from_device(
-            cluster, readback_membar_vec, 0, core, l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB");
-        ASSERT_EQ(readback_membar_vec.at(0), 187);  // Ensure that memory barriers end up in correct sate workers
-        readback_membar_vec = {};
-    }
-
-    cluster.close_device();
-}
-
-/**
- * Copied from Wormhole unit tests.
- */
-TEST(SiliconDriverGS, SysmemTestWithPcie) {
-    Cluster cluster(
-        test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"),
-        {0},
-        1,      // one "host memory channel", currently a 1G huge page
-        false,  // skip driver allocs - no (don't skip)
-        true,   // clean system resources - yes
-        true);  // perform harvesting - yes
-
-    cluster.start_device(tt_device_params{});  // no special parameters
-
-    const chip_id_t mmio_chip_id = 0;
-    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).get_cores(CoreType::PCIE).at(0);
-    const size_t test_size_bytes = 0x4000;  // Arbitrarilly chosen, but small size so the test runs quickly.
-
-    // PCIe core is at (x=0, y=4) on Grayskull NOC0.
-    ASSERT_EQ(PCIE.x, 0);
-    ASSERT_EQ(PCIE.y, 4);
-
-    // Bad API: how big is the buffer?  How do we know it's big enough?
-    // Situation today is that there's a 1G hugepage behind it, although this is
-    // unclear from the API and may change in the future.
-    uint8_t* sysmem = (uint8_t*)cluster.host_dma_address(0, 0, 0);
-    ASSERT_NE(sysmem, nullptr);
-
-    uint64_t base_address = cluster.get_pcie_base_addr_from_device(mmio_chip_id);
-
-    // Buffer that we will use to read sysmem into, then write sysmem from.
-    std::vector<uint8_t> buffer(test_size_bytes, 0x0);
-
-    // Step 1: Fill sysmem with random bytes.
-    test_utils::fill_with_random_bytes(sysmem, test_size_bytes);
-
-    // Step 2: Read sysmem into buffer.
-    cluster.read_from_device(&buffer[0], mmio_chip_id, PCIE, base_address, buffer.size(), "REG_TLB");
-
-    // Step 3: Verify that buffer matches sysmem.
-    ASSERT_EQ(buffer, std::vector<uint8_t>(sysmem, sysmem + test_size_bytes));
-
-    // Step 4: Fill buffer with random bytes.
-    test_utils::fill_with_random_bytes(&buffer[0], test_size_bytes);
-
-    // Step 5: Write buffer into sysmem, overwriting what was there.
-    cluster.write_to_device(&buffer[0], buffer.size(), mmio_chip_id, PCIE, base_address, "REG_TLB");
-
-    // Step 5b: Read back sysmem into a throwaway buffer.  The intent is to
-    // ensure the write has completed before we check sysmem against buffer.
-    std::vector<uint8_t> throwaway(test_size_bytes, 0x0);
-    cluster.read_from_device(&throwaway[0], mmio_chip_id, PCIE, base_address, throwaway.size(), "REG_TLB");
-
-    // Step 6: Verify that sysmem matches buffer.
-    ASSERT_EQ(buffer, std::vector<uint8_t>(sysmem, sysmem + test_size_bytes));
-}