Commit 2fdee2e
Merge branch 'main' into dgomezTT-16418

dgomezTT authored Feb 18, 2025
2 parents 6b955ed + 2d4f994
Showing 36 changed files with 748 additions and 530 deletions.
[diff in a CI shell script; filename not shown in this capture]
@@ -17,7 +17,9 @@ download_artifacts() {
     local repo=$1
     local workflow_run_id=$2

-    if gh api --paginate /repos/$repo/actions/runs/$workflow_run_id/artifacts | jq '.artifacts[] | .name' | grep -q "test_reports_"; then
     echo "[info] Downloading test reports for workflow run $workflow_run_id"
+    api_output=$(gh api --paginate /repos/$repo/actions/runs/$workflow_run_id/artifacts | jq -r '.artifacts[] | .name')
+    if echo "$api_output" | grep -q "test_reports_"; then
         gh run download --repo $repo -D generated/cicd/$workflow_run_id/artifacts --pattern test_reports_* $workflow_run_id
     else
         echo "[Warning] Test reports not found for workflow run $workflow_run_id"
10 changes: 7 additions & 3 deletions tests/tt_metal/distributed/test_mesh_buffer.cpp
@@ -25,11 +25,11 @@ struct DeviceLocalShardedBufferTestConfig {
     TensorMemoryLayout mem_config = TensorMemoryLayout::HEIGHT_SHARDED;
     ShardOrientation shard_orientation = ShardOrientation::ROW_MAJOR;

-    Shape2D tensor2d_shape() {
+    Shape2D tensor2d_shape_in_pages() {
         return {num_pages_per_core.height() * num_cores.height(), num_pages_per_core.width() * num_cores.width()};
     }

-    uint32_t num_pages() { return tensor2d_shape().height() * tensor2d_shape().width(); }
+    uint32_t num_pages() { return tensor2d_shape_in_pages().height() * tensor2d_shape_in_pages().width(); }

     std::array<uint32_t, 2> shard_shape() {
         return {num_pages_per_core.height() * page_shape.height(), num_pages_per_core.width() * page_shape.width()};
@@ -44,7 +44,11 @@ struct DeviceLocalShardedBufferTestConfig {

     ShardSpecBuffer shard_parameters() {
         return ShardSpecBuffer(
-            this->shard_grid(), this->shard_shape(), this->shard_orientation, this->page_shape, this->tensor2d_shape());
+            this->shard_grid(),
+            this->shard_shape(),
+            this->shard_orientation,
+            this->page_shape,
+            this->tensor2d_shape_in_pages());
     }
 };
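Note: the rename makes the units explicit; the shape is measured in pages, not elements. A minimal stand-alone sketch of the same arithmetic (the Shape2D here is a hypothetical stand-in for illustration, not tt-metal's actual type):

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for tt-metal's Shape2D, for illustration only.
struct Shape2D {
    uint32_t h, w;
    uint32_t height() const { return h; }
    uint32_t width() const { return w; }
};

int main() {
    Shape2D num_pages_per_core{3, 2};  // pages per core (rows x cols)
    Shape2D num_cores{4, 4};           // core grid

    // Mirrors tensor2d_shape_in_pages(): the tensor's 2D extent in pages.
    Shape2D tensor2d_shape_in_pages{
        num_pages_per_core.height() * num_cores.height(),
        num_pages_per_core.width() * num_cores.width()};

    // Mirrors num_pages(): total page count is height * width of that shape.
    std::cout << tensor2d_shape_in_pages.height() * tensor2d_shape_in_pages.width()
              << "\n";  // 12 * 8 = 96
}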

[diff in another C++ test file; filename not shown in this capture]
@@ -56,11 +56,11 @@ class BufferStressTestConfigSharded {
         this->num_cores = cores;
     }

-    std::array<uint32_t, 2> tensor2d_shape() {
+    std::array<uint32_t, 2> tensor2d_shape_in_pages() {
         return {num_pages_per_core[0] * num_cores[0], num_pages_per_core[1] * num_cores[1]};
     }

-    uint32_t num_pages() { return tensor2d_shape()[0] * tensor2d_shape()[1]; }
+    uint32_t num_pages() { return tensor2d_shape_in_pages()[0] * tensor2d_shape_in_pages()[1]; }

     std::array<uint32_t, 2> shard_shape() {
         return {num_pages_per_core[0] * page_shape[0], num_pages_per_core[1] * page_shape[1]};
@@ -73,7 +73,11 @@ class BufferStressTestConfigSharded {

     ShardSpecBuffer shard_parameters() {
         return ShardSpecBuffer(
-            this->shard_grid(), this->shard_shape(), this->shard_orientation, this->page_shape, this->tensor2d_shape());
+            this->shard_grid(),
+            this->shard_shape(),
+            this->shard_orientation,
+            this->page_shape,
+            this->tensor2d_shape_in_pages());
     }

     uint32_t page_size() { return page_shape[0] * page_shape[1] * element_size; }
@@ -87,7 +91,7 @@ struct ShardedSubBufferStressTestConfig {
     CoreRangeSet cores;
     Shape2D shard_shape;
     Shape2D page_shape;
-    Shape2D tensor2d_shape;
+    Shape2D tensor2d_shape_in_pages;
     TensorMemoryLayout layout;
     ShardOrientation orientation;
 };
@@ -133,11 +137,12 @@ vector<ShardedSubBufferStressTestConfig> generate_sharded_sub_buffer_test_config
     uint32_t page_shape_width_div_factor = 1;
     while (page_shape_width_div_factor <= num_pages_per_shard) {
         if (page_shape_width_div_factor * page_shape_height_div_factor == num_pages_per_shard) {
-            uint32_t tensor2d_shape_height = page_shape_height_div_factor;
-            while (tensor2d_shape_height <= num_pages) {
-                uint32_t tensor2d_shape_width = page_shape_width_div_factor;
-                while (tensor2d_shape_width <= num_pages) {
-                    if (tensor2d_shape_height * tensor2d_shape_width == num_pages) {
+            uint32_t tensor2d_shape_in_pages_height = page_shape_height_div_factor;
+            while (tensor2d_shape_in_pages_height <= num_pages) {
+                uint32_t tensor2d_shape_in_pages_width = page_shape_width_div_factor;
+                while (tensor2d_shape_in_pages_width <= num_pages) {
+                    if (tensor2d_shape_in_pages_height * tensor2d_shape_in_pages_width ==
+                        num_pages) {
                         for (TensorMemoryLayout layout :
                              {TensorMemoryLayout::HEIGHT_SHARDED,
                               TensorMemoryLayout::BLOCK_SHARDED,
@@ -157,17 +162,18 @@ vector<ShardedSubBufferStressTestConfig> generate_sharded_sub_buffer_test_config
                                         page_shape_height_div_factor,
                                     tt::constants::TILE_WIDTH /
                                         page_shape_width_div_factor},
-                                .tensor2d_shape =
-                                    {tensor2d_shape_height, tensor2d_shape_width},
+                                .tensor2d_shape_in_pages =
+                                    {tensor2d_shape_in_pages_height,
+                                     tensor2d_shape_in_pages_width},
                                 .layout = layout,
                                 .orientation = orientation};
                             configs.push_back(config);
                         }
                     }
                 }
-                tensor2d_shape_width += page_shape_width_div_factor;
+                tensor2d_shape_in_pages_width += page_shape_width_div_factor;
             }
-            tensor2d_shape_height += page_shape_height_div_factor;
+            tensor2d_shape_in_pages_height += page_shape_height_div_factor;
         }
     }
     page_shape_width_div_factor += 1;
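The renamed loops above enumerate every (height, width) pair, in page units, whose product equals num_pages, with each dimension stepping in multiples of the corresponding page-shape divisor. A compact stand-alone sketch of that enumeration (hypothetical helper name, not the test's actual code):

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Collect all (height, width) pairs with height * width == num_pages, where
// height steps in multiples of h_step and width in multiples of w_step.
std::vector<std::pair<uint32_t, uint32_t>> shapes_in_pages(
    uint32_t num_pages, uint32_t h_step, uint32_t w_step) {
    std::vector<std::pair<uint32_t, uint32_t>> out;
    for (uint32_t h = h_step; h <= num_pages; h += h_step) {
        for (uint32_t w = w_step; w <= num_pages; w += w_step) {
            if (h * w == num_pages) {
                out.emplace_back(h, w);
            }
        }
    }
    return out;
}

int main() {
    // Prints 1x16, 2x8, 4x4, 8x2: every factorization of 16 with even width.
    for (auto [h, w] : shapes_in_pages(16, 1, 2)) {
        std::cout << h << "x" << w << "\n";
    }
}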
@@ -1018,7 +1024,7 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestReadWriteShardedSubBufferForL1)
         tt::log_debug(
             tt::LogTest,
             "Device: {} buffer_size: {} page_size: {} region_offset: {} region_size: {} shard_shape: [{}, {}] "
-            "page_shape: [{}, {}] tensor2d_shape: [{}, {}] layout: {} orientation: {} cores: {}",
+            "page_shape: [{}, {}] tensor2d_shape_in_pages: [{}, {}] layout: {} orientation: {} cores: {}",
             device->id(),
             config.buffer_size,
             config.page_size,
@@ -1028,8 +1034,8 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestReadWriteShardedSubBufferForL1)
             config.shard_shape.width(),
             config.page_shape.height(),
             config.page_shape.width(),
-            config.tensor2d_shape.height(),
-            config.tensor2d_shape.width(),
+            config.tensor2d_shape_in_pages.height(),
+            config.tensor2d_shape_in_pages.width(),
             magic_enum::enum_name(config.layout).data(),
             magic_enum::enum_name(config.orientation).data(),
             config.cores.str());
@@ -1039,7 +1045,7 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestReadWriteShardedSubBufferForL1)
             {tt::constants::TILE_HEIGHT, tt::constants::TILE_WIDTH},
             config.orientation,
             config.page_shape,
-            config.tensor2d_shape);
+            config.tensor2d_shape_in_pages);
         auto buffer =
             Buffer::create(device, config.buffer_size, config.page_size, BufferType::L1, config.layout, shard_spec);

[diff in a C++ fabric test kernel; filename not shown in this capture]
@@ -83,10 +83,6 @@ packet_header_t packet_header __attribute__((aligned(16)));
 uint32_t target_address;
 uint32_t noc_offset;
 uint32_t rx_addr_hi;
-
-uint32_t gk_interface_addr_l;
-uint32_t gk_interface_addr_h;
-
 uint32_t controller_noc_offset;

 // flag to check if need to zero out notification addr
@@ -389,11 +385,9 @@ void kernel_main() {
     src_endpoint_id = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     controller_noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    uint32_t routing_plane = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
+    uint32_t outbound_eth_chan = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     dest_device = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     uint32_t rx_buf_size = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    gk_interface_addr_l = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    gk_interface_addr_h = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));

     if constexpr (ASYNC_WR & test_command) {
         base_target_address = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
@@ -462,9 +456,8 @@ void kernel_main() {
     uint32_t packet_count = 0;

     // initalize client
-    fabric_endpoint_init(client_interface_addr, gk_interface_addr_l, gk_interface_addr_h);
-    routing_table = reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(
-        client_interface->routing_tables_l1_offset + sizeof(fabric_router_l1_config_t) * routing_plane);
+    fabric_endpoint_init(client_interface_addr, outbound_eth_chan);
+    routing_table = reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(client_interface->routing_tables_l1_offset);

     while (true) {
         iter++;
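Across both kernels in this commit, the gatekeeper interface address pair (gk_interface_addr_l/gk_interface_addr_h) is replaced by a single outbound Ethernet channel argument, the per-worker routing_plane runtime arg is dropped, and the async writes below now pass plane 0. A hypothetical mock just to show the call-shape change (not the real tt-metal fabric API, which programs device state):

#include <cstdint>
#include <cstdio>

// Mock signatures for illustration only.
void fabric_endpoint_init_before(uint32_t client_if_addr, uint32_t gk_addr_l, uint32_t gk_addr_h) {
    std::printf("before: client_if=0x%x gk=0x%x:0x%x\n", client_if_addr, gk_addr_h, gk_addr_l);
}

void fabric_endpoint_init_after(uint32_t client_if_addr, uint32_t outbound_eth_chan) {
    std::printf("after:  client_if=0x%x eth_chan=%u\n", client_if_addr, outbound_eth_chan);
}

int main() {
    fabric_endpoint_init_before(0x10000, 0x2000, 0x0);  // old: gatekeeper address pair
    fabric_endpoint_init_after(0x10000, 3);             // new: outbound Ethernet channel id
}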
[diff in a second C++ fabric test kernel; filename not shown in this capture]
@@ -68,8 +68,6 @@ volatile fabric_client_interface_t* client_interface;
 uint64_t xy_local_addr;
 uint32_t target_address;
 uint32_t noc_offset;
-uint32_t gk_interface_addr_l;
-uint32_t gk_interface_addr_h;
 uint32_t controller_noc_offset;
 uint32_t time_seed;

@@ -94,11 +92,9 @@ void kernel_main() {
     src_endpoint_id = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     controller_noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    uint32_t routing_plane = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
+    uint32_t outbound_eth_chan = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     dest_device = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     uint32_t rx_buf_size = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    gk_interface_addr_l = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    gk_interface_addr_h = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));

     if constexpr (ASYNC_WR & test_command) {
         base_target_address = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
@@ -140,7 +136,7 @@ void kernel_main() {
     }

     // initalize client
-    fabric_endpoint_init(client_interface_addr, gk_interface_addr_l, gk_interface_addr_h);
+    fabric_endpoint_init(client_interface_addr, outbound_eth_chan);

     // notify the controller kernel that this worker is ready to proceed
     notify_traffic_controller();
@@ -161,7 +157,7 @@ void kernel_main() {
         client_interface->local_pull_request.pull_request.words_read = 0;
         if constexpr (mcast_data) {
             fabric_async_write_multicast<ASYNC_WR_SEND>(
-                routing_plane,           // the network plane to use for this transaction
+                0,                       // the network plane to use for this transaction
                 data_buffer_start_addr,  // source address in sender’s memory
                 dest_device >> 16,
                 dest_device & 0xFFFF,
@@ -173,7 +169,7 @@
                 s_depth);
         } else {
             fabric_async_write<ASYNC_WR_SEND>(
-                routing_plane,           // the network plane to use for this transaction
+                0,                       // the network plane to use for this transaction
                 data_buffer_start_addr,  // source address in sender’s memory
                 dest_device >> 16,
                 dest_device & 0xFFFF,
