Commit 2fdee2e
Merge branch 'main' into dgomezTT-16418

dgomezTT authored Feb 18, 2025
2 parents 6b955ed + 2d4f994
Showing 36 changed files with 748 additions and 530 deletions.
[diff in a CI shell script; filename not shown in this capture]
@@ -17,7 +17,9 @@ download_artifacts() {
     local repo=$1
     local workflow_run_id=$2

-    if gh api --paginate /repos/$repo/actions/runs/$workflow_run_id/artifacts | jq '.artifacts[] | .name' | grep -q "test_reports_"; then
     echo "[info] Downloading test reports for workflow run $workflow_run_id"
+    api_output=$(gh api --paginate /repos/$repo/actions/runs/$workflow_run_id/artifacts | jq -r '.artifacts[] | .name')
+    if echo "$api_output" | grep -q "test_reports_"; then
         gh run download --repo $repo -D generated/cicd/$workflow_run_id/artifacts --pattern test_reports_* $workflow_run_id
     else
         echo "[Warning] Test reports not found for workflow run $workflow_run_id"
10 changes: 7 additions & 3 deletions tests/tt_metal/distributed/test_mesh_buffer.cpp
@@ -25,11 +25,11 @@ struct DeviceLocalShardedBufferTestConfig {
     TensorMemoryLayout mem_config = TensorMemoryLayout::HEIGHT_SHARDED;
     ShardOrientation shard_orientation = ShardOrientation::ROW_MAJOR;

-    Shape2D tensor2d_shape() {
+    Shape2D tensor2d_shape_in_pages() {
         return {num_pages_per_core.height() * num_cores.height(), num_pages_per_core.width() * num_cores.width()};
     }

-    uint32_t num_pages() { return tensor2d_shape().height() * tensor2d_shape().width(); }
+    uint32_t num_pages() { return tensor2d_shape_in_pages().height() * tensor2d_shape_in_pages().width(); }

     std::array<uint32_t, 2> shard_shape() {
         return {num_pages_per_core.height() * page_shape.height(), num_pages_per_core.width() * page_shape.width()};
@@ -44,7 +44,11 @@ struct DeviceLocalShardedBufferTestConfig {

     ShardSpecBuffer shard_parameters() {
         return ShardSpecBuffer(
-            this->shard_grid(), this->shard_shape(), this->shard_orientation, this->page_shape, this->tensor2d_shape());
+            this->shard_grid(),
+            this->shard_shape(),
+            this->shard_orientation,
+            this->page_shape,
+            this->tensor2d_shape_in_pages());
     }
 };
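Note: the rename makes the units explicit; the shape is measured in pages, not elements. A minimal stand-alone sketch of the same arithmetic (the Shape2D here is a hypothetical stand-in for illustration, not tt-metal's actual type):

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for tt-metal's Shape2D, for illustration only.
struct Shape2D {
    uint32_t h, w;
    uint32_t height() const { return h; }
    uint32_t width() const { return w; }
};

int main() {
    Shape2D num_pages_per_core{3, 2};  // pages per core (rows x cols)
    Shape2D num_cores{4, 4};           // core grid

    // Mirrors tensor2d_shape_in_pages(): the tensor's 2D extent in pages.
    Shape2D tensor2d_shape_in_pages{
        num_pages_per_core.height() * num_cores.height(),
        num_pages_per_core.width() * num_cores.width()};

    // Mirrors num_pages(): total page count is height * width of that shape.
    std::cout << tensor2d_shape_in_pages.height() * tensor2d_shape_in_pages.width()
              << "\n";  // 12 * 8 = 96
}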

[diff in another C++ test file; filename not shown in this capture]
@@ -56,11 +56,11 @@ class BufferStressTestConfigSharded {
         this->num_cores = cores;
     }

-    std::array<uint32_t, 2> tensor2d_shape() {
+    std::array<uint32_t, 2> tensor2d_shape_in_pages() {
         return {num_pages_per_core[0] * num_cores[0], num_pages_per_core[1] * num_cores[1]};
     }

-    uint32_t num_pages() { return tensor2d_shape()[0] * tensor2d_shape()[1]; }
+    uint32_t num_pages() { return tensor2d_shape_in_pages()[0] * tensor2d_shape_in_pages()[1]; }

     std::array<uint32_t, 2> shard_shape() {
         return {num_pages_per_core[0] * page_shape[0], num_pages_per_core[1] * page_shape[1]};
@@ -73,7 +73,11 @@ class BufferStressTestConfigSharded {

     ShardSpecBuffer shard_parameters() {
         return ShardSpecBuffer(
-            this->shard_grid(), this->shard_shape(), this->shard_orientation, this->page_shape, this->tensor2d_shape());
+            this->shard_grid(),
+            this->shard_shape(),
+            this->shard_orientation,
+            this->page_shape,
+            this->tensor2d_shape_in_pages());
     }

     uint32_t page_size() { return page_shape[0] * page_shape[1] * element_size; }
@@ -87,7 +91,7 @@ struct ShardedSubBufferStressTestConfig {
     CoreRangeSet cores;
     Shape2D shard_shape;
     Shape2D page_shape;
-    Shape2D tensor2d_shape;
+    Shape2D tensor2d_shape_in_pages;
     TensorMemoryLayout layout;
     ShardOrientation orientation;
 };
@@ -133,11 +137,12 @@ vector<ShardedSubBufferStressTestConfig> generate_sharded_sub_buffer_test_config
     uint32_t page_shape_width_div_factor = 1;
     while (page_shape_width_div_factor <= num_pages_per_shard) {
         if (page_shape_width_div_factor * page_shape_height_div_factor == num_pages_per_shard) {
-            uint32_t tensor2d_shape_height = page_shape_height_div_factor;
-            while (tensor2d_shape_height <= num_pages) {
-                uint32_t tensor2d_shape_width = page_shape_width_div_factor;
-                while (tensor2d_shape_width <= num_pages) {
-                    if (tensor2d_shape_height * tensor2d_shape_width == num_pages) {
+            uint32_t tensor2d_shape_in_pages_height = page_shape_height_div_factor;
+            while (tensor2d_shape_in_pages_height <= num_pages) {
+                uint32_t tensor2d_shape_in_pages_width = page_shape_width_div_factor;
+                while (tensor2d_shape_in_pages_width <= num_pages) {
+                    if (tensor2d_shape_in_pages_height * tensor2d_shape_in_pages_width ==
+                        num_pages) {
                         for (TensorMemoryLayout layout :
                              {TensorMemoryLayout::HEIGHT_SHARDED,
                               TensorMemoryLayout::BLOCK_SHARDED,
@@ -157,17 +162,18 @@ vector<ShardedSubBufferStressTestConfig> generate_sharded_sub_buffer_test_config
                                         page_shape_height_div_factor,
                                     tt::constants::TILE_WIDTH /
                                         page_shape_width_div_factor},
-                                .tensor2d_shape =
-                                    {tensor2d_shape_height, tensor2d_shape_width},
+                                .tensor2d_shape_in_pages =
+                                    {tensor2d_shape_in_pages_height,
+                                     tensor2d_shape_in_pages_width},
                                 .layout = layout,
                                 .orientation = orientation};
                             configs.push_back(config);
                         }
                     }
                 }
-                tensor2d_shape_width += page_shape_width_div_factor;
+                tensor2d_shape_in_pages_width += page_shape_width_div_factor;
             }
-            tensor2d_shape_height += page_shape_height_div_factor;
+            tensor2d_shape_in_pages_height += page_shape_height_div_factor;
         }
     }
     page_shape_width_div_factor += 1;
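The renamed loops above enumerate every (height, width) pair, in page units, whose product equals num_pages, with each dimension stepping in multiples of the corresponding page-shape divisor. A compact stand-alone sketch of that enumeration (hypothetical helper name, not the test's actual code):

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Collect all (height, width) pairs with height * width == num_pages, where
// height steps in multiples of h_step and width in multiples of w_step.
std::vector<std::pair<uint32_t, uint32_t>> shapes_in_pages(
    uint32_t num_pages, uint32_t h_step, uint32_t w_step) {
    std::vector<std::pair<uint32_t, uint32_t>> out;
    for (uint32_t h = h_step; h <= num_pages; h += h_step) {
        for (uint32_t w = w_step; w <= num_pages; w += w_step) {
            if (h * w == num_pages) {
                out.emplace_back(h, w);
            }
        }
    }
    return out;
}

int main() {
    // Prints 1x16, 2x8, 4x4, 8x2: every factorization of 16 with even width.
    for (auto [h, w] : shapes_in_pages(16, 1, 2)) {
        std::cout << h << "x" << w << "\n";
    }
}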
@@ -1018,7 +1024,7 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestReadWriteShardedSubBufferForL1)
         tt::log_debug(
             tt::LogTest,
             "Device: {} buffer_size: {} page_size: {} region_offset: {} region_size: {} shard_shape: [{}, {}] "
-            "page_shape: [{}, {}] tensor2d_shape: [{}, {}] layout: {} orientation: {} cores: {}",
+            "page_shape: [{}, {}] tensor2d_shape_in_pages: [{}, {}] layout: {} orientation: {} cores: {}",
             device->id(),
             config.buffer_size,
             config.page_size,
@@ -1028,8 +1034,8 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestReadWriteShardedSubBufferForL1)
             config.shard_shape.width(),
             config.page_shape.height(),
             config.page_shape.width(),
-            config.tensor2d_shape.height(),
-            config.tensor2d_shape.width(),
+            config.tensor2d_shape_in_pages.height(),
+            config.tensor2d_shape_in_pages.width(),
             magic_enum::enum_name(config.layout).data(),
             magic_enum::enum_name(config.orientation).data(),
             config.cores.str());
@@ -1039,7 +1045,7 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestReadWriteShardedSubBufferForL1)
             {tt::constants::TILE_HEIGHT, tt::constants::TILE_WIDTH},
             config.orientation,
             config.page_shape,
-            config.tensor2d_shape);
+            config.tensor2d_shape_in_pages);
         auto buffer =
             Buffer::create(device, config.buffer_size, config.page_size, BufferType::L1, config.layout, shard_spec);

[diff in a C++ fabric test kernel; filename not shown in this capture]
@@ -83,10 +83,6 @@ packet_header_t packet_header __attribute__((aligned(16)));
 uint32_t target_address;
 uint32_t noc_offset;
 uint32_t rx_addr_hi;
-
-uint32_t gk_interface_addr_l;
-uint32_t gk_interface_addr_h;
-
 uint32_t controller_noc_offset;

 // flag to check if need to zero out notification addr
@@ -389,11 +385,9 @@ void kernel_main() {
     src_endpoint_id = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     controller_noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    uint32_t routing_plane = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
+    uint32_t outbound_eth_chan = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     dest_device = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     uint32_t rx_buf_size = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    gk_interface_addr_l = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    gk_interface_addr_h = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));

     if constexpr (ASYNC_WR & test_command) {
         base_target_address = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
@@ -462,9 +456,8 @@ void kernel_main() {
     uint32_t packet_count = 0;

     // initalize client
-    fabric_endpoint_init(client_interface_addr, gk_interface_addr_l, gk_interface_addr_h);
-    routing_table = reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(
-        client_interface->routing_tables_l1_offset + sizeof(fabric_router_l1_config_t) * routing_plane);
+    fabric_endpoint_init(client_interface_addr, outbound_eth_chan);
+    routing_table = reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(client_interface->routing_tables_l1_offset);

     while (true) {
         iter++;
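Across both kernels in this commit, the gatekeeper interface address pair (gk_interface_addr_l/gk_interface_addr_h) is replaced by a single outbound Ethernet channel argument, the per-worker routing_plane runtime arg is dropped, and the async writes below now pass plane 0. A hypothetical mock just to show the call-shape change (not the real tt-metal fabric API, which programs device state):

#include <cstdint>
#include <cstdio>

// Mock signatures for illustration only.
void fabric_endpoint_init_before(uint32_t client_if_addr, uint32_t gk_addr_l, uint32_t gk_addr_h) {
    std::printf("before: client_if=0x%x gk=0x%x:0x%x\n", client_if_addr, gk_addr_h, gk_addr_l);
}

void fabric_endpoint_init_after(uint32_t client_if_addr, uint32_t outbound_eth_chan) {
    std::printf("after:  client_if=0x%x eth_chan=%u\n", client_if_addr, outbound_eth_chan);
}

int main() {
    fabric_endpoint_init_before(0x10000, 0x2000, 0x0);  // old: gatekeeper address pair
    fabric_endpoint_init_after(0x10000, 3);             // new: outbound Ethernet channel id
}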
[diff in a second C++ fabric test kernel; filename not shown in this capture]
@@ -68,8 +68,6 @@ volatile fabric_client_interface_t* client_interface;
 uint64_t xy_local_addr;
 uint32_t target_address;
 uint32_t noc_offset;
-uint32_t gk_interface_addr_l;
-uint32_t gk_interface_addr_h;
 uint32_t controller_noc_offset;
 uint32_t time_seed;

@@ -94,11 +92,9 @@ void kernel_main() {
     src_endpoint_id = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     controller_noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    uint32_t routing_plane = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
+    uint32_t outbound_eth_chan = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     dest_device = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
     uint32_t rx_buf_size = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    gk_interface_addr_l = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
-    gk_interface_addr_h = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));

     if constexpr (ASYNC_WR & test_command) {
         base_target_address = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
@@ -140,7 +136,7 @@ void kernel_main() {
     }

     // initalize client
-    fabric_endpoint_init(client_interface_addr, gk_interface_addr_l, gk_interface_addr_h);
+    fabric_endpoint_init(client_interface_addr, outbound_eth_chan);

     // notify the controller kernel that this worker is ready to proceed
     notify_traffic_controller();
@@ -161,7 +157,7 @@ void kernel_main() {
         client_interface->local_pull_request.pull_request.words_read = 0;
         if constexpr (mcast_data) {
             fabric_async_write_multicast<ASYNC_WR_SEND>(
-                routing_plane,           // the network plane to use for this transaction
+                0,                       // the network plane to use for this transaction
                 data_buffer_start_addr,  // source address in sender’s memory
                 dest_device >> 16,
                 dest_device & 0xFFFF,
@@ -173,7 +169,7 @@
                 s_depth);
         } else {
             fabric_async_write<ASYNC_WR_SEND>(
-                routing_plane,           // the network plane to use for this transaction
+                0,                       // the network plane to use for this transaction
                 data_buffer_start_addr,  // source address in sender’s memory
                 dest_device >> 16,
                 dest_device & 0xFFFF,
