Skip to content

Commit

Permalink
Cluster set power state for Blackhole
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Feb 28, 2025
1 parent da0cc30 commit f08d12a
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 8 deletions.
5 changes: 5 additions & 0 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -1195,6 +1195,8 @@ class Cluster : public tt_device {
static std::unique_ptr<tt_ClusterDescriptor> create_cluster_descriptor(
const std::unordered_map<chip_id_t, std::unique_ptr<tt::umd::Chip>>& chips);

void initialize_arc_communication();

// State variables
std::vector<tt::ARCH> archs_in_cluster = {};
std::set<chip_id_t> all_chip_ids_ = {};
Expand All @@ -1205,6 +1207,8 @@ class Cluster : public tt_device {

std::shared_ptr<tt_ClusterDescriptor> cluster_desc;

std::unique_ptr<tt::umd::BlackholeArcMessageQueue> bh_arc_msg_queue = nullptr;

// remote eth transfer setup
static constexpr std::uint32_t NUM_ETH_CORES_FOR_NON_MMIO_TRANSFERS = 6;
static constexpr std::uint32_t NON_EPOCH_ETH_CORES_FOR_NON_MMIO_TRANSFERS = 4;
Expand All @@ -1229,6 +1233,7 @@ class Cluster : public tt_device {
std::unordered_map<chip_id_t, std::unordered_set<tt_xy_pair>> workers_per_chip = {};
std::unordered_set<tt_xy_pair> eth_cores = {};
std::unordered_set<tt_xy_pair> dram_cores = {};
std::unordered_map<chip_id_t, std::unique_ptr<BlackholeArcMessageQueue>> bh_arc_msg_queues = {};

std::map<std::set<chip_id_t>, std::unordered_map<chip_id_t, std::vector<std::vector<int>>>> bcast_header_cache = {};
bool perform_harvesting_on_sdesc = false;
Expand Down
28 changes: 26 additions & 2 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,8 @@ void Cluster::construct_cluster(
}
}
}

initialize_arc_communication();
}

std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(
Expand Down Expand Up @@ -3230,6 +3232,17 @@ void Cluster::broadcast_tensix_risc_reset_to_cluster(const TensixSoftResetOption
}
}

// Populates bh_arc_msg_queues with one ARC message queue per chip id in
// all_chip_ids_, each obtained for the APPLICATION queue index.
// Does nothing unless the cluster architecture is Blackhole.
void Cluster::initialize_arc_communication() {
    if (arch_name != tt::ARCH::BLACKHOLE) {
        return;
    }

    for (const auto& chip_id : all_chip_ids_) {
        // A chip id that is already present in the map keeps its existing queue.
        bh_arc_msg_queues.emplace(
            chip_id,
            BlackholeArcMessageQueue::get_blackhole_arc_message_queue(
                this, chip_id, BlackholeArcMessageQueueIndex::APPLICATION));
    }
}

void Cluster::set_power_state(tt_DevicePowerState device_state) {
// Blackhole is handled separately in the else branch below, through its per-chip ARC message queue.
if (arch_name != tt::ARCH::BLACKHOLE) {
Expand All @@ -3242,6 +3255,16 @@ void Cluster::set_power_state(tt_DevicePowerState device_state) {
exit_code == 0, "Failed to set power state to {} with exit code: {}", (int)device_state, exit_code);
}
}
} else {
for (auto& chip : all_chip_ids_) {
std::unique_ptr<BlackholeArcMessageQueue>& bh_arc_msg_queue = bh_arc_msg_queues.at(chip);

if (device_state == tt_DevicePowerState::BUSY) {
bh_arc_msg_queue->send_message(tt::umd::blackhole::ArcMessageType::AICLK_GO_BUSY);
} else {
bh_arc_msg_queue->send_message(tt::umd::blackhole::ArcMessageType::AICLK_GO_LONG_IDLE);
}
}
}
}

Expand Down Expand Up @@ -3302,9 +3325,10 @@ void Cluster::deassert_resets_and_set_power_state() {
}
enable_ethernet_queue(30);
}
// Set power state to busy
set_power_state(tt_DevicePowerState::BUSY);
}

// Set power state to busy
set_power_state(tt_DevicePowerState::BUSY);
}

void Cluster::verify_eth_fw() {
Expand Down
8 changes: 2 additions & 6 deletions tests/blackhole/test_cluster_bh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,6 @@ TEST(SiliconDriverBH, CreateDestroy) {
// true,
// simulated_harvesting_masks);

// ASSERT_EQ(cluster.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting";

// for (const auto& chip : sdesc_per_chip) {
// ASSERT_EQ(chip.second.workers.size(), 48)
// << "Expected SOC descriptor with harvesting to have 48 workers for chip" << chip.first;
Expand Down Expand Up @@ -156,8 +154,6 @@ TEST(SiliconDriverBH, CreateDestroy) {
// false,
// simulated_harvesting_masks);

// ASSERT_EQ(cluster.using_harvested_soc_descriptors(), false)
// << "SOC descriptors should not be modified when harvesting is disabled";
// for (const auto& chip : sdesc_per_chip) {
// ASSERT_EQ(chip.second.workers.size(), 1) << "Expected 1x1 SOC descriptor to be unmodified by driver";
// }
Expand Down Expand Up @@ -767,8 +763,8 @@ TEST(SiliconDriverBH, DISABLED_VirtualCoordinateBroadcast) { // same problem as
tt_device_params default_params;
cluster.start_device(default_params);
auto eth_version = cluster.get_ethernet_fw_version();
bool virtual_bcast_supported =
(eth_version >= tt_version(6, 8, 0) || eth_version == tt_version(6, 7, 241)) && cluster.translation_tables_en;
bool virtual_bcast_supported = (eth_version >= tt_version(6, 8, 0) || eth_version == tt_version(6, 7, 241)) &&
cluster.get_soc_descriptor(*target_devices.begin()).noc_translation_id_enabled;
if (!virtual_bcast_supported) {
cluster.close_device();
GTEST_SKIP() << "SiliconDriverWH.VirtualCoordinateBroadcast skipped since ethernet version does not support "
Expand Down

0 comments on commit f08d12a

Please sign in to comment.