diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index f27f2305..51b0b904 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -7,6 +7,7 @@ #pragma once #include #include +#include #include #include #include @@ -689,15 +690,15 @@ class Cluster : public tt_device { * Simplest form, creates a cluster of all available devices on the system. * * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). - * @param skip_driver_allocs + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. * @param clean_system_resource Specifies if host state from previous runs needs to be cleaned up. * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. * @param simulated_harvesting_masks Manually specify additional harvesting masks for the devices in the cluster. * The ones defined by the devices itself have to be used, they will be merged with the ones passed here. */ Cluster( const uint32_t& num_host_mem_ch_per_mmio_device = 1, - const bool skip_driver_allocs = false, + const bool create_mock_chips = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); @@ -708,7 +709,7 @@ class Cluster : public tt_device { * * @param target_devices Devices to target. * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). - * @param skip_driver_allocs + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. * @param clean_system_resource Specifies if host state from previous runs needs to be cleaned up. * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. * @param simulated_harvesting_masks Manually specify additional harvesting masks for the devices in the cluster. 
@@ -717,7 +719,7 @@ class Cluster : public tt_device { Cluster( const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device = 1, - const bool skip_driver_allocs = false, + const bool create_mock_chips = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); @@ -731,7 +733,7 @@ class Cluster : public tt_device { * harvesting info of the devices in the cluster. * @param target_devices Devices to target. * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). - * @param skip_driver_allocs + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. * @param clean_system_resource Specifies if host state from previous runs needs to be cleaned up. * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. * @param simulated_harvesting_masks Manually specify additional harvesting masks for the devices in the cluster. @@ -741,38 +743,33 @@ class Cluster : public tt_device { const std::string& sdesc_path, const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device = 1, - const bool skip_driver_allocs = false, + const bool create_mock_chips = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); /** * Cluster constructor. - * This constructor offers maximal flexibility, allowing the user to pass manually created Chips. - * The user has to know what they are doing. - * TODO: Could fail if logical_ids not match the ones in cluster descriptor, while Cluster still uses cluster - * descriptor. + * This constructor can be used with custom cluster descriptor. If the cluster descriptor does not match the + * actual devices on the system, the constructor will throw an exception. If create_mock_chips is set to true, + * the constructor will create mock chips for the devices in the cluster descriptor. 
* - * @param chips Map of logical device ids to Chip instances. + * @param cluster_descriptor Cluster descriptor object based on which Cluster is going to be created. * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). - * @param skip_driver_allocs + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. * @param clean_system_resource Specifies if host state from previous runs needs to be cleaned up. * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. - * @param simulated_harvesting_masks + * @param simulated_harvesting_masks Manually specify additional harvesting masks for the devices in the cluster. + * The ones defined by the devices itself have to be used, they will be merged with the ones passed here. */ Cluster( - std::unordered_map>& chips, + std::unique_ptr cluster_descriptor, const uint32_t& num_host_mem_ch_per_mmio_device = 1, - const bool skip_driver_allocs = false, + const bool create_mock_chips = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); - /** - * Cluster constructor which creates a cluster with Mock chips. - */ - static std::unique_ptr create_mock_cluster(); - // Existing API we want to keep. UMD is transitioning to use CoreCoord instead of tt_xy_pair. // This set of function shouldn't be removed even after the transition. // TODO: regroup the functions from this set into setup/teardown, runtime, and misc functions. 
@@ -996,6 +993,11 @@ class Cluster : public tt_device { const chip_id_t chip, const std::unordered_set& cores, const std::string& fallback_tlb); static std::unique_ptr create_cluster_descriptor(); + + static std::string serialize(); + + static std::filesystem::path serialize_to_file(); + // Destructor virtual ~Cluster(); @@ -1005,7 +1007,7 @@ class Cluster : public tt_device { void create_device( const std::set& target_mmio_device_ids, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources); void initialize_interprocess_mutexes(int logical_device_id, bool cleanup_mutexes_in_shm); void cleanup_shared_host_state(); @@ -1137,18 +1139,23 @@ class Cluster : public tt_device { // Helper functions for constructing the chips from the cluster descriptor. std::unique_ptr construct_chip_from_cluster( - chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, tt_SocDescriptor& soc_desc); + chip_id_t chip_id, + tt_ClusterDescriptor* cluster_desc, + tt_SocDescriptor& soc_desc, + const bool create_mock_chip = false); std::unique_ptr construct_chip_from_cluster( const std::string& soc_desc_path, chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks); + std::unordered_map& simulated_harvesting_masks, + const bool create_mock_chip = false); std::unique_ptr construct_chip_from_cluster( chip_id_t logical_device_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks); + std::unordered_map& simulated_harvesting_masks, + const bool create_mock_chip = false); void add_chip(chip_id_t chip_id, std::unique_ptr chip); HarvestingMasks get_harvesting_masks( chip_id_t chip_id, @@ -1174,7 +1181,7 @@ class Cluster : public tt_device { std::unordered_map& simulated_harvesting_masks); void construct_cluster( const uint32_t& num_host_mem_ch_per_mmio_device, - const bool 
skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks); diff --git a/device/api/umd/device/tt_cluster_descriptor.h b/device/api/umd/device/tt_cluster_descriptor.h index 1700e5b7..c95b9c9f 100644 --- a/device/api/umd/device/tt_cluster_descriptor.h +++ b/device/api/umd/device/tt_cluster_descriptor.h @@ -122,4 +122,8 @@ class tt_ClusterDescriptor { chip_id_t local_chip, ethernet_channel_t local_ethernet_channel) const; void enable_all_devices(); + + std::string serialize() const; + + std::filesystem::path serialize_to_file() const; }; diff --git a/device/api/umd/device/types/cluster_descriptor_types.h b/device/api/umd/device/types/cluster_descriptor_types.h index 1f5e71ac..f1d94087 100644 --- a/device/api/umd/device/types/cluster_descriptor_types.h +++ b/device/api/umd/device/types/cluster_descriptor_types.h @@ -11,6 +11,7 @@ #include #include +#include "fmt/core.h" #include "umd/device/types/harvesting.h" // Small performant hash combiner taken from boost library. 
@@ -51,6 +52,33 @@ enum BoardType : uint32_t { UNKNOWN, }; +inline std::string board_type_to_string(const BoardType board_type) { + switch (board_type) { + case BoardType::E75: + return "e75"; + case BoardType::E150: + return "e150"; + case BoardType::E300: + return "e300"; + case BoardType::N150: + return "n150"; + case BoardType::N300: + return "n300"; + case BoardType::P100: + return "p100"; + case BoardType::P150: + return "p150"; + case BoardType::P300: + return "p300"; + case BoardType::GALAXY: + return "galaxy"; + case BoardType::UNKNOWN: + return "unknown"; + } + + throw std::runtime_error("Unknown board type passed for conversion to string."); +} + // TODO: add Wormhole and Grayskull board types to this function inline BoardType get_board_type_from_board_id(const uint64_t board_id) { uint64_t upi = (board_id >> 36) & 0xFFFFF; diff --git a/device/cluster.cpp b/device/cluster.cpp index bc7b5605..97a13bbb 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -36,6 +36,7 @@ #include #include +#include "api/umd/device/cluster.h" #include "api/umd/device/tt_core_coordinates.h" #include "logger.hpp" #include "umd/device/architecture_implementation.h" @@ -195,7 +196,7 @@ void Cluster::initialize_interprocess_mutexes(int logical_device_id, bool cleanu void Cluster::create_device( const std::set& target_mmio_device_ids, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources) { log_debug(LogSiliconDriver, "Cluster::Cluster"); @@ -206,40 +207,38 @@ void Cluster::create_device( target_mmio_device_ids.size() > 0, "Must provide set of target_mmio_device_ids to Cluster constructor now."); for (const chip_id_t& logical_device_id : target_mmio_device_ids) { - auto pci_device = get_tt_device(logical_device_id)->get_pci_device(); - - int num_host_mem_channels = num_host_mem_ch_per_mmio_device; - - // TODO: get rid of this when the following Metal CI issue is resolved. 
- // https://github.com/tenstorrent/tt-metal/issues/15675 - // The notion that we should clamp the number of host mem channels to - // what we have available and emit a warning is wrong, since the - // application might try to use the channels it asked for. We should - // just fail early since the error message will be actionable instead of - // a segfault or memory corruption. - if (!pci_device->is_iommu_enabled()) { - uint16_t pcie_device_id = pci_device->get_pci_device_id(); - uint32_t pcie_revision = pci_device->get_pci_revision(); - num_host_mem_channels = - get_available_num_host_mem_channels(num_host_mem_ch_per_mmio_device, pcie_device_id, pcie_revision); - } + if (!create_mock_chips) { + auto pci_device = get_tt_device(logical_device_id)->get_pci_device(); + + int num_host_mem_channels = num_host_mem_ch_per_mmio_device; + + // TODO: get rid of this when the following Metal CI issue is resolved. + // https://github.com/tenstorrent/tt-metal/issues/15675 + // The notion that we should clamp the number of host mem channels to + // what we have available and emit a warning is wrong, since the + // application might try to use the channels it asked for. We should + // just fail early since the error message will be actionable instead of + // a segfault or memory corruption. 
+            if (!pci_device->is_iommu_enabled()) {
+                uint16_t pcie_device_id = pci_device->get_pci_device_id();
+                uint32_t pcie_revision = pci_device->get_pci_revision();
+                num_host_mem_channels =
+                    get_available_num_host_mem_channels(num_host_mem_ch_per_mmio_device, pcie_device_id, pcie_revision);
+            }
 
-        log_debug(
-            LogSiliconDriver,
-            "Using {} Hugepages/NumHostMemChannels for PCIDevice (logical_device_id: {} pci_interface_id: {} "
-            "device_id: 0x{:x} revision: {})",
-            num_host_mem_channels,
-            logical_device_id,
-            pci_device->get_device_num(),
-            pci_device->get_device_num(),
-            pci_device->revision_id);
+            log_debug(
+                LogSiliconDriver,
+                "Using {} Hugepages/NumHostMemChannels for PCIDevice (logical_device_id: {} pci_interface_id: {} "
+                "device_id: 0x{:x} revision: {})",
+                num_host_mem_channels,
+                logical_device_id,
+                pci_device->get_device_num(),
+                pci_device->get_pci_device_id(),
+                pci_device->revision_id);
 
-        // TODO: This will be moved to a dedicated Locking class.
-        initialize_interprocess_mutexes(logical_device_id, clean_system_resources);
+            // TODO: This will be moved to a dedicated Locking class.
+            initialize_interprocess_mutexes(logical_device_id, clean_system_resources);
 
-        // MT: Initial BH - hugepages will fail init
-        // For using silicon driver without workload to query mission mode params, no need for hugepage.
-        if (!skip_driver_allocs) {
             bool hugepages_initialized = pci_device->init_hugepage(num_host_mem_channels);
             // Large writes to remote chips require hugepages to be initialized.
             // Conservative assert - end workload if remote chips present but hugepages not initialized (failures caused
@@ -253,6 +252,7 @@ void Cluster::create_device(
                 log_warning(LogSiliconDriver, "No hugepage mapping at device {}.", logical_device_id);
             }
         }
+
         // translation layer for harvested coords.
Default is identity map harvested_coord_translation.insert({logical_device_id, create_harvested_coord_translation(arch_name, true)}); } @@ -280,11 +280,11 @@ std::unordered_map Cluster::get_harvesting_masks_for_soc_de void Cluster::construct_cluster( const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { - if (!skip_driver_allocs) { + if (!create_mock_chips) { auto available_device_ids = detect_available_device_ids(); log_info(LogSiliconDriver, "Detected PCI devices: {}", available_device_ids); log_info( @@ -293,7 +293,7 @@ void Cluster::construct_cluster( perform_harvesting_on_sdesc = perform_harvesting; - create_device(local_chip_ids_, num_host_mem_ch_per_mmio_device, skip_driver_allocs, clean_system_resources); + create_device(local_chip_ids_, num_host_mem_ch_per_mmio_device, create_mock_chips, clean_system_resources); // Disable dependency to ethernet firmware for all BH devices and WH devices with all chips having MMIO (e.g. UBB // Galaxy), do not disable for N150, was seeing some issues in CI @@ -355,7 +355,12 @@ void Cluster::construct_cluster( } else if (arch_name == tt::ARCH::GRAYSKULL) { // Multichip harvesting is supported for GS. for (auto chip_id = all_chip_ids_.begin(); chip_id != all_chip_ids_.end(); chip_id++) { - harvested_rows_per_target[*chip_id] = get_harvested_noc_rows_for_chip(*chip_id); + if (create_mock_chips) { + harvested_rows_per_target[*chip_id] = + get_harvested_noc_rows((uint32_t)(cluster_desc->get_harvesting_info().at(*chip_id))); + } else { + harvested_rows_per_target[*chip_id] = get_harvested_noc_rows_for_chip(*chip_id); + } num_rows_harvested.insert({*chip_id, 0}); // Only set for broadcast TLB to get RISCS out of reset. We want // all rows to have a reset signal sent. 
if (harvested_rows_per_target[*chip_id]) { @@ -427,7 +432,11 @@ void Cluster::construct_cluster( } std::unique_ptr Cluster::construct_chip_from_cluster( - chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, tt_SocDescriptor& soc_desc) { + chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, tt_SocDescriptor& soc_desc, const bool create_mock_chip) { + if (create_mock_chip) { + return std::make_unique(soc_desc); + } + if (cluster_desc->is_chip_mmio_capable(chip_id)) { return std::make_unique(soc_desc, cluster_desc->get_chips_with_mmio().at(chip_id)); } else { @@ -440,24 +449,26 @@ std::unique_ptr Cluster::construct_chip_from_cluster( chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks) { + std::unordered_map& simulated_harvesting_masks, + const bool create_mock_chip) { HarvestingMasks harvesting_masks = get_harvesting_masks(chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks); tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, cluster_desc->get_noc_translation_table_en().at(chip_id), harvesting_masks); - return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc); + return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc, create_mock_chip); } std::unique_ptr Cluster::construct_chip_from_cluster( chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks) { + std::unordered_map& simulated_harvesting_masks, + const bool create_mock_chip) { tt::ARCH arch = cluster_desc->get_arch(chip_id); const BoardType chip_board_type = cluster_desc->get_board_type(chip_id); std::string soc_desc_path = tt_SocDescriptor::get_soc_descriptor_path(arch, chip_board_type); return construct_chip_from_cluster( - soc_desc_path, chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks); + soc_desc_path, chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks, 
create_mock_chip); } void Cluster::add_chip(chip_id_t chip_id, std::unique_ptr chip) { @@ -466,7 +477,7 @@ void Cluster::add_chip(chip_id_t chip_id, std::unique_ptr chip) { "Chip with id {} already exists in cluster. Cannot add another chip with the same id.", chip_id); all_chip_ids_.insert(chip_id); - if (chip->is_mmio_capable()) { + if (cluster_desc->is_chip_mmio_capable(chip_id)) { local_chip_ids_.insert(chip_id); } else { remote_chip_ids_.insert(chip_id); @@ -550,7 +561,7 @@ HarvestingMasks Cluster::get_harvesting_masks( Cluster::Cluster( const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { @@ -559,7 +570,8 @@ Cluster::Cluster( for (auto& chip_id : cluster_desc->get_all_chips()) { add_chip( chip_id, - construct_chip_from_cluster(chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks)); + construct_chip_from_cluster( + chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks, create_mock_chips)); } // TODO: work on removing this member altogether. Currently assumes all have the same arch. 
@@ -567,7 +579,7 @@ Cluster::Cluster( construct_cluster( num_host_mem_ch_per_mmio_device, - skip_driver_allocs, + create_mock_chips, clean_system_resources, perform_harvesting, simulated_harvesting_masks); @@ -576,7 +588,7 @@ Cluster::Cluster( Cluster::Cluster( const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { @@ -589,7 +601,8 @@ Cluster::Cluster( chip_id); add_chip( chip_id, - construct_chip_from_cluster(chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks)); + construct_chip_from_cluster( + chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks, create_mock_chips)); } // TODO: work on removing this member altogether. Currently assumes all have the same arch. @@ -597,7 +610,7 @@ Cluster::Cluster( construct_cluster( num_host_mem_ch_per_mmio_device, - skip_driver_allocs, + create_mock_chips, clean_system_resources, perform_harvesting, simulated_harvesting_masks); @@ -607,7 +620,7 @@ Cluster::Cluster( const std::string& sdesc_path, const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { @@ -621,7 +634,12 @@ Cluster::Cluster( add_chip( chip_id, construct_chip_from_cluster( - sdesc_path, chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks)); + sdesc_path, + chip_id, + cluster_desc.get(), + perform_harvesting, + simulated_harvesting_masks, + create_mock_chips)); log_assert( cluster_desc->get_arch(chip_id) == chips_.at(chip_id)->get_soc_descriptor().arch, "Passed soc descriptor has {} arch, but for chip id {} has arch {}", @@ -635,23 +653,26 @@ Cluster::Cluster( construct_cluster( 
num_host_mem_ch_per_mmio_device, - skip_driver_allocs, + create_mock_chips, clean_system_resources, perform_harvesting, simulated_harvesting_masks); } Cluster::Cluster( - std::unordered_map>& chips, + std::unique_ptr cluster_descriptor, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, - const std::unordered_map simulated_harvesting_masks) { - cluster_desc = Cluster::create_cluster_descriptor(); + std::unordered_map simulated_harvesting_masks) { + cluster_desc = std::move(cluster_descriptor); - for (auto& [chip_id, chip] : chips) { - add_chip(chip_id, std::move(chip)); + for (auto& chip_id : cluster_desc->get_all_chips()) { + add_chip( + chip_id, + construct_chip_from_cluster( + chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks, create_mock_chips)); } // TODO: work on removing this member altogether. Currently assumes all have the same arch. @@ -659,30 +680,12 @@ Cluster::Cluster( construct_cluster( num_host_mem_ch_per_mmio_device, - skip_driver_allocs, + create_mock_chips, clean_system_resources, perform_harvesting, simulated_harvesting_masks); } -// TODO:This likely won't work well as long as cluster_descriptor is used throughout the code. -/* static */ std::unique_ptr Cluster::create_mock_cluster() { - // TBD how this would look like for simulated cluster. - // Arbitrary arch used for mock cluster. - // Note that this arch currently has an impact on some stuff in Cluster class, based on the produced cluster - // descriptor on the system. This should not be true in the future when we start taking stuff in Cluster from Chip - // rather than ClusterDescriptor. 
- tt::ARCH arch = tt::ARCH::GRAYSKULL; - chip_id_t mock_chip_id = 0; - tt_SocDescriptor soc_desc = - tt_SocDescriptor(tt_SocDescriptor::get_soc_descriptor_path(arch, BoardType::UNKNOWN), false); - std::unique_ptr chip = std::make_unique(soc_desc); - - std::unordered_map> chips; - chips.emplace(mock_chip_id, std::move(chip)); - return std::make_unique(chips); -} - void Cluster::configure_active_ethernet_cores_for_mmio_device( chip_id_t mmio_chip, const std::unordered_set& active_eth_cores_per_chip) { // Makes UMD aware of which ethernet cores have active links. @@ -3537,4 +3540,8 @@ std::unique_ptr Cluster::create_cluster_descriptor( return desc; } +std::string Cluster::serialize() { return Cluster::create_cluster_descriptor()->serialize(); } + +std::filesystem::path Cluster::serialize_to_file() { return Cluster::create_cluster_descriptor()->serialize_to_file(); } + } // namespace tt::umd diff --git a/device/tt_cluster_descriptor.cpp b/device/tt_cluster_descriptor.cpp index 3a5f1d56..b4730717 100644 --- a/device/tt_cluster_descriptor.cpp +++ b/device/tt_cluster_descriptor.cpp @@ -789,6 +789,13 @@ void tt_ClusterDescriptor::load_harvesting_information(YAML::Node &yaml, tt_Clus void tt_ClusterDescriptor::enable_all_devices() { this->enabled_active_chips = this->all_chips; } void tt_ClusterDescriptor::fill_chips_grouped_by_closest_mmio() { + // TODO: remove ethernet coordinates if the new eth fw is ported back to Wormhole. + // For newer topologies every chip will have a direct connection to MMIO chip, so there won't be + // ethernet coordinates, represented by chip locations, to calculate the closest MMIO chip. 
+    if (this->chip_locations.empty()) {
+        return;
+    }
+
     for (const auto &chip : this->all_chips) {
         // This will also fill up the closest_mmio_chip_cache
         chip_id_t closest_mmio_chip = get_closest_mmio_capable_chip(chip);
@@ -896,3 +903,89 @@ tt_ClusterDescriptor::get_chips_grouped_by_closest_mmio() const {
 }
 
 chip_id_t tt_ClusterDescriptor::get_chip_id(const ChipUID &chip_uid) const { return chip_uid_to_chip_id.at(chip_uid); }
+
+std::string tt_ClusterDescriptor::serialize() const {
+    YAML::Emitter out;
+
+    out << YAML::BeginMap;
+
+    // Section: arch
+    out << YAML::Key << "arch" << YAML::Value << YAML::BeginMap;
+    for (const auto &[chip_id, arch] : chip_arch) {
+        out << YAML::Key << chip_id << YAML::Value << tt::arch_to_str(arch);
+    }
+    out << YAML::EndMap;
+
+    // Section: ethernet_connections
+    out << YAML::Key << "ethernet_connections" << YAML::Value << YAML::BeginSeq;
+    std::set> serialized_connections;
+    for (const auto &[src_chip, channels] : ethernet_connections) {
+        for (const auto &[src_chan, dest] : channels) {
+            if (serialized_connections.find({src_chip, src_chan}) != serialized_connections.end()) {
+                continue;
+            }
+            auto [dest_chip, dest_chan] = dest;
+            serialized_connections.insert({dest_chip, dest_chan});
+            out << YAML::BeginSeq;
+            out << YAML::BeginMap << YAML::Key << "chip" << YAML::Value << src_chip << YAML::Key << "chan"
+                << YAML::Value << src_chan << YAML::EndMap;
+            out << YAML::BeginMap << YAML::Key << "chip" << YAML::Value << dest_chip << YAML::Key << "chan"
+                << YAML::Value << dest_chan << YAML::EndMap;
+            out << YAML::EndSeq;
+        }
+    }
+
+    out << YAML::EndSeq;
+
+    // Section: chips_with_mmio
+    out << YAML::Key << "chips_with_mmio" << YAML::Value << YAML::BeginSeq;
+    for (const auto &chip_with_mmio : chips_with_mmio) {
+        out << YAML::BeginMap << YAML::Key << chip_with_mmio.first << YAML::Value << chip_with_mmio.second
+            << YAML::EndMap;
+    }
+    out << YAML::EndSeq;
+
+    // Section: harvesting
+    out << YAML::Key << "harvesting" << YAML::Value << YAML::BeginMap;
+    for (const int &chip : all_chips) {
+        out << YAML::Key << chip << YAML::Value << YAML::BeginMap;
+        out << YAML::Key << "noc_translation" << YAML::Value << noc_translation_enabled.at(chip);
+        out << YAML::Key << "harvest_mask" << YAML::Value << harvesting_masks.at(chip);
+        out << YAML::EndMap;
+    }
+    out << YAML::EndMap;
+
+    // Section: boardtype
+    out << YAML::Key << "boardtype" << YAML::Value << YAML::BeginMap;
+    for (const int &chip : all_chips) {
+        out << YAML::Key << chip << YAML::Value << board_type_to_string(chip_board_type.at(chip));
+    }
+    out << YAML::EndMap;
+
+    out << YAML::EndMap;
+
+    return out.c_str();
+}
+
+// Writes serialize() into "cluster_descriptor.yaml" inside a newly created umd_XXXXXX directory under the
+// system temp directory and returns the path of that file. Throws if the directory or file cannot be created.
+std::filesystem::path tt_ClusterDescriptor::serialize_to_file() const {
+    std::filesystem::path temp_path = std::filesystem::temp_directory_path();
+    std::string cluster_path_dir_template = temp_path / "umd_XXXXXX";
+    // mkdtemp() returns nullptr on failure; building a std::filesystem::path from nullptr is undefined
+    // behaviour, so check the result before using it.
+    const char *cluster_path_dir_cstr = mkdtemp(cluster_path_dir_template.data());
+    if (cluster_path_dir_cstr == nullptr) {
+        throw std::runtime_error("Failed to create temporary directory under " + temp_path.string());
+    }
+    std::filesystem::path cluster_path_dir = cluster_path_dir_cstr;
+    std::filesystem::path cluster_path = cluster_path_dir / "cluster_descriptor.yaml";
+    std::ofstream file(cluster_path);
+    if (!file.is_open()) {
+        throw std::runtime_error("Failed to open " + cluster_path.string() + " for writing.");
+    }
+    file << serialize();
+    file.close();
+
+    return cluster_path;
+}
diff --git a/tests/api/test_cluster.cpp b/tests/api/test_cluster.cpp
index 6abe5146..f86f78dc 100644
--- a/tests/api/test_cluster.cpp
+++ b/tests/api/test_cluster.cpp
@@ -71,12 +71,12 @@ TEST(ApiClusterTest, DifferentConstructors) {
     umd_cluster = std::make_unique(sdesc_path, target_devices);
     umd_cluster = nullptr;
 
-    // TODO: This doesn't work at the moment.
-    // It will start working when we move enough stuff to the chips. At the moment this was disabled, it was mostly due
-    // to harvesting info.
-    // // 4. Constructor for creating a cluster with mock chip.
-    // umd_cluster = Cluster::create_mock_cluster();
-    // umd_cluster = nullptr;
+    // 4. Constructor taking cluster descriptor based on which to create cluster.
+    // Create mock chips is set to true in order to create mock chips for the devices in the cluster descriptor.
+ std::filesystem::path cluster_path = tt::umd::Cluster::serialize_to_file(); + std::unordered_map simulated_harvesting_masks = {}; + std::unique_ptr cluster = std::make_unique( + tt_ClusterDescriptor::create_from_yaml(cluster_path), 1, true, false, true, simulated_harvesting_masks); } TEST(ApiClusterTest, SimpleIOAllChips) {