From 472ef2f9d780453dcb9b6a3436fbae841af4cd1b Mon Sep 17 00:00:00 2001 From: Pavle Janevski <165378935+pjanevskiTT@users.noreply.github.com> Date: Tue, 25 Feb 2025 14:42:53 +0100 Subject: [PATCH] Serialize cluster into yaml (#499) ### Issue Solving part of #431 ### Description Add a function to serialize the cluster to YAML (reverse of what we were doing so far). Add a flag to all Cluster constructors to create mock chips inside Cluster if the user wants that. Expose this (de)serializing through the API as mentioned in the issue above. The output YAML looks like this (note that the styling differs from the original, but the two are equivalent YAML): ``` arch: 1: wormhole_b0 0: wormhole_b0 ethernet_connections: - - chip: 1 chan: 1 - chip: 0 chan: 9 - - chip: 1 chan: 0 - chip: 0 chan: 8 chips_with_mmio: - 0: 0 harvesting: 1: noc_translation: true harvest_mask: 513 0: noc_translation: true harvest_mask: 129 boardtype: 1: n300 0: n300 ``` - Add serialize function to Cluster - Add serialize function to cluster descriptor - Add flag to all Cluster constructors to create mock chips --- device/api/umd/device/cluster.h | 57 ++++--- device/api/umd/device/tt_cluster_descriptor.h | 4 + .../device/types/cluster_descriptor_types.h | 28 +++ device/cluster.cpp | 159 +++++++++--------- device/tt_cluster_descriptor.cpp | 82 +++++++++ tests/api/test_cluster.cpp | 12 +- 6 files changed, 235 insertions(+), 107 deletions(-) diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index f27f2305..51b0b904 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -7,6 +7,7 @@ #pragma once #include #include +#include #include #include #include @@ -689,15 +690,16 @@ class Cluster : public tt_device { * Simplest form, creates a cluster of all available devices on the system. * * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). 
- * @param skip_driver_allocs + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. * @param clean_system_resource Specifies if host state from previous runs needs to be cleaned up. * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. * @param simulated_harvesting_masks Manually specify additional harvesting masks for the devices in the cluster. * The ones defined by the devices itself have to be used, they will be merged with the ones passed here. + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. */ Cluster( const uint32_t& num_host_mem_ch_per_mmio_device = 1, - const bool skip_driver_allocs = false, + const bool create_mock_chips = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); @@ -708,7 +710,7 @@ class Cluster : public tt_device { * * @param target_devices Devices to target. * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). - * @param skip_driver_allocs + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. * @param clean_system_resource Specifies if host state from previous runs needs to be cleaned up. * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. * @param simulated_harvesting_masks Manually specify additional harvesting masks for the devices in the cluster. @@ -717,7 +719,7 @@ class Cluster : public tt_device { Cluster( const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device = 1, - const bool skip_driver_allocs = false, + const bool create_mock_chips = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); @@ -731,7 +733,7 @@ class Cluster : public tt_device { * harvesting info of the devices in the cluster. * @param target_devices Devices to target. 
* @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). - * @param skip_driver_allocs + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. * @param clean_system_resource Specifies if host state from previous runs needs to be cleaned up. * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. * @param simulated_harvesting_masks Manually specify additional harvesting masks for the devices in the cluster. @@ -741,38 +743,33 @@ class Cluster : public tt_device { const std::string& sdesc_path, const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device = 1, - const bool skip_driver_allocs = false, + const bool create_mock_chips = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); /** * Cluster constructor. - * This constructor offers maximal flexibility, allowing the user to pass manually created Chips. - * The user has to know what they are doing. - * TODO: Could fail if logical_ids not match the ones in cluster descriptor, while Cluster still uses cluster - * descriptor. + * This constructor can be used with custom cluster descriptor. If the cluster descriptor does not match the + * actual devices on the system, the constructor will throw an exception. If create_mock_chips is set to true, + * the constructor will create mock chips for the devices in the cluster descriptor. * - * @param chips Map of logical device ids to Chip instances. + * @param cluster_descriptor Cluster descriptor object based on which Cluster is going to be created. * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). - * @param skip_driver_allocs + * @param create_mock_chips Create mock chips for the devices in the cluster descriptor. * @param clean_system_resource Specifies if host state from previous runs needs to be cleaned up. 
* @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. - * @param simulated_harvesting_masks + * @param simulated_harvesting_masks Manually specify additional harvesting masks for the devices in the cluster. + * The ones defined by the devices itself have to be used, they will be merged with the ones passed here. */ Cluster( - std::unordered_map>& chips, + std::unique_ptr cluster_descriptor, const uint32_t& num_host_mem_ch_per_mmio_device = 1, - const bool skip_driver_allocs = false, + const bool create_mock_chips = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); - /** - * Cluster constructor which creates a cluster with Mock chips. - */ - static std::unique_ptr create_mock_cluster(); - // Existing API we want to keep. UMD is transitioning to use CoreCoord instead of tt_xy_pair. // This set of function shouldn't be removed even after the transition. // TODO: regroup the functions from this set into setup/teardown, runtime, and misc functions. @@ -996,6 +993,11 @@ class Cluster : public tt_device { const chip_id_t chip, const std::unordered_set& cores, const std::string& fallback_tlb); static std::unique_ptr create_cluster_descriptor(); + + static std::string serialize(); + + static std::filesystem::path serialize_to_file(); + // Destructor virtual ~Cluster(); @@ -1005,7 +1007,7 @@ class Cluster : public tt_device { void create_device( const std::set& target_mmio_device_ids, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources); void initialize_interprocess_mutexes(int logical_device_id, bool cleanup_mutexes_in_shm); void cleanup_shared_host_state(); @@ -1137,18 +1139,23 @@ class Cluster : public tt_device { // Helper functions for constructing the chips from the cluster descriptor. 
std::unique_ptr construct_chip_from_cluster( - chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, tt_SocDescriptor& soc_desc); + chip_id_t chip_id, + tt_ClusterDescriptor* cluster_desc, + tt_SocDescriptor& soc_desc, + const bool create_mock_chip = false); std::unique_ptr construct_chip_from_cluster( const std::string& soc_desc_path, chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks); + std::unordered_map& simulated_harvesting_masks, + const bool create_mock_chip = false); std::unique_ptr construct_chip_from_cluster( chip_id_t logical_device_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks); + std::unordered_map& simulated_harvesting_masks, + const bool create_mock_chip = false); void add_chip(chip_id_t chip_id, std::unique_ptr chip); HarvestingMasks get_harvesting_masks( chip_id_t chip_id, @@ -1174,7 +1181,7 @@ class Cluster : public tt_device { std::unordered_map& simulated_harvesting_masks); void construct_cluster( const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks); diff --git a/device/api/umd/device/tt_cluster_descriptor.h b/device/api/umd/device/tt_cluster_descriptor.h index 1700e5b7..c95b9c9f 100644 --- a/device/api/umd/device/tt_cluster_descriptor.h +++ b/device/api/umd/device/tt_cluster_descriptor.h @@ -122,4 +122,8 @@ class tt_ClusterDescriptor { chip_id_t local_chip, ethernet_channel_t local_ethernet_channel) const; void enable_all_devices(); + + std::string serialize() const; + + std::filesystem::path serialize_to_file() const; }; diff --git a/device/api/umd/device/types/cluster_descriptor_types.h b/device/api/umd/device/types/cluster_descriptor_types.h index 1f5e71ac..f1d94087 100644 --- 
a/device/api/umd/device/types/cluster_descriptor_types.h +++ b/device/api/umd/device/types/cluster_descriptor_types.h @@ -11,6 +11,7 @@ #include #include +#include "fmt/core.h" #include "umd/device/types/harvesting.h" // Small performant hash combiner taken from boost library. @@ -51,6 +52,33 @@ enum BoardType : uint32_t { UNKNOWN, }; +inline std::string board_type_to_string(const BoardType board_type) { + switch (board_type) { + case BoardType::E75: + return "e75"; + case BoardType::E150: + return "e150"; + case BoardType::E300: + return "e300"; + case BoardType::N150: + return "n150"; + case BoardType::N300: + return "n300"; + case BoardType::P100: + return "p100"; + case BoardType::P150: + return "p150"; + case BoardType::P300: + return "p300"; + case BoardType::GALAXY: + return "galaxy"; + case BoardType::UNKNOWN: + return "unknown"; + } + + throw std::runtime_error("Unknown board type passed for conversion to string."); +} + // TODO: add Wormhole and Grayskull board types to this function inline BoardType get_board_type_from_board_id(const uint64_t board_id) { uint64_t upi = (board_id >> 36) & 0xFFFFF; diff --git a/device/cluster.cpp b/device/cluster.cpp index bc7b5605..97a13bbb 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -36,6 +36,7 @@ #include #include +#include "api/umd/device/cluster.h" #include "api/umd/device/tt_core_coordinates.h" #include "logger.hpp" #include "umd/device/architecture_implementation.h" @@ -195,7 +196,7 @@ void Cluster::initialize_interprocess_mutexes(int logical_device_id, bool cleanu void Cluster::create_device( const std::set& target_mmio_device_ids, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources) { log_debug(LogSiliconDriver, "Cluster::Cluster"); @@ -206,40 +207,38 @@ void Cluster::create_device( target_mmio_device_ids.size() > 0, "Must provide set of target_mmio_device_ids to Cluster constructor now."); for (const 
chip_id_t& logical_device_id : target_mmio_device_ids) { - auto pci_device = get_tt_device(logical_device_id)->get_pci_device(); - - int num_host_mem_channels = num_host_mem_ch_per_mmio_device; - - // TODO: get rid of this when the following Metal CI issue is resolved. - // https://github.com/tenstorrent/tt-metal/issues/15675 - // The notion that we should clamp the number of host mem channels to - // what we have available and emit a warning is wrong, since the - // application might try to use the channels it asked for. We should - // just fail early since the error message will be actionable instead of - // a segfault or memory corruption. - if (!pci_device->is_iommu_enabled()) { - uint16_t pcie_device_id = pci_device->get_pci_device_id(); - uint32_t pcie_revision = pci_device->get_pci_revision(); - num_host_mem_channels = - get_available_num_host_mem_channels(num_host_mem_ch_per_mmio_device, pcie_device_id, pcie_revision); - } + if (!create_mock_chips) { + auto pci_device = get_tt_device(logical_device_id)->get_pci_device(); + + int num_host_mem_channels = num_host_mem_ch_per_mmio_device; + + // TODO: get rid of this when the following Metal CI issue is resolved. + // https://github.com/tenstorrent/tt-metal/issues/15675 + // The notion that we should clamp the number of host mem channels to + // what we have available and emit a warning is wrong, since the + // application might try to use the channels it asked for. We should + // just fail early since the error message will be actionable instead of + // a segfault or memory corruption. 
+ if (!pci_device->is_iommu_enabled()) { + uint16_t pcie_device_id = pci_device->get_pci_device_id(); + uint32_t pcie_revision = pci_device->get_pci_revision(); + num_host_mem_channels = + get_available_num_host_mem_channels(num_host_mem_ch_per_mmio_device, pcie_device_id, pcie_revision); + } - log_debug( - LogSiliconDriver, - "Using {} Hugepages/NumHostMemChannels for PCIDevice (logical_device_id: {} pci_interface_id: {} " - "device_id: 0x{:x} revision: {})", - num_host_mem_channels, - logical_device_id, - pci_device->get_device_num(), - pci_device->get_device_num(), - pci_device->revision_id); + log_debug( + LogSiliconDriver, + "Using {} Hugepages/NumHostMemChannels for PCIDevice (logical_device_id: {} pci_interface_id: {} " + "device_id: 0x{:x} revision: {})", + num_host_mem_channels, + logical_device_id, + pci_device->get_device_num(), + pci_device->get_device_num(), + pci_device->revision_id); - // TODO: This will be moved to a dedicated Locking class. - initialize_interprocess_mutexes(logical_device_id, clean_system_resources); + // TODO: This will be moved to a dedicated Locking class. + initialize_interprocess_mutexes(logical_device_id, clean_system_resources); - // MT: Initial BH - hugepages will fail init - // For using silicon driver without workload to query mission mode params, no need for hugepage. - if (!skip_driver_allocs) { bool hugepages_initialized = pci_device->init_hugepage(num_host_mem_channels); // Large writes to remote chips require hugepages to be initialized. // Conservative assert - end workload if remote chips present but hugepages not initialized (failures caused @@ -253,6 +252,7 @@ void Cluster::create_device( log_warning(LogSiliconDriver, "No hugepage mapping at device {}.", logical_device_id); } } + // translation layer for harvested coords. 
Default is identity map harvested_coord_translation.insert({logical_device_id, create_harvested_coord_translation(arch_name, true)}); } @@ -280,11 +280,11 @@ std::unordered_map Cluster::get_harvesting_masks_for_soc_de void Cluster::construct_cluster( const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { - if (!skip_driver_allocs) { + if (!create_mock_chips) { auto available_device_ids = detect_available_device_ids(); log_info(LogSiliconDriver, "Detected PCI devices: {}", available_device_ids); log_info( @@ -293,7 +293,7 @@ void Cluster::construct_cluster( perform_harvesting_on_sdesc = perform_harvesting; - create_device(local_chip_ids_, num_host_mem_ch_per_mmio_device, skip_driver_allocs, clean_system_resources); + create_device(local_chip_ids_, num_host_mem_ch_per_mmio_device, create_mock_chips, clean_system_resources); // Disable dependency to ethernet firmware for all BH devices and WH devices with all chips having MMIO (e.g. UBB // Galaxy), do not disable for N150, was seeing some issues in CI @@ -355,7 +355,12 @@ void Cluster::construct_cluster( } else if (arch_name == tt::ARCH::GRAYSKULL) { // Multichip harvesting is supported for GS. for (auto chip_id = all_chip_ids_.begin(); chip_id != all_chip_ids_.end(); chip_id++) { - harvested_rows_per_target[*chip_id] = get_harvested_noc_rows_for_chip(*chip_id); + if (create_mock_chips) { + harvested_rows_per_target[*chip_id] = + get_harvested_noc_rows((uint32_t)(cluster_desc->get_harvesting_info().at(*chip_id))); + } else { + harvested_rows_per_target[*chip_id] = get_harvested_noc_rows_for_chip(*chip_id); + } num_rows_harvested.insert({*chip_id, 0}); // Only set for broadcast TLB to get RISCS out of reset. We want // all rows to have a reset signal sent. 
if (harvested_rows_per_target[*chip_id]) { @@ -427,7 +432,11 @@ void Cluster::construct_cluster( } std::unique_ptr Cluster::construct_chip_from_cluster( - chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, tt_SocDescriptor& soc_desc) { + chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, tt_SocDescriptor& soc_desc, const bool create_mock_chip) { + if (create_mock_chip) { + return std::make_unique(soc_desc); + } + if (cluster_desc->is_chip_mmio_capable(chip_id)) { return std::make_unique(soc_desc, cluster_desc->get_chips_with_mmio().at(chip_id)); } else { @@ -440,24 +449,26 @@ std::unique_ptr Cluster::construct_chip_from_cluster( chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks) { + std::unordered_map& simulated_harvesting_masks, + const bool create_mock_chip) { HarvestingMasks harvesting_masks = get_harvesting_masks(chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks); tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, cluster_desc->get_noc_translation_table_en().at(chip_id), harvesting_masks); - return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc); + return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc, create_mock_chip); } std::unique_ptr Cluster::construct_chip_from_cluster( chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks) { + std::unordered_map& simulated_harvesting_masks, + const bool create_mock_chip) { tt::ARCH arch = cluster_desc->get_arch(chip_id); const BoardType chip_board_type = cluster_desc->get_board_type(chip_id); std::string soc_desc_path = tt_SocDescriptor::get_soc_descriptor_path(arch, chip_board_type); return construct_chip_from_cluster( - soc_desc_path, chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks); + soc_desc_path, chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks, 
create_mock_chip); } void Cluster::add_chip(chip_id_t chip_id, std::unique_ptr chip) { @@ -466,7 +477,7 @@ void Cluster::add_chip(chip_id_t chip_id, std::unique_ptr chip) { "Chip with id {} already exists in cluster. Cannot add another chip with the same id.", chip_id); all_chip_ids_.insert(chip_id); - if (chip->is_mmio_capable()) { + if (cluster_desc->is_chip_mmio_capable(chip_id)) { local_chip_ids_.insert(chip_id); } else { remote_chip_ids_.insert(chip_id); @@ -550,7 +561,7 @@ HarvestingMasks Cluster::get_harvesting_masks( Cluster::Cluster( const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { @@ -559,7 +570,8 @@ Cluster::Cluster( for (auto& chip_id : cluster_desc->get_all_chips()) { add_chip( chip_id, - construct_chip_from_cluster(chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks)); + construct_chip_from_cluster( + chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks, create_mock_chips)); } // TODO: work on removing this member altogether. Currently assumes all have the same arch. 
@@ -567,7 +579,7 @@ Cluster::Cluster( construct_cluster( num_host_mem_ch_per_mmio_device, - skip_driver_allocs, + create_mock_chips, clean_system_resources, perform_harvesting, simulated_harvesting_masks); @@ -576,7 +588,7 @@ Cluster::Cluster( Cluster::Cluster( const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { @@ -589,7 +601,8 @@ Cluster::Cluster( chip_id); add_chip( chip_id, - construct_chip_from_cluster(chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks)); + construct_chip_from_cluster( + chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks, create_mock_chips)); } // TODO: work on removing this member altogether. Currently assumes all have the same arch. @@ -597,7 +610,7 @@ Cluster::Cluster( construct_cluster( num_host_mem_ch_per_mmio_device, - skip_driver_allocs, + create_mock_chips, clean_system_resources, perform_harvesting, simulated_harvesting_masks); @@ -607,7 +620,7 @@ Cluster::Cluster( const std::string& sdesc_path, const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { @@ -621,7 +634,12 @@ Cluster::Cluster( add_chip( chip_id, construct_chip_from_cluster( - sdesc_path, chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks)); + sdesc_path, + chip_id, + cluster_desc.get(), + perform_harvesting, + simulated_harvesting_masks, + create_mock_chips)); log_assert( cluster_desc->get_arch(chip_id) == chips_.at(chip_id)->get_soc_descriptor().arch, "Passed soc descriptor has {} arch, but for chip id {} has arch {}", @@ -635,23 +653,26 @@ Cluster::Cluster( construct_cluster( 
num_host_mem_ch_per_mmio_device, - skip_driver_allocs, + create_mock_chips, clean_system_resources, perform_harvesting, simulated_harvesting_masks); } Cluster::Cluster( - std::unordered_map>& chips, + std::unique_ptr cluster_descriptor, const uint32_t& num_host_mem_ch_per_mmio_device, - const bool skip_driver_allocs, + const bool create_mock_chips, const bool clean_system_resources, bool perform_harvesting, - const std::unordered_map simulated_harvesting_masks) { - cluster_desc = Cluster::create_cluster_descriptor(); + std::unordered_map simulated_harvesting_masks) { + cluster_desc = std::move(cluster_descriptor); - for (auto& [chip_id, chip] : chips) { - add_chip(chip_id, std::move(chip)); + for (auto& chip_id : cluster_desc->get_all_chips()) { + add_chip( + chip_id, + construct_chip_from_cluster( + chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks, create_mock_chips)); } // TODO: work on removing this member altogether. Currently assumes all have the same arch. @@ -659,30 +680,12 @@ Cluster::Cluster( construct_cluster( num_host_mem_ch_per_mmio_device, - skip_driver_allocs, + create_mock_chips, clean_system_resources, perform_harvesting, simulated_harvesting_masks); } -// TODO:This likely won't work well as long as cluster_descriptor is used throughout the code. -/* static */ std::unique_ptr Cluster::create_mock_cluster() { - // TBD how this would look like for simulated cluster. - // Arbitrary arch used for mock cluster. - // Note that this arch currently has an impact on some stuff in Cluster class, based on the produced cluster - // descriptor on the system. This should not be true in the future when we start taking stuff in Cluster from Chip - // rather than ClusterDescriptor. 
- tt::ARCH arch = tt::ARCH::GRAYSKULL; - chip_id_t mock_chip_id = 0; - tt_SocDescriptor soc_desc = - tt_SocDescriptor(tt_SocDescriptor::get_soc_descriptor_path(arch, BoardType::UNKNOWN), false); - std::unique_ptr chip = std::make_unique(soc_desc); - - std::unordered_map> chips; - chips.emplace(mock_chip_id, std::move(chip)); - return std::make_unique(chips); -} - void Cluster::configure_active_ethernet_cores_for_mmio_device( chip_id_t mmio_chip, const std::unordered_set& active_eth_cores_per_chip) { // Makes UMD aware of which ethernet cores have active links. @@ -3537,4 +3540,8 @@ std::unique_ptr Cluster::create_cluster_descriptor( return desc; } +std::string Cluster::serialize() { return Cluster::create_cluster_descriptor()->serialize(); } + +std::filesystem::path Cluster::serialize_to_file() { return Cluster::create_cluster_descriptor()->serialize_to_file(); } + } // namespace tt::umd diff --git a/device/tt_cluster_descriptor.cpp b/device/tt_cluster_descriptor.cpp index 3a5f1d56..b4730717 100644 --- a/device/tt_cluster_descriptor.cpp +++ b/device/tt_cluster_descriptor.cpp @@ -789,6 +789,13 @@ void tt_ClusterDescriptor::load_harvesting_information(YAML::Node &yaml, tt_Clus void tt_ClusterDescriptor::enable_all_devices() { this->enabled_active_chips = this->all_chips; } void tt_ClusterDescriptor::fill_chips_grouped_by_closest_mmio() { + // TODO: remote ethernet coordinates if new eth fw is ported for back Wormhole. + // For newer topologies every chip will have a direct connection to MMIO chip, so there won't be + // ethernet coordinates, represented by chip locations, to calculate the closest MMIO chip. 
+ if (this->chip_locations.empty()) { + return; + } + for (const auto &chip : this->all_chips) { // This will also fill up the closest_mmio_chip_cache chip_id_t closest_mmio_chip = get_closest_mmio_capable_chip(chip); @@ -896,3 +903,78 @@ tt_ClusterDescriptor::get_chips_grouped_by_closest_mmio() const { } chip_id_t tt_ClusterDescriptor::get_chip_id(const ChipUID &chip_uid) const { return chip_uid_to_chip_id.at(chip_uid); } + +std::string tt_ClusterDescriptor::serialize() const { + YAML::Emitter out; + + out << YAML::BeginMap; + + // Section: arch + out << YAML::Key << "arch" << YAML::Value << YAML::BeginMap; + for (const auto &[chip_id, arch] : chip_arch) { + out << YAML::Key << chip_id << YAML::Value << tt::arch_to_str(arch); + } + out << YAML::EndMap; + + // Section: ethernet_connections + out << YAML::Key << "ethernet_connections" << YAML::Value << YAML::BeginSeq; + std::set> serialized_connections; + for (const auto &[src_chip, channels] : ethernet_connections) { + for (const auto &[src_chan, dest] : channels) { + if (serialized_connections.find({src_chip, src_chan}) != serialized_connections.end()) { + continue; + } + auto [dest_chip, dest_chan] = dest; + serialized_connections.insert({dest_chip, dest_chan}); + out << YAML::BeginSeq; + out << YAML::BeginMap << YAML::Key << "chip" << YAML::Value << src_chip << YAML::Key << "chan" + << YAML::Value << src_chan << YAML::EndMap; + out << YAML::BeginMap << YAML::Key << "chip" << YAML::Value << dest_chip << YAML::Key << "chan" + << YAML::Value << dest_chan << YAML::EndMap; + out << YAML::EndSeq; + } + } + + out << YAML::EndSeq; + + // Section: chips_with_mmio + out << YAML::Key << "chips_with_mmio" << YAML::Value << YAML::BeginSeq; + for (const auto &chip_with_mmio : chips_with_mmio) { + out << YAML::BeginMap << YAML::Key << chip_with_mmio.first << YAML::Value << chip_with_mmio.second + << YAML::EndMap; + } + out << YAML::EndSeq; + + // Section: harvesting + out << YAML::Key << "harvesting" << YAML::Value << 
YAML::BeginMap; + for (const int &chip : all_chips) { + out << YAML::Key << chip << YAML::Value << YAML::BeginMap; + out << YAML::Key << "noc_translation" << YAML::Value << noc_translation_enabled.at(chip); + out << YAML::Key << "harvest_mask" << YAML::Value << harvesting_masks.at(chip); + out << YAML::EndMap; + } + out << YAML::EndMap; + + // Section: boardtype + out << YAML::Key << "boardtype" << YAML::Value << YAML::BeginMap; + for (const int &chip : all_chips) { + out << YAML::Key << chip << YAML::Value << board_type_to_string(chip_board_type.at(chip)); + } + out << YAML::EndMap; + + out << YAML::EndMap; + + return out.c_str(); +} + +std::filesystem::path tt_ClusterDescriptor::serialize_to_file() const { + std::filesystem::path temp_path = std::filesystem::temp_directory_path(); + std::string cluster_path_dir_template = temp_path / "umd_XXXXXX"; + std::filesystem::path cluster_path_dir = mkdtemp(cluster_path_dir_template.data()); + std::filesystem::path cluster_path = cluster_path_dir / "cluster_descriptor.yaml"; + std::ofstream file(cluster_path); + file << serialize(); + file.close(); + + return cluster_path; +} diff --git a/tests/api/test_cluster.cpp b/tests/api/test_cluster.cpp index 6abe5146..f86f78dc 100644 --- a/tests/api/test_cluster.cpp +++ b/tests/api/test_cluster.cpp @@ -71,12 +71,12 @@ TEST(ApiClusterTest, DifferentConstructors) { umd_cluster = std::make_unique(sdesc_path, target_devices); umd_cluster = nullptr; - // TODO: This doesn't work at the moment. - // It will start working when we move enough stuff to the chips. At the moment this was disabled, it was mostly due - // to harvesting info. - // // 4. Constructor for creating a cluster with mock chip. - // umd_cluster = Cluster::create_mock_cluster(); - // umd_cluster = nullptr; + // 4. Constructor taking cluster descriptor based on which to create cluster. + // Create mock chips is set to true in order to create mock chips for the devices in the cluster descriptor. 
+ std::filesystem::path cluster_path = tt::umd::Cluster::serialize_to_file(); + std::unordered_map simulated_harvesting_masks = {}; + std::unique_ptr cluster = std::make_unique( + tt_ClusterDescriptor::create_from_yaml(cluster_path), 1, true, false, true, simulated_harvesting_masks); } TEST(ApiClusterTest, SimpleIOAllChips) {