Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check KMD version prior to querying device info #173

Merged
merged 4 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,7 @@ jobs:
- name: Run API tests
run: |
${{ env.TEST_OUTPUT_DIR }}/umd/api/api_tests
- name: Run MISC tests
run: |
${{ env.TEST_OUTPUT_DIR }}/umd/misc/umd_misc_tests
73 changes: 73 additions & 0 deletions common/semver.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <cstdint>
#include <sstream>
#include <string>

#include "fmt/format.h"

namespace tt::umd {

/**
* Based on Semantic Versioning 2.0.0 (https://semver.org/) but more permissive.
* TT-KMD reports version strings that are technically not semver compliant.
*/

class semver_t {
public:
const uint64_t major;
const uint64_t minor;
const uint64_t patch;

semver_t(uint64_t major, uint64_t minor, uint64_t patch) : major(major), minor(minor), patch(patch) {}

semver_t(const std::string& version_str) : semver_t(parse(version_str)) {}

bool operator<(const semver_t& other) const {
return std::tie(major, minor, patch) < std::tie(other.major, other.minor, other.patch);
}

bool operator>(const semver_t& other) const { return other < *this; }

bool operator==(const semver_t& other) const {
return std::tie(major, minor, patch) == std::tie(other.major, other.minor, other.patch);
}

bool operator!=(const semver_t& other) const { return !(*this == other); }

bool operator<=(const semver_t& other) const { return !(other < *this); }

bool operator>=(const semver_t& other) const { return !(*this < other); }

std::string to_string() const { return fmt::format("{}.{}.{}", major, minor, patch); }

private:
static semver_t parse(const std::string& version_str) {
std::istringstream iss(version_str);
std::string token;
uint64_t major = 0;
uint64_t minor = 0;
uint64_t patch = 0;

if (std::getline(iss, token, '.')) {
major = std::stoull(token);

if (std::getline(iss, token, '.')) {
minor = std::stoull(token);

if (std::getline(iss, token, '.')) {
patch = std::stoull(token);
}
}
}
return semver_t(major, minor, patch);
}
};

} // namespace tt::umd
20 changes: 19 additions & 1 deletion device/pcie/pci_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,10 @@ PCIDevice::PCIDevice(int pci_device_number, int logical_device_id) :
numa_node(read_sysfs<int>(info, "numa_node")),
revision(read_sysfs<int>(info, "revision")),
arch(detect_arch(info.device_id, revision)),
architecture_implementation(tt::umd::architecture_implementation::create(arch)) {
architecture_implementation(tt::umd::architecture_implementation::create(arch)),
kmd_version(read_kmd_version()) {
log_info(LogSiliconDriver, "Opened PCI device {}; KMD version: {}", pci_device_num, kmd_version.to_string());

struct {
tenstorrent_query_mappings query_mappings;
tenstorrent_mapping mapping_array[8];
Expand Down Expand Up @@ -809,3 +812,18 @@ void PCIDevice::print_file_contents(std::string filename, std::string hint) {
}
}
}

semver_t PCIDevice::read_kmd_version() {
static const std::string path = "/sys/module/tenstorrent/version";
std::ifstream file(path);

if (!file.is_open()) {
log_warning(LogSiliconDriver, "Failed to open file: {}", path);
return {0, 0, 0};
}

std::string version_str;
std::getline(file, version_str);

return semver_t(version_str);
}
11 changes: 10 additions & 1 deletion device/pcie/pci_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
#include <unordered_map>
#include <vector>

#include "common/semver.hpp"
#include "device/tlb.h"
#include "device/tt_arch_types.h"
#include "device/tt_cluster_descriptor_types.h"
#include "device/tt_xy_pair.h"
#include "fmt/format.h"

// TODO: this is used up in cluster.cpp but that logic ought to be
// lowered into the PCIDevice class since it is specific to PCIe cards.
Expand All @@ -31,7 +33,8 @@ constexpr unsigned int c_hang_read_value = 0xffffffffu;

namespace tt::umd {
class architecture_implementation;
}
struct semver_t;
} // namespace tt::umd

struct dynamic_tlb {
uint64_t bar_offset; // Offset that address is mapped to, within the PCI BAR.
Expand All @@ -57,6 +60,9 @@ struct PciDeviceInfo {
// onto this struct as methods? e.g. current_link_width etc.
};

// Do we want to put everything into this file into tt::umd namespace?
using tt::umd::semver_t;

class PCIDevice {
const std::string device_path; // Path to character device: /dev/tenstorrent/N
const int pci_device_num; // N in /dev/tenstorrent/N
Expand All @@ -66,6 +72,7 @@ class PCIDevice {
const int numa_node; // -1 if non-NUMA
const int revision; // PCI revision value from sysfs
const tt::ARCH arch; // e.g. Grayskull, Wormhole, Blackhole
const semver_t kmd_version; // KMD version
std::unique_ptr<tt::umd::architecture_implementation> architecture_implementation;

public:
Expand Down Expand Up @@ -230,5 +237,7 @@ class PCIDevice {
// For debug purposes when various stages fails.
void print_file_contents(std::string filename, std::string hint = "");

semver_t read_kmd_version();

std::vector<hugepage_mapping> hugepage_mapping_per_channel;
};
2 changes: 2 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ endif()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/microbenchmark)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/api)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/pcie)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/misc)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/simulation)
if($ENV{ARCH_NAME} STREQUAL "wormhole_b0")
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/wormhole)
Expand All @@ -82,4 +83,5 @@ add_custom_target(
simulation_tests
test_pcie_device
api_tests
umd_misc_tests
)
12 changes: 12 additions & 0 deletions tests/misc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
set(UMD_MISC_TESTS_SRCS test_semver.cpp)

add_executable(umd_misc_tests ${UMD_MISC_TESTS_SRCS})
target_link_libraries(umd_misc_tests PRIVATE test_common)
set_target_properties(
umd_misc_tests
PROPERTIES
RUNTIME_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/test/umd/misc
OUTPUT_NAME
umd_misc_tests
)
80 changes: 80 additions & 0 deletions tests/misc/test_semver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <gtest/gtest.h>

#include <map>

#include "common/semver.hpp"

using tt::umd::semver_t;

TEST(Semver, Valid) {
const std::map<std::string, semver_t> valid_test_cases = {
{"1.29", semver_t(1, 29, 0)}, // technically invalid, but seen from TT-KMD
{"1.28-bh2", semver_t(1, 28, 0)}, // technically invalid, but seen from TT-KMD
{"0.0.4", semver_t(0, 0, 4)},
{"1.2.3", semver_t(1, 2, 3)},
{"10.20.30", semver_t(10, 20, 30)},
{"1.1.2-prerelease+meta", semver_t(1, 1, 2)},
{"1.1.2+meta", semver_t(1, 1, 2)},
{"1.1.2+meta-valid", semver_t(1, 1, 2)},
{"1.0.0-alpha", semver_t(1, 0, 0)},
{"1.0.0-beta", semver_t(1, 0, 0)},
{"1.0.0-alpha.beta", semver_t(1, 0, 0)},
{"1.0.0-alpha.beta.1", semver_t(1, 0, 0)},
{"1.0.0-alpha.1", semver_t(1, 0, 0)},
{"1.0.0-alpha0.valid", semver_t(1, 0, 0)},
{"1.0.0-alpha.0valid", semver_t(1, 0, 0)},
{"1.0.0-alpha-a.b-c-somethinglong+build.1-aef.1-its-okay", semver_t(1, 0, 0)},
{"1.0.0-rc.1+build.1", semver_t(1, 0, 0)},
{"2.0.0-rc.1+build.123", semver_t(2, 0, 0)},
{"1.2.3-beta", semver_t(1, 2, 3)},
{"10.2.3-DEV-SNAPSHOT", semver_t(10, 2, 3)},
{"1.2.3-SNAPSHOT-123", semver_t(1, 2, 3)},
{"1.0.0", semver_t(1, 0, 0)},
{"2.0.0", semver_t(2, 0, 0)},
{"1.1.7", semver_t(1, 1, 7)},
{"2.0.0+build.1848", semver_t(2, 0, 0)},
{"2.0.1-alpha.1227", semver_t(2, 0, 1)},
{"1.0.0-alpha+beta", semver_t(1, 0, 0)},
{"1.2.3----RC-SNAPSHOT.12.9.1--.12+788", semver_t(1, 2, 3)},
{"1.2.3----R-S.12.9.1--.12+meta", semver_t(1, 2, 3)},
{"1.2.3----RC-SNAPSHOT.12.9.1--.12", semver_t(1, 2, 3)},
{"1.0.0+0.build.1-rc.10000aaa-kk-0.1", semver_t(1, 0, 0)},
{"1.0.0-0A.is.legal", semver_t(1, 0, 0)}};

for (const auto &[version_str, expected] : valid_test_cases) {
semver_t actual(version_str);
EXPECT_EQ(actual.major, expected.major);
EXPECT_EQ(actual.minor, expected.minor);
EXPECT_EQ(actual.patch, expected.patch);
}
}

TEST(Semver, Invalid) {
std::vector<std::string> invalid_test_cases = {
"+invalid",
"-invalid",
"-invalid+invalid",
"-invalid.01",
"alpha",
"alpha.beta",
"alpha.beta.1",
"alpha.1",
"alpha+beta",
"alpha_beta",
"alpha.",
"alpha..",
"beta",
"-alpha.",
"+justmeta",
};

for (const auto &version_str : invalid_test_cases) {
EXPECT_THROW(semver_t{version_str}, std::exception) << "'" << version_str << "' should be invalid";
}
}
Loading