Skip to content

Commit

Permalink
PCIDevice class cleanup (#149)
Browse files Browse the repository at this point in the history
* Rehome code in pcie/utils.hpp to the PCIDevice class
* Attempt to clean up ambiguity RE: what "device id" actually means
* Start enforcing an abstraction boundary at the PCI device level
  • Loading branch information
joelsmithTT authored Oct 15, 2024
1 parent 2361d68 commit 54cefa7
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 275 deletions.
2 changes: 1 addition & 1 deletion Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,7 @@ WARN_LOGFILE =
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.

INPUT = device/tt_device.h
INPUT = device/tt_device.h device/pcie/pci_device.hpp

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
Expand Down
208 changes: 100 additions & 108 deletions device/pcie/pci_device.cpp

Large diffs are not rendered by default.

155 changes: 105 additions & 50 deletions device/pcie/pci_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@
*/

#pragma once

#include <cstdint>
#include <cstdio>
#include <vector>

#include "device/tt_arch_types.h"
#include "device/architecture_implementation.h"

// Sizes of the write-combined (WC) portion of the BAR0 mapping, per architecture prefix.
// GS value: 156 MiB + 20 MiB + 288 MiB = 464 MiB. (GS presumably means Grayskull — TODO confirm.)
// BH value: 188 << 21 = 376 MiB. (BH is Blackhole, per the BAR0_BH_SIZE comment below.)
// NOTE(review): both are mutable, non-const statics defined in a header; whether anything
// writes to them is not visible here.
static uint32_t GS_BAR0_WC_MAPPING_SIZE = (156<<20) + (10<<21) + (18<<24);
static uint32_t BH_BAR0_WC_MAPPING_SIZE = 188<<21; // Defines the address for WC region. addresses 0 to BH_BAR0_WC_MAPPING_SIZE are in WC, above that are UC

// Fixed 32-bit offsets. Going by the names these are the Blackhole NOC node ID register and
// the Grayskull/Wormhole ARC scratch 6 register — presumably offsets into BAR0; their use
// is not visible in this header, so confirm against pci_device.cpp.
static const uint32_t BH_NOC_NODE_ID_OFFSET = 0x1FD04044;
static const uint32_t GS_WH_ARC_SCRATCH_6_OFFSET = 0x1FF30078;

// TODO: this is used up in tt_silicon_driver.cpp but that logic ought to be
// lowered into the PCIDevice class since it is specific to PCIe cards.
// See /vendor_ip/synopsys/052021/bh_pcie_ctl_gen5/export/configuration/DWC_pcie_ctl.h
static const uint64_t UNROLL_ATU_OFFSET_BAR = 0x1200;

// TODO: this is a bit of a hack... something to revisit when we formalize an
// abstraction for IO.
// BAR0 size for Blackhole, used to determine whether write block should use BAR0 or BAR4
// Value: 512 * 1024 * 1024 bytes = 512 MiB.
// NOTE(review): the declaration appears twice below (without and with `static`); the two
// lines look like the before/after sides of the same change.
const uint64_t BAR0_BH_SIZE = 512 * 1024 * 1024;
static const uint64_t BAR0_BH_SIZE = 512 * 1024 * 1024;

// All-ones 32-bit value; used as the default argument of PCIDevice::detect_hang_read().
constexpr unsigned int c_hang_read_value = 0xffffffffu;
struct PciDeviceInfo
Expand All @@ -35,83 +35,138 @@ struct PciDeviceInfo
};

/**
 * Represents a single PCIe device exposed as a character device at
 * /dev/tenstorrent/N.
 *
 * Per the constructor/destructor documentation below, an instance opens the
 * character device, reads device information from sysfs and maps device
 * memory into the process; the destructor unmaps that memory and closes the
 * file descriptor. Copy construction and copy assignment are deleted.
 *
 * NOTE(review): this listing interleaves pre-change and post-change lines of
 * the same declarations, so several members appear twice (for example
 * bar0_uc_size with and without the std:: prefix). All lines are kept as-is.
 */
class PCIDevice {
const std::string device_path; // Path to character device: /dev/tenstorrent/N
const int pci_device_num; // N in /dev/tenstorrent/N
const int logical_id; // Unique identifier for each device in entire network topology
const int pci_device_file_desc; // Character device file descriptor
const PciDeviceInfo info; // PCI device info
const int numa_node; // -1 if non-NUMA
const int revision; // PCI revision value from sysfs
const tt::ARCH arch; // e.g. Grayskull, Wormhole, Blackhole
// Owned by this object; exposed as a raw, non-owning pointer by
// get_architecture_implementation().
std::unique_ptr<tt::umd::architecture_implementation> architecture_implementation;

public:
/**
* Return a list of integers corresponding to character devices in /dev/tenstorrent/
* @return a list of integers corresponding to character devices in /dev/tenstorrent/
*/
static std::vector<int> enumerate_devices();

PCIDevice(int device_id, int logical_device_id);
/**
* PCI device constructor.
*
* Opens the character device file descriptor, reads device information from
* sysfs, and maps device memory region(s) into the process address space.
*
* @param pci_device_number N in /dev/tenstorrent/N
* @param logical_device_id unique identifier for this device in the network topology
*/
PCIDevice(int pci_device_number, int logical_device_id = 0);

/**
* PCIDevice destructor.
* Unmaps device memory and closes chardev file descriptor.
*/
~PCIDevice();

// Copying is disabled. The destructor releases the mappings and the file
// descriptor, so a copy would presumably release them twice.
PCIDevice(const PCIDevice&) = delete; // copy
void operator=(const PCIDevice&) = delete; // copy assignment

/**
* @return PCI device info (returned by value, i.e. a copy of the member)
*/
const PciDeviceInfo get_device_info() const { return info; }

/**
* @return which NUMA node this device is associated with, or -1 if non-NUMA
*/
int get_numa_node() const { return numa_node; }

/**
* @return underlying file descriptor
* TODO: this is an abstraction violation to be removed when this class
* assumes control over hugepage/DMA mapping code.
*/
int get_fd() const { return pci_device_file_desc; }

/**
* @return N in /dev/tenstorrent/N
* TODO: target for removal; upper layers should not care about this.
*/
int get_device_num() const { return pci_device_num; }

/**
* @return unique integer for each device in entire network topology
* TODO: target for removal; upper layers shouldn't have to pass this in here. It
* is unused by this class.
*/
int get_logical_id() const { return logical_id; }

/**
* @return PCI device id
*/
int get_pci_device_id() const { return info.device_id; }

/**
* @return PCI revision value from sysfs.
* TODO: target for removal; upper layers should not care about this.
*/
int get_pci_revision() const { return revision; }

/**
* @return what architecture this device is (e.g. Wormhole, Blackhole, etc.)
*/
tt::ARCH get_arch() const { return arch; }

// Note: byte_addr is (mostly but not always) offset into BAR0. This
// interface assumes the caller knows what they are doing - but it's unclear
// how to use this interface correctly without knowing details of the chip
// and its state.
// TODO: build a proper abstraction for IO. At this level, that is access
// to registers in BAR0 (although possibly the right abstraction is to add
// methods that perform specific operations as opposed to generic register
// read/write methods) and access to segments of BAR0/4 that are mapped to
// NOC endpoints. Probably worth waiting for the KMD to start owning the
// resource management aspect of these PCIe->NOC mappings (the "TLBs")
// before doing too much work here...
//
// NOTE(review): the definitions live in pci_device.cpp and are not visible
// here, so the following is inferred from the signatures only — confirm:
//  - write_block/read_block presumably transfer num_bytes bytes between the
//    host buffer at buffer_addr and the device address byte_addr.
//  - write_regs/read_regs presumably transfer word_len 32-bit words; the
//    pointer overload of write_regs takes an already-resolved destination.
//  - write_tlb_reg presumably writes value_lower/value_upper to the TLB
//    configuration register at byte_addr, with tlb_cfg_reg_size selecting
//    the register width.
void write_block(uint64_t byte_addr, uint64_t num_bytes, const uint8_t* buffer_addr);
void read_block(uint64_t byte_addr, uint64_t num_bytes, uint8_t* buffer_addr);
void write_regs(uint32_t byte_addr, uint32_t word_len, const void *data);
void write_regs(volatile uint32_t *dest, const uint32_t *src, uint32_t word_len);
void read_regs(uint32_t byte_addr, uint32_t word_len, void *data);
void write_tlb_reg(uint32_t byte_addr, std::uint64_t value_lower, std::uint64_t value_upper, std::uint32_t tlb_cfg_reg_size);

// NOTE(review): behavior not visible in this header; by its name, presumably
// opens one hugepage mapping per host memory channel — confirm.
void open_hugepage_per_host_mem_ch(uint32_t num_host_mem_channels);
// Returns a non-owning pointer; ownership stays with the unique_ptr member.
tt::umd::architecture_implementation* get_architecture_implementation() const { return architecture_implementation.get(); }
// data_read defaults to c_hang_read_value (0xffffffff). What is done with it
// is defined in pci_device.cpp — presumably a hang check; confirm.
void detect_hang_read(uint32_t data_read = c_hang_read_value);

PciDeviceInfo info;

int device_id; // N in /dev/tenstorrent/N
int logical_id; // TODO: does not belong in here
int device_fd = -1;

// PCIe device info
int numa_node;
std::uint32_t pcie_device_id;
std::uint32_t pcie_revision_id;

// BAR and regs mapping setup
std::vector<int> device_fd_per_host_ch;
public:
// TODO: we can and should make all of these private.
// Naming: "uc"/"wc" presumably mean uncached / write-combined mappings
// (TODO confirm). Members declared without an initializer (bar2_uc_size,
// bar4_wc_size, system_reg_* and read_checking_offset) have no in-class
// default; where they get assigned is not visible in this header.
void *bar0_uc = nullptr;
std::size_t bar0_uc_size = 0;
std::size_t bar0_uc_offset = 0;
size_t bar0_uc_size = 0;
size_t bar0_uc_offset = 0;

void *bar0_wc = nullptr;
std::size_t bar0_wc_size = 0;
size_t bar0_wc_size = 0;

void *bar2_uc = nullptr;
std::size_t bar2_uc_size;
size_t bar2_uc_size;

void *bar4_wc = nullptr;
std::uint64_t bar4_wc_size;
uint64_t bar4_wc_size;

// TODO: let's get rid of this unless we need to run UMD on WH systems with
// shrunk BAR0. If we don't (and we shouldn't), then we can just use BAR0
// and simplify the code.
void *system_reg_mapping = nullptr;
std::size_t system_reg_mapping_size;

// These two are currently not used.
void *system_reg_wc_mapping = nullptr;
std::size_t system_reg_wc_mapping_size;

std::uint32_t system_reg_start_offset; // Registers >= this are system regs, use the mapping.
std::uint32_t system_reg_offset_adjust; // This is the offset of the first reg in the system reg mapping.
size_t system_reg_mapping_size;
uint32_t system_reg_start_offset; // Registers >= this are system regs, use the mapping.
uint32_t system_reg_offset_adjust; // This is the offset of the first reg in the system reg mapping.

// NOTE(review): presumably the register offset read when checking for a
// hang; its use is not visible in this header — confirm.
std::uint32_t read_checking_offset;
uint32_t read_checking_offset;

tt::ARCH get_arch() const;

void detect_hang_read(std::uint32_t data_read = c_hang_read_value);

private:
// Private helpers; definitions are in pci_device.cpp and not visible here.
void setup_device();
void close_device();

// NOTE(review): presumably probes the device to decide whether it has hung
// — confirm against the definition.
bool is_hardware_hung();

// NOTE(review): presumably maps a register offset to a typed pointer into
// one of the mapped regions above — confirm. The declaration appears twice
// below; only the first is directly preceded by the template header.
template <typename T>
T* get_register_address(std::uint32_t register_offset);

tt::ARCH arch;
std::unique_ptr<tt::umd::architecture_implementation> architecture_implementation;

T* get_register_address(uint32_t register_offset);
};

// Free function returning the architecture for a device; device_id defaults to 0.
// NOTE(review): the definition is not visible here. device_id is presumably N in
// /dev/tenstorrent/N rather than the PCI device id — confirm against pci_device.cpp.
tt::ARCH detect_arch(int device_id=0);
87 changes: 0 additions & 87 deletions device/pcie/utils.hpp

This file was deleted.

3 changes: 3 additions & 0 deletions device/tt_arch_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#include "device/architecture.h"

namespace tt {

// TODO: why do we have ARCH and architecture? This is a mess. Can we have just one?
// Can we get rid of the entries that (for all practical purposes) do not exist?
/**
* @brief ARCH Enums
*/
Expand Down
Loading

0 comments on commit 54cefa7

Please sign in to comment.