Skip to content

Commit

Permalink
Remove firmware dependency on generated_bank_to_noc_coord_mapping.h (#…
Browse files Browse the repository at this point in the history
…15070)

### Ticket
[12844](#12844)

### Problem description
Firmware build is dependent on runtime values. 

### What's changed
Firmware now declares global arrays for dram_bank_to_noc_xy,
l1_bank_to_noc_xy, bank_to_dram_offset and bank_to_l1_offset. During build,
values are written to L1 memory. During initialization, firmware copies
these values from L1 into the global arrays above.
Moved l1_to_local_mem_copy to substitutes.cpp. Removed the 'inline' keyword,
since the function is used in multiple places; LTO now decides whether to
inline it.

### Checklist
- [x] Post commit CI passes -
https://github.com/tenstorrent/tt-metal/actions/runs/12132649762
- [x] Blackhole Post commit -
https://github.com/tenstorrent/tt-metal/actions/runs/12143429777
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] New/Existing tests provide coverage for changes
  • Loading branch information
spoojaryTT authored Dec 5, 2024
1 parent 2d835ee commit 976948b
Show file tree
Hide file tree
Showing 31 changed files with 245 additions and 197 deletions.
10 changes: 9 additions & 1 deletion tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "tools/profiler/kernel_profiler.hpp"
#include "dev_msgs.h"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"
#include "circular_buffer_init.h"
#include "dataflow_api.h"
Expand Down Expand Up @@ -67,6 +66,13 @@ uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used));

// Bank lookup tables holding NOC XY data and offsets for the DRAM and L1 banks.
// These arrays are stored in local memory of FW, but are primarily used by the kernel, which
// shares FW symbols. Hence they are marked 'used' so that the FW compiler doesn't optimize them out.
// They are filled from L1 by noc_bank_table_init(), which main() calls during startup.
// NOTE(review): the ".uninit" section presumably keeps startup code from initializing these
// arrays - confirm against the linker script.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((section(".uninit"), used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((section(".uninit"), used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((section(".uninit"), used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((section(".uninit"), used));

#define MEM_MOVER_VIEW_IRAM_BASE_ADDR (0x4 << 12)

#if defined(PROFILE_KERNEL)
Expand Down Expand Up @@ -343,6 +349,8 @@ int main() {

do_crt1((uint32_t*)MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH);

noc_bank_table_init(MEM_BANK_TO_NOC_SCRATCH);

mailboxes->launch_msg_rd_ptr = 0; // Initialize the rdptr to 0
noc_index = 0;
risc_init();
Expand Down
10 changes: 9 additions & 1 deletion tt_metal/hw/firmware/src/erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "ethernet/dataflow_api.h"
#include "ethernet/tunneling.h"
#include "firmware_common.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "noc_parameters.h"
#include "risc_attribs.h"
#include "tools/profiler/kernel_profiler.hpp"
Expand Down Expand Up @@ -34,6 +33,13 @@ uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used));

// Bank lookup tables holding NOC XY data and offsets for the DRAM and L1 banks.
// These arrays are stored in local memory of FW, but are primarily used by the kernel, which
// shares FW symbols. Hence they are marked 'used' so that the FW compiler doesn't optimize them out.
// They are filled from L1 by noc_bank_table_init(), which Application() calls during startup.
// NOTE(review): the ".uninit" section presumably keeps startup code from initializing these
// arrays - confirm against the linker script.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((section(".uninit"), used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((section(".uninit"), used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((section(".uninit"), used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((section(".uninit"), used));

void __attribute__((noinline)) Application(void) {
WAYPOINT("I");

Expand All @@ -43,6 +49,8 @@ void __attribute__((noinline)) Application(void) {

rtos_context_switch_ptr = (void (*)())RtosTable[0];

noc_bank_table_init(eth_l1_mem::address_map::ERISC_MEM_BANK_TO_NOC_SCRATCH);

risc_init();
noc_init(MEM_NOC_ATOMIC_RET_VAL_ADDR);

Expand Down
10 changes: 9 additions & 1 deletion tt_metal/hw/firmware/src/idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "tools/profiler/kernel_profiler.hpp"
#include "dev_msgs.h"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"
#include "dataflow_api.h"

Expand All @@ -42,6 +41,13 @@ uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used
uint8_t my_x[NUM_NOCS] __attribute__((used));
uint8_t my_y[NUM_NOCS] __attribute__((used));

// Bank lookup tables holding NOC XY data and offsets for the DRAM and L1 banks.
// These arrays are stored in local memory of FW, but are primarily used by the kernel, which
// shares FW symbols. Hence they are marked 'used' so that the FW compiler doesn't optimize them out.
// They are filled from L1 by noc_bank_table_init(), which main() calls during startup.
// NOTE(review): the ".uninit" section presumably keeps startup code from initializing these
// arrays - confirm against the linker script.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((section(".uninit"), used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((section(".uninit"), used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((section(".uninit"), used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((section(".uninit"), used));

//c_tensix_core core;

tt_l1_ptr mailboxes_t * const mailboxes = (tt_l1_ptr mailboxes_t *)(MEM_IERISC_MAILBOX_BASE);
Expand Down Expand Up @@ -101,6 +107,8 @@ int main() {
do_crt1((uint32_t *)MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH);
uint32_t heartbeat = 0;

noc_bank_table_init(MEM_IERISC_BANK_TO_NOC_SCRATCH);

risc_init();

mailboxes->slave_sync.all = RUN_SYNC_MSG_ALL_SLAVES_DONE;
Expand Down
10 changes: 9 additions & 1 deletion tt_metal/hw/firmware/src/ncrisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "firmware_common.h"
#include "tools/profiler/kernel_profiler.hpp"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"
#include "circular_buffer_init.h"

Expand Down Expand Up @@ -40,6 +39,13 @@ uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used));

// Bank lookup tables holding NOC XY data and offsets for the DRAM and L1 banks.
// These arrays are stored in local memory of FW, but are primarily used by the kernel, which
// shares FW symbols. Hence they are marked 'used' so that the FW compiler doesn't optimize them out.
// They are filled from L1 by noc_bank_table_init(), which main() calls during startup.
// NOTE(review): the ".uninit" section presumably keeps startup code from initializing these
// arrays - confirm against the linker script.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((section(".uninit"), used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((section(".uninit"), used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((section(".uninit"), used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((section(".uninit"), used));

#if defined(PROFILE_KERNEL)
namespace kernel_profiler {
uint32_t wIndex __attribute__((used));
Expand Down Expand Up @@ -79,6 +85,8 @@ int main(int argc, char *argv[]) {

do_crt1((uint32_t tt_l1_ptr *)MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH);

noc_bank_table_init(MEM_BANK_TO_NOC_SCRATCH);

risc_init();

// If NCRISC has IRAM it needs to halt before BRISC copies data from L1 to IRAM
Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/slave_idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "firmware_common.h"
#include "tools/profiler/kernel_profiler.hpp"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"

#include "debug/waypoint.h"
Expand Down
11 changes: 11 additions & 0 deletions tt_metal/hw/inc/blackhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
#define MEM_NCRISC_LOCAL_SIZE (8 * 1024)
#define MEM_TRISC_LOCAL_SIZE (4 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be at least 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS) bytes
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be at least 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS) bytes
#define MEM_BANK_OFFSET_SIZE 1024

/////////////
// Firmware/kernel code holes
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 128)
Expand Down Expand Up @@ -91,6 +96,9 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand Down Expand Up @@ -130,6 +138,9 @@
#define MEM_IERISC_STACK_BASE (MEM_LOCAL_BASE + MEM_IERISC_LOCAL_SIZE - MEM_IERISC_STACK_SIZE)
#define MEM_SLAVE_IERISC_STACK_BASE (MEM_LOCAL_BASE + MEM_SLAVE_IERISC_LOCAL_SIZE - MEM_SLAVE_IERISC_STACK_SIZE)

#define MEM_IERISC_BANK_TO_NOC_SCRATCH (MEM_SLAVE_IERISC_INIT_LOCAL_L1_BASE_SCRATCH + MEM_SLAVE_IERISC_LOCAL_SIZE)
#define MEM_IERISC_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)

/////////////
// Padding/alignment restriction needed in linker scripts for erisc
#define MEM_IERISC_KERNEL_PAD 32
10 changes: 10 additions & 0 deletions tt_metal/hw/inc/blackhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ struct address_map {
static constexpr std::int32_t DATA_BUFFER_SIZE_ETH = 4 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE_NOC = 16 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE = 24 * 1024;
// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be at least 2 * NUM_NOCS * (NUM_DRAM_BANKS +
// NUM_L1_BANKS) bytes
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_XY_SIZE = 1024;
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be at least 4 * (NUM_DRAM_BANKS +
// NUM_L1_BANKS) bytes
static constexpr std::int32_t ERISC_MEM_BANK_OFFSET_SIZE = 1024;

// Kernel config buffer is WIP
// Size is presently based on the old sizes of the RTAs + CB config + Sems
static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_SIZE = 96 * 4 + 8 * 16;
Expand Down Expand Up @@ -65,6 +72,9 @@ struct address_map {

static_assert((ERISC_L1_UNRESERVED_BASE % 32) == 0);

static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_SCRATCH = (ERISC_L1_KERNEL_CONFIG_BASE + ERISC_L1_KERNEL_CONFIG_SIZE + 31) & ~31; //Same as ERISC_L1_UNRESERVED_BASE
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_SIZE = ERISC_MEM_BANK_TO_NOC_XY_SIZE + ERISC_MEM_BANK_OFFSET_SIZE;

static constexpr std::int32_t LAUNCH_ERISC_APP_FLAG = L1_EPOCH_Q_BASE + 4;

// BIDIR Tunneling Kernel Space
Expand Down
14 changes: 11 additions & 3 deletions tt_metal/hw/inc/dataflow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
#include "chlkc_unpack_tile_dims.h"
#define DATA_FORMATS_DEFINED
#endif
#if __has_include("generated_bank_to_noc_coord_mapping.h")
#include "generated_bank_to_noc_coord_mapping.h"
#endif
#include <noc/noc_parameters.h>

#include <stdint.h>

Expand All @@ -37,9 +35,15 @@ constexpr uint8_t proc_type = static_cast<std::underlying_type_t<TensixProcessor
constexpr uint8_t noc_index = NOC_INDEX;
constexpr uint8_t noc_mode = NOC_MODE;
#else

extern uint8_t noc_index;
constexpr uint8_t noc_mode = DM_DEDICATED_NOC;
#endif
extern uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS];
extern int32_t bank_to_dram_offset[NUM_DRAM_BANKS];
extern uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS];
extern int32_t bank_to_l1_offset[NUM_L1_BANKS];

extern uint32_t tt_l1_ptr* rta_l1_base;
extern uint32_t tt_l1_ptr* crta_l1_base;
extern uint32_t tt_l1_ptr* sem_l1_base[];
Expand Down Expand Up @@ -89,6 +93,10 @@ constexpr uint32_t write_at_cmd_buf = NCRISC_AT_CMD_BUF;
#define EXCLUDE_START_X_OFFSET 8
#define DYNAMIC_NOC_DIRECTION(noc, direction) (noc == 1 ? 1 - direction : direction)

static_assert(NUM_NOCS == 2);
// "Scratch" in L1 has space allocated for 256 DRAM and L1 entries, to store offsets and NOC XY data (see MEM_BANK_TO_NOC_XY_SIZE and MEM_BANK_OFFSET_SIZE).
static_assert((NUM_DRAM_BANKS + NUM_L1_BANKS) <= 256);

namespace interleaved_addr_gen {

template <bool DRAM>
Expand Down
50 changes: 20 additions & 30 deletions tt_metal/hw/inc/firmware_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,39 +13,17 @@
#include "dev_mem_map.h"
#include "hostdevcommon/kernel_structs.h"
#include "dev_msgs.h"
#include "noc/noc_parameters.h"
#include "debug/dprint.h"

extern uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS];
extern int32_t bank_to_dram_offset[NUM_DRAM_BANKS];
extern uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS];
extern int32_t bank_to_l1_offset[NUM_L1_BANKS];

extern void kernel_init(uint32_t kernel_init);
extern void kernel_launch(uint32_t kernel_base_addr);

// Copies `len` 32-bit words from `src` (an L1 pointer) to `dst`.
// The main loop moves three words per iteration; the tail after it copies the
// remaining 0, 1 or 2 words. Nothing is copied when `len` is zero or negative.
inline void l1_to_local_mem_copy(uint32_t* dst, uint32_t tt_l1_ptr* src, int32_t len) {
// Ask GCC not to unroll the loop below.
#pragma GCC unroll 0
while (len >= 3) {
// Issue all three loads before any store.
auto v0 = src[0], v1 = src[1], v2 = src[2];
// 1) Make sure the optimizer does not think this is memcpy by
// hiding the pointer bookkeeping in an asm.
// 2) The scheduler doesn't know the above loads have 6 cycle
// latency. We emit the 3 bookkeeping adds as a single block
// in the load shadow before the stores. The optimizer will
// not be able to move these.
// 3) We don't need early clobbers here because of the +r
// constraint -- early clobbers would pessimize.
// The asm advances src and dst by three words (3 * sizeof(v0) bytes) and
// decrements len by 3.
asm inline(
"addi %0,%0,3*%3\n\t"
"addi %1,%1,3*%3\n\t"
"addi %2,%2,-3"
: "+r"(src), "+r"(dst), "+r"(len)
: "i"(sizeof(v0)));
// dst has already been advanced by the asm, hence the negative indices.
dst[-3] = v0, dst[-2] = v1, dst[-1] = v2;
}
// There are 0, 1 or 2 words of residue. This is smaller than a loop.
// We get smaller code layout by expecting the conditions to be true.
if (__builtin_expect(len >= 1, true)) {
dst[0] = src[0];
if (__builtin_expect(len >= 2, true)) {
dst[1] = src[1];
}
}
}
void l1_to_local_mem_copy(uint32_t* dst, uint32_t tt_l1_ptr* src, int32_t len);

inline void do_crt1(uint32_t tt_l1_ptr* data_image) {
// Clear bss.
Expand All @@ -59,6 +37,18 @@ inline void do_crt1(uint32_t tt_l1_ptr* data_image) {
l1_to_local_mem_copy(__ldm_data_start, data_image, __ldm_data_end - __ldm_data_start);
}

inline void noc_bank_table_init(uint64_t mem_bank_to_noc_addr) {
int32_t dram_to_noc_size_bytes = sizeof(dram_bank_to_noc_xy);
l1_to_local_mem_copy((uint*)dram_bank_to_noc_xy, (uint tt_l1_ptr*)mem_bank_to_noc_addr, dram_to_noc_size_bytes >> 2);
int32_t l1_to_noc_size_bytes = sizeof(l1_bank_to_noc_xy);
l1_to_local_mem_copy((uint*)l1_bank_to_noc_xy, (uint tt_l1_ptr*)(mem_bank_to_noc_addr + dram_to_noc_size_bytes), l1_to_noc_size_bytes >> 2);

int32_t dram_offsets_size_bytes = sizeof(bank_to_dram_offset);
l1_to_local_mem_copy((uint*)bank_to_dram_offset, (uint tt_l1_ptr*)(mem_bank_to_noc_addr + dram_to_noc_size_bytes + l1_to_noc_size_bytes), dram_offsets_size_bytes >> 2);
int32_t l1_offsets_size_bytes = sizeof(bank_to_l1_offset);
l1_to_local_mem_copy((uint*)bank_to_l1_offset, (uint tt_l1_ptr*)(mem_bank_to_noc_addr + dram_to_noc_size_bytes + l1_to_noc_size_bytes + dram_offsets_size_bytes), l1_offsets_size_bytes >> 2);
}

FORCE_INLINE
uint32_t firmware_config_init(
tt_l1_ptr mailboxes_t* const mailboxes, uint32_t core_type_index, uint32_t dispatch_class) {
Expand Down
14 changes: 12 additions & 2 deletions tt_metal/hw/inc/grayskull/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,20 @@
#define MEM_NCRISC_LOCAL_SIZE (4 * 1024)
#define MEM_TRISC_LOCAL_SIZE (2 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be at least 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS) bytes
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be at least 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS) bytes
#define MEM_BANK_OFFSET_SIZE 1024

#define NCRISC_HAS_IRAM 1
#define MEM_NCRISC_IRAM_BASE 0xFFC00000
#define MEM_NCRISC_IRAM_SIZE (16 * 1024)

/////////////
// Firmware/kernel code holes
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 416)
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 624)
// TODO: perhaps put NCRISC FW in the scratch area and free 1.5K after init (GS/WH)
#define MEM_NCRISC_FIRMWARE_SIZE 1616
#define MEM_NCRISC_FIRMWARE_SIZE 1824
#define MEM_TRISC0_FIRMWARE_SIZE 1536
#define MEM_TRISC1_FIRMWARE_SIZE 1536
#define MEM_TRISC2_FIRMWARE_SIZE 1536
Expand Down Expand Up @@ -100,6 +105,9 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand All @@ -125,5 +133,7 @@
#define MEM_IERISC_MAP_END 0
#define MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH 0
#define MEM_IERISC_STACK_SIZE 0
#define MEM_IERISC_BANK_TO_NOC_SCRATCH 0
#define MEM_IERISC_BANK_TO_NOC_SIZE 0

#define MEM_IERISC_KERNEL_PAD 0
2 changes: 2 additions & 0 deletions tt_metal/hw/inc/grayskull/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ struct address_map {

static constexpr std::int32_t ERISC_L1_UNRESERVED_SIZE = 0;
static constexpr std::int32_t ERISC_L1_TUNNEL_BUFFER_SIZE = 0;
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_SCRATCH = 0;
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_SIZE = 0;

static constexpr std::uint32_t RETRAIN_COUNT_ADDR = 0x1EDC;
static constexpr std::uint32_t RETRAIN_FORCE_ADDR = 0x1EFC;
Expand Down
14 changes: 13 additions & 1 deletion tt_metal/hw/inc/wormhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,18 @@
#define MEM_NCRISC_LOCAL_SIZE (4 * 1024)
#define MEM_TRISC_LOCAL_SIZE (2 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be at least 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS) bytes
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be at least 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS) bytes
#define MEM_BANK_OFFSET_SIZE 1024

#define NCRISC_HAS_IRAM 1
#define MEM_NCRISC_IRAM_BASE 0xFFC00000
#define MEM_NCRISC_IRAM_SIZE (16 * 1024)

/////////////
// Firmware/kernel code holes
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 64)
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 256)
// TODO: perhaps put NCRISC FW in the scratch area and free 1.5K after init (GS/WH)
#define MEM_NCRISC_FIRMWARE_SIZE 1536
#define MEM_TRISC0_FIRMWARE_SIZE 1536
Expand Down Expand Up @@ -102,6 +107,9 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand Down Expand Up @@ -137,6 +145,10 @@
#define MEM_IERISC_STACK_SIZE 1024
#define MEM_IERISC_STACK_BASE (MEM_LOCAL_BASE + MEM_IERISC_LOCAL_SIZE - MEM_IERISC_STACK_SIZE)

#define MEM_IERISC_BANK_TO_NOC_SCRATCH (MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH + MEM_IERISC_LOCAL_SIZE)
#define MEM_IERISC_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)


/////////////
// Padding/alignment restriction needed in linker scripts for erisc
#define MEM_IERISC_KERNEL_PAD 32
Loading

0 comments on commit 976948b

Please sign in to comment.