diff --git a/tt_metal/hw/firmware/src/brisc.cc b/tt_metal/hw/firmware/src/brisc.cc
index 731679ca77f..43e05819fe0 100644
--- a/tt_metal/hw/firmware/src/brisc.cc
+++ b/tt_metal/hw/firmware/src/brisc.cc
@@ -282,7 +282,7 @@ inline void init_ncrisc_iram() {
 
 inline void deassert_ncrisc_trisc() {
     // Below sets ncrisc to go so we can wait until it is cleared on first iteration
-    mailboxes->slave_sync.all = RUN_SYNC_MSG_ALL_SLAVES_DONE;
+    //mailboxes->slave_sync.all = RUN_SYNC_MSG_ALL_SLAVES_DONE;
 
     init_ncrisc_iram();
 
@@ -316,11 +316,12 @@ inline void set_ncrisc_kernel_resume_deassert_address() {
 
 inline void run_triscs(dispatch_core_processor_masks enables) {
     // Wait for init_sync_registers to complete. Should always be done by the time we get here.
-    while (mailboxes->slave_sync.trisc0 != RUN_SYNC_MSG_DONE) {
+    while (get_stream_register_component(STREAM_CHANNEL, 1) != PROC_DONE) {
         invalidate_l1_cache();
     }
 
     if (enables & DISPATCH_CLASS_MASK_TENSIX_ENABLE_COMPUTE) {
+        increment_stream_register(STREAM_CHANNEL, (PROC_GO << 12) | (PROC_GO << 8) | (PROC_GO << 4));
         mailboxes->slave_sync.trisc0 = RUN_SYNC_MSG_GO;
         mailboxes->slave_sync.trisc1 = RUN_SYNC_MSG_GO;
         mailboxes->slave_sync.trisc2 = RUN_SYNC_MSG_GO;
@@ -332,6 +333,7 @@ inline void finish_ncrisc_copy_and_run(dispatch_core_processor_masks enables) {
 #if !defined(NCRISC_FIRMWARE_KERNEL_SPLIT)
     if (enables & DISPATCH_CLASS_MASK_TENSIX_ENABLE_DM1) {
         l1_to_ncrisc_iram_copy_wait();
+        modify_stream_register_component(STREAM_CHANNEL, 0, PROC_LOAD, PROC_GO);
         mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_GO;
 
 #if NCRISC_FIRMWARE_IN_IRAM
@@ -347,8 +349,9 @@ inline void start_ncrisc_kernel_run(dispatch_core_processor_masks enables) {
     if (enables & DISPATCH_CLASS_MASK_TENSIX_ENABLE_DM1) {
         // The NCRISC behaves badly if it jumps from L1 to IRAM, so instead halt it and then reset it to the IRAM
         // address it provides.
-        while (mailboxes->slave_sync.dm1 != RUN_SYNC_MSG_WAITING_FOR_RESET);
-        mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_GO;
+        while (get_stream_register_component(STREAM_CHANNEL, 0) != PROC_WAITING_FOR_RESET);
+        // mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_GO;
+        modify_stream_register_component(STREAM_CHANNEL, 0, PROC_WAITING_FOR_RESET, PROC_GO);
         volatile tt_reg_ptr uint32_t* cfg_regs = core.cfg_regs_base(0);
         cfg_regs[NCRISC_RESET_PC_PC_ADDR32] = mailboxes->ncrisc_halt.resume_addr;
         assert_just_ncrisc_reset();
@@ -359,15 +362,19 @@ inline void start_ncrisc_kernel_run(dispatch_core_processor_masks enables) {
 
 inline void wait_ncrisc_trisc() {
     WAYPOINT("NTW");
-    while (mailboxes->slave_sync.all != RUN_SYNC_MSG_ALL_SLAVES_DONE) {
+    while (get_stream_register_value(STREAM_CHANNEL) != 0) {
         invalidate_l1_cache();
     }
     WAYPOINT("NTD");
 }
 
-inline void trigger_sync_register_init() { mailboxes->slave_sync.trisc0 = RUN_SYNC_MSG_INIT_SYNC_REGISTERS; }
+inline void trigger_sync_register_init() {
+    modify_stream_register_component(STREAM_CHANNEL, 1, PROC_DONE, PROC_INIT_SYNC_REGISTERS);
+    mailboxes->slave_sync.trisc0 = RUN_SYNC_MSG_INIT_SYNC_REGISTERS;
+}
 
 int main() {
+    reset_stream_register(STREAM_CHANNEL);
     configure_l1_data_cache();
     DIRTY_STACK_MEMORY();
     WAYPOINT("I");
@@ -383,7 +390,7 @@ int main() {
 
     // Set ncrisc's resume address to 0 so we know when ncrisc has overwritten it
     mailboxes->ncrisc_halt.resume_addr = 0;
-    mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_GO;
+    //mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_GO;
     deassert_ncrisc_trisc();
     // When NCRISC has IRAM, it needs to be halted before data can be copied from L1 to IRAM
     // This routine allows us to resume NCRISC after the copy is done
@@ -468,6 +475,7 @@ int main() {
 #if !NCRISC_FIRMWARE_IN_IRAM
             // On Wormhole and Blackhole, trigger the NCRISC to start loading CBs and IRAM as soon as possible.
             if (enables & DISPATCH_CLASS_MASK_TENSIX_ENABLE_DM1) {
+                modify_stream_register_component(STREAM_CHANNEL, 0, PROC_DONE, PROC_LOAD);
                 mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_LOAD;
             }
 #endif
diff --git a/tt_metal/hw/firmware/src/ncrisc.cc b/tt_metal/hw/firmware/src/ncrisc.cc
index f44c513f3f6..262915c1104 100644
--- a/tt_metal/hw/firmware/src/ncrisc.cc
+++ b/tt_metal/hw/firmware/src/ncrisc.cc
@@ -75,10 +75,10 @@ inline __attribute__((always_inline)) void notify_brisc_and_wait() {
 #else
     while (true) {
-        uint8_t run_value = *ncrisc_run;
-        if (run_value == RUN_SYNC_MSG_GO || run_value == RUN_SYNC_MSG_LOAD) {
+        uint8_t run_value = get_stream_register_component(STREAM_CHANNEL, 0);
+        if (run_value == PROC_LOAD || run_value == PROC_GO) {
             break;
         }
-        invalidate_l1_cache();
+        //invalidate_l1_cache();
     }
 #endif
 }
@@ -151,8 +151,7 @@ int main(int argc, char *argv[]) {
         void (*kernel_address)(uint32_t) = (void (*)(uint32_t))
             (kernel_config_base + launch_msg->kernel_config.kernel_text_offset[index]);
 #ifdef ARCH_BLACKHOLE
-        while (*ncrisc_run != RUN_SYNC_MSG_GO) {
-            invalidate_l1_cache();
+        while (get_stream_register_component(STREAM_CHANNEL, 0) != PROC_GO) {
         }
         (*kernel_address)((uint32_t)kernel_address);
 #elif defined(ARCH_WORMHOLE)
diff --git a/tt_metal/hw/inc/firmware_common.h b/tt_metal/hw/inc/firmware_common.h
index ee712817c69..e27a05ca4f2 100644
--- a/tt_metal/hw/inc/firmware_common.h
+++ b/tt_metal/hw/inc/firmware_common.h
@@ -16,6 +16,7 @@
 #include "noc/noc_parameters.h"
 #include "debug/dprint.h"
 #include "risc_common.h"
+#include "noc_overlay_parameters.h"
 
 extern uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS];
 extern int32_t bank_to_dram_offset[NUM_DRAM_BANKS];
@@ -26,6 +27,22 @@ extern void kernel_init(uint32_t kernel_init);
 extern void kernel_launch(uint32_t kernel_base_addr);
 void l1_to_local_mem_copy(uint32_t* dst, uint32_t tt_l1_ptr* src, int32_t len);
 
+// Stream id whose register holds the packed per-processor run states.
+#define STREAM_CHANNEL 31
+// Bit width of one processor's field; fields are ordered DM1, TRISC0, TRISC1, TRISC2
+#define STREAM_COMPONENT_SHIFT 4
+
+// Run state stored in a processor's 4-bit field of the stream register.
+enum {
+    PROC_DONE = 0,
+    PROC_INIT = 1,
+    PROC_GO = 2,
+    PROC_LOAD = 3,
+    PROC_WAITING_FOR_RESET = 4,
+    PROC_INIT_SYNC_REGISTERS = 5,
+};
+
+
 inline void do_crt1(uint32_t tt_l1_ptr* data_image) {
     // Clear bss.
     extern uint32_t __ldm_bss_start[];
@@ -82,3 +99,34 @@ void wait_for_go_message() {
         invalidate_l1_cache();
     }
 }
+
+// Writes 0 to the stream's BUF_SIZE register. NOTE(review): presumably zeroes the value read back below - confirm.
+FORCE_INLINE
+void reset_stream_register(uint32_t stream_id) {
+    NOC_STREAM_WRITE_REG(stream_id, STREAM_REMOTE_DEST_BUF_SIZE_REG_INDEX, 0);
+}
+
+// Writes `value` to the space-available UPDATE register. NOTE(review): presumably added by hardware - confirm.
+FORCE_INLINE
+void increment_stream_register(uint32_t stream_id, uint32_t value) {
+    NOC_STREAM_WRITE_REG(
+        stream_id, STREAM_REMOTE_DEST_BUF_SPACE_AVAILABLE_UPDATE_REG_INDEX, value << REMOTE_DEST_BUF_WORDS_FREE_INC);
+}
+
+// Moves field `index` from `from` to `to` by adding their (unsigned, wrapping) difference; field must hold `from`.
+FORCE_INLINE
+void modify_stream_register_component(uint32_t stream_id, uint32_t index, uint32_t from, uint32_t to) {
+    increment_stream_register(stream_id, (to - from) << (STREAM_COMPONENT_SHIFT * index));
+}
+
+// Returns the raw space-available register of the stream (all processor fields packed together).
+FORCE_INLINE
+uint32_t get_stream_register_value(uint32_t stream_id) {
+    return NOC_STREAM_READ_REG(stream_id, STREAM_REMOTE_DEST_BUF_SPACE_AVAILABLE_REG_INDEX);
+}
+
+// Returns the 4-bit field `index` of the stream register.
+FORCE_INLINE
+uint32_t get_stream_register_component(uint32_t stream_id, uint32_t index) {
+    return (get_stream_register_value(stream_id) >> (STREAM_COMPONENT_SHIFT * index)) & 0xf;
+}