Skip to content

Commit

Permalink
fw: remove namespaces
Browse files Browse the repository at this point in the history
  • Loading branch information
jnider committed Aug 3, 2023
1 parent 300b19d commit 1edb850
Showing 1 changed file with 122 additions and 116 deletions.
238 changes: 122 additions & 116 deletions runtime_lib/controller/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ uint8_t shim_dma_cols[NUM_SHIM_DMAS] = {2, 3, 6, 7, 10, 11, 18, 19,
uint8_t col_dma_cols[NUM_COL_DMAS] = {7, 8, 9, 10};
#define NUM_DMAS (NUM_SHIM_DMAS + NUM_COL_DMAS)

// NOTE: 4 slots per DMA (NUM_DMAS counts both shim and column DMAs)
#define MAX_ND_SLOTS (NUM_DMAS * 4)

/*
* Tile address format:
* --------------------------------------------
Expand Down Expand Up @@ -168,6 +171,17 @@ struct HerdConfig {
uint32_t num_cols;
};

// Saved state for one staged ('in progress') ND memcpy. One instance is kept
// per slot in the global staged_nd_slot table.
typedef struct staged_nd_memcpy_s {
// Non-zero while the slot holds staged work; staging into a slot whose
// valid flag is set is refused.
uint32_t valid;
// Dispatch packet that requested this copy.
dispatch_packet_t *pkt;
// Checkpointed addresses: [0] = 1d, [1] = 2d, [2] = 3d.
uint64_t paddr[3];
// Checkpointed indices: [0] = 2d, [1] = 3d, [2] = 4d.
uint32_t index[3];
} staged_nd_memcpy_t;

// GLOBAL storage for 'in progress' ND memcpy work
// each entry is 48B therefore @ 64 slots ~3kB
static staged_nd_memcpy_t staged_nd_slot[MAX_ND_SLOTS];

HerdConfig HerdCfgInst;

/*
Expand All @@ -188,7 +202,6 @@ uint64_t offset_to_phys(uint64_t offset) {

#ifdef ARM_CONTROLLER

namespace xaie2 {
void mlir_aie_init_libxaie(aie_libxaie_ctx_t *ctx) {
if (!ctx)
return;
Expand Down Expand Up @@ -265,8 +278,6 @@ int mlir_aie_reinit_device(aie_libxaie_ctx_t *ctx) {
return 0;
}

} // namespace xaie2

#endif

/*
Expand All @@ -278,8 +289,6 @@ u64 getTileAddr(u16 ColIdx, u16 RowIdx) {
(RowIdx << AIE_ROW_SHIFT));
}

//namespace xaie {

/*
read 32 bit value from specified address
*/
Expand Down Expand Up @@ -311,7 +320,17 @@ u32 maskpoll32(u64 Addr, u32 Mask, u32 Value, u32 TimeOut) {
return Ret;
}

//} // namespace xaie
/*
  Translate a DMA channel state code into a printable name.
  Codes 0, 1 and 2 map to "idle", "starting" and "running"; every other
  value yields "unknown".
*/
static const char *decode_dma_state(uint32_t state) {
  static const char *const state_names[] = {"idle", "starting", "running"};
  const uint32_t num_names = sizeof(state_names) / sizeof(state_names[0]);

  if (state < num_names)
    return state_names[state];

  return "unknown";
}

int xaie_shim_dma_wait_idle(uint64_t TileAddr, int direction, int channel) {
uint32_t shimDMAchannel = channel;
Expand Down Expand Up @@ -623,7 +642,7 @@ void xaie_device_init(void) {
xaie_array_reset();

#ifdef ARM_CONTROLLER
int err = xaie2::mlir_aie_reinit_device(_xaie);
int err = mlir_aie_reinit_device(_xaie);
if (err)
xil_printf("ERROR initializing device.\n\r");
#endif
Expand Down Expand Up @@ -704,10 +723,13 @@ void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row) {
u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;

xil_printf("DMA [%d, %d]\r\nmm2s_status=%08X ctrl0=%02X ctrl1=%02X\r\n"
"s2mm_status=%08X ctrl0=%02X ctrl1=%02X\r\n",
col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
xil_printf("DMA [%d, %d] tile addr=0x%lx\r\n", col, row, tileAddr);
xil_printf(" mm2s (0=%s 1=%s) status=%08X ctrl0=%02X ctrl1=%02X\r\n"
" s2mm (0=%s 1=%s) status=%08X ctrl0=%02X ctrl1=%02X\r\n",
decode_dma_state(mm2s_ch0_running), decode_dma_state(mm2s_ch1_running),
dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
decode_dma_state(s2mm_ch0_running), decode_dma_state(s2mm_ch1_running),
dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);

xil_printf("Descriptors:\r\n");
for (uint32_t bd = 0; bd < NUM_BD; bd++) {
Expand Down Expand Up @@ -798,31 +820,21 @@ void mlir_aie_print_shimdma_status(aie_libxaie_ctx_t *ctx, uint16_t col) {
u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;

/*
xil_printf("Shim DMA [%u] mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
"s2mm_status/0ctrl/1ctrl is %08X %02X %02X, BD0_Addr_A is %08X, "
"BD0_control is %08X\n\r",
col, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
*/
xil_printf("Shim DMA [%u]\r\nmm2s_status=%08X ctrl0=%02X ctrl1=%02X\r\n"
"s2mm_status=%08X ctrl0=%02X ctrl1=%02X\r\n",
col, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
xil_printf("Shim DMA [%u] tile addr=0x%lx\r\n", col, tileAddr);
xil_printf("mm2s status=%08X ctrl0=%02X ctrl1=%02X\r\n"
"s2mm status=%08X ctrl0=%02X ctrl1=%02X\r\n",
dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);

xil_printf("Descriptors:\r\n");
for (int bd = 0; bd < 16; bd++) {
u32 dma_bd_addr_a;
XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D000 + (0x14 * bd),
&dma_bd_addr_a);
u32 dma_bd_buffer_length;
XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D004 + (0x14 * bd),
&dma_bd_buffer_length);
u32 dma_bd_control;
XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D008 + (0x14 * bd),
&dma_bd_control);
if (dma_bd_control & 0x1) {
u32 dma_bd_addr_a = in32(tileAddr + 0x0001D000 + (0x14 * bd));
u32 dma_bd_buffer_length = in32(tileAddr + 0x0001D004 + (0x14 * bd));
u32 dma_bd_control = in32(tileAddr + 0x0001D008 + (0x14 * bd));

if (dma_bd_control & 0x1)
xil_printf("BD %d valid\n\r", bd);

int current_s2mm_ch0 = (dma_s2mm_status >> 16) & 0xf;
int current_s2mm_ch1 = (dma_s2mm_status >> 20) & 0xf;
int current_mm2s_ch0 = (dma_mm2s_status >> 16) & 0xf;
Expand Down Expand Up @@ -865,7 +877,6 @@ void mlir_aie_print_shimdma_status(aie_libxaie_ctx_t *ctx, uint16_t col) {
enable_lock_release, lock_release_val, use_release_val);

xil_printf(" ");
}
}
}

Expand All @@ -887,10 +898,11 @@ void mlir_aie_print_tile_status(int col, int row) {
R4 = in32(tileAddr + 0x00030040);

xil_printf("Core [%d, %d] addr is 0x%08lX\n\r", col, row, tileAddr);
xil_printf("Core [%d, %d] status is 0x%08X, timer is %u, PC is 0x%08X, locks are "
"%08X, LR is %08X, SP is %08X, R0 is %08X,R4 is %08X\n\r",
col, row, status, coreTimerLow, PC, locks, LR, SP, R0, R4);
xil_printf("Core [%d, %d] trace status is %08X\n\r", col, row, trace_status);
xil_printf(" status is 0x%08X, timer is %u, locks=0x%08X\r\n",
status, coreTimerLow, locks);
xil_printf(" PC=%08X, LR=%08X, SP=%08X, R0=%08X R4=%08X\n\r",
PC, LR, SP, R0, R4);
xil_printf(" trace status is %08X\n\r", trace_status);

for (int lock = 0; lock < 16; lock++) {
u32 two_bits = (locks >> (lock * 2)) & 0x3;
Expand Down Expand Up @@ -986,6 +998,77 @@ void unlock_uart() {
ulb[0] = 0;
}

/*
return the global index of a slot given a column and memory space
*/
static uint32_t get_slot(uint16_t col, uint16_t space) {
if (space == 2) {
for (uint16_t i = 0; i < NUM_SHIM_DMAS; i++) {
if (col == shim_dma_cols[i]) {
return i * 4;
}
}
} else if (space == 1) {
for (uint16_t i = 0; i < NUM_COL_DMAS; i++) {
if (col == col_dma_cols[i]) {
return i * 4 + NUM_SHIM_DMAS * 4;
}
}
}
return 0;
}

/*
  Save the progress of an ND memcpy into staged_nd_slot[slot].

  Stores the packet pointer read through `pkt`, the three addresses
  (1d, 2d, 3d -> paddr[0..2]) and the three indices
  (2d, 3d, 4d -> index[0..2]). The slot's `valid` flag is left untouched,
  and `slot` is not range-checked.
*/
static void nd_dma_put_checkpoint(dispatch_packet_t **pkt, uint32_t slot,
                                  uint32_t idx_4d, uint32_t idx_3d,
                                  uint32_t idx_2d, uint64_t pad_3d,
                                  uint64_t pad_2d, uint64_t pad_1d) {
  // Lowest dimension first, matching the storage order in the slot.
  const uint64_t addrs[3] = {pad_1d, pad_2d, pad_3d};
  const uint32_t indices[3] = {idx_2d, idx_3d, idx_4d};

  for (int dim = 0; dim < 3; dim++) {
    staged_nd_slot[slot].paddr[dim] = addrs[dim];
    staged_nd_slot[slot].index[dim] = indices[dim];
  }

  staged_nd_slot[slot].pkt = *pkt;
}

/*
  Load the progress of an ND memcpy back out of staged_nd_slot[slot].

  Writes the saved packet pointer through `pkt`, then fills the address
  outputs (paddr[0..2] -> 1d, 2d, 3d) and the index outputs
  (index[0..2] -> 2d, 3d, 4d). `slot` is not range-checked and the slot's
  `valid` flag is not consulted.
*/
static void nd_dma_get_checkpoint(dispatch_packet_t **pkt, uint32_t slot,
                                  uint32_t &idx_4d, uint32_t &idx_3d,
                                  uint32_t &idx_2d, uint64_t &pad_3d,
                                  uint64_t &pad_2d, uint64_t &pad_1d) {
  // Output targets, lowest dimension first, matching the storage order.
  uint64_t *const addr_out[3] = {&pad_1d, &pad_2d, &pad_3d};
  uint32_t *const index_out[3] = {&idx_2d, &idx_3d, &idx_4d};

  *pkt = staged_nd_slot[slot].pkt;

  for (int dim = 0; dim < 3; dim++) {
    *addr_out[dim] = staged_nd_slot[slot].paddr[dim];
    *index_out[dim] = staged_nd_slot[slot].index[dim];
  }
}

/*
  Stage an ND memcpy packet into a slot so it can be processed later.

  Returns:
    0 - staged; the slot is now marked valid
    1 - memory_space is not 2 (unsupported); the slot is left unchanged
    2 - the slot is already busy; nothing was done

  The packet is marked active and pkt->arg[1] is translated with
  offset_to_phys() before the memory space is examined, so both happen on
  the unsupported path (return 1) as well.
*/
static int stage_packet_nd_memcpy(dispatch_packet_t *pkt, uint32_t slot,
                                  uint32_t memory_space) {
  air_printf("stage_packet_nd_memcpy %d\n\r", slot);

  // A valid slot still holds unfinished work: tell the caller to stall.
  if (staged_nd_slot[slot].valid) {
    air_printf("STALL: ND Memcpy Slot %d Busy!\n\r", slot);
    return 2;
  }

  packet_set_active(pkt, true);

  const uint64_t paddr = offset_to_phys(pkt->arg[1]);
  xil_printf("ND_MEMCPY: Got physical address 0x%lx\r\n", paddr);

  if (memory_space != 2) {
    air_printf("NOT SUPPORTED: Cannot program memory space %d DMAs\n\r",
               memory_space);
    return 1;
  }

  // Start with all indices at zero and every address at the base address.
  nd_dma_put_checkpoint(&pkt, slot, 0, 0, 0, paddr, paddr, paddr);
  staged_nd_slot[slot].valid = 1;
  return 0;
}

int queue_create(uint32_t size, queue_t **queue, uint32_t mb_id) {
uint64_t queue_address[1] = {base_address + sizeof(dispatch_packet_t)};
uint64_t queue_base_address[1] = {
Expand Down Expand Up @@ -1446,59 +1529,6 @@ void handle_packet_hello(dispatch_packet_t *pkt, uint32_t mb_id) {
unlock_uart();
}

// Saved state for one staged ('in progress') ND memcpy; one instance per
// slot.
typedef struct staged_nd_memcpy_s {
// Non-zero while the slot holds staged work; staging into a slot whose
// valid flag is set is refused.
uint32_t valid;
// Dispatch packet that requested this copy.
dispatch_packet_t *pkt;
// Checkpointed addresses: [0] = 1d, [1] = 2d, [2] = 3d.
uint64_t paddr[3];
// Checkpointed indices: [0] = 2d, [1] = 3d, [2] = 4d.
uint32_t index[3];
} staged_nd_memcpy_t; // about 48B therefore @ 64 slots ~3kB

/*
  Map a column and memory space to the global index of that DMA's first slot.

  space == 2 searches shim_dma_cols; space == 1 searches col_dma_cols, whose
  slots follow all the shim DMA slots. Each DMA owns 4 consecutive slots.

  Returns 0 for any other space or for a column missing from the selected
  table. The caller cannot distinguish that from a match on the first shim
  DMA.
*/
uint32_t get_slot(uint16_t col, uint16_t space) {
  const uint8_t *table;
  uint16_t entries;
  uint32_t base_slot;

  switch (space) {
  case 2:
    table = shim_dma_cols;
    entries = NUM_SHIM_DMAS;
    base_slot = 0;
    break;
  case 1:
    table = col_dma_cols;
    entries = NUM_COL_DMAS;
    base_slot = NUM_SHIM_DMAS * 4;
    break;
  default:
    return 0;
  }

  for (uint16_t n = 0; n < entries; n++) {
    if (table[n] == col)
      return base_slot + n * 4;
  }

  return 0;
}

// GLOBAL storage for 'in progress' ND memcpy work
// NOTE 4 slots per shim DMA
staged_nd_memcpy_t staged_nd_slot[NUM_DMAS * 4];

/*
  Save the progress of an ND memcpy into staged_nd_slot[slot].

  Stores the packet pointer read through `pkt`, the addresses
  (1d, 2d, 3d -> paddr[0..2]) and the indices (2d, 3d, 4d -> index[0..2]).
  The slot's `valid` flag is not modified and `slot` is not range-checked.
*/
void nd_dma_put_checkpoint(dispatch_packet_t **pkt, uint32_t slot,
                           uint32_t idx_4d, uint32_t idx_3d, uint32_t idx_2d,
                           uint64_t pad_3d, uint64_t pad_2d, uint64_t pad_1d) {
  // Lowest dimension first, matching the storage order in the slot.
  const uint64_t saved_addr[3] = {pad_1d, pad_2d, pad_3d};
  const uint32_t saved_index[3] = {idx_2d, idx_3d, idx_4d};

  staged_nd_slot[slot].pkt = *pkt;

  for (int d = 0; d < 3; d++)
    staged_nd_slot[slot].paddr[d] = saved_addr[d];

  for (int d = 0; d < 3; d++)
    staged_nd_slot[slot].index[d] = saved_index[d];
}

/*
  Load the progress of an ND memcpy back out of staged_nd_slot[slot].

  Writes the saved packet pointer through `pkt`, then the addresses
  (paddr[0..2] -> 1d, 2d, 3d), then the indices (index[0..2] -> 2d, 3d, 4d).
  `slot` is not range-checked and the slot's `valid` flag is not consulted.
*/
void nd_dma_get_checkpoint(dispatch_packet_t **pkt, uint32_t slot,
                           uint32_t &idx_4d, uint32_t &idx_3d, uint32_t &idx_2d,
                           uint64_t &pad_3d, uint64_t &pad_2d,
                           uint64_t &pad_1d) {
  // Output targets, lowest dimension first, matching the storage order.
  uint64_t *const addr_dst[3] = {&pad_1d, &pad_2d, &pad_3d};
  uint32_t *const index_dst[3] = {&idx_2d, &idx_3d, &idx_4d};

  *pkt = staged_nd_slot[slot].pkt;

  for (int d = 0; d < 3; d++)
    *addr_dst[d] = staged_nd_slot[slot].paddr[d];

  for (int d = 0; d < 3; d++)
    *index_dst[d] = staged_nd_slot[slot].index[d];
}

int do_packet_nd_memcpy(uint32_t slot) {
dispatch_packet_t *a_pkt;
uint64_t paddr_3d;
Expand Down Expand Up @@ -1578,36 +1608,12 @@ int do_packet_memcpy(uint32_t slot) {
}
}

/*
  Stage an ND memcpy packet into a slot so it can be processed later.

  Returns:
    0 - staged; the slot is now marked valid
    1 - memory_space is not 2 (unsupported); the slot is left unchanged
    2 - the slot is already busy; nothing was done

  pkt->arg[1] is used as the address as-is; it is not passed through
  offset_to_phys(). The packet is marked active before the memory space is
  examined, so that also happens on the unsupported path (return 1).
*/
int stage_packet_nd_memcpy(dispatch_packet_t *pkt, uint32_t slot,
                           uint32_t memory_space) {
  air_printf("stage_packet_nd_memcpy %d\n\r", slot);

  // A valid slot still holds unfinished work: tell the caller to stall.
  if (staged_nd_slot[slot].valid) {
    air_printf("STALL: ND Memcpy Slot %d Busy!\n\r", slot);
    return 2;
  }

  packet_set_active(pkt, true);

  const uint64_t paddr = pkt->arg[1];
  xil_printf("ND_MEMCPY: Got physical address 0x%lx\r\n", paddr);

  if (memory_space != 2) {
    air_printf("NOT SUPPORTED: Cannot program memory space %d DMAs\n\r",
               memory_space);
    return 1;
  }

  // Start with all indices at zero and every address at the base address.
  nd_dma_put_checkpoint(&pkt, slot, 0, 0, 0, paddr, paddr, paddr);
  staged_nd_slot[slot].valid = 1;
  return 0;
}

void handle_agent_dispatch_packet(queue_t *q, uint32_t mb_id) {
uint64_t rd_idx = queue_load_read_index(q);
dispatch_packet_t *pkt =
&((dispatch_packet_t *)q->base_address)[mymod(rd_idx)];
int last_slot = 0;
int max_slot = 4 * NUM_DMAS - 1;
int max_slot = MAX_ND_SLOTS - 1;

int num_active_packets = 1;
int packets_processed = 0;
Expand Down Expand Up @@ -1872,8 +1878,8 @@ int main() {

aie_libxaie_ctx_t ctx;
_xaie = &ctx;
xaie2::mlir_aie_init_libxaie(_xaie);
int err = xaie2::mlir_aie_init_device(_xaie);
mlir_aie_init_libxaie(_xaie);
int err = mlir_aie_init_device(_xaie);
if (err)
xil_printf("ERROR initializing device.\n\r");
int user1 = 1;
Expand Down

0 comments on commit 1edb850

Please sign in to comment.