From 70dce13c6763fd364f9177f4f238b93b41a5c502 Mon Sep 17 00:00:00 2001 From: Navaneeth-Kunhi Purayil Date: Thu, 30 Jan 2025 15:17:44 +0100 Subject: [PATCH] [spatz_vlsu] added fix for indexed loads, verified with fft kernel --- hw/ip/spatz/src/spatz.sv | 4 ++-- hw/ip/spatz/src/spatz_vlsu.sv | 22 +++++++++++++--------- hw/ip/spatz/src/spatz_vrf.sv | 22 +++++++++++----------- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/hw/ip/spatz/src/spatz.sv b/hw/ip/spatz/src/spatz.sv index c4bd2d9..354a8e3 100644 --- a/hw/ip/spatz/src/spatz.sv +++ b/hw/ip/spatz/src/spatz.sv @@ -477,8 +477,8 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( .vrf_re_o (sb_re[VLSU_VD_RD1:VLSU_VS2_RD0] ), .vrf_rdata_i (vrf_rdata[VLSU_VD_RD1:VLSU_VS2_RD0] ), .vrf_rvalid_i (vrf_rvalid[VLSU_VD_RD1:VLSU_VS2_RD0] ), - .vrf_id_o ({sb_id[SB_VLSU_VD_WD1], sb_id[VLSU_VD_RD1], sb_id[VLSU_VS2_RD1], - sb_id[SB_VLSU_VD_WD0], sb_id[VLSU_VD_RD0], sb_id[VLSU_VS2_RD0]}), + .vrf_id_o ({sb_id[SB_VLSU_VD_WD1], sb_id[VLSU_VD_RD1], sb_id[VLSU_VD_RD0], // VLSU Interface-1 + sb_id[SB_VLSU_VD_WD0], sb_id[VLSU_VS2_RD1], sb_id[VLSU_VS2_RD0]}), // VLSU Interface-0 // Interface Memory .spatz_mem_req_o (spatz_mem_req_o ), .spatz_mem_req_valid_o (spatz_mem_req_valid_o ), diff --git a/hw/ip/spatz/src/spatz_vlsu.sv b/hw/ip/spatz/src/spatz_vlsu.sv index 2445ab5..1bbfa38 100644 --- a/hw/ip/spatz/src/spatz_vlsu.sv +++ b/hw/ip/spatz/src/spatz_vlsu.sv @@ -429,7 +429,7 @@ module spatz_vlsu elen_t [NrInterfaces-1:0] [N_FU-1:0] mem_req_addr; vrf_addr_t [NrInterfaces-1:0] vd_vreg_addr; - vrf_addr_t [NrInterfaces-1:0] vs2_vreg_addr; + vrf_addr_t [NrInterfaces-1:0] vs2_vreg_addr, vs2_vreg_idx_addr; // Current element index and byte index that are being accessed at the register file vreg_elem_t [NrInterfaces-1:0] vd_elem_id; @@ -462,7 +462,7 @@ module spatz_vlsu automatic logic [1:0] data_index_width_diff = int'(mem_spatz_req.vtype.vsew) - int'(mem_spatz_req.op_mem.ew); // Pointer to index - automatic logic [idx_width(NrMemPorts*ELENB)-1:0] word_index = (port << (MAXEW - data_index_width_diff)) + (maxew_t'(idx_offset << data_index_width_diff) >> data_index_width_diff) + (maxew_t'(idx_offset >> (MAXEW - data_index_width_diff)) << (MAXEW - data_index_width_diff)) * NrMemPorts; + automatic logic [idx_width(NrMemPorts*ELENB)-1:0] word_index = (fu << int'(mem_spatz_req.op_mem.ew)) + (idx_offset << $clog2(N_FU)); // Index unique case (mem_spatz_req.op_mem.ew) @@ -492,12 +492,18 @@ module spatz_vlsu always_comb begin : gen_vreg_addr for (int intf = 0; intf < NrInterfaces; intf++) begin : gen_vreg_addr_intf vd_vreg_addr[intf] = (commit_insn_q.vd << $clog2(NrWordsPerVector)) + $unsigned(vd_elem_id[intf]); + + // For indices for indexed operations vs2_vreg_addr[intf] = (mem_spatz_req.vs2 << $clog2(NrWordsPerVector)) + $unsigned(vs2_elem_id_q[intf]); + vs2_vreg_idx_addr[intf] = vs2_vreg_addr[intf]; + + // The second interface starts from half of the vector to straighten the write-back VRF access pattern + if (intf == 1) begin + vd_vreg_addr[intf] += commit_insn_q.vl / (2 * N_FU * ELENB); + vs2_vreg_idx_addr[intf] += ((mem_spatz_req.vl >> (MAXEW - int'(mem_spatz_req.op_mem.ew))) / (2 * N_FU * ELENB)); + end - // The second interface starts from half of the vector to straighten the write-back VRF access pattern - // HARDCODED implementation just for explorative purposes! This does not generalize, don't use this!!!!! - if (!mem_is_indexed && !mem_is_strided && intf == 1) vd_vreg_addr[intf] += commit_insn_q.vl / (2 * N_FU * ELENB); - end + end end /////////////// @@ -665,8 +671,6 @@ module spatz_vlsu // Check if interface 1 is the interface trying to commit, if so take resp information from interface 1 assign resp_intf = vrf_commit_intf_valid_q [1] == 1'b0 ? 1'b1 : 1'b0; assign vlsu_rsp_o = &vrf_commit_intf_valid && |vrf_req_valid_q ? vrf_req_q[resp_intf].rsp : '{id: commit_insn_q.id, default: '0}; - - // TODO : Check if this is the same and fix if required assign vlsu_rsp_valid_o = &vrf_commit_intf_valid && |vrf_req_valid_q ? |vrf_req_ready_q : vlsu_finished_req && !commit_insn_q.is_load; ////////////// @@ -825,7 +829,7 @@ module spatz_vlsu // verilator lint_off LATCH always_comb begin for (int intf = 0; intf < NrInterfaces; intf++) begin - vrf_raddr_o[intf] = {vs2_vreg_addr[intf], vd_vreg_addr[intf]}; + vrf_raddr_o[intf] = {vs2_vreg_idx_addr[intf], vd_vreg_addr[intf]}; vrf_re_o[intf] = '0; vrf_req_d[intf] = '0; vrf_req_valid_d[intf] = '0; diff --git a/hw/ip/spatz/src/spatz_vrf.sv b/hw/ip/spatz/src/spatz_vrf.sv index a3773f4..93f2c21 100644 --- a/hw/ip/spatz/src/spatz_vrf.sv +++ b/hw/ip/spatz/src/spatz_vrf.sv @@ -211,7 +211,7 @@ module spatz_vrf // the VFU (vs1) and then by the slide unit. Port two can be accessed first by the // VFU (vd), then by the LSU. for (int unsigned bank = 0; bank < NrVRFBanks; bank++) begin - // Bank read port 0 - Priority: VFU (2) -> VLSU + // Bank read port 0 - Priority: VFU (2) -> VLSU int0 data if (read_request[bank][VFU_VS2_RD]) begin raddr[bank][0] = f_vreg(raddr_i[VFU_VS2_RD]); rdata_o[VFU_VS2_RD] = rdata[bank][0]; @@ -222,30 +222,30 @@ module spatz_vrf rvalid_o[VLSU_VS2_RD0] = 1'b1; end - // Bank read port 1 - Priority: VFU (1) -> VSLDU + // Bank read port 1 - Priority: VFU (1) -> VLSU int1 data -> VSLDU if (read_request[bank][VFU_VS1_RD]) begin raddr[bank][1] = f_vreg(raddr_i[VFU_VS1_RD]); rdata_o[VFU_VS1_RD] = rdata[bank][1]; rvalid_o[VFU_VS1_RD] = 1'b1; - end else if (read_request[bank][VLSU_VS2_RD1]) begin - raddr[bank][0] = f_vreg(raddr_i[VLSU_VS2_RD1]); - rdata_o[VLSU_VS2_RD1] = rdata[bank][0]; - rvalid_o[VLSU_VS2_RD1] = 1'b1; + end else if (read_request[bank][VLSU_VD_RD0]) begin + raddr[bank][1] = f_vreg(raddr_i[VLSU_VD_RD0]); + rdata_o[VLSU_VD_RD0] = rdata[bank][1]; + rvalid_o[VLSU_VD_RD0] = 1'b1; end else if (read_request[bank][VSLDU_VS2_RD]) begin raddr[bank][1] = f_vreg(raddr_i[VSLDU_VS2_RD]); rdata_o[VSLDU_VS2_RD] = rdata[bank][1]; rvalid_o[VSLDU_VS2_RD] = 1'b1; end - // Bank read port 2 - Priority: VFU (D) -> VLSU + // Bank read port 2 - Priority: VFU (D) -> VLSU int0 indices -> VLSU int1 indices if (read_request[bank][VFU_VD_RD]) begin raddr[bank][2] = f_vreg(raddr_i[VFU_VD_RD]); rdata_o[VFU_VD_RD] = rdata[bank][2]; rvalid_o[VFU_VD_RD] = 1'b1; - end else if (read_request[bank][VLSU_VD_RD0]) begin - raddr[bank][2] = f_vreg(raddr_i[VLSU_VD_RD0]); - rdata_o[VLSU_VD_RD0] = rdata[bank][2]; - rvalid_o[VLSU_VD_RD0] = 1'b1; + end else if (read_request[bank][VLSU_VS2_RD1]) begin + raddr[bank][2] = f_vreg(raddr_i[VLSU_VS2_RD1]); + rdata_o[VLSU_VS2_RD1] = rdata[bank][2]; + rvalid_o[VLSU_VS2_RD1] = 1'b1; end else if (read_request[bank][VLSU_VD_RD1]) begin raddr[bank][2] = f_vreg(raddr_i[VLSU_VD_RD1]); rdata_o[VLSU_VD_RD1] = rdata[bank][2];