diff --git a/CHANGELOG.md b/CHANGELOG.md index 11fb6780a..62999f912 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Add `apb` dependency of version 0.2.4 - Add support for the `FENCE` instruction - Add support for DRAMsys5.0 co-simulation +- Add support for atomics in L2 ### Changes - Add physical feasible TeraPool configuration with SubGroup hierarchy. diff --git a/hardware/deps/snitch/Bender.yml b/hardware/deps/snitch/Bender.yml index 21e259e6f..a7fa87775 100644 --- a/hardware/deps/snitch/Bender.yml +++ b/hardware/deps/snitch/Bender.yml @@ -29,5 +29,3 @@ sources: - src/snitch_fp_divsqrt.sv - src/snitch_fpu.sv - src/snitch_shared_muldiv.sv - - src/snitch_demux.sv - - src/snitch_axi_adapter.sv diff --git a/hardware/deps/snitch/src/snitch_axi_adapter.sv b/hardware/deps/snitch/src/snitch_axi_adapter.sv deleted file mode 100644 index 3c991a570..000000000 --- a/hardware/deps/snitch/src/snitch_axi_adapter.sv +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright 2018-2019 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
-// SPDX-License-Identifier: SHL-0.51 -// -// File: axi_adapter.sv -// Author: Florian Zaruba -// Date: 1.8.2018 -// -// Description: Manages communication with the AXI Bus - -module snitch_axi_adapter #( - parameter int unsigned WriteFIFODepth = 2, - parameter int unsigned ReadFIFODepth = 2, - parameter type addr_t = logic, - parameter type data_t = logic, - parameter type strb_t = logic, - parameter type axi_mst_req_t = logic, - parameter type axi_mst_resp_t = logic -) ( - input logic clk_i, - input logic rst_ni, - // AXI port - input axi_mst_resp_t axi_resp_i, - output axi_mst_req_t axi_req_o, - - input addr_t slv_qaddr_i, - input logic slv_qwrite_i, - input logic [3:0] slv_qamo_i, - input data_t slv_qdata_i, - input logic [2:0] slv_qsize_i, - input strb_t slv_qstrb_i, - input logic [7:0] slv_qrlen_i, - input logic slv_qvalid_i, - output logic slv_qready_o, - output data_t slv_pdata_o, - output logic slv_pwrite_o, - output logic slv_perror_o, - output logic slv_plast_o, - output logic slv_pvalid_o, - input logic slv_pready_i -); - - localparam DataWidth = $bits(data_t); - localparam StrbWidth = $bits(strb_t); - localparam SlvByteOffset = $clog2($bits(strb_t)); - localparam AxiByteOffset = $clog2($bits(axi_req_o.w.strb)); - - typedef enum logic [3:0] { - AMONone = 4'h0, - AMOSwap = 4'h1, - AMOAdd = 4'h2, - AMOAnd = 4'h3, - AMOOr = 4'h4, - AMOXor = 4'h5, - AMOMax = 4'h6, - AMOMaxu = 4'h7, - AMOMin = 4'h8, - AMOMinu = 4'h9, - AMOLR = 4'hA, - AMOSC = 4'hB - } amo_op_t; - - typedef struct packed { - data_t data; - strb_t strb; - } write_t; - - typedef struct packed { - data_t data; - logic write; - logic error; - logic last; - } resp_t; - - logic write_full; - logic write_empty; - logic read_full; - write_t write_data_in; - write_t write_data_out; - write_t r_data; - - assign axi_req_o.aw.addr = slv_qaddr_i; - assign axi_req_o.aw.prot = 3'b0; - assign axi_req_o.aw.region = 4'b0; - assign axi_req_o.aw.size = slv_qsize_i; - assign axi_req_o.aw.len = '0; - assign 
axi_req_o.aw.burst = axi_pkg::BURST_INCR; - assign axi_req_o.aw.lock = 1'b0; - assign axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE; - assign axi_req_o.aw.qos = 4'b0; - assign axi_req_o.aw.id = '0; - assign axi_req_o.aw.user = '0; - assign axi_req_o.aw_valid = ~write_full & slv_qvalid_i & slv_qwrite_i; - - always_comb begin - write_data_in.data = slv_qdata_i; - write_data_in.strb = slv_qstrb_i; - unique case (amo_op_t'(slv_qamo_i)) - // RISC-V atops have a load semantic - AMOSwap: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ATOMICSWAP}; - AMOAdd: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD}; - AMOAnd: begin - // in this case we need to invert the data to get a "CLR" - write_data_in.data = ~slv_qdata_i; - axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR}; - end - AMOOr: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET}; - AMOXor: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR}; - AMOMax: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX}; - AMOMaxu: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX}; - AMOMin: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN}; - AMOMinu: axi_req_o.aw.atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN}; - default: axi_req_o.aw.atop = '0; - endcase - end - - localparam int unsigned ShiftWidth = (SlvByteOffset == AxiByteOffset) ? 
1 : AxiByteOffset - SlvByteOffset; - typedef logic [ShiftWidth-1:0] shift_t; - typedef struct packed { - write_t data; - shift_t shift; - } write_ext_t; - - if (SlvByteOffset == AxiByteOffset) begin : gen_w_data - // Write - fifo_v3 #( - .DEPTH ( WriteFIFODepth ), - .dtype ( write_t ) - ) i_fifo_w_data ( - .clk_i, - .rst_ni, - .flush_i ( 1'b0 ), - .testmode_i ( 1'b0 ), - .full_o ( write_full ), - .empty_o ( write_empty ), - .usage_o ( /* NC */ ), - .data_i ( write_data_in ), - .push_i ( slv_qvalid_i & slv_qready_o & slv_qwrite_i ), - .data_o ( write_data_out ), - .pop_i ( axi_req_o.w_valid & axi_resp_i.w_ready ) - ); - assign axi_req_o.w.data = write_data_out.data; - assign axi_req_o.w.strb = write_data_out.strb; - - // Read - assign read_full = 1'b0; - assign r_data = axi_resp_i.r.data; - end else begin : gen_w_data - // Write - write_ext_t write_data_ext_in, write_data_ext_out; - - fifo_v3 #( - .DEPTH ( WriteFIFODepth ), - .dtype ( write_ext_t ) - ) i_fifo_w_data ( - .clk_i, - .rst_ni, - .flush_i ( 1'b0 ), - .testmode_i ( 1'b0 ), - .full_o ( write_full ), - .empty_o ( write_empty ), - .usage_o ( /* NC */ ), - .data_i ( write_data_ext_in ), - .push_i ( slv_qvalid_i & slv_qready_o & slv_qwrite_i ), - .data_o ( write_data_ext_out ), - .pop_i ( axi_req_o.w_valid & axi_resp_i.w_ready ) - ); - - assign write_data_ext_in.data = write_data_in; - assign write_data_ext_in.shift = slv_qaddr_i[AxiByteOffset-1:SlvByteOffset]; - assign axi_req_o.w.data = {'0, write_data_ext_out.data.data} << ($bits(data_t) * write_data_ext_out.shift); - assign axi_req_o.w.strb = {'0, write_data_ext_out.data.strb} << ($bits(strb_t) * write_data_ext_out.shift); - - // Read - shift_t read_shift; - - fifo_v3 #( - .DEPTH ( ReadFIFODepth ), - .DATA_WIDTH ( AxiByteOffset-SlvByteOffset ) - ) i_fifo_r_shift ( - .clk_i, - .rst_ni, - .flush_i ( 1'b0 ), - .testmode_i ( 1'b0 ), - .full_o ( read_full ), - .empty_o ( /* NC */ ), - .usage_o ( /* NC */ ), - .data_i ( slv_qaddr_i[AxiByteOffset-1:SlvByteOffset] 
), - .push_i ( slv_qvalid_i & slv_qready_o & ~slv_qwrite_i ), - .data_o ( read_shift ), - .pop_i ( axi_resp_i.r_valid & axi_req_o.r_ready ) - ); - - assign r_data = axi_resp_i.r.data >> ($bits(data_t) * read_shift); - end - assign axi_req_o.w.last = 1'b1; - assign axi_req_o.w.user = '0; - assign axi_req_o.w_valid = ~write_empty; - - assign axi_req_o.ar.addr = slv_qaddr_i; - assign axi_req_o.ar.prot = 3'b0; - assign axi_req_o.ar.region = 4'b0; - assign axi_req_o.ar.size = slv_qsize_i; - assign axi_req_o.ar.len = slv_qrlen_i; - assign axi_req_o.ar.burst = axi_pkg::BURST_INCR; - assign axi_req_o.ar.lock = 1'b0; - assign axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE; - assign axi_req_o.ar.qos = 4'b0; - assign axi_req_o.ar.id = '0; - assign axi_req_o.ar.user = '0; - assign axi_req_o.ar_valid = ~read_full & slv_qvalid_i & ~slv_qwrite_i; - - // Response arbitration because we can get an R and B response simultaneously - resp_t r_resp, b_resp, slv_resp; - logic r_error, b_error; - - assign r_error = (axi_resp_i.r.resp inside {axi_pkg::RESP_EXOKAY, axi_pkg::RESP_OKAY}) ? 1'b0 : 1'b1; - assign b_error = (axi_resp_i.b.resp inside {axi_pkg::RESP_EXOKAY, axi_pkg::RESP_OKAY}) ? 
1'b0 : 1'b1; - - assign r_resp = '{ - data: r_data, - write: 1'b0, - error: r_error, - last: axi_resp_i.r.last - }; - - assign b_resp = '{ - data: r_data, - write: 1'b1, - error: b_error, - last: 1'b1 - }; - - rr_arb_tree #( - .NumIn (2), - .DataType (resp_t), - .ExtPrio (1'b1), - .AxiVldRdy (1'b1), - .LockIn (1'b0) - ) i_response_arbiter ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .flush_i('0 ), - .rr_i ('0 ), - .req_i ({axi_resp_i.b_valid,axi_resp_i.r_valid}), - .gnt_o ({axi_req_o.b_ready,axi_req_o.r_ready} ), - .data_i ({b_resp,r_resp} ), - .gnt_i (slv_pready_i ), - .req_o (slv_pvalid_o ), - .data_o (slv_resp ), - .idx_o ( ) - ); - - assign slv_pdata_o = slv_resp.data; - assign slv_pwrite_o = slv_resp.write; - assign slv_perror_o = slv_resp.error; - assign slv_plast_o = slv_resp.last; - - assign slv_qready_o = (axi_resp_i.ar_ready & axi_req_o.ar_valid) - | (axi_resp_i.aw_ready & axi_req_o.aw_valid); - - `ifndef VERILATOR - // pragma translate_off - hot_one : assert property ( - @(posedge clk_i) disable iff (!rst_ni) (slv_qvalid_i & slv_qwrite_i & slv_qready_o) |-> (slv_qrlen_i == 0)) - else $warning("Bursts are not supported for write transactions"); - // pragma translate_on - `endif -endmodule diff --git a/hardware/deps/snitch/src/snitch_demux.sv b/hardware/deps/snitch/src/snitch_demux.sv deleted file mode 100644 index 387979170..000000000 --- a/hardware/deps/snitch/src/snitch_demux.sv +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2020 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
-// SPDX-License-Identifier: SHL-0.51 - -/// Arbitrates request/response interface -/// Author: Florian Zaruba - -/// Demux based on arbitration -module snitch_demux #( - parameter int unsigned NrPorts = 4, - parameter type req_t = snitch_pkg::dreq_t, - parameter type resp_t = snitch_pkg::dresp_t, - parameter int unsigned RespDepth = 8, - parameter bit [NrPorts-1:0] RegisterReq = '0, - parameter Arbiter = "rr" // "rr" or "prio" -) ( - input logic clk_i, - input logic rst_ni, - // request port - input req_t [NrPorts-1:0] req_payload_i, - input logic [NrPorts-1:0] req_valid_i, - output logic [NrPorts-1:0] req_ready_o, - - output resp_t [NrPorts-1:0] resp_payload_o, - output logic [NrPorts-1:0] resp_last_o, - output logic [NrPorts-1:0] resp_valid_o, - input logic [NrPorts-1:0] resp_ready_i, - // response port - output req_t req_payload_o, - output logic req_valid_o, - input logic req_ready_i, - - input resp_t resp_payload_i, - input logic resp_last_i, - input logic resp_valid_i, - output logic resp_ready_o -); - - localparam LogNrPorts = (NrPorts > 1) ? 
$clog2(NrPorts) : 1; - - logic req_valid_mask; - logic req_ready_mask; - logic [LogNrPorts-1:0] idx, idx_r, idx_w, idx_rsp; - logic full_r, full_w, full; - - req_t [NrPorts-1:0] req_payload_q; - logic [NrPorts-1:0] req_valid_q; - logic [NrPorts-1:0] req_ready_q; - - // Cut the incoming path - for (genvar i = 0; i < NrPorts; i++) begin : gen_spill_regs - spill_register #( - .T ( req_t ), - .Bypass ( !RegisterReq[i] ) - ) i_spill_register_tcdm_req ( - .clk_i, - .rst_ni, - .valid_i ( req_valid_i[i] ), - .ready_o ( req_ready_o[i] ), - .data_i ( req_payload_i[i] ), - .valid_o ( req_valid_q[i] ), - .ready_i ( req_ready_q[i] ), - .data_o ( req_payload_q[i] ) - ); - end - - assign req_valid_o = req_valid_mask & ~full; - assign req_ready_mask = req_ready_i & ~full; - - /// Arbitrate on instruction request port - stream_arbiter #( - .DATA_T ( req_t ), - .N_INP ( NrPorts ), - .ARBITER ( Arbiter ) - ) i_stream_arbiter_req ( - .clk_i, - .rst_ni, - .inp_data_i ( req_payload_q ), - .inp_valid_i ( req_valid_q ), - .inp_ready_o ( req_ready_q ), - .oup_data_o ( req_payload_o ), - .oup_valid_o ( req_valid_mask ), - .oup_ready_i ( req_ready_mask ) - ); - - if (NrPorts == 1) begin : gen_connection - assign idx_rsp = 0; - assign full = 1'b0; - end else begin : gen_demux - onehot_to_bin #( - .ONEHOT_WIDTH ( NrPorts ) - ) i_onehot_to_bin ( - .onehot ( req_valid_q & req_ready_q ), - .bin ( idx ) - ); - - fifo_v3 #( - .DATA_WIDTH ( LogNrPorts ), - .DEPTH ( RespDepth ) - ) i_r_resp_fifo ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i ( 1'b0 ), - .testmode_i ( 1'b0 ), - .full_o ( full_r ), - .empty_o ( ), - .usage_o ( ), - .data_i ( idx ), - .push_i ( req_valid_o && req_ready_i && !req_payload_o.write ), - .data_o ( idx_r ), - .pop_i ( resp_ready_o && resp_valid_i && resp_last_i && !resp_payload_i.write ) - ); - - fifo_v3 #( - .DATA_WIDTH ( LogNrPorts ), - .DEPTH ( RespDepth ) - ) i_w_resp_fifo ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i ( 1'b0 ), - .testmode_i ( 1'b0 ), - 
.full_o ( full_w ), - .empty_o ( ), - .usage_o ( ), - .data_i ( idx ), - .push_i ( req_valid_o && req_ready_i && req_payload_o.write ), - .data_o ( idx_w ), - .pop_i ( resp_ready_o && resp_valid_i && resp_last_i && resp_payload_i.write ) - ); - - assign idx_rsp = resp_payload_i.write ? idx_w : idx_r; - assign full = req_payload_o.write ? full_w : full_r; - end - - stream_demux #( - .N_OUP ( NrPorts ) - ) i_stream_demux_resp ( - .inp_valid_i ( resp_valid_i ), - .inp_ready_o ( resp_ready_o ), - .oup_sel_i ( idx_rsp ), - .oup_valid_o ( resp_valid_o ), - .oup_ready_i ( resp_ready_i ) - ); - - for (genvar i = 0; i < NrPorts; i++) begin - assign resp_payload_o[i] = resp_payload_i; - assign resp_last_o[i] = resp_last_i; - end - -endmodule diff --git a/hardware/scripts/questa/wave_tile.tcl b/hardware/scripts/questa/wave_tile.tcl index 4dd08b9ab..9740b30ac 100644 --- a/hardware/scripts/questa/wave_tile.tcl +++ b/hardware/scripts/questa/wave_tile.tcl @@ -89,13 +89,9 @@ if {$config == {terapool}} { add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/local_resp_* add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/soc_data_* add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/mask_map - add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/soc_req_o - add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/soc_resp_i - add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/soc_qvalid - add wave -noupdate -group group_[$1] -group Tile_[$2] 
/mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/soc_qready - add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/soc_pvalid - add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/soc_pready - + add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/snitch_to_soc_* + add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/mux_to_soc_* + add wave -noupdate -group group_[$1] -group Tile_[$2] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/mux_to_soc_* for {set i 0} {$i < 16} {incr i} { add wave -noupdate -group group_[$1] -group Tile_[$2] -group tcdm_adapter[$i] /mempool_tb/dut/i_mempool_cluster/gen_groups[$1]/i_group/gen_tiles[$2]/i_tile/gen_banks[$i]/i_tcdm_adapter/* } diff --git a/hardware/src/mempool_pkg.sv b/hardware/src/mempool_pkg.sv index feedd66e9..5d427feec 100644 --- a/hardware/src/mempool_pkg.sv +++ b/hardware/src/mempool_pkg.sv @@ -110,13 +110,21 @@ package mempool_pkg; `AXI_TYPEDEF_AW_CHAN_T(axi_core_aw_t, addr_t, axi_core_id_t, logic); - `AXI_TYPEDEF_W_CHAN_T(axi_core_w_t, axi_data_t, axi_strb_t, logic); + `AXI_TYPEDEF_W_CHAN_T(axi_core_w_t, data_t, strb_t, logic); `AXI_TYPEDEF_B_CHAN_T(axi_core_b_t, axi_core_id_t, logic); `AXI_TYPEDEF_AR_CHAN_T(axi_core_ar_t, addr_t, axi_core_id_t, logic); - `AXI_TYPEDEF_R_CHAN_T(axi_core_r_t, axi_data_t, axi_core_id_t, logic); + `AXI_TYPEDEF_R_CHAN_T(axi_core_r_t, data_t, axi_core_id_t, logic); `AXI_TYPEDEF_REQ_T(axi_core_req_t, axi_core_aw_t, axi_core_w_t, axi_core_ar_t); `AXI_TYPEDEF_RESP_T(axi_core_resp_t, axi_core_b_t, axi_core_r_t ); + `AXI_TYPEDEF_AW_CHAN_T(axi_cache_aw_t, addr_t, axi_core_id_t, logic); + `AXI_TYPEDEF_W_CHAN_T(axi_cache_w_t, 
axi_data_t, axi_strb_t, logic); + `AXI_TYPEDEF_B_CHAN_T(axi_cache_b_t, axi_core_id_t, logic); + `AXI_TYPEDEF_AR_CHAN_T(axi_cache_ar_t, addr_t, axi_core_id_t, logic); + `AXI_TYPEDEF_R_CHAN_T(axi_cache_r_t, axi_data_t, axi_core_id_t, logic); + `AXI_TYPEDEF_REQ_T(axi_cache_req_t, axi_cache_aw_t, axi_cache_w_t, axi_cache_ar_t); + `AXI_TYPEDEF_RESP_T(axi_cache_resp_t, axi_cache_b_t, axi_cache_r_t ); + `AXI_TYPEDEF_AW_CHAN_T(axi_tile_aw_t, addr_t, axi_tile_id_t, logic); `AXI_TYPEDEF_W_CHAN_T(axi_tile_w_t, axi_data_t, axi_strb_t, logic); `AXI_TYPEDEF_B_CHAN_T(axi_tile_b_t, axi_tile_id_t, logic); diff --git a/hardware/src/mempool_system.sv b/hardware/src/mempool_system.sv index b7fbb00d0..98c6fde07 100644 --- a/hardware/src/mempool_system.sv +++ b/hardware/src/mempool_system.sv @@ -30,6 +30,8 @@ module mempool_system import axi_pkg::xbar_cfg_t; import axi_pkg::xbar_rule_32_t; + `include "reqrsp_interface/typedef.svh" + /********* * AXI * *********/ @@ -238,104 +240,173 @@ module mempool_system * L2 SRAM * *************/ - localparam int unsigned NumAXIMastersLog2 = NumAXIMasters == 1 ? 1 : $clog2(NumAXIMasters); - typedef logic [L2AddrWidth-1:0] l2_mem_addr_t; + `REQRSP_TYPEDEF_ALL(axi_to_l2, addr_t, axi_data_t, axi_strb_t) typedef logic [L2BankAddrWidth-1:0] l2_bank_addr_t; - typedef logic [NumAXIMastersLog2-1:0] bank_ini_t; - // Axi2Mems to l2_xbar - logic [NumAXIMasters-1:0] mem_req; - logic [NumAXIMasters-1:0] mem_gnt; - logic [NumAXIMasters-1:0] mem_rvalid; - addr_t [NumAXIMasters-1:0] mem_addr_full; - l2_mem_addr_t [NumAXIMasters-1:0] mem_addr; - axi_data_t [NumAXIMasters-1:0] mem_wdata; - axi_strb_t [NumAXIMasters-1:0] mem_strb; - logic [NumAXIMasters-1:0] mem_we; - axi_data_t [NumAXIMasters-1:0] mem_rdata; - // l2_xbar to banks + // Axi2ReqRsp + axi_to_l2_req_t [NumAXIMasters-1:0] axi_to_l2_req; + axi_to_l2_rsp_t [NumAXIMasters-1:0] axi_to_l2_rsp; + // Axi2ReqRsp unpacked + localparam int unsigned NumAXIMastersWidth = (NumAXIMasters > 32'd1) ? 
unsigned'($clog2(NumAXIMasters)) : 32'd1; + localparam int unsigned NumL2BanksWidth = (NumL2Banks > 32'd1) ? unsigned'($clog2(NumL2Banks)) : 32'd1; + typedef logic [NumAXIMastersWidth-1:0] l2_axi_idx_t; + typedef logic [NumL2BanksWidth-1:0] l2_bank_idx_t; + axi_to_l2_req_chan_t [NumAXIMasters-1:0] axi_to_l2_req_chan; + axi_to_l2_rsp_chan_t [NumAXIMasters-1:0] axi_to_l2_rsp_chan; + logic [NumAXIMasters-1:0] axi_to_l2_q_throttle_valid; + logic [NumAXIMasters-1:0] axi_to_l2_q_throttle_ready; + logic [NumAXIMasters-1:0] axi_to_l2_q_valid; + logic [NumAXIMasters-1:0] axi_to_l2_q_ready; + l2_bank_idx_t [NumAXIMasters-1:0] axi_to_l2_q_sel; + l2_axi_idx_t [NumL2Banks-1:0] axi_to_l2_q_idx; + logic [NumAXIMasters-1:0] axi_to_l2_p_valid; + logic [NumAXIMasters-1:0] axi_to_l2_p_ready; + l2_axi_idx_t [NumL2Banks-1:0] axi_to_l2_p_sel; + // Axi2ReqRsp to bank_adapter + axi_to_l2_req_chan_t [NumL2Banks-1:0] mem_req_chan; + axi_to_l2_rsp_chan_t [NumL2Banks-1:0] mem_rsp_chan; + logic [NumL2Banks-1:0] mem_req_valid; + logic [NumL2Banks-1:0] mem_req_ready; + logic [NumL2Banks-1:0] mem_rsp_valid; + logic [NumL2Banks-1:0] mem_rsp_ready; + // bank_adapter to banks logic [NumL2Banks-1:0] bank_req; - logic [NumL2Banks-1:0] bank_gnt; - logic [NumL2Banks-1:0] bank_rvalid; + logic [NumL2Banks-1:0] bank_we; l2_bank_addr_t [NumL2Banks-1:0] bank_addr; - bank_ini_t [NumL2Banks-1:0] bank_ini_d, bank_ini_q; axi_data_t [NumL2Banks-1:0] bank_wdata; axi_strb_t [NumL2Banks-1:0] bank_strb; - logic [NumL2Banks-1:0] bank_we; axi_data_t [NumL2Banks-1:0] bank_rdata; for (genvar i = 0; i < NumAXIMasters; i++) begin : gen_l2_adapters - axi2mem #( - .axi_req_t (axi_tile_req_t ), - .axi_resp_t(axi_tile_resp_t), - .AddrWidth (L2AddrWidth ), - .DataWidth (AxiDataWidth ), - .IdWidth (AxiTileIdWidth ), - .NumBanks (1 ), - .BufDepth (3 ) - ) i_axi2mem ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .busy_o (/*unsused*/ ), - .axi_req_i (axi_l2_req[i] ), - .axi_resp_o (axi_l2_resp[i]), - .mem_req_o (mem_req[i] ), - 
.mem_gnt_i (mem_gnt[i] ), - .mem_addr_o (mem_addr[i] ), - .mem_wdata_o (mem_wdata[i] ), - .mem_strb_o (mem_strb[i] ), - .mem_atop_o (/*unused*/ ), - .mem_we_o (mem_we[i] ), - .mem_rvalid_i(mem_rvalid[i] ), - .mem_rdata_i (mem_rdata[i] ) + axi_to_reqrsp #( + .axi_req_t (axi_tile_req_t ), + .axi_rsp_t (axi_tile_resp_t), + .AddrWidth (L2AddrWidth ), + .DataWidth (AxiDataWidth ), + .IdWidth (AxiTileIdWidth ), + .BufDepth (2 ), + .reqrsp_req_t (axi_to_l2_req_t), + .reqrsp_rsp_t (axi_to_l2_rsp_t) + ) i_axi_to_reqrsp ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .busy_o (/*unused*/ ), + .axi_req_i (axi_l2_req[i] ), + .axi_rsp_o (axi_l2_resp[i] ), + .reqrsp_req_o (axi_to_l2_req[i]), + .reqrsp_rsp_i (axi_to_l2_rsp[i]) + ); + // Repack the structs for the xbar + assign axi_to_l2_req_chan[i] = axi_to_l2_req[i].q; + assign axi_to_l2_q_valid[i] = axi_to_l2_req[i].q_valid; + assign axi_to_l2_rsp[i].q_ready = axi_to_l2_q_ready[i]; + assign axi_to_l2_rsp[i].p = axi_to_l2_rsp_chan[i]; + assign axi_to_l2_rsp[i].p_valid = axi_to_l2_p_valid[i]; + assign axi_to_l2_p_ready[i] = axi_to_l2_req[i].p_ready; + // Generate the selection signal + assign axi_to_l2_q_sel[i] = axi_to_l2_req_chan[i].addr[$clog2(L2BankBeWidth)+:NumL2BanksWidth]; + // Throttle to one outstanding transaction to avoid reordering without a ROB + stream_throttle #( + .MaxNumPending (1) + ) i_stream_throttle ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .req_valid_i (axi_to_l2_q_valid[i] ), + .req_valid_o (axi_to_l2_q_throttle_valid[i]), + .req_ready_i (axi_to_l2_q_throttle_ready[i]), + .req_ready_o (axi_to_l2_q_ready[i] ), + .rsp_valid_i (axi_to_l2_p_valid[i] ), + .rsp_ready_i (axi_to_l2_p_ready[i] ), + .credit_i (1'b1 ) ); end - variable_latency_interconnect #( - .NumIn (NumAXIMasters ), - .NumOut (NumL2Banks ), - .AddrWidth (L2AddrWidth ), - .DataWidth (L2BankWidth ), - .BeWidth (L2BankBeWidth ), - .AddrMemWidth (L2BankAddrWidth), - .AxiVldRdy (1'b1 ), - .SpillRegisterReq (64'b1 ), - .SpillRegisterResp(64'b1 ) - ) 
i_l2_xbar ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - // master side - .req_valid_i (mem_req ), - .req_ready_o (mem_gnt ), - .req_tgt_addr_i (mem_addr ), - .req_wen_i (mem_we ), - .req_wdata_i (mem_wdata ), - .req_be_i (mem_strb ), - .resp_valid_o (mem_rvalid ), - .resp_ready_i ('1 ), - .resp_rdata_o (mem_rdata ), - // slave side - .req_valid_o (bank_req ), - .req_ready_i ('1 ), - .req_ini_addr_o (bank_ini_d ), - .req_tgt_addr_o (bank_addr ), - .req_wen_o (bank_we ), - .req_wdata_o (bank_wdata ), - .req_be_o (bank_strb ), - .resp_valid_i (bank_rvalid), - .resp_ready_o (/*unused*/ ), // This only works because resp_ready_i = 1 - .resp_ini_addr_i(bank_ini_q ), - .resp_rdata_i (bank_rdata ) + stream_xbar #( + .NumInp (NumAXIMasters ), + .NumOut (NumL2Banks ), + .payload_t (axi_to_l2_req_chan_t), + .OutSpillReg (1'b1 ), + .ExtPrio (1'b0 ), + .AxiVldRdy (1'b1 ), + .LockIn (1'b1 ) + ) i_l2_req_xbar ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .flush_i (1'b0 ), + .rr_i ('0 ), + .data_i (axi_to_l2_req_chan ), + .sel_i (axi_to_l2_q_sel ), + .valid_i (axi_to_l2_q_throttle_valid), + .ready_o (axi_to_l2_q_throttle_ready), + .data_o (mem_req_chan ), + .idx_o (axi_to_l2_q_idx ), + .valid_o (mem_req_valid ), + .ready_i (mem_req_ready ) ); - `FF(bank_rvalid, bank_req, 1'b0, clk_i, rst_ni) - `FF(bank_ini_q, bank_ini_d, 1'b0, clk_i, rst_ni) + stream_xbar #( + .NumInp (NumL2Banks ), + .NumOut (NumAXIMasters ), + .payload_t (axi_to_l2_rsp_chan_t), + .OutSpillReg (1'b1 ), + .ExtPrio (1'b0 ), + .AxiVldRdy (1'b1 ), + .LockIn (1'b1 ) + ) i_l2_rsp_xbar ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .flush_i (1'b0 ), + .rr_i ('0 ), + .data_i (mem_rsp_chan ), + .sel_i (axi_to_l2_p_sel ), + .valid_i (mem_rsp_valid ), + .ready_o (mem_rsp_ready ), + .data_o (axi_to_l2_rsp_chan), + .idx_o (/*unused*/ ), + .valid_o (axi_to_l2_p_valid ), + .ready_i (axi_to_l2_p_ready ) + ); // The initialization at reset is not supported by Verilator. Therefore, we disable the SimInit at // reset for Verilator. 
Since our preloading through the SystemVerilog testbench requires the // SimInit value to be assigned at reset, we use the "custom" string to invoke the initialization // without setting the memory to known values like "ones" or "zeros". localparam L2SimInit = `ifdef VERILATOR "none" `else "custom" `endif; + localparam L2BankAddrIndex = $clog2(L2BankBeWidth)+$clog2(NumL2Banks); for (genvar i = 0; i < NumL2Banks; i++) begin : gen_l2_banks + // Address scrambling: Cut out the bits used to index the individual banks + logic [AddrWidth-1:0] addr_scrambled; + assign addr_scrambled = {'0, mem_req_chan[i].addr[AddrWidth-1:L2BankAddrIndex], mem_req_chan[i].addr[0+:$clog2(L2BankBeWidth)]}; + tcdm_adapter #( + .AddrWidth (AddrWidth ), + .BankAddrWidth (L2BankAddrWidth ), + .DataWidth (L2BankWidth ), + .metadata_t (l2_axi_idx_t ), + .LrScEnable (1'b0 ), + .RegisterAmo (1'b0 ) + ) i_bank_adapter ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .in_valid_i (mem_req_valid[i] ), + .in_ready_o (mem_req_ready[i] ), + .in_address_i(addr_scrambled ), + .in_amo_i (mem_req_chan[i].amo ), + .in_write_i (mem_req_chan[i].write), + .in_wdata_i (mem_req_chan[i].data ), + .in_meta_i (axi_to_l2_q_idx[i] ), + .in_be_i (mem_req_chan[i].strb ), + .in_valid_o (mem_rsp_valid[i] ), + .in_ready_i (mem_rsp_ready[i] ), + .in_rdata_o (mem_rsp_chan[i].data ), + .in_meta_o (axi_to_l2_p_sel[i] ), + .out_req_o (bank_req[i] ), + .out_add_o (bank_addr[i] ), + .out_write_o (bank_we[i] ), + .out_wdata_o (bank_wdata[i] ), + .out_be_o (bank_strb[i] ), + .out_rdata_i (bank_rdata[i] ) + ); + assign mem_rsp_chan[i].error = 1'b0; + tc_sram #( .DataWidth(L2BankWidth ), .NumWords (L2BankNumWords), @@ -713,7 +784,7 @@ module mempool_system .mst_ports_req_o (axi_lite_slv_req ), .mst_ports_resp_i (axi_lite_slv_resp ), .addr_map_i (axi_lite_xbar_rules), - .en_default_mst_port_i('1 ), + .en_default_mst_port_i(1'b1 ), .default_mst_port_i (CtrlRegisters ) ); diff --git a/hardware/src/mempool_tile.sv b/hardware/src/mempool_tile.sv 
index f1e083548..961488bc4 100644 --- a/hardware/src/mempool_tile.sv +++ b/hardware/src/mempool_tile.sv @@ -60,6 +60,7 @@ module mempool_tile ****************/ `include "common_cells/registers.svh" + `include "reqrsp_interface/typedef.svh" /***************** * Definitions * @@ -283,8 +284,8 @@ module mempool_tile * Instruction Cache * ***********************/ // Instruction interface - axi_core_req_t [NumCaches-1:0] axi_cache_req_d, axi_cache_req_q; - axi_core_resp_t [NumCaches-1:0] axi_cache_resp_d, axi_cache_resp_q; + axi_cache_req_t [NumCaches-1:0] axi_cache_req_d, axi_cache_req_q; + axi_cache_resp_t [NumCaches-1:0] axi_cache_resp_d, axi_cache_resp_q; for (genvar c = 0; unsigned'(c) < NumCaches; c++) begin: gen_caches snitch_icache #( @@ -309,8 +310,8 @@ module mempool_tile .EARLY_LATCH (1 ), .L0_EARLY_TAG_WIDTH (11 ), .ISO_CROSSING (0 ), - .axi_req_t (axi_core_req_t ), - .axi_rsp_t (axi_core_resp_t ) + .axi_req_t (axi_cache_req_t ), + .axi_rsp_t (axi_cache_resp_t ) ) i_snitch_icache ( .clk_i (clk_i ), .clk_d2_i (clk_i ), @@ -331,13 +332,13 @@ module mempool_tile .axi_rsp_i (axi_cache_resp_q[c] ) ); axi_cut #( - .aw_chan_t (axi_core_aw_t ), - .w_chan_t (axi_core_w_t ), - .b_chan_t (axi_core_b_t ), - .ar_chan_t (axi_core_ar_t ), - .r_chan_t (axi_core_r_t ), - .axi_req_t (axi_core_req_t ), - .axi_resp_t(axi_core_resp_t) + .aw_chan_t (axi_cache_aw_t ), + .w_chan_t (axi_cache_w_t ), + .b_chan_t (axi_cache_b_t ), + .ar_chan_t (axi_cache_ar_t ), + .r_chan_t (axi_cache_r_t ), + .axi_req_t (axi_cache_req_t ), + .axi_resp_t(axi_cache_resp_t) ) axi_cache_slice ( .clk_i (clk_i ), .rst_ni (rst_ni ), @@ -509,17 +510,18 @@ module mempool_tile assign bank_resp_wide[b] = meta_out.wide; tcdm_adapter #( - .AddrWidth (TCDMAddrMemWidth), - .DataWidth (DataWidth ), - .metadata_t (bank_metadata_t ), - .LrScEnable (LrScEnable ), - .RegisterAmo(1'b0 ) + .AddrWidth (TCDMAddrMemWidth+ByteOffset), + .BankAddrWidth (TCDMAddrMemWidth ), + .DataWidth (DataWidth ), + .metadata_t 
(bank_metadata_t ), + .LrScEnable (LrScEnable ), + .RegisterAmo (1'b0 ) ) i_tcdm_adapter ( .clk_i (clk_i ), .rst_ni (rst_ni ), .in_valid_i (bank_req_valid[b] ), .in_ready_o (bank_req_ready[b] ), - .in_address_i(bank_req_payload[b].tgt_addr[idx_width(NumBanksPerTile) +: TCDMAddrMemWidth]), + .in_address_i({bank_req_payload[b].tgt_addr[idx_width(NumBanksPerTile) +: TCDMAddrMemWidth],{ByteOffset{1'b0}}}), .in_amo_i (bank_req_payload[b].wdata.amo ), .in_write_i (bank_req_payload[b].wen ), .in_wdata_i (bank_req_payload[b].wdata.data ), @@ -933,7 +935,6 @@ module mempool_tile .soc_qvalid_o (soc_data_qvalid[c] ), .soc_qready_i (soc_data_qready[c] ), .soc_pdata_i (soc_data_p[c].data ), - .soc_pwrite_i (soc_data_p[c].write ), .soc_perror_i (soc_data_p[c].error ), .soc_pvalid_i (soc_data_pvalid[c] ), .soc_pready_o (soc_data_pready[c] ), @@ -1004,72 +1005,67 @@ module mempool_tile * AXI Plug * ****************/ - snitch_pkg::dreq_t soc_req_o; - snitch_pkg::dresp_t soc_resp_i; - - logic soc_qvalid; - logic soc_qready; - logic soc_pvalid; - logic soc_pready; - - // We don't care about this - assign soc_resp_i.id = 'x; - - snitch_demux #( - .NrPorts (NumCoresPerTile ), - .req_t (snitch_pkg::dreq_t ), - .resp_t (snitch_pkg::dresp_t) - ) i_snitch_demux_data ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - // Inputs - .req_payload_i (soc_data_q ), - .req_valid_i (soc_data_qvalid), - .req_ready_o (soc_data_qready), - .resp_payload_o(soc_data_p ), - .resp_last_o (/* Unused */ ), - .resp_valid_o (soc_data_pvalid), - .resp_ready_i (soc_data_pready), - // Output - .req_payload_o (soc_req_o ), - .req_valid_o (soc_qvalid ), - .req_ready_i (soc_qready ), - .resp_payload_i(soc_resp_i ), - .resp_last_i (1'b1 ), - .resp_valid_i (soc_pvalid ), - .resp_ready_o (soc_pready ) - ); + `REQRSP_TYPEDEF_ALL(soc, snitch_pkg::addr_t, snitch_pkg::data_t, snitch_pkg::strb_t) - // Core request + // Pack the cores' soc_req/rsp into a reqrsp bus + soc_req_t [NumCoresPerTile-1:0] snitch_to_soc_req; + soc_rsp_t 
[NumCoresPerTile-1:0] snitch_to_soc_rsp; + soc_req_t mux_to_soc_req; + soc_rsp_t mux_to_soc_rsp; + // AXI core request axi_core_req_t axi_cores_req_d, axi_cores_req_q; axi_core_resp_t axi_cores_resp_d, axi_cores_resp_q; + axi_cache_req_t axi_cores_wide_req; + axi_cache_resp_t axi_cores_wide_resp; + + for (genvar c = 0; c < NumCoresPerTile; c++) begin: gen_core_soc_reqrsp + assign snitch_to_soc_req[c].q.addr = soc_data_q[c].addr; + assign snitch_to_soc_req[c].q.write = soc_data_q[c].write; + assign snitch_to_soc_req[c].q.amo = reqrsp_pkg::amo_op_e'(soc_data_q[c].amo); + assign snitch_to_soc_req[c].q.data = soc_data_q[c].data; + assign snitch_to_soc_req[c].q.strb = soc_data_q[c].strb; + assign snitch_to_soc_req[c].q.size = 3'b010; // AXI-style size: 2^x bytes + assign snitch_to_soc_req[c].q_valid = soc_data_qvalid[c]; + assign soc_data_qready[c] = snitch_to_soc_rsp[c].q_ready; + assign soc_data_p[c].data = snitch_to_soc_rsp[c].p.data; + assign soc_data_p[c].error = snitch_to_soc_rsp[c].p.error; + assign soc_data_p[c].id = '0; // Don't care + assign soc_data_p[c].write = '0; // Don't care + assign soc_data_pvalid[c] = snitch_to_soc_rsp[c].p_valid; + assign snitch_to_soc_req[c].p_ready = soc_data_pready[c]; + end + + reqrsp_mux #( + .NrPorts (NumCoresPerTile), + .AddrWidth (AddrWidth ), + .DataWidth (DataWidth ), + .req_t (soc_req_t ), + .rsp_t (soc_rsp_t ), + .RespDepth (NumCoresPerTile), + .RegisterReq ('0 ) + ) i_reqrsp_mux_snitch_soc ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .slv_req_i (snitch_to_soc_req), + .slv_rsp_o (snitch_to_soc_rsp), + .mst_req_o (mux_to_soc_req ), + .mst_rsp_i (mux_to_soc_rsp ) + ); - snitch_axi_adapter #( - .addr_t (snitch_pkg::addr_t), - .data_t (snitch_pkg::data_t), - .strb_t (snitch_pkg::strb_t), - .axi_mst_req_t (axi_core_req_t ), - .axi_mst_resp_t (axi_core_resp_t ) - ) i_snitch_core_axi_adapter ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .slv_qaddr_i (soc_req_o.addr ), - .slv_qwrite_i(soc_req_o.write ), - .slv_qamo_i (soc_req_o.amo 
), - .slv_qdata_i (soc_req_o.data ), - .slv_qsize_i (3'b010 ), - .slv_qstrb_i (soc_req_o.strb ), - .slv_qrlen_i ('0 ), - .slv_qvalid_i(soc_qvalid ), - .slv_qready_o(soc_qready ), - .slv_pdata_o (soc_resp_i.data ), - .slv_pwrite_o(soc_resp_i.write), - .slv_perror_o(soc_resp_i.error), - .slv_plast_o (/* Unused */ ), - .slv_pvalid_o(soc_pvalid ), - .slv_pready_i(soc_pready ), - .axi_req_o (axi_cores_req_d ), - .axi_resp_i (axi_cores_resp_q) + reqrsp_to_axi #( + .MaxTrans (NumCoresPerTile), + .DataWidth (DataWidth ), + .reqrsp_req_t (soc_req_t ), + .reqrsp_rsp_t (soc_rsp_t ), + .axi_req_t (axi_core_req_t ), + .axi_rsp_t (axi_core_resp_t) + ) i_reqrsp_snitch_to_axi ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .reqrsp_req_i (mux_to_soc_req ), + .reqrsp_rsp_o (mux_to_soc_rsp ), + .axi_req_o (axi_cores_req_d ), + .axi_rsp_i (axi_cores_resp_q) ); axi_cut #( @@ -1089,32 +1085,58 @@ module mempool_tile .mst_resp_i(axi_cores_resp_d) ); + axi_dw_converter #( + .AxiMaxReads (NumCoresPerTile ), + .AxiSlvPortDataWidth (DataWidth ), + .AxiMstPortDataWidth (AxiDataWidth ), + .AxiAddrWidth (AddrWidth ), + .AxiIdWidth (AxiCoreIdWidth ), + .aw_chan_t (axi_core_aw_t ), + .mst_w_chan_t (axi_cache_w_t ), + .slv_w_chan_t (axi_core_w_t ), + .b_chan_t (axi_core_b_t ), + .ar_chan_t (axi_core_ar_t ), + .mst_r_chan_t (axi_cache_r_t ), + .slv_r_chan_t (axi_core_r_t ), + .axi_mst_req_t (axi_cache_req_t ), + .axi_mst_resp_t (axi_cache_resp_t), + .axi_slv_req_t (axi_core_req_t ), + .axi_slv_resp_t (axi_core_resp_t ) + ) i_axi_dw_converter_cores ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .slv_req_i (axi_cores_req_q ), + .slv_resp_o (axi_cores_resp_d ), + .mst_req_o (axi_cores_wide_req ), + .mst_resp_i (axi_cores_wide_resp) + ); + axi_mux #( - .SlvAxiIDWidth (AxiCoreIdWidth ), - .slv_aw_chan_t (axi_core_aw_t ), - .mst_aw_chan_t (axi_tile_aw_t ), - .w_chan_t (axi_tile_w_t ), - .slv_b_chan_t (axi_core_b_t ), - .mst_b_chan_t (axi_tile_b_t ), - .slv_ar_chan_t (axi_core_ar_t ), - .mst_ar_chan_t 
(axi_tile_ar_t ), - .slv_r_chan_t (axi_core_r_t ), - .mst_r_chan_t (axi_tile_r_t ), - .slv_req_t (axi_core_req_t ), - .slv_resp_t (axi_core_resp_t), - .mst_req_t (axi_tile_req_t ), - .mst_resp_t (axi_tile_resp_t), - .NoSlvPorts (1+NumCaches ), - .MaxWTrans (8 ), - .FallThrough (1 ) + .SlvAxiIDWidth (AxiCoreIdWidth ), + .slv_aw_chan_t (axi_cache_aw_t ), + .mst_aw_chan_t (axi_tile_aw_t ), + .w_chan_t (axi_cache_w_t ), + .slv_b_chan_t (axi_cache_b_t ), + .mst_b_chan_t (axi_tile_b_t ), + .slv_ar_chan_t (axi_cache_ar_t ), + .mst_ar_chan_t (axi_tile_ar_t ), + .slv_r_chan_t (axi_cache_r_t ), + .mst_r_chan_t (axi_tile_r_t ), + .slv_req_t (axi_cache_req_t ), + .slv_resp_t (axi_cache_resp_t), + .mst_req_t (axi_tile_req_t ), + .mst_resp_t (axi_tile_resp_t ), + .NoSlvPorts (1+NumCaches ), + .MaxWTrans (NumCoresPerTile ), + .FallThrough (1 ) ) i_axi_mux ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .test_i (1'b0 ), - .slv_reqs_i ({axi_cores_req_q, axi_cache_req_q} ), - .slv_resps_o({axi_cores_resp_d, axi_cache_resp_d}), - .mst_req_o (axi_mst_req_o ), - .mst_resp_i (axi_mst_resp_i ) + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .test_i (1'b0 ), + .slv_reqs_i ({axi_cores_wide_req, axi_cache_req_q} ), + .slv_resps_o({axi_cores_wide_resp, axi_cache_resp_d}), + .mst_req_o (axi_mst_req_o ), + .mst_resp_i (axi_mst_resp_i ) ); /****************** diff --git a/hardware/src/tcdm_adapter.sv b/hardware/src/tcdm_adapter.sv index 6723f8070..72c3cb2c3 100644 --- a/hardware/src/tcdm_adapter.sv +++ b/hardware/src/tcdm_adapter.sv @@ -11,37 +11,38 @@ `include "common_cells/registers.svh" module tcdm_adapter #( - parameter int unsigned AddrWidth = 32, - parameter int unsigned DataWidth = 32, - parameter type metadata_t = logic, - parameter bit LrScEnable = 1, + parameter int unsigned AddrWidth = 32, + parameter int unsigned BankAddrWidth = AddrWidth, + parameter int unsigned DataWidth = 32, + parameter type metadata_t = logic, + parameter bit LrScEnable = 1, // Cut path between request and response at 
the cost of increased AMO latency parameter bit RegisterAmo = 1'b0, // Dependent parameters. DO NOT CHANGE. localparam int unsigned BeWidth = DataWidth/8 ) ( - input logic clk_i, - input logic rst_ni, + input logic clk_i, + input logic rst_ni, // master side - input logic in_valid_i, // Bank request - output logic in_ready_o, // Bank grant - input logic [AddrWidth-1:0] in_address_i, // Address - input logic [3:0] in_amo_i, // Atomic Memory Operation - input logic in_write_i, // 1: Store, 0: Load - input logic [DataWidth-1:0] in_wdata_i, // Write data - input metadata_t in_meta_i, // Meta data - input logic [BeWidth-1:0] in_be_i, // Byte enable - output logic in_valid_o, // Read data - input logic in_ready_i, // Read data - output logic [DataWidth-1:0] in_rdata_o, // Read data - output metadata_t in_meta_o, // Meta data + input logic in_valid_i, // Bank request + output logic in_ready_o, // Bank grant + input logic [AddrWidth-1:0] in_address_i, // Address + input logic [3:0] in_amo_i, // Atomic Memory Operation + input logic in_write_i, // 1: Store, 0: Load + input logic [DataWidth-1:0] in_wdata_i, // Write data + input metadata_t in_meta_i, // Meta data + input logic [BeWidth-1:0] in_be_i, // Byte enable + output logic in_valid_o, // Read data + input logic in_ready_i, // Read data + output logic [DataWidth-1:0] in_rdata_o, // Read data + output metadata_t in_meta_o, // Meta data // slave side - output logic out_req_o, // Bank request - output logic [AddrWidth-1:0] out_add_o, // Address - output logic out_write_o, // 1: Store, 0: Load - output logic [DataWidth-1:0] out_wdata_o, // Write data - output logic [BeWidth-1:0] out_be_o, // Bit enable - input logic [DataWidth-1:0] out_rdata_i // Read data + output logic out_req_o, // Bank request + output logic [BankAddrWidth-1:0] out_add_o, // Address + output logic out_write_o, // 1: Store, 0: Load + output logic [DataWidth-1:0] out_wdata_o, // Write data + output logic [BeWidth-1:0] out_be_o, // Bit enable + input logic 
[DataWidth-1:0] out_rdata_i // Read data ); import mempool_pkg::NumCores; @@ -49,6 +50,8 @@ module tcdm_adapter #( import mempool_pkg::NumCoresPerTile; import cf_math_pkg::idx_width; + localparam int unsigned AmoWidth = 32; // Only 32 is tested for now + typedef enum logic [3:0] { AMONone = 4'h0, AMOSwap = 4'h1, @@ -77,15 +80,19 @@ module tcdm_adapter #( Idle, DoAMO, WriteBackAMO } state_q, state_d; - logic load_amo; - amo_op_t amo_op_q; - logic amo_wb; - logic [BeWidth-1:0] be_expand; - logic [AddrWidth-1:0] addr_q; + logic load_amo; + amo_op_t amo_op_q; + logic amo_wb; + logic [BankAddrWidth-1:0] in_address_bank; + + logic [AddrWidth-1:0] amo_addr_q; + logic [BeWidth-1:0] amo_be_q; + logic [AmoWidth-1:0] amo_operand_a; + logic [AmoWidth-1:0] amo_operand_b_q; + logic [AmoWidth-1:0] amo_result, amo_result_q; - logic [31:0] amo_operand_a; - logic [31:0] amo_operand_b_q; - logic [31:0] amo_result, amo_result_q; + // Cut off the bits indexing the bytes of the same bank word + assign in_address_bank = in_address_i[$clog2(BeWidth)+:BankAddrWidth]; // Store the metadata at handshake spill_register #( @@ -161,7 +168,7 @@ module tcdm_adapter #( /// This address is aligned to the memory size /// implying that the reservation happen on a set size /// equal to the word width of the memory (32 or 64 bit). - logic [AddrWidth-1:0] addr; + logic [BankAddrWidth-1:0] addr; /// Which core made this reservation. Important to /// track the reservations from different cores and /// to prevent any live-locking. 
@@ -200,7 +207,7 @@ module tcdm_adapter #( if (amo_op_t'(in_amo_i) == AMOLR && (!reservation_q.valid || reservation_q.core == unique_core_id)) begin reservation_d.valid = 1'b1; - reservation_d.addr = in_address_i; + reservation_d.addr = in_address_bank; reservation_d.core = unique_core_id; end @@ -211,7 +218,7 @@ module tcdm_adapter #( // check whether another core has made a write attempt if ((unique_core_id != reservation_q.core) && - (in_address_i == reservation_q.addr) && + (in_address_bank == reservation_q.addr) && (!(amo_op_t'(in_amo_i) inside {AMONone, AMOLR, AMOSC}) || in_write_i)) begin reservation_d.valid = 1'b0; end @@ -220,7 +227,7 @@ module tcdm_adapter #( if (reservation_q.valid && amo_op_t'(in_amo_i) == AMOSC && reservation_q.core == unique_core_id) begin reservation_d.valid = 1'b0; - sc_successful_d = (reservation_q.addr == in_address_i); + sc_successful_d = (reservation_q.addr == in_address_bank); end end end // always_comb @@ -238,7 +245,7 @@ module tcdm_adapter #( // feed-through in_ready_o = rdata_ready; out_req_o = in_valid_i && in_ready_o; - out_add_o = in_address_i; + out_add_o = in_address_bank; out_write_o = in_write_i || (sc_successful_d && (amo_op_t'(in_amo_i) == AMOSC)); out_wdata_o = in_wdata_i; out_be_o = in_be_i; @@ -262,13 +269,13 @@ module tcdm_adapter #( amo_wb = 1'b1; out_req_o = 1'b1; out_write_o = 1'b1; - out_add_o = addr_q; - out_be_o = 4'b1111; + out_add_o = amo_addr_q[$clog2(BeWidth)+:BankAddrWidth]; + out_be_o = amo_be_q; // serve from register if we cut the path if (RegisterAmo) begin - out_wdata_o = amo_result_q; + out_wdata_o[amo_addr_q[0+:$clog2(BeWidth)]*8+:AmoWidth] = amo_result_q; end else begin - out_wdata_o = amo_result; + out_wdata_o[amo_addr_q[0+:$clog2(BeWidth)]*8+:AmoWidth] = amo_result; end end default:; @@ -285,14 +292,16 @@ module tcdm_adapter #( if (!rst_ni) begin state_q <= Idle; amo_op_q <= amo_op_t'('0); - addr_q <= '0; + amo_addr_q <= '0; + amo_be_q <= '0; amo_operand_b_q <= '0; end else begin state_q <= 
state_d; if (load_amo) begin amo_op_q <= amo_op_t'(in_amo_i); - addr_q <= in_address_i; - amo_operand_b_q <= in_wdata_i; + amo_addr_q <= in_address_i; + amo_be_q <= in_be_i; + amo_operand_b_q <= in_wdata_i[in_address_i[0+:$clog2(BeWidth)]*8+:AmoWidth]; end else begin amo_op_q <= AMONone; end @@ -305,7 +314,7 @@ module tcdm_adapter #( logic [33:0] adder_sum; logic [32:0] adder_operand_a, adder_operand_b; - assign amo_operand_a = out_rdata_i; + assign amo_operand_a = out_rdata_i[amo_addr_q[0+:$clog2(BeWidth)]*8+:AmoWidth]; assign adder_sum = adder_operand_a + adder_operand_b; /* verilator lint_off WIDTH */ always_comb begin : amo_alu @@ -345,11 +354,6 @@ module tcdm_adapter #( end // pragma translate_off - // Check for unsupported parameters - if (DataWidth != 32) begin - $error($sformatf("Module currently only supports DataWidth = 32. DataWidth is currently set to: %0d", DataWidth)); - end - `ifndef VERILATOR assert_rdata_full : assert property( @(posedge clk_i) disable iff (~rst_ni) (out_gnt |-> !rdata_full)) diff --git a/hardware/src/tcdm_shim.sv b/hardware/src/tcdm_shim.sv index 5da72f997..d7c09ed1f 100644 --- a/hardware/src/tcdm_shim.sv +++ b/hardware/src/tcdm_shim.sv @@ -45,7 +45,6 @@ module tcdm_shim output logic [NrSoC-1:0] soc_qvalid_o, input logic [NrSoC-1:0] soc_qready_i, input logic [NrSoC-1:0] [DataWidth-1:0] soc_pdata_i, - input logic [NrSoC-1:0] soc_pwrite_i, input logic [NrSoC-1:0] soc_perror_i, input logic [NrSoC-1:0] soc_pvalid_i, output logic [NrSoC-1:0] soc_pready_o, @@ -68,67 +67,46 @@ module tcdm_shim ); // Imports - import snitch_pkg::dreq_t ; + import snitch_pkg::dreq_t; import snitch_pkg::dresp_t; // Includes `include "common_cells/registers.svh" - dreq_t data_qpayload ; - dreq_t [NrSoC-1:0] soc_qpayload ; + dreq_t data_qpayload; + dreq_t [NrSoC-1:0] soc_qpayload; dreq_t [NrTCDM-1:0] tcdm_qpayload; - dresp_t data_ppayload ; - dresp_t [NrSoC-1:0] soc_ppayload ; + dresp_t data_ppayload; + dresp_t [NrSoC-1:0] soc_ppayload; dresp_t [NrTCDM-1:0] 
tcdm_ppayload; for (genvar i = 0; i < NrTCDM; i++) begin : gen_tcdm_ppayload - assign tcdm_ppayload[i].id = tcdm_resp_id_i[i] ; + assign tcdm_ppayload[i].id = tcdm_resp_id_i[i]; assign tcdm_ppayload[i].data = tcdm_resp_rdata_i[i]; - assign tcdm_ppayload[i].write = 1'b0 ; // Don't care - assign tcdm_ppayload[i].error = 1'b0 ; + assign tcdm_ppayload[i].write = 1'b0; // Don't care + assign tcdm_ppayload[i].error = 1'b0; end // ROB IDs of the SoC requests (come back in order) logic [NrSoC-1:0][MetaIdWidth-1:0] soc_meta_id; for (genvar i = 0; i < NrSoC; i++) begin: gen_soc_meta_id_fifo - logic [NrSoC-1:0][MetaIdWidth-1:0] meta_read; - logic [NrSoC-1:0][MetaIdWidth-1:0] meta_write; - - assign soc_meta_id[i] = soc_pwrite_i ? meta_write : meta_read; - - fifo_v3 #( - .DEPTH (MaxOutStandingTrans), - .DATA_WIDTH(MetaIdWidth ) - ) i_soc_meta_id_read_fifo ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .flush_i (1'b0 ), - .testmode_i(1'b0 ), - .data_i (data_qid_i ), - .push_i (soc_qvalid_o[i] & soc_qready_i[i] &!soc_qwrite_o[i]), - .full_o (/* Unused */ ), - .data_o (meta_read ), - .pop_i (soc_pvalid_i[i] & soc_pready_o[i] & !soc_pwrite_i ), - .empty_o (/* Unused */ ), - .usage_o (/* Unused */ ) - ); fifo_v3 #( .DEPTH (MaxOutStandingTrans), .DATA_WIDTH(MetaIdWidth ) - ) i_soc_meta_id_write_fifo ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .flush_i (1'b0 ), - .testmode_i(1'b0 ), - .data_i (data_qid_i ), - .push_i (soc_qvalid_o[i] & soc_qready_i[i] & soc_qwrite_o[i]), - .full_o (/* Unused */ ), - .data_o (meta_write ), - .pop_i (soc_pvalid_i[i] & soc_pready_o[i] & soc_pwrite_i ), - .empty_o (/* Unused */ ), - .usage_o (/* Unused */ ) + ) i_soc_meta_id_fifo ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .flush_i (1'b0 ), + .testmode_i(1'b0 ), + .data_i (data_qid_i ), + .push_i (soc_qvalid_o[i] & soc_qready_i[i]), + .full_o (/* Unused */ ), + .data_o (soc_meta_id[i] ), // FIFO i drives only entry i of soc_meta_id + .pop_i (soc_pvalid_i[i] & soc_pready_o[i]), + .empty_o (/* Unused */ ), + .usage_o (/* Unused */ ) ); end:
gen_soc_meta_id_fifo @@ -160,39 +138,39 @@ module tcdm_shim // Connect TCDM output ports for (genvar i = 0; i < NrTCDM; i++) begin : gen_tcdm_con - assign tcdm_req_tgt_addr_o[i] = tcdm_qpayload[i].addr ; - assign tcdm_req_wdata_o[i] = tcdm_qpayload[i].data ; - assign tcdm_req_amo_o[i] = tcdm_qpayload[i].amo ; - assign tcdm_req_id_o[i] = tcdm_qpayload[i].id ; + assign tcdm_req_tgt_addr_o[i] = tcdm_qpayload[i].addr; + assign tcdm_req_wdata_o[i] = tcdm_qpayload[i].data; + assign tcdm_req_amo_o[i] = tcdm_qpayload[i].amo; + assign tcdm_req_id_o[i] = tcdm_qpayload[i].id; assign tcdm_req_wen_o[i] = tcdm_qpayload[i].write; - assign tcdm_req_be_o[i] = tcdm_qpayload[i].strb ; + assign tcdm_req_be_o[i] = tcdm_qpayload[i].strb; end // Connect SOCs for (genvar i = 0; i < NrSoC; i++) begin : gen_soc_con - assign soc_qaddr_o[i] = soc_qpayload[i].addr ; + assign soc_qaddr_o[i] = soc_qpayload[i].addr; assign soc_qwrite_o[i] = soc_qpayload[i].write; - assign soc_qamo_o[i] = soc_qpayload[i].amo ; - assign soc_qdata_o[i] = soc_qpayload[i].data ; - assign soc_qstrb_o[i] = soc_qpayload[i].strb ; - assign soc_ppayload[i].data = soc_pdata_i[i] ; - assign soc_ppayload[i].id = soc_meta_id[i] ; - assign soc_ppayload[i].write = soc_pwrite_i[i] ; - assign soc_ppayload[i].error = soc_perror_i[i] ; + assign soc_qamo_o[i] = soc_qpayload[i].amo; + assign soc_qdata_o[i] = soc_qpayload[i].data; + assign soc_qstrb_o[i] = soc_qpayload[i].strb; + assign soc_ppayload[i].data = soc_pdata_i[i]; + assign soc_ppayload[i].id = soc_meta_id[i]; + assign soc_ppayload[i].write = '0; // Don't care + assign soc_ppayload[i].error = soc_perror_i[i]; end // Request interface - assign data_qpayload.addr = data_qaddr_i ; + assign data_qpayload.addr = data_qaddr_i; assign data_qpayload.write = data_qwrite_i; - assign data_qpayload.amo = data_qamo_i ; - assign data_qpayload.data = data_qdata_i ; - assign data_qpayload.id = data_qid_i ; - assign data_qpayload.strb = data_qstrb_i ; + assign data_qpayload.amo = data_qamo_i;
+ assign data_qpayload.data = data_qdata_i; + assign data_qpayload.id = data_qid_i; + assign data_qpayload.strb = data_qstrb_i; // Response interface - assign data_pdata_o = data_ppayload.data ; + assign data_pdata_o = data_ppayload.data; assign data_perror_o = data_ppayload.error; - assign data_pid_o = data_ppayload.id ; + assign data_pid_o = data_ppayload.id; // Elaboration-time assertions
diff --git a/software/tests/baremetal/atomics/main.c b/software/tests/baremetal/atomics/main.c
new file mode 100644
index 000000000..850deb476
--- /dev/null
+++ b/software/tests/baremetal/atomics/main.c
@@ -0,0 +1,193 @@
+// Copyright 2024 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Author: Samuel Riedel, ETH Zurich
+
+#include <stdint.h>
+#include <string.h>
+
+#include "runtime.h"
+
+// One test word per memory level, placed through its linker section.
+uint32_t volatile l1 __attribute__((section(".l1")));
+uint32_t volatile l2 __attribute__((section(".l2")));
+
+// Runs each 32-bit AMO once on the word at `addr`.
+//
+// For every operation the value returned by the AMO must equal the previous
+// content of the word, and the word read back afterwards must equal the
+// reference result computed in C.
+//
+// Returns 0 on success. Otherwise the tens digit names the operation (0 swap,
+// 1 add, 2 xor, 3 and, 4 or, 5 min, 6 max, 7 minu, 8 maxu) and the units digit
+// the failed check (1: value returned by the AMO, 2: value left in memory).
+//
+// Each asm statement has a "memory" clobber because the AMO reads and writes
+// `*addr` without the compiler seeing it; `asm volatile` by itself does not
+// keep the statement ordered against the surrounding accesses.
+int atomics(uint32_t volatile *addr) {
+  uint32_t golden, ret, op;
+
+  // Init
+  *addr = 0x12345678;
+
+  // AMO Swap
+  golden = *addr;
+  op = 0x23456789;
+  asm volatile("amoswap.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 1;
+  }
+  if (*addr != op) {
+    return 2;
+  }
+
+  // AMO Add
+  golden = *addr;
+  op = 0x199;
+  asm volatile("amoadd.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 11;
+  }
+  if (*addr != golden + op) {
+    return 12;
+  }
+
+  // AMO Xor
+  golden = *addr;
+  op = 0x12345678;
+  asm volatile("amoxor.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 21;
+  }
+  if (*addr != (golden ^ op)) {
+    return 22;
+  }
+
+  // AMO And
+  golden = *addr;
+  op = 0x0000FF33;
+  asm volatile("amoand.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 31;
+  }
+  if (*addr != (golden & op)) {
+    return 32;
+  }
+
+  // AMO Or
+  golden = *addr;
+  op = 0x12340000;
+  asm volatile("amoor.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 41;
+  }
+  if (*addr != (golden | op)) {
+    return 42;
+  }
+
+  // AMO Min
+  golden = *addr;
+  op = 0xF0000001;
+  asm volatile("amomin.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 51;
+  }
+  if (*addr != ((int32_t)golden < (int32_t)op ? golden : op)) {
+    return 52;
+  }
+
+  // AMO Max
+  golden = *addr;
+  op = 0x00000001;
+  asm volatile("amomax.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 61;
+  }
+  if (*addr != ((int32_t)golden > (int32_t)op ? golden : op)) {
+    return 62;
+  }
+
+  // AMO UMin
+  golden = *addr;
+  op = 0x00000010;
+  asm volatile("amominu.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 71;
+  }
+  if (*addr != (golden < op ? golden : op)) {
+    return 72;
+  }
+
+  // AMO UMax
+  golden = *addr;
+  op = 0x00000010;
+  asm volatile("amomaxu.w %0, %1, (%2)"
+               : "=r"(ret)
+               : "r"(op), "r"(addr)
+               : "memory");
+  if (ret != golden) {
+    return 81;
+  }
+  if (*addr != (golden > op ? golden : op)) {
+    return 82;
+  }
+
+  return 0;
+}
+
+// Core 0 runs the AMO test on the L1 word and then on the L2 word. Every other
+// core calls mempool_wfi() first.
+// NOTE(review): a core that returns from mempool_wfi() falls through and runs
+// the test as well -- confirm that nothing wakes the other cores.
+//
+// Returns 0 on success, the atomics() code if the L1 run fails, or that code
+// plus 100 if the L2 run fails.
+int main() {
+  uint32_t core_id = mempool_get_core_id();
+
+  if (core_id != 0) {
+    mempool_wfi();
+  }
+
+  int ret = 0;
+
+  // L1 memory
+  ret = atomics(&l1);
+  if (ret) {
+    return ret;
+  }
+
+  // L2 memory
+  ret = atomics(&l2);
+  if (ret) {
+    return ret + 100;
+  }
+
+  return 0;
+}