diff --git a/Bender.lock b/Bender.lock index 736cdd5..a26736a 100644 --- a/Bender.lock +++ b/Bender.lock @@ -69,7 +69,7 @@ packages: - common_cells - register_interface cheshire: - revision: 4dfba37385eae48c44080defdfaa3f580921a60c + revision: 8aa5c40f2af14f0a40ed08ba4b24c3759ae944e5 version: null source: Git: https://github.com/pulp-platform/cheshire.git @@ -198,8 +198,8 @@ packages: - common_verification - tech_cells_generic obi: - revision: c2141a653c755461ff44f61d12aeb5d99fc8e760 - version: 0.1.3 + revision: 5321106817e177d6c16ecc4daa922b96b1bc946b + version: 0.1.5 source: Git: https://github.com/pulp-platform/obi.git dependencies: diff --git a/Bender.yml b/Bender.yml index bb8a0d3..8011e07 100644 --- a/Bender.yml +++ b/Bender.yml @@ -10,7 +10,7 @@ package: dependencies: register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.3 } axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.2 } - cheshire: { git: "https://github.com/pulp-platform/cheshire.git", rev: 4dfba37385eae48c44080defdfaa3f580921a60c} + cheshire: { git: "https://github.com/pulp-platform/cheshire.git", rev: 8aa5c40} snitch_cluster: { git: "https://github.com/pulp-platform/snitch_cluster.git", rev: c12ce9b2af1ac8edf3d4feb18939e1ad20c42225} common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.31.1} idma: { git: "https://github.com/pulp-platform/iDMA.git", rev: 9edf489f57389dce5e71252c79e337f527d3aded} @@ -27,6 +27,8 @@ sources: - hw/bootrom/snitch/snitch_bootrom.sv - hw/narrow_adapter.sv - hw/chimera_cluster_adapter.sv + - hw/chimera_cluster.sv + - hw/chimera_clu_domain.sv - hw/chimera_top_wrapper.sv - target: any(simulation, test) diff --git a/hw/bootrom/snitch/snitch_startup.c b/hw/bootrom/snitch/snitch_startup.c index 8f2d570..a484822 100644 --- a/hw/bootrom/snitch/snitch_startup.c +++ b/hw/bootrom/snitch/snitch_startup.c @@ -43,19 +43,19 @@ void cluster_return(uint32_t ret) { *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_SNITCH_CLUSTER_0_RETURN_REG_OFFSET)) = retVal; break; - case 10: + case 1 + CLUSTER_0_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_SNITCH_CLUSTER_1_RETURN_REG_OFFSET)) = retVal; break; - case 19: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_SNITCH_CLUSTER_2_RETURN_REG_OFFSET)) = retVal; break; - case 28: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES + CLUSTER_2_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_SNITCH_CLUSTER_3_RETURN_REG_OFFSET)) = retVal; break; - case 37: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES + CLUSTER_2_NUMCORES + CLUSTER_3_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_SNITCH_CLUSTER_4_RETURN_REG_OFFSET)) = retVal; break; @@ -74,16 +74,16 @@ void clean_busy() { case 1: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_0_BUSY_REG_OFFSET)) = 0; break; - case 10: + case 1 + CLUSTER_0_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_1_BUSY_REG_OFFSET)) = 0; break; - case 19: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_2_BUSY_REG_OFFSET)) = 0; break; - case 28: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES + CLUSTER_2_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_3_BUSY_REG_OFFSET)) = 0; break; - case 37: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES + CLUSTER_2_NUMCORES + CLUSTER_3_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_4_BUSY_REG_OFFSET)) = 0; break; } @@ -101,16 +101,16 @@ void set_busy() { case 1: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_0_BUSY_REG_OFFSET)) = 1; break; - case 10: + case 1 + CLUSTER_0_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_1_BUSY_REG_OFFSET)) = 1; break; - case 19: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_2_BUSY_REG_OFFSET)) = 1; break; - case 28: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES + CLUSTER_2_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_3_BUSY_REG_OFFSET)) = 1; break; - case 37: + case 1 + CLUSTER_0_NUMCORES + CLUSTER_1_NUMCORES + CLUSTER_2_NUMCORES + CLUSTER_3_NUMCORES: *((volatile uint32_t *)(SOC_CTRL_BASE + CHIMERA_CLUSTER_4_BUSY_REG_OFFSET)) = 1; break; } diff --git a/hw/chimera_clu_domain.sv b/hw/chimera_clu_domain.sv new file mode 100644 index 0000000..f7c083d --- /dev/null +++ b/hw/chimera_clu_domain.sv @@ -0,0 +1,86 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Moritz Scherer + +`define NRCORES(extClusterIdx) ChimeraClusterCfg.NrCores[extClusterIdx] +`define PREVNRCORES(extClusterIdx) \ + _sumVector( \ + ChimeraClusterCfg.NrCores, extClusterIdx \ + ) + +// Wraps all snitch-type clusters in chimera +module chimera_clu_domain + import chimera_pkg::*; + import cheshire_pkg::*; +#( + parameter cheshire_cfg_t Cfg = '0, + parameter type narrow_in_req_t = logic, + parameter type narrow_in_resp_t = logic, + parameter type narrow_out_req_t = logic, + parameter type narrow_out_resp_t = logic, + parameter type wide_out_req_t = logic, + parameter type wide_out_resp_t = logic +) ( + input logic soc_clk_i, + input logic [ ExtClusters-1:0] clu_clk_i, + input logic rst_ni, + input logic [ ExtClusters-1:0] widemem_bypass_i, + //----------------------------- + // Interrupt ports + //----------------------------- + input logic [iomsb(NumIrqCtxts*Cfg.NumExtIrqHarts):0] xeip_i, + input logic [ iomsb(Cfg.NumExtIrqHarts):0] mtip_i, + input logic [ iomsb(Cfg.NumExtIrqHarts):0] msip_i, + input logic [ iomsb(Cfg.NumExtDbgHarts):0] debug_req_i, + //----------------------------- + // Narrow AXI ports + //----------------------------- + input narrow_in_req_t [ iomsb(Cfg.AxiExtNumSlv):0] narrow_in_req_i, + output narrow_in_resp_t [ iomsb(Cfg.AxiExtNumSlv):0] narrow_in_resp_o, + output narrow_out_req_t [ iomsb(Cfg.AxiExtNumMst):0] narrow_out_req_o, + input narrow_out_resp_t [ iomsb(Cfg.AxiExtNumMst):0] narrow_out_resp_i, + //----------------------------- + // Wide AXI ports + //----------------------------- + output wide_out_req_t [ iomsb(Cfg.AxiExtNumWideMst):0] wide_out_req_o, + input wide_out_resp_t [ iomsb(Cfg.AxiExtNumWideMst):0] wide_out_resp_i +); + + for (genvar extClusterIdx = 0; extClusterIdx < ExtClusters; extClusterIdx++) begin : gen_clusters + + chimera_cluster #( + .Cfg (Cfg), + .NrCores (`NRCORES(extClusterIdx)), + .narrow_in_req_t (narrow_in_req_t), + .narrow_in_resp_t (narrow_in_resp_t), + .narrow_out_req_t (narrow_out_req_t), + .narrow_out_resp_t(narrow_out_resp_t), + .wide_out_req_t (wide_out_req_t), + .wide_out_resp_t (wide_out_resp_t) + ) i_chimera_cluster ( + .soc_clk_i (soc_clk_i), + .clu_clk_i (clu_clk_i[extClusterIdx]), + .rst_ni, + .widemem_bypass_i (widemem_bypass_i[extClusterIdx]), + .debug_req_i (debug_req_i[`PREVNRCORES(extClusterIdx)+:`NRCORES(extClusterIdx)]), + .meip_i (xeip_i[`PREVNRCORES(extClusterIdx)+:`NRCORES(extClusterIdx)]), + .mtip_i (mtip_i[`PREVNRCORES(extClusterIdx)+:`NRCORES(extClusterIdx)]), + .msip_i (msip_i[`PREVNRCORES(extClusterIdx)+:`NRCORES(extClusterIdx)]), + .hart_base_id_i (10'(`PREVNRCORES(extClusterIdx) + 1)), + .cluster_base_addr_i(Cfg.AxiExtRegionStart[extClusterIdx][Cfg.AddrWidth-1:0]), + .boot_addr_i (SnitchBootROMRegionStart[31:0]), + + .narrow_in_req_i (narrow_in_req_i[extClusterIdx]), + .narrow_in_resp_o (narrow_in_resp_o[extClusterIdx]), + .narrow_out_req_o (narrow_out_req_o[2*extClusterIdx+:2]), + .narrow_out_resp_i(narrow_out_resp_i[2*extClusterIdx+:2]), + .wide_out_req_o (wide_out_req_o[extClusterIdx]), + .wide_out_resp_i (wide_out_resp_i[extClusterIdx]) + ); + + end : gen_clusters + + +endmodule diff --git a/hw/chimera_cluster.sv b/hw/chimera_cluster.sv new file mode 100644 index 0000000..c49bfb6 --- /dev/null +++ b/hw/chimera_cluster.sv @@ -0,0 +1,296 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Moritz Scherer + +module chimera_cluster + import chimera_pkg::*; + import cheshire_pkg::*; +#( + parameter cheshire_cfg_t Cfg = '0, + + parameter int unsigned NrCores = 9, + parameter type narrow_in_req_t = logic, + parameter type narrow_in_resp_t = logic, + parameter type narrow_out_req_t = logic, + parameter type narrow_out_resp_t = logic, + parameter type wide_out_req_t = logic, + parameter type wide_out_resp_t = logic +) ( + + input logic soc_clk_i, + input logic clu_clk_i, + input logic rst_ni, + input logic widemem_bypass_i, + //----------------------------- + // Interrupt ports + //----------------------------- + input logic [ NrCores-1:0] debug_req_i, + input logic [ NrCores-1:0] meip_i, + input logic [ NrCores-1:0] mtip_i, + input logic [ NrCores-1:0] msip_i, + //----------------------------- + // Cluster base addressing + //----------------------------- + input logic [ 9:0] hart_base_id_i, + input logic [Cfg.AddrWidth-1:0] cluster_base_addr_i, + input logic [ 31:0] boot_addr_i, + //----------------------------- + // Narrow AXI ports + //----------------------------- + input narrow_in_req_t narrow_in_req_i, + output narrow_in_resp_t narrow_in_resp_o, + output narrow_out_req_t [ 1:0] narrow_out_req_o, + input narrow_out_resp_t [ 1:0] narrow_out_resp_i, + //----------------------------- + //Wide AXI ports + //----------------------------- + output wide_out_req_t wide_out_req_o, + input wide_out_resp_t wide_out_resp_i +); + + `include "axi/typedef.svh" + + localparam int WideDataWidth = $bits(wide_out_req_o.w.data); + + localparam int WideMasterIdWidth = $bits(wide_out_req_o.aw.id); + localparam int WideSlaveIdWidth = WideMasterIdWidth + $clog2(Cfg.AxiExtNumWideMst) - 1; + + localparam int NarrowSlaveIdWidth = $bits(narrow_in_req_i.aw.id); + localparam int NarrowMasterIdWidth = $bits(narrow_out_req_o[0].aw.id); + + typedef logic [Cfg.AddrWidth-1:0] axi_addr_t; + typedef logic [Cfg.AxiUserWidth-1:0] axi_user_t; + + typedef logic [Cfg.AxiDataWidth-1:0] axi_soc_data_narrow_t; + typedef logic [Cfg.AxiDataWidth/8-1:0] axi_soc_strb_narrow_t; + + typedef logic [ClusterDataWidth-1:0] axi_cluster_data_narrow_t; + typedef logic [ClusterDataWidth/8-1:0] axi_cluster_strb_narrow_t; + + typedef logic [WideDataWidth-1:0] axi_cluster_data_wide_t; + typedef logic [WideDataWidth/8-1:0] axi_cluster_strb_wide_t; + + typedef logic [ClusterNarrowAxiMstIdWidth-1:0] axi_cluster_mst_id_width_narrow_t; + typedef logic [ClusterNarrowAxiMstIdWidth-1+2:0] axi_cluster_slv_id_width_narrow_t; + + typedef logic [NarrowMasterIdWidth-1:0] axi_soc_mst_id_width_narrow_t; + typedef logic [NarrowSlaveIdWidth-1:0] axi_soc_slv_id_width_narrow_t; + + typedef logic [WideMasterIdWidth-1:0] axi_mst_id_width_wide_t; + typedef logic [WideMasterIdWidth-1+2:0] axi_slv_id_width_wide_t; + + `AXI_TYPEDEF_ALL(axi_cluster_out_wide, axi_addr_t, axi_slv_id_width_wide_t, + axi_cluster_data_wide_t, axi_cluster_strb_wide_t, axi_user_t) + `AXI_TYPEDEF_ALL(axi_cluster_in_wide, axi_addr_t, axi_mst_id_width_wide_t, + axi_cluster_data_wide_t, axi_cluster_strb_wide_t, axi_user_t) + + `AXI_TYPEDEF_ALL(axi_soc_out_narrow, axi_addr_t, axi_soc_slv_id_width_narrow_t, + axi_soc_data_narrow_t, axi_soc_strb_narrow_t, axi_user_t) + `AXI_TYPEDEF_ALL(axi_soc_in_narrow, axi_addr_t, axi_soc_mst_id_width_narrow_t, + axi_soc_data_narrow_t, axi_soc_strb_narrow_t, axi_user_t) + + `AXI_TYPEDEF_ALL(axi_cluster_out_narrow, axi_addr_t, axi_cluster_slv_id_width_narrow_t, + axi_cluster_data_narrow_t, axi_cluster_strb_narrow_t, axi_user_t) + `AXI_TYPEDEF_ALL(axi_cluster_in_narrow, axi_addr_t, axi_cluster_mst_id_width_narrow_t, + axi_cluster_data_narrow_t, axi_cluster_strb_narrow_t, axi_user_t) + + `AXI_TYPEDEF_ALL(axi_cluster_out_narrow_socIW, axi_addr_t, axi_soc_mst_id_width_narrow_t, + axi_cluster_data_narrow_t, axi_cluster_strb_narrow_t, axi_user_t) + `AXI_TYPEDEF_ALL(axi_cluster_in_narrow_socIW, axi_addr_t, axi_soc_slv_id_width_narrow_t, + axi_cluster_data_narrow_t, axi_cluster_strb_narrow_t, axi_user_t) + + // Cluster-side in- and out- narrow ports used in chimera adapter + axi_cluster_in_narrow_req_t clu_axi_adapter_slv_req; + axi_cluster_in_narrow_resp_t clu_axi_adapter_slv_resp; + axi_cluster_out_narrow_req_t clu_axi_adapter_mst_req; + axi_cluster_out_narrow_resp_t clu_axi_adapter_mst_resp; + + // Cluster-side in- and out- narrow ports used in narrow adapter + axi_cluster_in_narrow_socIW_req_t clu_axi_narrow_slv_req; + axi_cluster_in_narrow_socIW_resp_t clu_axi_narrow_slv_rsp; + axi_cluster_out_narrow_socIW_req_t [1:0] clu_axi_narrow_mst_req; + axi_cluster_out_narrow_socIW_resp_t [1:0] clu_axi_narrow_mst_rsp; + + // Cluster-side out wide ports + axi_cluster_out_wide_req_t clu_axi_wide_mst_req; + axi_cluster_out_wide_resp_t clu_axi_wide_mst_resp; + + if (ClusterDataWidth != Cfg.AxiDataWidth) begin : gen_narrow_adapter + + narrow_adapter #( + .narrow_in_req_t (axi_soc_out_narrow_req_t), + .narrow_in_resp_t (axi_soc_out_narrow_resp_t), + .narrow_out_req_t (axi_soc_in_narrow_req_t), + .narrow_out_resp_t(axi_soc_in_narrow_resp_t), + + .clu_narrow_in_req_t (axi_cluster_in_narrow_socIW_req_t), + .clu_narrow_in_resp_t (axi_cluster_in_narrow_socIW_resp_t), + .clu_narrow_out_req_t (axi_cluster_out_narrow_socIW_req_t), + .clu_narrow_out_resp_t(axi_cluster_out_narrow_socIW_resp_t), + + .MstPorts(2), + .SlvPorts(1) + + ) i_cluster_narrow_adapter ( + .soc_clk_i(soc_clk_i), + .rst_ni, + + // SoC side narrow. + .narrow_in_req_i (narrow_in_req_i), + .narrow_in_resp_o (narrow_in_resp_o), + .narrow_out_req_o (narrow_out_req_o), + .narrow_out_resp_i(narrow_out_resp_i), + + // Cluster side narrow + .clu_narrow_in_req_o (clu_axi_narrow_slv_req), + .clu_narrow_in_resp_i (clu_axi_narrow_slv_rsp), + .clu_narrow_out_req_i (clu_axi_narrow_mst_req), + .clu_narrow_out_resp_o(clu_axi_narrow_mst_rsp) + + ); + + end else begin : gen_skip_narrow_adapter // if (ClusterDataWidth != Cfg.AxiDataWidth) + + assign clu_axi_narrow_slv_req = narrow_in_req_i; + assign narrow_in_resp_o = clu_axi_narrow_slv_rsp; + assign narrow_out_req_o = clu_axi_narrow_mst_req; + assign clu_axi_narrow_mst_rsp = narrow_out_resp_i; + + end + + chimera_cluster_adapter #( + .WidePassThroughRegionStart(Cfg.MemIslRegionStart), + .WidePassThroughRegionEnd (Cfg.MemIslRegionEnd), + + .narrow_in_req_t (axi_cluster_in_narrow_socIW_req_t), + .narrow_in_resp_t (axi_cluster_in_narrow_socIW_resp_t), + .narrow_out_req_t (axi_cluster_out_narrow_socIW_req_t), + .narrow_out_resp_t(axi_cluster_out_narrow_socIW_resp_t), + + .clu_narrow_in_req_t (axi_cluster_in_narrow_req_t), + .clu_narrow_in_resp_t (axi_cluster_in_narrow_resp_t), + .clu_narrow_out_req_t (axi_cluster_out_narrow_req_t), + .clu_narrow_out_resp_t(axi_cluster_out_narrow_resp_t), + + .wide_out_req_t (wide_out_req_t), + .wide_out_resp_t(wide_out_resp_t), + + .clu_wide_out_req_t (axi_cluster_out_wide_req_t), + .clu_wide_out_resp_t(axi_cluster_out_wide_resp_t) + + ) i_cluster_axi_adapter ( + .soc_clk_i(soc_clk_i), + .clu_clk_i(clu_clk_i), + .rst_ni, + + .narrow_in_req_i (clu_axi_narrow_slv_req), + .narrow_in_resp_o (clu_axi_narrow_slv_rsp), + .narrow_out_req_o (clu_axi_narrow_mst_req), + .narrow_out_resp_i(clu_axi_narrow_mst_rsp), + + .clu_narrow_in_req_o (clu_axi_adapter_slv_req), + .clu_narrow_in_resp_i (clu_axi_adapter_slv_resp), + .clu_narrow_out_req_i (clu_axi_adapter_mst_req), + .clu_narrow_out_resp_o(clu_axi_adapter_mst_resp), + + .wide_out_req_o (wide_out_req_o), + .wide_out_resp_i (wide_out_resp_i), + .clu_wide_out_req_i (clu_axi_wide_mst_req), + .clu_wide_out_resp_o(clu_axi_wide_mst_resp), + + .wide_mem_bypass_mode_i(widemem_bypass_i) + ); + + typedef struct packed { + logic [2:0] ema; + logic [1:0] emaw; + logic [0:0] emas; + } sram_cfg_t; + + typedef struct packed { + sram_cfg_t icache_tag; + sram_cfg_t icache_data; + sram_cfg_t tcdm; + } sram_cfgs_t; + + localparam int unsigned NumIntOutstandingLoads[NrCores] = '{NrCores{32'h1}}; + localparam int unsigned NumIntOutstandingMem[NrCores] = '{NrCores{32'h4}}; + + snitch_cluster #( + .PhysicalAddrWidth(Cfg.AddrWidth), + .NarrowDataWidth (ClusterDataWidth), // SCHEREMO: Convolve needs this... + .WideDataWidth (WideDataWidth), + .NarrowIdWidthIn (ClusterNarrowAxiMstIdWidth), + .WideIdWidthIn (WideMasterIdWidth), + .NarrowUserWidth (Cfg.AxiUserWidth), + .WideUserWidth (Cfg.AxiUserWidth), + + .BootAddr(SnitchBootROMRegionStart), + + .NrHives (1), + .NrCores (NrCores), + .TCDMDepth (1024), + .ZeroMemorySize (64), + .ClusterPeriphSize(64), + .NrBanks (16), + + .DMANumAxInFlight(3), + .DMAReqFifoDepth (3), + + .ICacheLineWidth('{256}), + .ICacheLineCount('{16}), + .ICacheSets ('{2}), + + .VMSupport(0), + .Xdma ({1'b1, {(NrCores - 1) {1'b0}}}), + + .NumIntOutstandingLoads(NumIntOutstandingLoads), + .NumIntOutstandingMem (NumIntOutstandingMem), + .RegisterOffloadReq (1), + .RegisterOffloadRsp (1), + .RegisterCoreReq (1), + .RegisterCoreRsp (1), + + .narrow_in_req_t (axi_cluster_in_narrow_req_t), + .narrow_in_resp_t(axi_cluster_in_narrow_resp_t), + .wide_in_req_t (axi_cluster_in_wide_req_t), + .wide_in_resp_t (axi_cluster_in_wide_resp_t), + + .narrow_out_req_t (axi_cluster_out_narrow_req_t), + .narrow_out_resp_t(axi_cluster_out_narrow_resp_t), + .wide_out_req_t (axi_cluster_out_wide_req_t), + .wide_out_resp_t (axi_cluster_out_wide_resp_t), + + .sram_cfg_t (sram_cfg_t), + .sram_cfgs_t(sram_cfgs_t), + + .RegisterExtWide ('0), + .RegisterExtNarrow('0) + ) i_test_cluster ( + + .clk_i (clu_clk_i), + .clk_d2_bypass_i('0), + .rst_ni, + + .debug_req_i(debug_req_i), + .meip_i (meip_i), + .mtip_i (mtip_i), + .msip_i (msip_i), + + .hart_base_id_i (hart_base_id_i), + .cluster_base_addr_i(cluster_base_addr_i), + .sram_cfgs_i ('0), + + .narrow_in_req_i (clu_axi_adapter_slv_req), + .narrow_in_resp_o (clu_axi_adapter_slv_resp), + .narrow_out_req_o (clu_axi_adapter_mst_req), + .narrow_out_resp_i(clu_axi_adapter_mst_resp), + .wide_in_req_i ('0), + .wide_in_resp_o (), + .wide_out_req_o (clu_axi_wide_mst_req), + .wide_out_resp_i (clu_axi_wide_mst_resp) + + ); +endmodule diff --git a/hw/chimera_cluster_adapter.sv b/hw/chimera_cluster_adapter.sv index bef175a..173643f 100644 --- a/hw/chimera_cluster_adapter.sv +++ b/hw/chimera_cluster_adapter.sv @@ -9,9 +9,6 @@ // to memory island / narrow crossbar module chimera_cluster_adapter #( - // Needs to be defined since there is no wide slave port - parameter int WideSlaveIdWidth = 0, - // Start address of Memory Island parameter int WidePassThroughRegionStart = '0, // End address of Memory Island @@ -23,37 +20,34 @@ module chimera_cluster_adapter #( parameter type narrow_out_resp_t = logic, parameter type wide_out_req_t = logic, parameter type wide_out_resp_t = logic, - parameter type wide_in_req_t = logic, - parameter type wide_in_resp_t = logic, + parameter type clu_narrow_in_req_t = logic, + parameter type clu_narrow_in_resp_t = logic, parameter type clu_narrow_out_req_t = logic, parameter type clu_narrow_out_resp_t = logic, parameter type clu_wide_out_req_t = logic, parameter type clu_wide_out_resp_t = logic ) ( - input logic soc_clk_i, - input logic clu_clk_i, - input logic rst_ni, - + input logic soc_clk_i, + input logic clu_clk_i, + input logic rst_ni, // From SOC - input narrow_in_req_t narrow_in_req_i, - output narrow_in_resp_t narrow_in_resp_o, - output narrow_out_req_t [1:0] narrow_out_req_o, - input narrow_out_resp_t [1:0] narrow_out_resp_i, - output wide_out_req_t wide_out_req_o, - input wide_out_resp_t wide_out_resp_i, - + input narrow_in_req_t narrow_in_req_i, + output narrow_in_resp_t narrow_in_resp_o, + output narrow_out_req_t [1:0] narrow_out_req_o, + input narrow_out_resp_t [1:0] narrow_out_resp_i, + output wide_out_req_t wide_out_req_o, + input wide_out_resp_t wide_out_resp_i, // To Cluster - output narrow_in_req_t clu_narrow_in_req_o, - input narrow_in_resp_t clu_narrow_in_resp_i, - input clu_narrow_out_req_t clu_narrow_out_req_i, - output clu_narrow_out_resp_t clu_narrow_out_resp_o, - input clu_wide_out_req_t clu_wide_out_req_i, - output clu_wide_out_resp_t clu_wide_out_resp_o, - + output clu_narrow_in_req_t clu_narrow_in_req_o, + input clu_narrow_in_resp_t clu_narrow_in_resp_i, + input clu_narrow_out_req_t clu_narrow_out_req_i, + output clu_narrow_out_resp_t clu_narrow_out_resp_o, + input clu_wide_out_req_t clu_wide_out_req_i, + output clu_wide_out_resp_t clu_wide_out_resp_o, // Testing - input logic wide_mem_bypass_mode_i + input logic wide_mem_bypass_mode_i ); `include "axi/typedef.svh" @@ -66,6 +60,7 @@ module chimera_cluster_adapter #( localparam int UserWidth = $bits(narrow_out_req_o[0].aw.user); localparam int ClusterNarrowMasterIdWidth = $bits(clu_narrow_out_req_i.aw.id); + localparam int ClusterNarrowSlaveIdWidth = $bits(clu_narrow_in_req_o.aw.id); localparam int ClusterWideMasterIdWidth = $bits(clu_wide_out_req_i.aw.id); localparam int SocNarrowMasterIdWidth = $bits(narrow_out_req_o[0].aw.id); @@ -83,29 +78,32 @@ module chimera_cluster_adapter #( typedef logic [SocNarrowMasterIdWidth-1:0] axi_soc_narrow_mst_id_width_t; typedef logic [SocNarrowSlaveIdWidth-1:0] axi_soc_narrow_slv_id_width_t; - typedef logic [SocWideMasterIdWidth-1:0] axi_soc_wide_mst_id_width_y; - `AXI_TYPEDEF_ALL(axi_clu_wide_out, axi_addr_width_t, axi_soc_wide_mst_id_width_y, + typedef logic [SocWideMasterIdWidth-1:0] axi_soc_wide_mst_id_width_t; + + `AXI_TYPEDEF_ALL(axi_wide_clu_out, axi_addr_width_t, axi_soc_wide_mst_id_width_t, axi_wide_data_width_t, axi_wide_strb_width_t, axi_user_width_t) - `AXI_TYPEDEF_ALL(axi_narrow_in, axi_addr_width_t, axi_soc_narrow_slv_id_width_t, + + `AXI_TYPEDEF_ALL(axi_narrow_soc_in, axi_addr_width_t, axi_soc_narrow_slv_id_width_t, axi_narrow_data_width_t, axi_narrow_strb_width_t, axi_user_width_t) - `AXI_TYPEDEF_ALL(axi_narrow_out, axi_addr_width_t, axi_soc_narrow_mst_id_width_t, + + `AXI_TYPEDEF_ALL(axi_narrow_soc_out, axi_addr_width_t, axi_soc_narrow_mst_id_width_t, axi_narrow_data_width_t, axi_narrow_strb_width_t, axi_user_width_t) - `AXI_TYPEDEF_ALL(axi_chimera_cluster_wrapper_out_wide_to_narrow, axi_addr_width_t, - axi_soc_narrow_mst_id_width_t, axi_wide_data_width_t, axi_wide_strb_width_t, - axi_user_width_t) + + `AXI_TYPEDEF_ALL(axi_wide_clu_wide_to_narrow, axi_addr_width_t, axi_soc_narrow_mst_id_width_t, + axi_wide_data_width_t, axi_wide_strb_width_t, axi_user_width_t) // Direct mst outputs of cluster -> has extra id bits on mst, gets iw converted - clu_narrow_out_req_t axi_from_cluster_iwc_req; - clu_narrow_out_resp_t axi_from_cluster_iwc_resp; + clu_narrow_out_req_t axi_from_cluster_narrow_iwc_req; + clu_narrow_out_resp_t axi_from_cluster_narrow_iwc_resp; clu_wide_out_req_t axi_from_cluster_wide_iwc_req; clu_wide_out_resp_t axi_from_cluster_wide_iwc_resp; // Id width adapted mst outputs of cluster - narrow_out_req_t axi_from_cluster_req; - narrow_out_resp_t axi_from_cluster_resp; + narrow_out_req_t axi_from_cluster_narrow_req; + narrow_out_resp_t axi_from_cluster_narrow_resp; wide_out_req_t axi_from_cluster_wide_req; wide_out_resp_t axi_from_cluster_wide_resp; @@ -123,21 +121,23 @@ module chimera_cluster_adapter #( // Rest of SoC is width converted from wide to narrow - axi_chimera_cluster_wrapper_out_wide_to_narrow_req_t axi_from_cluster_wide_to_narrow_iwc_req; - axi_chimera_cluster_wrapper_out_wide_to_narrow_resp_t axi_from_cluster_wide_to_narrow_iwc_resp; + axi_wide_clu_wide_to_narrow_req_t axi_from_cluster_wide_to_narrow_iwc_req; + axi_wide_clu_wide_to_narrow_resp_t axi_from_cluster_wide_to_narrow_iwc_resp; // Direct slv ports from SoC crossbar - narrow_in_resp_t axi_to_cluster_resp; - narrow_in_req_t axi_to_cluster_req; + narrow_in_resp_t axi_to_cluster_narrow_resp; + narrow_in_req_t axi_to_cluster_narrow_req; - assign clu_narrow_in_req_o = axi_to_cluster_req; - assign axi_to_cluster_resp = clu_narrow_in_resp_i; - assign axi_from_cluster_iwc_req = clu_narrow_out_req_i; - assign clu_narrow_out_resp_o = axi_from_cluster_iwc_resp; + assign axi_from_cluster_narrow_iwc_req = clu_narrow_out_req_i; + assign clu_narrow_out_resp_o = axi_from_cluster_narrow_iwc_resp; + + assign wide_out_req_o = axi_from_cluster_wide_memisl_req; + assign axi_from_cluster_wide_memisl_resp = wide_out_resp_i; + + assign axi_from_cluster_wide_iwc_req = clu_wide_out_req_i; + assign clu_wide_out_resp_o = axi_from_cluster_wide_iwc_resp; - assign axi_from_cluster_wide_iwc_req = clu_wide_out_req_i; - assign clu_wide_out_resp_o = axi_from_cluster_wide_iwc_resp; // WIDE-TO-NARROW CONVERSION // Catch requests over the wide port which do not go to the memory island; reroute them over the narrow AXI bus. @@ -157,15 +157,17 @@ module chimera_cluster_adapter #( end end + // SoC side wide demux for bypasses + axi_demux_simple #( - .AxiIdWidth (WideSlaveIdWidth), + .AxiIdWidth (SocWideMasterIdWidth), .AtopSupport(0), .axi_req_t (wide_out_req_t), .axi_resp_t (wide_out_resp_t), .NoMstPorts (2), - .MaxTrans (2), - .AxiLookBits(WideSlaveIdWidth), - .UniqueIds ('1) + .MaxTrans (16), // TODO: Tune this + .AxiLookBits(SocWideMasterIdWidth), + .UniqueIds (0) ) i_wide_demux ( .clk_i (soc_clk_i), .rst_ni, @@ -178,25 +180,27 @@ module chimera_cluster_adapter #( .mst_resps_i ({axi_from_cluster_wide_memisl_resp, axi_from_cluster_wide_to_narrow_resp}) ); - assign wide_out_req_o = axi_from_cluster_wide_memisl_req; - assign axi_from_cluster_wide_memisl_resp = wide_out_resp_i; + // SoC side Wide-to-narrow ID width converter for bypasses axi_iw_converter #( - .AxiSlvPortIdWidth (WideSlaveIdWidth), - .AxiMstPortIdWidth (SocNarrowMasterIdWidth), - .AxiSlvPortMaxUniqIds (1), - .AxiSlvPortMaxTxnsPerId(1), - .AxiSlvPortMaxTxns (2), - .AxiMstPortMaxUniqIds (2), - .AxiMstPortMaxTxnsPerId(2), - .AxiAddrWidth (AddrWidth), - .AxiDataWidth (WideDataWidth), - .AxiUserWidth (UserWidth), + .AxiSlvPortIdWidth(SocWideMasterIdWidth), + .AxiMstPortIdWidth(SocNarrowMasterIdWidth), + + .AxiSlvPortMaxUniqIds (2 ** SocWideMasterIdWidth), + .AxiSlvPortMaxTxnsPerId(4), // TODO: Tune this + .AxiSlvPortMaxTxns (16), // TODO: Tune this + + .AxiMstPortMaxUniqIds (2 ** SocNarrowMasterIdWidth), + .AxiMstPortMaxTxnsPerId(16), // TODO: Tune this + + .AxiAddrWidth(AddrWidth), + .AxiDataWidth(WideDataWidth), + .AxiUserWidth(UserWidth), .slv_req_t (wide_out_req_t), .slv_resp_t(wide_out_resp_t), - .mst_req_t (axi_chimera_cluster_wrapper_out_wide_to_narrow_req_t), - .mst_resp_t(axi_chimera_cluster_wrapper_out_wide_to_narrow_resp_t) + .mst_req_t (axi_wide_clu_wide_to_narrow_req_t), + .mst_resp_t(axi_wide_clu_wide_to_narrow_resp_t) ) wide_to_narrow_mst_iw_converter ( .clk_i (soc_clk_i), .rst_ni (rst_ni), @@ -206,27 +210,29 @@ module chimera_cluster_adapter #( .mst_resp_i(axi_from_cluster_wide_to_narrow_iwc_resp) ); + // SoC side Wide-to-narrow data width converter for bypasses + axi_dw_converter #( - .AxiMaxReads(2), + .AxiMaxReads(4), .AxiSlvPortDataWidth(WideDataWidth), .AxiMstPortDataWidth(NarrowDataWidth), .AxiAddrWidth (AddrWidth), .AxiIdWidth (SocNarrowMasterIdWidth), - .aw_chan_t(axi_narrow_out_aw_chan_t), - .b_chan_t (axi_narrow_out_b_chan_t), - .ar_chan_t(axi_narrow_out_ar_chan_t), + .aw_chan_t(axi_narrow_soc_out_aw_chan_t), + .b_chan_t (axi_narrow_soc_out_b_chan_t), + .ar_chan_t(axi_narrow_soc_out_ar_chan_t), - .slv_r_chan_t(axi_chimera_cluster_wrapper_out_wide_to_narrow_r_chan_t), - .slv_w_chan_t(axi_chimera_cluster_wrapper_out_wide_to_narrow_w_chan_t), - .mst_r_chan_t(axi_narrow_out_r_chan_t), - .mst_w_chan_t(axi_narrow_out_w_chan_t), + .slv_r_chan_t(axi_wide_clu_wide_to_narrow_r_chan_t), + .slv_w_chan_t(axi_wide_clu_wide_to_narrow_w_chan_t), + .mst_r_chan_t(axi_narrow_soc_out_r_chan_t), + .mst_w_chan_t(axi_narrow_soc_out_w_chan_t), .axi_mst_req_t (narrow_out_req_t), .axi_mst_resp_t(narrow_out_resp_t), - .axi_slv_req_t (axi_chimera_cluster_wrapper_out_wide_to_narrow_req_t), - .axi_slv_resp_t(axi_chimera_cluster_wrapper_out_wide_to_narrow_resp_t) + .axi_slv_req_t (axi_wide_clu_wide_to_narrow_req_t), + .axi_slv_resp_t(axi_wide_clu_wide_to_narrow_resp_t) ) i_wide_to_narrow_dw_converter ( .clk_i (soc_clk_i), .rst_ni, @@ -236,18 +242,49 @@ module chimera_cluster_adapter #( .mst_resp_i(narrow_out_resp_i[1]) ); - // NARROW MASTER PORT ID WIDTH CONVERSION + // Cluster-side reduce ID Width from SoC AXI Slave ID Width to 1 + // This relaxes pressure from Snitch Cluster Interco + + axi_iw_converter #( + .AxiSlvPortIdWidth(SocNarrowSlaveIdWidth), + .AxiMstPortIdWidth(ClusterNarrowSlaveIdWidth), + + .AxiSlvPortMaxUniqIds (2 ** SocNarrowSlaveIdWidth), + .AxiSlvPortMaxTxnsPerId(16), // TODO: Tune this + .AxiSlvPortMaxTxns (16), // TODO: Tune this + + .AxiMstPortMaxUniqIds (2 ** ClusterNarrowSlaveIdWidth), + .AxiMstPortMaxTxnsPerId(16), // TODO: Tune this + + .AxiAddrWidth(AddrWidth), + .AxiDataWidth(WideDataWidth), + .AxiUserWidth(UserWidth), + + .slv_req_t (narrow_in_req_t), + .slv_resp_t(narrow_in_resp_t), + .mst_req_t (clu_narrow_in_req_t), + .mst_resp_t(clu_narrow_in_resp_t) + ) i_narrow_slv_to_narrow_mst_iw_converter ( + .clk_i (clu_clk_i), + .rst_ni (rst_ni), + .slv_req_i (axi_to_cluster_narrow_req), + .slv_resp_o(axi_to_cluster_narrow_resp), + .mst_req_o (clu_narrow_in_req_o), + .mst_resp_i(clu_narrow_in_resp_i) + ); + + // NARROW MASTER PORT Cluster-side ID WIDTH CONVERSION axi_iw_converter #( .AxiSlvPortIdWidth(ClusterNarrowMasterIdWidth), .AxiMstPortIdWidth(SocNarrowMasterIdWidth), - .AxiSlvPortMaxUniqIds (2), - .AxiSlvPortMaxTxnsPerId(2), - .AxiSlvPortMaxTxns (4), + .AxiSlvPortMaxUniqIds (2 ** ClusterNarrowMasterIdWidth), + .AxiSlvPortMaxTxnsPerId(4), // TODO: Tune this + .AxiSlvPortMaxTxns (4), // TODO: Tune this - .AxiMstPortMaxUniqIds (2), - .AxiMstPortMaxTxnsPerId(4), + .AxiMstPortMaxUniqIds (2 ** SocNarrowMasterIdWidth), + .AxiMstPortMaxTxnsPerId(4), // TODO: Tune this .AxiAddrWidth(AddrWidth), .AxiDataWidth(NarrowDataWidth), @@ -259,23 +296,22 @@ module chimera_cluster_adapter #( ) narrow_mst_iw_converter ( .clk_i (clu_clk_i), .rst_ni (rst_ni), - .slv_req_i (axi_from_cluster_iwc_req), - .slv_resp_o(axi_from_cluster_iwc_resp), - .mst_req_o (axi_from_cluster_req), - .mst_resp_i(axi_from_cluster_resp) + .slv_req_i (axi_from_cluster_narrow_iwc_req), + .slv_resp_o(axi_from_cluster_narrow_iwc_resp), + .mst_req_o (axi_from_cluster_narrow_req), + .mst_resp_i(axi_from_cluster_narrow_resp) ); - // WIDE MASTER PORT ID WIDTH CONVERSION + // WIDE MASTER PORT Cluster-side ID WIDTH CONVERSION axi_iw_converter #( .AxiSlvPortIdWidth(ClusterWideMasterIdWidth), - .AxiMstPortIdWidth(WideSlaveIdWidth), + .AxiMstPortIdWidth(SocWideMasterIdWidth), - .AxiSlvPortMaxUniqIds (2), - .AxiSlvPortMaxTxnsPerId(2), + .AxiSlvPortMaxUniqIds (2 ** ClusterWideMasterIdWidth), + .AxiSlvPortMaxTxnsPerId(4), .AxiSlvPortMaxTxns (4), - - .AxiMstPortMaxUniqIds (2), + .AxiMstPortMaxUniqIds (2 ** SocWideMasterIdWidth), .AxiMstPortMaxTxnsPerId(4), .AxiAddrWidth(AddrWidth), @@ -294,14 +330,14 @@ module chimera_cluster_adapter #( .mst_resp_i(axi_from_cluster_wide_resp) ); - // AXI CDCS + // AXI Narrow CDC from SoC to Cluster axi_cdc #( - .aw_chan_t (axi_narrow_in_aw_chan_t), - .w_chan_t (axi_narrow_in_w_chan_t), - .b_chan_t (axi_narrow_in_b_chan_t), - .ar_chan_t (axi_narrow_in_ar_chan_t), - .r_chan_t (axi_narrow_in_r_chan_t), + .aw_chan_t (axi_narrow_soc_in_aw_chan_t), + .w_chan_t (axi_narrow_soc_in_w_chan_t), + .b_chan_t (axi_narrow_soc_in_b_chan_t), + .ar_chan_t (axi_narrow_soc_in_ar_chan_t), + .r_chan_t (axi_narrow_soc_in_r_chan_t), .axi_req_t (narrow_in_req_t), .axi_resp_t(narrow_in_resp_t) ) narrow_slv_cdc ( @@ -312,24 +348,25 @@ module chimera_cluster_adapter #( .dst_clk_i (clu_clk_i), .dst_rst_ni(rst_ni), - .dst_req_o (axi_to_cluster_req), - .dst_resp_i(axi_to_cluster_resp) + .dst_req_o (axi_to_cluster_narrow_req), + .dst_resp_i(axi_to_cluster_narrow_resp) ); + // AXI Narrow CDC from Cluster to SoC axi_cdc #( - .aw_chan_t (axi_narrow_out_aw_chan_t), - .w_chan_t (axi_narrow_out_w_chan_t), - .b_chan_t (axi_narrow_out_b_chan_t), - .ar_chan_t (axi_narrow_out_ar_chan_t), - .r_chan_t (axi_narrow_out_r_chan_t), + .aw_chan_t (axi_narrow_soc_out_aw_chan_t), + .w_chan_t (axi_narrow_soc_out_w_chan_t), + .b_chan_t (axi_narrow_soc_out_b_chan_t), + .ar_chan_t (axi_narrow_soc_out_ar_chan_t), + .r_chan_t (axi_narrow_soc_out_r_chan_t), .axi_req_t (narrow_out_req_t), .axi_resp_t(narrow_out_resp_t) ) narrow_mst_cdc ( .src_clk_i (clu_clk_i), .src_rst_ni(rst_ni), - .src_req_i (axi_from_cluster_req), - .src_resp_o(axi_from_cluster_resp), + .src_req_i (axi_from_cluster_narrow_req), + .src_resp_o(axi_from_cluster_narrow_resp), .dst_clk_i (soc_clk_i), .dst_rst_ni(rst_ni), @@ -337,12 +374,14 @@ module chimera_cluster_adapter #( .dst_resp_i(narrow_out_resp_i[0]) ); + // AXI Wide CDC from Cluster to SoC + axi_cdc #( - .aw_chan_t (axi_clu_wide_out_aw_chan_t), - .w_chan_t (axi_clu_wide_out_w_chan_t), - .b_chan_t (axi_clu_wide_out_b_chan_t), - .ar_chan_t (axi_clu_wide_out_ar_chan_t), - .r_chan_t (axi_clu_wide_out_r_chan_t), + .aw_chan_t (axi_wide_clu_out_aw_chan_t), + .w_chan_t (axi_wide_clu_out_w_chan_t), + .b_chan_t (axi_wide_clu_out_b_chan_t), + .ar_chan_t (axi_wide_clu_out_ar_chan_t), + .r_chan_t (axi_wide_clu_out_r_chan_t), .axi_req_t (wide_out_req_t), .axi_resp_t(wide_out_resp_t) ) wide_mst_cdc ( diff --git a/hw/chimera_pkg.sv b/hw/chimera_pkg.sv index d17a4c7..4400037 100644 --- a/hw/chimera_pkg.sv +++ b/hw/chimera_pkg.sv @@ -39,7 +39,6 @@ package chimera_pkg; localparam int ExtRegNum = SnitchBootROM + 1; localparam int ClusterDataWidth = 64; - localparam int SnitchBootROMIdx = 0; localparam doub_bt SnitchBootROMRegionStart = 64'h3000_0000; localparam doub_bt SnitchBootROMRegionEnd = 64'h3000_1000; @@ -48,6 +47,8 @@ package chimera_pkg; localparam doub_bt TopLevelRegionStart = 64'h3000_1000; localparam doub_bt TopLevelRegionEnd = 64'h3000_2000; + localparam aw_bt ClusterNarrowAxiMstIdWidth = 1; + function automatic cheshire_cfg_t gen_chimera_cfg(); localparam int AddrWidth = DefaultCfg.AddrWidth; @@ -66,13 +67,15 @@ package chimera_pkg; cfg.LlcOutConnect = 0; // AXI CFG - // SCHEREMO: Assume 2 Master per cluster -> 5 clusters, 1 host core, 1 DMA, 1 DBG Unit - cfg.AxiMstIdWidth = 4; + cfg.AxiMstIdWidth = 2; + cfg.MemIslAxiMstIdWidth = 1; cfg.AxiDataWidth = 32; cfg.AddrWidth = 32; cfg.LlcOutRegionEnd = 'hFFFF_FFFF; cfg.MemIslWidePorts = $countones(ChimeraClusterCfg.hasWideMasterPort); + cfg.MemIslNarrowToWideFactor = 4; + cfg.AxiExtNumWideMst = $countones(ChimeraClusterCfg.hasWideMasterPort); // SCHEREMO: Two ports for each cluster: one to convert stray wides, one for the original narrow cfg.AxiExtNumMst = ExtClusters + $countones(ChimeraClusterCfg.hasWideMasterPort); diff --git a/hw/chimera_top_wrapper.sv b/hw/chimera_top_wrapper.sv index e5c15c9..18b8205 100644 --- a/hw/chimera_top_wrapper.sv +++ b/hw/chimera_top_wrapper.sv @@ -55,7 +55,6 @@ module chimera_top_wrapper output logic [ 31:0] gpio_en_o ); - `include "axi/typedef.svh" `include "common_cells/registers.svh" `include "common_cells/assertions.svh" `include "cheshire/typedef.svh" @@ -257,7 +256,14 @@ module chimera_top_wrapper .data_o(snitch_bootrom_data) ); - // Cluster clock gates + logic [ExtClusters-1:0] wide_mem_bypass_mode; + assign wide_mem_bypass_mode = { + reg2hw.wide_mem_cluster_4_bypass.q, + reg2hw.wide_mem_cluster_3_bypass.q, + reg2hw.wide_mem_cluster_2_bypass.q, + reg2hw.wide_mem_cluster_1_bypass.q, + reg2hw.wide_mem_cluster_0_bypass.q + }; logic [ExtClusters-1:0] cluster_clock_gate_en; logic [ExtClusters-1:0] clu_clk_gated; @@ -278,280 +284,30 @@ module chimera_top_wrapper ); end - // Synch debug signals & interrupts - // SCHEREMO: These signals are synchronize in the Snitch cluster! - - logic [iomsb(NumIrqCtxts*Cfg.NumExtIrqHarts):0] clu_xeip_ext; - logic [ iomsb(Cfg.NumExtIrqHarts):0] clu_mtip_ext; - logic [ iomsb(Cfg.NumExtIrqHarts):0] clu_msip_ext; - logic [ iomsb(Cfg.NumExtDbgHarts):0] clu_dbg_ext_req; - - assign clu_xeip_ext = xeip_ext; - assign clu_mtip_ext = mtip_ext; - assign clu_msip_ext = msip_ext; - assign clu_dbg_ext_req = dbg_ext_req; - - localparam int WideDataWidth = $bits(axi_wide_mst_req[0].w.data); - - localparam int WideSlaveIdWidth = $bits(axi_wide_mst_req[0].aw.id); - localparam int NarrowSlaveIdWidth = $bits(axi_slv_req[0].aw.id); - localparam int NarrowMasterIdWidth = $bits(axi_mst_req[0].aw.id); - - typedef logic [Cfg.AddrWidth-1:0] axi_cluster_addr_t; - typedef logic [Cfg.AxiUserWidth-1:0] axi_cluster_user_t; - - typedef logic [Cfg.AxiDataWidth-1:0] axi_soc_data_narrow_t; - typedef logic [Cfg.AxiDataWidth/8-1:0] axi_soc_strb_narrow_t; - - typedef logic [ClusterDataWidth-1:0] axi_cluster_data_narrow_t; - typedef logic [ClusterDataWidth/8-1:0] axi_cluster_strb_narrow_t; - - typedef logic [NarrowSlaveIdWidth +2 -1:0] axi_cluster_slv_id_width_narrow_t; - typedef logic [NarrowSlaveIdWidth -1:0] axi_cluster_mst_id_width_narrow_t; - - typedef logic [NarrowMasterIdWidth -1:0] axi_soc_mst_id_width_narrow_t; - - typedef logic [WideDataWidth-1:0] axi_cluster_data_wide_t; - typedef logic [WideDataWidth/8-1:0] axi_cluster_strb_wide_t; - typedef logic [WideSlaveIdWidth +2 -1:0] axi_cluster_slv_id_width_wide_t; - - `AXI_TYPEDEF_ALL(axi_cluster_out_wide, axi_cluster_addr_t, axi_cluster_slv_id_width_wide_t, - axi_cluster_data_wide_t, axi_cluster_strb_wide_t, axi_cluster_user_t) - - `AXI_TYPEDEF_ALL(axi_cluster_soc_out_narrow, axi_cluster_addr_t, - axi_cluster_slv_id_width_narrow_t, axi_cluster_data_narrow_t, - axi_cluster_strb_narrow_t, axi_cluster_user_t) - - `AXI_TYPEDEF_ALL(axi_cluster_out_narrow, axi_cluster_addr_t, axi_soc_mst_id_width_narrow_t, - axi_cluster_data_narrow_t, axi_cluster_strb_narrow_t, axi_cluster_user_t) - - `AXI_TYPEDEF_ALL(axi_cluster_out_socside_narrow, axi_cluster_addr_t, - axi_soc_mst_id_width_narrow_t, axi_soc_data_narrow_t, axi_soc_strb_narrow_t, - axi_cluster_user_t) - - `AXI_TYPEDEF_ALL(axi_cluster_in_narrow, axi_cluster_addr_t, axi_cluster_mst_id_width_narrow_t, - axi_cluster_data_narrow_t, axi_cluster_strb_narrow_t, axi_cluster_user_t) - - `AXI_TYPEDEF_ALL(axi_cluster_in_socside_narrow, axi_cluster_addr_t, - axi_cluster_mst_id_width_narrow_t, axi_soc_data_narrow_t, axi_soc_strb_narrow_t, - axi_cluster_user_t) - - // Cluster-side in- and out- narrow ports used in chimera adapter - axi_cluster_in_narrow_req_t [iomsb(Cfg.AxiExtNumSlv):0] clu_axi_adapter_slv_req; - axi_cluster_in_narrow_resp_t [iomsb(Cfg.AxiExtNumSlv):0] clu_axi_adapter_slv_resp; - axi_cluster_soc_out_narrow_req_t [iomsb(Cfg.AxiExtNumMst):0] clu_axi_adapter_mst_req; - axi_cluster_soc_out_narrow_resp_t [iomsb(Cfg.AxiExtNumMst):0] clu_axi_adapter_mst_resp; - - // Cluster-side in- and out- narrow ports used in narrow adapter - axi_cluster_in_narrow_req_t [iomsb(Cfg.AxiExtNumSlv):0] clu_axi_narrow_slv_req; - axi_cluster_in_narrow_resp_t [iomsb(Cfg.AxiExtNumSlv):0] clu_axi_narrow_slv_rsp; - axi_cluster_out_narrow_req_t [iomsb(Cfg.AxiExtNumMst):0] clu_axi_narrow_mst_req; - axi_cluster_out_narrow_resp_t [iomsb(Cfg.AxiExtNumMst):0] clu_axi_narrow_mst_rsp; - - // Cluster-side out wide ports - axi_cluster_out_wide_req_t [ iomsb(ExtClusters):0] clu_axi_wide_mst_req; - axi_cluster_out_wide_resp_t [ iomsb(ExtClusters):0] clu_axi_wide_mst_resp; - - // Cluster Adapters - logic [ ExtClusters-1:0] wide_mem_bypass_mode; - assign wide_mem_bypass_mode = { - reg2hw.wide_mem_cluster_4_bypass.q, - reg2hw.wide_mem_cluster_3_bypass.q, - reg2hw.wide_mem_cluster_2_bypass.q, - reg2hw.wide_mem_cluster_1_bypass.q, - reg2hw.wide_mem_cluster_0_bypass.q - }; - - for ( - genvar extClusterIdx = 0; extClusterIdx < ExtClusters; extClusterIdx++ - ) begin : gen_clusters_adapters - - if (ClusterDataWidth != Cfg.AxiDataWidth) begin : gen_narrow_adapter - - narrow_adapter #( - .narrow_in_req_t (axi_cluster_in_socside_narrow_req_t), - .narrow_in_resp_t (axi_cluster_in_socside_narrow_resp_t), - .narrow_out_req_t (axi_cluster_out_socside_narrow_req_t), - .narrow_out_resp_t(axi_cluster_out_socside_narrow_resp_t), - - .clu_narrow_in_req_t (axi_cluster_in_narrow_req_t), - .clu_narrow_in_resp_t (axi_cluster_in_narrow_resp_t), - .clu_narrow_out_req_t (axi_cluster_out_narrow_req_t), - .clu_narrow_out_resp_t(axi_cluster_out_narrow_resp_t), - - .MstPorts(2), - .SlvPorts(1) - - ) i_cluster_narrow_adapter ( - .soc_clk_i(soc_clk_i), - .rst_ni, - - // SoC side narrow. - .narrow_in_req_i (axi_slv_req[extClusterIdx]), - .narrow_in_resp_o (axi_slv_rsp[extClusterIdx]), - .narrow_out_req_o (axi_mst_req[2*extClusterIdx+:2]), - .narrow_out_resp_i(axi_mst_rsp[2*extClusterIdx+:2]), - - // Cluster side narrow - .clu_narrow_in_req_o (clu_axi_narrow_slv_req[extClusterIdx]), - .clu_narrow_in_resp_i (clu_axi_narrow_slv_rsp[extClusterIdx]), - .clu_narrow_out_req_i (clu_axi_narrow_mst_req[2*extClusterIdx+:2]), - .clu_narrow_out_resp_o(clu_axi_narrow_mst_rsp[2*extClusterIdx+:2]) - - ); - - - end else begin : gen_skip_narrow_adapter // if (ClusterDataWidth != Cfg.AxiDataWidth) - - assign clu_axi_narrow_slv_req = axi_slv_req; - assign clu_axi_narrow_slv_rsp = axi_slv_rsp; - assign clu_axi_narrow_mst_req = axi_mst_req; - assign clu_axi_narrow_mst_rsp = axi_mst_rsp; - - end - - - chimera_cluster_adapter #( - .WideSlaveIdWidth(WideSlaveIdWidth), - - .WidePassThroughRegionStart(Cfg.MemIslRegionStart), - .WidePassThroughRegionEnd (Cfg.MemIslRegionEnd), - - .narrow_in_req_t (axi_cluster_in_narrow_req_t), - .narrow_in_resp_t (axi_cluster_in_narrow_resp_t), - .narrow_out_req_t (axi_cluster_out_narrow_req_t), - .narrow_out_resp_t(axi_cluster_out_narrow_resp_t), - - .clu_narrow_out_req_t (axi_cluster_soc_out_narrow_req_t), - .clu_narrow_out_resp_t(axi_cluster_soc_out_narrow_resp_t), - - .wide_in_req_t (axi_wide_slv_req_t), - .wide_in_resp_t (axi_wide_slv_rsp_t), - .wide_out_req_t (axi_wide_mst_req_t), - .wide_out_resp_t(axi_wide_mst_rsp_t), - - .clu_wide_out_req_t (axi_cluster_out_wide_req_t), - .clu_wide_out_resp_t(axi_cluster_out_wide_resp_t) - - ) i_cluster_axi_adapter ( - .soc_clk_i(soc_clk_i), - .clu_clk_i(clu_clk_gated[extClusterIdx]), - .rst_ni, - - .narrow_in_req_i (clu_axi_narrow_slv_req[extClusterIdx]), - .narrow_in_resp_o (clu_axi_narrow_slv_rsp[extClusterIdx]), - .narrow_out_req_o (clu_axi_narrow_mst_req[2*extClusterIdx+:2]), - .narrow_out_resp_i(clu_axi_narrow_mst_rsp[2*extClusterIdx+:2]), - - .clu_narrow_in_req_o (clu_axi_adapter_slv_req[extClusterIdx]), - .clu_narrow_in_resp_i (clu_axi_adapter_slv_resp[extClusterIdx]), - .clu_narrow_out_req_i (clu_axi_adapter_mst_req[extClusterIdx]), - .clu_narrow_out_resp_o(clu_axi_adapter_mst_resp[extClusterIdx]), - - .wide_out_req_o (axi_wide_mst_req[extClusterIdx]), - .wide_out_resp_i (axi_wide_mst_rsp[extClusterIdx]), - .clu_wide_out_req_i (clu_axi_wide_mst_req[extClusterIdx]), - .clu_wide_out_resp_o(clu_axi_wide_mst_resp[extClusterIdx]), - - .wide_mem_bypass_mode_i(wide_mem_bypass_mode[extClusterIdx]) - ); - - end : gen_clusters_adapters - - // Clusters - - typedef struct packed { - logic [2:0] ema; - logic [1:0] emaw; - logic [0:0] emas; - } sram_cfg_t; - - typedef struct packed { - sram_cfg_t icache_tag; - sram_cfg_t icache_data; - sram_cfg_t tcdm; - } sram_cfgs_t; - - localparam int unsigned NumIntOutstandingLoads[9] = '{1, 1, 1, 1, 1, 1, 1, 1, 1}; - localparam int unsigned NumIntOutstandingMem[9] = '{4, 4, 4, 4, 4, 4, 4, 4, 4}; - - for (genvar extClusterIdx = 0; extClusterIdx < ExtClusters; extClusterIdx++) begin : gen_clusters - snitch_cluster #( - .PhysicalAddrWidth(Cfg.AddrWidth), - .NarrowDataWidth (ClusterDataWidth), // SCHEREMO: Convolve needs this... - .WideDataWidth (WideDataWidth), - .NarrowIdWidthIn (NarrowSlaveIdWidth), - .WideIdWidthIn (WideSlaveIdWidth), - .NarrowUserWidth (Cfg.AxiUserWidth), - .WideUserWidth (Cfg.AxiUserWidth), - - .BootAddr(SnitchBootROMRegionStart), - - .NrHives (1), - .NrCores (9), - .TCDMDepth (1024), - .ZeroMemorySize (64), - .ClusterPeriphSize(64), - .NrBanks (16), - - .DMANumAxInFlight(3), - .DMAReqFifoDepth (3), - - .ICacheLineWidth('{256}), - .ICacheLineCount('{16}), - .ICacheSets ('{2}), - - .VMSupport(0), - .Xdma (9'b100000000), - - .NumIntOutstandingLoads(NumIntOutstandingLoads), - .NumIntOutstandingMem (NumIntOutstandingMem), - .RegisterOffloadReq (1), - .RegisterOffloadRsp (1), - .RegisterCoreReq (1), - .RegisterCoreRsp (1), - - .narrow_in_req_t (axi_cluster_in_narrow_req_t), - .narrow_in_resp_t(axi_cluster_in_narrow_resp_t), - .wide_in_req_t (axi_wide_slv_req_t), - .wide_in_resp_t (axi_wide_slv_rsp_t), - - .narrow_out_req_t (axi_cluster_soc_out_narrow_req_t), - .narrow_out_resp_t(axi_cluster_soc_out_narrow_resp_t), - .wide_out_req_t (axi_cluster_out_wide_req_t), - .wide_out_resp_t (axi_cluster_out_wide_resp_t), - - .sram_cfg_t (sram_cfg_t), - .sram_cfgs_t(sram_cfgs_t), - - .RegisterExtWide ('0), - .RegisterExtNarrow('0) - ) i_test_cluster ( - - .clk_i (clu_clk_i), - .clk_d2_bypass_i('0), - .rst_ni, - - .debug_req_i(clu_dbg_ext_req[extClusterIdx*9+:9]), - .meip_i (clu_xeip_ext[extClusterIdx*9+:9]), - .mtip_i (clu_mtip_ext[extClusterIdx*9+:9]), - .msip_i (clu_msip_ext[extClusterIdx*9+:9]), - - .hart_base_id_i (10'(extClusterIdx * 9 + 1)), - .cluster_base_addr_i(Cfg.AxiExtRegionStart[extClusterIdx][Cfg.AddrWidth-1:0]), - .sram_cfgs_i ('0), - - .narrow_in_req_i (clu_axi_adapter_slv_req[extClusterIdx]), - .narrow_in_resp_o (clu_axi_adapter_slv_resp[extClusterIdx]), - .narrow_out_req_o (clu_axi_adapter_mst_req[extClusterIdx]), - .narrow_out_resp_i(clu_axi_adapter_mst_resp[extClusterIdx]), - .wide_in_req_i ('0), - .wide_in_resp_o (), - .wide_out_req_o (clu_axi_wide_mst_req[extClusterIdx]), - .wide_out_resp_i (clu_axi_wide_mst_resp[extClusterIdx]) - - ); + chimera_clu_domain #( + .Cfg (Cfg), + .narrow_in_req_t (axi_slv_req_t), + .narrow_in_resp_t (axi_slv_rsp_t), + .narrow_out_req_t (axi_mst_req_t), + .narrow_out_resp_t(axi_mst_rsp_t), + .wide_out_req_t (axi_wide_mst_req_t), + .wide_out_resp_t (axi_wide_mst_rsp_t) + ) i_cluster_domain ( + .soc_clk_i (soc_clk_i), + .clu_clk_i (clu_clk_gated), + .rst_ni, + .widemem_bypass_i (wide_mem_bypass_mode), + .debug_req_i (dbg_ext_req), + .xeip_i (xeip_ext), + .mtip_i (mtip_ext), + .msip_i (msip_ext), + .narrow_in_req_i (axi_slv_req), + .narrow_in_resp_o (axi_slv_rsp), + .narrow_out_req_o (axi_mst_req), + .narrow_out_resp_i(axi_mst_rsp), + .wide_out_req_o (axi_wide_mst_req), + .wide_out_resp_i (axi_wide_mst_rsp) - end : gen_clusters + ); endmodule diff --git a/sw/include/soc_addr_map.h b/sw/include/soc_addr_map.h index b748a0e..c04a2f1 100644 --- a/sw/include/soc_addr_map.h +++ b/sw/include/soc_addr_map.h @@ -4,6 +4,8 @@ // // Moritz Scherer +#include + #define CLINT_CTRL_BASE 0x02040000 #define SOC_CTRL_BASE 0x30001000 @@ -13,3 +15,13 @@ #define CLUSTER_2_BASE 0x40400000 #define CLUSTER_3_BASE 0x40600000 #define CLUSTER_4_BASE 0x40800000 + +#define CLUSTER_0_NUMCORES 9 +#define CLUSTER_1_NUMCORES 9 +#define CLUSTER_2_NUMCORES 9 +#define CLUSTER_3_NUMCORES 9 +#define CLUSTER_4_NUMCORES 9 + +static uint8_t _chimera_numCores[] = {CLUSTER_0_NUMCORES, CLUSTER_1_NUMCORES, CLUSTER_2_NUMCORES, + CLUSTER_3_NUMCORES, CLUSTER_4_NUMCORES}; +#define _chimera_numClusters 5 diff --git a/sw/lib/offload.c b/sw/lib/offload.c index 45a8335..6171ffa 100644 --- a/sw/lib/offload.c +++ b/sw/lib/offload.c @@ -43,7 +43,10 @@ void offloadToCluster(void *function, uint8_t clusterId) { *snitchBootAddr = function; - uint32_t hartId = clusterId * 9 + 1; + uint32_t hartId = 1; + for (uint32_t i = 0; i < clusterId; i++) { + hartId += _chimera_numCores[i]; + } volatile uint32_t *interruptTarget = ((uint32_t *)CLINT_CTRL_BASE) + hartId; waitClusterBusy(clusterId);