From 2cc29c71b76a6ed245c7ece31507199a965fa88a Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 15 May 2024 11:03:28 +0200 Subject: [PATCH 01/17] Added independent pipeline control / aux module. --- Bender.yml | 1 + src/fpnew_aux.sv | 122 +++++++++++++++++++++++++++++++++++++++++++++++ src_files.yml | 1 + 3 files changed, 124 insertions(+) create mode 100644 src/fpnew_aux.sv diff --git a/Bender.yml b/Bender.yml index b635aa07..91847b2c 100644 --- a/Bender.yml +++ b/Bender.yml @@ -37,6 +37,7 @@ sources: - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v + - src/fpnew_aux.sv - src/fpnew_divsqrt_th_32.sv - src/fpnew_divsqrt_th_64_multi.sv - src/fpnew_divsqrt_multi.sv diff --git a/src/fpnew_aux.sv b/src/fpnew_aux.sv new file mode 100644 index 00000000..28059db5 --- /dev/null +++ b/src/fpnew_aux.sv @@ -0,0 +1,122 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Maurus Item +// +// Description Aux chain for FPNew, handles transmitting of shared handshake and aux data +// And enables the correct lanes so they always stay in sync. 
+ +`include "common_cells/registers.svh" + +module fpnew_aux #( + parameter int unsigned NumPipeRegs = 0, + parameter type TagType = logic, + parameter type AuxType = logic, + parameter int unsigned NumLanes = 1 +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input TagType tag_i, + input AuxType aux_i, + input logic is_vector_i, + input logic [NumLanes-1:0] lane_active_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output TagType tag_o, + output AuxType aux_o, + output logic is_vector_o, + output logic [NumLanes-1:0] lane_active_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Register Enable for Lanes + output logic [NumPipeRegs-1:0] reg_enable_o, + output logic [NumPipeRegs-1:0] vector_reg_enable_o, + output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o, + // Indication of valid data in flight + output logic busy_o +); + + + // --------------- + // Input pipeline + // --------------- + // Input pipeline signals, index i holds signal after i register stages + TagType [0:NumPipeRegs] tag; + AuxType [0:NumPipeRegs] aux; + logic [0:NumPipeRegs] is_vector; + logic [0:NumPipeRegs][NumLanes-1:0] lane_active; + logic [0:NumPipeRegs] valid; + + // Ready signal is combinatorial for all stages + logic [0:NumPipeRegs] ready; + + // First element of pipeline is taken from inputs + assign tag [0] = tag_i; + assign aux [0] = aux_i; + assign is_vector [0] = is_vector_i; + assign valid [0] = in_valid_i; + assign lane_active[0] = lane_active_i; + + // Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_input_pipeline + + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign ready[i] = ready[i+1] | ~valid[i+1]; + + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(valid[i+1], valid[i], ready[i], flush_i, 1'b0, clk_i, rst_ni) + + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = ready[i] & valid[i]; + + // Drive external registers with reg enable + assign reg_enable_o[i] = reg_ena; + + // Drive external vector registers with reg enable if operation is a vector + assign vector_reg_enable_o[i] = reg_ena & is_vector[i]; + for (genvar l = 0; l < NumLanes; l++) begin + assign lane_reg_enable_o[l][i] = reg_ena & lane_active[i][l]; + end + + // Generate the pipeline registers within the stages, use enable-registers + `FFL( tag[i+1], tag[i], reg_ena, TagType'('0)) + `FFL( aux[i+1], aux[i], reg_ena, AuxType'('0)) + `FFL( is_vector[i+1], is_vector[i], reg_ena, '0 ) + `FFL(lane_active[i+1], lane_active[i], reg_ena, '0 ) + end + + // Ready travels backwards from output side, driven by downstream circuitry + assign ready[NumPipeRegs] = out_ready_i; + + // Assign module outputs + assign tag_o = tag [NumPipeRegs]; + assign aux_o = aux [NumPipeRegs]; + assign is_vector_o = is_vector [NumPipeRegs]; + assign out_valid_o = valid [NumPipeRegs]; + assign lane_active_o = lane_active[NumPipeRegs]; + + // Assign output Flags: Busy if any element inside the pipe is valid + assign busy_o = |valid; +endmodule diff --git a/src_files.yml b/src_files.yml index 84348a98..31746ec2 100644 --- a/src_files.yml +++ b/src_files.yml @@ -33,6 +33,7 @@ fpnew: vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v, + src/fpnew_aux.sv, src/fpnew_divsqrt_th_32.sv, src/fpnew_divsqrt_th_64_multi.sv, src/fpnew_divsqrt_multi.sv, From cc694071fa43bb94eacf939a8420f275bfed5c63 
Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 14:20:29 +0200 Subject: [PATCH 02/17] Moved handshake and aux chains outside of computational elements. --- src/fpnew_cast_multi.sv | 96 +++++--------------------------- src/fpnew_fma.sv | 93 ++++--------------------------- src/fpnew_fma_multi.sv | 92 ++++-------------------------- src/fpnew_noncomp.sv | 70 +++-------------------- src/fpnew_sdotp_multi.sv | 95 ++++--------------------------- src/fpnew_sdotp_multi_wrapper.sv | 30 +--------- 6 files changed, 60 insertions(+), 416 deletions(-) diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index fca5f3b6..59827da4 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -21,8 +21,7 @@ module fpnew_cast_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, + // Do not change localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig), fpnew_pkg::max_int_width(IntFmtConfig)), @@ -39,25 +38,14 @@ module fpnew_cast_multi #( input fpnew_pkg::fp_format_e src_fmt_i, input fpnew_pkg::fp_format_e dst_fmt_i, input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -117,12 +105,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e 
[0:NUM_INP_REGS] inp_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -133,24 +116,14 @@ module fpnew_cast_multi #( assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_int_fmt_q[0] = int_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -160,9 +133,7 @@ module fpnew_cast_multi #( `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -318,9 +289,8 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e src_fmt_q2; fpnew_pkg::fp_format_e dst_fmt_q2; fpnew_pkg::int_format_e int_fmt_q2; - // Internal pipeline signals, index i holds signal after i register stages - + // Internal pipeline signals, index i holds signal after i register stages logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; @@ -334,12 +304,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; 
fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_input_sign_q[0] = input_sign; @@ -355,25 +320,14 @@ module fpnew_cast_multi #( assign mid_pipe_src_fmt_q[0] = src_fmt_q; assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; assign mid_pipe_int_fmt_q[0] = int_fmt_q; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) @@ -388,9 +342,7 @@ module fpnew_cast_multi #( `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; @@ -749,52 +701,30 @@ module fpnew_cast_multi #( logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign 
out_pipe_status_q[0] = status_d; assign out_pipe_ext_bit_q[0] = extension_bit; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = 
out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index d725a5d1..c6ef899a 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -19,8 +19,6 @@ module fpnew_fma #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( @@ -32,25 +30,14 @@ module fpnew_fma #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -105,12 +92,7 @@ module fpnew_fma #( fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] 
inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -118,33 +100,21 @@ module fpnew_fma #( assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], 
inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // ----------------- @@ -412,12 +382,7 @@ module fpnew_fma #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction; @@ -432,25 +397,14 @@ module fpnew_fma #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -464,10 +418,9 @@ module fpnew_fma #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; @@ -647,50 +600,28 @@ module fpnew_fma #( // Output pipeline signals, index i holds signal after i register stages fp_t [0:NUM_OUT_REGS] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign 
out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box 
result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index e2320846..77886424 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -19,8 +19,6 @@ module fpnew_fma_multi #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -35,25 +33,14 @@ module fpnew_fma_multi #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -118,12 +105,7 @@ module fpnew_fma_multi #( logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - 
logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -133,24 +115,14 @@ module fpnew_fma_multi #( assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -159,9 +131,7 @@ module fpnew_fma_multi #( `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) 
- `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -497,12 +467,7 @@ module fpnew_fma_multi #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction; @@ -518,25 +483,14 @@ module fpnew_fma_multi #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -551,9 +505,7 @@ module fpnew_fma_multi #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; @@ -796,50 +748,28 @@ module fpnew_fma_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; - assign 
out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = 
out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index 8a182617..afd4721c 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -19,8 +19,6 @@ module fpnew_noncomp #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( @@ -32,27 +30,16 @@ module fpnew_noncomp #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, output fpnew_pkg::classmask_e class_mask_o, output logic is_class_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -90,12 +77,7 @@ module fpnew_noncomp #( fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - 
logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -103,33 +85,21 @@ module fpnew_noncomp #( assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], 
reg_ena, AuxType'('0)) end // --------------------- @@ -358,12 +328,7 @@ module fpnew_noncomp #( logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q; fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; logic [0:NUM_OUT_REGS] out_pipe_is_class_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; @@ -371,45 +336,28 @@ module fpnew_noncomp #( assign out_pipe_extension_bit_q[0] = extension_bit_d; assign out_pipe_class_mask_q[0] = class_mask_d; assign out_pipe_is_class_q[0] = is_class_d; - assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0) `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS]; assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_sdotp_multi.sv b/src/fpnew_sdotp_multi.sv index 
a08419cc..c504edf1 100644 --- a/src/fpnew_sdotp_multi.sv +++ b/src/fpnew_sdotp_multi.sv @@ -49,8 +49,6 @@ module fpnew_sdotp_multi #( // Supported destination formats (FP16, FP16ALTt, FP32) parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change localparam int unsigned SRC_WIDTH = fpnew_pkg::max_fp_width(SrcDotpFpFmtConfig), @@ -75,25 +73,14 @@ module fpnew_sdotp_multi #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, // format of op_a, op_b, op_c, op_d input fpnew_pkg::fp_format_e dst_fmt_i, // format of the accumulator (op_e) and result - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [DST_WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -183,12 +170,7 @@ module fpnew_sdotp_multi #( logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operand_a_q[0] = operand_a_i; @@ -202,24 +184,14 @@ module fpnew_sdotp_multi 
#( assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operand_a_q[i+1], inp_pipe_operand_a_q[i], reg_ena, '0) `FFL(inp_pipe_operand_b_q[i+1], inp_pipe_operand_b_q[i], reg_ena, '0) @@ -232,9 +204,7 @@ module fpnew_sdotp_multi #( `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::FP8) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::FP16) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operand_a_q = 
inp_pipe_operand_a_q[NUM_INP_REGS]; @@ -969,13 +939,8 @@ module fpnew_sdotp_multi #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_dst_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; logic [0:NUM_MID_REGS] mid_pipe_sum_carry_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction_first; @@ -1001,26 +966,15 @@ module fpnew_sdotp_multi #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; assign mid_pipe_sum_carry_q[0] = sum_carry; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_final_sign_zero_q[i+1], mid_pipe_final_sign_zero_q[i], reg_ena, '0) @@ -1045,9 +999,7 @@ module fpnew_sdotp_multi #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) `FFL(mid_pipe_sum_carry_q[i+1], mid_pipe_sum_carry_q[i], reg_ena, '0) end // Output stage: assign selected pipe outputs to signals for later use @@ -1314,8 +1266,7 @@ module fpnew_sdotp_multi #( ? 
final_sign_zero_q : final_sign_z; logic enable_rsr; - assign enable_rsr = (rnd_mode_q == fpnew_pkg::RSR) && (mid_pipe_ready[NUM_MID_REGS] - && mid_pipe_valid_q[NUM_MID_REGS]); + assign enable_rsr = (rnd_mode_q == fpnew_pkg::RSR) && reg_enable_i[NUM_MID_REGS]; // Perform the rounding fpnew_rounding #( .AbsWidth ( SUPER_DST_EXP_BITS + SUPER_DST_MAN_BITS ), @@ -1395,50 +1346,28 @@ module fpnew_sdotp_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][DST_WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_sdotp_multi_wrapper.sv b/src/fpnew_sdotp_multi_wrapper.sv index d402b67a..108629b0 100644 --- a/src/fpnew_sdotp_multi_wrapper.sv +++ b/src/fpnew_sdotp_multi_wrapper.sv @@ -22,8 +22,6 @@ module fpnew_sdotp_multi_wrapper #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - 
parameter type TagType = logic, - parameter type AuxType = logic, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change localparam fpnew_pkg::fmt_logic_t FpSrcFmtConfig = FpFmtConfig[0] ? (FpFmtConfig & 6'b001111) : (FpFmtConfig & 6'b000101), @@ -44,25 +42,14 @@ module fpnew_sdotp_multi_wrapper #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [OPERAND_WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -147,8 +134,6 @@ module fpnew_sdotp_multi_wrapper #( .DstDotpFpFmtConfig ( FpDstFmtConfig ), // FP32, FP16, FP16ALT .NumPipeRegs ( NumPipeRegs ), .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( AuxType ), .StochasticRndImplementation ( StochasticRndImplementation ) ) i_fpnew_sdotp_multi ( .clk_i, @@ -165,21 +150,12 @@ module fpnew_sdotp_multi_wrapper #( .op_mod_i, .src_fmt_i, // format of the multiplicands .dst_fmt_i, // format of the addend and result - .tag_i, .mask_i, - .aux_i, - .in_valid_i, - .in_ready_o , - .flush_i, .result_o ( local_result[DST_WIDTH-1:0] ), .status_o, .extension_bit_o, - .tag_o, .mask_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o + .reg_enable_i ); if(OPERAND_WIDTH > DST_WIDTH) begin From faed7308096eb0d9f3708046e95502c46eca2efb Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 12 Jun 2024 11:53:58 +0200 Subject: [PATCH 03/17] Added aux module that can deal with lane FSMs. 
--- Bender.yml | 1 + src/fpnew_aux_fsm.sv | 311 +++++++++++++++++++++++++++++++++++++++++++ src_files.yml | 1 + 3 files changed, 313 insertions(+) create mode 100644 src/fpnew_aux_fsm.sv diff --git a/Bender.yml b/Bender.yml index 91847b2c..ab2bc73c 100644 --- a/Bender.yml +++ b/Bender.yml @@ -38,6 +38,7 @@ sources: - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v - src/fpnew_aux.sv + - src/fpnew_aux_fsm.sv - src/fpnew_divsqrt_th_32.sv - src/fpnew_divsqrt_th_64_multi.sv - src/fpnew_divsqrt_multi.sv diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv new file mode 100644 index 00000000..4b1c6013 --- /dev/null +++ b/src/fpnew_aux_fsm.sv @@ -0,0 +1,311 @@
+// Copyright 2024 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Maurus Item
+//
+// Description Aux chain for FPNew, handles transmitting of shared handshake and aux data
+// And enables the correct lanes so they always stay in sync.
+// This version can be used for lanes that have some form of FSM in them and only eventually are ready
+
+`include "common_cells/registers.svh"
+
+module fpnew_aux_fsm #(
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  parameter int unsigned             NumLanes    = 1
+) (
+  input logic                                  clk_i,
+  input logic                                  rst_ni,
+  // Input signals
+  input TagType                                tag_i,
+  input AuxType                                aux_i,
+  input logic                                  is_vector_i,
+  input logic [NumLanes-1:0]                   lane_active_i,
+  // Input Handshake
+  input  logic                                 in_valid_i,
+  output logic                                 in_ready_o,
+  input  logic                                 flush_i,
+  // Output signals
+  output TagType                               tag_o,
+  output AuxType                               aux_o,
+  output logic                                 is_vector_o,
+  output logic [NumLanes-1:0]                  lane_active_o,
+  // Output handshake
+  output logic                                 out_valid_o,
+  input  logic                                 out_ready_i,
+  // Register Enable for Lanes
+  output logic [NumPipeRegs-1:0]               reg_enable_o,
+  output logic [NumPipeRegs-1:0]               vector_reg_enable_o,
+  output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o,
+  // Signals for the Lane FSMs
+  // Signal to start the FSM, will be asserted for one cycle
+  output logic [NumLanes-1:0]                  lane_fsm_start_o,
+  // Signal that the FSM finished its operation, should be asserted continuously
+  input  logic [NumLanes-1:0]                  lane_fsm_ready_i,
+  // Indication of valid data in flight
+  output logic                                 busy_o
+);
+
+  // ----------
+  // Pipeline Distribution
+  // ----------
+  // This must match between this module and modules that use this module as reg enable input!
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  TagType [0:NUM_INP_REGS]               in_tag;
+  AuxType [0:NUM_INP_REGS]               in_aux;
+  logic   [0:NUM_INP_REGS]               in_is_vector;
+  logic   [0:NUM_INP_REGS][NumLanes-1:0] in_lane_active;
+  logic   [0:NUM_INP_REGS]               in_valid;
+
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] in_ready;
+
+  // First element of pipeline is taken from inputs
+  assign in_tag        [0] = tag_i;
+  assign in_aux        [0] = aux_i;
+  assign in_is_vector  [0] = is_vector_i;
+  assign in_valid      [0] = in_valid_i;
+  assign in_lane_active[0] = lane_active_i;
+
+  // Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = in_ready[0];
+
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign in_ready[i] = in_ready[i+1] | ~in_valid[i+1];
+
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(in_valid[i+1], in_valid[i], in_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = in_ready[i] & in_valid[i];
+
+    // Drive external registers with reg enable
+    assign reg_enable_o[i] = reg_ena;
+
+    // Drive external vector registers with reg enable if operation is a vector
+    assign vector_reg_enable_o[i] = reg_ena & in_is_vector[i];
+    for (genvar l = 0; l < NumLanes; l++) begin
+      assign lane_reg_enable_o[l][i] = reg_ena & in_lane_active[i][l];
+    end
+
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(        in_tag[i+1],         in_tag[i], reg_ena, TagType'('0))
+    `FFL(        in_aux[i+1],         in_aux[i], reg_ena, AuxType'('0))
+    `FFL(  in_is_vector[i+1],   in_is_vector[i], reg_ena, '0          )
+    `FFL(in_lane_active[i+1], in_lane_active[i], reg_ena, '0          )
+  end
+
+  // ----------
+  // Global FSM
+  // ----------
+
+  // FSM states
+  //  IDLE: no operation handed to the lanes, a new one can be accepted
+  //  BUSY: lanes are working, waiting until every active lane reports ready
+  //  HOLD: lanes are done but downstream was not ready, output valid is kept asserted
+  typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
+  fsm_state_e state_q, state_d;
+
+  // Input & Output Handshake
+  logic fsm_in_valid, fsm_in_ready;
+  logic fsm_out_valid, fsm_out_ready;
+
+  // Synchronisation signals
+  logic fsm_start, fsm_ready, fsm_busy;
+
+  // Data holding signals
+  TagType                held_tag;
+  AuxType                held_aux;
+  logic                  held_is_vector;
+  logic [NumLanes-1:0]   held_lane_active;
+
+  // Upstream Handshake Connection
+  assign fsm_in_valid = in_valid[NUM_INP_REGS];
+  assign in_ready[NUM_INP_REGS] = fsm_in_ready;
+
+  // Done when all active lanes are done
+  assign fsm_ready = &(lane_fsm_ready_i | ~held_lane_active);
+
+  // FSM to safely hand operations to the lane FSMs and collect their completion
+  always_comb begin : flag_fsm
+    // Default assignments
+    fsm_out_valid = 1'b0;
+    fsm_in_ready  = 1'b0;
+    fsm_start     = 1'b0;
+    fsm_busy      = 1'b0;
+    state_d       = state_q;
+
+    unique case (state_q)
+      IDLE: begin
+        fsm_in_ready = 1'b1;
+        if (fsm_in_valid) begin
+          state_d   = BUSY;
+          fsm_start = 1'b1;
+        end
+      end
+      BUSY: begin
+        fsm_busy = 1'b1;
+        // If all active lanes are done send data down chain
+        if (fsm_ready) begin
+          fsm_out_valid = 1'b1;
+          if (fsm_out_ready) begin
+            fsm_in_ready = 1'b1;
+            if (fsm_in_valid) begin
+              state_d   = BUSY;
+              fsm_start = 1'b1;
+            end else begin
+              state_d = IDLE;
+            end
+          end else begin
+            state_d = HOLD;
+          end
+        end
+      end
+      HOLD: begin
+        // Exact same as BUSY, but outer condition is already given
+        fsm_out_valid = 1'b1;
+        if (fsm_out_ready) begin
+          fsm_in_ready = 1'b1;
+          if (fsm_in_valid) begin
+            state_d   = BUSY;
+            fsm_start = 1'b1;
+          end else begin
+            state_d = IDLE;
+          end
+        end else begin
+          state_d = HOLD;
+        end
+      end
+
+      // fall into idle state otherwise
+      default: state_d = IDLE;
+    endcase
+
+    // Flushing overrides the other actions
+    if (flush_i) begin
+      fsm_out_valid = 1'b0;
+      fsm_start     = 1'b0; // a flushed op must neither start the lanes nor load the hold regs
+      state_d       = IDLE;
+    end
+  end
+
+  `FF(state_q, state_d, IDLE)
+
+  // Start Lanes when FSM starts and lane is active
+  for (genvar l = 0; l < NumLanes; l++) begin
+    assign lane_fsm_start_o[l] = fsm_start && in_lane_active[NUM_INP_REGS][l];
+  end
+
+  // ----------------
+  // Data Holding FFs
+  // ----------------
+  // Loaded on fsm_start with the metadata of the operation handed to the lanes; feeds the output pipeline
+
+  `FFL(        held_tag,         in_tag[NUM_INP_REGS], fsm_start, TagType'('0))
+  `FFL(        held_aux,         in_aux[NUM_INP_REGS], fsm_start, AuxType'('0))
+  `FFL(  held_is_vector,   in_is_vector[NUM_INP_REGS], fsm_start, '0          )
+  `FFL(held_lane_active, in_lane_active[NUM_INP_REGS], fsm_start, '0          )
+
+  // ---------------
+  // Output pipeline
+  // ---------------
+
+  // Output pipeline signals, index i holds signal after i register stages
+  TagType [0:NUM_OUT_REGS]               out_tag;
+  AuxType [0:NUM_OUT_REGS]               out_aux;
+  logic   [0:NUM_OUT_REGS]               out_is_vector;
+  logic   [0:NUM_OUT_REGS][NumLanes-1:0] out_lane_active;
+  logic   [0:NUM_OUT_REGS]               out_valid;
+
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_ready;
+
+  // Connect to upstream Handshake
+  assign out_valid[0]  = fsm_out_valid;
+  assign fsm_out_ready = out_ready[0];
+
+  // Connect to Hold Register
+  assign out_tag        [0] = held_tag;
+  assign out_aux        [0] = held_aux;
+  assign out_is_vector  [0] = held_is_vector;
+  assign out_lane_active[0] = held_lane_active;
+
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_ready[i] = out_ready[i+1] | ~out_valid[i+1];
+
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_valid[i+1], out_valid[i], out_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_ready[i] & out_valid[i];
+
+    // Drive external registers with reg enable
+    assign reg_enable_o[NUM_INP_REGS + i] = reg_ena;
+
+    // Drive external vector registers with reg enable if operation is a vector
+    assign vector_reg_enable_o[NUM_INP_REGS + i] = reg_ena & out_is_vector[i];
+    for (genvar l = 0; l < NumLanes; l++) begin
+      assign lane_reg_enable_o[l][NUM_INP_REGS + i] = reg_ena & out_lane_active[i][l];
+    end
+
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(        out_tag[i+1],         out_tag[i], reg_ena, TagType'('0))
+    `FFL(        out_aux[i+1],         out_aux[i], reg_ena, AuxType'('0))
+    `FFL(  out_is_vector[i+1],   out_is_vector[i], reg_ena, '0          )
+    `FFL(out_lane_active[i+1], out_lane_active[i], reg_ena, '0          )
+  end
+
+  // Ready travels backwards from output side, driven by downstream circuitry
+  assign out_ready[NUM_OUT_REGS] = out_ready_i;
+
+  // Assign module outputs
+  assign tag_o         = out_tag        [NUM_OUT_REGS];
+  assign aux_o         = out_aux        [NUM_OUT_REGS];
+  assign is_vector_o   = out_is_vector  [NUM_OUT_REGS];
+  assign out_valid_o   = out_valid      [NUM_OUT_REGS];
+  assign lane_active_o = out_lane_active[NUM_OUT_REGS];
+
+  // Assign output Flags: Busy if any element inside the pipe is valid
+  assign busy_o = |in_valid | |out_valid | fsm_busy;
+
+endmodule
diff --git a/src_files.yml b/src_files.yml index 31746ec2..90c34eb8 100644 --- a/src_files.yml +++ b/src_files.yml @@ -34,6 +34,7 @@ fpnew: vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v, src/fpnew_aux.sv, + src/fpnew_aux_fsm.sv, src/fpnew_divsqrt_th_32.sv, src/fpnew_divsqrt_th_64_multi.sv, src/fpnew_divsqrt_multi.sv, From 939e3b4e3c3d55ed7bf32695da278c905dd4b68c Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 14:23:30 +0200 Subject: [PATCH 04/17] Removed previous synchronisation of divsqrt module --- src/fpnew_divsqrt_multi.sv | 46 ++++------------------------ src/fpnew_divsqrt_th_64_multi.sv | 52 ++++---------------------------- 2 files changed, 12 insertions(+), 86 deletions(-) diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index ac23c43e..44a030a1 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -41,10 +41,6 @@ module fpnew_divsqrt_multi #( // Input Handshake input logic in_valid_i, output logic in_ready_o, - output logic divsqrt_done_o, - input logic simd_synch_done_i, - output logic divsqrt_ready_o, - input logic simd_synch_rdy_i, input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, @@ -170,11 +166,10 @@ module fpnew_divsqrt_multi #( logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done, unit_done_q; // status signals from unit instance + logic unit_ready, unit_done; // status signals from unit instance logic op_starting; // high in the cycle a new operation starts logic out_valid, out_ready; // output handshake with downstream logic unit_busy; // valid data in flight - logic
simd_synch_done; // FSM states typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; fsm_state_e state_q, state_d; @@ -198,21 +193,8 @@ module fpnew_divsqrt_multi #( `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - // Wait for other lanes only if the operation is vectorial - assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; - - // Valid synch with other lanes - // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes - // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); - // Tell the other units that this unit has finished now or in the past - assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; - - // Ready synch with other lanes - // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes - assign divsqrt_ready_o = in_ready; - // Upstream ready comes from sanitization FSM, and it is synched among all the lanes - assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready; + // Upstream ready comes from FSM + assign inp_pipe_ready[NUM_INP_REGS] = in_ready; // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm @@ -234,7 +216,7 @@ module fpnew_divsqrt_multi #( BUSY: begin unit_busy = 1'b1; // data in flight // If all the lanes are done with processing - if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin + if (unit_done) begin out_valid = 1'b1; // try to commit result downstream // If downstream accepts our result if (out_ready) begin @@ -305,22 +287,6 @@ module fpnew_divsqrt_multi #( // Adjust result width and fix FP8 assign adjusted_result = result_is_fp8_q ? 
unit_result >> 8 : unit_result; - // Hold the result when one lane has finished execution, except when all the lanes finish together, - // or the operation is not vectorial, and the result can be accepted downstream - assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); - // The Hold register (load, no reset) - `FFLNR(held_result_q, adjusted_result, hold_en, clk_i) - `FFLNR(held_status_q, unit_status, hold_en, clk_i) - - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d = unit_done_q ? held_result_q : adjusted_result; - assign status_d = unit_done_q ? held_status_q : unit_status; - // ---------------- // Output Pipeline // ---------------- @@ -335,8 +301,8 @@ module fpnew_divsqrt_multi #( logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; + assign out_pipe_result_q[0] = adjusted_result; + assign out_pipe_status_q[0] = unit_status; assign out_pipe_tag_q[0] = result_tag_q; assign out_pipe_mask_q[0] = result_mask_q; assign out_pipe_aux_q[0] = result_aux_q; diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index eff0620d..df781554 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -39,14 +39,9 @@ module fpnew_divsqrt_th_64_multi #( input TagType tag_i, input logic mask_i, input AuxType aux_i, - input logic vectorial_op_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, - output logic divsqrt_done_o, - input logic simd_synch_done_i, - output logic divsqrt_ready_o, - input logic simd_synch_rdy_i, input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, @@ -95,7 +90,6 @@ module fpnew_divsqrt_th_64_multi #( TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] 
inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages logic [0:NUM_INP_REGS] inp_pipe_ready; @@ -108,7 +102,6 @@ module fpnew_divsqrt_th_64_multi #( assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_vec_op_q[0] = vectorial_op_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to upstream circuitry assign in_ready_o = inp_pipe_ready[0]; @@ -132,7 +125,6 @@ module fpnew_divsqrt_th_64_multi #( `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) - `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -181,11 +173,11 @@ module fpnew_divsqrt_th_64_multi #( logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done, unit_done_q; // status signals from unit instance + logic unit_ready, unit_done; // status signals from unit instance logic op_starting; // high in the cycle a new operation starts logic out_valid, out_ready; // output handshake with downstream logic unit_busy; // valid data in flight - logic simd_synch_done; + // FSM states typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; fsm_state_e state_q, state_d; @@ -200,29 +192,13 @@ module fpnew_divsqrt_th_64_multi #( TagType result_tag_q; logic result_mask_q; AuxType result_aux_q; - logic result_vec_op_q; // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) `FFL(result_mask_q, 
inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - - // Wait for other lanes only if the operation is vectorial - assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; - - // Valid synch with other lanes - // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes - // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); - // Tell the other units that this unit has finished now or in the past - assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; - // Ready synch with other lanes - // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes - assign divsqrt_ready_o = in_ready; - // Upstream ready comes from sanitization FSM, and it is synched among all the lanes - assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? 
simd_synch_rdy_i : in_ready; + assign inp_pipe_ready[NUM_INP_REGS] = in_ready; // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm @@ -244,7 +220,7 @@ module fpnew_divsqrt_th_64_multi #( BUSY: begin unit_busy = 1'b1; // data in flight // If all the lanes are done with processing - if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin + if (unit_done) begin out_valid = 1'b1; // try to commit result downstream // If downstream accepts our result if (out_ready) begin @@ -410,22 +386,6 @@ module fpnew_divsqrt_th_64_multi #( assign unit_ready = !vfdsu_dp_fdiv_busy; - // Hold the result when one lane has finished execution, except when all the lanes finish together, - // or the operation is not vectorial, and the result can be accepted downstream - assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); - // The Hold register (load, no reset) - `FFLNR(held_result_q, unit_result, hold_en, clk_i) - `FFLNR(held_status_q, unit_status, hold_en, clk_i) - - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d[WIDTH-1:0] = unit_done_q ? held_result_q[WIDTH-1:0] : unit_result[WIDTH-1:0]; - assign status_d = unit_done_q ? 
held_status_q : unit_status; - // ---------------- // Output Pipeline // ---------------- @@ -440,8 +400,8 @@ module fpnew_divsqrt_th_64_multi #( logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; + assign out_pipe_result_q[0] = unit_result; + assign out_pipe_status_q[0] = unit_status; assign out_pipe_tag_q[0] = result_tag_q; assign out_pipe_mask_q[0] = result_mask_q; assign out_pipe_aux_q[0] = result_aux_q; From 875ee093fb79437bb5d2a3ba20534f2b08abf173 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 14:24:30 +0200 Subject: [PATCH 05/17] Converted division to use shared FSM. --- src/fpnew_divsqrt_multi.sv | 210 ++++++----------------------- src/fpnew_divsqrt_th_32.sv | 218 ++++++------------------------- src/fpnew_divsqrt_th_64_multi.sv | 212 +++++++----------------------- 3 files changed, 126 insertions(+), 514 deletions(-) diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index 44a030a1..71dfe5b7 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -20,8 +20,6 @@ module fpnew_divsqrt_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -34,26 +32,17 @@ module fpnew_divsqrt_multi #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - input logic vectorial_op_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, 
output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -79,61 +68,39 @@ module fpnew_divsqrt_multi #( fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; fpnew_pkg::fp_format_e dst_fmt_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_vec_op_q[0] = vectorial_op_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to upstream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. 
if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) - `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -160,112 +127,19 @@ module fpnew_divsqrt_multi #( divsqrt_operands[1] = input_is_fp8 ? 
operands_q[1] << 8 : operands_q[1]; end - // ------------ - // Control FSM - // ------------ - - logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done; // status signals from unit instance - logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic unit_busy; // valid data in flight - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; - assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; - assign op_starting = div_valid | sqrt_valid; - - // Hold additional information while the operation is in progress - logic result_is_fp8_q; - TagType result_tag_q; - logic result_mask_q; - AuxType result_aux_q; - logic result_vec_op_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - - // Upstream ready comes from FSM - assign inp_pipe_ready[NUM_INP_REGS] = in_ready; - - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - in_ready = 1'b1; // we're ready - if (in_valid_q && unit_ready) begin // New work arrives - state_d = BUSY; // go into processing state - end - end - // Operation in 
progress - BUSY: begin - unit_busy = 1'b1; // data in flight - // If all the lanes are done with processing - if (unit_done) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - in_ready = 1'b1; // we acknowledge the instruction - if (in_valid_q && unit_ready) begin // ..unless new work comes in - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. - if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) + assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; + assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; // ----------------- // DIVSQRT instance // ----------------- - logic [63:0] unit_result; - logic [WIDTH-1:0] adjusted_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; - logic hold_en; + logic [63:0] raw_unit_result; + logic [WIDTH-1:0] unit_result; + logic unit_done; + fpnew_pkg::status_t unit_status; div_sqrt_top_mvp i_divsqrt_lei ( .Clk_CI ( clk_i ), @@ -278,14 +152,28 @@ module 
fpnew_divsqrt_multi #( .Precision_ctl_SI ( '0 ), .Format_sel_SI ( divsqrt_fmt ), .Kill_SI ( flush_i ), - .Result_DO ( unit_result ), + .Result_DO ( raw_unit_result ), .Fflags_SO ( unit_status ), - .Ready_SO ( unit_ready ), + .Ready_SO ( fsm_ready_o ), .Done_SO ( unit_done ) ); // Adjust result width and fix FP8 - assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result; + assign unit_result = input_is_fp8 ? raw_unit_result >> 8 : raw_unit_result; + + // ---------------- + // Hold Result + // ---------------- + logic [WIDTH-1:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], fsm_start_i, '0); // Mask is stored on start -> Dont need a bypass mux + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? unit_status : held_status; // ---------------- // Output Pipeline @@ -293,50 +181,28 @@ module fpnew_divsqrt_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = adjusted_result; - assign out_pipe_status_q[0] = unit_status; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + 
assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_divsqrt_th_32.sv 
b/src/fpnew_divsqrt_th_32.sv index 71d23068..f4f6bb44 100644 --- a/src/fpnew_divsqrt_th_32.sv +++ b/src/fpnew_divsqrt_th_32.sv @@ -23,8 +23,6 @@ module fpnew_divsqrt_th_32 #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = 32, localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,25 +34,17 @@ module fpnew_divsqrt_th_32 #( input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -79,73 +69,45 @@ module fpnew_divsqrt_th_32 #( logic [1:0][WIDTH-1:0] operands_q; fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] 
inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = 
inp_pipe_op_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; - // ------------ - // Control FSM - // ------------ - logic in_ready; // input handshake with upstream + // ----------------- + // Input processing + // ----------------- logic div_op, sqrt_op; // input signalling with unit - logic unit_ready_q, unit_done; // status signals from unit instance logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic hold_result; // whether to put result into hold register - logic data_is_held; // data in hold register is valid - logic unit_busy; // valid data in flight - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Operations are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_op = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; //in_ready delete, valid independent of ready - assign sqrt_op = in_valid_q & (op_q == fpnew_pkg::SQRT) & in_ready & ~flush_i; + assign div_op = (op_q == fpnew_pkg::DIV) & fsm_start_i; //in_ready delete, valid independent of ready + assign sqrt_op = (op_q == fpnew_pkg::SQRT) & fsm_start_i; assign op_starting = div_op | sqrt_op; //start computing or handshake, modify tb handshake right logic fdsu_fpu_ex1_stall, fdsu_fpu_ex1_stall_q; @@ -159,92 +121,11 @@ module fpnew_divsqrt_th_32 #( `FFL(div_op_q, div_op_d, 1'b1, '0) `FFL(sqrt_op_q, sqrt_op_d, 1'b1, '0) - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - hold_result = 1'b0; - data_is_held = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - inp_pipe_ready[NUM_INP_REGS] = unit_ready_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - // in_ready = 1'b1; // we're ready - in_ready = unit_ready_q; //*** - if (in_valid_q && unit_ready_q) begin // New work arrives - 
inp_pipe_ready[NUM_INP_REGS] = unit_ready_q && !fdsu_fpu_ex1_stall; - state_d = BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - inp_pipe_ready[NUM_INP_REGS] = fdsu_fpu_ex1_stall_q; - unit_busy = 1'b1; // data in flight - // If the unit is done with processing - if (unit_done) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - if (in_valid_q && unit_ready_q) begin // ..unless new work comes in - in_ready = 1'b1; // we acknowledge the instruction - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - hold_result = 1'b1; // activate the hold register - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - data_is_held = 1'b1; // data in hold register is valid - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. 
- if (in_valid_q && unit_ready_q) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) - - // Hold additional information while the operation is in progress - TagType result_tag_q; - AuxType result_aux_q; - logic result_mask_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - // ----------------- // DIVSQRT instance // ----------------- - logic [WIDTH-1:0] unit_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; + logic [WIDTH-1:0] unit_result; + fpnew_pkg::status_t unit_status; // thead define fdsu module's input and output logic ctrl_fdsu_ex1_sel; @@ -276,7 +157,8 @@ module fpnew_divsqrt_th_32 #( logic [4:0] fpu_idu_fwd_fflags; logic fpu_idu_fwd_vld; - logic unit_ready_d; + logic unit_done; // status signals from unit instance + logic unit_ready_d, unit_ready_q; // unit_ready_q related to state machine, different under special and normal cases. 
always_comb begin @@ -295,6 +177,8 @@ module fpnew_divsqrt_th_32 #( `FFL(unit_ready_q, unit_ready_d, 1'b1, 1'b1) + assign fsm_ready_o = unit_ready_q && !fdsu_fpu_ex1_stall; + // determine input of time to select operands always_comb begin ctrl_fdsu_ex1_sel = 1'b0; @@ -408,18 +292,23 @@ module fpnew_divsqrt_th_32 #( unit_done = fpu_idu_fwd_vld; end + // ---------------- + // Hold Result + // ---------------- + + // Hold additional information while the operation is in progress + logic [WIDTH-1:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + // Capture result/status when the unit is done and the mask when an operation starts (load FF, active low asynch rst) // The Hold register (load, no reset) - `FFLNR(held_result_q, unit_result, hold_result, clk_i) - `FFLNR(held_status_q, unit_status, hold_result, clk_i) - - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d = data_is_held ? held_result_q : unit_result; - assign status_d = data_is_held ? held_status_q : unit_status; + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? 
unit_status : held_status; // ---------------- // Output Pipeline @@ -427,50 +316,29 @@ module fpnew_divsqrt_th_32 #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); + endmodule diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index df781554..89fda8c5 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -13,7 +13,7 @@ // Authors: Stefan Mach // Roman Marquart - +// Maurus Item `include "common_cells/registers.svh" @@ -22,8 +22,6 @@ module fpnew_divsqrt_th_64_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, 
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,25 +34,17 @@ module fpnew_divsqrt_th_64_multi #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -80,58 +70,39 @@ module fpnew_divsqrt_th_64_multi #( fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; fpnew_pkg::fp_format_e dst_fmt_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = 
rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to upstream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; assign 
dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -167,108 +138,23 @@ module fpnew_divsqrt_th_64_multi #( $fatal(1, "DivSqrt THMULTI: Unsupported WIDTH (the supported width are 64, 32, 16)"); end - // ------------ - // Control FSM - // ------------ - - logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done; // status signals from unit instance - logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic unit_busy; // valid data in flight // FSM states typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; - assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; - assign op_starting = div_valid | sqrt_valid; - - // Hold additional information while the operation is in progress - - TagType result_tag_q; - logic result_mask_q; - AuxType result_aux_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - - assign inp_pipe_ready[NUM_INP_REGS] = in_ready; - - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - in_ready = 1'b1; // we're ready - if (in_valid_q && unit_ready) begin // New work arrives - state_d = BUSY; // go into processing state - 
end - end - // Operation in progress - BUSY: begin - unit_busy = 1'b1; // data in flight - // If all the lanes are done with processing - if (unit_done) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - in_ready = 1'b1; // we acknowledge the instruction - if (in_valid_q && unit_ready) begin // ..unless new work comes in - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. - if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) + assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; + assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; // ----------------- // DIVSQRT instance // ----------------- - logic [63:0] unit_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; - logic hold_en; + logic unit_done; // Unit output is valid and should be saved + + logic [63:0] unit_result; + fpnew_pkg::status_t unit_status; logic vfdsu_dp_fdiv_busy; @@ -281,11 +167,11 @@ module fpnew_divsqrt_th_64_multi #( logic [63:0] 
srcf0, srcf1; // Save operands in regs, C910 saves all the following information in its regs in the next cycle. - `FFL(rm_q, rnd_mode_q, op_starting, fpnew_pkg::RNE) - `FFL(divsqrt_fmt_q, divsqrt_fmt, op_starting, '0) - `FFL(divsqrt_op_q, op_q, op_starting, fpnew_pkg::DIV) - `FFL(srcf0_q, operands_q[0], op_starting, '0) - `FFL(srcf1_q, operands_q[1], op_starting, '0) + `FFL(rm_q, rnd_mode_q, fsm_start_i, fpnew_pkg::RNE) + `FFL(divsqrt_fmt_q, divsqrt_fmt, fsm_start_i, '0) + `FFL(divsqrt_op_q, op_q, fsm_start_i, fpnew_pkg::DIV) + `FFL(srcf0_q, operands_q[0], fsm_start_i, '0) + `FFL(srcf1_q, operands_q[1], fsm_start_i, '0) // NaN-box inputs with max WIDTH if(WIDTH == 64) begin : gen_fmt_64_bits @@ -346,7 +232,7 @@ module fpnew_divsqrt_th_64_multi #( // Select func 1 cycle after div issue logic func_sel; - `FFLARNC(func_sel, 1'b1, op_starting, func_sel, 1'b0, clk_i, rst_ni) + `FFLARNC(func_sel, 1'b1, fsm_start_i, func_sel, 1'b0, clk_i, rst_ni) // Select operands 2 cycles after div issue logic op_sel; @@ -364,7 +250,7 @@ module fpnew_divsqrt_th_64_multi #( .dp_vfdsu_ex1_pipex_srcf0 ( srcf0 ), // Input for operand 0 .dp_vfdsu_ex1_pipex_srcf1 ( srcf1 ), // Input for operand 1 .dp_vfdsu_fdiv_gateclk_issue ( 1'b1 ), // Local clock enable (same as above) - .dp_vfdsu_idu_fdiv_issue ( op_starting ), // 1. Issue fdiv (FSM in ctrl) + .dp_vfdsu_idu_fdiv_issue ( fsm_start_i ), // 1. Issue fdiv (FSM in ctrl) .forever_cpuclk ( clk_i ), // Clock input .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 13'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0) .idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. 
Select func @@ -384,7 +270,21 @@ module fpnew_divsqrt_th_64_multi #( .vfdsu_ifu_debug_pipe_busy ( ) // Debug output ); - assign unit_ready = !vfdsu_dp_fdiv_busy; + assign fsm_ready_o = !vfdsu_dp_fdiv_busy; + + // ---------------- + // Hold Result + // ---------------- + logic [63:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], fsm_start_i, '0); // Mask is stored on start -> Don't need a bypass mux + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? unit_status : held_status; // ---------------- // Output Pipeline @@ -392,51 +292,29 @@ module fpnew_divsqrt_th_64_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = unit_result; - assign out_pipe_status_q[0] = unit_status; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the 
ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); -endmodule +endmodule From 1cabbca83eadb35b7c64df1ba9f7ec9a5b0b8b44 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 15:26:46 +0200 Subject: [PATCH 06/17] Converted multiformat slice to use new aux chain --- src/fpnew_opgroup_multifmt_slice.sv | 345 +++++++++++++--------------- 1 file changed, 157 
insertions(+), 188 deletions(-) diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index ff6f1a14..cc6f963a 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -91,12 +91,11 @@ or on 16b inputs producing 32b outputs"); // We will send the format information along with the data localparam int unsigned FMT_BITS = fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS)); - localparam int unsigned AUX_BITS = FMT_BITS + 4; // also add vectorial and integer flags + localparam int unsigned AUX_BITS = FMT_BITS + 3; // add integer flags - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes logic vectorial_op; logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation - logic [AUX_BITS-1:0] aux_data; + logic [AUX_BITS-1:0] in_aux, out_aux; // aux signals to pass along with the operation // additional flags for CONV logic dst_fmt_is_int, dst_is_cpk; @@ -113,12 +112,9 @@ or on 16b inputs producing 32b outputs"); fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used - TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; - logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used - logic [NUM_LANES-1:0] lane_busy; // dito - logic result_is_vector, result_is_vsum, op_is_vsum; + logic result_is_vsum, op_is_vsum; logic [FMT_BITS-1:0] result_fmt; logic result_fmt_is_int, result_is_cpk; logic [1:0] result_vec_op; // info for vectorial results (for packing) @@ -132,7 +128,6 @@ or on 16b inputs producing 32b outputs"); // RSR supported only on SDOTP module assign rnd_mode = (rnd_mode_i == fpnew_pkg::RSR) ? 
fpnew_pkg::RNE : rnd_mode_i; - assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled // Cast-and-Pack ops are encoded in operation and modifier @@ -149,7 +144,7 @@ or on 16b inputs producing 32b outputs"); assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i; // The data sent along consists of the vectorial flag and format bits - assign aux_data = {dst_is_cpk, dst_fmt_is_int, vectorial_op, dst_fmt, op_is_vsum}; + assign in_aux = {dst_is_cpk, dst_fmt_is_int, dst_fmt, op_is_vsum}; assign target_aux_d = dst_vec_op; // CONV passes one operand for assembly after the unit: opC for cpk, opB for others @@ -170,6 +165,74 @@ or on 16b inputs producing 32b outputs"); end end + // --------------- + // Generate Aux Chain + // --------------- + // Signals to transmit reg enable to other modules + logic [NumPipeRegs-1:0] vector_reg_enable; + + logic [NUM_LANES-1:0] in_lane_active, out_lane_active, lane_fsm_ready, lane_fsm_start; + logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enabe; + + if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_aux + fpnew_aux_fsm #( + .NumPipeRegs( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ), + .NumLanes ( NUM_LANES ) + ) i_aux_fsm ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( in_aux ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( out_aux ), + .is_vector_o ( /* Unused */ ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( vector_reg_enable ), + .lane_reg_enable_o ( lane_reg_enabe ), + .lane_fsm_start_o ( lane_fsm_start ), + .lane_fsm_ready_i ( lane_fsm_ready ) + ); + end else begin: gen_direct_aux + fpnew_aux #( + .NumPipeRegs( NumPipeRegs ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] 
), + .NumLanes ( NUM_LANES ) + ) i_aux ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( in_aux ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( out_aux ), + .is_vector_o ( /* Unused */ ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( vector_reg_enable ), + .lane_reg_enable_o ( lane_reg_enabe ) + ); + end + // --------------- // Generate Lanes // --------------- @@ -207,16 +270,17 @@ or on 16b inputs producing 32b outputs"); // Generate instances only if needed, lane 0 always generated if ((lane == 0) || (EnableVectors & (!(OpGroup == fpnew_pkg::DOTP && (lane >= NUM_DOTP_LANES)) && !(OpGroup == fpnew_pkg::DIVSQRT && (lane >= NUM_DIVSQRT_LANES))))) begin : active_lane - logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local oprands logic [LANE_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - logic lane_is_used; - assign lane_is_used = (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | - (LANE_FORMATS[dst_fmt_i] & is_up_cast) | (OpGroup == fpnew_pkg::DIVSQRT); - assign in_valid = in_valid_i & ((lane == 0) | vectorial_op) & lane_is_used; // upper lanes only for vectors + // Figure out if lane is active e.g. 
should be used + assign in_lane_active[lane] = ( + (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | + (LANE_FORMATS[dst_fmt_i] & is_up_cast) | + (OpGroup == fpnew_pkg::DIVSQRT) + ) & ((lane == 0) | vectorial_op); // Slice out the operands for this lane, upper bits are ignored in the unit always_comb begin : prepare_input @@ -256,9 +320,7 @@ or on 16b inputs producing 32b outputs"); fpnew_fma_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_fma_multi ( .clk_i, .rst_ni, @@ -269,30 +331,19 @@ or on 16b inputs producing 32b outputs"); .op_mod_i, .src_fmt_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ) ); end else if (OpGroup == fpnew_pkg::DOTP) begin : lane_instance fpnew_sdotp_multi_wrapper #( - .LaneWidth ( LANE_WIDTH ), - .FpFmtConfig ( LANE_FORMATS ), // fp64 and fp32 not supported - .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ), + .LaneWidth ( LANE_WIDTH ), + .FpFmtConfig ( LANE_FORMATS ), // fp64 and fp32 not supported + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), .StochasticRndImplementation ( StochasticRndImplementation ) ) i_fpnew_sdotp_multi_wrapper ( .clk_i, @@ -305,136 +356,91 @@ or on 16b inputs producing 32b outputs"); .op_mod_i, .src_fmt_i, 
.dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ) ); + end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance + end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt + if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt // The T-head-based DivSqrt unit is supported only in FP32-only configurations fpnew_divsqrt_th_32 #( .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_multi_th ( .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( 
out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt fpnew_divsqrt_th_64_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_th_64_c910 ( - .clk_i, + .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .divsqrt_done_o ( divsqrt_done[lane] ), - .simd_synch_done_i( simd_synch_done ), - .divsqrt_ready_o ( divsqrt_ready[lane] ), - .simd_synch_rdy_i ( simd_synch_rdy ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else begin : gen_pulp_divsqrt fpnew_divsqrt_multi #( .FpFmtConfig ( LANE_FORMATS ), 
.NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_multi ( .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .divsqrt_done_o ( divsqrt_done[lane] ), - .simd_synch_done_i( simd_synch_done ), - .divsqrt_ready_o ( divsqrt_ready[lane] ), - .simd_synch_rdy_i ( simd_synch_rdy ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end - end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance - end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance fpnew_cast_multi #( .FpFmtConfig ( LANE_FORMATS ), .IntFmtConfig ( CONV_INT_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_cast_multi ( .clk_i, .rst_ni, @@ -446,45 +452,32 @@ or on 16b inputs producing 32b outputs"); .src_fmt_i, .dst_fmt_i, .int_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - 
.in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ) ); end // ADD OTHER OPTIONS HERE - // Handshakes are only done if the lane is actually used - assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); - assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); + // Guard against accidentally using the wrong aux module + if (OpGroup != fpnew_pkg::DIVSQRT) begin : lane_fsm_guard + assign lane_fsm_ready[lane] = 1'b0; // Lane does not have a FSM, it can not be ready! + end // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : {(LANE_WIDTH){lane_ext_bit[0]}}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = out_lane_active[lane] ? op_result: '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active[lane] ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin : inactive_lane - assign lane_out_valid[lane] = 1'b0; // unused lane - assign lane_in_ready[lane] = 1'b0; // unused lane - assign lane_aux[lane] = 1'b0; // unused lane assign lane_masks[lane] = 1'b1; // unused lane - assign lane_tags[lane] = 1'b0; // unused lane - assign divsqrt_done[lane] = 1'b0; // unused lane - assign divsqrt_ready[lane] = 1'b0; // unused lane assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; - assign lane_busy[lane] = 1'b0; + assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active + assign lane_fsm_ready[lane] = 1'b0; // Lane does not exist, it can not be ready end // Generate result packing depending on float format @@ -569,32 +562,22 @@ or on 16b inputs producing 32b outputs"); // Bypass pipeline signals, index i holds signal after i register stages logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q; logic [0:NumPipeRegs][1:0] byp_pipe_aux_q; - logic [0:NumPipeRegs] byp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] byp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign byp_pipe_target_q[0] = conv_target_d; assign byp_pipe_aux_q[0] = target_aux_d; - assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op; + // Generate the register stages for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline - // Internal register enable for this stage - logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + // Internal register enable for this stage + logic reg_ena; + // Enable register is set externally + assign reg_ena = vector_reg_enable[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector; + // Output stage: assign module outputs assign conv_target_q = byp_pipe_target_q[NumPipeRegs]; @@ -626,20 +609,10 @@ or on 16b inputs producing 32b outputs"); assign conv_target_q = '0; end - if ((DivSqrtSel != fpnew_pkg::TH32) && (OpGroup == fpnew_pkg::DIVSQRT)) begin - // Synch lanes if there is more than one - assign simd_synch_rdy = EnableVectors ? &divsqrt_ready[NUM_DIVSQRT_LANES-1:0] : divsqrt_ready[0]; - assign simd_synch_done = EnableVectors ? &divsqrt_done[NUM_DIVSQRT_LANES-1:0] : divsqrt_done[0]; - end else begin - // Unused (TH32 divider only supported for scalar FP32 divsqrt) - assign simd_synch_rdy = '0; - assign simd_synch_done = '0; - end - // ------------ // Output Side // ------------ - assign {result_is_cpk, result_fmt_is_int, result_is_vector, result_fmt, result_is_vsum} = lane_aux[0]; + assign {result_is_cpk, result_fmt_is_int, result_fmt, result_is_vsum} = out_aux; assign result_o = result_fmt_is_int ? ifmt_slice_result[result_fmt] : result_is_cpk ? 
fmt_conv_cpk_result[result_fmt][result_vec_op] : @@ -647,10 +620,6 @@ or on 16b inputs producing 32b outputs"); fmt_slice_result[result_fmt]; assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones - assign tag_o = lane_tags[0]; // don't care about upper ones - assign busy_o = (| lane_busy); - - assign out_valid_o = lane_out_valid[0]; // don't care about upper ones // Collapse the status always_comb begin : output_processing From 38dbfa1be51e9b199b9c57d0a7c8be8d4c5bca01 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 15:23:58 +0200 Subject: [PATCH 07/17] Converted fmt slice to new aux chain --- src/fpnew_opgroup_fmt_slice.sv | 156 +++++++++++++-------------------- 1 file changed, 59 insertions(+), 97 deletions(-) diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 60353f21..47c3384a 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -58,9 +58,7 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); - localparam int unsigned AUX_BITS = 2; - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes logic vectorial_op, cmp_op; logic [NUM_LANES*FP_WIDTH-1:0] slice_result; @@ -70,10 +68,8 @@ module fpnew_opgroup_fmt_slice #( fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask; - TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; - logic [NUM_LANES-1:0] lane_busy, lane_is_class; // dito - logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // dito + logic [NUM_LANES-1:0] lane_is_class; // only the first one is actually used logic result_is_vector, result_is_class, result_is_cmp; @@ -84,11 +80,43 @@ module fpnew_opgroup_fmt_slice #( // ----------- // RSR 
supported only on SDOTP module assign rnd_mode = (rnd_mode_i == fpnew_pkg::RSR) ? fpnew_pkg::RNE : rnd_mode_i; - - assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled assign cmp_op = (op_i == fpnew_pkg::CMP); + // --------------- + // Generate Aux Chain + // --------------- + // Signals to transmit reg enable to other modules + logic [NUM_LANES-1:0] in_lane_active, out_lane_active; + logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enable; + + fpnew_aux #( + .NumPipeRegs( NumPipeRegs ), + .TagType ( TagType ), + .AuxType ( logic ), + .NumLanes ( NUM_LANES ) + ) i_aux ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( cmp_op ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( result_is_cmp ), + .is_vector_o ( result_is_vector ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( /* Unused */ ), + .lane_reg_enable_o ( lane_reg_enable ) + ); + // --------------- // Generate Lanes // --------------- @@ -98,15 +126,13 @@ module fpnew_opgroup_fmt_slice #( // Generate instances only if needed, lane 0 always generated if ((lane == 0) || EnableVectors) begin : active_lane - logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands logic [FP_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - logic [AUX_BITS-1:0] local_aux_data_input; - assign local_aux_data_input = {vectorial_op, cmp_op}; - assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors + assign in_lane_active[lane] = (lane == 0) | vectorial_op; // upper lanes only for vectors + // Slice out the operands for this lane always_comb begin : prepare_input for (int i = 0; i < int'(NUM_OPERANDS); i++) 
begin @@ -119,116 +145,58 @@ module fpnew_opgroup_fmt_slice #( fpnew_fma #( .FpFormat ( FpFormat ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fma ( .clk_i, .rst_ni, .operands_i ( local_operands ), .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - .rnd_mode_i ( rnd_mode ), + .rnd_mode_i ( rnd_mode ), .op_i, .op_mod_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( local_aux_data_input ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable[lane] ) ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; - end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - // fpnew_divsqrt #( - // .FpFormat (FpFormat), - // .NumPipeRegs(NumPipeRegs), - // .PipeConfig (PipeConfig), - // .TagType (TagType), - // .AuxType (logic) - // ) i_divsqrt ( - // .clk_i, - // .rst_ni, - // .operands_i ( local_operands ), - // .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - // .rnd_mode_i ( rnd_mode ), - // .op_i, - // .op_mod_i, - // .tag_i, - // .aux_i ( vectorial_op ), // Remember whether operation was vectorial - // .in_valid_i ( in_valid ), - // .in_ready_o ( lane_in_ready[lane] ), - // .flush_i, - // .result_o ( op_result ), - // .status_o ( op_status ), - // .extension_bit_o ( lane_ext_bit[lane] ), - // .tag_o ( lane_tags[lane] ), - // .aux_o ( lane_aux[lane] ), - // .out_valid_o ( out_valid ), - 
// .out_ready_i ( out_ready ), - // .busy_o ( lane_busy[lane] ) - // ); - // assign lane_is_class[lane] = 1'b0; end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance fpnew_noncomp #( .FpFormat ( FpFormat ), .NumPipeRegs( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_noncomp ( .clk_i, .rst_ni, .operands_i ( local_operands ), .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - .rnd_mode_i ( rnd_mode ), + .rnd_mode_i ( rnd_mode ), .op_i, .op_mod_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( local_aux_data_input ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .class_mask_o ( lane_class_mask[lane] ), - .is_class_o ( lane_is_class[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .class_mask_o ( lane_class_mask[lane] ), + .is_class_o ( lane_is_class[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable[lane] ) ); end // ADD OTHER OPTIONS HERE - // Handshakes are only done if the lane is actually used - assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); - assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); - // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = out_lane_active[lane] ? 
op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active[lane] ? op_status : '0; // Otherwise generate constant sign-extension end else begin - assign lane_out_valid[lane] = 1'b0; // unused lane - assign lane_in_ready[lane] = 1'b0; // unused lane assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box assign lane_status[lane] = '0; - assign lane_busy[lane] = 1'b0; assign lane_is_class[lane] = 1'b0; + assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active end // Insert lane result into slice result @@ -267,8 +235,6 @@ module fpnew_opgroup_fmt_slice #( // ------------ // Output Side // ------------ - assign result_is_vector = lane_aux[0][1]; - assign result_is_cmp = lane_aux[0][0]; assign result_is_class = lane_is_class[0]; assign slice_regular_result = $signed({extension_bit_o, slice_result}); @@ -294,11 +260,7 @@ module fpnew_opgroup_fmt_slice #( assign result_o = result_is_class ? slice_class_result : slice_regular_result; end - assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused - assign tag_o = lane_tags[0]; // upper lanes unused - assign busy_o = (| lane_busy); - assign out_valid_o = lane_out_valid[0]; // upper lanes unused - + assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused // Collapse the lane status always_comb begin : output_processing From 7a7fc6f57fb2a3b6a411c20911fb99fee7eddcb9 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Fri, 21 Jun 2024 16:57:46 +0200 Subject: [PATCH 08/17] Improved synchronization in case of faults in division and prevented potential cases where a bitflip causes a stall due to activating a division lane that does not exist. 
--- src/fpnew_aux_fsm.sv | 2 +- src/fpnew_opgroup_multifmt_slice.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv index 4b1c6013..7ab7763f 100644 --- a/src/fpnew_aux_fsm.sv +++ b/src/fpnew_aux_fsm.sv @@ -154,7 +154,7 @@ module fpnew_aux_fsm #( assign in_ready[NUM_INP_REGS] = fsm_in_ready; // Done when all active lanes are done - assign fsm_ready = &(lane_fsm_ready_i | ~held_lane_active); + assign fsm_ready = &lane_fsm_ready_i; // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index cc6f963a..390b918c 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -477,7 +477,7 @@ or on 16b inputs producing 32b outputs"); assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active - assign lane_fsm_ready[lane] = 1'b0; // Lane does not exist, it can not be ready + assign lane_fsm_ready[lane] = 1'b1; // Lane does not exist, it is always ready just in case erronous data gets to the FSM in this slot end // Generate result packing depending on float format From 2bb668db986eee7850c73634d84133cbc7556112 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Tue, 16 Jul 2024 14:52:07 +0200 Subject: [PATCH 09/17] Removed FSM Enum and signals that are no longer used. 
--- src/fpnew_divsqrt_th_64_multi.sv | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index 89fda8c5..fd6f3fdb 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -140,10 +140,6 @@ module fpnew_divsqrt_th_64_multi #( logic div_valid, sqrt_valid; // input signalling with unit - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; - // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; From 2ed34a2307c2f50b12982e094997d875af4f04a1 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 17 Jul 2024 21:58:01 +0200 Subject: [PATCH 10/17] Made aux chain modules lane-agnostic. --- src/fpnew_aux.sv | 34 +----- src/fpnew_aux_fsm.sv | 81 +++----------- src/fpnew_opgroup_fmt_slice.sv | 54 +++++----- src/fpnew_opgroup_multifmt_slice.sv | 160 +++++++++++++++++----------- 4 files changed, 148 insertions(+), 181 deletions(-) diff --git a/src/fpnew_aux.sv b/src/fpnew_aux.sv index 28059db5..dd93eb44 100644 --- a/src/fpnew_aux.sv +++ b/src/fpnew_aux.sv @@ -21,16 +21,13 @@ module fpnew_aux #( parameter int unsigned NumPipeRegs = 0, parameter type TagType = logic, - parameter type AuxType = logic, - parameter int unsigned NumLanes = 1 + parameter type AuxType = logic ) ( input logic clk_i, input logic rst_ni, // Input signals input TagType tag_i, input AuxType aux_i, - input logic is_vector_i, - input logic [NumLanes-1:0] lane_active_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -38,15 +35,11 @@ module fpnew_aux #( // Output signals output TagType tag_o, output AuxType aux_o, - output logic is_vector_o, - output logic [NumLanes-1:0] lane_active_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Register Enable for Lanes 
output logic [NumPipeRegs-1:0] reg_enable_o, - output logic [NumPipeRegs-1:0] vector_reg_enable_o, - output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o, // Indication of valid data in flight output logic busy_o ); @@ -58,8 +51,6 @@ module fpnew_aux #( // Input pipeline signals, index i holds signal after i register stages TagType [0:NumPipeRegs] tag; AuxType [0:NumPipeRegs] aux; - logic [0:NumPipeRegs] is_vector; - logic [0:NumPipeRegs][NumLanes-1:0] lane_active; logic [0:NumPipeRegs] valid; // Ready signal is combinatorial for all stages @@ -68,9 +59,7 @@ module fpnew_aux #( // First element of pipeline is taken from inputs assign tag [0] = tag_i; assign aux [0] = aux_i; - assign is_vector [0] = is_vector_i; assign valid [0] = in_valid_i; - assign lane_active[0] = lane_active_i; // Propagate pipeline ready signal to upstream circuitry assign in_ready_o = ready[0]; @@ -88,23 +77,12 @@ module fpnew_aux #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(valid[i+1], valid[i], ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = ready[i] & valid[i]; - - // Drive external registers with reg enable - assign reg_enable_o[i] = reg_ena; - - // Drive external vector registers with reg enable if operation is a vector - assign vector_reg_enable_o[i] = reg_ena & is_vector[i]; - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_reg_enable_o[l][i] = reg_ena & lane_active[i][l]; - end + // Enable register if pipeline ready and a valid data item is present + assign reg_enable_o[i] = ready[i] & valid[i]; // Generate the pipeline registers within the stages, use enable-registers - `FFL( tag[i+1], tag[i], reg_ena, TagType'('0)) - `FFL( aux[i+1], aux[i], reg_ena, AuxType'('0)) - `FFL( is_vector[i+1], is_vector[i], reg_ena, '0 ) - `FFL(lane_active[i+1], lane_active[i], reg_ena, '0 ) + `FFL( tag[i+1], tag[i], reg_enable_o[i], TagType'('0)) + `FFL( 
aux[i+1], aux[i], reg_enable_o[i], AuxType'('0)) end // Ready travels backwards from output side, driven by downstream circuitry @@ -113,9 +91,7 @@ module fpnew_aux #( // Assign module outputs assign tag_o = tag [NumPipeRegs]; assign aux_o = aux [NumPipeRegs]; - assign is_vector_o = is_vector [NumPipeRegs]; assign out_valid_o = valid [NumPipeRegs]; - assign lane_active_o = lane_active[NumPipeRegs]; // Assign output Flags: Busy if any element inside the pipe is valid assign busy_o = |valid; diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv index 7ab7763f..7f774427 100644 --- a/src/fpnew_aux_fsm.sv +++ b/src/fpnew_aux_fsm.sv @@ -23,16 +23,13 @@ module fpnew_aux_fsm #( parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = logic, - parameter type AuxType = logic, - parameter int unsigned NumLanes = 1 + parameter type AuxType = logic ) ( input logic clk_i, input logic rst_ni, // Input signals input TagType tag_i, input AuxType aux_i, - input logic is_vector_i, - input logic [NumLanes-1:0] lane_active_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -40,20 +37,14 @@ module fpnew_aux_fsm #( // Output signals output TagType tag_o, output AuxType aux_o, - output logic is_vector_o, - output logic [NumLanes-1:0] lane_active_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Register Enable for Lanes output logic [NumPipeRegs-1:0] reg_enable_o, - output logic [NumPipeRegs-1:0] vector_reg_enable_o, - output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o, // Signals for the Lane FSMs - // Signal to start the FSM, will be asserted for one cycle - output logic [NumLanes-1:0] lane_fsm_start_o, - // Signal that the FSM finished it's operation, should be asserted continuously - input logic [NumLanes-1:0] lane_fsm_ready_i, + output logic fsm_start_o, + input logic fsm_ready_i, // Indication of valid data in flight output logic busy_o ); @@ 
-79,8 +70,6 @@ module fpnew_aux_fsm #( // Input pipeline signals, index i holds signal after i register stages TagType [0:NUM_INP_REGS] in_tag; AuxType [0:NUM_INP_REGS] in_aux; - logic [0:NUM_INP_REGS] in_is_vector; - logic [0:NUM_INP_REGS][NumLanes-1:0] in_lane_active; logic [0:NUM_INP_REGS] in_valid; // Ready signal is combinatorial for all stages @@ -89,16 +78,14 @@ module fpnew_aux_fsm #( // First element of pipeline is taken from inputs assign in_tag [0] = tag_i; assign in_aux [0] = aux_i; - assign in_is_vector [0] = is_vector_i; assign in_valid [0] = in_valid_i; - assign in_lane_active[0] = lane_active_i; // Propagate pipeline ready signal to upstream circuitry assign in_ready_o = in_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline - + // Internal register enable for this stage logic reg_ena; // Determine the ready signal of the current stage - advance the pipeline: @@ -110,22 +97,11 @@ module fpnew_aux_fsm #( `FFLARNC(in_valid[i+1], in_valid[i], in_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = in_ready[i] & in_valid[i]; - - // Drive external registers with reg enable - assign reg_enable_o[i] = reg_ena; - - // Drive external vector registers with reg enable if operation is a vector - assign vector_reg_enable_o[i] = reg_ena & in_is_vector[i]; - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_reg_enable_o[l][i] = reg_ena & in_lane_active[i][l]; - end + assign reg_enable_o[i] = in_ready[i] & in_valid[i]; // Generate the pipeline registers within the stages, use enable-registers - `FFL( in_tag[i+1], in_tag[i], reg_ena, TagType'('0)) - `FFL( in_aux[i+1], in_aux[i], reg_ena, AuxType'('0)) - `FFL( in_is_vector[i+1], in_is_vector[i], reg_ena, '0 ) - `FFL(in_lane_active[i+1], in_lane_active[i], reg_ena, '0 ) + `FFL( in_tag[i+1], in_tag[i], reg_enable_o[i], TagType'('0)) + `FFL( in_aux[i+1], in_aux[i], 
reg_enable_o[i], AuxType'('0)) end // ---------- @@ -140,28 +116,22 @@ module fpnew_aux_fsm #( logic fsm_in_valid, fsm_in_ready; logic fsm_out_valid, fsm_out_ready; - // Synchronisazion signals - logic fsm_start, fsm_ready, fsm_busy; + logic fsm_busy; // Data holding signals TagType held_tag; AuxType held_aux; - logic held_is_vector; - logic [NumLanes-1:0] held_lane_active; // Upstream Handshake Connection assign fsm_in_valid = in_valid[NUM_INP_REGS]; assign in_ready[NUM_INP_REGS] = fsm_in_ready; - // Done when all active lanes are done - assign fsm_ready = &lane_fsm_ready_i; - // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm // Default assignments fsm_out_valid = 1'b0; fsm_in_ready = 1'b0; - fsm_start = 1'b0; + fsm_start_o = 1'b0; fsm_busy = 1'b0; state_d = state_q; @@ -170,19 +140,19 @@ module fpnew_aux_fsm #( fsm_in_ready = '1; if (fsm_in_valid) begin state_d = BUSY; - fsm_start = 1'b1; + fsm_start_o = 1'b1; end end BUSY: begin fsm_busy = 1'b1; // If all active lanes are done send data down chain - if (fsm_ready) begin + if (fsm_ready_i) begin fsm_out_valid = 1'b1; if (fsm_out_ready) begin fsm_in_ready = 1'b1; if (fsm_in_valid) begin state_d = BUSY; - fsm_start = 1'b1; + fsm_start_o = 1'b1; end else begin state_d = IDLE; end @@ -198,7 +168,7 @@ module fpnew_aux_fsm #( fsm_in_ready = 1'b1; if (fsm_in_valid) begin state_d = BUSY; - fsm_start = 1'b1; + fsm_start_o = 1'b1; end else begin state_d = IDLE; end @@ -220,19 +190,12 @@ module fpnew_aux_fsm #( `FF(state_q, state_d, IDLE); - // Start Lanes when FSM starts and lane is active - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_fsm_start_o[l] = fsm_start && in_lane_active[NUM_INP_REGS][l]; - end - // ---------------- // Data Holding FFs // ---------------- - `FFL( held_tag, in_tag[NUM_INP_REGS], fsm_start, TagType'('0)); - `FFL( held_aux, in_aux[NUM_INP_REGS], fsm_start, AuxType'('0)); - `FFL( held_is_vector, in_is_vector[NUM_INP_REGS], fsm_start, '0); - 
`FFL(held_lane_active, in_lane_active[NUM_INP_REGS], fsm_start, '0); + `FFL( held_tag, in_tag[NUM_INP_REGS], fsm_start_o, TagType'('0)); + `FFL( held_aux, in_aux[NUM_INP_REGS], fsm_start_o, AuxType'('0)); // --------------- // Output pipeline @@ -241,8 +204,6 @@ module fpnew_aux_fsm #( // Output pipeline signals, index i holds signal after i register stages TagType [0:NUM_OUT_REGS] out_tag; AuxType [0:NUM_OUT_REGS] out_aux; - logic [0:NUM_OUT_REGS] out_is_vector; - logic [0:NUM_OUT_REGS][NumLanes-1:0] out_lane_active; logic [0:NUM_OUT_REGS] out_valid; // Ready signal is combinatorial for all stages @@ -255,8 +216,6 @@ module fpnew_aux_fsm #( // Connect to Hold Register assign out_tag [0] = held_tag; assign out_aux [0] = held_aux; - assign out_is_vector [0] = held_is_vector; - assign out_lane_active[0] = held_lane_active; // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline @@ -277,17 +236,9 @@ module fpnew_aux_fsm #( // Drive external registers with reg enable assign reg_enable_o[NUM_INP_REGS + i] = reg_ena; - // Drive external vector registers with reg enable if operation is a vector - assign vector_reg_enable_o[NUM_INP_REGS + i] = reg_ena & out_is_vector[i]; - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_reg_enable_o[l][NUM_INP_REGS + i] = reg_ena & out_lane_active[i][l]; - end - // Generate the pipeline registers within the stages, use enable-registers `FFL( out_tag[i+1], out_tag[i], reg_ena, TagType'('0)) `FFL( out_aux[i+1], out_aux[i], reg_ena, AuxType'('0)) - `FFL( out_is_vector[i+1], out_is_vector[i], reg_ena, '0 ) - `FFL(out_lane_active[i+1], out_lane_active[i], reg_ena, '0 ) end // Ready travels backwards from output side, driven by downstream circuitry @@ -296,9 +247,7 @@ module fpnew_aux_fsm #( // Assign module outputs assign tag_o = out_tag [NUM_OUT_REGS]; assign aux_o = out_aux [NUM_OUT_REGS]; - assign is_vector_o = out_is_vector [NUM_OUT_REGS]; assign out_valid_o = out_valid [NUM_OUT_REGS]; 
- assign lane_active_o = out_lane_active[NUM_OUT_REGS]; // Assign output Flags: Busy if any element inside the pipe is valid assign busy_o = |in_valid | |out_valid | fsm_busy; diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 47c3384a..87a95462 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -13,6 +13,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_opgroup_fmt_slice #( parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL, parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), @@ -58,8 +60,9 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); + localparam int unsigned AUX_BITS = 2; - logic vectorial_op, cmp_op; + logic [AUX_BITS-1:0] aux_in, aux_out; logic [NUM_LANES*FP_WIDTH-1:0] slice_result; logic [Width-1:0] slice_regular_result, slice_class_result, slice_vec_class_result; @@ -71,7 +74,7 @@ module fpnew_opgroup_fmt_slice #( logic [NUM_LANES-1:0] lane_masks; logic [NUM_LANES-1:0] lane_is_class; // only the first one is actually used - logic result_is_vector, result_is_class, result_is_cmp; + logic result_is_class; fpnew_pkg::roundmode_e rnd_mode; @@ -80,41 +83,32 @@ module fpnew_opgroup_fmt_slice #( // ----------- // RSR supported only on SDOTP module assign rnd_mode = (rnd_mode_i == fpnew_pkg::RSR) ? 
fpnew_pkg::RNE : rnd_mode_i; - assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled - assign cmp_op = (op_i == fpnew_pkg::CMP); + assign aux_in[0] = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled + assign aux_in[1] = (op_i == fpnew_pkg::CMP); // --------------- // Generate Aux Chain // --------------- - // Signals to transmit reg enable to other modules - logic [NUM_LANES-1:0] in_lane_active, out_lane_active; - logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enable; + logic [NumPipeRegs-1:0] reg_enable; fpnew_aux #( .NumPipeRegs( NumPipeRegs ), .TagType ( TagType ), - .AuxType ( logic ), - .NumLanes ( NUM_LANES ) + .AuxType ( logic [AUX_BITS-1:0] ) ) i_aux ( .clk_i, .rst_ni, .tag_i, - .aux_i ( cmp_op ), - .is_vector_i ( vectorial_op ), - .lane_active_i ( in_lane_active ), + .aux_i ( aux_in ), .in_valid_i, .in_ready_o, .flush_i, .tag_o, - .aux_o ( result_is_cmp ), - .is_vector_o ( result_is_vector ), - .lane_active_o ( out_lane_active ), + .aux_o ( aux_out ), .out_valid_o, .out_ready_i, .busy_o, - .reg_enable_o ( /* Unused */ ), - .vector_reg_enable_o ( /* Unused */ ), - .lane_reg_enable_o ( lane_reg_enable ) + .reg_enable_o ( reg_enable ) ); // --------------- @@ -131,7 +125,16 @@ module fpnew_opgroup_fmt_slice #( logic [FP_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - assign in_lane_active[lane] = (lane == 0) | vectorial_op; // upper lanes only for vectors + // Build reg_enable for lane + logic [NumPipeRegs-1:0] lane_reg_enable; + logic [0:NumPipeRegs] lane_active; + + assign lane_active[0] = (lane == 0) | aux_in[0]; // upper lanes only for vectors + + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_enable + `FFL(lane_active[i+1], lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = lane_active[i] & reg_enable[i]; + end // Slice out the operands for this lane always_comb begin : prepare_input @@ -159,7 +162,7 @@ module fpnew_opgroup_fmt_slice #( .status_o 
( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enable[lane] ) + .reg_enable_i ( lane_reg_enable ) ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; @@ -183,20 +186,19 @@ module fpnew_opgroup_fmt_slice #( .class_mask_o ( lane_class_mask[lane] ), .is_class_o ( lane_is_class[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enable[lane] ) + .reg_enable_i ( lane_reg_enable ) ); end // ADD OTHER OPTIONS HERE // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = out_lane_active[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = out_lane_active[lane] ? op_status : '0; + assign local_result = lane_active[NumPipeRegs] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = lane_active[NumPipeRegs] ? op_status : '0; // Otherwise generate constant sign-extension end else begin assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box assign lane_status[lane] = '0; assign lane_is_class[lane] = 1'b0; - assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active end // Insert lane result into slice result @@ -250,12 +252,12 @@ module fpnew_opgroup_fmt_slice #( // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; - assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0]; + assign slice_class_result = aux_out[0] ? slice_vec_class_result : lane_class_mask[0]; // Select the proper result if (CompressedVecCmpResult) begin assign result_o = result_is_class ? slice_class_result : - result_is_cmp ? {'0, slice_cmp_result} : slice_regular_result; + aux_out[1] ? {'0, slice_cmp_result} : slice_regular_result; end else begin assign result_o = result_is_class ? 
slice_class_result : slice_regular_result; end diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 390b918c..6c4bb720 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -119,7 +119,6 @@ or on 16b inputs producing 32b outputs"); logic result_fmt_is_int, result_is_cpk; logic [1:0] result_vec_op; // info for vectorial results (for packing) - logic simd_synch_rdy, simd_synch_done; fpnew_pkg::roundmode_e rnd_mode; // ----------- @@ -169,67 +168,54 @@ or on 16b inputs producing 32b outputs"); // Generate Aux Chain // --------------- // Signals to transmit reg enable to other modules - logic [NumPipeRegs-1:0] vector_reg_enable; + logic [NumPipeRegs-1:0] reg_enable; - logic [NUM_LANES-1:0] in_lane_active, out_lane_active, lane_fsm_ready, lane_fsm_start; - logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enabe; + logic fsm_start, fsm_ready; + logic [NUM_LANES-1:0] lane_fsm_ready; + assign fsm_ready = &lane_fsm_ready; if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_aux fpnew_aux_fsm #( .NumPipeRegs( NumPipeRegs ), .PipeConfig ( PipeConfig ), .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ), - .NumLanes ( NUM_LANES ) + .AuxType ( logic [AUX_BITS-1:0] ) ) i_aux_fsm ( .clk_i, .rst_ni, .tag_i, - .aux_i ( in_aux ), - .is_vector_i ( vectorial_op ), - .lane_active_i ( in_lane_active ), + .aux_i ( in_aux ), .in_valid_i, .in_ready_o, .flush_i, .tag_o, - .aux_o ( out_aux ), - .is_vector_o ( /* Unused */ ), - .lane_active_o ( out_lane_active ), + .aux_o ( out_aux ), .out_valid_o, .out_ready_i, .busy_o, - .reg_enable_o ( /* Unused */ ), - .vector_reg_enable_o ( vector_reg_enable ), - .lane_reg_enable_o ( lane_reg_enabe ), - .lane_fsm_start_o ( lane_fsm_start ), - .lane_fsm_ready_i ( lane_fsm_ready ) + .reg_enable_o ( reg_enable ), + .fsm_start_o ( fsm_start ), + .fsm_ready_i ( fsm_ready ) ); end else begin: gen_direct_aux fpnew_aux #( .NumPipeRegs( NumPipeRegs ), .TagType ( TagType ), - 
.AuxType ( logic [AUX_BITS-1:0] ), - .NumLanes ( NUM_LANES ) + .AuxType ( logic [AUX_BITS-1:0] ) ) i_aux ( .clk_i, .rst_ni, .tag_i, - .aux_i ( in_aux ), - .is_vector_i ( vectorial_op ), - .lane_active_i ( in_lane_active ), + .aux_i ( in_aux ), .in_valid_i, .in_ready_o, .flush_i, .tag_o, - .aux_o ( out_aux ), - .is_vector_o ( /* Unused */ ), - .lane_active_o ( out_lane_active ), + .aux_o ( out_aux ), .out_valid_o, .out_ready_i, .busy_o, - .reg_enable_o ( /* Unused */ ), - .vector_reg_enable_o ( vector_reg_enable ), - .lane_reg_enable_o ( lane_reg_enabe ) + .reg_enable_o ( reg_enable ) ); end @@ -275,13 +261,68 @@ or on 16b inputs producing 32b outputs"); logic [LANE_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; + + + // Build reg_enable for lane + logic [NumPipeRegs-1:0] lane_reg_enable; + logic lane_fsm_start; + // Figure out if lane is active e.g. should be used - assign in_lane_active[lane] = ( + logic in_lane_active, out_lane_active; + + assign in_lane_active = ( (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | (LANE_FORMATS[dst_fmt_i] & is_up_cast) | (OpGroup == fpnew_pkg::DIVSQRT) ) & ((lane == 0) | vectorial_op); + if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_reg_enable + // This must match between this module and modules that use this module as reg enable input! + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // Always have one reg to use for FSM Input + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + + logic [0:NUM_INP_REGS] inp_pipe_lane_active; + logic [0:NUM_OUT_REGS] out_pipe_lane_active; + + assign inp_pipe_lane_active[0] = in_lane_active; + + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_in_pipe_enable + `FFL(inp_pipe_lane_active[i+1], inp_pipe_lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = inp_pipe_lane_active[i] & reg_enable[i]; + end + + assign lane_fsm_start = fsm_start & inp_pipe_lane_active[NUM_INP_REGS]; + `FFL(out_pipe_lane_active[0], inp_pipe_lane_active[NUM_INP_REGS], fsm_start, '0 ) + + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_out_pipe_enable + `FFL(out_pipe_lane_active[i+1], out_pipe_lane_active[i], reg_enable[NUM_INP_REGS + i], '0 ) + assign lane_reg_enable[NUM_INP_REGS + i] = out_pipe_lane_active[i] & reg_enable[NUM_INP_REGS + i]; // output stage i uses the enable of register NUM_INP_REGS + i + end + + assign out_lane_active = out_pipe_lane_active[NUM_OUT_REGS]; + + end else begin: gen_direct_reg_enable + logic [0:NumPipeRegs] pipe_lane_active; + + assign pipe_lane_active[0] = in_lane_active; + + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_enable + `FFL(pipe_lane_active[i+1], pipe_lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = pipe_lane_active[i] & reg_enable[i]; + end + + assign out_lane_active = pipe_lane_active[NumPipeRegs]; + end + // Slice out the operands for this lane, upper bits are ignored in the unit always_comb begin : prepare_input for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin @@ -331,12 +372,12 @@ or on 16b inputs producing 32b outputs"); .op_mod_i, .src_fmt_i, .dst_fmt_i, - .mask_i ( simd_mask_i[lane] ), - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( 
lane_reg_enable ) ); end else if (OpGroup == fpnew_pkg::DOTP) begin : lane_instance fpnew_sdotp_multi_wrapper #( @@ -356,12 +397,12 @@ or on 16b inputs producing 32b outputs"); .op_mod_i, .src_fmt_i, .dst_fmt_i, - .mask_i ( simd_mask_i[lane] ), - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance @@ -384,8 +425,8 @@ or on 16b inputs producing 32b outputs"); .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ), - .fsm_start_i ( lane_fsm_start[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt @@ -407,8 +448,8 @@ or on 16b inputs producing 32b outputs"); .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ), - .fsm_start_i ( lane_fsm_start[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else begin : gen_pulp_divsqrt @@ -430,8 +471,8 @@ or on 16b inputs producing 32b outputs"); .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ), - .fsm_start_i ( lane_fsm_start[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), .fsm_ready_o ( lane_fsm_ready[lane] ) ); end @@ -444,20 +485,20 @@ or on 16b inputs producing 32b outputs"); ) i_fpnew_cast_multi ( .clk_i, .rst_ni, - .operands_i ( local_operands[0] 
), - .is_boxed_i ( is_boxed_1op ), - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[0] ), + .is_boxed_i ( is_boxed_1op ), + .rnd_mode_i ( rnd_mode ), .op_i, .op_mod_i, .src_fmt_i, .dst_fmt_i, .int_fmt_i, - .mask_i ( simd_mask_i[lane] ), - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); end // ADD OTHER OPTIONS HERE @@ -467,8 +508,8 @@ or on 16b inputs producing 32b outputs"); end // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = out_lane_active[lane] ? op_result: '{default: lane_ext_bit[0]}; - assign lane_status[lane] = out_lane_active[lane] ? op_status : '0; + assign local_result = out_lane_active ? op_result: '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin : inactive_lane @@ -476,7 +517,6 @@ or on 16b inputs producing 32b outputs"); assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; - assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active assign lane_fsm_ready[lane] = 1'b1; // Lane does not exist, it is always ready just in case erronous data gets to the FSM in this slot end @@ -572,7 +612,7 @@ or on 16b inputs producing 32b outputs"); // Internal register enable for this stage logic reg_ena; // Enable register is set externally - assign reg_ena = vector_reg_enable[i]; + assign reg_ena = reg_enable[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) From 19ec5edd43f17e98e84ceb1883bf29f1618db5ed Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Fri, 12 Apr 2024 15:00:09 +0200 Subject: [PATCH 11/17] Added Redundancy Cells dependency. --- Bender.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/Bender.yml b/Bender.yml index ab2bc73c..73b9df75 100644 --- a/Bender.yml +++ b/Bender.yml @@ -8,6 +8,7 @@ package: dependencies: common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: 1.21.0} fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.4} + redundancy_cells: { git: "git@github.com:Lynx005F/redundancy_cells.git", rev: c594afeb384bc36547ca89dd17c41887cf15beac} sources: - src/fpnew_pkg.sv From 4515020eee421c87a75f5392941d06729504f0a1 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Fri, 12 Apr 2024 13:32:13 +0200 Subject: [PATCH 12/17] Added time based redundancy modules and switched to lockable RR-Arbiter. 
- New input signal redundancy_enable_i to switch in between redundant and non-redundant modes. - New output signal fault_detected_o for statistics - Redundancy Implementation selected via Enum in fpnew_pkg. - For TMR based redundancy use a large ID, for DMR base use a small ID and stall for divisions renamed modules --- src/fpnew_opgroup_block.sv | 15 +- src/fpnew_pkg.sv | 52 ++++ src/fpnew_top.sv | 514 +++++++++++++++++++++++++++++++------ 3 files changed, 503 insertions(+), 78 deletions(-) diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index db2c3032..18bbb17c 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -28,6 +28,7 @@ module fpnew_opgroup_block #( parameter logic TrueSIMDClass = 1'b0, parameter logic CompressedVecCmpResult = 1'b0, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, + parameter int unsigned LockRepetition = 1, // Do not change localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), @@ -61,6 +62,7 @@ module fpnew_opgroup_block #( // Output handshake output logic out_valid_o, input logic out_ready_i, + input logic [LockRepetition-1:0] out_lock_i, // Indication of valid data in flight output logic busy_o ); @@ -222,16 +224,23 @@ module fpnew_opgroup_block #( // ------------------ output_t arbiter_output; + logic [LockRepetition-1:0] flush; + for (genvar r = 0; r < LockRepetition; r++) begin: gen_rr_flush + assign flush[r] = flush_i; + end + // Round-Robin arbiter to decide which result to use - rr_arb_tree #( + rr_arb_tree_lock #( .NumIn ( NUM_FORMATS ), .DataType ( output_t ), - .AxiVldRdy ( 1'b1 ) + .AxiVldRdy ( 1'b1 ), + .InternalRedundancy ( LockRepetition > 1 ) ) i_arbiter ( .clk_i, .rst_ni, - .flush_i, + .flush_i ( flush ), .rr_i ( '0 ), + .lock_rr_i ( out_lock_i ), .req_i ( fmt_out_valid ), .gnt_o ( fmt_out_ready ), .data_i ( fmt_outputs ), diff --git a/src/fpnew_pkg.sv 
b/src/fpnew_pkg.sv index 42d0df6b..47366dc3 100644 --- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -325,6 +325,32 @@ package fpnew_pkg; LfsrInternalPrecision: 32 }; + // Different kinds of Redundancy that might be used + typedef enum logic [2:0] { + NONE, // No redundancy module is generated - redundancy can not be enabled + TTR, // Operands will be tripplicated in time - always output after 3 cycles (shorter critical path) + TTR_FAST, // Operands will be tripplicated in time - if nothing goes wrong output after 2 cycles (longer critical path) + TTR_SMALL, // Operands will be tripplicated in time, storage is deferred to handshake (might cause stalls) + DTR, // Operands will be duplicated in time and are retried on failure + DTR_INORDER // Operands will be duplicated in time and are retried on failure - always keeps the order of outputs the same + } redundancy_type_t; + + // FPU configuration: redundancy + typedef struct packed { + logic TripplicateRepetition; // Whether to tripplicate the state machines for redundant operations + redundancy_type_t RedundancyType; + } redundancy_features_t; + + localparam redundancy_features_t DEFAULT_NO_REDUNDANCY = '{ + TripplicateRepetition: 1'b0, + RedundancyType: NONE + }; + + localparam redundancy_features_t DEFAULT_REDUNDANCY = '{ + TripplicateRepetition: 1'b1, + RedundancyType: TTR_FAST + }; + // ----------------------- // Synthesis optimization // ----------------------- @@ -589,4 +615,30 @@ package fpnew_pkg; return res; endfunction + // Returns the number data elements in the longest path of the FPU + function automatic int unsigned longest_path(fmt_unsigned_t regs, fmt_logic_t cfg); + automatic int unsigned res = 0; + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (cfg[i]) res = maximum(res, regs[i]); + end + return res + 1; + endfunction + + // Returns the number data elements in the shortest path of the FPU + function automatic int unsigned shortest_path(fmt_unsigned_t regs, fmt_logic_t cfg); + automatic 
int unsigned res = 0; + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (cfg[i]) res = minimum(res, regs[i]); // NOTE(review): res starts at 0, so presumably this unsigned minimum stays 0 and the function always returns 1 - confirm intent + end + return res + 1; + endfunction + + // Return whether the DIVSQRT opgroup has a unit type other than DISABLED for any format + function automatic logic division_enabled(opgrp_fmt_unit_types_t unit_types); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (unit_types[DIVSQRT][i] != DISABLED) return 1'b1; + end + return 1'b0; + endfunction + endpackage diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index b564286d..8f7421a7 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -13,17 +13,20 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_top #( // FPU configuration - parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, - parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, + parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, + parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, // DivSqrtSel chooses among PULP, TH32, or THMULTI (see documentation and fpnew_pkg.sv for further details) - parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, - parameter type TagType = logic, - parameter logic TrueSIMDClass = 1'b0, - parameter logic EnableSIMDMask = 1'b0, - parameter logic CompressedVecCmpResult = 1'b0, // conceived for RV32FD cores + parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, + parameter type TagType = logic, + parameter logic TrueSIMDClass = 1'b0, + parameter logic EnableSIMDMask = 1'b0, + parameter logic CompressedVecCmpResult = 1'b0, // conceived for RV32FD cores parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, + parameter fpnew_pkg::redundancy_features_t RedundancyFeatures = fpnew_pkg::DEFAULT_NO_REDUNDANCY, // Do not change localparam int unsigned NumLanes = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors), 
localparam type MaskType = logic [NumLanes-1:0], @@ -33,6 +36,7 @@ module fpnew_top #( input logic clk_i, input logic rst_ni, input logic [31:0] hart_id_i, + input logic redundancy_enable_i, // Input signals input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, input fpnew_pkg::roundmode_e rnd_mode_i, @@ -56,31 +60,263 @@ module fpnew_top #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + output logic fault_detected_o ); localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS; localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS; + localparam int LOCK_TIMEOUT = fpnew_pkg::division_enabled(Implementation.UnitTypes) ? 60: 5; + + localparam bit DIVISION_ENABLED = fpnew_pkg::division_enabled(Implementation.UnitTypes); + + localparam bit TTR_ENABLED = + RedundancyFeatures.RedundancyType == fpnew_pkg::TTR || + RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_FAST || + RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_SMALL; + + localparam bit DTR_ENABLED = + RedundancyFeatures.RedundancyType == fpnew_pkg::DTR || + RedundancyFeatures.RedundancyType == fpnew_pkg::DTR_INORDER; + + localparam bit SELF_CHECKING = RedundancyFeatures.TripplicateRepetition; + + localparam int MAX_DELAY = + // Base formula for how long something can stay in chain + 2 * fpnew_pkg::longest_path(Implementation.PipeRegs, Implementation.PipeConfig) + - fpnew_pkg::shortest_path(Implementation.PipeRegs, Implementation.PipeConfig) + // In case of a DTR based approach the retry has another storage element that we need to account for + + (DTR_ENABLED ? 1 : 0); + // The ternary operator ? 
1 : 0 is needed since True / False might not evaluate to 1 / 0 in all tools + // For example in synopsys-2022.03-kgf dc_shell the True evaluates to 2 or 3 in this line + + // Based on the max delay we can now calculate how big of an ID is needed to ensure ids are locally unique + localparam int unsigned ID_SIZE_BASE = fpnew_pkg::maximum( + 1, + $clog2(MAX_DELAY) + (DIVISION_ENABLED ? (TTR_ENABLED ? 4 : 1) : 0) + // In case of a TTR approach we add extra ID Bits for the Division since it can take up to 12 cycles + // For DTR we only need 1 bit extra as we split the storage + ); + + // We have an extra bit for DMR methods to do error detection + localparam int unsigned ID_SIZE = ID_SIZE_BASE + (DTR_ENABLED ? 1 : 0); // ---------------- // Type Definition // ---------------- typedef struct packed { - logic [WIDTH-1:0] result; - fpnew_pkg::status_t status; - TagType tag; - } output_t; + logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands; + fpnew_pkg::roundmode_e rnd_mode; + fpnew_pkg::operation_e op; + logic op_mod; + fpnew_pkg::fp_format_e src_fmt; + fpnew_pkg::fp_format_e dst_fmt; + fpnew_pkg::int_format_e int_fmt; + logic vectorial_op; + TagType tag; + MaskType simd_mask; + } tmr_in_stacked_t; + + typedef struct packed { + TagType tag; + logic [ID_SIZE-1:0] opid; + } submodules_stacked_t; + + typedef struct packed { + logic [WIDTH-1:0] result; + fpnew_pkg::status_t status; + TagType tag; + logic [ID_SIZE-1:0] opid; + } rr_stacked_t; + + typedef struct packed { + logic [WIDTH-1:0] result; + fpnew_pkg::status_t status; + TagType tag; + } tmr_out_stacked_t; + + // ---------------- + // Enable / Disable Redundancy + // ---------------- + + logic in_gated_valid, in_gated_ready; + logic internal_busy, gated_redundancy_enable; + + if (RedundancyFeatures.RedundancyType == fpnew_pkg::NONE) begin : gen_no_redundandcy_controller + assign in_gated_valid = in_valid_i; + assign in_ready_o = in_gated_ready; + assign busy_o = internal_busy; + assign gated_redundancy_enable = 0; + end 
else begin: gen_redundancy_controller + redundancy_controller # ( + .InternalRedundancy ( SELF_CHECKING ), + .LockTimeout ( LOCK_TIMEOUT ) + ) i_redundancy_controller ( + .clk_i, + .rst_ni, + .enable_i ( redundancy_enable_i ), + .busy_o ( busy_o ), + .busy_i ( internal_busy ), + .enable_o ( gated_redundancy_enable ), + .valid_i ( in_valid_i ), + .ready_o ( in_ready_o ), + .valid_o ( in_gated_valid ), + .ready_i ( in_gated_ready ) // downstream ready from the redundancy start stage, not the controller's own valid + ); + end + + // ----------- + // Repeat Signals for Redundancy + // ----------- + tmr_in_stacked_t in_data, in_redundant_data; + logic [ID_SIZE-1:0] in_redundant_opid; + logic in_redundant_valid, in_redundant_ready; + + assign in_data.operands = operands_i; + assign in_data.rnd_mode = rnd_mode_i; + assign in_data.op = op_i; + assign in_data.op_mod = op_mod_i; + assign in_data.src_fmt = src_fmt_i; + assign in_data.dst_fmt = dst_fmt_i; + assign in_data.int_fmt = int_fmt_i; + assign in_data.vectorial_op = vectorial_op_i; + assign in_data.tag = tag_i; + assign in_data.simd_mask = simd_mask_i | ~{NumLanes{EnableSIMDMask}}; // Filter out the mask if not used + + // Connection down to counterpart + retry_interface #( + .IDSize ( ID_SIZE -1 ) + ) retry_connection (); + + // Connection down to counterpart + DTR_interface #( + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ) + ) dtr_connection (); + + if (TTR_ENABLED) begin: gen_in_ttr + + localparam bit SKIP_STORAGE = RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_SMALL; + + TTR_start #( + .DataType ( tmr_in_stacked_t ), + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ), + .EarlyReadyEnable ( !SKIP_STORAGE ) + ) i_TTR_start ( + .clk_i, + .rst_ni, + .enable_i( gated_redundancy_enable ), + .data_i ( in_data ), + .valid_i ( in_gated_valid ), + .ready_o ( in_gated_ready ), + .data_o ( in_redundant_data ), + .id_o ( in_redundant_opid ), + .valid_o ( in_redundant_valid ), + .ready_i ( in_redundant_ready ) + ); + + // Don't care for dtr specific signals + assign dmr_next_id 
= fpnew_pkg::DONT_CARE; + assign retry_ready = fpnew_pkg::DONT_CARE; + assign retry_replacement_id = fpnew_pkg::DONT_CARE; + + end else if (DTR_ENABLED) begin: gen_in_dtr + // Connection directly to next module + tmr_in_stacked_t retry2dmr_data; + logic [ID_SIZE-2:0] retry2dmr_opid; + logic retry2dmr_valid, retry2dmr_ready; + + logic op_is_div; + assign op_is_div = in_data.op == fpnew_pkg::SQRT || in_data.op == fpnew_pkg::DIV; + + if (RedundancyFeatures.RedundancyType == fpnew_pkg::DTR) begin: gen_in_oo_retry + retry_start #( + .DataType ( tmr_in_stacked_t ), + .IDSize ( ID_SIZE - 1 ), + .ExternalIDBits ( DIVISION_ENABLED ? 1: 0 ) + ) i_retry_start ( + .clk_i, + .rst_ni, + .data_i ( in_data ), + .ext_id_bits_i ( op_is_div ), + .valid_i ( in_gated_valid ), + .ready_o ( in_gated_ready ), + .data_o ( retry2dmr_data ), + .id_o ( retry2dmr_opid ), + .valid_o ( retry2dmr_valid ), + .ready_i ( retry2dmr_ready ), + .retry ( retry_connection ) + ); + assign retry_replacement_id = fpnew_pkg::DONT_CARE; + + end else begin: gen_in_io_retry + + + retry_inorder_start #( + .DataType ( tmr_in_stacked_t ), + .IDSize ( ID_SIZE - 1 ), + .ExternalIDBits ( DIVISION_ENABLED ? 
1: 0 ) + ) i_retry_inorder_start ( + .clk_i, + .rst_ni, + .data_i ( in_data ), + .ext_id_bits_i ( op_is_div ), + .valid_i ( in_gated_valid ), + .ready_o ( in_gated_ready ), + .data_o ( retry2dmr_data ), + .id_o ( retry2dmr_opid ), + .valid_o ( retry2dmr_valid ), + .ready_i ( retry2dmr_ready ), + .retry ( retry_connection ) + ); + end + + DTR_start #( + .DataType ( tmr_in_stacked_t ), + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ), + .UseExternalId ( 1 ), + .EarlyReadyEnable ( 1 ) + ) i_DTR_start ( + .clk_i, + .rst_ni, + .enable_i ( gated_redundancy_enable ), + .dtr_interface ( dtr_connection ), + .data_i ( retry2dmr_data ), + .id_i ( retry2dmr_opid ), + .valid_i ( retry2dmr_valid ), + .ready_o ( retry2dmr_ready ), + .data_o ( in_redundant_data ), + .id_o ( in_redundant_opid ), + .valid_o ( in_redundant_valid ), + .ready_i ( in_redundant_ready ) + ); + end else begin: gen_in_no_redundancy + assign in_redundant_data = in_data; + assign in_redundant_valid = in_gated_valid; + assign in_gated_ready = in_redundant_ready; + assign in_redundant_opid = 0; + + assign dmr_next_id = fpnew_pkg::DONT_CARE; + assign retry_ready = fpnew_pkg::DONT_CARE; + assign retry_replacement_id = fpnew_pkg::DONT_CARE; + end // Handshake signals for the blocks - logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy; - output_t [NUM_OPGROUPS-1:0] opgrp_outputs; + logic [NUM_OPGROUPS-1:0] in_opgrp_ready, out_opgrp_valid, out_opgrp_ready, out_opgrp_ext, opgrp_busy; + rr_stacked_t [NUM_OPGROUPS-1:0] out_opgrp_data; + + localparam int LockRepetition = RedundancyFeatures.TripplicateRepetition ? 
3 : 1; + logic [LockRepetition-1:0] out_rr_lock; logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed; // ----------- // Input Side // ----------- - assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)]; + assign in_redundant_ready = in_redundant_valid & in_opgrp_ready[fpnew_pkg::get_opgroup(in_redundant_data.op)]; + assign internal_busy = (| opgrp_busy); // NaN-boxing check for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check @@ -88,8 +324,8 @@ module fpnew_top #( // NaN boxing is only generated if it's enabled and needed if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands - assign is_boxed[fmt][op] = (!vectorial_op_i) - ? operands_i[op][WIDTH-1:FP_WIDTH] == '1 + assign is_boxed[fmt][op] = (!in_redundant_data.vectorial_op) + ? in_redundant_data.operands[op][WIDTH-1:FP_WIDTH] == '1 : 1'b1; end end else begin : no_check @@ -97,10 +333,6 @@ module fpnew_top #( end end - // Filter out the mask if not used - MaskType simd_mask; - assign simd_mask = simd_mask_i | ~{NumLanes{EnableSIMDMask}}; - // ------------------------- // Generate Operation Blocks // ------------------------- @@ -110,7 +342,7 @@ module fpnew_top #( logic in_valid; logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed; - assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp)); + assign in_valid = in_redundant_valid & (fpnew_pkg::get_opgroup(in_redundant_data.op) == fpnew_pkg::opgroup_e'(opgrp)); // slice out input boxing always_comb begin : slice_inputs @@ -118,77 +350,209 @@ module fpnew_top #( input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0]; end + submodules_stacked_t in_tag, out_tag; + + assign in_tag.tag = in_redundant_data.tag; + assign in_tag.opid = in_redundant_opid; + fpnew_opgroup_block #( - .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), - .Width ( WIDTH ), - .EnableVectors ( Features.EnableVectors ), - .DivSqrtSel ( DivSqrtSel ), - 
.FpFmtMask ( Features.FpFmtMask ), - .IntFmtMask ( Features.IntFmtMask ), - .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), - .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), - .PipeConfig ( Implementation.PipeConfig ), - .TagType ( TagType ), - .TrueSIMDClass ( TrueSIMDClass ), - .CompressedVecCmpResult ( CompressedVecCmpResult ), - .StochasticRndImplementation ( StochasticRndImplementation ) + .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), + .Width ( WIDTH ), + .EnableVectors ( Features.EnableVectors ), + .DivSqrtSel ( DivSqrtSel ), + .FpFmtMask ( Features.FpFmtMask ), + .IntFmtMask ( Features.IntFmtMask ), + .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), + .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), + .PipeConfig ( Implementation.PipeConfig ), + .TagType ( submodules_stacked_t ), + .TrueSIMDClass ( TrueSIMDClass ), + .CompressedVecCmpResult ( CompressedVecCmpResult ), + .StochasticRndImplementation ( StochasticRndImplementation ), + .LockRepetition ( LockRepetition ) ) i_opgroup_block ( .clk_i, .rst_ni, - .hart_id_i, - .operands_i ( operands_i[NUM_OPS-1:0] ), - .is_boxed_i ( input_boxed ), - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i, - .dst_fmt_i, - .int_fmt_i, - .vectorial_op_i, - .tag_i, - .simd_mask_i ( simd_mask ), - .in_valid_i ( in_valid ), - .in_ready_o ( opgrp_in_ready[opgrp] ), + .hart_id_i ( hart_id_i ), + .operands_i ( in_redundant_data.operands[NUM_OPS-1:0] ), + .is_boxed_i ( input_boxed ), + .rnd_mode_i ( in_redundant_data.rnd_mode ), + .op_i ( in_redundant_data.op ), + .op_mod_i ( in_redundant_data.op_mod ), + .src_fmt_i ( in_redundant_data.src_fmt ), + .dst_fmt_i ( in_redundant_data.dst_fmt ), + .int_fmt_i ( in_redundant_data.int_fmt ), + .vectorial_op_i ( in_redundant_data.vectorial_op ), + .tag_i ( in_tag ), + .simd_mask_i ( in_redundant_data.simd_mask ), + .in_valid_i ( in_valid ), + .in_ready_o ( in_opgrp_ready[opgrp] ), .flush_i, - .result_o ( opgrp_outputs[opgrp].result ), - .status_o ( opgrp_outputs[opgrp].status ), - 
.extension_bit_o ( opgrp_ext[opgrp] ), - .tag_o ( opgrp_outputs[opgrp].tag ), - .out_valid_o ( opgrp_out_valid[opgrp] ), - .out_ready_i ( opgrp_out_ready[opgrp] ), - .busy_o ( opgrp_busy[opgrp] ) + .result_o ( out_opgrp_data[opgrp].result ), + .status_o ( out_opgrp_data[opgrp].status ), + .extension_bit_o ( out_opgrp_ext[opgrp] ), + .tag_o ( out_tag ), + .out_valid_o ( out_opgrp_valid[opgrp] ), + .out_lock_i ( out_rr_lock ), + .out_ready_i ( out_opgrp_ready[opgrp] ), + .busy_o ( opgrp_busy[opgrp] ) ); + + assign out_opgrp_data[opgrp].tag = out_tag.tag; + assign out_opgrp_data[opgrp].opid = out_tag.opid; + end // ------------------ // Arbitrate Outputs // ------------------ - output_t arbiter_output; + logic out_redundant_valid, out_redundant_ready; + rr_stacked_t out_redundant_data; + + logic [LockRepetition-1:0] flush; + for (genvar r = 0; r < LockRepetition; r++) begin: gen_rr_flush + assign flush[r] = flush_i; + end // Round-Robin arbiter to decide which result to use - rr_arb_tree #( - .NumIn ( NUM_OPGROUPS ), - .DataType ( output_t ), - .AxiVldRdy ( 1'b1 ) + rr_arb_tree_lock #( + .NumIn ( NUM_OPGROUPS ), + .DataType ( rr_stacked_t ), + .AxiVldRdy ( 1'b1 ), + .FairArb ( 1'b1 ), + .InternalRedundancy ( SELF_CHECKING ) ) i_arbiter ( .clk_i, .rst_ni, - .flush_i, - .rr_i ( '0 ), - .req_i ( opgrp_out_valid ), - .gnt_o ( opgrp_out_ready ), - .data_i ( opgrp_outputs ), - .gnt_i ( out_ready_i ), - .req_o ( out_valid_o ), - .data_o ( arbiter_output ), - .idx_o ( /* unused */ ) + .flush_i ( flush ), + .rr_i ( '0 ), + .lock_rr_i ( out_rr_lock ), + .req_i ( out_opgrp_valid ), + .gnt_o ( out_opgrp_ready ), + .data_i ( out_opgrp_data ), + .gnt_i ( out_redundant_ready ), + .req_o ( out_redundant_valid ), + .data_o ( out_redundant_data ), + .idx_o ( /* Unused */ ) ); - // Unpack output - assign result_o = arbiter_output.result; - assign status_o = arbiter_output.status; - assign tag_o = arbiter_output.tag; + // ------------------ + // Unrepeat Outputs + // ------------------ + 
+ tmr_out_stacked_t out_data, out_redundant_data_noid; + assign out_redundant_data_noid.tag = out_redundant_data.tag; + assign out_redundant_data_noid.status = out_redundant_data.status; + assign out_redundant_data_noid.result = out_redundant_data.result; + + if (TTR_ENABLED) begin : gen_out_ttr + localparam bit EARLY_RETURN = RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_FAST; + + TTR_end #( + .DataType ( tmr_out_stacked_t ), + .LockTimeout ( LOCK_TIMEOUT ), + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ), + .EarlyValidEnable ( EARLY_RETURN ) + ) i_TTR_end ( + .clk_i, + .rst_ni, + .enable_i ( gated_redundancy_enable ), + .data_i ( out_redundant_data_noid ), + .id_i ( out_redundant_data.opid ), + .valid_i ( out_redundant_valid ), + .ready_o ( out_redundant_ready ), + .lock_o ( out_rr_lock ), + .data_o ( out_data ), + .valid_o ( out_valid_o ), + .ready_i ( out_ready_i ), + .fault_detected_o ( fault_detected_o ) + ); + + assign retry_opid = fpnew_pkg::DONT_CARE; + assign retry_valid = fpnew_pkg::DONT_CARE; + assign retry_lock = fpnew_pkg::DONT_CARE; + + end else if (DTR_ENABLED) begin : gen_out_dmr + tmr_out_stacked_t dmr2retry_data; + logic [ID_SIZE-2:0] dmr2retry_opid; + logic dmr2retry_valid, dmr2retry_ready, dmr2retry_needs_retry; - assign busy_o = (| opgrp_busy); + DTR_end #( + .DataType ( tmr_out_stacked_t ), + .LockTimeout ( LOCK_TIMEOUT ), + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ) + ) i_DTR_end ( + .clk_i, + .rst_ni, + .enable_i ( gated_redundancy_enable ), + .dtr_interface ( dtr_connection ), + .data_i ( out_redundant_data_noid ), + .id_i ( out_redundant_data.opid ), + .valid_i ( out_redundant_valid ), + .ready_o ( out_redundant_ready ), + .lock_o ( out_rr_lock ), + .data_o ( dmr2retry_data ), + .id_o ( dmr2retry_opid ), + .needs_retry_o ( dmr2retry_needs_retry ), + .valid_o ( dmr2retry_valid ), + .ready_i ( dmr2retry_ready ), + .fault_detected_o ( fault_detected_o ) + ); + + if (RedundancyFeatures.RedundancyType == 
fpnew_pkg::DTR) begin: gen_out_oo_retry + retry_end #( + .DataType ( tmr_out_stacked_t ), + .IDSize ( ID_SIZE - 1 ) + ) i_retry_end ( + .clk_i, + .rst_ni, + .data_i ( dmr2retry_data ), + .id_i ( dmr2retry_opid ), + .needs_retry_i ( dmr2retry_needs_retry ), + .valid_i ( dmr2retry_valid ), + .ready_o ( dmr2retry_ready ), + .data_o ( out_data ), + .valid_o ( out_valid_o ), + .ready_i ( out_ready_i ), + .retry ( retry_connection ) + ); + assign retry_lock = fpnew_pkg::DONT_CARE; + + end else begin: gen_out_io_retry + retry_inorder_end #( + .DataType ( tmr_out_stacked_t ), + .IDSize ( ID_SIZE - 1 ) + ) i_retry_inorder_end ( + .clk_i, + .rst_ni, + .data_i ( dmr2retry_data ), + .id_i ( dmr2retry_opid ), + .needs_retry_i ( dmr2retry_needs_retry ), + .valid_i ( dmr2retry_valid ), + .ready_o ( dmr2retry_ready ), + .data_o ( out_data ), + .valid_o ( out_valid_o ), + .ready_i ( out_ready_i ), + .retry ( retry_connection ) + ); + end + end else begin : gen_out_no_redundancy + assign out_data = out_redundant_data_noid; + assign out_valid_o = out_redundant_valid; + assign out_redundant_ready = out_ready_i; + assign out_rr_lock = 0; + assign fault_detected_o = 0; + + assign retry_opid = fpnew_pkg::DONT_CARE; + assign retry_valid = fpnew_pkg::DONT_CARE; + assign retry_lock = fpnew_pkg::DONT_CARE; + end + + // Unpack output + assign result_o = out_data.result; + assign status_o = out_data.status; + assign tag_o = out_data.tag; endmodule From 8cd7e5066f72d9fafff50dc8ab58cc0cd29b88ce Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Fri, 14 Jun 2024 11:20:55 +0200 Subject: [PATCH 13/17] Fixed Various internals that caused problems when bitflips occur. - Removed recursive assignments that can cause trouble in simulation - Removed repeated assignments in always_comb of classifier that cause trouble in simulation - Fixed format selection can evaluate to X when faults are injected into Enum in simulation. 
--- src/fpnew_classifier.sv | 21 +++++++++++---------- src/fpnew_fma.sv | 18 ++++++++++++------ src/fpnew_fma_multi.sv | 17 ++++++++++++----- src/fpnew_opgroup_block.sv | 2 +- src/fpnew_pkg.sv | 2 +- 5 files changed, 37 insertions(+), 23 deletions(-) diff --git a/src/fpnew_classifier.sv b/src/fpnew_classifier.sv index a322946d..927cf051 100644 --- a/src/fpnew_classifier.sv +++ b/src/fpnew_classifier.sv @@ -55,20 +55,21 @@ module fpnew_classifier #( is_boxed = is_boxed_i[op]; is_normal = is_boxed && (value.exponent != '0) && (value.exponent != '1); is_zero = is_boxed && (value.exponent == '0) && (value.mantissa == '0); - is_subnormal = is_boxed && (value.exponent == '0) && !is_zero; is_inf = is_boxed && ((value.exponent == '1) && (value.mantissa == '0)); + is_subnormal = is_boxed && (value.exponent == '0) && !is_zero; is_nan = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0)); is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0); is_quiet = is_nan && !is_signalling; - // Assign output for current input - info_o[op].is_normal = is_normal; - info_o[op].is_subnormal = is_subnormal; - info_o[op].is_zero = is_zero; - info_o[op].is_inf = is_inf; - info_o[op].is_nan = is_nan; - info_o[op].is_signalling = is_signalling; - info_o[op].is_quiet = is_quiet; - info_o[op].is_boxed = is_boxed; end + + // Assign output for current input + assign info_o[op].is_normal = is_normal; + assign info_o[op].is_subnormal = is_subnormal; + assign info_o[op].is_zero = is_zero; + assign info_o[op].is_inf = is_inf; + assign info_o[op].is_nan = is_nan; + assign info_o[op].is_signalling = is_signalling; + assign info_o[op].is_quiet = is_quiet; + assign info_o[op].is_boxed = is_boxed; end endmodule diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index c6ef899a..992dbadd 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -147,22 +147,28 @@ module fpnew_fma #( // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode // | *others* | 
\c - | *invalid* // \note \c op_mod_q always inverts the sign of the addend. - always_comb begin : op_select + // Fix for InjectaFault + fp_t operand_a_base, operand_b_base, operand_c_base; + assign operand_a_base = inp_pipe_operands_q[NUM_INP_REGS][0]; + assign operand_b_base = inp_pipe_operands_q[NUM_INP_REGS][1]; + assign operand_c_base = inp_pipe_operands_q[NUM_INP_REGS][2]; + + always_comb begin : op_select // Default assignments - packing-order-agnostic - operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; - operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; - operand_c = inp_pipe_operands_q[NUM_INP_REGS][2]; + operand_a = operand_a_base; + operand_b = operand_b_base; + operand_c = operand_c_base; info_a = info_q[0]; info_b = info_q[1]; info_c = info_q[2]; // op_mod_q inverts sign of operand C - operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + operand_c.sign = operand_c_base.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::FMADD: ; // do nothing - fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a_base.sign; // invert sign of product fpnew_pkg::ADD: begin // Set multiplicand to +1 operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0}; info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index 77886424..89dce97f 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -196,22 +196,29 @@ module fpnew_fma_multi #( // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode // | *others* | \c - | *invalid* // \note \c op_mod_q always inverts the sign of the addend. 
+ + // Fix for InjectaFault + fp_t operand_a_base, operand_b_base, operand_c_base; + assign operand_a_base = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]}; + assign operand_b_base = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]}; + assign operand_c_base = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]}; + always_comb begin : op_select // Default assignments - packing-order-agnostic - operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]}; - operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]}; - operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]}; + operand_a = operand_a_base; + operand_b = operand_b_base; + operand_c = operand_c_base; info_a = info_q[src_fmt_q][0]; info_b = info_q[src_fmt_q][1]; info_c = info_q[dst_fmt_q][2]; // op_mod_q inverts sign of operand C - operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + operand_c.sign = operand_c_base.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::FMADD: ; // do nothing - fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a_base.sign; // invert sign of product fpnew_pkg::ADD: begin // Set multiplicand to +1 operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0}; info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. 
diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index 18bbb17c..76b2912b 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -177,7 +177,7 @@ module fpnew_opgroup_block #( logic in_valid; - assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED); + assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED && dst_fmt_i < dst_fmt_i.num()); fpnew_opgroup_multifmt_slice #( .OpGroup ( OpGroup ), diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv index 47366dc3..7b545654 100644 --- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -99,7 +99,7 @@ package fpnew_pkg; INT64: return 64; default: begin // pragma translate_off - $fatal(1, "Invalid INT format supplied"); + $error("Invalid INT format supplied"); // pragma translate_on // just return any integer to avoid any latches // hopefully this error is caught by simulation From 512b94cf6859ead40a75551780d353e320bfc52f Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 27 Nov 2024 09:30:12 +0100 Subject: [PATCH 14/17] Updated redundancy_cells dependency so external id width mismatch warning is fixed. 
--- Bender.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Bender.yml b/Bender.yml index 73b9df75..d61e135c 100644 --- a/Bender.yml +++ b/Bender.yml @@ -8,7 +8,7 @@ package: dependencies: common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: 1.21.0} fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.4} - redundancy_cells: { git: "git@github.com:Lynx005F/redundancy_cells.git", rev: c594afeb384bc36547ca89dd17c41887cf15beac} + redundancy_cells: { git: "git@github.com:Lynx005F/redundancy_cells.git", rev: 4df64e39c4e70ff03880cae6b79739c2853f8e7e} sources: - src/fpnew_pkg.sv From 8b4a203a3ea3fae02cc61010daf16b48e6aeef7d Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 27 Nov 2024 17:20:13 +0100 Subject: [PATCH 15/17] Fixed typo in instantiation --- src/fpnew_top.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index 8f7421a7..c8629f2e 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -161,7 +161,7 @@ module fpnew_top #( .valid_i ( in_valid_i ), .ready_o ( in_ready_o ), .valid_o ( in_gated_valid ), - .ready_i ( in_gated_valid ) + .ready_i ( in_gated_ready ) ); end From 02d1d30c8fb51cd7b334b03171d8f92a8b566c22 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Fri, 13 Dec 2024 18:21:49 +0100 Subject: [PATCH 16/17] Switched to new experimental DTR version. 
--- Bender.yml | 2 +- src/fpnew_top.sv | 26 +++++++++----------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/Bender.yml b/Bender.yml index d61e135c..ea9cc0b1 100644 --- a/Bender.yml +++ b/Bender.yml @@ -8,7 +8,7 @@ package: dependencies: common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: 1.21.0} fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.4} - redundancy_cells: { git: "git@github.com:Lynx005F/redundancy_cells.git", rev: 4df64e39c4e70ff03880cae6b79739c2853f8e7e} + redundancy_cells: { git: "git@github.com:Lynx005F/redundancy_cells.git", rev: 8c9cfee55faffbe4e1e8af4dc8676439d9c92152} sources: - src/fpnew_pkg.sv diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index c8629f2e..2a08e0d6 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -96,10 +96,10 @@ module fpnew_top #( $clog2(MAX_DELAY) + (DIVISION_ENABLED ? (TTR_ENABLED ? 4 : 1) : 0) // In case of a TTR approach we add extra ID Bits for the Division since it can take up to 12 cycles // For DTR we only need 1 bit extra as we split the storage - ); + ); // We have an extra bit for DMR methods to do error detection - localparam int unsigned ID_SIZE = ID_SIZE_BASE + (DTR_ENABLED ? 1 : 0); + localparam int unsigned ID_SIZE = ID_SIZE_BASE + (DTR_ENABLED ? 
3 : 0); // ---------------- // Type Definition @@ -185,15 +185,9 @@ module fpnew_top #( // Connection down to counterpart retry_interface #( - .IDSize ( ID_SIZE -1 ) + .IDSize ( ID_SIZE ) ) retry_connection (); - // Connection down to counterpart - DTR_interface #( - .IDSize ( ID_SIZE ), - .InternalRedundancy ( SELF_CHECKING ) - ) dtr_connection (); - if (TTR_ENABLED) begin: gen_in_ttr localparam bit SKIP_STORAGE = RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_SMALL; @@ -224,7 +218,7 @@ module fpnew_top #( end else if (DTR_ENABLED) begin: gen_in_dtr // Connection directly to next module tmr_in_stacked_t retry2dmr_data; - logic [ID_SIZE-2:0] retry2dmr_opid; + logic [ID_SIZE-1:0] retry2dmr_opid; logic retry2dmr_valid, retry2dmr_ready; logic op_is_div; @@ -233,7 +227,7 @@ module fpnew_top #( if (RedundancyFeatures.RedundancyType == fpnew_pkg::DTR) begin: gen_in_oo_retry retry_start #( .DataType ( tmr_in_stacked_t ), - .IDSize ( ID_SIZE - 1 ), + .IDSize ( ID_SIZE ), .ExternalIDBits ( DIVISION_ENABLED ? 1: 0 ) ) i_retry_start ( .clk_i, @@ -255,7 +249,7 @@ module fpnew_top #( retry_inorder_start #( .DataType ( tmr_in_stacked_t ), - .IDSize ( ID_SIZE - 1 ), + .IDSize ( ID_SIZE ), .ExternalIDBits ( DIVISION_ENABLED ? 
1: 0 ) ) i_retry_inorder_start ( .clk_i, @@ -282,7 +276,6 @@ module fpnew_top #( .clk_i, .rst_ni, .enable_i ( gated_redundancy_enable ), - .dtr_interface ( dtr_connection ), .data_i ( retry2dmr_data ), .id_i ( retry2dmr_opid ), .valid_i ( retry2dmr_valid ), @@ -475,7 +468,7 @@ module fpnew_top #( end else if (DTR_ENABLED) begin : gen_out_dmr tmr_out_stacked_t dmr2retry_data; - logic [ID_SIZE-2:0] dmr2retry_opid; + logic [ID_SIZE-1:0] dmr2retry_opid; logic dmr2retry_valid, dmr2retry_ready, dmr2retry_needs_retry; DTR_end #( @@ -487,7 +480,6 @@ module fpnew_top #( .clk_i, .rst_ni, .enable_i ( gated_redundancy_enable ), - .dtr_interface ( dtr_connection ), .data_i ( out_redundant_data_noid ), .id_i ( out_redundant_data.opid ), .valid_i ( out_redundant_valid ), @@ -504,7 +496,7 @@ module fpnew_top #( if (RedundancyFeatures.RedundancyType == fpnew_pkg::DTR) begin: gen_out_oo_retry retry_end #( .DataType ( tmr_out_stacked_t ), - .IDSize ( ID_SIZE - 1 ) + .IDSize ( ID_SIZE ) ) i_retry_end ( .clk_i, .rst_ni, @@ -523,7 +515,7 @@ module fpnew_top #( end else begin: gen_out_io_retry retry_inorder_end #( .DataType ( tmr_out_stacked_t ), - .IDSize ( ID_SIZE - 1 ) + .IDSize ( ID_SIZE ) ) i_retry_inorder_end ( .clk_i, .rst_ni, From 9ae365eed4d35a5f24ee03b6d28e8736691049ed Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Mon, 16 Dec 2024 15:27:31 +0100 Subject: [PATCH 17/17] Removed inorder stuff and switched to newer DTR version. 
--- Bender.yml | 2 +- src/fpnew_top.sv | 122 ++++++++++++++--------------------------------- 2 files changed, 36 insertions(+), 88 deletions(-) diff --git a/Bender.yml b/Bender.yml index ea9cc0b1..41a29486 100644 --- a/Bender.yml +++ b/Bender.yml @@ -8,7 +8,7 @@ package: dependencies: common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: 1.21.0} fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.4} - redundancy_cells: { git: "git@github.com:Lynx005F/redundancy_cells.git", rev: 8c9cfee55faffbe4e1e8af4dc8676439d9c92152} + redundancy_cells: { git: "git@github.com:Lynx005F/redundancy_cells.git", rev: a08ea1b3a19e38eb47a4e7b270c5f9e6fd0c07f9} sources: - src/fpnew_pkg.sv diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index 2a08e0d6..3887e1a6 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -210,11 +210,6 @@ module fpnew_top #( .ready_i ( in_redundant_ready ) ); - // Don't care for dtr specific signals - assign dmr_next_id = fpnew_pkg::DONT_CARE; - assign retry_ready = fpnew_pkg::DONT_CARE; - assign retry_replacement_id = fpnew_pkg::DONT_CARE; - end else if (DTR_ENABLED) begin: gen_in_dtr // Connection directly to next module tmr_in_stacked_t retry2dmr_data; @@ -224,47 +219,23 @@ module fpnew_top #( logic op_is_div; assign op_is_div = in_data.op == fpnew_pkg::SQRT || in_data.op == fpnew_pkg::DIV; - if (RedundancyFeatures.RedundancyType == fpnew_pkg::DTR) begin: gen_in_oo_retry - retry_start #( - .DataType ( tmr_in_stacked_t ), - .IDSize ( ID_SIZE ), - .ExternalIDBits ( DIVISION_ENABLED ? 
1: 0 ) - ) i_retry_start ( - .clk_i, - .rst_ni, - .data_i ( in_data ), - .ext_id_bits_i ( op_is_div ), - .valid_i ( in_gated_valid ), - .ready_o ( in_gated_ready ), - .data_o ( retry2dmr_data ), - .id_o ( retry2dmr_opid ), - .valid_o ( retry2dmr_valid ), - .ready_i ( retry2dmr_ready ), - .retry ( retry_connection ) - ); - assign retry_replacement_id = fpnew_pkg::DONT_CARE; - - end else begin: gen_in_io_retry - - - retry_inorder_start #( - .DataType ( tmr_in_stacked_t ), - .IDSize ( ID_SIZE ), - .ExternalIDBits ( DIVISION_ENABLED ? 1: 0 ) - ) i_retry_inorder_start ( - .clk_i, - .rst_ni, - .data_i ( in_data ), - .ext_id_bits_i ( op_is_div ), - .valid_i ( in_gated_valid ), - .ready_o ( in_gated_ready ), - .data_o ( retry2dmr_data ), - .id_o ( retry2dmr_opid ), - .valid_o ( retry2dmr_valid ), - .ready_i ( retry2dmr_ready ), - .retry ( retry_connection ) - ); - end + retry_start #( + .DataType ( tmr_in_stacked_t ), + .IDSize ( ID_SIZE ), + .ExternalIDBits ( DIVISION_ENABLED ? 1: 0 ) + ) i_retry_start ( + .clk_i, + .rst_ni, + .data_i ( in_data ), + .ext_id_bits_i ( op_is_div ), + .valid_i ( in_gated_valid ), + .ready_o ( in_gated_ready ), + .data_o ( retry2dmr_data ), + .id_o ( retry2dmr_opid ), + .valid_o ( retry2dmr_valid ), + .ready_i ( retry2dmr_ready ), + .retry ( retry_connection ) + ); DTR_start #( .DataType ( tmr_in_stacked_t ), @@ -290,10 +261,6 @@ module fpnew_top #( assign in_redundant_valid = in_gated_valid; assign in_gated_ready = in_redundant_ready; assign in_redundant_opid = 0; - - assign dmr_next_id = fpnew_pkg::DONT_CARE; - assign retry_ready = fpnew_pkg::DONT_CARE; - assign retry_replacement_id = fpnew_pkg::DONT_CARE; end // Handshake signals for the blocks @@ -493,43 +460,24 @@ module fpnew_top #( .fault_detected_o ( fault_detected_o ) ); - if (RedundancyFeatures.RedundancyType == fpnew_pkg::DTR) begin: gen_out_oo_retry - retry_end #( - .DataType ( tmr_out_stacked_t ), - .IDSize ( ID_SIZE ) - ) i_retry_end ( - .clk_i, - .rst_ni, - .data_i ( 
dmr2retry_data ), - .id_i ( dmr2retry_opid ), - .needs_retry_i ( dmr2retry_needs_retry ), - .valid_i ( dmr2retry_valid ), - .ready_o ( dmr2retry_ready ), - .data_o ( out_data ), - .valid_o ( out_valid_o ), - .ready_i ( out_ready_i ), - .retry ( retry_connection ) - ); - assign retry_lock = fpnew_pkg::DONT_CARE; - - end else begin: gen_out_io_retry - retry_inorder_end #( - .DataType ( tmr_out_stacked_t ), - .IDSize ( ID_SIZE ) - ) i_retry_inorder_end ( - .clk_i, - .rst_ni, - .data_i ( dmr2retry_data ), - .id_i ( dmr2retry_opid ), - .needs_retry_i ( dmr2retry_needs_retry ), - .valid_i ( dmr2retry_valid ), - .ready_o ( dmr2retry_ready ), - .data_o ( out_data ), - .valid_o ( out_valid_o ), - .ready_i ( out_ready_i ), - .retry ( retry_connection ) - ); - end + retry_end #( + .DataType ( tmr_out_stacked_t ), + .IDSize ( ID_SIZE ) + ) i_retry_end ( + .clk_i, + .rst_ni, + .data_i ( dmr2retry_data ), + .id_i ( dmr2retry_opid ), + .needs_retry_i ( dmr2retry_needs_retry ), + .valid_i ( dmr2retry_valid ), + .ready_o ( dmr2retry_ready ), + .data_o ( out_data ), + .valid_o ( out_valid_o ), + .ready_i ( out_ready_i ), + .retry ( retry_connection ) + ); + assign retry_lock = fpnew_pkg::DONT_CARE; + end else begin : gen_out_no_redundancy assign out_data = out_redundant_data_noid; assign out_valid_o = out_redundant_valid;