-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathpsi_fix_fir_dec_ser_nch_chtdm_conf.vhd
396 lines (354 loc) · 16.2 KB
/
psi_fix_fir_dec_ser_nch_chtdm_conf.vhd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
------------------------------------------------------------------------------
-- Copyright (c) 2018 by Paul Scherrer Institute, Switzerland
-- All rights reserved.
-- Authors: Oliver Bruendler, Radoslaw Rybaniec
------------------------------------------------------------------------------
------------------------------------------------------------------------------
-- Description
------------------------------------------------------------------------------
-- This component calculates an FIR filter with the following limitations:
-- - Filter is calculated serially (one tap after the other)
-- - The number of channels is configurable
-- - All channels are processed time-division-multiplexed (TDM): the samples of all channels arrive one after the other on the same data input
-- - Coefficients are configurable but the same for each channel
------------------------------------------------------------------------------
--
-- Required Memory depth per channel = max_taps_g + max_ratio_g
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.psi_fix_pkg.all;
use work.psi_common_math_pkg.all;
use work.psi_common_array_pkg.all;
-- Serial, decimating multi-channel FIR filter with run-time configurable
-- ratio / tap count and a single time-multiplexed data input (one sample
-- per vld_i pulse, channel index advances with every valid sample).
entity psi_fix_fir_dec_ser_nch_chtdm_conf is
generic(
in_fmt_g : psi_fix_fmt_t := (1, 0, 17); -- input data format (defines the width of dat_i)
out_fmt_g : psi_fix_fmt_t := (1, 0, 17); -- output data format (defines the width of dat_o)
coef_fmt_g : psi_fix_fmt_t := (1, 0, 17); -- coefficient format (defines the width of the coefficient interface)
channels_g : natural := 2; -- number of TDM channels (must be >= 2, checked by an assertion in the architecture)
max_ratio_g : natural := 8; -- max decimation ratio (defines the width of cfg_ratio_i)
max_taps_g : natural := 1024; -- max number of taps (defines the width of cfg_taps_i and the coefficient memory depth)
rnd_g : psi_fix_rnd_t := psi_fix_round; -- rounding mode used when the accumulator is resized towards the output format
sat_g : psi_fix_sat_t := psi_fix_sat; -- saturate or wrap when converting to out_fmt_g
use_fix_coefs_g : boolean := false; -- true = coefficients are a fixed ROM built from coefs_g, false = coefficients live in a RAM writable through coef_if_*
coefs_g : t_areal := (0.0, 0.0); -- ROM content (use_fix_coefs_g = true) or initial RAM content (use_fix_coefs_g = false)
ram_behavior_g : string := "RBW"; -- RBW = Read before write, WBR = Write before read
rst_pol_g : std_logic := '1' -- reset polarity: value of rst_i that asserts the reset ('1' = active high)
);
port(
clk_i : in std_logic; -- system clock
rst_i : in std_logic; -- system reset (synchronous to clk_i, polarity given by rst_pol_g)
dat_i : in std_logic_vector(psi_fix_size(in_fmt_g) - 1 downto 0); -- data input (TDM, one channel sample per vld_i pulse)
vld_i : in std_logic; -- input sample strobe
dat_o : out std_logic_vector(psi_fix_size(out_fmt_g) - 1 downto 0);-- data output (filtered and decimated)
vld_o : out std_logic; -- output sample strobe (decimated rate)
-- Parallel Configuration Interface
cfg_ratio_i : in std_logic_vector(log2ceil(max_ratio_g) - 1 downto 0) := std_logic_vector(to_unsigned(max_ratio_g - 1, log2ceil(max_ratio_g))); -- Ratio - 1 (0 => Ratio 1, 4 => Ratio 5)
cfg_taps_i : in std_logic_vector(log2ceil(max_taps_g) - 1 downto 0) := std_logic_vector(to_unsigned(max_taps_g - 1, log2ceil(max_taps_g))); -- Number of taps - 1
-- Coefficient interface (only connected to logic when use_fix_coefs_g = false)
coef_if_clk_i : in std_logic := '0'; -- clock for coefficient interface
coef_if_wr_i : in std_logic := '0'; -- write enable
coef_if_addr_i : in std_logic_vector(log2ceil(max_taps_g) - 1 downto 0) := (others => '0'); -- address of coefficient access
coef_if_wr_dat_i : in std_logic_vector(psi_fix_size(coef_fmt_g) - 1 downto 0) := (others => '0'); -- coefficient to write
coef_if_rd_dat_o : out std_logic_vector(psi_fix_size(coef_fmt_g) - 1 downto 0); -- coefficient read back (all zeros when use_fix_coefs_g = true)
-- Status Output
busy_o : out std_logic -- active high while input samples or a calculation are still in the pipeline
);
end entity;
architecture rtl of psi_fix_fir_dec_ser_nch_chtdm_conf is
-- Data memory depth per channel: max_taps_g samples of history plus the samples that can
-- arrive before a calculation starts. The depth is rounded up to a power of two, so the
-- write/read addresses (plain unsigned counters/differences) wrap around without extra logic.
constant DataMemDepthRequired_c : natural := max_taps_g + max_ratio_g; -- max_ratio_g is the maximum samples to arrive before a calculation starts
constant DataMemAddBits_c : natural := log2ceil(DataMemDepthRequired_c);
constant DataMemDepthApplied_c : natural := 2**DataMemAddBits_c;
-- Coefficient memory depth, also rounded up to a power of two
constant CoefMemDepthApplied_c : natural := 2**log2ceil(max_taps_g);
-- Fixed-point formats
-- MultFmt_c: full-precision product of one input sample and one coefficient
-- AccuFmt_c: accumulator, one integer bit more than the output and the full product fraction
-- RndFmt_c : accumulator after rounding to the output fraction (still one extra integer bit)
constant MultFmt_c : psi_fix_fmt_t := (max(in_fmt_g.S, coef_fmt_g.S), in_fmt_g.I + coef_fmt_g.I, in_fmt_g.F + coef_fmt_g.F);
constant AccuFmt_c : psi_fix_fmt_t := (1, out_fmt_g.I + 1, in_fmt_g.F + coef_fmt_g.F);
constant RndFmt_c : psi_fix_fmt_t := (1, out_fmt_g.I + 1, out_fmt_g.F);
-- types
subtype InData_t is std_logic_vector(psi_fix_size(in_fmt_g) - 1 downto 0);
type InData_a is array (natural range <>) of InData_t;
subtype Mult_t is std_logic_vector(psi_fix_size(MultFmt_c) - 1 downto 0);
subtype Accu_t is std_logic_vector(psi_fix_size(AccuFmt_c) - 1 downto 0);
subtype Rnd_t is std_logic_vector(psi_fix_size(RndFmt_c) - 1 downto 0);
subtype Out_t is std_logic_vector(psi_fix_size(out_fmt_g) - 1 downto 0);
type ChNr_a is array (natural range <>) of std_logic_vector(log2ceil(channels_g) - 1 downto 0);
-- Two process method
-- All registers of the design are collected in this record. Array-typed fields (Vld, InSig,
-- ChannelNr, CalcOn, Last, First) are shift pipelines indexed by pipeline stage; for scalar
-- fields the "_N" suffix names the stage the register belongs to.
type two_process_r is record
FirstAfterRst : std_logic; -- '1' until the first valid sample after reset was seen
Vld : std_logic_vector(0 to 1); -- input valid pipeline
InSig : InData_a(0 to 1); -- input data pipeline
ChannelNr : ChNr_a(0 to 3); -- channel number of the input sample, pipelined
TapWrAddr_1 : std_logic_vector(DataMemAddBits_c - 1 downto 0); -- data RAM write address (per channel)
Tap0Addr_1 : std_logic_vector(DataMemAddBits_c - 1 downto 0); -- write address captured at the start of a calculation
DecCnt_1 : std_logic_vector(log2ceil(max_ratio_g) - 1 downto 0); -- decimation down-counter
TapCnt_1 : std_logic_vector(log2ceil(max_taps_g) - 1 downto 0); -- tap down-counter (also used as coefficient address)
CalcChnl_1 : std_logic_vector(log2ceil(channels_g) - 1 downto 0); -- channel currently being calculated
CalcChnl_2 : std_logic_vector(log2ceil(channels_g) - 1 downto 0);
TapRdAddr_2 : std_logic_vector(DataMemAddBits_c - 1 downto 0); -- data RAM read address (per channel)
CoefRdAddr_2 : std_logic_vector(log2ceil(max_taps_g) - 1 downto 0); -- coefficient memory read address
CalcOn : std_logic_vector(1 to 7); -- calculation active, pipelined
Last : std_logic_vector(1 to 7); -- marks the last tap of a channel, pipelined
First : std_logic_vector(1 to 6); -- marks the first tap of a channel, pipelined
MultInTap_4 : InData_t;
MultInCoef_4 : std_logic_vector(psi_fix_size(coef_fmt_g) - 1 downto 0);
MultInTap_5 : InData_t;
MultInCoef_5 : std_logic_vector(psi_fix_size(coef_fmt_g) - 1 downto 0);
MultOut_6 : Mult_t;
Accu_7 : Accu_t;
Rnd_8 : Rnd_t;
RndVld_8 : std_logic;
Output_9 : Out_t;
OutVld_9 : std_logic;
FirstTapLoop_3 : std_logic; -- '1' after reset until the tap memory has been filled once
TapRdAddr_3 : std_logic_vector(DataMemAddBits_c - 1 downto 0);
ReplaceZero_4 : std_logic; -- '1' while samples read from the data RAM are replaced by zeros
-- Status
CalcOngoing : std_logic;
end record;
signal r, r_next : two_process_r; -- r = registered state, r_next = state computed by p_comb
-- Component Interface Signals
-- Data RAM addresses are the channel number (upper bits) concatenated with the per-channel tap address
signal DataRamWrAddr_1 : std_logic_vector(DataMemAddBits_c + log2ceil(channels_g) - 1 downto 0);
signal DataRamRdAddr_2 : std_logic_vector(DataMemAddBits_c + log2ceil(channels_g) - 1 downto 0);
signal DataRamDout_3 : std_logic_vector(psi_fix_size(in_fmt_g) - 1 downto 0);
signal CoefRamDout_3 : std_logic_vector(psi_fix_size(coef_fmt_g) - 1 downto 0);
-- coef ROM (only driven with real values when use_fix_coefs_g = true)
type CoefRom_t is array (0 to 2**log2ceil(max_taps_g) - 1) of std_logic_vector(psi_fix_size(coef_fmt_g) - 1 downto 0); -- full power of two to ensure index is always valid
signal CoefRom : CoefRom_t := (others => (others => '0'));
begin
-- Elaboration-time check: this architecture does not support a single channel
assert channels_g >= 2 report "###ERROR###: psi_fix_fir_dec_ser_nch_chtdm_conf only works for channels_g >= 2, use psi_fix_fir_dec_ser_nch_chtpar_conf for single channel implementation" severity error;
--------------------------------------------
-- Combinatorial Process
--------------------------------------------
p_comb : process(r, vld_i, dat_i, cfg_ratio_i, cfg_taps_i, DataRamDout_3, CoefRamDout_3)
  -- Purpose: combinatorial half of the two-process design. Computes the next
  -- register state (r_next) from the current state (r), the inputs and the
  -- memory read data, and drives the entity outputs from registers only.
  --
  -- Pipeline overview (stage numbers match the register name suffixes):
  --   0    input registers, input channel counter
  --   1    data RAM write address, decimation counter, tap/channel sequencing
  --   2    data RAM / coefficient memory read addresses
  --   3    memory read data available (DataRamDout_3 / CoefRamDout_3)
  --   4-5  multiplier input registers (incl. zero replacement after reset)
  --   6    multiplication
  --   7    accumulation
  --   8    rounding
  --   9    saturation and output register
  variable v        : two_process_r;
  variable AccuIn_v : std_logic_vector(psi_fix_size(AccuFmt_c) - 1 downto 0);
  -- The ratio configuration has no bits when only one decimation ratio is
  -- representable. numeric_std comparisons on null vectors always return
  -- FALSE, so this case is handled explicitly: the ratio is treated as the
  -- constant 1 (i.e. "ratio - 1" = 0).
  constant NoRatioBits_c : boolean := (cfg_ratio_i'length = 0);
  -- True when the stage-3 tap read address is <= "ratio - 1"
  variable TapAddrLow_v  : boolean;
begin
  -- *** Hold variables stable ***
  v := r;

  -- *** Pipe Handling ***
  -- Shift every pipeline array by one stage; element 'low is loaded below
  v.Vld(v.Vld'low + 1 to v.Vld'high)                   := r.Vld(r.Vld'low to r.Vld'high - 1);
  v.InSig(v.InSig'low + 1 to v.InSig'high)             := r.InSig(r.InSig'low to r.InSig'high - 1);
  v.CalcOn(v.CalcOn'low + 1 to v.CalcOn'high)          := r.CalcOn(r.CalcOn'low to r.CalcOn'high - 1);
  v.Last(v.Last'low + 1 to v.Last'high)                := r.Last(r.Last'low to r.Last'high - 1);
  v.First(v.First'low + 1 to v.First'high)             := r.First(r.First'low to r.First'high - 1);
  v.ChannelNr(v.ChannelNr'low + 1 to v.ChannelNr'high) := r.ChannelNr(r.ChannelNr'low to r.ChannelNr'high - 1);

  -- *** Stage 0 ***
  -- Input Registers
  v.Vld(0)   := vld_i;
  v.InSig(0) := dat_i;
  -- Calculate channel number (wraps after the last channel, the first sample
  -- after reset is always channel 0)
  if vld_i = '1' then
    if unsigned(r.ChannelNr(0)) = channels_g - 1 or r.FirstAfterRst = '1' then
      v.ChannelNr(0)  := (others => '0');
      v.FirstAfterRst := '0';
    else
      v.ChannelNr(0) := std_logic_vector(unsigned(r.ChannelNr(0)) + 1);
    end if;
  end if;

  -- *** Stage 1 ***
  -- Increment tap address after data was written for last channel
  if (r.Vld(1) = '1') and (unsigned(r.ChannelNr(1)) = channels_g - 1) then
    v.TapWrAddr_1 := std_logic_vector(unsigned(r.TapWrAddr_1) + 1);
  end if;

  -- Decimation & Calculation Control
  -- Initial value
  v.First(1) := '0';
  v.Last(1)  := '0';
  -- normal update (free running, reloaded below when a channel starts)
  v.TapCnt_1 := std_logic_vector(unsigned(r.TapCnt_1) - 1);
  -- last tap of a channel
  if unsigned(r.TapCnt_1) = 1 or unsigned(cfg_taps_i) = 0 then
    v.Last(1) := '1';
  end if;
  -- goto next channel or finish calculation
  if unsigned(r.TapCnt_1) = 0 then
    -- last channel
    if unsigned(r.CalcChnl_1) = channels_g - 1 then
      v.CalcOn(1) := '0';
    -- goto next channel
    else
      v.First(1)   := '1';
      v.CalcChnl_1 := std_logic_vector(unsigned(r.CalcChnl_1) + 1);
      v.TapCnt_1   := cfg_taps_i;
    end if;
  end if;
  -- start of calculation and decimation
  -- (placed after the sequencing above so that a start overrides it)
  if r.Vld(0) = '1' then
    -- Start calculation (data from all channels available)
    if unsigned(r.ChannelNr(0)) = channels_g - 1 then
      -- NoRatioBits_c is evaluated first (boolean "or" short-circuits), so
      -- the null-vector comparison is never executed in that configuration.
      if NoRatioBits_c or (max_ratio_g = 0) or (unsigned(r.DecCnt_1) = 0) then
        v.Tap0Addr_1 := r.TapWrAddr_1;
        v.TapCnt_1   := cfg_taps_i;
        v.CalcOn(1)  := '1';
        v.First(1)   := '1';
        v.CalcChnl_1 := (others => '0');
        v.DecCnt_1   := cfg_ratio_i;
      else
        v.DecCnt_1 := std_logic_vector(unsigned(r.DecCnt_1) - 1);
      end if;
    end if;
  end if;

  -- *** Stage 2 ***
  -- pipelining
  v.CalcChnl_2   := r.CalcChnl_1;
  -- Tap read address: TapCnt_1 counts down, so the read address runs from
  -- (Tap0Addr_1 - taps) up to Tap0Addr_1; the subtraction wraps modulo the
  -- address width.
  v.TapRdAddr_2  := std_logic_vector(unsigned(r.Tap0Addr_1) - unsigned(r.TapCnt_1));
  v.CoefRdAddr_2 := r.TapCnt_1;

  -- *** Stage 3 ***
  -- Pipelining
  v.TapRdAddr_3 := r.TapRdAddr_2;

  -- *** Stage 4 ***
  -- Address comparison shared by the two conditions below
  if NoRatioBits_c then
    TapAddrLow_v := (unsigned(r.TapRdAddr_3) = 0);
  else
    TapAddrLow_v := (unsigned(r.TapRdAddr_3) <= unsigned(cfg_ratio_i));
  end if;
  -- Multiplier input registering
  -- Replace taps that are not yet written with zeros for bittrueness
  if r.ReplaceZero_4 = '0' or TapAddrLow_v then
    v.MultInTap_4 := DataRamDout_3;
  else
    v.MultInTap_4 := (others => '0');
  end if;
  -- Detect when the Zero-replacement can be stopped since the taps are already filled with correct data
  if r.FirstTapLoop_3 = '0' then
    v.ReplaceZero_4 := '0';
  elsif r.CalcOn(3) = '1' then
    if r.First(3) = '1' and TapAddrLow_v then
      v.ReplaceZero_4 := '0';
      if unsigned(r.ChannelNr(3)) = channels_g - 1 then
        v.FirstTapLoop_3 := '0';
      end if;
    elsif r.Last(3) = '1' then
      v.ReplaceZero_4 := '1';
    elsif unsigned(r.TapRdAddr_3) = 0 then
      v.ReplaceZero_4 := '0';
    end if;
  end if;
  v.MultInCoef_4 := CoefRamDout_3;

  -- *** Stage 5 ***
  -- Multiplier input registers
  v.MultInTap_5  := r.MultInTap_4;
  v.MultInCoef_5 := r.MultInCoef_4;

  -- *** Stage 6 ***
  -- Multiplication
  v.MultOut_6 := psi_fix_mult(r.MultInTap_5, in_fmt_g,
                              r.MultInCoef_5, coef_fmt_g,
                              MultFmt_c); -- Full precision, no rounding or saturation required

  -- *** Stage 7 ***
  -- Accumulator (restarted from zero on the first tap of every channel)
  if r.First(6) = '1' then
    AccuIn_v := (others => '0');
  else
    AccuIn_v := r.Accu_7;
  end if;
  v.Accu_7 := psi_fix_add(r.MultOut_6, MultFmt_c,
                          AccuIn_v, AccuFmt_c,
                          AccuFmt_c); -- Overflows compensate at the end of the calculation and rounding not required

  -- *** Stage 8 ***
  -- Rounding (only the result after the last tap is taken over)
  v.RndVld_8 := '0';
  if r.Last(7) = '1' then
    v.Rnd_8    := psi_fix_resize(r.Accu_7, AccuFmt_c, RndFmt_c, rnd_g, psi_fix_wrap);
    v.RndVld_8 := r.CalcOn(7);
  end if;

  -- *** Stage 9 ***
  -- Output Handling and saturation
  v.OutVld_9 := r.RndVld_8;
  v.Output_9 := psi_fix_resize(r.Rnd_8, RndFmt_c, out_fmt_g, psi_fix_trunc, sat_g);

  -- *** Status Output ***
  -- Busy while anything is in the input pipeline, the calculation pipeline
  -- or the rounding stage
  if (unsigned(r.Vld) /= 0) or (unsigned(r.CalcOn) /= 0) or (r.RndVld_8 = '1') then
    v.CalcOngoing := '1';
  else
    v.CalcOngoing := '0';
  end if;

  -- *** Outputs ***
  vld_o  <= r.OutVld_9;
  dat_o  <= r.Output_9;
  busy_o <= r.CalcOngoing or r.Vld(0);

  -- *** Assign to signal ***
  r_next <= v;
end process;
--------------------------------------------
-- Sequential Process
--------------------------------------------
p_seq : process(clk_i)
  -- Register half of the two-process design: takes over r_next on every
  -- rising clock edge. While the reset is asserted, the control registers
  -- listed below are forced to their reset values (the later signal
  -- assignments win over the record assignment); all remaining registers
  -- simply follow r_next.
begin
  if rising_edge(clk_i) then
    r <= r_next;

    if rst_i = rst_pol_g then
      -- Input side
      r.FirstAfterRst  <= '1';
      r.Vld            <= (others => '0');
      r.ChannelNr(0)   <= (others => '0');

      -- Addressing and sequencing counters
      r.TapWrAddr_1    <= (others => '0');
      r.DecCnt_1       <= (others => '0');
      r.TapCnt_1       <= cfg_taps_i;
      r.CalcChnl_1     <= (others => '0');

      -- Calculation pipeline flags
      r.CalcOn         <= (others => '0');
      r.Last           <= (others => '0');
      r.FirstTapLoop_3 <= '1';
      r.ReplaceZero_4  <= '1';

      -- Output side and status
      r.RndVld_8       <= '0';
      r.OutVld_9       <= '0';
      r.CalcOngoing    <= '0';
    end if;
  end if;
end process;
--------------------------------------------
-- Component Instantiations
--------------------------------------------
-- Coefficient RAM for configurable coefficients
g_nFixCoef : if not use_fix_coefs_g generate
  -- Run-time configurable coefficients: a dual-port parameter RAM.
  --   Port A: external coefficient interface (own clock, read/write)
  --   Port B: read-only access by the filter, addressed by the tap counter
  -- The RAM is initialised with coefs_g.
  i_coef_ram : entity work.psi_fix_param_ram
    generic map(
      fmt_g      => coef_fmt_g,
      depth_g    => CoefMemDepthApplied_c,
      init_g     => coefs_g,
      behavior_g => ram_behavior_g
    )
    port map(
      -- Port A: coefficient interface
      ClkA  => coef_if_clk_i,
      WrA   => coef_if_wr_i,
      AddrA => coef_if_addr_i,
      DinA  => coef_if_wr_dat_i,
      DoutA => coef_if_rd_dat_o,
      -- Port B: filter read access (never written)
      ClkB  => clk_i,
      WrB   => '0',
      AddrB => r.CoefRdAddr_2,
      DinB  => (others => '0'),
      DoutB => CoefRamDout_3
    );
end generate;
-- Coefficient ROM for non-configurable coefficients
g_FixCoef : if use_fix_coefs_g generate
  -- Fixed coefficients: the coefficient interface is unused and the
  -- coefficients are read from a ROM built out of coefs_g.

  -- Assign unused outputs
  coef_if_rd_dat_o <= (others => '0');

  -- Table must be generated outside of the ROM process to make code synthesizable.
  -- One concurrent assignment per entry of coefs_g; ROM entries without a
  -- matching coefs_g element keep their all-zero default.
  g_CoefTable : for coef_idx in coefs_g'range generate
    CoefRom(coef_idx) <= psi_fix_from_real(coefs_g(coef_idx), coef_fmt_g);
  end generate;

  -- Coefficient ROM: registered read
  p_coef_rom : process(clk_i)
  begin
    if rising_edge(clk_i) then
      CoefRamDout_3 <= CoefRom(to_integer(unsigned(r.CoefRdAddr_2)));
    end if;
  end process;
end generate;
-- Data (tap) memory shared by all channels. The channel number selects the
-- upper address bits, the per-channel tap address the lower ones.
--   write side: input channel number and write pointer
--   read side : channel being calculated and tap read pointer
DataRamRdAddr_2 <= r.CalcChnl_2 & r.TapRdAddr_2;
DataRamWrAddr_1 <= r.ChannelNr(1) & r.TapWrAddr_1;

i_data_ram : entity work.psi_common_tdp_ram
  generic map(
    width_g    => psi_fix_size(in_fmt_g),
    depth_g    => DataMemDepthApplied_c * channels_g,
    behavior_g => ram_behavior_g
  )
  port map(
    -- Port A: write-only, stores every valid input sample
    a_clk_i  => clk_i,
    a_wr_i   => r.Vld(1),
    a_addr_i => DataRamWrAddr_1,
    a_dat_i  => r.InSig(1),
    a_dat_o  => open,
    -- Port B: read-only, delivers the taps to the multiplier
    b_clk_i  => clk_i,
    b_wr_i   => '0',
    b_addr_i => DataRamRdAddr_2,
    b_dat_i  => (others => '0'),
    b_dat_o  => DataRamDout_3
  );
end architecture;