-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMPDRbeamformer.m
217 lines (187 loc) · 9.45 KB
/
MPDRbeamformer.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
%Author: Dimme de Groot
%Date: May 2024
%Descr: This object implements the MPDR beamformer. It is constructed by calling
% MPDR = MPDRbeamformer(c, fs, window_length, pad_length, lambda, regFactor, flag_full_axis, analysis_window, synthesis_window)
%
% The frames are updated by calling
% outputFrame = MPDR.computeOutputFrame(inputFrame, flag_bias_correction, VAD)
% with flag_bias_correction an option to do bias correction. This might be handy if you've only a few samples to estimate the PSD matrices/.
% VAD means "voice activity detected". For the MPDR, VAD is always false. In case you have a voice activity detector available,
% you can set it to true based or false based on the output of the voice activity detector. This effectively transforms the MPDR to an MVDR.
% If VAD=true, the estimates of the PSD matrices wont be updated.
%
% In some cases you might want to reset the estimates of the PSD matrices. In that case you can just do
% MPDR.resetPSD()
classdef MPDRbeamformer < handle
properties
c %[m/s], speed of sound
fs %[Hz], sampling frequency
window_length_act %[s], actual window length
N_t %[-], window length in samples
pad_length_act %[s], actual padding length
N_pad %[-], padding length in samples
N_hop %[-], hop length in samples
w_analysis %[-], analysis window
w_synthesis %[-], synthesis window
lambda %[-], Parameter of exponential window which controls the update of the covariance matrix.
regFactor %[?], Parameter of regularisation applied to estimated covariance matrics
Rpsd %[?], The estimated covariance matrix (excluding regularisation)
Rpsd_reg %[?], The regularisation term for the estimated covariance matrix
look_vector %[-], the look vector. Essentially specifies which direction we like to keep.
flag_full_axis %[-], True for full frequency axis [-Fs/2, Fs/2). False for [0, Fs/2]
k_ax %[?], wavenumber
N_k %[-], length of k_axis
%For debugging
w %[-], the beamformer weights
FFTinputFrame %[-]
FFToutputFrame %[-]
Rpsd_bias_corr %In case the bias correction flag is true, updating Rpsd also requires tracking the "biased" version of the Rpsd. In case the flag is false, Rpsd = Rpsd_bias_corr
frame_counter %[-], counts the number of times Rpsd has been updated
end
methods
function obj = MPDRbeamformer(c, fs, window_length, pad_length, lambda, regFactor, flag_full_axis, analysis_window, synthesis_window)
if nargin == 0
c = 342;
fs = 16000;
window_length = 0.016;
pad_length = 0.016;
lambda = 0.9;
regFactor = 1e-5;
flag_full_axis = false;
analysis_window = "sqrthann";
synthesis_window = "sqrthann";
end
obj.c = c;
obj.fs = fs;
obj.N_t = 2^nextpow2(floor(fs.*window_length));
if pad_length ~= 0
obj.N_pad = 2^nextpow2(floor(fs.*pad_length));
else
obj.N_pad = 0;
end
obj.window_length_act = obj.N_t/fs;
obj.pad_length_act = obj.N_pad/fs;
obj.lambda = lambda;
obj.regFactor = regFactor;
obj.flag_full_axis = flag_full_axis;
if analysis_window == "sqrthann"
obj.N_hop = obj.N_t/2;
obj.w_analysis = methodSqrthann(obj);
else
disp("currently only sqrthann is supported as window :S")
end
if synthesis_window == "sqrthann"
obj.w_synthesis = methodSqrthann(obj);
else
disp("currently only sqrthann is supported as window :S")
end
obj.k_ax = methodK_ax(obj);
obj.N_k = length(obj.k_ax);
end
%Some methods
function window = methodSqrthann(obj)
%See, i.e., Smith, J.O. Spectral Audio Signal Processing, http://ccrma.stanford.edu/~jos/sasp/, online book, 2011 edition.
n = (0:1:obj.N_t-1).'; %note: obj.N_t is the length of the window
m = n-obj.N_t/2;
window = sqrt(ones(obj.N_t, 1).*(0.5+0.5*cos(2*pi/obj.N_t * m)));
end
function k_ax = methodK_ax(obj)
N_k = obj.N_t + obj.N_pad; %[-], total number of frequency bins (even)
if obj.flag_full_axis
k_ax = (-N_k/2:1:N_k/2-1)/N_k*2*pi*obj.fs/obj.c;
k_ax = fftshift(k_ax);
else
k_ax = (0:1:N_k/2)/N_k*2*pi*obj.fs/obj.c; %[rad/m], wave number -> only half of the axis [0, Fs/2] is relevant
end
end
function methodUpdateRpsd(obj, FFTinputFrame, flag_bias_correction)
if nargin == 2
flag_bias_correction = false;
end
if flag_bias_correction
if isempty(obj.frame_counter)
obj.frame_counter = 1;
else
obj.frame_counter = obj.frame_counter + 1;
end
end
%Loop over k-axis
for k=1:obj.N_k
audioRecFrame_k = FFTinputFrame(k,:).';
Rpsd_new = (1-obj.lambda)*(audioRecFrame_k*audioRecFrame_k') + obj.lambda*obj.Rpsd_bias_corr(:,:,k);
obj.Rpsd_bias_corr(:,:,k) = Rpsd_new;
if flag_bias_correction
obj.Rpsd(:,:,k) = Rpsd_new/(1-obj.lambda^obj.frame_counter);
else
obj.Rpsd(:,:,k) = Rpsd_new;
end
end
end
function methodComputeBeamformerWeights(obj) %Eq. 3.23 of Brandstein 2001.
%Compute beamformed weights
for k=1:obj.N_k
R = obj.Rpsd(:,:,k) + obj.Rpsd_reg;
lookVec = obj.look_vector(:,k);
RinvV = R\lookVec;
obj.w(:,k) = RinvV/(lookVec'*RinvV);
obj.w(:,k) = obj.w(:,k);
end
end
%Functions which are used during usage of beamformer
function updateLookDirection(obj, look_location, receiver_location)
%A function which computes the look direction vector obj.look_vector.
%Inputs: (1) look_location of size 1 x 3.
% (2) receiver_location of size Nr x 3, with Nr the number of receivers
%Outputs: an updated value of MPDRbeamformer.look_vector
r = vecnorm(look_location-receiver_location,2,2);
obj.look_vector = exp(-1j*obj.k_ax.*r)./(4*pi*r); %Note: no need to multiply by a constant or so since we are interested in a location
%obj.look_vector = look_vector./vecnorm(look_vector); %Make the length equal to 1 per frequency bin: we now have a direction
end
function outputFrame = computeOutputFrame(obj, inputFrame, flag_bias_correction, VAD)
%A function which computes the beamformed output frame.
%Inputs: (1) the inputFrame - an unwindowned frame of signals as received at the mic of size window_length N_t x number of receivers N_r
%Outputs: (1) output frame after beamforming
if nargin == 2
flag_bias_correction = false;
VAD = false;
end
if nargin == 3
VAD = false;
end
Nr = size(inputFrame,2); %number of receivers
inputFrame = obj.w_analysis.*inputFrame; %window the input frame
%Initialise PSD matrices if needed
if isempty(obj.Rpsd_reg)
obj.Rpsd_reg = eye(Nr)*obj.regFactor;
end
if isempty(obj.Rpsd)
obj.Rpsd = zeros(Nr, Nr, obj.N_k);
obj.Rpsd_bias_corr = zeros(Nr, Nr, obj.N_k);
end
%Take fft and select correct part of axis
obj.FFTinputFrame = fft(inputFrame, obj.N_t+obj.N_pad, 1); %Take columnwise fft and zero pad to proper length
if ~obj.flag_full_axis
obj.FFTinputFrame = obj.FFTinputFrame(1:obj.N_k,:);
end
%Update PSD matrix
if ~VAD
obj.methodUpdateRpsd(obj.FFTinputFrame, flag_bias_correction);
end
%Compute beamformer weights
obj.methodComputeBeamformerWeights;
%Compute output frame
obj.FFToutputFrame = obj.w'.*obj.FFTinputFrame;
if obj.flag_full_axis
outputFrame = ifft(sum(obj.FFToutputFrame,2), 'symmetric');
else
outputFrame = sum(ifft([obj.FFToutputFrame; zeros(obj.N_t+obj.N_pad-obj.N_k,Nr)], 'symmetric'),2);
end
outputFrame = outputFrame(1:obj.N_t).*obj.w_synthesis;
end
function resetPSD(obj)
obj.Rpsd = [];
obj.Rpsd_reg = [];
obj.frame_counter = [];
end
end
end