-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneedlemanWunsch.m
300 lines (246 loc) · 11.1 KB
/
needlemanWunsch.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
classdef needlemanWunsch
%NEEDLEMANWUNSCH Summary of this class goes here
% Detailed explanation goes here
properties (Constant)
gapPenalty = -5;
mismatchPenalty = -5;
matchAward = 20;
smallDistAward = 5
end
methods (Static)
%%
function out = matchScore(alpha, beta)
out = -abs(alpha-beta);
end
%%
%dif
function [LogAligned,NirsAligned,BlockAligned] = AlignNeedlemanWunsch(mSeq1,mSeq2,BlockInfo)
%NEEDLEMANWUNSCH Construct an instance of this class
% mSeq1 should be diff of Log times
% mSeq2 should be diff of NIRS times
% Detailed explanation goes here
tolerance = .2;
if(nargin<2)
return;
end
if ~exist('BlockInfo','var')
BlockInfo = cell(length(mSeq2),1);
end
% mSeq1 = [NaN;mSeq1];
% mSeq2 = [NaN;mSeq2];% need to pad a nan value to work better with the score table
seq1Len = length(mSeq1);
seq2Len = length(mSeq2);
% nw_score = NaN(seq1Len,seq2Len);
% nw_score(:,1,1) = mSeq1(:);
% nw_score(1,:,1) = mSeq2(:);
LogAligned = {};%using cells because we need to be able to represent
NirsAligned = {};
BlockAligned = cell(0,size(BlockInfo,2));
D=0;
R=0;
b_index = 1;
a_index = 1;
while(a_index<seq1Len&&b_index<seq2Len)
fprintf('Comparing %g in mSeq1 with %g in mSeq2\n',a_index,b_index)
if(abs(mSeq1(a_index)-mSeq2(b_index))<tolerance)
fprintf('%02.1f and %02.1f within tolerance\n\n',mSeq1(a_index),mSeq2(b_index));
score_current = needlemanWunsch.matchScore(mSeq2(b_index),mSeq1(a_index));
score_right = needlemanWunsch.matchScore(mSeq2(b_index),sum(mSeq1(a_index+1-R:a_index+1)));
score_down = needlemanWunsch.matchScore(sum(mSeq2(b_index+1-D:b_index+1)),mSeq1(a_index));
%score_next = needlemanWunsch.matchScore(mSeq2(b_index+1),mSeq1(a_index+1));
% all scores are negative, so take max to get closest
% value to zero
best_cost = max([...
score_current,...
score_right+needlemanWunsch.gapPenalty*(R+1),...
score_down+needlemanWunsch.gapPenalty*(1+D)...
]);
if(best_cost == score_current)
LogAligned(end+1) = {mSeq1(a_index)};
NirsAligned(end+1) = {mSeq2(b_index)};
BlockAligned(end+1,:) = BlockInfo(a_index,:);
downflag = false;
rightflag = false;
R = 0;
D = 0;
a_index=a_index+1;
b_index=b_index+1;
elseif(best_cost == score_right+needlemanWunsch.gapPenalty*R)
a_index=a_index+1;
LogAligned(end+1) = {mSeq1(a_index)};
NirsAligned(end+1) = {nan};
BlockAligned(end+1,:) = BlockInfo(a_index,:);
R=R+1;
elseif(best_cost == score_down+needlemanWunsch.gapPenalty*D)
b_index=b_index+1;
NirsAligned(end+1) = {mSeq2(a_index)};
LogAligned(end+1) = {nan};
BlockAligned(end+1,:) = {'missing'};
D=D+1;
else
BlockAligned(end+1,:) = BlockInfo(a_index,:);
a_index=a_index+1;
b_index=b_index+1;
end
else
if(mSeq1(a_index)>mSeq2(b_index))
fprintf('%02.1f > %02.1f ',mSeq1(a_index),mSeq2(b_index));
if b_index==1
fprintf('Passing over presumed start trigger\n\n');
NirsAligned(end+1) = {mSeq2(b_index)};
LogAligned(end+1) = {nan};
BlockAligned(end+1,:) = {'start'};
b_index=b_index+1;%if seq1 is greater then we need to gap before it and recompare
D=D+1;
else
fprintf('Splitting the larger time diff in mSeq1\n\n');
LogAligned(end+1) = {mSeq2(b_index)};
mSeq1(a_index) = mSeq1(a_index) - mSeq2(b_index);
NirsAligned(end+1) = {nan};
BlockAligned(end+1,:) = {'missing'};
b_index=b_index+1;%if seq1 is greater then we need to gap before it and recompare
D=D+1;
end
else
fprintf('%02.1f < %02.1f ',mSeq1(a_index),mSeq2(b_index));
if b_index==1
fprintf('Passing over presumed start trigger\n\n');
NirsAligned(end+1) = {mSeq2(b_index)};
LogAligned(end+1) = {nan};
BlockAligned(end+1,:) = {'start'};
b_index=b_index+1;
D=D+1;
else
fprintf('Splitting the larger time diff in mSeq2\n\n');
NirsAligned(end+1) = {mSeq1(a_index)};
mSeq2(b_index) = mSeq2(b_index)-mSeq1(a_index);
LogAligned(end+1) = {nan};
BlockAligned(end+1,:) = BlockInfo(a_index,:);
a_index=a_index+1;
R=R+1;
end
end
end
end
%were at the end. Append the missed value
LogAligned(end+1) = {mSeq1(a_index)};
NirsAligned(end+1)= {mSeq2(b_index)};
a_index= a_index+1;
b_index= b_index+1;
%Catch all the stragglers
while a_index<=seq1Len
LogAligned(end+1)= {mSeq1(a_index)};
NirsAligned(end+1)= {nan};
%BlockAligned(end+1,:) = BlockInfo(a_index,:);
a_index= a_index +1;
end
while b_index<=seq2Len
NirsAligned(end+1)= {mSeq2(b_index)};
LogAligned(end+1)= {nan};
%BlockAligned(end+1,:) = BlockInfo(b_index,:);
b_index= b_index +1;
end
BlockAligned = [BlockAligned;BlockInfo(a_index-1:end,:)];
if size(BlockAligned,1) < (length(NirsAligned)+1)
BlockAligned(end+1,:) = {'stop'};
end
NirsAligned(cellfun(@isnan,NirsAligned)) = LogAligned(cellfun(@isnan,NirsAligned));
end
function nirsTimes = reconstructFromAlignedDiff(nirsDiff,logDiff,nirsStartTime)
if(nargin<3)
disp('Too few parameters,cannot align');
return;
end
if(isa(nirsDiff,'cell')&&isa(logDiff,'cell'))
nirsDiff = transpose(cell2mat(nirsDiff));
logDiff = transpose(cell2mat(logDiff));
end
nirsTimes = cumsum([nirsStartTime;nirsDiff]);
end
%% Done
%not Needed
%https://github.com/SPIN-Scorcerer/Error-Analysis/blob/master/error_analysis_code.py
function outputDist = charLevenshtein(seq1,seq2, normalized,maxDist)
if(nargin<4)
if(~exist('maxDist',1))
maxDist = -1;
end
if(normalized=='')
normalized=0;%false
end
end
if(nargin<3)
if(~exist('maxDist',1))
maxDist = -1;
end
if(normalized=='')
normalized=0;%false
end
end
if(noramlized)
return; %%not yet implemented
end
if(seq1==seq2)
outputDist = 0;
return;
end
len1 = length(seq1);
len2 = length(seq2);
if(max_dist>=0 && abs(len1-len2)>maxDist)
outputDist = -1;
return;
end
if len1==0
outputDist = len2;
return;
end
if len2 == 0
outputDist = len1;
return;
end
if(len1<len2)
tmp = len1;
len1 = len2;
len2 = tmp;
clear(tmp);
tmp = seq1;
seq1 = seq2;
seq2 = tmp;
clear(tmp);
end
column = zeros(len2+1,1);
for x = 2:len2+1
column(0) =x;
last = x-1;
for y= 2:len2+1
old = column(y);
cost = (seq1(x-1)~=seq2(y-1));%%
column(y) = min(last+cost,column(y)+1,column(y-1)+1);
last = old;
end
if max_dist>=0 && min(column)>maxdist
outputDist = -1;
return;
end
end
if(maxDist>=0 && column(len2)>maxDist)
outputDist = -1;
return;
end
outputDist =column(len2);
return;
end
%%
%%the function to find path with shortest cost; I'm not really
%%familiar with Matlab so i'm just gonna put down my thoughts here:
%%take in the two difference vectors(converted from the fNirs and
%%python file) and the difference table that we just computed in
%%method intLevenshtein.
%%use a double for-loop to parse through the 2d array
%%3 cases that we should consider: 1. if two values are pretty much
%%the same, then sum+=cost; 2. they are not close and none of them
%%is a gap 3. the two values are not close but one of them is a
%%gap, then we would need to add the previous value to the current
%%one
end
end