-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreadbrukermaldi.m
315 lines (262 loc) · 10.1 KB
/
readbrukermaldi.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
function [mass,spectra,foldernames,filenames] = readbrukermaldi(foldernames)
% READBRUKERMALDI Reads the Bruker Flex spectrum file format
% Version 1.3
%
% usage:
% [mass,spectra,filenames] = readbrukermaldi(foldernames);
% or
% [mass,spectra,filenames] = readbrukermaldi();
% (The second version prompts for one or more folder names.)
%
% Takes zero, one or more file names.
% Returns: 'mass' a mass vector
% 'spectra' a matrix of spectra in rows
% 'foldernames' a matrix of folder names used in the order the
% spectra appear
% There is NO binning into fixed mass steps. Where the spectra are
% misaligned the data is interpolated (linearly). The maximum and minimum
% mass limits are determined by the spectrum with the smallest mass range,
% such that the spectra matrix only contains the mass range that overlaps
% all input spectra. The data are aligned such that each column of the
% spectra matrix corresponds to the same mass.
%
% Copyright (c) 2015-2017, Alex Henderson.
% Contact email: [email protected]
% Licenced under the GNU Lesser General Public License (LGPL) version 3
% https://www.gnu.org/copyleft/lesser.html
% Other licensing options are available, please contact Alex for details
% If you use this file in your work, please acknowledge the author(s) in
% your publications.
%
% Version 1.3, Alex Henderson April 2017
% Version 1.3, Alex Henderson April 2017
% Incorporated find_value2 function.
% Version 1.2, Alex Henderson April 2015
% Fixed bug when passing in a folder name.
% Version 1.1, Alex Henderson March 2015
% Added Linux compatibility.
% Version 1.0 Alex Henderson, February 2015
% Based on biotofspectrum version 1.1
% Code to read the file format is at the end of this file
% The Bruker file format comprises a folder with a collection of files in
% subfolders. We're interested in two files: fid and acqus. The problem
% with reading multiple Bruker spectra is that they can't appear in the
% same folder since they'd overwrite one another. Therefore we need to have
% a method of selecting multiple folders rather than files. MATLAB doesn't
% have this built in, so we're using a component from the MATLAB File
% Exchange: uipickfiles
% http://www.mathworks.com/matlabcentral/fileexchange/10867-uipickfiles--uigetfile-on-steroids
%% Check whether uipickfiles function is available
if ~exist('uipickfiles','file')
error('This function relies ''uipickfiles'' available from MATLAB File Exchange: http://www.mathworks.com/matlabcentral/fileexchange/10867-uipickfiles--uigetfile-on-steroids');
end
%% Check whether the user has supplied a list of folders
if ~exist('foldernames', 'var')
foldernames = uipickfiles();
if (isfloat(foldernames) && (foldernames == 0))
% Nothing chosen
return;
end
end
% The uipickfiles code returns a cell array of filenames of dimension
% 1 x many. We need a list of dimension many x 1. However, we may have been
% passed a column of cells containing foldernames. Another possibility is
% that we have a char array passed in as a row vector.
if iscell(foldernames)
if (size(foldernames,2) > 1)
foldernames = foldernames';
end
end
numberoffolders = size(foldernames,1);
% With the foldernames (either supplied or chosen using the GUI) we look
% for files called 'fid'
% We need to traverse a number of folders since the Windows DIR command
% can't look for a specific file name in a remote folder. Therefore we need
% to record where we start from so we can return back there later.
wherewewere = pwd;
% Somewhere to put the filenames
filenames = [];
try
% We're using a try/catch mechanism to make sure we return to the
% original folder if we get an error.
for i = 1:numberoffolders
folder = char(foldernames(i,:));
% Move into the folder
cd(folder);
% Look for files called 'fid' in all subfolders
status = 1;
if ispc()
[status,result] = system('dir fid /S /B');
else
if isunix()
[status,result] = system(['find "', folder, '" -name "fid"']);
end
end
if (status == 0)
% We found something
try
% strsplit introduced after R2012a
filesinfolder = strsplit(result,'\n');
catch exception
[filesinfolder,matches] = regexp(result,'\n','split','match');
end
filenames = vertcat(filenames,filesinfolder(1:end-1)');
end
end
catch exception
% Go back to the original folder if there is an error
cd(wherewewere);
error(exception.identifier,exception.message);
end
% Go back to the original folder anyway
cd(wherewewere);
numberoffiles = size(filenames,1);
%% Now we have a list of 'fid' files so we can open them and extract the spectra
for i = 1:numberoffiles
[mass_i,spectrum_i] = brukerflex(char(filenames(i,:)));
if (i == 1)
% First time through we initialise the data array
spectra = zeros(numberoffiles,length(spectrum_i));
spectra(1,:) = spectrum_i;
mass = mass_i;
end
needtointerpolate = 0;
if (length(mass_i) ~= length(mass))
% Different number of data points, so we need to interpolate the
% data. This is examined separately otherwise, if the two vectors
% are of different length, MATLAB raises an error.
needtointerpolate = 1;
else
if (mass_i ~= mass)
% Different values of mass, so we need to interpolate the data
needtointerpolate = 1;
end
end
if needtointerpolate
% First determine the range over which the mismatched spectra
% overlap. Then truncate both the mass vector and data matrix for
% the data already processed and the new data.
lowmass = max(mass(1),mass_i(1));
highmass = min(mass(end),mass_i(end));
idx = find_value2(mass,[lowmass,highmass]);
mass = mass(idx(1):idx(2));
spectra = spectra(:,idx(1):idx(2));
idx = find_value2(mass_i,[lowmass,highmass]);
mass_i = mass_i(idx(1):idx(2));
spectrum_i = spectrum_i(idx(1):idx(2));
% Now interpolate the new spectrum vector to match the existing
% data.
spectrum_i = interp1(mass_i,spectrum_i,mass,'linear');
end
spectra(i,:) = spectrum_i;
end
% Sometimes the interpolation turns up a NaN in either the first or last
% channel. Possibly both. Here we truncate the data to remove them.
if find(isnan(spectra(:,1)))
mass = mass(2:end);
spectra = spectra(:,2:end);
end
if find(isnan(spectra(:,end)))
mass = mass(1:end-1);
spectra = spectra(:,1:end-1);
end
end % function readbrukermaldi
%%~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
function [mass,spectrum] = brukerflex(fidfilename)
% BRUKERFLEX Reads the Bruker Flex spectrum file format
% Version 1.0
%
% syntax: [mass, spectrum] = brukerflex(filename);
% or
% syntax: [mass, spectrum] = brukerflex();
%
% The second version prompts for a file name.
%
% Version 1.0
% Reads the Bruker Flex spectrum file format
%
% Takes a file name
% Returns two equal length column vectors - mass and spectrum
% There is NO binning into fixed mass steps
%
% Copyright (c) Alex Henderson, February 2015
% Contact email: [email protected]
% Licenced under the GNU Lesser General Public License (LGPL) version 3
% https://www.gnu.org/copyleft/lesser.html
% Other licensing options are available, please contact Alex for details
% If you use this file in your work, please acknowledge the author(s) in
% your publications.
%
% Version 1.0, February 2015
% Version 1.0 Alex Henderson, February 2015
% Based on biotofspectrum version 1.1
% Credits to...
% Sebastian Gibb for interpretation of the calibration contants.
% https://github.com/sgibb/readBrukerFlexData
% Implementation of the mass calculation taken from:
% https://github.com/sgibb/readBrukerFlexData/commits/master/R/tof2mass-functions.R
% Commit number 75a05247e631b2b5c9ba9698f6acff83df57e79e (initial import)
%
% Martin Strohalm and BH Clowers for interpretation of the tof spacing and
% offsets.
% https://code.google.com/p/toolz/
% Implementation taken from:
% http://toolz.googlecode.com/svn-history/r43/trunk/SimpleView/flexReader.py
%
% If you need R or Python code to read the Bruker Flex format, check out
% these resources.
try
[filehandle,message] = fopen(fidfilename);
if (filehandle == -1)
error(message);
end;
spectrum = fread(filehandle,inf,'uint');
fclose(filehandle);
catch exception
fclose(filehandle);
error(exception.identifier,exception.message);
end
[pathstr] = fileparts(fidfilename);
acqusfilename = fullfile(pathstr,'acqus');
acqus = fileread(acqusfilename);
matchStr = regexp(acqus,'##\$ML1= (.+?) ','tokens');
constant1 = str2double(matchStr{1});
matchStr = regexp(acqus,'##\$ML2= (.+?) ','tokens');
constant2 = str2double(matchStr{1});
matchStr = regexp(acqus,'##\$ML3= (.+?) ','tokens');
constant3 = str2double(matchStr{1});
matchStr = regexp(acqus,'##\$DELAY= (.+?) ','tokens');
delay = str2double(matchStr{1});
matchStr = regexp(acqus,'##\$DW= (.+?) ','tokens');
step = str2double(matchStr{1});
numpoints = length(spectrum);
tof = transpose(delay : step : (delay+step*(numpoints-1)));
picoseconds = 1e12;
a = constant3;
b = sqrt(picoseconds / constant1);
c = constant2 - tof;
mass = (-b + sqrt((b * b) - (4 * a * c))) / (2 * a);
sign = 1;
if (mass < 0)
sign = -1;
end
mass = (mass .^2) * sign;
end % function brukerflex
%%~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
function [vec_index] = find_value2(vector,value)
% This function finds the indices for a value in vector
%
% Normal vector, not saisir structure
%
% Usage [vec_index]=find_value2(vector,value)
% Modified version of find_value to allow more than one value to be
% calculated. Alex Henderson, January 2013
if (numel(value) > 1)
vec_index = zeros(size(value));
for i = 1:length(value)
[output_value,vec_index(i)] = min(abs(vector-value(i)));
end
else
[output_value,vec_index] = min(abs(vector-value));
end
end