diff --git a/AutomaticQC/qcFilterMain.m b/AutomaticQC/qcFilterMain.m index b4660af7e..35b921da4 100644 --- a/AutomaticQC/qcFilterMain.m +++ b/AutomaticQC/qcFilterMain.m @@ -83,9 +83,13 @@ goodIdx = canBeFlagGoodIdx & goodIdx; probGoodIdx = canBeFlagProbGoodIdx & probGoodIdx; probBadIdx = canBeFlagProbBadIdx & probBadIdx; - badIdx = canBeFlagBadIdx & badIdx; + badIdx = canBeFlagBadIdx & badIdx; end + % call the sub fuction to add the results of the failed tests + % into a new variable + addFailedTestsFlags() + sam.(type{m}){k}.flags(goodIdx) = fsam.(type{m}){k}.flags(goodIdx); sam.(type{m}){k}.flags(probGoodIdx) = fsam.(type{m}){k}.flags(probGoodIdx); sam.(type{m}){k}.flags(probBadIdx) = fsam.(type{m}){k}.flags(probBadIdx); @@ -206,6 +210,10 @@ probBadIdx = f == probBadFlag; badIdx = f == badFlag; + % call the sub fuction to add the results of the failed tests + % into a new variable + addFailedTestsFlags() + % update new flags in current variable goodIdx = canBeFlagGoodIdx & goodIdx; probGoodIdx = canBeFlagProbGoodIdx & probGoodIdx; @@ -268,4 +276,110 @@ end end end -end + + function addFailedTestsFlags() + % addFailedTestsFlags inherits all the variable available in the + % workspace. + % + % we now have for the test filterName the result (good, probgood, probBad, bad) value for each variable and each datapoint. + % For most QC routines, a failed test is where data is not flagged + % as good. + % However, more information is available on + % https://github.com/aodn/imos-toolbox/wiki/QCProcedures + % + % This function aims to store, in a new variable named "failed_tests" + % the result of why a data point wasn't flagged as good, i.e. which + % QC routine was responsible for "rejecting" a data point. + % + % To do so, each failed QC routine is linked to the power of 2 of + % an integer. + % For example: + % manualQC is linked to 2^1 + % imosCorrMagVelocitySetQC is linked to 2^2 + % ... + % Any integer can be written as the sum of disting powers of 2 + % numbers (see various proofs online + % https://math.stackexchange.com/questions/176678/strong-induction-proof-every-natural-number-sum-of-distinct-powers-of-2) + % + % + % Note that QC tests can be rerun by the user multiple times. This + % however should be dealt in the FlowManager/displayManager.m + % function + % + % + % the failed tests depends on the test. It is not always the same + % for all. See table for each individual test at + % https://github.com/aodn/imos-toolbox/wiki/QCProcedures + if strcmp(filterName, 'imosTiltVelocitySetQC') % see https://github.com/aodn/imos-toolbox/wiki/QCProcedures#adcp-tilt-test---imostiltvelocitysetqc---compulsory + % For the imosTiltVelocitySetQC, users would like to know if the + % failed test is because the data was flagged as bad or as probably + % good + % this test (imosTiltVelocitySetQC) + % there is a two level flag for failed tests. We're taking this + % into account below + failedTestsIdx_probGood = probGoodIdx; + failedTestsIdx_bad = badIdx; + elseif strcmp(filterName, 'imosErrorVelocitySetQC') % see https://github.com/aodn/imos-toolbox/wiki/QCProcedures#adcp-error-velocity-test---imoserrorvelocitysetqc---optional + % for this specific QC routine, a test is considerer to be pass + % if the data is flagged good or probably good if ECUR is NaN + + % find variable index for ECUR + notdone = true; + while notdone + for kk = 1:length(sam.(type{m})) + if strcmp(sam.(type{m}){kk}.name, 'ECUR') + notdone = false; + break + end + end + end + + if not(notdone) + if all(isnan(sam.(type{m}){kk}.data(:))) + failedTestsIdx = badIdx | probBadIdx; + else + failedTestsIdx = badIdx | probBadIdx | probGoodIdx ; + end + end + + else + failedTestsIdx = probGoodIdx | probBadIdx | badIdx; % result matrice for all variables + end + + + if strcmp(filterName, 'imosTiltVelocitySetQC') + failedTests_probGood = zeros(size(failedTestsIdx_probGood)); + failedTests_probGood(logical(failedTestsIdx_probGood)) = imosQCTest(strcat(filterName, '_probably_good')); + failedTests_probGood = int32(failedTests_probGood); + + failedTests_bad = zeros(size(failedTestsIdx_bad)); + failedTests_bad(logical(failedTestsIdx_bad)) = imosQCTest(strcat(filterName, '_bad')); + failedTests_bad = int32(failedTests_bad); + + if ~isfield(sam.(type{m}){k}, 'failed_tests') % if the flat_tests field does not exist yet + sam.(type{m}){k}.failed_tests = failedTests_bad + failedTests_probGood; + + else + sam.(type{m}){k}.failed_tests = sam.(type{m}){k}.failed_tests + failedTests_bad + failedTests_probGood; + end + else + + failedTests = zeros(size(failedTestsIdx)); + if strcmp(filterName, 'imosHistoricalManualSetQC') + failedTests(logical(failedTestsIdx)) = imosQCTest('userManualQC'); % we replace imosHistoricalManualSetQC with userManualQC so that we only have 1 flag_value/flag_meaning combo in the NetCDF for something very similar in the end + else + failedTests(logical(failedTestsIdx)) = imosQCTest(filterName); + end + failedTests = int32(failedTests); + + if ~isfield(sam.(type{m}){k},'failed_tests') % if the flat_tests field does not exist yet + sam.(type{m}){k}.failed_tests = failedTests; + else + % if it already exists, we sum the previous array with the new one + sam.(type{m}){k}.failed_tests = sam.(type{m}){k}.failed_tests + failedTests; + + + end + end + end +end \ No newline at end of file diff --git a/FlowManager/displayManager.m b/FlowManager/displayManager.m index de8b72647..fbc9a50fd 100644 --- a/FlowManager/displayManager.m +++ b/FlowManager/displayManager.m @@ -623,8 +623,20 @@ function resetPropQCCallback() for j=1:length(resetIdx) for i=1:length(sample_data{resetIdx(j)}.variables) - if isfield(sample_data{resetIdx(j)}.variables(i), 'ancillary_comment') - sample_data{resetIdx(j)}.variables(i) = rmfield(sample_data{resetIdx(j)}.variables(i), 'ancillary_comment'); + % previous code could not run since it was using variables(i) instead of variables{i}. + % TODO ask @ggalibert about this + if isfield(sample_data{resetIdx(j)}.variables{i}, 'ancillary_comment') + sample_data{resetIdx(j)}.variables{i} = rmfield(sample_data{resetIdx(j)}.variables{i}, 'ancillary_comment'); + end + + % delete the failed_tests previous results so that we don't + % sum up again the failed QC routines. The good thing is + % that all QC routines a reset anyway, which makes our + % life easier + if isfield(sample_data{resetIdx(j)}.variables{i}, 'failed_tests') + % we don't bother doing heavy calculation. + % We simply delete the "failed_tests" variable + sample_data{resetIdx(j)}.variables{i} = rmfield(sample_data{resetIdx(j)}.variables{i}, 'failed_tests'); end end diff --git a/FlowManager/flowManager.m b/FlowManager/flowManager.m index bd76b8c56..f9339108b 100644 --- a/FlowManager/flowManager.m +++ b/FlowManager/flowManager.m @@ -341,6 +341,15 @@ function manualQCRequestCallback(setIdx, varIdx, dataIdx, flag, manualQcComment) % we update the flags values autoQCData{setIdx}.variables{varIdx}.flags(dataIdx) = flag; + % update the failed_tests field containing information on failed tests + % with manual QC information + if isfield(autoQCData{setIdx}.variables{varIdx}, 'failed_tests') + autoQCData{setIdx}.variables{varIdx}.failed_tests(dataIdx) = imosQCTest('userManualQC'); % if manual QC done on a point, we overwrite previous failed QC routine information + else + autoQCData{setIdx}.variables{varIdx}.failed_tests(dataIdx) = zeros(size(dataIdx)); % initialise array + autoQCData{setIdx}.variables{varIdx}.failed_tests(dataIdx) = imosQCTest('userManualQC'); + end + qcSet = str2double(readProperty('toolbox.qc_set')); rawFlag = imosQCFlag('raw', qcSet, 'flag'); diff --git a/IMOS/imosQCTest.m b/IMOS/imosQCTest.m new file mode 100644 index 000000000..f89d2baf0 --- /dev/null +++ b/IMOS/imosQCTest.m @@ -0,0 +1,57 @@ +function value = imosQCTest( testName ) +%imosQCTest Returns an appropriate QC flag_value (integer) +% given a qc test routine (String) + +% The value returned by this function is the power of 2 of the qc routine +% positional integer available in imosQCTests.txt. This is used to store +% information in a variable in the form of integers +% +% +% Inputs: +% +% testName - name of QC test +% +% Outputs: +% value - integer +% +% Author: Laurent Besnard +% +% Example: +% +% value = imosQCTest( 'userManualQC'); +% +% +% assert(value==2) +% + +narginchk(1, 1); +if ~ischar(testName), error('field must be a string'); end + +value = ''; + +% open the IMOSQCFTests.txt file - it should be +% in the same directory as this m-file +path = ''; +if ~isdeployed, [path, ~, ~] = fileparts(which('imosToolbox.m')); end +if isempty(path), path = pwd; end +path = fullfile(path, 'IMOS'); + +fidS = -1; +try + % read in the QC sets + fidS = fopen([path filesep 'imosQCTests.txt'], 'rt'); + if fidS == -1, return; end + sets = textscan(fidS, '%s%f', 'delimiter', ',', 'commentStyle', '%'); + fclose(fidS); + + [~, idx] = ismember(testName, sets{1,1}); + + value = int32(2^sets{1,2}(idx)); % return the + + +catch e + if fidS ~= -1, fclose(fidS); end + rethrow(e); +end + + diff --git a/IMOS/imosQCTests.m b/IMOS/imosQCTests.m new file mode 100644 index 000000000..10630153f --- /dev/null +++ b/IMOS/imosQCTests.m @@ -0,0 +1,64 @@ +function values = imosQCTests() +%imosQCTests Returns a 1x2 cell array reading the info found in +%ImosQCTests.txt +% - the first one contains a list of QC routines +% - the second cell returns a positional integer for each routine +% +% Inputs: +% +% N.A. +% +% Outputs: +% 1x2 cell array +% +% Author: Laurent Besnard +% + +% +% Copyright (C) 2023, Australian Ocean Data Network (AODN) and Integrated +% Marine Observing System (IMOS). +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation version 3 of the License. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. + +% You should have received a copy of the GNU General Public License +% along with this program. +% If not, see . +% + +% open the IMOSQCFTests file - it should be +% in the same directory as this m-file +path = ''; +if ~isdeployed, [path, ~, ~] = fileparts(which('imosToolbox.m')); end +if isempty(path), path = pwd; end +path = fullfile(path, 'IMOS'); + +fidS = -1; +try + % read in the QC sets + fidS = fopen([path filesep 'imosQCTests.txt'], 'rt'); + if fidS == -1, return; end + sets = textscan(fidS, '%s%f', 'delimiter', ',', 'commentStyle', '%'); + fclose(fidS); + + % rewrite sets with correct qc flag test values using the imosQCTest + % function + nQcFlags = length(sets{1}); + for k = 1:nQcFlags + sets{1,2}(k) = imosQCTest(sets{1,1}{k}); + end + + values = sets; + +catch e + if fidS ~= -1, fclose(fidS); end + rethrow(e); +end + + diff --git a/IMOS/imosQCTests.txt b/IMOS/imosQCTests.txt new file mode 100644 index 000000000..38e8d2ccc --- /dev/null +++ b/IMOS/imosQCTests.txt @@ -0,0 +1,35 @@ +% +% A list of all the toolbox QC routines flag values. This list is intended +% to by used by the qcFilterMain.m function to tag, for each data point, +% which QC test failed +% +% the positional integer value has to be strictly above 0 since 0 is used +% as a _FillValue +% +% QC routine, positional integer +userManualQC, 0 +imosCorrMagVelocitySetQC, 1 +imosDensityInversionSetQC, 2 +imosEchoIntensitySetQC, 3 +imosEchoIntensityVelocitySetQC, 4 +imosEchoRangeSetQC, 5 +imosErrorVelocitySetQC, 6 +imosGlobalRangeQC, 7 +imosHorizontalVelocitySetQC, 8 +imosImpossibleDateQC, 10 +imosImpossibleDepthQC, 11 +imosImpossibleLocationSetQC, 12 +imosInOutWaterQC, 13 +imosPercentGoodVelocitySetQC, 14 +imosRateOfChangeQC, 15 +imosRegionalRangeQC, 16 +imosSalinityFromPTQC, 17 +imosSideLobeVelocitySetQC, 18 +imosStationarityQC, 19 +imosSurfaceDetectionByDepthSetQC, 20 +imosTier2ProfileVelocitySetQC, 21 +imosTiltVelocitySetQC_probably_good,22 +imosTiltVelocitySetQC_bad, 23 +imosTimeSeriesSpikeQC, 24 +imosVerticalSpikeQC, 25 +imosVerticalVelocityQC, 26 diff --git a/NetCDF/exportNetCDF.m b/NetCDF/exportNetCDF.m index b3bd3bee4..70bbf8741 100644 --- a/NetCDF/exportNetCDF.m +++ b/NetCDF/exportNetCDF.m @@ -61,6 +61,13 @@ dateFmt = readProperty('exportNetCDF.dateFormat'); qcSet = str2double(readProperty('toolbox.qc_set')); qcType = imosQCFlag('', qcSet, 'type'); + + % lbesnard: tried the following + % qcFlagType = 'NC_INT'; % as described in + % https://au.mathworks.com/help/matlab/ref/netcdf.defvar.html however it + % breaks the code in the netcdf3tomatlabtype function. TODO: ask + % @ggalibert why it was coded this way. + qcFlagType = 'int'; qcDimId = []; try @@ -153,6 +160,7 @@ dimAtts = rmfield(dimAtts, {'data', 'name'}); if isfield(dimAtts, 'typeCastFunc'), dimAtts = rmfield(dimAtts, 'typeCastFunc'); end if isfield(dimAtts, 'flags'), dimAtts = rmfield(dimAtts, 'flags'); end + if isfield(dimAtts, 'failed_tests'), dimAtts = rmfield(dimAtts, 'failed_tests'); end % QC variable is not CF for dimensions if isfield(dimAtts, 'FillValue_'), dimAtts = rmfield(dimAtts, 'FillValue_'); end % _FillValues for dimensions are not CF dimAtts = rmfield(dimAtts, 'stringlen'); @@ -299,6 +307,7 @@ varAtts = rmfield(varAtts, {'data', 'dimensions', 'stringlen', 'name'}); if isfield(varAtts, 'typeCastFunc'), varAtts = rmfield(varAtts, 'typeCastFunc'); end if isfield(varAtts, 'flags'), varAtts = rmfield(varAtts, 'flags'); end + if isfield(varAtts, 'failed_tests'), varAtts = rmfield(varAtts, 'failed_tests'); end if isfield(varAtts, 'ancillary_comment'), varAtts = rmfield(varAtts, 'ancillary_comment');end if isfield(vars{m}, 'flags') && sample_data.meta.level > 0 && ~isempty(vars{m}.dimensions) % ancillary variables for coordinate scalar variable is not CF @@ -306,6 +315,12 @@ % to the ancillary variables attribute varAtts.ancillary_variables = [varname '_quality_control']; end + + if isfield(vars{m}, 'failed_tests') && sample_data.meta.level > 0 && ~isempty(vars{m}.dimensions) % ancillary variables for coordinate scalar variable is not CF + % add the *_failed_tests variable (defined below) + % to the ancillary variables attribute + varAtts.ancillary_variables = [varAtts.ancillary_variables ,' ' ,varname '_failed_tests']; + end % add the attributes putAtts(fid, vid, vars{m}, varAtts, 'variable', varNetcdfType{end}, dateFmt, mode); @@ -322,10 +337,25 @@ else qcvid = NaN; end + + + if isfield(vars{m}, 'failed_tests') && sample_data.meta.level > 0 && ~isempty(vars{m}.dimensions) % ancillary variables for coordinate scalar variable is not CF + % create the ancillary *_failed_tests variable + qcflagvid = addQCFlagsVar(... + fid, sample_data, m, [qcDimId dids], 'variables', qcFlagType, qcSet, dateFmt, mode); + + if ~isempty(dimLen) + netcdf.defVarChunking(fid, qcflagvid, 'CHUNKED', dimLen); + netcdf.defVarDeflate(fid, qcflagvid, true, true, compressionLevel); + end + else + qcflagvid = NaN; + end % save variable IDs for later reference sample_data.variables{m}.vid = vid; sample_data.variables{m}.qcvid = qcvid; + sample_data.variables{m}.qcflagvid = qcflagvid; end % we're finished defining dimensions/attributes/variables @@ -433,6 +463,18 @@ netcdf.putVar(fid, qcvid, flags); end + + if isfield(vars{m}, 'failed_tests') && sample_data.meta.level > 0 && ~isempty(vars{m}.dimensions) % ancillary variables for coordinate scalar variable is not CF + failed_tests = vars{m}.failed_tests; + qcflagvid = vars{m}.qcflagvid; + + typeCastFunction = str2func(netcdf3ToMatlabType(qcFlagType)); + failed_tests = typeCastFunction(failed_tests); + + if nDims > 1, failed_tests = permute(failed_tests, nDims:-1:1); end + + netcdf.putVar(fid, qcflagvid, failed_tests); + end end % @@ -470,7 +512,7 @@ % switch(type) case 'dimensions' - var = sample_data.dimensions{varIdx}; + var = sample_data.dimensions{varIdx}; % when is this even triggered? (Loz, 20230624) template = 'qc_coord'; case 'variables' var = sample_data.variables{varIdx}; @@ -600,6 +642,89 @@ putAtts(fid, vid, var, qcAtts, template, netcdfType, dateFmt, ''); end + + +function vid = addQCFlagsVar(... + fid, sample_data, varIdx, dims, type, netcdfType, ~, dateFmt, ~) +%addQCFlagsVar Adds an ancillary variable, containing failed QC routines info +% for the variable with the given index. +% +% Inputs: +% fid - NetCDF file identifier +% sample_data - Struct containing entire data set +% varIdx - Index into sample_data.variables, specifying the +% variable. +% dims - Vector of NetCDF dimension identifiers. +% type - either 'dimensions' or 'variables', to differentiate between +% coordinate variables and data variables. +% netcdfType - The netCDF type in which the flags should be output. +% dateFmt - Date format in which date attributes should be output. +% mode - Toolbox processing mode. +% +% Outputs: +% vid - NetCDF variable identifier of the QC variable that was +% created. +% + switch(type) + case 'variables' + var = sample_data.variables{varIdx}; + template = 'failed_tests'; + otherwise + error(['invalid type: ' type]); + end + + path = readProperty('toolbox.templateDir'); + if isempty(path) || ~exist(path, 'dir') + path = ''; + if ~isdeployed, [path, ~, ~] = fileparts(which('imosToolbox.m')); end + if isempty(path), path = pwd; end + path = fullfile(path, 'NetCDF', 'template'); + end + + varname = [var.name '_failed_tests']; + + qcAtts = parseNetCDFTemplate(... + fullfile(path, [template '_attributes.txt']), sample_data, varIdx); + + % get qc flag values + qcFlags = imosQCTests; + nQcFlags = length(qcFlags{1}); + qcDescs = cell(nQcFlags, 1); + + % get flag descriptions + for k = 1:nQcFlags + qcDescs{k} = qcFlags{1}{k}; + end + + % see + % http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html#flags + % The flag_masks and flag_meanings attributes describe a number of + % independent Boolean conditions using bit field notation by setting + % unique bits in each flag_masks value. The flag_masks attribute is the + % same type as the variable to which it is attached, and contains a list + % of values matching unique bit fields. The flag_meanings attribute is + % defined as above, one for each flag_masks value. A flagged condition is + % identified by performing a bitwise AND of the variable value and each + % flag_masks value; a non-zero result indicates a true condition. Thus, + % any or all of the flagged conditions may be true, depending on the + % variable bit settings. The following example illustrates the use of + % flag_masks to express six sensor status conditions. + % + % In the case of this [variable]_failed_tests variable, we're using the + % flag_masks rather than flag_values + + qcAtts.flag_masks = int32(qcFlags{2}); + + % turn descriptions into space separated string + qcDescs = cellfun(@(x)(sprintf('%s ', x)), qcDescs, 'UniformOutput', false); + qcDescs{end}(end) = []; + qcAtts.flag_meanings = [qcDescs{:}]; + + vid = netcdf.defVar(fid, varname, netcdfType, dims); + putAtts(fid, vid, var, qcAtts, template, netcdfType, dateFmt, ''); % netcdf.getAtt(fid,vid,'flag_values') check why it's doing something funky +end + + function putAtts(fid, vid, var, template, templateFile, netcdfType, dateFmt, mode) %PUTATTS Puts all the attributes from the given template into the given NetCDF % variable. diff --git a/NetCDF/template/failed_tests_attributes.txt b/NetCDF/template/failed_tests_attributes.txt new file mode 100644 index 000000000..dafa47f25 --- /dev/null +++ b/NetCDF/template/failed_tests_attributes.txt @@ -0,0 +1,12 @@ +S, long_name = failed tests for [mat imosParameters(sample_data.variables{k}.name, 'long_name')] +S, standard_name = [mat regexprep(strcat(imosParameters(sample_data.variables{k}.name, 'standard_name'), ' status_flag'), '^ .*', '')] +Q, _FillValue = [mat int32(0)] +N, add_offset = +N, scale_factor = +S, comment = The flag_masks attribute is the same type as the variable to which it is attached, and contains a list of values matching unique bit fields. The flag_meanings attribute is defined as above, one for each flag_masks value. A flagged condition is identified by performing a bitwise AND of the variable value and each flag_masks value; a non-zero result indicates a true condition. Thus, any or all of the flagged conditions may be true, depending on the variable bit settings. +S, history = +S, references = + +% these fields are automatically populated upon NetCDF export +Q, flag_masks = +S, flag_meanings = \ No newline at end of file