diff --git a/Exareme-Docker/Dockerfile b/Exareme-Docker/Dockerfile index 5838e2ad0..f36cd5db3 100644 --- a/Exareme-Docker/Dockerfile +++ b/Exareme-Docker/Dockerfile @@ -43,6 +43,8 @@ RUN apt update RUN apt install -y r-base RUN Rscript -e 'install.packages("randomForest", repos="https://cloud.r-project.org")' +RUN Rscript -e 'install.packages("caret")' +RUN Rscript -e 'install.packages("e1071")' RUN pip install rpy2==2.8.6 # Add Madis Server diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java index 481655f20..653f79c4c 100644 --- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java +++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java @@ -263,7 +263,7 @@ private static void validateAlgorithmParameterValueType( String algorithmName, String value, ParameterProperties parameterProperties - ) throws AlgorithmException { + ) throws AlgorithmException, UserException { if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.json)) { try { new JSONObject(value); @@ -285,19 +285,19 @@ private static void validateAlgorithmParameterValueType( try { Double.parseDouble(curValue); } catch (NumberFormatException nfe) { - throw new AlgorithmException(algorithmName, + throw new UserException( "The value of the parameter '" + parameterProperties.getName() + "' should be a real number."); } } else if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.integer)) { try { Integer.parseInt(curValue); } catch (NumberFormatException e) { - throw new AlgorithmException(algorithmName, + throw new UserException( "The value of the parameter '" + parameterProperties.getName() + "' should be 
an integer."); } } else if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.string)) { if (curValue.equals("")) { - throw new AlgorithmException(algorithmName, + throw new UserException( "The value of the parameter '" + parameterProperties.getName() + "' contains an empty string."); } diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py deleted file mode 100644 index e83160a32..000000000 --- a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py +++ /dev/null @@ -1,89 +0,0 @@ -from array import * - -try: - from collections import OrderedDict -except ImportError: - # Python 2.6 - from lib.collections26 import OrderedDict - -__docformat__ = 'reStructuredText en' - - -class highchartheatmap: - # i, j, val, title - # - # ''' chart: { type: 'heatmap', marginTop: 40, marginBottom: 80, plotBorderWidth: 1 }, - # title: { text: 'Sales per employee per weekday' }, - # xAxis: { categories: ['Alexander', 'Marie', 'Maximilian']}, - # yAxis: { categories: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'], title: null }, - # colorAxis: { min: 0, minColor: '#FFFFFF', maxColor: Highcharts.getOptions().colors[0] }, - # legend: { align: 'right',layout: 'vertical', margin: 0, verticalAlign: 'top', y: 25, symbolHeight: 280 }, - # tooltip: { formatter: function () { - # return '(' + this.series.xAxis.categories[this.point.x] + ", "+ this.series.yAxis.categories[this.point.y]+')= '+ this.point.value + ''; - # } - # }, - # series: [{ - # borderWidth: 1, - # data: [[0, 0, 10], [0, 1, 19], [0, 2, 8], [0, 3, 24], [0, 4, 67], [1, 0, 92], [1, 1, 58], [1, 2, 78], [1, 3, 117], [1, 4, 48], [2, 0, 35], [2, 1, 15], [2, 2, 123], [2, 3, 64], [2, 4, 52]], - # dataLabels: { - # enabled: true, - # color: '#000000' - # } - # }] - # ''' - registered 
= True # Value to define db operator - - def __init__(self): - self.n = 0 - self.xcategories = [] - self.ycategories = [] - self.mydata = [] - - def step(self, *args): - try: - if str(args[0]) not in self.xcategories: - self.xcategories.append(str(args[0])) - if str(args[1]) not in self.ycategories: - self.ycategories.append(str(args[1])) - - self.mydata.append( - [self.xcategories.index(str(args[0])), self.ycategories.index(str(args[1])), float(args[2])]) - self.title = str(args[3]) - self.xtitle = str(args[4]) - self.ytitle = str(args[5]) - except (ValueError, TypeError): - raise - - def final(self): - # print "self.xcategories", self.xcategories - # print "self.ycategories", self.ycategories - yield ('highchartheatmap',) - # print self.mydata - myresult = "chart: { type: 'heatmap', marginTop: 40, marginBottom: 80, plotBorderWidth: 1 }," - myresult += " title: { text: '" + self.title + "' }," - myresult += " xAxis: { categories: " + str(self.xcategories) + "}," - myresult += " yAxis: { categories: " + str(self.ycategories) + "}," - myresult += " colorAxis: { min: 0, minColor: '#FFFFFF', maxColor: Highcharts.getOptions().colors[0] }," - myresult += " legend: { align: 'right',layout: 'vertical', margin: 0, verticalAlign: 'top', y: 25, symbolHeight: 280 }," - # myresult += " tooltip: { formatter: function () {return '(' + this.series.xAxis.categories[this.point.x] + ", "+ this.series.yAxis.categories[this.point.y]+')= '+ this.point.value + '';}}," - myresult += " series: [{ borderWidth: 1, data: " - myresult += str(self.mydata) - myresult += ", dataLabels: { enabled: true,color: '#000000'}}]" - yield (myresult,) - - -if not ('.' 
def naive_bayes_training_inputerrorchecking(colname, noLevels):
    """Check that a Naive Bayes class column has enough distinct values.

    Registered as a row function so SQL templates can call it as a guard
    before the training results are produced.

    Args:
        colname: Name of the column being checked; used as the prefix of
            the error message.
        noLevels: Number of distinct values counted for that column.

    Returns:
        The string "OK" when ``noLevels`` is 2 or greater.

    Raises:
        functions.OperatorError: Tagged "ExaremeError" when ``noLevels``
            is below 2.
    """
    if noLevels < 2:
        # The guard accepts exactly two levels, so the message must say
        # "at least two" (it previously claimed "more than two").
        raise functions.OperatorError(
            "ExaremeError",
            colname + ": should contain at least two distinct values")
    return "OK"


naive_bayes_training_inputerrorchecking.registered = True
class highchartheatmap(functions.vtable.vtbase.VT):
    """Virtual table that turns (x, y, value) rows into a Highcharts heatmap.

    Named arguments ``query``, ``title``, ``xtitle`` and ``ytitle`` are all
    required. The query's first two columns are used as category labels for
    the x and y axes (in first-seen order) and the third column as the cell
    value. The operator returns a single column, ``highchartresult``, holding
    one row: the chart description serialised as JSON.
    """

    def VTiter(self, *parsedArgs, **envars):
        largs, dictargs = self.full_parse(parsedArgs)

        # Mandatory arguments, checked in this order so the first missing
        # one is the one reported.
        required_arguments = (
            ('query', "No query argument "),
            ('title', "No title argument "),
            ('xtitle', "No xtitle argument "),
            ('ytitle', "No ytitle argument "),
        )
        for argument, complaint in required_arguments:
            if argument not in dictargs:
                raise functions.OperatorError(__name__.rsplit('.')[-1], complaint)

        cursor = envars['db'].cursor()
        rows = cursor.execute(dictargs['query'])
        # Result is not used below; the call is kept as in the original.
        cursor.getdescriptionsafe()

        cells = []
        xcategories = []
        ycategories = []
        for row in rows:
            xlabel = str(row[0])
            if xlabel not in xcategories:
                xcategories.append(xlabel)
            ylabel = str(row[1])
            if ylabel not in ycategories:
                ycategories.append(ylabel)
            # Highcharts heatmap points are [x index, y index, value].
            cells.append([xcategories.index(xlabel),
                          ycategories.index(ylabel),
                          float(row[2])])

        chart = {"type": "heatmap", "marginTop": 40, "marginBottom": 80, "plotBorderWidth": 1}
        title = {"text": str(dictargs['title'])}
        x_axis = {"title": {"text": str(dictargs['xtitle'])}, "categories": xcategories}
        y_axis = {"title": {"text": str(dictargs['ytitle'])}, "categories": ycategories}
        color_axis = {"min": 0, "minColor": "#FFFFFF", "maxColor": "#6699ff"}
        legend = {"align": "right", "layout": "vertical", "margin": 0,
                  "verticalAlign": "top", "y": 25, "symbolHeight": 280}
        series = [{"borderWidth": 1, "data": cells,
                   "dataLabels": {"enabled": True, "color": "#000000"}}]

        payload = {
            "type": "application/vnd.highcharts+json",
            "data": {"chart": chart,
                     "title": title,
                     "xAxis": x_axis,
                     "yAxis": y_axis,
                     "colorAxis": color_axis,
                     "legend": legend,
                     "series": series
                     }
        }

        # First yield is the output schema, second is the single data row.
        yield [('highchartresult',)]
        yield (json.dumps(payload),)


def Source():
    """Return the generator wrapper that exposes highchartheatmap as a vtable."""
    return functions.vtable.vtbase.VTGenerator(highchartheatmap)
def _nan_to_none(value):
    """Return None for R NaN/NA values so json.dumps writes ``null``, else the value."""
    if str(value) in ('nan', 'NA'):
        return None
    return value


class rconfusionmatrixtable(functions.vtable.vtbase.VT):  # predictedclass,actualclass,val
    """Virtual table computing confusion-matrix statistics through R's caret.

    The ``query`` argument must return rows of (predicted class, actual
    class, count). Each row is expanded into ``count`` prediction/truth
    pairs, both vectors are turned into R factors over a shared set of
    levels, and ``caret.confusionMatrix`` is applied to them.

    Output columns are ``statscolname`` and ``statsval``; two rows are
    produced, "ResultOverall" and "ResultClassNames", each carrying a
    tabular-data-resource document serialised as JSON.
    """

    # Labels for the entries of the overall-statistics vector, in the order
    # they are read (positions 0..6 of the third element of the R result).
    _OVERALL_LABELS = ("Accuracy", "Kappa", "Accuracy Lower", "Accuracy Upper",
                       "Accuracy Null", "Accuracy P Value", "Mcnemar P Value")

    # Row labels of the per-class table, in the order they are read from the
    # fourth element of the R result.
    _BYCLASS_LABELS = ("Sensitivity", "Specificity", "Pos Pred Value",
                       "Neg Pred Value", "Precision", "Recall", "F1",
                       "Prevalence", "Detection Rate", "Detection Prevalence",
                       "Balanced Accuracy")

    def VTiter(self, *parsedArgs, **envars):
        largs, dictargs = self.full_parse(parsedArgs)

        if 'query' not in dictargs:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument")

        cur = envars['db'].cursor()
        c = cur.execute(dictargs['query'])

        predictedclasses = []
        actualclasses = []
        classnames = []
        actualseen = []
        for myrow in c:
            # int() tolerates counts that arrive as floats (e.g. from sum()).
            repeats = int(myrow[2])
            predictedclasses.extend([myrow[0]] * repeats)
            actualclasses.extend([myrow[1]] * repeats)
            if myrow[0] not in classnames:
                classnames.append(myrow[0])
            if myrow[1] not in actualseen:
                actualseen.append(myrow[1])

        # Classes that occur only as actual values must also be factor
        # levels, otherwise their rows turn into NA in the truth factor.
        # They are appended last so the level order for inputs where every
        # class is also predicted stays the same as before.
        for name in actualseen:
            if name not in classnames:
                classnames.append(name)

        levels = base.c(StrVector(classnames))
        predictedData = base.factor(base.c(StrVector(predictedclasses)), levels)
        truthData = base.factor(base.c(StrVector(actualclasses)), levels)
        Rresult = caret.confusionMatrix(predictedData, truthData)

        # Rresult[0] -> positive class, Rresult[1] -> table,
        # Rresult[2] -> overall statistics, Rresult[3] -> byClass statistics
        if len(classnames) == 2:
            dataOverall = [["Positive Class", Rresult[0][0]]]
        else:
            dataOverall = [["Positive Class", None]]
        for position, label in enumerate(self._OVERALL_LABELS):
            # NaN must not reach json.dumps: it would be written as the bare
            # token NaN, which is not valid JSON.
            dataOverall.append([label, _nan_to_none(Rresult[2][position])])

        ResultOverall = {
            "data": {
                "profile": "tabular-data-resource",
                "data": dataOverall,
                "name": "Overall Statistic Results",
                "schema": {
                    "fields": [
                        {"type": "text", "name": "Statistic Name"},
                        {"type": "real", "name": "Value"}
                    ]
                }
            },
            "type": "application/vnd.dataresource+json"
        }

        FieldClassNames = [{"type": "text", "name": "Statistic Name"}]
        for name in classnames:
            FieldClassNames.append({"type": "real", "name": name + " class"})

        # byClass is read as a flat sequence: all classes for the first
        # statistic, then all classes for the second, and so on.
        # NOTE(review): this assumes one value per (statistic, class) pair;
        # confirm the layout caret returns when there are exactly two classes.
        byclass = Rresult[3]
        nclasses = len(classnames)
        DataClassNames = []
        for k, label in enumerate(self._BYCLASS_LABELS):
            statrow = [label]
            for l in range(nclasses):
                statrow.append(_nan_to_none(byclass[k * nclasses + l]))
            DataClassNames.append(statrow)

        ResultClassNames = {
            "data": {
                "profile": "tabular-data-resource",
                "data": DataClassNames,
                "name": "Statistic Results per Class",
                "schema": {"fields": FieldClassNames}
            },
            "type": "application/vnd.dataresource+json"}

        # Schema first, then one row per JSON document.
        yield (['statscolname'], ['statsval'],)
        yield ("ResultOverall", json.dumps(ResultOverall))
        yield ("ResultClassNames", json.dumps(ResultClassNames))


def Source():
    """Return the generator wrapper that exposes rconfusionmatrixtable as a vtable."""
    return functions.vtable.vtbase.VTGenerator(rconfusionmatrixtable)
group by actualclass,predictedclass; +--drop table if exists defaultDB.lala; +--create table defaultDB.lala as select * from global_oneconfusionmatrix; + var 'jsonResult' from select '{ "type": "application/json", "data": ' || componentresult || '}' from ( select tabletojson(actualclass,predictedclass,val, "actualclass,predictedclass,val",0) as componentresult from global_oneconfusionmatrix ); -select '{"result": [' || '%{jsonResult}' || ']}'; +--var 'heatmap' from select highchartheatmap(actualclass,predictedclass,val,"confusion matrix", "actual values", "predicted values") from global_oneconfusionmatrix; + +var 'heatmap' from select * from (highchartheatmap title:Confusion_Matrix, xtitle:Actual_Values, ytitle:Predicted_Values select actualclass,predictedclass,val from global_oneconfusionmatrix); + +drop table if exists confusionmatrixstats; +create temp table confusionmatrixstats as +rconfusionmatrixtable select predictedclass,actualclass,val from global_oneconfusionmatrix; + + +var 'a' from select statsval from confusionmatrixstats where statscolname = 'ResultOverall'; +var 'b' from select statsval from confusionmatrixstats where statscolname = 'ResultClassNames'; + + +select '{"result": [' || '%{jsonResult}' ||','||'%{heatmap}' ||','||'%{a}'||',' || '%{b}' ||']}'; diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql index 7616a0cfe..dc7147c7e 100644 --- a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql +++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql @@ -76,6 +76,10 @@ from statistics where colname in (select code from defaultDB.globalmetadatatbl where categorical= 0) and colname <> '%{y}'; + +select Naive_Bayes_Training_inputerrorchecking('%{y}',no) from (select count(distinct classval) as no from global_probabilities); + + --select * from global_probabilities; var 'jsonResult' from select '{ 
"type": "application/json", "data": ' || componentresult || ', "dbIdentifier": ' || '%{dbIdentifier}' || '}' from ( select tabletojson(colname,val,classval,average,sigma,probability, "colname,val,classval,average,sigma,probability",0) as componentresult