diff --git a/Exareme-Docker/Dockerfile b/Exareme-Docker/Dockerfile
index 5838e2ad0..f36cd5db3 100644
--- a/Exareme-Docker/Dockerfile
+++ b/Exareme-Docker/Dockerfile
@@ -43,6 +43,8 @@ RUN apt update
RUN apt install -y r-base
RUN Rscript -e 'install.packages("randomForest", repos="https://cloud.r-project.org")'
+# caret and e1071 are imported by the rconfusionmatrixtable madis operator.
+# The CRAN mirror is given explicitly (as for randomForest above) so the
+# non-interactive install does not depend on a preconfigured default mirror.
+RUN Rscript -e 'install.packages("caret", repos="https://cloud.r-project.org")'
+RUN Rscript -e 'install.packages("e1071", repos="https://cloud.r-project.org")'
RUN pip install rpy2==2.8.6
# Add Madis Server
diff --git a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java
index 481655f20..653f79c4c 100644
--- a/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java
+++ b/Exareme-Docker/src/exareme/exareme-master/src/main/java/madgik/exareme/master/queryProcessor/HBP/AlgorithmProperties.java
@@ -263,7 +263,7 @@ private static void validateAlgorithmParameterValueType(
String algorithmName,
String value,
ParameterProperties parameterProperties
- ) throws AlgorithmException {
+ ) throws AlgorithmException, UserException {
if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.json)) {
try {
new JSONObject(value);
@@ -285,19 +285,19 @@ private static void validateAlgorithmParameterValueType(
try {
Double.parseDouble(curValue);
} catch (NumberFormatException nfe) {
- throw new AlgorithmException(algorithmName,
+ throw new UserException(
"The value of the parameter '" + parameterProperties.getName() + "' should be a real number.");
}
} else if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.integer)) {
try {
Integer.parseInt(curValue);
} catch (NumberFormatException e) {
- throw new AlgorithmException(algorithmName,
+ throw new UserException(
"The value of the parameter '" + parameterProperties.getName() + "' should be an integer.");
}
} else if (parameterProperties.getValueType().equals(ParameterProperties.ParameterValueType.string)) {
if (curValue.equals("")) {
- throw new AlgorithmException(algorithmName,
+ throw new UserException(
"The value of the parameter '" + parameterProperties.getName()
+ "' contains an empty string.");
}
diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py
deleted file mode 100644
index e83160a32..000000000
--- a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/aggregate/highchartsvisualizations.py
+++ /dev/null
@@ -1,89 +0,0 @@
-from array import *
-
-try:
- from collections import OrderedDict
-except ImportError:
- # Python 2.6
- from lib.collections26 import OrderedDict
-
-__docformat__ = 'reStructuredText en'
-
-
-class highchartheatmap:
- # i, j, val, title
- #
- # ''' chart: { type: 'heatmap', marginTop: 40, marginBottom: 80, plotBorderWidth: 1 },
- # title: { text: 'Sales per employee per weekday' },
- # xAxis: { categories: ['Alexander', 'Marie', 'Maximilian']},
- # yAxis: { categories: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'], title: null },
- # colorAxis: { min: 0, minColor: '#FFFFFF', maxColor: Highcharts.getOptions().colors[0] },
- # legend: { align: 'right',layout: 'vertical', margin: 0, verticalAlign: 'top', y: 25, symbolHeight: 280 },
- # tooltip: { formatter: function () {
- # return '(' + this.series.xAxis.categories[this.point.x] + ", "+ this.series.yAxis.categories[this.point.y]+')= '+ this.point.value + '';
- # }
- # },
- # series: [{
- # borderWidth: 1,
- # data: [[0, 0, 10], [0, 1, 19], [0, 2, 8], [0, 3, 24], [0, 4, 67], [1, 0, 92], [1, 1, 58], [1, 2, 78], [1, 3, 117], [1, 4, 48], [2, 0, 35], [2, 1, 15], [2, 2, 123], [2, 3, 64], [2, 4, 52]],
- # dataLabels: {
- # enabled: true,
- # color: '#000000'
- # }
- # }]
- # '''
- registered = True # Value to define db operator
-
- def __init__(self):
- self.n = 0
- self.xcategories = []
- self.ycategories = []
- self.mydata = []
-
- def step(self, *args):
- try:
- if str(args[0]) not in self.xcategories:
- self.xcategories.append(str(args[0]))
- if str(args[1]) not in self.ycategories:
- self.ycategories.append(str(args[1]))
-
- self.mydata.append(
- [self.xcategories.index(str(args[0])), self.ycategories.index(str(args[1])), float(args[2])])
- self.title = str(args[3])
- self.xtitle = str(args[4])
- self.ytitle = str(args[5])
- except (ValueError, TypeError):
- raise
-
- def final(self):
- # print "self.xcategories", self.xcategories
- # print "self.ycategories", self.ycategories
- yield ('highchartheatmap',)
- # print self.mydata
- myresult = "chart: { type: 'heatmap', marginTop: 40, marginBottom: 80, plotBorderWidth: 1 },"
- myresult += " title: { text: '" + self.title + "' },"
- myresult += " xAxis: { categories: " + str(self.xcategories) + "},"
- myresult += " yAxis: { categories: " + str(self.ycategories) + "},"
- myresult += " colorAxis: { min: 0, minColor: '#FFFFFF', maxColor: Highcharts.getOptions().colors[0] },"
- myresult += " legend: { align: 'right',layout: 'vertical', margin: 0, verticalAlign: 'top', y: 25, symbolHeight: 280 },"
- # myresult += " tooltip: { formatter: function () {return '(' + this.series.xAxis.categories[this.point.x] + ", "+ this.series.yAxis.categories[this.point.y]+')= '+ this.point.value + '';}},"
- myresult += " series: [{ borderWidth: 1, data: "
- myresult += str(self.mydata)
- myresult += ", dataLabels: { enabled: true,color: '#000000'}}]"
- yield (myresult,)
-
-
-if not ('.' in __name__):
- """
- This is needed to be able to test the function, put it at the end of every
- new function you create
- """
- import sys
- from functions import *
-
- testfunction()
- if __name__ == "__main__":
- reload(sys)
- sys.setdefaultencoding('utf-8')
- import doctest
-
- doctest.testmod()
diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/row/errorchecking.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/row/errorchecking.py
index ff1a22fe8..922669a29 100644
--- a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/row/errorchecking.py
+++ b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/row/errorchecking.py
@@ -58,6 +58,15 @@ def holdoutvalidation_inputerrorchecking2(train_size, test_size):
holdoutvalidation_inputerrorchecking2.registered = True
+def naive_bayes_training_inputerrorchecking(colname,noLevels):
+    """Check that a column has enough distinct values to be used as a class.
+
+    colname  -- column name, used only as the prefix of the error message
+    noLevels -- number of distinct values counted for that column
+
+    Returns "OK" when noLevels is 2 or more; otherwise raises
+    functions.OperatorError tagged "ExaremeError".
+    """
+    if noLevels < 2:
+        # Exactly two distinct values are accepted, so the message must say
+        # "at least two" (it used to say "more than two").
+        raise functions.OperatorError("ExaremeError", colname + ": should contain at least two distinct values")
+    return "OK"
+
+naive_bayes_training_inputerrorchecking.registered = True
+
+
# def maxnumberofiterations_errorhandling(maxnumberofiterations,no): # For most of the iterative algorithms
# if maxnumberofiterations< no:
# raise functions.OperatorError("ExaremeError", "The algorithm could not complete in the max number of iterations given. Please increase the iterations_max_number and try again.")
diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/highchartheatmap.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/highchartheatmap.py
new file mode 100644
index 000000000..c8bfb131d
--- /dev/null
+++ b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/highchartheatmap.py
@@ -0,0 +1,89 @@
+import setpath
+import functions
+import json
+registered=True
+
+'''
+Highcharts.chart('container',
+ { "chart": {"type": "heatmap","marginTop": 40,"marginBottom": 80,"plotBorderWidth": 1},
+ "title": {"text": " confusion matrix "},
+ "xAxis": {"title": { "text": " actual values "},"categories": [ "AD","CN","Other"]},
+ "yAxis": {"title": { "text": " predicted values "},"categories": [ "AD", "CN", "Other"]},
+ "colorAxis": {"min": 0,"minColor": "#FFFFFF","maxColor": "#6699ff"},
+ "legend": {"align": "right","layout": "vertical","margin": 0,"verticalAlign": "top","y": 25,"symbolHeight": 280},
+ "series": [{ "borderWidth": 1, "data": [ [ 0, 0, 46],
+ [ 0, 1, 39],
+ [ 0, 2, 0],
+ [ 1, 0, 20],
+ [ 1, 1,76],
+ [ 1, 2, 0],
+ [2, 0, 26],
+ [ 2, 1,33],
+ [2, 2,0]],
+ "dataLabels": {"enabled": true,"color": "#000000" }}]}
+);
+'''
+class highchartheatmap(functions.vtable.vtbase.VT):
+    """Virtual table that turns (x, y, value) rows into a Highcharts heatmap.
+
+    Named arguments: query, title, xtitle, ytitle (all mandatory).
+    The first column of the query result is used for the x categories, the
+    second for the y categories and the third (converted to float) for the
+    cell value.  One row is produced, holding the chart description as JSON.
+    """
+
+    def VTiter(self, *parsedArgs, **envars):
+        largs, dictargs = self.full_parse(parsedArgs)
+
+        # Mandatory named arguments, checked in this order.
+        mandatory = (
+            ('query', "No query argument "),
+            ('title', "No title argument "),
+            ('xtitle', "No xtitle argument "),
+            ('ytitle', "No ytitle argument "),
+        )
+        for argname, complaint in mandatory:
+            if argname not in dictargs:
+                raise functions.OperatorError(__name__.rsplit('.')[-1], complaint)
+
+        cursor = envars['db'].cursor()
+        rows = cursor.execute(dictargs['query'])
+        # The description is fetched but its value is not used afterwards.
+        cursor.getdescriptionsafe()
+
+        xlabels = []
+        ylabels = []
+        cells = []
+        for record in rows:
+            xlabel = str(record[0])
+            ylabel = str(record[1])
+            # Categories keep the order in which they first appear.
+            if xlabel not in xlabels:
+                xlabels.append(xlabel)
+            if ylabel not in ylabels:
+                ylabels.append(ylabel)
+            # Each cell is [x category index, y category index, value].
+            cells.append([xlabels.index(xlabel), ylabels.index(ylabel), float(record[2])])
+
+        chart = {
+            "chart": {"type": "heatmap", "marginTop": 40, "marginBottom": 80, "plotBorderWidth": 1},
+            "title": {"text": str(dictargs['title'])},
+            "xAxis": {"title": {"text": str(dictargs['xtitle'])}, "categories": xlabels},
+            "yAxis": {"title": {"text": str(dictargs['ytitle'])}, "categories": ylabels},
+            "colorAxis": {"min": 0, "minColor": "#FFFFFF", "maxColor": "#6699ff"},
+            "legend": {"align": "right", "layout": "vertical", "margin": 0,
+                       "verticalAlign": "top", "y": 25, "symbolHeight": 280},
+            "series": [{"borderWidth": 1, "data": cells,
+                        "dataLabels": {"enabled": True, "color": "#000000"}}],
+        }
+        payload = {
+            "type": "application/vnd.highcharts+json",
+            "data": chart,
+        }
+        serialized = json.dumps(payload)
+
+        # First yield is the output schema, second the single result row.
+        yield [('highchartresult',)]
+        yield (serialized,)
+
+
+def Source():
+    """Return a VTGenerator built around the highchartheatmap class."""
+    generator = functions.vtable.vtbase.VTGenerator(highchartheatmap)
+    return generator
+
+
+if not ('.' in __name__):
+    """
+    This is needed to be able to test the function, put it at the end of every
+    new function you create
+    """
+    import sys
+    import setpath
+    from functions import *
+    testfunction()
+    if __name__ == "__main__":
+        # Python 2 only: reload(sys) makes setdefaultencoding available again.
+        reload(sys)
+        sys.setdefaultencoding('utf-8')
+        import doctest
+        # Was the garbled "doctest.tesdoctest.tes", which raised AttributeError.
+        doctest.testmod()
diff --git a/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/rconfusionmatrixtable.py b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/rconfusionmatrixtable.py
new file mode 100644
index 000000000..ea8805dc2
--- /dev/null
+++ b/Exareme-Docker/src/exareme/exareme-tools/madis/src/functionslocal/vtable/rconfusionmatrixtable.py
@@ -0,0 +1,157 @@
+"""
+
+"""
+import setpath
+import functions
+import json
+import sys
+from rpy2.robjects import StrVector
+from rpy2.robjects.packages import importr
+from rpy2.rinterface import RRuntimeError
+
+import warnings
+# Installs a global "ignore" filter: every Python warning raised after this
+# module is imported is suppressed.
+warnings.filterwarnings("ignore")
+
+# R packages are loaded once, at import time, through rpy2.
+# caret provides confusionMatrix(); base provides factor() and c().
+caret = importr('caret')
+# Not referenced anywhere else in this module.
+# NOTE(review): presumably loaded because caret needs e1071 at runtime - confirm.
+e = importr('e1071')
+base = importr('base')
+
+### Classic stream iterator
+# NOTE(review): presumably the flag that marks this module as a registered
+# madis operator - confirm against the madis loader.
+registered=True
+
+class rconfusionmatrixtable(functions.vtable.vtbase.VT):
+    """Virtual table that computes caret confusion-matrix statistics.
+
+    Named argument ``query`` must return rows of
+    (predictedclass, actualclass, val), where val is the number of
+    observations in that cell of the confusion matrix.
+
+    Output schema is (statscolname, statsval) with two rows:
+      "ResultOverall"    -- JSON tabular resource with the overall statistics
+      "ResultClassNames" -- JSON tabular resource with the per-class statistics
+
+    Raises functions.OperatorError when the query argument is missing.
+    """
+
+    def VTiter(self, *parsedArgs, **envars):
+        largs, dictargs = self.full_parse(parsedArgs)
+
+        if 'query' not in dictargs:
+            raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument")
+        query = dictargs['query']
+
+        def na_to_none(value):
+            # R NA / NaN would be serialised by json.dumps as the bare token
+            # NaN, which is not valid JSON; emit null instead.
+            if str(value) == 'nan' or str(value) == 'NA':
+                return None
+            return value
+
+        cur = envars['db'].cursor()
+        c = cur.execute(query)
+
+        predictedclasses = []
+        actualclasses = []
+        classnames = []
+        for myrow in c:
+            predicted, actual = myrow[0], myrow[1]
+            # Register labels from BOTH columns, even for empty cells: a class
+            # that is never predicted must still be a factor level, otherwise
+            # its actual observations turn into NA inside R.
+            for label in (predicted, actual):
+                if label not in classnames:
+                    classnames.append(label)
+            # Expand the aggregated cell back into one entry per observation.
+            count = int(myrow[2])
+            predictedclasses.extend([predicted] * count)
+            actualclasses.extend([actual] * count)
+
+        numberofclassnames = len(classnames)
+
+        # Both factors share the same levels, in order of first appearance.
+        levels = base.c(StrVector(classnames))
+        predictedData = base.factor(base.c(StrVector(predictedclasses)), levels)
+        truthData = base.factor(base.c(StrVector(actualclasses)), levels)
+        # Rresult[0]: positive class, Rresult[1]: table,
+        # Rresult[2]: overall statistics, Rresult[3]: byClass statistics.
+        Rresult = caret.confusionMatrix(predictedData, truthData)
+
+        #####################################################
+        # caret only reports a positive class for two-class problems.
+        if numberofclassnames == 2:
+            positiveclass = Rresult[0][0]
+        else:
+            positiveclass = None
+
+        dataOverall = [["Positive Class", positiveclass]]
+        overallnames = ("Accuracy", "Kappa", "Accuracy Lower", "Accuracy Upper",
+                        "Accuracy Null", "Accuracy P Value", "Mcnemar P Value")
+        for position, statname in enumerate(overallnames):
+            dataOverall.append([statname, na_to_none(Rresult[2][position])])
+
+        ResultOverall = {
+            "data": {
+                "profile": "tabular-data-resource",
+                "data": dataOverall,
+                "name": "Overall Statistic Results",
+                "schema": {
+                    "fields": [
+                        {"type": "text", "name": "Statistic Name"},
+                        {"type": "real", "name": "Value"}
+                    ]
+                }
+            },
+            "type": "application/vnd.dataresource+json"
+        }
+        #####################################################
+
+        FieldClassNames = [{"type": "text", "name": "Statistic Name"}]
+        for classname in classnames:
+            FieldClassNames.append({"type": "real", "name": classname + " class"})
+
+        # Row labels, in the order the byClass values are read below.
+        statnames = ("Sensitivity", "Specificity", "Pos Pred Value", "Neg Pred Value",
+                     "Precision", "Recall", "F1", "Prevalence", "Detection Rate",
+                     "Detection Prevalence", "Balanced Accuracy")
+        byclass = Rresult[3]
+        DataClassNames = []
+
+        if numberofclassnames == 2:
+            # Two-class case: byClass is a flat vector with ONE value per
+            # statistic, computed for the positive class only.  Put it in the
+            # positive class column and leave the other column empty.
+            # Falls back to the first level (caret's default positive class)
+            # if the label returned by R cannot be matched.
+            if positiveclass in classnames:
+                positiveindex = classnames.index(positiveclass)
+            else:
+                positiveindex = 0
+            for statindex, statname in enumerate(statnames):
+                row = [statname]
+                for classindex in range(numberofclassnames):
+                    if classindex == positiveindex:
+                        row.append(na_to_none(byclass[statindex]))
+                    else:
+                        row.append(None)
+                DataClassNames.append(row)
+        else:
+            # Multi-class case: byClass is a (classes x statistics) matrix
+            # read through its flat index, i.e. statistic by statistic with
+            # one value per class.
+            i = 0
+            for statname in statnames:
+                row = [statname]
+                for classname in classnames:
+                    row.append(na_to_none(byclass[i]))
+                    i = i + 1
+                DataClassNames.append(row)
+
+        ResultClassNames = {
+            "data": {
+                "profile": "tabular-data-resource",
+                "data": DataClassNames,
+                "name": "Statistic Results per Class",
+                "schema": {"fields": FieldClassNames}
+            },
+            "type": "application/vnd.dataresource+json"
+        }
+
+        # First yield is the output schema, then one row per JSON document.
+        yield (['statscolname'],['statsval'],)
+        yield ("ResultOverall", json.dumps(ResultOverall))
+        yield ("ResultClassNames", json.dumps(ResultClassNames))
+
+
+def Source():
+    """Return a VTGenerator built around the rconfusionmatrixtable class."""
+    generator = functions.vtable.vtbase.VTGenerator(rconfusionmatrixtable)
+    return generator
+
+# Runs only when the module name contains no dot, i.e. it was not imported
+# under a package-qualified name.
+if not ('.' in __name__):
+    """
+    This is needed to be able to test the function, put it at the end of every
+    new function you create
+    """
+    import sys
+    import setpath
+    from functions import *
+    # NOTE(review): testfunction presumably comes from the star import above - confirm.
+    testfunction()
+    if __name__ == "__main__":
+        # Python 2 only: reload(sys) makes setdefaultencoding available again.
+        reload(sys)
+        sys.setdefaultencoding('utf-8')
+        import doctest
+        doctest.testmod()
diff --git a/Exareme-Docker/src/mip-algorithms/CROSS_VALIDATION_K_FOLD/properties.json b/Exareme-Docker/src/mip-algorithms/CROSS_VALIDATION_K_FOLD/properties.json
index 10d98d0ab..d04b7628c 100644
--- a/Exareme-Docker/src/mip-algorithms/CROSS_VALIDATION_K_FOLD/properties.json
+++ b/Exareme-Docker/src/mip-algorithms/CROSS_VALIDATION_K_FOLD/properties.json
@@ -51,7 +51,7 @@
"desc": "Number of pieces the dataset will be split",
"type": "other",
"value": "3",
- "valueMin": 1,
+ "valueMin": 2,
"valueNotBlank": true,
"valueMultiple": false,
"valueType": "integer"
diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TESTING/global.template.sql b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TESTING/global.template.sql
index 0947a43ba..d460a892a 100644
--- a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TESTING/global.template.sql
+++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TESTING/global.template.sql
@@ -7,8 +7,24 @@ select iterationNumber, actualclass, predictedclass, sum(val) as val
from %{input_global_tbl}
group by actualclass,predictedclass;
+--drop table if exists defaultDB.lala;
+--create table defaultDB.lala as select * from global_oneconfusionmatrix;
+
var 'jsonResult' from select '{ "type": "application/json", "data": ' || componentresult || '}' from
( select tabletojson(actualclass,predictedclass,val, "actualclass,predictedclass,val",0) as componentresult
from global_oneconfusionmatrix );
-select '{"result": [' || '%{jsonResult}' || ']}';
+--var 'heatmap' from select highchartheatmap(actualclass,predictedclass,val,"confusion matrix", "actual values", "predicted values") from global_oneconfusionmatrix;
+
+-- Render the aggregated confusion matrix as a Highcharts heatmap JSON document
+-- (x axis: actualclass, y axis: predictedclass) and keep it in 'heatmap'.
+var 'heatmap' from select * from (highchartheatmap title:Confusion_Matrix, xtitle:Actual_Values, ytitle:Predicted_Values select actualclass,predictedclass,val from global_oneconfusionmatrix);
+
+-- Compute the confusion-matrix statistics with the rconfusionmatrixtable operator.
+-- Column order matters here: the operator reads the first column as the
+-- predicted class and the second as the actual class.
+drop table if exists confusionmatrixstats;
+create temp table confusionmatrixstats as
+rconfusionmatrixtable select predictedclass,actualclass,val from global_oneconfusionmatrix;
+
+
+-- 'a' holds the overall statistics document, 'b' the per-class statistics document.
+var 'a' from select statsval from confusionmatrixstats where statscolname = 'ResultOverall';
+var 'b' from select statsval from confusionmatrixstats where statscolname = 'ResultClassNames';
+
+
+-- Final output: raw confusion matrix, heatmap, overall statistics and per-class
+-- statistics, concatenated into a single JSON "result" array.
+select '{"result": [' || '%{jsonResult}' ||','||'%{heatmap}' ||','||'%{a}'||',' || '%{b}' ||']}';
diff --git a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql
index 7616a0cfe..dc7147c7e 100644
--- a/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql
+++ b/Exareme-Docker/src/mip-algorithms/NAIVE_BAYES_TRAINING/global.template.sql
@@ -76,6 +76,10 @@ from statistics
where colname in (select code from defaultDB.globalmetadatatbl where categorical= 0)
and colname <> '%{y}';
+
+-- Input validation: the error-checking function raises an ExaremeError when
+-- fewer than two distinct class values are present in global_probabilities.
+select Naive_Bayes_Training_inputerrorchecking('%{y}',no) from (select count(distinct classval) as no from global_probabilities);
+
+
--select * from global_probabilities;
var 'jsonResult' from select '{ "type": "application/json", "data": ' || componentresult || ', "dbIdentifier": ' || '%{dbIdentifier}' || '}' from
( select tabletojson(colname,val,classval,average,sigma,probability, "colname,val,classval,average,sigma,probability",0) as componentresult