diff --git a/Tests/NerlnetFullFlowTest.sh b/Tests/NerlnetFullFlowTest.sh
index 952ebd49..e1fb16fa 100755
--- a/Tests/NerlnetFullFlowTest.sh
+++ b/Tests/NerlnetFullFlowTest.sh
@@ -13,6 +13,7 @@
 NERLNET_CONFIG_INPUT_DATA_DIR=$NERLNET_CONFIG_DIR/inputDataDir.nerlconfig
 NERLNET_CONFIG_INPUT_DATA_DIR_BACKUP=$NERLNET_CONFIG_DIR/inputDataDir.nerlconfig.bac
 TEST_INPUT_JSONS_FILES_DIR="$TESTS_PATH/inputJsonsFiles"
+export TESTS_BASELINE="$TEST_INPUT_JSONS_FILES_DIR/accuracy_stats_synt_1d_2c_4r_4w.json"
 TEST_ARCH_JSON_NOIP_0=$TEST_INPUT_JSONS_FILES_DIR/arch_test_synt_1d_2c_1s_4r_4w.json.noip
 TEST_ARCH_JSON_0=$TEST_INPUT_JSONS_FILES_DIR/arch_test_synt_1d_2c_1s_4r_4w.json
diff --git a/Tests/inputJsonsFiles/accuracy_stats_synt_1d_2c_4r_4w.json b/Tests/inputJsonsFiles/accuracy_stats_synt_1d_2c_4r_4w.json
new file mode 100644
index 00000000..b1c3983c
--- /dev/null
+++ b/Tests/inputJsonsFiles/accuracy_stats_synt_1d_2c_4r_4w.json
@@ -0,0 +1,166 @@
+{
+    "w1": {
+        "0": {
+            "TN": 36860,
+            "FP": 4,
+            "FN": 0,
+            "TP": 12136,
+            "Accuracy": 0.9999183673469387,
+            "Balanced Accuracy": 0.9999457465277778,
+            "Precision": 0.999670510708402,
+            "Recall": 1.0,
+            "True Negative Rate": 0.9998914930555556,
+            "Informedness": 0.9998914930555556,
+            "F1": 0.9998352282089307
+        },
+        "1": {
+            "TN": 36498,
+            "FP": 208,
+            "FN": 0,
+            "TP": 12294,
+            "Accuracy": 0.9957551020408163,
+            "Balanced Accuracy": 0.9971666757478341,
+            "Precision": 0.9833626619740842,
+            "Recall": 1.0,
+            "True Negative Rate": 0.9943333514956683,
+            "Informedness": 0.9943333514956683,
+            "F1": 0.9916115502500404
+        },
+        "2": {
+            "TN": 24430,
+            "FP": 0,
+            "FN": 255,
+            "TP": 24315,
+            "Accuracy": 0.9947959183673469,
+            "Balanced Accuracy": 0.9948107448107448,
+            "Precision": 1.0,
+            "Recall": 0.9896214896214897,
+            "True Negative Rate": 1.0,
+            "Informedness": 0.9896214896214897,
+            "F1": 0.9947836759742252
+        }
+    },
+    "w2": {
+        "0": {
+            "TN": 36754,
+            "FP": 110,
+            "FN": 0,
+            "TP": 12136,
+            "Accuracy": 0.9977551020408163,
+            "Balanced Accuracy": 0.9985080295138888,
+            "Precision": 0.9910174750939083,
+            "Recall": 1.0,
+            "True Negative Rate": 0.9970160590277778,
+            "Informedness": 0.9970160590277777,
+            "F1": 0.9954884751045854
+        },
+        "1": {
+            "TN": 36413,
+            "FP": 293,
+            "FN": 139,
+            "TP": 12155,
+            "Accuracy": 0.9911836734693877,
+            "Balanced Accuracy": 0.9903556627496741,
+            "Precision": 0.9764620822622108,
+            "Recall": 0.9886936717097772,
+            "True Negative Rate": 0.9920176537895712,
+            "Informedness": 0.9807113254993483,
+            "F1": 0.9825398108479508
+        },
+        "2": {
+            "TN": 24430,
+            "FP": 0,
+            "FN": 251,
+            "TP": 24319,
+            "Accuracy": 0.9948775510204082,
+            "Balanced Accuracy": 0.9948921448921448,
+            "Precision": 1.0,
+            "Recall": 0.9897842897842898,
+            "True Negative Rate": 1.0,
+            "Informedness": 0.9897842897842897,
+            "F1": 0.994865920759271
+        }
+    },
+    "w3": {
+        "0": {
+            "TN": 36863,
+            "FP": 1,
+            "FN": 0,
+            "TP": 12136,
+            "Accuracy": 0.9999795918367347,
+            "Balanced Accuracy": 0.9999864366319444,
+            "Precision": 0.9999176073164703,
+            "Recall": 1.0,
+            "True Negative Rate": 0.9999728732638888,
+            "Informedness": 0.9999728732638888,
+            "F1": 0.9999588019610266
+        },
+        "1": {
+            "TN": 36365,
+            "FP": 341,
+            "FN": 1,
+            "TP": 12293,
+            "Accuracy": 0.9930204081632653,
+            "Balanced Accuracy": 0.9953143125909452,
+            "Precision": 0.9730093398765237,
+            "Recall": 0.9999186595087034,
+            "True Negative Rate": 0.9907099656731869,
+            "Informedness": 0.9906286251818903,
+            "F1": 0.986280487804878
+        },
+        "2": {
+            "TN": 24430,
+            "FP": 0,
+            "FN": 348,
+            "TP": 24222,
+            "Accuracy": 0.9928979591836735,
+            "Balanced Accuracy": 0.9929181929181929,
+            "Precision": 1.0,
+            "Recall": 0.9858363858363859,
+            "True Negative Rate": 1.0,
+            "Informedness": 0.9858363858363859,
+            "F1": 0.9928676832267586
+        }
+    },
+    "w4": {
+        "0": {
+            "TN": 36863,
+            "FP": 1,
+            "FN": 0,
+            "TP": 12136,
+            "Accuracy": 0.9999795918367347,
+            "Balanced Accuracy": 0.9999864366319444,
+            "Precision": 0.9999176073164703,
+            "Recall": 1.0,
+            "True Negative Rate": 0.9999728732638888,
+            "Informedness": 0.9999728732638888,
+            "F1": 0.9999588019610266
+        },
+        "1": {
+            "TN": 36339,
+            "FP": 367,
+            "FN": 1,
+            "TP": 12293,
+            "Accuracy": 0.9924897959183674,
+            "Balanced Accuracy": 0.9949601470594245,
+            "Precision": 0.9710110584518168,
+            "Recall": 0.9999186595087034,
+            "True Negative Rate": 0.9900016346101455,
+            "Informedness": 0.989920294118849,
+            "F1": 0.9852528652721007
+        },
+        "2": {
+            "TN": 24430,
+            "FP": 0,
+            "FN": 354,
+            "TP": 24216,
+            "Accuracy": 0.9927755102040816,
+            "Balanced Accuracy": 0.9927960927960928,
+            "Precision": 1.0,
+            "Recall": 0.9855921855921856,
+            "True Negative Rate": 1.0,
+            "Informedness": 0.9855921855921856,
+            "F1": 0.9927438199483458
+        }
+    }
+}
\ No newline at end of file
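The baseline above is internally consistent: every derived metric in each entry can be recomputed from its raw TN/FP/FN/TP counts. A minimal sanity-check sketch, not part of the patch, that loads the file (path relative to the repo root, as wired up in NerlnetFullFlowTest.sh) and verifies one entry:

```python
import json

# Recompute the derived metrics of one baseline entry (w1, class "0")
# from its raw counts and confirm they match the stored values.
with open("Tests/inputJsonsFiles/accuracy_stats_synt_1d_2c_4r_4w.json") as f:
    baseline = json.load(f)

entry = baseline["w1"]["0"]
tn, fp, fn, tp = entry["TN"], entry["FP"], entry["FN"], entry["TP"]

acc = (tp + tn) / (tp + tn + fp + fn)  # 48996 / 49000
ppv = tp / (tp + fp)                   # Precision: 12136 / 12140
tpr = tp / (tp + fn)                   # Recall: 12136 / 12136 = 1.0
f1 = 2 * (ppv * tpr) / (ppv + tpr)     # harmonic mean of precision and recall

assert abs(acc - entry["Accuracy"]) < 1e-12
assert abs(ppv - entry["Precision"]) < 1e-12
assert abs(f1 - entry["F1"]) < 1e-12
```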
diff --git a/src_py/apiServer/apiServer.py b/src_py/apiServer/apiServer.py
index fb5a3c4c..9db3cf76 100644
--- a/src_py/apiServer/apiServer.py
+++ b/src_py/apiServer/apiServer.py
@@ -252,223 +252,6 @@ def print_saved_experiments(self):
         for i, exp in enumerate(self.experiments, start=1):
             print(f"{i}) {exp.name}")
-
-
-    def plot_loss(self, expNum):
-        expForStats = self.experiments[expNum-1]
-
-        # Create a new folder for to save an image of the plot:
-        if not os.path.exists(f'{EXPERIMENT_RESULTS_PATH}/{expForStats.name}/Training'):
-            os.mkdir(f'{EXPERIMENT_RESULTS_PATH}/{expForStats.name}/Training')
-
-
-        ####### THIS IS TO PICK ONLY A SPECIFIC SOURCE FOR PLOT:
-
-        # numOfCsvs = len(expForStats.trainingResList)
-        # print(f"\nThe training phase contains {numOfCsvs} source CSVs:")
-
-        # for i, csvRes in enumerate(expForStats.trainingResList, start=1):
-        #     print(f"{i}) {csvRes.name}")
-
-        # while True:
-        #     print("\nPlease choose a CSV number for the plot (for multiple CSVs, seperate their numbers with ', '):", end = ' ')
-        #     csvNumsStr = input()
-
-        #     try:
-        #         csvNumsList = csvNumsStr.strip().split(',')
-        #         csvNumsList = [int(csvNum) for csvNum in csvNumsList]
-        #     except ValueError:
-        #         print("\nIllegal Input")
-        #         continue
-
-        #     if (all(csvNum > 0 and csvNum <= numOfCsvs for csvNum in csvNumsList)): # Check if all CSV indexes are in the correct range.
-        #         break
-        #     else: print("\nInvalid Input")
-
-        # Draw the plot using Matplotlib:
-        plt.figure(figsize = (30,15), dpi = 150)
-        plt.rcParams.update({'font.size': 22})
-        workers = []
-        for csvRes in expForStats.trainingResList:
-            workers.extend(csvRes.workers)
-            for workerRes in csvRes.workersResList:
-                data = workerRes.resList
-                plt.plot(data, linewidth = 3)
-
-        expTitle = (expForStats.name)
-        plt.title(f"Training - Loss Function - {expTitle}", fontsize=38)
-        plt.xlabel('Batch No.', fontsize = 30)
-        plt.ylabel('Loss (MSE)', fontsize = 30)
-        plt.yscale('log')
-        plt.xlim(left=0)
-        plt.ylim(bottom=0)
-        plt.legend(workers)
-        plt.grid(visible=True, which='major', linestyle='-')
-        plt.minorticks_on()
-        plt.grid(visible=True, which='minor', linestyle='-', alpha=0.7)
-
-        plt.show()
-        fileName = globe.experiment_focused_on.name
-        plt.savefig(f'{EXPERIMENT_RESULTS_PATH}/{expForStats.name}/Training/{fileName}.png')
-        print(f'\n{fileName}.png was Saved...')
-
-    def accuracy_matrix(self, normalizeEnabled = False):
-        expForStats = globe.experiment_focused_on
-
-        # Choose the matching (to the original labeled CSV) CSV from the prediction results list:
-
-        numOfCsvs = len(expForStats.predictionResList)
-        print(f"\nThe prediction phase contains {numOfCsvs} CSVs:") ## these are source unlabeled CSVs that need to be compared to test labeled CSVs
-        for i, csvRes in enumerate(expForStats.predictionResList, start=1):
-            print(f"{i}) {csvRes.name}: samples starting at {csvRes.indexOffset}")
-
-        # while True: ####### FOR MULTIPLE CSVs
-        #     print("\nPlease choose a CSV number for accuracy calculation and confusion matrix (for multiple CSVs, seperate their numbers with ', '):", end = ' ')
-        #     csvNumsStr = input()
-
-        #     try:
-        #         csvNumsList = csvNumsStr.split(', ')
-        #         csvNumsList = [int(csvNum) for csvNum in csvNumsList]
-
-        #     except ValueError:
-        #         print("\nIllegal Input")
-        #         continue
-
-        #     # Check if all CSV indexes are in the correct range.
-        #     if (all(csvNum > 0 and csvNum <= numOfCsvs for csvNum in csvNumsList)): break
-        #     else: print("\nInvalid Input")
-
-        ################### for single csv:
-        # while True:
-        #     print("\nPlease enter the name of the FULL LABELED PREDICTION DATA (including .csv):", end = ' ')
-        #     labelsCsvPath = input()
-        labelsCsvPath = f"{expForStats.predictionResList[0].name}_test.csv"
-        for root, _ , filenames in os.walk(self.input_data_path):
-            for filename in filenames:
-                if filename == labelsCsvPath:
-                    labelsCsvPath = os.path.join(root, filename)
-                    try:
-                        labelsCsvDf = pd.read_csv(labelsCsvPath, header=None)
-                        # break
-
-                    except OSError: print("\nInvalid path\n")
-                    break
-
-
-        # Extract the labels columns from the CSV. Create a list of labels:
-        labelsLen = 1
-        try: labelsLen = len(expForStats.expFlow["Labels"])
-        finally: print(f"assuming {labelsLen} lables")
-
-        try: labelNames = expForStats.expFlow["Labels"] ## get label names from experimnet JSON
-        except: labelNames = [str(i) for i in range(labelsLen)] #if no labels, set to 1,2,3....
-
-        labelsSeries = labelsCsvDf.iloc[:,-labelsLen:] ## this is the full list of only labels
-
-        workersList = []
-        workerNeuronRes = {}
-        for csvRes in expForStats.predictionResList:
-            for worker in csvRes.workers:
-                workerNeuronRes[worker] = None # each worker creates its own [predL, trueL] lists
-                workersList.append(worker)
-
-        ## create a different confusion matrix for each label
-
-        for sourceCSV in expForStats.predictionResList: #### TODO: match specific batch number of predict vector to test.csv of labels
-
-            # Generate the samples' indexes from the results:
-            for worker in sourceCSV.workersResList:
-                predlabels = [[] for i in range(labelsLen)]
-                trueLabels = [[] for i in range(labelsLen)]
-
-                for batchRes in worker.resList:
-                    batchRanges = batchRes.indexRange # (startSampNum, endSampNum)
-                    # print(f"testing sample ranges: {batchRanges}")
-                    for ind, sample in enumerate(range(batchRanges[0], batchRanges[1])):
-                        for label in range(labelsLen):
-                            truelabel = str(labelsSeries.iloc[sample,label])
-                            assert truelabel == '0' or truelabel == '1', f"true label at {sample},{label} is {truelabel}"
-                            if batchRes.predictions[ind][label] > 0.5 :
-                                predlabel = '1'
-                            else:
-                                predlabel = '0'
-                            # predlabel = str(round(batchRes.predictions[ind][label]))
-                            trueLabels[label].append(truelabel)
-                            predlabels[label].append(predlabel)
-                # print(f"for worker {worker.name} have true={trueLabels}, pred={predlabels}")
-                workerNeuronRes[worker.name] = (trueLabels, predlabels)
-
-                # print(f"{worker.name} for sample #{sampleNum} predicted {batch.predictions[i][j]}, real is {labelsSeries.iloc[sampleNum+i,j]}")
-
-        # # Create a confusion matrix based on the results:
-
-        ################## THIS IS *NOT* FOR MULTICLASS DATA, but for multi-label data (output neurons are binary)
-        confMatList = {}
-        f, axes = plt.subplots(len(workersList), labelsLen, figsize=(globe.MATRIX_DISP_SCALING*labelsLen, globe.MATRIX_DISP_SCALING*len(workersList)))
-        for i, worker in enumerate(workersList):
-            confMatList[worker] = [[] for i in range(labelsLen)]
-
-            for j in range(labelsLen):
-                # print(f"worker {worker}, has {len(workerNeuronRes[worker][TRUE_LABLE_IND])} labels, with {len(workerNeuronRes[worker][TRUE_LABLE_IND][j])} samples")
-                # print(f"confusion {worker}:{j}, has is of {workerNeuronRes[worker][TRUE_LABLE_IND][j]}, {workerNeuronRes[worker][PRED_LABLE_IND][j]}")
-                if normalizeEnabled == True :
-                    confMatList[worker][j] = confusion_matrix(workerNeuronRes[worker][globe.TRUE_LABLE_IND][j], workerNeuronRes[worker][globe.PRED_LABLE_IND][j], normalize='all')
-                else:
-                    confMatList[worker][j] = confusion_matrix(workerNeuronRes[worker][globe.TRUE_LABLE_IND][j], workerNeuronRes[worker][globe.PRED_LABLE_IND][j])
-                # print(confMatList[worker][j])
-                disp = ConfusionMatrixDisplay(confMatList[worker][j], display_labels=["X", labelNames[j]])
-                disp.plot(ax=axes[i, j], colorbar=False)
-                disp.ax_.set_title(f'{worker}, class #{j}\nAccuracy={round(accuracy_score(workerNeuronRes[worker][globe.TRUE_LABLE_IND][j], workerNeuronRes[worker][globe.PRED_LABLE_IND][j]), 3)}')
-                if i < len(workersList) - 1:
-                    disp.ax_.set_xlabel('') #remove "predicted label"
-                if j != 0:
-                    disp.ax_.set_ylabel('') #remove "true label"
-                # disp.im_.colorbar.remove() #remove individual colorbars
-
-        plt.subplots_adjust(wspace=1, hspace=0.15) ## adjust for spacing between matrix
-        f.colorbar(disp.im_, ax=axes)
-        plt.show()
-
-        fileName = sourceCSV.name.rsplit('/', 1)[-1] # If the CSV name contains a path, then take everything to the right of the last '/'.
-        disp.figure_.savefig(f'{EXPERIMENT_RESULTS_PATH}/{expForStats.name}/Prediction/{fileName}.png')
-        print(f'\n{fileName}.png Saved...')
-
-        ## print and save prediction stats
-        statFileName = f'{EXPERIMENT_RESULTS_PATH}/{expForStats.name}/Prediction/stats.txt'
-        if os.path.exists(statFileName): os.remove(statFileName)
-        statFile = open(statFileName, "a")
-
-        for worker in confMatList:
-            for j, label in enumerate(confMatList[worker]):
-                # Calculate the accuracy and other stats:
-                tn, fp, fn, tp = label.ravel()
-                acc = (tp + tn) / (tp + tn + fp + fn)
-                ppv = tp / (tp + fp)
-                tpr = tp / (tp + fn)
-                tnr = tn / (tn + fp)
-                bacc = (tpr + tnr) / 2
-                inf = tpr + tnr - 1
-
-                print(f"{worker}, class #{j}:")
-                print(f"Accuracy acquired (TP+TN / Tot): {round(acc*100, 3)}%.")
-                print(f"Balanced Accuracy (TPR+TNR / 2): {round(bacc*100, 3)}%.")
-                print(f"Positive Predictive Rate (Precision of P): {round(ppv*100, 3)}%.")
-                print(f"True Pos Rate (Sensitivity / Hit Rate): {round(tpr*100, 3)}%.")
-                print(f"True Neg Rate (Selectivity): {round(tnr*100, 3)}%.")
-                print(f"Informedness (of making decision): {round(inf*100, 3)}%.\n\n")
-
-                statFile.write(f"{worker}, class #{j}:\n")
-                statFile.write(f"Accuracy acquired (TP+TN / Tot): {round(acc*100, 3)}%.\n")
-                statFile.write(f"Balanced Accuracy (TPR+TNR / 2): {round(bacc*100, 3)}%.\n")
-                statFile.write(f"Positive Predictive Rate (Precision of P): {round(ppv*100, 3)}%.\n")
-                statFile.write(f"True Pos Rate (Sensitivity / Hit Rate): {round(tpr*100, 3)}%.\n")
-                statFile.write(f"True Neg Rate (Selectivity): {round(tnr*100, 3)}%.\n")
-                statFile.write(f"Informedness (of making decision): {round(inf*100, 3)}%.\n")
-                print("=========================================================\n")
-                statFile.write("=========================================================\n")
-        statFile.close()
-        print(f'\nstats file saved...')
-
     def communication_stats(self):
         self.transmitter.statistics()
 
@@ -518,7 +301,7 @@ def export_results(self, expNum):
             print(f'{fileName}.csv Saved...')
 
     # change statistics from input to API
-    def statistics(self):
+    def statistics(self): # Deprecated?
         while True:
             print("\nPlease choose an experiment number:", end = ' ')
             expNum = input()
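Dropping plot_loss and accuracy_matrix from ApiServer works because the Stats class (see the stats.py diff below) now owns loss retrieval, confusion matrices, and per-class accuracy stats. A sketch of the equivalent flow through the new API, reusing the names that appear elsewhere in this patch (api_server_instance, experiment_name):

```python
# Replacement flow for the removed ApiServer.plot_loss / accuracy_matrix,
# assuming an ApiServer instance and experiment name as in experiment_flow_test.py.
experiment_inst = api_server_instance.get_experiment(experiment_name)
exp_stats = Stats(experiment_inst)

loss = exp_stats.get_loss(plot=True)                # loss curves, saved under Training/
conf = exp_stats.get_confusion_matrices(plot=True)  # per-worker, per-class matrices
acc_stats = exp_stats.get_accuracy_stats(conf, show=True, saveToFile=True)
```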
         while True:
             print("\nPlease choose an experiment number:", end = ' ')
             expNum = input()
diff --git a/src_py/apiServer/definitions.py b/src_py/apiServer/definitions.py
index eafb6bfa..2c0621cb 100644
--- a/src_py/apiServer/definitions.py
+++ b/src_py/apiServer/definitions.py
@@ -1,5 +1,7 @@
 import os
-
+import json
+from pathlib import Path
+from collections import OrderedDict
 # nerlconfig files
 NERLCONFIG_INPUT_DATA_DIR = "/usr/local/lib/nerlnet-lib/NErlNet/config/inputDataDir.nerlconfig"
 NERLCONFIG_JSONS_DIR = '/usr/local/lib/nerlnet-lib/NErlNet/config/jsonsDir.nerlconfig'
@@ -51,4 +53,16 @@ def search_file(filename : str , rootdir : str) -> str:
     for root, _, files in os.walk(rootdir):
         if filename in files:
             return os.path.join(root, filename)
-    return None
\ No newline at end of file
+    return None
+
+def export_dict_json(filepath : str , dict : OrderedDict):
+    Path(filepath).parent.mkdir(parents=True, exist_ok=True)
+    json_obj = json.dumps(dict, indent=4)
+
+    # Write the serialized dict to the target file
+    with open(filepath, "w") as outfile:
+        outfile.write(json_obj)
+
+def import_dict_json(filepath : str):
+    with open(filepath, "r") as infile:
+        return json.load(infile , object_pairs_hook=OrderedDict)
\ No newline at end of file
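A round-trip through the two new helpers behaves as expected; because import_dict_json passes object_pairs_hook=OrderedDict, the key order written by export_dict_json survives the reload. A small usage sketch (the /tmp path is illustrative only):

```python
from collections import OrderedDict
from definitions import export_dict_json, import_dict_json

stats = OrderedDict([("w1", OrderedDict([("0", {"F1": 0.9998})]))])
export_dict_json("/tmp/nerlnet_demo/accuracy_stats.json", stats)  # parent dirs created on demand

restored = import_dict_json("/tmp/nerlnet_demo/accuracy_stats.json")
assert restored == stats
assert list(restored.keys()) == ["w1"]  # insertion order preserved
```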
diff --git a/src_py/apiServer/experiment_flow_debug.py b/src_py/apiServer/experiment_flow_debug.py
index 22e7b6fb..54cd2839 100644
--- a/src_py/apiServer/experiment_flow_debug.py
+++ b/src_py/apiServer/experiment_flow_debug.py
@@ -38,7 +38,17 @@ def print_test(in_str : str):
 loss = exp_stats.get_loss()
 loss_min = exp_stats.get_loss_min()
 conf = exp_stats.get_confusion_matrices()
-stats = exp_stats.get_accuracy_stats(conf)
+acc_stats = exp_stats.get_accuracy_stats(conf , show=True , saveToFile=True)
+for worker in acc_stats.keys():
+    for j in acc_stats[worker].keys():
+        print(f'{worker} class {j} F1 Score: {acc_stats[worker][j]["F1"]}')
+baseline_acc_stats = import_dict_json('/home/guyperets/Desktop/NErlNet/Tests/inputJsonsFiles/accuracy_stats_synt_1d_2c_4r_4w.json')
+diff_from_baseline = []
+for worker in acc_stats.keys():
+    for j in acc_stats[worker].keys():
+        diff = abs(acc_stats[worker][j]["F1"] - baseline_acc_stats[worker][str(j)]["F1"])
+        diff_from_baseline.append(diff/baseline_acc_stats[worker][str(j)]["F1"])
+
 #print(f'Loss Dict: {loss}')
 #print(f'Loss Min Dict: {loss_min}')
 #print(f'Confusion Matrices: {conf}')
@@ -49,18 +59,11 @@ def print_test(in_str : str):
 #api_server_instance.accuracy_matrix(1)
 #api_server_instance.statistics()
 
-nerlnet_stop_cmd = RunCommand(NERLNET_RUN_STOP_SCRIPT, NERLNET_PATH)
 # stdout, stderr, rc = nerlnet_run_cmd.sync(NERLNET_RUNNING_TIMEOUT_SEC)
 # print_test(f'rc: {rc}')
 # if stderr:
 #     print_test(stderr)
 # else:
 #     print_test(stdout)
-stdout, stderr, rc = nerlnet_stop_cmd.sync(0)
-print_test(f'rc stop: {rc}')
-if stderr:
-    print_test(stderr)
-else:
-    print_test(stdout)
 
 # api_server_instance.stop() # TODO implement
\ No newline at end of file
diff --git a/src_py/apiServer/experiment_flow_test.py b/src_py/apiServer/experiment_flow_test.py
index 1a60c5b4..12074f6e 100644
--- a/src_py/apiServer/experiment_flow_test.py
+++ b/src_py/apiServer/experiment_flow_test.py
@@ -11,6 +11,7 @@ def print_test(in_str : str):
 
 NERLNET_PATH = os.getenv('NERLNET_PATH')
 TESTS_PATH = os.getenv('TESTS_PATH')
+TESTS_BASELINE = os.getenv('TESTS_BASELINE')
 NERLNET_RUN_SCRIPT = "./NerlnetRun.sh --run-mode release"
 NERLNET_RUN_STOP_SCRIPT = "./NerlnetRun.sh --run-mode stop"
 NERLNET_RUNNING_TIMEOUT_SEC = int(os.getenv('NERLNET_RUNNING_TIMEOUT_SEC'))
@@ -45,17 +46,6 @@ def print_test(in_str : str):
 api_server_instance.predict()
 
 experiment_inst = api_server_instance.get_experiment(experiment_name)
-exp_stats = Stats(experiment_inst)
-data = exp_stats.get_loss_min()
-print("min loss of each worker")
-print(data)
-#api_server_instance.statistics() TODO change statistics input requests to API!
-# TODO validation of statistics with baseline - margin up to 10%
-
-#api_server_instance.plot_loss(1)
-#api_server_instance.accuracy_matrix(1)
-#api_server_instance.statistics()
-
 nerlnet_stop_cmd = RunCommand(NERLNET_RUN_STOP_SCRIPT, NERLNET_PATH)
 stdout, stderr, rc = nerlnet_run_cmd.sync(NERLNET_RUNNING_TIMEOUT_SEC)
 print_test(f'rc: {rc}')
@@ -70,4 +60,20 @@ def print_test(in_str : str):
 else:
     print_test(stdout)
 
-# api_server_instance.stop() # TODO implement
\ No newline at end of file
+exp_stats = Stats(experiment_inst)
+data = exp_stats.get_loss_min()
+print("min loss of each worker")
+print(data)
+
+conf = exp_stats.get_confusion_matrices()
+acc_stats = exp_stats.get_accuracy_stats(conf)
+baseline_acc_stats = import_dict_json(TESTS_BASELINE)
+diff_from_baseline = []
+for worker in acc_stats.keys():
+    for j in acc_stats[worker].keys():
+        diff = abs(acc_stats[worker][j]["F1"] - baseline_acc_stats[worker][str(j)]["F1"])
+        diff_from_baseline.append(diff/baseline_acc_stats[worker][str(j)]["F1"])
+anomaly_detected = not all([x < 0.01 for x in diff_from_baseline])
+if anomaly_detected:
+    exit(1)
+
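The F1 comparison loop above is the actual regression gate: the test exits nonzero if any worker/class F1 drifts by 1% or more (relative) from the stored baseline. Note the str(j) lookup, needed because get_accuracy_stats keys classes by int while JSON object keys are always strings. The same check, factored into a helper for readability (a sketch under the same names, not part of this diff):

```python
def f1_relative_diffs(acc_stats: dict, baseline_acc_stats: dict) -> list:
    """Relative F1 deviation per (worker, class), mirroring the loop above."""
    diffs = []
    for worker in acc_stats:
        for j in acc_stats[worker]:
            base = baseline_acc_stats[worker][str(j)]["F1"]  # JSON keys are strings
            diffs.append(abs(acc_stats[worker][j]["F1"] - base) / base)
    return diffs

# Anomaly iff any worker/class drifts by 1% or more from the baseline F1;
# equivalent to `not all(x < 0.01 for x in ...)` above.
anomaly_detected = any(d >= 0.01 for d in f1_relative_diffs(acc_stats, baseline_acc_stats))
```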
diff --git a/src_py/apiServer/stats.py b/src_py/apiServer/stats.py
index 9bb9def2..f4be4149 100644
--- a/src_py/apiServer/stats.py
+++ b/src_py/apiServer/stats.py
@@ -1,6 +1,7 @@
 from collections import OrderedDict
 from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
 import matplotlib.pyplot as plt
+from datetime import datetime
 from pathlib import Path
 from experiment import Experiment
 import globalVars as globe
@@ -13,8 +14,9 @@ def __init__(self, experiment : Experiment):
         Path(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}').mkdir(parents=True, exist_ok=True)
         Path(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Training').mkdir(parents=True, exist_ok=True)
         Path(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Prediction').mkdir(parents=True, exist_ok=True)
-
-    def get_loss(self , plot : bool = False):
+        self.exp_path = f'{self.experiment.name}_{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
+        # TODO
+    def get_loss(self , plot : bool = False , saveToFile : bool = False):
         """
         Returns a dictionary of {worker : loss list} for each worker in the experiment.
         use plot=True to plot the loss function.
@@ -44,7 +46,7 @@ def get_loss(self , plot : bool = False):
             plt.savefig(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Training/Loss_graph.png')
         return loss_dict
 
-    def get_loss_min(self , plot : bool = False):
+    def get_loss_min(self , plot : bool = False , saveToFile : bool = False):
         """
         Returns a dictionary of {worker : min loss} for each worker in the experiment.
         use plot=True to plot the min loss of each worker.
@@ -69,7 +71,7 @@ def get_loss_min(self , plot : bool = False):
             plt.savefig(f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Training/Min_loss_graph.png')
         return min_loss_dict
 
-    def get_confusion_matrices(self , normalize : bool = False ,plot : bool = False):
+    def get_confusion_matrices(self , normalize : bool = False ,plot : bool = False , saveToFile : bool = False):
         """
         Returns a dictionary of {worker : confusion matrix} for each worker in the experiment.
         use plot=True to plot the confusion matrix.
@@ -110,52 +112,47 @@ def get_confusion_matrices(self , normalize : bool = False ,plot : bool = False)
             plt.show()
         return workers_confusion_matrices
 
-    def get_accuracy_stats(self , confMatDict , show : bool = False) -> dict:
+    def get_accuracy_stats(self , confMatDict , show : bool = False , saveToFile : bool = False) -> dict:
         """
         Returns a dictionary of {worker : accuracy} for each worker in the experiment.
         """
-        statFileName = f'{EXPERIMENT_RESULTS_PATH}/{self.experiment.name}/Prediction/stats.txt'
-        if os.path.exists(statFileName):
-            os.remove(statFileName)
-        statFile = open(statFileName, "a")
-        workers_accuracy = {worker : {} for worker in confMatDict.keys()}
+        workers_accuracy = OrderedDict()
         for worker in confMatDict.keys():
-            for j, label in enumerate(confMatDict[worker]):
-                tn, fp, fn, tp = label.ravel()
+            workers_accuracy[worker] = OrderedDict()
+            for j, label_stats in enumerate(confMatDict[worker]): # Multi-Class
+                workers_accuracy[worker][j] = OrderedDict()
+                tn, fp, fn, tp = label_stats.ravel()
+                tn = int(tn)
+                fp = int(fp)
+                fn = int(fn)
+                tp = int(tp)
                 acc = (tp + tn) / (tp + tn + fp + fn)
-                ppv = tp / (tp + fp)
-                tpr = tp / (tp + fn)
+                ppv = tp / (tp + fp) # Precision
+                tpr = tp / (tp + fn) # Recall
                 tnr = tn / (tn + fp)
                 bacc = (tpr + tnr) / 2
                 inf = tpr + tnr - 1
+                f1 = 2 * (ppv * tpr) / (ppv + tpr) # F1-Score
 
-                workers_accuracy[worker]['TN'] = tn
-                workers_accuracy[worker]['FP'] = fp
-                workers_accuracy[worker]['FN'] = fn
-                workers_accuracy[worker]['TP'] = tp
-                workers_accuracy[worker]['Accuracy'] = acc
-                workers_accuracy[worker]['Balanced Accuracy'] = bacc
-                workers_accuracy[worker]['Positive Predictive Rate'] = ppv
-                workers_accuracy[worker]['True Positive Rate'] = tpr
-                workers_accuracy[worker]['True Negative Rate'] = tnr
-                workers_accuracy[worker]['Informedness'] = inf
-
-                statFile.write(f"{worker}, class #{j}:\n")
-                statFile.write(f"Accuracy acquired (TP+TN / Tot): {round(acc*100, 3)}%.\n")
-                statFile.write(f"Balanced Accuracy (TPR+TNR / 2): {round(bacc*100, 3)}%.\n")
-                statFile.write(f"Positive Predictive Rate (Precision of P): {round(ppv*100, 3)}%.\n")
-                statFile.write(f"True Pos Rate (Sensitivity / Hit Rate): {round(tpr*100, 3)}%.\n")
-                statFile.write(f"True Neg Rate (Selectivity): {round(tnr*100, 3)}%.\n")
-                statFile.write(f"Informedness (of making decision): {round(inf*100, 3)}%.\n")
+                workers_accuracy[worker][j]['TN'] = tn
+                workers_accuracy[worker][j]['FP'] = fp
+                workers_accuracy[worker][j]['FN'] = fn
+                workers_accuracy[worker][j]['TP'] = tp
+                workers_accuracy[worker][j]['Accuracy'] = acc
+                workers_accuracy[worker][j]['Balanced Accuracy'] = bacc
+                workers_accuracy[worker][j]['Precision'] = ppv
+                workers_accuracy[worker][j]['Recall'] = tpr
+                workers_accuracy[worker][j]['True Negative Rate'] = tnr
+                workers_accuracy[worker][j]['Informedness'] = inf
+                workers_accuracy[worker][j]['F1'] = f1
 
-                if show:
-                    print(f"{worker}, class #{j}:")
-                    print(f"Accuracy acquired (TP+TN / Tot): {round(acc*100, 3)}%.")
-                    print(f"Balanced Accuracy (TPR+TNR / 2): {round(bacc*100, 3)}%.")
-                    print(f"Positive Predictive Rate (Precision of P): {round(ppv*100, 3)}%.")
-                    print(f"True Pos Rate (Sensitivity / Hit Rate): {round(tpr*100, 3)}%.")
-                    print(f"True Neg Rate (Selectivity): {round(tnr*100, 3)}%.")
-                    print(f"Informedness (of making decision): {round(inf*100, 3)}%.\n\n")
-        statFile.close()
+                if show:
+                    print(f"{worker}, class #{j}:")
+                    print(f"{workers_accuracy[worker][j]}\n")
+
+        if saveToFile:
+            export_dict_json(f'{EXPERIMENT_RESULTS_PATH}/{self.exp_path}/accuracy_stats.json', workers_accuracy)
 
         return workers_accuracy
+
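The new F1 line is the harmonic mean of the precision and recall computed just above it, and the int() casts keep the result JSON-serializable (numpy integer types from ravel() are rejected by json.dumps). Recomputing one baseline entry (w2, class 1) by hand shows the formulas line up; on sklearn's 2x2 binary confusion matrix, .ravel() yields (tn, fp, fn, tp) in exactly this order:

```python
# Reproduce the baseline entry for w2, class 1, from its raw counts,
# using the same formulas as get_accuracy_stats.
tn, fp, fn, tp = 36413, 293, 139, 12155
ppv = tp / (tp + fp)                 # Precision = 0.9764620822622108
tpr = tp / (tp + fn)                 # Recall    = 0.9886936717097772
f1 = 2 * (ppv * tpr) / (ppv + tpr)   # F1        = 0.9825398108479508
print(round(f1, 6))                  # 0.98254, matching the stored baseline
```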
diff --git a/src_py/apiServer/transmitter.py b/src_py/apiServer/transmitter.py
index f6e4d3e2..fa6265e2 100644
--- a/src_py/apiServer/transmitter.py
+++ b/src_py/apiServer/transmitter.py
@@ -66,34 +66,20 @@ def updateCSV(self, phase): # currentPhase is either "Training", "Prediction" or
                 break
 
         SourceData = []
-        if globe.CSVsplit == 2: ## send entire file to sources
-            linesPerSource = 0
-
-            for source in globe.experiment_focused_on.expFlow[currentPhase]: # Itterate over sources in accordance to current phase
-                sourceName = source['source name']
-                workersUnderSource = source['workers']
-                try: epochs = 1 if currentPhase == "Prediction" else globe.components.sourceEpochs[sourceName]
-                except: epochs = 1
-                response = requests.post(self.updateCSVAddress, data=f'{sourceName}#{workersUnderSource}#{epochs}#{csvfile}')
-
-        else: ## split file and send to sources
-            linesPerSource = int(len(csvfile)/len(globe.components.sources))
-            for row in range(0,len(csvfile),linesPerSource):
-                SourceData.append(csvfile[row:row+linesPerSource])
-
-            for i,source in enumerate(globe.experiment_focused_on.expFlow[currentPhase]): # Itterate over sources in accordance to current phase
-                sourceName = source['source name']
-                workersUnderSource = source['workers']
-                SourceStr = ""
-                for Line in SourceData[i]:
-                    SourceStr += Line
-
-                try: epochs = 1 if currentPhase == "Prediction" else globe.components.sourceEpochs[sourceName]
-                except: epochs = 1
-
-                dataStr = f'{sourceName}#{workersUnderSource}#{epochs}#{SourceStr}'
-
-                response = requests.post(self.updateCSVAddress, data=dataStr)
+        linesPerSource = int(len(csvfile)/len(globe.components.sources))
+        for row in range(0,len(csvfile),linesPerSource):
+            SourceData.append(csvfile[row:row+linesPerSource])
+
+        for i,source in enumerate(globe.experiment_focused_on.expFlow[currentPhase]): # Iterate over sources in accordance with the current phase
+            sourceName = source['source name']
+            workersUnderSource = source['workers']
+
+            try: epochs = 1 if currentPhase == "Prediction" else globe.components.sourceEpochs[sourceName]
+            except: epochs = 1
+
+            dataStr = f'{sourceName}#{workersUnderSource}#{epochs}#{"".join(SourceData[i])}' # join the chunk's lines into one payload string
+
+            response = requests.post(self.updateCSVAddress, data=dataStr)
 
         LOG_INFO("Data sent to sources")
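With the globe.CSVsplit branch gone, updateCSV always splits the CSV evenly by line count across the sources, and each source's payload is its joined chunk (the "".join restores the behavior of the removed SourceStr accumulation loop). A toy illustration of the slicing, with hypothetical source and worker names; note that when the line count is not a multiple of the source count, the slicing produces a final short chunk that no source consumes:

```python
# Toy run of the chunking kept in updateCSV: equal slices of the CSV's
# lines, one per source; the names here are illustrative only.
csvfile = [f"row{i},0\n" for i in range(9)]  # stand-in for the CSV's lines
sources = ["s1", "s2", "s3"]

linesPerSource = int(len(csvfile) / len(sources))  # 3
SourceData = [csvfile[row:row + linesPerSource]
              for row in range(0, len(csvfile), linesPerSource)]

assert len(SourceData) == 3 and all(len(chunk) == 3 for chunk in SourceData)
payload = f's1#[w1,w2]#1#{"".join(SourceData[0])}'  # '#'-delimited, as POSTed above
```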