diff --git a/GUI/GUIFunctions.py b/GUI/GUIFunctions.py index df1a0ec..0b31e0e 100644 --- a/GUI/GUIFunctions.py +++ b/GUI/GUIFunctions.py @@ -64,13 +64,16 @@ def csvParaExtract(file_name,para_name): else: mavlink_data[typ][param].append(float(row[mavlink_index[typ][param]-1])) data = [] + count = 0 for typ in range(len(mavlink_types)): for param in range(len(mavlink_param[typ])): if mavlink_param[typ][param] == para_name: - for i in range(len(mavlink_data[typ][param])): - data.append(mavlink_data[typ][param][i]) - rate = mavlink_rate[typ] - mav_type = mavlink_types[typ] + if count == 0: + for i in range(len(mavlink_data[typ][param])): + data.append(mavlink_data[typ][param][i]) + rate = mavlink_rate[typ] + count = count + 1 + mav_type = mavlink_types[typ] print("Done collecting "+para_name+" data!") return data,rate,mav_type diff --git a/__pycache__/k_fold_cv.cpython-37.pyc b/__pycache__/k_fold_cv.cpython-37.pyc index 9ba7768..f246560 100644 Binary files a/__pycache__/k_fold_cv.cpython-37.pyc and b/__pycache__/k_fold_cv.cpython-37.pyc differ diff --git a/find_best_k.py b/find_best_k.py new file mode 100644 index 0000000..1833446 --- /dev/null +++ b/find_best_k.py @@ -0,0 +1,89 @@ + +# -*- coding: utf-8 -*- +""" +A function to see the best k for each parameter + +@author: Jeff Xie +""" +import matplotlib.pyplot as plt +import numpy as np +import time +from knndtw import KnnDtw +from knndtw import ProgressBar +from scipy import stats + +from k_fold_cv import k_fold_cross_val +from sklearn.metrics import classification_report, confusion_matrix +from sklearn.metrics import precision_recall_fscore_support as score + +def find_best_k(param_list,k_range,warp_val,datapath,folds): + start_time = time.time() + param_k = [] + for dataparam in param_list: + trainingdatafile = datapath + 'train_' + dataparam + '.txt' + traininglabelfile = datapath + 'train_labels.txt' + + testdatafile = datapath + 'test_' + dataparam + '.txt' + testlabelfile = datapath + 'test_labels.txt' + + # 
Open training data file, x:data, y:label + x_train_file = open(trainingdatafile, 'r') + y_train_file = open(traininglabelfile, 'r') + + #Open test data file, x:data, y:label + x_test_file = open(testdatafile, 'r') + y_test_file = open(testlabelfile, 'r') + + + # Create empty lists + x_train = [] + y_train = [] + x_test = [] + y_test = [] + + # Mapping table for classes + labels = {1:'Hover', 2:'Impact (Front Left)', 3:'Impact (Front Right)', 4:'Impact (Back Left)', 5:'Impact (Back Right)', + 6:'Gust (from Left)', 7:'Gust (from Right)', 8: 'Gust (from front)' } + + i = 0 + # Loop through datasets + for x in x_train_file: + x_train.append([float(ts) for ts in x.split()]) + for y in y_train_file: + y_train.append(int(y.rstrip('\n'))) + + for x in x_test_file: + x_test.append([float(ts) for ts in x.split()]) + + for y in y_test_file: + y_test.append(int(y.rstrip('\n'))) + + + + #close data files + x_train_file.close() + y_train_file.close() + x_test_file.close() + y_test_file.close() + + # Convert to numpy for efficiency + x_train = np.array(x_train) + y_train = np.array(y_train) + x_test = np.array(x_test) + y_test = np.array(y_test) + + k_best = k_fold_cross_val(k_range,x_train,y_train,folds) + param_k.append((dataparam,k_best)) + + + print("k-fold cross val results for the included paramters") + for kv in param_k: + print(kv[0],": ",kv[1]) + print("--- %s seconds ---" % (time.time() - start_time)) #let's see how long this takes... 
+#Testing +plist1 = ['mavlink_raw_imu_t_Xaccel','mavlink_raw_imu_t_Yaccel','mavlink_raw_imu_t_Zaccel','mavlink_raw_imu_t_XGyro','mavlink_raw_imu_t_YGyro','mavlink_raw_imu_t_ZGyro'] +plist2 = ['mavlink_attitude_t_pitch angle','mavlink_attitude_t_roll angle','mavlink_attitude_t_yaw angle','mavlink_attitude_t_pitch rate','mavlink_attitude_t_yaw rate','mavlink_attitude_t_roll rate'] +plist3 = ['mavlink_raw_imu_t_XMag','mavlink_raw_imu_t_YMag','mavlink_raw_imu_t_ZMag','mavlink_vibration_t_vibration_x','mavlink_vibration_t_vibration_y','mavlink_vibration_t_vibration_z'] +p_all = plist1+plist2+plist3 +k_list = list(range(1,6)) +find_best_k(p_all,k_list,100,'Data4/',5) \ No newline at end of file diff --git a/k_fold_cv.py b/k_fold_cv.py index fe63752..b42c9b9 100644 --- a/k_fold_cv.py +++ b/k_fold_cv.py @@ -23,6 +23,7 @@ def k_fold_cross_val(k_list,train,label,folds): + #Randomly shuffle the data and label in to the same sequence seed = np.arange(train.shape[0]) np.random.shuffle(seed) @@ -32,7 +33,8 @@ def k_fold_cross_val(k_list,train,label,folds): k_scores = [] #averaged scores for each k value, num of scores = num of K #we want to split train data into test and train - label_name = {1:'Hover', 2:'Impact (tapping)', 3:'Wind'} + label_name = {1:'Hover', 2:'Impact (Front Left)', 3:'Impact (Front Right)', 4:'Impact (Back Left)', 5:'Impact (Back Right)', + 6:'Gust (from Left)', 7:'Gust (from Right)', 8: 'Gust (from front)' } clf = KnnDtw(n_neighbors=1, max_warping_window=100) #Initialize classifier kf = KFold(n_splits=folds) kf.get_n_splits(train) @@ -54,8 +56,11 @@ def k_fold_cross_val(k_list,train,label,folds): score = np.average(scores) #averages the fold scores to a single socre for the k k_scores.append(score) #Plot the average accuracy score for each k, recommend a besk (highest accuracy) k + k_best = k_list[np.argmax(k_scores)] + plt.bar(k_list, k_scores,width=0.2) plt.xlabel('k (nearest neighbors)') plt.ylabel('Accuracy (average)') plt.xticks(k_list) - print('Best k 
value from list is:',k_list[np.argmax(k_scores)]) \ No newline at end of file + print('Best k value from list is:',k_best) + return k_best \ No newline at end of file diff --git a/param_ranking.py b/param_ranking.py new file mode 100644 index 0000000..e968341 --- /dev/null +++ b/param_ranking.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +""" +A function to rank parameters by precision (the ability not to label a negative sample as positive) + +@author: Jeff Xie +""" +import matplotlib.pyplot as plt +import numpy as np +import time +from knndtw import KnnDtw +from knndtw import ProgressBar +from scipy import stats + +from sklearn.metrics import classification_report, confusion_matrix +from sklearn.metrics import precision_recall_fscore_support as score + +def param_ranking(param_list,k_val,warp_val,datapath,avg_type): + start_time = time.time() + p = [] + r = [] + f = [] + for dataparam in param_list: + trainingdatafile = datapath + 'train_' + dataparam + '.txt' + traininglabelfile = datapath + 'train_labels.txt' + + testdatafile = datapath + 'test_' + dataparam + '.txt' + testlabelfile = datapath + 'test_labels.txt' + + # Open training data file, x:data, y:label + x_train_file = open(trainingdatafile, 'r') + y_train_file = open(traininglabelfile, 'r') + + #Open test data file, x:data, y:label + x_test_file = open(testdatafile, 'r') + y_test_file = open(testlabelfile, 'r') + + + # Create empty lists + x_train = [] + y_train = [] + x_test = [] + y_test = [] + + # Mapping table for classes + labels = {1:'Hover', 2:'Impact (Front Left)', 3:'Impact (Front Right)', 4:'Impact (Back Left)', 5:'Impact (Back Right)', + 6:'Gust (from Left)', 7:'Gust (from Right)', 8: 'Gust (from front)' } + + i = 0 + # Loop through datasets + for x in x_train_file: + x_train.append([float(ts) for ts in x.split()]) + for y in y_train_file: + y_train.append(int(y.rstrip('\n'))) + + for x in x_test_file: + x_test.append([float(ts) for ts in x.split()]) + + for y in y_test_file: + 
y_test.append(int(y.rstrip('\n'))) + + + + #close data files + x_train_file.close() + y_train_file.close() + x_test_file.close() + y_test_file.close() + + + # Convert to numpy for efficiency + + + x_train = np.array(x_train) + y_train = np.array(y_train) + x_test = np.array(x_test) + y_test = np.array(y_test) + + m = KnnDtw(n_neighbors=k_val, max_warping_window=warp_val) + m.fit(x_train, y_train) + label, proba = m.predict(x_test) + + precision, recall, f_score, _ = score(y_test,label,average=avg_type) + p.append(precision) + r.append(recall) + f.append(f_score) + + precision_rank = sorted(list(zip(param_list,p)),key=lambda x: x[1]) + recall_rank = sorted(list(zip(param_list,r)),key=lambda x: x[1]) + fscore_rank = sorted(list(zip(param_list,f)),key=lambda x: x[1]) + #("Parameter rank by precision is:",precision_rank) + print('Ranking for k = %s, max warping window = %s' %(k_val,warp_val)) + for rank in precision_rank[::-1]: + print(rank[0],": ",rank[1]) + #print("Parameter rank by recall is:",recall_rank) + #print("Parameter rank by f-score is:",fscore_rank) + print("--- %s seconds ---" % (time.time() - start_time)) #let's see how long this takes... +#Testing +plist1 = ['mavlink_raw_imu_t_Xaccel','mavlink_raw_imu_t_Yaccel','mavlink_raw_imu_t_Zaccel','mavlink_raw_imu_t_XGyro','mavlink_raw_imu_t_YGyro','mavlink_raw_imu_t_ZGyro'] +plist2 = ['mavlink_attitude_t_pitch angle','mavlink_attitude_t_roll angle','mavlink_attitude_t_yaw angle','mavlink_attitude_t_pitch rate','mavlink_attitude_t_yaw rate','mavlink_attitude_t_roll rate'] +plist3 = ['mavlink_raw_imu_t_XMag','mavlink_raw_imu_t_YMag','mavlink_raw_imu_t_ZMag','mavlink_vibration_t_vibration_x','mavlink_vibration_t_vibration_y','mavlink_vibration_t_vibration_z'] +p_all = plist1+plist2+plist3 +param_ranking(p_all,1,100,'Data6/','weighted') +#param_ranking(p_all,1,100,'Data4/','weighted') \ No newline at end of file