diff --git a/OneShotFeatureGenerator.py b/OneShotFeatureGenerator.py
index 5a4effb..6707d96 100644
--- a/OneShotFeatureGenerator.py
+++ b/OneShotFeatureGenerator.py
@@ -56,10 +56,10 @@ def _autoencode(features):
     # decoder_layer = autoencoder.layers[-1]
     # decoder = Model(encoded_input, decoder_layer(encoded_input))
 
-    autoencoder.compile(optimizer='adam', loss='MSE')
+    autoencoder.compile(optimizer='adadelta', loss='MSE')
 
     autoencoder.fit(features, features,
-                    epochs=20,
+                    epochs=20,  # tried 20
                     batch_size=256,
                     shuffle=True,
                     verbose=False)
@@ -269,19 +269,19 @@ def _count_action_for_voter(self, action, voter_df):
 
         return action_counter
 
-    def _generate_A_ratios(self, df, X_train, y_train ,voter):
+    def _generate_A_ratios(self, df, X_train, y_train, voter_index):
         """Generate A ratios - That is TRT-ratio, CMP-ratio, WLB-ratio, SLB-ratio, DOM-ratio
         Action is in {TRT,DLB,SLB,WLB,CMP,DOM}
         Scenario is in {A,B,C,D,E,F}
         """
-        voter_df = pd.concat([X_train.loc[X_train['VoterID'] == voter.VoterID,] , y_train], axis=1, join='inner')
+        voter_df = pd.concat([X_train.loc[X_train.index & voter_index], y_train], axis=1, join='inner')
 
         for action in self._get_actions():
            availability_counter = np.count_nonzero([x[1].Scenario in self._get_scenarios_by_action(action) for x in voter_df.iterrows()])
            action_counter = self._count_action_for_voter(action, voter_df)
 
-            df.loc[df['VoterID'] == voter.VoterID, action + '-ratio'] = float(action_counter/availability_counter if availability_counter > 0 else 0)
-            df.loc[df['VoterID'] == voter.VoterID, action + '-counter'] = float(action_counter)
+            df.loc[voter_index, action + '-ratio'] = float(action_counter/availability_counter if availability_counter > 0 else 0)
+            df.loc[voter_index, action + '-counter'] = float(action_counter)
 
         return df
@@ -302,22 +302,29 @@ def _generate_voter_type(self, df):
 
-    def _generate_feature_aggregation_class_dependant(self, df, X_train, y_train, scenarios, voter, feature_name, aggregation_func):
+    def _generate_feature_aggregation_class_dependant(self, df, X_train, y_train, scenarios, voter_index, feature_name, aggregation_func):
         X = df
-        X_train, y_train = X_train.loc[X_train['Scenario'].isin(scenarios)], y_train.loc[X_train['Scenario'].isin(scenarios)]
+        #X_train, y_train = X_train.loc[X_train['Scenario'].isin(scenarios)], y_train.loc[X_train['Scenario'].isin(scenarios)]
         #X_train, y_train = X_train, y_train #X.drop([self.target_index], axis=1),X[self.target_index]
 
-        for action in range(1, self.n_candidates + 1):
-            actioni_list = [float(x[1][feature_name]) for x in
-                            X_train.loc[(X_train['VoterID'] == voter.VoterID) & (y_train == action)].iterrows()]
-            if len(actioni_list) > 0:
-                X.loc[X['VoterID'] == voter.VoterID, feature_name + '_action'+ str(action) + '_' + aggregation_func.__name__] = aggregation_func(
-                    actioni_list)
+        voter_train = X_train.loc[X_train.index & voter_index]
+        voter_train = voter_train.loc[voter_train["Scenario"].isin(scenarios)]
+        voter_targets = y_train.loc[voter_train.index]
+        if len(voter_train) > 0:
+            for action in range(1, self.n_candidates + 1):
+                actioni_list = [float(x[1][feature_name]) for x in
+                                voter_train.loc[voter_targets == action,:].iterrows()]
+                if len(actioni_list) > 0:
+                    X.loc[voter_index, feature_name + '_action'+ str(action) + '_' + aggregation_func.__name__] = aggregation_func(
+                        actioni_list)
 
         return X
 
-    def _generate_action_aggregation_features(self, df, X_train, y_train, voter):
+    def _generate_action_aggregation_features(self, df, X_train, y_train, voter_index):
         X = df
         aggregators = [np.average,
                        np.std, np.median]
@@ -325,17 +332,17 @@ def _generate_action_aggregation_features(self, df, X_train, y_train, voter):
 
         scenarios = self._get_scenarios_by_actions(self._get_strategic_actions())
-        X_train, y_train = X_train.loc[X_train['Scenario'].isin(scenarios)], y_train.loc[X_train['Scenario'].isin(scenarios)]
-
-        voter_train = X_train.loc[(X_train['VoterID'] == voter.VoterID)]
+        voter_train = X_train.loc[X_train.index & voter_index]
+        voter_train = voter_train.loc[voter_train["Scenario"].isin(scenarios)]
+        voter_targets = y_train.loc[voter_train.index]
 
         for aggregation_func in aggregators:
-            X.loc[X['VoterID'] == voter.VoterID, feature_name + "_" + aggregation_func.__name__] = aggregation_func(
-                [float(y_train[x[0]]) for x in voter_train.iterrows()])
+            X.loc[voter_index, feature_name + "_" + aggregation_func.__name__] = aggregation_func(
+                [float(voter_targets[x[0]]) for x in voter_train.iterrows()])
 
         return X
 
-    def _generate_gaps_features(self, df, X_train, y_train, voter):
+    def _generate_gaps_features(self, df, X_train, y_train, voter_index):
         X = df
 
         features = self._get_gap_pref_features()
@@ -344,7 +351,7 @@ def _generate_gaps_features(self, df, X_train, y_train, voter):
 
         for aggregator in aggregators:
             for feature in features:
-                X = self._generate_feature_aggregation_class_dependant(X, X_train, y_train, scenarios, voter, feature, aggregator)
+                X = self._generate_feature_aggregation_class_dependant(X, X_train, y_train, scenarios, voter_index, feature, aggregator)
 
         return X
 
@@ -369,18 +376,19 @@ def _dynamic_feature_generation(self, df, X_train, y_train):
         a_ratio_columns, gaps_columns = [], []
         all_voters = pd.DataFrame(X["VoterID"].drop_duplicates())
         for voter in all_voters.iterrows():
+            voter_index = X.loc[X['VoterID'] == voter[1].VoterID,].index
             before_columns = len(X.columns)
-            X = self._generate_A_ratios(X, X_train, y_train, voter[1])
+            X = self._generate_A_ratios(X, X_train, y_train, voter_index)
             if len(a_ratio_columns) == 0:
                 a_ratio_columns = list(range(before_columns, len(X.columns)))
 
             before_columns = len(X.columns)
-            X = self._generate_gaps_features(X, X_train, y_train, voter[1])
+            X = self._generate_gaps_features(X, X_train, y_train, voter_index)
             if len(gaps_columns) == 0:
                 gaps_columns = list(range(before_columns, len(X.columns)))
 
-            X = self._generate_action_aggregation_features(X, X_train, y_train, voter[1])
+            X = self._generate_action_aggregation_features(X, X_train, y_train, voter_index)
 
         # Gaps features encoding
         X = X.fillna(
@@ -402,31 +410,31 @@ def _dynamic_feature_generation(self, df, X_train, y_train):
 
         normalized_gap_fs = pd.DataFrame(preprocessing.normalize(OneShotDataPreparation._prepare_dataset(X.iloc[:, total_gaps_columns])))
 
-        #Try auto encode each voter separately
-        # encoded_gap_fs = pd.DataFrame()
-        #
-        # for voter in all_voters.iterrows():
-        #     voter_index = X.loc[X['VoterID'] == voter[1].VoterID].index
-        #     voter_encoded_gap_fs = pd.DataFrame(_autoencode(normalized_gap_fs.iloc[voter_index.tolist(),:]))
-        #     voter_encoded_gap_fs.index = voter_index
-        #
-        #     # aggregate results
-        #     if len(encoded_gap_fs) == 0:
-        #         encoded_gap_fs = pd.DataFrame(voter_encoded_gap_fs)
-        #     else:
-        #         encoded_gap_fs = pd.concat([encoded_gap_fs, pd.DataFrame(voter_encoded_gap_fs)])
-        #
-        # encoded_gap_fs = pd.DataFrame(encoded_gap_fs)
-        #
-        # X = pd.concat([X, encoded_gap_fs], axis=1, join='inner')
-
-
         encoded_gap_fs = pd.DataFrame(_autoencode(normalized_gap_fs))
+        encoded_gap_fs.index = X.index
+        X = pd.concat([X, encoded_gap_fs], axis=1, join='inner')
 
-        X = pd.concat([X, encoded_gap_fs], axis=1, join='inner')
+        # #Try auto encode each voter separately
+        # # encoded_gap_fs = pd.DataFrame()
+        # #
+        # # for voter in all_voters.iterrows():
+        # #     voter_index = X.loc[X['VoterID'] == voter[1].VoterID].index
+        # #     voter_encoded_gap_fs = pd.DataFrame(_autoencode(normalized_gap_fs.iloc[voter_index.tolist(),:]))
+        # #     voter_encoded_gap_fs.index = voter_index
+        # #
+        # #     # aggregate results
+        # #     if len(encoded_gap_fs) == 0:
+        # #         encoded_gap_fs = pd.DataFrame(voter_encoded_gap_fs)
+        # #     else:
+        # #         encoded_gap_fs = pd.concat([encoded_gap_fs, pd.DataFrame(voter_encoded_gap_fs)])
+        # #
+        # # encoded_gap_fs = pd.DataFrame(encoded_gap_fs)
+        # #
+        # # X = pd.concat([X, encoded_gap_fs], axis=1, join='inner')
+        #
 
         #X = X.drop(X.columns[gaps_columns + gaps_dif_columns], axis=1)
@@ -439,29 +447,15 @@ def _dynamic_feature_generation(self, df, X_train, y_train):
         # plt.show()
 
         # Correlation with output variable
-        cor_target = abs(pd.concat([X.loc[X_train.index].drop(["Action"],axis=1), y_train], axis=1, join='inner').corr()["Action"])
-        # Selecting highly correlated features
-        relevant_features = cor_target[cor_target > 0.4]
-        print(relevant_features)
-
-        cols = list(X.columns)
-        model = RandomForestRegressor(random_state=1)
-        # Initializing RFE model
-        rfe = RFE(model, 20)
-        # Transforming data using RFE
-        #data_trans = X.loc[X_train.index].fillna( X.loc[X_train.index].mean())
-        #OneShotDataPreparation._prepare_dataset(X["VoterType"])
-        #OneShotDataPreparation._prepare_dataset(X["Scenario_type"])
-        X_rfe = rfe.fit_transform(OneShotDataPreparation._prepare_dataset(X.loc[[x in X_train.index for x in X.index.tolist()]]), y_train)
-        # Fitting the data to model
-        model.fit(X_rfe, y_train)
-        temp = pd.Series(rfe.support_, index=cols)
-        selected_features_rfe = temp[temp == True].index
-        X = X.drop(X.columns[[not (x in selected_features_rfe) for x in X.columns]].tolist(), axis=1)
-        print(selected_features_rfe)
+        # cor_target = abs(pd.concat([X.loc[X_train.index], y_train], axis=1, join='inner').corr()["Action"])
+        # # Selecting highly correlated features
+        # relevant_features = cor_target[cor_target > 0.4]
+        # print(relevant_features)
+        #
 
         return X
-RandomForestRegressor
+
diff --git a/OneShot_NewAnalysis_N4.py b/OneShot_NewAnalysis_N4.py
index 767c626..3fbd7ae 100644
--- a/OneShot_NewAnalysis_N4.py
+++ b/OneShot_NewAnalysis_N4.py
@@ -32,6 +32,7 @@ from ExpertModels import DecisionTreeBaseline
 from sklearn.ensemble import ExtraTreesClassifier
 from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import train_test_split
@@ -73,12 +74,13 @@ def _get_k_folds(X,k):
         folds.append(X.iloc[test_indices].RoundIndex)
     return folds
 
-def _select_features(features_train, targets_train, features_ext_df):
+def _features_importance(features_ext_df, features_train, targets_train):
     #feature importance
     feature_importance = pd.DataFrame()
     rf_for_fs = RandomForestClassifier(n_estimators=100)
-    rf_for_fs.fit(X=features_train.values, y=targets_train)
+    transformed_features_train = OneShotDataPreparation._prepare_dataset(features_ext_df.loc[features_train.index, :])
+    rf_for_fs.fit(X=transformed_features_train.values, y=targets_train)
     current_feature_importances = pd.DataFrame(rf_for_fs.feature_importances_,
                                                index=features_ext_df.columns,
                                                columns=['importance']).sort_values('importance',
@@ -89,11 +91,30 @@
     feature_importance['importance'] = feature_importance['importance'] + current_feature_importances['importance']
     feature_importance['importance_percentage'] = feature_importance['importance']/np.max(feature_importance['importance'])
-    selected_comlumns = feature_importance.iloc[[feature_importance['importance_percentage']>0.2],].index.tolist()
-    return selected_comlumns
+    return feature_importance
 
-def _evaluation(raw_data, clfs, target, folds, scenario_filter, action_table_df, scenarios_df,n_candidates = 3):
+
+def _select_features(X, X_train, y_train, top_k=25):
+    cols = list(X.columns)
+    model = RandomForestClassifier(n_estimators=100, random_state=1)
+    # Initializing RFE model
+    rfe = RFE(model, top_k)
+    # Transforming data using RFE
+    # data_trans = X.loc[X_train.index].fillna( X.loc[X_train.index].mean())
+    # OneShotDataPreparation._prepare_dataset(X["VoterType"])
+    # OneShotDataPreparation._prepare_dataset(X["Scenario_type"])
+    X_rfe = rfe.fit_transform(OneShotDataPreparation._prepare_dataset(X.loc[X_train.index, :]), y_train)
+    # Fitting the data to model
+    model.fit(X_rfe, y_train)
+    temp = pd.Series(rfe.support_, index=cols)
+    selected_features_rfe = temp[temp == True].index
+
+    print(selected_features_rfe)
+
+    return selected_features_rfe
+
+def _evaluation(raw_data, clfs, target, folds, scenario_filter, action_table_df, scenarios_df, n_candidates = 3):
     data = raw_data.copy()
     data = data.drop(["Vote"], axis=1)
@@ -109,6 +130,9 @@
 
     prediction = pd.DataFrame(np.matrix([]))
 
+    features_importance = pd.DataFrame(np.matrix([]))
+    selected_features = pd.DataFrame(np.matrix([]))
+
     features_train = pd.DataFrame()
     # 10 fold cross validation
     for i in range(0,len(folds)):
@@ -121,21 +145,51 @@
             test_indices = data.index.tolist()
             train_indices = data.index.tolist()
         else:
-            test_indices = data.index[[x[1].RoundIndex in folds[i].tolist() for x in data.iterrows()]].tolist()
+            test_indices = data.index[[(x[1].RoundIndex in folds[i].tolist()) for x in data.iterrows()]].tolist()
             train_indices = data.index[[not (x[1].RoundIndex in folds[i].tolist()) for x in data.iterrows()]].tolist()
 
-        # Feature Generation
-        features_train = features_df.loc[[ii for ii in train_indices],]
-        targets_train = target_df[[ii for ii in train_indices]]
+        # Feature Generation
+        features_train = features_df.loc[train_indices]
+        targets_train = target_df[train_indices]
         features_ext_df = oneshot_dyn_fg._dynamic_feature_generation(features_df, features_train, targets_train)
         # features_ext_df = features_ext_df.drop(["Vote"], axis=1)
+
+        # Feature Selection
+        selected_features_rfe = _select_features(features_ext_df, features_train, targets_train)
+        current_selected_features = pd.DataFrame(selected_features_rfe)
+        current_selected_features.loc[:, "FOLD"] = str(i+1)
+        if len(selected_features) == 0:
+            selected_features = current_selected_features
+        else:
+            selected_features = pd.concat([selected_features, current_selected_features])
+
+        baseline_set = features_ext_df.loc[:, ["Scenario", "VoterType"]]
+
+        features_ext_df = features_ext_df.drop(
+            features_ext_df.columns[[not (x in selected_features_rfe) for x in
+                                     features_ext_df.columns]].tolist(),
+            axis=1)
+
+        #Feature Importance
+        current_feature_importance = _features_importance(features_ext_df, features_train, targets_train)
+        current_feature_importance.loc[:, "FOLD"] = str(i+1)
+        if len(features_importance) == 0:
+            features_importance = current_feature_importance
+        else:
+            features_importance = pd.concat([features_importance, current_feature_importance])
+
         # encoding the dataframes
         features_encoded_df = OneShotDataPreparation._prepare_dataset(features_ext_df.copy())
+        features_encoded_df.index = data.index
+
         target_encoded_df = target_df
 
         # make training and testing datasets
-        features_train = features_encoded_df.loc[[ii for ii in train_indices],]
-        features_test = features_encoded_df.loc[[ii for ii in test_indices],]
-        targets_train = target_encoded_df[[ii for ii in train_indices]]
-        targets_test = target_encoded_df[[ii for ii in test_indices]]
+        features_train = features_encoded_df.loc[train_indices]
+        features_test = features_encoded_df.loc[test_indices]
+        targets_train = target_encoded_df[train_indices]
+        targets_test = target_encoded_df[test_indices]
+
         # select features
         #selected_columns = _select_features(features_train, targets_train, features_ext_df)
@@ -154,7 +208,7 @@ def _evaluation(raw_data, clfs, target, folds, scenario_filter, action_table_df,
             if "DecisionTreeBaseline" in clf_name:
                 features_ext_df.to_csv("datasets/oneshot/test_features.csv")
                 targets_test.to_csv("datasets/oneshot/test_target.csv")
-                predicated = clf.predict(features_ext_df.loc[[ii for ii in test_indices],])
+                predicated = clf.predict(baseline_set.loc[[ii for ii in test_indices],])
             else:
                 # Test
                 predicated = clf.predict(features_test.values)
@@ -166,7 +220,7 @@ def _evaluation(raw_data, clfs, target, folds, scenario_filter, action_table_df,
             else:
                 prediction = pd.concat([prediction, pd.DataFrame(predicated)])
 
-            raw_data.loc[[ii for ii in test_indices],"Prediction" + "_" + clf_name] = predicated
+            raw_data.loc[test_indices,"Prediction" + "_" + clf_name] = predicated
 
             raw_data = _convert_prediction(raw_data, "Prediction" + "_" + clf_name, n_candidates)
@@ -183,7 +237,7 @@ def _evaluation(raw_data, clfs, target, folds, scenario_filter, action_table_df,
 
     #results_df.Result = results_df.Result.apply(lambda x: x / n_folds)
 
-    return results_df, raw_data#, feature_importances
+    return results_df, raw_data, features_importance, selected_features
 
 def _build_data_by_folds(data, folds):
     transformed_data = pd.DataFrame()
@@ -248,7 +302,7 @@ def _get_classifiers(df, n_candidates):
     # maxlikelihood_clf = MLHClassifier()
     if n_candidates == 3:
         baseline_clf = DecisionTreeBaseline()
-        classifiers = [rf_clf3]#[baseline_clf, extra_tree_clf, gb_clf, rfi1_clf, rfi2_clf, rfi3_clf, rfi4_clf, ordinal_clf ,personal_nn_clf,neural_net_cf,nn_cf_2, nn_cf_3, two_layer_nn_cf, three_layer_nn_cf, rf_clf1,rf_clf2, rf_clf3,rf_clf4,rf_clf5, rf_clf6, dt_clf,adaboost_clf,adaboost_clf2, adaboost_clf3,adaboost_clf4, svm_clf, svm_clf2, svm_clf3,logistics_clf]
+        classifiers = [baseline_clf, extra_tree_clf, gb_clf, rfi1_clf, rfi2_clf, rfi3_clf, rfi4_clf, ordinal_clf ,personal_nn_clf,neural_net_cf,nn_cf_2, nn_cf_3, two_layer_nn_cf, three_layer_nn_cf, rf_clf1,rf_clf2, rf_clf3,rf_clf4,rf_clf5, rf_clf6, dt_clf,adaboost_clf,adaboost_clf2, adaboost_clf3,adaboost_clf4, svm_clf, svm_clf2, svm_clf3,logistics_clf]
     else:
         classifiers = [extra_tree_clf, gb_clf, rfi1_clf, rfi2_clf, rfi3_clf, rfi4_clf, ordinal_clf,
                        personal_nn_clf, neural_net_cf, nn_cf_2, nn_cf_3, two_layer_nn_cf, three_layer_nn_cf, rf_clf1,
@@ -267,8 +321,7 @@ def _load_and_run(datasets, load_folds, scenarios = ['NONE'], is_loo = False, fo
         data = pd.read_excel(file_path, sheet_name=sheet)
 
         #Take sample from data
-        data = data.sample(frac=0.05,replace=False, random_state=1)
-
+        #data = data.loc[data["VoterID"].isin(data["VoterID"].sample(frac=0.001, replace=False, random_state=1))]
         d_df = data.fillna(data.mean())
 
         n_candidates = d_df.iloc[0]["NumberOfCandidates"]
@@ -288,9 +341,11 @@
         for scenario in scenarios: # ['A','B','C','D','E','F','NONE']:
             raw_data = d_df.copy()
-            d_performance_df, d_pred = _evaluation(raw_data, classifiers, 'Action', folds, scenario, actions_table, scenarios_table, n_candidates)
+            d_performance_df, d_pred, d_feature_importance, d_selected_features = _evaluation(raw_data, classifiers, 'Action', folds, scenario, actions_table, scenarios_table, n_candidates)
             d_performance_df.to_csv("Results\\" + dataset + "_" + sheet + "_performance_df_" + scenario + "_" + str(n_folds) + ".csv")
             d_pred.to_csv("Results\\" + dataset + "_" + sheet + "_pred_" + scenario + "_" + str(n_folds) + ".csv")
+            d_feature_importance.to_csv("Results\\" + dataset + "_" + sheet + "_feature_importance_" + scenario + "_" + str(n_folds) + ".csv")
+            d_selected_features.to_csv("Results\\" + dataset + "_" + sheet + "_selected_features_" + scenario + "_" + str(n_folds) + ".csv")
 
         pass
@@ -305,9 +360,9 @@
 # #     _load_and_run(datasets=datasets, load_folds=True, classifiers=classifiers, n_candidates=n_candidates)
 #
-datasets = ["d36_updated_train"]#["schram_train","tal_train","d36_updated_train","d32_updated_train","N4_first_90_train"] #["N4_first_90", "d32_updated", "d36_updated", "tal", "schram"]#["N4_first_90_sample", "d32_updated_sample", "d36_updated_sample", "tal_sample", "schram_sample"]#["N4_first_90", "d32_updated", "d36_updated", "tal", "schram"]
+datasets = ['voter_sample_for_test']#["d36_updated_train","tal_train","d36_updated_train","schram_train","N4_first_90"]#["schram_train","tal_train","d36_updated_train","d32_updated_train","N4_first_90_train"] #["N4_first_90", "d32_updated", "d36_updated", "tal", "schram"]#["N4_first_90_sample", "d32_updated_sample", "d36_updated_sample", "tal_sample", "schram_sample"]#["N4_first_90", "d32_updated", "d36_updated", "tal", "schram"]
 fold_set = [10]#, 10]
-_load_and_run(datasets=datasets, load_folds=False, fold_set=fold_set)
+_load_and_run(datasets=datasets, load_folds=False,fold_set=fold_set)
 #
 # datasets = ["N4_first_90", "d32_updated", "d36_updated", "tal", "schram", "N4_first_90_train", "d32_updated_train", "d36_updated_train", "tal_train", "schram_train"]
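
Note on the new _select_features helper: it wraps scikit-learn's recursive feature elimination (RFE), which repeatedly fits the estimator and drops the weakest features until top_k remain. The sketch below shows the same pattern in isolation on synthetic data; the names (X, y, f0...f9) are illustrative and not from this repo, and recent scikit-learn versions require n_features_to_select as a keyword argument, so `RFE(model, top_k)` as written in the patch only works on older releases.

    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.feature_selection import RFE

    # Synthetic data: 200 rows, 10 numeric features; only f0 and f1 drive the target.
    rng = np.random.default_rng(1)
    X = pd.DataFrame(rng.normal(size=(200, 10)),
                     columns=[f"f{i}" for i in range(10)])
    y = (X["f0"] + X["f1"] > 0).astype(int)

    model = RandomForestClassifier(n_estimators=100, random_state=1)
    rfe = RFE(model, n_features_to_select=3)  # keyword form for newer scikit-learn
    X_rfe = rfe.fit_transform(X, y)           # (200, 3) array of the surviving columns

    # rfe.support_ is a boolean mask over the original columns,
    # the same mask the patch turns into selected_features_rfe.
    mask = pd.Series(rfe.support_, index=X.columns)
    print(mask[mask].index.tolist())          # names of the kept features, e.g. ['f0', 'f1', ...]

As in the patched _evaluation loop, the selected column names can then be used to drop every other column from the feature frame before encoding and training.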