"""https://github.com/Leci37/LecTrade LecTrade is a tool created by github user @Leci37. instagram @luis__leci Shared on 2022/11/12 . . No warranty, rights reserved """
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from Model_TF_definitions import ModelDefinition
from Utils import UtilsL, Utils_model_predict, Utils_plotter
import _KEYS_DICT
# early_stopping = tf.keras.callbacks.EarlyStopping(
#     monitor='loss',  # the monitor argument of tf.keras.callbacks.EarlyStopping accepts 4 values: 'loss', 'accuracy', 'val_loss', 'val_accuracy'
#     verbose=2,
#     patience=9,
#     mode='auto',  # min_delta=1  By default, any change in the performance measure, no matter how fractional, is considered an improvement
#     restore_best_weights=True)
from Utils.Utils_model_predict import __print_csv_accuracy_loss_models, get_model_summary_format
def get_EarlyStopping(model_h5_name):
    monitor_type = 'val_accuracy'  # the monitor argument of EarlyStopping accepts 4 values: 'loss', 'accuracy', 'val_loss', 'val_accuracy'
    # if _KEYS_DICT.MODEL_TYPE_COLM.VGOOD.value in model_h5_name:
    #     monitor_type = 'accuracy'
    # elif _KEYS_DICT.MODEL_TYPE_COLM.GOOD.value in model_h5_name:
    #     monitor_type = 'val_loss'
    # elif _KEYS_DICT.MODEL_TYPE_COLM.REG.value in model_h5_name:
    #     monitor_type = 'val_loss'
    return tf.keras.callbacks.EarlyStopping(
        monitor=monitor_type,  # see the 4 accepted monitor values above
        verbose=2,
        patience=12,
        mode='auto',  # min_delta=1  By default, any change in the performance measure, no matter how fractional, is considered an improvement
        restore_best_weights=True)
# https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/TensorBoard
# Command to open the TensorBoard dashboard:
# tensorboard --logdir=C:\Users\Luis\Desktop\LecTrade\LecTrade\Models\logs\TF_MELI_pos_low1_28.h5
def get_EarlyStopping_TensorFlowBoard(model_h5_name):
    path_tf_board = "Models/logs/" + model_h5_name
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=path_tf_board, histogram_freq=1, write_graph=True,
                                                          write_images=True)
    return tensorboard_callback
Y_TARGET = 'buy_sell_point'
EPOCHS = 160
BATCH_SIZE = 1024
MODEL_FOLDER_TF = "Models/TF_balance/"
#model_h5_name = 'TF_in_balance.h5'
#train_labels, val_labels, test_labels, train_features, val_features, test_features, bool_train_labels = None
def train_TF_onBalance_One_dimension(columns_selection, model_h5_name, path_csv, op_buy_sell: _KEYS_DICT.Op_buy_sell, type_model_dime: _KEYS_DICT.MODEL_TF_DENSE_TYPE_ONE_DIMENSI):
    # LOAD
    global train_labels, val_labels, test_labels, train_features, val_features, test_features, bool_train_labels
    df = Utils_model_predict.load_and_clean_DF_Train_from_csv(path_csv, op_buy_sell, columns_selection)
    neg, pos = np.bincount(df[Y_TARGET])
    initial_bias = np.log([pos / neg])
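    # With a sigmoid output, initialising the output bias to log(pos / neg) makes the untrained
    # model predict pos / (pos + neg), i.e. the positive-class base rate, so the first epochs on
    # the imbalanced data start from a sensible loss instead of learning the base rate from scratch.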
    df = Utils_model_predict.prepare_to_split_SMOTETomek_and_scaler01(df)
    # relationship plots
    # Utils_plotter.plot_relationdist_main_val_and_all_rest_val(df[["mtum_RSI","mtum_STOCH_k","mtum_STOCH_d", Y_TARGET]],Y_TARGET ,path = model_folder+"plot_relationdistplot_")
    train_labels, val_labels, test_labels, train_features, val_features, test_features, bool_train_labels = Utils_model_predict.scaler_split_TF_onbalance(
        df, label_name=Y_TARGET, will_shuffle=True)
    # END LOAD

    # TRAIN
    # model = ModelDefinition(shape_inputs_m=train_features.shape[-1], num_features_m=None).get_dicts_models_One_dimension()[type_model_dime]
    # print(get_model_summary_format(model))
    #
    # results = model.evaluate(train_features, train_labels, batch_size=BATCH_SIZE, verbose=2)
    # print("Loss: {:0.4f} without output_bias ".format(results[0]))
    # model.predict(train_features[:10])
    model = ModelDefinition(shape_inputs_m=train_features.shape[-1], num_features_m=None, output_bias_m=initial_bias).get_dicts_models_One_dimension()[type_model_dime]
    results = model.evaluate(train_features, train_labels, batch_size=BATCH_SIZE, verbose=2)
    print("Loss: {:0.4f} with output_bias".format(results[0]))
    # model.predict(train_features[:10])
    initial_weights = MODEL_FOLDER_TF + "initial_weights/initial_weights_" + model_h5_name
    model.save_weights(initial_weights)
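    # Save the freshly initialised weights so the resampled model built below can start from the
    # exact same initialisation (see resampled_model.load_weights(initial_weights) further down).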
print("model.save_weights initial_weights: ", initial_weights)
resampled_ds = Utils_model_predict.get_resampled_ds_onBalance(train_features, train_labels, bool_train_labels,
BATCH_SIZE)
resampled_steps_per_epoch = np.ceil(2.0 * neg / BATCH_SIZE)
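    # Assuming get_resampled_ds_onBalance oversamples the positive class up to a ~50/50 balance,
    # one pass over the negatives corresponds to roughly 2 * neg examples, hence
    # 2.0 * neg / BATCH_SIZE steps per epoch.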
    # Train on the oversampled data
    # Now try training the model with the resampled data set instead of using class weights to see how these methods compare.
    # Note: Because the data was balanced by replicating the positive examples, the total dataset size is larger, and each epoch runs for more training steps.
    resampled_model = ModelDefinition(shape_inputs_m=train_features.shape[-1], num_features_m=None).get_dicts_models_One_dimension()[type_model_dime]
    resampled_model.load_weights(initial_weights)
    print(get_model_summary_format(resampled_model))
    # Reset the bias to zero, since this dataset is balanced.
    output_layer = resampled_model.layers[-1]
    output_layer.bias.assign([0])

    val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels)).cache()
    val_ds = val_ds.batch(BATCH_SIZE).prefetch(2)

    # early_stopping = get_EarlyStopping(model_h5_name)
    early_stopping = Utils_model_predict.CustomEarlyStopping(patience=10)
    # early_stopping_board = get_EarlyStopping_TensorFlowBoard(model_h5_name)
    resampled_history = resampled_model.fit(
        resampled_ds,
        epochs=EPOCHS,
        steps_per_epoch=resampled_steps_per_epoch,
        callbacks=[early_stopping],  # callbacks=[early_stopping, early_stopping_board],
        validation_data=val_ds)

    __print_csv_accuracy_loss_models(MODEL_FOLDER_TF, model_h5_name, resampled_history)
    resampled_model.save(MODEL_FOLDER_TF + model_h5_name)
    print(" Save model : ", MODEL_FOLDER_TF + model_h5_name)
# def train_TF_onBalance_64(columns_selection, model_h5_name,path_csv, op_buy_sell : _KEYS_DICT.Op_buy_sell):
# # LOAD
# global train_labels, val_labels, test_labels, train_features, val_features, test_features, bool_train_labels
# df = Utils_model_predict.load_and_clean_DF_Train_from_csv(path_csv, op_buy_sell, columns_selection)
#
# # relationship plots
# # Utils_plotter.plot_relationdist_main_val_and_all_rest_val(df[["mtum_RSI","mtum_STOCH_k","mtum_STOCH_d", Y_TARGET]],Y_TARGET ,path = model_folder+"plot_relationdistplot_")
# train_labels, val_labels, test_labels, train_features, val_features, test_features, bool_train_labels = Utils_model_predict.scaler_split_TF_onbalance(
# df, label_name=Y_TARGET)
# # END LOAD
#
# # TRAIN
# neg, pos = np.bincount(df[Y_TARGET])
# initial_bias = np.log([pos / neg])
# model = Utils_model_predict.make_model_TF_onbalance_fine_64(shape_features=train_features.shape[-1])
# print(model.summary())
# results = model.evaluate(train_features, train_labels, batch_size=BATCH_SIZE, verbose=2)
# print("Loss: {:0.4f} without output_bias ".format(results[0]))
# # model.predict(train_features[:10])
# model = Utils_model_predict.make_model_TF_onbalance_fine_64(shape_features=train_features.shape[-1],
# output_bias=initial_bias)
# results = model.evaluate(train_features, train_labels, batch_size=BATCH_SIZE, verbose=2)
# print("Loss: {:0.4f} with output_bias".format(results[0]))
# # model.predict(train_features[:10])
# initial_weights = MODEL_FOLDER_TF + "initial_weights/initial_weights_" + model_h5_name
# model.save_weights(initial_weights)
# print("model.save_weights initial_weights: ", initial_weights)
# resampled_ds = Utils_model_predict.get_resampled_ds_onBalance(train_features, train_labels, bool_train_labels,
# BATCH_SIZE)
# resampled_steps_per_epoch = np.ceil(2.0 * neg / BATCH_SIZE)
# # Train on the oversampled data
# # Now try training the model with the resampled data set instead of using class weights to see how these methods compare.
# # Note: Because the data was balanced by replicating the positive examples, the total dataset size is larger, and each epoch runs for more training steps.
# resampled_model = Utils_model_predict.make_model_TF_onbalance_fine_64(shape_features=train_features.shape[-1])
# resampled_model.load_weights(initial_weights)
# # Reset the bias to zero, since this dataset is balanced.
# output_layer = resampled_model.layers[-1]
# output_layer.bias.assign([0])
# val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels)).cache()
# val_ds = val_ds.batch(BATCH_SIZE).prefetch(2)
#
# #early_stopping = get_EarlyStopping(model_h5_name)
# early_stopping = Utils_model_predict.CustomEarlyStopping(patience=10)
# early_stopping_board = get_EarlyStopping_TensorFlowBoard(model_h5_name)
#
# resampled_history = resampled_model.fit(
# resampled_ds,
# epochs=EPOCHS,
# steps_per_epoch=resampled_steps_per_epoch,
# callbacks=[early_stopping], # callbacks=[early_stopping, early_stopping_board],
# validation_data=val_ds)
#
# __print_csv_accuracy_loss_models(MODEL_FOLDER_TF, model_h5_name, resampled_history)
# resampled_model.save(MODEL_FOLDER_TF + model_h5_name)
# print(" Save model : ", MODEL_FOLDER_TF + model_h5_name)
def predict_TF_onBalance(X_test, model_folder, model_h5_name):
    print(" \n", model_folder + model_h5_name)
    resampled_model_2 = keras.models.load_model(model_folder + model_h5_name)

    """### Re-check training history"""
    # plot_metrics(resampled_history)

    """### Evaluate metrics"""
    test_predictions_resampled = resampled_model_2.predict(X_test, batch_size=BATCH_SIZE)
    resampled_results = resampled_model_2.evaluate(X_test, test_labels,
                                                   batch_size=BATCH_SIZE, verbose=0)
    for name, value in zip(resampled_model_2.metrics_names, resampled_results):
        print(name, ': ', value)
    print()

    p_tolerance = 0.7
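    # p_tolerance is the decision threshold passed to the confusion-matrix plot: predictions scoring
    # at or above it are counted as positives (assuming plot_cm_TF_imbalance applies p that way).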
    # for to in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:  # [0.45,0.47,0.5,0.53,0.56,0.6]:
    #     p_tolerance = to
    Utils_plotter.plot_cm_TF_imbalance(test_labels, test_predictions_resampled,
                                       path=model_folder + "plot_TFbalance_" + model_h5_name.replace(".h5", "") + "_CM_" + str(p_tolerance) + ".png", p=p_tolerance)
    # Utils_plotter.plot_confusion_matrix(cf_matrix, model_folder + "plot_confusion_matrix.png")
    return test_predictions_resampled
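
# Illustrative usage sketch: predict_TF_onBalance evaluates against the module-level test_labels,
# which are only populated by train_TF_onBalance_One_dimension, so it should be called in the same
# process after training. The model file name below is a placeholder.
# predictions = predict_TF_onBalance(test_features, MODEL_FOLDER_TF, "TF_MELI_pos_low1_28.h5")
# print(predictions[:10])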