-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathregression.py
122 lines (93 loc) · 4.03 KB
/
regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import numpy as np
#import cupy as np
import pandas as pd
from mlp import MLP
class Regression(MLP):
    """Single-output MLP regression model, demonstrated on the abalone dataset.

    Relies on the ``MLP`` base class for network construction
    (``set_hidden``, ``init_model``), the forward/backward passes
    (``forward_neuralnet``, ``backprop_neuralnet``) and weight updates.
    This subclass supplies the regression-specific pieces: MSE loss,
    its gradient, a relative-error accuracy metric, and the abalone
    data pipeline.
    """

    def __init__(self, layer):
        # Fixed seed so weight initialisation is reproducible across runs.
        np.random.seed(1234)
        self.RND_MEAN = 0
        self.RND_STD = 0.0030
        self.LEARNING_RATE = 0.001
        self.set_hidden(layer)

    def load_abalone_dataset(self):
        """Load the abalone CSV into ``self.data``.

        The first CSV column ('sex': I/M/F) is expanded into three one-hot
        columns; the remaining 8 numeric columns are copied as-is, giving
        10 input columns plus 1 target column (ring count).
        """
        df = pd.read_csv('data/chap01/abalone.csv', header=None, skiprows=1)
        self.input_cnt, self.output_cnt = 10, 1
        self.data = np.zeros([len(df), self.input_cnt + self.output_cnt])
        # One-hot encode the sex column: I -> col 0, M -> col 1, F -> col 2.
        # The categories are mutually exclusive, hence elif.
        for index, row in df.iterrows():
            if row[0] == 'I':
                self.data[index, 0] = 1
            elif row[0] == 'M':
                self.data[index, 1] = 1
            elif row[0] == 'F':
                self.data[index, 2] = 1
        # Copy the remaining numeric columns (including the target) verbatim.
        self.data[:, 3:] = df.loc[:, 1:]

    def train_and_test(self, epoch_count, mb_size, report):
        """Run the full training loop, reporting test accuracy periodically.

        epoch_count -- number of passes over the training split
        mb_size     -- mini-batch size
        report      -- print test accuracy every `report` epochs (0 = never)
        """
        step_count = self.arrange_data(mb_size)
        test_x, test_y = self.get_test_data()
        for epoch in range(epoch_count):
            losses, accs = [], []
            # BUG FIX: the inner loop must iterate over the mini-batch
            # steps (step_count), not epoch_count — otherwise the number
            # of batches per epoch is wrong and most data is never seen.
            for n in range(step_count):
                train_x, train_y = self.get_train_data(mb_size, n)
                loss, acc = self.run_train(train_x, train_y)
                losses.append(loss)
                accs.append(acc)
            if report > 0 and (epoch + 1) % report == 0:
                acc = self.run_test(test_x, test_y)
                print('Epoch {}: loss={:5.3f}, accuracy={:5.3f}/{:5.3f}'.\
                      format(epoch + 1, np.mean(losses), np.mean(accs), acc))
        final_acc = self.run_test(test_x, test_y)
        print('\nFinal Test: final accuracy = {:5.3f}'.format(final_acc))

    def arrange_data(self, mb_size):
        """Shuffle row indices and split 80/20 into train/test.

        Returns the number of whole mini-batches in the training split;
        also records ``self.test_begin_idx``, the start of the test split
        inside ``self.shuffle_map``.
        """
        self.shuffle_map = np.arange(self.data.shape[0])
        np.random.shuffle(self.shuffle_map)
        step_count = int(self.data.shape[0] * 0.8) // mb_size
        self.test_begin_idx = step_count * mb_size
        return step_count

    def get_test_data(self):
        """Return (inputs, targets) for the held-out test split."""
        test_data = self.data[self.shuffle_map[self.test_begin_idx:]]
        return test_data[:, :-self.output_cnt], test_data[:, -self.output_cnt:]

    def get_train_data(self, mb_size, nth):
        """Return (inputs, targets) for the nth mini-batch.

        Reshuffles the training portion of the index map at the start of
        every epoch (nth == 0) so batch composition varies between epochs.
        """
        if nth == 0:
            np.random.shuffle(self.shuffle_map[:self.test_begin_idx])
        train_data = self.data[self.shuffle_map[mb_size * nth:mb_size * (nth + 1)]]
        return train_data[:, :-self.output_cnt], train_data[:, -self.output_cnt:]

    def run_train(self, x, y):
        """One forward/backward pass on a mini-batch; returns (loss, accuracy)."""
        output, aux_nn = self.forward_neuralnet(x)
        loss, aux_pp = self.forward_postproc(output, y)
        accuracy = self.eval_accuracy(output, y)
        G_loss = 1.0  # dL/dL — seed for backpropagation
        G_output = self.backprop_postproc(G_loss, aux_pp)
        self.backprop_neuralnet(G_output, aux_nn)
        return loss, accuracy

    def run_test(self, x, y):
        """Forward pass only; returns accuracy on (x, y)."""
        output, _ = self.forward_neuralnet(x)
        accuracy = self.eval_accuracy(output, y)
        return accuracy

    def forward_postproc(self, output, y):
        """Mean-squared-error loss; returns (loss, diff) where diff feeds backprop."""
        diff = output - y
        square = np.square(diff)
        loss = np.mean(square)
        return loss, diff

    def backprop_postproc(self, G_loss, diff):
        """Gradient of the MSE loss w.r.t. the network output.

        Chain: loss = mean(square), square = diff**2, diff = output - y,
        so dL/d_output = G_loss * (1/N) * 2*diff * 1.
        """
        shape = diff.shape
        g_loss_square = np.ones(shape) / np.prod(shape)  # d(mean)/d(square)
        g_square_diff = 2 * diff                          # d(square)/d(diff)
        g_diff_output = 1                                 # d(diff)/d(output)
        G_square = g_loss_square * G_loss
        G_diff = g_square_diff * G_square
        G_output = g_diff_output * G_diff
        return G_output

    def backprop_postproc_oneline(self, diff):
        """Equivalent one-line form of backprop_postproc (G_loss assumed 1)."""
        return 2 * diff / np.prod(diff.shape)

    def eval_accuracy(self, output, y):
        """1 minus the mean relative error |output - y| / y.

        NOTE(review): divides by y — assumes targets are never zero
        (true for abalone ring counts).
        """
        mdiff = np.mean(np.abs((output - y) / y))
        return 1 - mdiff

    def abalone_exec(self, epoch_count=10, mb_size=10, report=1):
        """Convenience driver: load data, initialise weights, train and test."""
        self.load_abalone_dataset()
        self.init_model()
        self.train_and_test(epoch_count, mb_size, report)
if __name__ == "__main__":
    # Build a regression model with one hidden layer of 6 units and
    # train it on the abalone dataset, reporting at the final epoch.
    model = Regression([6])
    model.abalone_exec(epoch_count=10, report=10)
    print(model.data)