-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfinal pro.py
106 lines (85 loc) · 3.34 KB
/
final pro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
from keras.layers import Input, Dense
from keras.models import Model
from keras import optimizers
from keras.layers import Conv1D, MaxPooling2D
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import *
import numpy as np
import tensorflow as tf
# Load the dataset. `mis` lists extra strings to treat as missing values; it is
# empty, so only pandas' default NA markers apply.
mis = []
data = pd.read_csv("pima.csv", na_values=mis)

# Report how many missing values each column contains. Rows with missing
# values are not dropped.
print(data.isna().sum())

# The first `inp` columns are the features; the column at index `inp` is the label.
inp = 8
X = data.values[:, :inp]
y = data.values[:, inp]

# Binarise the label: a value equal to 1 stays 1, anything else becomes 0.
y_edit = np.where(y == 1, 1, 0)

# Hold out 20% of the rows for evaluation. The binarised labels are the ones
# passed to the split (previously `y_edit` was computed but the raw `y` column
# was split instead, leaving `y_edit` unused).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_edit, test_size=0.2, random_state=0
)
##### Baseline: no preprocessing #####
# Fit a logistic-regression classifier on the raw feature columns and print
# the area under the ROC curve measured on the held-out split.
# NOTE(review): roc_curve is given the hard class labels returned by
# predict(), not probability scores - confirm this is the intended metric.
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()
LogregModel = logreg.fit(X_train, y_train)
predicts = LogregModel.predict(X_test)
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=predicts)
logauc = auc(fpr, tpr)
print("AUC without preprocessing:", logauc)
# Size of the reduced representation; used for the PCA projection here and for
# the narrowest Dense layer of the autoencoder further down.
dim = 4

##### PCA #####
pca_model = PCA(n_components=dim)
# Learn the principal components from the training rows only.
X_train_new = pca_model.fit_transform(X_train)
# Project the test rows onto those same components. Calling fit_transform here
# would refit PCA on the test set and yield features in a different basis than
# the one the classifier below is trained on.
X_test_new = pca_model.transform(X_test)

from sklearn.linear_model import LogisticRegression

# Train on the projected training rows and score on the projected test rows.
# NOTE(review): roc_curve is given the hard class labels returned by
# predict(), not probability scores - confirm this is the intended metric.
logreg = LogisticRegression()
LogregModel = logreg.fit(X_train_new, y_train)
predicts = LogregModel.predict(X_test_new)
fpr, tpr, thresholds = roc_curve(y_test, predicts)
log_PCauc = auc(fpr, tpr)
print("AUC with PCA:", log_PCauc)
####### Autoencoder #######
# Symbolic input: one row of `inp` features.
input_img = Input(shape=(inp,))

# Encoder: Dense(inp) -> Dense(5) -> Dense(dim), each followed by ReLU.
encoded = Dense(inp, activation='relu')(input_img)
encoded = Dense(5, activation='relu')(encoded)
encoded = Dense(dim, activation='relu')(encoded)

# Decoder: Dense(5) -> Dense(inp), each followed by ReLU.
decoded = Dense(5, activation='relu')(encoded)
decoded = Dense(inp, activation='relu')(decoded)

# `autoencoder` maps an input to its reconstruction; `encoder` shares the same
# layers and maps an input to its `dim`-wide encoded representation.
autoencoder = Model(input_img, decoded)
encoder = Model(input_img, encoded)

# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# `lr` is kept as written in the original script - confirm against the
# installed Keras version.
autoencoder.compile(optimizer=optimizers.Adadelta(lr=0.004), loss='mse')

# Train the network to reproduce its own input; the held-out rows are used
# only as validation data.
autoencoder.fit(
    x=X_train,
    y=X_train,
    epochs=5000,
    batch_size=750,
    validation_data=(X_test, X_test),
)

# Encode both splits with the trained encoder. The previous hand-written
# tf.matmul chain applied only the weights and biases of the three encoder
# layers and skipped their ReLU activations, so its output was not the
# network's actual encoding; it also relied on tf.Session.
encoded_train = encoder.predict(X_train)
encoded_test = encoder.predict(X_test)

from sklearn.linear_model import LogisticRegression

# Train on the encoded training rows and score on the encoded test rows.
# NOTE(review): roc_curve is given the hard class labels returned by
# predict(), not probability scores - confirm this is the intended metric.
logreg = LogisticRegression(solver='lbfgs')
LogregModel = logreg.fit(encoded_train, y_train)
predicts = LogregModel.predict(encoded_test)
fpr, tpr, thresholds = roc_curve(y_test, predicts)
log_encodauc = auc(fpr, tpr)
print("AUC with Autoencoder:", log_encodauc)