# Fake Instagram Detection_Using_ANN.py
# Import the libraries used throughout the script
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, roc_curve, confusion_matrix
# Load the training dataset
instagram_df_train = pd.read_csv('insta_train.csv')
print(instagram_df_train)
# Load the testing dataset
instagram_df_test = pd.read_csv('insta_test.csv')
print(instagram_df_test)
# Inspect the first and last rows of each dataframe
print(instagram_df_train.head())
print(instagram_df_train.tail())
print(instagram_df_test.head())
print(instagram_df_test.tail())
# Perform exploratory data analysis (EDA)
# Get dataframe info (info() prints directly)
instagram_df_train.info()
# Get the statistical summary of the dataframe
print(instagram_df_train.describe())
# Check whether null values exist
print(instagram_df_train.isnull().sum())
# Count the occurrences of each value in the "profile pic" feature
print(instagram_df_train['profile pic'].value_counts())
# Count the occurrences of each value in "fake" (the target column)
print(instagram_df_train['fake'].value_counts())
# Repeat the same checks for the test set
instagram_df_test.info()
print(instagram_df_test.describe())
print(instagram_df_test.isnull().sum())
print(instagram_df_test['fake'].value_counts())
# Perform data visualizations
# Visualize the target column "fake"
sns.countplot(x='fake', data=instagram_df_train)
plt.show()
# Visualize the "private" column
sns.countplot(x='private', data=instagram_df_train)
plt.show()
# Visualize the "profile pic" column
sns.countplot(x='profile pic', data=instagram_df_train)
plt.show()
# Visualize the distribution of the "nums/length username" feature
# (histplot replaces the deprecated distplot)
plt.figure(figsize=(20, 10))
sns.histplot(instagram_df_train['nums/length username'], kde=True)
plt.show()
# Correlation heatmap (numeric_only avoids errors on any non-numeric columns)
plt.figure(figsize=(20, 20))
cm = instagram_df_train.corr(numeric_only=True)
sns.heatmap(cm, annot=True)
plt.show()
# Repeat the count plots for the test set (each needs its own figure)
sns.countplot(x='fake', data=instagram_df_test)
plt.show()
sns.countplot(x='private', data=instagram_df_test)
plt.show()
sns.countplot(x='profile pic', data=instagram_df_test)
plt.show()
# Prepare the data to train the model
# Training and testing inputs: all columns except the target
X_train = instagram_df_train.drop(columns=['fake'])
X_test = instagram_df_test.drop(columns=['fake'])
print(X_train)
print(X_test)
# Training and testing outputs: the target column
y_train = instagram_df_train['fake']
y_test = instagram_df_test['fake']
print(y_train)
print(y_test)
# Scale the features before training; fit the scaler on the training set only
# and reuse it on the test set to avoid leaking test statistics
scaler_x = StandardScaler()
X_train = scaler_x.fit_transform(X_train)
X_test = scaler_x.transform(X_test)
# One-hot encode the labels to match the softmax output layer
y_train = tf.keras.utils.to_categorical(y_train, num_classes=2)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=2)
print(y_train)
print(y_test)
# Print the shapes of the training and testing datasets
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Percentage of samples used for training vs. testing
training_pct = len(X_train) / (len(X_train) + len(X_test)) * 100
print(training_pct)
testing_pct = len(X_test) / (len(X_train) + len(X_test)) * 100
print(testing_pct)
# Build and train the deep learning model
model = Sequential()
# Input layer expects 11 features, matching the number of input columns
model.add(Dense(50, input_dim=11, activation='relu'))
model.add(Dense(150, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(150, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(25, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(2, activation='softmax'))
model.summary()
# categorical_crossentropy matches the one-hot labels and the softmax output
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# validation_split=0.1 holds out 10% of the training rows for validation
epochs_hist = model.fit(X_train, y_train, epochs=50, verbose=1, validation_split=0.1)
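# Optional sanity check (an addition, not part of the original flow):
# Keras's evaluate() reports loss and accuracy on the held-out test set directly.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {test_loss:.4f}, test accuracy: {test_acc:.4f}')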
# Assess the performance of the model
print(epochs_hist.history.keys())
plt.plot(epochs_hist.history['loss'])
plt.plot(epochs_hist.history['val_loss'])
plt.title('Model Loss Progression During Training/Validation')
plt.ylabel('Loss')
plt.xlabel('Epoch Number')
plt.legend(['Training Loss', 'Validation Loss'])
plt.show()
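# The history also records accuracy (because metrics=['accuracy'] above),
# so the same plot can be drawn for training/validation accuracy.
plt.plot(epochs_hist.history['accuracy'])
plt.plot(epochs_hist.history['val_accuracy'])
plt.title('Model Accuracy Progression During Training/Validation')
plt.ylabel('Accuracy')
plt.xlabel('Epoch Number')
plt.legend(['Training Accuracy', 'Validation Accuracy'])
plt.show()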
# Predict class probabilities on the test set and convert both the
# predictions and the one-hot labels back to class indices
predicted = model.predict(X_test)
predicted_value = np.argmax(predicted, axis=1)
test = np.argmax(y_test, axis=1)
print(classification_report(test, predicted_value))
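# Overall accuracy on the test set, using the accuracy_score import above
print('Accuracy:', accuracy_score(test, predicted_value))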
# Confusion matrix heatmap (fmt='d' shows integer counts)
plt.figure(figsize=(10, 10))
cm = confusion_matrix(test, predicted_value)
sns.heatmap(cm, annot=True, fmt='d')
plt.show()
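# A minimal sketch added for illustration (not part of the original pipeline):
# the ROC curve over the positive-class probabilities gives a
# threshold-independent view, using the roc_curve import above.
fpr, tpr, thresholds = roc_curve(test, predicted[:, 1])
plt.plot(fpr, tpr, label='ANN')
plt.plot([0, 1], [0, 1], linestyle='--', label='Chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve on the Test Set')
plt.legend()
plt.show()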