-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmental_fitness_tracker.py
332 lines (267 loc) · 11.3 KB
/
mental_fitness_tracker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# -*- coding: utf-8 -*-
"""Untitled5.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1j8lNAuqmeUW6zIpccPflTmgM6j-p2mVc
"""
# Render the project header (GitHub button + author credit) as rich HTML output.
# `display` is injected as a builtin only inside IPython/Colab sessions, so it
# is imported explicitly here to avoid a NameError under a plain interpreter.
from IPython.display import HTML, display
html_content = """
<!DOCTYPE html>
<html>
<head>
<title>Mental Fitness Tracker Project</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
<style>
body {
font-family: Verdana, sans-serif;
}
.project-info {
color: black;
display: fill;
border-radius: 25px;
background-color: #808080; /* Grey background color */
font-size: 110%;
font-family: Verdana;
letter-spacing: 0.5px;
padding: 20px;
text-align: center;
max-width: 500px;
margin: 0 auto;
}
a {
color: red; /* Hyperlink color (change to your desired color) */
text-decoration: none; /* Optional: Remove underline from the hyperlink */
}
.github-button {
display: flex;
align-items: center;
justify-content: center;
margin: 10px auto; /* Adjust margin to separate the links */
padding: 8px 12px; /* Smaller padding */
width: 100px; /* Adjust width as needed */
background-color: #24292e; /* GitHub color */
color: white;
border: none;
border-radius: 5px;
font-size: 16px;
text-decoration: none;
transition: background-color 0.2s ease-in-out;
}
.github-button i {
margin-right: 5px; /* Reduce space between icon and text */
}
.github-button:hover {
background-color: #1c2024; /* GitHub color on hover */
cursor: pointer;
}
</style>
</head>
<body>
<div>
<a class="github-button" href="https://github.com/SrSurajithPranav/Mental_Fitness_Tracker_Project">
<i class="fab fa-github"></i>GitHub
</a>
</div>
<div class="project-info">
<p style="color: black;">
Mental Fitness Tracker Project by <a href="https://www.linkedin.com/in/surajith-pranav-234a2b221">Surajith Pranav</a>
</p>
</div>
</body>
</html>"""
display(HTML(html_content))
# Section banner: "IMPORT LIBRARIES".
# `display` is imported explicitly because it is only a builtin inside
# IPython/Colab; a plain `python` run would otherwise raise NameError.
from IPython.display import HTML, display
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 100px; height: 70px;">
<p style="padding: 0; margin: 5px; color: black;">
IMPORT LIBRARIES
</p>
</div>
"""
display(HTML(html_content))
import warnings
# Suppress every warning for the rest of the run.
warnings.filterwarnings('ignore')
# Import all libraries
import pandas as pd  # data processing, CSV I/O
import numpy as np  # linear algebra
# NOTE: the imports below are commented out. The scikit-learn names this
# script actually uses are imported further down, next to their first use.
# import matplotlib.pyplot as plt
# import seaborn as sns
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression, BayesianRidge
# from sklearn.svm import SVR
# from sklearn.tree import DecisionTreeRegressor
# from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
# from sklearn.preprocessing import PolynomialFeatures
# from sklearn.metrics import mean_squared_error, r2_score
# from xgboost import XGBRegressor
# from sklearn.neighbors import KNeighborsRegressor
# from sklearn.neural_network import MLPRegressor
import seaborn as sns  # data visualization library based on matplotlib
import matplotlib.pyplot as plt  # low-level graph plotting library used as the visualization backend
import plotly.express as px  # creates interactive plots with very little code
# Load the two source datasets. Both paths are relative, so the CSV files must
# be in the current working directory.
# prevalence-by-mental-and-substance-use-disorder.csv
df1 = pd.read_csv('prevalence-by-mental-and-substance-use-disorder.csv')
# mental-and-substance-use-as-share-of-disease.csv
df2 = pd.read_csv('mental-and-substance-use-as-share-of-disease.csv')
# A bare `df.head()` is only rendered by a notebook cell; print the previews so
# they also show up when this file is executed as a script.
print(df1.head())
print(df2.head())
# Section banner: "MERGING DATASETS".
# `display` is imported explicitly because it is only a builtin inside
# IPython/Colab; a plain `python` run would otherwise raise NameError.
from IPython.display import HTML, display
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 100px; height: 70px;">
<p style="padding: 0; margin: 5px; color: black;">
MERGING DATASETS
</p>
</div>
"""
display(HTML(html_content))
# Merge prevalence-by-mental-and-substance-use-disorder.csv with
# mental-and-substance-use-as-share-of-disease.csv. No `on=` is given, so
# pandas performs an inner join on every column name the two frames share.
data = pd.merge(df1, df2)
# Printed explicitly: a bare expression is discarded outside a notebook cell.
print(data.head(10))
# Section banner: "DATA CLEANING".
# `display` is imported explicitly because it is only a builtin inside
# IPython/Colab; a plain `python` run would otherwise raise NameError.
from IPython.display import HTML, display
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 100px; height: 70px;">
<p style="padding: 0; margin: 5px; color: black;">
DATA CLEANING
</p>
</div>
"""
display(HTML(html_content))
# Count the missing values per column (inspection only; nothing is filled in).
print(data.isnull().sum())
# Drop the 'Code' column.
data.drop('Code', axis=1, inplace=True)
# View the data.
print(data.head(10))
# size = rows * columns, shape = tuple of array dimensions (rows, columns)
print(data.size, data.shape)
# Rename the columns. `set_axis(..., inplace=True)` was removed in pandas 2.0,
# so the labels are assigned directly, which works on every pandas version and
# still requires the frame to have exactly ten columns at this point.
data.columns = ['Country', 'Year', 'Schizophrenia', 'Bipolar_disorder', 'Eating_disorder', 'Anxiety', 'drug_usage', 'depression', 'alcohol', 'mental_fitness']
print(data.head(10))  # the target (dependent variable) is mental_fitness
# Section banner: "DATA VISUALIZATION".
# `display` is imported explicitly because it is only a builtin inside
# IPython/Colab; a plain `python` run would otherwise raise NameError.
from IPython.display import HTML, display
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 100%; font-family: Verdana; letter-spacing: 0.5px; width: 140px; height: 80px;">
<p style="padding: 0; margin: 5px; color: black;">
DATA VISUALIZATION
</p>
</div>
"""
display(HTML(html_content))
plt.figure(figsize=(12, 6))
# Heatmap: colour-coded view of the correlation matrix. Only numeric columns
# are correlated: pandas >= 2.0 raises on non-numeric columns (presumably
# 'Country' here), while older versions silently dropped them, so the
# resulting matrix is the same either way.
sns.heatmap(data.select_dtypes(include='number').corr(), annot=True, cmap='Greens')
plt.show()  # was plt.plot(), which draws nothing and never shows the figure
# `data` is passed by keyword so the call works on seaborn versions where it
# is keyword-only as well as on newer ones.
sns.jointplot(data=data, x="Schizophrenia", y="mental_fitness", kind="reg", color="m")
plt.show()
sns.jointplot(data=data, x='Bipolar_disorder', y='mental_fitness', kind='reg', color='blue')
plt.show()
sns.pairplot(data, corner=True)  # pairwise relationships in the dataset
plt.show()
mean = data['mental_fitness'].mean()
# Printed explicitly: a bare expression is discarded outside a notebook cell.
print(mean)
fig = px.pie(data, values='mental_fitness', names='Year')
fig.show()
fig = px.bar(data.head(10), x='Year', y='mental_fitness', color='Year', template='ggplot2')
fig.show()
fig = px.line(data, x="Year", y="mental_fitness", color='Country', markers=True, color_discrete_sequence=['red', 'blue'], template='plotly_dark')
fig.show()
# Work on a copy so the frame used for plotting (`data`) is left untouched.
df = data.copy()
print(df.head())
# Information about the data (df.info() prints by itself).
df.info()
# Transform non-numeric labels to numeric labels.
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
for column in df.columns:
    # Only object-dtype columns are encoded; fit_transform re-fits the encoder
    # on each such column, so reusing one encoder instance is safe.
    if df[column].dtype == 'object':
        df[column] = encoder.fit_transform(df[column])
# Printed explicitly: a bare expression is discarded outside a notebook cell.
print(df.shape)
# Section banner: "DATA TRAINING AND TESTING".
# `display` is imported explicitly because it is only a builtin inside
# IPython/Colab; a plain `python` run would otherwise raise NameError.
from IPython.display import HTML, display
# The style attribute previously ended in `height9070px;`, which is not a valid
# CSS declaration, so the box had no height. `height: 90px;` is a best guess at
# the intended value -- TODO confirm.
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 130px; height: 90px;">
<p style="padding: 0; margin: 5px; color: black;">
DATA TRAINING AND TESTING
</p>
</div>
"""
display(HTML(html_content))
# Features are every column except the target; the target is mental_fitness.
X = df.drop('mental_fitness', axis=1)
y = df['mental_fitness']
from sklearn.model_selection import train_test_split  # splits the data into training and testing sets
# random_state seeds the random generator so the split is the same on every
# run; without it the split would differ each time.
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=2)
# Author's note on the expected sizes (assumes the frame is (6840, 10) --
# verify against the actual data):
#   training: 6840 * 80 / 100 = 5472 rows
#   testing:  6840 * 20 / 100 = 1368 rows
print("xtrain: ", xtrain.shape)
print("xtest: ", xtest.shape)
print("ytrain: ", ytrain.shape)
print("ytest: ", ytest.shape)
# Section banner: "LINEAR REGRESSION".
# `display` is imported explicitly because it is only a builtin inside
# IPython/Colab; a plain `python` run would otherwise raise NameError.
from IPython.display import HTML, display
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 120px; height :80px;">
<p style="padding: 0; margin: 5px; color: black;">
LINEAR REGRESSION
</p>
</div>
"""
display(HTML(html_content))
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


def _report_metrics(title, y_true, y_pred, blank_line_after=False):
    """Print and return the MSE, RMSE and R2 score of a set of predictions.

    Args:
        title: Header line printed above the metrics.
        y_true: Observed target values.
        y_pred: Values predicted by the model for the same rows.
        blank_line_after: When true, finish with ``print("\\n")``.

    Returns:
        Tuple ``(mse, rmse, r2)``.
    """
    # MSE: average of the squared differences between observed and predicted values.
    mse = mean_squared_error(y_true, y_pred)
    # RMSE: square root of the MSE, reused here instead of recomputing the MSE.
    rmse = np.sqrt(mse)
    # R2: coefficient of determination, a measure of the goodness of fit.
    r2 = r2_score(y_true, y_pred)
    print(title)
    print("--------------------------------------")
    print('MSE is {}'.format(mse))
    print('RMSE is {}'.format(rmse))
    print('R2 score is {}'.format(r2))
    if blank_line_after:
        print("\n")
    return mse, rmse, r2


lr = LinearRegression()
lr.fit(xtrain, ytrain)  # fit the training data
# Linear regression: model evaluation for the training set.
ytrain_pred = lr.predict(xtrain)
mse, rmse, r2 = _report_metrics("The model performance for training set", ytrain, ytrain_pred, blank_line_after=True)
# Section banner: "RANDOM FOREST REGRESSOR".
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 120px; height: 90px;">
<p style="padding: 0; margin: 5px; color: black;">
RANDOM FOREST REGRESSOR
</p>
</div>
"""
display(HTML(html_content))
from sklearn.ensemble import RandomForestRegressor
# Seeded like the train/test split so the forest, and therefore the reported
# metrics, are reproducible from run to run.
rf = RandomForestRegressor(random_state=2)
rf.fit(xtrain, ytrain)
# Random forest: model evaluation for the training set.
ytrain_pred = rf.predict(xtrain)
mse, rmse, r2 = _report_metrics("The model performance for training set", ytrain, ytrain_pred, blank_line_after=True)
# Linear regression: model evaluation for the testing set (unseen data).
ytest_pred = lr.predict(xtest)
mse, rmse, r2 = _report_metrics("linear regression model performance for testing set", ytest, ytest_pred)
# Random forest: model evaluation for the testing set (unseen data).
ytest_pred = rf.predict(xtest)
mse, rmse, r2 = _report_metrics(" random forest model performance for testing set", ytest, ytest_pred)