# mental_fitness_tracker.py

# -*- coding: utf-8 -*-
"""Mental Fitness Tracker project (Colab notebook export, originally Untitled5.ipynb).

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1j8lNAuqmeUW6zIpccPflTmgM6j-p2mVc
"""

from IPython.display import HTML, display

# Project header rendered at the top of the notebook: a GitHub button linking
# to the repository, followed by a rounded grey card crediting the author.
# `.project-info` uses `display: block` explicitly; the previous value `fill`
# is not a valid CSS `display` keyword, so browsers discarded the declaration
# and fell back to the <div> default (block) anyway.
html_content = """
<!DOCTYPE html>
<html>

<head>
    <title>Mental Fitness Tracker Project</title>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
    <style>
        body {
            font-family: Verdana, sans-serif;
        }

        .project-info {
            color: black;
            display: block;
            border-radius: 25px;
            background-color: #808080; /* Grey background color */
            font-size: 110%;
            font-family: Verdana;
            letter-spacing: 0.5px;
            padding: 20px;
            text-align: center;
            max-width: 500px;
            margin: 0 auto;
        }

        a {
            color: red; /* Hyperlink color (change to your desired color) */
            text-decoration: none; /* Optional: Remove underline from the hyperlink */
        }

        .github-button {
            display: flex;
            align-items: center;
            justify-content: center;
            margin: 10px auto; /* Adjust margin to separate the links */
            padding: 8px 12px; /* Smaller padding */
            width: 100px; /* Adjust width as needed */
            background-color: #24292e; /* GitHub color */
            color: white;
            border: none;
            border-radius: 5px;
            font-size: 16px;
            text-decoration: none;
            transition: background-color 0.2s ease-in-out;
        }

        .github-button i {
            margin-right: 5px; /* Reduce space between icon and text */
        }

        .github-button:hover {
            background-color: #1c2024; /* GitHub color on hover */
            cursor: pointer;
        }
    </style>
</head>

<body>
    <div>
        <a class="github-button" href="https://github.com/SrSurajithPranav/Mental_Fitness_Tracker_Project">
            <i class="fab fa-github"></i>GitHub
        </a>
    </div>

    <div class="project-info">
        <p style="color: black;">
            Mental Fitness Tracker Project by <a href="https://www.linkedin.com/in/surajith-pranav-234a2b221">Surajith Pranav</a>
        </p>
    </div>
</body>

</html>"""


# Render the project header markup held in `html_content` as rich HTML output.
display(HTML(data=html_content))

from IPython.display import HTML

# Section banner: a small rounded grey badge labelled "IMPORT LIBRARIES".
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 100px; height: 70px;">
    <p style="padding: 0; margin: 5px; color: black;">
        IMPORT LIBRARIES
    </p>
</div>
"""

# Show the badge as rich HTML output.
display(HTML(data=html_content))

import warnings
warnings.filterwarnings('ignore')
#import all libraries
import pandas as pd   #data processing ,CSV I/O
import numpy as np    #linear algebra
# import matplotlib.pyplot as plt
# import seaborn as sns
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression, BayesianRidge
# from sklearn.svm import SVR
# from sklearn.tree import DecisionTreeRegressor
# from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
# from sklearn.preprocessing import PolynomialFeatures
# from sklearn.metrics import mean_squared_error, r2_score
# from xgboost import XGBRegressor
# from sklearn.neighbors import KNeighborsRegressor
# from sklearn.neural_network import MLPRegressor
import seaborn as sns #seaborn is a Python data visualization library based on matplotlib
import matplotlib.pyplot as plt #matplotlib is a low-level graph plotting library in Python that serves as a visualization utility
import plotly.express as px #allows you to create interactive plots with very little code

# Read the two source CSV files from the working directory, in order:
# df1 <- prevalence-by-mental-and-substance-use-disorder.csv
# df2 <- mental-and-substance-use-as-share-of-disease.csv
df1, df2 = (
    pd.read_csv(csv_name)
    for csv_name in (
        'prevalence-by-mental-and-substance-use-disorder.csv',
        'mental-and-substance-use-as-share-of-disease.csv',
    )
)

# Preview the first rows of each table.
df1.head()

df2.head()

from IPython.display import HTML

# Section banner: rounded grey badge labelled "MERGING DATASETS".
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 100px; height: 70px;">
    <p style="padding: 0; margin: 5px; color: black;">
        MERGING DATASETS
    </p>
</div>
"""

display(HTML(data=html_content))

# Combine the prevalence table (df1) with the share-of-disease table (df2).
# No join keys are given, so pandas inner-joins on every column name the two
# frames have in common.
data = df1.merge(df2)
data.head(10)

from IPython.display import HTML

# Section banner: rounded grey badge labelled "DATA CLEANING".
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 100px; height: 70px;">
    <p style="padding: 0; margin: 5px; color: black;">
        DATA CLEANING
    </p>
</div>
"""

display(HTML(html_content))

# Report how many values are missing in each column (nothing is filled in
# here).  The inspection steps in this section are passed to display()
# explicitly: a bare expression in the middle of a cell, or anywhere in a
# script, is evaluated and silently thrown away.
display(data.isnull().sum())

# Drop the 'Code' column; the ten column names assigned below do not include it.
data.drop('Code', axis=1, inplace=True)
display(data.head(10))

# size = rows * columns, shape = (rows, columns)
display((data.size, data.shape))

# Give the columns short names.  Assigning to `data.columns` renames in place
# on every pandas version; `set_axis(..., inplace=True)` was deprecated in
# pandas 1.5 and removed in pandas 2.0, where it raises TypeError.
data.columns = [
    'Country', 'Year', 'Schizophrenia', 'Bipolar_disorder', 'Eating_disorder',
    'Anxiety', 'drug_usage', 'depression', 'alcohol', 'mental_fitness',
]
display(data.head(10))  # mental_fitness is the target (dependent) variable

from IPython.display import HTML

# Section banner: rounded grey badge labelled "DATA VISUALIZATION".
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 100%; font-family: Verdana; letter-spacing: 0.5px; width: 140px; height: 80px;">
    <p style="padding: 0; margin: 5px; color: black;">
        DATA VISUALIZATION
    </p>
</div>
"""

display(HTML(html_content))

# Heatmap of the pairwise correlation matrix, annotated with the coefficients.
plt.figure(figsize=(12,6))
# Correlate the numeric columns only.  Since pandas 2.0 DataFrame.corr() no
# longer drops non-numeric columns on its own and raises when it meets string
# data; selecting the numeric columns first gives the same matrix that older
# pandas versions produced implicitly.
sns.heatmap(data.select_dtypes(include='number').corr(),annot=True,cmap='Greens')
# plt.show() renders the figure; plt.plot() with no arguments only adds an
# empty line to the axes.
plt.show()

# Scatter + regression fit of each disorder against the target, with marginal
# distributions.  The frame is passed as `data=`: seaborn accepts a positional
# DataFrame in jointplot only from version 0.12, whereas the keyword form
# works on older and newer releases alike.
sns.jointplot(data=data,x="Schizophrenia",y="mental_fitness",kind="reg",color="m")
plt.show()

sns.jointplot(data=data,x='Bipolar_disorder',y='mental_fitness',kind='reg',color='blue')
plt.show()

# Pairwise relationships between the columns; corner=True draws only the
# lower triangle of the grid.
sns.pairplot(data,corner=True)
plt.show()

# Average of the target column over all rows.
mean = data['mental_fitness'].mean()
mean

# Pie chart of mental_fitness with one sector per Year.
fig = px.pie(
    data,
    names='Year',
    values='mental_fitness',
)
fig.show()

# Bar chart of the first ten rows only, coloured by Year.
fig = px.bar(
    data.head(10),
    x='Year',
    y='mental_fitness',
    color='Year',
    template='ggplot2',
)
fig.show()

# One line per Country over time; the two-colour sequence is cycled across
# the countries.
fig = px.line(
    data,
    x="Year",
    y="mental_fitness",
    color='Country',
    markers=True,
    color_discrete_sequence=['red','blue'],
    template='plotly_dark',
)
fig.show()

# Work on a copy so the encoding below leaves `data` as it is.
df=data.copy()
df.head()

# Column dtypes and non-null counts.
df.info()

# Replace every non-numeric column with integer labels.
from sklearn.preprocessing import LabelEncoder
l=LabelEncoder()
# Select columns by "not numeric" instead of comparing the dtype to 'object':
# string columns stored with pandas' dedicated string dtype (opt-in via
# `future.infer_string`, the default from pandas 3.0) are not reported as
# 'object' and would otherwise be left unencoded, which the estimators fitted
# later cannot handle.
for i in df.select_dtypes(exclude=['number', 'bool']).columns:
    # fit_transform refits the shared encoder each time, so `l` ends up
    # holding the classes of the last column encoded.
    df[i]=l.fit_transform(df[i])

df.shape

from IPython.display import HTML

# Section banner: rounded grey badge labelled "DATA TRAINING AND TESTING".
# The height declaration previously read `height9070px;`, which is not valid
# CSS and was ignored by browsers; it is now a well-formed `height: 90px;`.
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 130px; height: 90px;">
    <p style="padding: 0; margin: 5px; color: black;">
        DATA TRAINING AND TESTING
    </p>
</div>
"""

display(HTML(html_content))

# Features are every column except the target; the target is mental_fitness.
X = df.drop('mental_fitness',axis=1)
y = df['mental_fitness']
from sklearn.model_selection import train_test_split   # splits the data into training and testing subsets
# Hold out 20% of the rows for testing.  random_state seeds the shuffling so
# the split is the same on every run; without it the split would differ each time.
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=2)
# Original author's note: the full dataset is (6840, 10), so
#   training rows: 6840 * 80 / 100 = 5472
#   testing rows:  6840 * 20 / 100 = 1368
print("xtrain: ", xtrain.shape)
print("xtest: ", xtest.shape)
print("ytrain: ", ytrain.shape)
print("ytest: ", ytest.shape)

from IPython.display import HTML

# Section banner: rounded grey badge labelled "LINEAR REGRESSION".
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 120px; height :80px;">
    <p style="padding: 0; margin: 5px; color: black;">
        LINEAR REGRESSION
    </p>
</div>
"""

display(HTML(data=html_content))

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit a linear regression model on the training split.
lr = LinearRegression()
lr.fit(xtrain, ytrain)

# Evaluate the fitted model on the same data it was trained on.
ytrain_pred = lr.predict(xtrain)
# MSE: average of the squared differences between observed and predicted values.
mse = mean_squared_error(ytrain, ytrain_pred)
# RMSE: square root of the MSE, i.e. the typical size of a prediction error.
rmse = np.sqrt(mse)
# R2 (coefficient of determination): a goodness-of-fit measure for the model.
r2 = r2_score(ytrain, ytrain_pred)

# Emit the report in one call; the output is the same as printing each line
# separately.
print(
    "The model performance for training set",
    "--------------------------------------",
    'MSE is {}'.format(mse),
    'RMSE is {}'.format(rmse),
    'R2 score is {}'.format(r2),
    "\n",
    sep="\n",
)

from IPython.display import HTML

# Section banner: rounded grey badge labelled "RANDOM FOREST REGRESSOR".
html_content = """
<div style="color:black; display: flex; justify-content: center; align-items: center; border-radius: 25px; background-color: #808080; font-size: 110%; font-family: Verdana; letter-spacing: 0.5px; width: 120px; height: 90px;">
    <p style="padding: 0; margin: 5px; color: black;">
        RANDOM FOREST REGRESSOR
    </p>
</div>
"""

display(HTML(html_content))

from sklearn.ensemble import RandomForestRegressor
# Seed the forest so the fitted model, and therefore every metric reported for
# it, is reproducible from run to run.  The seed matches the one used for the
# train/test split (random_state=2).
rf = RandomForestRegressor(random_state=2)
rf.fit(xtrain, ytrain)

# Model evaluation on the training set.
ytrain_pred = rf.predict(xtrain)
mse = mean_squared_error(ytrain, ytrain_pred)
rmse = np.sqrt(mse)  # reuse the MSE instead of computing it a second time
r2 = r2_score(ytrain, ytrain_pred)

print("The model performance for training set")
print("--------------------------------------")
print('MSE is {}'.format(mse))
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print("\n")

def _report_test_performance(model, heading):
    """Score *model* on the held-out test split and print the results.

    Predicts on ``xtest``, compares the predictions with ``ytest`` and prints
    *heading*, a separator line, then the MSE, RMSE and R2 score.

    Returns:
        A ``(predictions, mse, rmse, r2)`` tuple.
    """
    predictions = model.predict(xtest)  # unseen data
    test_mse = mean_squared_error(ytest, predictions)
    test_rmse = np.sqrt(test_mse)
    test_r2 = r2_score(ytest, predictions)

    print(heading)
    print("--------------------------------------")
    print('MSE is {}'.format(test_mse))
    print('RMSE is {}'.format(test_rmse))
    print('R2 score is {}'.format(test_r2))
    return predictions, test_mse, test_rmse, test_r2


# Linear regression first, then the random forest.  The module-level names
# are rebound after each report, so they end up holding the random forest's
# test-set results.
ytest_pred, mse, rmse, r2 = _report_test_performance(
    lr, "linear regression model performance for testing set"
)
ytest_pred, mse, rmse, r2 = _report_test_performance(
    rf, " random forest model performance for testing set"
)