-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreamlit_app.py
329 lines (287 loc) · 15 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
import datetime
import pandas as pd
import streamlit as st
import joblib
from sklearn.preprocessing import StandardScaler
import io
# Module-level in-memory byte buffer, created once each time the script is loaded.
buffer = io.BytesIO()
def data_preprocessing(data_input, single_data, n):
    """Standardize input rows together with the reference student dataset.

    The reference data is read from 'student_data_filtered.csv' (its 'Status'
    column is dropped), the input rows are placed in front of it, and a
    StandardScaler is fitted on and applied to the combined table.

    Args:
        data_input: DataFrame holding the rows to predict. It must contain
            every feature column of the reference CSV; column order does not
            matter and extra columns are ignored.
        single_data: When true, return only row ``n`` (kept two-dimensional).
            Otherwise return the first ``n`` rows.
        n: Row position (single mode) or number of leading rows (batch mode)
            of the scaled table to return.

    Returns:
        The selected rows of the scaled table as produced by
        ``StandardScaler.fit_transform``.

    Raises:
        ValueError: If ``data_input`` lacks a column of the reference data.
    """
    reference = pd.read_csv('student_data_filtered.csv').drop(columns=['Status'])

    missing = [col for col in reference.columns if col not in data_input.columns]
    if missing:
        raise ValueError(
            'Input data is missing required columns: ' + ', '.join(missing))

    # pd.concat takes its column order from the first frame, and the scaled
    # result is a bare array, so the caller's column order would otherwise
    # decide which feature lands in which position. Align the input to the
    # reference layout so the output order is always the same.
    # NOTE(review): assumes the model was trained on the CSV's column order
    # -- confirm against the training notebook.
    aligned_input = data_input[list(reference.columns)]
    combined = pd.concat([aligned_input, reference])

    # NOTE(review): the scaler is fitted on reference + input rows, so the
    # scaled value of one row depends on the other rows submitted with it.
    # Kept as-is to preserve current numeric behavior; consider persisting
    # the scaler fitted at training time instead.
    scaled = StandardScaler().fit_transform(combined)

    if single_data:
        # List index keeps the result two-dimensional: one row, all features.
        return scaled[[n]]
    return scaled[0:n]
def model_predict(df):
    """Return the predictions of the model stored in 'model_rf.joblib' for ``df``.

    The model file is loaded from disk on every call.
    """
    estimator = joblib.load('model_rf.joblib')
    predictions = estimator.predict(df)
    return predictions
def color_mapping(value):
    """Return a CSS color rule: green for 'Graduate', red for anything else."""
    if value == 'Graduate':
        return 'color: green'
    return 'color: red'
# Numeric encodings of the categorical features. The key order is also the
# order in which the options are shown in the form widgets.
_GENDER_MAPPING = {
    'Male': 1,
    'Female': 0,
}

_MARITAL_STATUS_MAPPING = {
    'Single': 1,
    'Married': 2,
    'Widower': 3,
    'Divorced': 4,
    'Facto Union': 5,
    'Legally Seperated': 6,
}

_APPLICATION_MAPPING = {
    '1st Phase - General Contingent': 1,
    '1st Phase - Special Contingent (Azores Island)': 5,
    '1st Phase - Special Contingent (Madeira Island)': 16,
    '2nd Phase - General Contingent': 17,
    '3rd Phase - General Contingent': 18,
    'Ordinance No. 612/93': 2,
    'Ordinance No. 854-B/99': 10,
    'Ordinance No. 533-A/99, Item B2 (Different Plan)': 26,
    'Ordinance No. 533-A/99, Item B3 (Other Institution)': 27,
    'International Student (Bachelor)': 15,
    'Over 23 Years Old': 39,
    'Transfer': 42,
    'Change of Course': 43,
    'Holders of Other Higher Courses': 7,
    'Short Cycle Diploma Holders': 53,
    'Technological Specialization Diploma Holders': 44,
    'Change of Institution/Course': 51,
    'Change of Institution/Course (International)': 57,
}

# Excel template column -> model feature name, listed in the feature order
# used by the single-data form. Both tabs build their frames from this single
# definition so they cannot drift apart (the upload path previously listed the
# 2nd-semester columns in a different order than the form).
# NOTE(review): confirm this order matches the one the model was trained on.
_TEMPLATE_TO_FEATURE = {
    'Marital Status': 'Marital_status',
    'Application Mode': 'Application_mode',
    'Previous Qualification Grade': 'Previous_qualification_grade',
    'Admission Grade': 'Admission_grade',
    'Displaced': 'Displaced',
    'Debtor': 'Debtor',
    'Tuition up to date': 'Tuition_fees_up_to_date',
    'Gender': 'Gender',
    'Scholarship': 'Scholarship_holder',
    'Age at Enrollment': 'Age_at_enrollment',
    'Units 1st Semester Enrolled': 'Curricular_units_1st_sem_enrolled',
    'Units 1st Semester Approved': 'Curricular_units_1st_sem_approved',
    'Units 1st Semester Grade': 'Curricular_units_1st_sem_grade',
    'Units 2nd Semester Enrolled': 'Curricular_units_2nd_sem_enrolled',
    'Units 2nd Semester Evaluations': 'Curricular_units_2nd_sem_evaluations',
    'Units 2nd Semester Approved': 'Curricular_units_2nd_sem_approved',
    'Units 2nd Semester Grade': 'Curricular_units_2nd_sem_grade',
    'Units 2nd Semester No Evaluations': 'Curricular_units_2nd_sem_without_evaluations',
}
_FEATURE_COLUMNS = list(_TEMPLATE_TO_FEATURE.values())

# Media type of .xlsx workbooks ('application/vnd.ms-excel' is legacy .xls).
_XLSX_MIME = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'


def _render_single_tab():
    """Render the single-student form and show its prediction in a dialog."""
    with st.container():
        col_gender, col_age, col_marital = st.columns([2, 2, 3])
        with col_gender:
            gender = st.radio('Gender', options=list(_GENDER_MAPPING),
                              help='The gender of the student')
        with col_age:
            age = st.number_input(
                'Age at Enrollment', min_value=17, max_value=70,
                help='The age of the student at the time of enrollment')
        with col_marital:
            marital_status = st.selectbox(
                'Marital Status', tuple(_MARITAL_STATUS_MAPPING),
                help='The marital status of the student')
    st.write('')
    st.write('')

    with st.container():
        col_application, col_prev_grade, col_admission_grade = st.columns([3, 1.65, 1.1])
        with col_application:
            application_mode = st.selectbox(
                'Application Mode', tuple(_APPLICATION_MAPPING),
                help='The method of application used by the student')
        with col_prev_grade:
            prev_qualification_grade = st.number_input(
                'Previous Qualification Grade',
                help='Grade of previous qualification (0-200)', min_value=0, max_value=200)
        with col_admission_grade:
            admission_grade = st.number_input(
                'Admission Grade',
                help="Student's admission grade (0-200)", min_value=0, max_value=200)

    with st.container():
        col_scholarship, col_tuition, col_displaced, col_debtor = st.columns([1.7, 2.1, 1.55, 1])
        with col_scholarship:
            scholarship_holder = int(st.checkbox(
                'Scholarship', help='Whether the student is a scholarship holder'))
        with col_tuition:
            tuition_fees = int(st.checkbox(
                'Tuition up to date', help="Whether the student's tuition fees are up to date"))
        with col_displaced:
            displaced = int(st.checkbox(
                'Displaced', help='Whether the student is a displaced person'))
        with col_debtor:
            debtor = int(st.checkbox(
                'Debtor', help='Whether the student is a debtor'))
    st.write('')
    st.write('')

    with st.container():
        col_1_enroll, col_2_enroll, col_2_eval = st.columns([1, 1, 1.2])
        with col_1_enroll:
            curricular_units_1st_sem_enrolled = st.number_input(
                'Units 1st Semester Enrolled', min_value=0, max_value=26,
                help='The number of curricular units enrolled by the student in the first semester')
        with col_2_enroll:
            curricular_units_2nd_sem_enrolled = st.number_input(
                'Units 2nd Semester Enrolled', min_value=0, max_value=23,
                help='The number of curricular units enrolled by the student in the second semester')
        with col_2_eval:
            curricular_units_2nd_sem_evaluations = st.number_input(
                'Units 2nd Semester Evaluations', min_value=0, max_value=33,
                help='The number of curricular units evaluations by the student in the second semester')

    with st.container():
        col_1_approved, col_2_approved, col_2_noeval = st.columns([1, 1, 1.2])
        with col_1_approved:
            curricular_units_1st_sem_approved = st.number_input(
                'Units 1st Semester Approved', min_value=0, max_value=26,
                help='The number of curricular units approved by the student in the first semester')
        with col_2_approved:
            curricular_units_2nd_sem_approved = st.number_input(
                'Units 2nd Semester Approved', min_value=0, max_value=20,
                help='The number of curricular units approved by the student in the second semester')
        with col_2_noeval:
            curricular_units_2nd_sem_without_evaluations = st.number_input(
                'Units 2nd Semester No Evaluations', min_value=0, max_value=12,
                help='The number of curricular units without evaluations by the student in the second semester')

    with st.container():
        # The third column is intentionally left empty to keep the grid aligned.
        col_1_grade, col_2_grade, _ = st.columns([1, 1, 1.2])
        with col_1_grade:
            curricular_units_1st_sem_grade = st.number_input(
                'Units 1st Semester Grade', min_value=0, max_value=20,
                help='The number of curricular units grade by the student in the first semester')
        with col_2_grade:
            curricular_units_2nd_sem_grade = st.number_input(
                'Units 2nd Semester Grade', min_value=0, max_value=20,
                help='The number of curricular units grade by the student in the second semester')

    # One-row frame with the categorical inputs encoded as numbers.
    row = {
        'Marital_status': _MARITAL_STATUS_MAPPING.get(marital_status),
        'Application_mode': _APPLICATION_MAPPING.get(application_mode),
        'Previous_qualification_grade': prev_qualification_grade,
        'Admission_grade': admission_grade,
        'Displaced': displaced,
        'Debtor': debtor,
        'Tuition_fees_up_to_date': tuition_fees,
        'Gender': _GENDER_MAPPING.get(gender),
        'Scholarship_holder': scholarship_holder,
        'Age_at_enrollment': age,
        'Curricular_units_1st_sem_enrolled': curricular_units_1st_sem_enrolled,
        'Curricular_units_1st_sem_approved': curricular_units_1st_sem_approved,
        'Curricular_units_1st_sem_grade': curricular_units_1st_sem_grade,
        'Curricular_units_2nd_sem_enrolled': curricular_units_2nd_sem_enrolled,
        'Curricular_units_2nd_sem_evaluations': curricular_units_2nd_sem_evaluations,
        'Curricular_units_2nd_sem_approved': curricular_units_2nd_sem_approved,
        'Curricular_units_2nd_sem_grade': curricular_units_2nd_sem_grade,
        'Curricular_units_2nd_sem_without_evaluations':
            curricular_units_2nd_sem_without_evaluations,
    }
    df = pd.DataFrame([row], columns=_FEATURE_COLUMNS)

    @st.dialog('Result')
    def show_prediction(label):
        """Show the predicted status; a label of 1 means Graduate."""
        if label == 1:
            st.success('Student Status Prediction: **Graduate**')
        else:
            st.error('Student Status Prediction: **Dropout**')

    if st.button('✨ Predict'):
        data_input = data_preprocessing(df, True, 0)
        output = model_predict(data_input)
        # The model returns one label per row; exactly one row was sent.
        show_prediction(output[0])


def _render_multiple_tab():
    """Render the Excel upload workflow: template, preview, predict, export."""
    with st.expander('**User Guide**'):
        st.write("""
        1. First download the student data Excel template.
        2. Complete all student data columns in the Excel file.
        3. Upload the student data Excel file.
        4. Click the (**✨ Predict Data**) button.
        5. The prediction results will appear in table below
        6. The prediction results table can be downloaded in Excel format.
        """)

    # File template download button
    with open('student_data_template.xlsx', 'rb') as file:
        st.download_button(
            label='Download Template',
            data=file,
            file_name='Student Data Template.xlsx',
            mime=_XLSX_MIME,
            help='Download student data excel template')

    # File upload button
    uploaded_file = st.file_uploader(
        label='Upload Student Data',
        type=['xlsx', 'xls'],
        help='Upload student data with the template format')
    if uploaded_file is None:
        return

    up = pd.read_excel(uploaded_file)

    # Fail with a readable message instead of a KeyError or silently created
    # NaN columns when the file does not follow the template.
    required_columns = ['ID', 'Name', *_TEMPLATE_TO_FEATURE]
    missing = [col for col in required_columns if col not in up.columns]
    if missing:
        st.error('The uploaded file is missing required columns: '
                 + ', '.join(missing))
        return
    if up.empty:
        st.error('The uploaded file does not contain any student rows.')
        return

    up['ID'] = up['ID'].astype(str)
    st.write('')
    st.write('')

    # Preview uploaded data. A slider needs a range, and its default of 2
    # must not exceed the row count, so single-row uploads skip the slider.
    if len(up) > 1:
        preview = st.slider('**Preview Rows**', 1, len(up), 2)
    else:
        preview = 1
    st.dataframe(up.head(preview))

    # Select the template columns and rename them to the model feature names;
    # the resulting column order is the shared feature order.
    features = up[list(_TEMPLATE_TO_FEATURE)].rename(columns=_TEMPLATE_TO_FEATURE)

    # Encode the categorical columns. Values not found in a mapping become
    # NaN, which is reported below rather than passed on to the model.
    categorical = {
        'Gender': _GENDER_MAPPING,
        'Marital_status': _MARITAL_STATUS_MAPPING,
        'Application_mode': _APPLICATION_MAPPING,
    }
    features = features.assign(**{
        column: features[column].map(mapping)
        for column, mapping in categorical.items()
    })
    unmapped = [column for column in categorical if features[column].isna().any()]
    if unmapped:
        st.error('Unrecognized or empty values found in columns: '
                 + ', '.join(unmapped))
        return

    # Multiple data predict button
    if st.button('✨ Predict Data'):
        df_input = data_preprocessing(features, False, len(up))
        output = model_predict(df_input)
        statuses = ['Graduate' if pred == 1 else 'Dropout' for pred in output]
        result = pd.DataFrame({
            'ID': up['ID'],
            'Name': up['Name'],
            'Status': statuses,
        })
        st.write('')
        st.write('')
        st.write('**Results**')
        st.dataframe(result.style.applymap(color_mapping, subset=['Status']))

        # Write the workbook into a fresh buffer so repeated exports never
        # append to previously written bytes.
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
            result.to_excel(writer, sheet_name='Prediction', index=False)

        # Prediction file download button
        st.download_button(
            label='Download Prediction',
            data=excel_buffer.getvalue(),
            file_name='Student Data Prediction.xlsx',
            mime=_XLSX_MIME,
            help='Download student data prediction Excel file')


def main():
    """Build the Streamlit page: title, the two prediction tabs, and footer."""
    st.title('Jaya Jaya Institute Student Prediction')

    # Separate predictions for single data and multiple data
    tab_single, tab_multiple = st.tabs(['Single Data', 'Multiple Data'])
    with tab_single:
        _render_single_tab()
    with tab_multiple:
        _render_multiple_tab()

    st.write('')
    st.write('')

    # Footer: show "2024" alone in 2024, otherwise a "2024 - <current year>" range.
    year_now = datetime.date.today().year
    year = year_now if year_now == 2024 else f'2024 - {year_now}'
    name = "[Andrew Benedictus Jamesie](http://linkedin.com/in/andrewbjamesie 'Andrew Benedictus Jamesie | LinkedIn')"
    st.caption('Copyright © ' + str(year) + ' ' + name)
# Build the page only when this file is run as the main script.
if __name__ == '__main__':
    main()